E.coli reconstruction¶

In [1]:

            
                Copied!
                
                    
                    
                
                

        
import pkg_resources
import cobra
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path
from moped import Reaction, Model
from moped.utils import blast_is_installed
from typing import Iterable

print(pkg_resources.get_distribution("moped").version)


def atp_maintenance_scan(
    model: cobra.Model,
    atpase_id: str,
    biomass_id,
    values: Iterable[float],
) -> pd.Series:
    result = {}
    atpase_rxn = model.reactions.get_by_id(atpase_id)
    previous_bounds = atpase_rxn.bounds
    for lower_bound in values:
        atpase_rxn.bounds = (lower_bound, 1000)
        result[lower_bound] = cobra.flux_analysis.pfba(model).fluxes[biomass_id]
    # Restore initial bound
    atpase_rxn.bounds = previous_bounds
    return pd.Series(result)
import pkg_resources
import cobra
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path
from moped import Reaction, Model
from moped.utils import blast_is_installed
from typing import Iterable

print(pkg_resources.get_distribution("moped").version)


def atp_maintenance_scan(
    model: cobra.Model,
    atpase_id: str,
    biomass_id,
    values: Iterable[float],
) -> pd.Series:
    result = {}
    atpase_rxn = model.reactions.get_by_id(atpase_id)
    previous_bounds = atpase_rxn.bounds
    for lower_bound in values:
        atpase_rxn.bounds = (lower_bound, 1000)
        result[lower_bound] = cobra.flux_analysis.pfba(model).fluxes[biomass_id]
    # Restore initial bound
    atpase_rxn.bounds = previous_bounds
    return pd.Series(result)

1.6.5

To construct a metabolic model for E. coli K-12, we first read a MetaCyc PDGB into a moped model.

In [2]:

            
                Copied!
                
                    
                    
                
                

        
# Parsing metacyc can take a bit, so we cached the results here
metacyc_cache_path = Path("../data/metacyc/25.1/metacyc.pickle")

if metacyc_cache_path.exists():
    metacyc = Model.load_from_pickle(metacyc_cache_path)
else:
    metacyc = Model()
    metacyc.read_from_pgdb(pgdb_path="../data/metacyc/25.1/data")
    metacyc.to_pickle(metacyc_cache_path)
# Parsing metacyc can take a bit, so we cached the results here
metacyc_cache_path = Path("../data/metacyc/25.1/metacyc.pickle")

if metacyc_cache_path.exists():
    metacyc = Model.load_from_pickle(metacyc_cache_path)
else:
    metacyc = Model()
    metacyc.read_from_pgdb(pgdb_path="../data/metacyc/25.1/data")
    metacyc.to_pickle(metacyc_cache_path)

Create model from genome¶

If we have BLAST installed, we can use the function create_submodel_from_genome to import a fasta file and BLAST it against the MetaCyc database to construct a model of all reactions that can be found in the genome sequence.

Alternatively, its possible to import the BioCyc PGDB files of E.coli K-12 using read_from_pgdb and update_from_reference to avoid version discrepancies.

In [3]:

            
                Copied!
                
                    
                    
                
                

        
if not blast_is_installed():
    raise OSError("Could not find blast. Please make sure its on your path.")

ecoli = metacyc.create_submodel_from_proteome(
    "../data/proteomes/ecoli.fasta",
    name="ecoli",
    cache_blast_results=True,
)

# Make sure that the central carbon metabolism is there

for rxn in metacyc.pathways["GLYCOLYSIS"]:
    if rxn not in ecoli.reactions:
        ecoli.add_reaction_from_reference(metacyc, rxn)

for rxn in metacyc.pathways["TCA"]:
    if rxn not in ecoli.reactions:
        ecoli.add_reaction_from_reference(metacyc, rxn)
if not blast_is_installed():
    raise OSError("Could not find blast. Please make sure its on your path.")

ecoli = metacyc.create_submodel_from_proteome(
    "../data/proteomes/ecoli.fasta",
    name="ecoli",
    cache_blast_results=True,
)

# Make sure that the central carbon metabolism is there

for rxn in metacyc.pathways["GLYCOLYSIS"]:
    if rxn not in ecoli.reactions:
        ecoli.add_reaction_from_reference(metacyc, rxn)

for rxn in metacyc.pathways["TCA"]:
    if rxn not in ecoli.reactions:
        ecoli.add_reaction_from_reference(metacyc, rxn)

Topological gap-filling¶

Our model right now probably cannot produce all biomass compounds.

To fix this, we apply topological gap-filling. For this we need to perform cofactor and reversibility duplication for both draft and reference models (see manuscript for an explanation why that is necessary).

We then define a minimal medium seed.

In [4]:

            
                Copied!
                
                    
                    
                
                

        
metacyc.cofactor_duplication()
metacyc.reversibility_duplication()
ecoli.cofactor_duplication()
ecoli.reversibility_duplication()

biomass_composition = ecoli.get_biomass_template(organism="ecoli")

medium = [
    "ALPHA-GLUCOSE_c",
    "WATER_c",
    "PROTON_c",
    "OXYGEN-MOLECULE_c",
    "Pi_c",
    "SULFATE_c",
    "AMMONIA_c",
]
metacyc.cofactor_duplication()
metacyc.reversibility_duplication()
ecoli.cofactor_duplication()
ecoli.reversibility_duplication()

biomass_composition = ecoli.get_biomass_template(organism="ecoli")

medium = [
    "ALPHA-GLUCOSE_c",
    "WATER_c",
    "PROTON_c",
    "OXYGEN-MOLECULE_c",
    "Pi_c",
    "SULFATE_c",
    "AMMONIA_c",
]

The function get_gapfilling_reactions() uses Meneco gap-filling to find a minimal set of missing reactions from the reference moped object to produce all targets, and returns the list of reaction IDs.
A result of previous gap-filling is provided, but can be commented out for new calculations.
These reactions are then added to the draft model from the reference model.
To help the topological gap-filling algorithm, we first gap-filled for nucleotides and then for the biomass composition.
Doing these things calculations step-wise can help finding a minimal set of required reactions, as the solution space is smaller.
However, this can lead to the set of reactions not being minimal, as there might have been a set in the combination of both searches that is smaller.
Use your own judgement here to whether the set of reactions needs to be as minimal as possible.

In [5]:

            
                Copied!
                
                    
                    
                
                

        
# Topological gap-filling can take quite a while, so we cached the results here

gapfill_result = [
    "GLUCOSE-1-DEHYDROGENASE-NAD+-RXN__var__0_c__cof__",
    "RXN0-1402_c__cof__",
    "OROTATE-REDUCTASE-NADH-RXN_c__cof__",
    "GLUCONATE-DEHYDRATASE-RXN_c",
]

if gapfill_result is None:
    gapfill_result = ecoli.get_gapfilling_reactions(
        reference_model=metacyc,
        seed=medium + metacyc.get_weak_cofactor_duplications(),
        targets=["UTP_c", "ATP_c", "GTP_c", "CTP_c"],
        verbose=True,
    )

# You can inspect the reactions before adding them to the model
for reaction_id in gapfill_result:
    ecoli.add_reaction_from_reference(reference_model=metacyc, reaction_id=reaction_id)
# Topological gap-filling can take quite a while, so we cached the results here

gapfill_result = [
    "GLUCOSE-1-DEHYDROGENASE-NAD+-RXN__var__0_c__cof__",
    "RXN0-1402_c__cof__",
    "OROTATE-REDUCTASE-NADH-RXN_c__cof__",
    "GLUCONATE-DEHYDRATASE-RXN_c",
]

if gapfill_result is None:
    gapfill_result = ecoli.get_gapfilling_reactions(
        reference_model=metacyc,
        seed=medium + metacyc.get_weak_cofactor_duplications(),
        targets=["UTP_c", "ATP_c", "GTP_c", "CTP_c"],
        verbose=True,
    )

# You can inspect the reactions before adding them to the model
for reaction_id in gapfill_result:
    ecoli.add_reaction_from_reference(reference_model=metacyc, reaction_id=reaction_id)

In [6]:

            
                Copied!
                
                    
                    
                
                

        
# Topological gap-filling can take quite a while, so we cached the results here

gapfill_result = [
    "RXN-18377_c",
    "ORNITHINE-CYCLODEAMINASE-RXN_c",
    "MALONATE-SEMIALDEHYDE-DEHYDROGENASE-RXN__var__1_c__cof__",
]

if gapfill_result is None:
    gapfill_result = ecoli.get_gapfilling_reactions(
        reference_model=metacyc,
        seed=medium + metacyc.get_weak_cofactor_duplications(),
        targets=biomass_composition,
        verbose=True,
    )
for reaction_id in gapfill_result:
    ecoli.add_reaction_from_reference(reference_model=metacyc, reaction_id=reaction_id)
# Topological gap-filling can take quite a while, so we cached the results here

gapfill_result = [
    "RXN-18377_c",
    "ORNITHINE-CYCLODEAMINASE-RXN_c",
    "MALONATE-SEMIALDEHYDE-DEHYDROGENASE-RXN__var__1_c__cof__",
]

if gapfill_result is None:
    gapfill_result = ecoli.get_gapfilling_reactions(
        reference_model=metacyc,
        seed=medium + metacyc.get_weak_cofactor_duplications(),
        targets=biomass_composition,
        verbose=True,
    )
for reaction_id in gapfill_result:
    ecoli.add_reaction_from_reference(reference_model=metacyc, reaction_id=reaction_id)

As the topological analysis is complete, we again remove the cofactor and reversibility duplications.

In [7]:

            
                Copied!
                
ecoli.remove_cofactor_duplication()
ecoli.remove_reversibility_duplication()
metacyc.remove_cofactor_duplication()
metacyc.remove_reversibility_duplication()
ecoli.remove_cofactor_duplication()
ecoli.remove_reversibility_duplication()
metacyc.remove_cofactor_duplication()
metacyc.remove_reversibility_duplication()

Flux Balance Analysis (FBA)¶

To simulate our model using FBA we add a biomass reaction, which is a simplified version of the E. coli iJO1366 reaction, and an ATPase reaction as an ATP maintenance requirement to the model.

In [8]:

            
                Copied!
                
                    
                    
                
                

        
biomass_rxn = Reaction(
    id="BIOMASS",
    stoichiometries=biomass_composition,
    bounds=(0.0, 1000.0),
)
atpase_rxn = Reaction(
    id="ATPase",
    stoichiometries={"ATP_c": -1, "WATER_c": -1, "ADP_c": 1, "Pi_c": 1, "PROTON_c": 1},
    bounds=(0.0, 1000.0),
)

ecoli.add_reaction(biomass_rxn)
ecoli.add_reaction(atpase_rxn)
biomass_rxn = Reaction(
    id="BIOMASS",
    stoichiometries=biomass_composition,
    bounds=(0.0, 1000.0),
)
atpase_rxn = Reaction(
    id="ATPase",
    stoichiometries={"ATP_c": -1, "WATER_c": -1, "ADP_c": 1, "Pi_c": 1, "PROTON_c": 1},
    bounds=(0.0, 1000.0),
)

ecoli.add_reaction(biomass_rxn)
ecoli.add_reaction(atpase_rxn)

We add exchange reactions (influx and outflux) for all compounds in the medium using add_medium_component and transporters into the cytosol via add_transport_reaction.
In this model, we allow carbon dioxide to leave the model via a respective efflux reaction.
We define a new variable in which we export our moped object into a CobraPy object, and set the objective coefficient of our Biomass reaction to 1.

In [9]:

            
                Copied!
                
for compound_id in medium + ["CARBON-DIOXIDE_c"]:
    if compound_id in ecoli.compounds:
        ecoli.add_transport_reaction(compound_id, "EXTRACELLULAR")
        ecoli.add_medium_component(compound_id, "EXTRACELLULAR")

ecoli.reactions["EX_ALPHA-GLUCOSE_e"].bounds = (-10, 10)
ecoli.objective = {"BIOMASS": 1}

cobra_model = ecoli.to_cobra()
cobra_model.optimize()["BIOMASS"]
for compound_id in medium + ["CARBON-DIOXIDE_c"]:
    if compound_id in ecoli.compounds:
        ecoli.add_transport_reaction(compound_id, "EXTRACELLULAR")
        ecoli.add_medium_component(compound_id, "EXTRACELLULAR")

ecoli.reactions["EX_ALPHA-GLUCOSE_e"].bounds = (-10, 10)
ecoli.objective = {"BIOMASS": 1}

cobra_model = ecoli.to_cobra()
cobra_model.optimize()["BIOMASS"]

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled

Out[9]:

1.4714438366073488

Let's do a quick test which compounds can flow in and out of the system

In [10]:

            
                Copied!
                
                    
                    
                
                

        
outfluxes = pd.Series(
    {
        reaction.id: reaction.flux
        for reaction in cobra_model.reactions
        if reaction.id.startswith("EX_") and abs(reaction.flux) > 0.1
    }
)
outfluxes.sort_values()
outfluxes = pd.Series(
    {
        reaction.id: reaction.flux
        for reaction in cobra_model.reactions
        if reaction.id.startswith("EX_") and abs(reaction.flux) > 0.1
    }
)
outfluxes.sort_values()

Out[10]:

EX_OXYGEN-MOLECULE_e   -14.280320
EX_AMMONIA_e           -14.280320
EX_ALPHA-GLUCOSE_e     -10.000000
EX_PROTON_e             -6.461316
EX_Pi_e                 -2.879555
EX_SULFATE_e            -0.351137
EX_CARBON-DIOXIDE_e     14.115187
EX_WATER_e              49.168990
dtype: float64

And how the metabolites in the medium behave

In [11]:

            
                Copied!
                
medium_fluxes = pd.Series({i: cobra_model.reactions.get_by_id(f"EX_{i[:-1]}e").flux for i in medium})
medium_fluxes.sort_values()
medium_fluxes = pd.Series({i: cobra_model.reactions.get_by_id(f"EX_{i[:-1]}e").flux for i in medium})
medium_fluxes.sort_values()

Out[11]:

OXYGEN-MOLECULE_c   -14.280320
AMMONIA_c           -14.280320
ALPHA-GLUCOSE_c     -10.000000
PROTON_c             -6.461316
Pi_c                 -2.879555
SULFATE_c            -0.351137
WATER_c              49.168990
dtype: float64

After this quick initial test, we now compare it to a well established model of E. coli metabolism (iJO1366).
For this we systematically increase the ATP maintenance requirement.
The draft reconstruction model does not react to increasing ATP maintenance requirement, which suggests that there is an energy generating cycle in the reconstructed model.

In [12]:

            
                Copied!
                
                    
                    
                
                

        
ijo = cobra.io.read_sbml_model("../data/models/iJO1366.xml")

atp_maintenance = np.linspace(2.0, 100, 10)
biomass_by_atp_maintenance_reconstruction = atp_maintenance_scan(
    cobra_model, "ATPase", "BIOMASS", atp_maintenance
)
biomass_by_atp_maintenance_ijo = atp_maintenance_scan(
    ijo, "ATPM", "BIOMASS_Ec_iJO1366_core_53p95M", atp_maintenance
)

fig, ax = plt.subplots()
biomass_by_atp_maintenance_reconstruction.plot(label="Moped E. coli Model", linewidth=3.5, ax=ax)
biomass_by_atp_maintenance_ijo.plot(label="iJO1366 Model", linewidth=3.5, ax=ax)
ax.legend(loc="best")
ax.set_xlabel("ATP maintenance Flux", fontsize=20)
ax.set_ylabel("Biomass Flux", fontsize=20)
plt.show()
ijo = cobra.io.read_sbml_model("../data/models/iJO1366.xml")

atp_maintenance = np.linspace(2.0, 100, 10)
biomass_by_atp_maintenance_reconstruction = atp_maintenance_scan(
    cobra_model, "ATPase", "BIOMASS", atp_maintenance
)
biomass_by_atp_maintenance_ijo = atp_maintenance_scan(
    ijo, "ATPM", "BIOMASS_Ec_iJO1366_core_53p95M", atp_maintenance
)

fig, ax = plt.subplots()
biomass_by_atp_maintenance_reconstruction.plot(label="Moped E. coli Model", linewidth=3.5, ax=ax)
biomass_by_atp_maintenance_ijo.plot(label="iJO1366 Model", linewidth=3.5, ax=ax)
ax.legend(loc="best")
ax.set_xlabel("ATP maintenance Flux", fontsize=20)
ax.set_ylabel("Biomass Flux", fontsize=20)
plt.show()

As an example manual curation, we identified reversible reactions producing ATP with an iterative process of doing parsimonious FBA and then manually checking backwards running reactions.
Reversible reactions that have been found to participate in the ATP producing loop were made irreversible, eliminating the ATP producing loop.

pfba = cobra.flux_analysis.pfba(cobra_model)
ecoli.get_producing_reactions(pfba, "ATP_c")

In [13]:

            
                Copied!
                
                    
                    
                
                

        
cobra_model.reactions.get_by_id("SULFATE-ADENYLYLTRANS-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("ADENYL-KIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("SUCCCOASYN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("GLYCEROL-KIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("PROPKIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("ACETATEKIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("PROPIONYL-COA-CARBOXY-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("SULFATE-ADENYLYLTRANS-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("ADENYL-KIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("SUCCCOASYN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("GLYCEROL-KIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("PROPKIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("ACETATEKIN-RXN_c").bounds = (0, 1000)
cobra_model.reactions.get_by_id("PROPIONYL-COA-CARBOXY-RXN_c").bounds = (0, 1000)

After the manual curation step, now the draft reconstruction and the iJO1366 model behave very similarly, however they are offset.

In [16]:

            
                Copied!
                
                    
                    
                
                

        
biomass_by_atp_maintenance_reconstruction = atp_maintenance_scan(
    cobra_model, "ATPase", "BIOMASS", atp_maintenance
)

fig, ax = plt.subplots()
biomass_by_atp_maintenance_reconstruction.plot(label="Moped E. coli Model", linewidth=3.5, ax=ax)
biomass_by_atp_maintenance_ijo.plot(label="iJO1366 Model", linewidth=3.5, ax=ax)
ax.legend(loc="best")
ax.set_xlabel("ATP maintenance Flux", fontsize=20)
ax.set_ylabel("Biomass Flux", fontsize=20)
plt.show()
biomass_by_atp_maintenance_reconstruction = atp_maintenance_scan(
    cobra_model, "ATPase", "BIOMASS", atp_maintenance
)

fig, ax = plt.subplots()
biomass_by_atp_maintenance_reconstruction.plot(label="Moped E. coli Model", linewidth=3.5, ax=ax)
biomass_by_atp_maintenance_ijo.plot(label="iJO1366 Model", linewidth=3.5, ax=ax)
ax.legend(loc="best")
ax.set_xlabel("ATP maintenance Flux", fontsize=20)
ax.set_ylabel("Biomass Flux", fontsize=20)
plt.show()

If we correct for the offset, we can see that the models indeed react very similarly to the change in ATP maintenance flux

In [26]:

            
                Copied!
                
                    
                    
                
                

        
scaled = (
    biomass_by_atp_maintenance_reconstruction
    * biomass_by_atp_maintenance_ijo.max()
    / biomass_by_atp_maintenance_reconstruction.max()
)

fig, ax = plt.subplots()
scaled.plot(label="Moped E. coli Model", linewidth=3.5, ax=ax)
biomass_by_atp_maintenance_ijo.plot(label="iJO1366 Model", linewidth=3.5, ax=ax)
ax.legend(loc="best")
ax.set_xlabel("ATP maintenance Flux", fontsize=20)
ax.set_ylabel("Biomass Flux", fontsize=20)
plt.show()
scaled = (
    biomass_by_atp_maintenance_reconstruction
    * biomass_by_atp_maintenance_ijo.max()
    / biomass_by_atp_maintenance_reconstruction.max()
)

fig, ax = plt.subplots()
scaled.plot(label="Moped E. coli Model", linewidth=3.5, ax=ax)
biomass_by_atp_maintenance_ijo.plot(label="iJO1366 Model", linewidth=3.5, ax=ax)
ax.legend(loc="best")
ax.set_xlabel("ATP maintenance Flux", fontsize=20)
ax.set_ylabel("Biomass Flux", fontsize=20)
plt.show()

No we export the model into a fully annotated SBML file to share it with the community.

In [15]:

            
                Copied!
                
ecoli.to_sbml("../temp/ecoli.xml")
ecoli.to_sbml("../temp/ecoli.xml")