📄 readmodel.m
字号:
function model = readGMDModel(fileName, defaultParameter, modelDescription)
%readModel read in a S-system model
%
% model = readGMDModel(fileName, defaultParameter, modelDescription)
%
% if no arguments are passed into the function, the user will be promoted for
% a file name.
%
% fileName File name for file to read in (optional)
% defaultParameter Default parameter, including order of reactions for
% each independent variables in every differential
% equations, and velocity constant for each reactions
% modelDescription Description of model contents(optional)
%
% Returns a model in the GMDModel Toolbox format:
%
% model
% description Description of model contents
% rxns Names of reactions
% mets Names of metabolites
% S Stoichiometric matrix
% vc Velocity constants for each reactions
% ro Orders of each related metabolite
% c Obejective coefficients
% subSystems Subsystem name for each reaction(opt)
% grRules Gene-reaction association rule for each reaction (opt)
% rules Gene-reaction association rule in computatable form (opt)
% rxnGeneMat reaction-to-gene Mappnig in sparse matrix form (opt)
% genes List of all genes (opt)
% rxnNames Reaction description (opt)
% metNames Metabolite description (opt)
% metFormulas Metabolite chemical formula (opt)
if (nargin < 2)
defaultParameter = 1.000000;
else
if (isempty(defaultParameter))
defaultParameter = 1.000000;
end
end
% open a dialog to select file
if (nargin < 1)
[fileNameFull, filePath] = uigetfile('*.xml');
if (fileNameFull)
[t1,t2,t3,t4,tokens] = regexp(fileNameFull,'(.*)\.(\w*)');
noPathName = tokens{1}{1};
fileTypeTmp = tokens{1}{2};
fileName = [filePath noPathName];
else
model = 0;
return;
end
end
if (nargin < 3)
if (exist('filePath'))
modelDescription = noPathName;
else
modelDescription = fileName;
end
end
if isempty(regexp(fileName,'\.xml$'))
model = readSBMLGMDModel([fileName '.xml'], defaultParameter);
else
model = readSBMLGMDModel(fileName, defaultParameter);
end
% Check reversibility
model = checkReversibility(model);
% Check uniqueness of metabolites and reactions names
model.b = zeros(length(model.mets),1);
model.description = modelDescription;
% End main function
%% Make sure reversibilities are correctly indicated in the model
function model = checkReversibility(model)
selRev = (model.lb < 0 & model.ub > 0);
model.rev(selRev) = 1;
% readSBMLGMDModel read SBML format genome-scale metabolic dynamic model
function model = readSBMLGMDModel(fileName, defaultParameter)
if ~(exist(fileName,'file'))
error(['Input file 'fileName' not found']);
end
% Read SBML
modelSBML = TranslateSBML(fileName);
nMetsTmp = length(modelSBML.species);
nRxns = length(modelSBML.reaction);
% Construct initial metabolite list
formulaCount = 0;
speciesList = {};
metFormulas = {};
haveFormulasFlag = false;
for i = 1:nMetsTmp
% Ignore exchange species
if (isempty(regexp(modelSBML.species(i).id,'_b$','ONCE'))); % if the id has no "_b$"
speciesList{end+1} = modelSBML.species(i).id;
notesField = modelSBML.species(i).notes;
% Get formula if in notes field
if (~isempty(notesField))
[tmp, tmp, tmp, tmp, formula] = parseSBMLNotesField(notesField);
metFormulas{end+1} = formula;
formulaCount = formulaCount + 1;
haveFormulasFlag = true;
end
end
end
nMets = length(speciesList); % finally obtain metaboliteslist without biomass chemicals
%Construct stoichiometric matrix and reaction list
S = sparse(nMets, nRxns);
reacOrder = sparse(nMets, nRxns);
veloConstant = ones(nRxns,1);
c = zeros(nRxns,1);
rxns = cell(nRxns,1);
rules = cell(nRxns,1);
genes = cell(nRxns,1);
allGenes = {};
h = waitbar(0, 'Reading SBML file...');
hasNotesField = false;
for i = 1:nRxns
if mod(i,10) == 0;
waitbar(i/nRxns,h);
end
% Read the gpra from the notes field
notesField = modelSBML.reaction(i).notes;
if (~isempty(notesField))
[geneList,rule,subSystem,grRule] = parseSBMLNotesField(notesField);
subSystems{i} = subSystem;
genes{i} = geneList;
allGenes = [allGenes geneList];
rules{i} = rule;
grRules{i} = grRule;
hasNotesField = true;
end
rev(i) = modelSBML.reaction(i).reversible;
rxnNameTmp = regexprep(modelSBML.reaction(i).name,'^R_','');
rxnNameTmp = strrep(rxnNameTmp,'__','_');
rxnNames{i} = strrep(rxnNameTmp,'_',' ');
rxnsTmp = regexprep(modelSBML.reaction(i).id,'^R_','');
rxnsTmp = regexprep(rxnsTmp,'_e_$','(e)');
rxnsTmp = regexprep(rxnsTmp,'_e$','(e)');
rxnsTmp = strrep(rxnsTmp,'_LPAREN_','(');
rxnsTmp = strrep(rxnsTmp,'_RPAREN_',')');
rxnsTmp = strrep(rxnsTmp,'_COMMA_',',');
rxnsTmp = strrep(rxnsTmp,'_APOS_','''');
rxns{i} = strrep(rxnsTmp,'_DASH_','-');
% Construct S-matrix
reactantStruct = modelSBML.reaction(i).reactant;
for j = 1:length(reactantStruct)
speciesID = find(strcmp(reactantStruct(j).species,speciesList));
if (~isempty(speciesID))
stoichCoeff = reactantStruct(j).stoichiometry;
S(speciesID,i) = -stoichCoeff;
end
end
productStruct = modelSBML.reaction(i).product;
for j = 1:length(productStruct)
speciesID = find(strcmp(productStruct(j).species,speciesList));
if (~isempty(speciesID))
stoichCoeff = productStruct(j).stoichiometry;
S(speciesID,i) = -stoichCoeff;
end
end
parameters = modelSBML.reaction(i).kineticLaw.parameter;
if (~isempty(parameters)) % if parameters exist
for j = 1:length(parameters)
paramStruct = parameters(j);
switch paramStruct.id
case 'VELOCITY_CONSTANTS'
vc(i) = paraStruct.value;
case 'REACTION_ORDERS'
ro(i) = paraStruct.value; %need more details
end
end
else
vc(i) = 1.000000;
ro(i) = 1.000000;
end
end
close(h);
allGenes = unique(allGenes);
if (hasNotesField)
% Construct gene to rxn mapping
rxnGeneMat = sparse(nRxns,length(allGenes));
h = waitbar(0,'Constructing GPR mapping ...');
for i = 1:nRxns
if mod(i,10) == 0
waitbar(i/nRxns,h);
end
[tmp,geneInd] = ismember(genes{i},allGenes);
rxnGeneMat(i,geneInd) = 1;
for j = 1:length(geneInd)
rules{i} = strrep(rules{i},['x(' num2str(j) ')'],['x(' num2str(geneInd(j)) ')']);
end
end
close(h);
end
%construct metabolite list
mets = cell(nMets,1);
h = waitbar(0,'Constructing metabolites list...');
for i =1:nMets
if mod(i,10) == 0
waitbar(i/nMets, h);
end
% Parse metabolite id's
% Get rid of M_ in the begining of every one
metID = regexprep(speciesList{i}, '^M_','');
metID = regexprep(metID,'^_','');
% Find compartment id
tmpCell = regexp(metID,'_(.)$','tokens');
if ~isempty(tmpCell)
compID = tmpCell{1};
metTmp = [regexprep(metID,'_(.)$','') '[' compID{1} ']'];
% build up a novel format for metabolites'IDs
% that is the information of compartment has been revised with []
metTmp = regexprep(metTmp,'_','-');
else
metTmp = metID;
end
mets{i} = metTmp;
% Parse metabolites names
% Clean up some of the weird stuff in the sbml files
metNamesTmp = regexprep(modelSBML.species(i).name,'^M_','');
metNamesTmp = regexprep(metNamesTmp,'^_','');
metNamesTmp = strrep(metNamesTmp,'_','-');
metNamesTmp = regexprep(metNamesTmp,'--','-');
metNamesTmp = regexprep(metNamesTmp,'-$','');
metNamesAlt{i} = metNamesTmp;
% Seperate formulas from names
% [tmp,tmp,tmp,tmp.token] =
% regexp(metNamesTmp,'(.*)-((([A(Ag)(As)C(Ca)(Cd)(Cl)(Co)(Cu)F(Fe)H(Hg)IKLM(Mg)(Mn)N(Na)(Ni)OPRS(Se)UWXY(Zn)]?)(\d*)))*$');
if (~haveFormulasFlag)
[tmp,tmp,tmp,tmp,tokens] = regexp(metNamesTmp,'(.*)-((((A|Ag|As|C|Ca|Cd|Cl|Co|Cu|F|Fe|H|Hg|I|K|L|M|Mg|Mn|N|Na|Ni|O|P|R|S|Se|U|W|X|Y|Zn)?)(\d*)))*$');
if (isempty(tokens))
metFormulas{i} = '';
metNames{i} = metNamesTmp;
else
formulaCount = formulaCount + 1;
metFormulas{i} = tokens{1}{2};
metNames{i} = tokens{1}{1};
end
else
metNames{i} = metNamesTmp;
end
end
close(h);
% Collect everything into a structure
model.rxns = rxns;
model.mets = mets;
model.S = S;
model.vc = vc;
model.ro = ro;
model.c = c;
if (hasNotesField)
model.rules = rules;
model.genes = transColumn(allGenes);
model.rxnGeneMat = rxnGeneMat;
model.grRules = transColumn(grRules);
allGenes = transColumn(allGenes);
model.subSystems = transColumn(subSystems);
end
model.rxnNames = transColumn(rxnNames);
% Only include formulas if at least 90% of metabolites have them (otherwise
% the "formulas" are probably just parts of metabolite names)
if (formulaCount < 0.9*nMets)
model.metNames = transColumn(metNamesAlt);
else
model.metNames = transColumn(metNames);
model.metFormulas = transColumn(metFormulas);
end
%%
function [genes,rule,subSystem,grRule,formula] = parseSBMLNotesField(notesField)
%parseSBMLNotesField Parse the notes field of an SBML file to extract
%gene-rxn associations
%
% [genes,rule] = parseSBMLNotesField(notesField)
if isempty(regexp(notesField,'html:p'))
tag = 'p';
else
tag = 'html:p';
end
subSystem = '';
grRule = '';
genes = {};
rule = '';
formula = '';
[tmp,fieldList] = regexp(notesField,['<' tag '>.*?</' tag '>'],'tokens','match');
for i = 1:length(fieldList)
fieldTmp = regexp(fieldList{i},['<' tag '>(.*)</' tag '>'],'tokens');
fieldStr = fieldTmp{1}{1};
if (regexp(fieldStr,'GENE_ASSOCIATION'))
gprStr = strrep(fieldStr,'GENE_ASSOCIATION: ','');
grRule = gprStr;
[genes,rule] = parseBoolean(gprStr);
elseif (regexp(fieldStr,'GENE ASSOCIATION'))
gprStr = strrep(fieldStr,'GENE ASSOCIATION: ','');
grRule = gprStr;
[genes,rule] = parseBoolean(gprStr);
elseif (regexp(fieldStr,'SUBSYSTEM'))
subSystem = strrep(fieldStr,'SUBSYSTEM: ','');
subSystem = strrep(subSystem,'S_','');
subSystem = strrep(subSystem,'__','_');
subSystem = strrep(subSystem,'_',' ');
if (length(subSystem) == 0)
subSystem = 'Exchange';
end
elseif (regexp(fieldStr,'FORMULA'))
formula = strrep(fieldStr,'FORMULA: ','');
end
end
%%
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -