⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readmodel.m

📁 本人练习将用SBML(系统生物学标记语言)编写的XML模型读入MATLAB结构体中的转换程序
💻 M
字号:
function model = readGMDModel(fileName, defaultParameter, modelDescription)
%readModel read in a S-system model
% 
% model = readGMDModel(fileName, defaultParameter, modelDescription)
% 
% if no arguments are passed into the function, the user will be promoted for
% a file name.
% 
% fileName            File name for file to read in (optional)
% defaultParameter   Default parameter, including order of reactions  for 
%                     each independent variables in every differential 
%                     equations, and velocity constant for each reactions
% modelDescription    Description of model contents(optional)
% 
% Returns a model in the GMDModel Toolbox format:
% 
% model
%   description   Description of model contents
%   rxns          Names of reactions
%   mets          Names of metabolites
%   S             Stoichiometric matrix
%   vc            Velocity constants for each reactions
%   ro            Orders of each related metabolite
%   c             Obejective coefficients
%   subSystems    Subsystem name for each reaction(opt)
%   grRules       Gene-reaction association rule for each reaction (opt)
%   rules         Gene-reaction association rule in computatable form (opt)
%   rxnGeneMat    reaction-to-gene Mappnig in sparse matrix form (opt)
%   genes         List of all genes (opt)
%   rxnNames      Reaction description (opt)
%   metNames      Metabolite description (opt)
%   metFormulas   Metabolite chemical formula (opt)

if (nargin < 2)
    defaultParameter = 1.000000;
else
    if (isempty(defaultParameter))
        defaultParameter = 1.000000;
    end
end

% open a dialog to select file
if (nargin < 1)
    [fileNameFull, filePath] = uigetfile('*.xml');
    if (fileNameFull)
        [t1,t2,t3,t4,tokens] = regexp(fileNameFull,'(.*)\.(\w*)');
        noPathName = tokens{1}{1};
        fileTypeTmp = tokens{1}{2};
        fileName = [filePath noPathName];
    else
        model = 0;
        return;
    end
end
if (nargin < 3)
    if (exist('filePath'))
        modelDescription = noPathName;
    else
        modelDescription = fileName;
    end
end

if isempty(regexp(fileName,'\.xml$'))
    model = readSBMLGMDModel([fileName '.xml'], defaultParameter);
else
    model = readSBMLGMDModel(fileName, defaultParameter);
end

% Check reversibility
model = checkReversibility(model);
% Check uniqueness of metabolites and reactions names
model.b = zeros(length(model.mets),1);

model.description = modelDescription;



% End main function

%% Make sure reversibilities are correctly indicated in the model
function model = checkReversibility(model)

selRev = (model.lb < 0 & model.ub > 0);
model.rev(selRev) = 1;

% readSBMLGMDModel read SBML format genome-scale metabolic dynamic model
function model = readSBMLGMDModel(fileName, defaultParameter)

if ~(exist(fileName,'file'))
    error(['Input file 'fileName' not found']);
end

% Read SBML
modelSBML = TranslateSBML(fileName);

nMetsTmp = length(modelSBML.species);
nRxns = length(modelSBML.reaction);

% Construct initial metabolite list

formulaCount = 0;
speciesList = {};
metFormulas = {};
haveFormulasFlag = false;
for i = 1:nMetsTmp
    % Ignore exchange species
    if (isempty(regexp(modelSBML.species(i).id,'_b$','ONCE'))); % if the id has no "_b$"
        speciesList{end+1} = modelSBML.species(i).id;
        notesField = modelSBML.species(i).notes;
        % Get formula if in notes field
        if (~isempty(notesField))
            [tmp, tmp, tmp, tmp, formula] = parseSBMLNotesField(notesField);
            metFormulas{end+1} = formula;
            formulaCount = formulaCount + 1;
            haveFormulasFlag = true;
        end
    end
end

nMets = length(speciesList);  % finally obtain metaboliteslist without biomass chemicals

%Construct stoichiometric matrix and reaction list
S = sparse(nMets, nRxns);
reacOrder = sparse(nMets, nRxns);
veloConstant = ones(nRxns,1);
c = zeros(nRxns,1);
rxns = cell(nRxns,1);
rules = cell(nRxns,1);
genes = cell(nRxns,1);
allGenes = {};

h = waitbar(0, 'Reading SBML file...');
hasNotesField = false;
for i = 1:nRxns
    if mod(i,10) == 0;
        waitbar(i/nRxns,h);
    end
    % Read the gpra from the notes field
    notesField = modelSBML.reaction(i).notes;
    if (~isempty(notesField))
        [geneList,rule,subSystem,grRule] = parseSBMLNotesField(notesField);
        subSystems{i} = subSystem;
        genes{i} = geneList;
        allGenes = [allGenes geneList];
        rules{i} = rule;
        grRules{i} = grRule;
        hasNotesField = true;
    end
    rev(i) = modelSBML.reaction(i).reversible;
    rxnNameTmp = regexprep(modelSBML.reaction(i).name,'^R_','');
    rxnNameTmp = strrep(rxnNameTmp,'__','_');
    rxnNames{i} = strrep(rxnNameTmp,'_',' ');
    rxnsTmp = regexprep(modelSBML.reaction(i).id,'^R_','');
    rxnsTmp = regexprep(rxnsTmp,'_e_$','(e)');
    rxnsTmp = regexprep(rxnsTmp,'_e$','(e)');
    rxnsTmp = strrep(rxnsTmp,'_LPAREN_','(');
    rxnsTmp = strrep(rxnsTmp,'_RPAREN_',')');
    rxnsTmp = strrep(rxnsTmp,'_COMMA_',',');
    rxnsTmp = strrep(rxnsTmp,'_APOS_','''');
    rxns{i} = strrep(rxnsTmp,'_DASH_','-');
    % Construct S-matrix
    reactantStruct = modelSBML.reaction(i).reactant;
    for j = 1:length(reactantStruct)
        speciesID = find(strcmp(reactantStruct(j).species,speciesList));
        if (~isempty(speciesID))
            stoichCoeff = reactantStruct(j).stoichiometry;
            S(speciesID,i) = -stoichCoeff;
        end
    end
    productStruct = modelSBML.reaction(i).product;
    for j = 1:length(productStruct)
        speciesID = find(strcmp(productStruct(j).species,speciesList));
        if (~isempty(speciesID))
            stoichCoeff = productStruct(j).stoichiometry;
            S(speciesID,i) = -stoichCoeff;
        end
    end
    parameters = modelSBML.reaction(i).kineticLaw.parameter;
    if (~isempty(parameters)) % if parameters exist
        for j = 1:length(parameters)
            paramStruct = parameters(j);
            switch paramStruct.id
                case 'VELOCITY_CONSTANTS'
                    vc(i) = paraStruct.value;
                case 'REACTION_ORDERS'
                    ro(i) = paraStruct.value; %need more details
            end
        end
    else
        vc(i) = 1.000000;
        ro(i) = 1.000000;
    end
end
close(h);

allGenes = unique(allGenes);

if (hasNotesField)
    % Construct gene to rxn mapping
    rxnGeneMat = sparse(nRxns,length(allGenes));
    h = waitbar(0,'Constructing GPR mapping ...');
    for i = 1:nRxns
        if mod(i,10) == 0
            waitbar(i/nRxns,h);
        end
        [tmp,geneInd] = ismember(genes{i},allGenes);
        rxnGeneMat(i,geneInd) = 1;
        for j = 1:length(geneInd)
            rules{i} = strrep(rules{i},['x(' num2str(j) ')'],['x(' num2str(geneInd(j)) ')']);
        end
    end
    close(h);
end

%construct metabolite list
mets = cell(nMets,1);
h = waitbar(0,'Constructing metabolites list...');

for i =1:nMets
    if mod(i,10) == 0
        waitbar(i/nMets, h);
    end
    % Parse metabolite id's
    % Get rid of M_ in the begining of every one
    metID = regexprep(speciesList{i}, '^M_','');
    metID = regexprep(metID,'^_','');
    % Find compartment id
    tmpCell = regexp(metID,'_(.)$','tokens'); 
    if ~isempty(tmpCell)
        compID = tmpCell{1};
        metTmp = [regexprep(metID,'_(.)$','') '[' compID{1} ']'];
                
        % build up a novel format for metabolites'IDs
        % that is the information of compartment has been revised with []
        
        metTmp = regexprep(metTmp,'_','-');
    else
        metTmp = metID;
    end
    mets{i} = metTmp;
    
% Parse metabolites names
% Clean up some of the weird stuff in the sbml files
    
    metNamesTmp = regexprep(modelSBML.species(i).name,'^M_','');
    metNamesTmp = regexprep(metNamesTmp,'^_','');
    metNamesTmp = strrep(metNamesTmp,'_','-');
    metNamesTmp = regexprep(metNamesTmp,'--','-');
    metNamesTmp = regexprep(metNamesTmp,'-$','');
    metNamesAlt{i} = metNamesTmp;
    % Seperate formulas from names
    % [tmp,tmp,tmp,tmp.token] =
    % regexp(metNamesTmp,'(.*)-((([A(Ag)(As)C(Ca)(Cd)(Cl)(Co)(Cu)F(Fe)H(Hg)IKLM(Mg)(Mn)N(Na)(Ni)OPRS(Se)UWXY(Zn)]?)(\d*)))*$');
    if (~haveFormulasFlag)
        [tmp,tmp,tmp,tmp,tokens] = regexp(metNamesTmp,'(.*)-((((A|Ag|As|C|Ca|Cd|Cl|Co|Cu|F|Fe|H|Hg|I|K|L|M|Mg|Mn|N|Na|Ni|O|P|R|S|Se|U|W|X|Y|Zn)?)(\d*)))*$');
        if (isempty(tokens))
            metFormulas{i} = '';
            metNames{i} = metNamesTmp;
        else
            formulaCount = formulaCount + 1;
            metFormulas{i} = tokens{1}{2};
            metNames{i} = tokens{1}{1};
        end        
    else
        metNames{i} = metNamesTmp;
    end

end
close(h);

% Collect everything into a structure
model.rxns = rxns;
model.mets = mets;
model.S = S;
model.vc = vc;
model.ro = ro;
model.c = c;
if (hasNotesField)
    model.rules = rules;
    model.genes = transColumn(allGenes);
    model.rxnGeneMat = rxnGeneMat;
    model.grRules = transColumn(grRules);
    allGenes = transColumn(allGenes);
    model.subSystems = transColumn(subSystems);
end
model.rxnNames = transColumn(rxnNames);
% Only include formulas if at least 90% of metabolites have them (otherwise
% the "formulas" are probably just parts of metabolite names)
if (formulaCount < 0.9*nMets)
    model.metNames = transColumn(metNamesAlt);
else
    model.metNames = transColumn(metNames);
    model.metFormulas = transColumn(metFormulas);
end

%%
function [genes,rule,subSystem,grRule,formula] = parseSBMLNotesField(notesField)
%parseSBMLNotesField Parse the notes field of an SBML file to extract
%gene-rxn associations
%
% [genes,rule] = parseSBMLNotesField(notesField)

if isempty(regexp(notesField,'html:p'))
    tag = 'p';
else
    tag = 'html:p';
end

subSystem = '';
grRule = '';
genes = {};
rule = '';
formula = '';

[tmp,fieldList] = regexp(notesField,['<' tag '>.*?</' tag '>'],'tokens','match');

for i = 1:length(fieldList)
    fieldTmp = regexp(fieldList{i},['<' tag '>(.*)</' tag '>'],'tokens');
    fieldStr = fieldTmp{1}{1};
    if (regexp(fieldStr,'GENE_ASSOCIATION'))
        gprStr = strrep(fieldStr,'GENE_ASSOCIATION: ','');
        grRule = gprStr;
        [genes,rule] = parseBoolean(gprStr);
    elseif (regexp(fieldStr,'GENE ASSOCIATION'))
        gprStr = strrep(fieldStr,'GENE ASSOCIATION: ','');
        grRule = gprStr;
        [genes,rule] = parseBoolean(gprStr);
    elseif (regexp(fieldStr,'SUBSYSTEM'))
        subSystem = strrep(fieldStr,'SUBSYSTEM: ','');
        subSystem = strrep(subSystem,'S_','');
        subSystem = strrep(subSystem,'__','_');
        subSystem = strrep(subSystem,'_',' ');
        if (length(subSystem) == 0)
            subSystem = 'Exchange';
        end
    elseif (regexp(fieldStr,'FORMULA'))
        formula = strrep(fieldStr,'FORMULA: ','');
    end
end

%%

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -