diff --git a/INIT/getINITModel.m b/INIT/getINITModel.m index 9626d9e6..e48bf1a2 100644 --- a/INIT/getINITModel.m +++ b/INIT/getINITModel.m @@ -391,6 +391,9 @@ if isfield(model,'geneShortNames') model.geneShortNames(I)=[]; end +if isfield(model,'proteins') + model.proteins(I)=[]; +end if isfield(model,'geneMiriams') model.geneMiriams(I)=[]; end diff --git a/INIT/mergeLinear.m b/INIT/mergeLinear.m index 4ce4c3af..1378353b 100644 --- a/INIT/mergeLinear.m +++ b/INIT/mergeLinear.m @@ -29,6 +29,9 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames={}; end +if isfield(reducedModel,'proteins') + reducedModel.proteins={}; +end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams={}; end diff --git a/INIT/removeLowScoreGenes.m b/INIT/removeLowScoreGenes.m index 589a54b9..76a5bdeb 100644 --- a/INIT/removeLowScoreGenes.m +++ b/INIT/removeLowScoreGenes.m @@ -119,6 +119,9 @@ if isfield(newModel,'geneShortNames') newModel.geneShortNames(remInd) = []; end +if isfield(newModel,'proteins') + newModel.proteins(remInd) = []; +end if isfield(newModel,'geneMiriams') newModel.geneMiriams(remInd) = []; end diff --git a/core/addGenesRaven.m b/core/addGenesRaven.m index 9be3abf4..cc44f153 100755 --- a/core/addGenesRaven.m +++ b/core/addGenesRaven.m @@ -14,6 +14,8 @@ % default '') % geneMiriams cell array with MIRIAM structures (optional, % default []) +% proteins cell array of protein names associated to +% each gene (optional, default '') % % newModel an updated model structure % @@ -56,6 +58,9 @@ if isfield(genesToAdd,'geneShortNames') genesToAdd.geneShortNames(I)=[]; end + if isfield(genesToAdd,'proteins') + genesToAdd.proteins(I)=[]; + end if isfield(genesToAdd,'geneMiriams') genesToAdd.geneMiriams(I)=[]; end @@ -81,6 +86,24 @@ newModel.geneShortNames=[newModel.geneShortNames;filler]; end end +if isfield(genesToAdd,'proteins') + genesToAdd.proteins=convertCharArray(genesToAdd.proteins); + if numel(genesToAdd.proteins)~=nGenes + EM='genesToAdd.proteins 
must have the same number of elements as genesToAdd.genes'; + dispEM(EM); + end + %Add empty field if it doesn't exist + if ~isfield(newModel,'proteins') + newModel.proteins=largeFiller; + end + newModel.proteins=[newModel.proteins;genesToAdd.proteins(:)]; +else + %Add empty strings if structure is in model + if isfield(newModel,'proteins') + newModel.proteins=[newModel.proteins;filler]; + end +end + %Don't check the type of geneMiriams if isfield(genesToAdd,'geneMiriams') diff --git a/core/checkModelStruct.m b/core/checkModelStruct.m index 7526d590..139c43d8 100755 --- a/core/checkModelStruct.m +++ b/core/checkModelStruct.m @@ -120,6 +120,21 @@ function checkModelStruct(model,throwErrors,trimWarnings) EM='The "grRules" field must be a cell array of strings'; dispEM(EM,throwErrors); end + if ~isfield(model,'genes') + EM='If "grRules" field exists, the model should also contain a "genes" field'; + dispEM(EM,throwErrors); + else + geneList = strjoin(model.grRules); + geneList = regexp(geneList,' |)|(|and|or','split'); % Remove all grRule punctuation + geneList = geneList(~cellfun(@isempty,geneList)); % Remove spaces and empty genes + geneList = setdiff(unique(geneList),model.genes); + if ~isempty(geneList) + problemGrRules = model.rxns(contains(model.grRules,geneList)); + problemGrRules = strjoin(problemGrRules(:),'; '); + EM=['The reaction(s) "' problemGrRules '" contain the following genes in its "grRules" field, but these are not in the "genes" field:']; + dispEM(EM,throwErrors,geneList); + end + end end if isfield(model,'rxnComps') if ~isnumeric(model.rxnComps) @@ -229,6 +244,26 @@ function checkModelStruct(model,throwErrors,trimWarnings) end end +%Validate format of ids +fields = {'rxns';'mets';'comps';'genes'}; +fieldNames = {'reaction';'metabolite';'compartment';'gene'}; +fieldPrefix = {'R_';'M_';'C_';'G_'}; +for i=1:numel(fields) + try + numIDs = ~startsWith(model.(fields{i}),regexpPattern('^[a-zA-Z_]')); + catch + numIDs = []; + end + if any(numIDs) + EM = 
['The following ' fieldNames{i} ' identifiers do not start '... + 'with a letter or _ (conflicting with SBML specifications). '... + 'This does not impact RAVEN functionality, but be aware that '... + 'exportModel will automatically add ' fieldPrefix{i} ... + ' prefixes to all ' fieldNames{i} ' identifiers:']; + dispEM(EM,false,{model.(fields{i}){numIDs}},trimWarnings); + end +end + %Duplicates EM='The following reaction IDs are duplicates:'; dispEM(EM,throwErrors,model.rxns(duplicates(model.rxns)),trimWarnings); @@ -259,10 +294,10 @@ function checkModelStruct(model,throwErrors,trimWarnings) dispEM(EM,false,model.comps(I),trimWarnings); %Contradicting bounds -EM='The following reactions have contradicting bounds:'; +EM='The following reactions have contradicting bounds (lower bound is higher than upper bound):'; dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); -EM='The following reactions have bounds contradicting their reversibility:'; -dispEM(EM,throwErrors,model.rxns(model.lb<0 & model.rev==0),trimWarnings); +EM='The following reactions have lower and upper bounds that indicate reversibility, but are indicated as irreversible in model.rev:'; +dispEM(EM,false,model.rxns(model.lb < 0 & model.ub > 0 & model.rev==0),trimWarnings); %Multiple or no objective functions not allowed in SBML L3V1 FBCv2 if numel(find(model.c))>1 @@ -272,9 +307,6 @@ function checkModelStruct(model,throwErrors,trimWarnings) EM='No objective function found. 
This might be intended, but results in FBCv2 non-compliant SBML file when exported'; dispEM(EM,false); end - -EM='The following reactions have contradicting bounds:'; -dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); %Mapping of compartments if isfield(model,'compOutside') @@ -292,8 +324,8 @@ function checkModelStruct(model,throwErrors,trimWarnings) end end end -EM='The following metabolite IDs begin with a number directly followed by space:'; -dispEM(EM,throwErrors,model.mets(I),trimWarnings); +EM='The following metabolite names begin with a number directly followed by space, which could potentially cause problems:'; +dispEM(EM,false,model.metNames(I),trimWarnings); %Non-parseable composition if isfield(model,'metFormulas') diff --git a/core/constructS.m b/core/constructS.m index 87ac8c89..5412cf24 100755 --- a/core/constructS.m +++ b/core/constructS.m @@ -152,7 +152,8 @@ strjoin(unique(metsToS(~metsPresent)),', ')],'') else missingMet = find(~metsPresent); - missingMet = char(strcat(metsToS(missingMet),' (reaction:',rxns(rxnsToS(missingMet)),')\n')); + missingMet = strcat(metsToS(missingMet),' (reaction:',rxns(rxnsToS(missingMet)),')\n'); + missingMet = strjoin(missingMet,''); error(['Could not find the following metabolites (reaction indicated) in the metabolite list: \n' ... 
missingMet '%s'],''); end diff --git a/core/deleteUnusedGenes.m b/core/deleteUnusedGenes.m index 49cff71f..7d0427f9 100755 --- a/core/deleteUnusedGenes.m +++ b/core/deleteUnusedGenes.m @@ -37,6 +37,10 @@ reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); end +if isfield(reducedModel,'proteins') + reducedModel.proteins=reducedModel.proteins(toKeep); +end + if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); end diff --git a/core/dispEM.m b/core/dispEM.m index 8ece09ff..8ed45e8b 100755 --- a/core/dispEM.m +++ b/core/dispEM.m @@ -33,6 +33,10 @@ function dispEM(string,throwErrors,toList,trimWarnings) end if throwErrors==false errorText=['WARNING: ' string '\n']; + % Wrap text to command window size + sz = get(0, 'CommandWindowSize'); + errorText = textwrap({errorText},sz(1)); + errorText = strjoin(errorText,'\n'); else errorText=[string '\n']; end diff --git a/core/getExchangeRxns.m b/core/getExchangeRxns.m index 459bc095..fa197b21 100755 --- a/core/getExchangeRxns.m +++ b/core/getExchangeRxns.m @@ -1,47 +1,84 @@ function [exchangeRxns, exchangeRxnsIndexes]=getExchangeRxns(model,reactionType) % getExchangeRxns -% Retrieves the exchange reactions from a model +% Retrieves the exchange reactions from a model. Exchange reactions are +% identified by having either no substrates or products. % +% Input: % model a model structure -% reactionType retrieve all reactions ('both'), only production -% ('out'), or only consumption ('in') (optional, default -% 'both') +% reactionType which exchange reactions should be returned +% 'all' all reactions, irrespective of reaction +% bounds +% 'uptake' reactions with bounds that imply that +% only uptake are allowed. Reaction +% direction, upper and lower bounds are +% all considered +% 'excrete' reactions with bounds that imply that +% only excretion are allowed. 
Reaction +% direction, upper and lower bounds are +% all considered +% 'reverse' reactions with non-zero upper and lower +% bounds that imply that both uptake and +% excretion are allowed +% 'blocked' reactions that have zero upper and lower +% bounds, not allowing any flux +% 'in' reactions where the boundary metabolite +% is the substrate of the reaction, a +% positive flux value would imply uptake, +% but reaction bounds are not considered +% 'out' reactions where the boundary metabolite +% is the product of the reaction, a +% positive flux value would imply excretion, +% but reaction bounds are not considered. % +% Output: % exchangeRxns cell array with the IDs of the exchange reactions % exchangeRxnsIndexes vector with the indexes of the exchange reactions % -% Exchange reactions are defined as reactions which involve only products -% or only reactants. If the unconstrained field is present, then that is -% used instead. +% Note: +% The union of 'in' and 'out' equals 'all'. Also, the union of 'uptake', +% 'excrete', 'reverse' and 'blocked' equals 'all'. 
% % Usage: [exchangeRxns,exchangeRxnsIndexes]=getExchangeRxns(model,reactionType) if nargin<2 - reactionType='both'; + reactionType='all'; else reactionType=char(reactionType); end -hasNoProducts=sparse(numel(model.rxns),1); -hasNoReactants=sparse(numel(model.rxns),1); - -if isfield(model,'unconstrained') - if strcmpi(reactionType,'both') || strcmpi(reactionType,'out') - [~, I]=find(model.S(model.unconstrained~=0,:)>0); - hasNoProducts(I)=true; - end - if strcmpi(reactionType,'both') || strcmpi(reactionType,'in') - [~, I]=find(model.S(model.unconstrained~=0,:)<0); - hasNoReactants(I)=true; - end +% Find exchange reactions +if isfield(model, 'unconstrained') + [~, I]=find(model.S(model.unconstrained~=0,:)>0); + hasNoProd(I)=true; + [~, I]=find(model.S(model.unconstrained~=0,:)<0); + hasNoSubs(I)=true; else - if strcmpi(reactionType,'both') || strcmpi(reactionType,'out') - hasNoProducts=sum((model.S>0))==0; - end - if strcmpi(reactionType,'both') || strcmpi(reactionType,'in') - hasNoReactants=sum((model.S<0))==0; - end + hasNoProd = transpose(find(sum(model.S>0)==0)); + hasNoSubs = transpose(find(sum(model.S<0)==0)); +end +allExch = [hasNoProd; hasNoSubs]; + +switch reactionType + case {'both','all'} % For legacy reasons, 'both' is also allowed + exchangeRxnsIndexes = allExch; + case 'in' + exchangeRxnsIndexes = hasNoSubs; + case 'out' + exchangeRxnsIndexes = hasNoProd; + case 'blocked' + exchangeRxnsIndexes = allExch(model.lb(allExch) == 0 & model.ub(allExch) == 0); + case 'reverse' + exchangeRxnsIndexes = allExch(model.lb(allExch) < 0 & model.ub(allExch) > 0); + case 'uptake' + + exchangeRxnsIndexes = allExch([(model.lb(hasNoProd) < 0 & model.ub(hasNoProd) <= 0); ... + (model.lb(hasNoSubs) >= 0 & model.ub(hasNoSubs) > 0)]); + case 'excrete' + exchangeRxnsIndexes = allExch([(model.lb(hasNoProd) >= 0 & model.ub(hasNoProd) > 0); ... 
+ (model.lb(hasNoSubs) < 0 & model.ub(hasNoSubs) <= 0)]); + otherwise + error('Invalid reactionType specified') end -exchangeRxnsIndexes=find(hasNoProducts(:) | hasNoReactants(:)); -exchangeRxns=model.rxns(exchangeRxnsIndexes); +exchangeRxnsIndexes = sort(exchangeRxnsIndexes); +exchangeRxns = model.rxns(exchangeRxnsIndexes); end diff --git a/core/getModelFromHomology.m b/core/getModelFromHomology.m index 242e6f36..aa356b03 100755 --- a/core/getModelFromHomology.m +++ b/core/getModelFromHomology.m @@ -107,14 +107,17 @@ modelNames=cell(numel(models),1); for i=1:numel(models) modelNames{i}=models{i}.id; - %Gene short names and geneMiriams are often different between species, - %safer not to include them + %Gene short names, geneMiriams and proteins are often different + %between species, safer not to include them if isfield(models{i},'geneShortNames') models{i}=rmfield(models{i},'geneShortNames'); end if isfield(models{i},'geneMiriams') models{i}=rmfield(models{i},'geneMiriams'); end + if isfield(models{i},'proteins') + models{i}=rmfield(models{i},'proteins'); + end %The geneFrom field also loses meaning if the genes are replaced by %orthologs if isfield(models{i},'geneFrom') diff --git a/core/mergeModels.m b/core/mergeModels.m index 19de25c1..29faff96 100755 --- a/core/mergeModels.m +++ b/core/mergeModels.m @@ -492,7 +492,11 @@ if isfield(models{i},'geneShortNames') model.geneShortNames=models{i}.geneShortNames; end - + + if isfield(models{i},'proteins') + model.proteins=models{i}.proteins; + end + if isfield(models{i},'geneMiriams') model.geneMiriams=models{i}.geneMiriams; end @@ -530,7 +534,23 @@ model.geneShortNames=[model.geneShortNames;emptyGeneSN]; end end - + + if isfield(models{i},'proteins') + if isfield(model,'proteins') + model.proteins=[model.proteins;models{i}.proteins(genesToAdd)]; + else + emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); + emptyGeneSN(:)={''}; + model.proteins=[emptyGeneSN;models{i}.proteins(genesToAdd)]; + end + else + if 
isfield(model,'proteins') + emptyGeneSN=cell(numel(genesToAdd),1); + emptyGeneSN(:)={''}; + model.proteins=[model.proteins;emptyGeneSN]; + end + end + if isfield(models{i},'geneMiriams') if isfield(model,'geneMiriams') model.geneMiriams=[model.geneMiriams;models{i}.geneMiriams(genesToAdd)]; diff --git a/core/permuteModel.m b/core/permuteModel.m index 200bf283..4fad67c2 100755 --- a/core/permuteModel.m +++ b/core/permuteModel.m @@ -132,6 +132,9 @@ if isfield(newModel,'geneShortNames') newModel.geneShortNames=newModel.geneShortNames(indexes); end + if isfield(newModel,'proteins') + newModel.proteins=newModel.proteins(indexes); + end if isfield(newModel,'rxnGeneMat') newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); end diff --git a/core/predictLocalization.m b/core/predictLocalization.m index bab49bb0..2bfff833 100755 --- a/core/predictLocalization.m +++ b/core/predictLocalization.m @@ -199,6 +199,9 @@ if isfield(model,'geneMiriams') model.geneMiriams=[model.geneMiriams;{[]}]; end + if isfield(model,'proteins') + model.proteins=[model.proteins;{[]}]; + end if isfield(model,'geneFrom') model.geneFrom=[model.geneFrom;{{'FAKE'}}]; end @@ -258,6 +261,9 @@ if isfield(model,'geneShortNames') model.geneShortNames=[model.geneShortNames;{''}]; end + if isfield(model,'proteins') + model.proteins=[model.proteins;{''}]; + end if isfield(model,'geneFrom') model.geneFrom=[model.geneFrom;{'COMPLEX'}]; end @@ -759,6 +765,9 @@ if isfield(outModel,'geneShortNames') outModel.geneShortNames(I)=[]; end +if isfield(outModel,'proteins') + outModel.proteins(I)=[]; +end outModel.rxnGeneMat(:,I)=[]; %Fix grRules and reconstruct rxnGeneMat diff --git a/core/printOrange.m b/core/printOrange.m index 21625a08..b31b5639 100755 --- a/core/printOrange.m +++ b/core/printOrange.m @@ -1,4 +1,4 @@ -function printOrange(stringToPrint) +function orangeString = printOrange(stringToPrint) % printOrange % Print orange-colored stringToPrint to the MATLAB Command Window. 
Only % if MATLAB is open with GUI, does not work with command-line MATLAB. @@ -7,10 +7,18 @@ function printOrange(stringToPrint) % stringToPrint string that should be printed in orange color % % Usage: printOrange(stringToPrint) + try useDesktop = usejava('desktop'); catch, useDesktop = false; end if useDesktop - fprintf(['[\b' stringToPrint,']\b']) + orangeString = ['[\b' stringToPrint,']\b']; else - fprintf(stringToPrint) + orangeString = stringToPrint; +end +if nargout < 1 + % Wrap text to command window size + sz = get(0, 'CommandWindowSize'); + orangeString = textwrap({orangeString},sz(1)); + orangeString = strjoin(orangeString,'\n'); + fprintf(orangeString); end end diff --git a/core/removeReactions.m b/core/removeReactions.m index d2af5a4a..4255c6e2 100755 --- a/core/removeReactions.m +++ b/core/removeReactions.m @@ -129,6 +129,10 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); end + + if isfield(reducedModel,'proteins') + reducedModel.proteins=reducedModel.proteins(toKeep); + end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); diff --git a/core/simplifyModel.m b/core/simplifyModel.m index 2dbc72ec..d99f7ae6 100755 --- a/core/simplifyModel.m +++ b/core/simplifyModel.m @@ -221,6 +221,9 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames={}; end + if isfield(reducedModel,'proteins') + reducedModel.proteins={}; + end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams={}; end diff --git a/doc/INIT/getINITModel.html b/doc/INIT/getINITModel.html index 66b87a44..a1079434 100644 --- a/doc/INIT/getINITModel.html +++ b/doc/INIT/getINITModel.html @@ -544,77 +544,80 @@

SOURCE CODE ^if isfield(model,'geneShortNames') 0392 model.geneShortNames(I)=[]; 0393 end -0394 if isfield(model,'geneMiriams') -0395 model.geneMiriams(I)=[]; +0394 if isfield(model,'proteins') +0395 model.proteins(I)=[]; 0396 end -0397 if isfield(model,'geneFrom') -0398 model.geneFrom(I)=[]; +0397 if isfield(model,'geneMiriams') +0398 model.geneMiriams(I)=[]; 0399 end -0400 if isfield(model,'geneComps') -0401 model.geneComps(I)=[]; +0400 if isfield(model,'geneFrom') +0401 model.geneFrom(I)=[]; 0402 end -0403 -0404 %At this stage the model will contain some exchange reactions but probably -0405 %not all (and maybe zero). This can be inconvenient, so all exchange -0406 %reactions from the reference model are added, except for those which -0407 %involve metabolites that are not in the model. -0408 -0409 %First delete and included exchange reactions in order to prevent the order -0410 %from changing -0411 model=removeReactions(model,getExchangeRxns(model)); -0412 -0413 %Create a model with only the exchange reactions in refModel -0414 excModel=removeReactions(refModel,setdiff(refModel.rxns,getExchangeRxns(refModel)),true,true); +0403 if isfield(model,'geneComps') +0404 model.geneComps(I)=[]; +0405 end +0406 +0407 %At this stage the model will contain some exchange reactions but probably +0408 %not all (and maybe zero). This can be inconvenient, so all exchange +0409 %reactions from the reference model are added, except for those which +0410 %involve metabolites that are not in the model. 
+0411 +0412 %First delete and included exchange reactions in order to prevent the order +0413 %from changing +0414 model=removeReactions(model,getExchangeRxns(model)); 0415 -0416 %Find the metabolites there which are not exchange metabolites and which do -0417 %not exist in the output model -0418 I=~ismember(excModel.mets,model.mets) & excModel.unconstrained==0; -0419 -0420 %Then find those reactions and delete them -0421 [~, J]=find(excModel.S(I,:)); -0422 excModel=removeReactions(excModel,J,true,true); -0423 -0424 %Merge with the output model -0425 model=mergeModels({model;excModel},'metNames'); -0426 model.id='INITModel'; -0427 model.name=['Automatically generated model for ' tissue]; -0428 if any(celltype) -0429 model.name=[model.name ' - ' celltype]; -0430 end -0431 -0432 if printReport==true -0433 printScores(model,'Final model statistics',hpaData,arrayData,tissue,celltype); -0434 end -0435 -0436 %Add information about essential reactions and reactions included for -0437 %gap-filling and return a taskReport -0438 if ~isempty(taskStructure) -0439 I=find(taskReport.ok); %Ignore failed tasks -0440 for i=1:numel(I) -0441 taskReport.essential{I(i),1}=cModel.rxns(essentialRxnMat(:,I(i))); -0442 taskReport.gapfill{I(i),1}=refModelNoExc.rxns(addedRxnMat(:,i)); -0443 end -0444 else -0445 taskReport=[]; -0446 end -0447 -0448 %Fix grRules and reconstruct rxnGeneMat -0449 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0450 model.grRules = grRules; -0451 model.rxnGeneMat = rxnGeneMat; -0452 end -0453 -0454 %This is for printing a summary of a model -0455 function [rxnS, geneS]=printScores(model,name,hpaData,arrayData,tissue,celltype) -0456 [a, b]=scoreModel(model,hpaData,arrayData,tissue,celltype); -0457 rxnS=mean(a); -0458 geneS=mean(b(~isinf(b))); -0459 fprintf([name ':\n']); -0460 fprintf(['\t' num2str(numel(model.rxns)) ' reactions, ' num2str(numel(model.genes)) ' genes\n']); -0461 fprintf(['\tMean reaction score: ' num2str(rxnS) '\n']); -0462 
fprintf(['\tMean gene score: ' num2str(geneS) '\n']); -0463 fprintf(['\tReactions with positive scores: ' num2str(100*sum(a>0)/numel(a)) '%%\n\n']); -0464 end +0416 %Create a model with only the exchange reactions in refModel +0417 excModel=removeReactions(refModel,setdiff(refModel.rxns,getExchangeRxns(refModel)),true,true); +0418 +0419 %Find the metabolites there which are not exchange metabolites and which do +0420 %not exist in the output model +0421 I=~ismember(excModel.mets,model.mets) & excModel.unconstrained==0; +0422 +0423 %Then find those reactions and delete them +0424 [~, J]=find(excModel.S(I,:)); +0425 excModel=removeReactions(excModel,J,true,true); +0426 +0427 %Merge with the output model +0428 model=mergeModels({model;excModel},'metNames'); +0429 model.id='INITModel'; +0430 model.name=['Automatically generated model for ' tissue]; +0431 if any(celltype) +0432 model.name=[model.name ' - ' celltype]; +0433 end +0434 +0435 if printReport==true +0436 printScores(model,'Final model statistics',hpaData,arrayData,tissue,celltype); +0437 end +0438 +0439 %Add information about essential reactions and reactions included for +0440 %gap-filling and return a taskReport +0441 if ~isempty(taskStructure) +0442 I=find(taskReport.ok); %Ignore failed tasks +0443 for i=1:numel(I) +0444 taskReport.essential{I(i),1}=cModel.rxns(essentialRxnMat(:,I(i))); +0445 taskReport.gapfill{I(i),1}=refModelNoExc.rxns(addedRxnMat(:,i)); +0446 end +0447 else +0448 taskReport=[]; +0449 end +0450 +0451 %Fix grRules and reconstruct rxnGeneMat +0452 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0453 model.grRules = grRules; +0454 model.rxnGeneMat = rxnGeneMat; +0455 end +0456 +0457 %This is for printing a summary of a model +0458 function [rxnS, geneS]=printScores(model,name,hpaData,arrayData,tissue,celltype) +0459 [a, b]=scoreModel(model,hpaData,arrayData,tissue,celltype); +0460 rxnS=mean(a); +0461 geneS=mean(b(~isinf(b))); +0462 fprintf([name ':\n']); +0463 fprintf(['\t' 
num2str(numel(model.rxns)) ' reactions, ' num2str(numel(model.genes)) ' genes\n']); +0464 fprintf(['\tMean reaction score: ' num2str(rxnS) '\n']); +0465 fprintf(['\tMean gene score: ' num2str(geneS) '\n']); +0466 fprintf(['\tReactions with positive scores: ' num2str(100*sum(a>0)/numel(a)) '%%\n\n']); +0467 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/INIT/mergeLinear.html b/doc/INIT/mergeLinear.html index 9bae294c..16467e80 100644 --- a/doc/INIT/mergeLinear.html +++ b/doc/INIT/mergeLinear.html @@ -92,187 +92,190 @@

SOURCE CODE ^if isfield(reducedModel,'geneShortNames') 0030 reducedModel.geneShortNames={}; 0031 end -0032 if isfield(reducedModel,'geneMiriams') -0033 reducedModel.geneMiriams={}; +0032 if isfield(reducedModel,'proteins') +0033 reducedModel.proteins={}; 0034 end -0035 if isfield(reducedModel,'geneComps') -0036 reducedModel.geneComps=[]; +0035 if isfield(reducedModel,'geneMiriams') +0036 reducedModel.geneMiriams={}; 0037 end -0038 -0039 nextGroupId = 1; -0040 origRxnIds = reducedModel.rxns; -0041 groupIds = zeros(numel(reducedModel.rxns),1); -0042 reversedRxns = false(numel(reducedModel.rxns),1); -0043 -0044 %Loop through and iteratively group linear reactions -0045 while 1 -0046 %Get the banned reaction indexes. Note that the indexes will change -0047 %in each iteration, but the names will not as they won't be merged -0048 %with any other reaction -0049 bannedIndexes=getIndexes(reducedModel,noMergeRxns,'rxns'); -0050 -0051 %Select all metabolites that are only present as reactants/products -0052 %in one reaction -0053 twoNonZero = find(sum(reducedModel.S ~= 0, 2) == 2); -0054 -0055 mergedSome=false; -0056 -0057 %Loop through each of them and see if the reactions should be -0058 %merged -0059 for i=1:numel(twoNonZero) -0060 involvedRxns=find(reducedModel.S(twoNonZero(i),:)); -0061 %Check that we can have one positive and one negative -0062 pos = sum(reducedModel.S(twoNonZero(i),involvedRxns).' > 0 | reducedModel.rev(involvedRxns)); -0063 neg = sum(reducedModel.S(twoNonZero(i),involvedRxns).' 
< 0 | reducedModel.rev(involvedRxns)); -0064 -0065 -0066 %Check so that one or both of the reactions haven't been merged -0067 %already -0068 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) && pos >= 1 && neg >= 1 -0069 %first, take care of a special case: If the first reaction is producing the metabolite and if it is reversible, -0070 %and the second is also producing it and is not reversible, change the order - the code below will not work otherwise -0071 if reducedModel.rev(involvedRxns(1)) && (~reducedModel.rev(involvedRxns(2))) && ... -0072 (reducedModel.S(twoNonZero(i),involvedRxns(1)) > 0) && (reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0) -0073 involvedRxns = flip(involvedRxns); -0074 end -0075 -0076 %first make sure the first reaction is producing the metabolite -0077 if reducedModel.S(twoNonZero(i),involvedRxns(1)) < 0 -0078 %it is not producing the metabolite - fix that -0079 %first choice: use the second reaction as producer if it is producing -0080 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 -0081 involvedRxns = flip(involvedRxns);%make the second the first -0082 else -0083 %now we know that the second reaction is not producing, so we can safely try to make the first a producer -0084 if reducedModel.rev(involvedRxns(1)) == 1 -0085 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(1), groupIds, reversedRxns); -0086 else %ok, finally try to flip the second reaction -0087 if reducedModel.rev(involvedRxns(2)) == 1 -0088 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), groupIds, reversedRxns); -0089 involvedRxns = flip(involvedRxns);%make the second the first -0090 else -0091 error('We should never end up here!'); -0092 end -0093 end -0094 end -0095 end -0096 %Now, make sure the second rxn is a consumer -0097 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 -0098 if reducedModel.rev(involvedRxns(2)) == 1 -0099 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), 
groupIds, reversedRxns); -0100 else -0101 error('We should never end up here!'); -0102 end -0103 end -0104 -0105 %Calculate how many times the second reaction has to be -0106 %multiplied before being merged with the first -0107 stoichRatio=abs(reducedModel.S(twoNonZero(i),involvedRxns(1))/reducedModel.S(twoNonZero(i),involvedRxns(2))); -0108 -0109 %Add the second to the first -0110 reducedModel.S(:,involvedRxns(1))=reducedModel.S(:,involvedRxns(1))+reducedModel.S(:,involvedRxns(2))*stoichRatio; +0038 if isfield(reducedModel,'geneComps') +0039 reducedModel.geneComps=[]; +0040 end +0041 +0042 nextGroupId = 1; +0043 origRxnIds = reducedModel.rxns; +0044 groupIds = zeros(numel(reducedModel.rxns),1); +0045 reversedRxns = false(numel(reducedModel.rxns),1); +0046 +0047 %Loop through and iteratively group linear reactions +0048 while 1 +0049 %Get the banned reaction indexes. Note that the indexes will change +0050 %in each iteration, but the names will not as they won't be merged +0051 %with any other reaction +0052 bannedIndexes=getIndexes(reducedModel,noMergeRxns,'rxns'); +0053 +0054 %Select all metabolites that are only present as reactants/products +0055 %in one reaction +0056 twoNonZero = find(sum(reducedModel.S ~= 0, 2) == 2); +0057 +0058 mergedSome=false; +0059 +0060 %Loop through each of them and see if the reactions should be +0061 %merged +0062 for i=1:numel(twoNonZero) +0063 involvedRxns=find(reducedModel.S(twoNonZero(i),:)); +0064 %Check that we can have one positive and one negative +0065 pos = sum(reducedModel.S(twoNonZero(i),involvedRxns).' > 0 | reducedModel.rev(involvedRxns)); +0066 neg = sum(reducedModel.S(twoNonZero(i),involvedRxns).' 
< 0 | reducedModel.rev(involvedRxns)); +0067 +0068 +0069 %Check so that one or both of the reactions haven't been merged +0070 %already +0071 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) && pos >= 1 && neg >= 1 +0072 %first, take care of a special case: If the first reaction is producing the metabolite and if it is reversible, +0073 %and the second is also producing it and is not reversible, change the order - the code below will not work otherwise +0074 if reducedModel.rev(involvedRxns(1)) && (~reducedModel.rev(involvedRxns(2))) && ... +0075 (reducedModel.S(twoNonZero(i),involvedRxns(1)) > 0) && (reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0) +0076 involvedRxns = flip(involvedRxns); +0077 end +0078 +0079 %first make sure the first reaction is producing the metabolite +0080 if reducedModel.S(twoNonZero(i),involvedRxns(1)) < 0 +0081 %it is not producing the metabolite - fix that +0082 %first choice: use the second reaction as producer if it is producing +0083 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 +0084 involvedRxns = flip(involvedRxns);%make the second the first +0085 else +0086 %now we know that the second reaction is not producing, so we can safely try to make the first a producer +0087 if reducedModel.rev(involvedRxns(1)) == 1 +0088 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(1), groupIds, reversedRxns); +0089 else %ok, finally try to flip the second reaction +0090 if reducedModel.rev(involvedRxns(2)) == 1 +0091 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), groupIds, reversedRxns); +0092 involvedRxns = flip(involvedRxns);%make the second the first +0093 else +0094 error('We should never end up here!'); +0095 end +0096 end +0097 end +0098 end +0099 %Now, make sure the second rxn is a consumer +0100 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 +0101 if reducedModel.rev(involvedRxns(2)) == 1 +0102 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), 
groupIds, reversedRxns); +0103 else +0104 error('We should never end up here!'); +0105 end +0106 end +0107 +0108 %Calculate how many times the second reaction has to be +0109 %multiplied before being merged with the first +0110 stoichRatio=abs(reducedModel.S(twoNonZero(i),involvedRxns(1))/reducedModel.S(twoNonZero(i),involvedRxns(2))); 0111 -0112 %Clear the second reaction -0113 reducedModel.S(:,involvedRxns(2))=0; +0112 %Add the second to the first +0113 reducedModel.S(:,involvedRxns(1))=reducedModel.S(:,involvedRxns(1))+reducedModel.S(:,involvedRxns(2))*stoichRatio; 0114 -0115 %This is to prevent numerical issues. It should be 0 -0116 %already -0117 reducedModel.S(twoNonZero(i),involvedRxns(1))=0; -0118 -0119 %At this point the second reaction is certain to be deleted -0120 %in a later step and can therefore be ignored +0115 %Clear the second reaction +0116 reducedModel.S(:,involvedRxns(2))=0; +0117 +0118 %This is to prevent numerical issues. It should be 0 +0119 %already +0120 reducedModel.S(twoNonZero(i),involvedRxns(1))=0; 0121 -0122 %Recalculate the bounds for the new reaction. This can be -0123 %problematic since the scale of the bounds may change -0124 %dramatically. Let the most constraining reaction determine -0125 %the new bound -0126 lb1=reducedModel.lb(involvedRxns(1)); -0127 lb2=reducedModel.lb(involvedRxns(2)); -0128 ub1=reducedModel.ub(involvedRxns(1)); -0129 ub2=reducedModel.ub(involvedRxns(2)); -0130 -0131 if lb2~=-inf -0132 reducedModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); -0133 end -0134 if ub2~=inf -0135 reducedModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0122 %At this point the second reaction is certain to be deleted +0123 %in a later step and can therefore be ignored +0124 +0125 %Recalculate the bounds for the new reaction. This can be +0126 %problematic since the scale of the bounds may change +0127 %dramatically. 
Let the most constraining reaction determine +0128 %the new bound +0129 lb1=reducedModel.lb(involvedRxns(1)); +0130 lb2=reducedModel.lb(involvedRxns(2)); +0131 ub1=reducedModel.ub(involvedRxns(1)); +0132 ub2=reducedModel.ub(involvedRxns(2)); +0133 +0134 if lb2~=-inf +0135 reducedModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); 0136 end -0137 -0138 %take care of the .rev flag - it could be that the combined rxn changes from rev to irrev -0139 reducedModel.rev(involvedRxns(1)) = reducedModel.rev(involvedRxns(1))*reducedModel.rev(involvedRxns(2));%this is a way to do an "and" operation with 0 and 1 numbers -0140 -0141 %Then recalculate the objective coefficient. The resulting -0142 %coefficient is the weighted sum of the previous -0143 reducedModel.c(involvedRxns(1))=reducedModel.c(involvedRxns(1))+reducedModel.c(involvedRxns(2))*stoichRatio; -0144 -0145 %store which reactions that have been merged -0146 rxnInd1 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(1)))); -0147 rxnInd2 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(2)))); -0148 grpId = max(groupIds(rxnInd1),groupIds(rxnInd2)); -0149 if grpId == 0 -0150 grpId = nextGroupId; -0151 nextGroupId = nextGroupId + 1; -0152 end -0153 -0154 if groupIds(rxnInd1) ~= grpId -0155 if groupIds(rxnInd1) == 0 -0156 %not merged before, just set the group id -0157 groupIds(rxnInd1) = grpId; -0158 else -0159 %merged before - all rxns with the same group id should be changed -0160 groupIds(groupIds == groupIds(rxnInd1)) = grpId; -0161 end -0162 end -0163 if groupIds(rxnInd2) ~= grpId -0164 if groupIds(rxnInd2) == 0 -0165 %not merged before, just set the group id -0166 groupIds(rxnInd2) = grpId; -0167 else -0168 %merged before - all rxns with the same group id should be changed -0169 groupIds(groupIds == groupIds(rxnInd2)) = grpId; -0170 end -0171 end -0172 -0173 %Iterate again -0174 mergedSome=true; -0175 end -0176 end -0177 -0178 %All possible reactions merged -0179 if mergedSome==false -0180 break; -0181 
end -0182 -0183 %Now delete all reactions that involve no metabolites -0184 I=find(sum(reducedModel.S~=0,1)==0); +0137 if ub2~=inf +0138 reducedModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0139 end +0140 +0141 %take care of the .rev flag - it could be that the combined rxn changes from rev to irrev +0142 reducedModel.rev(involvedRxns(1)) = reducedModel.rev(involvedRxns(1))*reducedModel.rev(involvedRxns(2));%this is a way to do an "and" operation with 0 and 1 numbers +0143 +0144 %Then recalculate the objective coefficient. The resulting +0145 %coefficient is the weighted sum of the previous +0146 reducedModel.c(involvedRxns(1))=reducedModel.c(involvedRxns(1))+reducedModel.c(involvedRxns(2))*stoichRatio; +0147 +0148 %store which reactions that have been merged +0149 rxnInd1 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(1)))); +0150 rxnInd2 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(2)))); +0151 grpId = max(groupIds(rxnInd1),groupIds(rxnInd2)); +0152 if grpId == 0 +0153 grpId = nextGroupId; +0154 nextGroupId = nextGroupId + 1; +0155 end +0156 +0157 if groupIds(rxnInd1) ~= grpId +0158 if groupIds(rxnInd1) == 0 +0159 %not merged before, just set the group id +0160 groupIds(rxnInd1) = grpId; +0161 else +0162 %merged before - all rxns with the same group id should be changed +0163 groupIds(groupIds == groupIds(rxnInd1)) = grpId; +0164 end +0165 end +0166 if groupIds(rxnInd2) ~= grpId +0167 if groupIds(rxnInd2) == 0 +0168 %not merged before, just set the group id +0169 groupIds(rxnInd2) = grpId; +0170 else +0171 %merged before - all rxns with the same group id should be changed +0172 groupIds(groupIds == groupIds(rxnInd2)) = grpId; +0173 end +0174 end +0175 +0176 %Iterate again +0177 mergedSome=true; +0178 end +0179 end +0180 +0181 %All possible reactions merged +0182 if mergedSome==false +0183 break; +0184 end 0185 -0186 %Remove reactions -0187 reducedModel=removeReactions(reducedModel,I); +0186 %Now delete all reactions that involve no 
metabolites +0187 I=find(sum(reducedModel.S~=0,1)==0); 0188 -0189 %Remove metabolites -0190 notInUse=sum(reducedModel.S~=0,2)==0; -0191 reducedModel=removeMets(reducedModel,notInUse); -0192 end -0193 -0194 function [model1,reversedRxns1] = flipRxn(model1, rxnInd, groupIds1, reversedRxns1) -0195 model1.S(:,rxnInd) = model1.S(:,rxnInd)*-1; -0196 %swap the bounds -0197 ub = model1.ub(rxnInd); -0198 model1.ub(rxnInd) = -model1.lb(rxnInd); -0199 model1.lb(rxnInd) = -ub; -0200 %flip the objective -0201 model1.c(rxnInd) = -model1.c(rxnInd); -0202 -0203 %now take care of the reversedRxns - if this is a group, reverse all of the -0204 %reactions in the group in the reversedRxns index - they will all be reversed at the -0205 %same time since they are the same rxn. -0206 rxnIndices = rxnInd; -0207 if groupIds1(rxnInd) > 0 -0208 rxnIndices = find(groupIds1 == groupIds1(rxnInd)); -0209 end -0210 reversedRxns1(rxnIndices) = ~reversedRxns1(rxnIndices); -0211 end -0212 end +0189 %Remove reactions +0190 reducedModel=removeReactions(reducedModel,I); +0191 +0192 %Remove metabolites +0193 notInUse=sum(reducedModel.S~=0,2)==0; +0194 reducedModel=removeMets(reducedModel,notInUse); +0195 end +0196 +0197 function [model1,reversedRxns1] = flipRxn(model1, rxnInd, groupIds1, reversedRxns1) +0198 model1.S(:,rxnInd) = model1.S(:,rxnInd)*-1; +0199 %swap the bounds +0200 ub = model1.ub(rxnInd); +0201 model1.ub(rxnInd) = -model1.lb(rxnInd); +0202 model1.lb(rxnInd) = -ub; +0203 %flip the objective +0204 model1.c(rxnInd) = -model1.c(rxnInd); +0205 +0206 %now take care of the reversedRxns - if this is a group, reverse all of the +0207 %reactions in the group in the reversedRxns index - they will all be reversed at the +0208 %same time since they are the same rxn. +0209 rxnIndices = rxnInd; +0210 if groupIds1(rxnInd) > 0 +0211 rxnIndices = find(groupIds1 == groupIds1(rxnInd)); +0212 end +0213 reversedRxns1(rxnIndices) = ~reversedRxns1(rxnIndices); +0214 end +0215 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/INIT/removeLowScoreGenes.html b/doc/INIT/removeLowScoreGenes.html index 8ec08a62..34524a1e 100644 --- a/doc/INIT/removeLowScoreGenes.html +++ b/doc/INIT/removeLowScoreGenes.html @@ -223,140 +223,143 @@

SOURCE CODE ^if isfield(newModel,'geneShortNames') 0120 newModel.geneShortNames(remInd) = []; 0121 end -0122 if isfield(newModel,'geneMiriams') -0123 newModel.geneMiriams(remInd) = []; +0122 if isfield(newModel,'proteins') +0123 newModel.proteins(remInd) = []; 0124 end -0125 if isfield(newModel,'geneFrom') -0126 newModel.geneFrom(remInd) = []; +0125 if isfield(newModel,'geneMiriams') +0126 newModel.geneMiriams(remInd) = []; 0127 end -0128 if isfield(newModel,'geneComps') -0129 newModel.geneComps(remInd) = []; +0128 if isfield(newModel,'geneFrom') +0129 newModel.geneFrom(remInd) = []; 0130 end -0131 -0132 +0131 if isfield(newModel,'geneComps') +0132 newModel.geneComps(remInd) = []; 0133 end 0134 0135 -0136 -0137 function [updatedRule,rScore] = processSimpleRule(rule,genes,gScores,isozymeScoring,complexScoring) -0138 % Either score or modify a reaction gene rule containig only ANDs or ORs. -0139 % -0140 % If the rule contains an enzyme complex (all ANDs), the complex will be -0141 % scored based on the score of its subunits. Subunits without a score (NaN) -0142 % will be excluded from the score calculation. -0143 % -0144 % If the rule contains only isozymes (all ORs), the negative-score genes -0145 % will be removed from the rule. Isozymes without a score (NaN) will not be -0146 % removed from the rule. The resuling rule will then be scored. 
-0147 -0148 -0149 % get IDs and indices of genes involved in rule -0150 ruleGenes = unique(regexp(rule,'[^&|\(\) ]+','match')); -0151 [~,geneInd] = ismember(ruleGenes,genes); -0152 -0153 % rules with one or no genes remain unchanged -0154 if numel(ruleGenes) < 2 -0155 rScore = gScores(geneInd); -0156 updatedRule = rule; -0157 return -0158 end -0159 -0160 if ~contains(rule,'&') % rule contains isozymes -0161 -0162 scoreMethod = isozymeScoring; -0163 negInd = gScores(geneInd) < 0; % NaNs will return false here -0164 if all(negInd) -0165 % get the least negative gene, adding a small random value to avoid a tie -0166 [~,maxInd] = max(gScores(geneInd) + rand(size(geneInd))*(1e-8)); -0167 updatedRule = ruleGenes{maxInd}; -0168 elseif sum(~negInd) == 1 -0169 updatedRule = ruleGenes{~negInd}; -0170 else -0171 updatedRule = strjoin(ruleGenes(~negInd),' | '); -0172 if startsWith(rule,'(') -0173 updatedRule = ['(',updatedRule,')']; -0174 end -0175 end -0176 -0177 % update ruleGenes and their indices -0178 ruleGenes = unique(regexp(updatedRule,'[^&|\(\) ]+','match')); -0179 [~,geneInd] = ismember(ruleGenes,genes); -0180 -0181 elseif ~contains(rule,'|') % rule contains enzyme complex -0182 scoreMethod = complexScoring; -0183 updatedRule = rule; -0184 else -0185 error('This function cannot handle rules with both "OR" and "AND" expressions.'); -0186 end -0187 -0188 % score rule -0189 switch lower(scoreMethod) -0190 case 'min' -0191 rScore = min(gScores(geneInd),[],'omitnan'); -0192 case 'max' -0193 rScore = max(gScores(geneInd),[],'omitnan'); -0194 case 'median' -0195 rScore = median(gScores(geneInd),'omitnan'); -0196 case 'average' -0197 rScore = mean(gScores(geneInd),'omitnan'); -0198 end -0199 -0200 end -0201 +0136 end +0137 +0138 +0139 +0140 function [updatedRule,rScore] = processSimpleRule(rule,genes,gScores,isozymeScoring,complexScoring) +0141 % Either score or modify a reaction gene rule containig only ANDs or ORs. 
+0142 % +0143 % If the rule contains an enzyme complex (all ANDs), the complex will be +0144 % scored based on the score of its subunits. Subunits without a score (NaN) +0145 % will be excluded from the score calculation. +0146 % +0147 % If the rule contains only isozymes (all ORs), the negative-score genes +0148 % will be removed from the rule. Isozymes without a score (NaN) will not be +0149 % removed from the rule. The resuling rule will then be scored. +0150 +0151 +0152 % get IDs and indices of genes involved in rule +0153 ruleGenes = unique(regexp(rule,'[^&|\(\) ]+','match')); +0154 [~,geneInd] = ismember(ruleGenes,genes); +0155 +0156 % rules with one or no genes remain unchanged +0157 if numel(ruleGenes) < 2 +0158 rScore = gScores(geneInd); +0159 updatedRule = rule; +0160 return +0161 end +0162 +0163 if ~contains(rule,'&') % rule contains isozymes +0164 +0165 scoreMethod = isozymeScoring; +0166 negInd = gScores(geneInd) < 0; % NaNs will return false here +0167 if all(negInd) +0168 % get the least negative gene, adding a small random value to avoid a tie +0169 [~,maxInd] = max(gScores(geneInd) + rand(size(geneInd))*(1e-8)); +0170 updatedRule = ruleGenes{maxInd}; +0171 elseif sum(~negInd) == 1 +0172 updatedRule = ruleGenes{~negInd}; +0173 else +0174 updatedRule = strjoin(ruleGenes(~negInd),' | '); +0175 if startsWith(rule,'(') +0176 updatedRule = ['(',updatedRule,')']; +0177 end +0178 end +0179 +0180 % update ruleGenes and their indices +0181 ruleGenes = unique(regexp(updatedRule,'[^&|\(\) ]+','match')); +0182 [~,geneInd] = ismember(ruleGenes,genes); +0183 +0184 elseif ~contains(rule,'|') % rule contains enzyme complex +0185 scoreMethod = complexScoring; +0186 updatedRule = rule; +0187 else +0188 error('This function cannot handle rules with both "OR" and "AND" expressions.'); +0189 end +0190 +0191 % score rule +0192 switch lower(scoreMethod) +0193 case 'min' +0194 rScore = min(gScores(geneInd),[],'omitnan'); +0195 case 'max' +0196 rScore = 
max(gScores(geneInd),[],'omitnan'); +0197 case 'median' +0198 rScore = median(gScores(geneInd),'omitnan'); +0199 case 'average' +0200 rScore = mean(gScores(geneInd),'omitnan'); +0201 end 0202 -0203 -0204 function updatedRule = processComplexRule(rule,genes,gScores,isozymeScoring,complexScoring) -0205 % Update reactions containing both AND and OR expressions. -0206 % -0207 % Negative-score genes will be removed if they are isozymic, whereas they -0208 % will not be removed if they are part of an enzyme complex. However, if -0209 % the enzyme complex has a negative score, the entire complex will be -0210 % removed, as long as it is not the only remaining element in the rule. -0211 -0212 -0213 % Specify phrases to search for in the grRule. These phrases will find -0214 % genes grouped by all ANDs (first phrase) or all ORs (second phrase). -0215 search_phrases = {'\([^&|\(\) ]+( & [^&|\(\) ]+)+\)', '\([^&|\(\) ]+( \| [^&|\(\) ]+)+\)'}; -0216 -0217 % initialize some variables -0218 subsets = {}; % subsets are groups of genes grouped by all ANDs or all ORs -0219 c = 1; % counter to keep track of the group (subset) number -0220 r_orig = rule; % record original rule to determine when it stops changing -0221 for k = 1:100 % iterate some arbitrarily high number of times -0222 for j = 1:length(search_phrases) -0223 new_subset = regexp(rule,search_phrases{j},'match')'; % extract subsets -0224 if ~isempty(new_subset) -0225 subsets = [subsets; new_subset]; % append to list of subsets -0226 subset_nums = arrayfun(@num2str,(c:length(subsets))','UniformOutput',false); % get group numbers to be assigned to the new subsets, and convert to strings -0227 rule = regexprep(rule,search_phrases{j},strcat('#',subset_nums,'#'),'once'); % replace the subsets in the expression with their group numbers (enclosed by "#"s) -0228 c = c + length(new_subset); -0229 end -0230 end -0231 if isequal(rule,r_orig) -0232 break; % stop iterating when rule stops changing -0233 else -0234 r_orig = rule; -0235 
end -0236 end -0237 subsets{end+1} = rule; % add final state of rule as the last subset -0238 -0239 % score and update each subset, and append to gene list and gene scores -0240 for i = 1:numel(subsets) -0241 [subsets{i},subset_score] = processSimpleRule(subsets{i},genes,gScores,isozymeScoring,complexScoring); -0242 gScores = [gScores; subset_score]; -0243 genes = [genes; {strcat('#',num2str(i),'#')}]; -0244 end -0245 -0246 % reconstruct the rule from its updated subsets -0247 updatedRule = subsets{end}; -0248 for i = c-1:-1:1 -0249 updatedRule = regexprep(updatedRule,strcat('#',num2str(i),'#'),subsets{i}); -0250 end -0251 -0252 end -0253 +0203 end +0204 +0205 +0206 +0207 function updatedRule = processComplexRule(rule,genes,gScores,isozymeScoring,complexScoring) +0208 % Update reactions containing both AND and OR expressions. +0209 % +0210 % Negative-score genes will be removed if they are isozymic, whereas they +0211 % will not be removed if they are part of an enzyme complex. However, if +0212 % the enzyme complex has a negative score, the entire complex will be +0213 % removed, as long as it is not the only remaining element in the rule. +0214 +0215 +0216 % Specify phrases to search for in the grRule. These phrases will find +0217 % genes grouped by all ANDs (first phrase) or all ORs (second phrase). 
+0218 search_phrases = {'\([^&|\(\) ]+( & [^&|\(\) ]+)+\)', '\([^&|\(\) ]+( \| [^&|\(\) ]+)+\)'}; +0219 +0220 % initialize some variables +0221 subsets = {}; % subsets are groups of genes grouped by all ANDs or all ORs +0222 c = 1; % counter to keep track of the group (subset) number +0223 r_orig = rule; % record original rule to determine when it stops changing +0224 for k = 1:100 % iterate some arbitrarily high number of times +0225 for j = 1:length(search_phrases) +0226 new_subset = regexp(rule,search_phrases{j},'match')'; % extract subsets +0227 if ~isempty(new_subset) +0228 subsets = [subsets; new_subset]; % append to list of subsets +0229 subset_nums = arrayfun(@num2str,(c:length(subsets))','UniformOutput',false); % get group numbers to be assigned to the new subsets, and convert to strings +0230 rule = regexprep(rule,search_phrases{j},strcat('#',subset_nums,'#'),'once'); % replace the subsets in the expression with their group numbers (enclosed by "#"s) +0231 c = c + length(new_subset); +0232 end +0233 end +0234 if isequal(rule,r_orig) +0235 break; % stop iterating when rule stops changing +0236 else +0237 r_orig = rule; +0238 end +0239 end +0240 subsets{end+1} = rule; % add final state of rule as the last subset +0241 +0242 % score and update each subset, and append to gene list and gene scores +0243 for i = 1:numel(subsets) +0244 [subsets{i},subset_score] = processSimpleRule(subsets{i},genes,gScores,isozymeScoring,complexScoring); +0245 gScores = [gScores; subset_score]; +0246 genes = [genes; {strcat('#',num2str(i),'#')}]; +0247 end +0248 +0249 % reconstruct the rule from its updated subsets +0250 updatedRule = subsets{end}; +0251 for i = c-1:-1:1 +0252 updatedRule = regexprep(updatedRule,strcat('#',num2str(i),'#'),subsets{i}); +0253 end 0254 -0255 +0255 end +0256 +0257 +0258
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/addGenesRaven.html b/doc/core/addGenesRaven.html index 38134ede..39d26818 100644 --- a/doc/core/addGenesRaven.html +++ b/doc/core/addGenesRaven.html @@ -42,6 +42,8 @@

DESCRIPTION ^SOURCE CODE ^% default '') 0015 % geneMiriams cell array with MIRIAM structures (optional, 0016 % default []) -0017 % -0018 % newModel an updated model structure +0017 % proteins cell array of protein names associated to +0018 % each gene (optional, default '') 0019 % -0020 % NOTE: This function does not make extensive checks about MIRIAM formats, -0021 % forbidden characters or such. -0022 % -0023 % Usage: newModel=addGenesRaven(model,genesToAdd) -0024 -0025 newModel=model; +0020 % newModel an updated model structure +0021 % +0022 % NOTE: This function does not make extensive checks about MIRIAM formats, +0023 % forbidden characters or such. +0024 % +0025 % Usage: newModel=addGenesRaven(model,genesToAdd) 0026 -0027 if isempty(genesToAdd) -0028 return; -0029 end -0030 -0031 %Check some stuff regarding the required fields -0032 if ~isfield(genesToAdd,'genes') -0033 EM='genes is a required field in genesToAdd'; -0034 dispEM(EM); -0035 else -0036 genesToAdd.genes=convertCharArray(genesToAdd.genes); -0037 end -0038 -0039 %Number of genes -0040 nGenes=numel(genesToAdd.genes); -0041 nOldGenes=numel(model.genes); -0042 filler=cell(nGenes,1); -0043 filler(:)={''}; -0044 largeFiller=cell(nOldGenes,1); -0045 largeFiller(:)={''}; -0046 -0047 %Check that no gene ids are already present in the model -0048 I=ismember(genesToAdd.genes,model.genes); -0049 if all(I) -0050 warning('All genes in genesToAdd.genes are already present in model.genes'); -0051 return -0052 elseif any(I) -0053 existingGenes=strjoin(genesToAdd.genes(I), ', '); -0054 warning(['The following genes are already present in model.genes and will therefore not be added: ', existingGenes]) -0055 genesToAdd.genes(I)=[]; -0056 if isfield(genesToAdd,'geneShortNames') -0057 genesToAdd.geneShortNames(I)=[]; -0058 end -0059 if isfield(genesToAdd,'geneMiriams') -0060 genesToAdd.geneMiriams(I)=[]; -0061 end -0062 else -0063 newModel.genes=[newModel.genes;genesToAdd.genes(:)]; -0064 end -0065 -0066 %Some more 
checks and if they pass then add each field to the structure -0067 if isfield(genesToAdd,'geneShortNames') -0068 genesToAdd.geneShortNames=convertCharArray(genesToAdd.geneShortNames); -0069 if numel(genesToAdd.geneShortNames)~=nGenes -0070 EM='genesToAdd.geneShortNames must have the same number of elements as genesToAdd.genes'; -0071 dispEM(EM); -0072 end -0073 %Add empty field if it doesn't exist -0074 if ~isfield(newModel,'geneShortNames') -0075 newModel.geneShortNames=largeFiller; -0076 end -0077 newModel.geneShortNames=[newModel.geneShortNames;genesToAdd.geneShortNames(:)]; -0078 else -0079 %Add empty strings if structure is in model -0080 if isfield(newModel,'geneShortNames') -0081 newModel.geneShortNames=[newModel.geneShortNames;filler]; -0082 end -0083 end -0084 -0085 %Don't check the type of geneMiriams -0086 if isfield(genesToAdd,'geneMiriams') -0087 if numel(genesToAdd.geneMiriams)~=nGenes -0088 EM='genesToAdd.geneMiriams must have the same number of elements as genesToAdd.genes'; -0089 dispEM(EM); -0090 end -0091 %Add empty field if it doesn't exist -0092 if ~isfield(newModel,'geneMiriams') -0093 newModel.geneMiriams=cell(nOldGenes,1); +0027 newModel=model; +0028 +0029 if isempty(genesToAdd) +0030 return; +0031 end +0032 +0033 %Check some stuff regarding the required fields +0034 if ~isfield(genesToAdd,'genes') +0035 EM='genes is a required field in genesToAdd'; +0036 dispEM(EM); +0037 else +0038 genesToAdd.genes=convertCharArray(genesToAdd.genes); +0039 end +0040 +0041 %Number of genes +0042 nGenes=numel(genesToAdd.genes); +0043 nOldGenes=numel(model.genes); +0044 filler=cell(nGenes,1); +0045 filler(:)={''}; +0046 largeFiller=cell(nOldGenes,1); +0047 largeFiller(:)={''}; +0048 +0049 %Check that no gene ids are already present in the model +0050 I=ismember(genesToAdd.genes,model.genes); +0051 if all(I) +0052 warning('All genes in genesToAdd.genes are already present in model.genes'); +0053 return +0054 elseif any(I) +0055 
existingGenes=strjoin(genesToAdd.genes(I), ', '); +0056 warning(['The following genes are already present in model.genes and will therefore not be added: ', existingGenes]) +0057 genesToAdd.genes(I)=[]; +0058 if isfield(genesToAdd,'geneShortNames') +0059 genesToAdd.geneShortNames(I)=[]; +0060 end +0061 if isfield(genesToAdd,'proteins') +0062 genesToAdd.proteins(I)=[]; +0063 end +0064 if isfield(genesToAdd,'geneMiriams') +0065 genesToAdd.geneMiriams(I)=[]; +0066 end +0067 else +0068 newModel.genes=[newModel.genes;genesToAdd.genes(:)]; +0069 end +0070 +0071 %Some more checks and if they pass then add each field to the structure +0072 if isfield(genesToAdd,'geneShortNames') +0073 genesToAdd.geneShortNames=convertCharArray(genesToAdd.geneShortNames); +0074 if numel(genesToAdd.geneShortNames)~=nGenes +0075 EM='genesToAdd.geneShortNames must have the same number of elements as genesToAdd.genes'; +0076 dispEM(EM); +0077 end +0078 %Add empty field if it doesn't exist +0079 if ~isfield(newModel,'geneShortNames') +0080 newModel.geneShortNames=largeFiller; +0081 end +0082 newModel.geneShortNames=[newModel.geneShortNames;genesToAdd.geneShortNames(:)]; +0083 else +0084 %Add empty strings if structure is in model +0085 if isfield(newModel,'geneShortNames') +0086 newModel.geneShortNames=[newModel.geneShortNames;filler]; +0087 end +0088 end +0089 if isfield(genesToAdd,'proteins') +0090 genesToAdd.proteins=convertCharArray(genesToAdd.proteins); +0091 if numel(genesToAdd.proteins)~=nGenes +0092 EM='genesToAdd.proteins must have the same number of elements as genesToAdd.genes'; +0093 dispEM(EM); 0094 end -0095 newModel.geneMiriams=[newModel.geneMiriams;genesToAdd.geneMiriams(:)]; -0096 else -0097 if isfield(newModel,'geneMiriams') -0098 newModel.geneMiriams=[newModel.geneMiriams;cell(nGenes,1)]; -0099 end -0100 end -0101 -0102 if isfield(genesToAdd,'geneComps') -0103 if numel(genesToAdd.geneComps)~=nGenes -0104 EM='genesToAdd.geneComps must have the same number of elements as 
genesToAdd.genes'; -0105 dispEM(EM); -0106 end -0107 %Add empty field if it doesn't exist -0108 if ~isfield(newModel,'geneComps') -0109 newModel.geneComps=ones(nOldGenes,1); -0110 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; -0111 dispEM(EM,false); -0112 end -0113 newModel.geneComps=[newModel.geneComps;genesToAdd.geneComps(:)]; -0114 else -0115 if isfield(newModel,'geneComps') -0116 newModel.geneComps=[newModel.geneComps;ones(nGenes,1)]; -0117 fprintf('NOTE: The added genes will be assigned to the first compartment\n'); -0118 end -0119 end -0120 -0121 if isfield(newModel,'geneFrom') -0122 newModel.geneFrom=[newModel.geneFrom;filler]; +0095 %Add empty field if it doesn't exist +0096 if ~isfield(newModel,'proteins') +0097 newModel.proteins=largeFiller; +0098 end +0099 newModel.proteins=[newModel.proteins;genesToAdd.proteins(:)]; +0100 else +0101 %Add empty strings if structure is in model +0102 if isfield(newModel,'proteins') +0103 newModel.proteins=[newModel.proteins;filler]; +0104 end +0105 end +0106 +0107 +0108 %Don't check the type of geneMiriams +0109 if isfield(genesToAdd,'geneMiriams') +0110 if numel(genesToAdd.geneMiriams)~=nGenes +0111 EM='genesToAdd.geneMiriams must have the same number of elements as genesToAdd.genes'; +0112 dispEM(EM); +0113 end +0114 %Add empty field if it doesn't exist +0115 if ~isfield(newModel,'geneMiriams') +0116 newModel.geneMiriams=cell(nOldGenes,1); +0117 end +0118 newModel.geneMiriams=[newModel.geneMiriams;genesToAdd.geneMiriams(:)]; +0119 else +0120 if isfield(newModel,'geneMiriams') +0121 newModel.geneMiriams=[newModel.geneMiriams;cell(nGenes,1)]; +0122 end 0123 end 0124 -0125 if isfield(newModel,'rxnGeneMat') -0126 newModel.rxnGeneMat=[newModel.rxnGeneMat,zeros(length(newModel.rxns),nGenes)]; -0127 end -0128 end +0125 if isfield(genesToAdd,'geneComps') +0126 if numel(genesToAdd.geneComps)~=nGenes +0127 EM='genesToAdd.geneComps 
must have the same number of elements as genesToAdd.genes'; +0128 dispEM(EM); +0129 end +0130 %Add empty field if it doesn't exist +0131 if ~isfield(newModel,'geneComps') +0132 newModel.geneComps=ones(nOldGenes,1); +0133 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; +0134 dispEM(EM,false); +0135 end +0136 newModel.geneComps=[newModel.geneComps;genesToAdd.geneComps(:)]; +0137 else +0138 if isfield(newModel,'geneComps') +0139 newModel.geneComps=[newModel.geneComps;ones(nGenes,1)]; +0140 fprintf('NOTE: The added genes will be assigned to the first compartment\n'); +0141 end +0142 end +0143 +0144 if isfield(newModel,'geneFrom') +0145 newModel.geneFrom=[newModel.geneFrom;filler]; +0146 end +0147 +0148 if isfield(newModel,'rxnGeneMat') +0149 newModel.rxnGeneMat=[newModel.rxnGeneMat,zeros(length(newModel.rxns),nGenes)]; +0150 end +0151 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/checkModelStruct.html b/doc/core/checkModelStruct.html index 2b49a88b..288a1ff0 100644 --- a/doc/core/checkModelStruct.html +++ b/doc/core/checkModelStruct.html @@ -179,305 +179,337 @@

SOURCE CODE ^'The "grRules" field must be a cell array of strings'; 0121 dispEM(EM,throwErrors); 0122 end -0123 end -0124 if isfield(model,'rxnComps') -0125 if ~isnumeric(model.rxnComps) -0126 EM='The "rxnComps" field must be of type "double"'; -0127 dispEM(EM,throwErrors); -0128 end -0129 end -0130 if isfield(model,'inchis') -0131 if ~iscellstr(model.inchis) -0132 EM='The "inchis" field must be a cell array of strings'; -0133 dispEM(EM,throwErrors); -0134 end -0135 end -0136 if isfield(model,'metSmiles') -0137 if ~iscellstr(model.metSmiles) -0138 EM='The "metSmiles" field must be a cell array of strings'; -0139 dispEM(EM,throwErrors); -0140 end -0141 end -0142 if isfield(model,'metFormulas') -0143 if ~iscellstr(model.metFormulas) -0144 EM='The "metFormulas" field must be a cell array of strings'; -0145 dispEM(EM,throwErrors); -0146 end -0147 end -0148 if isfield(model,'metCharges') -0149 if ~isnumeric(model.metCharges) -0150 EM='The "metCharges" field must be a double'; -0151 dispEM(EM,throwErrors); -0152 end -0153 end -0154 if isfield(model,'metDeltaG') -0155 if ~isnumeric(model.metDeltaG) -0156 EM='The "metDeltaG" field must be a double'; -0157 dispEM(EM,throwErrors); -0158 end -0159 end -0160 if isfield(model,'subSystems') -0161 for i=1:numel(model.subSystems) -0162 if ~iscell(model.subSystems{i,1}) -0163 EM='The "subSystems" field must be a cell array'; -0164 dispEM(EM,throwErrors); -0165 end -0166 end -0167 end -0168 if isfield(model,'eccodes') -0169 if ~iscellstr(model.eccodes) -0170 EM='The "eccodes" field must be a cell array of strings'; -0171 dispEM(EM,throwErrors); -0172 end -0173 end -0174 if isfield(model,'unconstrained') -0175 if ~isnumeric(model.unconstrained) -0176 EM='The "unconstrained" field must be of type "double"'; -0177 dispEM(EM,throwErrors); -0178 end -0179 end -0180 if isfield(model,'rxnNotes') -0181 if ~iscellstr(model.rxnNotes) -0182 EM='The "rxnNotes" field must be a cell array of strings'; -0183 dispEM(EM,throwErrors); -0184 end -0185 
end -0186 if isfield(model,'rxnReferences') -0187 if ~iscellstr(model.rxnReferences) -0188 EM='The "rxnReferences" field must be a cell array of strings'; -0189 dispEM(EM,throwErrors); -0190 end -0191 end -0192 if isfield(model,'rxnConfidenceScores') -0193 if ~isnumeric(model.rxnConfidenceScores) -0194 EM='The "rxnConfidenceScores" field must be a double'; -0195 dispEM(EM,throwErrors); -0196 end -0197 end -0198 if isfield(model,'rxnDeltaG') -0199 if ~isnumeric(model.rxnDeltaG) -0200 EM='The "rxnDeltaG" field must be a double'; -0201 dispEM(EM,throwErrors); -0202 end -0203 end -0204 -0205 %Empty strings -0206 if isempty(model.id) -0207 EM='The "id" field cannot be empty'; -0208 dispEM(EM,throwErrors); -0209 end -0210 if any(cellfun(@isempty,model.rxns)) -0211 EM='The model contains empty reaction IDs'; -0212 dispEM(EM,throwErrors); -0213 end -0214 if any(cellfun(@isempty,model.mets)) -0215 EM='The model contains empty metabolite IDs'; -0216 dispEM(EM,throwErrors); -0217 end -0218 if any(cellfun(@isempty,model.comps)) -0219 EM='The model contains empty compartment IDs'; -0220 dispEM(EM,throwErrors); -0221 end -0222 EM='The following metabolites have empty names:'; -0223 dispEM(EM,throwErrors,model.mets(cellfun(@isempty,model.metNames)),trimWarnings); -0224 -0225 if isfield(model,'genes') -0226 if any(cellfun(@isempty,model.genes)) -0227 EM='The model contains empty gene IDs'; -0228 dispEM(EM,throwErrors); -0229 end -0230 end -0231 -0232 %Duplicates -0233 EM='The following reaction IDs are duplicates:'; -0234 dispEM(EM,throwErrors,model.rxns(duplicates(model.rxns)),trimWarnings); -0235 EM='The following metabolite IDs are duplicates:'; -0236 dispEM(EM,throwErrors,model.mets(duplicates(model.mets)),trimWarnings); -0237 EM='The following compartment IDs are duplicates:'; -0238 dispEM(EM,throwErrors,model.comps(duplicates(model.comps)),trimWarnings); -0239 if isfield(model,'genes') -0240 EM='The following genes are duplicates:'; -0241 
dispEM(EM,throwErrors,model.genes(duplicates(model.genes)),trimWarnings); -0242 end -0243 metInComp=strcat(model.metNames,'[',model.comps(model.metComps),']'); -0244 EM='The following metabolites already exist in the same compartment:'; -0245 dispEM(EM,throwErrors,metInComp(duplicates(metInComp)),trimWarnings); +0123 if ~isfield(model,'genes') +0124 EM='If "grRules" field exists, the model should also contain a "genes" field'; +0125 dispEM(EM,throwErrors); +0126 else +0127 geneList = strjoin(model.grRules); +0128 geneList = regexp(geneList,' |)|(|and|or','split'); % Remove all grRule punctuation +0129 geneList = geneList(~cellfun(@isempty,geneList)); % Remove spaces and empty genes +0130 geneList = setdiff(unique(geneList),model.genes); +0131 if ~isempty(geneList) +0132 problemGrRules = model.rxns(contains(model.grRules,geneList)); +0133 problemGrRules = strjoin(problemGrRules(:),'; '); +0134 EM=['The reaction(s) "' problemGrRules '" contain the following genes in its "grRules" field, but these are not in the "genes" field:']; +0135 dispEM(EM,throwErrors,geneList); +0136 end +0137 end +0138 end +0139 if isfield(model,'rxnComps') +0140 if ~isnumeric(model.rxnComps) +0141 EM='The "rxnComps" field must be of type "double"'; +0142 dispEM(EM,throwErrors); +0143 end +0144 end +0145 if isfield(model,'inchis') +0146 if ~iscellstr(model.inchis) +0147 EM='The "inchis" field must be a cell array of strings'; +0148 dispEM(EM,throwErrors); +0149 end +0150 end +0151 if isfield(model,'metSmiles') +0152 if ~iscellstr(model.metSmiles) +0153 EM='The "metSmiles" field must be a cell array of strings'; +0154 dispEM(EM,throwErrors); +0155 end +0156 end +0157 if isfield(model,'metFormulas') +0158 if ~iscellstr(model.metFormulas) +0159 EM='The "metFormulas" field must be a cell array of strings'; +0160 dispEM(EM,throwErrors); +0161 end +0162 end +0163 if isfield(model,'metCharges') +0164 if ~isnumeric(model.metCharges) +0165 EM='The "metCharges" field must be a double'; +0166 
dispEM(EM,throwErrors); +0167 end +0168 end +0169 if isfield(model,'metDeltaG') +0170 if ~isnumeric(model.metDeltaG) +0171 EM='The "metDeltaG" field must be a double'; +0172 dispEM(EM,throwErrors); +0173 end +0174 end +0175 if isfield(model,'subSystems') +0176 for i=1:numel(model.subSystems) +0177 if ~iscell(model.subSystems{i,1}) +0178 EM='The "subSystems" field must be a cell array'; +0179 dispEM(EM,throwErrors); +0180 end +0181 end +0182 end +0183 if isfield(model,'eccodes') +0184 if ~iscellstr(model.eccodes) +0185 EM='The "eccodes" field must be a cell array of strings'; +0186 dispEM(EM,throwErrors); +0187 end +0188 end +0189 if isfield(model,'unconstrained') +0190 if ~isnumeric(model.unconstrained) +0191 EM='The "unconstrained" field must be of type "double"'; +0192 dispEM(EM,throwErrors); +0193 end +0194 end +0195 if isfield(model,'rxnNotes') +0196 if ~iscellstr(model.rxnNotes) +0197 EM='The "rxnNotes" field must be a cell array of strings'; +0198 dispEM(EM,throwErrors); +0199 end +0200 end +0201 if isfield(model,'rxnReferences') +0202 if ~iscellstr(model.rxnReferences) +0203 EM='The "rxnReferences" field must be a cell array of strings'; +0204 dispEM(EM,throwErrors); +0205 end +0206 end +0207 if isfield(model,'rxnConfidenceScores') +0208 if ~isnumeric(model.rxnConfidenceScores) +0209 EM='The "rxnConfidenceScores" field must be a double'; +0210 dispEM(EM,throwErrors); +0211 end +0212 end +0213 if isfield(model,'rxnDeltaG') +0214 if ~isnumeric(model.rxnDeltaG) +0215 EM='The "rxnDeltaG" field must be a double'; +0216 dispEM(EM,throwErrors); +0217 end +0218 end +0219 +0220 %Empty strings +0221 if isempty(model.id) +0222 EM='The "id" field cannot be empty'; +0223 dispEM(EM,throwErrors); +0224 end +0225 if any(cellfun(@isempty,model.rxns)) +0226 EM='The model contains empty reaction IDs'; +0227 dispEM(EM,throwErrors); +0228 end +0229 if any(cellfun(@isempty,model.mets)) +0230 EM='The model contains empty metabolite IDs'; +0231 dispEM(EM,throwErrors); +0232 end 
+0233 if any(cellfun(@isempty,model.comps)) +0234 EM='The model contains empty compartment IDs'; +0235 dispEM(EM,throwErrors); +0236 end +0237 EM='The following metabolites have empty names:'; +0238 dispEM(EM,throwErrors,model.mets(cellfun(@isempty,model.metNames)),trimWarnings); +0239 +0240 if isfield(model,'genes') +0241 if any(cellfun(@isempty,model.genes)) +0242 EM='The model contains empty gene IDs'; +0243 dispEM(EM,throwErrors); +0244 end +0245 end 0246 -0247 %Elements never used (print only as warnings -0248 EM='The following reactions are empty (no involved metabolites):'; -0249 dispEM(EM,false,model.rxns(~any(model.S,1)),trimWarnings); -0250 EM='The following metabolites are never used in a reaction:'; -0251 dispEM(EM,false,model.mets(~any(model.S,2)),trimWarnings); -0252 if isfield(model,'genes') -0253 EM='The following genes are not associated to a reaction:'; -0254 dispEM(EM,false,model.genes(~any(model.rxnGeneMat,1)),trimWarnings); -0255 end -0256 I=true(numel(model.comps),1); -0257 I(model.metComps)=false; -0258 EM='The following compartments contain no metabolites:'; -0259 dispEM(EM,false,model.comps(I),trimWarnings); -0260 -0261 %Contradicting bounds -0262 EM='The following reactions have contradicting bounds:'; -0263 dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); -0264 EM='The following reactions have bounds contradicting their reversibility:'; -0265 dispEM(EM,throwErrors,model.rxns(model.lb<0 & model.rev==0),trimWarnings); +0247 %Validate format of ids +0248 fields = {'rxns';'mets';'comps';'genes'}; +0249 fieldNames = {'reaction';'metabolite';'compartment';'gene'}; +0250 fieldPrefix = {'R_';'M_';'C_';'G_'}; +0251 for i=1:numel(fields) +0252 try +0253 numIDs = ~startsWith(model.(fields{i}),regexpPattern('^[a-zA-Z_]')); +0254 catch +0255 numIDs = []; +0256 end +0257 if any(numIDs) +0258 EM = ['The following ' fieldNames{i} ' identifiers do not start '... +0259 'with a letter or _ (conflicting with SBML specifications). '... 
+0260 'This does not impact RAVEN functionality, but be aware that '... +0261 'exportModel will automatically add ' fieldPrefix{i} ... +0262 ' prefixes to all ' fieldNames{i} ' identifiers:']; +0263 dispEM(EM,false,{model.(fields{i}){numIDs}},trimWarnings); +0264 end +0265 end 0266 -0267 %Multiple or no objective functions not allowed in SBML L3V1 FBCv2 -0268 if numel(find(model.c))>1 -0269 EM='Multiple objective functions found. This might be intended, but results in FBCv2 non-compliant SBML file when exported'; -0270 dispEM(EM,false,model.rxns(find(model.c)),trimWarnings); -0271 elseif ~any(model.c) -0272 EM='No objective function found. This might be intended, but results in FBCv2 non-compliant SBML file when exported'; -0273 dispEM(EM,false); -0274 end -0275 -0276 EM='The following reactions have contradicting bounds:'; -0277 dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); -0278 -0279 %Mapping of compartments -0280 if isfield(model,'compOutside') -0281 EM='The following compartments are in "compOutside" but not in "comps":'; -0282 dispEM(EM,throwErrors,setdiff(model.compOutside,[{''};model.comps]),trimWarnings); -0283 end -0284 -0285 %Met names which start with number -0286 I=false(numel(model.metNames),1); -0287 for i=1:numel(model.metNames) -0288 index=strfind(model.metNames{i},' '); -0289 if any(index) -0290 if any(str2double(model.metNames{i}(1:index(1)-1))) -0291 I(i)=true; -0292 end -0293 end -0294 end -0295 EM='The following metabolite IDs begin with a number directly followed by space:'; -0296 dispEM(EM,throwErrors,model.mets(I),trimWarnings); -0297 -0298 %Non-parseable composition -0299 if isfield(model,'metFormulas') -0300 [~, ~, exitFlag]=parseFormulas(model.metFormulas,true,false); -0301 EM='The composition for the following metabolites could not be parsed:'; -0302 dispEM(EM,false,model.mets(exitFlag==-1),trimWarnings); -0303 end -0304 -0305 %Check if there are metabolites with different names but the same MIRIAM -0306 %codes 
-0307 if isfield(model,'metMiriams') -0308 miriams=containers.Map(); -0309 for i=1:numel(model.mets) -0310 if ~isempty(model.metMiriams{i}) -0311 %Loop through and add for each miriam -0312 for j=1:numel(model.metMiriams{i}.name) -0313 %Get existing metabolite indexes -0314 current=strcat(model.metMiriams{i}.name{j},'/',model.metMiriams{i}.value{j}); -0315 if isKey(miriams,current) -0316 existing=miriams(current); -0317 else -0318 existing=[]; -0319 end -0320 miriams(current)=[existing;i]; -0321 end -0322 end -0323 end -0324 -0325 %Get all keys -0326 allMiriams=keys(miriams); -0327 -0328 hasMultiple=false(numel(allMiriams),1); -0329 for i=1:numel(allMiriams) -0330 if numel(miriams(allMiriams{i}))>1 -0331 %Check if they all have the same name -0332 if numel(unique(model.metNames(miriams(allMiriams{i}))))>1 -0333 if ~regexp(allMiriams{i},'^sbo\/SBO:') % SBO terms are expected to be multiple -0334 hasMultiple(i)=true; -0335 end -0336 end -0337 end -0338 end -0339 -0340 %Print output -0341 EM='The following MIRIAM strings are associated to more than one unique metabolite name:'; -0342 dispEM(EM,false,allMiriams(hasMultiple),trimWarnings); -0343 end -0344 -0345 %Check if there are metabolites with different names but the same InChI -0346 %codes -0347 if isfield(model,'inchis') -0348 inchis=containers.Map(); -0349 for i=1:numel(model.mets) -0350 if ~isempty(model.inchis{i}) -0351 %Get existing metabolite indexes -0352 if isKey(inchis,model.inchis{i}) -0353 existing=inchis(model.inchis{i}); -0354 else -0355 existing=[]; -0356 end -0357 inchis(model.inchis{i})=[existing;i]; -0358 end -0359 end -0360 -0361 %Get all keys -0362 allInchis=keys(inchis); -0363 -0364 hasMultiple=false(numel(allInchis),1); -0365 for i=1:numel(allInchis) -0366 if numel(inchis(allInchis{i}))>1 -0367 %Check if they all have the same name -0368 if numel(unique(model.metNames(inchis(allInchis{i}))))>1 -0369 hasMultiple(i)=true; -0370 end -0371 end -0372 end -0373 -0374 %Print output -0375 EM='The 
following InChI strings are associated to more than one unique metabolite name:'; -0376 dispEM(EM,false,allInchis(hasMultiple),trimWarnings); -0377 end -0378 -0379 % %Check if there are metabolites with different names but the same SMILES -0380 % if isfield(model,'metSmiles') -0381 % metSmiles=containers.Map(); -0382 % for i=1:numel(model.mets) -0383 % if ~isempty(model.metSmiles{i}) -0384 % %Get existing metabolite indexes -0385 % if isKey(metSmiles,model.metSmiles{i}) -0386 % existing=metSmiles(model.metSmiles{i}); -0387 % else -0388 % existing=[]; -0389 % end -0390 % metSmiles(model.metSmiles{i})=[existing;i]; -0391 % end -0392 % end -0393 % -0394 % %Get all keys -0395 % allmetSmiles=keys(metSmiles); -0396 % -0397 % hasMultiple=false(numel(metSmiles),1); -0398 % for i=1:numel(metSmiles) -0399 % if numel(metSmiles(metSmiles{i}))>1 -0400 % %Check if they all have the same name -0401 % if numel(unique(model.metNames(metSmiles(allmetSmiles{i}))))>1 -0402 % hasMultiple(i)=true; -0403 % end -0404 % end -0405 % end -0406 % -0407 % %Print output -0408 % EM='The following metSmiles strings are associated to more than one unique metabolite name:'; -0409 % dispEM(EM,false,allmetSmiles(hasMultiple),trimWarnings); -0410 % end -0411 end -0412 -0413 function I=duplicates(strings) -0414 I=false(numel(strings),1); -0415 [J, K]=unique(strings); -0416 if numel(J)~=numel(strings) -0417 L=1:numel(strings); -0418 L(K)=[]; -0419 I(L)=true; -0420 end -0421 end +0267 %Duplicates +0268 EM='The following reaction IDs are duplicates:'; +0269 dispEM(EM,throwErrors,model.rxns(duplicates(model.rxns)),trimWarnings); +0270 EM='The following metabolite IDs are duplicates:'; +0271 dispEM(EM,throwErrors,model.mets(duplicates(model.mets)),trimWarnings); +0272 EM='The following compartment IDs are duplicates:'; +0273 dispEM(EM,throwErrors,model.comps(duplicates(model.comps)),trimWarnings); +0274 if isfield(model,'genes') +0275 EM='The following genes are duplicates:'; +0276 
dispEM(EM,throwErrors,model.genes(duplicates(model.genes)),trimWarnings); +0277 end +0278 metInComp=strcat(model.metNames,'[',model.comps(model.metComps),']'); +0279 EM='The following metabolites already exist in the same compartment:'; +0280 dispEM(EM,throwErrors,metInComp(duplicates(metInComp)),trimWarnings); +0281 +0282 %Elements never used (print only as warnings +0283 EM='The following reactions are empty (no involved metabolites):'; +0284 dispEM(EM,false,model.rxns(~any(model.S,1)),trimWarnings); +0285 EM='The following metabolites are never used in a reaction:'; +0286 dispEM(EM,false,model.mets(~any(model.S,2)),trimWarnings); +0287 if isfield(model,'genes') +0288 EM='The following genes are not associated to a reaction:'; +0289 dispEM(EM,false,model.genes(~any(model.rxnGeneMat,1)),trimWarnings); +0290 end +0291 I=true(numel(model.comps),1); +0292 I(model.metComps)=false; +0293 EM='The following compartments contain no metabolites:'; +0294 dispEM(EM,false,model.comps(I),trimWarnings); +0295 +0296 %Contradicting bounds +0297 EM='The following reactions have contradicting bounds (lower bound is higher than upper bound):'; +0298 dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); +0299 EM='The following reactions have lower and upper bounds that indicate reversibility, but are indicated as irreversible in model.rev:'; +0300 dispEM(EM,false,model.rxns(model.lb < 0 & model.ub > 0 & model.rev==0),trimWarnings); +0301 +0302 %Multiple or no objective functions not allowed in SBML L3V1 FBCv2 +0303 if numel(find(model.c))>1 +0304 EM='Multiple objective functions found. This might be intended, but results in FBCv2 non-compliant SBML file when exported'; +0305 dispEM(EM,false,model.rxns(find(model.c)),trimWarnings); +0306 elseif ~any(model.c) +0307 EM='No objective function found. 
This might be intended, but results in FBCv2 non-compliant SBML file when exported'; +0308 dispEM(EM,false); +0309 end +0310 +0311 %Mapping of compartments +0312 if isfield(model,'compOutside') +0313 EM='The following compartments are in "compOutside" but not in "comps":'; +0314 dispEM(EM,throwErrors,setdiff(model.compOutside,[{''};model.comps]),trimWarnings); +0315 end +0316 +0317 %Met names which start with number +0318 I=false(numel(model.metNames),1); +0319 for i=1:numel(model.metNames) +0320 index=strfind(model.metNames{i},' '); +0321 if any(index) +0322 if any(str2double(model.metNames{i}(1:index(1)-1))) +0323 I(i)=true; +0324 end +0325 end +0326 end +0327 EM='The following metabolite names begin with a number directly followed by space, which could potentially cause problems:'; +0328 dispEM(EM,false,model.metNames(I),trimWarnings); +0329 +0330 %Non-parseable composition +0331 if isfield(model,'metFormulas') +0332 [~, ~, exitFlag]=parseFormulas(model.metFormulas,true,false); +0333 EM='The composition for the following metabolites could not be parsed:'; +0334 dispEM(EM,false,model.mets(exitFlag==-1),trimWarnings); +0335 end +0336 +0337 %Check if there are metabolites with different names but the same MIRIAM +0338 %codes +0339 if isfield(model,'metMiriams') +0340 miriams=containers.Map(); +0341 for i=1:numel(model.mets) +0342 if ~isempty(model.metMiriams{i}) +0343 %Loop through and add for each miriam +0344 for j=1:numel(model.metMiriams{i}.name) +0345 %Get existing metabolite indexes +0346 current=strcat(model.metMiriams{i}.name{j},'/',model.metMiriams{i}.value{j}); +0347 if isKey(miriams,current) +0348 existing=miriams(current); +0349 else +0350 existing=[]; +0351 end +0352 miriams(current)=[existing;i]; +0353 end +0354 end +0355 end +0356 +0357 %Get all keys +0358 allMiriams=keys(miriams); +0359 +0360 hasMultiple=false(numel(allMiriams),1); +0361 for i=1:numel(allMiriams) +0362 if numel(miriams(allMiriams{i}))>1 +0363 %Check if they all have the same name 
+0364 if numel(unique(model.metNames(miriams(allMiriams{i}))))>1 +0365 if ~regexp(allMiriams{i},'^sbo\/SBO:') % SBO terms are expected to be multiple +0366 hasMultiple(i)=true; +0367 end +0368 end +0369 end +0370 end +0371 +0372 %Print output +0373 EM='The following MIRIAM strings are associated to more than one unique metabolite name:'; +0374 dispEM(EM,false,allMiriams(hasMultiple),trimWarnings); +0375 end +0376 +0377 %Check if there are metabolites with different names but the same InChI +0378 %codes +0379 if isfield(model,'inchis') +0380 inchis=containers.Map(); +0381 for i=1:numel(model.mets) +0382 if ~isempty(model.inchis{i}) +0383 %Get existing metabolite indexes +0384 if isKey(inchis,model.inchis{i}) +0385 existing=inchis(model.inchis{i}); +0386 else +0387 existing=[]; +0388 end +0389 inchis(model.inchis{i})=[existing;i]; +0390 end +0391 end +0392 +0393 %Get all keys +0394 allInchis=keys(inchis); +0395 +0396 hasMultiple=false(numel(allInchis),1); +0397 for i=1:numel(allInchis) +0398 if numel(inchis(allInchis{i}))>1 +0399 %Check if they all have the same name +0400 if numel(unique(model.metNames(inchis(allInchis{i}))))>1 +0401 hasMultiple(i)=true; +0402 end +0403 end +0404 end +0405 +0406 %Print output +0407 EM='The following InChI strings are associated to more than one unique metabolite name:'; +0408 dispEM(EM,false,allInchis(hasMultiple),trimWarnings); +0409 end +0410 +0411 % %Check if there are metabolites with different names but the same SMILES +0412 % if isfield(model,'metSmiles') +0413 % metSmiles=containers.Map(); +0414 % for i=1:numel(model.mets) +0415 % if ~isempty(model.metSmiles{i}) +0416 % %Get existing metabolite indexes +0417 % if isKey(metSmiles,model.metSmiles{i}) +0418 % existing=metSmiles(model.metSmiles{i}); +0419 % else +0420 % existing=[]; +0421 % end +0422 % metSmiles(model.metSmiles{i})=[existing;i]; +0423 % end +0424 % end +0425 % +0426 % %Get all keys +0427 % allmetSmiles=keys(metSmiles); +0428 % +0429 % 
hasMultiple=false(numel(metSmiles),1); +0430 % for i=1:numel(metSmiles) +0431 % if numel(metSmiles(metSmiles{i}))>1 +0432 % %Check if they all have the same name +0433 % if numel(unique(model.metNames(metSmiles(allmetSmiles{i}))))>1 +0434 % hasMultiple(i)=true; +0435 % end +0436 % end +0437 % end +0438 % +0439 % %Print output +0440 % EM='The following metSmiles strings are associated to more than one unique metabolite name:'; +0441 % dispEM(EM,false,allmetSmiles(hasMultiple),trimWarnings); +0442 % end +0443 end +0444 +0445 function I=duplicates(strings) +0446 I=false(numel(strings),1); +0447 [J, K]=unique(strings); +0448 if numel(J)~=numel(strings) +0449 L=1:numel(strings); +0450 L(K)=[]; +0451 I(L)=true; +0452 end +0453 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/constructS.html b/doc/core/constructS.html index b61c0b0b..0ba99957 100644 --- a/doc/core/constructS.html +++ b/doc/core/constructS.html @@ -219,31 +219,32 @@

SOURCE CODE ^', ')],'') 0153 else 0154 missingMet = find(~metsPresent); -0155 missingMet = char(strcat(metsToS(missingMet),' (reaction:',rxns(rxnsToS(missingMet)),')\n')); -0156 error(['Could not find the following metabolites (reaction indicated) in the metabolite list: \n' ... -0157 missingMet '%s'],''); -0158 end -0159 end -0160 linearIndices=sub2ind(size(S),metsLoc,rxnsToS); -0161 S(linearIndices)=coefToS; -0162 S=sparse(S); -0163 end -0164 -0165 function equ=fixEquations(equ) -0166 %If the equation starts with "=>" or "<=>" then add a space again. This is -0167 %an alternative way to represent uptake reactions. The opposite way for -0168 %producing reactions -0169 equ=equ(:); -0170 for i=1:numel(equ) -0171 if strcmp(equ{i}(1:2),'=>') || strcmp(equ{i}(1:3),'<=>') -0172 equ{i}=[' ' equ{i}]; -0173 else -0174 if strcmp(equ{i}(end-1:end),'=>') || strcmp(equ{i}(end-2:end),'<=>') -0175 equ{i}=[equ{i} ' ']; -0176 end -0177 end -0178 end -0179 end +0155 missingMet = strcat(metsToS(missingMet),' (reaction:',rxns(rxnsToS(missingMet)),')\n'); +0156 missingMet = strjoin(missingMet,''); +0157 error(['Could not find the following metabolites (reaction indicated) in the metabolite list: \n' ... +0158 missingMet '%s'],''); +0159 end +0160 end +0161 linearIndices=sub2ind(size(S),metsLoc,rxnsToS); +0162 S(linearIndices)=coefToS; +0163 S=sparse(S); +0164 end +0165 +0166 function equ=fixEquations(equ) +0167 %If the equation starts with "=>" or "<=>" then add a space again. This is +0168 %an alternative way to represent uptake reactions. The opposite way for +0169 %producing reactions +0170 equ=equ(:); +0171 for i=1:numel(equ) +0172 if strcmp(equ{i}(1:2),'=>') || strcmp(equ{i}(1:3),'<=>') +0173 equ{i}=[' ' equ{i}]; +0174 else +0175 if strcmp(equ{i}(end-1:end),'=>') || strcmp(equ{i}(end-2:end),'<=>') +0176 equ{i}=[equ{i} ' ']; +0177 end +0178 end +0179 end +0180 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/deleteUnusedGenes.html b/doc/core/deleteUnusedGenes.html index ff226d11..6a9bd7b1 100644 --- a/doc/core/deleteUnusedGenes.html +++ b/doc/core/deleteUnusedGenes.html @@ -90,18 +90,22 @@

SOURCE CODE ^end 0039 -0040 if isfield(reducedModel,'geneMiriams') -0041 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); +0040 if isfield(reducedModel,'proteins') +0041 reducedModel.proteins=reducedModel.proteins(toKeep); 0042 end 0043 -0044 if isfield(reducedModel,'geneFrom') -0045 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); +0044 if isfield(reducedModel,'geneMiriams') +0045 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); 0046 end 0047 -0048 if isfield(reducedModel,'geneComps') -0049 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0048 if isfield(reducedModel,'geneFrom') +0049 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); 0050 end -0051 end +0051 +0052 if isfield(reducedModel,'geneComps') +0053 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0054 end +0055 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/dispEM.html b/doc/core/dispEM.html index ed113eac..58f8f66c 100644 --- a/doc/core/dispEM.html +++ b/doc/core/dispEM.html @@ -90,22 +90,26 @@

SOURCE CODE ^end 0034 if throwErrors==false 0035 errorText=['WARNING: ' string '\n']; -0036 else -0037 errorText=[string '\n']; -0038 end -0039 if ~isempty(toList) -0040 for i=1:numel(toList) -0041 errorText=[errorText '\t' toList{i} '\n']; -0042 end -0043 end -0044 if throwErrors==false -0045 %Escape special characters, required for fprintf -0046 errorText=regexprep(errorText,'(\\|\%|'')(\\n)$','\\$0'); -0047 fprintf([errorText '\n']); -0048 else -0049 throw(MException('',errorText)); -0050 end -0051 end +0036 % Wrap text to command window size +0037 sz = get(0, 'CommandWindowSize'); +0038 errorText = textwrap({errorText},sz(1)); +0039 errorText = strjoin(errorText,'\n'); +0040 else +0041 errorText=[string '\n']; +0042 end +0043 if ~isempty(toList) +0044 for i=1:numel(toList) +0045 errorText=[errorText '\t' toList{i} '\n']; +0046 end +0047 end +0048 if throwErrors==false +0049 %Escape special characters, required for fprintf +0050 errorText=regexprep(errorText,'(\\|\%|'')(\\n)$','\\$0'); +0051 fprintf([errorText '\n']); +0052 else +0053 throw(MException('',errorText)); +0054 end +0055 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/getExchangeRxns.html b/doc/core/getExchangeRxns.html index 1baa037c..12648ae4 100644 --- a/doc/core/getExchangeRxns.html +++ b/doc/core/getExchangeRxns.html @@ -28,19 +28,43 @@

SYNOPSIS ^DESCRIPTION ^

 getExchangeRxns
-   Retrieves the exchange reactions from a model
+   Retrieves the exchange reactions from a model. Exchange reactions are
+   identified by having either no substrates or no products.
 
+ Input:
    model               a model structure
-   reactionType        retrieve all reactions ('both'), only production
-                       ('out'), or only consumption ('in') (optional, default
-                       'both')
+   reactionType        which exchange reactions should be returned
+                       'all'     all reactions, irrespective of reaction
+                                 bounds
+                       'uptake'  reactions with bounds that imply that
+                                 only uptake is allowed. Reaction
+                                 direction, upper and lower bounds are
+                                 all considered
+                       'excrete' reactions with bounds that imply that
+                                 only excretion is allowed. Reaction
+                                 direction, upper and lower bounds are
+                                 all considered
+                       'reverse' reactions with non-zero upper and lower
+                                 bounds that imply that both uptake and
+                                 excretion are allowed
+                       'blocked' reactions that have zero upper and lower
+                                 bounds, not allowing any flux
+                       'in'      reactions where the boundary metabolite
+                                 is the substrate of the reaction, a
+                                 positive flux value would imply uptake,
+                                 but reaction bounds are not considered
+                       'out'     reactions where the boundary metabolite
+                                 is the product of the reaction, a
+                                 positive flux value would imply excretion,
+                                 but reaction bounds are not considered.
 
+ Output:
    exchangeRxns        cell array with the IDs of the exchange reactions
    exchangeRxnsIndexes vector with the indexes of the exchange reactions
 
-   Exchange reactions are defined as reactions which involve only products
-   or only reactants. If the unconstrained field is present, then that is
-   used instead.
+ Note:
+   The union of 'in' and 'out' equals 'all'. Also, the union of 'uptake',
+   'excrete', 'reverse' and 'blocked' equals 'all'.
 
  Usage: [exchangeRxns,exchangeRxnsIndexes]=getExchangeRxns(model,reactionType)
@@ -59,51 +83,88 @@

CROSS-REFERENCE INFORMATION ^
 <h2><a name=SOURCE CODE ^

0001 function [exchangeRxns, exchangeRxnsIndexes]=getExchangeRxns(model,reactionType)
 0002 % getExchangeRxns
-0003 %   Retrieves the exchange reactions from a model
-0004 %
-0005 %   model               a model structure
-0006 %   reactionType        retrieve all reactions ('both'), only production
-0007 %                       ('out'), or only consumption ('in') (optional, default
-0008 %                       'both')
-0009 %
-0010 %   exchangeRxns        cell array with the IDs of the exchange reactions
-0011 %   exchangeRxnsIndexes vector with the indexes of the exchange reactions
-0012 %
-0013 %   Exchange reactions are defined as reactions which involve only products
-0014 %   or only reactants. If the unconstrained field is present, then that is
-0015 %   used instead.
-0016 %
-0017 % Usage: [exchangeRxns,exchangeRxnsIndexes]=getExchangeRxns(model,reactionType)
-0018 
-0019 if nargin<2
-0020     reactionType='both';
-0021 else
-0022     reactionType=char(reactionType);
-0023 end
-0024 
-0025 hasNoProducts=sparse(numel(model.rxns),1);
-0026 hasNoReactants=sparse(numel(model.rxns),1);
-0027 
-0028 if isfield(model,'unconstrained')
-0029     if strcmpi(reactionType,'both') || strcmpi(reactionType,'out')
-0030         [~, I]=find(model.S(model.unconstrained~=0,:)>0);
-0031         hasNoProducts(I)=true;
-0032     end
-0033     if strcmpi(reactionType,'both') || strcmpi(reactionType,'in')
-0034         [~, I]=find(model.S(model.unconstrained~=0,:)<0);
-0035         hasNoReactants(I)=true;
-0036     end
-0037 else
-0038     if strcmpi(reactionType,'both') || strcmpi(reactionType,'out')
-0039         hasNoProducts=sum((model.S>0))==0;
-0040     end
-0041     if strcmpi(reactionType,'both') || strcmpi(reactionType,'in')
-0042         hasNoReactants=sum((model.S<0))==0;
-0043     end
-0044 end
-0045 exchangeRxnsIndexes=find(hasNoProducts(:) | hasNoReactants(:));
-0046 exchangeRxns=model.rxns(exchangeRxnsIndexes);
-0047 end
+0003 % Retrieves the exchange reactions from a model. Exchange reactions are +0004 % identified by having either no substrates or products. +0005 % +0006 % Input: +0007 % model a model structure +0008 % reactionType which exchange reactions should be returned +0009 % 'all' all reactions, irrespective of reaction +0010 % bounds +0011 % 'uptake' reactions with bounds that imply that +0012 % only uptake are allowed. Reaction +0013 % direction, upper and lower bounds are +0014 % all considered +0015 % 'excrete' reactions with bounds that imply that +0016 % only excretion are allowed. Reaction +0017 % direction, upper and lower bounds are +0018 % all considered +0019 % 'reverse' reactions with non-zero upper and lower +0020 % bounds that imply that both uptake and +0021 % excretion are allowed +0022 % 'blocked' reactions that have zero upper and lower +0023 % bounds, not allowing any flux +0024 % 'in' reactions where the boundary metabolite +0025 % is the substrate of the reaction, a +0026 % positive flux value would imply uptake, +0027 % but reaction bounds are not considered +0028 % 'out' reactions where the boundary metabolite +0029 % is the product of the reaction, a +0030 % positive flux value would imply excretion, +0031 % but reaction bounds are not considered. +0032 % +0033 % Output: +0034 % exchangeRxns cell array with the IDs of the exchange reactions +0035 % exchangeRxnsIndexes vector with the indexes of the exchange reactions +0036 % +0037 % Note: +0038 % The union of 'in' and 'out' equals 'all'. Also, the union of 'uptake', +0039 % 'excrete', 'reverse' and 'blocked' equals all. 
+0040 % +0041 % Usage: [exchangeRxns,exchangeRxnsIndexes]=getExchangeRxns(model,reactionType) +0042 +0043 if nargin<2 +0044 reactionType='all'; +0045 else +0046 reactionType=char(reactionType); +0047 end +0048 +0049 % Find exchange reactions +0050 if isfield(model, 'unconstrained') +0051 [~, I]=find(model.S(model.unconstrained~=0,:)>0); +0052 hasNoProd(I)=true; +0053 [~, I]=find(model.S(model.unconstrained~=0,:)<0); +0054 hasNoSubs(I)=true; +0055 else +0056 hasNoProd = transpose(find(sum(model.S>0)==0)); +0057 hasNoSubs = transpose(find(sum(model.S<0)==0)); +0058 end +0059 allExch = [hasNoProd; hasNoSubs]; +0060 +0061 switch reactionType +0062 case {'both','all'} % For legacy reasons, 'both' is also allowed +0063 exchangeRxnsIndexes = allExch; +0064 case 'in' +0065 exchangeRxnsIndexes = hasNoSubs; +0066 case 'out' +0067 exchangeRxnsIndexes = hasNoProd; +0068 case 'blocked' +0069 exchangeRxnsIndexes = allExch(model.lb(allExch) == 0 & model.ub(allExch) == 0); +0070 case 'reverse' +0071 exchangeRxnsIndexes = allExch(model.lb(allExch) < 0 & model.ub(allExch) > 0); +0072 case 'uptake' +0073 +0074 exchangeRxnsIndexes = allExch([(model.lb(hasNoProd) < 0 & model.ub(hasNoProd) <= 0); ... +0075 (model.lb(hasNoSubs) >= 0 & model.ub(hasNoSubs) > 0)]); +0076 case 'excrete' +0077 exchangeRxnsIndexes = allExch([(model.lb(hasNoProd) >= 0 & model.ub(hasNoProd) > 0); ... +0078 (model.lb(hasNoSubs) < 0 & model.ub(hasNoSubs) <= 0)]); +0079 otherwise +0080 error('Invalid reactionType specified') +0081 end +0082 exchangeRxnsIndexes = sort(exchangeRxnsIndexes); +0083 exchangeRxns = model.rxns(exchangeRxnsIndexes); +0084 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/getModelFromHomology.html b/doc/core/getModelFromHomology.html index 6e0e7f06..e8f6d374 100644 --- a/doc/core/getModelFromHomology.html +++ b/doc/core/getModelFromHomology.html @@ -214,447 +214,450 @@

SOURCE CODE ^for i=1:numel(models) 0109 modelNames{i}=models{i}.id; -0110 %Gene short names and geneMiriams are often different between species, -0111 %safer not to include them +0110 %Gene short names, geneMiriams and proteins are often different +0111 %between species, safer not to include them 0112 if isfield(models{i},'geneShortNames') 0113 models{i}=rmfield(models{i},'geneShortNames'); 0114 end 0115 if isfield(models{i},'geneMiriams') 0116 models{i}=rmfield(models{i},'geneMiriams'); 0117 end -0118 %The geneFrom field also loses meaning if the genes are replaced by -0119 %orthologs -0120 if isfield(models{i},'geneFrom') -0121 models{i}=rmfield(models{i},'geneFrom'); -0122 end -0123 end -0124 -0125 %Check that genes do not begin with ( or end with ), as this makes problematic grRules -0126 for i=1:numel(blastStructure) -0127 problemGenes = startsWith(blastStructure(i).fromGenes,'(') | endsWith(blastStructure(i).fromGenes,')'); -0128 if any(problemGenes) -0129 error(['One or multiple gene identifiers from ' blastStructure(i).fromId ... -0130 ' starts with ''('' and/or ends with '')'', which is not allowed']) -0131 end -0132 end -0133 -0134 %Assume for now that all information is there and that it's correct. This -0135 %is important to fix since no further checks are being made! +0118 if isfield(models{i},'proteins') +0119 models{i}=rmfield(models{i},'proteins'); +0120 end +0121 %The geneFrom field also loses meaning if the genes are replaced by +0122 %orthologs +0123 if isfield(models{i},'geneFrom') +0124 models{i}=rmfield(models{i},'geneFrom'); +0125 end +0126 end +0127 +0128 %Check that genes do not begin with ( or end with ), as this makes problematic grRules +0129 for i=1:numel(blastStructure) +0130 problemGenes = startsWith(blastStructure(i).fromGenes,'(') | endsWith(blastStructure(i).fromGenes,')'); +0131 if any(problemGenes) +0132 error(['One or multiple gene identifiers from ' blastStructure(i).fromId ... 
+0133 ' starts with ''('' and/or ends with '')'', which is not allowed']) +0134 end +0135 end 0136 -0137 %Check whether provided fasta files use the same gene identifiers as -0138 %provided template models -0139 for i=1:numel(blastStructure) -0140 if ~strcmp(blastStructure(i).fromId,getModelFor) -0141 j=strcmpi(blastStructure(i).fromId,modelNames); -0142 if j==0 -0143 error(['While the blastStructure contains sequences from '... -0144 'organismID "%s" (as\nprovided in getBlast), none of '... -0145 'template models have this id (as model.id)'],... -0146 string(blastStructure(i).fromId)); -0147 end -0148 k=sum(ismember(blastStructure(i).fromGenes,models{j}.genes)); -0149 if k<(numel(models{j}.genes)*0.05) -0150 error(['Less than 5%% of the genes in the template model '... -0151 'with model.id "%s"\ncan be found in the blastStructure. '... -0152 'Ensure that the protein FASTA\nused in getBlast and '... -0153 'the template model used in getModelFromHomology\nuse '... -0154 'the same style of gene identifiers'],models{j}.id) -0155 end -0156 end -0157 end -0158 -0159 %Standardize grRules of template models -0160 for i=1:length(models) -0161 fprintf('\nStandardizing grRules of template model with ID "%s" ...',models{i}.id); -0162 [models{i}.grRules,models{i}.rxnGeneMat]=standardizeGrRules(models{i},false); -0163 end -0164 fprintf(' done\n'); -0165 -0166 %Remove all gene matches that are below the cutoffs -0167 for i=1:numel(blastStructure) -0168 indexes=blastStructure(i).evalue<maxE & blastStructure(i).aligLen>=minLen & blastStructure(i).identity>=minIde; %Do it in this direction to lose NaNs -0169 blastStructure(i).fromGenes(~indexes)=[]; -0170 blastStructure(i).toGenes(~indexes)=[]; -0171 blastStructure(i).evalue(~indexes)=[]; -0172 blastStructure(i).identity(~indexes)=[]; -0173 blastStructure(i).aligLen(~indexes)=[]; -0174 blastStructure(i).bitscore(~indexes)=[]; -0175 blastStructure(i).ppos(~indexes)=[]; -0176 end -0177 -0178 %Remove all reactions from the models that 
have no genes encoding for them. -0179 %Also remove all genes that encode for no reactions. There shouldn't be any -0180 %but there might be mistakes -0181 for i=1:numel(models) -0182 [hasGenes, ~]=find(models{i}.rxnGeneMat); -0183 hasNoGenes=1:numel(models{i}.rxns); -0184 hasNoGenes(hasGenes)=[]; -0185 models{i}=removeReactions(models{i},hasNoGenes,true,true); -0186 end -0187 -0188 %Create a structure that contains all genes used in the blasts in any -0189 %direction for each of the models in 'models' and for the new organism. The -0190 %first cell is for the new organism and then according to the preferred -0191 %order. If no such order is supplied, then according to the order in -0192 %'models' -0193 allGenes=cell(numel(models)+1,1); -0194 if isempty(preferredOrder) -0195 useOrder=modelNames; -0196 else -0197 useOrder=preferredOrder; -0198 end -0199 -0200 %Get the corresponding indexes for those models in the 'models' structure -0201 useOrderIndexes=zeros(numel(models),1); -0202 for i=1:numel(models) -0203 [~, index]=ismember(models{i}.id,useOrder); -0204 useOrderIndexes(index)=i; -0205 end -0206 -0207 %Remove all genes from the blast structure that have no genes in the models -0208 if onlyGenesInModels==true -0209 modelGenes={}; -0210 for i=1:numel(models) -0211 modelGenes=[modelGenes;models{i}.genes]; -0212 end -0213 for i=1:numel(blastStructure) -0214 %Check to see if it should match the toId or fromId -0215 if strcmpi(blastStructure(i).fromId,getModelFor) -0216 I=ismember(blastStructure(i).toGenes,modelGenes); -0217 else -0218 I=ismember(blastStructure(i).fromGenes,modelGenes); -0219 end -0220 blastStructure(i).fromGenes(~I)=[]; -0221 blastStructure(i).toGenes(~I)=[]; -0222 blastStructure(i).evalue(~I)=[]; -0223 blastStructure(i).identity(~I)=[]; -0224 blastStructure(i).aligLen(~I)=[]; -0225 blastStructure(i).bitscore(~I)=[]; -0226 blastStructure(i).ppos(~I)=[]; -0227 -0228 %Check that no matching in blastStructure is empty. 
This happens if -0229 %no genes in the models are present in the corresponding sheet -0230 if isempty(blastStructure(i).fromGenes) -0231 EM=['No genes in matching from ' blastStructure(i).fromId ' to ' blastStructure(i).toId ' are present in the corresponding model']; -0232 dispEM(EM); -0233 end -0234 end -0235 end -0236 -0237 %If only best orthologs are to be used then all other measurements are -0238 %deleted from the blastStructure. All code after this stays the same. This -0239 %means that preferred order can still matter. The best ortholog scoring is -0240 %based only on the E-value -0241 if strictness==3 -0242 for i=1:numel(blastStructure) -0243 keep=false(numel(blastStructure(i).toGenes),1); -0244 [allFromGenes, ~, I]=unique(blastStructure(i).fromGenes); -0245 -0246 %It would be nice to get rid of this loop -0247 for j=1:numel(allFromGenes) -0248 allMatches=find(I==j); -0249 bestMatches=allMatches(blastStructure(i).evalue(allMatches)==min(blastStructure(i).evalue(allMatches))); -0250 -0251 %Keep the best matches -0252 keep(bestMatches)=true; -0253 end -0254 -0255 %Delete all matches that were not best matches -0256 blastStructure(i).fromGenes(~keep)=[]; -0257 blastStructure(i).toGenes(~keep)=[]; -0258 blastStructure(i).evalue(~keep)=[]; -0259 blastStructure(i).identity(~keep)=[]; -0260 blastStructure(i).aligLen(~keep)=[]; -0261 blastStructure(i).bitscore(~keep)=[]; -0262 blastStructure(i).ppos(~keep)=[]; -0263 end -0264 end -0265 -0266 useOrder=[{getModelFor};useOrder]; -0267 -0268 for i=1:numel(blastStructure) -0269 [~, toIndex]=ismember(blastStructure(i).toId,useOrder); -0270 [~, fromIndex]=ismember(blastStructure(i).fromId,useOrder); -0271 -0272 %Add all genes to the corresponding list in allGenes -0273 allGenes{toIndex}=[allGenes{toIndex};blastStructure(i).toGenes]; -0274 allGenes{fromIndex}=[allGenes{fromIndex};blastStructure(i).fromGenes]; -0275 end -0276 -0277 %Keep only the unique gene names -0278 maxOtherGeneNr=0; %Determines the dimension of the 
connectivity matrixes -0279 for i=1:numel(allGenes) -0280 allGenes{i}=unique(allGenes{i}); -0281 if i>1 -0282 if numel(allGenes{i})>maxOtherGeneNr -0283 maxOtherGeneNr=numel(allGenes{i}); -0284 end -0285 end -0286 end -0287 -0288 %Generate a cell array of matrixes that describes how the genes in the new -0289 %organism map to the models. Each cell matches to the corresponding model -0290 %in useOrder (starting at 2 of course). First dimension is gene in new -0291 %organism, second which gene it is in the other organism. The second matrix -0292 %describes how they map back. -0293 -0294 %As it is now, a significant match is indicated by a 1. This could be -0295 %expanded to contain some kind of significance level. The first dimension -0296 %is still the genes in the new model. -0297 -0298 allTo=cell(numel(useOrder)-1,1); -0299 allFrom=cell(numel(useOrder)-1,1); +0137 %Assume for now that all information is there and that it's correct. This +0138 %is important to fix since no further checks are being made! +0139 +0140 %Check whether provided fasta files use the same gene identifiers as +0141 %provided template models +0142 for i=1:numel(blastStructure) +0143 if ~strcmp(blastStructure(i).fromId,getModelFor) +0144 j=strcmpi(blastStructure(i).fromId,modelNames); +0145 if j==0 +0146 error(['While the blastStructure contains sequences from '... +0147 'organismID "%s" (as\nprovided in getBlast), none of '... +0148 'template models have this id (as model.id)'],... +0149 string(blastStructure(i).fromId)); +0150 end +0151 k=sum(ismember(blastStructure(i).fromGenes,models{j}.genes)); +0152 if k<(numel(models{j}.genes)*0.05) +0153 error(['Less than 5%% of the genes in the template model '... +0154 'with model.id "%s"\ncan be found in the blastStructure. '... +0155 'Ensure that the protein FASTA\nused in getBlast and '... +0156 'the template model used in getModelFromHomology\nuse '... 
+0157 'the same style of gene identifiers'],models{j}.id) +0158 end +0159 end +0160 end +0161 +0162 %Standardize grRules of template models +0163 for i=1:length(models) +0164 fprintf('\nStandardizing grRules of template model with ID "%s" ...',models{i}.id); +0165 [models{i}.grRules,models{i}.rxnGeneMat]=standardizeGrRules(models{i},false); +0166 end +0167 fprintf(' done\n'); +0168 +0169 %Remove all gene matches that are below the cutoffs +0170 for i=1:numel(blastStructure) +0171 indexes=blastStructure(i).evalue<maxE & blastStructure(i).aligLen>=minLen & blastStructure(i).identity>=minIde; %Do it in this direction to lose NaNs +0172 blastStructure(i).fromGenes(~indexes)=[]; +0173 blastStructure(i).toGenes(~indexes)=[]; +0174 blastStructure(i).evalue(~indexes)=[]; +0175 blastStructure(i).identity(~indexes)=[]; +0176 blastStructure(i).aligLen(~indexes)=[]; +0177 blastStructure(i).bitscore(~indexes)=[]; +0178 blastStructure(i).ppos(~indexes)=[]; +0179 end +0180 +0181 %Remove all reactions from the models that have no genes encoding for them. +0182 %Also remove all genes that encode for no reactions. There shouldn't be any +0183 %but there might be mistakes +0184 for i=1:numel(models) +0185 [hasGenes, ~]=find(models{i}.rxnGeneMat); +0186 hasNoGenes=1:numel(models{i}.rxns); +0187 hasNoGenes(hasGenes)=[]; +0188 models{i}=removeReactions(models{i},hasNoGenes,true,true); +0189 end +0190 +0191 %Create a structure that contains all genes used in the blasts in any +0192 %direction for each of the models in 'models' and for the new organism. The +0193 %first cell is for the new organism and then according to the preferred +0194 %order. 
If no such order is supplied, then according to the order in +0195 %'models' +0196 allGenes=cell(numel(models)+1,1); +0197 if isempty(preferredOrder) +0198 useOrder=modelNames; +0199 else +0200 useOrder=preferredOrder; +0201 end +0202 +0203 %Get the corresponding indexes for those models in the 'models' structure +0204 useOrderIndexes=zeros(numel(models),1); +0205 for i=1:numel(models) +0206 [~, index]=ismember(models{i}.id,useOrder); +0207 useOrderIndexes(index)=i; +0208 end +0209 +0210 %Remove all genes from the blast structure that have no genes in the models +0211 if onlyGenesInModels==true +0212 modelGenes={}; +0213 for i=1:numel(models) +0214 modelGenes=[modelGenes;models{i}.genes]; +0215 end +0216 for i=1:numel(blastStructure) +0217 %Check to see if it should match the toId or fromId +0218 if strcmpi(blastStructure(i).fromId,getModelFor) +0219 I=ismember(blastStructure(i).toGenes,modelGenes); +0220 else +0221 I=ismember(blastStructure(i).fromGenes,modelGenes); +0222 end +0223 blastStructure(i).fromGenes(~I)=[]; +0224 blastStructure(i).toGenes(~I)=[]; +0225 blastStructure(i).evalue(~I)=[]; +0226 blastStructure(i).identity(~I)=[]; +0227 blastStructure(i).aligLen(~I)=[]; +0228 blastStructure(i).bitscore(~I)=[]; +0229 blastStructure(i).ppos(~I)=[]; +0230 +0231 %Check that no matching in blastStructure is empty. This happens if +0232 %no genes in the models are present in the corresponding sheet +0233 if isempty(blastStructure(i).fromGenes) +0234 EM=['No genes in matching from ' blastStructure(i).fromId ' to ' blastStructure(i).toId ' are present in the corresponding model']; +0235 dispEM(EM); +0236 end +0237 end +0238 end +0239 +0240 %If only best orthologs are to be used then all other measurements are +0241 %deleted from the blastStructure. All code after this stays the same. This +0242 %means that preferred order can still matter. 
The best ortholog scoring is +0243 %based only on the E-value +0244 if strictness==3 +0245 for i=1:numel(blastStructure) +0246 keep=false(numel(blastStructure(i).toGenes),1); +0247 [allFromGenes, ~, I]=unique(blastStructure(i).fromGenes); +0248 +0249 %It would be nice to get rid of this loop +0250 for j=1:numel(allFromGenes) +0251 allMatches=find(I==j); +0252 bestMatches=allMatches(blastStructure(i).evalue(allMatches)==min(blastStructure(i).evalue(allMatches))); +0253 +0254 %Keep the best matches +0255 keep(bestMatches)=true; +0256 end +0257 +0258 %Delete all matches that were not best matches +0259 blastStructure(i).fromGenes(~keep)=[]; +0260 blastStructure(i).toGenes(~keep)=[]; +0261 blastStructure(i).evalue(~keep)=[]; +0262 blastStructure(i).identity(~keep)=[]; +0263 blastStructure(i).aligLen(~keep)=[]; +0264 blastStructure(i).bitscore(~keep)=[]; +0265 blastStructure(i).ppos(~keep)=[]; +0266 end +0267 end +0268 +0269 useOrder=[{getModelFor};useOrder]; +0270 +0271 for i=1:numel(blastStructure) +0272 [~, toIndex]=ismember(blastStructure(i).toId,useOrder); +0273 [~, fromIndex]=ismember(blastStructure(i).fromId,useOrder); +0274 +0275 %Add all genes to the corresponding list in allGenes +0276 allGenes{toIndex}=[allGenes{toIndex};blastStructure(i).toGenes]; +0277 allGenes{fromIndex}=[allGenes{fromIndex};blastStructure(i).fromGenes]; +0278 end +0279 +0280 %Keep only the unique gene names +0281 maxOtherGeneNr=0; %Determines the dimension of the connectivity matrixes +0282 for i=1:numel(allGenes) +0283 allGenes{i}=unique(allGenes{i}); +0284 if i>1 +0285 if numel(allGenes{i})>maxOtherGeneNr +0286 maxOtherGeneNr=numel(allGenes{i}); +0287 end +0288 end +0289 end +0290 +0291 %Generate a cell array of matrixes that describes how the genes in the new +0292 %organism map to the models. Each cell matches to the corresponding model +0293 %in useOrder (starting at 2 of course). First dimension is gene in new +0294 %organism, second which gene it is in the other organism. 
The second matrix +0295 %describes how they map back. +0296 +0297 %As it is now, a significant match is indicated by a 1. This could be +0298 %expanded to contain some kind of significance level. The first dimension +0299 %is still the genes in the new model. 0300 -0301 for i=1:numel(useOrder)-1 -0302 allTo{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); -0303 allFrom{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); -0304 end -0305 -0306 %Fill the matches to other species -0307 for i=1:numel(blastStructure) -0308 if strcmp(blastStructure(i).toId,getModelFor) -0309 %This was 'to' the new organism. They should all match so no checks -0310 %are being made -0311 [~, a]=ismember(blastStructure(i).toGenes,allGenes{1}); -0312 [~, fromModel]=ismember(blastStructure(i).fromId,useOrder); -0313 [~, b]=ismember(blastStructure(i).fromGenes,allGenes{fromModel}); -0314 idx = sub2ind(size(allTo{fromModel-1}), a, b); -0315 allTo{fromModel-1}(idx)=1; -0316 else -0317 %This was 'from' the new organism -0318 [~, a]=ismember(blastStructure(i).fromGenes,allGenes{1}); -0319 [~, toModel]=ismember(blastStructure(i).toId,useOrder); -0320 [~, b]=ismember(blastStructure(i).toGenes,allGenes{toModel}); -0321 idx = sub2ind(size(allFrom{toModel-1}), a, b); -0322 allFrom{toModel-1}(idx)=1; -0323 end -0324 end -0325 -0326 %Now we have all the gene matches in a convenient way. For all the genes in -0327 %the new organism get the genes that should be included from other -0328 %organisms. If all genes should be included this simply means keep the -0329 %allFrom matrix as it is. If only orthologs which could be mapped in both -0330 %BLAST directions are to be included then only those elements are kept. 
-0331 -0332 finalMappings=cell(numel(useOrder)-1,1); -0333 if strictness==1 || strictness==3 -0334 for j=1:numel(allFrom) -0335 finalMappings{j}=allTo{j}~=0 & allFrom{j}~=0; -0336 end -0337 else -0338 if mapNewGenesToOld==true -0339 finalMappings=allFrom; -0340 else -0341 finalMappings=allTo; -0342 end -0343 end -0344 -0345 %Remove all genes from the mapping that are not in the models. This doesn't -0346 %do much if only genes in the models were used for the original mapping. -0347 %Also simplify the finalMapping and allGenes structures so that they only -0348 %contain mappings that exist -0349 usedNewGenes=false(numel(allGenes{1}),1); -0350 -0351 for i=1:numel(allGenes)-1 -0352 %First remove mappings for those genes that are not in the model -0353 if onlyGenesInModels==false -0354 a=ismember(allGenes{i+1},models{useOrderIndexes(i)}.genes); -0355 finalMappings{i}(:,~a)=false; -0356 end -0357 -0358 %Then remove unused ones and simplify -0359 [a, b]=find(finalMappings{i}); -0360 usedGenes=false(numel(allGenes{i+1}),1); -0361 usedNewGenes(a)=true; -0362 usedGenes(b)=true; -0363 finalMappings{i}=finalMappings{i}(:,usedGenes); -0364 allGenes{i+1}=allGenes{i+1}(usedGenes); -0365 end -0366 -0367 %Remove all new genes that have not been mapped to anything -0368 allGenes{1}=allGenes{1}(usedNewGenes); -0369 for i=1:numel(finalMappings) -0370 finalMappings{i}=finalMappings{i}(usedNewGenes,:); -0371 end -0372 -0373 %Now is it time to choose which reactions should be included from which -0374 %models. If there is a preferred order specified then each gene can only -0375 %result in reactions from one model, otherwise they should all be included -0376 -0377 %Start by simplifying the models by removing genes/reactions that are not -0378 %used. This is where it gets weird with complexes, especially "or" -0379 %complexes. In this step only reactions which are encoded by one single -0380 %gene, or where all genes should be deleted, are deleted. 
The info on the -0381 %full complex is still present in the grRules -0382 -0383 for i=1:numel(models) -0384 a=ismember(models{useOrderIndexes(i)}.genes,allGenes{i+1}); -0385 -0386 %Remove reactions that are not associated to any of the genes in -0387 %allGenes, thereby also keeping complexes where only for one of the -0388 %genes was matched -0389 [rxnsToKeep,~] = find(models{useOrderIndexes(i)}.rxnGeneMat(:,a)); -0390 rxnsToRemove = repmat(1,numel(models{useOrderIndexes(i)}.rxns),1); -0391 rxnsToRemove(rxnsToKeep) = 0; -0392 rxnsToRemove = find(rxnsToRemove); -0393 models{useOrderIndexes(i)}=removeReactions(models{useOrderIndexes(i)},rxnsToRemove,true,true,true); -0394 end -0395 -0396 %Since mergeModels function will be used in the end, the models are -0397 %simplified further by deleting genes/reactions in the order specified by -0398 %preferredOrder. This means that the last model will only contain reactions -0399 %for genes that mapped only to that model -0400 -0401 allUsedGenes=false(numel(allGenes{1}),1); -0402 -0403 if ~isempty(preferredOrder) && numel(models)>1 -0404 [usedGenes, ~]=find(finalMappings{1}); %All that are used in the first model in preferredOrder -0405 allUsedGenes(usedGenes)=true; -0406 for i=2:numel(finalMappings) -0407 [usedGenes, ~]=find(finalMappings{i}); -0408 usedGenes=unique(usedGenes); -0409 a=ismember(usedGenes,find(allUsedGenes)); -0410 -0411 [~, genesToDelete]=find(finalMappings{i}(usedGenes(a),:)); %IMPORTANT! IS it really correct to remove all genes that map? 
-0412 genesToDelete=unique(genesToDelete); %Maybe not needed, but for clarity if nothing else +0301 allTo=cell(numel(useOrder)-1,1); +0302 allFrom=cell(numel(useOrder)-1,1); +0303 +0304 for i=1:numel(useOrder)-1 +0305 allTo{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); +0306 allFrom{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); +0307 end +0308 +0309 %Fill the matches to other species +0310 for i=1:numel(blastStructure) +0311 if strcmp(blastStructure(i).toId,getModelFor) +0312 %This was 'to' the new organism. They should all match so no checks +0313 %are being made +0314 [~, a]=ismember(blastStructure(i).toGenes,allGenes{1}); +0315 [~, fromModel]=ismember(blastStructure(i).fromId,useOrder); +0316 [~, b]=ismember(blastStructure(i).fromGenes,allGenes{fromModel}); +0317 idx = sub2ind(size(allTo{fromModel-1}), a, b); +0318 allTo{fromModel-1}(idx)=1; +0319 else +0320 %This was 'from' the new organism +0321 [~, a]=ismember(blastStructure(i).fromGenes,allGenes{1}); +0322 [~, toModel]=ismember(blastStructure(i).toId,useOrder); +0323 [~, b]=ismember(blastStructure(i).toGenes,allGenes{toModel}); +0324 idx = sub2ind(size(allFrom{toModel-1}), a, b); +0325 allFrom{toModel-1}(idx)=1; +0326 end +0327 end +0328 +0329 %Now we have all the gene matches in a convenient way. For all the genes in +0330 %the new organism get the genes that should be included from other +0331 %organisms. If all genes should be included this simply means keep the +0332 %allFrom matrix as it is. If only orthologs which could be mapped in both +0333 %BLAST directions are to be included then only those elements are kept. 
+0334 +0335 finalMappings=cell(numel(useOrder)-1,1); +0336 if strictness==1 || strictness==3 +0337 for j=1:numel(allFrom) +0338 finalMappings{j}=allTo{j}~=0 & allFrom{j}~=0; +0339 end +0340 else +0341 if mapNewGenesToOld==true +0342 finalMappings=allFrom; +0343 else +0344 finalMappings=allTo; +0345 end +0346 end +0347 +0348 %Remove all genes from the mapping that are not in the models. This doesn't +0349 %do much if only genes in the models were used for the original mapping. +0350 %Also simplify the finalMapping and allGenes structures so that they only +0351 %contain mappings that exist +0352 usedNewGenes=false(numel(allGenes{1}),1); +0353 +0354 for i=1:numel(allGenes)-1 +0355 %First remove mappings for those genes that are not in the model +0356 if onlyGenesInModels==false +0357 a=ismember(allGenes{i+1},models{useOrderIndexes(i)}.genes); +0358 finalMappings{i}(:,~a)=false; +0359 end +0360 +0361 %Then remove unused ones and simplify +0362 [a, b]=find(finalMappings{i}); +0363 usedGenes=false(numel(allGenes{i+1}),1); +0364 usedNewGenes(a)=true; +0365 usedGenes(b)=true; +0366 finalMappings{i}=finalMappings{i}(:,usedGenes); +0367 allGenes{i+1}=allGenes{i+1}(usedGenes); +0368 end +0369 +0370 %Remove all new genes that have not been mapped to anything +0371 allGenes{1}=allGenes{1}(usedNewGenes); +0372 for i=1:numel(finalMappings) +0373 finalMappings{i}=finalMappings{i}(usedNewGenes,:); +0374 end +0375 +0376 %Now is it time to choose which reactions should be included from which +0377 %models. If there is a preferred order specified then each gene can only +0378 %result in reactions from one model, otherwise they should all be included +0379 +0380 %Start by simplifying the models by removing genes/reactions that are not +0381 %used. This is where it gets weird with complexes, especially "or" +0382 %complexes. In this step only reactions which are encoded by one single +0383 %gene, or where all genes should be deleted, are deleted. 
The info on the +0384 %full complex is still present in the grRules +0385 +0386 for i=1:numel(models) +0387 a=ismember(models{useOrderIndexes(i)}.genes,allGenes{i+1}); +0388 +0389 %Remove reactions that are not associated to any of the genes in +0390 %allGenes, thereby also keeping complexes where only for one of the +0391 %genes was matched +0392 [rxnsToKeep,~] = find(models{useOrderIndexes(i)}.rxnGeneMat(:,a)); +0393 rxnsToRemove = repmat(1,numel(models{useOrderIndexes(i)}.rxns),1); +0394 rxnsToRemove(rxnsToKeep) = 0; +0395 rxnsToRemove = find(rxnsToRemove); +0396 models{useOrderIndexes(i)}=removeReactions(models{useOrderIndexes(i)},rxnsToRemove,true,true,true); +0397 end +0398 +0399 %Since mergeModels function will be used in the end, the models are +0400 %simplified further by deleting genes/reactions in the order specified by +0401 %preferredOrder. This means that the last model will only contain reactions +0402 %for genes that mapped only to that model +0403 +0404 allUsedGenes=false(numel(allGenes{1}),1); +0405 +0406 if ~isempty(preferredOrder) && numel(models)>1 +0407 [usedGenes, ~]=find(finalMappings{1}); %All that are used in the first model in preferredOrder +0408 allUsedGenes(usedGenes)=true; +0409 for i=2:numel(finalMappings) +0410 [usedGenes, ~]=find(finalMappings{i}); +0411 usedGenes=unique(usedGenes); +0412 a=ismember(usedGenes,find(allUsedGenes)); 0413 -0414 %Remove all the genes that were already found and add the other -0415 %ones to allUsedGenes -0416 models{useOrderIndexes(i)}=removeGenes(models{useOrderIndexes(i)},allGenes{i+1}(genesToDelete),true,true,false); -0417 allUsedGenes(usedGenes)=true; -0418 -0419 %Remove the deleted genes from finalMappings and allGenes. -0420 finalMappings{i}(:,genesToDelete)=[]; -0421 allGenes{i+1}(genesToDelete)=[]; -0422 end -0423 end -0424 -0425 %Now loop through the models and update the gene associations. 
Genes not -0426 %belonging to the new organism will be renamed as 'OLD_MODELID_gene' -0427 for i=1:numel(models) -0428 %Find all the new genes that should be used for this model -0429 [newGenes, oldGenes]=find(finalMappings{i}); -0430 -0431 %Create a new gene list with the genes from the new organism and those -0432 %genes that could not be removed -0433 replaceableGenes=allGenes{i+1}(unique(oldGenes)); -0434 nonReplaceableGenes=setdiff(models{useOrderIndexes(i)}.genes,replaceableGenes); -0435 fullGeneList=[allGenes{1}(unique(newGenes));nonReplaceableGenes]; -0436 -0437 %Just to save some indexing later. This is the LAST index of -0438 %replaceable ones -0439 nonRepStartIndex=numel(unique(newGenes)); -0440 -0441 %Construct a new rxnGeneMat -0442 newRxnGeneMat=sparse(numel(models{useOrderIndexes(i)}.rxns),numel(fullGeneList)); +0414 [~, genesToDelete]=find(finalMappings{i}(usedGenes(a),:)); %IMPORTANT! IS it really correct to remove all genes that map? +0415 genesToDelete=unique(genesToDelete); %Maybe not needed, but for clarity if nothing else +0416 +0417 %Remove all the genes that were already found and add the other +0418 %ones to allUsedGenes +0419 models{useOrderIndexes(i)}=removeGenes(models{useOrderIndexes(i)},allGenes{i+1}(genesToDelete),true,true,false); +0420 allUsedGenes(usedGenes)=true; +0421 +0422 %Remove the deleted genes from finalMappings and allGenes. +0423 finalMappings{i}(:,genesToDelete)=[]; +0424 allGenes{i+1}(genesToDelete)=[]; +0425 end +0426 end +0427 +0428 %Now loop through the models and update the gene associations. 
Genes not +0429 %belonging to the new organism will be renamed as 'OLD_MODELID_gene' +0430 for i=1:numel(models) +0431 %Find all the new genes that should be used for this model +0432 [newGenes, oldGenes]=find(finalMappings{i}); +0433 +0434 %Create a new gene list with the genes from the new organism and those +0435 %genes that could not be removed +0436 replaceableGenes=allGenes{i+1}(unique(oldGenes)); +0437 nonReplaceableGenes=setdiff(models{useOrderIndexes(i)}.genes,replaceableGenes); +0438 fullGeneList=[allGenes{1}(unique(newGenes));nonReplaceableGenes]; +0439 +0440 %Just to save some indexing later. This is the LAST index of +0441 %replaceable ones +0442 nonRepStartIndex=numel(unique(newGenes)); 0443 -0444 %Now update the rxnGeneMat. This is a little tricky and could -0445 %probably be done in a more efficient way, but I just loop through the -0446 %reactions and add them one by one -0447 for j=1:numel(models{useOrderIndexes(i)}.rxns) -0448 %Get the old genes encoding for this reaction -0449 [~, oldGeneIds]=find(models{useOrderIndexes(i)}.rxnGeneMat(j,:)); -0450 -0451 %Update the matrix for each gene. This includes replacing one gene -0452 %with several new ones if there were several matches -0453 for k=1:numel(oldGeneIds) -0454 %Match the gene to one in the gene list. This is done as a text -0455 %match. Could probably be done better, but I'm a little lost in -0456 %the indexing -0457 -0458 geneName=models{useOrderIndexes(i)}.genes(oldGeneIds(k)); -0459 -0460 %First search in the mappable genes -0461 mapIndex=find(ismember(allGenes{i+1},geneName)); +0444 %Construct a new rxnGeneMat +0445 newRxnGeneMat=sparse(numel(models{useOrderIndexes(i)}.rxns),numel(fullGeneList)); +0446 +0447 %Now update the rxnGeneMat. 
This is a little tricky and could +0448 %probably be done in a more efficient way, but I just loop through the +0449 %reactions and add them one by one +0450 for j=1:numel(models{useOrderIndexes(i)}.rxns) +0451 %Get the old genes encoding for this reaction +0452 [~, oldGeneIds]=find(models{useOrderIndexes(i)}.rxnGeneMat(j,:)); +0453 +0454 %Update the matrix for each gene. This includes replacing one gene +0455 %with several new ones if there were several matches +0456 for k=1:numel(oldGeneIds) +0457 %Match the gene to one in the gene list. This is done as a text +0458 %match. Could probably be done better, but I'm a little lost in +0459 %the indexing +0460 +0461 geneName=models{useOrderIndexes(i)}.genes(oldGeneIds(k)); 0462 -0463 if ~isempty(mapIndex) -0464 % add the old genes -0465 hitGenes.oldGenes = [hitGenes.oldGenes, {geneName}]; -0466 -0467 %Get the new genes for that gene -0468 a=find(finalMappings{i}(:,mapIndex)); +0463 %First search in the mappable genes +0464 mapIndex=find(ismember(allGenes{i+1},geneName)); +0465 +0466 if ~isempty(mapIndex) +0467 % add the old genes +0468 hitGenes.oldGenes = [hitGenes.oldGenes, {geneName}]; 0469 -0470 %Find the positions of these genes in the final gene list -0471 [~, b]=ismember(allGenes{1}(a),fullGeneList); +0470 %Get the new genes for that gene +0471 a=find(finalMappings{i}(:,mapIndex)); 0472 -0473 %Update the matrix -0474 newRxnGeneMat(j,b)=1; +0473 %Find the positions of these genes in the final gene list +0474 [~, b]=ismember(allGenes{1}(a),fullGeneList); 0475 -0476 %Update the grRules string. This is tricky, but I hope that -0477 %it's ok to replace the old gene name with the new one and -0478 %add ') or (' if there were several matches. Be sure of -0479 %this! 
-0480 repString=fullGeneList{b(1)}; -0481 if numel(b)>1 -0482 for l=2:numel(b) -0483 repString=[repString ' or ' fullGeneList{b(l)}]; -0484 end -0485 repString=['(' repString ')']; -0486 end -0487 -0488 % add the new matched genes -0489 hitGenes.newGenes = [hitGenes.newGenes, {repString}]; +0476 %Update the matrix +0477 newRxnGeneMat(j,b)=1; +0478 +0479 %Update the grRules string. This is tricky, but I hope that +0480 %it's ok to replace the old gene name with the new one and +0481 %add ') or (' if there were several matches. Be sure of +0482 %this! +0483 repString=fullGeneList{b(1)}; +0484 if numel(b)>1 +0485 for l=2:numel(b) +0486 repString=[repString ' or ' fullGeneList{b(l)}]; +0487 end +0488 repString=['(' repString ')']; +0489 end 0490 -0491 %Use regexprep instead of strrep to prevent partial matches -0492 models{useOrderIndexes(i)}.grRules{j}=regexprep(models{useOrderIndexes(i)}.grRules{j},['(^|\s|\()' geneName{1} '($|\s|\))'],['$1' repString '$2']); -0493 else -0494 %Then search in the non-replaceable genes. There could only -0495 %be one match here -0496 index=find(ismember(nonReplaceableGenes,geneName)); -0497 -0498 %Update the matrix -0499 newRxnGeneMat(j,nonRepStartIndex+index)=1; +0491 % add the new matched genes +0492 hitGenes.newGenes = [hitGenes.newGenes, {repString}]; +0493 +0494 %Use regexprep instead of strrep to prevent partial matches +0495 models{useOrderIndexes(i)}.grRules{j}=regexprep(models{useOrderIndexes(i)}.grRules{j},['(^|\s|\()' geneName{1} '($|\s|\))'],['$1' repString '$2']); +0496 else +0497 %Then search in the non-replaceable genes. 
There could only +0498 %be one match here +0499 index=find(ismember(nonReplaceableGenes,geneName)); 0500 -0501 models{useOrderIndexes(i)}.grRules{j}=strrep(models{useOrderIndexes(i)}.grRules{j},geneName{1},strcat('OLD_',models{useOrderIndexes(i)}.id,'_',geneName{1})); -0502 end -0503 end -0504 end -0505 -0506 %Add the new list of genes -0507 models{useOrderIndexes(i)}.rxnGeneMat=newRxnGeneMat; -0508 if ~isempty(nonReplaceableGenes) -0509 models{useOrderIndexes(i)}.genes=[allGenes{1}(unique(newGenes));strcat('OLD_',models{useOrderIndexes(i)}.id,'_',nonReplaceableGenes)]; -0510 else -0511 models{useOrderIndexes(i)}.genes=allGenes{1}(unique(newGenes)); -0512 end -0513 if isfield(models{useOrderIndexes(i)},'geneComps') -0514 geneComps=models{useOrderIndexes(i)}.geneComps(1); -0515 models{useOrderIndexes(i)}.geneComps=zeros(numel(models{useOrderIndexes(i)}.genes),1); -0516 %Assume that all genes are in the same compartment, and this -0517 %compartment is specified for the first gene -0518 models{useOrderIndexes(i)}.geneComps(:)=geneComps; -0519 end -0520 end -0521 -0522 %Now merge the models. 
All information should be correct except for 'or' -0523 %complexes -0524 draftModel=mergeModels(models,'metNames'); -0525 -0526 %Remove unnecessary OLD_ genes, that were added with OR relationships -0527 regexStr=['OLD_(', strjoin(modelNames(:),'|'),')_(\S^\))+']; -0528 draftModel.grRules=regexprep(draftModel.grRules,[' or ' regexStr],''); -0529 draftModel.grRules=regexprep(draftModel.grRules,[regexStr ' or '],''); -0530 -0531 %Change name of the resulting model -0532 draftModel.id=getModelFor; -0533 name='Generated by getModelFromHomology using '; -0534 for i=1:numel(models) -0535 if i<numel(models) -0536 name=[name models{i}.id ', ']; -0537 else -0538 name=[name models{i}.id]; -0539 end -0540 end -0541 draftModel.name=name; -0542 draftModel.rxnNotes=cell(length(draftModel.rxns),1); -0543 draftModel.rxnNotes(:)={'Included by getModelFromHomology'}; -0544 draftModel.rxnConfidenceScores=NaN(length(draftModel.rxns),1); -0545 draftModel.rxnConfidenceScores(:)=2; -0546 draftModel=deleteUnusedGenes(draftModel,0); -0547 %Standardize grRules and notify if problematic grRules are found -0548 [draftModel.grRules,draftModel.rxnGeneMat]=standardizeGrRules(draftModel,false); -0549 draftModel=deleteUnusedGenes(draftModel,false); -0550 end +0501 %Update the matrix +0502 newRxnGeneMat(j,nonRepStartIndex+index)=1; +0503 +0504 models{useOrderIndexes(i)}.grRules{j}=strrep(models{useOrderIndexes(i)}.grRules{j},geneName{1},strcat('OLD_',models{useOrderIndexes(i)}.id,'_',geneName{1})); +0505 end +0506 end +0507 end +0508 +0509 %Add the new list of genes +0510 models{useOrderIndexes(i)}.rxnGeneMat=newRxnGeneMat; +0511 if ~isempty(nonReplaceableGenes) +0512 models{useOrderIndexes(i)}.genes=[allGenes{1}(unique(newGenes));strcat('OLD_',models{useOrderIndexes(i)}.id,'_',nonReplaceableGenes)]; +0513 else +0514 models{useOrderIndexes(i)}.genes=allGenes{1}(unique(newGenes)); +0515 end +0516 if isfield(models{useOrderIndexes(i)},'geneComps') +0517 
geneComps=models{useOrderIndexes(i)}.geneComps(1); +0518 models{useOrderIndexes(i)}.geneComps=zeros(numel(models{useOrderIndexes(i)}.genes),1); +0519 %Assume that all genes are in the same compartment, and this +0520 %compartment is specified for the first gene +0521 models{useOrderIndexes(i)}.geneComps(:)=geneComps; +0522 end +0523 end +0524 +0525 %Now merge the models. All information should be correct except for 'or' +0526 %complexes +0527 draftModel=mergeModels(models,'metNames'); +0528 +0529 %Remove unnecessary OLD_ genes, that were added with OR relationships +0530 regexStr=['OLD_(', strjoin(modelNames(:),'|'),')_(\S^\))+']; +0531 draftModel.grRules=regexprep(draftModel.grRules,[' or ' regexStr],''); +0532 draftModel.grRules=regexprep(draftModel.grRules,[regexStr ' or '],''); +0533 +0534 %Change name of the resulting model +0535 draftModel.id=getModelFor; +0536 name='Generated by getModelFromHomology using '; +0537 for i=1:numel(models) +0538 if i<numel(models) +0539 name=[name models{i}.id ', ']; +0540 else +0541 name=[name models{i}.id]; +0542 end +0543 end +0544 draftModel.name=name; +0545 draftModel.rxnNotes=cell(length(draftModel.rxns),1); +0546 draftModel.rxnNotes(:)={'Included by getModelFromHomology'}; +0547 draftModel.rxnConfidenceScores=NaN(length(draftModel.rxns),1); +0548 draftModel.rxnConfidenceScores(:)=2; +0549 draftModel=deleteUnusedGenes(draftModel,0); +0550 %Standardize grRules and notify if problematic grRules are found +0551 [draftModel.grRules,draftModel.rxnGeneMat]=standardizeGrRules(draftModel,false); +0552 draftModel=deleteUnusedGenes(draftModel,false); +0553 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/mergeModels.html b/doc/core/mergeModels.html index 66f07d14..8264db25 100644 --- a/doc/core/mergeModels.html +++ b/doc/core/mergeModels.html @@ -553,104 +553,124 @@

SOURCE CODE ^if isfield(models{i},'geneShortNames') 0493 model.geneShortNames=models{i}.geneShortNames; 0494 end -0495 -0496 if isfield(models{i},'geneMiriams') -0497 model.geneMiriams=models{i}.geneMiriams; +0495 +0496 if isfield(models{i},'proteins') +0497 model.proteins=models{i}.proteins; 0498 end -0499 -0500 if isfield(models{i},'geneComps') -0501 model.geneComps=models{i}.geneComps; +0499 +0500 if isfield(models{i},'geneMiriams') +0501 model.geneMiriams=models{i}.geneMiriams; 0502 end -0503 else -0504 %If gene info should be merged -0505 a=ismember(models{i}.genes,model.genes); -0506 -0507 genesToAdd=find(~a); -0508 -0509 %Only add extra gene info on new genes. This might not be -0510 %correct and should be changed later... -0511 if ~isempty(genesToAdd) -0512 model.genes=[model.genes;models{i}.genes(genesToAdd)]; -0513 emptyGene=cell(numel(genesToAdd),1); -0514 emptyGene(:)={models{i}.id}; -0515 model.geneFrom=[model.geneFrom;emptyGene]; -0516 model.rxnGeneMat=[model.rxnGeneMat sparse(size(model.rxnGeneMat,1),numel(genesToAdd))]; -0517 -0518 if isfield(models{i},'geneShortNames') -0519 if isfield(model,'geneShortNames') -0520 model.geneShortNames=[model.geneShortNames;models{i}.geneShortNames(genesToAdd)]; -0521 else -0522 emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); -0523 emptyGeneSN(:)={''}; -0524 model.geneShortNames=[emptyGeneSN;models{i}.geneShortNames(genesToAdd)]; -0525 end -0526 else -0527 if isfield(model,'geneShortNames') -0528 emptyGeneSN=cell(numel(genesToAdd),1); -0529 emptyGeneSN(:)={''}; -0530 model.geneShortNames=[model.geneShortNames;emptyGeneSN]; -0531 end -0532 end -0533 -0534 if isfield(models{i},'geneMiriams') -0535 if isfield(model,'geneMiriams') -0536 model.geneMiriams=[model.geneMiriams;models{i}.geneMiriams(genesToAdd)]; -0537 else -0538 emptyGeneMir=cell(numel(model.genes)-numel(genesToAdd),1); -0539 model.geneMiriams=[emptyGeneMir;models{i}.geneMiriams(genesToAdd)]; -0540 end -0541 else -0542 if 
isfield(model,'geneMiriams') -0543 emptyGeneMir=cell(numel(genesToAdd),1); -0544 model.geneMiriams=[model.geneMiriams;emptyGeneMir]; +0503 +0504 if isfield(models{i},'geneComps') +0505 model.geneComps=models{i}.geneComps; +0506 end +0507 else +0508 %If gene info should be merged +0509 a=ismember(models{i}.genes,model.genes); +0510 +0511 genesToAdd=find(~a); +0512 +0513 %Only add extra gene info on new genes. This might not be +0514 %correct and should be changed later... +0515 if ~isempty(genesToAdd) +0516 model.genes=[model.genes;models{i}.genes(genesToAdd)]; +0517 emptyGene=cell(numel(genesToAdd),1); +0518 emptyGene(:)={models{i}.id}; +0519 model.geneFrom=[model.geneFrom;emptyGene]; +0520 model.rxnGeneMat=[model.rxnGeneMat sparse(size(model.rxnGeneMat,1),numel(genesToAdd))]; +0521 +0522 if isfield(models{i},'geneShortNames') +0523 if isfield(model,'geneShortNames') +0524 model.geneShortNames=[model.geneShortNames;models{i}.geneShortNames(genesToAdd)]; +0525 else +0526 emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); +0527 emptyGeneSN(:)={''}; +0528 model.geneShortNames=[emptyGeneSN;models{i}.geneShortNames(genesToAdd)]; +0529 end +0530 else +0531 if isfield(model,'geneShortNames') +0532 emptyGeneSN=cell(numel(genesToAdd),1); +0533 emptyGeneSN(:)={''}; +0534 model.geneShortNames=[model.geneShortNames;emptyGeneSN]; +0535 end +0536 end +0537 +0538 if isfield(models{i},'proteins') +0539 if isfield(model,'proteins') +0540 model.proteins=[model.proteins;models{i}.proteins(genesToAdd)]; +0541 else +0542 emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); +0543 emptyGeneSN(:)={''}; +0544 model.proteins=[emptyGeneSN;models{i}.proteins(genesToAdd)]; 0545 end -0546 end -0547 -0548 if isfield(models{i},'geneComps') -0549 if isfield(model,'geneComps') -0550 model.geneComps=[model.geneComps;models{i}.geneComps(genesToAdd)]; -0551 else -0552 emptyGeneMir=ones(numel(model.genes)-numel(genesToAdd),1); -0553 
model.geneComps=[emptyGeneMir;models{i}.geneComps(genesToAdd)]; -0554 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; -0555 dispEM(EM,false); -0556 end -0557 else -0558 if isfield(model,'geneComps') -0559 emptyGeneMir=ones(numel(genesToAdd),1); -0560 model.geneComps=[model.geneComps;emptyGeneMir]; -0561 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; -0562 dispEM(EM,false); -0563 end -0564 end -0565 end -0566 -0567 %Remap the genes from the new model. The same thing as with -0568 %mets; this is a wasteful way to do it but I don't care right -0569 %now -0570 [a, b]=ismember(models{i}.genes,model.genes); -0571 -0572 %Just a check -0573 if ~all(a) -0574 EM='There was an unexpected error in matching genes'; -0575 dispEM(EM); -0576 end -0577 model.grRules=[model.grRules;models{i}.grRules]; -0578 end -0579 else -0580 %Add empty gene associations -0581 if isfield(model,'genes') -0582 emptyGene=cell(numel(models{i}.rxns),1); -0583 emptyGene(:)={''}; -0584 model.grRules=[model.grRules;emptyGene]; -0585 end -0586 end -0587 end -0588 %Fix grRules and reconstruct rxnGeneMat -0589 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0590 model.grRules = grRules; -0591 model.rxnGeneMat = rxnGeneMat; -0592 end +0546 else +0547 if isfield(model,'proteins') +0548 emptyGeneSN=cell(numel(genesToAdd),1); +0549 emptyGeneSN(:)={''}; +0550 model.proteins=[model.proteins;emptyGeneSN]; +0551 end +0552 end +0553 +0554 if isfield(models{i},'geneMiriams') +0555 if isfield(model,'geneMiriams') +0556 model.geneMiriams=[model.geneMiriams;models{i}.geneMiriams(genesToAdd)]; +0557 else +0558 emptyGeneMir=cell(numel(model.genes)-numel(genesToAdd),1); +0559 model.geneMiriams=[emptyGeneMir;models{i}.geneMiriams(genesToAdd)]; +0560 end +0561 else +0562 if isfield(model,'geneMiriams') +0563 
emptyGeneMir=cell(numel(genesToAdd),1); +0564 model.geneMiriams=[model.geneMiriams;emptyGeneMir]; +0565 end +0566 end +0567 +0568 if isfield(models{i},'geneComps') +0569 if isfield(model,'geneComps') +0570 model.geneComps=[model.geneComps;models{i}.geneComps(genesToAdd)]; +0571 else +0572 emptyGeneMir=ones(numel(model.genes)-numel(genesToAdd),1); +0573 model.geneComps=[emptyGeneMir;models{i}.geneComps(genesToAdd)]; +0574 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; +0575 dispEM(EM,false); +0576 end +0577 else +0578 if isfield(model,'geneComps') +0579 emptyGeneMir=ones(numel(genesToAdd),1); +0580 model.geneComps=[model.geneComps;emptyGeneMir]; +0581 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; +0582 dispEM(EM,false); +0583 end +0584 end +0585 end +0586 +0587 %Remap the genes from the new model. The same thing as with +0588 %mets; this is a wasteful way to do it but I don't care right +0589 %now +0590 [a, b]=ismember(models{i}.genes,model.genes); +0591 +0592 %Just a check +0593 if ~all(a) +0594 EM='There was an unexpected error in matching genes'; +0595 dispEM(EM); +0596 end +0597 model.grRules=[model.grRules;models{i}.grRules]; +0598 end +0599 else +0600 %Add empty gene associations +0601 if isfield(model,'genes') +0602 emptyGene=cell(numel(models{i}.rxns),1); +0603 emptyGene(:)={''}; +0604 model.grRules=[model.grRules;emptyGene]; +0605 end +0606 end +0607 end +0608 %Fix grRules and reconstruct rxnGeneMat +0609 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0610 model.grRules = grRules; +0611 model.rxnGeneMat = rxnGeneMat; +0612 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/permuteModel.html b/doc/core/permuteModel.html index f9ef17da..f86f25f5 100644 --- a/doc/core/permuteModel.html +++ b/doc/core/permuteModel.html @@ -189,37 +189,40 @@

SOURCE CODE ^if isfield(newModel,'geneShortNames') 0133 newModel.geneShortNames=newModel.geneShortNames(indexes); 0134 end -0135 if isfield(newModel,'rxnGeneMat') -0136 newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); +0135 if isfield(newModel,'proteins') +0136 newModel.proteins=newModel.proteins(indexes); 0137 end -0138 case 'comps' -0139 if isfield(newModel,'comps') -0140 newModel.comps=newModel.comps(indexes); -0141 end -0142 if isfield(newModel,'compNames') -0143 newModel.compNames=newModel.compNames(indexes); +0138 if isfield(newModel,'rxnGeneMat') +0139 newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); +0140 end +0141 case 'comps' +0142 if isfield(newModel,'comps') +0143 newModel.comps=newModel.comps(indexes); 0144 end -0145 if isfield(newModel,'compOutside') -0146 newModel.compOutside=newModel.compOutside(indexes); +0145 if isfield(newModel,'compNames') +0146 newModel.compNames=newModel.compNames(indexes); 0147 end -0148 if isfield(newModel,'compMiriams') -0149 newModel.compMiriams=newModel.compMiriams(indexes); +0148 if isfield(newModel,'compOutside') +0149 newModel.compOutside=newModel.compOutside(indexes); 0150 end -0151 [~,J]=sort(indexes); % The *index* of compartment is used in next fields -0152 if isfield(newModel,'metComps') -0153 [toreplace, bywhat] = ismember(newModel.metComps,1:length(J)); -0154 newModel.metComps(toreplace) = J(bywhat(toreplace)); -0155 end -0156 if isfield(model,'rxnComps') -0157 [toreplace, bywhat] = ismember(model.rxnComps,1:length(J)); -0158 model.rxnComps(toreplace) = J(bywhat(toreplace)); -0159 end -0160 if isfield(model,'geneComps') -0161 [toreplace, bywhat] = ismember(model.geneComps,1:length(J)); -0162 model.geneComps(toreplace) = J(bywhat(toreplace)); -0163 end -0164 end -0165 end +0151 if isfield(newModel,'compMiriams') +0152 newModel.compMiriams=newModel.compMiriams(indexes); +0153 end +0154 [~,J]=sort(indexes); % The *index* of compartment is used in next fields +0155 if isfield(newModel,'metComps') +0156 
[toreplace, bywhat] = ismember(newModel.metComps,1:length(J)); +0157 newModel.metComps(toreplace) = J(bywhat(toreplace)); +0158 end +0159 if isfield(model,'rxnComps') +0160 [toreplace, bywhat] = ismember(model.rxnComps,1:length(J)); +0161 model.rxnComps(toreplace) = J(bywhat(toreplace)); +0162 end +0163 if isfield(model,'geneComps') +0164 [toreplace, bywhat] = ismember(model.geneComps,1:length(J)); +0165 model.geneComps(toreplace) = J(bywhat(toreplace)); +0166 end +0167 end +0168 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/predictLocalization.html b/doc/core/predictLocalization.html index c204b708..2622161e 100644 --- a/doc/core/predictLocalization.html +++ b/doc/core/predictLocalization.html @@ -304,931 +304,940 @@

SOURCE CODE ^if isfield(model,'geneMiriams') 0200 model.geneMiriams=[model.geneMiriams;{[]}]; 0201 end -0202 if isfield(model,'geneFrom') -0203 model.geneFrom=[model.geneFrom;{{'FAKE'}}]; +0202 if isfield(model,'proteins') +0203 model.proteins=[model.proteins;{[]}]; 0204 end -0205 model.rxnGeneMat(I(i),numel(model.genes))=1; -0206 model.grRules{I(i)}=''; -0207 end -0208 -0209 %Update the GSS. All genes, fake or real, for which there is no evidence -0210 %gets a score 0.5 in all compartments. Also just to make it easier further -0211 %on -0212 I=setdiff(model.genes,GSS.genes); -0213 GSS.genes=[GSS.genes;I]; -0214 GSS.scores=[GSS.scores;ones(numel(I),numel(GSS.compartments))*0.5]; -0215 -0216 %Gene complexes should be moved together in order to be biologically -0217 %relevant. The average score for the genes is used for each compartment. -0218 %This is done by changing the model so that gene complexes are used as a -0219 %single gene name and then a score is calculated for that "gene". -0220 -0221 %Only "and"-relationships exist after expandModel -0222 genes=unique(model.grRules); -0223 nGenes=strrep(genes,'(',''); -0224 nGenes=strrep(nGenes,')',''); -0225 %nGenes=strrep(nGenes,' and ','_and_'); -0226 complexes=setdiff(nGenes,model.genes); -0227 if ~isempty(complexes) -0228 if isempty(complexes{1}) %Empty grRules also come up here -0229 complexes(1)=[]; -0230 end -0231 end -0232 cScores=zeros(numel(complexes),numel(GSS.compartments)); -0233 for i=1:numel(complexes) -0234 genesInComplex=regexp(complexes{i},' and ','split'); -0235 -0236 %Find these genes in GSS -0237 [I, J]=ismember(genesInComplex,GSS.genes); +0205 if isfield(model,'geneFrom') +0206 model.geneFrom=[model.geneFrom;{{'FAKE'}}]; +0207 end +0208 model.rxnGeneMat(I(i),numel(model.genes))=1; +0209 model.grRules{I(i)}=''; +0210 end +0211 +0212 %Update the GSS. All genes, fake or real, for which there is no evidence +0213 %gets a score 0.5 in all compartments. 
Also just to make it easier further +0214 %on +0215 I=setdiff(model.genes,GSS.genes); +0216 GSS.genes=[GSS.genes;I]; +0217 GSS.scores=[GSS.scores;ones(numel(I),numel(GSS.compartments))*0.5]; +0218 +0219 %Gene complexes should be moved together in order to be biologically +0220 %relevant. The average score for the genes is used for each compartment. +0221 %This is done by changing the model so that gene complexes are used as a +0222 %single gene name and then a score is calculated for that "gene". +0223 +0224 %Only "and"-relationships exist after expandModel +0225 genes=unique(model.grRules); +0226 nGenes=strrep(genes,'(',''); +0227 nGenes=strrep(nGenes,')',''); +0228 %nGenes=strrep(nGenes,' and ','_and_'); +0229 complexes=setdiff(nGenes,model.genes); +0230 if ~isempty(complexes) +0231 if isempty(complexes{1}) %Empty grRules also come up here +0232 complexes(1)=[]; +0233 end +0234 end +0235 cScores=zeros(numel(complexes),numel(GSS.compartments)); +0236 for i=1:numel(complexes) +0237 genesInComplex=regexp(complexes{i},' and ','split'); 0238 -0239 if any(I) -0240 %Get the average of the genes that were found -0241 mScores=mean(GSS.scores(J(I),:)); -0242 -0243 %And add 0.5 for the genes that were not found in order to be -0244 %consistent with non-complexes -0245 mScores=(mScores.*sum(I)+(numel(genesInComplex)-sum(I))*0.5)/numel(genesInComplex); -0246 else -0247 EM=['Could not parse grRule "' complexes{i} '". 
Assigning score 0.0 in all compartments']; -0248 dispEM(EM,false); -0249 mScores=ones(1,numel(genesInComplex))*0.5; -0250 end -0251 cScores(i,:)=mScores; -0252 -0253 %Add this complex as a new gene -0254 model.genes=[model.genes;complexes{i}]; -0255 if isfield(model,'geneMiriams') -0256 model.geneMiriams=[model.geneMiriams;{[]}]; -0257 end -0258 if isfield(model,'geneShortNames') -0259 model.geneShortNames=[model.geneShortNames;{''}]; +0239 %Find these genes in GSS +0240 [I, J]=ismember(genesInComplex,GSS.genes); +0241 +0242 if any(I) +0243 %Get the average of the genes that were found +0244 mScores=mean(GSS.scores(J(I),:)); +0245 +0246 %And add 0.5 for the genes that were not found in order to be +0247 %consistent with non-complexes +0248 mScores=(mScores.*sum(I)+(numel(genesInComplex)-sum(I))*0.5)/numel(genesInComplex); +0249 else +0250 EM=['Could not parse grRule "' complexes{i} '". Assigning score 0.0 in all compartments']; +0251 dispEM(EM,false); +0252 mScores=ones(1,numel(genesInComplex))*0.5; +0253 end +0254 cScores(i,:)=mScores; +0255 +0256 %Add this complex as a new gene +0257 model.genes=[model.genes;complexes{i}]; +0258 if isfield(model,'geneMiriams') +0259 model.geneMiriams=[model.geneMiriams;{[]}]; 0260 end -0261 if isfield(model,'geneFrom') -0262 model.geneFrom=[model.geneFrom;{'COMPLEX'}]; +0261 if isfield(model,'geneShortNames') +0262 model.geneShortNames=[model.geneShortNames;{''}]; 0263 end -0264 %Find the reactions which had the original complex and change them to -0265 %use the new "gene" -0266 I=ismember(model.grRules,['(' complexes{i} ')']); -0267 -0268 %Should check more carefully if there can be an error here -0269 if ~isempty(I) -0270 model.rxnGeneMat(I,:)=0; %Ok since the split on "or" was applied -0271 model.rxnGeneMat(I,numel(model.genes))=1; -0272 end -0273 end -0274 -0275 %Add the new "genes" -0276 GSS.genes=[GSS.genes;complexes]; -0277 GSS.scores=[GSS.scores;cScores]; -0278 -0279 %After merging the complexes it could happen that there 
are genes that are -0280 %no longer in use. Delete such genes -0281 model=removeReactions(model,{},false,true); -0282 -0283 %Exchange reactions, defined as involving an unconstrained metabolite, are -0284 %special in that they have to stay in the defaultCompartment. This means -0285 %that uptake/excretion of metabolites is always via the default -0286 %compartment. This is a small simplification, but should be valid in most -0287 %cases -0288 [~, I]=getExchangeRxns(model); -0289 -0290 %It will be easier later on if the same place. Put them in the beginning -0291 J=1:numel(model.rxns); -0292 J(I)=[]; -0293 model=permuteModel(model,[I;J'],'rxns'); -0294 -0295 %Number of exchange reactions -0296 nER=numel(I); -0297 -0298 %Also put the exchange metabolites in the beginning -0299 if isfield(model,'unconstrained') -0300 I=find(model.unconstrained); -0301 J=1:numel(model.mets); -0302 J(I)=[]; -0303 model=permuteModel(model,[I;J'],'mets'); -0304 %Also reorder the transport costs -0305 transportCost=transportCost([I;J']); -0306 %Number of exchange metabolites -0307 nEM=numel(I); -0308 else -0309 nEM=0; -0310 end -0311 -0312 %There is no point of having genes for exchange reactions, so delete them. -0313 %Also to make computations easier -0314 model.rxnGeneMat(1:nER,:)=0; -0315 model.grRules(1:nER)={''}; -0316 -0317 %Remove unused genes -0318 model=removeReactions(model,{},false,true); -0319 -0320 %Remove genes with no match to the model and reorder so that the genes are -0321 %in the same order as model.genes. 
Since the fake genes are already added -0322 %so that all genes in model exist in GSS it is fine to do like this -0323 [~, J]=ismember(model.genes,GSS.genes); -0324 GSS.genes=model.genes; -0325 GSS.scores=GSS.scores(J,:); -0326 -0327 %Reorder the GSS so that the first index corresponds to the default -0328 %compartment -0329 [~, J]=ismember(defaultCompartment,GSS.compartments); -0330 reorder=1:numel(GSS.compartments); -0331 reorder(J)=[]; -0332 reorder=[J reorder]; -0333 GSS.scores=GSS.scores(:,reorder); -0334 GSS.compartments=GSS.compartments(reorder); -0335 -0336 %Since it is only checked whether the metabolites can be synthesized, there -0337 %is no need to care about the stoichiometry. Change to -1/1 to simplify -0338 %later. Keep the S matrix for later though -0339 oldS=model.S; -0340 model.S(model.S>0)=1; -0341 model.S(model.S<0)=-1; -0342 -0343 %Here is a bit of a trick. To avoid the recurring calculation which -0344 %reactions are reversible, the reversible reactions have the coefficients -0345 %-10/10 instead of -1/1 -0346 model.S(:,model.rev==1)=model.S(:,model.rev==1).*10; -0347 -0348 %***Begin problem formulation -0349 -0350 %Some numbers that are good to have -0351 nRxns=numel(model.rxns)-nER; %Excluding exchange rxns -0352 nMets=numel(model.mets)-nEM; %Excluding exchange mets -0353 nGenes=numel(model.genes); -0354 nComps=numel(GSS.compartments); +0264 if isfield(model,'proteins') +0265 model.proteins=[model.proteins;{''}]; +0266 end +0267 if isfield(model,'geneFrom') +0268 model.geneFrom=[model.geneFrom;{'COMPLEX'}]; +0269 end +0270 %Find the reactions which had the original complex and change them to +0271 %use the new "gene" +0272 I=ismember(model.grRules,['(' complexes{i} ')']); +0273 +0274 %Should check more carefully if there can be an error here +0275 if ~isempty(I) +0276 model.rxnGeneMat(I,:)=0; %Ok since the split on "or" was applied +0277 model.rxnGeneMat(I,numel(model.genes))=1; +0278 end +0279 end +0280 +0281 %Add the new "genes" +0282 
GSS.genes=[GSS.genes;complexes]; +0283 GSS.scores=[GSS.scores;cScores]; +0284 +0285 %After merging the complexes it could happen that there are genes that are +0286 %no longer in use. Delete such genes +0287 model=removeReactions(model,{},false,true); +0288 +0289 %Exchange reactions, defined as involving an unconstrained metabolite, are +0290 %special in that they have to stay in the defaultCompartment. This means +0291 %that uptake/excretion of metabolites is always via the default +0292 %compartment. This is a small simplification, but should be valid in most +0293 %cases +0294 [~, I]=getExchangeRxns(model); +0295 +0296 %It will be easier later on if the same place. Put them in the beginning +0297 J=1:numel(model.rxns); +0298 J(I)=[]; +0299 model=permuteModel(model,[I;J'],'rxns'); +0300 +0301 %Number of exchange reactions +0302 nER=numel(I); +0303 +0304 %Also put the exchange metabolites in the beginning +0305 if isfield(model,'unconstrained') +0306 I=find(model.unconstrained); +0307 J=1:numel(model.mets); +0308 J(I)=[]; +0309 model=permuteModel(model,[I;J'],'mets'); +0310 %Also reorder the transport costs +0311 transportCost=transportCost([I;J']); +0312 %Number of exchange metabolites +0313 nEM=numel(I); +0314 else +0315 nEM=0; +0316 end +0317 +0318 %There is no point of having genes for exchange reactions, so delete them. +0319 %Also to make computations easier +0320 model.rxnGeneMat(1:nER,:)=0; +0321 model.grRules(1:nER)={''}; +0322 +0323 %Remove unused genes +0324 model=removeReactions(model,{},false,true); +0325 +0326 %Remove genes with no match to the model and reorder so that the genes are +0327 %in the same order as model.genes. 
Since the fake genes are already added +0328 %so that all genes in model exist in GSS it is fine to do like this +0329 [~, J]=ismember(model.genes,GSS.genes); +0330 GSS.genes=model.genes; +0331 GSS.scores=GSS.scores(J,:); +0332 +0333 %Reorder the GSS so that the first index corresponds to the default +0334 %compartment +0335 [~, J]=ismember(defaultCompartment,GSS.compartments); +0336 reorder=1:numel(GSS.compartments); +0337 reorder(J)=[]; +0338 reorder=[J reorder]; +0339 GSS.scores=GSS.scores(:,reorder); +0340 GSS.compartments=GSS.compartments(reorder); +0341 +0342 %Since it is only checked whether the metabolites can be synthesized, there +0343 %is no need to care about the stoichiometry. Change to -1/1 to simplify +0344 %later. Keep the S matrix for later though +0345 oldS=model.S; +0346 model.S(model.S>0)=1; +0347 model.S(model.S<0)=-1; +0348 +0349 %Here is a bit of a trick. To avoid the recurring calculation which +0350 %reactions are reversible, the reversible reactions have the coefficients +0351 %-10/10 instead of -1/1 +0352 model.S(:,model.rev==1)=model.S(:,model.rev==1).*10; +0353 +0354 %***Begin problem formulation 0355 -0356 %Create a big stoichiometric matrix that will be the current model. In -0357 %order to have faster simulations the maximal model size is declared and -0358 %reactions are then moved within it. -0359 -0360 %First the original model (with the first nE being exchange rxns), then -0361 %reserve space for number of rxns minus exchange rxns for each non-default -0362 %compartment, then transport reactions for all non-exchange mets between -0363 %the default compartment and all others. 
-0364 %NOTE: Kept eye()*0 since eye() can be used to include all transport from -0365 %the beginning -0366 s=repmat(eye(nMets)*0,1,nComps-1); -0367 s=[zeros(numel(model.mets)-nMets,size(s,2));s]; -0368 S=[model.S sparse(numel(model.mets),nRxns*(nComps-1)) s]; -0369 s=[sparse(nMets*(nComps-1),numel(model.rxns)+nRxns*(nComps-1)) eye(nMets*(nComps-1))*0]; -0370 S=[S;s]; -0371 -0372 %Also replicate the transport costs -0373 transportCost=[transportCost(1:nEM);repmat(transportCost(nEM+1:end),nComps,1)]; -0374 -0375 %Create a binary matrix that says where the genes are in the current -0376 %solution -0377 g2c=false(nGenes,nComps); -0378 %All genes start in the default compartment -0379 g2c(:,1)=true; +0356 %Some numbers that are good to have +0357 nRxns=numel(model.rxns)-nER; %Excluding exchange rxns +0358 nMets=numel(model.mets)-nEM; %Excluding exchange mets +0359 nGenes=numel(model.genes); +0360 nComps=numel(GSS.compartments); +0361 +0362 %Create a big stoichiometric matrix that will be the current model. In +0363 %order to have faster simulations the maximal model size is declared and +0364 %reactions are then moved within it. +0365 +0366 %First the original model (with the first nE being exchange rxns), then +0367 %reserve space for number of rxns minus exchange rxns for each non-default +0368 %compartment, then transport reactions for all non-exchange mets between +0369 %the default compartment and all others. 
+0370 %NOTE: Kept eye()*0 since eye() can be used to include all transport from +0371 %the beginning +0372 s=repmat(eye(nMets)*0,1,nComps-1); +0373 s=[zeros(numel(model.mets)-nMets,size(s,2));s]; +0374 S=[model.S sparse(numel(model.mets),nRxns*(nComps-1)) s]; +0375 s=[sparse(nMets*(nComps-1),numel(model.rxns)+nRxns*(nComps-1)) eye(nMets*(nComps-1))*0]; +0376 S=[S;s]; +0377 +0378 %Also replicate the transport costs +0379 transportCost=[transportCost(1:nEM);repmat(transportCost(nEM+1:end),nComps,1)]; 0380 -0381 %Start of main optimization loop -0382 tic; -0383 bestScore=-inf; -0384 bestS=[]; -0385 bestg2c=[]; +0381 %Create a binary matrix that says where the genes are in the current +0382 %solution +0383 g2c=false(nGenes,nComps); +0384 %All genes start in the default compartment +0385 g2c(:,1)=true; 0386 -0387 %Temp for testing -0388 plotScore=[]; -0389 nTrans=[]; -0390 totScore=[]; -0391 minScore=sum(min(GSS.scores,[],2)); -0392 maxScore=sum(max(GSS.scores,[],2)); -0393 -0394 while toc<maxTime*60 -0395 %Pick a random gene, weighted by it is current score minus the best -0396 %score for that gene (often 1.0, but can be 0.5 for no genes or average -0397 %for complexes). Genes with bad fits are more likely to be moved. This -0398 %formulation never moves a gene from its best compartment. Therefore a -0399 %small uniform weight is added -0400 [I, J]=find(g2c); -0401 geneToMove=randsample(nGenes,1,true,max(GSS.scores(I,:),[],2)-GSS.scores(sub2ind(size(g2c),I,J))+0.1); -0402 -0403 %Sample among possible compartments to move to. Add a larger weight to -0404 %even out the odds a little. 
Also a way of getting rid of loops where -0405 %the same set of genes are moved back and forth several times -0406 toComp=randsample(nComps,1,true,GSS.scores(geneToMove,:)+0.2); -0407 -0408 %Check that it moves to a new compartment -0409 if toComp==find(g2c(geneToMove,:)) -0410 continue; -0411 end -0412 -0413 %Moves the gene -0414 [newS, newg2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets); -0415 -0416 %Tries to connect the network. If this was not possible in 10 -0417 %iterations, then abort. If more than 20 modifications were needed then -0418 %it is unlikely that it will be a lower score -0419 wasConnected=false; -0420 for j=1:10 -0421 %Find the metabolites that are now unconnected -0422 unconnected=findUnconnected(newS,nEM); -0423 -0424 %Continue if there are still unconnected -0425 if any(unconnected) -0426 %For each gene find out how many of these could be connected if -0427 %the gene was moved and how many would be disconnected by -0428 %moving that gene -0429 [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(newS,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS); -0430 -0431 %Score which gene would be the best to move. The highest -0432 %deltaScore is 1.0. It should be possible to move a gene from -0433 %worst to best compartment even if it disconnects, say, 1.5 -0434 %more metabolites -0435 [score, I]=max(1.5*deltaScore+deltaConnected); +0387 %Start of main optimization loop +0388 tic; +0389 bestScore=-inf; +0390 bestS=[]; +0391 bestg2c=[]; +0392 +0393 %Temp for testing +0394 plotScore=[]; +0395 nTrans=[]; +0396 totScore=[]; +0397 minScore=sum(min(GSS.scores,[],2)); +0398 maxScore=sum(max(GSS.scores,[],2)); +0399 +0400 while toc<maxTime*60 +0401 %Pick a random gene, weighted by it is current score minus the best +0402 %score for that gene (often 1.0, but can be 0.5 for no genes or average +0403 %for complexes). Genes with bad fits are more likely to be moved. This +0404 %formulation never moves a gene from its best compartment. 
Therefore a +0405 %small uniform weight is added +0406 [I, J]=find(g2c); +0407 geneToMove=randsample(nGenes,1,true,max(GSS.scores(I,:),[],2)-GSS.scores(sub2ind(size(g2c),I,J))+0.1); +0408 +0409 %Sample among possible compartments to move to. Add a larger weight to +0410 %even out the odds a little. Also a way of getting rid of loops where +0411 %the same set of genes are moved back and forth several times +0412 toComp=randsample(nComps,1,true,GSS.scores(geneToMove,:)+0.2); +0413 +0414 %Check that it moves to a new compartment +0415 if toComp==find(g2c(geneToMove,:)) +0416 continue; +0417 end +0418 +0419 %Moves the gene +0420 [newS, newg2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets); +0421 +0422 %Tries to connect the network. If this was not possible in 10 +0423 %iterations, then abort. If more than 20 modifications were needed then +0424 %it is unlikely that it will be a lower score +0425 wasConnected=false; +0426 for j=1:10 +0427 %Find the metabolites that are now unconnected +0428 unconnected=findUnconnected(newS,nEM); +0429 +0430 %Continue if there are still unconnected +0431 if any(unconnected) +0432 %For each gene find out how many of these could be connected if +0433 %the gene was moved and how many would be disconnected by +0434 %moving that gene +0435 [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(newS,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS); 0436 -0437 %Check if it has to add a transport or if there is a gene that -0438 %could be moved order to have a more connected network -0439 hasToAddTransport=true; -0440 if ~isempty(deltaConnected) -0441 if score>0 -0442 hasToAddTransport=false; -0443 end -0444 end -0445 -0446 %If it is possible to move any gene in order to have a more -0447 %connected network, then move the best one -0448 if hasToAddTransport==false -0449 [newS, newg2c]=moveGene(newS,model,g2c,geneIndex(I),moveTo(I),nRxns,nMets); -0450 else -0451 %Choose a random unconnected metabolite that should be -0452 %connected 
-0453 transMet=unconnected(randsample(numel(unconnected),1)); -0454 -0455 %First get where the metabolite is now -0456 comps=ceil((transMet-nEM)/((size(S,1)-nEM)/nComps)); -0457 -0458 %Find the corresponding metabolite index if it were in the -0459 %default compartment -0460 dcIndex=transMet-(comps-1)*nMets; -0461 -0462 %Then get the indexes of that metabolite in all -0463 %compartments -0464 allIndexes=dcIndex; -0465 for k=1:nComps-1 -0466 allIndexes=[allIndexes;dcIndex+nMets*k]; -0467 end -0468 -0469 %It could be that some of these are not used in any -0470 %reaction. Get only the ones which are -0471 I=sum(newS(allIndexes,:)~=0,2)>0; -0472 -0473 %Then get the ones that are used but not in unconnected. -0474 %These are metabolites that could potentially be -0475 %transported to connect transMet -0476 connectedUsed=setdiff(allIndexes(I),unconnected); -0477 -0478 %This may be an error but leave it for now. It seems to -0479 %happen if nothing can be connected in one step -0480 if isempty(connectedUsed) -0481 break; -0482 end +0437 %Score which gene would be the best to move. The highest +0438 %deltaScore is 1.0. 
It should be possible to move a gene from +0439 %worst to best compartment even if it disconnects, say, 1.5 +0440 %more metabolites +0441 [score, I]=max(1.5*deltaScore+deltaConnected); +0442 +0443 %Check if it has to add a transport or if there is a gene that +0444 %could be moved order to have a more connected network +0445 hasToAddTransport=true; +0446 if ~isempty(deltaConnected) +0447 if score>0 +0448 hasToAddTransport=false; +0449 end +0450 end +0451 +0452 %If it is possible to move any gene in order to have a more +0453 %connected network, then move the best one +0454 if hasToAddTransport==false +0455 [newS, newg2c]=moveGene(newS,model,g2c,geneIndex(I),moveTo(I),nRxns,nMets); +0456 else +0457 %Choose a random unconnected metabolite that should be +0458 %connected +0459 transMet=unconnected(randsample(numel(unconnected),1)); +0460 +0461 %First get where the metabolite is now +0462 comps=ceil((transMet-nEM)/((size(S,1)-nEM)/nComps)); +0463 +0464 %Find the corresponding metabolite index if it were in the +0465 %default compartment +0466 dcIndex=transMet-(comps-1)*nMets; +0467 +0468 %Then get the indexes of that metabolite in all +0469 %compartments +0470 allIndexes=dcIndex; +0471 for k=1:nComps-1 +0472 allIndexes=[allIndexes;dcIndex+nMets*k]; +0473 end +0474 +0475 %It could be that some of these are not used in any +0476 %reaction. Get only the ones which are +0477 I=sum(newS(allIndexes,:)~=0,2)>0; +0478 +0479 %Then get the ones that are used but not in unconnected. 
+0480 %These are metabolites that could potentially be +0481 %transported to connect transMet +0482 connectedUsed=setdiff(allIndexes(I),unconnected); 0483 -0484 %If transMet is in the default compartment then everything -0485 %is fine, just connect it to a random one -0486 if transMet==dcIndex -0487 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,connectedUsed(randsample(numel(connectedUsed),1))); -0488 else -0489 %If one of the connectedUsed is in the default -0490 %compartment then connect to that one -0491 I=connectedUsed(connectedUsed<(nMets+nEM)); -0492 if any(I) -0493 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,I(randsample(numel(I),1))); -0494 else -0495 %This is if the only way to connect it is by adding -0496 %two transport reactions, going via the default -0497 %compartment -0498 break; -0499 end -0500 end -0501 end -0502 else -0503 wasConnected=true; -0504 break; -0505 end -0506 end -0507 -0508 %If the network was connected in a new way, it is possible that some -0509 %transport reactions are no longer needed. They should be removed -0510 if wasConnected==true -0511 %These are the metabolites that are being transported -0512 activeTransport=find(sum(newS(:,nER+nRxns*nComps+1:end),2)); -0513 -0514 %Get the metabolites that are unconnected if transport was not used -0515 unconnected=findUnconnected(newS(:,1:nER+nRxns*nComps),nEM); -0516 -0517 %Find the transport reactions that are not needed and delete them -0518 I=setdiff(activeTransport,unconnected); +0484 %This may be an error but leave it for now. 
It seems to +0485 %happen if nothing can be connected in one step +0486 if isempty(connectedUsed) +0487 break; +0488 end +0489 +0490 %If transMet is in the default compartment then everything +0491 %is fine, just connect it to a random one +0492 if transMet==dcIndex +0493 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,connectedUsed(randsample(numel(connectedUsed),1))); +0494 else +0495 %If one of the connectedUsed is in the default +0496 %compartment then connect to that one +0497 I=connectedUsed(connectedUsed<(nMets+nEM)); +0498 if any(I) +0499 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,I(randsample(numel(I),1))); +0500 else +0501 %This is if the only way to connect it is by adding +0502 %two transport reactions, going via the default +0503 %compartment +0504 break; +0505 end +0506 end +0507 end +0508 else +0509 wasConnected=true; +0510 break; +0511 end +0512 end +0513 +0514 %If the network was connected in a new way, it is possible that some +0515 %transport reactions are no longer needed. 
They should be removed +0516 if wasConnected==true +0517 %These are the metabolites that are being transported +0518 activeTransport=find(sum(newS(:,nER+nRxns*nComps+1:end),2)); 0519 -0520 %Since both metabolites in a transport rxns must be connected for -0521 %the reaction to be deleted, the sum over the colums should be 4 -0522 newS(:,find(sum(newS(I,nER+nRxns*nComps+1:end))==4)+nER+nRxns*nComps)=0; -0523 -0524 %Score the solution and determine whether to keep it as a new -0525 %solution -0526 [score, geneScore, trCost]=scoreModel(newS,newg2c,GSS,transportCost); -0527 -0528 %If it was the best solution so far, keep it -0529 if score>bestScore -0530 bestScore=score; -0531 bestS=newS; -0532 bestg2c=newg2c; -0533 end -0534 -0535 %This should not be steepest descent later -0536 if score>=bestScore% || exp((score-bestScore)*7)>rand() -0537 plotScore=[plotScore;geneScore]; -0538 nTrans=[nTrans;trCost]; -0539 totScore=[totScore;score]; -0540 S=newS; -0541 g2c=newg2c; -0542 -0543 if plotResults==true -0544 subplot(3,2,1); -0545 spy(S); -0546 subplot(3,2,2); -0547 plot(plotScore,'r'); -0548 xlabel('Gene score'); -0549 subplot(3,2,3); -0550 plot((plotScore-minScore)/(maxScore-minScore),'r'); -0551 xlabel('Gene score relative to predictions'); -0552 subplot(3,2,4); -0553 plot(nTrans,'g'); -0554 xlabel('Transport cost'); -0555 subplot(3,2,5); -0556 plot(totScore,'b'); -0557 xlabel('Total score'); -0558 subplot(3,2,6); -0559 pause(0.2); -0560 end -0561 end -0562 end -0563 end -0564 scores.totScore=score; -0565 scores.geneScore=geneScore; -0566 scores.transCost=trCost; -0567 -0568 %Find which metabolites are transported and to where -0569 [I, J]=find(bestS(nEM+1:nEM+nMets,end-nMets*(nComps-1)+1:end)); -0570 J=ceil(J/nMets+1); -0571 transportStruct.mets=model.metNames(I+nEM); -0572 transportStruct.toComp=GSS.compartments(J); +0520 %Get the metabolites that are unconnected if transport was not used +0521 unconnected=findUnconnected(newS(:,1:nER+nRxns*nComps),nEM); +0522 +0523 
%Find the transport reactions that are not needed and delete them +0524 I=setdiff(activeTransport,unconnected); +0525 +0526 %Since both metabolites in a transport rxns must be connected for +0527 %the reaction to be deleted, the sum over the colums should be 4 +0528 newS(:,find(sum(newS(I,nER+nRxns*nComps+1:end))==4)+nER+nRxns*nComps)=0; +0529 +0530 %Score the solution and determine whether to keep it as a new +0531 %solution +0532 [score, geneScore, trCost]=scoreModel(newS,newg2c,GSS,transportCost); +0533 +0534 %If it was the best solution so far, keep it +0535 if score>bestScore +0536 bestScore=score; +0537 bestS=newS; +0538 bestg2c=newg2c; +0539 end +0540 +0541 %This should not be steepest descent later +0542 if score>=bestScore% || exp((score-bestScore)*7)>rand() +0543 plotScore=[plotScore;geneScore]; +0544 nTrans=[nTrans;trCost]; +0545 totScore=[totScore;score]; +0546 S=newS; +0547 g2c=newg2c; +0548 +0549 if plotResults==true +0550 subplot(3,2,1); +0551 spy(S); +0552 subplot(3,2,2); +0553 plot(plotScore,'r'); +0554 xlabel('Gene score'); +0555 subplot(3,2,3); +0556 plot((plotScore-minScore)/(maxScore-minScore),'r'); +0557 xlabel('Gene score relative to predictions'); +0558 subplot(3,2,4); +0559 plot(nTrans,'g'); +0560 xlabel('Transport cost'); +0561 subplot(3,2,5); +0562 plot(totScore,'b'); +0563 xlabel('Total score'); +0564 subplot(3,2,6); +0565 pause(0.2); +0566 end +0567 end +0568 end +0569 end +0570 scores.totScore=score; +0571 scores.geneScore=geneScore; +0572 scores.transCost=trCost; 0573 -0574 [I, J]=find(bestg2c); -0575 geneLocalization.genes=GSS.genes(I); -0576 geneLocalization.comps=GSS.compartments(J); -0577 -0578 %Resort the gene names -0579 [~, I]=sort(geneLocalization.genes); -0580 geneLocalization.genes=geneLocalization.genes(I); -0581 geneLocalization.comps=geneLocalization.comps(I); -0582 -0583 %Remove the fake genes -0584 I=strncmp('&&FAKE&&',geneLocalization.genes,8); -0585 geneLocalization.genes(I)=[]; -0586 geneLocalization.comps(I)=[]; 
-0587 -0588 %Put together the model. This is done by first duplicating the S matrix -0589 %into the different compartments. Then the transport reactions are added -0590 %based on transportStruct. By now model.S should have the same size as the -0591 %S matrix used in the optimization, but with conserved stoichiometry. In -0592 %the final step all reactions and metabolites that are not used in the S -0593 %matrix from the optimization are deleted from the model -0594 outModel=model; -0595 outModel.S=oldS; -0596 -0597 %This is the S matrix without exchange rxns or metabolites -0598 copyPart=outModel.S(nEM+1:end,nER+1:end); -0599 -0600 %Replicate to give the rxnGeneMat for the full system -0601 copyRxnGeneMat=outModel.rxnGeneMat(nER+1:end,:); -0602 outModel.rxnGeneMat=[outModel.rxnGeneMat;repmat(copyRxnGeneMat,nComps-1,1)]; -0603 -0604 %First fix the compartments. The model is already ordered with the exchange -0605 %metabolites first. The original model may contain one or two compartments, -0606 %depending on whether any exchange metabolites are defined -0607 nStartComps=numel(outModel.comps); -0608 if nStartComps==1 -0609 outModel.comps={'1'}; -0610 outModel.compNames=GSS.compartments(1); -0611 else -0612 if model.metComps(1)==1 -0613 outModel.compNames(1)=GSS.compartments(1); -0614 else -0615 outModel.compNames(2)=GSS.compartments(1); -0616 end -0617 end -0618 outModel.compNames=[outModel.compNames;GSS.compartments(2:end)']; -0619 -0620 %Ugly little loop -0621 for i=1:numel(GSS.compartments)-1 -0622 outModel.comps=[outModel.comps;num2str(numel(outModel.comps)+1)]; +0574 %Find which metabolites are transported and to where +0575 [I, J]=find(bestS(nEM+1:nEM+nMets,end-nMets*(nComps-1)+1:end)); +0576 J=ceil(J/nMets+1); +0577 transportStruct.mets=model.metNames(I+nEM); +0578 transportStruct.toComp=GSS.compartments(J); +0579 +0580 [I, J]=find(bestg2c); +0581 geneLocalization.genes=GSS.genes(I); +0582 geneLocalization.comps=GSS.compartments(J); +0583 +0584 %Resort the 
gene names +0585 [~, I]=sort(geneLocalization.genes); +0586 geneLocalization.genes=geneLocalization.genes(I); +0587 geneLocalization.comps=geneLocalization.comps(I); +0588 +0589 %Remove the fake genes +0590 I=strncmp('&&FAKE&&',geneLocalization.genes,8); +0591 geneLocalization.genes(I)=[]; +0592 geneLocalization.comps(I)=[]; +0593 +0594 %Put together the model. This is done by first duplicating the S matrix +0595 %into the different compartments. Then the transport reactions are added +0596 %based on transportStruct. By now model.S should have the same size as the +0597 %S matrix used in the optimization, but with conserved stoichiometry. In +0598 %the final step all reactions and metabolites that are not used in the S +0599 %matrix from the optimization are deleted from the model +0600 outModel=model; +0601 outModel.S=oldS; +0602 +0603 %This is the S matrix without exchange rxns or metabolites +0604 copyPart=outModel.S(nEM+1:end,nER+1:end); +0605 +0606 %Replicate to give the rxnGeneMat for the full system +0607 copyRxnGeneMat=outModel.rxnGeneMat(nER+1:end,:); +0608 outModel.rxnGeneMat=[outModel.rxnGeneMat;repmat(copyRxnGeneMat,nComps-1,1)]; +0609 +0610 %First fix the compartments. The model is already ordered with the exchange +0611 %metabolites first. 
The original model may contain one or two compartments, +0612 %depending on whether any exchange metabolites are defined +0613 nStartComps=numel(outModel.comps); +0614 if nStartComps==1 +0615 outModel.comps={'1'}; +0616 outModel.compNames=GSS.compartments(1); +0617 else +0618 if model.metComps(1)==1 +0619 outModel.compNames(1)=GSS.compartments(1); +0620 else +0621 outModel.compNames(2)=GSS.compartments(1); +0622 end 0623 end -0624 %This information is not known from the data, so empty fields are added -0625 outModel.compOutside=cell(numel(outModel.comps),1); -0626 outModel.compOutside(:)={''}; -0627 -0628 for i=1:nComps-1 -0629 outModel.S=[outModel.S sparse(size(outModel.S,1),nRxns)]; -0630 outModel.S=[outModel.S;[sparse(nMets,nRxns*i+nER) copyPart]]; -0631 outModel.rxns=[outModel.rxns;strcat(outModel.rxns(nER+1:nER+nRxns),'_',GSS.compartments{i+1})]; -0632 outModel.rxnNames=[outModel.rxnNames;strcat(outModel.rxnNames(nER+1:nER+nRxns),' (',GSS.compartments{i+1},')')]; -0633 outModel.lb=[outModel.lb;outModel.lb(nER+1:nER+nRxns)]; -0634 outModel.ub=[outModel.ub;outModel.ub(nER+1:nER+nRxns)]; -0635 outModel.rev=[outModel.rev;outModel.rev(nER+1:nER+nRxns)]; -0636 outModel.c=[outModel.c;outModel.c(nER+1:nER+nRxns)]; -0637 if isfield(outModel,'grRules') -0638 outModel.grRules=[outModel.grRules;outModel.grRules(nER+1:nER+nRxns)]; -0639 end -0640 if isfield(outModel,'subSystems') -0641 outModel.subSystems=[outModel.subSystems;outModel.subSystems(nER+1:nER+nRxns)]; -0642 end -0643 if isfield(outModel,'eccodes') -0644 outModel.eccodes=[outModel.eccodes;outModel.eccodes(nER+1:nER+nRxns)]; +0624 outModel.compNames=[outModel.compNames;GSS.compartments(2:end)']; +0625 +0626 %Ugly little loop +0627 for i=1:numel(GSS.compartments)-1 +0628 outModel.comps=[outModel.comps;num2str(numel(outModel.comps)+1)]; +0629 end +0630 %This information is not known from the data, so empty fields are added +0631 outModel.compOutside=cell(numel(outModel.comps),1); +0632 
outModel.compOutside(:)={''}; +0633 +0634 for i=1:nComps-1 +0635 outModel.S=[outModel.S sparse(size(outModel.S,1),nRxns)]; +0636 outModel.S=[outModel.S;[sparse(nMets,nRxns*i+nER) copyPart]]; +0637 outModel.rxns=[outModel.rxns;strcat(outModel.rxns(nER+1:nER+nRxns),'_',GSS.compartments{i+1})]; +0638 outModel.rxnNames=[outModel.rxnNames;strcat(outModel.rxnNames(nER+1:nER+nRxns),' (',GSS.compartments{i+1},')')]; +0639 outModel.lb=[outModel.lb;outModel.lb(nER+1:nER+nRxns)]; +0640 outModel.ub=[outModel.ub;outModel.ub(nER+1:nER+nRxns)]; +0641 outModel.rev=[outModel.rev;outModel.rev(nER+1:nER+nRxns)]; +0642 outModel.c=[outModel.c;outModel.c(nER+1:nER+nRxns)]; +0643 if isfield(outModel,'grRules') +0644 outModel.grRules=[outModel.grRules;outModel.grRules(nER+1:nER+nRxns)]; 0645 end -0646 if isfield(outModel,'rxnFrom') -0647 outModel.rxnFrom=[outModel.rxnFrom;outModel.rxnFrom(nER+1:nER+nRxns)]; +0646 if isfield(outModel,'subSystems') +0647 outModel.subSystems=[outModel.subSystems;outModel.subSystems(nER+1:nER+nRxns)]; 0648 end -0649 if isfield(outModel,'rxnMiriams') -0650 outModel.rxnMiriams=[outModel.rxnMiriams;outModel.rxnMiriams(nER+1:nER+nRxns)]; +0649 if isfield(outModel,'eccodes') +0650 outModel.eccodes=[outModel.eccodes;outModel.eccodes(nER+1:nER+nRxns)]; 0651 end -0652 if isfield(outModel,'rxnNotes') -0653 outModel.rxnNotes=[outModel.rxnNotes;outModel.rxnNotes(nER+1:nER+nRxns)]; +0652 if isfield(outModel,'rxnFrom') +0653 outModel.rxnFrom=[outModel.rxnFrom;outModel.rxnFrom(nER+1:nER+nRxns)]; 0654 end -0655 if isfield(outModel,'rxnReferences') -0656 outModel.rxnReferences=[outModel.rxnReferences;outModel.rxnReferences(nER+1:nER+nRxns)]; +0655 if isfield(outModel,'rxnMiriams') +0656 outModel.rxnMiriams=[outModel.rxnMiriams;outModel.rxnMiriams(nER+1:nER+nRxns)]; 0657 end -0658 if isfield(outModel,'rxnConfidenceScores') -0659 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;outModel.rxnConfidenceScores(nER+1:nER+nRxns)]; +0658 if isfield(outModel,'rxnNotes') 
+0659 outModel.rxnNotes=[outModel.rxnNotes;outModel.rxnNotes(nER+1:nER+nRxns)]; 0660 end -0661 if isfield(outModel,'rxnDeltaG') -0662 outModel.rxnDeltaG=[outModel.rxnDeltaG;outModel.rxnDeltaG(nER+1:nER+nRxns)]; +0661 if isfield(outModel,'rxnReferences') +0662 outModel.rxnReferences=[outModel.rxnReferences;outModel.rxnReferences(nER+1:nER+nRxns)]; 0663 end -0664 outModel.mets=[outModel.mets;strcat(outModel.mets(nEM+1:nEM+nMets),'_',GSS.compartments{i+1})]; -0665 outModel.metNames=[outModel.metNames;outModel.metNames(nEM+1:nEM+nMets)]; -0666 outModel.b=[outModel.b;outModel.b(nEM+1:nEM+nMets,:)]; -0667 I=ones(nMets,1)*nStartComps+i; -0668 outModel.metComps=[outModel.metComps;I]; -0669 if isfield(outModel,'inchis') -0670 outModel.inchis=[outModel.inchis;outModel.inchis(nEM+1:nEM+nMets)]; -0671 end -0672 if isfield(outModel,'metSmiles') -0673 outModel.metSmiles=[outModel.metSmiles;outModel.metSmiles(nEM+1:nEM+nMets)]; -0674 end -0675 if isfield(outModel,'unconstrained') -0676 outModel.unconstrained=[outModel.unconstrained;outModel.unconstrained(nEM+1:nEM+nMets)]; +0664 if isfield(outModel,'rxnConfidenceScores') +0665 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;outModel.rxnConfidenceScores(nER+1:nER+nRxns)]; +0666 end +0667 if isfield(outModel,'rxnDeltaG') +0668 outModel.rxnDeltaG=[outModel.rxnDeltaG;outModel.rxnDeltaG(nER+1:nER+nRxns)]; +0669 end +0670 outModel.mets=[outModel.mets;strcat(outModel.mets(nEM+1:nEM+nMets),'_',GSS.compartments{i+1})]; +0671 outModel.metNames=[outModel.metNames;outModel.metNames(nEM+1:nEM+nMets)]; +0672 outModel.b=[outModel.b;outModel.b(nEM+1:nEM+nMets,:)]; +0673 I=ones(nMets,1)*nStartComps+i; +0674 outModel.metComps=[outModel.metComps;I]; +0675 if isfield(outModel,'inchis') +0676 outModel.inchis=[outModel.inchis;outModel.inchis(nEM+1:nEM+nMets)]; 0677 end -0678 if isfield(outModel,'metMiriams') -0679 outModel.metMiriams=[outModel.metMiriams;outModel.metMiriams(nEM+1:nEM+nMets)]; +0678 if isfield(outModel,'metSmiles') +0679 
outModel.metSmiles=[outModel.metSmiles;outModel.metSmiles(nEM+1:nEM+nMets)]; 0680 end -0681 if isfield(outModel,'metFormulas') -0682 outModel.metFormulas=[outModel.metFormulas;outModel.metFormulas(nEM+1:nEM+nMets)]; +0681 if isfield(outModel,'unconstrained') +0682 outModel.unconstrained=[outModel.unconstrained;outModel.unconstrained(nEM+1:nEM+nMets)]; 0683 end -0684 if isfield(outModel,'metFrom') -0685 outModel.metFrom=[outModel.metFrom;outModel.metFrom(nEM+1:nEM+nMets)]; +0684 if isfield(outModel,'metMiriams') +0685 outModel.metMiriams=[outModel.metMiriams;outModel.metMiriams(nEM+1:nEM+nMets)]; 0686 end -0687 if isfield(outModel,'metCharges') -0688 outModel.metCharges=[outModel.metCharges;outModel.metCharges(nEM+1:nEM+nMets)]; +0687 if isfield(outModel,'metFormulas') +0688 outModel.metFormulas=[outModel.metFormulas;outModel.metFormulas(nEM+1:nEM+nMets)]; 0689 end -0690 if isfield(outModel,'metDeltaG') -0691 outModel.metDeltaG=[outModel.metDeltaG;outModel.metDeltaG(nEM+1:nEM+nMets)]; +0690 if isfield(outModel,'metFrom') +0691 outModel.metFrom=[outModel.metFrom;outModel.metFrom(nEM+1:nEM+nMets)]; 0692 end -0693 end -0694 -0695 %Add the transport reactions -0696 transS=bestS(:,numel(outModel.rxns)+1:end); -0697 J=sum(transS)>0; %Active rxns -0698 -0699 %Transport reactions are written in a different way compared to a "real" -0700 %stoichimetric matrix. 
This is to fix that -0701 transS(transS~=0)=1; -0702 transS(1:nEM+nMets,:)=transS(1:nEM+nMets,:)*-1; -0703 I=find(sum(transS>0,2)); -0704 nTransRxns=numel(I); -0705 outModel.S=[outModel.S transS(:,J)]; -0706 filler=ones(nTransRxns,1); -0707 outModel.lb=[outModel.lb;filler*-1000]; -0708 outModel.ub=[outModel.ub;filler*1000]; -0709 outModel.rev=[outModel.rev;filler]; -0710 outModel.c=[outModel.c;filler*0]; -0711 outModel.rxnGeneMat=[outModel.rxnGeneMat;sparse(nTransRxns,numel(outModel.genes))]; -0712 -0713 for i=1:numel(I) -0714 outModel.rxns=[outModel.rxns;strcat('transport',num2str(i))]; -0715 outModel.rxnNames=[outModel.rxnNames;['Transport of ',outModel.metNames{I(i)}]]; -0716 if isfield(outModel,'grRules') -0717 outModel.grRules=[outModel.grRules;{''}]; -0718 end -0719 if isfield(outModel,'rxnMiriams') -0720 outModel.rxnMiriams=[outModel.rxnMiriams;{[]}]; -0721 end -0722 if isfield(outModel,'subSystems') -0723 outModel.subSystems=[outModel.subSystems;{{'Inferred transport reactions'}}]; +0693 if isfield(outModel,'metCharges') +0694 outModel.metCharges=[outModel.metCharges;outModel.metCharges(nEM+1:nEM+nMets)]; +0695 end +0696 if isfield(outModel,'metDeltaG') +0697 outModel.metDeltaG=[outModel.metDeltaG;outModel.metDeltaG(nEM+1:nEM+nMets)]; +0698 end +0699 end +0700 +0701 %Add the transport reactions +0702 transS=bestS(:,numel(outModel.rxns)+1:end); +0703 J=sum(transS)>0; %Active rxns +0704 +0705 %Transport reactions are written in a different way compared to a "real" +0706 %stoichimetric matrix. 
This is to fix that +0707 transS(transS~=0)=1; +0708 transS(1:nEM+nMets,:)=transS(1:nEM+nMets,:)*-1; +0709 I=find(sum(transS>0,2)); +0710 nTransRxns=numel(I); +0711 outModel.S=[outModel.S transS(:,J)]; +0712 filler=ones(nTransRxns,1); +0713 outModel.lb=[outModel.lb;filler*-1000]; +0714 outModel.ub=[outModel.ub;filler*1000]; +0715 outModel.rev=[outModel.rev;filler]; +0716 outModel.c=[outModel.c;filler*0]; +0717 outModel.rxnGeneMat=[outModel.rxnGeneMat;sparse(nTransRxns,numel(outModel.genes))]; +0718 +0719 for i=1:numel(I) +0720 outModel.rxns=[outModel.rxns;strcat('transport',num2str(i))]; +0721 outModel.rxnNames=[outModel.rxnNames;['Transport of ',outModel.metNames{I(i)}]]; +0722 if isfield(outModel,'grRules') +0723 outModel.grRules=[outModel.grRules;{''}]; 0724 end -0725 if isfield(outModel,'eccodes') -0726 outModel.eccodes=[outModel.eccodes;{''}]; +0725 if isfield(outModel,'rxnMiriams') +0726 outModel.rxnMiriams=[outModel.rxnMiriams;{[]}]; 0727 end -0728 if isfield(outModel,'rxnFrom') -0729 outModel.rxnFrom=[outModel.rxnFrom;{''}]; +0728 if isfield(outModel,'subSystems') +0729 outModel.subSystems=[outModel.subSystems;{{'Inferred transport reactions'}}]; 0730 end -0731 if isfield(outModel,'rxnNotes') -0732 outModel.rxnNotes=[outModel.rxnNotes;{''}]; +0731 if isfield(outModel,'eccodes') +0732 outModel.eccodes=[outModel.eccodes;{''}]; 0733 end -0734 if isfield(outModel,'rxnReferences') -0735 outModel.rxnReferences=[outModel.rxnReferences;{''}]; +0734 if isfield(outModel,'rxnFrom') +0735 outModel.rxnFrom=[outModel.rxnFrom;{''}]; 0736 end -0737 if isfield(outModel,'rxnConfidenceScores') -0738 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;NaN]; +0737 if isfield(outModel,'rxnNotes') +0738 outModel.rxnNotes=[outModel.rxnNotes;{''}]; 0739 end -0740 if isfield(outModel,'rxnDeltaG') -0741 outModel.rxnDeltaG=[outModel.rxnDeltaG;NaN]; +0740 if isfield(outModel,'rxnReferences') +0741 outModel.rxnReferences=[outModel.rxnReferences;{''}]; 0742 end -0743 end -0744 
-0745 %Then remove all reactions and metabolites that aren't used in the final -0746 %solution from the optimization -0747 [~, J]=find(bestS(:,1:nER+nComps*nRxns)); -0748 K=true(numel(outModel.rxns),1); -0749 K(J)=false; -0750 K(end-nTransRxns+1:end)=false; -0751 outModel=removeReactions(outModel,K,true); -0752 -0753 %Remove all fake genes -0754 I=strncmp('&&FAKE&&',outModel.genes,8); -0755 outModel.genes(I)=[]; -0756 if isfield(outModel,'geneMiriams') -0757 outModel.geneMiriams(I)=[]; -0758 end -0759 if isfield(outModel,'geneShortNames') -0760 outModel.geneShortNames(I)=[]; -0761 end -0762 outModel.rxnGeneMat(:,I)=[]; -0763 -0764 %Fix grRules and reconstruct rxnGeneMat -0765 [grRules,rxnGeneMat] = standardizeGrRules(outModel,true); -0766 outModel.grRules = grRules; -0767 outModel.rxnGeneMat = rxnGeneMat; -0768 end -0769 -0770 %Moves a gene and all associated reactions from one compartment to another -0771 function [S, g2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets) -0772 %Find the current compartment and update to the new one -0773 currentComp=find(g2c(geneToMove,:)); -0774 g2c(geneToMove,:)=false; -0775 g2c(geneToMove,toComp)=true; -0776 -0777 %Find the reactions in the original model that the gene controls -0778 [I, ~]=find(model.rxnGeneMat(:,geneToMove)); -0779 -0780 %Calculate their current positions in the S matrix -0781 oldRxns=I+(currentComp-1)*nRxns; -0782 -0783 %And their new positions -0784 newRxns=I+(toComp-1)*nRxns; +0743 if isfield(outModel,'rxnConfidenceScores') +0744 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;NaN]; +0745 end +0746 if isfield(outModel,'rxnDeltaG') +0747 outModel.rxnDeltaG=[outModel.rxnDeltaG;NaN]; +0748 end +0749 end +0750 +0751 %Then remove all reactions and metabolites that aren't used in the final +0752 %solution from the optimization +0753 [~, J]=find(bestS(:,1:nER+nComps*nRxns)); +0754 K=true(numel(outModel.rxns),1); +0755 K(J)=false; +0756 K(end-nTransRxns+1:end)=false; +0757 
outModel=removeReactions(outModel,K,true); +0758 +0759 %Remove all fake genes +0760 I=strncmp('&&FAKE&&',outModel.genes,8); +0761 outModel.genes(I)=[]; +0762 if isfield(outModel,'geneMiriams') +0763 outModel.geneMiriams(I)=[]; +0764 end +0765 if isfield(outModel,'geneShortNames') +0766 outModel.geneShortNames(I)=[]; +0767 end +0768 if isfield(outModel,'proteins') +0769 outModel.proteins(I)=[]; +0770 end +0771 outModel.rxnGeneMat(:,I)=[]; +0772 +0773 %Fix grRules and reconstruct rxnGeneMat +0774 [grRules,rxnGeneMat] = standardizeGrRules(outModel,true); +0775 outModel.grRules = grRules; +0776 outModel.rxnGeneMat = rxnGeneMat; +0777 end +0778 +0779 %Moves a gene and all associated reactions from one compartment to another +0780 function [S, g2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets) +0781 %Find the current compartment and update to the new one +0782 currentComp=find(g2c(geneToMove,:)); +0783 g2c(geneToMove,:)=false; +0784 g2c(geneToMove,toComp)=true; 0785 -0786 %The metabolite ids also have to be changed in order to match the new -0787 %compartment -0788 metChange=nMets*(toComp-currentComp); -0789 -0790 %Update the reactions -0791 [I, J, K]=find(S(:,oldRxns)); -0792 I=I+metChange; -0793 -0794 %Move the reactions -0795 S(:,oldRxns)=0; -0796 S(sub2ind(size(S),I,newRxns(J)))=K; -0797 end +0786 %Find the reactions in the original model that the gene controls +0787 [I, ~]=find(model.rxnGeneMat(:,geneToMove)); +0788 +0789 %Calculate their current positions in the S matrix +0790 oldRxns=I+(currentComp-1)*nRxns; +0791 +0792 %And their new positions +0793 newRxns=I+(toComp-1)*nRxns; +0794 +0795 %The metabolite ids also have to be changed in order to match the new +0796 %compartment +0797 metChange=nMets*(toComp-currentComp); 0798 -0799 %Finds which metabolites are unconnected, in the sense that they are never -0800 %a product or only a product in a reversible reaction where one reactant is -0801 %only a product in the opposite direction of that reaction. 
This function -0802 %ignores exchange metabolites. Returns a vector of metabolite indexes. -0803 %metsToCheck is an array of metabolite indexes to check for connectivity. -0804 %If not supplied then all metabolites are checked -0805 function unconnected=findUnconnected(S,nEM,metsToCheck) -0806 if nargin>2 -0807 %Do this by deleting everything from the network that is not in -0808 %metsToCheck and that is not exchange metabolites -0809 I=false(size(S,1),1); -0810 I(1:nEM)=true; -0811 I(metsToCheck)=true; -0812 S=S(I,:); -0813 end -0814 -0815 em=false(size(S,1),1); -0816 em(1:nEM)=true; -0817 -0818 %Construct a matrix in which the reversible reactions are inverted -0819 I=sum(S>2,1) | sum(S>2,1); -0820 revS=S; -0821 revS(:,I)=revS(:,I)*-1; -0822 -0823 %First calculate the ones that are ok -0824 %Produced in 2 rxns, is exchange, is not used at all, is produced in -0825 %non-reversible, involved in more than 1 reversible reactions -0826 connected=sum(S>0,2)>1 | em | sum(S~=0,2)==0 | sum(S(:,~I)>0,2)>0 | sum(S(:,I)~=0,2)>1; -0827 -0828 %Then get the ones that are unconnected because they are never produced -0829 unconnected=sum(S>0 | revS>0,2)==0 & connected==false; -0830 -0831 %Then get the ones that are potentially unconnected -0832 maybeUnconnected=~connected & ~unconnected; -0833 %maybeUnconnected=find(maybeUnconnectedS); -0834 -0835 %The metabolites in maybeUnconnected are involved in one reversible -0836 %reaction and not produced in any other reaction. This means that the -0837 %reactions which have at least one met in maybeUnconnected as reactant and -0838 %one as product are unconnected. 
The metabolites in maybeUnconnected that -0839 %are present in those reactions are then dead ends -0840 deadRxns=any(S(maybeUnconnected,:)>0) & any(S(maybeUnconnected,:)<0); -0841 -0842 %Get the mets involved in any of those reactions -0843 problematic=any(S(:,deadRxns)~=0,2); -0844 -0845 %If any of these are in the maybeUnconnected list then the metabolite is -0846 %unconnected -0847 unconnected(problematic & maybeUnconnected)=true; -0848 -0849 %Map back to metsToCheck -0850 if nargin>2 -0851 unconnected=metsToCheck(unconnected(nEM+1:end)); -0852 else -0853 unconnected=find(unconnected); -0854 end -0855 end -0856 -0857 %Given a set of unconnected metabolites, this function tries to move each -0858 %gene that could connect any of them, calculates the number of newly -0859 %connected metabolites minus the number of newly disconnected metabolites. -0860 %As some metabolites are very connected, only 25 genes are checked. Genes -0861 %that have a low score in their current compartment are more likely to be -0862 %moved -0863 function [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(S,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS) -0864 %If moveTo is 0 then the gene cannot connect any of the metabolites -0865 moveTo=zeros(numel(model.genes),1); -0866 deltaConnected=zeros(numel(model.genes),1); -0867 -0868 %First get where the metabolites are now -0869 nComps=size(g2c,2); -0870 comps=ceil((unconnected-nEM)/((size(S,1)-nEM)/nComps)); -0871 -0872 %Find the corresponding metabolite indexes if they all were in the default -0873 %compartment -0874 dcIndexes=unique(unconnected-(comps-1)*nMets); -0875 -0876 %Then find them if they were in any other compartment -0877 allIndexes=dcIndexes; -0878 for i=1:nComps-1 -0879 allIndexes=[allIndexes;dcIndexes+nMets*i]; -0880 end -0881 -0882 %Also check which reversible reactions that could be used -0883 I=sum(S>2,1) | sum(S>2,1); -0884 revS=S; -0885 revS(:,I)=revS(:,I)*-1; -0886 -0887 %Find all reactions that could make any of 
the unconnected metabolites in -0888 %some other compartment -0889 newMets=setdiff(allIndexes,unconnected); -0890 [~, potential]=find(S(newMets,:)>0 | revS(newMets,:)>0); -0891 potential(potential<=nER | potential>nER+nRxns*nComps)=[]; %No exchange rxns or transport rxns -0892 -0893 %Map J to the real metabolic reactions in model -0894 rxnComps=ceil((potential-nER)/(nRxns)); +0799 %Update the reactions +0800 [I, J, K]=find(S(:,oldRxns)); +0801 I=I+metChange; +0802 +0803 %Move the reactions +0804 S(:,oldRxns)=0; +0805 S(sub2ind(size(S),I,newRxns(J)))=K; +0806 end +0807 +0808 %Finds which metabolites are unconnected, in the sense that they are never +0809 %a product or only a product in a reversible reaction where one reactant is +0810 %only a product in the opposite direction of that reaction. This function +0811 %ignores exchange metabolites. Returns a vector of metabolite indexes. +0812 %metsToCheck is an array of metabolite indexes to check for connectivity. +0813 %If not supplied then all metabolites are checked +0814 function unconnected=findUnconnected(S,nEM,metsToCheck) +0815 if nargin>2 +0816 %Do this by deleting everything from the network that is not in +0817 %metsToCheck and that is not exchange metabolites +0818 I=false(size(S,1),1); +0819 I(1:nEM)=true; +0820 I(metsToCheck)=true; +0821 S=S(I,:); +0822 end +0823 +0824 em=false(size(S,1),1); +0825 em(1:nEM)=true; +0826 +0827 %Construct a matrix in which the reversible reactions are inverted +0828 I=sum(S>2,1) | sum(S>2,1); +0829 revS=S; +0830 revS(:,I)=revS(:,I)*-1; +0831 +0832 %First calculate the ones that are ok +0833 %Produced in 2 rxns, is exchange, is not used at all, is produced in +0834 %non-reversible, involved in more than 1 reversible reactions +0835 connected=sum(S>0,2)>1 | em | sum(S~=0,2)==0 | sum(S(:,~I)>0,2)>0 | sum(S(:,I)~=0,2)>1; +0836 +0837 %Then get the ones that are unconnected because they are never produced +0838 unconnected=sum(S>0 | revS>0,2)==0 & connected==false; +0839 +0840 
%Then get the ones that are potentially unconnected +0841 maybeUnconnected=~connected & ~unconnected; +0842 %maybeUnconnected=find(maybeUnconnectedS); +0843 +0844 %The metabolites in maybeUnconnected are involved in one reversible +0845 %reaction and not produced in any other reaction. This means that the +0846 %reactions which have at least one met in maybeUnconnected as reactant and +0847 %one as product are unconnected. The metabolites in maybeUnconnected that +0848 %are present in those reactions are then dead ends +0849 deadRxns=any(S(maybeUnconnected,:)>0) & any(S(maybeUnconnected,:)<0); +0850 +0851 %Get the mets involved in any of those reactions +0852 problematic=any(S(:,deadRxns)~=0,2); +0853 +0854 %If any of these are in the maybeUnconnected list then the metabolite is +0855 %unconnected +0856 unconnected(problematic & maybeUnconnected)=true; +0857 +0858 %Map back to metsToCheck +0859 if nargin>2 +0860 unconnected=metsToCheck(unconnected(nEM+1:end)); +0861 else +0862 unconnected=find(unconnected); +0863 end +0864 end +0865 +0866 %Given a set of unconnected metabolites, this function tries to move each +0867 %gene that could connect any of them, calculates the number of newly +0868 %connected metabolites minus the number of newly disconnected metabolites. +0869 %As some metabolites are very connected, only 25 genes are checked. 
Genes +0870 %that have a low score in their current compartment are more likely to be +0871 %moved +0872 function [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(S,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS) +0873 %If moveTo is 0 then the gene cannot connect any of the metabolites +0874 moveTo=zeros(numel(model.genes),1); +0875 deltaConnected=zeros(numel(model.genes),1); +0876 +0877 %First get where the metabolites are now +0878 nComps=size(g2c,2); +0879 comps=ceil((unconnected-nEM)/((size(S,1)-nEM)/nComps)); +0880 +0881 %Find the corresponding metabolite indexes if they all were in the default +0882 %compartment +0883 dcIndexes=unique(unconnected-(comps-1)*nMets); +0884 +0885 %Then find them if they were in any other compartment +0886 allIndexes=dcIndexes; +0887 for i=1:nComps-1 +0888 allIndexes=[allIndexes;dcIndexes+nMets*i]; +0889 end +0890 +0891 %Also check which reversible reactions that could be used +0892 I=sum(S>2,1) | sum(S>2,1); +0893 revS=S; +0894 revS(:,I)=revS(:,I)*-1; 0895 -0896 %Find the corresponding reaction indexes if they all were in the default -0897 %compartment -0898 dcRxnIndexes=potential-(rxnComps-1)*nRxns; -0899 -0900 %Get the genes for those reactions -0901 genes=find(sum(model.rxnGeneMat(dcRxnIndexes,:)>0,1)); -0902 -0903 %For some cases there can be very many reactions to connect something. This -0904 %is in particular true in the beginning of the optimization if, say, ATP is -0905 %unconnected. Therefore limit the number of genes to be checked to 25. -0906 %Weigh so that genes with bad scores in their current compartment are more -0907 %likely to be moved. 
+0896 %Find all reactions that could make any of the unconnected metabolites in +0897 %some other compartment +0898 newMets=setdiff(allIndexes,unconnected); +0899 [~, potential]=find(S(newMets,:)>0 | revS(newMets,:)>0); +0900 potential(potential<=nER | potential>nER+nRxns*nComps)=[]; %No exchange rxns or transport rxns +0901 +0902 %Map J to the real metabolic reactions in model +0903 rxnComps=ceil((potential-nER)/(nRxns)); +0904 +0905 %Find the corresponding reaction indexes if they all were in the default +0906 %compartment +0907 dcRxnIndexes=potential-(rxnComps-1)*nRxns; 0908 -0909 %Get scores for these genes -0910 [~, J]=find(g2c(genes,:)); +0909 %Get the genes for those reactions +0910 genes=find(sum(model.rxnGeneMat(dcRxnIndexes,:)>0,1)); 0911 -0912 %Add a small weight so that genes in their best compartment could be moved -0913 %as well -0914 geneScores=GSS.scores(sub2ind(size(g2c),genes(:),J)); -0915 modGeneScores=1.1-geneScores; -0916 if numel(genes)>25 -0917 rGenes=genes(randsample(numel(genes),min(numel(genes),25),true,modGeneScores)); -0918 -0919 %The sampling with weights could give duplicates -0920 rGenes=unique(rGenes); -0921 -0922 %Reorder the geneScores to match -0923 [~, I]=ismember(rGenes,genes); -0924 geneScores=geneScores(I); -0925 genes=rGenes; -0926 end -0927 for i=1:numel(genes) -0928 %Since one gene is moved at a time, only metabolites involved in any of -0929 %the reactions for that gene can become unconnected. This helps to -0930 %speed up the algorithm. First get all involved reactions in the -0931 %default compartment -0932 rxns=find(model.rxnGeneMat(:,genes(i))); -0933 -0934 %Then get their mets -0935 mets=find(sum(model.S(:,rxns)~=0,2)>0); -0936 -0937 %Then get their indexes in all compartments -0938 allIndexes=mets; -0939 for j=1:nComps-1 -0940 allIndexes=[allIndexes;mets+nMets*j]; -0941 end +0912 %For some cases there can be very many reactions to connect something. 
This +0913 %is in particular true in the beginning of the optimization if, say, ATP is +0914 %unconnected. Therefore limit the number of genes to be checked to 25. +0915 %Weigh so that genes with bad scores in their current compartment are more +0916 %likely to be moved. +0917 +0918 %Get scores for these genes +0919 [~, J]=find(g2c(genes,:)); +0920 +0921 %Add a small weight so that genes in their best compartment could be moved +0922 %as well +0923 geneScores=GSS.scores(sub2ind(size(g2c),genes(:),J)); +0924 modGeneScores=1.1-geneScores; +0925 if numel(genes)>25 +0926 rGenes=genes(randsample(numel(genes),min(numel(genes),25),true,modGeneScores)); +0927 +0928 %The sampling with weights could give duplicates +0929 rGenes=unique(rGenes); +0930 +0931 %Reorder the geneScores to match +0932 [~, I]=ismember(rGenes,genes); +0933 geneScores=geneScores(I); +0934 genes=rGenes; +0935 end +0936 for i=1:numel(genes) +0937 %Since one gene is moved at a time, only metabolites involved in any of +0938 %the reactions for that gene can become unconnected. This helps to +0939 %speed up the algorithm. First get all involved reactions in the +0940 %default compartment +0941 rxns=find(model.rxnGeneMat(:,genes(i))); 0942 -0943 %Check which of the unconnected metabolites that these reactions -0944 %correspond to. This could have been done earlier, but it is fast. The -0945 %reversibility check is skipped because it is unlikely to be an issue -0946 %here. 
Worst case is that the gene is tested once to much -0947 [I, ~]=find(model.S(:,rxns)); -0948 moveToComps=unique(comps(ismember(dcIndexes,I))); -0949 -0950 %Try to move the gene to each of the compartments -0951 bestMove=-inf; -0952 bestComp=[]; -0953 for j=1:numel(moveToComps) -0954 newS=moveGene(S,model,g2c,genes(i),moveToComps(j),nRxns,nMets); -0955 -0956 %Check how many metabolites that are unconnected after moving the -0957 %gene -0958 dConnected=numel(unconnected)-numel(findUnconnected(newS,nEM,[allIndexes;unconnected])); -0959 if dConnected>bestMove -0960 bestMove=dConnected; -0961 bestComp=moveToComps(j); -0962 end -0963 end -0964 -0965 %Add the difference in connectivity and where the genes should be moved -0966 moveTo(genes(i))=bestComp; -0967 deltaConnected(genes(i))=bestMove; -0968 end -0969 -0970 %Finish up -0971 geneIndex=genes(:); -0972 moveTo=moveTo(geneIndex); -0973 deltaConnected=deltaConnected(geneIndex); -0974 deltaScore=GSS.scores(sub2ind(size(g2c),geneIndex(:),moveTo))-geneScores; -0975 end -0976 -0977 %Small function to add a transport reactions between two metabolites. -0978 %Transport reactions are written as having a coefficient 2.0 for both -0979 %reactant and product. 
This is not a "real" reaction, but since all normal -0980 %reactions have coefficient -1/1 or -10/10 it is a compact way of writing -0981 %it -0982 function S=addTransport(S,nRxns,nER,nMets,nEM,nComps,metA,metB) -0983 mets=[metA;metB]; -0984 %Find the current compartments for the metabolites -0985 comps=ceil((mets-nEM)/((size(S,1)-nEM)/nComps)); -0986 -0987 if sum(comps==1)~=1 -0988 EM='Tried to create a transport reaction from a non-default compartment'; -0989 dispEM(EM); -0990 end -0991 -0992 %Calculate the reaction index -0993 rIndex=(nER+nRxns*nComps)+mets(comps~=1)-nEM-nMets; -0994 -0995 S(mets,rIndex)=2; -0996 end -0997 -0998 %Scores a network based on the localization of the genes and the number of -0999 %transporter reactions used -1000 function [score, geneScore, transportCost]=scoreModel(S,g2c,GSS,transportCost) -1001 [I, J]=find(g2c); -1002 geneScore=sum(GSS.scores(sub2ind(size(g2c),I,J))); -1003 [I, ~]=find(S==2); -1004 I=unique(I); -1005 transportCost=sum(transportCost(I)); -1006 score=geneScore-transportCost; -1007 end -1008 -1009 % To avoid dependency on stats toolbox, use this alternative implementation -1010 % of randsample, source: -1011 % https://github.com/gpeyre/numerical-tours/blob/dacee30081c04ef5f67b26b387ead85f2b193af9/matlab/toolbox_signal/randsample.m -1012 function y = randsample(n, k, replace, w) -1013 %RANDSAMPLE Random sample, with or without replacement. -1014 % Y = RANDSAMPLE(N,K) returns Y as a vector of K values sampled uniformly -1015 % at random, without replacement, from the integers 1:N. -1016 % -1017 % Y = RANDSAMPLE(POPULATION,K) returns K values sampled uniformly at -1018 % random, without replacement, from the values in the vector POPULATION. -1019 % -1020 % Y = RANDSAMPLE(...,REPLACE) returns a sample taken with replacement if -1021 % REPLACE is true, or without replacement if REPLACE is false (the default). 
-1022 % -1023 % Y = RANDSAMPLE(...,true,W) returns a weighted sample, using positive -1024 % weights W, taken with replacement. W is often a vector of probabilities. -1025 % This function does not support weighted sampling without replacement. -1026 % -1027 % Example: Generate a random sequence of the characters ACGT, with -1028 % replacement, according to specified probabilities. -1029 % -1030 % R = randsample('ACGT',48,true,[0.15 0.35 0.35 0.15]) +0943 %Then get their mets +0944 mets=find(sum(model.S(:,rxns)~=0,2)>0); +0945 +0946 %Then get their indexes in all compartments +0947 allIndexes=mets; +0948 for j=1:nComps-1 +0949 allIndexes=[allIndexes;mets+nMets*j]; +0950 end +0951 +0952 %Check which of the unconnected metabolites that these reactions +0953 %correspond to. This could have been done earlier, but it is fast. The +0954 %reversibility check is skipped because it is unlikely to be an issue +0955 %here. Worst case is that the gene is tested once to much +0956 [I, ~]=find(model.S(:,rxns)); +0957 moveToComps=unique(comps(ismember(dcIndexes,I))); +0958 +0959 %Try to move the gene to each of the compartments +0960 bestMove=-inf; +0961 bestComp=[]; +0962 for j=1:numel(moveToComps) +0963 newS=moveGene(S,model,g2c,genes(i),moveToComps(j),nRxns,nMets); +0964 +0965 %Check how many metabolites that are unconnected after moving the +0966 %gene +0967 dConnected=numel(unconnected)-numel(findUnconnected(newS,nEM,[allIndexes;unconnected])); +0968 if dConnected>bestMove +0969 bestMove=dConnected; +0970 bestComp=moveToComps(j); +0971 end +0972 end +0973 +0974 %Add the difference in connectivity and where the genes should be moved +0975 moveTo(genes(i))=bestComp; +0976 deltaConnected(genes(i))=bestMove; +0977 end +0978 +0979 %Finish up +0980 geneIndex=genes(:); +0981 moveTo=moveTo(geneIndex); +0982 deltaConnected=deltaConnected(geneIndex); +0983 deltaScore=GSS.scores(sub2ind(size(g2c),geneIndex(:),moveTo))-geneScores; +0984 end +0985 +0986 %Small function to add a transport 
reactions between two metabolites. +0987 %Transport reactions are written as having a coefficient 2.0 for both +0988 %reactant and product. This is not a "real" reaction, but since all normal +0989 %reactions have coefficient -1/1 or -10/10 it is a compact way of writing +0990 %it +0991 function S=addTransport(S,nRxns,nER,nMets,nEM,nComps,metA,metB) +0992 mets=[metA;metB]; +0993 %Find the current compartments for the metabolites +0994 comps=ceil((mets-nEM)/((size(S,1)-nEM)/nComps)); +0995 +0996 if sum(comps==1)~=1 +0997 EM='Tried to create a transport reaction from a non-default compartment'; +0998 dispEM(EM); +0999 end +1000 +1001 %Calculate the reaction index +1002 rIndex=(nER+nRxns*nComps)+mets(comps~=1)-nEM-nMets; +1003 +1004 S(mets,rIndex)=2; +1005 end +1006 +1007 %Scores a network based on the localization of the genes and the number of +1008 %transporter reactions used +1009 function [score, geneScore, transportCost]=scoreModel(S,g2c,GSS,transportCost) +1010 [I, J]=find(g2c); +1011 geneScore=sum(GSS.scores(sub2ind(size(g2c),I,J))); +1012 [I, ~]=find(S==2); +1013 I=unique(I); +1014 transportCost=sum(transportCost(I)); +1015 score=geneScore-transportCost; +1016 end +1017 +1018 % To avoid dependency on stats toolbox, use this alternative implementation +1019 % of randsample, source: +1020 % https://github.com/gpeyre/numerical-tours/blob/dacee30081c04ef5f67b26b387ead85f2b193af9/matlab/toolbox_signal/randsample.m +1021 function y = randsample(n, k, replace, w) +1022 %RANDSAMPLE Random sample, with or without replacement. +1023 % Y = RANDSAMPLE(N,K) returns Y as a vector of K values sampled uniformly +1024 % at random, without replacement, from the integers 1:N. +1025 % +1026 % Y = RANDSAMPLE(POPULATION,K) returns K values sampled uniformly at +1027 % random, without replacement, from the values in the vector POPULATION. 
+1028 % +1029 % Y = RANDSAMPLE(...,REPLACE) returns a sample taken with replacement if +1030 % REPLACE is true, or without replacement if REPLACE is false (the default). 1031 % -1032 % See also RAND, RANDPERM. -1033 -1034 % Copyright 1993-2008 The MathWorks, Inc. -1035 % $Revision: 1.1.4.3 $ $Date: 2008/12/01 08:09:34 $ -1036 -1037 if nargin < 2 -1038 error('stats:randsample:TooFewInputs','Requires two input arguments.'); -1039 elseif numel(n) == 1 -1040 population = []; -1041 else -1042 population = n; -1043 n = numel(population); -1044 if length(population)~=n -1045 error('stats:randsample:BadPopulation','POPULATION must be a vector.'); -1046 end -1047 end -1048 -1049 if nargin < 3 -1050 replace = false; -1051 end -1052 -1053 if nargin < 4 -1054 w = []; -1055 elseif ~isempty(w) -1056 if length(w) ~= n -1057 if isempty(population) -1058 error('stats:randsample:InputSizeMismatch',... -1059 'W must have length equal to N.'); -1060 else -1061 error('stats:randsample:InputSizeMismatch',... -1062 'W must have the same length as the population.'); -1063 end -1064 else -1065 p = w(:)' / sum(w); -1066 end -1067 end -1068 -1069 switch replace -1070 -1071 % Sample with replacement -1072 case {true, 'true', 1} -1073 if isempty(w) -1074 y = ceil(n .* rand(k,1)); -1075 else -1076 [dum, y] = histc(rand(k,1),[0 cumsum(p)]); -1077 end -1078 -1079 % Sample without replacement -1080 case {false, 'false', 0} -1081 if k > n -1082 if isempty(population) -1083 error('stats:randsample:SampleTooLarge',... -1084 'K must be less than or equal to N for sampling without replacement.'); -1085 else -1086 error('stats:randsample:SampleTooLarge',... -1087 'K must be less than or equal to the population size.'); -1088 end -1089 end -1090 -1091 if isempty(w) -1092 % If the sample is a sizeable fraction of the population, -1093 % just randomize the whole population (which involves a full -1094 % sort of n random values), and take the first k. 
-1095 if 4*k > n -1096 rp = randperm(n); -1097 y = rp(1:k); -1098 -1099 % If the sample is a small fraction of the population, a full sort -1100 % is wasteful. Repeatedly sample with replacement until there are -1101 % k unique values. -1102 else -1103 x = zeros(1,n); % flags -1104 sumx = 0; -1105 while sumx < k -1106 x(ceil(n * rand(1,k-sumx))) = 1; % sample w/replacement -1107 sumx = sum(x); % count how many unique elements so far -1108 end -1109 y = find(x > 0); -1110 y = y(randperm(k)); -1111 end -1112 else -1113 error('stats:randsample:NoWeighting',... -1114 'Weighted sampling without replacement is not supported.'); -1115 end -1116 otherwise -1117 error('stats:randsample:BadReplaceValue',... -1118 'REPLACE must be either true or false.'); -1119 end -1120 -1121 if ~isempty(population) -1122 y = population(y); -1123 else -1124 y = y(:); -1125 end -1126 end +1032 % Y = RANDSAMPLE(...,true,W) returns a weighted sample, using positive +1033 % weights W, taken with replacement. W is often a vector of probabilities. +1034 % This function does not support weighted sampling without replacement. +1035 % +1036 % Example: Generate a random sequence of the characters ACGT, with +1037 % replacement, according to specified probabilities. +1038 % +1039 % R = randsample('ACGT',48,true,[0.15 0.35 0.35 0.15]) +1040 % +1041 % See also RAND, RANDPERM. +1042 +1043 % Copyright 1993-2008 The MathWorks, Inc. 
+1044 % $Revision: 1.1.4.3 $ $Date: 2008/12/01 08:09:34 $ +1045 +1046 if nargin < 2 +1047 error('stats:randsample:TooFewInputs','Requires two input arguments.'); +1048 elseif numel(n) == 1 +1049 population = []; +1050 else +1051 population = n; +1052 n = numel(population); +1053 if length(population)~=n +1054 error('stats:randsample:BadPopulation','POPULATION must be a vector.'); +1055 end +1056 end +1057 +1058 if nargin < 3 +1059 replace = false; +1060 end +1061 +1062 if nargin < 4 +1063 w = []; +1064 elseif ~isempty(w) +1065 if length(w) ~= n +1066 if isempty(population) +1067 error('stats:randsample:InputSizeMismatch',... +1068 'W must have length equal to N.'); +1069 else +1070 error('stats:randsample:InputSizeMismatch',... +1071 'W must have the same length as the population.'); +1072 end +1073 else +1074 p = w(:)' / sum(w); +1075 end +1076 end +1077 +1078 switch replace +1079 +1080 % Sample with replacement +1081 case {true, 'true', 1} +1082 if isempty(w) +1083 y = ceil(n .* rand(k,1)); +1084 else +1085 [dum, y] = histc(rand(k,1),[0 cumsum(p)]); +1086 end +1087 +1088 % Sample without replacement +1089 case {false, 'false', 0} +1090 if k > n +1091 if isempty(population) +1092 error('stats:randsample:SampleTooLarge',... +1093 'K must be less than or equal to N for sampling without replacement.'); +1094 else +1095 error('stats:randsample:SampleTooLarge',... +1096 'K must be less than or equal to the population size.'); +1097 end +1098 end +1099 +1100 if isempty(w) +1101 % If the sample is a sizeable fraction of the population, +1102 % just randomize the whole population (which involves a full +1103 % sort of n random values), and take the first k. +1104 if 4*k > n +1105 rp = randperm(n); +1106 y = rp(1:k); +1107 +1108 % If the sample is a small fraction of the population, a full sort +1109 % is wasteful. Repeatedly sample with replacement until there are +1110 % k unique values. 
+1111 else +1112 x = zeros(1,n); % flags +1113 sumx = 0; +1114 while sumx < k +1115 x(ceil(n * rand(1,k-sumx))) = 1; % sample w/replacement +1116 sumx = sum(x); % count how many unique elements so far +1117 end +1118 y = find(x > 0); +1119 y = y(randperm(k)); +1120 end +1121 else +1122 error('stats:randsample:NoWeighting',... +1123 'Weighted sampling without replacement is not supported.'); +1124 end +1125 otherwise +1126 error('stats:randsample:BadReplaceValue',... +1127 'REPLACE must be either true or false.'); +1128 end +1129 +1130 if ~isempty(population) +1131 y = population(y); +1132 else +1133 y = y(:); +1134 end +1135 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/printOrange.html b/doc/core/printOrange.html index b4c7e653..20753a03 100644 --- a/doc/core/printOrange.html +++ b/doc/core/printOrange.html @@ -24,7 +24,7 @@

PURPOSE ^printOrange

SYNOPSIS ^

-
function printOrange(stringToPrint)
+
function orangeString = printOrange(stringToPrint)

DESCRIPTION ^

 printOrange
@@ -49,7 +49,7 @@ 

CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

-
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/removeReactions.html b/doc/core/removeReactions.html index e74f0152..efc52428 100644 --- a/doc/core/removeReactions.html +++ b/doc/core/removeReactions.html @@ -192,23 +192,27 @@

SOURCE CODE ^if isfield(reducedModel,'geneShortNames') 0130 reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); 0131 end -0132 -0133 if isfield(reducedModel,'geneMiriams') -0134 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); +0132 +0133 if isfield(reducedModel,'proteins') +0134 reducedModel.proteins=reducedModel.proteins(toKeep); 0135 end 0136 -0137 if isfield(reducedModel,'geneFrom') -0138 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); +0137 if isfield(reducedModel,'geneMiriams') +0138 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); 0139 end 0140 -0141 if isfield(reducedModel,'geneComps') -0142 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0141 if isfield(reducedModel,'geneFrom') +0142 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); 0143 end -0144 end -0145 else -0146 reducedModel=model; -0147 end -0148 end

+0144 +0145 if isfield(reducedModel,'geneComps') +0146 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0147 end +0148 end +0149 else +0150 reducedModel=model; +0151 end +0152 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/simplifyModel.html b/doc/core/simplifyModel.html index 01d16cc8..f6f4baf6 100644 --- a/doc/core/simplifyModel.html +++ b/doc/core/simplifyModel.html @@ -299,102 +299,105 @@

SOURCE CODE ^if isfield(reducedModel,'geneShortNames') 0222 reducedModel.geneShortNames={}; 0223 end -0224 if isfield(reducedModel,'geneMiriams') -0225 reducedModel.geneMiriams={}; +0224 if isfield(reducedModel,'proteins') +0225 reducedModel.proteins={}; 0226 end -0227 if isfield(reducedModel,'geneComps') -0228 reducedModel.geneComps=[]; +0227 if isfield(reducedModel,'geneMiriams') +0228 reducedModel.geneMiriams={}; 0229 end -0230 -0231 %Convert the model to irreversible -0232 irrevModel=convertToIrrev(reducedModel); +0230 if isfield(reducedModel,'geneComps') +0231 reducedModel.geneComps=[]; +0232 end 0233 -0234 %Loop through and iteratively group linear reactions -0235 while 1 -0236 %Get the banned reaction indexes. Note that the indexes will change -0237 %in each iteration, but the names will not as they won't be merged -0238 %with any other reaction -0239 bannedIndexes=getIndexes(irrevModel,reservedRxns,'rxns'); -0240 -0241 %Select all metabolites that are only present as reactants/products -0242 %in one reaction -0243 singleNegative=find(sum(irrevModel.S'<0)==1); -0244 singlePositive=find(sum(irrevModel.S'>0)==1); -0245 -0246 %Retrieve the common metabolites -0247 common=intersect(singleNegative,singlePositive); +0234 %Convert the model to irreversible +0235 irrevModel=convertToIrrev(reducedModel); +0236 +0237 %Loop through and iteratively group linear reactions +0238 while 1 +0239 %Get the banned reaction indexes. 
Note that the indexes will change +0240 %in each iteration, but the names will not as they won't be merged +0241 %with any other reaction +0242 bannedIndexes=getIndexes(irrevModel,reservedRxns,'rxns'); +0243 +0244 %Select all metabolites that are only present as reactants/products +0245 %in one reaction +0246 singleNegative=find(sum(irrevModel.S'<0)==1); +0247 singlePositive=find(sum(irrevModel.S'>0)==1); 0248 -0249 mergedSome=false; -0250 -0251 %Loop through each of them and see if the reactions should be -0252 %merged -0253 for i=1:numel(common) -0254 involvedRxns=find(irrevModel.S(common(i),:)); -0255 -0256 %Check so that one or both of the reactions haven't been merged -0257 %already -0258 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) -0259 %Calculate how many times the second reaction has to be -0260 %multiplied before being merged with the first -0261 stoichRatio=abs(irrevModel.S(common(i),involvedRxns(1))/irrevModel.S(common(i),involvedRxns(2))); -0262 -0263 %Add the second to the first -0264 irrevModel.S(:,involvedRxns(1))=irrevModel.S(:,involvedRxns(1))+irrevModel.S(:,involvedRxns(2))*stoichRatio; +0249 %Retrieve the common metabolites +0250 common=intersect(singleNegative,singlePositive); +0251 +0252 mergedSome=false; +0253 +0254 %Loop through each of them and see if the reactions should be +0255 %merged +0256 for i=1:numel(common) +0257 involvedRxns=find(irrevModel.S(common(i),:)); +0258 +0259 %Check so that one or both of the reactions haven't been merged +0260 %already +0261 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) +0262 %Calculate how many times the second reaction has to be +0263 %multiplied before being merged with the first +0264 stoichRatio=abs(irrevModel.S(common(i),involvedRxns(1))/irrevModel.S(common(i),involvedRxns(2))); 0265 -0266 %Clear the second reaction -0267 irrevModel.S(:,involvedRxns(2))=0; +0266 %Add the second to the first +0267 
irrevModel.S(:,involvedRxns(1))=irrevModel.S(:,involvedRxns(1))+irrevModel.S(:,involvedRxns(2))*stoichRatio; 0268 -0269 %This is to prevent numerical issues. It should be 0 -0270 %already -0271 irrevModel.S(common(i),involvedRxns(1))=0; -0272 -0273 %At this point the second reaction is certain to be deleted -0274 %in a later step and can therefore be ignored +0269 %Clear the second reaction +0270 irrevModel.S(:,involvedRxns(2))=0; +0271 +0272 %This is to prevent numerical issues. It should be 0 +0273 %already +0274 irrevModel.S(common(i),involvedRxns(1))=0; 0275 -0276 %Recalculate the bounds for the new reaction. This can be -0277 %problematic since the scale of the bounds may change -0278 %dramatically. Let the most constraining reaction determine -0279 %the new bound -0280 lb1=irrevModel.lb(involvedRxns(1)); -0281 lb2=irrevModel.lb(involvedRxns(2)); -0282 ub1=irrevModel.ub(involvedRxns(1)); -0283 ub2=irrevModel.ub(involvedRxns(2)); -0284 -0285 if lb2~=-inf -0286 irrevModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); -0287 end -0288 if ub2~=inf -0289 irrevModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0276 %At this point the second reaction is certain to be deleted +0277 %in a later step and can therefore be ignored +0278 +0279 %Recalculate the bounds for the new reaction. This can be +0280 %problematic since the scale of the bounds may change +0281 %dramatically. Let the most constraining reaction determine +0282 %the new bound +0283 lb1=irrevModel.lb(involvedRxns(1)); +0284 lb2=irrevModel.lb(involvedRxns(2)); +0285 ub1=irrevModel.ub(involvedRxns(1)); +0286 ub2=irrevModel.ub(involvedRxns(2)); +0287 +0288 if lb2~=-inf +0289 irrevModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); 0290 end -0291 -0292 %Then recalculate the objective coefficient. 
The resulting -0293 %coefficient is the weighted sum of the previous -0294 irrevModel.c(involvedRxns(1))=irrevModel.c(involvedRxns(1))+irrevModel.c(involvedRxns(2))*stoichRatio; -0295 -0296 %Iterate again -0297 mergedSome=true; -0298 end -0299 end -0300 -0301 %All possible reactions merged -0302 if mergedSome==false -0303 break; -0304 end -0305 -0306 %Now delete all reactions that involve no metabolites -0307 I=find(sum(irrevModel.S~=0)==0); +0291 if ub2~=inf +0292 irrevModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0293 end +0294 +0295 %Then recalculate the objective coefficient. The resulting +0296 %coefficient is the weighted sum of the previous +0297 irrevModel.c(involvedRxns(1))=irrevModel.c(involvedRxns(1))+irrevModel.c(involvedRxns(2))*stoichRatio; +0298 +0299 %Iterate again +0300 mergedSome=true; +0301 end +0302 end +0303 +0304 %All possible reactions merged +0305 if mergedSome==false +0306 break; +0307 end 0308 -0309 %Remove reactions -0310 irrevModel=removeReactions(irrevModel,I); +0309 %Now delete all reactions that involve no metabolites +0310 I=find(sum(irrevModel.S~=0)==0); 0311 -0312 %Remove metabolites -0313 notInUse=sum(irrevModel.S~=0,2)==0; -0314 irrevModel=removeMets(irrevModel,notInUse); -0315 end -0316 -0317 reducedModel=irrevModel; -0318 end -0319 end +0312 %Remove reactions +0313 irrevModel=removeReactions(irrevModel,I); +0314 +0315 %Remove metabolites +0316 notInUse=sum(irrevModel.S~=0,2)==0; +0317 irrevModel=removeMets(irrevModel,notInUse); +0318 end +0319 +0320 reducedModel=irrevModel; +0321 end +0322 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/external/kegg/getKEGGModelForOrganism.html b/doc/external/kegg/getKEGGModelForOrganism.html index 7010b5eb..e5ec6a91 100644 --- a/doc/external/kegg/getKEGGModelForOrganism.html +++ b/doc/external/kegg/getKEGGModelForOrganism.html @@ -63,15 +63,12 @@

DESCRIPTION ^SOURCE CODE ^% The hidden Markov models as generated in 2b or 0039 % downloaded from BioMet Toolbox (see below) 0040 % The final directory in dataDir should be styled as -0041 % proXXX_keggYY or eukXXX_keggYY, indicating whether +0041 % prok90_kegg105 or euk90_kegg105, indicating whether 0042 % the HMMs were trained on pro- or eukaryotic -0043 % sequences, using a sequence similarity threshold of -0044 % XXX %, fitting the KEGG version YY. E.g. -0045 % euk90_kegg105. (optional, see note about fastaFile. Note -0046 % that in order to rebuild the KEGG model from a -0047 % database dump, as opposed to using the version -0048 % supplied with RAVEN, you would still need to supply -0049 % this) -0050 % outDir directory to save the results from the quering of -0051 % the hidden Markov models. The output is specific -0052 % for the input sequences and the settings used. It -0053 % is stored in this manner so that the function can -0054 % continue if interrupted or if it should run in -0055 % parallel. Be careful not to leave output files from -0056 % different organisms or runs with different settings -0057 % in the same folder. They will not be overwritten -0058 % (optional, default is a temporary dir where all *.out -0059 % files are deleted before and after doing the -0060 % reconstruction) -0061 % keepSpontaneous include reactions labeled as "spontaneous". (optional, -0062 % default true) -0063 % keepUndefinedStoich include reactions in the form n A <=> n+1 A. These -0064 % will be dealt with as two separate metabolites -0065 % (optional, default true) -0066 % keepIncomplete include reactions which have been labelled as -0067 % "incomplete", "erroneous" or "unclear" (optional, -0068 % default true) -0069 % keepGeneral include reactions which have been labelled as -0070 % "general reaction". These are reactions on the form -0071 % "an aldehyde <=> an alcohol", and are therefore -0072 % unsuited for modelling purposes. 
Note that not all -0073 % reactions have this type of annotation, and the -0074 % script will therefore not be able to remove all -0075 % such reactions (optional, default false) -0076 % cutOff significance score from HMMer needed to assign -0077 % genes to a KO (optional, default 10^-50) -0078 % minScoreRatioG a gene is only assigned to KOs for which the score -0079 % is >=log(score)/log(best score) for that gene. This -0080 % is to prevent that a gene which clearly belongs to -0081 % one KO is assigned also to KOs with much lower -0082 % scores (optional, default 0.8 (lower is less strict)) -0083 % minScoreRatioKO ignore genes in a KO if their score is -0084 % <log(score)/log(best score in KO). This is to -0085 % "prune" KOs which have many genes and where some are -0086 % clearly a better fit (optional, default 0.3 (lower is -0087 % less strict)) -0088 % maxPhylDist -1: only use sequences from the same domain -0089 % (Prokaryota, Eukaryota) -0090 % other (positive) value: only use sequences for -0091 % organisms where the phylogenetic distance is at the -0092 % most this large (as calculated in getPhylDist) -0093 % (optional, default Inf, which means that all sequences -0094 % will be used) -0095 % nSequences for each KO, use up to this many sequences from the -0096 % most closely related species. This is mainly to -0097 % speed up the alignment process for KOs with very -0098 % many genes. This subsampling is performed before -0099 % running CD-HIT (optional, default inf) -0100 % seqIdentity sequence identity threshold in CD-HIT, referred as -0101 % "global sequence identity" in CD-HIT User's Guide. -0102 % If -1 is provided, CD-HIT is skipped (optional, default 0.9) -0103 % globalModel structure containing both model and KOModel -0104 % structures as generated by getModelFromKEGG. These -0105 % will otherwise be loaded by via getModelFromKEGG. 
-0106 % Providing globalKEGGmodel can speed up model -0107 % generation if getKEGGModelForOrganism is run -0108 % multiple times for different strains. Example: -0109 % [globalModel.model,globalModel.KOModel] = getModelFromKEGG; -0110 % (optional, default empty, global model is loaded by -0111 % getModelFromKEGG) +0043 % sequences; using which sequence similarity treshold +0044 % (first set of digits); using which KEGG version +0045 % (second set of digits). (this parameter should +0046 % ALWAYS be provided) +0047 % outDir directory to save the results from the quering of +0048 % the hidden Markov models. The output is specific +0049 % for the input sequences and the settings used. It +0050 % is stored in this manner so that the function can +0051 % continue if interrupted or if it should run in +0052 % parallel. Be careful not to leave output files from +0053 % different organisms or runs with different settings +0054 % in the same folder. They will not be overwritten +0055 % (optional, default is a temporary dir where all *.out +0056 % files are deleted before and after doing the +0057 % reconstruction) +0058 % keepSpontaneous include reactions labeled as "spontaneous". (optional, +0059 % default true) +0060 % keepUndefinedStoich include reactions in the form n A <=> n+1 A. These +0061 % will be dealt with as two separate metabolites +0062 % (optional, default true) +0063 % keepIncomplete include reactions which have been labelled as +0064 % "incomplete", "erroneous" or "unclear" (optional, +0065 % default true) +0066 % keepGeneral include reactions which have been labelled as +0067 % "general reaction". These are reactions on the form +0068 % "an aldehyde <=> an alcohol", and are therefore +0069 % unsuited for modelling purposes. 
Note that not all +0070 % reactions have this type of annotation, and the +0071 % script will therefore not be able to remove all +0072 % such reactions (optional, default false) +0073 % cutOff significance score from HMMer needed to assign +0074 % genes to a KO (optional, default 10^-50) +0075 % minScoreRatioG a gene is only assigned to KOs for which the score +0076 % is >=log(score)/log(best score) for that gene. This +0077 % is to prevent that a gene which clearly belongs to +0078 % one KO is assigned also to KOs with much lower +0079 % scores (optional, default 0.8 (lower is less strict)) +0080 % minScoreRatioKO ignore genes in a KO if their score is +0081 % <log(score)/log(best score in KO). This is to +0082 % "prune" KOs which have many genes and where some are +0083 % clearly a better fit (optional, default 0.3 (lower is +0084 % less strict)) +0085 % maxPhylDist -1: only use sequences from the same domain +0086 % (Prokaryota, Eukaryota) +0087 % other (positive) value: only use sequences for +0088 % organisms where the phylogenetic distance is at the +0089 % most this large (as calculated in getPhylDist) +0090 % (optional, default Inf, which means that all sequences +0091 % will be used) +0092 % nSequences for each KO, use up to this many sequences from the +0093 % most closely related species. This is mainly to +0094 % speed up the alignment process for KOs with very +0095 % many genes. This subsampling is performed before +0096 % running CD-HIT (optional, default inf) +0097 % seqIdentity sequence identity threshold in CD-HIT, referred as +0098 % "global sequence identity" in CD-HIT User's Guide. +0099 % If -1 is provided, CD-HIT is skipped (optional, default 0.9) +0100 % globalModel structure containing both model and KOModel +0101 % structures as generated by getModelFromKEGG. These +0102 % will otherwise be loaded by via getModelFromKEGG. 
+0103 % Providing globalKEGGmodel can speed up model +0104 % generation if getKEGGModelForOrganism is run +0105 % multiple times for different strains. Example: +0106 % [globalModel.model,globalModel.KOModel] = getModelFromKEGG; +0107 % (optional, default empty, global model is loaded by +0108 % getModelFromKEGG) +0109 % +0110 % Output: +0111 % model the reconstructed model 0112 % -0113 % Output: -0114 % model the reconstructed model -0115 % -0116 % PLEASE READ THIS: The input to this function can be confusing, because -0117 % it is intended to be run in parallel on a cluster or in multiple -0118 % sessions. It therefore saves a lot of intermediate results to storage. -0119 % This also serves the purpose of not having to do redundant -0120 % calculations. This, however, comes with the disadvantage of somewhat -0121 % trickier handling. This is what this function does: -0122 % -0123 % 1a. Loads files from a local KEGG FTP dump and constructs a general -0124 % RAVEN model representing the metabolic network. The functions -0125 % getRxnsFromKEGG, getGenesFromKEGG, getMetsFromKEGG summarise the -0126 % data into 'keggRxns.mat', 'keggGenes.mat' and 'keggMets.mat' files, -0127 % which are later merged into 'keggModel.mat' by getModelFromKEGG -0128 % function. The function getPhylDist generates 'keggPhylDist.mat' -0129 % file. KEGG FTP access requires a <a href="matlab: -0130 % web('http://www.bioinformatics.jp/en/keggftp.html')">license</a>. -0131 % 1b. Generates protein FASTA files from the KEGG FTP dump (see 1a). One -0132 % multi-FASTA file for each KO in KEGG is generated. -0133 % -0134 % The Step 1 has to be re-done every time KEGG updates their database (or -0135 % rather when the updates are large enough to warrant re-running this -0136 % part). Many users would probably never use this feature. -0137 % -0138 % 2a. Filters KO-specific protein sets. 
This is done by using the -0139 % settings "maxPhylDist" and "nSequences" to control which sequences -0140 % should be used for constructing Hidden Markov models (HMMs), and -0141 % later for matching your sequences to. -0142 % The most common alternatives here would be to use sequences from -0143 % only eukaryotes, only prokaryotes or all sequences in KEGG, but you -0144 % could also play around with the parameters to use e.g. only fungal -0145 % sequences. -0146 % 2b. KO-specific protein FASTA files are re-organised into -0147 % non-redundant protein sets with CD-HIT. The user can only set -0148 % seqIdentity parameter, which corresponds to '-c' parameter in -0149 % CD-HIT, described as "sequence identity threshold". CD-HIT suggsted -0150 % sequence identity specific word_length (-n) parameters are used. -0151 % 2c. Does a multi sequence alignment for multi-FASTA files obtained in -0152 % Step 2b for future use. MAFFT software with automatic selection of -0153 % alignment algorithm is used in this step ('--auto'). -0154 % 2d. Trains hidden Markov models using HMMer for each of the aligned -0155 % KO-specific FASTA files obtained in Step 2c. This is performed with -0156 % 'hmmbuild' using the default settings. -0157 % -0158 % Step 2 may be reasonable to be re-done if the user wants to tweak the -0159 % settings in proteins filtering, clustering, multi sequence alignment or -0160 % HMMs training steps. However, it requires to have KO-specific protein -0161 % FASTA files obtained in Step 1a. As such files are not provided in -0162 % RAVEN and BioMet ToolBox, the user can only generate these files from -0163 % KEGG FTP dump files, so KEGG FTP license is needed. -0164 % -0165 % 3a. Queries the HMMs with sequences for the organism you are making a -0166 % model for. This step uses both the output from step 1a and from 2d. -0167 % This is done with 'hmmsearch' function under default settings. 
The -0168 % significance threshold value set in 'cutOff' parameter is used -0169 % later when parsing '*.out' files to filter out KO hits with higher -0170 % value than 'cutOff' value. The results with passable E values are -0171 % summarised into KO-gene occurence matrix with E values in -0172 % intersections as 'koGeneMat'. The parameters 'minScoreRatioG' and -0173 % 'minScoreRatioKO' are then applied to 'prune' KO-gene associations -0174 % (see the function descriptions above for more details). The -0175 % intersection values for these 'prunable' associations are converted -0176 % to zeroes. -0177 % 3b. Constructs a model based on the pre-processed KO-gene association -0178 % matrix (koGeneMat). As the full KEGG model already has reaction-KO -0179 % relationships, KOs are converted into the query genes. The final -0180 % draft model contains only these reactions, which are associated -0181 % with KOs from koGeneMat. The reactions without the genes may also -0182 % be included, if the user set keepSpontaneous as 'true'. +0113 % PLEASE READ THIS: The input to this function can be confusing, because +0114 % it is intended to be run in parallel on a cluster or in multiple +0115 % sessions. It therefore saves a lot of intermediate results to storage. +0116 % This also serves the purpose of not having to do redundant +0117 % calculations. This, however, comes with the disadvantage of somewhat +0118 % trickier handling. This is what this function does: +0119 % +0120 % 1a. Loads files from a local KEGG FTP dump and constructs a general +0121 % RAVEN model representing the metabolic network. The functions +0122 % getRxnsFromKEGG, getGenesFromKEGG, getMetsFromKEGG summarise the +0123 % data into 'keggRxns.mat', 'keggGenes.mat' and 'keggMets.mat' files, +0124 % which are later merged into 'keggModel.mat' by getModelFromKEGG +0125 % function. The function getPhylDist generates 'keggPhylDist.mat' +0126 % file. 
KEGG FTP access requires a <a href="matlab: +0127 % web('http://www.bioinformatics.jp/en/keggftp.html')">license</a>. +0128 % 1b. Generates protein FASTA files from the KEGG FTP dump (see 1a). One +0129 % multi-FASTA file for each KO in KEGG is generated. +0130 % +0131 % The Step 1 has to be re-done every time KEGG updates their database (or +0132 % rather when the updates are large enough to warrant re-running this +0133 % part). Many users would probably never use this feature. +0134 % +0135 % 2a. Filters KO-specific protein sets. This is done by using the +0136 % settings "maxPhylDist" and "nSequences" to control which sequences +0137 % should be used for constructing Hidden Markov models (HMMs), and +0138 % later for matching your sequences to. +0139 % The most common alternatives here would be to use sequences from +0140 % only eukaryotes, only prokaryotes or all sequences in KEGG, but you +0141 % could also play around with the parameters to use e.g. only fungal +0142 % sequences. +0143 % 2b. KO-specific protein FASTA files are re-organised into +0144 % non-redundant protein sets with CD-HIT. The user can only set +0145 % seqIdentity parameter, which corresponds to '-c' parameter in +0146 % CD-HIT, described as "sequence identity threshold". CD-HIT suggsted +0147 % sequence identity specific word_length (-n) parameters are used. +0148 % 2c. Does a multi sequence alignment for multi-FASTA files obtained in +0149 % Step 2b for future use. MAFFT software with automatic selection of +0150 % alignment algorithm is used in this step ('--auto'). +0151 % 2d. Trains hidden Markov models using HMMer for each of the aligned +0152 % KO-specific FASTA files obtained in Step 2c. This is performed with +0153 % 'hmmbuild' using the default settings. +0154 % +0155 % Step 2 may be reasonable to be re-done if the user wants to tweak the +0156 % settings in proteins filtering, clustering, multi sequence alignment or +0157 % HMMs training steps. 
However, it requires to have KO-specific protein +0158 % FASTA files obtained in Step 1a. As such files are not provided in +0159 % RAVEN and BioMet ToolBox, the user can only generate these files from +0160 % KEGG FTP dump files, so KEGG FTP license is needed. +0161 % +0162 % 3a. Queries the HMMs with sequences for the organism you are making a +0163 % model for. This step uses both the output from step 1a and from 2d. +0164 % This is done with 'hmmsearch' function under default settings. The +0165 % significance threshold value set in 'cutOff' parameter is used +0166 % later when parsing '*.out' files to filter out KO hits with higher +0167 % value than 'cutOff' value. The results with passable E values are +0168 % summarised into KO-gene occurence matrix with E values in +0169 % intersections as 'koGeneMat'. The parameters 'minScoreRatioG' and +0170 % 'minScoreRatioKO' are then applied to 'prune' KO-gene associations +0171 % (see the function descriptions above for more details). The +0172 % intersection values for these 'prunable' associations are converted +0173 % to zeroes. +0174 % 3b. Constructs a model based on the pre-processed KO-gene association +0175 % matrix (koGeneMat). As the full KEGG model already has reaction-KO +0176 % relationships, KOs are converted into the query genes. The final +0177 % draft model contains only these reactions, which are associated +0178 % with KOs from koGeneMat. The reactions without the genes may also +0179 % be included, if the user set keepSpontaneous as 'true'. +0180 % +0181 % The Step 3 is specific to the organism for which the model is +0182 % reconstructed. 0183 % -0184 % The Step 3 is specific to the organism for which the model is -0185 % reconstructed. -0186 % -0187 % In principle the function looks at which output that is already available -0188 % and runs only the parts that are required for step 3. 
This means -0189 % that (see the definition of the parameters for details): -0190 % -1a is only performed if there are no KEGG model files in the -0191 % RAVEN\external\kegg directory -0192 % -1b is only performed if not all required HMMs OR aligned FASTA files -0193 % OR multi-FASTA files exist in the defined dataDir. This means that this -0194 % step is skipped if the HMMs are downloaded from BioMet Toolbox instead -0195 % (see above). If not all files exist it will try to find -0196 % the KEGG database files in dataDir. -0197 % -2a is only performed if not all required HMMs OR aligned FASTA files -0198 % files exist in the defined dataDir. This means that this step is skipped -0199 % if the HMMs are downloaded from BioMet Toolbox instead (see above). -0200 % -2b is only performed if not all required HMMs exist in the defined -0201 % dataDir. This means that this step is skipped if the FASTA files or -0202 % HMMs are downloaded from BioMet Toolbox instead (see above). -0203 % -3a is performed for the required HMMs for which no corresponding .out -0204 % file exists in outDir. This is just a way to enable the function to be -0205 % run in parallel or to resume if interrupted. -0206 % -3b is always performed. +0184 % In principle the function looks at which output that is already available +0185 % and runs only the parts that are required for step 3. This means +0186 % that (see the definition of the parameters for details): +0187 % -1a is only performed if there are no KEGG model files in the +0188 % RAVEN\external\kegg directory +0189 % -1b is only performed if not all required HMMs OR aligned FASTA files +0190 % OR multi-FASTA files exist in the defined dataDir. This means that this +0191 % step is skipped if the HMMs are downloaded from BioMet Toolbox instead +0192 % (see above). If not all files exist it will try to find +0193 % the KEGG database files in dataDir. 
+0194 % -2a is only performed if not all required HMMs OR aligned FASTA files +0195 % files exist in the defined dataDir. This means that this step is skipped +0196 % if the HMMs are downloaded from BioMet Toolbox instead (see above). +0197 % -2b is only performed if not all required HMMs exist in the defined +0198 % dataDir. This means that this step is skipped if the FASTA files or +0199 % HMMs are downloaded from BioMet Toolbox instead (see above). +0200 % -3a is performed for the required HMMs for which no corresponding .out +0201 % file exists in outDir. This is just a way to enable the function to be +0202 % run in parallel or to resume if interrupted. +0203 % -3b is always performed. +0204 % +0205 % These steps are specific to the organism for which you are +0206 % reconstructing the model. 0207 % -0208 % These steps are specific to the organism for which you are -0209 % reconstructing the model. -0210 % -0211 % Regarding the whole pipeline, the function checks the output that is -0212 % already available and runs only the parts that are required for step 3. -0213 % This means that (see the definition of the parameters for details): -0214 % -1a is only performed if there are no KEGG model files in the -0215 % RAVEN\external\kegg directory. -0216 % -1b is only performed if any of required KOs do not have HMMs, aligned -0217 % FASTA files, clustered FASTA files and raw FASTA files in the defined -0218 % dataDir. This means that this step is skipped if the HMMs are -0219 % downloaded from BioMet Toolbox instead (see above). If not all files -0220 % exist it will try to find the KEGG database files in dataDir. -0221 % -2ab are only performed if any of required KOs do not have HMMs, -0222 % aligned FASTA files and clustered FASTA files in the defined dataDir. -0223 % This means that this step is skipped if the HMMs are downloaded from -0224 % BioMet Toolbox instead (see above). 
-0225 % -2c is only performed if any of required KOs do not have HMMs and -0226 % aligned FASTA files in the defined dataDir. This means that this step -0227 % is skipped if the HMMs are downloaded from BioMet Toolbox instead (see -0228 % above). -0229 % -2d is only performed if any of required KOs do not have HMMs exist in -0230 % the defined dataDir. This means that this step is skipped if the FASTA -0231 % files or HMMs are downloaded from BioMet Toolbox instead (see above). -0232 % -3a is performed for the required HMMs for which no corresponding .out -0233 % file exists in outDir. This is just a way to enable the function to be -0234 % run in parallel or to resume if interrupted. -0235 % -3b is always performed. -0236 % -0237 % NOTE: it is also possible to obtain draft model from KEGG without -0238 % providing protein FASTA file for the target organism. In such case the -0239 % organism three-four letter abbreviation set as 'organismID' must exist -0240 % in the local KEGG database. In such case, the program just fetches all -0241 % the reactions, which are associated with given 'organismID'. -0242 % -0243 % Usage: model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,... -0244 % outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,... -0245 % keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,... -0246 % nSequences,seqIdentity) -0247 -0248 if nargin<2 || isempty(fastaFile) -0249 fastaFile=[]; -0250 else -0251 fastaFile=char(fastaFile); -0252 end -0253 if nargin<3 -0254 dataDir=[]; -0255 else -0256 dataDir=char(dataDir); -0257 end -0258 if nargin<4 || isempty(outDir) -0259 outDir=tempdir; -0260 %Delete all *.out files if any exist -0261 delete(fullfile(outDir,'*.out')); -0262 else -0263 outDir=char(outDir); +0208 % Regarding the whole pipeline, the function checks the output that is +0209 % already available and runs only the parts that are required for step 3. 
+0210 % This means that (see the definition of the parameters for details): +0211 % -1a is only performed if there are no KEGG model files in the +0212 % RAVEN\external\kegg directory. +0213 % -1b is only performed if any of required KOs do not have HMMs, aligned +0214 % FASTA files, clustered FASTA files and raw FASTA files in the defined +0215 % dataDir. This means that this step is skipped if the HMMs are +0216 % downloaded from BioMet Toolbox instead (see above). If not all files +0217 % exist it will try to find the KEGG database files in dataDir. +0218 % -2ab are only performed if any of required KOs do not have HMMs, +0219 % aligned FASTA files and clustered FASTA files in the defined dataDir. +0220 % This means that this step is skipped if the HMMs are downloaded from +0221 % BioMet Toolbox instead (see above). +0222 % -2c is only performed if any of required KOs do not have HMMs and +0223 % aligned FASTA files in the defined dataDir. This means that this step +0224 % is skipped if the HMMs are downloaded from BioMet Toolbox instead (see +0225 % above). +0226 % -2d is only performed if any of required KOs do not have HMMs exist in +0227 % the defined dataDir. This means that this step is skipped if the FASTA +0228 % files or HMMs are downloaded from BioMet Toolbox instead (see above). +0229 % -3a is performed for the required HMMs for which no corresponding .out +0230 % file exists in outDir. This is just a way to enable the function to be +0231 % run in parallel or to resume if interrupted. +0232 % -3b is always performed. +0233 % +0234 % NOTE: it is also possible to obtain draft model from KEGG without +0235 % providing protein FASTA file for the target organism. In such case the +0236 % organism three-four letter abbreviation set as 'organismID' must exist +0237 % in the local KEGG database. In such case, the program just fetches all +0238 % the reactions, which are associated with given 'organismID'. 
+0239 % +0240 % Usage: model=getKEGGModelForOrganism(organismID,fastaFile,dataDir,... +0241 % outDir,keepSpontaneous,keepUndefinedStoich,keepIncomplete,... +0242 % keepGeneral,cutOff,minScoreRatioKO,minScoreRatioG,maxPhylDist,... +0243 % nSequences,seqIdentity) +0244 +0245 if nargin<2 || isempty(fastaFile) +0246 fastaFile=[]; +0247 else +0248 fastaFile=char(fastaFile); +0249 end +0250 if nargin<3 +0251 dataDir=[]; +0252 else +0253 dataDir=char(dataDir); +0254 end +0255 if nargin<4 || isempty(outDir) +0256 outDir=tempdir; +0257 %Delete all *.out files if any exist +0258 delete(fullfile(outDir,'*.out')); +0259 else +0260 outDir=char(outDir); +0261 end +0262 if nargin<5 +0263 keepSpontaneous=true; 0264 end -0265 if nargin<5 -0266 keepSpontaneous=true; +0265 if nargin<6 +0266 keepUndefinedStoich=true; 0267 end -0268 if nargin<6 -0269 keepUndefinedStoich=true; +0268 if nargin<7 +0269 keepIncomplete=true; 0270 end -0271 if nargin<7 -0272 keepIncomplete=true; +0271 if nargin<8 +0272 keepGeneral=false; 0273 end -0274 if nargin<8 -0275 keepGeneral=false; +0274 if nargin<9 +0275 cutOff=10^-50; 0276 end -0277 if nargin<9 -0278 cutOff=10^-50; +0277 if nargin<10 +0278 minScoreRatioKO=0.3; 0279 end -0280 if nargin<10 -0281 minScoreRatioKO=0.3; +0280 if nargin<11 +0281 minScoreRatioG=0.8; 0282 end -0283 if nargin<11 -0284 minScoreRatioG=0.8; -0285 end -0286 if nargin<12 -0287 maxPhylDist=inf; -0288 %Include all sequences for each reaction -0289 end -0290 if nargin<13 -0291 nSequences=inf; -0292 %Include all sequences for each reaction +0283 if nargin<12 +0284 maxPhylDist=inf; +0285 %Include all sequences for each reaction +0286 end +0287 if nargin<13 +0288 nSequences=inf; +0289 %Include all sequences for each reaction +0290 end +0291 if nargin<14 +0292 seqIdentity=0.9; 0293 end -0294 if nargin<14 -0295 seqIdentity=0.9; -0296 end -0297 -0298 if isempty(fastaFile) -0299 fprintf(['\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species <strong>' 
organismID '</strong> ***\n\n']); -0300 else -0301 fprintf('\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); -0302 %Check if query fasta exists -0303 fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir -0304 end -0305 -0306 %Run the external binaries multi-threaded to use all logical cores assigned -0307 %to MATLAB -0308 cores = evalc('feature(''numcores'')'); -0309 cores = strsplit(cores, 'MATLAB was assigned: '); -0310 cores = regexp(cores{2},'^\d*','match'); -0311 cores = cores{1}; +0294 +0295 if isempty(fastaFile) +0296 fprintf(['\n*** The model reconstruction from KEGG based on the annotation available for KEGG Species <strong>' organismID '</strong> ***\n\n']); +0297 else +0298 fprintf('\n*** The model reconstruction from KEGG based on the protein homology search against KEGG Orthology specific HMMs ***\n\n'); +0299 %Check if query fasta exists +0300 fastaFile=checkFileExistence(fastaFile,2); %Copy file to temp dir +0301 end +0302 +0303 %Run the external binaries multi-threaded to use all logical cores assigned +0304 %to MATLAB +0305 cores = evalc('feature(''numcores'')'); +0306 cores = strsplit(cores, 'MATLAB was assigned: '); +0307 cores = regexp(cores{2},'^\d*','match'); +0308 cores = cores{1}; +0309 +0310 %Get the directory for RAVEN Toolbox. +0311 ravenPath=findRAVENroot(); 0312 -0313 %Get the directory for RAVEN Toolbox. -0314 ravenPath=findRAVENroot(); -0315 -0316 %Checking if dataDir is consistent. It must point to pre-trained HMMs set, -0317 %compatible with the the current RAVEN version. The user may have the -0318 %required zip file already in working directory or have it extracted. If -0319 %the zip file and directory is not here, it is downloaded from the cloud -0320 if ~isempty(dataDir) -0321 hmmOptions={'euk90_kegg105','prok90_kegg105'}; -0322 if ~endsWith(dataDir,hmmOptions) %Check if dataDir ends with any of the hmmOptions. 
-0323 %If not, then check whether the required folders exist anyway. -0324 if ~isfile(fullfile(dataDir,'keggdb','genes.pep')) && ... -0325 ~isfolder(fullfile(dataDir,'fasta')) && ... -0326 ~isfolder(fullfile(dataDir,'aligned')) && ... -0327 ~isfolder(fullfile(dataDir,'hmms')) -0328 error(['Pre-trained HMMs set is not recognised. If you want download RAVEN provided sets, it should match any of the following: ' strjoin(hmmOptions,' or ')]) -0329 end -0330 else -0331 if isfolder(dataDir) && isfile(fullfile(dataDir,'hmms','K00844.hmm')) -0332 fprintf(['NOTE: Found <strong>' dataDir '</strong> directory with pre-trained HMMs, it will therefore be used during reconstruction\n']); -0333 elseif ~isfolder(dataDir) && isfile([dataDir,'.zip']) -0334 fprintf('Extracting the HMMs archive file... '); -0335 unzip([dataDir,'.zip']); -0336 fprintf('COMPLETE\n'); -0337 else -0338 hmmIndex=strcmp(dataDir,hmmOptions); -0339 if ~any(hmmIndex) -0340 error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg105 and euk90_kegg105). ' ... -0341 'Use either of these datasets, or otherwise download the relevant sequence data from KEGG to train HMMs with your desired sequence identity']) -0342 else -0343 fprintf('Downloading the HMMs archive file... '); -0344 try -0345 websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.8.0/',hmmOptions{hmmIndex},'.zip']); -0346 catch ME -0347 if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') -0348 error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') -0349 end -0350 end -0351 end -0352 +0313 %Checking if dataDir is consistent. It must point to pre-trained HMMs set, +0314 %compatible with the the current RAVEN version. 
The user may have the +0315 %required zip file already in working directory or have it extracted. If +0316 %the zip file and directory is not here, it is downloaded from the cloud +0317 if ~isempty(dataDir) +0318 hmmOptions={'euk90_kegg105','prok90_kegg105'}; +0319 if ~endsWith(dataDir,hmmOptions) %Check if dataDir ends with any of the hmmOptions. +0320 %If not, then check whether the required folders exist anyway. +0321 if ~isfile(fullfile(dataDir,'keggdb','genes.pep')) && ... +0322 ~isfolder(fullfile(dataDir,'fasta')) && ... +0323 ~isfolder(fullfile(dataDir,'aligned')) && ... +0324 ~isfolder(fullfile(dataDir,'hmms')) +0325 error(['Pre-trained HMMs set is not recognised. If you want download RAVEN provided sets, it should match any of the following: ' strjoin(hmmOptions,' or ')]) +0326 end +0327 else +0328 if isfolder(dataDir) && isfile(fullfile(dataDir,'hmms','K00844.hmm')) +0329 fprintf(['NOTE: Found <strong>' dataDir '</strong> directory with pre-trained HMMs, it will therefore be used during reconstruction\n']); +0330 elseif ~isfolder(dataDir) && isfile([dataDir,'.zip']) +0331 fprintf('Extracting the HMMs archive file... '); +0332 unzip([dataDir,'.zip']); +0333 fprintf('COMPLETE\n'); +0334 else +0335 hmmIndex=strcmp(dataDir,hmmOptions); +0336 if ~any(hmmIndex) +0337 error(['Pre-trained HMMs are only provided with proteins clustered at 90% sequence identity (i.e. prok90_kegg105 and euk90_kegg105). ' ... +0338 'Use either of these datasets, or otherwise download the relevant sequence data from KEGG to train HMMs with your desired sequence identity']) +0339 else +0340 fprintf('Downloading the HMMs archive file... '); +0341 try +0342 websave([dataDir,'.zip'],['https://github.com/SysBioChalmers/RAVEN/releases/download/v2.8.0/',hmmOptions{hmmIndex},'.zip']); +0343 catch ME +0344 if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') +0345 error('Failed to download the HMMs archive file, the server returned a 404 error, try again later. 
If the problem persists please report it on the RAVEN GitHub Issues page: https://github.com/SysBioChalmers/RAVEN/issues') +0346 end +0347 end +0348 end +0349 +0350 fprintf('COMPLETE\n'); +0351 fprintf('Extracting the HMMs archive file... '); +0352 unzip([dataDir,'.zip']); 0353 fprintf('COMPLETE\n'); -0354 fprintf('Extracting the HMMs archive file... '); -0355 unzip([dataDir,'.zip']); -0356 fprintf('COMPLETE\n'); -0357 end -0358 %Check if HMMs are extracted -0359 if ~isfile(fullfile(dataDir,'hmms','K00844.hmm')) -0360 error(['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']); -0361 end -0362 end -0363 end -0364 -0365 %Check if the fasta-file contains '/' or'\'. If not then it's probably just -0366 %a file name. Expand to full path. -0367 if any(fastaFile) -0368 if ~any(strfind(fastaFile,'\')) && ~any(strfind(fastaFile,'/')) -0369 fastaFile=which(fastaFile); -0370 end -0371 %Create the required sub-folders in dataDir if they dont exist -0372 if ~isfolder(fullfile(dataDir,'keggdb')) -0373 mkdir(dataDir,'keggdb'); +0354 end +0355 %Check if HMMs are extracted +0356 if ~isfile(fullfile(dataDir,'hmms','K00844.hmm')) +0357 error(['The HMM files seem improperly extracted and not found in ',dataDir,'/hmms. Please remove ',dataDir,' folder and rerun getKEGGModelForOrganism']); +0358 end +0359 end +0360 end +0361 +0362 %Check if the fasta-file contains '/' or'\'. If not then it's probably just +0363 %a file name. Expand to full path. 
+0364 if any(fastaFile) +0365 if ~any(strfind(fastaFile,'\')) && ~any(strfind(fastaFile,'/')) +0366 fastaFile=which(fastaFile); +0367 end +0368 %Create the required sub-folders in dataDir if they dont exist +0369 if ~isfolder(fullfile(dataDir,'keggdb')) +0370 mkdir(dataDir,'keggdb'); +0371 end +0372 if ~isfolder(fullfile(dataDir,'fasta')) +0373 mkdir(dataDir,'fasta'); 0374 end -0375 if ~isfolder(fullfile(dataDir,'fasta')) -0376 mkdir(dataDir,'fasta'); +0375 if ~isfolder(fullfile(dataDir,'aligned')) +0376 mkdir(dataDir,'aligned'); 0377 end -0378 if ~isfolder(fullfile(dataDir,'aligned')) -0379 mkdir(dataDir,'aligned'); +0378 if ~isfolder(fullfile(dataDir,'hmms')) +0379 mkdir(dataDir,'hmms'); 0380 end -0381 if ~isfolder(fullfile(dataDir,'hmms')) -0382 mkdir(dataDir,'hmms'); +0381 if ~isfolder(outDir) +0382 mkdir(outDir); 0383 end -0384 if ~isfolder(outDir) -0385 mkdir(outDir); -0386 end -0387 end -0388 -0389 %First generate the full global KEGG model. Can be provided as input. -0390 %Otherwise, getModelFromKEGG is run. 
The dataDir must not be supplied as -0391 %there is also an internal RAVEN version available -0392 if nargin==15 -0393 model=globalModel.model; -0394 KOModel=globalModel.KOModel; -0395 elseif any(dataDir) -0396 [model, KOModel]=getModelFromKEGG(fullfile(dataDir,'keggdb'),keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); -0397 else -0398 [model, KOModel]=getModelFromKEGG([],keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); -0399 end -0400 model.id=organismID; -0401 model.c=zeros(numel(model.rxns),1); -0402 -0403 %If no FASTA file is supplied, then just remove all genes which are not for -0404 %the given organism ID -0405 if isempty(fastaFile) -0406 %Check if organismID can be found in KEGG species list or is -0407 %set to "eukaryotes" or "prokaryotes" -0408 phylDistsFull=getPhylDist(fullfile(dataDir,'keggdb'),true); -0409 if ~ismember(organismID,[phylDistsFull.ids 'eukaryotes' 'prokaryotes']) -0410 error('Provided organismID is incorrect. Only species abbreviations from KEGG Species List or "eukaryotes"/"prokaryotes" are allowed.'); -0411 end -0412 -0413 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... ']); -0414 if ismember(organismID,{'eukaryotes','prokaryotes'}) -0415 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); -0416 if strcmp(organismID,'eukaryotes') -0417 proxyid='hsa'; -0418 %Use H. sapiens here -0419 else -0420 proxyid='eco'; -0421 %Use E. 
coli here -0422 end -0423 [~, phylDistId]=ismember(proxyid,phylDists.ids); -0424 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:))); -0425 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false); -0426 I=ismember(upper(taxIDs),upper(idsToKeep)); -0427 else -0428 %KEGG organism IDs may have three or four letters -0429 organismID=strcat(organismID,':'); -0430 %Add colon for accurate matching -0431 if length(organismID)==4 -0432 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes); -0433 elseif length(organismID)==5 -0434 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes); -0435 end -0436 end -0437 %Remove those genes -0438 model.genes=model.genes(I); -0439 model.rxnGeneMat=model.rxnGeneMat(:,I); -0440 fprintf('COMPLETE\n'); -0441 end -0442 -0443 %First remove all reactions without genes -0444 if keepSpontaneous==true -0445 fprintf('Removing non-spontaneous reactions without GPR rules... '); -0446 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous'); -0447 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); -0448 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous)); -0449 else -0450 fprintf('Removing reactions without GPR rules... '); -0451 I=~any(model.rxnGeneMat,2); -0452 end -0453 model=removeReactions(model,I,true); -0454 fprintf('COMPLETE\n'); -0455 -0456 %Clean gene names -0457 fprintf('Fixing gene names in the model... '); -0458 %Get rid of the prefix organism id -0459 model.genes=regexprep(model.genes,'^\w+?:',''); -0460 fprintf('COMPLETE\n'); -0461 -0462 %If no FASTA file is supplied, then we are done here -0463 if isempty(fastaFile) -0464 %Create grRules -0465 fprintf('Constructing GPR associations and annotations for the model... 
'); -0466 model.grRules=cell(numel(model.rxns),1); -0467 model.grRules(:)={''}; -0468 %Add the gene associations as 'or' -0469 for i=1:numel(model.rxns) -0470 %Find the involved genes -0471 I=find(model.rxnGeneMat(i,:)); -0472 if any(I) -0473 model.grRules{i}=['(' model.genes{I(1)}]; -0474 for j=2:numel(I) -0475 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; -0476 end -0477 model.grRules{i}=[model.grRules{i} ')']; -0478 end -0479 end -0480 %Fix grRules and reconstruct rxnGeneMat -0481 [grRules,rxnGeneMat] = standardizeGrRules(model); %Give detailed output -0482 model.grRules = grRules; -0483 model.rxnGeneMat = rxnGeneMat; -0484 %Add geneMiriams, assuming that it follows the syntax -0485 %kegg.genes/organismID:geneName -0486 model.geneMiriams=''; -0487 for i=1:numel(model.genes) -0488 model.geneMiriams{i,1}.name{1,1}='kegg.genes'; -0489 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1}); -0490 end -0491 %Add the description to the reactions -0492 for i=1:numel(model.rxns) -0493 if ~isempty(model.rxnNotes{i}) -0494 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i)); -0495 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); -0496 else -0497 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'}; -0498 end -0499 end -0500 fprintf('COMPLETE\n\n'); -0501 fprintf('*** Model reconstruction complete ***\n'); -0502 return; -0503 end -0504 -0505 %Create a phylogenetic distance structure -0506 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); -0507 [~, phylDistId]=ismember(model.id,phylDistStruct.ids); -0508 -0509 %Calculate the real maximal distance now. An abitary large number of 1000 -0510 %is used for the "all in kingdom" or "all sequences" options. 
This is a bit -0511 %inconvenient way to do it, but it is to make it fit with some older code -0512 if isinf(maxPhylDist) || maxPhylDist==-1 -0513 maxPhylDist=1000; -0514 end -0515 -0516 %Get the KO ids for which files have been generated. Maybe not the neatest -0517 %way.. -0518 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); -0519 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); -0520 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw')); -0521 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); -0522 outFiles=listFiles(fullfile(outDir,'*.out')); -0523 -0524 %Check if multi-FASTA files should be generated. This should only be -0525 %performed if there are IDs in the KOModel structure that haven't been -0526 %parsed yet -0527 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]); -0528 -0529 if ~isempty(missingFASTA) -0530 if ~isfile(fullfile(dataDir,'keggdb','genes.pep')) -0531 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n']; -0532 dispEM(EM); -0533 end -0534 %Only construct models for KOs which don't have files already -0535 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true); -0536 %Permute the order of the KOs in the model so that constructMultiFasta -0537 %can be run on several processors at once -0538 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns'); -0539 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta')); -0540 else -0541 fprintf('Generating the KEGG Orthology specific multi-FASTA files... 
COMPLETE\n'); -0542 end -0543 -0544 if isunix -0545 if ismac -0546 binEnd='.mac'; -0547 else -0548 binEnd=''; -0549 end -0550 elseif ispc -0551 binEnd=''; -0552 else -0553 EM='Unknown OS, exiting.'; -0554 disp(EM); -0555 return -0556 end -0557 -0558 %Check if alignment of FASTA files should be performed -0559 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]); -0560 if ~isempty(missingAligned) -0561 if seqIdentity==-1 -0562 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); -0563 else -0564 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); -0565 end -0566 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned))); -0567 tmpFile=tempname; -0568 %On Windows, paths need to be translated to Unix before parsing it to WSL -0569 if ispc -0570 wslPath.tmpFile=getWSLpath(tmpFile); -0571 %mafft has problems writing to terminal (/dev/stderr) when running -0572 %on WSL via MATLAB, instead write and read progress file -0573 mafftOutput = tempname; -0574 wslPath.mafftOutput=getWSLpath(mafftOutput); -0575 wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); -0576 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit')); -0577 end -0578 -0579 for i=1:numel(missingAligned) -0580 %This is checked here because it could be that it is created by a -0581 %parallel process. The faw-files are saved as temporary files to -0582 %kept track of which files are being worked on -0583 if ~isfile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])) &&... -0584 ~isfile(fullfile(dataDir,'aligned',[missingAligned{i} '.fa'])) -0585 %Check that the multi-FASTA file exists. It should do so since -0586 %we are saving empty files as well. 
Print a warning and -0587 %continue if not -0588 if ~isfile(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])) -0589 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; -0590 dispEM(EM,false); -0591 continue; -0592 end -0593 -0594 %If the multi-FASTA file is empty then save an empty aligned -0595 %file and continue -0596 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0597 if s.bytes<=0 -0598 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); -0599 fclose(fid); -0600 continue; -0601 end -0602 -0603 %Create an empty file to prevent other threads to start to work -0604 %on the same alignment -0605 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); -0606 fclose(fid); -0607 -0608 %First load the FASTA file, then select up to nSequences -0609 %sequences of the most closely related species, apply any -0610 %constraints from maxPhylDist, and save it as a temporary file, -0611 %and create the model from that -0612 -0613 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0614 phylDist=inf(numel(fastaStruct),1); -0615 for j=1:numel(fastaStruct) -0616 %Get the organism abbreviation -0617 index=strfind(fastaStruct(j).Header,':'); -0618 if any(index) -0619 abbrev=fastaStruct(j).Header(1:index(1)-1); -0620 [~, index]=ismember(abbrev,phylDistStruct.ids); -0621 if any(index) -0622 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); -0623 end -0624 end -0625 end +0384 end +0385 +0386 %First generate the full global KEGG model. Can be provided as input. +0387 %Otherwise, getModelFromKEGG is run. 
The dataDir must not be supplied as +0388 %there is also an internal RAVEN version available +0389 if nargin==15 +0390 model=globalModel.model; +0391 KOModel=globalModel.KOModel; +0392 elseif any(dataDir) +0393 [model, KOModel]=getModelFromKEGG(fullfile(dataDir,'keggdb'),keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); +0394 else +0395 [model, KOModel]=getModelFromKEGG([],keepSpontaneous,keepUndefinedStoich,keepIncomplete,keepGeneral); +0396 end +0397 model.id=organismID; +0398 model.c=zeros(numel(model.rxns),1); +0399 +0400 %If no FASTA file is supplied, then just remove all genes which are not for +0401 %the given organism ID +0402 if isempty(fastaFile) +0403 %Check if organismID can be found in KEGG species list or is +0404 %set to "eukaryotes" or "prokaryotes" +0405 phylDistsFull=getPhylDist(fullfile(dataDir,'keggdb'),true); +0406 if ~ismember(organismID,[phylDistsFull.ids 'eukaryotes' 'prokaryotes']) +0407 error('Provided organismID is incorrect. Only species abbreviations from KEGG Species List or "eukaryotes"/"prokaryotes" are allowed.'); +0408 end +0409 +0410 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... ']); +0411 if ismember(organismID,{'eukaryotes','prokaryotes'}) +0412 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); +0413 if strcmp(organismID,'eukaryotes') +0414 proxyid='hsa'; +0415 %Use H. sapiens here +0416 else +0417 proxyid='eco'; +0418 %Use E. 
coli here +0419 end +0420 [~, phylDistId]=ismember(proxyid,phylDists.ids); +0421 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:))); +0422 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false); +0423 I=ismember(upper(taxIDs),upper(idsToKeep)); +0424 else +0425 %KEGG organism IDs may have three or four letters +0426 organismID=strcat(organismID,':'); +0427 %Add colon for accurate matching +0428 if length(organismID)==4 +0429 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes); +0430 elseif length(organismID)==5 +0431 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes); +0432 end +0433 end +0434 %Remove those genes +0435 model.genes=model.genes(I); +0436 model.rxnGeneMat=model.rxnGeneMat(:,I); +0437 fprintf('COMPLETE\n'); +0438 end +0439 +0440 %First remove all reactions without genes +0441 if keepSpontaneous==true +0442 fprintf('Removing non-spontaneous reactions without GPR rules... '); +0443 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous'); +0444 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); +0445 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous)); +0446 else +0447 fprintf('Removing reactions without GPR rules... '); +0448 I=~any(model.rxnGeneMat,2); +0449 end +0450 model=removeReactions(model,I,true); +0451 fprintf('COMPLETE\n'); +0452 +0453 %Clean gene names +0454 fprintf('Fixing gene names in the model... '); +0455 %Get rid of the prefix organism id +0456 model.genes=regexprep(model.genes,'^\w+?:',''); +0457 fprintf('COMPLETE\n'); +0458 +0459 %If no FASTA file is supplied, then we are done here +0460 if isempty(fastaFile) +0461 %Create grRules +0462 fprintf('Constructing GPR associations and annotations for the model... 
'); +0463 model.grRules=cell(numel(model.rxns),1); +0464 model.grRules(:)={''}; +0465 %Add the gene associations as 'or' +0466 for i=1:numel(model.rxns) +0467 %Find the involved genes +0468 I=find(model.rxnGeneMat(i,:)); +0469 if any(I) +0470 model.grRules{i}=['(' model.genes{I(1)}]; +0471 for j=2:numel(I) +0472 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; +0473 end +0474 model.grRules{i}=[model.grRules{i} ')']; +0475 end +0476 end +0477 %Fix grRules and reconstruct rxnGeneMat +0478 [grRules,rxnGeneMat] = standardizeGrRules(model); %Give detailed output +0479 model.grRules = grRules; +0480 model.rxnGeneMat = rxnGeneMat; +0481 %Add geneMiriams, assuming that it follows the syntax +0482 %kegg.genes/organismID:geneName +0483 model.geneMiriams=''; +0484 for i=1:numel(model.genes) +0485 model.geneMiriams{i,1}.name{1,1}='kegg.genes'; +0486 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1}); +0487 end +0488 %Add the description to the reactions +0489 for i=1:numel(model.rxns) +0490 if ~isempty(model.rxnNotes{i}) +0491 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i)); +0492 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); +0493 else +0494 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'}; +0495 end +0496 end +0497 fprintf('COMPLETE\n\n'); +0498 fprintf('*** Model reconstruction complete ***\n'); +0499 return; +0500 end +0501 +0502 %Create a phylogenetic distance structure +0503 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1); +0504 [~, phylDistId]=ismember(model.id,phylDistStruct.ids); +0505 +0506 %Calculate the real maximal distance now. An abitary large number of 1000 +0507 %is used for the "all in kingdom" or "all sequences" options. 
This is a bit +0508 %inconvenient way to do it, but it is to make it fit with some older code +0509 if isinf(maxPhylDist) || maxPhylDist==-1 +0510 maxPhylDist=1000; +0511 end +0512 +0513 %Get the KO ids for which files have been generated. Maybe not the neatest +0514 %way.. +0515 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa')); +0516 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa')); +0517 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw')); +0518 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm')); +0519 outFiles=listFiles(fullfile(outDir,'*.out')); +0520 +0521 %Check if multi-FASTA files should be generated. This should only be +0522 %performed if there are IDs in the KOModel structure that haven't been +0523 %parsed yet +0524 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]); +0525 +0526 if ~isempty(missingFASTA) +0527 if ~isfile(fullfile(dataDir,'keggdb','genes.pep')) +0528 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n']; +0529 dispEM(EM); +0530 end +0531 %Only construct models for KOs which don't have files already +0532 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true); +0533 %Permute the order of the KOs in the model so that constructMultiFasta +0534 %can be run on several processors at once +0535 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns'); +0536 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta')); +0537 else +0538 fprintf('Generating the KEGG Orthology specific multi-FASTA files... 
COMPLETE\n'); +0539 end +0540 +0541 if isunix +0542 if ismac +0543 binEnd='.mac'; +0544 else +0545 binEnd=''; +0546 end +0547 elseif ispc +0548 binEnd=''; +0549 else +0550 EM='Unknown OS, exiting.'; +0551 disp(EM); +0552 return +0553 end +0554 +0555 %Check if alignment of FASTA files should be performed +0556 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]); +0557 if ~isempty(missingAligned) +0558 if seqIdentity==-1 +0559 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); +0560 else +0561 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... 0%% complete'); +0562 end +0563 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned))); +0564 tmpFile=tempname; +0565 %On Windows, paths need to be translated to Unix before parsing it to WSL +0566 if ispc +0567 wslPath.tmpFile=getWSLpath(tmpFile); +0568 %mafft has problems writing to terminal (/dev/stderr) when running +0569 %on WSL via MATLAB, instead write and read progress file +0570 mafftOutput = tempname; +0571 wslPath.mafftOutput=getWSLpath(mafftOutput); +0572 wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); +0573 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit')); +0574 end +0575 +0576 for i=1:numel(missingAligned) +0577 %This is checked here because it could be that it is created by a +0578 %parallel process. The faw-files are saved as temporary files to +0579 %kept track of which files are being worked on +0580 if ~isfile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])) &&... +0581 ~isfile(fullfile(dataDir,'aligned',[missingAligned{i} '.fa'])) +0582 %Check that the multi-FASTA file exists. It should do so since +0583 %we are saving empty files as well. 
Print a warning and +0584 %continue if not +0585 if ~isfile(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])) +0586 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; +0587 dispEM(EM,false); +0588 continue; +0589 end +0590 +0591 %If the multi-FASTA file is empty then save an empty aligned +0592 %file and continue +0593 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0594 if s.bytes<=0 +0595 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); +0596 fclose(fid); +0597 continue; +0598 end +0599 +0600 %Create an empty file to prevent other threads to start to work +0601 %on the same alignment +0602 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); +0603 fclose(fid); +0604 +0605 %First load the FASTA file, then select up to nSequences +0606 %sequences of the most closely related species, apply any +0607 %constraints from maxPhylDist, and save it as a temporary file, +0608 %and create the model from that +0609 +0610 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0611 phylDist=inf(numel(fastaStruct),1); +0612 for j=1:numel(fastaStruct) +0613 %Get the organism abbreviation +0614 index=strfind(fastaStruct(j).Header,':'); +0615 if any(index) +0616 abbrev=fastaStruct(j).Header(1:index(1)-1); +0617 [~, index]=ismember(abbrev,phylDistStruct.ids); +0618 if any(index) +0619 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); +0620 end +0621 end +0622 end +0623 +0624 %Inf means that it should not be included +0625 phylDist(phylDist>maxPhylDist)=[]; 0626 -0627 %Inf means that it should not be included -0628 phylDist(phylDist>maxPhylDist)=[]; +0627 %Sort based on phylDist +0628 [~, order]=sort(phylDist); 0629 -0630 %Sort based on phylDist -0631 [~, order]=sort(phylDist); -0632 -0633 %Save the first nSequences hits to a temporary FASTA file -0634 if nSequences<=numel(fastaStruct) -0635 fastaStruct=fastaStruct(order(1:nSequences)); -0636 else -0637 
fastaStruct=fastaStruct(order); -0638 end -0639 -0640 %Do the clustering and alignment if there are more than one -0641 %sequences, otherwise just save the sequence (or an empty file) -0642 if numel(fastaStruct)>1 -0643 if seqIdentity~=-1 -0644 cdhitInpCustom=tempname; -0645 fastawrite(cdhitInpCustom,fastaStruct); -0646 if seqIdentity<=1 && seqIdentity>0.7 -0647 nparam='5'; -0648 elseif seqIdentity>0.6 -0649 nparam='4'; -0650 elseif seqIdentity>0.5 -0651 nparam='3'; -0652 elseif seqIdentity>0.4 -0653 nparam='2'; -0654 else -0655 EM='The provided seqIdentity must be between 0 and 1\n'; -0656 dispEM(EM); -0657 end -0658 if ispc -0659 wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom); -0660 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); -0661 elseif ismac || isunix -0662 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); -0663 end -0664 if status~=0 -0665 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0666 dispEM(EM); -0667 end -0668 %Remove the old tempfile -0669 if exist(cdhitInpCustom, 'file') -0670 delete([cdhitInpCustom '*']); -0671 end -0672 else -0673 %This means that CD-HIT should be skipped since -0674 %seqIdentity is equal to -1 -0675 fastawrite(tmpFile,fastaStruct); -0676 end -0677 %Do the alignment for this file -0678 if ismac -0679 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0680 elseif isunix -0681 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' 
tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0682 elseif ispc -0683 wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])); -0684 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); -0685 output=fileread(mafftOutput); -0686 delete(mafftOutput); -0687 end -0688 if status~=0 -0689 %It could be that alignment failed because only one -0690 %sequence was left after clustering. If that is the -0691 %case, then the clustered file is just copied as 'faw' -0692 %file -0693 if any(regexp(output,'Only 1 sequence found')) -0694 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); -0695 else -0696 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; -0697 dispEM(EM); -0698 end -0699 end -0700 %Remove the old tempfile -0701 if exist(tmpFile, 'file') -0702 delete([tmpFile '*']); -0703 end -0704 else -0705 %If there is only one sequence then it's not possible to do -0706 %a multiple alignment. Just print the sequence instead. 
An -0707 %empty file was written previously so that doesn't have to -0708 %be dealt with -0709 if numel(fastaStruct)==1 -0710 warnState = warning; %Save the current warning state -0711 warning('off','Bioinfo:fastawrite:AppendToFile'); -0712 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); -0713 warning(warnState) %Reset warning state to previous settings -0714 end -0715 end -0716 %Move the temporary file to the real one -0717 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); -0718 -0719 %Print the progress every 25 files -0720 if rem(i-1,25) == 0 -0721 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); -0722 progress=pad(progress,3,'left'); -0723 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0724 end -0725 end -0726 end -0727 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0728 else -0729 if seqIdentity==-1 -0730 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); -0731 else -0732 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); -0733 end -0734 end -0735 -0736 %Check if training of Hidden Markov models should be performed -0737 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); -0738 if ~isempty(missingHMMs) -0739 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); -0740 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); -0741 %Train models for all missing KOs -0742 for i=1:numel(missingHMMs) -0743 %This is checked here because it could be that it is created by a -0744 %parallel process -0745 if ~isfile(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm'])) && ~isfile(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])) -0746 %Check that the aligned FASTA file exists. 
It could be that it -0747 %is still being worked on by some other instance of the program -0748 %(the .faw file should then exist). This should not happen on a -0749 %single computer. It doesn't throw an error, because it should -0750 %finalize the ones it can -0751 if ~isfile(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])) -0752 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; -0753 dispEM(EM,false); -0754 continue; -0755 end -0756 -0757 %If the multi-FASTA file is empty then save an empty aligned -0758 %file and continue -0759 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); -0760 if s.bytes<=0 -0761 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); -0762 fclose(fid); -0763 continue; -0764 end -0765 %Create a temporary file to indicate that it is working on the -0766 %KO. This is because hmmbuild cannot overwrite existing files -0767 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); -0768 fclose(fid); -0769 -0770 %Create HMM -0771 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); -0772 if status~=0 -0773 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; -0774 dispEM(EM); -0775 end -0776 -0777 %Delete the temporary file -0778 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); -0779 -0780 %Print the progress every 25 files -0781 if rem(i-1,25) == 0 -0782 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); -0783 progress=pad(progress,3,'left'); -0784 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0785 end -0786 end -0787 end -0788 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0789 else -0790 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n'); -0791 end -0792 -0793 %Check which new .out files that should be generated. 
Check if training of -0794 %Hidden Markov models should be performed -0795 missingOUT=setdiff(KOModel.rxns,outFiles); -0796 if ~isempty(missingOUT) -0797 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); -0798 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); -0799 for i=1:numel(missingOUT) -0800 %This is checked here because it could be that it is created by a -0801 %parallel process -0802 if ~isfile(fullfile(outDir,[missingOUT{i} '.out'])) -0803 %Check that the HMM file exists. It should do so since %we are -0804 %saving empty files as well. Print a warning and continue if -0805 %not -0806 if ~isfile(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])) -0807 EM=['The HMM file for ' missingOUT{i} ' does not exist']; -0808 dispEM(EM,false); -0809 continue; -0810 end -0811 -0812 %Save an empty file to prevent several threads working on the -0813 %same file -0814 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0815 fclose(fid); -0816 -0817 %If the HMM file is empty then save an out file and continue -0818 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); -0819 if s.bytes<=0 -0820 continue; -0821 end -0822 -0823 %Check each gene in the input file against this model -0824 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); -0825 if status~=0 -0826 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; -0827 dispEM(EM); -0828 end -0829 -0830 %Save the output to a file -0831 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0832 fwrite(fid,output); -0833 fclose(fid); -0834 -0835 %Print the progress every 25 files -0836 if rem(i-1,25) == 0 -0837 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); -0838 progress=pad(progress,3,'left'); -0839 
fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0840 end -0841 end -0842 end -0843 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0844 else -0845 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n'); -0846 end +0630 %Save the first nSequences hits to a temporary FASTA file +0631 if nSequences<=numel(fastaStruct) +0632 fastaStruct=fastaStruct(order(1:nSequences)); +0633 else +0634 fastaStruct=fastaStruct(order); +0635 end +0636 +0637 %Do the clustering and alignment if there are more than one +0638 %sequences, otherwise just save the sequence (or an empty file) +0639 if numel(fastaStruct)>1 +0640 if seqIdentity~=-1 +0641 cdhitInpCustom=tempname; +0642 fastawrite(cdhitInpCustom,fastaStruct); +0643 if seqIdentity<=1 && seqIdentity>0.7 +0644 nparam='5'; +0645 elseif seqIdentity>0.6 +0646 nparam='4'; +0647 elseif seqIdentity>0.5 +0648 nparam='3'; +0649 elseif seqIdentity>0.4 +0650 nparam='2'; +0651 else +0652 EM='The provided seqIdentity must be between 0 and 1\n'; +0653 dispEM(EM); +0654 end +0655 if ispc +0656 wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom); +0657 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0658 elseif ismac || isunix +0659 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0660 end +0661 if status~=0 +0662 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; +0663 dispEM(EM); +0664 end +0665 %Remove the old tempfile +0666 if exist(cdhitInpCustom, 'file') +0667 delete([cdhitInpCustom '*']); +0668 end +0669 else +0670 %This means that CD-HIT should be skipped since +0671 %seqIdentity is equal to -1 +0672 fastawrite(tmpFile,fastaStruct); +0673 end +0674 %Do the 
alignment for this file +0675 if ismac +0676 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0677 elseif isunix +0678 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0679 elseif ispc +0680 wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])); +0681 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); +0682 output=fileread(mafftOutput); +0683 delete(mafftOutput); +0684 end +0685 if status~=0 +0686 %It could be that alignment failed because only one +0687 %sequence was left after clustering. If that is the +0688 %case, then the clustered file is just copied as 'faw' +0689 %file +0690 if any(regexp(output,'Only 1 sequence found')) +0691 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); +0692 else +0693 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; +0694 dispEM(EM); +0695 end +0696 end +0697 %Remove the old tempfile +0698 if exist(tmpFile, 'file') +0699 delete([tmpFile '*']); +0700 end +0701 else +0702 %If there is only one sequence then it's not possible to do +0703 %a multiple alignment. Just print the sequence instead. 
An +0704 %empty file was written previously so that doesn't have to +0705 %be dealt with +0706 if numel(fastaStruct)==1 +0707 warnState = warning; %Save the current warning state +0708 warning('off','Bioinfo:fastawrite:AppendToFile'); +0709 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); +0710 warning(warnState) %Reset warning state to previous settings +0711 end +0712 end +0713 %Move the temporary file to the real one +0714 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); +0715 +0716 %Print the progress every 25 files +0717 if rem(i-1,25) == 0 +0718 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); +0719 progress=pad(progress,3,'left'); +0720 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0721 end +0722 end +0723 end +0724 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0725 else +0726 if seqIdentity==-1 +0727 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0728 else +0729 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0730 end +0731 end +0732 +0733 %Check if training of Hidden Markov models should be performed +0734 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); +0735 if ~isempty(missingHMMs) +0736 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); +0737 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); +0738 %Train models for all missing KOs +0739 for i=1:numel(missingHMMs) +0740 %This is checked here because it could be that it is created by a +0741 %parallel process +0742 if ~isfile(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm'])) && ~isfile(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])) +0743 %Check that the aligned FASTA file exists. 
It could be that it +0744 %is still being worked on by some other instance of the program +0745 %(the .faw file should then exist). This should not happen on a +0746 %single computer. It doesn't throw an error, because it should +0747 %finalize the ones it can +0748 if ~isfile(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])) +0749 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; +0750 dispEM(EM,false); +0751 continue; +0752 end +0753 +0754 %If the multi-FASTA file is empty then save an empty aligned +0755 %file and continue +0756 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); +0757 if s.bytes<=0 +0758 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); +0759 fclose(fid); +0760 continue; +0761 end +0762 %Create a temporary file to indicate that it is working on the +0763 %KO. This is because hmmbuild cannot overwrite existing files +0764 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); +0765 fclose(fid); +0766 +0767 %Create HMM +0768 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); +0769 if status~=0 +0770 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; +0771 dispEM(EM); +0772 end +0773 +0774 %Delete the temporary file +0775 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); +0776 +0777 %Print the progress every 25 files +0778 if rem(i-1,25) == 0 +0779 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); +0780 progress=pad(progress,3,'left'); +0781 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0782 end +0783 end +0784 end +0785 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0786 else +0787 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n'); +0788 end +0789 +0790 %Check which new .out files that should be generated. 
Check if training of +0791 %Hidden Markov models should be performed +0792 missingOUT=setdiff(KOModel.rxns,outFiles); +0793 if ~isempty(missingOUT) +0794 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); +0795 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); +0796 for i=1:numel(missingOUT) +0797 %This is checked here because it could be that it is created by a +0798 %parallel process +0799 if ~isfile(fullfile(outDir,[missingOUT{i} '.out'])) +0800 %Check that the HMM file exists. It should do so since %we are +0801 %saving empty files as well. Print a warning and continue if +0802 %not +0803 if ~isfile(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])) +0804 EM=['The HMM file for ' missingOUT{i} ' does not exist']; +0805 dispEM(EM,false); +0806 continue; +0807 end +0808 +0809 %Save an empty file to prevent several threads working on the +0810 %same file +0811 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0812 fclose(fid); +0813 +0814 %If the HMM file is empty then save an out file and continue +0815 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); +0816 if s.bytes<=0 +0817 continue; +0818 end +0819 +0820 %Check each gene in the input file against this model +0821 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); +0822 if status~=0 +0823 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; +0824 dispEM(EM); +0825 end +0826 +0827 %Save the output to a file +0828 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0829 fwrite(fid,output); +0830 fclose(fid); +0831 +0832 %Print the progress every 25 files +0833 if rem(i-1,25) == 0 +0834 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); +0835 progress=pad(progress,3,'left'); +0836 
fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0837 end +0838 end +0839 end +0840 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0841 else +0842 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n'); +0843 end +0844 +0845 +0846 %***Begin retrieving the output and putting together the resulting model 0847 -0848 -0849 %***Begin retrieving the output and putting together the resulting model -0850 -0851 fprintf('Parsing the HMM search results... '); -0852 %Retrieve matched genes from the HMMs -0853 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes -0854 genes=cell(3000,1); -0855 %Store the best score for a gene in a hash list (since it will be searching -0856 %many times) -0857 hTable = java.util.Hashtable; -0858 -0859 geneCounter=0; -0860 for i=1:numel(KOModel.rxns) -0861 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') -0862 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); -0863 beginMatches=false; -0864 while 1 -0865 %Get the next line -0866 tline = fgetl(fid); -0867 -0868 %Abort at end of file -0869 if ~ischar(tline) -0870 break; -0871 end -0872 -0873 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) -0874 break; -0875 end -0876 -0877 if beginMatches==false -0878 %This is how the listing of matches begins -0879 if any(strfind(tline,'E-value ')) -0880 %Read one more line that is only padding -0881 tline = fgetl(fid); -0882 beginMatches=true; -0883 end -0884 else -0885 %If matches should be read -0886 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) -0887 elements=regexp(tline,' ','split'); -0888 elements=elements(cellfun(@any,elements)); -0889 -0890 %Check if the match is below the treshhold -0891 score=str2double(elements{1}); -0892 gene=elements{9}; -0893 if score<=cutOff -0894 %If the score is exactly 0, change it to a very -0895 %small value to avoid NaN -0896 if score==0 -0897 
score=10^-250; -0898 end -0899 %Check if the gene is added already and, is so, get -0900 %the best score for it -0901 I=hTable.get(gene); -0902 if any(I) -0903 koGeneMat(i,I)=score; -0904 else -0905 geneCounter=geneCounter+1; -0906 %The gene was not present yet so add it -0907 hTable.put(gene,geneCounter); -0908 genes{geneCounter}=gene; -0909 koGeneMat(i,geneCounter)=score; -0910 end -0911 end -0912 else -0913 break; -0914 end -0915 end -0916 end -0917 fclose(fid); -0918 end -0919 end -0920 fprintf('COMPLETE\n'); +0848 fprintf('Parsing the HMM search results... '); +0849 %Retrieve matched genes from the HMMs +0850 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes +0851 genes=cell(3000,1); +0852 %Store the best score for a gene in a hash list (since it will be searching +0853 %many times) +0854 hTable = java.util.Hashtable; +0855 +0856 geneCounter=0; +0857 for i=1:numel(KOModel.rxns) +0858 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') +0859 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); +0860 beginMatches=false; +0861 while 1 +0862 %Get the next line +0863 tline = fgetl(fid); +0864 +0865 %Abort at end of file +0866 if ~ischar(tline) +0867 break; +0868 end +0869 +0870 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) +0871 break; +0872 end +0873 +0874 if beginMatches==false +0875 %This is how the listing of matches begins +0876 if any(strfind(tline,'E-value ')) +0877 %Read one more line that is only padding +0878 tline = fgetl(fid); +0879 beginMatches=true; +0880 end +0881 else +0882 %If matches should be read +0883 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) +0884 elements=regexp(tline,' ','split'); +0885 elements=elements(cellfun(@any,elements)); +0886 +0887 %Check if the match is below the treshhold +0888 score=str2double(elements{1}); +0889 gene=elements{9}; +0890 if score<=cutOff +0891 %If the score is exactly 0, change it to a very +0892 %small 
value to avoid NaN +0893 if score==0 +0894 score=10^-250; +0895 end +0896 %Check if the gene is added already and, is so, get +0897 %the best score for it +0898 I=hTable.get(gene); +0899 if any(I) +0900 koGeneMat(i,I)=score; +0901 else +0902 geneCounter=geneCounter+1; +0903 %The gene was not present yet so add it +0904 hTable.put(gene,geneCounter); +0905 genes{geneCounter}=gene; +0906 koGeneMat(i,geneCounter)=score; +0907 end +0908 end +0909 else +0910 break; +0911 end +0912 end +0913 end +0914 fclose(fid); +0915 end +0916 end +0917 fprintf('COMPLETE\n'); +0918 +0919 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); +0920 koGeneMat=koGeneMat(:,1:geneCounter); 0921 -0922 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); -0923 koGeneMat=koGeneMat(:,1:geneCounter); -0924 -0925 %Remove the genes for each KO that are below minScoreRatioKO. -0926 for i=1:size(koGeneMat,1) -0927 J=find(koGeneMat(i,:)); -0928 if any(J) -0929 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; -0930 end -0931 end -0932 -0933 %Remove the KOs for each gene that are below minScoreRatioG -0934 for i=1:size(koGeneMat,2) -0935 J=find(koGeneMat(:,i)); -0936 if any(J) -0937 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; -0938 end -0939 end -0940 fprintf('COMPLETE\n'); -0941 -0942 fprintf('Adding gene annotations to the model... 
'); -0943 %Create the new model -0944 model.genes=genes(1:geneCounter); -0945 model.grRules=cell(numel(model.rxns),1); -0946 model.grRules(:)={''}; -0947 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); -0948 -0949 %Loop through the reactions and add the corresponding genes -0950 for i=1:numel(model.rxns) -0951 if isstruct(model.rxnMiriams{i}) -0952 %Get all KOs -0953 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); -0954 KOs=model.rxnMiriams{i}.value(I); -0955 %Find the KOs and the corresponding genes -0956 J=ismember(KOModel.rxns,KOs); -0957 [~, K]=find(koGeneMat(J,:)); -0958 -0959 if any(K) -0960 model.rxnGeneMat(i,K)=1; -0961 %Also delete KOs for which no genes were found. If no genes at -0962 %all were matched to the reaction it will be deleted later -0963 L=sum(koGeneMat(J,:),2)==0; -0964 model.rxnMiriams{i}.value(I(L))=[]; -0965 model.rxnMiriams{i}.name(I(L))=[]; -0966 end -0967 end -0968 end -0969 fprintf('COMPLETE\n'); -0970 -0971 %Find and delete all reactions without genes. This also removes genes that -0972 %are not used (which could happen because minScoreRatioG and -0973 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions -0974 %without genes are kept in the model. Spontaneous reactions with original -0975 %gene associations are treated in the same way, like the rest of the -0976 %reactions - if gene associations were removed during HMM search, such -0977 %reactions are deleted from the model -0978 if keepSpontaneous==true -0979 %Not the most comprise way to delete reactions without genes, but this -0980 %makes the code easier to understand. Firstly the non-spontaneous -0981 %reactions without genes are removed. After that, the second deletion -0982 %step removes spontaneous reactions, which had gene associations before -0983 %HMM search, but no longer have after it -0984 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... 
'); -0985 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); -0986 model=removeReactions(model,I,true,true); -0987 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); -0988 model=removeReactions(model,I,true,true); -0989 else -0990 %Just simply check for any new reactions without genes and remove -0991 %it -0992 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); -0993 I=~any(model.rxnGeneMat,2); -0994 model=removeReactions(model,I,true,true); -0995 end -0996 fprintf('COMPLETE\n'); -0997 -0998 fprintf('Constructing GPR rules and finalizing the model... '); -0999 %Add the gene associations as 'or' -1000 for i=1:numel(model.rxns) -1001 %Find the involved genes -1002 I=find(model.rxnGeneMat(i,:)); -1003 if any(I) -1004 model.grRules{i}=['(' model.genes{I(1)}]; -1005 for j=2:numel(I) -1006 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; -1007 end -1008 model.grRules{i}=[model.grRules{i} ')']; -1009 end -1010 end -1011 -1012 %Fix grRules and reconstruct rxnGeneMat -1013 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output -1014 model.grRules = grRules; -1015 model.rxnGeneMat = rxnGeneMat; -1016 -1017 %Fix subsystems -1018 emptySubSystems=cellfun(@isempty, model.subSystems); -1019 model.subSystems(emptySubSystems)={{''}}; -1020 -1021 %Add the description to the reactions -1022 for i=1:numel(model.rxns) -1023 if ~isempty(model.rxnNotes{i}) -1024 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); -1025 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. 
'); -1026 else -1027 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; -1028 end -1029 end -1030 %Remove the temp fasta file -1031 delete(fastaFile) -1032 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); -1033 end -1034 -1035 function files=listFiles(directory) -1036 %Supporter function to list the files in a directory and return them as a -1037 %cell array -1038 temp=dir(directory); -1039 files=cell(numel(temp),1); -1040 for i=1:numel(temp) -1041 files{i}=temp(i,1).name; -1042 end -1043 files=strrep(files,'.fa',''); -1044 files=strrep(files,'.hmm',''); -1045 files=strrep(files,'.out',''); -1046 files=strrep(files,'.faw',''); -1047 end +0922 %Remove the genes for each KO that are below minScoreRatioKO. +0923 for i=1:size(koGeneMat,1) +0924 J=find(koGeneMat(i,:)); +0925 if any(J) +0926 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; +0927 end +0928 end +0929 +0930 %Remove the KOs for each gene that are below minScoreRatioG +0931 for i=1:size(koGeneMat,2) +0932 J=find(koGeneMat(:,i)); +0933 if any(J) +0934 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; +0935 end +0936 end +0937 fprintf('COMPLETE\n'); +0938 +0939 fprintf('Adding gene annotations to the model... '); +0940 %Create the new model +0941 model.genes=genes(1:geneCounter); +0942 model.grRules=cell(numel(model.rxns),1); +0943 model.grRules(:)={''}; +0944 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); +0945 +0946 %Loop through the reactions and add the corresponding genes +0947 for i=1:numel(model.rxns) +0948 if isstruct(model.rxnMiriams{i}) +0949 %Get all KOs +0950 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); +0951 KOs=model.rxnMiriams{i}.value(I); +0952 %Find the KOs and the corresponding genes +0953 J=ismember(KOModel.rxns,KOs); +0954 [~, K]=find(koGeneMat(J,:)); +0955 +0956 if any(K) +0957 model.rxnGeneMat(i,K)=1; +0958 %Also delete KOs for which no genes were found. 
If no genes at +0959 %all were matched to the reaction it will be deleted later +0960 L=sum(koGeneMat(J,:),2)==0; +0961 model.rxnMiriams{i}.value(I(L))=[]; +0962 model.rxnMiriams{i}.name(I(L))=[]; +0963 end +0964 end +0965 end +0966 fprintf('COMPLETE\n'); +0967 +0968 %Find and delete all reactions without genes. This also removes genes that +0969 %are not used (which could happen because minScoreRatioG and +0970 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions +0971 %without genes are kept in the model. Spontaneous reactions with original +0972 %gene associations are treated in the same way, like the rest of the +0973 %reactions - if gene associations were removed during HMM search, such +0974 %reactions are deleted from the model +0975 if keepSpontaneous==true +0976 %Not the most comprise way to delete reactions without genes, but this +0977 %makes the code easier to understand. Firstly the non-spontaneous +0978 %reactions without genes are removed. After that, the second deletion +0979 %step removes spontaneous reactions, which had gene associations before +0980 %HMM search, but no longer have after it +0981 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... '); +0982 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); +0983 model=removeReactions(model,I,true,true); +0984 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); +0985 model=removeReactions(model,I,true,true); +0986 else +0987 %Just simply check for any new reactions without genes and remove +0988 %it +0989 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); +0990 I=~any(model.rxnGeneMat,2); +0991 model=removeReactions(model,I,true,true); +0992 end +0993 fprintf('COMPLETE\n'); +0994 +0995 fprintf('Constructing GPR rules and finalizing the model... 
'); +0996 %Add the gene associations as 'or' +0997 for i=1:numel(model.rxns) +0998 %Find the involved genes +0999 I=find(model.rxnGeneMat(i,:)); +1000 if any(I) +1001 model.grRules{i}=['(' model.genes{I(1)}]; +1002 for j=2:numel(I) +1003 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; +1004 end +1005 model.grRules{i}=[model.grRules{i} ')']; +1006 end +1007 end +1008 +1009 %Fix grRules and reconstruct rxnGeneMat +1010 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output +1011 model.grRules = grRules; +1012 model.rxnGeneMat = rxnGeneMat; +1013 +1014 %Fix subsystems +1015 emptySubSystems=cellfun(@isempty, model.subSystems); +1016 model.subSystems(emptySubSystems)={{''}}; +1017 +1018 %Add the description to the reactions +1019 for i=1:numel(model.rxns) +1020 if ~isempty(model.rxnNotes{i}) +1021 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); +1022 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); +1023 else +1024 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; +1025 end +1026 end +1027 %Remove the temp fasta file +1028 delete(fastaFile) +1029 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); +1030 end +1031 +1032 function files=listFiles(directory) +1033 %Supporter function to list the files in a directory and return them as a +1034 %cell array +1035 temp=dir(directory); +1036 files=cell(numel(temp),1); +1037 for i=1:numel(temp) +1038 files{i}=temp(i,1).name; +1039 end +1040 files=strrep(files,'.fa',''); +1041 files=strrep(files,'.hmm',''); +1042 files=strrep(files,'.out',''); +1043 files=strrep(files,'.faw',''); +1044 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/index.html b/doc/index.html index c87f7edd..25c6ef92 100644 --- a/doc/index.html +++ b/doc/index.html @@ -19,59 +19,59 @@

Matlab Directories

Matlab Files found in these Directories

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FSEOF expandModel getObjectiveString randomSampling
INITStepDesc exportForGit getPathwayDimensions ravenCobraWrapper
ManualINITTests exportModel getPhylDist readYAMLmodel
SBMLFromExcel exportModelToSIF getRxnsFromKEGG removeBadRxns
addExchangeRxns exportToExcelFormat getRxnsFromMetaCyc removeGenes
addGenesRaven exportToTabDelimited getRxnsInComp removeLowScoreGenes
addJavaPaths extractMiriam getToolboxVersion removeMets
addMets fillGaps getTransportRxns removeRavenFromPath
addRavenToUserPath fillGapsLargeTests getWSLpath removeReactions
addRxns fillGapsSmallTests getWoLFScores replaceMets
addRxnsGenesMets findGeneDeletions groupRxnScores reporterMetabolites
addSpontaneousRxns findRAVENroot guessComposition rescaleModelForINIT
addTransport fitParameters haveFlux reverseRxns
analyzeSampling fitTasks hmmerTests runDynamicFBA
blastPlusTests followChanged importExcelModel runINIT
buildEquation followFluxes importExportTests runPhenotypePhasePlane
canConsume ftINIT importModel runProductionEnvelope
canProduce ftINITFillGaps linkMetaCycKEGGRxns runRobustnessAnalysis
cdhitTests ftINITFillGapsForAllTasks loadSheet runSimpleOptKnock
changeGeneAssoc ftINITFillGapsMILP loadWorkbook scoreComplexModel
changeGrRules ftINITInternalAlg mafftTests scoreModel
changeRxns gapReport makeFakeBlastStructure setColorToMapRxns
checkFileExistence generateNewIds makeSomething setExchangeBounds
checkFunctionUniqueness getAllRxnsFromGenes mapCompartments setOmicDataToRxns
checkInstallation getAllSubGraphs mapPathwayRxnNames setParam
checkModelStruct getAllowedBounds markPathwayWithExpression setRavenSolver
checkProduction getBlast markPathwayWithFluxes setTitle
checkRxn getBlastFromExcel mergeCompartments simplifyModel
checkSolution getColorCodes mergeLinear solveLP
checkTasks getDiamond mergeModels solveQP
checkTasksTests getElementalBalance miriamTests solverTests
cleanSheet getEnzymesFromMetaCyc modelAbilitiesTests sortIdentifiers
closeModel getEssentialRxns modelConversionTests sortModel
colorPathway getExchangeRxns modelCurationTests standardizeGrRules
colorSubsystem getExprForRxnScore modelSortingTests standardizeModelFieldOrder
combineMetaCycKEGGModels getExpressionStructure optimizeProb startup
compareMultipleModels getFluxZ parallelPoolRAVEN tinitTests
compareRxnsGenesMetsComps getFullPath parseFormulas trimPathway
constructEquations getGenesFromGrRules parseHPA tutorial1
constructMultiFasta getGenesFromKEGG parseHPArna tutorial2
constructPathwayFromCelldesigner getINITModel parseRxnEqu tutorial2_solutions
constructS getINITSteps parseScores tutorial3
consumeSomething getIndexes parseTaskList tutorial3_solutions
contractModel getKEGGModelForOrganism permuteModel tutorial4
convertCharArray getMD5Hash plotAdditionalInfo tutorial4_solutions
convertToIrrev getMetaCycModelForOrganism plotLabels tutorial5
copyToComps getMetsFromKEGG predictLocalization tutorial6
deleteUnusedGenes getMetsFromMetaCyc prepINITModel updateDocumentation
diamondTests getMetsInComp printFluxes writeSheet
dispEM getMinNrFluxes printModel writeYAMLmodel
drawMap getModelFromHomology printModelStats
drawPathway getModelFromKEGG printOrange
editMiriam getModelFromMetaCyc qMOMA
+ FSEOF editMiriam getModelFromMetaCyc qMOMA + INITStepDesc expandModel getObjectiveString randomSampling + ManualINITTests exportForGit getPathwayDimensions ravenCobraWrapper + SBMLFromExcel exportModel getPhylDist readYAMLmodel + addExchangeRxns exportModelToSIF getRxnsFromKEGG removeBadRxns + addGenesRaven exportToExcelFormat getRxnsFromMetaCyc removeGenes + addIdentifierPrefix exportToTabDelimited getRxnsInComp removeIdentifierPrefix + addJavaPaths extractMiriam getToolboxVersion removeLowScoreGenes + addMets fillGaps getTransportRxns removeMets + addRavenToUserPath fillGapsLargeTests getWSLpath removeRavenFromPath + addRxns fillGapsSmallTests getWoLFScores removeReactions + addRxnsGenesMets findGeneDeletions groupRxnScores replaceMets + addSpontaneousRxns findRAVENroot guessComposition reporterMetabolites + addTransport fitParameters haveFlux rescaleModelForINIT + analyzeSampling fitTasks hmmerTests reverseRxns + blastPlusTests followChanged importExcelModel runDynamicFBA + buildEquation followFluxes importExportTests runINIT + canConsume ftINIT importModel runPhenotypePhasePlane + canProduce ftINITFillGaps linkMetaCycKEGGRxns runProductionEnvelope + cdhitTests ftINITFillGapsForAllTasks loadSheet runRobustnessAnalysis + changeGeneAssoc ftINITFillGapsMILP loadWorkbook runSimpleOptKnock + changeGrRules ftINITInternalAlg mafftTests scoreComplexModel + changeRxns gapReport makeFakeBlastStructure scoreModel + checkFileExistence generateNewIds makeSomething setColorToMapRxns + checkFunctionUniqueness getAllRxnsFromGenes mapCompartments setExchangeBounds + checkInstallation getAllSubGraphs mapPathwayRxnNames setOmicDataToRxns + checkModelStruct getAllowedBounds markPathwayWithExpression setParam + checkProduction getBlast markPathwayWithFluxes setRavenSolver + checkRxn getBlastFromExcel mergeCompartments setTitle + checkSolution getColorCodes mergeLinear simplifyModel + checkTasks getDiamond mergeModels solveLP + checkTasksTests getElementalBalance miriamTests 
solveQP + cleanSheet getEnzymesFromMetaCyc modelAbilitiesTests solverTests + closeModel getEssentialRxns modelConversionTests sortIdentifiers + colorPathway getExchangeRxns modelCurationTests sortModel + colorSubsystem getExprForRxnScore modelSortingTests standardizeGrRules + combineMetaCycKEGGModels getExpressionStructure optimizeProb standardizeModelFieldOrder + compareMultipleModels getFluxZ parallelPoolRAVEN startup + compareRxnsGenesMetsComps getFullPath parseFormulas tinitTests + constructEquations getGenesFromGrRules parseHPA trimPathway + constructMultiFasta getGenesFromKEGG parseHPArna tutorial1 + constructPathwayFromCelldesigner getINITModel parseRxnEqu tutorial2 + constructS getINITSteps parseScores tutorial2_solutions + consumeSomething getIndexes parseTaskList tutorial3 + contractModel getKEGGModelForOrganism permuteModel tutorial3_solutions + convertCharArray getMD5Hash plotAdditionalInfo tutorial4 + convertToIrrev getMetaCycModelForOrganism plotLabels tutorial4_solutions + copyToComps getMetsFromKEGG predictLocalization tutorial5 + deleteUnusedGenes getMetsFromMetaCyc prepINITModel tutorial6 + diamondTests getMetsInComp printFluxes updateDocumentation + dispEM getMinNrFluxes printModel writeSheet + drawMap getModelFromHomology printModelStats writeYAMLmodel + drawPathway getModelFromKEGG printOrange
Generated by m2html © 2005
diff --git a/doc/io/SBMLFromExcel.html b/doc/io/SBMLFromExcel.html index 03e9d4b2..4565e925 100644 --- a/doc/io/SBMLFromExcel.html +++ b/doc/io/SBMLFromExcel.html @@ -49,7 +49,7 @@

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

This function calls: +
  • exportModel exportModel
  • importExcelModel importExcelModel
  • This function is called by: @@ -87,7 +87,7 @@

    SOURCE CODE ^importExcelModel(fileName,false,printWarnings); 0029 printModelStats(model,printWarnings,false); -0030 exportModel(model,outputFileName,toCOBRA,true); +0030 exportModel(model,outputFileName,toCOBRA,true); 0031 end
    Generated by m2html © 2005
    diff --git a/doc/io/checkFileExistence.html b/doc/io/checkFileExistence.html index 3fd31ce7..d17fbfcd 100644 --- a/doc/io/checkFileExistence.html +++ b/doc/io/checkFileExistence.html @@ -61,7 +61,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • exportModel exportModel
  • getMD5Hash getMD5Hash
  • importModel importModel
  • +
  • exportModel exportModel
  • getMD5Hash getMD5Hash
  • importModel importModel
  • diff --git a/doc/io/exportForGit.html b/doc/io/exportForGit.html index 85d9acd8..74a90f62 100644 --- a/doc/io/exportForGit.html +++ b/doc/io/exportForGit.html @@ -24,7 +24,7 @@

    PURPOSE ^exportForGit

    SYNOPSIS ^

    -
    function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,cobraText)
    +
    function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,COBRAtext,COBRAstyle)

    DESCRIPTION ^

     exportForGit
    @@ -33,34 +33,43 @@ 

    DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • exportModel exportModel
  • exportToExcelFormat exportToExcelFormat
  • getToolboxVersion getToolboxVersion
  • sortIdentifiers exportModel
  • writeYAMLmodel writeYAMLmodel
  • This function is called by: @@ -69,158 +78,170 @@

    CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

    -
    0001 function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,cobraText)
    +
    0001 function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,COBRAtext,COBRAstyle)
     0002 % exportForGit
     0003 %   Generates a directory structure and populates this with model files, ready
     0004 %   to be committed to a Git(Hub) maintained model repository. Writes the model
     0005 %   as SBML L3V1 FBCv2 (both XML and YAML), COBRA text, Matlab MAT-file
     0006 %   and Excel (xlsx) file.
     0007 %
    -0008 %   model               model structure in RAVEN format that should be exported
    -0009 %   prefix              prefix for all filenames (optional, default 'model')
    -0010 %   path                path where the directory structure should be generated
    -0011 %                       and populated with all files (optional, default to current
    -0012 %                       working directory)
    -0013 %   formats             cell array of strings specifying in what file formats
    -0014 %                       the model should be exported (optional, default to all
    -0015 %                       formats as {'mat', 'txt', 'xlsx', 'xml', 'yml'})
    -0016 %   mainBranchFlag      logical, if true, function will error if RAVEN (and
    -0017 %                       COBRA if detected) is/are not on the main branch.
    -0018 %                       (optional, default false)
    -0019 %   subDirs             logical, whether model files for each file format
    -0020 %                       should be written in its own subdirectory, with
    -0021 %                       'model' as parent directory, in accordance to the
    -0022 %                       standard-GEM repository format. If false, all files
    -0023 %                       are stored in the same folder. (optional, default true)
    -0024 %   cobraText           logical, whether the txt file should be in COBRA
    -0025 %                       Toolbox format using metabolite IDs, instead of
    -0026 %                       metabolite names and compartments. (optional, default
    -0027 %                       false)
    -0028 %
    -0029 % Usage: exportForGit(model,prefix,path,formats,mainBranchFlag)
    -0030 if nargin<7
    -0031     cobraText=false;
    -0032 end
    -0033 if nargin<6
    -0034     subDirs=true;
    -0035 end
    -0036 if nargin<5
    -0037     mainBranchFlag=false;
    -0038 end
    -0039 if nargin<4 || isempty(formats)
    -0040     formats={'mat', 'txt', 'xlsx', 'xml', 'yml'};
    -0041 else
    -0042     formats=convertCharArray(formats);
    -0043 end
    -0044 if any(~ismember(formats, {'mat', 'txt', 'xlsx', 'xml', 'yml'}))
    -0045     EM='Unknown file format defined. Only mat, txt, xlsx, xml and yml are allowed file formats.';
    -0046     error(EM)
    +0008 %   model               model structure in RAVEN format that should be
    +0009 %                       exported
    +0010 %   prefix              prefix for all filenames (optional, default 'model')
    +0011 %   path                path where the directory structure should be
    +0012 %                       generated and populated with all files (optional,
    +0013 %                       default to current working directory)
    +0014 %   formats             cell array of strings specifying in what file
    +0015 %                       formats the model should be exported (optional,
    +0016 %                       default to all formats as {'mat', 'txt', 'xlsx',
    +0017 %                       'xml', 'yml'})
    +0018 %   mainBranchFlag      logical, if true, function will error if RAVEN (and
    +0019 %                       COBRA if detected) is/are not on the main branch.
    +0020 %                       (optional, default false)
    +0021 %   subDirs             logical, whether model files for each file format
    +0022 %                       should be written in its own subdirectory, with
    +0023 %                       'model' as parent directory, in accordance with the
    +0024 %                       standard-GEM repository format. If false, all files
    +0025 %                       are stored in the same folder. (optional, default
    +0026 %                       true)
    +0027 %   COBRAtext           logical, whether the txt file should be in COBRA
    +0028 %                       Toolbox format using metabolite IDs, instead of
    +0029 %                       metabolite names and compartments. (optional,
    +0030 %                       default false)
    +0031 %   COBRAstyle          true if COBRA-style prefixes should be added to all
    +0032 %                       identifiers in the SBML file: R_ for reactions, M_
    +0033 %                       for metabolites, G_ for genes and C_ for
    +0034 %                       compartments. If all identifiers of a particular
    +0035 %                       field already have the prefix, then no additional
    +0036 %                       prefixes are added. (optional, default false)
    +0037 %
    +0038 % Usage: exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,COBRAtext,COBRAstyle)
    +0039 if nargin<8
    +0040     COBRAstyle=false;
    +0041 end
    +0042 if nargin<7 || isempty(COBRAtext)
    +0043     COBRAtext=false;
    +0044 end
    +0045 if nargin<6 || isempty(subDirs)
    +0046     subDirs=true;
     0047 end
    -0048 if nargin<3
    -0049     path='.';
    -0050 else
    -0051     path=char(path);
    -0052 end
    -0053 if nargin<2
    -0054     prefix='model';
    -0055 else
    -0056     prefix=char(prefix);
    -0057 end
    -0058 
    -0059 %Sort reactions, metabolites and genes alphabetically
    -0060 model=sortIdentifiers(model);
    -0061 
    -0062 %Get versions or commits of toolboxes:
    -0063 RAVENver = getToolboxVersion('RAVEN','ravenCobraWrapper.m',mainBranchFlag);
    -0064 COBRAver = getToolboxVersion('COBRA','initCobraToolbox.m',mainBranchFlag);
    -0065 
    -0066 %Retrieve libSBML version:
    -0067 [ravenDir,prevDir]=findRAVENroot();
    -0068 try % 5.17.0 and newer
    -0069     libSBMLver=OutputSBML_RAVEN;
    -0070     libSBMLver=libSBMLver.libSBML_version_string;
    -0071 catch % before 5.17.0
    -0072     fid = fopen('tempModelForLibSBMLversion.xml','w+');
    -0073     fclose(fid);
    -0074     evalc('[~,~,libSBMLver]=TranslateSBML_RAVEN(''tempModelForLibSBMLversion.xml'',0,0)');
    -0075     libSBMLver=libSBMLver.libSBML_version_string;
    -0076     delete('tempModelForLibSBMLversion.xml');
    -0077 end
    -0078 
    -0079 % Make models folder, no warnings if folder already exists
    -0080 if subDirs
    -0081     path=fullfile(path,'model');
    -0082     filePath=strcat(path,filesep,{'txt','yml','mat','xlsx','xml'});
    -0083     [~,~,~]=mkdir(path);
    -0084     for i = 1:length(formats)
    -0085         [~,~,~]=mkdir(fullfile(path,formats{i}));
    -0086     end
    -0087 else
    -0088     filePath=cell(1,5); filePath(:)={path};
    +0048 if nargin<5 || isempty(mainBranchFlag)
    +0049     mainBranchFlag=false;
    +0050 end
    +0051 if nargin<4 || isempty(formats)
    +0052     formats={'mat', 'txt', 'xlsx', 'xml', 'yml'};
    +0053 else
    +0054     formats=convertCharArray(formats);
    +0055 end
    +0056 if any(~ismember(formats, {'mat', 'txt', 'xlsx', 'xml', 'yml'}))
    +0057     EM='Unknown file format defined. Only mat, txt, xlsx, xml and yml are allowed file formats.';
    +0058     error(EM)
    +0059 end
    +0060 if nargin<3 || isempty(path)
    +0061     path='.';
    +0062 else
    +0063     path=char(path);
    +0064 end
    +0065 if nargin<2 || isempty(prefix)
    +0066     prefix='model';
    +0067 else
    +0068     prefix=char(prefix);
    +0069 end
    +0070 
    +0071 %Sort reactions, metabolites and genes alphabetically
    +0072 model=sortIdentifiers(model);
    +0073 
    +0074 %Get versions or commits of toolboxes:
    +0075 RAVENver = getToolboxVersion('RAVEN','ravenCobraWrapper.m',mainBranchFlag);
    +0076 COBRAver = getToolboxVersion('COBRA','initCobraToolbox.m',mainBranchFlag);
    +0077 
    +0078 %Retrieve libSBML version:
    +0079 [ravenDir,prevDir]=findRAVENroot();
    +0080 try % 5.17.0 and newer
    +0081     libSBMLver=OutputSBML_RAVEN;
    +0082     libSBMLver=libSBMLver.libSBML_version_string;
    +0083 catch % before 5.17.0
    +0084     fid = fopen('tempModelForLibSBMLversion.xml','w+');
    +0085     fclose(fid);
    +0086     evalc('[~,~,libSBMLver]=TranslateSBML_RAVEN(''tempModelForLibSBMLversion.xml'',0,0)');
    +0087     libSBMLver=libSBMLver.libSBML_version_string;
    +0088     delete('tempModelForLibSBMLversion.xml');
     0089 end
     0090 
    -0091 
    -0092 % Write TXT format
    -0093 if ismember('txt', formats)
    -0094     fid=fopen(fullfile(filePath{1},strcat(prefix,'.txt')),'w');
    -0095     if cobraText==true
    -0096         eqns=constructEquations(model,model.rxns,false,false,false);
    -0097         eqns=strrep(eqns,' => ','  -> ');
    -0098         eqns=strrep(eqns,' <=> ','  <=> ');
    -0099         eqns=regexprep(eqns,'> $','>');
    -0100         grRules=regexprep(model.grRules,'\((?!\()','( ');
    -0101         grRules=regexprep(grRules,'(?<!\))\)',' )');
    -0102     else
    -0103         eqns=constructEquations(model,model.rxns);
    -0104         grRules=model.grRules;
    -0105     end
    -0106     fprintf(fid, 'Rxn name\tFormula\tGene-reaction association\tLB\tUB\tObjective\n');
    -0107     for i = 1:numel(model.rxns)
    -0108         fprintf(fid, '%s\t', model.rxns{i});
    -0109         fprintf(fid, '%s \t', eqns{i});
    -0110         fprintf(fid, '%s\t', grRules{i});
    -0111         fprintf(fid, '%6.2f\t%6.2f\t%6.2f\n', model.lb(i), model.ub(i), model.c(i));
    -0112     end
    -0113     fclose(fid);
    -0114 end
    -0115 
    -0116 % Write YML format
    -0117 if ismember('yml', formats)
    -0118     writeYAMLmodel(model,fullfile(filePath{2},strcat(prefix,'.yml')));
    -0119 end
    -0120 
    -0121 % Write MAT format
    -0122 if ismember('mat', formats)
    -0123     save(fullfile(filePath{3},strcat(prefix,'.mat')),'model');
    -0124 end
    -0125 
    -0126 % Write XLSX format
    -0127 if ismember('xlsx', formats)
    -0128     exportToExcelFormat(model,fullfile(filePath{4},strcat(prefix,'.xlsx')));
    -0129 end
    -0130 
    -0131 % Write XML format
    -0132 if ismember('xml', formats)
    -0133         exportModel(model,fullfile(filePath{5},strcat(prefix,'.xml')));
    -0134 end
    -0135 
    -0136 %Save file with versions:
    -0137 fid = fopen(fullfile(path,'dependencies.txt'),'wt');
    -0138 fprintf(fid,['MATLAB\t' version '\n']);
    -0139 fprintf(fid,['libSBML\t' libSBMLver '\n']);
    -0140 fprintf(fid,['RAVEN_toolbox\t' RAVENver '\n']);
    -0141 if ~isempty(COBRAver)
    -0142     fprintf(fid,['COBRA_toolbox\t' COBRAver '\n']);
    -0143 end
    -0144 if isfield(model,'modelVersion')
    -0145     fields = fieldnames(model.modelVersion);
    -0146     for i = 1:length(fields)
    -0147         value = model.modelVersion.(fields{i});
    -0148         fprintf(fid,[fields{i} '\t' num2str(value) '\n']);
    -0149     end
    -0150 end
    -0151 fclose(fid);
    -0152 end
    +0091 % Make models folder, no warnings if folder already exists +0092 if subDirs +0093 path=fullfile(path,'model'); +0094 filePath=strcat(path,filesep,{'txt','yml','mat','xlsx','xml'}); +0095 [~,~,~]=mkdir(path); +0096 for i = 1:length(formats) +0097 [~,~,~]=mkdir(fullfile(path,formats{i})); +0098 end +0099 else +0100 filePath=cell(1,5); filePath(:)={path}; +0101 end +0102 +0103 +0104 % Write TXT format +0105 if ismember('txt', formats) +0106 fid=fopen(fullfile(filePath{1},strcat(prefix,'.txt')),'w'); +0107 if COBRAtext==true +0108 eqns=constructEquations(model,model.rxns,false,false,false); +0109 eqns=strrep(eqns,' => ',' -> '); +0110 eqns=strrep(eqns,' <=> ',' <=> '); +0111 eqns=regexprep(eqns,'> $','>'); +0112 grRules=regexprep(model.grRules,'\((?!\()','( '); +0113 grRules=regexprep(grRules,'(?<!\))\)',' )'); +0114 else +0115 eqns=constructEquations(model,model.rxns); +0116 grRules=model.grRules; +0117 end +0118 fprintf(fid, 'Rxn name\tFormula\tGene-reaction association\tLB\tUB\tObjective\n'); +0119 for i = 1:numel(model.rxns) +0120 fprintf(fid, '%s\t', model.rxns{i}); +0121 fprintf(fid, '%s \t', eqns{i}); +0122 fprintf(fid, '%s\t', grRules{i}); +0123 fprintf(fid, '%6.2f\t%6.2f\t%6.2f\n', model.lb(i), model.ub(i), model.c(i)); +0124 end +0125 fclose(fid); +0126 end +0127 +0128 % Write YML format +0129 if ismember('yml', formats) +0130 writeYAMLmodel(model,fullfile(filePath{2},strcat(prefix,'.yml'))); +0131 end +0132 +0133 % Write MAT format +0134 if ismember('mat', formats) +0135 save(fullfile(filePath{3},strcat(prefix,'.mat')),'model'); +0136 end +0137 +0138 % Write XLSX format +0139 if ismember('xlsx', formats) +0140 exportToExcelFormat(model,fullfile(filePath{4},strcat(prefix,'.xlsx'))); +0141 end +0142 +0143 % Write XML format +0144 if ismember('xml', formats) +0145 exportModel(model,fullfile(filePath{5},strcat(prefix,'.xml')),COBRAstyle); +0146 end +0147 +0148 %Save file with versions: +0149 fid = fopen(fullfile(path,'dependencies.txt'),'wt'); +0150 
fprintf(fid,['MATLAB\t' version '\n']); +0151 fprintf(fid,['libSBML\t' libSBMLver '\n']); +0152 fprintf(fid,['RAVEN_toolbox\t' RAVENver '\n']); +0153 if ~isempty(COBRAver) +0154 fprintf(fid,['COBRA_toolbox\t' COBRAver '\n']); +0155 end +0156 if isfield(model,'modelVersion') +0157 fields = fieldnames(model.modelVersion); +0158 for i = 1:length(fields) +0159 value = model.modelVersion.(fields{i}); +0160 fprintf(fid,[fields{i} '\t' num2str(value) '\n']); +0161 end +0162 end +0163 fclose(fid); +0164 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/exportModel.html b/doc/io/exportModel.html index 8e175fff..34b6f359 100644 --- a/doc/io/exportModel.html +++ b/doc/io/exportModel.html @@ -24,35 +24,36 @@

    PURPOSE ^exportModel

    SYNOPSIS ^

    -
    function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds)
    +
    function exportModel(model,fileName,neverPrefix,supressWarnings,sortIds)

    DESCRIPTION ^

     exportModel
        Exports a constraint-based model to an SBML file (L3V1 FBCv2)
     
    -   Input:
    + Input:
        model               a model structure
        fileName            filename to export the model to. A dialog window
                            will open if no file name is specified.
    -   exportGeneComplexes true if gene complexes (all gene sets linked with
    -                       AND relationship) should be recognised and exported
    -                       (optional, default false)
    -   supressWarnings     true if warnings should be supressed (optional, default
    -                       false)
    +   neverPrefix         true if prefixes are never added to identifiers,
    +                       even if they start with e.g. digits. This might result
    +                       in invalid SBML files (optional, default false)
    +   supressWarnings     true if warnings should be suppressed. This might
    +                       result in invalid SBML files, as no checks are
    +                       performed (optional, default false)
        sortIds             logical whether metabolites, reactions and genes
                            should be sorted alphabetically by their
                            identifiers (optional, default false)
     
    - Usage: exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds)
    + Usage: exportModel(model,fileName,neverPrefix,supressWarnings,sortIds)

    CROSS-REFERENCE INFORMATION ^

    This function calls: +
  • checkFileExistence checkFileExistence
  • exportModel exportModel
  • sortIdentifiers exportModel
  • This function is called by: +
  • SBMLFromExcel SBMLFromExcel
  • exportForGit exportForGit
  • exportModel exportModel
  • SUBFUNCTIONS ^

    @@ -60,817 +61,796 @@

    SUBFUNCTIONS ^function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions)
  • function miriamString=getMiriam(miriamStruct)
  • function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i)
  • function vecT = columnVector(vec)
  • SOURCE CODE ^

    -
    0001 function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds)
    +
    0001 function exportModel(model,fileName,neverPrefix,supressWarnings,sortIds)
     0002 % exportModel
     0003 %   Exports a constraint-based model to an SBML file (L3V1 FBCv2)
     0004 %
    -0005 %   Input:
    +0005 % Input:
     0006 %   model               a model structure
     0007 %   fileName            filename to export the model to. A dialog window
     0008 %                       will open if no file name is specified.
    -0009 %   exportGeneComplexes true if gene complexes (all gene sets linked with
    -0010 %                       AND relationship) should be recognised and exported
    -0011 %                       (optional, default false)
    -0012 %   supressWarnings     true if warnings should be supressed (optional, default
    -0013 %                       false)
    -0014 %   sortIds             logical whether metabolites, reactions and genes
    -0015 %                       should be sorted alphabetically by their
    -0016 %                       identifiers (optional, default false)
    -0017 %
    -0018 % Usage: exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds)
    -0019 if nargin<2 || isempty(fileName)
    -0020     [fileName, pathName] = uiputfile({'*.xml;*.sbml'}, 'Select file for model export',[model.id '.xml']);
    -0021     if fileName == 0
    -0022         error('You should provide a file location')
    -0023     else
    -0024         fileName = fullfile(pathName,fileName);
    -0025     end
    -0026 end
    -0027 fileName=char(fileName);
    -0028 if nargin<3
    -0029     exportGeneComplexes=false;
    -0030 end
    -0031 if nargin<4
    -0032     supressWarnings=false;
    -0033 end
    -0034 if nargin<5
    -0035     sortIds=false;
    -0036 end
    -0037 if sortIds==true
    -0038     model=sortIdentifiers(model);
    -0039 end
    -0040 
    -0041 %If no subSystems are defined, then no need to use groups package
    -0042 if isfield(model,'subSystems')
    -0043     modelHasSubsystems=true;
    -0044 else
    -0045     modelHasSubsystems=false;
    -0046 end
    -0047 
    -0048 %The default SBML format settings, which are used as input for appropriate
    -0049 %libSBML functions to generate the blank SBML model structure before using
    -0050 %exporting in with OutputSBML to xml file
    -0051 sbmlLevel=3;
    -0052 sbmlVersion=1;
    -0053 sbmlPackages={'fbc'};
    -0054 sbmlPackageVersions=2;
    -0055 if modelHasSubsystems
    -0056     sbmlPackages={sbmlPackages,'groups'};
    -0057     sbmlPackageVersions=[sbmlPackageVersions,1];
    -0058 end
    -0059 
    -0060 %Check if the "unconstrained" field is still present. This shows if
    -0061 %exchange metabolites have been removed
    -0062 if ~isfield(model,'unconstrained')
    -0063     model.unconstrained=zeros(numel(model.mets),1);
    +0009 %   neverPrefix         true if prefixes are never added to identifiers,
    +0010 %                       even if they start with e.g. digits. This might result
    +0011 %                       in invalid SBML files (optional, default false)
    +0012 %   supressWarnings     true if warnings should be suppressed. This might
    +0013 %                       result in invalid SBML files, as no checks are
    +0014 %                       performed (optional, default false)
    +0015 %   sortIds             logical whether metabolites, reactions and genes
    +0016 %                       should be sorted alphabetically by their
    +0017 %                       identifiers (optional, default false)
    +0018 %
    +0019 % Usage: exportModel(model,fileName,neverPrefix,supressWarnings,sortIds)
    +0020 
    +0021 if nargin<2 || isempty(fileName)
    +0022     [fileName, pathName] = uiputfile({'*.xml;*.sbml'}, 'Select file for model export',[model.id '.xml']);
    +0023     if fileName == 0
    +0024         error('You should provide a file location')
    +0025     else
    +0026         fileName = fullfile(pathName,fileName);
    +0027     end
    +0028 end
    +0029 fileName=char(fileName);
    +0030 if nargin<3 || isempty(neverPrefix)
    +0031     neverPrefix=false;
    +0032 end
    +0033 if nargin<4 || isempty(supressWarnings)
    +0034     supressWarnings=false;
    +0035 end
    +0036 if nargin<5 || isempty(sortIds)
    +0037     sortIds=false;
    +0038 end
    +0039 if sortIds==true
    +0040     model=sortIdentifiers(model);
    +0041 end
    +0042 
    +0043 if isfield(model,'ec')
    +0044     warning("exportModel does not store information from the 'model.ec' structure. Use 'writeYAMLmodel(model)' to export all content from a GECKO model.")
    +0045 end
    +0046 
    +0047 %If no subSystems are defined, then no need to use groups package
    +0048 if isfield(model,'subSystems')
    +0049     modelHasSubsystems=true;
    +0050 else
    +0051     modelHasSubsystems=false;
    +0052 end
    +0053 
    +0054 %The default SBML format settings, which are used as input for the appropriate
    +0055 %libSBML functions to generate the blank SBML model structure before
    +0056 %exporting it with OutputSBML to an xml file
    +0057 sbmlLevel=3;
    +0058 sbmlVersion=1;
    +0059 sbmlPackages={'fbc'};
    +0060 sbmlPackageVersions=2;
    +0061 if modelHasSubsystems
    +0062     sbmlPackages={sbmlPackages,'groups'};
    +0063     sbmlPackageVersions=[sbmlPackageVersions,1];
     0064 end
     0065 
    -0066 %If model id and name do not exist, make sure that default
    -0067 %strings are included
    -0068 if ~isfield(model,'id')
    -0069     fprintf('WARNING: The model is missing the "id" field. Uses "blankID". \n');
    -0070     model.id='blankID';
    -0071 end
    -0072 if ~isfield(model,'name')
    -0073     fprintf('WARNING: The model is missing the "name" field. Uses "blankName". \n');
    -0074     model.name='blankName';
    -0075 end
    -0076 
    -0077 %Check the model structure
    -0078 if supressWarnings==false
    -0079     checkModelStruct(model,false);
    -0080 end
    -0081 
    -0082 %Add several blank fields, if they do not exist already. This is to reduce
    -0083 %the number of conditions below
    -0084 if ~isfield(model,'compMiriams')
    -0085     model.compMiriams=cell(numel(model.comps),1);
    -0086 end
    -0087 if ~isfield(model,'inchis')
    -0088     model.inchis=cell(numel(model.mets),1);
    +0066 %Check if the "unconstrained" field is still present. This shows if
    +0067 %exchange metabolites have been removed
    +0068 if ~isfield(model,'unconstrained')
    +0069     model.unconstrained=zeros(numel(model.mets),1);
    +0070 end
    +0071 
    +0072 %If model id and name do not exist, make sure that default
    +0073 %strings are included
    +0074 if ~isfield(model,'id')
    +0075     fprintf('WARNING: The model is missing the "id" field. Uses "blankID". \n');
    +0076     model.id='blankID';
    +0077 end
    +0078 if ~isfield(model,'name')
    +0079     fprintf('WARNING: The model is missing the "name" field. Uses "blankName". \n');
    +0080     model.name='blankName';
    +0081 end
    +0082 
    +0083 % Add prefixes if required
    +0084 if ~neverPrefix
    +0085     [model,hasChanged] = addIdentifierPrefix(model);
    +0086     dispEM(['The following fields have one or more entries that do not start '...
    +0087         'with a letter or _ (conflicting with SBML specifications). Prefixes '...
    +0088         'are added to all entries in those fields:'],false,hasChanged)
     0089 end
    -0090 if ~isfield(model,'metFormulas')
    -0091     model.metFormulas=cell(numel(model.mets),1);
    -0092 end
    -0093 if ~isfield(model,'metMiriams')
    -0094     model.metMiriams=cell(numel(model.mets),1);
    -0095 end
    -0096 if ~isfield(model,'geneMiriams') && isfield(model,'genes')
    -0097     model.geneMiriams=cell(numel(model.genes),1);
    -0098 end
    -0099 if ~isfield(model,'geneShortNames') && isfield(model,'genes')
    -0100     model.geneShortNames=cell(numel(model.genes),1);
    -0101 end
    -0102 if ~isfield(model,'subSystems')
    -0103     model.subSystems=cell(numel(model.rxns),1);
    -0104 end
    -0105 if ~isfield(model,'eccodes')
    -0106     model.eccodes=cell(numel(model.rxns),1);
    -0107 end
    -0108 if ~isfield(model,'rxnReferences')
    -0109     model.rxnReferences=cell(numel(model.rxns),1);
    -0110 end
    -0111 if ~isfield(model,'rxnConfidenceScores')
    -0112     model.rxnConfidenceScores=NaN(numel(model.rxns),1);
    -0113 end
    -0114 if ~isfield(model,'rxnNotes')
    -0115     model.rxnNotes=cell(numel(model.rxns),1);
    -0116 end
    -0117 if ~isfield(model,'rxnMiriams')
    -0118     model.rxnMiriams=cell(numel(model.rxns),1);
    -0119 end
    -0120 
    -0121 if sbmlLevel<3
    -0122     %Check if genes have associated compartments
    -0123     if ~isfield(model,'geneComps') && isfield(model,'genes')
    -0124         if supressWarnings==false
    -0125             EM='There are no compartments specified for genes. All genes will be assigned to the first compartment. This is because the SBML structure requires all elements to be assigned to a compartment';
    -0126             dispEM(EM,false);
    -0127         end
    -0128         model.geneComps=ones(numel(model.genes),1);
    -0129     end
    +0090 
    +0091 %Check the model structure
    +0092 if supressWarnings==false
    +0093     checkModelStruct(model);
    +0094 end
    +0095 
    +0096 %Add several blank fields, if they do not exist already. This is to reduce
    +0097 %the number of conditions below
    +0098 if ~isfield(model,'compMiriams')
    +0099     model.compMiriams=cell(numel(model.comps),1);
    +0100 end
    +0101 if ~isfield(model,'inchis')
    +0102     model.inchis=cell(numel(model.mets),1);
    +0103 end
    +0104 if ~isfield(model,'metFormulas')
    +0105     model.metFormulas=cell(numel(model.mets),1);
    +0106 end
    +0107 if ~isfield(model,'metMiriams')
    +0108     model.metMiriams=cell(numel(model.mets),1);
    +0109 end
    +0110 if ~isfield(model,'geneMiriams') && isfield(model,'genes')
    +0111     model.geneMiriams=cell(numel(model.genes),1);
    +0112 end
    +0113 if ~isfield(model,'geneShortNames') && isfield(model,'genes')
    +0114     model.geneShortNames=cell(numel(model.genes),1);
    +0115 end
    +0116 if ~isfield(model,'proteins') && isfield(model,'genes')
    +0117     model.proteins=cell(numel(model.genes),1);
    +0118 end
    +0119 if ~isfield(model,'subSystems')
    +0120     model.subSystems=cell(numel(model.rxns),1);
    +0121 end
    +0122 if ~isfield(model,'eccodes')
    +0123     model.eccodes=cell(numel(model.rxns),1);
    +0124 end
    +0125 if ~isfield(model,'rxnReferences')
    +0126     model.rxnReferences=cell(numel(model.rxns),1);
    +0127 end
    +0128 if ~isfield(model,'rxnConfidenceScores')
    +0129     model.rxnConfidenceScores=NaN(numel(model.rxns),1);
     0130 end
    -0131 
    -0132 %Convert ids to SBML-convenient format. This is to avoid the data loss when
    -0133 %unsupported characters are included in ids. Here we are using part from
    -0134 %convertSBMLID, originating from the COBRA Toolbox
    -0135 model.rxns=regexprep(model.rxns,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    -0136 model.mets=regexprep(model.mets,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    -0137 model.comps=regexprep(model.comps,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    -0138 if isfield(model,'genes')
    -0139     problemGenes=find(~cellfun('isempty',regexp(model.genes,'([^0-9_a-zA-Z])')));
    -0140     originalGenes=model.genes(problemGenes);
    -0141     replacedGenes=regexprep(model.genes(problemGenes),'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    -0142     model.genes(problemGenes)=replacedGenes;
    -0143     for i=1:numel(problemGenes)
    -0144         model.grRules = regexprep(model.grRules, ['(^|\s|\()' originalGenes{i} '($|\s|\))'], ['$1' replacedGenes{i} '$2']);
    -0145     end
    -0146 end
    -0147 
    -0148 %Generate an empty SBML structure
    -0149 modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0150 modelSBML.metaid=model.id;
    -0151 modelSBML.id=regexprep(model.id,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    -0152 modelSBML.name=model.name;
    -0153 
    -0154 if isfield(model,'annotation')
    -0155     if isfield(model.annotation,'note')
    -0156         modelSBML.notes=['<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>',regexprep(model.annotation.note,'<p>|</p>',''),'</p></body></notes>'];
    -0157     end
    -0158 else
    -0159     modelSBML.notes='<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>This file was generated using the exportModel function in RAVEN Toolbox 2 and OutputSBML in libSBML </p></body></notes>';
    -0160 end
    -0161 
    -0162 if isfield(model,'annotation')
    -0163     nameString='';
    -0164     if isfield(model.annotation,'familyName')
    -0165         if ~isempty(model.annotation.familyName)
    -0166             nameString=['<vCard:Family>' model.annotation.familyName '</vCard:Family>'];
    -0167         end
    -0168     end
    -0169     if isfield(model.annotation,'givenName')
    -0170         if ~isempty(model.annotation.givenName)
    -0171             nameString=[nameString '<vCard:Given>' model.annotation.givenName '</vCard:Given>'];
    -0172         end
    -0173     end
    -0174     email='';
    -0175     if isfield(model.annotation,'email')
    -0176         if ~isempty(model.annotation.email)
    -0177             email=['<vCard:EMAIL>' model.annotation.email '</vCard:EMAIL>'];
    -0178         end
    -0179     end
    -0180     org='';
    -0181     if isfield(model.annotation,'organization')
    -0182         if ~isempty(model.annotation.organization)
    -0183             org=['<vCard:ORG rdf:parseType="Resource"><vCard:Orgname>' model.annotation.organization '</vCard:Orgname></vCard:ORG>'];
    +0131 if ~isfield(model,'rxnNotes')
    +0132     model.rxnNotes=cell(numel(model.rxns),1);
    +0133 end
    +0134 if ~isfield(model,'rxnMiriams')
    +0135     model.rxnMiriams=cell(numel(model.rxns),1);
    +0136 end
    +0137 
    +0138 if sbmlLevel<3
    +0139     %Check if genes have associated compartments
    +0140     if ~isfield(model,'geneComps') && isfield(model,'genes')
    +0141         if supressWarnings==false
    +0142             EM='There are no compartments specified for genes. All genes will be assigned to the first compartment. This is because the SBML structure requires all elements to be assigned to a compartment';
    +0143             dispEM(EM,false);
    +0144         end
    +0145         model.geneComps=ones(numel(model.genes),1);
    +0146     end
    +0147 end
    +0148 
    +0149 %Convert ids to SBML-convenient format. This is to avoid the data loss when
    +0150 %unsupported characters are included in ids. Here we are using part from
    +0151 %convertSBMLID, originating from the COBRA Toolbox
    +0152 model.rxns=regexprep(model.rxns,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    +0153 model.mets=regexprep(model.mets,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    +0154 model.comps=regexprep(model.comps,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    +0155 if isfield(model,'genes')
    +0156     problemGenes=find(~cellfun('isempty',regexp(model.genes,'([^0-9_a-zA-Z])')));
    +0157     originalGenes=model.genes(problemGenes);
    +0158     replacedGenes=regexprep(model.genes(problemGenes),'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    +0159     model.genes(problemGenes)=replacedGenes;
    +0160     for i=1:numel(problemGenes)
    +0161         model.grRules = regexprep(model.grRules, ['(^|\s|\()' originalGenes{i} '($|\s|\))'], ['$1' replacedGenes{i} '$2']);
    +0162     end
    +0163 end
    +0164 
    +0165 %Generate an empty SBML structure
    +0166 modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0167 modelSBML.metaid=model.id;
    +0168 modelSBML.id=regexprep(model.id,'([^0-9_a-zA-Z])','__${num2str($1+0)}__');
    +0169 modelSBML.name=model.name;
    +0170 
    +0171 if isfield(model,'annotation')
    +0172     if isfield(model.annotation,'note')
    +0173         modelSBML.notes=['<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>',regexprep(model.annotation.note,'<p>|</p>',''),'</p></body></notes>'];
    +0174     end
    +0175 else
    +0176     modelSBML.notes='<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>This file was generated using the exportModel function in RAVEN Toolbox 2 and OutputSBML in libSBML </p></body></notes>';
    +0177 end
    +0178 
    +0179 if isfield(model,'annotation')
    +0180     nameString='';
    +0181     if isfield(model.annotation,'familyName')
    +0182         if ~isempty(model.annotation.familyName)
    +0183             nameString=['<vCard:Family>' model.annotation.familyName '</vCard:Family>'];
     0184         end
     0185     end
    -0186     if ~isempty(nameString) || ~isempty(email) || ~isempty(org) % Only fill .annotation if ownership data is provided
    -0187         modelSBML.annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.id '">'];
    -0188         modelSBML.annotation=[modelSBML.annotation '<dc:creator><rdf:Bag><rdf:li rdf:parseType="Resource">'];
    -0189         if ~isempty(nameString)
    -0190             modelSBML.annotation=[modelSBML.annotation '<vCard:N rdf:parseType="Resource">' nameString '</vCard:N>'];
    -0191         end
    -0192         modelSBML.annotation=[modelSBML.annotation email org '</rdf:li></rdf:Bag></dc:creator>'];
    -0193         modelSBML.annotation=[modelSBML.annotation '<dcterms:created rdf:parseType="Resource">'...
    -0194             '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:created><dcterms:modified rdf:parseType="Resource">'...
    -0195             '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:modified>'];
    -0196         if isfield(model.annotation,'taxonomy')
    -0197             modelSBML.annotation=[modelSBML.annotation '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="https://identifiers.org/taxonomy/' regexprep(model.annotation.taxonomy,'taxonomy/','') '"/></rdf:Bag></bqbiol:is>'];
    -0198         end
    -0199         modelSBML.annotation=[modelSBML.annotation '</rdf:Description></rdf:RDF></annotation>'];
    -0200     end
    -0201 end
    -0202 
    -0203 %Prepare compartments
    -0204 for i=1:numel(model.comps)
    -0205     %Add the default values, as these will be the same in all entries
    -0206     if i==1
    -0207         if isfield(modelSBML.compartment, 'sboTerm')
    -0208             modelSBML.compartment(i).sboTerm=290;
    -0209         end
    -0210         if isfield(modelSBML.compartment, 'spatialDimensions')
    -0211             modelSBML.compartment(i).spatialDimensions=3;
    -0212         end
    -0213         if isfield(modelSBML.compartment, 'size')
    -0214             modelSBML.compartment(i).size=1;
    +0186     if isfield(model.annotation,'givenName')
    +0187         if ~isempty(model.annotation.givenName)
    +0188             nameString=[nameString '<vCard:Given>' model.annotation.givenName '</vCard:Given>'];
    +0189         end
    +0190     end
    +0191     email='';
    +0192     if isfield(model.annotation,'email')
    +0193         if ~isempty(model.annotation.email)
    +0194             email=['<vCard:EMAIL>' model.annotation.email '</vCard:EMAIL>'];
    +0195         end
    +0196     end
    +0197     org='';
    +0198     if isfield(model.annotation,'organization')
    +0199         if ~isempty(model.annotation.organization)
    +0200             org=['<vCard:ORG rdf:parseType="Resource"><vCard:Orgname>' model.annotation.organization '</vCard:Orgname></vCard:ORG>'];
    +0201         end
    +0202     end
    +0203     if ~isempty(nameString) || ~isempty(email) || ~isempty(org) % Only fill .annotation if ownership data is provided
    +0204         modelSBML.annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.id '">'];
    +0205         modelSBML.annotation=[modelSBML.annotation '<dc:creator><rdf:Bag><rdf:li rdf:parseType="Resource">'];
    +0206         if ~isempty(nameString)
    +0207             modelSBML.annotation=[modelSBML.annotation '<vCard:N rdf:parseType="Resource">' nameString '</vCard:N>'];
    +0208         end
    +0209         modelSBML.annotation=[modelSBML.annotation email org '</rdf:li></rdf:Bag></dc:creator>'];
    +0210         modelSBML.annotation=[modelSBML.annotation '<dcterms:created rdf:parseType="Resource">'...
    +0211             '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:created><dcterms:modified rdf:parseType="Resource">'...
    +0212             '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:modified>'];
    +0213         if isfield(model.annotation,'taxonomy')
    +0214             modelSBML.annotation=[modelSBML.annotation '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="https://identifiers.org/taxonomy/' regexprep(model.annotation.taxonomy,'taxonomy/','') '"/></rdf:Bag></bqbiol:is>'];
     0215         end
    -0216         if isfield(modelSBML.compartment, 'constant')
    -0217             modelSBML.compartment(i).constant=1;
    -0218         end
    -0219         if isfield(modelSBML.compartment, 'isSetSize')
    -0220             modelSBML.compartment(i).isSetSize=1;
    -0221         end
    -0222         if isfield(modelSBML.compartment, 'isSetSpatialDimensions')
    -0223             modelSBML.compartment(i).isSetSpatialDimensions=1;
    -0224         end
    -0225     end
    -0226     %Copy the default values to the next entry as long as it is not the
    -0227     %last one
    -0228     if i<numel(model.comps)
    -0229         modelSBML.compartment(i+1)=modelSBML.compartment(i);
    -0230     end
    -0231     
    -0232     if isfield(modelSBML.compartment,'metaid')
    -0233         if regexp(model.comps{i},'^[^a-zA-Z_]')
    -0234             EM='The compartment IDs are in numeric format. For the compliance with SBML specifications, compartment IDs will be preceded with "c_" string';
    -0235             dispEM(EM,false);
    -0236             model.comps(i)=strcat('c_',model.comps(i));
    -0237         end
    -0238         modelSBML.compartment(i).metaid=model.comps{i};
    -0239     end
    -0240     %Prepare Miriam strings
    -0241     if ~isempty(model.compMiriams{i})
    -0242         [~,sbo_ind] = ismember('sbo',model.compMiriams{i}.name);
    -0243         if sbo_ind > 0
    -0244             modelSBML.compartment(i).sboTerm=str2double(regexprep(model.compMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    -0245             % remove the SBO term from compMiriams so the information is
    -0246             % not duplicated in the "annotation" field later on
    -0247             model.compMiriams{i}.name(sbo_ind) = [];
    -0248             model.compMiriams{i}.value(sbo_ind) = [];
    -0249         end
    -0250     end
    -0251     if ~isempty(model.compMiriams{i}) && isfield(modelSBML.compartment(i),'annotation')
    -0252         modelSBML.compartment(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.comps{i} '">'];
    -0253         modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation '<bqbiol:is><rdf:Bag>'];
    -0254         modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation getMiriam(model.compMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    -0255     end
    -0256     if isfield(modelSBML.compartment, 'name')
    -0257         modelSBML.compartment(i).name=model.compNames{i};
    -0258     end
    -0259     if isfield(modelSBML.compartment, 'id')
    -0260         modelSBML.compartment(i).id=model.comps{i};
    -0261     end
    -0262     
    -0263 end
    -0264 
    -0265 %Begin writing species
    -0266 for i=1:numel(model.mets)
    -0267     %Add the default values, as these will be the same in all entries
    -0268     if i==1
    -0269         if isfield(modelSBML.species, 'sboTerm')
    -0270             modelSBML.species(i).sboTerm=247;
    -0271         end
    -0272         if isfield(modelSBML.species, 'initialAmount')
    -0273             modelSBML.species(i).initialAmount=1;
    -0274         end
    -0275         if isfield(modelSBML.species, 'initialConcentration')
    -0276             modelSBML.species(i).initialConcentration=0;
    -0277         end
    -0278         if isfield(modelSBML.species, 'isSetInitialAmount')
    -0279             modelSBML.species(i).isSetInitialAmount=1;
    -0280         end
    -0281         if isfield(modelSBML.species, 'isSetInitialConcentration')
    -0282             modelSBML.species(i).isSetInitialConcentration=1;
    +0216         modelSBML.annotation=[modelSBML.annotation '</rdf:Description></rdf:RDF></annotation>'];
    +0217     end
    +0218 end
    +0219 
    +0220 %Prepare compartments
    +0221 for i=1:numel(model.comps)
    +0222     %Add the default values, as these will be the same in all entries
    +0223     if i==1
    +0224         if isfield(modelSBML.compartment, 'sboTerm')
    +0225             modelSBML.compartment(i).sboTerm=290;
    +0226         end
    +0227         if isfield(modelSBML.compartment, 'spatialDimensions')
    +0228             modelSBML.compartment(i).spatialDimensions=3;
    +0229         end
    +0230         if isfield(modelSBML.compartment, 'size')
    +0231             modelSBML.compartment(i).size=1;
    +0232         end
    +0233         if isfield(modelSBML.compartment, 'constant')
    +0234             modelSBML.compartment(i).constant=1;
    +0235         end
    +0236         if isfield(modelSBML.compartment, 'isSetSize')
    +0237             modelSBML.compartment(i).isSetSize=1;
    +0238         end
    +0239         if isfield(modelSBML.compartment, 'isSetSpatialDimensions')
    +0240             modelSBML.compartment(i).isSetSpatialDimensions=1;
    +0241         end
    +0242     end
    +0243     %Copy the default values to the next entry as long as it is not the
    +0244     %last one
    +0245     if i<numel(model.comps)
    +0246         modelSBML.compartment(i+1)=modelSBML.compartment(i);
    +0247     end
    +0248     
    +0249     if isfield(modelSBML.compartment,'metaid')
    +0250         modelSBML.compartment(i).metaid=model.comps{i};
    +0251     end
    +0252     %Prepare Miriam strings
    +0253     if ~isempty(model.compMiriams{i})
    +0254         [~,sbo_ind] = ismember('sbo',model.compMiriams{i}.name);
    +0255         if sbo_ind > 0
    +0256             modelSBML.compartment(i).sboTerm=str2double(regexprep(model.compMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    +0257             % remove the SBO term from compMiriams so the information is
    +0258             % not duplicated in the "annotation" field later on
    +0259             model.compMiriams{i}.name(sbo_ind) = [];
    +0260             model.compMiriams{i}.value(sbo_ind) = [];
    +0261         end
    +0262     end
    +0263     if ~isempty(model.compMiriams{i}) && isfield(modelSBML.compartment(i),'annotation')
    +0264         modelSBML.compartment(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.comps{i} '">'];
    +0265         modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation '<bqbiol:is><rdf:Bag>'];
    +0266         modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation getMiriam(model.compMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    +0267     end
    +0268     if isfield(modelSBML.compartment, 'name')
    +0269         modelSBML.compartment(i).name=model.compNames{i};
    +0270     end
    +0271     if isfield(modelSBML.compartment, 'id')
    +0272         modelSBML.compartment(i).id=model.comps{i};
    +0273     end
    +0274     
    +0275 end
    +0276 
    +0277 %Begin writing species
    +0278 for i=1:numel(model.mets)
    +0279     %Add the default values, as these will be the same in all entries
    +0280     if i==1
    +0281         if isfield(modelSBML.species, 'sboTerm')
    +0282             modelSBML.species(i).sboTerm=247;
     0283         end
    -0284     end
    -0285     %Copy the default values to the next entry as long as it is not the
    -0286     %last one
    -0287     if i<numel(model.mets)
    -0288         modelSBML.species(i+1)=modelSBML.species(i);
    -0289     end
    -0290     
    -0291     if isfield(modelSBML.species,'metaid')
    -0292         modelSBML.species(i).metaid=['M_' model.mets{i}];
    -0293     end
    -0294     if isfield(modelSBML.species, 'name')
    -0295         modelSBML.species(i).name=model.metNames{i};
    +0284         if isfield(modelSBML.species, 'initialAmount')
    +0285             modelSBML.species(i).initialAmount=1;
    +0286         end
    +0287         if isfield(modelSBML.species, 'initialConcentration')
    +0288             modelSBML.species(i).initialConcentration=0;
    +0289         end
    +0290         if isfield(modelSBML.species, 'isSetInitialAmount')
    +0291             modelSBML.species(i).isSetInitialAmount=1;
    +0292         end
    +0293         if isfield(modelSBML.species, 'isSetInitialConcentration')
    +0294             modelSBML.species(i).isSetInitialConcentration=1;
    +0295         end
     0296     end
    -0297     if isfield(modelSBML.species, 'id')
    -0298         modelSBML.species(i).id=['M_' model.mets{i}];
    -0299     end
    -0300     if isfield(modelSBML.species, 'compartment')
    -0301         modelSBML.species(i).compartment=model.comps{model.metComps(i)};
    -0302     end
    -0303     if isfield(model,'unconstrained')
    -0304         if model.unconstrained(i)
    -0305             modelSBML.species(i).boundaryCondition=1;
    -0306         end
    -0307     end
    -0308     if isfield(modelSBML.species, 'fbc_charge') && isfield(model,'metCharges')
    -0309         if ~isnan(model.metCharges(i))
    -0310             modelSBML.species(i).fbc_charge=model.metCharges(i);
    -0311             modelSBML.species(i).isSetfbc_charge=1;
    -0312         else
    -0313             modelSBML.species(i).isSetfbc_charge=0;
    -0314         end
    -0315     end
    -0316     if ~isempty(model.metMiriams{i})
    -0317         [~,sbo_ind] = ismember('sbo',model.metMiriams{i}.name);
    -0318         if sbo_ind > 0
    -0319             modelSBML.species(i).sboTerm=str2double(regexprep(model.metMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    -0320             % remove the SBO term from metMiriams so the information is
    -0321             % not duplicated in the "annotation" field later on
    -0322             model.metMiriams{i}.name(sbo_ind) = [];
    -0323             model.metMiriams{i}.value(sbo_ind) = [];
    -0324         end
    -0325     end
    -0326     if isfield(modelSBML.species,'annotation')
    -0327         if ~isempty(model.metMiriams{i}) || ~isempty(model.metFormulas{i})
    -0328             hasInchi=false;
    -0329             if ~isempty(model.metFormulas{i})
    -0330                 %Only export formula if there is no InChI. This is because
    -0331                 %the metFormulas field is populated by InChIs if available
    -0332                 if ~isempty(model.inchis{i})
    -0333                     hasInchi=true;
    -0334                 end
    -0335                 if hasInchi==false
    -0336                     modelSBML.species(i).fbc_chemicalFormula=model.metFormulas{i};
    -0337                 end
    -0338             end
    -0339             if ~isempty(model.metMiriams{i}) || hasInchi==true
    -0340                 modelSBML.species(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_M_' model.mets{i} '">'];
    -0341                 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<bqbiol:is><rdf:Bag>'];
    -0342                 if ~isempty(model.metMiriams{i})
    -0343                     modelSBML.species(i).annotation=[modelSBML.species(i).annotation getMiriam(model.metMiriams{i})];
    -0344                 end
    -0345                 if hasInchi==true
    -0346                     modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<rdf:li rdf:resource="https://identifiers.org/inchi/InChI=' regexprep(model.inchis{i},'^InChI=','') '"/>'];
    -0347                     modelSBML.species(i).fbc_chemicalFormula=char(regexp(model.inchis{i}, '/(\w+)/', 'tokens', 'once'));
    -0348                 end
    -0349                 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    +0297     %Copy the default values to the next entry as long as it is not the
    +0298     %last one
    +0299     if i<numel(model.mets)
    +0300         modelSBML.species(i+1)=modelSBML.species(i);
    +0301     end
    +0302     
    +0303     if isfield(modelSBML.species,'metaid')
    +0304         modelSBML.species(i).metaid=model.mets{i};
    +0305     end
    +0306     if isfield(modelSBML.species, 'name')
    +0307         modelSBML.species(i).name=model.metNames{i};
    +0308     end
    +0309     if isfield(modelSBML.species, 'id')
    +0310         modelSBML.species(i).id=model.mets{i};
    +0311     end
    +0312     if isfield(modelSBML.species, 'compartment')
    +0313         modelSBML.species(i).compartment=model.comps{model.metComps(i)};
    +0314     end
    +0315     if isfield(model,'unconstrained')
    +0316         if model.unconstrained(i)
    +0317             modelSBML.species(i).boundaryCondition=1;
    +0318         end
    +0319     end
    +0320     if isfield(modelSBML.species, 'fbc_charge') && isfield(model,'metCharges')
    +0321         if ~isnan(model.metCharges(i))
    +0322             modelSBML.species(i).fbc_charge=model.metCharges(i);
    +0323             modelSBML.species(i).isSetfbc_charge=1;
    +0324         else
    +0325             modelSBML.species(i).isSetfbc_charge=0;
    +0326         end
    +0327     end
    +0328     if ~isempty(model.metMiriams{i})
    +0329         [~,sbo_ind] = ismember('sbo',model.metMiriams{i}.name);
    +0330         if sbo_ind > 0
    +0331             modelSBML.species(i).sboTerm=str2double(regexprep(model.metMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    +0332             % remove the SBO term from metMiriams so the information is
    +0333             % not duplicated in the "annotation" field later on
    +0334             model.metMiriams{i}.name(sbo_ind) = [];
    +0335             model.metMiriams{i}.value(sbo_ind) = [];
    +0336         end
    +0337     end
    +0338     if isfield(modelSBML.species,'annotation')
    +0339         if ~isempty(model.metMiriams{i}) || ~isempty(model.metFormulas{i})
    +0340             hasInchi=false;
    +0341             if ~isempty(model.metFormulas{i})
    +0342                 %Only export formula if there is no InChI. This is because
    +0343                 %the metFormulas field is populated by InChIs if available
    +0344                 if ~isempty(model.inchis{i})
    +0345                     hasInchi=true;
    +0346                 end
    +0347                 if hasInchi==false
    +0348                     modelSBML.species(i).fbc_chemicalFormula=model.metFormulas{i};
    +0349                 end
     0350             end
    -0351         end
    -0352     end
    -0353 end
    -0354 
    -0355 if isfield(model,'genes')
    -0356     for i=1:numel(model.genes)
    -0357         %Add the default values, as these will be the same in all entries
    -0358         if i==1
    -0359             if isfield(modelSBML.fbc_geneProduct, 'sboTerm')
    -0360                 modelSBML.fbc_geneProduct(i).sboTerm=243;
    -0361             end
    -0362         end
    -0363         %Copy the default values to the next index as long as it is not the
    -0364         %last one
    -0365         if i<numel(model.genes)
    -0366             modelSBML.fbc_geneProduct(i+1)=modelSBML.fbc_geneProduct(i);
    -0367         end
    -0368         
    -0369         if isfield(modelSBML.fbc_geneProduct,'metaid')
    -0370             modelSBML.fbc_geneProduct(i).metaid=model.genes{i};
    -0371         end
    -0372         if ~isempty(model.geneMiriams{i})
    -0373             [~,sbo_ind] = ismember('sbo',model.geneMiriams{i}.name);
    -0374             if sbo_ind > 0
    -0375                 modelSBML.fbc_geneProduct(i).sboTerm=str2double(regexprep(model.geneMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    -0376                 % remove the SBO term from compMiriams so the information is
    -0377                 % not duplicated in the "annotation" field later on
    -0378                 model.geneMiriams{i}.name(sbo_ind) = [];
    -0379                 model.geneMiriams{i}.value(sbo_ind) = [];
    -0380             end
    -0381         end
    -0382         if ~isempty(model.geneMiriams{i}) && isfield(modelSBML.fbc_geneProduct(i),'annotation')
    -0383             modelSBML.fbc_geneProduct(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.genes{i} '">'];
    -0384             modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation '<bqbiol:is><rdf:Bag>'];
    -0385             modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation getMiriam(model.geneMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    -0386         end
    -0387         if isfield(modelSBML.fbc_geneProduct, 'fbc_id')
    -0388             modelSBML.fbc_geneProduct(i).fbc_id=model.genes{i};
    -0389         end
    -0390         if isfield(modelSBML.fbc_geneProduct, 'fbc_label') && isfield(model,'geneShortNames')
    -0391             if isempty(model.geneShortNames{i})
    -0392                 modelSBML.fbc_geneProduct(i).fbc_label=model.genes{i};
    -0393             else
    -0394                 modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i};
    -0395             end
    -0396         end
    -0397     end
    -0398     if exportGeneComplexes==true
    -0399         %Also add the complexes as genes. This is done by splitting grRules
    -0400         %on "or" and adding the ones which contain several genes
    -0401         geneComplexes={};
    -0402         if isfield(model,'grRules')
    -0403             %Only grRules which contain " and " can be complexes
    -0404             uniqueRules=unique(model.grRules);
    -0405             I=cellfun(@any,strfind(uniqueRules,' and '));
    -0406             uniqueRules(~I)=[];
    -0407             uniqueRules=strrep(uniqueRules,'(','');
    -0408             uniqueRules=strrep(uniqueRules,')','');
    -0409             uniqueRules=strrep(uniqueRules,' and ',':');
    -0410             for i=1:numel(uniqueRules)
    -0411                 genes=regexp(uniqueRules(i),' or ','split');
    -0412                 genes=genes{1}(:);
    -0413                 %Check which ones are complexes
    -0414                 I=cellfun(@any,strfind(genes,':'));
    -0415                 geneComplexes=[geneComplexes;genes(I)];
    -0416             end
    -0417         end
    -0418         geneComplexes=unique(geneComplexes);
    -0419         if ~isempty(geneComplexes)
    -0420             %Then add them as genes. There is a possiblity that a complex
    -0421             %A&B is added as separate from B&A. This is not really an issue
    -0422             %so this is not dealt with
    -0423             for i=1:numel(geneComplexes)
    -0424                 modelSBML.fbc_geneProduct(numel(model.genes)+i)=modelSBML.fbc_geneProduct(1);
    -0425                 if isfield(modelSBML.fbc_geneProduct,'metaid')
    -0426                     modelSBML.fbc_geneProduct(numel(model.genes)+i).metaid=geneComplexes{i};
    -0427                 end
    -0428                 if isfield(modelSBML.fbc_geneProduct,'fbc_id')
    -0429                     modelSBML.fbc_geneProduct(numel(model.genes)+i).fbc_id=geneComplexes{i};
    -0430                 else
    -0431                     modelSBML.fbc_geneProduct(i).fbc_label=modelSBML.fbc_geneProduct(i).fbc_id;
    -0432                 end
    -0433             end
    +0351             if ~isempty(model.metMiriams{i}) || hasInchi==true
    +0352                 modelSBML.species(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.mets{i} '">'];
    +0353                 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<bqbiol:is><rdf:Bag>'];
    +0354                 if ~isempty(model.metMiriams{i})
    +0355                     modelSBML.species(i).annotation=[modelSBML.species(i).annotation getMiriam(model.metMiriams{i})];
    +0356                 end
    +0357                 if hasInchi==true
    +0358                     modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<rdf:li rdf:resource="https://identifiers.org/inchi/InChI=' regexprep(model.inchis{i},'^InChI=','') '"/>'];
    +0359                     modelSBML.species(i).fbc_chemicalFormula=char(regexp(model.inchis{i}, '/(\w+)/', 'tokens', 'once'));
    +0360                 end
    +0361                 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    +0362             end
    +0363         end
    +0364     end
    +0365 end
    +0366 
    +0367 if isfield(model,'genes')
    +0368     for i=1:numel(model.genes)
    +0369         %Add the default values, as these will be the same in all entries
    +0370         if i==1
    +0371             if isfield(modelSBML.fbc_geneProduct, 'sboTerm')
    +0372                 modelSBML.fbc_geneProduct(i).sboTerm=243;
    +0373             end
    +0374         end
    +0375         %Copy the default values to the next index as long as it is not the
    +0376         %last one
    +0377         if i<numel(model.genes)
    +0378             modelSBML.fbc_geneProduct(i+1)=modelSBML.fbc_geneProduct(i);
    +0379         end
    +0380         
    +0381         if isfield(modelSBML.fbc_geneProduct,'metaid')
    +0382             modelSBML.fbc_geneProduct(i).metaid=model.genes{i};
    +0383         end
    +0384         if ~isempty(model.geneMiriams{i})
    +0385             [~,sbo_ind] = ismember('sbo',model.geneMiriams{i}.name);
    +0386             if sbo_ind > 0
    +0387                 modelSBML.fbc_geneProduct(i).sboTerm=str2double(regexprep(model.geneMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    +0388                 % remove the SBO term from compMiriams so the information is
    +0389                 % not duplicated in the "annotation" field later on
    +0390                 model.geneMiriams{i}.name(sbo_ind) = [];
    +0391                 model.geneMiriams{i}.value(sbo_ind) = [];
    +0392             end
    +0393         end
    +0394         if ~isempty(model.geneMiriams{i}) && isfield(modelSBML.fbc_geneProduct(i),'annotation')
    +0395             modelSBML.fbc_geneProduct(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.genes{i} '">'];
    +0396             modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation '<bqbiol:is><rdf:Bag>'];
    +0397             modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation getMiriam(model.geneMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    +0398         end
    +0399         if isfield(modelSBML.fbc_geneProduct, 'fbc_id')
    +0400             modelSBML.fbc_geneProduct(i).fbc_id=model.genes{i};
    +0401         end
    +0402         if isfield(modelSBML.fbc_geneProduct, 'fbc_label') && isfield(model,'geneShortNames')
    +0403             if isempty(model.geneShortNames{i})
    +0404                 modelSBML.fbc_geneProduct(i).fbc_label=model.genes{i};
    +0405             else
    +0406                 modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i};
    +0407             end
    +0408         end
    +0409         if isfield(modelSBML.fbc_geneProduct, 'fbc_name') && isfield(model,'proteins')
    +0410             if ~isempty(model.proteins{i})
    +0411                 modelSBML.fbc_geneProduct(i).fbc_name=model.proteins{i};
    +0412             end
    +0413         end
    +0414     end
    +0415 end
    +0416 
    +0417 %Generate a list of unique fbc_bound names
    +0418 totalValues=[model.lb; model.ub];
    +0419 totalNames=cell(size(totalValues,1),1);
    +0420 
    +0421 listUniqueValues=unique(totalValues);
    +0422 
    +0423 for i=1:length(listUniqueValues)
    +0424     listUniqueNames{i,1}=['FB',num2str(i),'N',num2str(abs(round(listUniqueValues(i))))]; % create unique flux bound IDs.
    +0425     ind=find(ismember(totalValues,listUniqueValues(i)));
    +0426     totalNames(ind)=listUniqueNames(i,1);
    +0427 end
    +0428 
    +0429 for i=1:length(listUniqueNames)
    +0430     %Add the default values, as these will be the same in all entries
    +0431     if i==1
    +0432         if isfield(modelSBML.parameter, 'constant')
    +0433             modelSBML.parameter(i).constant=1;
     0434         end
    -0435     end
    -0436 end
    -0437 
    -0438 %Generate a list of unique fbc_bound names
    -0439 totalValues=[model.lb; model.ub];
    -0440 totalNames=cell(size(totalValues,1),1);
    -0441 
    -0442 listUniqueValues=unique(totalValues);
    -0443 
    -0444 for i=1:length(listUniqueValues)
    -0445     listUniqueNames{i,1}=['FB',num2str(i),'N',num2str(abs(round(listUniqueValues(i))))]; % create unique flux bound IDs.
    -0446     ind=find(ismember(totalValues,listUniqueValues(i)));
    -0447     totalNames(ind)=listUniqueNames(i,1);
    -0448 end
    -0449 
    -0450 for i=1:length(listUniqueNames)
    -0451     %Add the default values, as these will be the same in all entries
    -0452     if i==1
    -0453         if isfield(modelSBML.parameter, 'constant')
    -0454             modelSBML.parameter(i).constant=1;
    -0455         end
    -0456         if isfield(modelSBML.parameter, 'isSetValue')
    -0457             modelSBML.parameter(i).isSetValue=1;
    -0458         end
    -0459     end
    -0460     %Copy the default values to the next index as long as it is not the
    -0461     %last one
    -0462     if i<numel(listUniqueNames)
    -0463         modelSBML.parameter(i+1)=modelSBML.parameter(i);
    -0464     end
    -0465     modelSBML.parameter(i).id=listUniqueNames{i};
    -0466     modelSBML.parameter(i).value=listUniqueValues(i);
    -0467 end
    -0468 
    -0469 for i=1:numel(model.rxns)
    -0470     %Add the default values, as these will be the same in all entries
    -0471     if i==1
    -0472         if isfield(modelSBML.reaction, 'sboTerm')
    -0473             modelSBML.reaction(i).sboTerm=176;
    -0474         end
    -0475         if isfield(modelSBML.reaction, 'isSetFast')
    -0476             modelSBML.reaction(i).isSetFast=1;
    -0477         end
    -0478     end
    -0479     %Copy the default values to the next index as long as it is not the
    -0480     %last one
    -0481     if i<numel(model.rxns)
    -0482         modelSBML.reaction(i+1)=modelSBML.reaction(i);
    -0483     end
    -0484     
    -0485     if isfield(modelSBML.reaction,'metaid')
    -0486         modelSBML.reaction(i).metaid=['R_' model.rxns{i}];
    -0487     end
    -0488     
    -0489     %Export notes information
    -0490     if (~isnan(model.rxnConfidenceScores(i)) || ~isempty(model.rxnReferences{i}) || ~isempty(model.rxnNotes{i}))
    -0491         modelSBML.reaction(i).notes='<notes><body xmlns="http://www.w3.org/1999/xhtml">';
    -0492         if ~isnan(model.rxnConfidenceScores(i))
    -0493             modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>Confidence Level: ' num2str(model.rxnConfidenceScores(i)) '</p>'];
    -0494         end
    -0495         if ~isempty(model.rxnReferences{i})
    -0496             modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>AUTHORS: ' model.rxnReferences{i} '</p>'];
    -0497         end
    -0498         if ~isempty(model.rxnNotes{i})
    -0499             modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>NOTES: ' model.rxnNotes{i} '</p>'];
    -0500         end
    -0501         modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '</body></notes>'];
    -0502     end
    -0503     
    -0504     % Export SBO terms from rxnMiriams
    -0505     if ~isempty(model.rxnMiriams{i})
    -0506         [~,sbo_ind] = ismember('sbo',model.rxnMiriams{i}.name);
    -0507         if sbo_ind > 0
    -0508             modelSBML.reaction(i).sboTerm=str2double(regexprep(model.rxnMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    -0509             % remove the SBO term from rxnMiriams so the information is not
    -0510             % duplicated in the "annotation" field later on
    -0511             model.rxnMiriams{i}.name(sbo_ind) = [];
    -0512             model.rxnMiriams{i}.value(sbo_ind) = [];
    -0513         end
    -0514     end
    -0515     
    -0516     %Export annotation information from rxnMiriams
    -0517     if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i})
    -0518         modelSBML.reaction(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_R_' model.rxns{i} '">'];
    -0519         modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<bqbiol:is><rdf:Bag>'];
    -0520         if ~isempty(model.eccodes{i})
    -0521             eccodes=regexp(model.eccodes{i},';','split');
    -0522             for j=1:numel(eccodes)
    -0523                 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation  '<rdf:li rdf:resource="https://identifiers.org/ec-code/' regexprep(eccodes{j},'ec-code/|EC','') '"/>'];
    -0524             end
    -0525         end
    -0526         modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation getMiriam(model.rxnMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    -0527     end
    -0528     
    -0529     if isfield(modelSBML.reaction, 'name')
    -0530         modelSBML.reaction(i).name=model.rxnNames{i};
    -0531     end
    -0532     if isfield(modelSBML.reaction, 'id')
    -0533         modelSBML.reaction(i).id=['R_' model.rxns{i}];
    -0534     end
    -0535     
    -0536     %Add the information about reactants and products
    -0537     involvedMets=addReactantsProducts(model,modelSBML,i);
    -0538     for j=1:numel(involvedMets.reactant)
    -0539         if j<numel(involvedMets.reactant)
    -0540             modelSBML.reaction(i).reactant(j+1)=modelSBML.reaction(i).reactant(j);
    -0541         end
    -0542         modelSBML.reaction(i).reactant(j).species=involvedMets.reactant(j).species;
    -0543         modelSBML.reaction(i).reactant(j).stoichiometry=involvedMets.reactant(j).stoichiometry;
    -0544         modelSBML.reaction(i).reactant(j).isSetStoichiometry=involvedMets.reactant(j).isSetStoichiometry;
    -0545         modelSBML.reaction(i).reactant(j).constant=involvedMets.reactant(j).constant;
    -0546     end
    -0547     if numel(involvedMets.reactant)==0
    -0548         modelSBML.reaction(i).reactant='';
    -0549     end
    -0550     for j=1:numel(involvedMets.product)
    -0551         if j<numel(involvedMets.product)
    -0552             modelSBML.reaction(i).product(j+1)=modelSBML.reaction(i).product(j);
    -0553         end
    -0554         modelSBML.reaction(i).product(j).species=involvedMets.product(j).species;
    -0555         modelSBML.reaction(i).product(j).stoichiometry=involvedMets.product(j).stoichiometry;
    -0556         modelSBML.reaction(i).product(j).isSetStoichiometry=involvedMets.product(j).isSetStoichiometry;
    -0557         modelSBML.reaction(i).product(j).constant=involvedMets.product(j).constant;
    -0558     end
    -0559     if numel(involvedMets.product)==0
    -0560         modelSBML.reaction(i).product='';
    -0561     end
    -0562     %Export reversibility information. Reactions are irreversible by
    -0563     %default
    -0564     if model.rev(i)==1
    -0565         modelSBML.reaction(i).reversible=1;
    -0566     end
    -0567     if isfield(model, 'rxnComps')
    -0568         modelSBML.reaction(i).compartment=model.comps{model.rxnComps(i)};
    -0569     end
    -0570     if isfield(model, 'grRules')
    -0571         modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association=model.grRules{i};
    -0572     end
    -0573     modelSBML.reaction(i).fbc_lowerFluxBound=totalNames{i};
    -0574     modelSBML.reaction(i).fbc_upperFluxBound=totalNames{length(model.lb)+i};
    -0575 end
    -0576 
    -0577 %Prepare subSystems Code taken from COBRA functions getModelSubSystems,
    -0578 %writeSBML, findRxnsFromSubSystem under GNU General Public License v3.0,
    -0579 %license file in readme/GPL.MD. Code modified for RAVEN
    -0580 if modelHasSubsystems
    -0581     modelSBML.groups_group.groups_kind = 'partonomy';
    -0582     modelSBML.groups_group.sboTerm = 633;
    -0583     tmpStruct=modelSBML.groups_group;
    -0584 
    -0585     rxns=strcat('R_',model.rxns);
    -0586     if ~any(cellfun(@iscell,model.subSystems))
    -0587         if ~any(~cellfun(@isempty,model.subSystems))
    -0588             subSystems = {};
    -0589         else
    -0590             subSystems = setdiff(model.subSystems,'');
    -0591         end
    -0592     else
    -0593         orderedSubs = cellfun(@(x) columnVector(x),model.subSystems,'UniformOUtput',false);
    -0594         subSystems = setdiff(vertcat(orderedSubs{:}),'');
    -0595     end
    -0596     if isempty(subSystems)
    -0597         subSystems = {};
    -0598     end
    -0599     if ~isempty(subSystems)
    -0600         %Build the groups for the group package
    -0601         groupIDs = strcat('group',cellfun(@num2str, num2cell(1:length(subSystems)),'UniformOutput',false));
    -0602         for i = 1:length(subSystems)
    -0603             cgroup = tmpStruct;
    -0604             if ~any(cellfun(@iscell,model.subSystems))
    -0605                 present = ismember(model.subSystems,subSystems{i});
    -0606             else
    -0607                 present = cellfun(@(x) any(ismember(x,subSystems{i})),model.subSystems);
    -0608             end
    -0609             groupMembers = rxns(present);
    -0610             for j = 1:numel(groupMembers)
    -0611                 cMember = tmpStruct.groups_member;
    -0612                 cMember.groups_idRef = groupMembers{j};
    -0613                 if j == 1
    -0614                     cgroup.groups_member = cMember;
    -0615                 else
    -0616                     cgroup.groups_member(j) = cMember;
    -0617                 end
    -0618             end
    -0619             cgroup.groups_id = groupIDs{i};
    -0620             cgroup.groups_name = subSystems{i};
    -0621             if i == 1
    -0622                 modelSBML.groups_group = cgroup;
    -0623             else
    -0624                 modelSBML.groups_group(i) = cgroup;
    -0625             end
    -0626         end
    -0627     end
    -0628 end
    -0629 
    -0630 %Prepare fbc_objective subfield
    +0435         if isfield(modelSBML.parameter, 'isSetValue')
    +0436             modelSBML.parameter(i).isSetValue=1;
    +0437         end
    +0438     end
    +0439     %Copy the default values to the next index as long as it is not the
    +0440     %last one
    +0441     if i<numel(listUniqueNames)
    +0442         modelSBML.parameter(i+1)=modelSBML.parameter(i);
    +0443     end
    +0444     modelSBML.parameter(i).id=listUniqueNames{i};
    +0445     modelSBML.parameter(i).value=listUniqueValues(i);
    +0446 end
    +0447 
    +0448 for i=1:numel(model.rxns)
    +0449     %Add the default values, as these will be the same in all entries
    +0450     if i==1
    +0451         if isfield(modelSBML.reaction, 'sboTerm')
    +0452             modelSBML.reaction(i).sboTerm=176;
    +0453         end
    +0454         if isfield(modelSBML.reaction, 'isSetFast')
    +0455             modelSBML.reaction(i).isSetFast=1;
    +0456         end
    +0457     end
    +0458     %Copy the default values to the next index as long as it is not the
    +0459     %last one
    +0460     if i<numel(model.rxns)
    +0461         modelSBML.reaction(i+1)=modelSBML.reaction(i);
    +0462     end
    +0463     
    +0464     if isfield(modelSBML.reaction,'metaid')
    +0465         modelSBML.reaction(i).metaid=model.rxns{i};
    +0466     end
    +0467     
    +0468     %Export notes information
    +0469     if (~isnan(model.rxnConfidenceScores(i)) || ~isempty(model.rxnReferences{i}) || ~isempty(model.rxnNotes{i}))
    +0470         modelSBML.reaction(i).notes='<notes><body xmlns="http://www.w3.org/1999/xhtml">';
    +0471         if ~isnan(model.rxnConfidenceScores(i))
    +0472             modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>Confidence Level: ' num2str(model.rxnConfidenceScores(i)) '</p>'];
    +0473         end
    +0474         if ~isempty(model.rxnReferences{i})
    +0475             modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>AUTHORS: ' model.rxnReferences{i} '</p>'];
    +0476         end
    +0477         if ~isempty(model.rxnNotes{i})
    +0478             modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>NOTES: ' model.rxnNotes{i} '</p>'];
    +0479         end
    +0480         modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '</body></notes>'];
    +0481     end
    +0482     
    +0483     % Export SBO terms from rxnMiriams
    +0484     if ~isempty(model.rxnMiriams{i})
    +0485         [~,sbo_ind] = ismember('sbo',model.rxnMiriams{i}.name);
    +0486         if sbo_ind > 0
    +0487             modelSBML.reaction(i).sboTerm=str2double(regexprep(model.rxnMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
    +0488             % remove the SBO term from rxnMiriams so the information is not
    +0489             % duplicated in the "annotation" field later on
    +0490             model.rxnMiriams{i}.name(sbo_ind) = [];
    +0491             model.rxnMiriams{i}.value(sbo_ind) = [];
    +0492         end
    +0493     end
    +0494     
    +0495     %Export annotation information from rxnMiriams
    +0496     if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i})
    +0497         modelSBML.reaction(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.rxns{i} '">'];
    +0498         modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<bqbiol:is><rdf:Bag>'];
    +0499         if ~isempty(model.eccodes{i})
    +0500             eccodes=regexp(model.eccodes{i},';','split');
    +0501             for j=1:numel(eccodes)
    +0502                 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation  '<rdf:li rdf:resource="https://identifiers.org/ec-code/' regexprep(eccodes{j},'ec-code/|EC','') '"/>'];
    +0503             end
    +0504         end
    +0505         modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation getMiriam(model.rxnMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
    +0506     end
    +0507     
    +0508     if isfield(modelSBML.reaction, 'name')
    +0509         modelSBML.reaction(i).name=model.rxnNames{i};
    +0510     end
    +0511     if isfield(modelSBML.reaction, 'id')
    +0512         modelSBML.reaction(i).id=model.rxns{i};
    +0513     end
    +0514     
    +0515     %Add the information about reactants and products
    +0516     involvedMets=addReactantsProducts(model,modelSBML,i);
    +0517     for j=1:numel(involvedMets.reactant)
    +0518         if j<numel(involvedMets.reactant)
    +0519             modelSBML.reaction(i).reactant(j+1)=modelSBML.reaction(i).reactant(j);
    +0520         end
    +0521         modelSBML.reaction(i).reactant(j).species=involvedMets.reactant(j).species;
    +0522         modelSBML.reaction(i).reactant(j).stoichiometry=involvedMets.reactant(j).stoichiometry;
    +0523         modelSBML.reaction(i).reactant(j).isSetStoichiometry=involvedMets.reactant(j).isSetStoichiometry;
    +0524         modelSBML.reaction(i).reactant(j).constant=involvedMets.reactant(j).constant;
    +0525     end
    +0526     if numel(involvedMets.reactant)==0
    +0527         modelSBML.reaction(i).reactant='';
    +0528     end
    +0529     for j=1:numel(involvedMets.product)
    +0530         if j<numel(involvedMets.product)
    +0531             modelSBML.reaction(i).product(j+1)=modelSBML.reaction(i).product(j);
    +0532         end
    +0533         modelSBML.reaction(i).product(j).species=involvedMets.product(j).species;
    +0534         modelSBML.reaction(i).product(j).stoichiometry=involvedMets.product(j).stoichiometry;
    +0535         modelSBML.reaction(i).product(j).isSetStoichiometry=involvedMets.product(j).isSetStoichiometry;
    +0536         modelSBML.reaction(i).product(j).constant=involvedMets.product(j).constant;
    +0537     end
    +0538     if numel(involvedMets.product)==0
    +0539         modelSBML.reaction(i).product='';
    +0540     end
    +0541     %Export reversibility information. Reactions are irreversible by
    +0542     %default
    +0543     if model.rev(i)==1
    +0544         modelSBML.reaction(i).reversible=1;
    +0545     end
    +0546     if isfield(model, 'rxnComps')
    +0547         modelSBML.reaction(i).compartment=model.comps{model.rxnComps(i)};
    +0548     end
    +0549     if isfield(model, 'grRules')
    +0550         modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association=model.grRules{i};
    +0551     end
    +0552     modelSBML.reaction(i).fbc_lowerFluxBound=totalNames{i};
    +0553     modelSBML.reaction(i).fbc_upperFluxBound=totalNames{length(model.lb)+i};
    +0554 end
    +0555 
    +0556 %Prepare subSystems Code taken from COBRA functions getModelSubSystems,
    +0557 %writeSBML, findRxnsFromSubSystem under GNU General Public License v3.0,
    +0558 %license file in readme/GPL.MD. Code modified for RAVEN
    +0559 if modelHasSubsystems
    +0560     modelSBML.groups_group.groups_kind = 'partonomy';
    +0561     modelSBML.groups_group.sboTerm = 633;
    +0562     tmpStruct=modelSBML.groups_group;
    +0563 
    +0564     rxns=model.rxns;
    +0565     if ~any(cellfun(@iscell,model.subSystems))
    +0566         if ~any(~cellfun(@isempty,model.subSystems))
    +0567             subSystems = {};
    +0568         else
    +0569             subSystems = setdiff(model.subSystems,'');
    +0570         end
    +0571     else
    +0572         orderedSubs = cellfun(@(x) columnVector(x),model.subSystems,'UniformOUtput',false);
    +0573         subSystems = setdiff(vertcat(orderedSubs{:}),'');
    +0574     end
    +0575     if isempty(subSystems)
    +0576         subSystems = {};
    +0577     end
    +0578     if ~isempty(subSystems)
    +0579         %Build the groups for the group package
    +0580         groupIDs = strcat('group',cellfun(@num2str, num2cell(1:length(subSystems)),'UniformOutput',false));
    +0581         for i = 1:length(subSystems)
    +0582             cgroup = tmpStruct;
    +0583             if ~any(cellfun(@iscell,model.subSystems))
    +0584                 present = ismember(model.subSystems,subSystems{i});
    +0585             else
    +0586                 present = cellfun(@(x) any(ismember(x,subSystems{i})),model.subSystems);
    +0587             end
    +0588             groupMembers = rxns(present);
    +0589             for j = 1:numel(groupMembers)
    +0590                 cMember = tmpStruct.groups_member;
    +0591                 cMember.groups_idRef = groupMembers{j};
    +0592                 if j == 1
    +0593                     cgroup.groups_member = cMember;
    +0594                 else
    +0595                     cgroup.groups_member(j) = cMember;
    +0596                 end
    +0597             end
    +0598             cgroup.groups_id = groupIDs{i};
    +0599             cgroup.groups_name = subSystems{i};
    +0600             if i == 1
    +0601                 modelSBML.groups_group = cgroup;
    +0602             else
    +0603                 modelSBML.groups_group(i) = cgroup;
    +0604             end
    +0605         end
    +0606     end
    +0607 end
    +0608 
    +0609 %Prepare fbc_objective subfield
    +0610 
    +0611 modelSBML.fbc_objective.fbc_type='maximize';
    +0612 modelSBML.fbc_objective.fbc_id='obj';
    +0613 
    +0614 ind=find(model.c);
    +0615 
    +0616 if isempty(ind)
    +0617     modelSBML.fbc_objective.fbc_fluxObjective.fbc_coefficient=0;
    +0618 else
    +0619     for i=1:length(ind)
    +0620         %Copy the default values to the next index as long as it is not the
    +0621         %last one
    +0622         if i<numel(ind)
    +0623             modelSBML.reaction(i+1)=modelSBML.reaction(i);
    +0624         end
    +0625         values=model.c(model.c~=0);
    +0626         modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction=modelSBML.reaction(ind(i)).id;
    +0627         modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient=values(i);
    +0628         modelSBML.fbc_objective(i).fbc_fluxObjective.isSetfbc_coefficient=1;
    +0629     end
    +0630 end
     0631 
    -0632 modelSBML.fbc_objective.fbc_type='maximize';
    -0633 modelSBML.fbc_objective.fbc_id='obj';
    -0634 
    -0635 ind=find(model.c);
    -0636 
    -0637 if isempty(ind)
    -0638     modelSBML.fbc_objective.fbc_fluxObjective.fbc_coefficient=0;
    -0639 else
    -0640     for i=1:length(ind)
    -0641         %Copy the default values to the next index as long as it is not the
    -0642         %last one
    -0643         if i<numel(ind)
    -0644             modelSBML.reaction(i+1)=modelSBML.reaction(i);
    -0645         end
    -0646         values=model.c(model.c~=0);
    -0647         modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction=modelSBML.reaction(ind(i)).id;
    -0648         modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient=values(i);
    -0649         modelSBML.fbc_objective(i).fbc_fluxObjective.isSetfbc_coefficient=1;
    -0650     end
    -0651 end
    -0652 
    -0653 modelSBML.fbc_activeObjective=modelSBML.fbc_objective.fbc_id;
    -0654 
    -0655 fbcStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/fbc/version',num2str(sbmlPackageVersions(1))];
    -0656 if modelHasSubsystems
    -0657     groupStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/groups/version',num2str(sbmlPackageVersions(2))];
    -0658     modelSBML.namespaces=struct('prefix',{'','fbc','groups'},...
    -0659     'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],...
    -0660     fbcStr,groupStr});
    -0661 else
    -0662     modelSBML.namespaces=struct('prefix',{'','fbc'},...
    -0663     'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],...
    -0664     fbcStr});
    -0665 end
    -0666 
    -0667 if sbmlPackageVersions(1) == 2
    -0668     modelSBML.fbc_strict=1;
    -0669     modelSBML.isSetfbc_strict = 1;
    -0670 end
    -0671 
    -0672 modelSBML.rule=[];
    -0673 modelSBML.constraint=[];
    -0674 
    -0675 [ravenDir,prevDir]=findRAVENroot();
    -0676 fileName=checkFileExistence(fileName,1,true,false);
    -0677 
    -0678 OutputSBML_RAVEN(modelSBML,fileName,1,0,[1,0]);
    -0679 end
    -0680 
    -0681 
    -0682 function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions)
    -0683 %Returns the blank SBML model structure by using appropriate libSBML
    -0684 %functions. This creates structure by considering three levels
    -0685 
    -0686 sbmlFieldNames=getStructureFieldnames('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0687 sbmlDefaultValues=getDefaultValues('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0688 
    -0689 for i=1:numel(sbmlFieldNames)
    -0690     modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i};
    -0691     sbmlSubfieldNames=getStructureFieldnames(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0692     sbmlSubfieldValues=getDefaultValues(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0693     if ~strcmp(sbmlFieldNames{1,i},'event') && ~strcmp(sbmlFieldNames{1,i},'functionDefinition') && ~strcmp(sbmlFieldNames{1,i},'initialAssignment')
    -0694         for j=1:numel(sbmlSubfieldNames)
    -0695             modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j})=sbmlSubfieldValues{1,j};
    -0696             sbmlSubsubfieldNames=getStructureFieldnames(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0697             sbmlSubsubfieldValues=getDefaultValues(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0698             if ~strcmp(sbmlSubfieldNames{1,j},'modifier') && ~strcmp(sbmlSubfieldNames{1,j},'kineticLaw')
    -0699                 for k=1:numel(sbmlSubsubfieldNames)
    -0700                     %'compartment' and 'species' fields are not supposed to
    -0701                     %have their standalone structures if they are subfields
    -0702                     %or subsubfields
    -0703                     if ~strcmp(sbmlSubfieldNames{1,j},'compartment') && ~strcmp(sbmlSubfieldNames{1,j},'species')
    -0704                         modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k})=sbmlSubsubfieldValues{1,k};
    -0705                     end
    -0706                     %If it is fbc_association in the third level, we need
    -0707                     %to establish the fourth level, since libSBML requires
    -0708                     %it
    -0709                     if strcmp(sbmlSubsubfieldNames{1,k},'fbc_association')
    -0710                         fbc_associationFieldNames=getStructureFieldnames('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0711                         fbc_associationFieldValues=getDefaultValues('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0712                         for l=1:numel(fbc_associationFieldNames)
    -0713                             modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k}).(fbc_associationFieldNames{1,l})=fbc_associationFieldValues{1,l};
    -0714                         end
    -0715                     end
    -0716                 end
    -0717             end
    -0718         end
    -0719     end
    -0720     if ~isstruct(modelSBML.(sbmlFieldNames{1,i}))
    -0721         modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i};
    -0722     end
    -0723 end
    -0724 
    -0725 modelSBML.unitDefinition.id='mmol_per_gDW_per_hr';
    -0726 
    -0727 unitFieldNames=getStructureFieldnames('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0728 unitDefaultValues=getDefaultValues('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    -0729 
    -0730 kinds={'mole','gram','second'};
    -0731 exponents=[1 -1 -1];
    -0732 scales=[-3 0 0];
    -0733 multipliers=[1 1 1*60*60];
    -0734 
    -0735 for i=1:numel(unitFieldNames)
    -0736     modelSBML.unitDefinition.unit(1).(unitFieldNames{1,i})=unitDefaultValues{1,i};
    -0737     for j=1:3
    -0738         modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=unitDefaultValues{1,i};
    -0739         if strcmp(unitFieldNames{1,i},'kind')
    -0740             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=kinds{j};
    -0741         elseif strcmp(unitFieldNames{1,i},'exponent')
    -0742             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=exponents(j);
    -0743         elseif strcmp(unitFieldNames{1,i},'scale')
    -0744             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=scales(j);
    -0745         elseif strcmp(unitFieldNames{1,i},'multiplier')
    -0746             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=multipliers(j);
    -0747         end
    -0748     end
    -0749 end
    -0750 end
    -0751 
    -0752 function miriamString=getMiriam(miriamStruct)
    -0753 %Returns a string with list elements for a miriam structure ('<rdf:li
    -0754 %rdf:resource="https://identifiers.org/go/GO:0005739"/>' for example). This
    -0755 %is just to speed up things since this is done many times during the
    -0756 %exporting
    -0757 
    -0758 miriamString='';
    -0759 if isfield(miriamStruct,'name')
    -0760     for i=1:numel(miriamStruct.name)
    -0761         miriamString=[miriamString '<rdf:li rdf:resource="https://identifiers.org/' miriamStruct.name{i} '/' miriamStruct.value{i} '"/>'];
    -0762     end
    -0763 end
    +0632 modelSBML.fbc_activeObjective=modelSBML.fbc_objective.fbc_id;
    +0633 
    +0634 fbcStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/fbc/version',num2str(sbmlPackageVersions(1))];
    +0635 if modelHasSubsystems
    +0636     groupStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/groups/version',num2str(sbmlPackageVersions(2))];
    +0637     modelSBML.namespaces=struct('prefix',{'','fbc','groups'},...
    +0638     'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],...
    +0639     fbcStr,groupStr});
    +0640 else
    +0641     modelSBML.namespaces=struct('prefix',{'','fbc'},...
    +0642     'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],...
    +0643     fbcStr});
    +0644 end
    +0645 
    +0646 if sbmlPackageVersions(1) == 2
    +0647     modelSBML.fbc_strict=1;
    +0648     modelSBML.isSetfbc_strict = 1;
    +0649 end
    +0650 
    +0651 modelSBML.rule=[];
    +0652 modelSBML.constraint=[];
    +0653 
    +0654 [ravenDir,prevDir]=findRAVENroot();
    +0655 fileName=checkFileExistence(fileName,1,true,false);
    +0656 
    +0657 OutputSBML_RAVEN(modelSBML,fileName,1,0,[1,0]);
    +0658 end
    +0659 
    +0660 
    +0661 function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions)
    +0662 %Returns the blank SBML model structure by using appropriate libSBML
    +0663 %functions. This creates structure by considering three levels
    +0664 
    +0665 sbmlFieldNames=getStructureFieldnames('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0666 sbmlDefaultValues=getDefaultValues('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0667 
    +0668 for i=1:numel(sbmlFieldNames)
    +0669     modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i};
    +0670     sbmlSubfieldNames=getStructureFieldnames(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0671     sbmlSubfieldValues=getDefaultValues(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0672     if ~strcmp(sbmlFieldNames{1,i},'event') && ~strcmp(sbmlFieldNames{1,i},'functionDefinition') && ~strcmp(sbmlFieldNames{1,i},'initialAssignment')
    +0673         for j=1:numel(sbmlSubfieldNames)
    +0674             modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j})=sbmlSubfieldValues{1,j};
    +0675             sbmlSubsubfieldNames=getStructureFieldnames(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0676             sbmlSubsubfieldValues=getDefaultValues(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0677             if ~strcmp(sbmlSubfieldNames{1,j},'modifier') && ~strcmp(sbmlSubfieldNames{1,j},'kineticLaw')
    +0678                 for k=1:numel(sbmlSubsubfieldNames)
    +0679                     %'compartment' and 'species' fields are not supposed to
    +0680                     %have their standalone structures if they are subfields
    +0681                     %or subsubfields
    +0682                     if ~strcmp(sbmlSubfieldNames{1,j},'compartment') && ~strcmp(sbmlSubfieldNames{1,j},'species')
    +0683                         modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k})=sbmlSubsubfieldValues{1,k};
    +0684                     end
    +0685                     %If it is fbc_association in the third level, we need
    +0686                     %to establish the fourth level, since libSBML requires
    +0687                     %it
    +0688                     if strcmp(sbmlSubsubfieldNames{1,k},'fbc_association')
    +0689                         fbc_associationFieldNames=getStructureFieldnames('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0690                         fbc_associationFieldValues=getDefaultValues('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0691                         for l=1:numel(fbc_associationFieldNames)
    +0692                             modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k}).(fbc_associationFieldNames{1,l})=fbc_associationFieldValues{1,l};
    +0693                         end
    +0694                     end
    +0695                 end
    +0696             end
    +0697         end
    +0698     end
    +0699     if ~isstruct(modelSBML.(sbmlFieldNames{1,i}))
    +0700         modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i};
    +0701     end
    +0702 end
    +0703 
    +0704 modelSBML.unitDefinition.id='mmol_per_gDW_per_hr';
    +0705 
    +0706 unitFieldNames=getStructureFieldnames('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0707 unitDefaultValues=getDefaultValues('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions);
    +0708 
    +0709 kinds={'mole','gram','second'};
    +0710 exponents=[1 -1 -1];
    +0711 scales=[-3 0 0];
    +0712 multipliers=[1 1 1*60*60];
    +0713 
    +0714 for i=1:numel(unitFieldNames)
    +0715     modelSBML.unitDefinition.unit(1).(unitFieldNames{1,i})=unitDefaultValues{1,i};
    +0716     for j=1:3
    +0717         modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=unitDefaultValues{1,i};
    +0718         if strcmp(unitFieldNames{1,i},'kind')
    +0719             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=kinds{j};
    +0720         elseif strcmp(unitFieldNames{1,i},'exponent')
    +0721             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=exponents(j);
    +0722         elseif strcmp(unitFieldNames{1,i},'scale')
    +0723             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=scales(j);
    +0724         elseif strcmp(unitFieldNames{1,i},'multiplier')
    +0725             modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=multipliers(j);
    +0726         end
    +0727     end
    +0728 end
    +0729 end
    +0730 
    +0731 function miriamString=getMiriam(miriamStruct)
    +0732 %Returns a string with list elements for a miriam structure ('<rdf:li
    +0733 %rdf:resource="https://identifiers.org/go/GO:0005739"/>' for example). This
    +0734 %is just to speed up things since this is done many times during the
    +0735 %exporting
    +0736 
    +0737 miriamString='';
    +0738 if isfield(miriamStruct,'name')
    +0739     for i=1:numel(miriamStruct.name)
    +0740         miriamString=[miriamString '<rdf:li rdf:resource="https://identifiers.org/' miriamStruct.name{i} '/' miriamStruct.value{i} '"/>'];
    +0741     end
    +0742 end
    +0743 end
    +0744 
    +0745 function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i)
    +0746 %This function provides reactants and products for particular reaction. The
    +0747 %function was 'borrowed' from writeSBML in COBRA toolbox, lines 663-679
    +0748 
    +0749 met_idx = find(model.S(:, i));
    +0750 tmp_Rxn.product=[];
    +0751 tmp_Rxn.reactant=[];
    +0752 for j_met=1:size(met_idx,1)
    +0753     tmp_idx = met_idx(j_met,1);
    +0754     sbml_tmp_species_ref.species = sbmlModel.species(tmp_idx).id;
    +0755     met_stoich = model.S(tmp_idx, i);
    +0756     sbml_tmp_species_ref.stoichiometry = abs(met_stoich);
    +0757     sbml_tmp_species_ref.isSetStoichiometry=1;
    +0758     sbml_tmp_species_ref.constant=1;
    +0759     if (met_stoich > 0)
    +0760         tmp_Rxn.product = [ tmp_Rxn.product, sbml_tmp_species_ref ];
    +0761     else
    +0762         tmp_Rxn.reactant = [ tmp_Rxn.reactant, sbml_tmp_species_ref];
    +0763     end
     0764 end
    -0765 
    -0766 function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i)
    -0767 %This function provides reactants and products for particular reaction. The
    -0768 %function was 'borrowed' from writeSBML in COBRA toolbox, lines 663-679
    -0769 
    -0770 met_idx = find(model.S(:, i));
    -0771 tmp_Rxn.product=[];
    -0772 tmp_Rxn.reactant=[];
    -0773 for j_met=1:size(met_idx,1)
    -0774     tmp_idx = met_idx(j_met,1);
    -0775     sbml_tmp_species_ref.species = sbmlModel.species(tmp_idx).id;
    -0776     met_stoich = model.S(tmp_idx, i);
    -0777     sbml_tmp_species_ref.stoichiometry = abs(met_stoich);
    -0778     sbml_tmp_species_ref.isSetStoichiometry=1;
    -0779     sbml_tmp_species_ref.constant=1;
    -0780     if (met_stoich > 0)
    -0781         tmp_Rxn.product = [ tmp_Rxn.product, sbml_tmp_species_ref ];
    -0782     else
    -0783         tmp_Rxn.reactant = [ tmp_Rxn.reactant, sbml_tmp_species_ref];
    -0784     end
    -0785 end
    -0786 end
    -0787 
    -0788 function vecT = columnVector(vec)
    -0789 % Code below taken from COBRA Toolbox under GNU General Public License v3.0
    -0790 % license file in readme/GPL.MD.
    -0791 %
    -0792 % Converts a vector to a column vector
    -0793 %
    -0794 % USAGE:
    -0795 %
    -0796 %   vecT = columnVector(vec)
    -0797 %
    -0798 % INPUT:
    -0799 %   vec:     a vector
    -0800 %
    -0801 % OUTPUT:
    -0802 %   vecT:    a column vector
    -0803 
    -0804 [n, m] = size(vec);
    -0805 
    -0806 if n < m
    -0807     vecT = vec';
    -0808 else
    -0809     vecT = vec;
    -0810 end
    -0811 end
    +0765 end +0766 +0767 function vecT = columnVector(vec) +0768 % Code below taken from COBRA Toolbox under GNU General Public License v3.0 +0769 % license file in readme/GPL.MD. +0770 % +0771 % Converts a vector to a column vector +0772 % +0773 % USAGE: +0774 % +0775 % vecT = columnVector(vec) +0776 % +0777 % INPUT: +0778 % vec: a vector +0779 % +0780 % OUTPUT: +0781 % vecT: a column vector +0782 +0783 [n, m] = size(vec); +0784 +0785 if n < m +0786 vecT = vec'; +0787 else +0788 vecT = vec; +0789 end +0790 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/exportToExcelFormat.html b/doc/io/exportToExcelFormat.html index ee040027..b23c701e 100644 --- a/doc/io/exportToExcelFormat.html +++ b/doc/io/exportToExcelFormat.html @@ -53,7 +53,7 @@

    CROSS-REFERENCE INFORMATION ^
 <li><a href=exportToTabDelimited exportToTabDelimited
  • loadWorkbook loadWorkbook
  • sortIdentifiers exportModel
  • writeSheet writeSheet
  • This function is called by: +
  • exportForGit exportForGit
  • diff --git a/doc/io/getToolboxVersion.html b/doc/io/getToolboxVersion.html index 9bd052c0..73335a08 100644 --- a/doc/io/getToolboxVersion.html +++ b/doc/io/getToolboxVersion.html @@ -49,7 +49,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • exportForGit exportForGit
  • +
  • exportForGit exportForGit
  • SUBFUNCTIONS ^

    diff --git a/doc/io/importExcelModel.html b/doc/io/importExcelModel.html index 192c63cc..0366309c 100644 --- a/doc/io/importExcelModel.html +++ b/doc/io/importExcelModel.html @@ -967,7 +967,7 @@

    SOURCE CODE ^elseif any(strfind(I{j},':')) 0848 index=max(strfind(I{j},':')); 0849 end -0850 if any(index) +0850 if exist('index','var') & any(index) 0851 miriamStruct{i}.name{startIndex+j}=I{j}(1:index-1); 0852 miriamStruct{i}.value{startIndex+j}=I{j}(index+1:end); 0853 else diff --git a/doc/io/importModel.html b/doc/io/importModel.html index 9048c59e..d0d442fc 100644 --- a/doc/io/importModel.html +++ b/doc/io/importModel.html @@ -24,21 +24,24 @@

    PURPOSE ^importModel

    SYNOPSIS ^

    -
    function model=importModel(fileName,removeExcMets,isSBML2COBRA,supressWarnings)
    +
    function model=importModel(fileName,removeExcMets,removePrefix,supressWarnings)

    DESCRIPTION ^

     importModel
    -   Import a constraint-based model from a SBML file
    +   Import a constraint-based model from an SBML file.
     
      Input:
    -   fileName        a SBML file to import. A dialog window will open if 
    +   fileName        a SBML file to import. A dialog window will open if
                        no file name is specified.
        removeExcMets   true if exchange metabolites should be removed. This is
                        needed to be able to run simulations, but it could also
                        be done using simplifyModel at a later stage (optional,
                        default true)
    -   isSBML2COBRA    true if the SBML file is in the old COBRA Toolbox
    -                   format (SBML Level 2) (optional, default false)
    +   removePrefix    true if identifier prefixes should be removed when
    +                   loading the model: G_ for genes, R_ for reactions,
    +                   M_ for metabolites, and C_ for compartments. These are
    +                   only removed if all identifiers of a certain type
    +                   contain the prefix. (optional, default true)
        supressWarnings true if warnings regarding the model structure should
                        be supressed (optional, default false)
     
    @@ -75,6 +78,7 @@ 

    DESCRIPTION ^DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

    @@ -104,9 +108,9 @@

    SUBFUNCTIONS ^function matchGenes=getGeneList(grRules)
  • function fieldContent=parseNote(searchString,fieldName)
  • function fieldContent=parseAnnotation(searchString,startString,midString,fieldName)
  • function miriamStruct=parseMiriam(searchString)
  • function miriam = addSBOtoMiriam(miriam,sboTerm)
  • SOURCE CODE ^

    -
    0001 function model=importModel(fileName,removeExcMets,isSBML2COBRA,supressWarnings)
    +
    0001 function model=importModel(fileName,removeExcMets,removePrefix,supressWarnings)
     0002 % importModel
    -0003 %   Import a constraint-based model from a SBML file
    +0003 %   Import a constraint-based model from an SBML file.
     0004 %
     0005 % Input:
     0006 %   fileName        a SBML file to import. A dialog window will open if
    @@ -115,1218 +119,1171 @@ 

    SOURCE CODE ^% needed to be able to run simulations, but it could also 0010 % be done using simplifyModel at a later stage (optional, 0011 % default true) -0012 % isSBML2COBRA true if the SBML file is in the old COBRA Toolbox -0013 % format (SBML Level 2) (optional, default false) -0014 % supressWarnings true if warnings regarding the model structure should -0015 % be supressed (optional, default false) -0016 % -0017 % Output: -0018 % model -0019 % id model ID -0020 % name name of model contents -0021 % annotation additional information about model -0022 % rxns reaction ids -0023 % mets metabolite ids -0024 % S stoichiometric matrix -0025 % lb lower bounds -0026 % ub upper bounds -0027 % rev reversibility vector -0028 % c objective coefficients -0029 % b equality constraints for the metabolite equations -0030 % comps compartment ids -0031 % compNames compartment names -0032 % compOutside the id (as in comps) for the compartment -0033 % surrounding each of the compartments -0034 % compMiriams structure with MIRIAM information about the -0035 % compartments -0036 % rxnNames reaction description -0037 % rxnComps compartments for reactions -0038 % grRules reaction to gene rules in text form -0039 % rxnGeneMat reaction-to-gene mapping in sparse matrix form -0040 % subSystems subsystem name for each reaction -0041 % eccodes EC-codes for the reactions -0042 % rxnMiriams structure with MIRIAM information about the reactions -0043 % rxnNotes reaction notes -0044 % rxnReferences reaction references -0045 % rxnConfidenceScores reaction confidence scores -0046 % genes list of all genes -0047 % geneComps compartments for genes -0048 % geneMiriams structure with MIRIAM information about the genes -0049 % geneShortNames gene alternative names (e.g. 
ERG10) -0050 % metNames metabolite description -0051 % metComps compartments for metabolites -0052 % inchis InChI-codes for metabolites -0053 % metFormulas metabolite chemical formula -0054 % metMiriams structure with MIRIAM information about the metabolites -0055 % metCharges metabolite charge -0056 % unconstrained true if the metabolite is an exchange metabolite -0057 % -0058 % A number of consistency checks are performed in order to ensure that the -0059 % model is valid. Take these warnings seriously and modify the model -0060 % structure to solve them. +0012 % removePrefix true if identifier prefixes should be removed when +0013 % loading the model: G_ for genes, R_ for reactions, +0014 % M_ for metabolites, and C_ for compartments. These are +0015 % only removed if all identifiers of a certain type +0016 % contain the prefix. (optional, default true) +0017 % supressWarnings true if warnings regarding the model structure should +0018 % be supressed (optional, default false) +0019 % +0020 % Output: +0021 % model +0022 % id model ID +0023 % name name of model contents +0024 % annotation additional information about model +0025 % rxns reaction ids +0026 % mets metabolite ids +0027 % S stoichiometric matrix +0028 % lb lower bounds +0029 % ub upper bounds +0030 % rev reversibility vector +0031 % c objective coefficients +0032 % b equality constraints for the metabolite equations +0033 % comps compartment ids +0034 % compNames compartment names +0035 % compOutside the id (as in comps) for the compartment +0036 % surrounding each of the compartments +0037 % compMiriams structure with MIRIAM information about the +0038 % compartments +0039 % rxnNames reaction description +0040 % rxnComps compartments for reactions +0041 % grRules reaction to gene rules in text form +0042 % rxnGeneMat reaction-to-gene mapping in sparse matrix form +0043 % subSystems subsystem name for each reaction +0044 % eccodes EC-codes for the reactions +0045 % rxnMiriams structure with MIRIAM 
information about the reactions +0046 % rxnNotes reaction notes +0047 % rxnReferences reaction references +0048 % rxnConfidenceScores reaction confidence scores +0049 % genes list of all genes +0050 % geneComps compartments for genes +0051 % geneMiriams structure with MIRIAM information about the genes +0052 % geneShortNames gene alternative names (e.g. ERG10) +0053 % proteins protein associated to each gene +0054 % metNames metabolite description +0055 % metComps compartments for metabolites +0056 % inchis InChI-codes for metabolites +0057 % metFormulas metabolite chemical formula +0058 % metMiriams structure with MIRIAM information about the metabolites +0059 % metCharges metabolite charge +0060 % unconstrained true if the metabolite is an exchange metabolite 0061 % -0062 % Usage: model = importModel(fileName, removeExcMets, isSBML2COBRA, supressWarnings) -0063 -0064 if nargin<1 || isempty(fileName) -0065 [fileName, pathName] = uigetfile({'*.xml;*.sbml'}, 'Please select the model file'); -0066 if fileName == 0 -0067 error('You should select a model file') -0068 else -0069 fileName = fullfile(pathName,fileName); -0070 end -0071 end -0072 fileName=char(fileName); -0073 if nargin<2 -0074 removeExcMets=true; +0062 % Note: A number of consistency checks are performed in order to ensure that the +0063 % model is valid. Take these warnings seriously and modify the model +0064 % structure to solve them. 
+0065 % +0066 % Usage: model = importModel(fileName, removeExcMets, removePrefix, supressWarnings) +0067 +0068 if nargin<1 || isempty(fileName) +0069 [fileName, pathName] = uigetfile({'*.xml;*.sbml'}, 'Please select the model file'); +0070 if fileName == 0 +0071 error('You should select a model file') +0072 else +0073 fileName = fullfile(pathName,fileName); +0074 end 0075 end -0076 -0077 if nargin<3 -0078 isSBML2COBRA=false; +0076 fileName=char(fileName); +0077 if nargin<2 || isempty(removeExcMets) +0078 removeExcMets=true; 0079 end 0080 -0081 if nargin<4 -0082 supressWarnings=false; +0081 if nargin<3 || isempty(removePrefix) +0082 removePrefix=true; 0083 end 0084 -0085 if ~isfile(fileName) -0086 error('SBML file %s cannot be found',string(fileName)); +0085 if nargin<4 +0086 supressWarnings=false; 0087 end 0088 -0089 %This is to match the order of the fields to those you get from importing -0090 %from Excel -0091 model=[]; -0092 model.id=[]; -0093 model.name=[]; -0094 model.annotation=[]; -0095 model.rxns={}; -0096 model.mets={}; -0097 model.S=[]; -0098 model.lb=[]; -0099 model.ub=[]; -0100 model.rev=[]; -0101 model.c=[]; -0102 model.b=[]; -0103 model.comps={}; -0104 model.compNames={}; -0105 model.compOutside={}; -0106 model.compMiriams={}; -0107 model.rxnNames={}; -0108 model.rxnComps=[]; -0109 model.grRules={}; -0110 model.rxnGeneMat=[]; -0111 model.subSystems={}; -0112 model.eccodes={}; -0113 model.rxnMiriams={}; -0114 model.rxnNotes={}; -0115 model.rxnReferences={}; -0116 model.rxnConfidenceScores=[]; -0117 model.genes={}; -0118 model.geneComps=[]; -0119 model.geneMiriams={}; -0120 model.geneShortNames={}; -0121 model.metNames={}; -0122 model.metComps=[]; -0123 model.inchis={}; -0124 model.metFormulas={}; -0125 model.metMiriams={}; -0126 model.metCharges=[]; -0127 model.unconstrained=[]; -0128 -0129 %Load the model using libSBML -0130 [ravenDir,prevDir]=findRAVENroot(); -0131 fileName=checkFileExistence(fileName,1); -0132 modelSBML = 
TranslateSBML_RAVEN(fileName,0,0,[1 1]); -0133 -0134 if isempty(modelSBML) -0135 EM='There is a problem with the SBML file. Try using the SBML Validator at http://sbml.org/Facilities/Validator'; -0136 dispEM(EM); -0137 end -0138 -0139 %Remove the preceding strings for reactions, compartments and -0140 %reactants/products in 'reaction' field. The strings for metabolites, genes -0141 %and complexes are not removed, as we will need them later to identify them -0142 %from 'species' field -0143 for i=1:numel(modelSBML.reaction) -0144 modelSBML.reaction(i).name=regexprep(modelSBML.reaction(i).name,'^R_',''); -0145 modelSBML.reaction(i).id=regexprep(modelSBML.reaction(i).id,'^R_',''); -0146 if isfield(modelSBML.reaction(i),'compartment') -0147 modelSBML.reaction(i).compartment=regexprep(modelSBML.reaction(i).compartment,'^C_',''); -0148 end -0149 for j=1:numel(modelSBML.reaction(i).reactant) -0150 modelSBML.reaction(i).reactant(j).species=regexprep(modelSBML.reaction(i).reactant(j).species,'^M_',''); -0151 end -0152 for j=1:numel(modelSBML.reaction(i).product) -0153 modelSBML.reaction(i).product(j).species=regexprep(modelSBML.reaction(i).product(j).species,'^M_',''); -0154 end -0155 end -0156 -0157 %Retrieve compartment names and IDs -0158 compartmentNames=cell(numel(modelSBML.compartment),1); -0159 compartmentIDs=cell(numel(modelSBML.compartment),1); -0160 compartmentOutside=cell(numel(modelSBML.compartment),1); -0161 compartmentMiriams=cell(numel(modelSBML.compartment),1); +0089 fileName=checkFileExistence(fileName,1); +0090 % If path contains non-ASCII characters, copy file to tempdir first, as +0091 % libSBML is known to have problems with this on Windows: +0092 % https://sbml.org/software/libsbml/libsbml-docs/known-pitfalls/#matlab-on-windows-has-issues-with-unicode-filenames +0093 if ispc && any(double(fileName)>128) +0094 [~,originalFile,ext] = fileparts(fileName); +0095 tempFile = fullfile(tempdir,[originalFile ext]); +0096 copyfile(fileName,tempFile); +0097 
fileName = tempFile; +0098 end +0099 +0100 %This is to match the order of the fields to those you get from importing +0101 %from Excel +0102 model=[]; +0103 model.id=[]; +0104 model.name=[]; +0105 model.annotation=[]; +0106 model.rxns={}; +0107 model.mets={}; +0108 model.S=[]; +0109 model.lb=[]; +0110 model.ub=[]; +0111 model.rev=[]; +0112 model.c=[]; +0113 model.b=[]; +0114 model.comps={}; +0115 model.compNames={}; +0116 model.compOutside={}; +0117 model.compMiriams={}; +0118 model.rxnNames={}; +0119 model.rxnComps=[]; +0120 model.grRules={}; +0121 model.rxnGeneMat=[]; +0122 model.subSystems={}; +0123 model.eccodes={}; +0124 model.rxnMiriams={}; +0125 model.rxnNotes={}; +0126 model.rxnReferences={}; +0127 model.rxnConfidenceScores=[]; +0128 model.genes={}; +0129 model.geneComps=[]; +0130 model.geneMiriams={}; +0131 model.geneShortNames={}; +0132 model.proteins={}; +0133 model.metNames={}; +0134 model.metComps=[]; +0135 model.inchis={}; +0136 model.metFormulas={}; +0137 model.metMiriams={}; +0138 model.metCharges=[]; +0139 model.unconstrained=[]; +0140 +0141 %Load the model using libSBML +0142 [modelSBML,errorMsg] = TranslateSBML_RAVEN(fileName,0,0,[1 1]); +0143 if exist('tempFile','var') +0144 delete(tempFile) +0145 end +0146 +0147 if isempty(modelSBML) +0148 EM=['There is a problem with the SBML file. 
Try using the SBML Validator at http://sbml.org/Facilities/Validator.\nlibSBML reports: ', errorMsg.message]; +0149 dispEM(EM); +0150 end +0151 +0152 %Retrieve compartment names and IDs +0153 compartmentNames=cell(numel(modelSBML.compartment),1); +0154 compartmentIDs=cell(numel(modelSBML.compartment),1); +0155 compartmentOutside=cell(numel(modelSBML.compartment),1); +0156 compartmentMiriams=cell(numel(modelSBML.compartment),1); +0157 +0158 if isfield(modelSBML.compartment,'sboTerm') && numel(unique([modelSBML.compartment.sboTerm])) == 1 +0159 %If all the SBO terms are identical, don't add them to compMiriams +0160 modelSBML.compartment = rmfield(modelSBML.compartment,'sboTerm'); +0161 end 0162 -0163 if isfield(modelSBML.compartment,'sboTerm') && numel(unique([modelSBML.compartment.sboTerm])) == 1 -0164 %If all the SBO terms are identical, don't add them to compMiriams -0165 modelSBML.compartment = rmfield(modelSBML.compartment,'sboTerm'); -0166 end -0167 -0168 for i=1:numel(modelSBML.compartment) -0169 compartmentNames{i}=modelSBML.compartment(i).name; -0170 compartmentIDs{i}=regexprep(modelSBML.compartment(i).id,'^C_',''); -0171 if isfield(modelSBML.compartment(i),'outside') -0172 if ~isempty(modelSBML.compartment(i).outside) -0173 compartmentOutside{i}=regexprep(modelSBML.compartment(i).outside,'^C_',''); -0174 else -0175 compartmentOutside{i}=''; -0176 end -0177 else -0178 compartmentOutside{i}=[]; -0179 end -0180 -0181 if isfield(modelSBML.compartment(i),'annotation') -0182 compartmentMiriams{i}=parseMiriam(modelSBML.compartment(i).annotation); -0183 else -0184 compartmentMiriams{i}=[]; -0185 end -0186 -0187 if isfield(modelSBML.compartment(i),'sboTerm') && ~(modelSBML.compartment(i).sboTerm==-1) -0188 compartmentMiriams{i} = addSBOtoMiriam(compartmentMiriams{i},modelSBML.compartment(i).sboTerm); -0189 end +0163 for i=1:numel(modelSBML.compartment) +0164 compartmentNames{i}=modelSBML.compartment(i).name; +0165 compartmentIDs{i}=modelSBML.compartment(i).id; 
+0166 if isfield(modelSBML.compartment(i),'outside') +0167 if ~isempty(modelSBML.compartment(i).outside) +0168 compartmentOutside{i}=modelSBML.compartment(i).outside; +0169 else +0170 compartmentOutside{i}=''; +0171 end +0172 else +0173 compartmentOutside{i}=[]; +0174 end +0175 +0176 if isfield(modelSBML.compartment(i),'annotation') +0177 compartmentMiriams{i}=parseMiriam(modelSBML.compartment(i).annotation); +0178 else +0179 compartmentMiriams{i}=[]; +0180 end +0181 +0182 if isfield(modelSBML.compartment(i),'sboTerm') && ~(modelSBML.compartment(i).sboTerm==-1) +0183 compartmentMiriams{i} = addSBOtoMiriam(compartmentMiriams{i},modelSBML.compartment(i).sboTerm); +0184 end +0185 end +0186 +0187 %If there are no compartment names then use compartment id as name +0188 if all(cellfun(@isempty,compartmentNames)) +0189 compartmentNames=compartmentIDs; 0190 end 0191 -0192 %If there are no compartment names then use compartment id as name -0193 if all(cellfun(@isempty,compartmentNames)) -0194 compartmentNames=compartmentIDs; -0195 end -0196 -0197 %Retrieve info on metabolites, genes, complexes -0198 metaboliteNames={}; -0199 metaboliteIDs={}; -0200 metaboliteCompartments={}; -0201 metaboliteUnconstrained=[]; -0202 metaboliteFormula={}; -0203 metaboliteInChI={}; -0204 metaboliteMiriams={}; -0205 metaboliteCharges=[]; -0206 -0207 geneNames={}; -0208 geneIDs={}; -0209 geneMiriams={}; -0210 geneShortNames={}; -0211 geneCompartments={}; -0212 complexIDs={}; -0213 complexNames={}; -0214 -0215 %If the file is not a COBRA Toolbox model. According to the format -0216 %specified in the yeast consensus model both metabolites and genes are a -0217 %type of 'species'. The metabolites have names starting with 'M_' and genes -0218 %with 'E_' -0219 geneSBOs = []; -0220 metSBOs = []; -0221 %Regex of compartment names, later to be used to remove from metabolite -0222 %names if present as suffix. 
-0223 regexCompNames = ['\s?\[((' strjoin({modelSBML.compartment.name},')|(') '))\]$']; -0224 for i=1:numel(modelSBML.species) -0225 if ~isSBML2COBRA -0226 if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_') -0227 geneNames{numel(geneNames)+1,1}=modelSBML.species(i).name; -0228 -0229 %The "E_" is included in the ID. This is because it's only used -0230 %internally in this file and it makes the matching a little -0231 %smoother -0232 geneIDs{numel(geneIDs)+1,1}=modelSBML.species(i).id; -0233 geneCompartments{numel(geneCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); -0234 -0235 %Get Miriam structure -0236 if isfield(modelSBML.species(i),'annotation') -0237 %Get Miriam info -0238 geneMiriam=parseMiriam(modelSBML.species(i).annotation); -0239 geneMiriams{numel(geneMiriams)+1,1}=geneMiriam; -0240 else -0241 geneMiriams{numel(geneMiriams)+1,1}=[]; -0242 end -0243 -0244 %Protein short names (for example ERG10) are saved as SHORT -0245 %NAME: NAME in the notes-section of metabolites for SBML Level -0246 %2 and as PROTEIN_ASSOCIATION for each reaction in SBML Level 2 -0247 %COBRA Toolbox format. 
For now only the SHORT NAME is loaded -0248 %and no mapping takes place -0249 if isfield(modelSBML.species(i),'notes') -0250 geneShortNames{numel(geneShortNames)+1,1}=parseNote(modelSBML.species(i).notes,'SHORT NAME'); -0251 else -0252 geneShortNames{numel(geneShortNames)+1,1}=''; -0253 end -0254 -0255 %Get SBO term -0256 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) -0257 geneSBOs(end+1,1) = modelSBML.species(i).sboTerm; -0258 end -0259 elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_') -0260 %If it's a complex keep the ID and name -0261 complexIDs=[complexIDs;modelSBML.species(i).id]; -0262 complexNames=[complexNames;modelSBML.species(i).name]; -0263 else -0264 %If it is not gene or complex, then it must be a metabolite -0265 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; -0266 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); -0267 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); -0268 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; -0269 -0270 %For each metabolite retrieve the formula and the InChI code if -0271 %available First add the InChI code and the formula from the -0272 %InChI. 
This allows for overwriting the formula by setting the -0273 %actual formula field -0274 if ~isempty(modelSBML.species(i).annotation) -0275 %Get the formula if available -0276 startString='>InChI='; -0277 endString='</in:inchi>'; -0278 formStart=strfind(modelSBML.species(i).annotation,startString); -0279 if isempty(formStart) -0280 startString='InChI='; -0281 endString='"/>'; -0282 end -0283 formStart=strfind(modelSBML.species(i).annotation,startString); -0284 if ~isempty(formStart) -0285 formEnd=strfind(modelSBML.species(i).annotation,endString); -0286 formEndIndex=find(formEnd>formStart, 1 ); -0287 formula=modelSBML.species(i).annotation(formStart+numel(startString):formEnd(formEndIndex)-1); -0288 metaboliteInChI{numel(metaboliteInChI)+1,1}=formula; -0289 -0290 %The composition is most often present between the -0291 %first and second "/" in the model. In some simple -0292 %molecules, such as salts, there is no second "/". The -0293 %formula is then assumed to be to the end of the string -0294 compositionIndexes=strfind(formula,'/'); -0295 if numel(compositionIndexes)>1 -0296 metaboliteFormula{numel(metaboliteFormula)+1,1}=... -0297 formula(compositionIndexes(1)+1:compositionIndexes(2)-1); -0298 else -0299 if numel(compositionIndexes)==1 -0300 %Probably a simple molecule which can have only -0301 %one conformation -0302 metaboliteFormula{numel(metaboliteFormula)+1,1}=... 
-0303 formula(compositionIndexes(1)+1:numel(formula)); -0304 else -0305 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0306 end -0307 end -0308 elseif isfield(modelSBML.species(i),'fbc_chemicalFormula') -0309 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; -0310 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) -0311 %Cannot extract InChi from formula, so remains -0312 %empty -0313 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).fbc_chemicalFormula; -0314 else -0315 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0316 end -0317 else -0318 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; -0319 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0320 end -0321 -0322 %Get Miriam info -0323 metMiriam=parseMiriam(modelSBML.species(i).annotation); -0324 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; -0325 else -0326 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; -0327 if isfield(modelSBML.species(i),'notes') -0328 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); -0329 else -0330 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0331 end -0332 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=[]; -0333 end -0334 if ~isempty(modelSBML.species(i).notes) -0335 if ~isfield(modelSBML.species(i),'annotation') -0336 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); -0337 end -0338 elseif ~isfield(modelSBML.species(i),'annotation') -0339 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0340 end -0341 %Get SBO term -0342 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) -0343 metSBOs(end+1,1) = modelSBML.species(i).sboTerm; -0344 end -0345 end -0346 -0347 elseif isSBML2COBRA -0348 %The metabolite names are assumed to be M_NAME_COMPOSITION or -0349 %_NAME_COMPOSITION or NAME_COMPOSITION or NAME. 
Regular expressions -0350 %are used that only NAME_COMPOSITION or NAME would be possible -0351 -0352 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^M_',''); -0353 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^_',''); -0354 underscoreIndex=strfind(modelSBML.species(i).name,'_'); -0355 -0356 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; -0357 -0358 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); -0359 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); -0360 -0361 %I think that COBRA doesn't set the boundary condition, but rather -0362 %uses name_b. Check for either -0363 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; -0364 if strcmp(metaboliteIDs{end}(max(end-1,1):end),'_b') -0365 metaboliteUnconstrained(end)=1; -0366 end -0367 -0368 %Get the formula -0369 if max(underscoreIndex)<length(modelSBML.species(i).name) -0370 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).name(max(underscoreIndex)+1:length(modelSBML.species(i).name)); -0371 else -0372 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0373 end -0374 -0375 %The old COBRA version sometimes has composition information in the -0376 %notes instead -0377 if isfield(modelSBML.species(i),'notes') && ~isempty(parseNote(modelSBML.species(i).notes,'FORMULA')) -0378 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); -0379 end -0380 -0381 %Get Miriam info -0382 if ~isempty(modelSBML.species(i).annotation) -0383 metMiriam=parseMiriam(modelSBML.species(i).annotation); -0384 else -0385 metMiriam=[]; -0386 end -0387 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; -0388 -0389 %Get SBO term -0390 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) -0391 metSBOs(end+1,1) = modelSBML.species(i).sboTerm; -0392 
end -0393 end -0394 %The following lines are executed regardless isSBML2COBRA setting -0395 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:2),'E_') -0396 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:3),'Cx_') -0397 %Remove trailing [compartment] from metabolite name if present -0398 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},regexCompNames,''); -0399 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},'^M_',''); -0400 if isfield(modelSBML.species(i),'fbc_charge') -0401 if ~isempty(modelSBML.species(i).fbc_charge) && modelSBML.species(i).isSetfbc_charge -0402 metaboliteCharges(numel(metaboliteCharges)+1,1)=double(modelSBML.species(i).fbc_charge); -0403 else -0404 if isfield(modelSBML.species(i),'notes') -0405 if strfind(modelSBML.species(i).notes,'CHARGE') -0406 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); -0407 else -0408 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0409 end -0410 else -0411 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0412 end -0413 end -0414 elseif isfield(modelSBML.species(i),'notes') -0415 if strfind(modelSBML.species(i).notes,'CHARGE') -0416 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); -0417 else -0418 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0419 end -0420 else -0421 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0422 end -0423 %Additional information from FBC format Chemical formula -0424 if isfield(modelSBML.species(i),'fbc_chemicalFormula') -0425 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) -0426 metaboliteFormula{numel(metaboliteFormula),1}=modelSBML.species(i).fbc_chemicalFormula; -0427 end -0428 end -0429 end -0430 end -0431 end -0432 -0433 %Add SBO terms to gene and metabolite miriam fields -0434 if numel(unique(geneSBOs)) > 1 % don't add if they're all identical -0435 for i = 
1:numel(geneNames) -0436 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},geneSBOs(i)); -0437 end -0438 end -0439 if numel(unique(metSBOs)) > 1 -0440 for i = 1:numel(metaboliteNames) -0441 metaboliteMiriams{i} = addSBOtoMiriam(metaboliteMiriams{i},metSBOs(i)); +0192 %Retrieve info on metabolites, genes, complexes +0193 metaboliteNames={}; +0194 metaboliteIDs={}; +0195 metaboliteCompartments={}; +0196 metaboliteUnconstrained=[]; +0197 metaboliteFormula={}; +0198 metaboliteInChI={}; +0199 metaboliteMiriams={}; +0200 metaboliteCharges=[]; +0201 +0202 geneNames={}; +0203 geneIDs={}; +0204 geneMiriams={}; +0205 geneShortNames={}; +0206 proteins={}; +0207 geneCompartments={}; +0208 complexIDs={}; +0209 complexNames={}; +0210 +0211 %If the file is not a COBRA Toolbox model. According to the format +0212 %specified in the yeast consensus model both metabolites and genes are a +0213 %type of 'species'. The metabolites have names starting with 'M_' and genes +0214 %with 'E_' +0215 geneSBOs = []; +0216 metSBOs = []; +0217 %Regex of compartment names, later to be used to remove from metabolite +0218 %names if present as suffix. +0219 regexCompNames = ['\s?\[((' strjoin({modelSBML.compartment.name},')|(') '))\]$']; +0220 for i=1:numel(modelSBML.species) +0221 if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_') +0222 geneNames{numel(geneNames)+1,1}=modelSBML.species(i).name; +0223 +0224 %The "E_" is included in the ID. 
This is because it's only used +0225 %internally in this file and it makes the matching a little +0226 %smoother +0227 geneIDs{numel(geneIDs)+1,1}=modelSBML.species(i).id; +0228 geneCompartments{numel(geneCompartments)+1,1}=modelSBML.species(i).compartment; +0229 +0230 %Get Miriam structure +0231 if isfield(modelSBML.species(i),'annotation') +0232 %Get Miriam info +0233 geneMiriam=parseMiriam(modelSBML.species(i).annotation); +0234 geneMiriams{numel(geneMiriams)+1,1}=geneMiriam; +0235 else +0236 geneMiriams{numel(geneMiriams)+1,1}=[]; +0237 end +0238 +0239 %Protein short names (for example ERG10) are saved as SHORT +0240 %NAME: NAME in the notes-section of metabolites for SBML Level +0241 %2 and as PROTEIN_ASSOCIATION for each reaction in SBML Level 2 +0242 %COBRA Toolbox format. For now only the SHORT NAME is loaded +0243 %and no mapping takes place +0244 if isfield(modelSBML.species(i),'notes') +0245 geneShortNames{numel(geneShortNames)+1,1}=parseNote(modelSBML.species(i).notes,'SHORT NAME'); +0246 else +0247 geneShortNames{numel(geneShortNames)+1,1}=''; +0248 end +0249 +0250 %Get SBO term +0251 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) +0252 geneSBOs(end+1,1) = modelSBML.species(i).sboTerm; +0253 end +0254 elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_') +0255 %If it's a complex keep the ID and name +0256 complexIDs=[complexIDs;modelSBML.species(i).id]; +0257 complexNames=[complexNames;modelSBML.species(i).name]; +0258 else +0259 %If it is not gene or complex, then it must be a metabolite +0260 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; +0261 metaboliteIDs{numel(metaboliteIDs)+1,1}=modelSBML.species(i).id; +0262 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=modelSBML.species(i).compartment; +0263 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; +0264 +0265 %For each metabolite retrieve the 
formula and the InChI code if +0266 %available First add the InChI code and the formula from the +0267 %InChI. This allows for overwriting the formula by setting the +0268 %actual formula field +0269 if ~isempty(modelSBML.species(i).annotation) +0270 %Get the formula if available +0271 startString='>InChI='; +0272 endString='</in:inchi>'; +0273 formStart=strfind(modelSBML.species(i).annotation,startString); +0274 if isempty(formStart) +0275 startString='InChI='; +0276 endString='"/>'; +0277 end +0278 formStart=strfind(modelSBML.species(i).annotation,startString); +0279 if ~isempty(formStart) +0280 formEnd=strfind(modelSBML.species(i).annotation,endString); +0281 formEndIndex=find(formEnd>formStart, 1 ); +0282 formula=modelSBML.species(i).annotation(formStart+numel(startString):formEnd(formEndIndex)-1); +0283 metaboliteInChI{numel(metaboliteInChI)+1,1}=formula; +0284 +0285 %The composition is most often present between the +0286 %first and second "/" in the model. In some simple +0287 %molecules, such as salts, there is no second "/". The +0288 %formula is then assumed to be to the end of the string +0289 compositionIndexes=strfind(formula,'/'); +0290 if numel(compositionIndexes)>1 +0291 metaboliteFormula{numel(metaboliteFormula)+1,1}=... +0292 formula(compositionIndexes(1)+1:compositionIndexes(2)-1); +0293 else +0294 if numel(compositionIndexes)==1 +0295 %Probably a simple molecule which can have only +0296 %one conformation +0297 metaboliteFormula{numel(metaboliteFormula)+1,1}=... 
+0298 formula(compositionIndexes(1)+1:numel(formula)); +0299 else +0300 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0301 end +0302 end +0303 elseif isfield(modelSBML.species(i),'fbc_chemicalFormula') +0304 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; +0305 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) +0306 %Cannot extract InChi from formula, so remains +0307 %empty +0308 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).fbc_chemicalFormula; +0309 else +0310 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0311 end +0312 else +0313 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; +0314 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0315 end +0316 +0317 %Get Miriam info +0318 metMiriam=parseMiriam(modelSBML.species(i).annotation); +0319 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; +0320 else +0321 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; +0322 if isfield(modelSBML.species(i),'notes') +0323 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); +0324 else +0325 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0326 end +0327 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=[]; +0328 end +0329 if ~isempty(modelSBML.species(i).notes) +0330 if ~isfield(modelSBML.species(i),'annotation') +0331 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); +0332 end +0333 elseif ~isfield(modelSBML.species(i),'annotation') +0334 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0335 end +0336 %Get SBO term +0337 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) +0338 metSBOs(end+1,1) = modelSBML.species(i).sboTerm; +0339 end +0340 end +0341 +0342 %The following lines are executed regardless isSBML2COBRA setting +0343 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:2),'E_') +0344 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:3),'Cx_') +0345 
%Remove trailing [compartment] from metabolite name if present +0346 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},regexCompNames,''); +0347 metaboliteNames{end,1}=metaboliteNames{end,1}; +0348 if isfield(modelSBML.species(i),'fbc_charge') +0349 if ~isempty(modelSBML.species(i).fbc_charge) && modelSBML.species(i).isSetfbc_charge +0350 metaboliteCharges(numel(metaboliteCharges)+1,1)=double(modelSBML.species(i).fbc_charge); +0351 else +0352 if isfield(modelSBML.species(i),'notes') +0353 if strfind(modelSBML.species(i).notes,'CHARGE') +0354 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); +0355 else +0356 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0357 end +0358 else +0359 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0360 end +0361 end +0362 elseif isfield(modelSBML.species(i),'notes') +0363 if strfind(modelSBML.species(i).notes,'CHARGE') +0364 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); +0365 else +0366 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0367 end +0368 else +0369 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0370 end +0371 %Additional information from FBC format Chemical formula +0372 if isfield(modelSBML.species(i),'fbc_chemicalFormula') +0373 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) +0374 metaboliteFormula{numel(metaboliteFormula),1}=modelSBML.species(i).fbc_chemicalFormula; +0375 end +0376 end +0377 end +0378 end +0379 end +0380 +0381 %Add SBO terms to gene and metabolite miriam fields +0382 if numel(unique(geneSBOs)) > 1 % don't add if they're all identical +0383 for i = 1:numel(geneNames) +0384 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},geneSBOs(i)); +0385 end +0386 end +0387 if numel(unique(metSBOs)) > 1 +0388 for i = 1:numel(metaboliteNames) +0389 metaboliteMiriams{i} = addSBOtoMiriam(metaboliteMiriams{i},metSBOs(i)); +0390 end +0391 end +0392 +0393 %Retrieve info on 
reactions +0394 reactionNames=cell(numel(modelSBML.reaction),1); +0395 reactionIDs=cell(numel(modelSBML.reaction),1); +0396 subsystems=cell(numel(modelSBML.reaction),1); +0397 eccodes=cell(numel(modelSBML.reaction),1); +0398 eccodes(:,:)=cellstr(''); +0399 rxnconfidencescores=NaN(numel(modelSBML.reaction),1); +0400 rxnreferences=cell(numel(modelSBML.reaction),1); +0401 rxnreferences(:,:)=cellstr(''); +0402 rxnnotes=cell(numel(modelSBML.reaction),1); +0403 rxnnotes(:,:)=cellstr(''); +0404 grRules=cell(numel(modelSBML.reaction),1); +0405 grRules(:,:)=cellstr(''); +0406 grRulesFromModifier=grRules; +0407 rxnComps=zeros(numel(modelSBML.reaction),1); +0408 rxnMiriams=cell(numel(modelSBML.reaction),1); +0409 reactionReversibility=zeros(numel(modelSBML.reaction),1); +0410 reactionUB=zeros(numel(modelSBML.reaction),1); +0411 reactionLB=zeros(numel(modelSBML.reaction),1); +0412 reactionObjective=zeros(numel(modelSBML.reaction),1); +0413 +0414 %Construct the stoichiometric matrix while the reaction info is read +0415 S=zeros(numel(metaboliteIDs),numel(modelSBML.reaction)); +0416 +0417 counter=0; +0418 %If FBC, then bounds have parameter ids defined for the whole model +0419 if isfield(modelSBML,'parameter') +0420 parameter.name=cell(numel(modelSBML.parameter),1); +0421 parameter.name={modelSBML.parameter(:).id}'; +0422 parameter.value={modelSBML.parameter(:).value}'; +0423 end +0424 +0425 if isfield(modelSBML.reaction,'sboTerm') && numel(unique([modelSBML.reaction.sboTerm])) == 1 +0426 %If all the SBO terms are identical, don't add them to rxnMiriams +0427 modelSBML.reaction = rmfield(modelSBML.reaction,'sboTerm'); +0428 end +0429 +0430 for i=1:numel(modelSBML.reaction) +0431 +0432 %Check that the reaction doesn't produce a complex and nothing else. If +0433 %so, then jump to the next reaction. This is because I get the genes +0434 %for complexes from the names and not from the reactions that create +0435 %them. 
This only applies to the non-COBRA format +0436 if numel(modelSBML.reaction(i).product)==1 +0437 if length(modelSBML.reaction(i).product(1).species)>=3 +0438 if strcmp(modelSBML.reaction(i).product(1).species(1:3),'Cx_')==true +0439 continue; +0440 end +0441 end 0442 end -0443 end -0444 -0445 %Retrieve info on reactions -0446 reactionNames=cell(numel(modelSBML.reaction),1); -0447 reactionIDs=cell(numel(modelSBML.reaction),1); -0448 subsystems=cell(numel(modelSBML.reaction),1); -0449 eccodes=cell(numel(modelSBML.reaction),1); -0450 eccodes(:,:)=cellstr(''); -0451 rxnconfidencescores=NaN(numel(modelSBML.reaction),1); -0452 rxnreferences=cell(numel(modelSBML.reaction),1); -0453 rxnreferences(:,:)=cellstr(''); -0454 rxnnotes=cell(numel(modelSBML.reaction),1); -0455 rxnnotes(:,:)=cellstr(''); -0456 grRules=cell(numel(modelSBML.reaction),1); -0457 grRules(:,:)=cellstr(''); -0458 grRulesFromModifier=grRules; -0459 rxnComps=zeros(numel(modelSBML.reaction),1); -0460 rxnMiriams=cell(numel(modelSBML.reaction),1); -0461 reactionReversibility=zeros(numel(modelSBML.reaction),1); -0462 reactionUB=zeros(numel(modelSBML.reaction),1); -0463 reactionLB=zeros(numel(modelSBML.reaction),1); -0464 reactionObjective=zeros(numel(modelSBML.reaction),1); -0465 -0466 %Construct the stoichiometric matrix while the reaction info is read -0467 S=zeros(numel(metaboliteIDs),numel(modelSBML.reaction)); -0468 -0469 counter=0; -0470 %If FBC, then bounds have parameter ids defined for the whole model -0471 if isfield(modelSBML,'parameter') -0472 parameter.name=cell(numel(modelSBML.parameter),1); -0473 parameter.name={modelSBML.parameter(:).id}'; -0474 parameter.value={modelSBML.parameter(:).value}'; -0475 end -0476 -0477 if isfield(modelSBML.reaction,'sboTerm') && numel(unique([modelSBML.reaction.sboTerm])) == 1 -0478 %If all the SBO terms are identical, don't add them to rxnMiriams -0479 modelSBML.reaction = rmfield(modelSBML.reaction,'sboTerm'); -0480 end -0481 -0482 for 
i=1:numel(modelSBML.reaction) -0483 -0484 %Check that the reaction doesn't produce a complex and nothing else. If -0485 %so, then jump to the next reaction. This is because I get the genes -0486 %for complexes from the names and not from the reactions that create -0487 %them. This only applies to the non-COBRA format -0488 if numel(modelSBML.reaction(i).product)==1 -0489 if length(modelSBML.reaction(i).product(1).species)>=3 -0490 if strcmp(modelSBML.reaction(i).product(1).species(1:3),'Cx_')==true -0491 continue; -0492 end -0493 end -0494 end -0495 -0496 %It didn't look like a gene complex-forming reaction -0497 counter=counter+1; -0498 -0499 reactionNames{counter}=modelSBML.reaction(i).name; -0500 -0501 reactionIDs{counter}=modelSBML.reaction(i).id; -0502 reactionReversibility(counter)=modelSBML.reaction(i).reversible; -0503 -0504 %If model is FBC, first get parameter of bound and then replace it with -0505 %the correct value. Probably faster with replace(), but this was only -0506 %introduced in Matlab R2016b -0507 if isfield(modelSBML.reaction(i),'fbc_lowerFluxBound') -0508 lb=modelSBML.reaction(i).fbc_lowerFluxBound; -0509 ub=modelSBML.reaction(i).fbc_upperFluxBound; -0510 for n=1:numel(parameter.value) -0511 lb=regexprep(lb,parameter.name(n),num2str(parameter.value{n})); -0512 ub=regexprep(ub,parameter.name(n),num2str(parameter.value{n})); -0513 end -0514 if isempty(lb) -0515 lb='-Inf'; -0516 end -0517 if isempty(ub) -0518 ub='Inf'; -0519 end -0520 reactionLB(counter)=str2num(lb); -0521 reactionUB(counter)=str2num(ub); -0522 %The order of these parameters should not be hard coded -0523 elseif isfield(modelSBML.reaction(i).kineticLaw,'parameter') -0524 reactionLB(counter)=modelSBML.reaction(i).kineticLaw.parameter(1).value; -0525 reactionUB(counter)=modelSBML.reaction(i).kineticLaw.parameter(2).value; -0526 reactionObjective(counter)=modelSBML.reaction(i).kineticLaw.parameter(3).value; -0527 else -0528 if reactionReversibility(counter)==true -0529 
reactionLB(counter)=-inf; -0530 else -0531 reactionLB(counter)=0; -0532 end -0533 reactionUB(counter)=inf; -0534 reactionObjective(counter)=0; -0535 end -0536 -0537 %Find the associated gene if available -0538 %If FBC, get gene association data from corresponding fields -0539 if isfield(modelSBML.reaction(i),'fbc_geneProductAssociation') -0540 if ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation) && ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association) -0541 grRules{counter}=modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association; -0542 end -0543 elseif isfield(modelSBML.reaction(i),'notes') -0544 %This section was previously executed only if isSBML2COBRA is true. Now -0545 %it will be executed, if 'GENE_ASSOCIATION' is found in -0546 %modelSBML.reaction(i).notes -0547 if strfind(modelSBML.reaction(i).notes,'GENE_ASSOCIATION') -0548 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE_ASSOCIATION'); -0549 elseif strfind(modelSBML.reaction(i).notes,'GENE ASSOCIATION') -0550 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE ASSOCIATION'); -0551 else -0552 geneAssociation=''; -0553 end -0554 if ~isempty(geneAssociation) -0555 %This adds the grRules. 
The gene list and rxnGeneMat are created -0556 %later -0557 grRules{counter}=geneAssociation; -0558 end -0559 end -0560 if isempty(grRules{counter}) && ~isempty(modelSBML.reaction(i).modifier) -0561 rules=''; -0562 for j=1:numel(modelSBML.reaction(i).modifier) -0563 modifier=modelSBML.reaction(i).modifier(j).species; -0564 if ~isempty(modifier) -0565 if strcmpi(modifier(1:2),'E_') -0566 index=find(strcmp(modifier,geneIDs)); -0567 %This should be unique and in the geneIDs list, -0568 %otherwise something is wrong -0569 if numel(index)~=1 -0570 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; -0571 dispEM(EM); -0572 end -0573 if ~isempty(rules) -0574 rules=[rules ' or (' geneNames{index} ')']; -0575 else -0576 rules=['(' geneNames{index} ')']; -0577 end -0578 elseif strcmp(modifier(1:2),'s_') -0579 index=find(strcmp(modifier,metaboliteIDs)); -0580 %This should be unique and in the geneIDs list, -0581 %otherwise something is wrong -0582 if numel(index)~=1 -0583 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; -0584 dispEM(EM); -0585 end -0586 if ~isempty(rules) -0587 rules=[rules ' or (' metaboliteIDs{index} ')']; -0588 else -0589 rules=['(' metaboliteIDs{index} ')']; -0590 end -0591 else -0592 %It seems to be a complex. 
Add the corresponding -0593 %genes from the name of the complex (not the -0594 %reaction that creates it) -0595 index=find(strcmp(modifier,complexIDs)); -0596 if numel(index)==1 -0597 if ~isempty(rules) -0598 rules=[rules ' or (' strrep(complexNames{index},':',' and ') ')']; -0599 else -0600 rules=['(' strrep(complexNames{index},':',' and ') ')']; -0601 end -0602 else -0603 %Could not find a complex -0604 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; -0605 dispEM(EM); -0606 end -0607 end -0608 end +0443 +0444 %It didn't look like a gene complex-forming reaction +0445 counter=counter+1; +0446 +0447 reactionNames{counter}=modelSBML.reaction(i).name; +0448 +0449 reactionIDs{counter}=modelSBML.reaction(i).id; +0450 reactionReversibility(counter)=modelSBML.reaction(i).reversible; +0451 +0452 %If model is FBC, first get parameter of bound and then replace it with +0453 %the correct value. Probably faster with replace(), but this was only +0454 %introduced in Matlab R2016b +0455 if isfield(modelSBML.reaction(i),'fbc_lowerFluxBound') +0456 lb=modelSBML.reaction(i).fbc_lowerFluxBound; +0457 ub=modelSBML.reaction(i).fbc_upperFluxBound; +0458 for n=1:numel(parameter.value) +0459 lb=regexprep(lb,parameter.name(n),num2str(parameter.value{n})); +0460 ub=regexprep(ub,parameter.name(n),num2str(parameter.value{n})); +0461 end +0462 if isempty(lb) +0463 lb='-Inf'; +0464 end +0465 if isempty(ub) +0466 ub='Inf'; +0467 end +0468 reactionLB(counter)=str2num(lb); +0469 reactionUB(counter)=str2num(ub); +0470 %The order of these parameters should not be hard coded +0471 elseif isfield(modelSBML.reaction(i).kineticLaw,'parameter') +0472 reactionLB(counter)=modelSBML.reaction(i).kineticLaw.parameter(1).value; +0473 reactionUB(counter)=modelSBML.reaction(i).kineticLaw.parameter(2).value; +0474 reactionObjective(counter)=modelSBML.reaction(i).kineticLaw.parameter(3).value; +0475 else +0476 if reactionReversibility(counter)==true +0477 reactionLB(counter)=-inf; 
+0478 else +0479 reactionLB(counter)=0; +0480 end +0481 reactionUB(counter)=inf; +0482 reactionObjective(counter)=0; +0483 end +0484 +0485 %Find the associated gene if available +0486 %If FBC, get gene association data from corresponding fields +0487 if isfield(modelSBML.reaction(i),'fbc_geneProductAssociation') +0488 if ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation) && ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association) +0489 grRules{counter}=modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association; +0490 end +0491 elseif isfield(modelSBML.reaction(i),'notes') +0492 %This section was previously executed only if isSBML2COBRA is true. Now +0493 %it will be executed, if 'GENE_ASSOCIATION' is found in +0494 %modelSBML.reaction(i).notes +0495 if strfind(modelSBML.reaction(i).notes,'GENE_ASSOCIATION') +0496 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE_ASSOCIATION'); +0497 elseif strfind(modelSBML.reaction(i).notes,'GENE ASSOCIATION') +0498 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE ASSOCIATION'); +0499 else +0500 geneAssociation=''; +0501 end +0502 if ~isempty(geneAssociation) +0503 %This adds the grRules. 
The gene list and rxnGeneMat are created +0504 %later +0505 grRules{counter}=geneAssociation; +0506 end +0507 end +0508 if isempty(grRules{counter}) && ~isempty(modelSBML.reaction(i).modifier) +0509 rules=''; +0510 for j=1:numel(modelSBML.reaction(i).modifier) +0511 modifier=modelSBML.reaction(i).modifier(j).species; +0512 if ~isempty(modifier) +0513 if strcmpi(modifier(1:2),'E_') +0514 index=find(strcmp(modifier,geneIDs)); +0515 %This should be unique and in the geneIDs list, +0516 %otherwise something is wrong +0517 if numel(index)~=1 +0518 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; +0519 dispEM(EM); +0520 end +0521 if ~isempty(rules) +0522 rules=[rules ' or (' geneNames{index} ')']; +0523 else +0524 rules=['(' geneNames{index} ')']; +0525 end +0526 elseif strcmp(modifier(1:2),'s_') +0527 index=find(strcmp(modifier,metaboliteIDs)); +0528 %This should be unique and in the geneIDs list, +0529 %otherwise something is wrong +0530 if numel(index)~=1 +0531 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; +0532 dispEM(EM); +0533 end +0534 if ~isempty(rules) +0535 rules=[rules ' or (' metaboliteIDs{index} ')']; +0536 else +0537 rules=['(' metaboliteIDs{index} ')']; +0538 end +0539 else +0540 %It seems to be a complex. 
Add the corresponding +0541 %genes from the name of the complex (not the +0542 %reaction that creates it) +0543 index=find(strcmp(modifier,complexIDs)); +0544 if numel(index)==1 +0545 if ~isempty(rules) +0546 rules=[rules ' or (' strrep(complexNames{index},':',' and ') ')']; +0547 else +0548 rules=['(' strrep(complexNames{index},':',' and ') ')']; +0549 end +0550 else +0551 %Could not find a complex +0552 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; +0553 dispEM(EM); +0554 end +0555 end +0556 end +0557 end +0558 grRules{counter}=rules; +0559 grRulesFromModifier{counter}=rules;%Backup copy for grRules, useful to parse Yeast 7.6 +0560 end +0561 +0562 %Add reaction compartment +0563 if isfield(modelSBML.reaction(i),'compartment') +0564 if ~isempty(modelSBML.reaction(i).compartment) +0565 rxnComp=modelSBML.reaction(i).compartment; +0566 else +0567 rxnComp=''; +0568 end +0569 elseif isfield(modelSBML.reaction(i),'notes') +0570 rxnComp=parseNote(modelSBML.reaction(i).notes,'COMPARTMENT'); +0571 end +0572 if ~isempty(rxnComp) +0573 %Find it in the compartment list +0574 [~, J]=ismember(rxnComp,compartmentIDs); +0575 rxnComps(counter)=J; +0576 end +0577 +0578 +0579 miriamStruct=parseMiriam(modelSBML.reaction(i).annotation); +0580 rxnMiriams{counter}=miriamStruct; +0581 if isfield(modelSBML.reaction(i),'notes') +0582 subsystems{counter,1}=cellstr(parseNote(modelSBML.reaction(i).notes,'SUBSYSTEM')); +0583 subsystems{counter,1}(cellfun('isempty',subsystems{counter,1})) = []; +0584 if strfind(modelSBML.reaction(i).notes,'Confidence Level') +0585 confScore = parseNote(modelSBML.reaction(i).notes,'Confidence Level'); +0586 if isempty(confScore) +0587 confScore = 0; +0588 end +0589 rxnconfidencescores(counter)=str2double(confScore); +0590 end +0591 rxnreferences{counter,1}=parseNote(modelSBML.reaction(i).notes,'AUTHORS'); +0592 rxnnotes{counter,1}=parseNote(modelSBML.reaction(i).notes,'NOTES'); +0593 end +0594 +0595 %Get SBO terms +0596 if 
isfield(modelSBML.reaction(i),'sboTerm') && ~(modelSBML.reaction(i).sboTerm==-1) +0597 rxnMiriams{counter} = addSBOtoMiriam(rxnMiriams{counter}, modelSBML.reaction(i).sboTerm); +0598 end +0599 +0600 %Get ec-codes +0601 eccode=''; +0602 if ~isempty(modelSBML.reaction(i).annotation) +0603 if strfind(modelSBML.reaction(i).annotation,'urn:miriam:ec-code') +0604 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'urn:miriam:',':','ec-code'); +0605 elseif strfind(modelSBML.reaction(i).annotation,'http://identifiers.org/ec-code') +0606 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'http://identifiers.org/','/','ec-code'); +0607 elseif strfind(modelSBML.reaction(i).annotation,'https://identifiers.org/ec-code') +0608 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'https://identifiers.org/','/','ec-code'); 0609 end -0610 grRules{counter}=rules; -0611 grRulesFromModifier{counter}=rules;%Backup copy for grRules, useful to parse Yeast 7.6 -0612 end -0613 -0614 %Add reaction compartment -0615 if isfield(modelSBML.reaction(i),'compartment') -0616 if ~isempty(modelSBML.reaction(i).compartment) -0617 rxnComp=modelSBML.reaction(i).compartment; -0618 else -0619 rxnComp=''; -0620 end -0621 elseif isfield(modelSBML.reaction(i),'notes') -0622 rxnComp=parseNote(modelSBML.reaction(i).notes,'COMPARTMENT'); -0623 end -0624 if ~isempty(rxnComp) -0625 %Find it in the compartment list -0626 [~, J]=ismember(rxnComp,compartmentIDs); -0627 rxnComps(counter)=J; -0628 end -0629 -0630 %Get other Miriam fields. This may include for example database indexes -0631 %to organism-specific databases. 
EC-codes are supported by the COBRA -0632 %Toolbox format and are therefore loaded separately -0633 if isSBML2COBRA==false -0634 miriamStruct=parseMiriam(modelSBML.reaction(i).annotation); -0635 rxnMiriams{counter}=miriamStruct; -0636 if isfield(modelSBML.reaction(i),'notes') -0637 subsystems{counter,1}=cellstr(parseNote(modelSBML.reaction(i).notes,'SUBSYSTEM')); -0638 subsystems{counter,1}(cellfun('isempty',subsystems{counter,1})) = []; -0639 if strfind(modelSBML.reaction(i).notes,'Confidence Level') -0640 confScore = parseNote(modelSBML.reaction(i).notes,'Confidence Level'); -0641 if isempty(confScore) -0642 confScore = 0; -0643 end -0644 rxnconfidencescores(counter)=str2double(confScore); -0645 end -0646 rxnreferences{counter,1}=parseNote(modelSBML.reaction(i).notes,'AUTHORS'); -0647 rxnnotes{counter,1}=parseNote(modelSBML.reaction(i).notes,'NOTES'); -0648 end -0649 end -0650 -0651 %Get SBO terms -0652 if isfield(modelSBML.reaction(i),'sboTerm') && ~(modelSBML.reaction(i).sboTerm==-1) -0653 rxnMiriams{counter} = addSBOtoMiriam(rxnMiriams{counter}, modelSBML.reaction(i).sboTerm); -0654 end -0655 -0656 %Get ec-codes -0657 eccode=''; -0658 if ~isempty(modelSBML.reaction(i).annotation) -0659 if strfind(modelSBML.reaction(i).annotation,'urn:miriam:ec-code') -0660 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'urn:miriam:',':','ec-code'); -0661 elseif strfind(modelSBML.reaction(i).annotation,'http://identifiers.org/ec-code') -0662 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'http://identifiers.org/','/','ec-code'); -0663 elseif strfind(modelSBML.reaction(i).annotation,'https://identifiers.org/ec-code') -0664 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'https://identifiers.org/','/','ec-code'); -0665 end -0666 elseif isfield(modelSBML.reaction(i),'notes') -0667 if strfind(modelSBML.reaction(i).notes,'EC Number') -0668 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'EC Number')]; -0669 elseif 
strfind(modelSBML.reaction(i).notes,'PROTEIN_CLASS') -0670 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'PROTEIN_CLASS')]; +0610 elseif isfield(modelSBML.reaction(i),'notes') +0611 if strfind(modelSBML.reaction(i).notes,'EC Number') +0612 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'EC Number')]; +0613 elseif strfind(modelSBML.reaction(i).notes,'PROTEIN_CLASS') +0614 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'PROTEIN_CLASS')]; +0615 end +0616 end +0617 eccodes{counter}=eccode; +0618 +0619 %Add all reactants +0620 for j=1:numel(modelSBML.reaction(i).reactant) +0621 %Get the index of the metabolite in metaboliteIDs. External +0622 %metabolites will be removed at a later stage +0623 metIndex=find(strcmp(modelSBML.reaction(i).reactant(j).species,metaboliteIDs),1); +0624 if isempty(metIndex) +0625 EM=['Could not find metabolite ' modelSBML.reaction(i).reactant(j).species ' in reaction ' reactionIDs{counter}]; +0626 dispEM(EM); +0627 end +0628 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).reactant(j).stoichiometry*-1; +0629 end +0630 +0631 %Add all products +0632 for j=1:numel(modelSBML.reaction(i).product) +0633 %Get the index of the metabolite in metaboliteIDs. +0634 metIndex=find(strcmp(modelSBML.reaction(i).product(j).species,metaboliteIDs),1); +0635 if isempty(metIndex) +0636 EM=['Could not find metabolite ' modelSBML.reaction(i).product(j).species ' in reaction ' reactionIDs{counter}]; +0637 dispEM(EM); +0638 end +0639 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).product(j).stoichiometry; +0640 end +0641 end +0642 +0643 %if FBC, objective function is separately defined. 
Multiple objective +0644 %functions can be defined, one is set as active +0645 if isfield(modelSBML, 'fbc_activeObjective') +0646 obj=modelSBML.fbc_activeObjective; +0647 for i=1:numel(modelSBML.fbc_objective) +0648 if strcmp(obj,modelSBML.fbc_objective(i).fbc_id) +0649 if ~isempty(modelSBML.fbc_objective(i).fbc_fluxObjective) +0650 rxn=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction; +0651 idx=find(ismember(reactionIDs,rxn)); +0652 reactionObjective(idx)=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient; +0653 end +0654 end +0655 end +0656 end +0657 +0658 %subSystems can be stored as groups instead of in annotations +0659 if isfield(modelSBML,'groups_group') +0660 for i=1:numel(modelSBML.groups_group) +0661 groupreactions={modelSBML.groups_group(i).groups_member(:).groups_idRef}; +0662 [~, idx] = ismember(groupreactions, reactionIDs); +0663 if any(idx) +0664 for j=1:numel(idx) +0665 if isempty(subsystems{idx(j)}) % First subsystem +0666 subsystems{idx(j)} = {modelSBML.groups_group(i).groups_name}; +0667 else % Consecutive subsystems: concatenate +0668 subsystems{idx(j)} = horzcat(subsystems{idx(j)}, modelSBML.groups_group(i).groups_name); +0669 end +0670 end 0671 end 0672 end -0673 eccodes{counter}=eccode; -0674 -0675 %Add all reactants -0676 for j=1:numel(modelSBML.reaction(i).reactant) -0677 %Get the index of the metabolite in metaboliteIDs. External -0678 %metabolites will be removed at a later stage -0679 metIndex=find(strcmp(modelSBML.reaction(i).reactant(j).species,metaboliteIDs),1); -0680 if isempty(metIndex) -0681 EM=['Could not find metabolite ' modelSBML.reaction(i).reactant(j).species ' in reaction ' reactionIDs{counter}]; -0682 dispEM(EM); -0683 end -0684 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).reactant(j).stoichiometry*-1; -0685 end -0686 -0687 %Add all products -0688 for j=1:numel(modelSBML.reaction(i).product) -0689 %Get the index of the metabolite in metaboliteIDs. 
-0690 metIndex=find(strcmp(modelSBML.reaction(i).product(j).species,metaboliteIDs),1); -0691 if isempty(metIndex) -0692 EM=['Could not find metabolite ' modelSBML.reaction(i).product(j).species ' in reaction ' reactionIDs{counter}]; -0693 dispEM(EM); -0694 end -0695 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).product(j).stoichiometry; -0696 end -0697 end -0698 -0699 %if FBC, objective function is separately defined. Multiple objective -0700 %functions can be defined, one is set as active -0701 if isfield(modelSBML, 'fbc_activeObjective') -0702 obj=modelSBML.fbc_activeObjective; -0703 for i=1:numel(modelSBML.fbc_objective) -0704 if strcmp(obj,modelSBML.fbc_objective(i).fbc_id) -0705 if ~isempty(modelSBML.fbc_objective(i).fbc_fluxObjective) -0706 rxn=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction; -0707 rxn=regexprep(rxn,'^R_',''); -0708 idx=find(ismember(reactionIDs,rxn)); -0709 reactionObjective(idx)=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient; -0710 end -0711 end -0712 end -0713 end -0714 -0715 %subSystems can be stored as groups instead of in annotations -0716 if isfield(modelSBML,'groups_group') -0717 for i=1:numel(modelSBML.groups_group) -0718 groupreactions={modelSBML.groups_group(i).groups_member(:).groups_idRef}; -0719 groupreactions=regexprep(groupreactions,'^R_',''); -0720 [~, idx] = ismember(groupreactions, reactionIDs); -0721 if any(idx) -0722 for j=1:numel(idx) -0723 if isempty(subsystems{idx(j)}) % First subsystem -0724 subsystems{idx(j)} = {modelSBML.groups_group(i).groups_name}; -0725 else % Consecutive subsystems: concatenate -0726 subsystems{idx(j)} = horzcat(subsystems{idx(j)}, modelSBML.groups_group(i).groups_name); -0727 end -0728 end -0729 end -0730 end -0731 end -0732 -0733 %Shrink the structures if complex-forming reactions had to be skipped -0734 reactionNames=reactionNames(1:counter); -0735 reactionIDs=reactionIDs(1:counter); -0736 subsystems=subsystems(1:counter); -0737 
eccodes=eccodes(1:counter); -0738 rxnconfidencescores=rxnconfidencescores(1:counter); -0739 rxnreferences=rxnreferences(1:counter); -0740 rxnnotes=rxnnotes(1:counter); -0741 grRules=grRules(1:counter); -0742 rxnMiriams=rxnMiriams(1:counter); -0743 reactionReversibility=reactionReversibility(1:counter); -0744 reactionUB=reactionUB(1:counter); -0745 reactionLB=reactionLB(1:counter); -0746 reactionObjective=reactionObjective(1:counter); -0747 S=S(:,1:counter); -0748 -0749 model.name=modelSBML.name; -0750 model.id=regexprep(modelSBML.id,'^M_',''); % COBRA adds M_ prefix -0751 model.rxns=reactionIDs; -0752 model.mets=metaboliteIDs; -0753 model.S=sparse(S); -0754 model.lb=reactionLB; -0755 model.ub=reactionUB; -0756 model.rev=reactionReversibility; -0757 model.c=reactionObjective; -0758 model.b=zeros(numel(metaboliteIDs),1); -0759 model.comps=compartmentIDs; -0760 model.compNames=compartmentNames; -0761 model.rxnConfidenceScores=rxnconfidencescores; -0762 model.rxnReferences=rxnreferences; -0763 model.rxnNotes=rxnnotes; -0764 -0765 %Load annotation if available. 
If there are several authors, only the first -0766 %author credentials are imported -0767 if isfield(modelSBML,'annotation') -0768 endString='</'; -0769 I=strfind(modelSBML.annotation,endString); -0770 J=strfind(modelSBML.annotation,'<vCard:Family>'); -0771 if any(J) -0772 model.annotation.familyName=modelSBML.annotation(J(1)+14:I(find(I>J(1),1))-1); -0773 end -0774 J=strfind(modelSBML.annotation,'<vCard:Given>'); -0775 if any(J) -0776 model.annotation.givenName=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); -0777 end -0778 J=strfind(modelSBML.annotation,'<vCard:EMAIL>'); -0779 if any(J) -0780 model.annotation.email=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); -0781 end -0782 J=strfind(modelSBML.annotation,'<vCard:Orgname>'); -0783 if any(J) -0784 model.annotation.organization=modelSBML.annotation(J(1)+15:I(find(I>J(1),1))-1); -0785 end -0786 endString='"/>'; -0787 I=strfind(modelSBML.annotation,endString); -0788 if strfind(modelSBML.annotation,'"urn:miriam:') -0789 J=strfind(modelSBML.annotation,'"urn:miriam:'); -0790 if any(J) -0791 model.annotation.taxonomy=modelSBML.annotation(J+12:I(find(I>J,1))-1); -0792 end -0793 else -0794 J=strfind(modelSBML.annotation,'"http://identifiers.org/'); -0795 if any(J) -0796 model.annotation.taxonomy=modelSBML.annotation(J+24:I(find(I>J,1))-1); -0797 else -0798 J=strfind(modelSBML.annotation,'"https://identifiers.org/'); -0799 if any(J) -0800 model.annotation.taxonomy=modelSBML.annotation(J+25:I(find(I>J,1))-1); -0801 end -0802 end -0803 end -0804 end -0805 if isfield(modelSBML,'notes') -0806 startString=strfind(modelSBML.notes,'xhtml">'); -0807 endString=strfind(modelSBML.notes,'</body>'); -0808 if any(startString) && any(endString) -0809 model.annotation.note=modelSBML.notes(startString+7:endString-1); -0810 model.annotation.note=regexprep(model.annotation.note,'<p>|</p>',''); -0811 model.annotation.note=strtrim(model.annotation.note); -0812 if regexp(model.annotation.note,'This file was generated using the 
exportModel function in RAVEN Toolbox \d\.\d and OutputSBML in libSBML') -0813 model.annotation=rmfield(model.annotation,'note'); % Default note added when running exportModel -0814 end -0815 end -0816 end -0817 -0818 if any(~cellfun(@isempty,compartmentOutside)) -0819 model.compOutside=compartmentOutside; -0820 end -0821 -0822 model.rxnNames=reactionNames; -0823 model.metNames=metaboliteNames; -0824 -0825 %Match the compartments for metabolites -0826 [~, J]=ismember(metaboliteCompartments,model.comps); -0827 model.metComps=J; -0828 -0829 %If any genes have been loaded (only for the new format) -0830 if ~isempty(geneNames) -0831 %In some rare cases geneNames may not necessarily be used in grRules. -0832 %That is true for Yeast 7.6. It's therefore important to change gene -0833 %systematic names to geneIDs in sophisticated way. Gene systematic -0834 %names are not unique, since exactly the same name may be in different -0835 %compartments -0836 if all(cellfun(@isempty,strfind(grRules,geneNames{1}))) -0837 geneShortNames=geneNames; -0838 %geneShortNames contain compartments as well, so these are removed -0839 geneShortNames=regexprep(geneShortNames,' \[.+$',''); -0840 %grRules obtained from modifier fields contain geneNames. These are -0841 %changed into geneIDs. grRulesFromModifier is a good way to have -0842 %geneIDs and rxns association when it's important to resolve -0843 %systematic name ambiguities -0844 grRulesFromModifier=regexprep(regexprep(grRulesFromModifier,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); -0845 grRules=regexprep(regexprep(grRules,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); -0846 -0847 %Yeast 7.6 contains several metabolites, which were used in gene -0848 %associations. 
For that reason, the list of species ID is created -0849 %and we then check whether any of them have kegg.genes annotation -0850 %thereby obtaining systematic gene names -0851 geneShortNames=vertcat(geneShortNames,metaboliteNames); -0852 geneIDs=vertcat(geneIDs,metaboliteIDs); -0853 geneSystNames=extractMiriam(vertcat(geneMiriams,metaboliteMiriams),'kegg.genes'); -0854 geneCompartments=vertcat(geneCompartments,metaboliteCompartments); -0855 geneMiriams=vertcat(geneMiriams,metaboliteMiriams); -0856 -0857 %Now we retain information for only these entries, which have -0858 %kegg.genes annotation -0859 geneShortNames=geneShortNames(~cellfun('isempty',geneSystNames)); -0860 geneIDs=geneIDs(~cellfun('isempty',geneSystNames)); -0861 geneSystNames=geneSystNames(~cellfun('isempty',geneSystNames)); -0862 geneCompartments=geneCompartments(~cellfun('isempty',geneSystNames)); -0863 geneMiriams=geneMiriams(~cellfun('isempty',geneSystNames)); -0864 %Now we reorder geneIDs and geneSystNames by geneSystNames string -0865 %length -0866 geneNames=geneIDs;%Backuping geneIDs, since we need unsorted order for later -0867 [~, Indx] = sort(cellfun('size', geneSystNames, 2), 'descend'); -0868 geneIDs = geneIDs(Indx); -0869 geneSystNames = geneSystNames(Indx); -0870 for i=1:numel(geneSystNames) -0871 for j=1:numel(grRules) -0872 if strfind(grRules{j},geneSystNames{i}) -0873 if ~isempty(grRules{j}) -0874 if sum(ismember(geneSystNames,geneSystNames{i}))==1 -0875 grRules{j}=regexprep(grRules{j},geneSystNames{i},geneIDs{i}); -0876 elseif sum(ismember(geneSystNames,geneSystNames{i}))>1 -0877 counter=0; -0878 ovrlpIDs=geneIDs(ismember(geneSystNames,geneSystNames{i})); -0879 for k=1:numel(ovrlpIDs) -0880 if strfind(grRulesFromModifier{j},ovrlpIDs{k}) -0881 counter=counter+1; -0882 grRules{j}=regexprep(grRules{j},geneSystNames{i},ovrlpIDs{k}); -0883 end -0884 if counter>1 -0885 EM=['Gene association is ambiguous for reaction ' modelSBML.reaction(j).id]; -0886 dispEM(EM); -0887 end -0888 end -0889 
end -0890 end -0891 end -0892 end -0893 end -0894 end -0895 model.genes=geneNames; -0896 model.grRules=grRules; -0897 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0898 model.grRules = grRules; -0899 model.rxnGeneMat = rxnGeneMat; -0900 -0901 %Match the compartments for genes -0902 [~, J]=ismember(geneCompartments,model.comps); -0903 model.geneComps=J; -0904 else -0905 if ~all(cellfun(@isempty,grRules)) -0906 %If fbc_geneProduct exists, follow the specified gene order, such -0907 %that matching geneShortNames in function below will work -0908 if isfield(modelSBML,'fbc_geneProduct') -0909 genes={modelSBML.fbc_geneProduct.fbc_id}; -0910 -0911 %Get gene Miriams if they were not retrieved above (this occurs -0912 %when genes are stored as fbc_geneProduct instead of species) -0913 if isempty(geneMiriams) -0914 geneMiriams = cell(numel(genes),1); -0915 if isfield(modelSBML.fbc_geneProduct,'sboTerm') && numel(unique([modelSBML.fbc_geneProduct.sboTerm])) == 1 -0916 %If all the SBO terms are identical, don't add them to geneMiriams -0917 modelSBML.fbc_geneProduct = rmfield(modelSBML.fbc_geneProduct,'sboTerm'); -0918 end -0919 for i = 1:numel(genes) -0920 geneMiriams{i}=parseMiriam(modelSBML.fbc_geneProduct(i).annotation); -0921 if isfield(modelSBML.fbc_geneProduct(i),'sboTerm') && ~(modelSBML.fbc_geneProduct(i).sboTerm==-1) -0922 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},modelSBML.fbc_geneProduct(i).sboTerm); -0923 end -0924 end -0925 end -0926 else -0927 genes=getGeneList(grRules); -0928 end -0929 if strcmpi(genes{1}(1:2),'G_') -0930 genes=regexprep(genes,'^G_',''); -0931 grRules=regexprep(grRules,'^G_',''); -0932 grRules=regexprep(grRules,'\(G_','('); -0933 grRules=regexprep(grRules,' G_',' '); -0934 end -0935 model.genes=genes; -0936 model.grRules=grRules; -0937 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0938 model.grRules = grRules; -0939 model.rxnGeneMat = rxnGeneMat; -0940 end -0941 end -0942 -0943 if all(cellfun(@isempty,geneShortNames)) 
-0944 if isfield(modelSBML,'fbc_geneProduct') -0945 for i=1:numel(genes) -0946 if ~isempty(modelSBML.fbc_geneProduct(i).fbc_label) -0947 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_label; -0948 elseif ~isempty(modelSBML.fbc_geneProduct(i).fbc_name) -0949 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_name; -0950 else -0951 geneShortNames{i,1}=''; -0952 end -0953 end -0954 end -0955 end -0956 -0957 %If any InChIs have been loaded -0958 if any(~cellfun(@isempty,metaboliteInChI)) -0959 model.inchis=metaboliteInChI; -0960 end +0673 end +0674 +0675 %Shrink the structures if complex-forming reactions had to be skipped +0676 reactionNames=reactionNames(1:counter); +0677 reactionIDs=reactionIDs(1:counter); +0678 subsystems=subsystems(1:counter); +0679 eccodes=eccodes(1:counter); +0680 rxnconfidencescores=rxnconfidencescores(1:counter); +0681 rxnreferences=rxnreferences(1:counter); +0682 rxnnotes=rxnnotes(1:counter); +0683 grRules=grRules(1:counter); +0684 rxnMiriams=rxnMiriams(1:counter); +0685 reactionReversibility=reactionReversibility(1:counter); +0686 reactionUB=reactionUB(1:counter); +0687 reactionLB=reactionLB(1:counter); +0688 reactionObjective=reactionObjective(1:counter); +0689 S=S(:,1:counter); +0690 +0691 model.name=modelSBML.name; +0692 model.id=modelSBML.id; +0693 model.rxns=reactionIDs; +0694 model.mets=metaboliteIDs; +0695 model.S=sparse(S); +0696 model.lb=reactionLB; +0697 model.ub=reactionUB; +0698 model.rev=reactionReversibility; +0699 model.c=reactionObjective; +0700 model.b=zeros(numel(metaboliteIDs),1); +0701 model.comps=compartmentIDs; +0702 model.compNames=compartmentNames; +0703 model.rxnConfidenceScores=rxnconfidencescores; +0704 model.rxnReferences=rxnreferences; +0705 model.rxnNotes=rxnnotes; +0706 +0707 %Load annotation if available. 
If there are several authors, only the first +0708 %author credentials are imported +0709 if isfield(modelSBML,'annotation') +0710 endString='</'; +0711 I=strfind(modelSBML.annotation,endString); +0712 J=strfind(modelSBML.annotation,'<vCard:Family>'); +0713 if any(J) +0714 model.annotation.familyName=modelSBML.annotation(J(1)+14:I(find(I>J(1),1))-1); +0715 end +0716 J=strfind(modelSBML.annotation,'<vCard:Given>'); +0717 if any(J) +0718 model.annotation.givenName=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); +0719 end +0720 J=strfind(modelSBML.annotation,'<vCard:EMAIL>'); +0721 if any(J) +0722 model.annotation.email=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); +0723 end +0724 J=strfind(modelSBML.annotation,'<vCard:Orgname>'); +0725 if any(J) +0726 model.annotation.organization=modelSBML.annotation(J(1)+15:I(find(I>J(1),1))-1); +0727 end +0728 endString='"/>'; +0729 I=strfind(modelSBML.annotation,endString); +0730 if strfind(modelSBML.annotation,'"urn:miriam:') +0731 J=strfind(modelSBML.annotation,'"urn:miriam:'); +0732 if any(J) +0733 model.annotation.taxonomy=modelSBML.annotation(J+12:I(find(I>J,1))-1); +0734 end +0735 else +0736 J=strfind(modelSBML.annotation,'"http://identifiers.org/'); +0737 if any(J) +0738 model.annotation.taxonomy=modelSBML.annotation(J+24:I(find(I>J,1))-1); +0739 else +0740 J=strfind(modelSBML.annotation,'"https://identifiers.org/'); +0741 if any(J) +0742 model.annotation.taxonomy=modelSBML.annotation(J+25:I(find(I>J,1))-1); +0743 end +0744 end +0745 end +0746 end +0747 if isfield(modelSBML,'notes') +0748 startString=strfind(modelSBML.notes,'xhtml">'); +0749 endString=strfind(modelSBML.notes,'</body>'); +0750 if any(startString) && any(endString) +0751 model.annotation.note=modelSBML.notes(startString+7:endString-1); +0752 model.annotation.note=regexprep(model.annotation.note,'<p>|</p>',''); +0753 model.annotation.note=strtrim(model.annotation.note); +0754 if regexp(model.annotation.note,'This file was generated using the 
exportModel function in RAVEN Toolbox \d\.\d and OutputSBML in libSBML') +0755 model.annotation=rmfield(model.annotation,'note'); % Default note added when running exportModel +0756 end +0757 end +0758 end +0759 +0760 if any(~cellfun(@isempty,compartmentOutside)) +0761 model.compOutside=compartmentOutside; +0762 end +0763 +0764 model.rxnNames=reactionNames; +0765 model.metNames=metaboliteNames; +0766 +0767 %Match the compartments for metabolites +0768 [~, J]=ismember(metaboliteCompartments,model.comps); +0769 model.metComps=J; +0770 +0771 %If any genes have been loaded (only for the new format) +0772 if ~isempty(geneNames) +0773 %In some rare cases geneNames may not necessarily be used in grRules. +0774 %That is true for Yeast 7.6. It's therefore important to change gene +0775 %systematic names to geneIDs in sophisticated way. Gene systematic +0776 %names are not unique, since exactly the same name may be in different +0777 %compartments +0778 if all(cellfun(@isempty,strfind(grRules,geneNames{1}))) +0779 geneShortNames=geneNames; +0780 %geneShortNames contain compartments as well, so these are removed +0781 geneShortNames=regexprep(geneShortNames,' \[.+$',''); +0782 %grRules obtained from modifier fields contain geneNames. These are +0783 %changed into geneIDs. grRulesFromModifier is a good way to have +0784 %geneIDs and rxns association when it's important to resolve +0785 %systematic name ambiguities +0786 grRulesFromModifier=regexprep(regexprep(grRulesFromModifier,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); +0787 grRules=regexprep(regexprep(grRules,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); +0788 +0789 %Yeast 7.6 contains several metabolites, which were used in gene +0790 %associations. 
For that reason, the list of species ID is created +0791 %and we then check whether any of them have kegg.genes annotation +0792 %thereby obtaining systematic gene names +0793 geneShortNames=vertcat(geneShortNames,metaboliteNames); +0794 geneIDs=vertcat(geneIDs,metaboliteIDs); +0795 geneSystNames=extractMiriam(vertcat(geneMiriams,metaboliteMiriams),'kegg.genes'); +0796 geneCompartments=vertcat(geneCompartments,metaboliteCompartments); +0797 geneMiriams=vertcat(geneMiriams,metaboliteMiriams); +0798 +0799 %Now we retain information for only these entries, which have +0800 %kegg.genes annotation +0801 geneShortNames=geneShortNames(~cellfun('isempty',geneSystNames)); +0802 geneIDs=geneIDs(~cellfun('isempty',geneSystNames)); +0803 geneSystNames=geneSystNames(~cellfun('isempty',geneSystNames)); +0804 geneCompartments=geneCompartments(~cellfun('isempty',geneSystNames)); +0805 geneMiriams=geneMiriams(~cellfun('isempty',geneSystNames)); +0806 %Now we reorder geneIDs and geneSystNames by geneSystNames string +0807 %length +0808 geneNames=geneIDs;%Backuping geneIDs, since we need unsorted order for later +0809 [~, Indx] = sort(cellfun('size', geneSystNames, 2), 'descend'); +0810 geneIDs = geneIDs(Indx); +0811 geneSystNames = geneSystNames(Indx); +0812 for i=1:numel(geneSystNames) +0813 for j=1:numel(grRules) +0814 if strfind(grRules{j},geneSystNames{i}) +0815 if ~isempty(grRules{j}) +0816 if sum(ismember(geneSystNames,geneSystNames{i}))==1 +0817 grRules{j}=regexprep(grRules{j},geneSystNames{i},geneIDs{i}); +0818 elseif sum(ismember(geneSystNames,geneSystNames{i}))>1 +0819 counter=0; +0820 ovrlpIDs=geneIDs(ismember(geneSystNames,geneSystNames{i})); +0821 for k=1:numel(ovrlpIDs) +0822 if strfind(grRulesFromModifier{j},ovrlpIDs{k}) +0823 counter=counter+1; +0824 grRules{j}=regexprep(grRules{j},geneSystNames{i},ovrlpIDs{k}); +0825 end +0826 if counter>1 +0827 EM=['Gene association is ambiguous for reaction ' modelSBML.reaction(j).id]; +0828 dispEM(EM); +0829 end +0830 end +0831 
end +0832 end +0833 end +0834 end +0835 end +0836 end +0837 model.genes=geneNames; +0838 model.grRules=grRules; +0839 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0840 model.grRules = grRules; +0841 model.rxnGeneMat = rxnGeneMat; +0842 +0843 %Match the compartments for genes +0844 [~, J]=ismember(geneCompartments,model.comps); +0845 model.geneComps=J; +0846 else +0847 if ~all(cellfun(@isempty,grRules)) +0848 %If fbc_geneProduct exists, follow the specified gene order, such +0849 %that matching geneShortNames in function below will work +0850 if isfield(modelSBML,'fbc_geneProduct') +0851 genes={modelSBML.fbc_geneProduct.fbc_id}; +0852 +0853 %Get gene Miriams if they were not retrieved above (this occurs +0854 %when genes are stored as fbc_geneProduct instead of species) +0855 if isempty(geneMiriams) +0856 geneMiriams = cell(numel(genes),1); +0857 if isfield(modelSBML.fbc_geneProduct,'sboTerm') && numel(unique([modelSBML.fbc_geneProduct.sboTerm])) == 1 +0858 %If all the SBO terms are identical, don't add them to geneMiriams +0859 modelSBML.fbc_geneProduct = rmfield(modelSBML.fbc_geneProduct,'sboTerm'); +0860 end +0861 for i = 1:numel(genes) +0862 geneMiriams{i}=parseMiriam(modelSBML.fbc_geneProduct(i).annotation); +0863 if isfield(modelSBML.fbc_geneProduct(i),'sboTerm') && ~(modelSBML.fbc_geneProduct(i).sboTerm==-1) +0864 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},modelSBML.fbc_geneProduct(i).sboTerm); +0865 end +0866 end +0867 end +0868 proteins={modelSBML.fbc_geneProduct.fbc_name}; +0869 else +0870 genes=getGeneList(grRules); +0871 end +0872 model.genes=genes; +0873 model.grRules=grRules; +0874 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0875 model.grRules = grRules; +0876 model.rxnGeneMat = rxnGeneMat; +0877 end +0878 end +0879 +0880 if all(cellfun(@isempty,geneShortNames)) +0881 if isfield(modelSBML,'fbc_geneProduct') +0882 for i=1:numel(genes) +0883 if ~isempty(modelSBML.fbc_geneProduct(i).fbc_label) +0884 
geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_label; +0885 elseif ~isempty(modelSBML.fbc_geneProduct(i).fbc_name) +0886 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_name; +0887 else +0888 geneShortNames{i,1}=''; +0889 end +0890 end +0891 end +0892 end +0893 +0894 %If any InChIs have been loaded +0895 if any(~cellfun(@isempty,metaboliteInChI)) +0896 model.inchis=metaboliteInChI; +0897 end +0898 +0899 %If any formulas have been loaded +0900 if any(~cellfun(@isempty,metaboliteFormula)) +0901 model.metFormulas=metaboliteFormula; +0902 end +0903 +0904 %If any charges have been loaded +0905 if ~isempty(metaboliteCharges) +0906 model.metCharges=metaboliteCharges; +0907 end +0908 +0909 %If any gene short names have been loaded +0910 if any(~cellfun(@isempty,geneShortNames)) +0911 model.geneShortNames=geneShortNames; +0912 end +0913 +0914 %If any Miriam strings for compartments have been loaded +0915 if any(~cellfun(@isempty,compartmentMiriams)) +0916 model.compMiriams=compartmentMiriams; +0917 end +0918 +0919 %If any Miriam strings for metabolites have been loaded +0920 if any(~cellfun(@isempty,metaboliteMiriams)) +0921 model.metMiriams=metaboliteMiriams; +0922 end +0923 +0924 %If any subsystems have been loaded +0925 if any(~cellfun(@isempty,subsystems)) +0926 model.subSystems=subsystems; +0927 end +0928 if any(rxnComps) +0929 if all(rxnComps) +0930 model.rxnComps=rxnComps; +0931 else +0932 if supressWarnings==false +0933 EM='The compartments for the following reactions could not be matched. 
Ignoring reaction compartment information'; +0934 dispEM(EM,false,model.rxns(rxnComps==0)); +0935 end +0936 end +0937 end +0938 +0939 %If any ec-codes have been loaded +0940 if any(~cellfun(@isempty,eccodes)) +0941 model.eccodes=eccodes; +0942 end +0943 +0944 %If any Miriam strings for reactions have been loaded +0945 if any(~cellfun(@isempty,rxnMiriams)) +0946 model.rxnMiriams=rxnMiriams; +0947 end +0948 +0949 %If any Miriam strings for genes have been loaded +0950 if any(~cellfun(@isempty,geneMiriams)) +0951 model.geneMiriams=geneMiriams; +0952 end +0953 +0954 %If any protein strings have been loaded +0955 if any(~cellfun(@isempty,proteins)) +0956 proteins = reshape(proteins,[],1); +0957 model.proteins=proteins; +0958 end +0959 +0960 model.unconstrained=metaboliteUnconstrained; 0961 -0962 %If any formulas have been loaded -0963 if any(~cellfun(@isempty,metaboliteFormula)) -0964 model.metFormulas=metaboliteFormula; -0965 end -0966 -0967 %If any charges have been loaded -0968 if ~isempty(metaboliteCharges) -0969 model.metCharges=metaboliteCharges; -0970 end -0971 -0972 %If any gene short names have been loaded -0973 if any(~cellfun(@isempty,geneShortNames)) -0974 model.geneShortNames=geneShortNames; -0975 end -0976 -0977 %If any Miriam strings for compartments have been loaded -0978 if any(~cellfun(@isempty,compartmentMiriams)) -0979 model.compMiriams=compartmentMiriams; -0980 end -0981 -0982 %If any Miriam strings for metabolites have been loaded -0983 if any(~cellfun(@isempty,metaboliteMiriams)) -0984 model.metMiriams=metaboliteMiriams; -0985 end -0986 -0987 %If any subsystems have been loaded -0988 if any(~cellfun(@isempty,subsystems)) -0989 model.subSystems=subsystems; +0962 %Convert SBML IDs back into their original strings. 
Here we are using part +0963 %from convertSBMLID, originating from the COBRA Toolbox +0964 model.rxns=regexprep(model.rxns,'__([0-9]+)__','${char(str2num($1))}'); +0965 model.mets=regexprep(model.mets,'__([0-9]+)__','${char(str2num($1))}'); +0966 model.comps=regexprep(model.comps,'__([0-9]+)__','${char(str2num($1))}'); +0967 model.grRules=regexprep(model.grRules,'__([0-9]+)__','${char(str2num($1))}'); +0968 model.genes=regexprep(model.genes,'__([0-9]+)__','${char(str2num($1))}'); +0969 model.id=regexprep(model.id,'__([0-9]+)__','${char(str2num($1))}'); +0970 +0971 if removePrefix +0972 [model, hasChanged]=removeIdentifierPrefix(model); +0973 dispEM(['The following fields have prefixes removed from all entries. '... +0974 'If this is undesired, run importModel with removePrefix as false. Example: '... +0975 'importModel(''filename.xml'',[],false);'],false,hasChanged) +0976 end +0977 +0978 %Remove unused fields +0979 if isempty(model.annotation) +0980 model=rmfield(model,'annotation'); +0981 end +0982 if isempty(model.compOutside) +0983 model=rmfield(model,'compOutside'); +0984 end +0985 if isempty(model.compMiriams) +0986 model=rmfield(model,'compMiriams'); +0987 end +0988 if isempty(model.rxnComps) +0989 model=rmfield(model,'rxnComps'); 0990 end -0991 if any(rxnComps) -0992 if all(rxnComps) -0993 model.rxnComps=rxnComps; -0994 else -0995 if supressWarnings==false -0996 EM='The compartments for the following reactions could not be matched. 
Ignoring reaction compartment information'; -0997 dispEM(EM,false,model.rxns(rxnComps==0)); -0998 end -0999 end -1000 end -1001 -1002 %If any ec-codes have been loaded -1003 if any(~cellfun(@isempty,eccodes)) -1004 model.eccodes=eccodes; -1005 end -1006 -1007 %If any Miriam strings for reactions have been loaded -1008 if any(~cellfun(@isempty,rxnMiriams)) -1009 model.rxnMiriams=rxnMiriams; +0991 if isempty(model.grRules) +0992 model=rmfield(model,'grRules'); +0993 end +0994 if isempty(model.rxnGeneMat) +0995 model=rmfield(model,'rxnGeneMat'); +0996 end +0997 if isempty(model.subSystems) +0998 model=rmfield(model,'subSystems'); +0999 else +1000 model.subSystems(cellfun(@isempty,subsystems))={{''}}; +1001 end +1002 if isempty(model.eccodes) +1003 model=rmfield(model,'eccodes'); +1004 end +1005 if isempty(model.rxnMiriams) +1006 model=rmfield(model,'rxnMiriams'); +1007 end +1008 if cellfun(@isempty,model.rxnNotes) +1009 model=rmfield(model,'rxnNotes'); 1010 end -1011 -1012 %If any Miriam strings for genes have been loaded -1013 if any(~cellfun(@isempty,geneMiriams)) -1014 model.geneMiriams=geneMiriams; -1015 end -1016 -1017 model.unconstrained=metaboliteUnconstrained; -1018 -1019 %Convert SBML IDs back into their original strings. 
Here we are using part -1020 %from convertSBMLID, originating from the COBRA Toolbox -1021 model.rxns=regexprep(model.rxns,'__([0-9]+)__','${char(str2num($1))}'); -1022 model.mets=regexprep(model.mets,'__([0-9]+)__','${char(str2num($1))}'); -1023 model.comps=regexprep(model.comps,'__([0-9]+)__','${char(str2num($1))}'); -1024 model.grRules=regexprep(model.grRules,'__([0-9]+)__','${char(str2num($1))}'); -1025 model.genes=regexprep(model.genes,'__([0-9]+)__','${char(str2num($1))}'); -1026 model.id=regexprep(model.id,'__([0-9]+)__','${char(str2num($1))}'); -1027 -1028 %Remove unused fields -1029 if isempty(model.annotation) -1030 model=rmfield(model,'annotation'); -1031 end -1032 if isempty(model.compOutside) -1033 model=rmfield(model,'compOutside'); -1034 end -1035 if isempty(model.compMiriams) -1036 model=rmfield(model,'compMiriams'); -1037 end -1038 if isempty(model.rxnComps) -1039 model=rmfield(model,'rxnComps'); -1040 end -1041 if isempty(model.grRules) -1042 model=rmfield(model,'grRules'); -1043 end -1044 if isempty(model.rxnGeneMat) -1045 model=rmfield(model,'rxnGeneMat'); -1046 end -1047 if isempty(model.subSystems) -1048 model=rmfield(model,'subSystems'); -1049 else -1050 model.subSystems(cellfun(@isempty,subsystems))={{''}}; -1051 end -1052 if isempty(model.eccodes) -1053 model=rmfield(model,'eccodes'); -1054 end -1055 if isempty(model.rxnMiriams) -1056 model=rmfield(model,'rxnMiriams'); -1057 end -1058 if cellfun(@isempty,model.rxnNotes) -1059 model=rmfield(model,'rxnNotes'); +1011 if cellfun(@isempty,model.rxnReferences) +1012 model=rmfield(model,'rxnReferences'); +1013 end +1014 if isempty(model.rxnConfidenceScores) || all(isnan(model.rxnConfidenceScores)) +1015 model=rmfield(model,'rxnConfidenceScores'); +1016 end +1017 if isempty(model.genes) +1018 model=rmfield(model,'genes'); +1019 elseif isrow(model.genes) +1020 model.genes=transpose(model.genes); +1021 end +1022 if isempty(model.geneComps) +1023 model=rmfield(model,'geneComps'); +1024 end +1025 if 
isempty(model.geneMiriams) +1026 model=rmfield(model,'geneMiriams'); +1027 end +1028 if isempty(model.geneShortNames) +1029 model=rmfield(model,'geneShortNames'); +1030 end +1031 if isempty(model.proteins) +1032 model=rmfield(model,'proteins'); +1033 end +1034 if isempty(model.inchis) +1035 model=rmfield(model,'inchis'); +1036 end +1037 if isempty(model.metFormulas) +1038 model=rmfield(model,'metFormulas'); +1039 end +1040 if isempty(model.metMiriams) +1041 model=rmfield(model,'metMiriams'); +1042 end +1043 if ~any(model.metCharges) +1044 model=rmfield(model,'metCharges'); +1045 end +1046 +1047 %This just removes the grRules if no genes have been loaded +1048 if ~isfield(model,'genes') && isfield(model,'grRules') +1049 model=rmfield(model,'grRules'); +1050 end +1051 +1052 %Print warnings about bad structure +1053 if supressWarnings==false +1054 checkModelStruct(model,false); +1055 end +1056 +1057 if removeExcMets==true +1058 model=simplifyModel(model); +1059 end 1060 end -1061 if cellfun(@isempty,model.rxnReferences) -1062 model=rmfield(model,'rxnReferences'); -1063 end -1064 if isempty(model.rxnConfidenceScores) || all(isnan(model.rxnConfidenceScores)) -1065 model=rmfield(model,'rxnConfidenceScores'); -1066 end -1067 if isempty(model.genes) -1068 model=rmfield(model,'genes'); -1069 elseif isrow(model.genes) -1070 model.genes=transpose(model.genes); -1071 end -1072 if isempty(model.geneComps) -1073 model=rmfield(model,'geneComps'); -1074 end -1075 if isempty(model.geneMiriams) -1076 model=rmfield(model,'geneMiriams'); -1077 end -1078 if isempty(model.geneShortNames) -1079 model=rmfield(model,'geneShortNames'); -1080 end -1081 if isempty(model.inchis) -1082 model=rmfield(model,'inchis'); -1083 end -1084 if isempty(model.metFormulas) -1085 model=rmfield(model,'metFormulas'); -1086 end -1087 if isempty(model.metMiriams) -1088 model=rmfield(model,'metMiriams'); -1089 end -1090 if ~any(model.metCharges) -1091 model=rmfield(model,'metCharges'); -1092 end -1093 -1094 
%This just removes the grRules if no genes have been loaded -1095 if ~isfield(model,'genes') && isfield(model,'grRules') -1096 model=rmfield(model,'grRules'); -1097 end -1098 -1099 %Print warnings about bad structure -1100 if supressWarnings==false -1101 checkModelStruct(model,false); -1102 end -1103 -1104 if removeExcMets==true -1105 model=simplifyModel(model); -1106 end -1107 end -1108 -1109 function matchGenes=getGeneList(grRules) -1110 %Constructs the list of unique genes from grRules -1111 -1112 %Assumes that everything that isn't a paranthesis, " AND " or " or " is a -1113 %gene name -1114 genes=strrep(grRules,'(',''); -1115 genes=strrep(genes,')',''); -1116 genes=strrep(genes,' or ',' '); -1117 genes=strrep(genes,' and ',' '); -1118 genes=strrep(genes,' OR ',' '); -1119 genes=strrep(genes,' AND ',' '); -1120 genes=regexp(genes,' ','split'); +1061 +1062 function matchGenes=getGeneList(grRules) +1063 %Constructs the list of unique genes from grRules +1064 +1065 %Assumes that everything that isn't a paranthesis, " AND " or " or " is a +1066 %gene name +1067 genes=strrep(grRules,'(',''); +1068 genes=strrep(genes,')',''); +1069 genes=strrep(genes,' or ',' '); +1070 genes=strrep(genes,' and ',' '); +1071 genes=strrep(genes,' OR ',' '); +1072 genes=strrep(genes,' AND ',' '); +1073 genes=regexp(genes,' ','split'); +1074 +1075 allNames={}; +1076 for i=1:numel(genes) +1077 allNames=[allNames genes{i}]; +1078 end +1079 matchGenes=unique(allNames)'; +1080 +1081 %Remove the empty element if present +1082 if isempty(matchGenes{1}) +1083 matchGenes(1)=[]; +1084 end +1085 end +1086 +1087 function fieldContent=parseNote(searchString,fieldName) +1088 %The function obtains the particular information from 'notes' field, using +1089 %fieldName as the dummy string +1090 +1091 fieldContent=''; +1092 +1093 if strfind(searchString,fieldName) +1094 [~,targetString] = regexp(searchString,['<p>' fieldName '.*?</p>'],'tokens','match'); +1095 
targetString=regexprep(targetString,'<p>|</p>',''); +1096 targetString=regexprep(targetString,[fieldName, ':'],''); +1097 for i=1:numel(targetString) +1098 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; +1099 end +1100 fieldContent=regexprep(fieldContent,'^;|;$',''); +1101 else +1102 fieldContent=''; +1103 end +1104 end +1105 +1106 function fieldContent=parseAnnotation(searchString,startString,midString,fieldName) +1107 +1108 fieldContent=''; +1109 +1110 %Removing whitespace characters from the ending strings, which may occur in +1111 %several cases +1112 searchString=regexprep(searchString,'" />','"/>'); +1113 [~,targetString] = regexp(searchString,['<rdf:li rdf:resource="' startString fieldName midString '.*?"/>'],'tokens','match'); +1114 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); +1115 targetString=regexprep(targetString,startString,''); +1116 targetString=regexprep(targetString,[fieldName midString],''); +1117 +1118 for i=1:numel(targetString) +1119 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; +1120 end 1121 -1122 allNames={}; -1123 for i=1:numel(genes) -1124 allNames=[allNames genes{i}]; -1125 end -1126 matchGenes=unique(allNames)'; +1122 fieldContent=regexprep(fieldContent,'^;|;$',''); +1123 end +1124 +1125 function miriamStruct=parseMiriam(searchString) +1126 %Generates miriam structure from annotation field 1127 -1128 %Remove the empty element if present -1129 if isempty(matchGenes{1}) -1130 matchGenes(1)=[]; -1131 end -1132 end -1133 -1134 function fieldContent=parseNote(searchString,fieldName) -1135 %The function obtains the particular information from 'notes' field, using -1136 %fieldName as the dummy string -1137 -1138 fieldContent=''; -1139 -1140 if strfind(searchString,fieldName) -1141 [~,targetString] = regexp(searchString,['<p>' fieldName '.*?</p>'],'tokens','match'); -1142 targetString=regexprep(targetString,'<p>|</p>',''); -1143 targetString=regexprep(targetString,[fieldName, ':'],''); -1144 
for i=1:numel(targetString) -1145 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; -1146 end -1147 fieldContent=regexprep(fieldContent,'^;|;$',''); -1148 else -1149 fieldContent=''; -1150 end -1151 end -1152 -1153 function fieldContent=parseAnnotation(searchString,startString,midString,fieldName) -1154 -1155 fieldContent=''; -1156 -1157 %Removing whitespace characters from the ending strings, which may occur in -1158 %several cases -1159 searchString=regexprep(searchString,'" />','"/>'); -1160 [~,targetString] = regexp(searchString,['<rdf:li rdf:resource="' startString fieldName midString '.*?"/>'],'tokens','match'); -1161 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); -1162 targetString=regexprep(targetString,startString,''); -1163 targetString=regexprep(targetString,[fieldName midString],''); +1128 %Finding whether miriams are written in the old or the new way +1129 if strfind(searchString,'urn:miriam:') +1130 startString='urn:miriam:'; +1131 midString=':'; +1132 elseif strfind(searchString,'http://identifiers.org/') +1133 startString='http://identifiers.org/'; +1134 midString='/'; +1135 elseif strfind(searchString,'https://identifiers.org/') +1136 startString='https://identifiers.org/'; +1137 midString='/'; +1138 else +1139 miriamStruct=[]; +1140 return; +1141 end +1142 +1143 miriamStruct=[]; +1144 +1145 searchString=regexprep(searchString,'" />','"/>'); +1146 [~,targetString] = regexp(searchString,'<rdf:li rdf:resource=".*?"/>','tokens','match'); +1147 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); +1148 targetString=regexprep(targetString,startString,''); +1149 targetString=regexprep(targetString,midString,'/','once'); +1150 +1151 counter=0; +1152 for i=1:numel(targetString) +1153 if isempty(regexp(targetString{1,i},'inchi|ec-code', 'once')) +1154 counter=counter+1; +1155 miriamStruct.name{counter,1} = regexprep(targetString{1,i},'/.+','','once'); +1156 miriamStruct.value{counter,1} = 
regexprep(targetString{1,i},[miriamStruct.name{counter,1} '/'],'','once'); +1157 miriamStruct.name{counter,1} = regexprep(miriamStruct.name{counter,1},'^obo\.',''); +1158 end +1159 end +1160 end +1161 +1162 function miriam = addSBOtoMiriam(miriam,sboTerm) +1163 %Appends SBO term to miriam structure 1164 -1165 for i=1:numel(targetString) -1166 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; -1167 end -1168 -1169 fieldContent=regexprep(fieldContent,'^;|;$',''); -1170 end -1171 -1172 function miriamStruct=parseMiriam(searchString) -1173 %Generates miriam structure from annotation field -1174 -1175 %Finding whether miriams are written in the old or the new way -1176 if strfind(searchString,'urn:miriam:') -1177 startString='urn:miriam:'; -1178 midString=':'; -1179 elseif strfind(searchString,'http://identifiers.org/') -1180 startString='http://identifiers.org/'; -1181 midString='/'; -1182 elseif strfind(searchString,'https://identifiers.org/') -1183 startString='https://identifiers.org/'; -1184 midString='/'; -1185 else -1186 miriamStruct=[]; -1187 return; -1188 end -1189 -1190 miriamStruct=[]; -1191 -1192 searchString=regexprep(searchString,'" />','"/>'); -1193 [~,targetString] = regexp(searchString,'<rdf:li rdf:resource=".*?"/>','tokens','match'); -1194 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); -1195 targetString=regexprep(targetString,startString,''); -1196 targetString=regexprep(targetString,midString,'/','once'); -1197 -1198 counter=0; -1199 for i=1:numel(targetString) -1200 if isempty(regexp(targetString{1,i},'inchi|ec-code', 'once')) -1201 counter=counter+1; -1202 miriamStruct.name{counter,1} = regexprep(targetString{1,i},'/.+','','once'); -1203 miriamStruct.value{counter,1} = regexprep(targetString{1,i},[miriamStruct.name{counter,1} '/'],'','once'); -1204 miriamStruct.name{counter,1} = regexprep(miriamStruct.name{counter,1},'^obo\.',''); -1205 end -1206 end -1207 end -1208 -1209 function miriam = 
addSBOtoMiriam(miriam,sboTerm) -1210 %Appends SBO term to miriam structure -1211 -1212 sboTerm = {['SBO:' sprintf('%07u',sboTerm)]}; % convert to proper format -1213 if isempty(miriam) -1214 miriam.name = {'sbo'}; -1215 miriam.value = sboTerm; -1216 elseif any(strcmp('sbo',miriam.name)) -1217 currSbo = strcmp('sbo',miriam.name); -1218 miriam.value(currSbo) = sboTerm; -1219 else -1220 miriam.name(end+1) = {'sbo'}; -1221 miriam.value(end+1) = sboTerm; -1222 end -1223 end

    +1165 sboTerm = {['SBO:' sprintf('%07u',sboTerm)]}; % convert to proper format +1166 if isempty(miriam) +1167 miriam.name = {'sbo'}; +1168 miriam.value = sboTerm; +1169 elseif any(strcmp('sbo',miriam.name)) +1170 currSbo = strcmp('sbo',miriam.name); +1171 miriam.value(currSbo) = sboTerm; +1172 else +1173 miriam.name(end+1) = {'sbo'}; +1174 miriam.value(end+1) = sboTerm; +1175 end +1176 end

    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/readYAMLmodel.html b/doc/io/readYAMLmodel.html index bb38e6dd..a76b6b12 100644 --- a/doc/io/readYAMLmodel.html +++ b/doc/io/readYAMLmodel.html @@ -157,582 +157,585 @@

    SOURCE CODE ^'geneComps',cell(0,0);... %Changed to double in the end. 0101 'geneMiriams',cell(0,0);... 0102 'geneShortNames',cell(0,0);... -0103 'unconstrained',cell(0,0);... %Changed to double in the end. -0104 'metFrom',cell(0,0);... -0105 'rxnFrom',cell(0,0)}; -0106 for i=1:size(modelFields,1) -0107 model.(modelFields{i,1})=modelFields{i,2}; -0108 end -0109 -0110 % If GECKO model -0111 if any(contains(line_key,'geckoLight')) -0112 isGECKO=true; -0113 ecFields = {'geckoLight', false;... -0114 'rxns', {};... -0115 'kcat', {};... -0116 'source', cell(0,0);... -0117 'notes', cell(0,0);... -0118 'eccodes', cell(0,0);... -0119 'genes', cell(0,0);... -0120 'enzymes', cell(0,0);... -0121 'mw', cell(0,0);... -0122 'sequence', cell(0,0);... -0123 'concs', cell(0,0);... -0124 'rxnEnzMat', []}; -0125 for i=1:size(ecFields,1) -0126 model.ec.(ecFields{i,1})=ecFields{i,2}; -0127 end -0128 ecGecko=cell(25000,2); ecGeckoNo=1; -0129 enzStoich=cell(100000,3); enzStoichNo=1; -0130 else -0131 isGECKO=false; -0132 end -0133 -0134 section = 0; -0135 metMiriams=cell(100000,3); metMirNo=1; -0136 rxnMiriams=cell(100000,3); rxnMirNo=1; -0137 geneMiriams=cell(100000,3); genMirNo=1; -0138 subSystems=cell(100000,2); subSysNo=1; -0139 eccodes=cell(100000,2); ecCodeNo=1; -0140 equations=cell(100000,3); equatiNo=1; -0141 -0142 for i=1:numel(line_key) -0143 tline_raw = line_raw{i}; -0144 tline_key = line_key{i}; -0145 tline_value = line_value{i}; -0146 % import different sections -0147 switch tline_raw -0148 case '- metaData:' -0149 section = 1; -0150 if verbose -0151 fprintf('\t%d\n', section); -0152 end -0153 continue % Go to next line -0154 case '- metabolites:' -0155 section = 2; -0156 if verbose -0157 fprintf('\t%d\n', section); -0158 end -0159 pos=0; -0160 continue -0161 case '- reactions:' -0162 section = 3; -0163 if verbose -0164 fprintf('\t%d\n', section); -0165 end -0166 pos=0; -0167 continue -0168 case '- genes:' -0169 section = 4; -0170 if verbose -0171 fprintf('\t%d\n', 
section); -0172 end -0173 pos=0; -0174 continue -0175 case '- compartments: !!omap' -0176 section = 5; -0177 if verbose -0178 fprintf('\t%d\n', section); -0179 end -0180 pos=0; -0181 continue -0182 case '- ec-rxns:' -0183 section = 6; -0184 if verbose -0185 fprintf('\t%d\n', section); -0186 end -0187 pos=0; -0188 continue -0189 case '- ec-enzymes:' -0190 section = 7; -0191 if verbose -0192 fprintf('\t%d\n', section); -0193 end -0194 pos=0; -0195 continue -0196 end -0197 -0198 % skip over empty keys -0199 if isempty(tline_raw) || (isempty(tline_key) && contains(tline_raw,'!!omap')) -0200 continue; -0201 end -0202 -0203 % import metaData -0204 if section == 1 -0205 switch tline_key -0206 case {'short_name','id'} %short_name used by human-GEM -0207 model.id = tline_value; -0208 case 'name' -0209 model.name = tline_value; -0210 case 'full_name' %used by human-GEM -0211 model.description = tline_value; -0212 case 'version' -0213 model.version = tline_value; -0214 case 'date' -0215 model.date = tline_value; -0216 case 'taxonomy' -0217 model.annotation.taxonomy = tline_value; -0218 case {'description','note'} %description used by human-GEM -0219 model.annotation.note = tline_value; -0220 case 'github' -0221 model.annotation.sourceUrl = tline_value; -0222 case 'givenName' -0223 model.annotation.givenName = tline_value; -0224 case 'familyName' -0225 model.annotation.familyName = tline_value; -0226 case 'authors' -0227 model.annotation.authorList = tline_value; -0228 case 'email' -0229 model.annotation.email = tline_value; -0230 case 'organization' -0231 model.annotation.organization = tline_value; -0232 case 'geckoLight' -0233 if strcmp(tline_value,'true') -0234 model.ec.geckoLight = true; -0235 end -0236 end; continue -0237 end -0238 -0239 % import metabolites: -0240 if section == 2 -0241 switch tline_key -0242 case 'id' -0243 pos = pos + 1; -0244 model = readFieldValue(model, 'mets', tline_value,pos); -0245 readList=''; miriamKey=''; -0246 case 'name' -0247 model = 
readFieldValue(model, 'metNames', tline_value, pos); -0248 readList=''; miriamKey=''; -0249 case 'compartment' -0250 model = readFieldValue(model, 'metComps', tline_value, pos); -0251 readList=''; miriamKey=''; -0252 case 'formula' -0253 model = readFieldValue(model, 'metFormulas', tline_value, pos); -0254 readList=''; miriamKey=''; -0255 case 'charge' -0256 model = readFieldValue(model, 'metCharges', tline_value, pos); -0257 readList=''; miriamKey=''; -0258 case 'notes' -0259 model = readFieldValue(model, 'metNotes', tline_value, pos); -0260 readList=''; miriamKey=''; -0261 case 'inchis' -0262 model = readFieldValue(model, 'inchis', tline_value, pos); -0263 readList=''; miriamKey=''; -0264 case 'smiles' -0265 model = readFieldValue(model, 'metSmiles', tline_value, pos); -0266 readList=''; miriamKey=''; -0267 case 'deltaG' -0268 model = readFieldValue(model, 'metDeltaG', tline_value, pos); -0269 readList=''; miriamKey=''; -0270 case 'metFrom' -0271 model = readFieldValue(model, 'metFrom', tline_value, pos); -0272 readList=''; miriamKey=''; -0273 case 'annotation' -0274 readList = 'annotation'; -0275 otherwise -0276 switch readList -0277 case 'annotation' -0278 [metMiriams, miriamKey] = gatherAnnotation(pos,metMiriams,tline_key,tline_value,miriamKey,metMirNo); -0279 metMirNo = metMirNo + 1; -0280 otherwise -0281 error(['Unknown entry in yaml file: ' tline_raw]) -0282 end -0283 end; continue -0284 end -0285 -0286 % import reactions: -0287 if section == 3 -0288 switch tline_key -0289 case 'id' -0290 pos = pos + 1; -0291 model = readFieldValue(model, 'rxns', tline_value,pos); -0292 readList=''; miriamKey=''; -0293 case 'name' -0294 model = readFieldValue(model, 'rxnNames', tline_value, pos); -0295 readList=''; miriamKey=''; -0296 case 'lower_bound' -0297 model.lb(pos,1) = {tline_value}; -0298 readList=''; miriamKey=''; -0299 case 'upper_bound' -0300 model.ub(pos,1) = {tline_value}; -0301 readList=''; miriamKey=''; -0302 case 'rev' -0303 model.rev(pos,1) = 
{tline_value}; -0304 readList=''; miriamKey=''; -0305 case 'gene_reaction_rule' -0306 model = readFieldValue(model, 'grRules', tline_value, pos); -0307 readList=''; miriamKey=''; -0308 case 'rxnNotes' -0309 model = readFieldValue(model, 'rxnNotes', tline_value, pos); -0310 readList=''; miriamKey=''; -0311 case 'rxnFrom' -0312 model = readFieldValue(model, 'rxnFrom', tline_value, pos); -0313 readList=''; miriamKey=''; -0314 case 'deltaG' -0315 model = readFieldValue(model, 'rxnDeltaG', tline_value, pos); -0316 readList=''; miriamKey=''; -0317 case 'objective_coefficient' -0318 model.c(pos,1) = 1; -0319 readList=''; miriamKey=''; -0320 case 'references' -0321 model = readFieldValue(model, 'rxnReferences', tline_value, pos); -0322 readList=''; miriamKey=''; -0323 case 'confidence_score' -0324 model = readFieldValue(model, 'rxnConfidenceScores', tline_value, pos); -0325 readList=''; miriamKey=''; -0326 case 'eccodes' -0327 if isempty(tline_value) -0328 readList = 'eccodes'; -0329 else -0330 eccodes(ecCodeNo,1:2)={pos,tline_value}; -0331 ecCodeNo=ecCodeNo+1; -0332 end -0333 case 'subsystem' -0334 if isempty(tline_value) -0335 readList = 'subsystem'; -0336 else -0337 subSystems(subSysNo,1:2)={pos,tline_value}; -0338 subSysNo=subSysNo+1; -0339 end -0340 case 'metabolites' -0341 readList = 'equation'; -0342 case 'annotation' -0343 readList = 'annotation'; -0344 -0345 otherwise -0346 switch readList -0347 case 'eccodes' -0348 eccodes(ecCodeNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; -0349 ecCodeNo=ecCodeNo+1; -0350 case 'subsystem' -0351 subSystems(subSysNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; -0352 subSysNo=subSysNo+1; -0353 case 'annotation' -0354 [rxnMiriams, miriamKey,rxnMirNo] = gatherAnnotation(pos,rxnMiriams,tline_key,tline_value,miriamKey,rxnMirNo); -0355 rxnMirNo=rxnMirNo+1; -0356 case 'equation' -0357 coeff = sscanf(tline_value,'%f'); -0358 equations(equatiNo,1:3)={pos,tline_key,coeff}; -0359 equatiNo=equatiNo+1; -0360 otherwise 
-0361 error(['Unknown entry in yaml file: ' tline_raw]) -0362 end -0363 end; continue -0364 end -0365 -0366 % import genes: -0367 if section == 4 -0368 switch tline_key -0369 case 'id' -0370 pos = pos + 1; -0371 model = readFieldValue(model, 'genes', tline_value, pos); -0372 readList = ''; -0373 miriamKey = ''; -0374 case 'name' -0375 model = readFieldValue(model, 'geneShortNames', tline_value, pos); -0376 case 'annotation' -0377 readList = 'annotation'; -0378 otherwise -0379 switch readList -0380 case 'annotation' -0381 [geneMiriams, miriamKey] = gatherAnnotation(pos,geneMiriams,tline_key,tline_value,miriamKey,genMirNo); -0382 genMirNo = genMirNo + 1; -0383 otherwise -0384 error(['Unknown entry in yaml file: ' tline_raw]) -0385 end -0386 end; continue -0387 end -0388 -0389 % import compartments: -0390 if section == 5 -0391 model.comps(end+1,1) = {tline_key}; -0392 model.compNames(end+1,1) = {tline_value}; -0393 end -0394 -0395 % import ec reaction info -0396 if section == 6 -0397 switch tline_key -0398 case 'id' -0399 pos = pos + 1; -0400 model.ec = readFieldValue(model.ec, 'rxns', tline_value, pos); -0401 readList=''; -0402 case 'kcat' -0403 model.ec = readFieldValue(model.ec, 'kcat', tline_value, pos); +0103 'proteins',cell(0,0);... +0104 'unconstrained',cell(0,0);... %Changed to double in the end. +0105 'metFrom',cell(0,0);... +0106 'rxnFrom',cell(0,0)}; +0107 for i=1:size(modelFields,1) +0108 model.(modelFields{i,1})=modelFields{i,2}; +0109 end +0110 +0111 % If GECKO model +0112 if any(contains(line_key,'geckoLight')) +0113 isGECKO=true; +0114 ecFields = {'geckoLight', false;... +0115 'rxns', {};... +0116 'kcat', {};... +0117 'source', cell(0,0);... +0118 'notes', cell(0,0);... +0119 'eccodes', cell(0,0);... +0120 'genes', cell(0,0);... +0121 'enzymes', cell(0,0);... +0122 'mw', cell(0,0);... +0123 'sequence', cell(0,0);... +0124 'concs', cell(0,0);... 
+0125 'rxnEnzMat', []}; +0126 for i=1:size(ecFields,1) +0127 model.ec.(ecFields{i,1})=ecFields{i,2}; +0128 end +0129 ecGecko=cell(25000,2); ecGeckoNo=1; +0130 enzStoich=cell(100000,3); enzStoichNo=1; +0131 else +0132 isGECKO=false; +0133 end +0134 +0135 section = 0; +0136 metMiriams=cell(100000,3); metMirNo=1; +0137 rxnMiriams=cell(100000,3); rxnMirNo=1; +0138 geneMiriams=cell(100000,3); genMirNo=1; +0139 subSystems=cell(100000,2); subSysNo=1; +0140 eccodes=cell(100000,2); ecCodeNo=1; +0141 equations=cell(100000,3); equatiNo=1; +0142 +0143 for i=1:numel(line_key) +0144 tline_raw = line_raw{i}; +0145 tline_key = line_key{i}; +0146 tline_value = line_value{i}; +0147 % import different sections +0148 switch tline_raw +0149 case '- metaData:' +0150 section = 1; +0151 if verbose +0152 fprintf('\t%d\n', section); +0153 end +0154 continue % Go to next line +0155 case '- metabolites:' +0156 section = 2; +0157 if verbose +0158 fprintf('\t%d\n', section); +0159 end +0160 pos=0; +0161 continue +0162 case '- reactions:' +0163 section = 3; +0164 if verbose +0165 fprintf('\t%d\n', section); +0166 end +0167 pos=0; +0168 continue +0169 case '- genes:' +0170 section = 4; +0171 if verbose +0172 fprintf('\t%d\n', section); +0173 end +0174 pos=0; +0175 continue +0176 case '- compartments: !!omap' +0177 section = 5; +0178 if verbose +0179 fprintf('\t%d\n', section); +0180 end +0181 pos=0; +0182 continue +0183 case '- ec-rxns:' +0184 section = 6; +0185 if verbose +0186 fprintf('\t%d\n', section); +0187 end +0188 pos=0; +0189 continue +0190 case '- ec-enzymes:' +0191 section = 7; +0192 if verbose +0193 fprintf('\t%d\n', section); +0194 end +0195 pos=0; +0196 continue +0197 end +0198 +0199 % skip over empty keys +0200 if isempty(tline_raw) || (isempty(tline_key) && contains(tline_raw,'!!omap')) +0201 continue; +0202 end +0203 +0204 % import metaData +0205 if section == 1 +0206 switch tline_key +0207 case {'short_name','id'} %short_name used by human-GEM +0208 model.id = tline_value; +0209 
case 'name' +0210 model.name = tline_value; +0211 case 'full_name' %used by human-GEM +0212 model.description = tline_value; +0213 case 'version' +0214 model.version = tline_value; +0215 case 'date' +0216 model.date = tline_value; +0217 case 'taxonomy' +0218 model.annotation.taxonomy = tline_value; +0219 case {'description','note'} %description used by human-GEM +0220 model.annotation.note = tline_value; +0221 case 'github' +0222 model.annotation.sourceUrl = tline_value; +0223 case 'givenName' +0224 model.annotation.givenName = tline_value; +0225 case 'familyName' +0226 model.annotation.familyName = tline_value; +0227 case 'authors' +0228 model.annotation.authorList = tline_value; +0229 case 'email' +0230 model.annotation.email = tline_value; +0231 case 'organization' +0232 model.annotation.organization = tline_value; +0233 case 'geckoLight' +0234 if strcmp(tline_value,'true') +0235 model.ec.geckoLight = true; +0236 end +0237 end; continue +0238 end +0239 +0240 % import metabolites: +0241 if section == 2 +0242 switch tline_key +0243 case 'id' +0244 pos = pos + 1; +0245 model = readFieldValue(model, 'mets', tline_value,pos); +0246 readList=''; miriamKey=''; +0247 case 'name' +0248 model = readFieldValue(model, 'metNames', tline_value, pos); +0249 readList=''; miriamKey=''; +0250 case 'compartment' +0251 model = readFieldValue(model, 'metComps', tline_value, pos); +0252 readList=''; miriamKey=''; +0253 case 'formula' +0254 model = readFieldValue(model, 'metFormulas', tline_value, pos); +0255 readList=''; miriamKey=''; +0256 case 'charge' +0257 model = readFieldValue(model, 'metCharges', tline_value, pos); +0258 readList=''; miriamKey=''; +0259 case 'notes' +0260 model = readFieldValue(model, 'metNotes', tline_value, pos); +0261 readList=''; miriamKey=''; +0262 case 'inchis' +0263 model = readFieldValue(model, 'inchis', tline_value, pos); +0264 readList=''; miriamKey=''; +0265 case 'smiles' +0266 model = readFieldValue(model, 'metSmiles', tline_value, pos); +0267 
readList=''; miriamKey=''; +0268 case 'deltaG' +0269 model = readFieldValue(model, 'metDeltaG', tline_value, pos); +0270 readList=''; miriamKey=''; +0271 case 'metFrom' +0272 model = readFieldValue(model, 'metFrom', tline_value, pos); +0273 readList=''; miriamKey=''; +0274 case 'annotation' +0275 readList = 'annotation'; +0276 otherwise +0277 switch readList +0278 case 'annotation' +0279 [metMiriams, miriamKey] = gatherAnnotation(pos,metMiriams,tline_key,tline_value,miriamKey,metMirNo); +0280 metMirNo = metMirNo + 1; +0281 otherwise +0282 error(['Unknown entry in yaml file: ' tline_raw]) +0283 end +0284 end; continue +0285 end +0286 +0287 % import reactions: +0288 if section == 3 +0289 switch tline_key +0290 case 'id' +0291 pos = pos + 1; +0292 model = readFieldValue(model, 'rxns', tline_value,pos); +0293 readList=''; miriamKey=''; +0294 case 'name' +0295 model = readFieldValue(model, 'rxnNames', tline_value, pos); +0296 readList=''; miriamKey=''; +0297 case 'lower_bound' +0298 model.lb(pos,1) = {tline_value}; +0299 readList=''; miriamKey=''; +0300 case 'upper_bound' +0301 model.ub(pos,1) = {tline_value}; +0302 readList=''; miriamKey=''; +0303 case 'rev' +0304 model.rev(pos,1) = {tline_value}; +0305 readList=''; miriamKey=''; +0306 case 'gene_reaction_rule' +0307 model = readFieldValue(model, 'grRules', tline_value, pos); +0308 readList=''; miriamKey=''; +0309 case 'rxnNotes' +0310 model = readFieldValue(model, 'rxnNotes', tline_value, pos); +0311 readList=''; miriamKey=''; +0312 case 'rxnFrom' +0313 model = readFieldValue(model, 'rxnFrom', tline_value, pos); +0314 readList=''; miriamKey=''; +0315 case 'deltaG' +0316 model = readFieldValue(model, 'rxnDeltaG', tline_value, pos); +0317 readList=''; miriamKey=''; +0318 case 'objective_coefficient' +0319 model.c(pos,1) = 1; +0320 readList=''; miriamKey=''; +0321 case 'references' +0322 model = readFieldValue(model, 'rxnReferences', tline_value, pos); +0323 readList=''; miriamKey=''; +0324 case 'confidence_score' +0325 
model = readFieldValue(model, 'rxnConfidenceScores', tline_value, pos); +0326 readList=''; miriamKey=''; +0327 case 'eccodes' +0328 if isempty(tline_value) +0329 readList = 'eccodes'; +0330 else +0331 eccodes(ecCodeNo,1:2)={pos,tline_value}; +0332 ecCodeNo=ecCodeNo+1; +0333 end +0334 case 'subsystem' +0335 if isempty(tline_value) +0336 readList = 'subsystem'; +0337 else +0338 subSystems(subSysNo,1:2)={pos,tline_value}; +0339 subSysNo=subSysNo+1; +0340 end +0341 case 'metabolites' +0342 readList = 'equation'; +0343 case 'annotation' +0344 readList = 'annotation'; +0345 +0346 otherwise +0347 switch readList +0348 case 'eccodes' +0349 eccodes(ecCodeNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; +0350 ecCodeNo=ecCodeNo+1; +0351 case 'subsystem' +0352 subSystems(subSysNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; +0353 subSysNo=subSysNo+1; +0354 case 'annotation' +0355 [rxnMiriams, miriamKey,rxnMirNo] = gatherAnnotation(pos,rxnMiriams,tline_key,tline_value,miriamKey,rxnMirNo); +0356 rxnMirNo=rxnMirNo+1; +0357 case 'equation' +0358 coeff = sscanf(tline_value,'%f'); +0359 equations(equatiNo,1:3)={pos,tline_key,coeff}; +0360 equatiNo=equatiNo+1; +0361 otherwise +0362 error(['Unknown entry in yaml file: ' tline_raw]) +0363 end +0364 end; continue +0365 end +0366 +0367 % import genes: +0368 if section == 4 +0369 switch tline_key +0370 case 'id' +0371 pos = pos + 1; +0372 model = readFieldValue(model, 'genes', tline_value, pos); +0373 readList = ''; +0374 miriamKey = ''; +0375 case 'name' +0376 model = readFieldValue(model, 'geneShortNames', tline_value, pos); +0377 case 'protein' +0378 model = readFieldValue(model, 'proteins', tline_value, pos); +0379 case 'annotation' +0380 readList = 'annotation'; +0381 otherwise +0382 switch readList +0383 case 'annotation' +0384 [geneMiriams, miriamKey] = gatherAnnotation(pos,geneMiriams,tline_key,tline_value,miriamKey,genMirNo); +0385 genMirNo = genMirNo + 1; +0386 otherwise +0387 error(['Unknown entry in yaml 
file: ' tline_raw]) +0388 end +0389 end; continue +0390 end +0391 +0392 % import compartments: +0393 if section == 5 +0394 model.comps(end+1,1) = {tline_key}; +0395 model.compNames(end+1,1) = {tline_value}; +0396 end +0397 +0398 % import ec reaction info +0399 if section == 6 +0400 switch tline_key +0401 case 'id' +0402 pos = pos + 1; +0403 model.ec = readFieldValue(model.ec, 'rxns', tline_value, pos); 0404 readList=''; -0405 case 'source' -0406 model.ec = readFieldValue(model.ec, 'source', tline_value, pos); +0405 case 'kcat' +0406 model.ec = readFieldValue(model.ec, 'kcat', tline_value, pos); 0407 readList=''; -0408 case 'notes' -0409 model.ec = readFieldValue(model.ec, 'notes', tline_value, pos); +0408 case 'source' +0409 model.ec = readFieldValue(model.ec, 'source', tline_value, pos); 0410 readList=''; -0411 case 'eccodes' -0412 if isempty(tline_value) -0413 readList = 'eccodes'; -0414 else -0415 ecGecko(ecGeckoNo,1:2)={pos,tline_value}; -0416 ecGeckoNo=ecGeckoNo+1; -0417 end -0418 case 'enzymes' -0419 readList = 'enzStoich'; -0420 otherwise -0421 switch readList -0422 case 'eccodes' -0423 ecGecko(ecGeckoNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; -0424 ecGeckoNo=ecGeckoNo+1; -0425 case 'enzStoich' -0426 coeff = sscanf(tline_value,'%f'); -0427 enzStoich(enzStoichNo,1:3)={pos,tline_key,coeff}; -0428 enzStoichNo=enzStoichNo+1; -0429 otherwise -0430 error(['Unknown entry in yaml file: ' tline_raw]) -0431 end -0432 end; continue -0433 end -0434 -0435 % import ec enzyme info -0436 if section == 7 -0437 switch tline_key -0438 case 'genes' -0439 pos = pos + 1; -0440 model.ec = readFieldValue(model.ec, 'genes', tline_value, pos); -0441 case 'enzymes' -0442 model.ec = readFieldValue(model.ec, 'enzymes', tline_value, pos); -0443 case 'mw' -0444 model.ec = readFieldValue(model.ec, 'mw', tline_value, pos); -0445 case 'sequence' -0446 model.ec = readFieldValue(model.ec, 'sequence', tline_value, pos); -0447 case 'concs' -0448 model.ec = 
readFieldValue(model.ec, 'concs', tline_value, pos); -0449 otherwise -0450 error(['Unknown entry in yaml file: ' tline_raw]) -0451 end; continue -0452 end -0453 end -0454 -0455 %Parse annotations -0456 if ~isempty(metMiriams) -0457 locs = cell2mat(metMiriams(:,1)); -0458 for i=unique(locs)' -0459 model.metMiriams{i,1}.name=metMiriams(locs==i,2); -0460 model.metMiriams{i,1}.value=metMiriams(locs==i,3); -0461 end -0462 end -0463 if ~isempty(rxnMiriams) -0464 locs = cell2mat(rxnMiriams(:,1)); -0465 for i=unique(locs)' -0466 model.rxnMiriams{i,1}.name=rxnMiriams(locs==i,2); -0467 model.rxnMiriams{i,1}.value=rxnMiriams(locs==i,3); -0468 end -0469 end -0470 if ~isempty(geneMiriams) -0471 locs = cell2mat(geneMiriams(:,1)); -0472 for i=unique(locs)' -0473 model.geneMiriams{i,1}.name=geneMiriams(locs==i,2); -0474 model.geneMiriams{i,1}.value=geneMiriams(locs==i,3); -0475 end -0476 end -0477 -0478 %Parse subSystems -0479 if ~isempty(subSystems) -0480 locs = cell2mat(subSystems(:,1)); -0481 for i=unique(locs)' -0482 model.subSystems{i,1}=subSystems(locs==i,2); -0483 end -0484 end -0485 -0486 %Parse ec-codes -0487 if ~isempty(eccodes) -0488 locs = cell2mat(eccodes(:,1)); -0489 for i=unique(locs)' -0490 eccodesCat=strjoin(eccodes(locs==i,2),';'); -0491 model.eccodes{i,1}=eccodesCat; -0492 end -0493 emptyEc=cellfun('isempty',model.eccodes); -0494 model.eccodes(emptyEc)={''}; -0495 end -0496 -0497 % follow-up data processing -0498 if verbose -0499 fprintf('\nimporting completed\nfollow-up processing...'); -0500 end -0501 [~, model.metComps] = ismember(model.metComps, model.comps); -0502 [~, model.geneComps] = ismember(model.geneComps, model.comps); -0503 [~, model.rxnComps] = ismember(model.rxnComps, model.comps); -0504 -0505 % Fill S-matrix -0506 rxnIdx = cellfun('isempty', equations(:,1)); -0507 equations(rxnIdx,:) = ''; -0508 rxnIdx = cell2mat(equations(:,1)); -0509 [~,metIdx] = ismember(equations(:,2),model.mets); -0510 coeffs = cell2mat(equations(:,3)); -0511 
model.S=sparse(max(metIdx),max(rxnIdx)); -0512 linearIndices = sub2ind([max(metIdx), max(rxnIdx)],metIdx,rxnIdx); -0513 model.S(linearIndices) = coeffs; -0514 -0515 % Convert strings to numeric -0516 model.metCharges = str2double(model.metCharges); -0517 model.lb = str2double(model.lb); -0518 model.ub = str2double(model.ub); -0519 model.rxnConfidenceScores = str2double(model.rxnConfidenceScores); -0520 model.b = zeros(length(model.mets),1); -0521 model.metDeltaG = str2double(model.metDeltaG); -0522 model.rxnDeltaG = str2double(model.rxnDeltaG); -0523 -0524 % Fill some other fields -0525 model.annotation.defaultLB = min(model.lb); -0526 model.annotation.defaultUB = max(model.ub); -0527 if numel(model.lb)<numel(model.rxns) %No LB reported = min -0528 model.lb(end+1:numel(model.rxns)-numel(model.lb),1) = double(model.annotation.defaultLB); -0529 end -0530 if numel(model.ub)<numel(model.rxns) %No UB reported = max -0531 model.ub(end+1:numel(model.rxns)-numel(model.ub),1) = double(model.annotation.defaultUB); +0411 case 'notes' +0412 model.ec = readFieldValue(model.ec, 'notes', tline_value, pos); +0413 readList=''; +0414 case 'eccodes' +0415 if isempty(tline_value) +0416 readList = 'eccodes'; +0417 else +0418 ecGecko(ecGeckoNo,1:2)={pos,tline_value}; +0419 ecGeckoNo=ecGeckoNo+1; +0420 end +0421 case 'enzymes' +0422 readList = 'enzStoich'; +0423 otherwise +0424 switch readList +0425 case 'eccodes' +0426 ecGecko(ecGeckoNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; +0427 ecGeckoNo=ecGeckoNo+1; +0428 case 'enzStoich' +0429 coeff = sscanf(tline_value,'%f'); +0430 enzStoich(enzStoichNo,1:3)={pos,tline_key,coeff}; +0431 enzStoichNo=enzStoichNo+1; +0432 otherwise +0433 error(['Unknown entry in yaml file: ' tline_raw]) +0434 end +0435 end; continue +0436 end +0437 +0438 % import ec enzyme info +0439 if section == 7 +0440 switch tline_key +0441 case 'genes' +0442 pos = pos + 1; +0443 model.ec = readFieldValue(model.ec, 'genes', tline_value, pos); +0444 case 
'enzymes' +0445 model.ec = readFieldValue(model.ec, 'enzymes', tline_value, pos); +0446 case 'mw' +0447 model.ec = readFieldValue(model.ec, 'mw', tline_value, pos); +0448 case 'sequence' +0449 model.ec = readFieldValue(model.ec, 'sequence', tline_value, pos); +0450 case 'concs' +0451 model.ec = readFieldValue(model.ec, 'concs', tline_value, pos); +0452 otherwise +0453 error(['Unknown entry in yaml file: ' tline_raw]) +0454 end; continue +0455 end +0456 end +0457 +0458 %Parse annotations +0459 if ~isempty(metMiriams) +0460 locs = cell2mat(metMiriams(:,1)); +0461 for i=unique(locs)' +0462 model.metMiriams{i,1}.name=metMiriams(locs==i,2); +0463 model.metMiriams{i,1}.value=metMiriams(locs==i,3); +0464 end +0465 end +0466 if ~isempty(rxnMiriams) +0467 locs = cell2mat(rxnMiriams(:,1)); +0468 for i=unique(locs)' +0469 model.rxnMiriams{i,1}.name=rxnMiriams(locs==i,2); +0470 model.rxnMiriams{i,1}.value=rxnMiriams(locs==i,3); +0471 end +0472 end +0473 if ~isempty(geneMiriams) +0474 locs = cell2mat(geneMiriams(:,1)); +0475 for i=unique(locs)' +0476 model.geneMiriams{i,1}.name=geneMiriams(locs==i,2); +0477 model.geneMiriams{i,1}.value=geneMiriams(locs==i,3); +0478 end +0479 end +0480 +0481 %Parse subSystems +0482 if ~isempty(subSystems) +0483 locs = cell2mat(subSystems(:,1)); +0484 for i=unique(locs)' +0485 model.subSystems{i,1}=subSystems(locs==i,2); +0486 end +0487 end +0488 +0489 %Parse ec-codes +0490 if ~isempty(eccodes) +0491 locs = cell2mat(eccodes(:,1)); +0492 for i=unique(locs)' +0493 eccodesCat=strjoin(eccodes(locs==i,2),';'); +0494 model.eccodes{i,1}=eccodesCat; +0495 end +0496 emptyEc=cellfun('isempty',model.eccodes); +0497 model.eccodes(emptyEc)={''}; +0498 end +0499 +0500 % follow-up data processing +0501 if verbose +0502 fprintf('\nimporting completed\nfollow-up processing...'); +0503 end +0504 [~, model.metComps] = ismember(model.metComps, model.comps); +0505 [~, model.geneComps] = ismember(model.geneComps, model.comps); +0506 [~, model.rxnComps] = 
ismember(model.rxnComps, model.comps); +0507 +0508 % Fill S-matrix +0509 rxnIdx = cellfun('isempty', equations(:,1)); +0510 equations(rxnIdx,:) = ''; +0511 rxnIdx = cell2mat(equations(:,1)); +0512 [~,metIdx] = ismember(equations(:,2),model.mets); +0513 coeffs = cell2mat(equations(:,3)); +0514 model.S=sparse(max(metIdx),max(rxnIdx)); +0515 linearIndices = sub2ind([max(metIdx), max(rxnIdx)],metIdx,rxnIdx); +0516 model.S(linearIndices) = coeffs; +0517 +0518 % Convert strings to numeric +0519 model.metCharges = str2double(model.metCharges); +0520 model.lb = str2double(model.lb); +0521 model.ub = str2double(model.ub); +0522 model.rxnConfidenceScores = str2double(model.rxnConfidenceScores); +0523 model.b = zeros(length(model.mets),1); +0524 model.metDeltaG = str2double(model.metDeltaG); +0525 model.rxnDeltaG = str2double(model.rxnDeltaG); +0526 +0527 % Fill some other fields +0528 model.annotation.defaultLB = min(model.lb); +0529 model.annotation.defaultUB = max(model.ub); +0530 if numel(model.lb)<numel(model.rxns) %No LB reported = min +0531 model.lb(end+1:numel(model.rxns)-numel(model.lb),1) = double(model.annotation.defaultLB); 0532 end -0533 if ~all(cellfun('isempty',model.rev)) -0534 model.rev = str2double(model.rev); -0535 else -0536 model.rev = []; -0537 end -0538 if numel(model.rev)<numel(model.rxns) %No rev reported, assume from LB and UB -0539 model.rev(end+1:numel(model.rxns)-numel(model.rev),1) = double(model.lb<0 & model.ub>0); +0533 if numel(model.ub)<numel(model.rxns) %No UB reported = max +0534 model.ub(end+1:numel(model.rxns)-numel(model.ub),1) = double(model.annotation.defaultUB); +0535 end +0536 if ~all(cellfun('isempty',model.rev)) +0537 model.rev = str2double(model.rev); +0538 else +0539 model.rev = []; 0540 end -0541 -0542 % Remove empty fields, otherwise fill to correct length -0543 % Reactions -0544 for i={'rxnNames','grRules','eccodes','rxnNotes','rxnReferences',... 
-0545 'rxnFrom','subSystems','rxnMiriams'} % Empty strings -0546 model = emptyOrFill(model,i{1},{''},'rxns'); -0547 end -0548 for i={'c'} % Zeros -0549 model = emptyOrFill(model,i{1},0,'rxns',true); +0541 if numel(model.rev)<numel(model.rxns) %No rev reported, assume from LB and UB +0542 model.rev(end+1:numel(model.rxns)-numel(model.rev),1) = double(model.lb<0 & model.ub>0); +0543 end +0544 +0545 % Remove empty fields, otherwise fill to correct length +0546 % Reactions +0547 for i={'rxnNames','grRules','eccodes','rxnNotes','rxnReferences',... +0548 'rxnFrom','subSystems','rxnMiriams'} % Empty strings +0549 model = emptyOrFill(model,i{1},{''},'rxns'); 0550 end -0551 for i={'rxnConfidenceScores','rxnDeltaG'} % NaNs -0552 model = emptyOrFill(model,i{1},NaN,'rxns'); +0551 for i={'c'} % Zeros +0552 model = emptyOrFill(model,i{1},0,'rxns',true); 0553 end -0554 for i={'rxnComps'} % Ones, assume first compartment -0555 model = emptyOrFill(model,i{1},1,'rxns'); +0554 for i={'rxnConfidenceScores','rxnDeltaG'} % NaNs +0555 model = emptyOrFill(model,i{1},NaN,'rxns'); 0556 end -0557 % Metabolites -0558 for i={'metNames','inchis','metFormulas','metMiriams','metFrom','metSmiles','metNotes'} % Empty strings -0559 model = emptyOrFill(model,i{1},{''},'mets'); -0560 end -0561 for i={'metCharges','unconstrained'} % Zeros -0562 model = emptyOrFill(model,i{1},0,'mets'); +0557 for i={'rxnComps'} % Ones, assume first compartment +0558 model = emptyOrFill(model,i{1},1,'rxns'); +0559 end +0560 % Metabolites +0561 for i={'metNames','inchis','metFormulas','metMiriams','metFrom','metSmiles','metNotes'} % Empty strings +0562 model = emptyOrFill(model,i{1},{''},'mets'); 0563 end -0564 for i={'metDeltaG'} % % NaNs -0565 model = emptyOrFill(model,i{1},NaN,'mets'); -0566 end -0567 for i={'metComps'} % Ones, assume first compartment -0568 model = emptyOrFill(model,i{1},1,'mets'); -0569 end -0570 % Genes -0571 for i={'geneMiriams','geneShortNames'} % Empty strings -0572 model = 
emptyOrFill(model,i{1},{''},'genes'); -0573 end -0574 for i={'geneComps'} % Ones, assume first compartment -0575 model = emptyOrFill(model,i{1},1,'genes'); +0564 for i={'metCharges','unconstrained'} % Zeros +0565 model = emptyOrFill(model,i{1},0,'mets'); +0566 end +0567 for i={'metDeltaG'} % % NaNs +0568 model = emptyOrFill(model,i{1},NaN,'mets'); +0569 end +0570 for i={'metComps'} % Ones, assume first compartment +0571 model = emptyOrFill(model,i{1},1,'mets'); +0572 end +0573 % Genes +0574 for i={'geneMiriams','geneShortNames','proteins'} % Empty strings +0575 model = emptyOrFill(model,i{1},{''},'genes'); 0576 end -0577 % Comps -0578 for i={'compNames'} % Empty strings -0579 model = emptyOrFill(model,i{1},{''},'comps'); -0580 end -0581 for i={'compOutside'} % First comp -0582 model = emptyOrFill(model,i{1},model.comps{1},'comps'); +0577 for i={'geneComps'} % Ones, assume first compartment +0578 model = emptyOrFill(model,i{1},1,'genes'); +0579 end +0580 % Comps +0581 for i={'compNames'} % Empty strings +0582 model = emptyOrFill(model,i{1},{''},'comps'); 0583 end -0584 % Single fields are kept, even if empty -0585 % for i={'description','name','version','date','annotation'} -0586 % if isempty(model.(i{1})) -0587 % model = rmfield(model,i{1}); -0588 % end -0589 % end -0590 -0591 % Make rxnGeneMat fields and map to the existing model.genes field -0592 [genes, rxnGeneMat] = getGenesFromGrRules(model.grRules); -0593 model.rxnGeneMat = sparse(numel(model.rxns),numel(model.genes)); -0594 [~,geneOrder] = ismember(genes,model.genes); -0595 if any(geneOrder == 0) -0596 error(['The grRules includes the following gene(s), that are not in '... 
-0597 'the list of model genes: ', genes{~geneOrder}]) -0598 end -0599 model.rxnGeneMat(:,geneOrder) = rxnGeneMat; -0600 -0601 % Finalize GECKO model -0602 if isGECKO -0603 % Fill in empty fields and empty entries -0604 for i={'kcat','source','notes','eccodes'} % Even keep empty -0605 model.ec = emptyOrFill(model.ec,i{1},{''},'rxns',true); -0606 end -0607 for i={'enzymes','mw','sequence'} -0608 model.ec = emptyOrFill(model.ec,i{1},{''},'genes',true); +0584 for i={'compOutside'} % First comp +0585 model = emptyOrFill(model,i{1},model.comps{1},'comps'); +0586 end +0587 % Single fields are kept, even if empty +0588 % for i={'description','name','version','date','annotation'} +0589 % if isempty(model.(i{1})) +0590 % model = rmfield(model,i{1}); +0591 % end +0592 % end +0593 +0594 % Make rxnGeneMat fields and map to the existing model.genes field +0595 [genes, rxnGeneMat] = getGenesFromGrRules(model.grRules); +0596 model.rxnGeneMat = sparse(numel(model.rxns),numel(model.genes)); +0597 [~,geneOrder] = ismember(genes,model.genes); +0598 if any(geneOrder == 0) +0599 error(['The grRules includes the following gene(s), that are not in '... 
+0600 'the list of model genes: ', genes{~geneOrder}]) +0601 end +0602 model.rxnGeneMat(:,geneOrder) = rxnGeneMat; +0603 +0604 % Finalize GECKO model +0605 if isGECKO +0606 % Fill in empty fields and empty entries +0607 for i={'kcat','source','notes','eccodes'} % Even keep empty +0608 model.ec = emptyOrFill(model.ec,i{1},{''},'rxns',true); 0609 end -0610 model.ec = emptyOrFill(model.ec,'concs',{'NaN'},'genes',true); -0611 model.ec = emptyOrFill(model.ec,'kcat',{'0'},'genes',true); -0612 % Change string to double -0613 for i={'kcat','mw','concs'} -0614 if isfield(model.ec,i{1}) -0615 model.ec.(i{1}) = str2double(model.ec.(i{1})); -0616 end -0617 end -0618 % Fill rxnEnzMat -0619 rxnIdx = cellfun('isempty', enzStoich(:,1)); -0620 enzStoich(rxnIdx,:) = ''; -0621 rxnIdx = cell2mat(enzStoich(:,1)); -0622 [~,enzIdx] = ismember(enzStoich(:,2),model.ec.enzymes); -0623 coeffs = cell2mat(enzStoich(:,3)); -0624 model.ec.rxnEnzMat = zeros(max(rxnIdx), max(enzIdx)); -0625 linearIndices = sub2ind([max(rxnIdx), max(enzIdx)], rxnIdx, enzIdx); -0626 model.ec.rxnEnzMat(linearIndices) = coeffs; -0627 %Parse ec-codes -0628 if ~isempty(ecGecko) -0629 locs = cell2mat(ecGecko(:,1)); -0630 for i=unique(locs)' -0631 ecGeckoCat=strjoin(ecGecko(locs==i,2),';'); -0632 model.ec.eccodes{i,1}=ecGeckoCat; -0633 end -0634 emptyEc=cellfun('isempty',model.ec.eccodes); -0635 model.ec.eccodes(emptyEc)={''}; -0636 end -0637 end -0638 -0639 if verbose -0640 fprintf(' Done!\n'); -0641 end -0642 end -0643 -0644 function model = emptyOrFill(model,field,emptyEntry,type,keepEmpty) -0645 if nargin<5 -0646 keepEmpty=false; -0647 end -0648 if isnumeric(emptyEntry) -0649 emptyCells=isempty(model.(field)); -0650 else -0651 emptyCells=cellfun('isempty',model.(field)); -0652 end -0653 if all(emptyCells) && ~keepEmpty -0654 model = rmfield(model, field); -0655 elseif numel(model.(field))<numel(model.(type)) -0656 model.(field)(end+1:numel(model.(type)),1)=emptyEntry; -0657 end -0658 end -0659 -0660 function model = 
readFieldValue(model, fieldName, value, pos) -0661 if numel(model.(fieldName))<pos-1 -0662 model.(fieldName)(end+1:pos,1) = {''}; -0663 end -0664 model.(fieldName)(pos,1) = {value}; -0665 end -0666 -0667 function [miriams, miriamKey,entryNumber] = gatherAnnotation(pos,miriams,key,value,miriamKey,entryNumber) -0668 if isempty(key) -0669 key=miriamKey; -0670 else -0671 miriamKey=key; -0672 end -0673 if ~isempty(value) -0674 miriams(entryNumber,1:3) = {pos, key, strip(value)}; -0675 else -0676 entryNumber = entryNumber - 1; -0677 end -0678 end

    +0610 for i={'enzymes','mw','sequence'} +0611 model.ec = emptyOrFill(model.ec,i{1},{''},'genes',true); +0612 end +0613 model.ec = emptyOrFill(model.ec,'concs',{'NaN'},'genes',true); +0614 model.ec = emptyOrFill(model.ec,'kcat',{'0'},'genes',true); +0615 % Change string to double +0616 for i={'kcat','mw','concs'} +0617 if isfield(model.ec,i{1}) +0618 model.ec.(i{1}) = str2double(model.ec.(i{1})); +0619 end +0620 end +0621 % Fill rxnEnzMat +0622 rxnIdx = cellfun('isempty', enzStoich(:,1)); +0623 enzStoich(rxnIdx,:) = ''; +0624 rxnIdx = cell2mat(enzStoich(:,1)); +0625 [~,enzIdx] = ismember(enzStoich(:,2),model.ec.enzymes); +0626 coeffs = cell2mat(enzStoich(:,3)); +0627 model.ec.rxnEnzMat = zeros(numel(model.ec.rxns), numel(model.ec.genes)); +0628 linearIndices = sub2ind([max(rxnIdx), max(enzIdx)], rxnIdx, enzIdx); +0629 model.ec.rxnEnzMat(linearIndices) = coeffs; +0630 %Parse ec-codes +0631 if ~isempty(ecGecko) +0632 locs = cell2mat(ecGecko(:,1)); +0633 for i=unique(locs)' +0634 ecGeckoCat=strjoin(ecGecko(locs==i,2),';'); +0635 model.ec.eccodes{i,1}=ecGeckoCat; +0636 end +0637 emptyEc=cellfun('isempty',model.ec.eccodes); +0638 model.ec.eccodes(emptyEc)={''}; +0639 end +0640 end +0641 +0642 if verbose +0643 fprintf(' Done!\n'); +0644 end +0645 end +0646 +0647 function model = emptyOrFill(model,field,emptyEntry,type,keepEmpty) +0648 if nargin<5 +0649 keepEmpty=false; +0650 end +0651 if isnumeric(emptyEntry) +0652 emptyCells=isempty(model.(field)); +0653 else +0654 emptyCells=cellfun('isempty',model.(field)); +0655 end +0656 if all(emptyCells) && ~keepEmpty +0657 model = rmfield(model, field); +0658 elseif numel(model.(field))<numel(model.(type)) +0659 model.(field)(end+1:numel(model.(type)),1)=emptyEntry; +0660 end +0661 end +0662 +0663 function model = readFieldValue(model, fieldName, value, pos) +0664 if numel(model.(fieldName))<pos-1 +0665 model.(fieldName)(end+1:pos,1) = {''}; +0666 end +0667 model.(fieldName)(pos,1) = {value}; +0668 end +0669 +0670 function 
[miriams, miriamKey,entryNumber] = gatherAnnotation(pos,miriams,key,value,miriamKey,entryNumber) +0671 if isempty(key) +0672 key=miriamKey; +0673 else +0674 miriamKey=key; +0675 end +0676 if ~isempty(value) +0677 miriams(entryNumber,1:3) = {pos, key, strip(value)}; +0678 else +0679 entryNumber = entryNumber - 1; +0680 end +0681 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/io/sortIdentifiers.html b/doc/io/sortIdentifiers.html index 6a20f6fd..71563563 100644 --- a/doc/io/sortIdentifiers.html +++ b/doc/io/sortIdentifiers.html @@ -47,7 +47,7 @@

    CROSS-REFERENCE INFORMATION ^
 </ul>
 This function is called by:
 <ul style= -
  • exportForGit exportForGit
  • exportModel exportModel
  • exportToExcelFormat exportToExcelFormat
  • exportToTabDelimited exportToTabDelimited
  • writeYAMLmodel writeYAMLmodel
  • +
  • exportForGit exportForGit
  • exportModel exportModel
  • exportToExcelFormat exportToExcelFormat
  • exportToTabDelimited exportToTabDelimited
  • writeYAMLmodel writeYAMLmodel
  • diff --git a/doc/io/writeYAMLmodel.html b/doc/io/writeYAMLmodel.html index 66b837d2..77164d47 100644 --- a/doc/io/writeYAMLmodel.html +++ b/doc/io/writeYAMLmodel.html @@ -49,7 +49,7 @@

    CROSS-REFERENCE INFORMATION ^
 <li><a href=sortIdentifiers exportModel This function is called by: +
  • exportForGit exportForGit
  • SUBFUNCTIONS ^

    @@ -172,268 +172,269 @@

    SOURCE CODE ^' - !!omap\n'); 0114 writeField(model, fid, 'genes', 'txt', i, ' - id', preserveQuotes) 0115 writeField(model, fid, 'geneShortNames', 'txt', i, ' - name', preserveQuotes) -0116 writeField(model, fid, 'geneMiriams', 'txt', i, ' - annotation', preserveQuotes) -0117 end -0118 -0119 %Compartments: -0120 fprintf(fid,'- compartments: !!omap\n'); -0121 for i = 1:length(model.comps) -0122 writeField(model, fid, 'compNames', 'txt', i, ['- ' model.comps{i}], preserveQuotes) -0123 writeField(model, fid, 'compMiriams', 'txt', i, '- annotation', preserveQuotes) -0124 end -0125 +0116 writeField(model, fid, 'proteins', 'txt', i, ' - protein', preserveQuotes) +0117 writeField(model, fid, 'geneMiriams', 'txt', i, ' - annotation', preserveQuotes) +0118 end +0119 +0120 %Compartments: +0121 fprintf(fid,'- compartments: !!omap\n'); +0122 for i = 1:length(model.comps) +0123 writeField(model, fid, 'compNames', 'txt', i, ['- ' model.comps{i}], preserveQuotes) +0124 writeField(model, fid, 'compMiriams', 'txt', i, '- annotation', preserveQuotes) +0125 end 0126 -0127 %EC-model: -0128 if isfield(model,'ec') -0129 fprintf(fid,'- ec-rxns:\n'); -0130 for i = 1:length(model.ec.rxns) -0131 fprintf(fid,' - !!omap\n'); -0132 writeField(model.ec, fid, 'rxns', 'txt', i, '- id', preserveQuotes) -0133 writeField(model.ec, fid, 'kcat', 'num', i, '- kcat', preserveQuotes) -0134 writeField(model.ec, fid, 'source', 'txt', i, '- source', preserveQuotes) -0135 writeField(model.ec, fid, 'notes', 'txt', i, '- notes', preserveQuotes) -0136 writeField(model.ec, fid, 'eccodes', 'txt', i, '- eccodes', preserveQuotes) -0137 writeField(model.ec, fid, 'rxnEnzMat', 'txt', i, '- enzymes', preserveQuotes) -0138 end -0139 -0140 fprintf(fid,'- ec-enzymes:\n'); -0141 for i = 1:length(model.ec.genes) -0142 fprintf(fid,' - !!omap\n'); -0143 writeField(model.ec, fid, 'genes', 'txt', i, '- genes', preserveQuotes) -0144 writeField(model.ec, fid, 'enzymes', 'txt', i, '- enzymes', preserveQuotes) -0145 
writeField(model.ec, fid, 'mw', 'num', i, '- mw', preserveQuotes) -0146 writeField(model.ec, fid, 'sequence', 'txt', i, '- sequence', preserveQuotes) -0147 writeField(model.ec, fid, 'concs', 'num', i, '- concs', preserveQuotes) -0148 end -0149 end -0150 -0151 %Close file: -0152 fclose(fid); -0153 -0154 end -0155 -0156 function writeField(model,fid,fieldName,type,pos,name,preserveQuotes) -0157 %Writes a new line in the yaml file if the field exists and the field is -0158 %not empty at the correspoinding position. It's recursive for some fields -0159 %(metMiriams, rxnMiriams, and S) -0160 -0161 if isfield(model,fieldName) -0162 if strcmp(fieldName,'metComps') -0163 %metComps: write full name -0164 fieldName = 'comps'; -0165 pos = model.metComps(pos); -0166 end -0167 -0168 field = model.(fieldName); -0169 -0170 if strcmp(fieldName,'metMiriams') -0171 if ~isempty(model.metMiriams{pos}) -0172 fprintf(fid,' %s: !!omap\n',name); -0173 for i=1:size(model.newMetMiriams,2) -0174 %'i' represents the different miriam names, e.g. 
-0175 %kegg.compound or chebi -0176 if ~isempty(model.newMetMiriams{pos,i}) -0177 %As during the following writeField call the value of -0178 %'i' would be lost, it is temporarily concatenated to -0179 %'name' parameter, which will be edited later -0180 writeField(model, fid, 'newMetMiriams', 'txt', pos, [' - ' model.newMetMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0181 end -0182 end -0183 end -0184 -0185 elseif strcmp(fieldName,'rxnMiriams') -0186 if ~isempty(model.rxnMiriams{pos}) -0187 fprintf(fid,' %s: !!omap\n',name); -0188 for i=1:size(model.newRxnMiriams,2) -0189 if ~isempty(model.newRxnMiriams{pos,i}) -0190 writeField(model, fid, 'newRxnMiriams', 'txt', pos, [' - ' model.newRxnMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0191 end -0192 end -0193 end -0194 -0195 elseif strcmp(fieldName,'geneMiriams') -0196 if ~isempty(model.geneMiriams{pos}) -0197 fprintf(fid,' %s: !!omap\n',name); -0198 for i=1:size(model.newGeneMiriams,2) -0199 if ~isempty(model.newGeneMiriams{pos,i}) -0200 writeField(model, fid, 'newGeneMiriams', 'txt', pos, [' - ' model.newGeneMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0201 end -0202 end -0203 end -0204 -0205 elseif strcmp(fieldName,'compMiriams') -0206 if ~isempty(model.compMiriams{pos}) -0207 fprintf(fid,' %s: !!omap\n',name); -0208 for i=1:size(model.newCompMiriams,2) -0209 if ~isempty(model.newCompMiriams{pos,i}) -0210 writeField(model, fid, 'newCompMiriams', 'txt', pos, [' - ' model.newCompMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0211 end -0212 end -0213 end -0214 -0215 elseif strcmp(fieldName,'S') -0216 %S: create header & write each metabolite in a new line -0217 fprintf(fid,' %s: !!omap\n',name); -0218 if sum(field(:,pos) ~= 0) > 0 -0219 model.mets = model.mets(field(:,pos) ~= 0); -0220 model.coeffs = field(field(:,pos) ~= 0,pos); -0221 %Sort metabolites: -0222 [model.mets,order] = sort(model.mets); -0223 model.coeffs = model.coeffs(order); -0224 for i = 1:length(model.mets) -0225 
writeField(model, fid, 'coeffs', 'num', i, [' - ' model.mets{i}], preserveQuotes) -0226 end -0227 end -0228 -0229 elseif strcmp(fieldName,'rxnEnzMat') -0230 %S: create header & write each enzyme in a new line -0231 fprintf(fid,' %s: !!omap\n',name); -0232 if sum(field(pos,:) ~= 0) > 0 -0233 model.enzymes = model.enzymes(field(pos,:) ~= 0); -0234 model.coeffs = field(pos,field(pos,:) ~= 0); -0235 %Sort metabolites: -0236 [model.enzymes,order] = sort(model.enzymes); -0237 model.coeffs = model.coeffs(order); -0238 for i = 1:length(model.enzymes) -0239 writeField(model, fid, 'coeffs', 'num', i, [' - ' model.enzymes{i}], preserveQuotes) -0240 end -0241 end -0242 -0243 elseif sum(strcmp({'subSystems','newMetMiriams','newRxnMiriams','newGeneMiriams','newCompMiriams','eccodes'},fieldName)) > 0 -0244 %eccodes/rxnNotes: if 1 write in 1 line, if more create header and list -0245 if strcmp(fieldName,'subSystems') -0246 list = field{pos}; %subSystems already comes in a cell array -0247 if isempty(list) -0248 return -0249 end -0250 elseif strcmp(fieldName,'newMetMiriams') -0251 index = str2double(regexprep(name,'^.+_','')); -0252 name = regexprep(name,'_\d+$',''); -0253 list = strsplit(model.newMetMiriams{pos,index},'; '); -0254 elseif strcmp(fieldName,'newRxnMiriams') -0255 index = str2double(regexprep(name,'^.+_','')); -0256 name = regexprep(name,'_\d+$',''); -0257 list = strsplit(model.newRxnMiriams{pos,index},'; '); -0258 elseif strcmp(fieldName,'newGeneMiriams') -0259 index = str2double(regexprep(name,'^.+_','')); -0260 name = regexprep(name,'_\d+$',''); -0261 list = strsplit(model.newGeneMiriams{pos,index},'; '); -0262 elseif strcmp(fieldName,'newCompMiriams') -0263 index = str2double(regexprep(name,'^.+_','')); -0264 name = regexprep(name,'_\d+$',''); -0265 list = strsplit(model.newCompMiriams{pos,index},'; '); -0266 elseif ~isempty(field{pos}) -0267 list = strrep(field{pos},' ',''); -0268 list = strsplit(list,';'); -0269 else -0270 return % empty, needs no line in file 
-0271 end -0272 list=strip(list); -0273 -0274 if length(list) == 1 && ~strcmp(list{1},'') && ~strcmp(fieldName,'subSystems') -0275 if preserveQuotes -0276 list = ['"' list{1} '"']; -0277 end -0278 fprintf(fid,' %s: %s\n',name,list); -0279 elseif ischar(list) && strcmp(fieldName,'subSystems') -0280 if preserveQuotes -0281 list = ['"' list '"']; -0282 end -0283 fprintf(fid,' %s: %s\n',name,list); -0284 elseif length(list) > 1 || strcmp(fieldName,'subSystems') -0285 if preserveQuotes -0286 for j=1:numel(list) -0287 list{j} = ['"' list{j} '"']; -0288 end -0289 end -0290 fprintf(fid,' %s:\n',name); -0291 for i = 1:length(list) -0292 fprintf(fid,'%s - %s\n',regexprep(name,'(^\s*).*','$1'),list{i}); -0293 end -0294 end -0295 -0296 elseif sum(pos) > 0 -0297 %All other fields: -0298 if strcmp(type,'txt') -0299 value = field{pos}; -0300 if preserveQuotes && ~isempty(value) -0301 value = ['"',value,'"']; -0302 end -0303 elseif strcmp(type,'num') -0304 if isnan(field(pos)) -0305 value = []; -0306 else -0307 value = sprintf('%.15g',full(field(pos))); -0308 end -0309 end -0310 if ~isempty(value) -0311 fprintf(fid,' %s: %s\n',name,value); -0312 end -0313 end -0314 end +0127 +0128 %EC-model: +0129 if isfield(model,'ec') +0130 fprintf(fid,'- ec-rxns:\n'); +0131 for i = 1:length(model.ec.rxns) +0132 fprintf(fid,' - !!omap\n'); +0133 writeField(model.ec, fid, 'rxns', 'txt', i, '- id', preserveQuotes) +0134 writeField(model.ec, fid, 'kcat', 'num', i, '- kcat', preserveQuotes) +0135 writeField(model.ec, fid, 'source', 'txt', i, '- source', preserveQuotes) +0136 writeField(model.ec, fid, 'notes', 'txt', i, '- notes', preserveQuotes) +0137 writeField(model.ec, fid, 'eccodes', 'txt', i, '- eccodes', preserveQuotes) +0138 writeField(model.ec, fid, 'rxnEnzMat', 'txt', i, '- enzymes', preserveQuotes) +0139 end +0140 +0141 fprintf(fid,'- ec-enzymes:\n'); +0142 for i = 1:length(model.ec.genes) +0143 fprintf(fid,' - !!omap\n'); +0144 writeField(model.ec, fid, 'genes', 'txt', i, '- genes', 
preserveQuotes) +0145 writeField(model.ec, fid, 'enzymes', 'txt', i, '- enzymes', preserveQuotes) +0146 writeField(model.ec, fid, 'mw', 'num', i, '- mw', preserveQuotes) +0147 writeField(model.ec, fid, 'sequence', 'txt', i, '- sequence', preserveQuotes) +0148 writeField(model.ec, fid, 'concs', 'num', i, '- concs', preserveQuotes) +0149 end +0150 end +0151 +0152 %Close file: +0153 fclose(fid); +0154 +0155 end +0156 +0157 function writeField(model,fid,fieldName,type,pos,name,preserveQuotes) +0158 %Writes a new line in the yaml file if the field exists and the field is +0159 %not empty at the correspoinding position. It's recursive for some fields +0160 %(metMiriams, rxnMiriams, and S) +0161 +0162 if isfield(model,fieldName) +0163 if strcmp(fieldName,'metComps') +0164 %metComps: write full name +0165 fieldName = 'comps'; +0166 pos = model.metComps(pos); +0167 end +0168 +0169 field = model.(fieldName); +0170 +0171 if strcmp(fieldName,'metMiriams') +0172 if ~isempty(model.metMiriams{pos}) +0173 fprintf(fid,' %s: !!omap\n',name); +0174 for i=1:size(model.newMetMiriams,2) +0175 %'i' represents the different miriam names, e.g. 
+0176 %kegg.compound or chebi +0177 if ~isempty(model.newMetMiriams{pos,i}) +0178 %As during the following writeField call the value of +0179 %'i' would be lost, it is temporarily concatenated to +0180 %'name' parameter, which will be edited later +0181 writeField(model, fid, 'newMetMiriams', 'txt', pos, [' - ' model.newMetMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0182 end +0183 end +0184 end +0185 +0186 elseif strcmp(fieldName,'rxnMiriams') +0187 if ~isempty(model.rxnMiriams{pos}) +0188 fprintf(fid,' %s: !!omap\n',name); +0189 for i=1:size(model.newRxnMiriams,2) +0190 if ~isempty(model.newRxnMiriams{pos,i}) +0191 writeField(model, fid, 'newRxnMiriams', 'txt', pos, [' - ' model.newRxnMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0192 end +0193 end +0194 end +0195 +0196 elseif strcmp(fieldName,'geneMiriams') +0197 if ~isempty(model.geneMiriams{pos}) +0198 fprintf(fid,' %s: !!omap\n',name); +0199 for i=1:size(model.newGeneMiriams,2) +0200 if ~isempty(model.newGeneMiriams{pos,i}) +0201 writeField(model, fid, 'newGeneMiriams', 'txt', pos, [' - ' model.newGeneMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0202 end +0203 end +0204 end +0205 +0206 elseif strcmp(fieldName,'compMiriams') +0207 if ~isempty(model.compMiriams{pos}) +0208 fprintf(fid,' %s: !!omap\n',name); +0209 for i=1:size(model.newCompMiriams,2) +0210 if ~isempty(model.newCompMiriams{pos,i}) +0211 writeField(model, fid, 'newCompMiriams', 'txt', pos, [' - ' model.newCompMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0212 end +0213 end +0214 end +0215 +0216 elseif strcmp(fieldName,'S') +0217 %S: create header & write each metabolite in a new line +0218 fprintf(fid,' %s: !!omap\n',name); +0219 if sum(field(:,pos) ~= 0) > 0 +0220 model.mets = model.mets(field(:,pos) ~= 0); +0221 model.coeffs = field(field(:,pos) ~= 0,pos); +0222 %Sort metabolites: +0223 [model.mets,order] = sort(model.mets); +0224 model.coeffs = model.coeffs(order); +0225 for i = 1:length(model.mets) +0226 
writeField(model, fid, 'coeffs', 'num', i, [' - ' model.mets{i}], preserveQuotes) +0227 end +0228 end +0229 +0230 elseif strcmp(fieldName,'rxnEnzMat') +0231 %S: create header & write each enzyme in a new line +0232 fprintf(fid,' %s: !!omap\n',name); +0233 if sum(field(pos,:) ~= 0) > 0 +0234 model.enzymes = model.enzymes(field(pos,:) ~= 0); +0235 model.coeffs = field(pos,field(pos,:) ~= 0); +0236 %Sort metabolites: +0237 [model.enzymes,order] = sort(model.enzymes); +0238 model.coeffs = model.coeffs(order); +0239 for i = 1:length(model.enzymes) +0240 writeField(model, fid, 'coeffs', 'num', i, [' - ' model.enzymes{i}], preserveQuotes) +0241 end +0242 end +0243 +0244 elseif sum(strcmp({'subSystems','newMetMiriams','newRxnMiriams','newGeneMiriams','newCompMiriams','eccodes'},fieldName)) > 0 +0245 %eccodes/rxnNotes: if 1 write in 1 line, if more create header and list +0246 if strcmp(fieldName,'subSystems') +0247 list = field{pos}; %subSystems already comes in a cell array +0248 if isempty(list) +0249 return +0250 end +0251 elseif strcmp(fieldName,'newMetMiriams') +0252 index = str2double(regexprep(name,'^.+_','')); +0253 name = regexprep(name,'_\d+$',''); +0254 list = strsplit(model.newMetMiriams{pos,index},'; '); +0255 elseif strcmp(fieldName,'newRxnMiriams') +0256 index = str2double(regexprep(name,'^.+_','')); +0257 name = regexprep(name,'_\d+$',''); +0258 list = strsplit(model.newRxnMiriams{pos,index},'; '); +0259 elseif strcmp(fieldName,'newGeneMiriams') +0260 index = str2double(regexprep(name,'^.+_','')); +0261 name = regexprep(name,'_\d+$',''); +0262 list = strsplit(model.newGeneMiriams{pos,index},'; '); +0263 elseif strcmp(fieldName,'newCompMiriams') +0264 index = str2double(regexprep(name,'^.+_','')); +0265 name = regexprep(name,'_\d+$',''); +0266 list = strsplit(model.newCompMiriams{pos,index},'; '); +0267 elseif ~isempty(field{pos}) +0268 list = strrep(field{pos},' ',''); +0269 list = strsplit(list,';'); +0270 else +0271 return % empty, needs no line in file 
+0272 end +0273 list=strip(list); +0274 +0275 if length(list) == 1 && ~strcmp(list{1},'') && ~strcmp(fieldName,'subSystems') +0276 if preserveQuotes +0277 list = ['"' list{1} '"']; +0278 end +0279 fprintf(fid,' %s: %s\n',name,list); +0280 elseif ischar(list) && strcmp(fieldName,'subSystems') +0281 if preserveQuotes +0282 list = ['"' list '"']; +0283 end +0284 fprintf(fid,' %s: %s\n',name,list); +0285 elseif length(list) > 1 || strcmp(fieldName,'subSystems') +0286 if preserveQuotes +0287 for j=1:numel(list) +0288 list{j} = ['"' list{j} '"']; +0289 end +0290 end +0291 fprintf(fid,' %s:\n',name); +0292 for i = 1:length(list) +0293 fprintf(fid,'%s - %s\n',regexprep(name,'(^\s*).*','$1'),list{i}); +0294 end +0295 end +0296 +0297 elseif sum(pos) > 0 +0298 %All other fields: +0299 if strcmp(type,'txt') +0300 value = field{pos}; +0301 if preserveQuotes && ~isempty(value) +0302 value = ['"',value,'"']; +0303 end +0304 elseif strcmp(type,'num') +0305 if isnan(field(pos)) +0306 value = []; +0307 else +0308 value = sprintf('%.15g',full(field(pos))); +0309 end +0310 end +0311 if ~isempty(value) +0312 fprintf(fid,' %s: %s\n',name,value); +0313 end +0314 end 0315 end -0316 -0317 function writeMetadata(model,fid) -0318 % Writes model metadata to the yaml file. This information will eventually -0319 % be extracted entirely from the model, but for now, many of the entries -0320 % are hard-coded defaults for HumanGEM. 
-0321 -0322 fprintf(fid, '- metaData:\n'); -0323 if isfield(model,'id') -0324 fprintf(fid, ' id: "%s"\n', model.id); -0325 else -0326 fprintf(fid, ' id: "blankID"\n'); -0327 end -0328 if isfield(model,'name') -0329 fprintf(fid, ' name: "%s"\n',model.name); -0330 else -0331 fprintf(fid, ' name: "blankName"\n'); -0332 end -0333 if isfield(model,'version') -0334 fprintf(fid, ' version: "%s"\n',model.version); -0335 end -0336 fprintf(fid, ' date: "%s"\n',datestr(now,29)); % 29=YYYY-MM-DD -0337 if isfield(model,'annotation') -0338 if isfield(model.annotation,'defaultLB') -0339 fprintf(fid, ' defaultLB: "%g"\n', model.annotation.defaultLB); -0340 end -0341 if isfield(model.annotation,'defaultUB') -0342 fprintf(fid, ' defaultUB: "%g"\n', model.annotation.defaultUB); -0343 end -0344 if isfield(model.annotation,'givenName') -0345 fprintf(fid, ' givenName: "%s"\n', model.annotation.givenName); -0346 end -0347 if isfield(model.annotation,'familyName') -0348 fprintf(fid, ' familyName: "%s"\n', model.annotation.familyName); -0349 end -0350 if isfield(model.annotation,'authors') -0351 fprintf(fid, ' authors: "%s"\n', model.annotation.authors); -0352 end -0353 if isfield(model.annotation,'email') -0354 fprintf(fid, ' email: "%s"\n', model.annotation.email); -0355 end -0356 if isfield(model.annotation,'organization') -0357 fprintf(fid, ' organization: "%s"\n',model.annotation.organization); -0358 end -0359 if isfield(model.annotation,'taxonomy') -0360 fprintf(fid, ' taxonomy: "%s"\n', model.annotation.taxonomy); -0361 end -0362 if isfield(model.annotation,'note') -0363 fprintf(fid, ' note: "%s"\n', model.annotation.note); -0364 end -0365 if isfield(model.annotation,'sourceUrl') -0366 fprintf(fid, ' sourceUrl: "%s"\n', model.annotation.sourceUrl); -0367 end -0368 end -0369 if isfield(model,'ec') -0370 if model.ec.geckoLight -0371 geckoLight = 'true'; -0372 else -0373 geckoLight = 'false'; -0374 end -0375 fprintf(fid,' geckoLight: "%s"\n',geckoLight); -0376 end -0377 end +0316 end 
+0317 +0318 function writeMetadata(model,fid) +0319 % Writes model metadata to the yaml file. This information will eventually +0320 % be extracted entirely from the model, but for now, many of the entries +0321 % are hard-coded defaults for HumanGEM. +0322 +0323 fprintf(fid, '- metaData:\n'); +0324 if isfield(model,'id') +0325 fprintf(fid, ' id: "%s"\n', model.id); +0326 else +0327 fprintf(fid, ' id: "blankID"\n'); +0328 end +0329 if isfield(model,'name') +0330 fprintf(fid, ' name: "%s"\n',model.name); +0331 else +0332 fprintf(fid, ' name: "blankName"\n'); +0333 end +0334 if isfield(model,'version') +0335 fprintf(fid, ' version: "%s"\n',model.version); +0336 end +0337 fprintf(fid, ' date: "%s"\n',datestr(now,29)); % 29=YYYY-MM-DD +0338 if isfield(model,'annotation') +0339 if isfield(model.annotation,'defaultLB') +0340 fprintf(fid, ' defaultLB: "%g"\n', model.annotation.defaultLB); +0341 end +0342 if isfield(model.annotation,'defaultUB') +0343 fprintf(fid, ' defaultUB: "%g"\n', model.annotation.defaultUB); +0344 end +0345 if isfield(model.annotation,'givenName') +0346 fprintf(fid, ' givenName: "%s"\n', model.annotation.givenName); +0347 end +0348 if isfield(model.annotation,'familyName') +0349 fprintf(fid, ' familyName: "%s"\n', model.annotation.familyName); +0350 end +0351 if isfield(model.annotation,'authors') +0352 fprintf(fid, ' authors: "%s"\n', model.annotation.authors); +0353 end +0354 if isfield(model.annotation,'email') +0355 fprintf(fid, ' email: "%s"\n', model.annotation.email); +0356 end +0357 if isfield(model.annotation,'organization') +0358 fprintf(fid, ' organization: "%s"\n',model.annotation.organization); +0359 end +0360 if isfield(model.annotation,'taxonomy') +0361 fprintf(fid, ' taxonomy: "%s"\n', model.annotation.taxonomy); +0362 end +0363 if isfield(model.annotation,'note') +0364 fprintf(fid, ' note: "%s"\n', model.annotation.note); +0365 end +0366 if isfield(model.annotation,'sourceUrl') +0367 fprintf(fid, ' sourceUrl: "%s"\n', 
model.annotation.sourceUrl); +0368 end +0369 end +0370 if isfield(model,'ec') +0371 if model.ec.geckoLight +0372 geckoLight = 'true'; +0373 else +0374 geckoLight = 'false'; +0375 end +0376 fprintf(fid,' geckoLight: "%s"\n',geckoLight); +0377 end +0378 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/struct_conversion/addIdentifierPrefix.html b/doc/struct_conversion/addIdentifierPrefix.html new file mode 100644 index 00000000..de10d34d --- /dev/null +++ b/doc/struct_conversion/addIdentifierPrefix.html @@ -0,0 +1,128 @@ + + + + Description of addIdentifierPrefix + + + + + + + + + +
    Home > struct_conversion > addIdentifierPrefix.m
    + + + +

    addIdentifierPrefix +

    + +

    PURPOSE ^

    +
    addIdentifierPrefix
    + +

    SYNOPSIS ^

    +
    function [model, hasChanged]=addIdentifierPrefix(model,fields)
    + +

    DESCRIPTION ^

    +
     addIdentifierPrefix
    +   If reaction, metabolite, compartment, gene or model identifiers do not
    +   start with a letter or _, which conflicts with SBML specifications,
    +   prefixes are added for all identifiers in the respective model field.
    +   The prefixes are:
    +       "R_" for model.rxns,
    +       "M_" for model.mets,
    +       "C_" for model.comps;
    +       "G_" for model.genes (and also represented in model.grRules)
    +
    + Input:
    +   model           model whose identifiers should be modified
    +   fields          cell array with model field names that should be
    +                   checked if prefixes should be added, possible values: 
    +                   'rxns', 'mets', 'comps', 'genes', 'id'. (optional, by
    +                   default all listed model fields will be checked).
    +
    + Output:
    +   model           modified model
    +   hasChanged      cell array with fields and prefixes that are added
    +
    + Usage: [model, hasChanged]=addIdentifierPrefix(model,fields)
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + + + +

    SOURCE CODE ^

    +
    0001 function [model, hasChanged]=addIdentifierPrefix(model,fields)
    +0002 % addIdentifierPrefix
    +0003 %   If reaction, metabolite, compartment, gene or model identifiers do not
    +0004 %   start with a letter or _, which conflicts with SBML specifications,
    +0005 %   prefixes are added for all identifiers in the respective model field.
    +0006 %   The prefixes are:
    +0007 %       "R_" for model.rxns,
    +0008 %       "M_" for model.mets,
    +0009 %       "C_" for model.comps;
    +0010 %       "G_" for model.genes (and also represented in model.grRules)
    +0011 %
    +0012 % Input:
    +0013 %   model           model whose identifiers should be modified
    +0014 %   fields          cell array with model field names that should be
    +0015 %                   checked if prefixes should be added, possible values:
    +0016 %                   'rxns', 'mets', 'comps', 'genes', 'id'. (optional, by
    +0017 %                   default all listed model fields will be checked).
    +0018 %
    +0019 % Output:
    +0020 %   model           modified model
    +0021 %   hasChanged      cell array with fields and prefixes that are added
    +0022 %
    +0023 % Usage: [model, hasChanged]=addIdentifierPrefix(model,fields)
    +0024 
    +0025 if nargin<2 || isempty(fields)
    +0026     fields = {'rxns','mets','comps','genes','id'};
    +0027 end
    +0028 
    +0029 modelFields = {'rxns','R_';
    +0030                'mets','M_';
    +0031                'comps','C_';
    +0032                'genes','G_';
    +0033                'id','M_'};
    +0034 
    +0035 toChangeIdx = find(ismember(modelFields(:,1),fields));
    +0036 hasChanged  = false(numel(modelFields(:,1)),1);
    +0037 for i=1:numel(toChangeIdx)
    +0038     currName    = modelFields{toChangeIdx(i),1};
    +0039     currPrefix  = modelFields{toChangeIdx(i),2};
    +0040     if isfield(model,currName)
    +0041         currField   = model.(currName);
    +0042     else
    +0043         continue;
    +0044     end
    +0045     if ~all(startsWith(currField,regexpPattern('^[a-zA-Z_]')))
    +0046         currField = strcat(currPrefix, currField);
    +0047         hasChanged(toChangeIdx(i)) = true;
    +0048 
    +0049         if strcmp(currName,'genes')
    +0050                 model.grRules = regexprep(model.grRules, '(\<[0-9_a-zA-Z])', 'G_$1');
    +0051                 model.grRules = regexprep(model.grRules, ' G_or ', ' or ');
    +0052                 model.grRules = regexprep(model.grRules, ' G_and ', ' and ');
    +0053         end
    +0054         model.(currName) = currField;
    +0055     end
    +0056 end
    +0057 
    +0058 hasChanged = modelFields(hasChanged,:);
    +0059 hasChanged = append('model.', hasChanged(:,1), ' (', hasChanged(:,2), ' prefix)');
    +0060 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/struct_conversion/index.html b/doc/struct_conversion/index.html index e8c1ba9c..aa35ccd6 100644 --- a/doc/struct_conversion/index.html +++ b/doc/struct_conversion/index.html @@ -19,7 +19,7 @@

    Index for struct_conversion

    Matlab files in this directory:

    -
     editMiriameditMiriam
     extractMiriamextractMiriam
     ravenCobraWrapperravenCobraWrapper
     standardizeModelFieldOrderstandardizeModelFieldOrder
    + addIdentifierPrefixaddIdentifierPrefix  editMiriameditMiriam  extractMiriamextractMiriam  ravenCobraWrapperravenCobraWrapper  removeIdentifierPrefixremoveIdentifierPrefix  standardizeModelFieldOrderstandardizeModelFieldOrder diff --git a/doc/struct_conversion/ravenCobraWrapper.html b/doc/struct_conversion/ravenCobraWrapper.html index 07137fa5..6bb7c386 100644 --- a/doc/struct_conversion/ravenCobraWrapper.html +++ b/doc/struct_conversion/ravenCobraWrapper.html @@ -49,8 +49,7 @@

    DESCRIPTION ^SOURCE CODE ^% reconstructed based on lower bound reaction values 0022 % 0023 % NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following -0024 % fields are lost: geneEntrezID, metSmiles, modelVersion, -0025 % proteinNames, proteins -0026 % -0027 % NOTE: The information about mandatory RAVEN fields was taken from -0028 % checkModelStruct function, whereas the corresponding information about -0029 % COBRA fields was fetched from verifyModel function -0030 % -0031 % Usage: newModel=ravenCobraWrapper(model) -0032 -0033 if isfield(model,'rules') -0034 isRaven=false; -0035 else -0036 isRaven=true; -0037 end -0038 -0039 ravenPath=findRAVENroot(); -0040 -0041 % Load COBRA field information -0042 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv -0043 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1); -0044 dbFields = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams -0045 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'}); -0046 COBRAnamespace = fieldFile{5}(dbFields); -0047 COBRAnamespace = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace -0048 COBRAfields = fieldFile{1}(dbFields); -0049 fclose(fid); -0050 -0051 % Load conversion between additional COBRA fields and namespaces: -0052 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv')); -0053 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0); -0054 COBRAfields = [COBRAfields; fieldFile{1}]; -0055 COBRAnamespace = [COBRAnamespace; fieldFile{2}]; -0056 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn')); -0057 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn')); -0058 metCOBRAfields = 
COBRAfields(startsWith(COBRAfields,'met')); -0059 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met')); -0060 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene')); -0061 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene')); -0062 fclose(fid); -0063 -0064 if isRaven -0065 %Firstly remove boundary metabolites -0066 model=simplifyModel(model); -0067 end -0068 -0069 % Keep fields that have identical names and content -0070 newModel.S=model.S; -0071 newModel.lb=model.lb; -0072 newModel.ub=model.ub; -0073 if isfield(model,'c') -0074 newModel.c=model.c; -0075 else -0076 newModel.c=zeros(numel(model.rxns),1); -0077 end -0078 newModel.rxns=model.rxns; -0079 optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',... -0080 'metFormulas','comps','compNames','metCharges','genes',... -0081 'rxnConfidenceScores','rxnGeneMat','metNotes','rev'}; -0082 for i=1:length(optFields) -0083 if isfield(model,optFields{i}) -0084 newModel.(optFields{i})=model.(optFields{i}); -0085 end -0086 end -0087 -0088 % Convert unique fields -0089 if isRaven -0090 fprintf('Converting RAVEN structure to COBRA..\n'); -0091 %Convert from RAVEN to COBRA structure -0092 -0093 %Mandatory COBRA fields -0094 newModel.rxns=model.rxns; -0095 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$'))) -0096 newModel.mets=model.mets; -0097 else -0098 %Check if model has compartment info as "met_c" suffix in all metabolites: -0099 BiGGformat = false(size(model.mets)); -0100 for i=1:numel(model.comps) -0101 compPos=model.metComps==i; -0102 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$'])); -0103 end -0104 if all(BiGGformat) -0105 newModel.mets=model.mets; -0106 for i=1:numel(model.comps) -0107 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']); -0108 end -0109 else -0110 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']'); -0111 end -0112 end -0113 -0114 %b, csense, osenseStr, 
genes, rules are also mandatory, but defined -0115 %later to match the order of fields -0116 -0117 %Optional COBRA fields -0118 if isfield(model,'id') -0119 newModel.modelID=model.id; -0120 end -0121 if isfield(model,'name') -0122 newModel.modelName=model.name; -0123 end -0124 if isfield(model,'eccodes') -0125 newModel.rxnECNumbers=model.eccodes; -0126 end -0127 if isfield(model,'rxnMiriams') -0128 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams); -0129 for i = 1:length(rxnCOBRAfields) -0130 j=ismember(extractedMiriamNames,rxnNamespaces{i}); -0131 if any(j) -0132 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);']) -0133 end -0134 end -0135 end -0136 if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams -0137 if isfield(newModel,'rxnReferences') -0138 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences); -0139 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $',''); -0140 else -0141 newModel.rxnReferences = model.rxnReferences; -0142 end -0143 end -0144 if isfield(model,'metNames') -0145 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']'); -0146 end -0147 if isfield(model,'metMiriams') -0148 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams); -0149 %Shorten miriam names for KEGG and PubChem. 
These shorter names -0150 %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem -0151 %Compound/Substance, into corresponding COBRA model fields -0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg'); -0153 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem'); -0154 i=ismember(extractedMiriamNames,'kegg'); -0155 if any(i) % Combine KEGG compounds and glycans -0156 for j=1:length(i) -0157 if i(j) && isfield(newModel,'metKEGGID')~=1 -0158 newModel.metKEGGID=miriams(:,j); -0159 elseif i(j) -0160 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j)); -0161 end -0162 end -0163 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$',''); -0164 end -0165 i=ismember(extractedMiriamNames,'pubchem'); -0166 if any(i) % Combine Pubchem compounds and substances -0167 for j=1:length(i) -0168 if i(j) && isfield(newModel,'metPubChemID')~=1 -0169 newModel.metPubChemID=miriams(:,j); -0170 elseif i(j) -0171 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j)); -0172 end -0173 end -0174 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$',''); -0175 end -0176 %All other Miriams can be directly parsed with no modifications: -0177 for i = 1:length(metCOBRAfields) -0178 j=ismember(extractedMiriamNames,metNamespaces{i}); -0179 if any(j) -0180 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);']) -0181 end -0182 end -0183 end -0184 if isfield(model,'inchis') -0185 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$',''); -0186 end -0187 newModel.b=zeros(numel(model.mets),1); -0188 newModel.csense=repmat('E',size(model.mets)); -0189 if isfield(model,'geneMiriams') -0190 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams); -0191 for i = 1:length(geneCOBRAfields) -0192 j=ismember(extractedMiriamNames,geneNamespaces{i}); -0193 if any(j) -0194 eval(['newModel.' 
geneCOBRAfields{i} ' = miriams(:,j);']) -0195 end -0196 end -0197 end -0198 if isfield(model,'geneShortNames') -0199 newModel.geneNames=model.geneShortNames; -0200 end -0201 if isfield(model,'genes') -0202 newModel.rules=grrulesToRules(model); -0203 else -0204 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n'); -0205 end -0206 newModel.osenseStr='max'; -0207 else -0208 fprintf('Converting COBRA structure to RAVEN..\n'); -0209 %Convert from COBRA to RAVEN structure -0210 -0211 %Mandatory RAVEN fields -0212 newModel.mets=model.mets; -0213 if ~isfield(model,'comps') -0214 %Since 'comps' field is not mandatory in COBRA, it may be required -0215 %to obtain the non-redundant list of comps from metabolite ids, if -0216 %'comps' field is not available -0217 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1')); -0218 newModel.compNames = newModel.comps; -0219 end -0220 for i=1:numel(newModel.comps) -0221 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],''); -0222 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],''); -0223 end -0224 -0225 %In some cases (e.g. any model that uses BiGG ids as main ids), there -0226 %may be overlapping mets due to removal of compartment info. To avoid -0227 %this, we change compartments from e.g. 
[c] into _c -0228 if numel(unique(newModel.mets))~=numel(model.mets) -0229 newModel.mets=model.mets; -0230 for i=1:numel(newModel.comps) -0231 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]); -0232 end -0233 end -0234 %Since COBRA no longer contains rev field it is assumed that rxn is -0235 %reversible if its lower bound is set below zero -0236 if ~isfield(model,'rev') -0237 for i=1:numel(model.rxns) -0238 if model.lb(i)<0 -0239 newModel.rev(i,1)=1; -0240 else -0241 newModel.rev(i,1)=0; -0242 end -0243 end -0244 end -0245 newModel.b=zeros(numel(model.mets),1); -0246 -0247 %metComps is also mandatory, but defined later to match the order of -0248 %fields -0249 -0250 %Fields 'name' and 'id' are also considered as mandatory, but -0251 %these are added to the model during exportModel/exportToExcelFormat -0252 %anyway, so there is no point to add this information here -0253 -0254 %Optional RAVEN fields -0255 if isfield(model,'modelID') -0256 newModel.id=model.modelID; -0257 end -0258 if isfield(model,'modelName') -0259 newModel.name=model.modelName; -0260 end -0261 if isfield(model,'rules') && ~isfield(model,'grRules') -0262 model.grRules = rulesTogrrules(model); -0263 end -0264 if isfield(model,'grRules') -0265 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0266 newModel.grRules = grRules; -0267 newModel.rxnGeneMat = rxnGeneMat; -0268 end -0269 if isfield(model,'rxnECNumbers') -0270 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:',''); -0271 end -0272 if any(isfield(model,rxnCOBRAfields)) -0273 for i=1:numel(model.rxns) -0274 counter=1; -0275 newModel.rxnMiriams{i,1}=[]; -0276 if isfield(model,'rxnReferences') -0277 if ~isempty(model.rxnReferences{i}) -0278 pmids = model.rxnReferences{i}; -0279 pmids = strsplit(pmids,'; '); -0280 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once')); -0281 if any(nonPmids) %Not a pubmed id, keep in rxnReferences instead -0282 newModel.rxnReferences{i,1} = 
strjoin(pmids(nonPmids),', '); -0283 pmids(nonPmids)=[]; -0284 end -0285 for j = 1:length(pmids) -0286 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed'; -0287 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j}; -0288 counter=counter+1; -0289 end -0290 end -0291 end -0292 for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences -0293 if isfield(model,rxnCOBRAfields{j}) -0294 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']); -0295 if ~isempty(rxnAnnotation) -0296 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';')); -0297 for a=1:length(rxnAnnotation) -0298 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j}; -0299 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a}; -0300 counter=counter+1; -0301 end -0302 end -0303 end -0304 end -0305 end -0306 end -0307 if isfield(newModel,'rxnReferences') -0308 emptyEntry = cellfun(@isempty,newModel.rxnReferences); -0309 newModel.rxnReferences(emptyEntry)={''}; -0310 diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences); -0311 if diffNumel > 0 -0312 newModel.rxnReferences(end+1:end+diffNumel) = {''}; -0313 end -0314 end -0315 if any(isfield(model,geneCOBRAfields)) -0316 for i=1:numel(model.genes) -0317 counter=1; -0318 newModel.geneMiriams{i,1}=[]; -0319 for j = 1:length(geneCOBRAfields) -0320 if isfield(model,geneCOBRAfields{j}) -0321 geneAnnotation = eval(['model.' 
geneCOBRAfields{j} '{i}']); -0322 if ~isempty(geneAnnotation) -0323 geneAnnotation = strtrim(strsplit(geneAnnotation,';')); -0324 for a=1:length(geneAnnotation) -0325 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j}; -0326 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a}; -0327 counter=counter+1; -0328 end -0329 end -0330 end -0331 end -0332 end -0333 end -0334 if isfield(model,'geneNames') -0335 newModel.geneShortNames=model.geneNames; -0336 end -0337 newModel.metNames=model.metNames; -0338 for i=1:numel(newModel.comps) -0339 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],''); -0340 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],''); -0341 end -0342 newModel.metNames=deblank(newModel.metNames); -0343 newModel.metComps=regexprep(model.mets,'^.+\[',''); -0344 newModel.metComps=regexprep(newModel.metComps,'\]$',''); -0345 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps); -0346 if isfield(model,'metInChIString') -0347 newModel.inchis=regexprep(model.metInChIString,'^InChI=',''); -0348 end -0349 printWarning=false; -0350 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID'])) -0351 for i=1:numel(model.mets) -0352 counter=1; -0353 newModel.metMiriams{i,1}=[]; -0354 if isfield(model,'metKEGGID') -0355 if ~isempty(model.metKEGGID{i}) -0356 if strcmp(model.metKEGGID{i}(1),'C') -0357 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound'; -0358 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; -0359 counter=counter+1; -0360 elseif strcmp(model.metKEGGID{i}(1),'G') -0361 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan'; -0362 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; -0363 counter=counter+1; -0364 end -0365 end -0366 end -0367 if isfield(model,'metPubChemID') -0368 if ~isempty(model.metPubChemID{i}) -0369 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:') -0370 
newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; -0371 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; -0372 counter=counter+1; -0373 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:') -0374 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance'; -0375 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; -0376 counter=counter+1; -0377 else -0378 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; -0379 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; -0380 counter=counter+1; -0381 printWarning=true; -0382 end -0383 end -0384 end -0385 for j = 1:length(metCOBRAfields) -0386 if isfield(model,metCOBRAfields{j}) -0387 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']); -0388 if ~isempty(metAnnotation) -0389 metAnnotation = strtrim(strsplit(metAnnotation,';')); -0390 for a=1:length(metAnnotation) -0391 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j}; -0392 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a}; -0393 counter=counter+1; -0394 end -0395 end -0396 end -0397 end -0398 end -0399 end -0400 if printWarning -0401 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). 
All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n'); -0402 end -0403 end -0404 -0405 % Order fields -0406 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models -0407 end -0408 -0409 function rules=grrulesToRules(model) -0410 %This function just takes grRules, changes all gene names to -0411 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding -0412 %symbols -0413 replacingGenes=cell([size(model.genes,1) 1]); -0414 for i=1:numel(replacingGenes) -0415 replacingGenes{i}=strcat('x(',num2str(i),')'); -0416 end -0417 rules = strcat({' '},model.grRules,{' '}); -0418 for i=1:length(model.genes) -0419 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']); -0420 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']); -0421 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']); -0422 end -0423 rules=regexprep(rules,' and ',' & '); -0424 rules=regexprep(rules,' or ',' | '); -0425 rules=strtrim(rules); -0426 end -0427 -0428 function grRules=rulesTogrrules(model) -0429 %This function takes rules, replaces &/| for and/or, replaces the x(i) -0430 %format with the actual gene ID, and takes out extra whitespace and -0431 %redundant parenthesis introduced by COBRA, to create grRules. 
-0432 grRules = strrep(model.rules,'&','and'); -0433 grRules = strrep(grRules,'|','or'); -0434 for i = 1:length(model.genes) -0435 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i}); -0436 end -0437 grRules = strrep(grRules,'( ','('); -0438 grRules = strrep(grRules,' )',')'); -0439 grRules = regexprep(grRules,'^(',''); %rules that start with a "(" -0440 grRules = regexprep(grRules,')$',''); %rules that end with a ")" -0441 end +0024 % fields are lost: geneEntrezID, modelVersion, proteins +0025 % +0026 % NOTE: The information about mandatory RAVEN fields was taken from +0027 % checkModelStruct function, whereas the corresponding information about +0028 % COBRA fields was fetched from verifyModel function +0029 % +0030 % Usage: newModel=ravenCobraWrapper(model) +0031 +0032 if isfield(model,'rules') +0033 isRaven=false; +0034 else +0035 isRaven=true; +0036 end +0037 +0038 ravenPath=findRAVENroot(); +0039 +0040 % Load COBRA field information +0041 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv +0042 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1); +0043 dbFields = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams +0044 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'}); +0045 COBRAnamespace = fieldFile{5}(dbFields); +0046 COBRAnamespace = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace +0047 COBRAfields = fieldFile{1}(dbFields); +0048 fclose(fid); +0049 +0050 % Load conversion between additional COBRA fields and namespaces: +0051 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv')); +0052 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0); +0053 COBRAfields = [COBRAfields; fieldFile{1}]; +0054 
COBRAnamespace = [COBRAnamespace; fieldFile{2}]; +0055 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn')); +0056 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn')); +0057 metCOBRAfields = COBRAfields(startsWith(COBRAfields,'met')); +0058 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met')); +0059 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene')); +0060 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene')); +0061 fclose(fid); +0062 +0063 if isRaven +0064 %Firstly remove boundary metabolites +0065 model=simplifyModel(model); +0066 end +0067 +0068 % Keep fields that have identical names and content +0069 newModel.S=model.S; +0070 newModel.lb=model.lb; +0071 newModel.ub=model.ub; +0072 if isfield(model,'c') +0073 newModel.c=model.c; +0074 else +0075 newModel.c=zeros(numel(model.rxns),1); +0076 end +0077 newModel.rxns=model.rxns; +0078 optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',... +0079 'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',... 
+0080 'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteins'}; +0081 for i=1:length(optFields) +0082 if isfield(model,optFields{i}) +0083 newModel.(optFields{i})=model.(optFields{i}); +0084 end +0085 end +0086 +0087 % Convert unique fields +0088 if isRaven +0089 fprintf('Converting RAVEN structure to COBRA..\n'); +0090 %Convert from RAVEN to COBRA structure +0091 +0092 %Mandatory COBRA fields +0093 newModel.rxns=model.rxns; +0094 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$'))) +0095 newModel.mets=model.mets; +0096 else +0097 %Check if model has compartment info as "met_c" suffix in all metabolites: +0098 BiGGformat = false(size(model.mets)); +0099 for i=1:numel(model.comps) +0100 compPos=model.metComps==i; +0101 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$'])); +0102 end +0103 if all(BiGGformat) +0104 newModel.mets=model.mets; +0105 for i=1:numel(model.comps) +0106 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']); +0107 end +0108 else +0109 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']'); +0110 end +0111 end +0112 +0113 %b, csense, osenseStr, genes, rules are also mandatory, but defined +0114 %later to match the order of fields +0115 +0116 %Optional COBRA fields +0117 if isfield(model,'id') +0118 newModel.modelID=model.id; +0119 end +0120 if isfield(model,'name') +0121 newModel.modelName=model.name; +0122 end +0123 if isfield(model,'eccodes') +0124 newModel.rxnECNumbers=model.eccodes; +0125 end +0126 if isfield(model,'rxnMiriams') +0127 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams); +0128 for i = 1:length(rxnCOBRAfields) +0129 j=ismember(extractedMiriamNames,rxnNamespaces{i}); +0130 if any(j) +0131 eval(['newModel.' 
rxnCOBRAfields{i} ' = miriams(:,j);']) +0132 end +0133 end +0134 end +0135 if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams +0136 if isfield(newModel,'rxnReferences') +0137 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences); +0138 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $',''); +0139 else +0140 newModel.rxnReferences = model.rxnReferences; +0141 end +0142 end +0143 if isfield(model,'metNames') +0144 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']'); +0145 end +0146 if isfield(model,'metMiriams') +0147 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams); +0148 %Shorten miriam names for KEGG and PubChem. These shorter names +0149 %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem +0150 %Compound/Substance, into corresponding COBRA model fields +0151 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg'); +0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem'); +0153 i=ismember(extractedMiriamNames,'kegg'); +0154 if any(i) % Combine KEGG compounds and glycans +0155 for j=1:length(i) +0156 if i(j) && isfield(newModel,'metKEGGID')~=1 +0157 newModel.metKEGGID=miriams(:,j); +0158 elseif i(j) +0159 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j)); +0160 end +0161 end +0162 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$',''); +0163 end +0164 i=ismember(extractedMiriamNames,'pubchem'); +0165 if any(i) % Combine Pubchem compounds and substances +0166 for j=1:length(i) +0167 if i(j) && isfield(newModel,'metPubChemID')~=1 +0168 newModel.metPubChemID=miriams(:,j); +0169 elseif i(j) +0170 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j)); +0171 end +0172 end +0173 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$',''); +0174 end +0175 %All other Miriams can be directly parsed with no modifications: +0176 for i = 
1:length(metCOBRAfields) +0177 j=ismember(extractedMiriamNames,metNamespaces{i}); +0178 if any(j) +0179 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);']) +0180 end +0181 end +0182 end +0183 if isfield(model,'inchis') +0184 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$',''); +0185 end +0186 newModel.b=zeros(numel(model.mets),1); +0187 newModel.csense=repmat('E',size(model.mets)); +0188 if isfield(model,'geneMiriams') +0189 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams); +0190 for i = 1:length(geneCOBRAfields) +0191 j=ismember(extractedMiriamNames,geneNamespaces{i}); +0192 if any(j) +0193 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);']) +0194 end +0195 end +0196 end +0197 if isfield(model,'geneShortNames') +0198 newModel.geneNames=model.geneShortNames; +0199 end +0200 if isfield(model,'genes') +0201 newModel.rules=grrulesToRules(model); +0202 else +0203 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n'); +0204 end +0205 newModel.osenseStr='max'; +0206 else +0207 fprintf('Converting COBRA structure to RAVEN..\n'); +0208 %Convert from COBRA to RAVEN structure +0209 +0210 %Mandatory RAVEN fields +0211 newModel.mets=model.mets; +0212 if ~isfield(model,'comps') +0213 %Since 'comps' field is not mandatory in COBRA, it may be required +0214 %to obtain the non-redundant list of comps from metabolite ids, if +0215 %'comps' field is not available +0216 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1')); +0217 newModel.compNames = newModel.comps; +0218 end +0219 for i=1:numel(newModel.comps) +0220 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],''); +0221 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],''); +0222 end +0223 +0224 %In some cases (e.g. any model that uses BiGG ids as main ids), there +0225 %may be overlapping mets due to removal of compartment info. 
To avoid +0226 %this, we change compartments from e.g. [c] into _c +0227 if numel(unique(newModel.mets))~=numel(model.mets) +0228 newModel.mets=model.mets; +0229 for i=1:numel(newModel.comps) +0230 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]); +0231 end +0232 end +0233 %Since COBRA no longer contains rev field it is assumed that rxn is +0234 %reversible if its lower bound is set below zero +0235 if ~isfield(model,'rev') +0236 for i=1:numel(model.rxns) +0237 if model.lb(i)<0 +0238 newModel.rev(i,1)=1; +0239 else +0240 newModel.rev(i,1)=0; +0241 end +0242 end +0243 end +0244 newModel.b=zeros(numel(model.mets),1); +0245 +0246 %metComps is also mandatory, but defined later to match the order of +0247 %fields +0248 +0249 %Fields 'name' and 'id' are also considered as mandatory, but +0250 %these are added to the model during exportModel/exportToExcelFormat +0251 %anyway, so there is no point to add this information here +0252 +0253 %Optional RAVEN fields +0254 if isfield(model,'modelID') +0255 newModel.id=model.modelID; +0256 end +0257 if isfield(model,'modelName') +0258 newModel.name=model.modelName; +0259 end +0260 if isfield(model,'rules') && ~isfield(model,'grRules') +0261 model.grRules = rulesTogrrules(model); +0262 end +0263 if isfield(model,'grRules') +0264 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0265 newModel.grRules = grRules; +0266 newModel.rxnGeneMat = rxnGeneMat; +0267 end +0268 if isfield(model,'rxnECNumbers') +0269 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:',''); +0270 end +0271 if any(isfield(model,rxnCOBRAfields)) +0272 for i=1:numel(model.rxns) +0273 counter=1; +0274 newModel.rxnMiriams{i,1}=[]; +0275 if isfield(model,'rxnReferences') +0276 if ~isempty(model.rxnReferences{i}) +0277 pmids = model.rxnReferences{i}; +0278 pmids = strsplit(pmids,'; '); +0279 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once')); +0280 if any(nonPmids) %Not a pubmed id, keep in 
rxnReferences instead +0281 newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', '); +0282 pmids(nonPmids)=[]; +0283 end +0284 for j = 1:length(pmids) +0285 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed'; +0286 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j}; +0287 counter=counter+1; +0288 end +0289 end +0290 end +0291 for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences +0292 if isfield(model,rxnCOBRAfields{j}) +0293 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']); +0294 if ~isempty(rxnAnnotation) +0295 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';')); +0296 for a=1:length(rxnAnnotation) +0297 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j}; +0298 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a}; +0299 counter=counter+1; +0300 end +0301 end +0302 end +0303 end +0304 end +0305 end +0306 if isfield(newModel,'rxnReferences') +0307 emptyEntry = cellfun(@isempty,newModel.rxnReferences); +0308 newModel.rxnReferences(emptyEntry)={''}; +0309 diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences); +0310 if diffNumel > 0 +0311 newModel.rxnReferences(end+1:end+diffNumel) = {''}; +0312 end +0313 end +0314 if any(isfield(model,geneCOBRAfields)) +0315 for i=1:numel(model.genes) +0316 counter=1; +0317 newModel.geneMiriams{i,1}=[]; +0318 for j = 1:length(geneCOBRAfields) +0319 if isfield(model,geneCOBRAfields{j}) +0320 geneAnnotation = eval(['model.' 
geneCOBRAfields{j} '{i}']); +0321 if ~isempty(geneAnnotation) +0322 geneAnnotation = strtrim(strsplit(geneAnnotation,';')); +0323 for a=1:length(geneAnnotation) +0324 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j}; +0325 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a}; +0326 counter=counter+1; +0327 end +0328 end +0329 end +0330 end +0331 end +0332 end +0333 if isfield(model,'geneNames') +0334 newModel.geneShortNames=model.geneNames; +0335 end +0336 newModel.metNames=model.metNames; +0337 for i=1:numel(newModel.comps) +0338 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],''); +0339 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],''); +0340 end +0341 newModel.metNames=deblank(newModel.metNames); +0342 newModel.metComps=regexprep(model.mets,'^.+\[',''); +0343 newModel.metComps=regexprep(newModel.metComps,'\]$',''); +0344 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps); +0345 if isfield(model,'metInChIString') +0346 newModel.inchis=regexprep(model.metInChIString,'^InChI=',''); +0347 end +0348 printWarning=false; +0349 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID'])) +0350 for i=1:numel(model.mets) +0351 counter=1; +0352 newModel.metMiriams{i,1}=[]; +0353 if isfield(model,'metKEGGID') +0354 if ~isempty(model.metKEGGID{i}) +0355 if strcmp(model.metKEGGID{i}(1),'C') +0356 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound'; +0357 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; +0358 counter=counter+1; +0359 elseif strcmp(model.metKEGGID{i}(1),'G') +0360 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan'; +0361 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; +0362 counter=counter+1; +0363 end +0364 end +0365 end +0366 if isfield(model,'metPubChemID') +0367 if ~isempty(model.metPubChemID{i}) +0368 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:') +0369 
newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; +0370 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; +0371 counter=counter+1; +0372 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:') +0373 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance'; +0374 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; +0375 counter=counter+1; +0376 else +0377 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; +0378 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; +0379 counter=counter+1; +0380 printWarning=true; +0381 end +0382 end +0383 end +0384 for j = 1:length(metCOBRAfields) +0385 if isfield(model,metCOBRAfields{j}) +0386 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']); +0387 if ~isempty(metAnnotation) +0388 metAnnotation = strtrim(strsplit(metAnnotation,';')); +0389 for a=1:length(metAnnotation) +0390 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j}; +0391 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a}; +0392 counter=counter+1; +0393 end +0394 end +0395 end +0396 end +0397 end +0398 end +0399 if printWarning +0400 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). 
All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n'); +0401 end +0402 end +0403 +0404 % Order fields +0405 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models +0406 end +0407 +0408 function rules=grrulesToRules(model) +0409 %This function just takes grRules, changes all gene names to +0410 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding +0411 %symbols +0412 replacingGenes=cell([size(model.genes,1) 1]); +0413 for i=1:numel(replacingGenes) +0414 replacingGenes{i}=strcat('x(',num2str(i),')'); +0415 end +0416 rules = strcat({' '},model.grRules,{' '}); +0417 for i=1:length(model.genes) +0418 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']); +0419 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']); +0420 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']); +0421 end +0422 rules=regexprep(rules,' and ',' & '); +0423 rules=regexprep(rules,' or ',' | '); +0424 rules=strtrim(rules); +0425 end +0426 +0427 function grRules=rulesTogrrules(model) +0428 %This function takes rules, replaces &/| for and/or, replaces the x(i) +0429 %format with the actual gene ID, and takes out extra whitespace and +0430 %redundant parenthesis introduced by COBRA, to create grRules. +0431 grRules = strrep(model.rules,'&','and'); +0432 grRules = strrep(grRules,'|','or'); +0433 for i = 1:length(model.genes) +0434 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i}); +0435 end +0436 grRules = strrep(grRules,'( ','('); +0437 grRules = strrep(grRules,' )',')'); +0438 grRules = regexprep(grRules,'^(',''); %rules that start with a "(" +0439 grRules = regexprep(grRules,')$',''); %rules that end with a ")" +0440 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/struct_conversion/removeIdentifierPrefix.html b/doc/struct_conversion/removeIdentifierPrefix.html new file mode 100644 index 00000000..3fae6a6c --- /dev/null +++ b/doc/struct_conversion/removeIdentifierPrefix.html @@ -0,0 +1,143 @@ + + + + Description of removeIdentifierPrefix + + + + + + + + + +
    Home > struct_conversion > removeIdentifierPrefix.m
    + + + +

    removeIdentifierPrefix +

    + +

    PURPOSE ^

    +
    removeIdentifierPrefix
    + +

    SYNOPSIS ^

    +
    function [model, hasChanged]=removeIdentifierPrefix(model,fields,forceRemove)
    + +

    DESCRIPTION ^

    +
     removeIdentifierPrefix
    +   This function removes identifier prefixes:
    +       "R_" for model.rxns and model.rxnNames,
    +       "M_" for model.mets, model.metNames and model.id,
    +       "C_" for model.comps;
    +       "G_" for model.genes (and also represented in model.grRules).
    +   By default, the prefixes are only removed if all entries in a
    +   particular field have the prefix. The prefixes might have been present
    +   because one or more identifiers do not start with a letter or _, which
    +   conflicts with SBML specifications.
    +
    + Input:
    +   model           model whose identifiers should be modified
    +   fields          cell array with model field names from which the
    +                   identifiers should be removed, possible values: 
    +                   'rxns', 'mets', 'comps', 'genes', 'metNames', 
    +                   'rxnNames', 'id'. (optional, by default all listed
    +                   model fields will be checked).
    +   forceRemove     if prefixes should be removed even if not all entries
    +                   in a model field have the prefix (optional, default
    +                   false)
    +
    + Output:
    +   model           modified model
    +   hasChanged      cell array with fields and prefixes that are removed
    +
    + Usage: [model, hasChanged]=removeIdentifierPrefix(model,fields,forceRemove)
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + + + +

    SOURCE CODE ^

    +
    0001 function [model, hasChanged]=removeIdentifierPrefix(model,fields,forceRemove)
    +0002 % removeIdentifierPrefix
    +0003 %   This function removes identifier prefixes:
    +0004 %       "R_" for model.rxns and model.rxnNames,
    +0005 %       "M_" for model.mets, model.metNames and model.id,
    +0006 %       "C_" for model.comps;
    +0007 %       "G_" for model.genes (and also represented in model.grRules).
    +0008 %   By default, the prefixes are only removed if all entries in a
    +0009 %   particular field have the prefix. The prefixes might have been present
    +0010 %   because one or more identifiers do not start with a letter or _, which
    +0011 %   conflicts with SBML specifications.
    +0012 %
    +0013 % Input:
    +0014 %   model           model whose identifiers should be modified
    +0015 %   fields          cell array with model field names from which the
    +0016 %                   prefixes should be removed, possible values:
    +0017 %                   'rxns', 'mets', 'comps', 'genes', 'metNames',
    +0018 %                   'rxnNames', 'id'. (optional, by default all listed
    +0019 %                   model fields will be checked).
    +0020 %   forceRemove     if prefixes should be removed even if not all entries
    +0021 %                   in a model field have the prefix (optional, default
    +0022 %                   false)
    +0023 %
    +0024 % Output:
    +0025 %   model           modified model
    +0026 %   hasChanged      cell array of modified fields, e.g. 'model.rxns (R_ prefix)'
    +0027 %
    +0028 % Usage: [model, hasChanged]=removeIdentifierPrefix(model,fields,forceRemove)
    +0029 
    +0030 if nargin<2 || isempty(fields)  % default: check every supported field
    +0031     fields = {'rxns','mets','comps','genes','metNames','rxnNames','id'};
    +0032 end
    +0033 if nargin<3 || isempty(forceRemove)
    +0034     forceRemove = false;
    +0035 end
    +0036 % Supported model fields (column 1) and the prefix stripped from each (column 2)
    +0037 modelFields = {'rxns',      'R_';
    +0038     'mets',      'M_';
    +0039     'comps',     'C_';
    +0040     'genes',     'G_';
    +0041     'metNames',  'M_';
    +0042     'rxnNames',  'R_';
    +0043     'id',        'M_'};
    +0044 % Only requested fields are processed; hasChanged flags the modelFields rows that were modified
    +0045 toChangeIdx = find(ismember(modelFields(:,1),fields));
    +0046 hasChanged  = false(numel(modelFields(:,1)),1);
    +0047 for i=1:numel(toChangeIdx)
    +0048     currName    = modelFields{toChangeIdx(i),1};
    +0049     currPrefix  = modelFields{toChangeIdx(i),2};
    +0050     currField   = model.(currName);  % NOTE(review): no isfield guard, so a model lacking this field errors here - confirm callers guarantee it
    +0051     % With forceRemove a single prefixed entry is enough; otherwise every entry must carry the prefix
    +0052     if forceRemove && any(startsWith(currField,currPrefix))
    +0053         hasPrefix = true;
    +0054     else
    +0055         hasPrefix = all(startsWith(currField,currPrefix));  % NOTE(review): all() of an empty result is true, so an empty field is reported as changed - confirm intended
    +0056     end
    +0057     if hasPrefix
    +0058         currField = regexprep(currField,['^' currPrefix],'');  % strip the prefix only where it starts an entry
    +0059         hasChanged(toChangeIdx(i)) = true;
    +0060         if strcmp(currName,'genes')  % also strip G_ from gene IDs inside grRules; NOTE(review): assumes model.grRules exists
    +0061             model.grRules=regexprep(model.grRules,'^G_','');
    +0062             model.grRules=regexprep(model.grRules,'\(G_','(');
    +0063             model.grRules=regexprep(model.grRules,' G_',' ');
    +0064         end
    +0065     end
    +0066     model.(currName) = currField;
    +0067 end
    +0068 hasChanged = modelFields(hasChanged,:);  % keep only the rows of fields that were modified
    +0069 hasChanged = append('model.', hasChanged(:,1), ' (', hasChanged(:,2), ' prefix)');  % one entry per modified field, e.g. 'model.rxns (R_ prefix)'
    +0070 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/testing/unit_tests/fillGapsLargeTests.html b/doc/testing/unit_tests/fillGapsLargeTests.html index f70820d7..9b44640b 100644 --- a/doc/testing/unit_tests/fillGapsLargeTests.html +++ b/doc/testing/unit_tests/fillGapsLargeTests.html @@ -54,7 +54,7 @@

    SOURCE CODE ^try 0008 gurobi_read('solverTests.m'); 0009 catch ME -0010 if ~startsWith(ME.message,'Gurobi error 10012') % Expected error code, others may indicate problems with license +0010 if ~startsWith(ME.message,{'Gurobi error 10012','Gurobi error 10003'}) % Expected error codes, others may indicate problems with license 0011 testGurobi = false; 0012 end 0013 end diff --git a/doc/testing/unit_tests/modelCurationTests.html b/doc/testing/unit_tests/modelCurationTests.html index 6cf1a9f3..dde7fe80 100644 --- a/doc/testing/unit_tests/modelCurationTests.html +++ b/doc/testing/unit_tests/modelCurationTests.html @@ -498,7 +498,7 @@

    SOURCE CODE ^'test_data','ecoli_textbook.mat'), 'model'); 0452 0453 sbmlFile=fullfile(sourceDir,'..','..','tutorial','empty.xml'); -0454 evalc('modelEmpty=importModel(sbmlFile)'); % Repress warnings +0454 evalc('modelEmpty=importModel(sbmlFile,[],true)'); % Repress warnings 0455 0456 evalc('modelNew=addRxnsGenesMets(model,modelEmpty,''r1'',true);'); 0457 diff --git a/external/kegg/getKEGGModelForOrganism.m b/external/kegg/getKEGGModelForOrganism.m index e3abc2d2..24355620 100755 --- a/external/kegg/getKEGGModelForOrganism.m +++ b/external/kegg/getKEGGModelForOrganism.m @@ -38,15 +38,12 @@ % The hidden Markov models as generated in 2b or % downloaded from BioMet Toolbox (see below) % The final directory in dataDir should be styled as -% proXXX_keggYY or eukXXX_keggYY, indicating whether +% prok90_kegg105 or euk90_kegg105, indicating whether % the HMMs were trained on pro- or eukaryotic -% sequences, using a sequence similarity threshold of -% XXX %, fitting the KEGG version YY. E.g. -% euk90_kegg105. (optional, see note about fastaFile. Note -% that in order to rebuild the KEGG model from a -% database dump, as opposed to using the version -% supplied with RAVEN, you would still need to supply -% this) +% sequences; using which sequence similarity threshold +% (first set of digits); using which KEGG version +% (second set of digits). (this parameter should +% ALWAYS be provided) % outDir directory to save the results from the quering of % the hidden Markov models. The output is specific % for the input sequences and the settings used. 
It diff --git a/io/exportForGit.m b/io/exportForGit.m index 7fc96313..d90f5b04 100755 --- a/io/exportForGit.m +++ b/io/exportForGit.m @@ -1,39 +1,51 @@ -function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,cobraText) +function out=exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,COBRAtext,COBRAstyle) % exportForGit % Generates a directory structure and populates this with model files, ready % to be commited to a Git(Hub) maintained model repository. Writes the model % as SBML L3V1 FBCv2 (both XML and YAML), COBRA text, Matlab MAT-file % orthologies in KEGG % -% model model structure in RAVEN format that should be exported +% model model structure in RAVEN format that should be +% exported % prefix prefix for all filenames (optional, default 'model') -% path path where the directory structure should be generated -% and populated with all files (optional, default to current -% working directory) -% formats cell array of strings specifying in what file formats -% the model should be exported (optional, default to all -% formats as {'mat', 'txt', 'xlsx', 'xml', 'yml'}) +% path path where the directory structure should be +% generated and populated with all files (optional, +% default to current working directory) +% formats cell array of strings specifying in what file +% formats the model should be exported (optional, +% default to all formats as {'mat', 'txt', 'xlsx', +% 'xml', 'yml'}) % mainBranchFlag logical, if true, function will error if RAVEN (and % COBRA if detected) is/are not on the main branch. % (optional, default false) -% subDirs logical, whether model files for each file format +% subDirs logical, whether model files for each file format % should be written in its own subdirectory, with % 'model' as parent directory, in accordance to the % standard-GEM repository format. If false, all files -% are stored in the same folder. 
(optional, default true) -% cobraText logical, whether the txt file should be in COBRA +% are stored in the same folder. (optional, default +% true) +% COBRAtext logical, whether the txt file should be in COBRA % Toolbox format using metabolite IDs, instead of -% metabolite names and compartments. (optional, default -% false) +% metabolite names and compartments. (optional, +% default false) +% COBRAstyle true if COBRA-style prefixes should be added to all +% identifiers in the SBML file: R_ for reactions, M_ +% for metabolites, G_ for genes and C_ for +% compartments. If all identifiers of a particular +% field already have the prefix, then no additional +% prefixes are added. (optional, default false) % -% Usage: exportForGit(model,prefix,path,formats,mainBranchFlag) -if nargin<7 - cobraText=false; +% Usage: exportForGit(model,prefix,path,formats,mainBranchFlag,subDirs,COBRAtext,COBRAstyle) +if nargin<8 + COBRAstyle=false; end -if nargin<6 +if nargin<7 || isempty(COBRAtext) + COBRAtext=false; +end +if nargin<6 || isempty(subDirs) subDirs=true; end -if nargin<5 +if nargin<5 || isempty(mainBranchFlag) mainBranchFlag=false; end if nargin<4 || isempty(formats) @@ -45,12 +57,12 @@ EM='Unknown file format defined. 
Only mat, txt, xlsx, xml and yml are allowed file formats.'; error(EM) end -if nargin<3 +if nargin<3 || isempty(path) path='.'; else path=char(path); end -if nargin<2 +if nargin<2 || isempty(prefix) prefix='model'; else prefix=char(prefix); @@ -92,7 +104,7 @@ % Write TXT format if ismember('txt', formats) fid=fopen(fullfile(filePath{1},strcat(prefix,'.txt')),'w'); - if cobraText==true + if COBRAtext==true eqns=constructEquations(model,model.rxns,false,false,false); eqns=strrep(eqns,' => ',' -> '); eqns=strrep(eqns,' <=> ',' <=> '); @@ -130,7 +142,7 @@ % Write XML format if ismember('xml', formats) - exportModel(model,fullfile(filePath{5},strcat(prefix,'.xml'))); + exportModel(model,fullfile(filePath{5},strcat(prefix,'.xml')),COBRAstyle); end %Save file with versions: diff --git a/io/exportModel.m b/io/exportModel.m index 3ac8dbc6..8beef3b2 100755 --- a/io/exportModel.m +++ b/io/exportModel.m @@ -1,21 +1,23 @@ -function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) +function exportModel(model,fileName,neverPrefix,supressWarnings,sortIds) % exportModel % Exports a constraint-based model to an SBML file (L3V1 FBCv2) % -% Input: +% Input: % model a model structure % fileName filename to export the model to. A dialog window % will open if no file name is specified. -% exportGeneComplexes true if gene complexes (all gene sets linked with -% AND relationship) should be recognised and exported -% (optional, default false) -% supressWarnings true if warnings should be supressed (optional, default -% false) +% neverPrefix true if prefixes are never added to identifiers, +% even if start with e.g. digits. This might result +% in invalid SBML files (optional, default false) +% supressWarnings true if warnings should be supressed. 
This might +% results in invalid SBML files, as no checks are +% performed (optional, default false) % sortIds logical whether metabolites, reactions and genes % should be sorted alphabetically by their % identifiers (optional, default false) % -% Usage: exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) +% Usage: exportModel(model,fileName,neverPrefix,supressWarnings,sortIds) + if nargin<2 || isempty(fileName) [fileName, pathName] = uiputfile({'*.xml;*.sbml'}, 'Select file for model export',[model.id '.xml']); if fileName == 0 @@ -25,19 +27,23 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) end end fileName=char(fileName); -if nargin<3 - exportGeneComplexes=false; +if nargin<3 || isempty(neverPrefix) + neverPrefix=false; end -if nargin<4 +if nargin<4 || isempty(supressWarnings) supressWarnings=false; end -if nargin<5 +if nargin<5 || isempty(sortIds) sortIds=false; end if sortIds==true model=sortIdentifiers(model); end +if isfield(model,'ec') + warning("exportModel does not store information from the 'model.ec' structure. Use 'writeYAMLmodel(model)' to export all content from a GECKO model.") +end + %If no subSystems are defined, then no need to use groups package if isfield(model,'subSystems') modelHasSubsystems=true; @@ -74,9 +80,17 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) model.name='blankName'; end +% Add prefixes if required +if ~neverPrefix + [model,hasChanged] = addIdentifierPrefix(model); + dispEM(['The following fields have one or more entries that do not start '... + 'with a letter or _ (conflicting with SBML specifications). Prefixes '... + 'are added to all entries in those fields:'],false,hasChanged) +end + %Check the model structure if supressWarnings==false - checkModelStruct(model,false); + checkModelStruct(model); end %Add several blank fields, if they do not exist already. 
This is to reduce @@ -99,6 +113,9 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) if ~isfield(model,'geneShortNames') && isfield(model,'genes') model.geneShortNames=cell(numel(model.genes),1); end +if ~isfield(model,'proteins') && isfield(model,'genes') + model.proteins=cell(numel(model.genes),1); +end if ~isfield(model,'subSystems') model.subSystems=cell(numel(model.rxns),1); end @@ -230,11 +247,6 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) end if isfield(modelSBML.compartment,'metaid') - if regexp(model.comps{i},'^[^a-zA-Z_]') - EM='The compartment IDs are in numeric format. For the compliance with SBML specifications, compartment IDs will be preceded with "c_" string'; - dispEM(EM,false); - model.comps(i)=strcat('c_',model.comps(i)); - end modelSBML.compartment(i).metaid=model.comps{i}; end %Prepare Miriam strings @@ -289,13 +301,13 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) end if isfield(modelSBML.species,'metaid') - modelSBML.species(i).metaid=['M_' model.mets{i}]; + modelSBML.species(i).metaid=model.mets{i}; end if isfield(modelSBML.species, 'name') modelSBML.species(i).name=model.metNames{i}; end if isfield(modelSBML.species, 'id') - modelSBML.species(i).id=['M_' model.mets{i}]; + modelSBML.species(i).id=model.mets{i}; end if isfield(modelSBML.species, 'compartment') modelSBML.species(i).compartment=model.comps{model.metComps(i)}; @@ -337,7 +349,7 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) end end if ~isempty(model.metMiriams{i}) || hasInchi==true - modelSBML.species(i).annotation=['']; + modelSBML.species(i).annotation=['']; modelSBML.species(i).annotation=[modelSBML.species(i).annotation '']; if ~isempty(model.metMiriams{i}) modelSBML.species(i).annotation=[modelSBML.species(i).annotation getMiriam(model.metMiriams{i})]; @@ -394,42 +406,9 @@ function 
exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i}; end end - end - if exportGeneComplexes==true - %Also add the complexes as genes. This is done by splitting grRules - %on "or" and adding the ones which contain several genes - geneComplexes={}; - if isfield(model,'grRules') - %Only grRules which contain " and " can be complexes - uniqueRules=unique(model.grRules); - I=cellfun(@any,strfind(uniqueRules,' and ')); - uniqueRules(~I)=[]; - uniqueRules=strrep(uniqueRules,'(',''); - uniqueRules=strrep(uniqueRules,')',''); - uniqueRules=strrep(uniqueRules,' and ',':'); - for i=1:numel(uniqueRules) - genes=regexp(uniqueRules(i),' or ','split'); - genes=genes{1}(:); - %Check which ones are complexes - I=cellfun(@any,strfind(genes,':')); - geneComplexes=[geneComplexes;genes(I)]; - end - end - geneComplexes=unique(geneComplexes); - if ~isempty(geneComplexes) - %Then add them as genes. There is a possiblity that a complex - %A&B is added as separate from B&A. 
This is not really an issue - %so this is not dealt with - for i=1:numel(geneComplexes) - modelSBML.fbc_geneProduct(numel(model.genes)+i)=modelSBML.fbc_geneProduct(1); - if isfield(modelSBML.fbc_geneProduct,'metaid') - modelSBML.fbc_geneProduct(numel(model.genes)+i).metaid=geneComplexes{i}; - end - if isfield(modelSBML.fbc_geneProduct,'fbc_id') - modelSBML.fbc_geneProduct(numel(model.genes)+i).fbc_id=geneComplexes{i}; - else - modelSBML.fbc_geneProduct(i).fbc_label=modelSBML.fbc_geneProduct(i).fbc_id; - end + if isfield(modelSBML.fbc_geneProduct, 'fbc_name') && isfield(model,'proteins') + if ~isempty(model.proteins{i}) + modelSBML.fbc_geneProduct(i).fbc_name=model.proteins{i}; end end end @@ -483,7 +462,7 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) end if isfield(modelSBML.reaction,'metaid') - modelSBML.reaction(i).metaid=['R_' model.rxns{i}]; + modelSBML.reaction(i).metaid=model.rxns{i}; end %Export notes information @@ -515,7 +494,7 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) %Export annotation information from rxnMiriams if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i}) - modelSBML.reaction(i).annotation=['']; + modelSBML.reaction(i).annotation=['']; modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '']; if ~isempty(model.eccodes{i}) eccodes=regexp(model.eccodes{i},';','split'); @@ -530,7 +509,7 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) modelSBML.reaction(i).name=model.rxnNames{i}; end if isfield(modelSBML.reaction, 'id') - modelSBML.reaction(i).id=['R_' model.rxns{i}]; + modelSBML.reaction(i).id=model.rxns{i}; end %Add the information about reactants and products @@ -582,7 +561,7 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) modelSBML.groups_group.sboTerm = 633; tmpStruct=modelSBML.groups_group; - rxns=strcat('R_',model.rxns); 
+ rxns=model.rxns; if ~any(cellfun(@iscell,model.subSystems)) if ~any(~cellfun(@isempty,model.subSystems)) subSystems = {}; diff --git a/io/importExcelModel.m b/io/importExcelModel.m index a703b5bb..0c4c0784 100755 --- a/io/importExcelModel.m +++ b/io/importExcelModel.m @@ -847,7 +847,7 @@ elseif any(strfind(I{j},':')) index=max(strfind(I{j},':')); end - if any(index) + if exist('index','var') & any(index) miriamStruct{i}.name{startIndex+j}=I{j}(1:index-1); miriamStruct{i}.value{startIndex+j}=I{j}(index+1:end); else diff --git a/io/importModel.m b/io/importModel.m index df9b10f2..b36a4ad9 100755 --- a/io/importModel.m +++ b/io/importModel.m @@ -1,16 +1,19 @@ -function model=importModel(fileName,removeExcMets,isSBML2COBRA,supressWarnings) +function model=importModel(fileName,removeExcMets,removePrefix,supressWarnings) % importModel -% Import a constraint-based model from a SBML file +% Import a constraint-based model from an SBML file. % % Input: -% fileName a SBML file to import. A dialog window will open if +% fileName a SBML file to import. A dialog window will open if % no file name is specified. % removeExcMets true if exchange metabolites should be removed. This is % needed to be able to run simulations, but it could also % be done using simplifyModel at a later stage (optional, % default true) -% isSBML2COBRA true if the SBML file is in the old COBRA Toolbox -% format (SBML Level 2) (optional, default false) +% removePrefix true if identifier prefixes should be removed when +% loading the model: G_ for genes, R_ for reactions, +% M_ for metabolites, and C_ for compartments. These are +% only removed if all identifiers of a certain type +% contain the prefix. (optional, default true) % supressWarnings true if warnings regarding the model structure should % be supressed (optional, default false) % @@ -47,6 +50,7 @@ % geneComps compartments for genes % geneMiriams structure with MIRIAM information about the genes % geneShortNames gene alternative names (e.g. 
ERG10) +% proteins protein associated to each gene % metNames metabolite description % metComps compartments for metabolites % inchis InChI-codes for metabolites @@ -55,11 +59,11 @@ % metCharges metabolite charge % unconstrained true if the metabolite is an exchange metabolite % -% A number of consistency checks are performed in order to ensure that the +% Note: A number of consistency checks are performed in order to ensure that the % model is valid. Take these warnings seriously and modify the model % structure to solve them. % -% Usage: model = importModel(fileName, removeExcMets, isSBML2COBRA, supressWarnings) +% Usage: model = importModel(fileName, removeExcMets, removePrefix, supressWarnings) if nargin<1 || isempty(fileName) [fileName, pathName] = uigetfile({'*.xml;*.sbml'}, 'Please select the model file'); @@ -70,20 +74,27 @@ end end fileName=char(fileName); -if nargin<2 +if nargin<2 || isempty(removeExcMets) removeExcMets=true; end -if nargin<3 - isSBML2COBRA=false; +if nargin<3 || isempty(removePrefix) + removePrefix=true; end if nargin<4 supressWarnings=false; end -if ~isfile(fileName) - error('SBML file %s cannot be found',string(fileName)); +fileName=checkFileExistence(fileName,1); +% If path contains non-ASCII characters, copy file to tempdir first, as +% libSBML is known to have problems with this on Windows: +% https://sbml.org/software/libsbml/libsbml-docs/known-pitfalls/#matlab-on-windows-has-issues-with-unicode-filenames +if ispc && any(double(fileName)>128) + [~,originalFile,ext] = fileparts(fileName); + tempFile = fullfile(tempdir,[originalFile ext]); + copyfile(fileName,tempFile); + fileName = tempFile; end %This is to match the order of the fields to those you get from importing @@ -118,6 +129,7 @@ model.geneComps=[]; model.geneMiriams={}; model.geneShortNames={}; +model.proteins={}; model.metNames={}; model.metComps=[]; model.inchis={}; @@ -127,33 +139,16 @@ model.unconstrained=[]; %Load the model using libSBML 
-[ravenDir,prevDir]=findRAVENroot(); -fileName=checkFileExistence(fileName,1); -modelSBML = TranslateSBML_RAVEN(fileName,0,0,[1 1]); +[modelSBML,errorMsg] = TranslateSBML_RAVEN(fileName,0,0,[1 1]); +if exist('tempFile','var') + delete(tempFile) +end if isempty(modelSBML) - EM='There is a problem with the SBML file. Try using the SBML Validator at http://sbml.org/Facilities/Validator'; + EM=['There is a problem with the SBML file. Try using the SBML Validator at http://sbml.org/Facilities/Validator.\nlibSBML reports: ', errorMsg.message]; dispEM(EM); end -%Remove the preceding strings for reactions, compartments and -%reactants/products in 'reaction' field. The strings for metabolites, genes -%and complexes are not removed, as we will need them later to identify them -%from 'species' field -for i=1:numel(modelSBML.reaction) - modelSBML.reaction(i).name=regexprep(modelSBML.reaction(i).name,'^R_',''); - modelSBML.reaction(i).id=regexprep(modelSBML.reaction(i).id,'^R_',''); - if isfield(modelSBML.reaction(i),'compartment') - modelSBML.reaction(i).compartment=regexprep(modelSBML.reaction(i).compartment,'^C_',''); - end - for j=1:numel(modelSBML.reaction(i).reactant) - modelSBML.reaction(i).reactant(j).species=regexprep(modelSBML.reaction(i).reactant(j).species,'^M_',''); - end - for j=1:numel(modelSBML.reaction(i).product) - modelSBML.reaction(i).product(j).species=regexprep(modelSBML.reaction(i).product(j).species,'^M_',''); - end -end - %Retrieve compartment names and IDs compartmentNames=cell(numel(modelSBML.compartment),1); compartmentIDs=cell(numel(modelSBML.compartment),1); @@ -164,26 +159,26 @@ %If all the SBO terms are identical, don't add them to compMiriams modelSBML.compartment = rmfield(modelSBML.compartment,'sboTerm'); end - + for i=1:numel(modelSBML.compartment) compartmentNames{i}=modelSBML.compartment(i).name; - compartmentIDs{i}=regexprep(modelSBML.compartment(i).id,'^C_',''); + compartmentIDs{i}=modelSBML.compartment(i).id; if 
isfield(modelSBML.compartment(i),'outside') if ~isempty(modelSBML.compartment(i).outside) - compartmentOutside{i}=regexprep(modelSBML.compartment(i).outside,'^C_',''); + compartmentOutside{i}=modelSBML.compartment(i).outside; else compartmentOutside{i}=''; end else compartmentOutside{i}=[]; end - + if isfield(modelSBML.compartment(i),'annotation') compartmentMiriams{i}=parseMiriam(modelSBML.compartment(i).annotation); else compartmentMiriams{i}=[]; end - + if isfield(modelSBML.compartment(i),'sboTerm') && ~(modelSBML.compartment(i).sboTerm==-1) compartmentMiriams{i} = addSBOtoMiriam(compartmentMiriams{i},modelSBML.compartment(i).sboTerm); end @@ -208,6 +203,7 @@ geneIDs={}; geneMiriams={}; geneShortNames={}; +proteins={}; geneCompartments={}; complexIDs={}; complexNames={}; @@ -222,181 +218,133 @@ %names if present as suffix. regexCompNames = ['\s?\[((' strjoin({modelSBML.compartment.name},')|(') '))\]$']; for i=1:numel(modelSBML.species) - if ~isSBML2COBRA - if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_') - geneNames{numel(geneNames)+1,1}=modelSBML.species(i).name; - - %The "E_" is included in the ID. This is because it's only used - %internally in this file and it makes the matching a little - %smoother - geneIDs{numel(geneIDs)+1,1}=modelSBML.species(i).id; - geneCompartments{numel(geneCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); - - %Get Miriam structure - if isfield(modelSBML.species(i),'annotation') - %Get Miriam info - geneMiriam=parseMiriam(modelSBML.species(i).annotation); - geneMiriams{numel(geneMiriams)+1,1}=geneMiriam; - else - geneMiriams{numel(geneMiriams)+1,1}=[]; - end - - %Protein short names (for example ERG10) are saved as SHORT - %NAME: NAME in the notes-section of metabolites for SBML Level - %2 and as PROTEIN_ASSOCIATION for each reaction in SBML Level 2 - %COBRA Toolbox format. 
For now only the SHORT NAME is loaded - %and no mapping takes place - if isfield(modelSBML.species(i),'notes') - geneShortNames{numel(geneShortNames)+1,1}=parseNote(modelSBML.species(i).notes,'SHORT NAME'); - else - geneShortNames{numel(geneShortNames)+1,1}=''; - end - - %Get SBO term - if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) - geneSBOs(end+1,1) = modelSBML.species(i).sboTerm; - end - elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_') - %If it's a complex keep the ID and name - complexIDs=[complexIDs;modelSBML.species(i).id]; - complexNames=[complexNames;modelSBML.species(i).name]; + if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_') + geneNames{numel(geneNames)+1,1}=modelSBML.species(i).name; + + %The "E_" is included in the ID. This is because it's only used + %internally in this file and it makes the matching a little + %smoother + geneIDs{numel(geneIDs)+1,1}=modelSBML.species(i).id; + geneCompartments{numel(geneCompartments)+1,1}=modelSBML.species(i).compartment; + + %Get Miriam structure + if isfield(modelSBML.species(i),'annotation') + %Get Miriam info + geneMiriam=parseMiriam(modelSBML.species(i).annotation); + geneMiriams{numel(geneMiriams)+1,1}=geneMiriam; else - %If it is not gene or complex, then it must be a metabolite - metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; - metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); - metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); - metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; - - %For each metabolite retrieve the formula and the InChI code if - %available First add the InChI code and the formula from the - %InChI. 
This allows for overwriting the formula by setting the - %actual formula field - if ~isempty(modelSBML.species(i).annotation) - %Get the formula if available - startString='>InChI='; - endString=''; - formStart=strfind(modelSBML.species(i).annotation,startString); - if isempty(formStart) - startString='InChI='; - endString='"/>'; - end - formStart=strfind(modelSBML.species(i).annotation,startString); - if ~isempty(formStart) - formEnd=strfind(modelSBML.species(i).annotation,endString); - formEndIndex=find(formEnd>formStart, 1 ); - formula=modelSBML.species(i).annotation(formStart+numel(startString):formEnd(formEndIndex)-1); - metaboliteInChI{numel(metaboliteInChI)+1,1}=formula; - - %The composition is most often present between the - %first and second "/" in the model. In some simple - %molecules, such as salts, there is no second "/". The - %formula is then assumed to be to the end of the string - compositionIndexes=strfind(formula,'/'); - if numel(compositionIndexes)>1 + geneMiriams{numel(geneMiriams)+1,1}=[]; + end + + %Protein short names (for example ERG10) are saved as SHORT + %NAME: NAME in the notes-section of metabolites for SBML Level + %2 and as PROTEIN_ASSOCIATION for each reaction in SBML Level 2 + %COBRA Toolbox format. 
For now only the SHORT NAME is loaded + %and no mapping takes place + if isfield(modelSBML.species(i),'notes') + geneShortNames{numel(geneShortNames)+1,1}=parseNote(modelSBML.species(i).notes,'SHORT NAME'); + else + geneShortNames{numel(geneShortNames)+1,1}=''; + end + + %Get SBO term + if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) + geneSBOs(end+1,1) = modelSBML.species(i).sboTerm; + end + elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_') + %If it's a complex keep the ID and name + complexIDs=[complexIDs;modelSBML.species(i).id]; + complexNames=[complexNames;modelSBML.species(i).name]; + else + %If it is not gene or complex, then it must be a metabolite + metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; + metaboliteIDs{numel(metaboliteIDs)+1,1}=modelSBML.species(i).id; + metaboliteCompartments{numel(metaboliteCompartments)+1,1}=modelSBML.species(i).compartment; + metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; + + %For each metabolite retrieve the formula and the InChI code if + %available First add the InChI code and the formula from the + %InChI. This allows for overwriting the formula by setting the + %actual formula field + if ~isempty(modelSBML.species(i).annotation) + %Get the formula if available + startString='>InChI='; + endString=''; + formStart=strfind(modelSBML.species(i).annotation,startString); + if isempty(formStart) + startString='InChI='; + endString='"/>'; + end + formStart=strfind(modelSBML.species(i).annotation,startString); + if ~isempty(formStart) + formEnd=strfind(modelSBML.species(i).annotation,endString); + formEndIndex=find(formEnd>formStart, 1 ); + formula=modelSBML.species(i).annotation(formStart+numel(startString):formEnd(formEndIndex)-1); + metaboliteInChI{numel(metaboliteInChI)+1,1}=formula; + + %The composition is most often present between the + %first and second "/" in the model. 
In some simple + %molecules, such as salts, there is no second "/". The + %formula is then assumed to be to the end of the string + compositionIndexes=strfind(formula,'/'); + if numel(compositionIndexes)>1 + metaboliteFormula{numel(metaboliteFormula)+1,1}=... + formula(compositionIndexes(1)+1:compositionIndexes(2)-1); + else + if numel(compositionIndexes)==1 + %Probably a simple molecule which can have only + %one conformation metaboliteFormula{numel(metaboliteFormula)+1,1}=... - formula(compositionIndexes(1)+1:compositionIndexes(2)-1); - else - if numel(compositionIndexes)==1 - %Probably a simple molecule which can have only - %one conformation - metaboliteFormula{numel(metaboliteFormula)+1,1}=... - formula(compositionIndexes(1)+1:numel(formula)); - else - metaboliteFormula{numel(metaboliteFormula)+1,1}=''; - end - end - elseif isfield(modelSBML.species(i),'fbc_chemicalFormula') - metaboliteInChI{numel(metaboliteInChI)+1,1}=''; - if ~isempty(modelSBML.species(i).fbc_chemicalFormula) - %Cannot extract InChi from formula, so remains - %empty - metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).fbc_chemicalFormula; + formula(compositionIndexes(1)+1:numel(formula)); else metaboliteFormula{numel(metaboliteFormula)+1,1}=''; end - else - metaboliteInChI{numel(metaboliteInChI)+1,1}=''; - metaboliteFormula{numel(metaboliteFormula)+1,1}=''; end - - %Get Miriam info - metMiriam=parseMiriam(modelSBML.species(i).annotation); - metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; - else + elseif isfield(modelSBML.species(i),'fbc_chemicalFormula') metaboliteInChI{numel(metaboliteInChI)+1,1}=''; - if isfield(modelSBML.species(i),'notes') - metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); + if ~isempty(modelSBML.species(i).fbc_chemicalFormula) + %Cannot extract InChi from formula, so remains + %empty + metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).fbc_chemicalFormula; else 
metaboliteFormula{numel(metaboliteFormula)+1,1}=''; end - metaboliteMiriams{numel(metaboliteMiriams)+1,1}=[]; - end - if ~isempty(modelSBML.species(i).notes) - if ~isfield(modelSBML.species(i),'annotation') - metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); - end - elseif ~isfield(modelSBML.species(i),'annotation') + else + metaboliteInChI{numel(metaboliteInChI)+1,1}=''; metaboliteFormula{numel(metaboliteFormula)+1,1}=''; end - %Get SBO term - if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) - metSBOs(end+1,1) = modelSBML.species(i).sboTerm; + + %Get Miriam info + metMiriam=parseMiriam(modelSBML.species(i).annotation); + metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; + else + metaboliteInChI{numel(metaboliteInChI)+1,1}=''; + if isfield(modelSBML.species(i),'notes') + metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); + else + metaboliteFormula{numel(metaboliteFormula)+1,1}=''; end + metaboliteMiriams{numel(metaboliteMiriams)+1,1}=[]; end - - elseif isSBML2COBRA - %The metabolite names are assumed to be M_NAME_COMPOSITION or - %_NAME_COMPOSITION or NAME_COMPOSITION or NAME. Regular expressions - %are used that only NAME_COMPOSITION or NAME would be possible - - modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^M_',''); - modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^_',''); - underscoreIndex=strfind(modelSBML.species(i).name,'_'); - - metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; - - metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); - metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); - - %I think that COBRA doesn't set the boundary condition, but rather - %uses name_b. 
Check for either - metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; - if strcmp(metaboliteIDs{end}(max(end-1,1):end),'_b') - metaboliteUnconstrained(end)=1; - end - - %Get the formula - if max(underscoreIndex) RAVEN -> COBRA conversion cycle the following -% fields are lost: geneEntrezID, metSmiles, modelVersion, -% proteinNames, proteins +% fields are lost: geneEntrezID, modelVersion, proteins % % NOTE: The information about mandatory RAVEN fields was taken from % checkModelStruct function, whereas the corresponding information about @@ -76,9 +75,9 @@ newModel.c=zeros(numel(model.rxns),1); end newModel.rxns=model.rxns; -optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',... - 'metFormulas','comps','compNames','metCharges','genes',... - 'rxnConfidenceScores','rxnGeneMat','metNotes','rev'}; +optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',... + 'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',... + 'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteins'}; for i=1:length(optFields) if isfield(model,optFields{i}) newModel.(optFields{i})=model.(optFields{i}); diff --git a/struct_conversion/removeIdentifierPrefix.m b/struct_conversion/removeIdentifierPrefix.m new file mode 100644 index 00000000..0a960108 --- /dev/null +++ b/struct_conversion/removeIdentifierPrefix.m @@ -0,0 +1,70 @@ +function [model, hasChanged]=removeIdentifierPrefix(model,fields,forceRemove) +% removeIdentifierPrefix +% This function removes identifier prefixes: +% "R_" for model.rxns, model.rxnNames and model.id, +% "M_" for model.mets and model.metNames, +% "C_" for model.comps; +% "G_" for model.genes (and also represented in model.grRules). +% By default, the prefixes are only removed if all entries in a +% particular field has the prefix. 
The prefixes might have been present +% because one or more identifiers do not start with a letter or _, which +% conflicts with SBML specifications. +% +% Input: +% model model whose identifiers should be modified +% fields cell array with model field names from which the +% prefixes should be removed, possible values: +% 'rxns', 'mets', 'comps', 'genes', 'metNames', +% 'rxnNames', 'id'. (optional, by default all listed +% model fields will be checked). +% forceRemove if prefixes should be removed even if not all entries +% in a model field have the prefix (optional, default +% false) +% +% Output: +% model modified model +% hasChanged cell array with fields and prefixes that are removed +% +% Usage: [model, hasChanged]=removeIdentifierPrefix(model,fields,forceRemove) + +if nargin<2 || isempty(fields) + fields = {'rxns','mets','comps','genes','metNames','rxnNames','id'}; +end +if nargin<3 || isempty(forceRemove) + forceRemove = false; +end + +modelFields = {'rxns', 'R_'; + 'mets', 'M_'; + 'comps', 'C_'; + 'genes', 'G_'; + 'metNames', 'M_'; + 'rxnNames', 'R_'; + 'id', 'M_'}; + +toChangeIdx = find(ismember(modelFields(:,1),fields)); +hasChanged = false(numel(modelFields(:,1)),1); +for i=1:numel(toChangeIdx) + currName = modelFields{toChangeIdx(i),1}; + currPrefix = modelFields{toChangeIdx(i),2}; + currField = model.(currName); + + if forceRemove && any(startsWith(currField,currPrefix)) + hasPrefix = true; + else + hasPrefix = all(startsWith(currField,currPrefix)); + end + if hasPrefix + currField = regexprep(currField,['^' currPrefix],''); + hasChanged(toChangeIdx(i)) = true; + if strcmp(currName,'genes') + model.grRules=regexprep(model.grRules,'^G_',''); + model.grRules=regexprep(model.grRules,'\(G_','('); + model.grRules=regexprep(model.grRules,' G_',' '); + end + end + model.(currName) = currField; +end +hasChanged = modelFields(hasChanged,:); +hasChanged = append('model.', hasChanged(:,1), ' (', hasChanged(:,2), ' prefix)'); +end diff --git
a/testing/unit_tests/fillGapsLargeTests.m b/testing/unit_tests/fillGapsLargeTests.m index 34978720..1c0576d4 100755 --- a/testing/unit_tests/fillGapsLargeTests.m +++ b/testing/unit_tests/fillGapsLargeTests.m @@ -7,7 +7,7 @@ try gurobi_read('solverTests.m'); catch ME - if ~startsWith(ME.message,'Gurobi error 10012') % Expected error code, others may indicate problems with license + if ~startsWith(ME.message,{'Gurobi error 10012','Gurobi error 10003'}) % Expected error codes, others may indicate problems with license testGurobi = false; end end diff --git a/testing/unit_tests/modelCurationTests.m b/testing/unit_tests/modelCurationTests.m index 90fa7ff6..62e3dc96 100755 --- a/testing/unit_tests/modelCurationTests.m +++ b/testing/unit_tests/modelCurationTests.m @@ -451,7 +451,7 @@ function addRxnsGenesMetsTest(testCase) load(fullfile(sourceDir,'test_data','ecoli_textbook.mat'), 'model'); sbmlFile=fullfile(sourceDir,'..','..','tutorial','empty.xml'); -evalc('modelEmpty=importModel(sbmlFile)'); % Repress warnings +evalc('modelEmpty=importModel(sbmlFile,[],true)'); % Repress warnings evalc('modelNew=addRxnsGenesMets(model,modelEmpty,''r1'',true);'); diff --git a/testing/unit_tests/test_data/importExportResults.mat b/testing/unit_tests/test_data/importExportResults.mat index 0674972e..0a5df5ef 100644 Binary files a/testing/unit_tests/test_data/importExportResults.mat and b/testing/unit_tests/test_data/importExportResults.mat differ