diff --git a/INIT/getINITModel.m b/INIT/getINITModel.m index 9626d9e6..e48bf1a2 100644 --- a/INIT/getINITModel.m +++ b/INIT/getINITModel.m @@ -391,6 +391,9 @@ if isfield(model,'geneShortNames') model.geneShortNames(I)=[]; end +if isfield(model,'proteins') + model.proteins(I)=[]; +end if isfield(model,'geneMiriams') model.geneMiriams(I)=[]; end diff --git a/INIT/mergeLinear.m b/INIT/mergeLinear.m index 4ce4c3af..1378353b 100644 --- a/INIT/mergeLinear.m +++ b/INIT/mergeLinear.m @@ -29,6 +29,9 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames={}; end +if isfield(reducedModel,'proteins') + reducedModel.proteins={}; +end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams={}; end diff --git a/INIT/removeLowScoreGenes.m b/INIT/removeLowScoreGenes.m index 589a54b9..76a5bdeb 100644 --- a/INIT/removeLowScoreGenes.m +++ b/INIT/removeLowScoreGenes.m @@ -119,6 +119,9 @@ if isfield(newModel,'geneShortNames') newModel.geneShortNames(remInd) = []; end +if isfield(newModel,'proteins') + newModel.proteins(remInd) = []; +end if isfield(newModel,'geneMiriams') newModel.geneMiriams(remInd) = []; end diff --git a/core/addGenesRaven.m b/core/addGenesRaven.m index 9be3abf4..cc44f153 100755 --- a/core/addGenesRaven.m +++ b/core/addGenesRaven.m @@ -14,6 +14,8 @@ % default '') % geneMiriams cell array with MIRIAM structures (optional, % default []) +% proteins cell array of protein names associated to +% each gene (optional, default '') % % newModel an updated model structure % @@ -56,6 +58,9 @@ if isfield(genesToAdd,'geneShortNames') genesToAdd.geneShortNames(I)=[]; end + if isfield(genesToAdd,'proteins') + genesToAdd.proteins(I)=[]; + end if isfield(genesToAdd,'geneMiriams') genesToAdd.geneMiriams(I)=[]; end @@ -81,6 +86,24 @@ newModel.geneShortNames=[newModel.geneShortNames;filler]; end end +if isfield(genesToAdd,'proteins') + genesToAdd.proteins=convertCharArray(genesToAdd.proteins); + if numel(genesToAdd.proteins)~=nGenes + EM='genesToAdd.proteins must have the same number of elements as genesToAdd.genes'; + dispEM(EM); + end + %Add empty field if it doesn't exist + if ~isfield(newModel,'proteins') + newModel.proteins=largeFiller; + end + newModel.proteins=[newModel.proteins;genesToAdd.proteins(:)]; +else + %Add empty strings if structure is in model + if isfield(newModel,'proteins') + newModel.proteins=[newModel.proteins;filler]; + end +end + %Don't check the type of geneMiriams if isfield(genesToAdd,'geneMiriams') diff --git a/core/checkModelStruct.m b/core/checkModelStruct.m index 7526d590..139c43d8 100755 --- a/core/checkModelStruct.m +++ b/core/checkModelStruct.m @@ -120,6 +120,21 @@ function checkModelStruct(model,throwErrors,trimWarnings) EM='The "grRules" field must be a cell array of strings'; dispEM(EM,throwErrors); end + if ~isfield(model,'genes') + EM='If "grRules" field exists, the model should also contain a "genes" field'; + dispEM(EM,throwErrors); + else + geneList = strjoin(model.grRules); + geneList = regexp(geneList,' |)|(|and|or','split'); % Remove all grRule punctuation + geneList = geneList(~cellfun(@isempty,geneList)); % Remove spaces and empty genes + geneList = setdiff(unique(geneList),model.genes); + if ~isempty(geneList) + problemGrRules = model.rxns(contains(model.grRules,geneList)); + problemGrRules = strjoin(problemGrRules(:),'; '); + EM=['The reaction(s) "' problemGrRules '" contain the following genes in its "grRules" field, but these are not in the "genes" field:']; + dispEM(EM,throwErrors,geneList); + end + end end if isfield(model,'rxnComps') if ~isnumeric(model.rxnComps) @@ -229,6 +244,26 @@ function checkModelStruct(model,throwErrors,trimWarnings) end end +%Validate format of ids +fields = {'rxns';'mets';'comps';'genes'}; +fieldNames = {'reaction';'metabolite';'compartment';'gene'}; +fieldPrefix = {'R_';'M_';'C_';'G_'}; +for i=1:numel(fields) + try + numIDs = ~startsWith(model.(fields{i}),regexpPattern('^[a-zA-Z_]')); + catch + numIDs = []; + end + if any(numIDs) + EM = ['The following ' fieldNames{i} ' identifiers do not start '... + 'with a letter or _ (conflicting with SBML specifications). '... + 'This does not impact RAVEN functionality, but be aware that '... + 'exportModel will automatically add ' fieldPrefix{i} ... + ' prefixes to all ' fieldNames{i} ' identifiers:']; + dispEM(EM,false,{model.(fields{i}){numIDs}},trimWarnings); + end +end + %Duplicates EM='The following reaction IDs are duplicates:'; dispEM(EM,throwErrors,model.rxns(duplicates(model.rxns)),trimWarnings); @@ -259,10 +294,10 @@ function checkModelStruct(model,throwErrors,trimWarnings) dispEM(EM,false,model.comps(I),trimWarnings); %Contradicting bounds -EM='The following reactions have contradicting bounds:'; +EM='The following reactions have contradicting bounds (lower bound is higher than upper bound):'; dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); -EM='The following reactions have bounds contradicting their reversibility:'; -dispEM(EM,throwErrors,model.rxns(model.lb<0 & model.rev==0),trimWarnings); +EM='The following reactions have lower and upper bounds that indicate reversibility, but are indicated as irreversible in model.rev:'; +dispEM(EM,false,model.rxns(model.lb < 0 & model.ub > 0 & model.rev==0),trimWarnings); %Multiple or no objective functions not allowed in SBML L3V1 FBCv2 if numel(find(model.c))>1 @@ -272,9 +307,6 @@ function checkModelStruct(model,throwErrors,trimWarnings) EM='No objective function found. This might be intended, but results in FBCv2 non-compliant SBML file when exported'; dispEM(EM,false); end - -EM='The following reactions have contradicting bounds:'; -dispEM(EM,throwErrors,model.rxns(model.lb>model.ub),trimWarnings); %Mapping of compartments if isfield(model,'compOutside') @@ -292,8 +324,8 @@ function checkModelStruct(model,throwErrors,trimWarnings) end end end -EM='The following metabolite IDs begin with a number directly followed by space:'; -dispEM(EM,throwErrors,model.mets(I),trimWarnings); +EM='The following metabolite names begin with a number directly followed by space, which could potentially cause problems:'; +dispEM(EM,false,model.metNames(I),trimWarnings); %Non-parseable composition if isfield(model,'metFormulas') diff --git a/core/constructS.m b/core/constructS.m index 87ac8c89..5412cf24 100755 --- a/core/constructS.m +++ b/core/constructS.m @@ -152,7 +152,8 @@ strjoin(unique(metsToS(~metsPresent)),', ')],'') else missingMet = find(~metsPresent); - missingMet = char(strcat(metsToS(missingMet),' (reaction:',rxns(rxnsToS(missingMet)),')\n')); + missingMet = strcat(metsToS(missingMet),' (reaction:',rxns(rxnsToS(missingMet)),')\n'); + missingMet = strjoin(missingMet,''); error(['Could not find the following metabolites (reaction indicated) in the metabolite list: \n' ... missingMet '%s'],''); end diff --git a/core/deleteUnusedGenes.m b/core/deleteUnusedGenes.m index 49cff71f..7d0427f9 100755 --- a/core/deleteUnusedGenes.m +++ b/core/deleteUnusedGenes.m @@ -37,6 +37,10 @@ reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); end +if isfield(reducedModel,'proteins') + reducedModel.proteins=reducedModel.proteins(toKeep); +end + if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); end diff --git a/core/dispEM.m b/core/dispEM.m index 8ece09ff..8ed45e8b 100755 --- a/core/dispEM.m +++ b/core/dispEM.m @@ -33,6 +33,10 @@ function dispEM(string,throwErrors,toList,trimWarnings) end if throwErrors==false errorText=['WARNING: ' string '\n']; + % Wrap text to command window size + sz = get(0, 'CommandWindowSize'); + errorText = textwrap({errorText},sz(1)); + errorText = strjoin(errorText,'\n'); else errorText=[string '\n']; end diff --git a/core/getExchangeRxns.m b/core/getExchangeRxns.m index 459bc095..fa197b21 100755 --- a/core/getExchangeRxns.m +++ b/core/getExchangeRxns.m @@ -1,47 +1,84 @@ function [exchangeRxns, exchangeRxnsIndexes]=getExchangeRxns(model,reactionType) % getExchangeRxns -% Retrieves the exchange reactions from a model +% Retrieves the exchange reactions from a model. Exchange reactions are +% identified by having either no substrates or products. % +% Input: % model a model structure -% reactionType retrieve all reactions ('both'), only production -% ('out'), or only consumption ('in') (optional, default -% 'both') +% reactionType which exchange reactions should be returned +% 'all' all reactions, irrespective of reaction +% bounds +% 'uptake' reactions with bounds that imply that +% only uptake are allowed. Reaction +% direction, upper and lower bounds are +% all considered +% 'excrete' reactions with bounds that imply that +% only excretion are allowed. Reaction +% direction, upper and lower bounds are +% all considered +% 'reverse' reactions with non-zero upper and lower +% bounds that imply that both uptake and +% excretion are allowed +% 'blocked' reactions that have zero upper and lower +% bounds, not allowing any flux +% 'in' reactions where the boundary metabolite +% is the substrate of the reaction, a +% positive flux value would imply uptake, +% but reaction bounds are not considered +% 'out' reactions where the boundary metabolite +% is the substrate of the reaction, a +% positive flux value would imply uptake, +% but reaction bounds are not considered. % +% Output: % exchangeRxns cell array with the IDs of the exchange reactions % exchangeRxnsIndexes vector with the indexes of the exchange reactions % -% Exchange reactions are defined as reactions which involve only products -% or only reactants. If the unconstrained field is present, then that is -% used instead. +% Note: +% The union of 'in' and 'out' equals 'all'. Also, the union of 'uptake', +% 'excrete', 'reverse' and 'blocked' equals all. % % Usage: [exchangeRxns,exchangeRxnsIndexes]=getExchangeRxns(model,reactionType) if nargin<2 - reactionType='both'; + reactionType='all'; else reactionType=char(reactionType); end -hasNoProducts=sparse(numel(model.rxns),1); -hasNoReactants=sparse(numel(model.rxns),1); - -if isfield(model,'unconstrained') - if strcmpi(reactionType,'both') || strcmpi(reactionType,'out') - [~, I]=find(model.S(model.unconstrained~=0,:)>0); - hasNoProducts(I)=true; - end - if strcmpi(reactionType,'both') || strcmpi(reactionType,'in') - [~, I]=find(model.S(model.unconstrained~=0,:)<0); - hasNoReactants(I)=true; - end +% Find exchange reactions +if isfield(model, 'unconstrained') + [~, I]=find(model.S(model.unconstrained~=0,:)>0); + hasNoProd(I)=true; + [~, I]=find(model.S(model.unconstrained~=0,:)<0); + hasNoSubs(I)=true; else - if strcmpi(reactionType,'both') || strcmpi(reactionType,'out') - hasNoProducts=sum((model.S>0))==0; - end - if strcmpi(reactionType,'both') || strcmpi(reactionType,'in') - hasNoReactants=sum((model.S<0))==0; - end + hasNoProd = transpose(find(sum(model.S>0)==0)); + hasNoSubs = transpose(find(sum(model.S<0)==0)); +end +allExch = [hasNoProd; hasNoSubs]; + +switch reactionType + case {'both','all'} % For legacy reasons, 'both' is also allowed + exchangeRxnsIndexes = allExch; + case 'in' + exchangeRxnsIndexes = hasNoSubs; + case 'out' + exchangeRxnsIndexes = hasNoProd; + case 'blocked' + exchangeRxnsIndexes = allExch(model.lb(allExch) == 0 & model.ub(allExch) == 0); + case 'reverse' + exchangeRxnsIndexes = allExch(model.lb(allExch) < 0 & model.ub(allExch) > 0); + case 'uptake' + + exchangeRxnsIndexes = allExch([(model.lb(hasNoProd) < 0 & model.ub(hasNoProd) <= 0); ... + (model.lb(hasNoSubs) >= 0 & model.ub(hasNoSubs) > 0)]); + case 'excrete' + exchangeRxnsIndexes = allExch([(model.lb(hasNoProd) >= 0 & model.ub(hasNoProd) > 0); ... + (model.lb(hasNoSubs) < 0 & model.ub(hasNoSubs) <= 0)]); + otherwise + error('Invalid reactionType specified') end -exchangeRxnsIndexes=find(hasNoProducts(:) | hasNoReactants(:)); -exchangeRxns=model.rxns(exchangeRxnsIndexes); +exchangeRxnsIndexes = sort(exchangeRxnsIndexes); +exchangeRxns = model.rxns(exchangeRxnsIndexes); end diff --git a/core/getModelFromHomology.m b/core/getModelFromHomology.m index 242e6f36..aa356b03 100755 --- a/core/getModelFromHomology.m +++ b/core/getModelFromHomology.m @@ -107,14 +107,17 @@ modelNames=cell(numel(models),1); for i=1:numel(models) modelNames{i}=models{i}.id; - %Gene short names and geneMiriams are often different between species, - %safer not to include them + %Gene short names, geneMiriams and proteins are often different + %between species, safer not to include them if isfield(models{i},'geneShortNames') models{i}=rmfield(models{i},'geneShortNames'); end if isfield(models{i},'geneMiriams') models{i}=rmfield(models{i},'geneMiriams'); end + if isfield(models{i},'proteins') + models{i}=rmfield(models{i},'proteins'); + end %The geneFrom field also loses meaning if the genes are replaced by %orthologs if isfield(models{i},'geneFrom') diff --git a/core/mergeModels.m b/core/mergeModels.m index 19de25c1..29faff96 100755 --- a/core/mergeModels.m +++ b/core/mergeModels.m @@ -492,7 +492,11 @@ if isfield(models{i},'geneShortNames') model.geneShortNames=models{i}.geneShortNames; end - + + if isfield(models{i},'proteins') + model.proteins=models{i}.proteins; + end + if isfield(models{i},'geneMiriams') model.geneMiriams=models{i}.geneMiriams; end @@ -530,7 +534,23 @@ model.geneShortNames=[model.geneShortNames;emptyGeneSN]; end end - + + if isfield(models{i},'proteins') + if isfield(model,'proteins') + model.proteins=[model.proteins;models{i}.proteins(genesToAdd)]; + else + emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); + emptyGeneSN(:)={''}; + model.proteins=[emptyGeneSN;models{i}.proteins(genesToAdd)]; + end + else + if isfield(model,'proteins') + emptyGeneSN=cell(numel(genesToAdd),1); + emptyGeneSN(:)={''}; + model.proteins=[model.proteins;emptyGeneSN]; + end + end + if isfield(models{i},'geneMiriams') if isfield(model,'geneMiriams') model.geneMiriams=[model.geneMiriams;models{i}.geneMiriams(genesToAdd)]; diff --git a/core/permuteModel.m b/core/permuteModel.m index 200bf283..4fad67c2 100755 --- a/core/permuteModel.m +++ b/core/permuteModel.m @@ -132,6 +132,9 @@ if isfield(newModel,'geneShortNames') newModel.geneShortNames=newModel.geneShortNames(indexes); end + if isfield(newModel,'proteins') + newModel.proteins=newModel.proteins(indexes); + end if isfield(newModel,'rxnGeneMat') newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); end diff --git a/core/predictLocalization.m b/core/predictLocalization.m index bab49bb0..2bfff833 100755 --- a/core/predictLocalization.m +++ b/core/predictLocalization.m @@ -199,6 +199,9 @@ if isfield(model,'geneMiriams') model.geneMiriams=[model.geneMiriams;{[]}]; end + if isfield(model,'proteins') + model.proteins=[model.proteins;{[]}]; + end if isfield(model,'geneFrom') model.geneFrom=[model.geneFrom;{{'FAKE'}}]; end @@ -258,6 +261,9 @@ if isfield(model,'geneShortNames') model.geneShortNames=[model.geneShortNames;{''}]; end + if isfield(model,'proteins') + model.proteins=[model.proteins;{''}]; + end if isfield(model,'geneFrom') model.geneFrom=[model.geneFrom;{'COMPLEX'}]; end @@ -759,6 +765,9 @@ if isfield(outModel,'geneShortNames') outModel.geneShortNames(I)=[]; end +if isfield(outModel,'proteins') + outModel.proteins(I)=[]; +end outModel.rxnGeneMat(:,I)=[]; %Fix grRules and reconstruct rxnGeneMat diff --git a/core/printOrange.m b/core/printOrange.m index 21625a08..b31b5639 100755 --- a/core/printOrange.m +++ b/core/printOrange.m @@ -1,4 +1,4 @@ -function printOrange(stringToPrint) +function orangeString = printOrange(stringToPrint) % printOrange % Print orange-colored stringToPrint to the MATLAB Command Window. Only % if MATLAB is open with GUI, does not work with command-line MATLAB. @@ -7,10 +7,18 @@ function printOrange(stringToPrint) % stringToPrint string that should be printed in orange color % % Usage: printOrange(stringToPrint) + try useDesktop = usejava('desktop'); catch, useDesktop = false; end if useDesktop - fprintf(['[\b' stringToPrint,']\b']) + orangeString = ['[\b' stringToPrint,']\b']; else - fprintf(stringToPrint) + orangeString = stringToPrint; +end +if nargout < 1 + % Wrap text to command window size + sz = get(0, 'CommandWindowSize'); + orangeString = textwrap({orangeString},sz(1)); + orangeString = strjoin(orangeString,'\n'); + fprintf(orangeString); end end diff --git a/core/removeReactions.m b/core/removeReactions.m index d2af5a4a..4255c6e2 100755 --- a/core/removeReactions.m +++ b/core/removeReactions.m @@ -129,6 +129,10 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); end + + if isfield(reducedModel,'proteins') + reducedModel.proteins=reducedModel.proteins(toKeep); + end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); diff --git a/core/simplifyModel.m b/core/simplifyModel.m index 2dbc72ec..d99f7ae6 100755 --- a/core/simplifyModel.m +++ b/core/simplifyModel.m @@ -221,6 +221,9 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames={}; end + if isfield(reducedModel,'proteins') + reducedModel.proteins={}; + end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams={}; end diff --git a/doc/INIT/getINITModel.html b/doc/INIT/getINITModel.html index 66b87a44..a1079434 100644 --- a/doc/INIT/getINITModel.html +++ b/doc/INIT/getINITModel.html @@ -544,77 +544,80 @@