diff --git a/AcetaldehydeNISTRefMixed2.csv b/AcetaldehydeNISTRefMixed2.csv deleted file mode 100644 index 92eed9bda..000000000 --- a/AcetaldehydeNISTRefMixed2.csv +++ /dev/null @@ -1,60 +0,0 @@ -Source:,Measured,NIST,NIST,NIST,NIST,NIST,NIST,NIST,NIST -Molecules,Acetaldehyde,(E) 2-Butenal (Crotonaldehyde,CO,CO2,Ethylene (Ethene),Ethanol ,Crotyl Alcohol ,H2,H2O -Electron Numbers,24,38,14,22,16,26,40,2,10 -Molecular Mass,44.0526,70.0898,28.01,44.01,28.0532,46.0684,72.1057,2.01588,18 -1,0,0,0,0,0,0,0,424.7389898,0 -2,0,0,0,0,19.86799808,0,0,19866.01128,0 -12,0,0,781.7557194,1448.743046,83.16550206,33.26620082,44.90937111,0,0 -13,0,0,0,0,147.0941008,227.1786667,88.25646045,0,0 -14,0,0,0,0,337.2745273,232.8800307,362.9716341,0,0 -15,0,0,0,0,47.35142646,1048.044906,953.3420526,0,0 -16,0,0,263.7233818,1490.812764,0,0,83.77095656,0,139.6182609 -17,0,0,0,0,0,106.7407994,93.01698234,0,3235.771091 -18,0,0,0,0,0,82.44744969,98.93693963,0,14988.94635 -19,0,0,0,0,0,411.2019015,103.1689359,0,73.69209704 -20,0,0,0,0,0,0,0,0,43.47782297 -21,0,0,0,0,0,0,0,0,0 -22,0,0,0,266.3729108,0,0,0,0,0 -24,0,0,0,0,312.1445707,82.78616876,40.71450923,0,0 -25,0,6.678381267,0,0,1043.163154,328.5763583,269.8066032,0,0 -26,1070,23.66682522,0,0,6961.991086,1295.101269,1429.213279,0,0 -27,774,63.43502845,0,0,8073.078315,2901.181607,5294.882987,0,0 -28,6103,0,12748.55137,1250.757966,12748.55137,439.8690093,1802.825447,0,0 -29,9999,48.98377684,150.7193134,12.55994278,288.8786839,3749.14292,9122.286441,0,0 -30,0,2.475254689,0,0,12.37627345,1004.953404,460.3973722,0,0 -31,0,2.43976367,0,0,0,12197.59847,3536.43744,0,0 -32,0,0,0,0,0,0,151.5474765,0,0 -33,0,1.186255089,0,0,0,36.77390776,0,0,0 -34,0,0,0,0,0,0,30.42972015,0,0 -36,0,4.561902603,0,0,0,0,123.1713703,0,0 -37,0,20.27625635,0,0,0,0,767.1183653,0,0 -38,0,32.27883123,0,0,0,0,1344.58028,0,0 -39,0,106.7280359,0,0,0,0,5758.912782,0,0 -40,0,29.37968112,0,0,0,0,788.8988448,0,0 -41,546,107.6497409,0,0,0,147.4948946,3357.931213,0,0 -42,0,31.97085678,0,0,0,505.1395372,866.4102188,0,0 -43,0,4.221627226,0,0,0,1207.385387,4339.832788,0,0 -44,9998,0,0,10456.35237,0,74.24752658,1127.307516,0,0 -45,0,0,0,124.4033088,0,5337.93864,319.3018258,0,0 -46,0,0,0,41.1308037,0,2224.14821,0,0,0 -47,0,0,0,0,0,74.49411797,0,0,0 -48,0,1.013286071,0,0,0,0,46.61115925,0,0 -49,0,4.026904784,0,0,0,0,229.5335727,0,0 -50,0,5.003938727,0,0,0,0,670.5277894,0,0 -51,0,3.981882875,0,0,0,0,595.2914899,0,0 -52,0,1.981550233,0,0,0,0,211.0350998,0,0 -53,0,2.960102815,0,0,0,0,1332.046267,0,0 -54,0,0.983248184,0,0,0,0,897.7055923,0,0 -55,0,4.902084273,0,0,0,0,852.9626634,0,0 -56,0,0,0,0,0,0,44.99751964,0,0 -57,0,0,0,0,0,0,9765.208054,0,0 -58,0,0,0,0,0,0,316.1110568,0,0 -59,0,0,0,0,0,0,33.1603964,0,0 -66,0,0.990286219,0,0,0,0,0,0,0 -67,0,0.994911979,0,0,0,0,0,0,0 -68,0,5.000795817,0,0,0,0,32.00509323,0,0 -69,0,65.39180514,0,0,0,0,182.0910266,0,0 -70,0,83.0264599,0,0,0,0,139.7274569,0,0 -71,0,8.157034082,0,0,0,0,709.6619651,0,0 -72,0,1.027362141,0,0,0,0,2629.019718,0,0 -73,0,0,0,0,0,0,113.928809,0,0 diff --git a/AcetaldehydeNISTRefMixed2.tsv b/AcetaldehydeNISTRefMixed2.tsv new file mode 100644 index 000000000..b6b25555e Binary files /dev/null and b/AcetaldehydeNISTRefMixed2.tsv differ diff --git a/DefaultUserInput.py b/DefaultUserInput.py index 726c10b1b..83ec3ed16 100644 --- a/DefaultUserInput.py +++ b/DefaultUserInput.py @@ -13,7 +13,7 @@ #//Input Files// UserChoices['inputFiles'] = {} #initialize the inputFiles container -UserChoices['inputFiles']['referenceFileNamesList'] = ['AcetaldehydeNISTRefMixed2.csv'] #enter the file name of the file containing reference information +UserChoices['inputFiles']['referenceFileNamesList'] = ['AcetaldehydeNISTRefMixed2.tsv'] #enter the file name of the file containing reference information. tsv is tab-separated, csv is comma separated. tsv supports commas in molecule names. UserChoices['inputFiles']['referenceFormsList'] = 'xyyy' #form is either 'xyyy' or 'xyxy' (if using reference pattern time chooser enter as list with forms for each individual reference file ['xyyy','xyyy','xyyy']) UserChoices['inputFiles']['referencePatternTimeRanges'] = [] #Leave empty if not using reference pattern time chooser [] UserChoices['inputFiles']['collectedFileName'] = '2-CrotAcetExp#2.csv' #enter the file name with raw mass spectrometer data diff --git a/MSRESOLVE.py b/MSRESOLVE.py index 8f1e632fe..cafd36a8b 100644 --- a/MSRESOLVE.py +++ b/MSRESOLVE.py @@ -2545,7 +2545,7 @@ def IterativePrepareNextIterationInputFiles(ExperimentDataFullCopy): #Now going to overwrite parallelized variables with their original versions if they were set to length of chosen molecules. delimitedStringOfVariablesToUnparallelize = 'moleculeLikelihoods, sensitivityValues, referenceValueThreshold, referenceSignificantFragmentThresholds' - listOfVariablesToUnparallelize = delimitedStringOfVariablesToUnparallelize.split(", ") #Note that we are using ", " as the delimeter, not just "," + listOfVariablesToUnparallelize = delimitedStringOfVariablesToUnparallelize.split(", ") #Note that we are using ", " as the delimiter, not just "," for variable in listOfVariablesToUnparallelize: G.nextUserInputModule.__dict__[variable]=G.beforeParsedGDict[variable] @@ -2603,6 +2603,22 @@ def IterativeAnalysisPostProcessing(ExperimentData, simulateddata, mass_fragment #These functions read in the experimental data file and the reference file. The #returned variables can then be used to initialize the respective classes. +#a small helper function to check if an extension exists in a filename and to return the delimiter based on that. +def getDelimiterFromExtension(filename): + if ".tsv" in filename: + delimiter = '\t' + elif ".tab" in filename: + delimiter = '\t' + elif ".txt" in filename: + delimiter = '\t' + elif ".skv" in filename: + delimiter = ';' + elif ".csv" in filename: + delimiter = ',' #it could be something else, but we will assume that a csv + else: + delimiter = '\t' #for MSRESOLVE, this is now the default delimiter. + return delimiter + def readDataFile(collectedFileName): #read the csv file into a dataframe. dataFrame means "dataframe" and is a pandas object. @@ -2703,8 +2719,14 @@ def FromXYXYtoXYYY(provided_reference_patterns): provided_reference_patterns = reference_holder return provided_reference_patterns - #read the csv file into a dataframe - dataFrame = pandas.read_csv('%s' %referenceFileName, header = None) + #read the csv file into a dataframe + if '.csv' in referenceFileName: + dataFrame = pandas.read_csv('%s' %referenceFileName, header = None) + elif '.tsv' in referenceFileName: + try: #no easy way to assess utf16 vs utf8, so try both. + dataFrame = pandas.read_csv('%s' %referenceFileName, header = None, delimiter = '\t', encoding = 'utf8') #need to specify encoding for cases of tab delimited files. + except: #no easy way to assess utf16 vs utf8, so try both. + dataFrame = pandas.read_csv('%s' %referenceFileName, header = None, delimiter = '\t', encoding = 'utf16') #need to use utf16 for some cases of tab delimited files. if form == 'xyyy': for rowIndex in range(len(dataFrame)): #Loop through each row and check the abscissa value @@ -2714,20 +2736,20 @@ def FromXYXYtoXYYY(provided_reference_patterns): reference = dfreference.values #convert to matrix provided_reference_patterns = reference.astype(float) #convert the matrix to floats provided_reference_patterns = DataFunctions.removeColumnsWithAllvaluesBelowZeroOrThreshold(provided_reference_patterns,startingRowIndex=1) #clear row of zeros - break #exit the for loop + break #exit the for loop since the first non-header row has been reached. except: #Otherwise the row consists of other information if (dataFrame.iloc[rowIndex][0] == 'SourceOfFragmentationPatterns') or (dataFrame.iloc[rowIndex][0] == 'Source:'): #if the abscissa titles the source (both old and new reference files) dfSourceOfFragmentationPatterns = dataFrame.iloc[rowIndex][1:] #select the row of names SourceOfFragmentationPatterns = dfSourceOfFragmentationPatterns.values #convert to matrix - SourceOfFragmentationPatterns = SourceOfFragmentationPatterns.astype(numpy.str) #save as class object with type string + SourceOfFragmentationPatterns = SourceOfFragmentationPatterns.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'sourceOfIonizationData': dfsourceOfIonizationData = dataFrame.iloc[rowIndex][1:] #Select the row of names sourceOfIonizationData = dfsourceOfIonizationData.values #convert to matrix - sourceOfIonizationData = sourceOfIonizationData.astype(numpy.str) #save as class object with type string + sourceOfIonizationData = sourceOfIonizationData.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'Molecules': #if the abscissa titles the molecule names dfmolecules = dataFrame.iloc[rowIndex][1:] #select the row of names molecules = dfmolecules.values #convert to matrix - molecules = molecules.astype(numpy.str) #save as class object with type string + molecules = molecules.astype(str) #save as class object with type string molecules = list(molecules) for moleculeIndex in range(len(molecules)): molecules[moleculeIndex] = molecules[moleculeIndex].strip()#remove leading and trailing whitespaces. @@ -2742,7 +2764,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): elif dataFrame.iloc[rowIndex][0] == 'moleculeIonizationType': dfmoleculeIonizationType = dataFrame.iloc[rowIndex][1:] #select row of names moleculeIonizationType = dfmoleculeIonizationType.values #convert to matrix - moleculeIonizationType = moleculeIonizationType.astype(numpy.str) #save as class object with type string + moleculeIonizationType = moleculeIonizationType.astype(str) #save as class object with type string elif (dataFrame.iloc[rowIndex][0] == 'relativeIonizationEfficiencies') or (dataFrame.iloc[rowIndex][0] == 'knownIonizationFactorsRelativeToN2'): dfrelativeIonizationEfficiencies = dataFrame.iloc[rowIndex][1:] #select row of names relativeIonizationEfficiencies = dfrelativeIonizationEfficiencies.values #convert to matrix @@ -2799,7 +2821,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # molecules = dfmolecules.values # #save as class object with type string -# molecules = molecules.astype(numpy.str) +# molecules = molecules.astype(str) # # '''generate list of molecular weights''' # #select row of names @@ -2815,7 +2837,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # sourceInfo = dfsourceInfo.values # #save as class object with type string -# sourceInfo = sourceInfo.astype(numpy.str) +# sourceInfo = sourceInfo.astype(str) '''list of massfragments monitored is not part of reference file''' mass_fragment_numbers_monitored = None @@ -2837,15 +2859,15 @@ def FromXYXYtoXYYY(provided_reference_patterns): if (dataFrame.iloc[rowIndex][0] == 'SourceOfFragmentationPatterns') or (dataFrame.iloc[rowIndex][0] == 'Source:'): #if the abscissa titles the source (both old and new reference files) dfSourceOfFragmentationPatterns = dataFrame.iloc[rowIndex][1::2] #select the row of names SourceOfFragmentationPatterns = dfSourceOfFragmentationPatterns.values #convert to matrix - SourceOfFragmentationPatterns = SourceOfFragmentationPatterns.astype(numpy.str) #save as class object with type string + SourceOfFragmentationPatterns = SourceOfFragmentationPatterns.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'sourceOfIonizationData': dfsourceOfIonizationData = dataFrame.iloc[rowIndex][1::2] #Select the row of names sourceOfIonizationData = dfsourceOfIonizationData.values #convert to matrix - sourceOfIonizationData = sourceOfIonizationData.astype(numpy.str) #save as class object with type string + sourceOfIonizationData = sourceOfIonizationData.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'Molecules': #if the abscissa titles the molecule names dfmolecules = dataFrame.iloc[rowIndex][1::2] #select the row of names molecules = dfmolecules.values #convert to matrix - molecules = molecules.astype(numpy.str) #save as class object with type string + molecules = molecules.astype(str) #save as class object with type string molecules = list(molecules) for moleculeIndex in range(len(molecules)): molecules[moleculeIndex] = molecules[moleculeIndex].strip()#remove leading and trailing whitespaces. @@ -2860,7 +2882,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): elif dataFrame.iloc[rowIndex][0] == 'moleculeIonizationType': dfmoleculeIonizationType = dataFrame.iloc[rowIndex][1::2] #select row of names moleculeIonizationType = dfmoleculeIonizationType.values #convert to matrix - moleculeIonizationType = moleculeIonizationType.astype(numpy.str) #save as class object with type string + moleculeIonizationType = moleculeIonizationType.astype(str) #save as class object with type string elif (dataFrame.iloc[rowIndex][0] == 'relativeIonizationEfficiencies') or (dataFrame.iloc[rowIndex][0] == 'knownIonizationFactorsRelativeToN2'): dfrelativeIonizationEfficiencies = dataFrame.iloc[rowIndex][1::2] #select row of names relativeIonizationEfficiencies = dfrelativeIonizationEfficiencies.values #convert to matrix @@ -2919,7 +2941,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # molecules = dfmolecules.values # #save as class object with type string -# molecules = molecules.astype(numpy.str) +# molecules = molecules.astype(str) # # '''generate list of molecular weights''' # #select row of names @@ -2935,7 +2957,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # sourceInfo = dfsourceInfo.values # #save as class object with type string -# sourceInfo = sourceInfo.astype(numpy.str) +# sourceInfo = sourceInfo.astype(str) '''list of massfragments monitored is not part of reference file''' mass_fragment_numbers_monitored = None @@ -3096,6 +3118,8 @@ def __init__(self, provided_reference_patterns, electronnumbers, molecules, mole #class object variable created to allow class to be used separately from the program. self.ExportAtEachStep = '' self.iterationSuffix = '' + if type(referenceFileName) != type(None): + self.referenceFileNameExtension = self.referenceFileName.split(".")[1] #This loops through the molecules, and removes whitespaces from before and after the molecule's names. for moleculeIndex, moleculeName in enumerate(self.molecules): self.molecules[moleculeIndex] = moleculeName.strip() @@ -3136,6 +3160,24 @@ def __init__(self, provided_reference_patterns, electronnumbers, molecules, mole self.populateIonizationEfficiencies(self.AllMID_ObjectsDict) self.exportIonizationInfo() + #a small helper function to check if an extension exists in a filename and to return the delimiter based on that. + def getDelimiterFromExtension(self, filename=''): + if filename == '': + filename = self.referenceFileName + if ".tsv" in filename: + delimiter = '\t' + elif ".tab" in filename: + delimiter = '\t' + elif ".txt" in filename: + delimiter = '\t' + elif ".skv" in filename: + delimiter = ';' + elif ".csv" in filename: + delimiter = ',' #it could be something else, but we will assume that a csv + else: + delimiter = '\t' #for MSRESOLVE, this is now the default delimiter. + return delimiter + #This function allows adding molecules to an existing reference patterns. When using TuningCorrector it is used to create MixedReference patterns. #Though these variable names are plural, they are expected to be lists of one. "molecules" is supposed to be a list of variable names. #provided_reference_patterns should be in an XYYY format. If starting with XYXY data, is okay to feed a single "XY" at a time and to do so repeatedly in a loop. @@ -3231,7 +3273,8 @@ def ExportFragmentationPatterns(self, verbose=True): print(self.runTimeAtExport[savePoint]) if self.ExportAtEachStep == 'yes': #inserting the data for a particular savePoint - filename = 'Exported%s%s.csv'%(savePoint, self.labelToExport[savePoint]) + delimiter = getDelimiterFromExtension(self.referenceFileNameExtension) + filename = 'Exported%s%s.%s'%(savePoint, self.labelToExport[savePoint], self.referenceFileNameExtension) data = self.dataToExport[savePoint] colIndex = ['%s'% y for y in self.moleculesToExport[savePoint]] #colIndex = ['%s'% y for y in self.molecules] @@ -5569,10 +5612,10 @@ def ExportXYYYData(outputFileName, data, dataHeader, abscissaHeader = 'Mass', fi if dataType == 'Experiment': extraLine = len(data[0,1:]) -#If future applications of Export XYYY are desired, the new formats can be -#specified by additional keywords and if statements. + #If future applications of Export XYYY are desired, the new formats can be + #specified by additional keywords and if statements. -#if iterative analysis is being used and the suffix is wanted + #if iterative analysis is being used and the suffix is wanted if not fileSuffix =='': #then the filename will have a suffix attached outputFileName = outputFileName[:-4] + fileSuffix + outputFileName[-4:] @@ -5609,7 +5652,9 @@ def ExportXYYYData(outputFileName, data, dataHeader, abscissaHeader = 'Mass', fi lineToInsert = numpy.array(lineToInsert.split(',')) fullArrayToExport = numpy.vstack((lineToInsert, fullArrayToExport)) #save the file to the correct name - numpy.savetxt(filename, fullArrayToExport, delimiter = ',', fmt ="%s") + + delimiter = getDelimiterFromExtension(filename) + numpy.savetxt(filename, fullArrayToExport, delimiter = delimiter, fmt ="%s") '''This function inserts rows of percentages into arrays of data''' diff --git a/UnitTests/BestMassFragmentChooser/MSResolveDependenciesForExtentOfSLSsolvable.py b/UnitTests/BestMassFragmentChooser/MSResolveDependenciesForExtentOfSLSsolvable.py index 0901eb342..1f425e98a 100644 --- a/UnitTests/BestMassFragmentChooser/MSResolveDependenciesForExtentOfSLSsolvable.py +++ b/UnitTests/BestMassFragmentChooser/MSResolveDependenciesForExtentOfSLSsolvable.py @@ -307,26 +307,26 @@ def FromXYXYtoXYYY(provided_reference_patterns): float(dataFrame.iloc[rowIndex][0]) #if successful, then this rowIndex is the first index of provided reference intensities dfreference = dataFrame.iloc[rowIndex:][:] #remove the rows of headers reference = dfreference.values #convert to matrix - provided_reference_patterns = reference.astype(numpy.float) #convert the matrix to floats + provided_reference_patterns = reference.astype(float) #convert the matrix to floats provided_reference_patterns = DataFunctions.removeColumnsWithAllvaluesBelowZeroOrThreshold(provided_reference_patterns,startingRowIndex=1) #clear row of zeros break #exit the for loop except: #Otherwise the row consists of other information if (dataFrame.iloc[rowIndex][0] == 'SourceOfFragmentationPatterns') or (dataFrame.iloc[rowIndex][0] == 'Source:'): #if the abscissa titles the source (both old and new reference files) dfSourceOfFragmentationPatterns = dataFrame.iloc[rowIndex][1:] #select the row of names SourceOfFragmentationPatterns = dfSourceOfFragmentationPatterns.values #convert to matrix - SourceOfFragmentationPatterns = SourceOfFragmentationPatterns.astype(numpy.str) #save as class object with type string + SourceOfFragmentationPatterns = SourceOfFragmentationPatterns.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'sourceOfIonizationData': dfsourceOfIonizationData = dataFrame.iloc[rowIndex][1:] #Select the row of names sourceOfIonizationData = dfsourceOfIonizationData.values #convert to matrix - sourceOfIonizationData = sourceOfIonizationData.astype(numpy.str) #save as class object with type string + sourceOfIonizationData = sourceOfIonizationData.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'Molecules': #if the abscissa titles the molecule names dfmolecules = dataFrame.iloc[rowIndex][1:] #select the row of names molecules = dfmolecules.values #convert to matrix - molecules = molecules.astype(numpy.str) #save as class object with type string + molecules = molecules.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'Electron Numbers': #if the abscissa titles the electron numbers dfelectronnumbers = dataFrame.iloc[rowIndex][1:] #select the row of names electronnumbers = dfelectronnumbers.values #convert to matrix - electronnumbers = electronnumbers.astype(numpy.int) #save as class object with type int + electronnumbers = electronnumbers.astype(int) #save as class object with type int elif dataFrame.iloc[rowIndex][0] == 'Molecular Mass': #if the abscissa titles the molecular weights dfmolecularWeights = dataFrame.iloc[rowIndex][1:] #select row of names molecularWeights = dfmolecularWeights.values #convert to matrix @@ -334,7 +334,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): elif dataFrame.iloc[rowIndex][0] == 'moleculeIonizationType': dfmoleculeIonizationType = dataFrame.iloc[rowIndex][1:] #select row of names moleculeIonizationType = dfmoleculeIonizationType.values #convert to matrix - moleculeIonizationType = moleculeIonizationType.astype(numpy.str) #save as class object with type string + moleculeIonizationType = moleculeIonizationType.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'relativeIonizationEfficiencies': dfrelativeIonizationEfficiencies = dataFrame.iloc[rowIndex][1:] #select row of names relativeIonizationEfficiencies = dfrelativeIonizationEfficiencies.values #convert to matrix @@ -343,7 +343,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): relativeIonizationEfficiencies[index] = float(relativeIonizationEfficiencies[index]) except: #if not possible, the value is probably None or 'unknown' so leave as a string pass -# relativeIonizationEfficiencies = relativeIonizationEfficiencies.astype(numpy.float) #save as class object with type float +# relativeIonizationEfficiencies = relativeIonizationEfficiencies.astype(float) #save as class object with type float sourceOfIonizationData = None #To remove MSRESOLVE dependencies. @@ -379,7 +379,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # reference = dfreference.values # #convert the matrix to floats -# provided_reference_patterns = reference.astype(numpy.float) +# provided_reference_patterns = reference.astype(float) # #clear rows of zeros # provided_reference_patterns=DataFunctions.removeColumnsWithAllvaluesBelowZeroOrThreshold(provided_reference_patterns,startingRowIndex=1) # @@ -389,7 +389,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # electronnumbers = dfelectronnumbers.values # #save as class object with type int -# electronnumbers = electronnumbers.astype(numpy.int32) +# electronnumbers = electronnumbers.astype(int32) # # '''generate list of molecule names''' # #select row of names @@ -397,7 +397,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # molecules = dfmolecules.values # #save as class object with type string -# molecules = molecules.astype(numpy.str) +# molecules = molecules.astype(str) # # '''generate list of molecular weights''' # #select row of names @@ -405,7 +405,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # molecularWeights = dfmolecularWeights.values # #save as class object with type float -# molecularWeights = molecularWeights.astype(numpy.float) +# molecularWeights = molecularWeights.astype(float) # # '''generate list of source information''' # #select row of names @@ -413,7 +413,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # sourceInfo = dfsourceInfo.values # #save as class object with type string -# sourceInfo = sourceInfo.astype(numpy.str) +# sourceInfo = sourceInfo.astype(str) '''list of massfragments monitored is not part of reference file''' mass_fragment_numbers_monitored = None @@ -424,7 +424,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): float(dataFrame.iloc[rowIndex][0]) #if successful, then this rowIndex is the first index of provided reference intensities dfreference = dataFrame.iloc[rowIndex:][:] #remove the rows of headers reference = dfreference.values #convert to matrix - provided_reference_patterns = reference.astype(numpy.float) #convert the matrix to floats + provided_reference_patterns = reference.astype(float) #convert the matrix to floats print("Warning: FromXYXYtoXYYY for converting data patterns has not been tested in a long time. A unit test should be created and checked prior to use. Then this warning updated (this warning appears in two parts of the code." ) provided_reference_patterns = FromXYXYtoXYYY(provided_reference_patterns) #convert reference from XYXY to XYYY provided_reference_patterns = DataFunctions.removeColumnsWithAllvaluesBelowZeroOrThreshold(provided_reference_patterns,startingRowIndex=1) #clear row of zeros @@ -433,26 +433,26 @@ def FromXYXYtoXYYY(provided_reference_patterns): if dataFrame.iloc[rowIndex][0] == 'Source:': #if the abscissa titles the source dfsourceInfo = dataFrame.iloc[rowIndex][1::2] #select the row of names sourceInfo = dfsourceInfo.values #convert to matrix - sourceInfo = sourceInfo.astype(numpy.str) #save as class object with type string + sourceInfo = sourceInfo.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'Molecules': #if the abscissa titles the molecule names dfmolecules = dataFrame.iloc[rowIndex][1::2] #select the row of names molecules = dfmolecules.values #convert to matrix - molecules = molecules.astype(numpy.str) #save as class object with type string + molecules = molecules.astype(str) #save as class object with type string elif dataFrame.iloc[rowIndex][0] == 'Electron Numbers': #if the abscissa titles the electron numbers dfelectronnumbers = dataFrame.iloc[rowIndex][1::2] #select the row of names electronnumbers = dfelectronnumbers.values #convert to matrix - electronnumbers = electronnumbers.astype(numpy.int32) #save as class object with type int + electronnumbers = electronnumbers.astype(int32) #save as class object with type int elif dataFrame.iloc[rowIndex][0] == 'Molecular Mass': #if the abscissa titles the molecular weights dfmolecularWeights = dataFrame.iloc[rowIndex][1::2] #select row of names molecularWeights = dfmolecularWeights.values #convert to matrix - molecularWeights = molecularWeights.astype(numpy.float) #save as class object with type float + molecularWeights = molecularWeights.astype(float) #save as class object with type float # '''generate reference matrix''' # #remove top 4 rows # dfreference = dataFrame.iloc[4:][:] # #convert to matrix # reference = dfreference.values # #convert the matrix to floats -# provided_reference_patterns = reference.astype(numpy.float) +# provided_reference_patterns = reference.astype(float) # #convert reference from XYXY to XYYY # print("Warning: FromXYXYtoXYYY for converting data patterns has not been tested in a long time. A unit test should be created and checked prior to use. Then this warning updated (this warning appears in two parts of the code." ) # provided_reference_patterns=FromXYXYtoXYYY(provided_reference_patterns) @@ -465,7 +465,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # electronnumbers = dfelectronnumbers.values # #save as class object with type int -# electronnumbers = electronnumbers.astype(numpy.int32) +# electronnumbers = electronnumbers.astype(int32) # # '''generate list of molecule names''' # #select matrix of names @@ -473,7 +473,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # molecules = dfmolecules.values # #save as class object with type string -# molecules = molecules.astype(numpy.str) +# molecules = molecules.astype(str) # # '''generate list of molecular weights''' # #select row of names @@ -481,7 +481,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # molecularWeights = dfmolecularWeights.values # #save as class object with type float -# molecularWeights = molecularWeights.astype(numpy.float) +# molecularWeights = molecularWeights.astype(float) # # '''generate list of source information''' # #select row of names @@ -489,7 +489,7 @@ def FromXYXYtoXYYY(provided_reference_patterns): # #convert to matrix # sourceInfo = dfsourceInfo.values # #save as class object with type string -# sourceInfo = sourceInfo.astype(numpy.str) +# sourceInfo = sourceInfo.astype(str) '''list of massfragments monitored is not part of reference file''' mass_fragment_numbers_monitored = None diff --git a/UnitTests/excludeMoleculesIfSignificantFragmentNotObserved/AcetaldehydeNISTRefMixed2.csv b/UnitTests/excludeMoleculesIfSignificantFragmentNotObserved/AcetaldehydeNISTRefMixed2.csv deleted file mode 100644 index 92eed9bda..000000000 --- a/UnitTests/excludeMoleculesIfSignificantFragmentNotObserved/AcetaldehydeNISTRefMixed2.csv +++ /dev/null @@ -1,60 +0,0 @@ -Source:,Measured,NIST,NIST,NIST,NIST,NIST,NIST,NIST,NIST -Molecules,Acetaldehyde,(E) 2-Butenal (Crotonaldehyde,CO,CO2,Ethylene (Ethene),Ethanol ,Crotyl Alcohol ,H2,H2O -Electron Numbers,24,38,14,22,16,26,40,2,10 -Molecular Mass,44.0526,70.0898,28.01,44.01,28.0532,46.0684,72.1057,2.01588,18 -1,0,0,0,0,0,0,0,424.7389898,0 -2,0,0,0,0,19.86799808,0,0,19866.01128,0 -12,0,0,781.7557194,1448.743046,83.16550206,33.26620082,44.90937111,0,0 -13,0,0,0,0,147.0941008,227.1786667,88.25646045,0,0 -14,0,0,0,0,337.2745273,232.8800307,362.9716341,0,0 -15,0,0,0,0,47.35142646,1048.044906,953.3420526,0,0 -16,0,0,263.7233818,1490.812764,0,0,83.77095656,0,139.6182609 -17,0,0,0,0,0,106.7407994,93.01698234,0,3235.771091 -18,0,0,0,0,0,82.44744969,98.93693963,0,14988.94635 -19,0,0,0,0,0,411.2019015,103.1689359,0,73.69209704 -20,0,0,0,0,0,0,0,0,43.47782297 -21,0,0,0,0,0,0,0,0,0 -22,0,0,0,266.3729108,0,0,0,0,0 -24,0,0,0,0,312.1445707,82.78616876,40.71450923,0,0 -25,0,6.678381267,0,0,1043.163154,328.5763583,269.8066032,0,0 -26,1070,23.66682522,0,0,6961.991086,1295.101269,1429.213279,0,0 -27,774,63.43502845,0,0,8073.078315,2901.181607,5294.882987,0,0 -28,6103,0,12748.55137,1250.757966,12748.55137,439.8690093,1802.825447,0,0 -29,9999,48.98377684,150.7193134,12.55994278,288.8786839,3749.14292,9122.286441,0,0 -30,0,2.475254689,0,0,12.37627345,1004.953404,460.3973722,0,0 -31,0,2.43976367,0,0,0,12197.59847,3536.43744,0,0 -32,0,0,0,0,0,0,151.5474765,0,0 -33,0,1.186255089,0,0,0,36.77390776,0,0,0 -34,0,0,0,0,0,0,30.42972015,0,0 -36,0,4.561902603,0,0,0,0,123.1713703,0,0 -37,0,20.27625635,0,0,0,0,767.1183653,0,0 -38,0,32.27883123,0,0,0,0,1344.58028,0,0 -39,0,106.7280359,0,0,0,0,5758.912782,0,0 -40,0,29.37968112,0,0,0,0,788.8988448,0,0 -41,546,107.6497409,0,0,0,147.4948946,3357.931213,0,0 -42,0,31.97085678,0,0,0,505.1395372,866.4102188,0,0 -43,0,4.221627226,0,0,0,1207.385387,4339.832788,0,0 -44,9998,0,0,10456.35237,0,74.24752658,1127.307516,0,0 -45,0,0,0,124.4033088,0,5337.93864,319.3018258,0,0 -46,0,0,0,41.1308037,0,2224.14821,0,0,0 -47,0,0,0,0,0,74.49411797,0,0,0 -48,0,1.013286071,0,0,0,0,46.61115925,0,0 -49,0,4.026904784,0,0,0,0,229.5335727,0,0 -50,0,5.003938727,0,0,0,0,670.5277894,0,0 -51,0,3.981882875,0,0,0,0,595.2914899,0,0 -52,0,1.981550233,0,0,0,0,211.0350998,0,0 -53,0,2.960102815,0,0,0,0,1332.046267,0,0 -54,0,0.983248184,0,0,0,0,897.7055923,0,0 -55,0,4.902084273,0,0,0,0,852.9626634,0,0 -56,0,0,0,0,0,0,44.99751964,0,0 -57,0,0,0,0,0,0,9765.208054,0,0 -58,0,0,0,0,0,0,316.1110568,0,0 -59,0,0,0,0,0,0,33.1603964,0,0 -66,0,0.990286219,0,0,0,0,0,0,0 -67,0,0.994911979,0,0,0,0,0,0,0 -68,0,5.000795817,0,0,0,0,32.00509323,0,0 -69,0,65.39180514,0,0,0,0,182.0910266,0,0 -70,0,83.0264599,0,0,0,0,139.7274569,0,0 -71,0,8.157034082,0,0,0,0,709.6619651,0,0 -72,0,1.027362141,0,0,0,0,2629.019718,0,0 -73,0,0,0,0,0,0,113.928809,0,0 diff --git a/UnitTests/excludeMoleculesIfSignificantFragmentNotObserved/AcetaldehydeNISTRefMixed2.tsv b/UnitTests/excludeMoleculesIfSignificantFragmentNotObserved/AcetaldehydeNISTRefMixed2.tsv new file mode 100644 index 000000000..5aaecbca1 --- /dev/null +++ b/UnitTests/excludeMoleculesIfSignificantFragmentNotObserved/AcetaldehydeNISTRefMixed2.tsv @@ -0,0 +1,60 @@ +Source: Measured NIST NIST NIST NIST NIST NIST NIST NIST +Molecules Acetaldehyde (E) 2-Butenal (Crotonaldehyde CO CO2 Ethylene (Ethene) Ethanol Crotyl Alcohol H2 H2O +Electron Numbers 24 38 14 22 16 26 40 2 10 +Molecular Mass 44.0526 70.0898 28.01 44.01 28.0532 46.0684 72.1057 2.01588 18 +1 0 0 0 0 0 0 0 424.7389898 0 +2 0 0 0 0 19.86799808 0 0 19866.01128 0 +12 0 0 781.7557194 1448.743046 83.16550206 33.26620082 44.90937111 0 0 +13 0 0 0 0 147.0941008 227.1786667 88.25646045 0 0 +14 0 0 0 0 337.2745273 232.8800307 362.9716341 0 0 +15 0 0 0 0 47.35142646 1048.044906 953.3420526 0 0 +16 0 0 263.7233818 1490.812764 0 0 83.77095656 0 139.6182609 +17 0 0 0 0 0 106.7407994 93.01698234 0 3235.771091 +18 0 0 0 0 0 82.44744969 98.93693963 0 14988.94635 +19 0 0 0 0 0 411.2019015 103.1689359 0 73.69209704 +20 0 0 0 0 0 0 0 0 43.47782297 +21 0 0 0 0 0 0 0 0 0 +22 0 0 0 266.3729108 0 0 0 0 0 +24 0 0 0 0 312.1445707 82.78616876 40.71450923 0 0 +25 0 6.678381267 0 0 1043.163154 328.5763583 269.8066032 0 0 +26 1070 23.66682522 0 0 6961.991086 1295.101269 1429.213279 0 0 +27 774 63.43502845 0 0 8073.078315 2901.181607 5294.882987 0 0 +28 6103 0 12748.55137 1250.757966 12748.55137 439.8690093 1802.825447 0 0 +29 9999 48.98377684 150.7193134 12.55994278 288.8786839 3749.14292 9122.286441 0 0 +30 0 2.475254689 0 0 12.37627345 1004.953404 460.3973722 0 0 +31 0 2.43976367 0 0 0 12197.59847 3536.43744 0 0 +32 0 0 0 0 0 0 151.5474765 0 0 +33 0 1.186255089 0 0 0 36.77390776 0 0 0 +34 0 0 0 0 0 0 30.42972015 0 0 +36 0 4.561902603 0 0 0 0 123.1713703 0 0 +37 0 20.27625635 0 0 0 0 767.1183653 0 0 +38 0 32.27883123 0 0 0 0 1344.58028 0 0 +39 0 106.7280359 0 0 0 0 5758.912782 0 0 +40 0 29.37968112 0 0 0 0 788.8988448 0 0 +41 546 107.6497409 0 0 0 147.4948946 3357.931213 0 0 +42 0 31.97085678 0 0 0 505.1395372 866.4102188 0 0 +43 0 4.221627226 0 0 0 1207.385387 4339.832788 0 0 +44 9998 0 0 10456.35237 0 74.24752658 1127.307516 0 0 +45 0 0 0 124.4033088 0 5337.93864 319.3018258 0 0 +46 0 0 0 41.1308037 0 2224.14821 0 0 0 +47 0 0 0 0 0 74.49411797 0 0 0 +48 0 1.013286071 0 0 0 0 46.61115925 0 0 +49 0 4.026904784 0 0 0 0 229.5335727 0 0 +50 0 5.003938727 0 0 0 0 670.5277894 0 0 +51 0 3.981882875 0 0 0 0 595.2914899 0 0 +52 0 1.981550233 0 0 0 0 211.0350998 0 0 +53 0 2.960102815 0 0 0 0 1332.046267 0 0 +54 0 0.983248184 0 0 0 0 897.7055923 0 0 +55 0 4.902084273 0 0 0 0 852.9626634 0 0 +56 0 0 0 0 0 0 44.99751964 0 0 +57 0 0 0 0 0 0 9765.208054 0 0 +58 0 0 0 0 0 0 316.1110568 0 0 +59 0 0 0 0 0 0 33.1603964 0 0 +66 0 0.990286219 0 0 0 0 0 0 0 +67 0 0.994911979 0 0 0 0 0 0 0 +68 0 5.000795817 0 0 0 0 32.00509323 0 0 +69 0 65.39180514 0 0 0 0 182.0910266 0 0 +70 0 83.0264599 0 0 0 0 139.7274569 0 0 +71 0 8.157034082 0 0 0 0 709.6619651 0 0 +72 0 1.027362141 0 0 0 0 2629.019718 0 0 +73 0 0 0 0 0 0 113.928809 0 0 diff --git a/UserInput.py b/UserInput.py index 726c10b1b..83ec3ed16 100644 --- a/UserInput.py +++ b/UserInput.py @@ -13,7 +13,7 @@ #//Input Files// UserChoices['inputFiles'] = {} #initialize the inputFiles container -UserChoices['inputFiles']['referenceFileNamesList'] = ['AcetaldehydeNISTRefMixed2.csv'] #enter the file name of the file containing reference information +UserChoices['inputFiles']['referenceFileNamesList'] = ['AcetaldehydeNISTRefMixed2.tsv'] #enter the file name of the file containing reference information. tsv is tab-separated, csv is comma separated. tsv supports commas in molecule names. UserChoices['inputFiles']['referenceFormsList'] = 'xyyy' #form is either 'xyyy' or 'xyxy' (if using reference pattern time chooser enter as list with forms for each individual reference file ['xyyy','xyyy','xyyy']) UserChoices['inputFiles']['referencePatternTimeRanges'] = [] #Leave empty if not using reference pattern time chooser [] UserChoices['inputFiles']['collectedFileName'] = '2-CrotAcetExp#2.csv' #enter the file name with raw mass spectrometer data diff --git a/setup.py b/setup.py index e643c4bc7..ff8256158 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ EMAIL = 'AditySavara2008@u.northwestern.edu' AUTHOR = 'Aditya Savara' REQUIRES_PYTHON = '>=3.5.0' -VERSION = '43.0.0' +VERSION = '43.0.1' LICENSE = 'BSD-3-Clause' # What packages are required for this module to be executed?