From 73d5d06299b8f4389b783d48f3b3b826673de6cc Mon Sep 17 00:00:00 2001 From: Charles Cowart Date: Wed, 6 Nov 2024 18:00:58 -0800 Subject: [PATCH] Pre-test push --- qp_klp/SequencingTech.py | 56 +++-- qp_klp/Workflows.py | 3 + .../miseq_metagenomic.json | 4 +- .../metagenomic/tellseq/good_sheet_draft1.csv | 95 +------- .../tellseq/good_sheet_draft1_large.csv | 121 ++++++++++ qp_klp/tests/data/tellread_test.sbatch | 6 +- qp_klp/tests/test_basics.py | 225 +++++------------- 7 files changed, 234 insertions(+), 276 deletions(-) create mode 100644 qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1_large.csv diff --git a/qp_klp/SequencingTech.py b/qp_klp/SequencingTech.py index dab812b..d7ba0a5 100644 --- a/qp_klp/SequencingTech.py +++ b/qp_klp/SequencingTech.py @@ -88,11 +88,17 @@ class TellSeq(SequencingTech): seqtech_type = SEQTECH_NAME_TELLSEQ def convert_raw_to_fastq(self): - config = self.pipeline.get_software_configuration('tell-read') + # NB: Note that 'tell-seq' currently encapsulates the parameters + # for all sub-tasks including tell-read, integrate, etc. Some of + # these tasks may have different values for parameters like + # 'cores_per_task'. When differences are encountered, please create + # separate values for each in the tell-seq section. + config = self.pipeline.get_software_configuration('tell-seq') print("RUN DIR: %s" % self.pipeline.run_dir) print("OUTPUT PATH: %s" % self.pipeline.output_path) print("INPUT FILE PATH: %s" % self.pipeline.input_file_path) + print("CONFIG: %s" % config) tr_job = TellReadJob(self.pipeline.run_dir, self.pipeline.output_path, @@ -106,28 +112,16 @@ def convert_raw_to_fastq(self): config['label'], config['reference_base'], config['reference_map'], - config['tmp1_path'], config['sing_script_path'], config['cores_per_task']) tr_job.run(callback=self.status_update_callback) - ''' - when run is run, we're going to create a job script which we can test to see if it checks out. - then we're going to submit the job, and we need to fake the sbatch command. - then we need to fake the results directory. - all we're testing is that the job script is what we expect. - - ''' - - """ - - - # TODO: determine these appropriately max_array_length = "foo" label = "bar" if self.iseq_run: + print("PERFORMING NCJOB") # NB: the original master script performed this job prior to # integration and after the main job completed, but only # for iSeq jobs. This may be useful for additional @@ -146,6 +140,12 @@ def convert_raw_to_fastq(self): label) nc_job.run(callback=self.status_update_callback) + else: + print("SKIPPING NCJOB") + + # TODO: Replace these hardcodes + reference_base = "" + reference_map = "" # after the primary job and the optional counts job is completed, # the job to integrate results and add metadata to the fastq files @@ -161,10 +161,35 @@ def convert_raw_to_fastq(self): self.master_qiita_job_id, max_array_length, config['indicies_script_path'], - label) + label, + reference_base, + reference_map, + config['cores_per_task']) i_job.run(callback=self.status_update_callback) + print("DONE WITH INTEGRATE JOB") + + fake_audit_results = [] + + return fake_audit_results + + # we need to return back what failed to process + + ''' + when run is run, we're going to create a job script which we can test to see if it checks out. + then we're going to submit the job, and we need to fake the sbatch command. + then we need to fake the results directory. + all we're testing is that the job script is what we expect. + + ''' + + """ + + + # TODO: determine these appropriately + + # NB: after i_job is completed, there are two optional jobs that # can be performed in parallel using the new functionality in Job() # class. However we are not using the output from this step right now @@ -234,7 +259,6 @@ def convert_raw_to_fastq(self): -\ def _post_process_file(self, fastq_file, mapping): # generate names of the form generated by bcl-convert/bcl2fastq: diff --git a/qp_klp/Workflows.py b/qp_klp/Workflows.py index 59a23e1..d075345 100644 --- a/qp_klp/Workflows.py +++ b/qp_klp/Workflows.py @@ -1195,6 +1195,9 @@ class TellSeqMetagenomicWorkflow(Workflow, Metagenomic, TellSeq): def __init__(self, **kwargs): super().__init__(**kwargs) + # TODO: For now set to False + self.iseq_run = False + # TODO: Replace these with frozen set() or similar. self.mandatory_attributes = ['qclient', 'uif_path', 'config_fp', 'run_identifier', 'output_dir', 'job_id', diff --git a/qp_klp/tests/data/configuration_profiles/miseq_metagenomic.json b/qp_klp/tests/data/configuration_profiles/miseq_metagenomic.json index 250e216..02049a2 100644 --- a/qp_klp/tests/data/configuration_profiles/miseq_metagenomic.json +++ b/qp_klp/tests/data/configuration_profiles/miseq_metagenomic.json @@ -3,11 +3,10 @@ "instrument_type": "MiSeq", "assay_type": "Metagenomic", "configuration": { - "tell-read": { + "tell-seq": { "label": "my_label", "reference_base": "/my/reference/base/path", "reference_map": "/my/reference/map/path", - "tmp1_path": "/my/tmp1/path", "sing_script_path": "/my/sing/script/path", "cores_per_task": "999", "nodes": 1, @@ -17,6 +16,7 @@ "modules_to_load": [ "foo_module" ], + "indicies_script_path": "my/indicies/script/path", "per_process_memory_limit": "10" }, "bcl-convert": { diff --git a/qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1.csv b/qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1.csv index 645c382..8962de6 100644 --- a/qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1.csv +++ b/qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1.csv @@ -17,102 +17,13 @@ Chemistry,Default,,,,,,, ReverseComplement,0,,,,,,, [Data],,,,,,,, Sample_ID,Sample_Name,Sample_Plate,Sample_Well,barcode_id,Sample_Project,Well_description,Lane, -LS_8_10_2013_SRE,LS.8.10.2013.SRE,LS_Donor_SS_Samples_P1,A1,C501,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.10.2013.SRE,4, -LS_12_17_2014_SRE,LS.12.17.2014.SRE,LS_Donor_SS_Samples_P1,B1,C509,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.17.2014.SRE,4, -LS_4_4_2015_SRE,LS.4.4.2015.SRE,LS_Donor_SS_Samples_P1,C1,C502,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.4.2015.SRE,4, -LS_2_23_2015_SRE,LS.2.23.2015.SRE,LS_Donor_SS_Samples_P1,D1,C510,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.23.2015.SRE,4, -LS_9_28_2014_SRE,LS.9.28.2014.SRE,LS_Donor_SS_Samples_P1,E1,C503,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.9.28.2014.SRE,4, -LS_12_14_2013_SRE,LS.12.14.2013.SRE,LS_Donor_SS_Samples_P1,F1,C511,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.14.2013.SRE,4, -LS_4_7_2013_SRE,LS.4.7.2013.SRE,LS_Donor_SS_Samples_P1,G1,C504,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.7.2013.SRE,4, -LS_7_14_2013_SRE,LS.7.14.2013.SRE,LS_Donor_SS_Samples_P1,H1,C512,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.14.2013.SRE,4, -LS_10_27_2013_SRE,LS.10.27.2013.SRE,LS_Donor_SS_Samples_P1,I1,C505,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.10.27.2013.SRE,4, -LS_1_19_2014_SRE,LS.1.19.2014.SRE,LS_Donor_SS_Samples_P1,J1,C513,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.19.2014.SRE,4, -LS_9_3_2013_SRE,LS.9.3.2013.SRE,LS_Donor_SS_Samples_P1,K1,C506,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.9.3.2013.SRE,4, -LS_2_25_2013_SRE,LS.2.25.2013.SRE,LS_Donor_SS_Samples_P1,L1,C514,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.25.2013.SRE,4, -LS_7_26_2015_SRE,LS.7.26.2015.SRE,LS_Donor_SS_Samples_P1,M1,C507,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.26.2015.SRE,4, -LS_2_17_2014_SRE,LS.2.17.2014.SRE,LS_Donor_SS_Samples_P1,N1,C515,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.17.2014.SRE,4, -LS_6_29_2015_SRE,LS.6.29.2015.SRE,LS_Donor_SS_Samples_P1,O1,C508,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.29.2015.SRE,4, -LS_3_24_2015_SRE,LS.3.24.2015.SRE,LS_Donor_SS_Samples_P1,P1,C516,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.24.2015.SRE,4, -LS_1_6_2015_SRE,LS.1.6.2015.SRE,LS_Donor_SS_Samples_P1,A2,C517,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.6.2015.SRE,4, -T_LS_7_15_15B_SRE,T.LS.7.15.15B.SRE,LS_Donor_SS_Samples_P1,B2,C525,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.15.15B.SRE,4, -LS_6_9_2013_SRE,LS.6.9.2013.SRE,LS_Donor_SS_Samples_P1,C2,C518,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.9.2013.SRE,4, -Person_A_SRE,Person_A.SRE,LS_Donor_SS_Samples_P1,D2,C526,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_A.SRE,4, -LS_8_22_2014_R2_SRE,LS.8.22.2014.R2.SRE,LS_Donor_SS_Samples_P1,E2,C519,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.22.2014.R2.SRE,4, -Person_B_SRE,Person_B.SRE,LS_Donor_SS_Samples_P1,F2,C527,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_B.SRE,4, -LS_8_22_2014_R1_SRE,LS.8.22.2014.R1.SRE,LS_Donor_SS_Samples_P1,G2,C520,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.22.2014.R1.SRE,4, -Person_C_SRE,Person_C.SRE,LS_Donor_SS_Samples_P1,H2,C528,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_C.SRE,4, -LS_12_28_2011_SRE,LS.12.28.2011.SRE,LS_Donor_SS_Samples_P1,I2,C521,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.28.2011.SRE,4, -Person_D_SRE,Person_D.SRE,LS_Donor_SS_Samples_P1,J2,C529,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_D.SRE,4, -LS_5_4_2014_SRE,LS.5.4.2014.SRE,LS_Donor_SS_Samples_P1,K2,C522,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.5.4.2014.SRE,4, -45208_1_1,45208.1.1,UROBIOME_TEST_MF_SAMPLES_P2,L2,C530,Tellseq_Shortread_Metagenomic_Analysis_10283,45208.1.1,4, -LS_11_6_2012_SRE,LS.11.6.2012.SRE,LS_Donor_SS_Samples_P1,M2,C523,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.6.2012.SRE,4, -45248_2_2,45248.2.2,UROBIOME_TEST_MF_SAMPLES_P2,N2,C531,Tellseq_Shortread_Metagenomic_Analysis_10283,45248.2.2,4, -LS_4_3_2012_SRE,LS.4.3.2012.SRE,LS_Donor_SS_Samples_P1,O2,C524,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.3.2012.SRE,4, -45261_2_1,45261.2.1,UROBIOME_TEST_MF_SAMPLES_P2,P2,C532,Tellseq_Shortread_Metagenomic_Analysis_10283,45261.2.1,4, -45272_11_2,45272.11.2,UROBIOME_TEST_MF_SAMPLES_P2,A3,C533,Tellseq_Shortread_Metagenomic_Analysis_10283,45272.11.2,4, -T_LS_7_12_15A,T.LS.7.12.15A,Larry_Smarr_Plus_Donor_Samples_P3,B3,C541,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.12.15A,4, -45316_8_1,45316.8.1,UROBIOME_TEST_MF_SAMPLES_P2,C3,C534,Tellseq_Shortread_Metagenomic_Analysis_10283,45316.8.1,4, -T_LS_7_8_15A,T.LS.7.8.15A,Larry_Smarr_Plus_Donor_Samples_P3,D3,C542,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.8.15A,4, -45327_7_2,45327.7.2,UROBIOME_TEST_MF_SAMPLES_P2,E3,C535,Tellseq_Shortread_Metagenomic_Analysis_10283,45327.7.2,4, -LS_8_10_2013,LS.8.10.2013,LS_Time_Series_ABSQ_P4,F3,C543,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.10.2013,4, -45272_1_swab_2,45272.1.swab.2,UROBIOME_TEST_MF_SAMPLES_P2,G3,C536,Tellseq_Shortread_Metagenomic_Analysis_10283,45272.1.swab.2,4, -LS_6_29_2015,LS.6.29.2015,LS_Time_Series_ABSQ_P4,H3,C544,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.29.2015,4, -45326_1_swab_2,45326.1.swab.2,UROBIOME_TEST_MF_SAMPLES_P2,I3,C537,Tellseq_Shortread_Metagenomic_Analysis_10283,45326.1.swab.2,4, -LS_3_8_2015,LS.3.8.2015,LS_Time_Series_ABSQ_P4,J3,C545,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.8.2015,4, -T_LS_7_19_15A,T.LS.7.19.15A,Larry_Smarr_Plus_Donor_Samples_P3,K3,C538,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.19.15A,4, -LS_4_29_2013,LS.4.29.2013,LS_Time_Series_ABSQ_P4,L3,C546,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.29.2013,4, -T_LS_7_15_15B,T.LS.7.15.15B,Larry_Smarr_Plus_Donor_Samples_P3,M3,C539,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.15.15B,4, -LS_11_16_2014,LS.11.16.2014,LS_Time_Series_ABSQ_P4,N3,C547,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.16.2014,4, -T_LS_7_19_15B,T.LS.7.19.15B,Larry_Smarr_Plus_Donor_Samples_P3,O3,C540,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.19.15B,4, -LS_1_19_2014,LS.1.19.2014,LS_Time_Series_ABSQ_P4,P3,C548,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.19.2014,4, -LS_3_24_2015,LS.3.24.2015,LS_Time_Series_ABSQ_P4,A4,C549,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.24.2015,4, -LS_2_8_2013,LS.2.8.2013,LS_Time_Series_ABSQ_P4,B4,C557,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.8.2013,4, +BLANK_K15_cancer_patient,BLANK.K15.cancer.patient,Tumor_Community_P7,F6,C591,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.K15.cancer.patient,4, LS_11_10_2013,LS.11.10.2013,LS_Time_Series_ABSQ_P4,C4,C550,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.10.2013,4, -Marine_Sediment_0_2cm_R1,Marine.Sediment.0.2cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,D4,C558,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.0.2cm.R1,4, -LS_3_23_2014,LS.3.23.2014,LS_Time_Series_ABSQ_P4,E4,C551,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.23.2014,4, -Marine_Sediment_5_7cm_R1,Marine.Sediment.5.7cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,F4,C559,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.5.7cm.R1,4, -LS_1_14_2015,LS.1.14.2015,LS_Time_Series_ABSQ_P4,G4,C552,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.14.2015,4, -Marine_Sediment_10_12cm_R2,Marine.Sediment.10.12cm.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,H4,C560,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.10.12cm.R2,4, -LS_8_25_2014,LS.8.25.2014,LS_Time_Series_ABSQ_P4,I4,C553,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.25.2014,4, -Marine_Sediment_15_17cm_R1,Marine.Sediment.15.17cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,J4,C561,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.15.17cm.R1,4, -LS_1_26_2013,LS.1.26.2013,LS_Time_Series_ABSQ_P4,K4,C554,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.26.2013,4, -Marine_Sediment_20_22cm_R1,Marine.Sediment.20.22cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,L4,C562,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.20.22cm.R1,4, -LS_6_16_2014,LS.6.16.2014,LS_Time_Series_ABSQ_P4,M4,C555,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.16.2014,4, -Marine_Sediment_25_27cm_R2,Marine.Sediment.25.27cm.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,N4,C563,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.25.27cm.R2,4, -LS_7_27_2014,LS.7.27.2014,LS_Time_Series_ABSQ_P4,O4,C556,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.27.2014,4, -Marine_Sediment_30_32cm_R3,Marine.Sediment.30.32cm.R3,MarineSediment_Donor_LarrySmarr_NoProK_P5,P4,C564,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.30.32cm.R3,4, -Person_A_R3,Person.A.R3,MarineSediment_Donor_LarrySmarr_NoProK_P5,A5,C565,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.A.R3,4, -Soil_SynCom_T4_2_Tube5,Soil.SynCom.T4.2.Tube5,16_member_community_native_soil_P6,B5,C573,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T4.2.Tube5,4, +LS_9_28_2014_SRE,LS.9.28.2014.SRE,LS_Donor_SS_Samples_P1,E1,C503,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.9.28.2014.SRE,4, Person_B_R2,Person.B.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,C5,C566,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.B.R2,4, -A21,A21,Tumor_Community_P7,D5,C574,Tellseq_Shortread_Metagenomic_Analysis_10283,A21,4, -Person_C_R4,Person.C.R4,MarineSediment_Donor_LarrySmarr_NoProK_P5,E5,C567,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.C.R4,4, -A23,A23,Tumor_Community_P7,F5,C575,Tellseq_Shortread_Metagenomic_Analysis_10283,A23,4, -Person_D_R2,Person.D.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,G5,C568,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.D.R2,4, -A27,A27,Tumor_Community_P7,H5,C576,Tellseq_Shortread_Metagenomic_Analysis_10283,A27,4, -Soil_SynCom_T1_2_Tube1,Soil.SynCom.T1.2.Tube1,16_member_community_native_soil_P6,I5,C569,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T1.2.Tube1,4, -A30,A30,Tumor_Community_P7,J5,C577,Tellseq_Shortread_Metagenomic_Analysis_10283,A30,4, -Soil_SynCom_T2_2_Tube2,Soil.SynCom.T2.2.Tube2,16_member_community_native_soil_P6,K5,C570,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T2.2.Tube2,4, +LS_7_27_2014,LS.7.27.2014,LS_Time_Series_ABSQ_P4,O4,C556,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.27.2014,4, A31,A31,Tumor_Community_P7,L5,C578,Tellseq_Shortread_Metagenomic_Analysis_10283,A31,4, -Soil_SynCom_T3_2_Tube3,Soil.SynCom.T3.2.Tube3,16_member_community_native_soil_P6,M5,C571,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T3.2.Tube3,4, -S1_T1_A,S1.T1.A,Tumor_Community_P7,N5,C579,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.A,4, -Soil_SynCom_T4_1_Tube4,Soil.SynCom.T4.1.Tube4,16_member_community_native_soil_P6,O5,C572,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T4.1.Tube4,4, -S2_T1_B_A,S2.T1.B.A,Tumor_Community_P7,P5,C580,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.B.A,4, -S2_T1_01BH1_Y_A,S2.T1.01BH1.Y.A,Tumor_Community_P7,A6,C581,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.01BH1.Y.A,4, -S1_T1_1CIM_A,S1.T1.1CIM.A,Tumor_Community_P7,B6,C589,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.1CIM.A,4, -S2_MT1_1HBI_Y_A,S2.MT1.1HBI.Y.A,Tumor_Community_P7,C6,C582,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.MT1.1HBI.Y.A,4, -S1_M1_B_1CIM_A,S1.M1.B.1CIM.A,Tumor_Community_P7,D6,C590,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.M1.B.1CIM.A,4, -S1_T1_B_LBM_A,S1.T1.B.LBM.A,Tumor_Community_P7,E6,C583,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.B.LBM.A,4, -BLANK_K15_cancer_patient,BLANK.K15.cancer.patient,Tumor_Community_P7,F6,C591,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.K15.cancer.patient,4, -S2_MT1_LBM_A,S2.MT1.LBM.A,Tumor_Community_P7,G6,C584,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.MT1.LBM.A,4, BLANK_M15_cancer_patient,BLANK.M15.cancer.patient,Tumor_Community_P7,H6,C592,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.M15.cancer.patient,4, -S2_T1_A,S2.T1.A,Tumor_Community_P7,I6,C585,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.A,4, -BLANK_O15_cancer_patient,BLANK.O15.cancer.patient,Tumor_Community_P7,J6,C593,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.O15.cancer.patient,4, -1CIM_M_CNTL_A,1CIM.M.CNTL.A,Tumor_Community_P7,K6,C586,Tellseq_Shortread_Metagenomic_Analysis_10283,1CIM.M.CNTL.A,4, -BLANK_A17_cancer_patient,BLANK.A17.cancer.patient,Tumor_Community_P7,L6,C594,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.A17.cancer.patient,4, -1CIM_G_CNTL_A,1CIM.G.CNTL.A,Tumor_Community_P7,M6,C587,Tellseq_Shortread_Metagenomic_Analysis_10283,1CIM.G.CNTL.A,4, -BLANK_C17_cancer_patient,BLANK.C17.cancer.patient,Tumor_Community_P7,N6,C595,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.C17.cancer.patient,4, -GC_1HCOM_A,GC.1HCOM.A,Tumor_Community_P7,O6,C588,Tellseq_Shortread_Metagenomic_Analysis_10283,GC.1HCOM.A,4, -BLANK_E17_cancer_patient,BLANK.E17.cancer.patient,Tumor_Community_P7,P6,C596,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.E17.cancer.patient,4, [Bioinformatics],,,,,,,, Sample_Project,QiitaID,BarcodesAreRC,ForwardAdapter,ReverseAdapter,HumanFiltering,library_construction_protocol,experiment_design_description,contains_replicates Tellseq_Shortread_Metagenomic_Analysis_10283,10283,True,GATCGGAAGAGCACACGTCTGAACTCCAGTCAC,GATCGGAAGAGCGTCGTGTAGGGAAAGGAGTGT,True,Knight Lab Kapa HyperPlus,tellseq metagenomics,False diff --git a/qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1_large.csv b/qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1_large.csv new file mode 100644 index 0000000..645c382 --- /dev/null +++ b/qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1_large.csv @@ -0,0 +1,121 @@ +[Header],,,,,,,, +IEMFileVersion,1,,,,,,, +SheetType,tellseq_metag,,,,,,, +SheetVersion,10,,,,,,, +Investigator Name,Knight,,,,,,, +Experiment Name,RKL0151,,,,,,, +Date,5/6/24,,,,,,, +Workflow,GenerateFASTQ,,,,,,, +Application,FASTQ Only,,,,,,, +Assay,Metagenomic,,,,,,, +Description,,,,,,,, +Chemistry,Default,,,,,,, +[Reads],,,,,,,, +151,,,,,,,, +151,,,,,,,, +[Settings],,,,,,,, +ReverseComplement,0,,,,,,, +[Data],,,,,,,, +Sample_ID,Sample_Name,Sample_Plate,Sample_Well,barcode_id,Sample_Project,Well_description,Lane, +LS_8_10_2013_SRE,LS.8.10.2013.SRE,LS_Donor_SS_Samples_P1,A1,C501,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.10.2013.SRE,4, +LS_12_17_2014_SRE,LS.12.17.2014.SRE,LS_Donor_SS_Samples_P1,B1,C509,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.17.2014.SRE,4, +LS_4_4_2015_SRE,LS.4.4.2015.SRE,LS_Donor_SS_Samples_P1,C1,C502,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.4.2015.SRE,4, +LS_2_23_2015_SRE,LS.2.23.2015.SRE,LS_Donor_SS_Samples_P1,D1,C510,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.23.2015.SRE,4, +LS_9_28_2014_SRE,LS.9.28.2014.SRE,LS_Donor_SS_Samples_P1,E1,C503,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.9.28.2014.SRE,4, +LS_12_14_2013_SRE,LS.12.14.2013.SRE,LS_Donor_SS_Samples_P1,F1,C511,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.14.2013.SRE,4, +LS_4_7_2013_SRE,LS.4.7.2013.SRE,LS_Donor_SS_Samples_P1,G1,C504,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.7.2013.SRE,4, +LS_7_14_2013_SRE,LS.7.14.2013.SRE,LS_Donor_SS_Samples_P1,H1,C512,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.14.2013.SRE,4, +LS_10_27_2013_SRE,LS.10.27.2013.SRE,LS_Donor_SS_Samples_P1,I1,C505,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.10.27.2013.SRE,4, +LS_1_19_2014_SRE,LS.1.19.2014.SRE,LS_Donor_SS_Samples_P1,J1,C513,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.19.2014.SRE,4, +LS_9_3_2013_SRE,LS.9.3.2013.SRE,LS_Donor_SS_Samples_P1,K1,C506,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.9.3.2013.SRE,4, +LS_2_25_2013_SRE,LS.2.25.2013.SRE,LS_Donor_SS_Samples_P1,L1,C514,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.25.2013.SRE,4, +LS_7_26_2015_SRE,LS.7.26.2015.SRE,LS_Donor_SS_Samples_P1,M1,C507,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.26.2015.SRE,4, +LS_2_17_2014_SRE,LS.2.17.2014.SRE,LS_Donor_SS_Samples_P1,N1,C515,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.17.2014.SRE,4, +LS_6_29_2015_SRE,LS.6.29.2015.SRE,LS_Donor_SS_Samples_P1,O1,C508,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.29.2015.SRE,4, +LS_3_24_2015_SRE,LS.3.24.2015.SRE,LS_Donor_SS_Samples_P1,P1,C516,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.24.2015.SRE,4, +LS_1_6_2015_SRE,LS.1.6.2015.SRE,LS_Donor_SS_Samples_P1,A2,C517,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.6.2015.SRE,4, +T_LS_7_15_15B_SRE,T.LS.7.15.15B.SRE,LS_Donor_SS_Samples_P1,B2,C525,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.15.15B.SRE,4, +LS_6_9_2013_SRE,LS.6.9.2013.SRE,LS_Donor_SS_Samples_P1,C2,C518,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.9.2013.SRE,4, +Person_A_SRE,Person_A.SRE,LS_Donor_SS_Samples_P1,D2,C526,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_A.SRE,4, +LS_8_22_2014_R2_SRE,LS.8.22.2014.R2.SRE,LS_Donor_SS_Samples_P1,E2,C519,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.22.2014.R2.SRE,4, +Person_B_SRE,Person_B.SRE,LS_Donor_SS_Samples_P1,F2,C527,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_B.SRE,4, +LS_8_22_2014_R1_SRE,LS.8.22.2014.R1.SRE,LS_Donor_SS_Samples_P1,G2,C520,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.22.2014.R1.SRE,4, +Person_C_SRE,Person_C.SRE,LS_Donor_SS_Samples_P1,H2,C528,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_C.SRE,4, +LS_12_28_2011_SRE,LS.12.28.2011.SRE,LS_Donor_SS_Samples_P1,I2,C521,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.28.2011.SRE,4, +Person_D_SRE,Person_D.SRE,LS_Donor_SS_Samples_P1,J2,C529,Tellseq_Shortread_Metagenomic_Analysis_10283,Person_D.SRE,4, +LS_5_4_2014_SRE,LS.5.4.2014.SRE,LS_Donor_SS_Samples_P1,K2,C522,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.5.4.2014.SRE,4, +45208_1_1,45208.1.1,UROBIOME_TEST_MF_SAMPLES_P2,L2,C530,Tellseq_Shortread_Metagenomic_Analysis_10283,45208.1.1,4, +LS_11_6_2012_SRE,LS.11.6.2012.SRE,LS_Donor_SS_Samples_P1,M2,C523,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.6.2012.SRE,4, +45248_2_2,45248.2.2,UROBIOME_TEST_MF_SAMPLES_P2,N2,C531,Tellseq_Shortread_Metagenomic_Analysis_10283,45248.2.2,4, +LS_4_3_2012_SRE,LS.4.3.2012.SRE,LS_Donor_SS_Samples_P1,O2,C524,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.3.2012.SRE,4, +45261_2_1,45261.2.1,UROBIOME_TEST_MF_SAMPLES_P2,P2,C532,Tellseq_Shortread_Metagenomic_Analysis_10283,45261.2.1,4, +45272_11_2,45272.11.2,UROBIOME_TEST_MF_SAMPLES_P2,A3,C533,Tellseq_Shortread_Metagenomic_Analysis_10283,45272.11.2,4, +T_LS_7_12_15A,T.LS.7.12.15A,Larry_Smarr_Plus_Donor_Samples_P3,B3,C541,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.12.15A,4, +45316_8_1,45316.8.1,UROBIOME_TEST_MF_SAMPLES_P2,C3,C534,Tellseq_Shortread_Metagenomic_Analysis_10283,45316.8.1,4, +T_LS_7_8_15A,T.LS.7.8.15A,Larry_Smarr_Plus_Donor_Samples_P3,D3,C542,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.8.15A,4, +45327_7_2,45327.7.2,UROBIOME_TEST_MF_SAMPLES_P2,E3,C535,Tellseq_Shortread_Metagenomic_Analysis_10283,45327.7.2,4, +LS_8_10_2013,LS.8.10.2013,LS_Time_Series_ABSQ_P4,F3,C543,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.10.2013,4, +45272_1_swab_2,45272.1.swab.2,UROBIOME_TEST_MF_SAMPLES_P2,G3,C536,Tellseq_Shortread_Metagenomic_Analysis_10283,45272.1.swab.2,4, +LS_6_29_2015,LS.6.29.2015,LS_Time_Series_ABSQ_P4,H3,C544,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.29.2015,4, +45326_1_swab_2,45326.1.swab.2,UROBIOME_TEST_MF_SAMPLES_P2,I3,C537,Tellseq_Shortread_Metagenomic_Analysis_10283,45326.1.swab.2,4, +LS_3_8_2015,LS.3.8.2015,LS_Time_Series_ABSQ_P4,J3,C545,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.8.2015,4, +T_LS_7_19_15A,T.LS.7.19.15A,Larry_Smarr_Plus_Donor_Samples_P3,K3,C538,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.19.15A,4, +LS_4_29_2013,LS.4.29.2013,LS_Time_Series_ABSQ_P4,L3,C546,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.29.2013,4, +T_LS_7_15_15B,T.LS.7.15.15B,Larry_Smarr_Plus_Donor_Samples_P3,M3,C539,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.15.15B,4, +LS_11_16_2014,LS.11.16.2014,LS_Time_Series_ABSQ_P4,N3,C547,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.16.2014,4, +T_LS_7_19_15B,T.LS.7.19.15B,Larry_Smarr_Plus_Donor_Samples_P3,O3,C540,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.19.15B,4, +LS_1_19_2014,LS.1.19.2014,LS_Time_Series_ABSQ_P4,P3,C548,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.19.2014,4, +LS_3_24_2015,LS.3.24.2015,LS_Time_Series_ABSQ_P4,A4,C549,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.24.2015,4, +LS_2_8_2013,LS.2.8.2013,LS_Time_Series_ABSQ_P4,B4,C557,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.8.2013,4, +LS_11_10_2013,LS.11.10.2013,LS_Time_Series_ABSQ_P4,C4,C550,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.10.2013,4, +Marine_Sediment_0_2cm_R1,Marine.Sediment.0.2cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,D4,C558,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.0.2cm.R1,4, +LS_3_23_2014,LS.3.23.2014,LS_Time_Series_ABSQ_P4,E4,C551,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.23.2014,4, +Marine_Sediment_5_7cm_R1,Marine.Sediment.5.7cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,F4,C559,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.5.7cm.R1,4, +LS_1_14_2015,LS.1.14.2015,LS_Time_Series_ABSQ_P4,G4,C552,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.14.2015,4, +Marine_Sediment_10_12cm_R2,Marine.Sediment.10.12cm.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,H4,C560,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.10.12cm.R2,4, +LS_8_25_2014,LS.8.25.2014,LS_Time_Series_ABSQ_P4,I4,C553,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.25.2014,4, +Marine_Sediment_15_17cm_R1,Marine.Sediment.15.17cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,J4,C561,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.15.17cm.R1,4, +LS_1_26_2013,LS.1.26.2013,LS_Time_Series_ABSQ_P4,K4,C554,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.26.2013,4, +Marine_Sediment_20_22cm_R1,Marine.Sediment.20.22cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,L4,C562,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.20.22cm.R1,4, +LS_6_16_2014,LS.6.16.2014,LS_Time_Series_ABSQ_P4,M4,C555,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.16.2014,4, +Marine_Sediment_25_27cm_R2,Marine.Sediment.25.27cm.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,N4,C563,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.25.27cm.R2,4, +LS_7_27_2014,LS.7.27.2014,LS_Time_Series_ABSQ_P4,O4,C556,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.27.2014,4, +Marine_Sediment_30_32cm_R3,Marine.Sediment.30.32cm.R3,MarineSediment_Donor_LarrySmarr_NoProK_P5,P4,C564,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.30.32cm.R3,4, +Person_A_R3,Person.A.R3,MarineSediment_Donor_LarrySmarr_NoProK_P5,A5,C565,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.A.R3,4, +Soil_SynCom_T4_2_Tube5,Soil.SynCom.T4.2.Tube5,16_member_community_native_soil_P6,B5,C573,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T4.2.Tube5,4, +Person_B_R2,Person.B.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,C5,C566,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.B.R2,4, +A21,A21,Tumor_Community_P7,D5,C574,Tellseq_Shortread_Metagenomic_Analysis_10283,A21,4, +Person_C_R4,Person.C.R4,MarineSediment_Donor_LarrySmarr_NoProK_P5,E5,C567,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.C.R4,4, +A23,A23,Tumor_Community_P7,F5,C575,Tellseq_Shortread_Metagenomic_Analysis_10283,A23,4, +Person_D_R2,Person.D.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,G5,C568,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.D.R2,4, +A27,A27,Tumor_Community_P7,H5,C576,Tellseq_Shortread_Metagenomic_Analysis_10283,A27,4, +Soil_SynCom_T1_2_Tube1,Soil.SynCom.T1.2.Tube1,16_member_community_native_soil_P6,I5,C569,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T1.2.Tube1,4, +A30,A30,Tumor_Community_P7,J5,C577,Tellseq_Shortread_Metagenomic_Analysis_10283,A30,4, +Soil_SynCom_T2_2_Tube2,Soil.SynCom.T2.2.Tube2,16_member_community_native_soil_P6,K5,C570,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T2.2.Tube2,4, +A31,A31,Tumor_Community_P7,L5,C578,Tellseq_Shortread_Metagenomic_Analysis_10283,A31,4, +Soil_SynCom_T3_2_Tube3,Soil.SynCom.T3.2.Tube3,16_member_community_native_soil_P6,M5,C571,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T3.2.Tube3,4, +S1_T1_A,S1.T1.A,Tumor_Community_P7,N5,C579,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.A,4, +Soil_SynCom_T4_1_Tube4,Soil.SynCom.T4.1.Tube4,16_member_community_native_soil_P6,O5,C572,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T4.1.Tube4,4, +S2_T1_B_A,S2.T1.B.A,Tumor_Community_P7,P5,C580,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.B.A,4, +S2_T1_01BH1_Y_A,S2.T1.01BH1.Y.A,Tumor_Community_P7,A6,C581,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.01BH1.Y.A,4, +S1_T1_1CIM_A,S1.T1.1CIM.A,Tumor_Community_P7,B6,C589,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.1CIM.A,4, +S2_MT1_1HBI_Y_A,S2.MT1.1HBI.Y.A,Tumor_Community_P7,C6,C582,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.MT1.1HBI.Y.A,4, +S1_M1_B_1CIM_A,S1.M1.B.1CIM.A,Tumor_Community_P7,D6,C590,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.M1.B.1CIM.A,4, +S1_T1_B_LBM_A,S1.T1.B.LBM.A,Tumor_Community_P7,E6,C583,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.B.LBM.A,4, +BLANK_K15_cancer_patient,BLANK.K15.cancer.patient,Tumor_Community_P7,F6,C591,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.K15.cancer.patient,4, +S2_MT1_LBM_A,S2.MT1.LBM.A,Tumor_Community_P7,G6,C584,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.MT1.LBM.A,4, +BLANK_M15_cancer_patient,BLANK.M15.cancer.patient,Tumor_Community_P7,H6,C592,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.M15.cancer.patient,4, +S2_T1_A,S2.T1.A,Tumor_Community_P7,I6,C585,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.A,4, +BLANK_O15_cancer_patient,BLANK.O15.cancer.patient,Tumor_Community_P7,J6,C593,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.O15.cancer.patient,4, +1CIM_M_CNTL_A,1CIM.M.CNTL.A,Tumor_Community_P7,K6,C586,Tellseq_Shortread_Metagenomic_Analysis_10283,1CIM.M.CNTL.A,4, +BLANK_A17_cancer_patient,BLANK.A17.cancer.patient,Tumor_Community_P7,L6,C594,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.A17.cancer.patient,4, +1CIM_G_CNTL_A,1CIM.G.CNTL.A,Tumor_Community_P7,M6,C587,Tellseq_Shortread_Metagenomic_Analysis_10283,1CIM.G.CNTL.A,4, +BLANK_C17_cancer_patient,BLANK.C17.cancer.patient,Tumor_Community_P7,N6,C595,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.C17.cancer.patient,4, +GC_1HCOM_A,GC.1HCOM.A,Tumor_Community_P7,O6,C588,Tellseq_Shortread_Metagenomic_Analysis_10283,GC.1HCOM.A,4, +BLANK_E17_cancer_patient,BLANK.E17.cancer.patient,Tumor_Community_P7,P6,C596,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.E17.cancer.patient,4, +[Bioinformatics],,,,,,,, +Sample_Project,QiitaID,BarcodesAreRC,ForwardAdapter,ReverseAdapter,HumanFiltering,library_construction_protocol,experiment_design_description,contains_replicates +Tellseq_Shortread_Metagenomic_Analysis_10283,10283,True,GATCGGAAGAGCACACGTCTGAACTCCAGTCAC,GATCGGAAGAGCGTCGTGTAGGGAAAGGAGTGT,True,Knight Lab Kapa HyperPlus,tellseq metagenomics,False +[Contact],,,,,,,, +Sample_Project,Email,,,,,,, +Tellseq_Shortread_Metagenomic_Analysis_10283,cbrenchy@gmail.com,,,,,,, diff --git a/qp_klp/tests/data/tellread_test.sbatch b/qp_klp/tests/data/tellread_test.sbatch index 865ab73..c3fa6f2 100644 --- a/qp_klp/tests/data/tellread_test.sbatch +++ b/qp_klp/tests/data/tellread_test.sbatch @@ -11,7 +11,7 @@ set -x -export TMPDIR=/my/tmp1/path +export TMPDIR=/Users/ccowart/SAMPLE_SHEET_MADNESSS/one/qp_klp/tests/data/077c4da8-74eb-4184-8860-0207f53623be/TellReadJob/output/tmp1 mkdir -p ${TMPDIR} export TMPDIR=$(mktemp -d) @@ -21,8 +21,8 @@ module load foo_module /my/sing/script/path \ -i qp_klp/tests/data/211021_A00000_0000_SAMPLE \ -o /Users/ccowart/SAMPLE_SHEET_MADNESSS/one/qp_klp/tests/data/077c4da8-74eb-4184-8860-0207f53623be/TellReadJob/output \ - -s $(echo LS_8_10_2013_SRE,LS_12_17_2014_SRE,LS_4_4_2015_SRE,LS_2_23_2015_SRE,LS_9_28_2014_SRE,LS_12_14_2013_SRE,LS_4_7_2013_SRE,LS_7_14_2013_SRE,LS_10_27_2013_SRE,LS_1_19_2014_SRE,LS_9_3_2013_SRE,LS_2_25_2013_SRE,LS_7_26_2015_SRE,LS_2_17_2014_SRE,LS_6_29_2015_SRE,LS_3_24_2015_SRE,LS_1_6_2015_SRE,T_LS_7_15_15B_SRE,LS_6_9_2013_SRE,Person_A_SRE,LS_8_22_2014_R2_SRE,Person_B_SRE,LS_8_22_2014_R1_SRE,Person_C_SRE,LS_12_28_2011_SRE,Person_D_SRE,LS_5_4_2014_SRE,45208_1_1,LS_11_6_2012_SRE,45248_2_2,LS_4_3_2012_SRE,45261_2_1,45272_11_2,T_LS_7_12_15A,45316_8_1,T_LS_7_8_15A,45327_7_2,LS_8_10_2013,45272_1_swab_2,LS_6_29_2015,45326_1_swab_2,LS_3_8_2015,T_LS_7_19_15A,LS_4_29_2013,T_LS_7_15_15B,LS_11_16_2014,T_LS_7_19_15B,LS_1_19_2014,LS_3_24_2015,LS_2_8_2013,LS_11_10_2013,Marine_Sediment_0_2cm_R1,LS_3_23_2014,Marine_Sediment_5_7cm_R1,LS_1_14_2015,Marine_Sediment_10_12cm_R2,LS_8_25_2014,Marine_Sediment_15_17cm_R1,LS_1_26_2013,Marine_Sediment_20_22cm_R1,LS_6_16_2014,Marine_Sediment_25_27cm_R2,LS_7_27_2014,Marine_Sediment_30_32cm_R3,Person_A_R3,Soil_SynCom_T4_2_Tube5,Person_B_R2,A21,Person_C_R4,A23,Person_D_R2,A27,Soil_SynCom_T1_2_Tube1,A30,Soil_SynCom_T2_2_Tube2,A31,Soil_SynCom_T3_2_Tube3,S1_T1_A,Soil_SynCom_T4_1_Tube4,S2_T1_B_A,S2_T1_01BH1_Y_A,S1_T1_1CIM_A,S2_MT1_1HBI_Y_A,S1_M1_B_1CIM_A,S1_T1_B_LBM_A,BLANK_K15_cancer_patient,S2_MT1_LBM_A,BLANK_M15_cancer_patient,S2_T1_A,BLANK_O15_cancer_patient,1CIM_M_CNTL_A,BLANK_A17_cancer_patient,1CIM_G_CNTL_A,BLANK_C17_cancer_patient,GC_1HCOM_A,BLANK_E17_cancer_patient | tr -d '"') \ - -g $(echo NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE | tr -d '"') \ + -s $(echo C591,C550,C503,C566,C556,C578,C592 | tr -d '"') \ + -g $(echo NONE,NONE,NONE,NONE,NONE,NONE,NONE | tr -d '"') \ -j ${SLURM_JOB_CPUS_PER_NODE} \ -l s_4 diff --git a/qp_klp/tests/test_basics.py b/qp_klp/tests/test_basics.py index 7234726..3a465aa 100644 --- a/qp_klp/tests/test_basics.py +++ b/qp_klp/tests/test_basics.py @@ -245,8 +245,19 @@ def tearDown(self): if exists(fp): remove(fp) - def create_fake_bin(self, name, content): + def create_fake_bin(self, name, content, chain_cmd=None): tmp = join(self.fake_bin_path, name) + + if chain_cmd: + # if chain_cmd is true, there will be a second + # binary file written named "{name}.2" in the same + # location. The final command of this fake bin will + # be to overwrite itself with "{name}.2" so that + # the next invocation of the the command e.g. 'sbatch' + # will do different things. + tmp2 = join(self.fake_bin_path, chain_cmd) + content += f"\nmv {tmp2} {tmp}\n" + with open(tmp, 'w') as f: f.write(f"#!/bin/sh\n{content}\n") chmod(tmp, 0o777) @@ -804,71 +815,65 @@ def test_partial_amplicon_pipeline(self): # Post-processing for absent Quality Control successful. def test_partial_tellseq_pipeline(self): - cmds = ["echo 'Submitted batch job 9999999'"] - cmds.append("mkdir -p %s" % join(self.output_dir, 'TellReadJob', 'logs')) - cmds = ["echo 'Submitted batch job 9999999'"] + # substitute for UI callback function. + def call_me_back(**kwargs): + jid = kwargs['jid'] + status = kwargs['status'] - # skip creating fastq files for now. - - # write all the statements out into a bash-script named 'sbatch' and - # place it somewhere in the PATH. (It will be removed on tearDown()). - self.create_fake_bin('sbatch', "\n".join(cmds)) - - # create fake squeue binary that writes to stdout what Job() needs to - # see to think that bcl-convert has completed successfully. - self.create_fake_bin('squeue', "echo 'ARRAY_JOB_ID,JOBID,STATE\n" - "9999999,9999999,COMPLETED'") + with open("callback.log", 'a') as f: + print(f"{jid}: {status}", file=f) + # emulate TellReadJob output + cmds = ["echo 'Submitted batch job 9999990'"] + output_dir = join(join(self.output_dir, 'TellReadJob', 'output')) + cmds.append("mkdir -p %s" % join(output_dir, '1_demult', 'Raw')) + cmds.append("mkdir -p %s" % join(self.output_dir, 'TellReadJob', 'logs')) - ''' + # create output we expect to see from tellread. + barcode_ids = ['C591','C550','C503','C566','C556','C578','C592'] + files = [] + for file_type in ['I1', 'R1', 'R2']: + files += [f'project_{file_type}_{x}_raw.fastq.gz' + for x in barcode_ids] + for _file in files: + cmds.append(f"touch {join(output_dir, '1_demult', 'Raw', _file)}") - cmds.append("mkdir -p %s" % join(self.output_dir, 'ConvertJob', 'logs')) + # write all the statements out into a bash-script named 'sbatch' and + # place it somewhere in the PATH. (It will be removed on tearDown()). + self.create_fake_bin('sbatch', "\n".join(cmds), chain_cmd='sbatch.2') - # use the list of sample_ids found in the sample-sheet to generate - # fake fastq files for convert_raw_to_fastq() to find and manipulate. - sheet = load_sample_sheet("qp_klp/tests/data/sample-sheets/metagenomic/" - "tellseq/good_sheet_draft1.csv") - exp = defaultdict(list) - for sample in sheet.samples: - sample = sample.to_json() - exp[sample['Sample_Project']].append(sample['Sample_ID']) + # create fake squeue binary that writes to stdout what Job() needs to + # see to think that tell-read has completed successfully. + self.create_fake_bin('squeue', "echo 'ARRAY_JOB_ID,JOBID,STATE\n" + "9999990,9999990,COMPLETED'", + chain_cmd='squeue.2') - # in order to test post-process auditing, we will manually remove a - # single sample_id from each project in order to simulate failed - # conversions. - simulated_failed_samples = [] + # emulate TRIntegrateJob output - for project in exp: - # sort the list so that files are created in a predictable order. - exp[project].sort() + cmds = ["echo 'Submitted batch job 9999991'"] + # intentionally let's not create a separate output dir. + #output_dir = join(join(self.output_dir, 'TRIntegrateJob', 'output')) + output_dir = join(join(self.output_dir, 'TRIntegrateJob', 'integrated')) + #cmds.append("mkdir -p %s" % join(output_dir, 'integrated')) + cmds.append("mkdir -p %s" % output_dir) + cmds.append("mkdir -p %s" % join(self.output_dir, 'TRIntegrateJob', 'logs')) - simulated_failed_samples.append(exp[project].pop()) + files = [] + for file_type in ['I1', 'R1', 'R2']: + files += [f'project_{file_type}_{x}.fastq.gz' + for x in barcode_ids] - fake_path = join(self.output_dir, 'ConvertJob', project) - cmds.append(f"mkdir -p {fake_path}") + for _file in files: + # cmds.append(f"touch {join(output_dir, 'integrated', _file)}") + cmds.append(f"touch {join(output_dir, _file)}") - for sample in exp[project]: - r1 = join(fake_path, f'{sample}_S123_L001_R1_001.fastq.gz') - r2 = join(fake_path, f'{sample}_S123_L001_R2_001.fastq.gz') + self.create_fake_bin('sbatch.2', "\n".join(cmds)) + self.create_fake_bin('squeue.2', "echo 'ARRAY_JOB_ID,JOBID,STATE\n" + "9999991,9999991,COMPLETED'") - # let r1 and r2 be the same size. - size = randint(1, 10) - for file_path in [r1, r2]: - cmds.append(self._generate_empty_file_cmd(file_path, size)) - - # write all the statements out into a bash-script named 'sbatch' and - # place it somewhere in the PATH. (It will be removed on tearDown()). - self.create_fake_bin('sbatch', "\n".join(cmds)) - - # create fake squeue binary that writes to stdout what Job() needs to - # see to think that bcl-convert has completed successfully. - self.create_fake_bin('squeue', "echo 'ARRAY_JOB_ID,JOBID,STATE\n" - "9999999,9999999,COMPLETED'") - - ''' kwargs = {"uif_path": "qp_klp/tests/data/sample-sheets/metagenomic/tellseq/good_sheet_draft1.csv", "qclient": FakeClient(), "lane_number": "4", @@ -876,12 +881,15 @@ def test_partial_tellseq_pipeline(self): "run_identifier": '211021_A00000_0000_SAMPLE', "output_dir": self.output_dir, "job_id": "077c4da8-74eb-4184-8860-0207f53623be", + "status_update_callback": call_me_back, "is_restart": False } wf = WorkflowFactory.generate_workflow(**kwargs) - wf.convert_raw_to_fastq() + audit_results = sorted(wf.convert_raw_to_fastq()) + + print("AUDIT RESULTS: %s" % audit_results) # verify job script was properly created s = 'qp_klp/tests/data/077c4da8-74eb-4184-8860-0207f53623be/TellReadJob' @@ -891,7 +899,7 @@ def test_partial_tellseq_pipeline(self): self.assertTrue(exists(trjob_script)) def open_job_script(script_path): - with open(trjob_script, 'r') as f: + with open(script_path, 'r') as f: obs = f.readlines() obs = [x.strip() for x in obs] obs = [re.sub('-directory .*?/qp_klp', @@ -905,114 +913,5 @@ def open_job_script(script_path): self.assertTrue(False) - # Illumina.convert_raw_to_fastq() calls Job.audit() after bcl-convert - # exists and will identify the samples that failed to process. Confirm - # the values are correct here. - audit_results = sorted(wf.convert_raw_to_fastq()) - - self.assertEqual(audit_results, sorted(simulated_failed_samples)) - - # NB: bcl-convert's presence in ConvertJob.sh confirms it is getting - # path and binary name from the correct configuration file. Note it - # doesn't check to see that the binary exists because where Job() runs - # is not the same location as where bcl-convert will run (compute- - # node.) - - # confirm ConvertJob.sh Slurm job script looks exactly as intended by - # confirming its digest. - - exp = ['#!/bin/bash', - '#SBATCH --job-name None_ConvertJob', - '#SBATCH -p qiita', - '#SBATCH -N 1', - '#SBATCH -n 16', - '#SBATCH --time 216', - '#SBATCH --mail-type=ALL', - '#SBATCH --mail-user qiita.help@gmail.com', - '#SBATCH --mem-per-cpu 10gb', - 'set -x', - 'date', - 'hostname', - 'cd qp_klp/tests/data/211021_A00000_0000_SAMPLE', - 'module load bclconvert_3.7.5', - 'bcl-convert --sample-sheet "qp_klp/tests/data/sample-sheets/' - 'metagenomic/illumina/good_sheet1.csv" --output-directory ' - 'qp_klp/tests/data/077c4da8-74eb-4184-8860-0207f53623be/' - 'ConvertJob --bcl-input-directory . --bcl-num-decompression-' - 'threads 16 --bcl-num-conversion-threads 16 --bcl-num-' - 'compression-threads 16 --bcl-num-parallel-tiles 16 ' - '--bcl-sampleproject-subdirectories true --force'] - - with open("qp_klp/tests/data/077c4da8-74eb-4184-8860-0207f53623be/" - "ConvertJob/ConvertJob.sh", 'r') as f: - obs = f.readlines() - obs = [x.strip() for x in obs] - obs = [re.sub('-directory .*?/qp_klp', - '-directory qp_klp', x) for x in obs] - - self.assertEqual(obs, exp) - - # ConvertJob successful. - cmds = [] - - # the lines to recreate the directories a standard Job() object creates. - cmds.append("mkdir -p %s" % join(self.output_dir, 'NuQCJob', 'logs')) - cmds.append("mkdir -p %s" % join(self.output_dir, 'NuQCJob', 'only-adapter-filtered')) - cmds.append("mkdir -p %s" % join(self.output_dir, 'NuQCJob', 'fastp_reports_dir', 'html')) - cmds.append("mkdir -p %s" % join(self.output_dir, 'NuQCJob', 'fastp_reports_dir', 'json')) - cmds.append("mkdir -p %s" % join(self.output_dir, 'NuQCJob', 'tmp')) - cmds.append("mkdir -p %s" % join(self.output_dir, 'NuQCJob', 'tmp.564341')) - - # simulate host-filtering scripts by scanning contents of ConvertJob - # directory and copying the files into the expected location for post- - # processing. - for root, _, files in walk(join(self.output_dir, 'ConvertJob')): - for _file in files: - # don't process anything from ConvertJob directory that isn't - # a simulated fastq file. Since these are fake files we can - # assume 'Undetermined' fastq files are not present. - if not _file.endswith('.fastq.gz'): - continue - - raw_file = join(root, _file) - _, project_name = split(root) - - new_name = _file.replace('.fastq.gz', '.interleave.fastq.gz') - file_path = join(self.output_dir, 'NuQCJob', 'only-adapter-filtered', new_name) - cmds.append(f"cp {raw_file} {file_path}") - - cmds.append("mkdir -p %s" % join(self.output_dir, 'NuQCJob', project_name, 'filtered_sequences')) - - file_path = join(self.output_dir, 'NuQCJob', project_name, 'filtered_sequences', _file) - cmds.append(f"cp {raw_file} {file_path}") - - new_name = _file.replace('.fastq.gz', '.html') - file_path = join(self.output_dir, 'NuQCJob', 'fastp_reports_dir', 'html', new_name) - cmds.append(f"echo 'This is an html file.' > {file_path}") - - new_name = _file.replace('.fastq.gz', '.json') - file_path = join(self.output_dir, 'NuQCJob', 'fastp_reports_dir', 'json', new_name) - cmds.append(f"echo 'This is a json file.' > {file_path}") - - cmds.append("echo 'Submitted batch job 9999999'") - # write all the statements out into a bash-script named 'sbatch' and - # place it somewhere in the PATH. (It will be removed on tearDown()). - self.create_fake_bin('sbatch', "\n".join(cmds)) - audit_results = sorted(wf.quality_control()) - - # TODO: Add assertion tests for NuQCJob - - # add tests to test audit results, modify test to introduce some 'zero-length' files. - # add some assertions to show that the post-processing step is munging the correct - # directory structure needed for subsequent steps. - - # TODO: Test that some files end up in zero-files folder. - - #NuQCJob successful. - - # TODO: set up fake multiqc bin to. fake results of fastqc and - # multiqc jobs. - # audit_results = sorted(wf.generate_reports()) -