Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scRNA - fixes for file validations #1453

Merged
merged 6 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 97 additions & 27 deletions app/models/labware_creators/plate_split_to_tube_racks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,12 @@ class PlateSplitToTubeRacks < Base
validates_nested :sequencing_csv_file, if: :sequencing_file
validates_nested :contingency_csv_file, if: :contingency_file

# validations for duplications between the two tube rack files
validate :check_tube_rack_barcodes_differ_between_files
validate :check_tube_barcodes_differ_between_files

# validate there are sufficient tubes in the racks for the number of parent wells
validate :must_have_sufficient_tubes_in_rack, if: :contingency_file
validate :must_have_sufficient_tubes_in_rack_files, if: :contingency_file

# validate that the tube barcodes do not already exist in the system
validate :tube_barcodes_are_unique?
Expand Down Expand Up @@ -137,6 +141,22 @@ def anchor
'children_tab'
end

# Returns a CsvFile object for the sequencing tube rack scan CSV file, or nil if the file doesn't exist.
#
# @return [CsvFile, nil] A CsvFile object for the sequencing tube rack scan CSV file, or nil if the file
# doesn't exist.
def sequencing_csv_file
@sequencing_csv_file ||= CsvFile.new(sequencing_file) if sequencing_file
end

# Returns a CsvFile object for the contingency tube rack scan CSV file, or nil if the file doesn't exist.
#
# @return [CsvFile, nil] A CsvFile object for the contingency tube rack scan CSV file, or nil if the file
# doesn't exist.
def contingency_csv_file
@contingency_csv_file ||= CsvFile.new(contingency_file) if contingency_file
end

# Returns the number of unique sample UUIDs for the parent wells after applying the current well filter.
#
# @return [Integer] The number of unique sample UUIDs.
Expand All @@ -151,27 +171,93 @@ def num_parent_wells
@num_parent_wells ||= well_filter.filtered.length
end

# Validation method that checks whether there are enough tubes in the rack.
# Validation to compare the tube rack barcodes in the two files to check for duplication
#
# @return [void]
def must_have_sufficient_tubes_in_rack
errors.add(:tube_rack_file, 'Must have sufficient tubes in rack') unless sufficient_tubes_in_racks?
# Sets errors if the tube rack barcodes are the same
def check_tube_rack_barcodes_differ_between_files
return unless contingency_file_correctly_parsed? && sequencing_file_correctly_parsed?

return unless same_tube_rack_barcode?

errors.add(
:contingency_csv_file,
'The tube rack barcodes within the contingency and sequencing files must be different'
)
end

def same_tube_rack_barcode?
seq_tube_rack = extract_tube_rack_barcode(sequencing_csv_file)
cont_tube_rack = extract_tube_rack_barcode(contingency_csv_file)

seq_tube_rack == cont_tube_rack
end

def extract_tube_rack_barcode(file)
file.position_details.values.first['tube_rack_barcode']
end

# Validation to compare the tube barcodes in the two files to check for duplication
#
# Sets errors if the tube rack barcodes are the same
def check_tube_barcodes_differ_between_files
return unless contingency_file_correctly_parsed? && sequencing_file_correctly_parsed?

seq_barcodes = extract_barcodes(sequencing_csv_file)
cont_barcodes = extract_barcodes(contingency_csv_file)

duplicate_barcodes = seq_barcodes & cont_barcodes
return if duplicate_barcodes.empty?

errors.add(
:contingency_csv_file,
"Tube barcodes are duplicated across contingency and sequencing files (#{duplicate_barcodes.join(', ')})"
)
end

def extract_barcodes(file)
file.position_details.values.pluck('tube_barcode')
end

# Checks if there are sufficient tubes in the child tube racks for all the parent wells.
# Validation that checks if there are sufficient tubes in the child tube racks for all the parent wells.
# This depends on the number of unique samples in the parent plate, and the number of parent wells,
# as well as whether they are using both sequencing tubes and contingency tubes or just contingency.
#
# @return [Boolean] `true` if there are sufficient tubes, `false` otherwise.
def sufficient_tubes_in_racks?
# Sets errors if there are insufficient tubes.
def must_have_sufficient_tubes_in_rack_files
return unless files_correctly_parsed?

if require_contingency_tubes_only?
num_contingency_tubes >= num_parent_wells
add_error_if_insufficient_tubes(:contingency_csv_file, num_contingency_tubes, num_parent_wells)
else
(num_sequencing_tubes >= num_parent_unique_samples) &&
(num_contingency_tubes >= (num_parent_wells - num_parent_unique_samples))
add_error_if_insufficient_tubes(
:contingency_csv_file,
num_contingency_tubes,
num_parent_wells - num_parent_unique_samples
)
add_error_if_insufficient_tubes(:sequencing_csv_file, num_sequencing_tubes, num_parent_unique_samples)
end
end

# Checks the files parsed correctly
def files_correctly_parsed?
return contingency_file_correctly_parsed? if require_contingency_tubes_only?

contingency_file_correctly_parsed? && sequencing_file_correctly_parsed?
end

def sequencing_file_correctly_parsed?
sequencing_file.present? && sequencing_csv_file.correctly_parsed?
end

def contingency_file_correctly_parsed?
contingency_file.present? && contingency_csv_file.correctly_parsed?
end

# Adds an error when there are insufficient tubes in the given file
def add_error_if_insufficient_tubes(file, num_tubes, required_tubes)
errors.add(file, 'contains insufficient tubes') unless num_tubes >= required_tubes
end

# Validation that the tube barcodes are unique and do not already exist in the system.
# NB. this checks all the tube barcodes in the uploaded tube rack scan files, not just the
# ones that will be used.
Expand Down Expand Up @@ -261,22 +347,6 @@ def upload_tube_rack_files
parent_v1.qc_files.create_from_file!(contingency_file, 'scrna_core_contingency_tube_rack_scan.csv')
end

# Returns a CsvFile object for the sequencing tube rack scan CSV file, or nil if the file doesn't exist.
#
# @return [CsvFile, nil] A CsvFile object for the sequencing tube rack scan CSV file, or nil if the file
# doesn't exist.
def sequencing_csv_file
@sequencing_csv_file ||= CsvFile.new(sequencing_file) if sequencing_file
end

# Returns a CsvFile object for the contingency tube rack scan CSV file, or nil if the file doesn't exist.
#
# @return [CsvFile, nil] A CsvFile object for the contingency tube rack scan CSV file, or nil if the file
# doesn't exist.
def contingency_csv_file
@contingency_csv_file ||= CsvFile.new(contingency_file) if contingency_file
end

# Returns true if only contingency tubes are required for the parent plate, false otherwise.
#
# @return [Boolean]
Expand Down
37 changes: 35 additions & 2 deletions app/models/labware_creators/plate_split_to_tube_racks/csv_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ module LabwareCreators
# and the tube locations then used to create a driver file for the liquid handler.
# The filename of the file should also contain the tube rack barcode.
# Example of file content (NB. no header line):
# TR00012345, A1, FX05653780
# TR00012345, A2, NO READ
# TR00012345,A1,FX05653780
# TR00012345,A2,NO READ
# etc.
#
class PlateSplitToTubeRacks::CsvFile
Expand All @@ -23,6 +23,9 @@ class PlateSplitToTubeRacks::CsvFile

validate :correctly_parsed?
validates_nested :tube_rack_scan, if: :correctly_formatted?
validate :check_for_rack_barcodes_the_same, if: :correctly_formatted?
validate :check_no_duplicate_well_coordinates, if: :correctly_formatted?
validate :check_no_duplicate_tube_barcodes, if: :correctly_formatted?

NO_TUBE_TEXTS = ['NO READ', 'NOSCAN'].freeze

Expand Down Expand Up @@ -97,6 +100,36 @@ def correctly_formatted?
correctly_parsed?
end

def check_for_rack_barcodes_the_same
tube_rack_barcodes = tube_rack_scan.group_by(&:tube_rack_barcode).keys

return unless tube_rack_barcodes.size > 1

barcodes_str = tube_rack_barcodes.join(',')
errors.add(:base, "Should not contain different rack barcodes (#{barcodes_str})")
end

def check_no_duplicate_well_coordinates
duplicated_well_coordinates =
tube_rack_scan.group_by(&:tube_position).select { |_position, tubes| tubes.size > 1 }.keys.join(',')

return if duplicated_well_coordinates.empty?

errors.add(:base, "Contains duplicate well coordinates (#{duplicated_well_coordinates})")
end

def check_no_duplicate_tube_barcodes
duplicates = tube_rack_scan.group_by(&:tube_barcode).select { |_tube_barcode, tubes| tubes.size > 1 }.keys

# remove any NO READ or NOSCAN values
ignore_list = ['NO READ', 'NOSCAN']
duplicates = duplicates.reject { |barcode| ignore_list.include?(barcode) }

return if duplicates.empty?

errors.add(:base, "Contains duplicate tube barcodes (#{duplicates.join(',')})")
end

# Generates a hash of position details based on the tube rack scan data in the CSV file.
#
# @return [Hash] A hash of position details, where the keys are positions and the values
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
FX12345678, A1, AB10000001
FX12345678, B1, AB10000002
FX12345678, C1, AB10000003
FX12345678, D1, AB10000004
FX12345678, E1, AB10000005
FX12345678, F1, AB10000006
FX12345678, G1, AB10000007
FX12345678, H1, AB10000008
FX23838838, A2, AB10000009
FX12345678, B2, AB10000010
FX12345678, C2, AB10000011
FX12345678, D2, AB10000012
FX12345678, E2, AB10000013
FX12345678, F2, AB10000014
FX12345678, G2, AB10000015
FX12345678, H2, AB10000016
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
FX12345678, A1, AB10000001
FX12345678, B1, AB10000002
FX12345678, C1, AB10000003
FX12345678, D1, AB10000004
FX12345678, E1, NO READ
FX12345678, F1, AB10000006
FX12345678, G1, AB10000007
FX12345678, H1, AB10000008
FX12345678, A2, AB10000009
FX12345678, B2, AB10000009
FX12345678, C2, AB10000011
FX12345678, D2, AB10000011
FX12345678, E2, AB10000013
FX12345678, F2, AB10000014
FX12345678, G2, NO READ
FX12345678, H2, AB10000016
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
FX12345678, A1, AB10000001
FX12345678, B1, AB10000002
FX12345678, C1, AB10000003
FX12345678, D1, AB10000004
FX12345678, E1, AB10000005
FX12345678, F1, AB10000006
FX12345678, G1, AB10000007
FX12345678, H1, AB10000008
FX12345678, A2, AB10000009
FX12345678, A2, AB10000010
FX12345678, C2, AB10000011
FX12345678, D2, AB10000012
FX12345678, E2, AB10000013
FX12345678, E2, AB10000014
FX12345678, G2, AB10000015
FX12345678, H2, AB10000016
Original file line number Diff line number Diff line change
Expand Up @@ -300,4 +300,69 @@
end
end
end

# there should be only one rack barcode in the file and it should be the same for all rows
context 'A file with inconsistant rack barcodes' do
let(:file) do
fixture_file_upload(
'spec/fixtures/files/plate_split_to_tube_racks/tube_rack_scan_with_different_rack_barcodes.csv',
'sequencescape/qc_file'
)
end

describe '#valid?' do
it 'should be invalid' do
expect(subject.valid?).to be false
end

it 'reports the errors' do
subject.valid?
expect(subject.errors.full_messages).to include(
'Should not contain different rack barcodes (FX12345678,FX23838838)'
)
end
end
end

# the same well position should not appear more than once in the file
context 'A file with duplicated well positions' do
let(:file) do
fixture_file_upload(
'spec/fixtures/files/plate_split_to_tube_racks/tube_rack_scan_with_duplicate_well_positions.csv',
'sequencescape/qc_file'
)
end

describe '#valid?' do
it 'should be invalid' do
expect(subject.valid?).to be false
end

it 'reports the errors' do
subject.valid?
expect(subject.errors.full_messages).to include('Contains duplicate well coordinates (A2,E2)')
end
end
end

# the same tube barcode should not appear more than once in the file
context 'A file with duplicated tube barcodes' do
let(:file) do
fixture_file_upload(
'spec/fixtures/files/plate_split_to_tube_racks/tube_rack_scan_with_duplicate_tubes.csv',
'sequencescape/qc_file'
)
end

describe '#valid?' do
it 'should not be valid' do
expect(subject.valid?).to be false
end

it 'reports the errors' do
subject.valid?
expect(subject.errors.full_messages).to include('Contains duplicate tube barcodes (AB10000009,AB10000011)')
end
end
end
end
Loading
Loading