From c32cb06e0df7ca56b77728972c0518d88b692d7a Mon Sep 17 00:00:00 2001 From: gergelycsegzi <33738892+gergelycsegzi@users.noreply.github.com> Date: Sun, 24 Oct 2021 18:07:25 +0100 Subject: [PATCH] If the FCS file uses whitespace delimiters (e.g. \x0c) avoid stripping (#37) * If the FCS file uses whitespace delimiters (e.g. \x0c) avoid stripping. Otherwise the $BEGINDATA keyword could be cut. In my case I had headers without 'data start' where the first data row had: \x0c$BEGINDATA. So doing both the trim and the raw_text = raw_text[1:] means that we cut off the '$', which results in an error on line 381: self._data_start = int(text['$BEGINDATA']) * Added unit tests for whitespace delimiter Co-authored-by: Gergely Csegzi --- fcsparser/api.py | 4 +++- fcsparser/tests/test_fcs_reader.py | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fcsparser/api.py b/fcsparser/api.py index 080a183..796bb85 100644 --- a/fcsparser/api.py +++ b/fcsparser/api.py @@ -280,7 +280,9 @@ def _extract_text_dict(raw_text): delimiter = raw_text[0] if raw_text[-1] != delimiter: - raw_text = raw_text.strip() + # Avoid stripping whitespace delimiter + if delimiter.strip() == delimiter: + raw_text = raw_text.strip() if raw_text[-1] != delimiter: msg = (u'The first two characters were:\n {}. 
The last two characters were: {}\n' u'Parser expects the same delimiter character in beginning ' diff --git a/fcsparser/tests/test_fcs_reader.py b/fcsparser/tests/test_fcs_reader.py index 9255ffe..c9b86f7 100755 --- a/fcsparser/tests/test_fcs_reader.py +++ b/fcsparser/tests/test_fcs_reader.py @@ -84,6 +84,23 @@ def test_repeated_delimiter_text_segment(self): text = parser._extract_text_dict(raw_text) self.assertDictEqual(text, {'flow_speed': '3 m/s', 'x': 'a/', 'y': 'b//'}) + def whitespace_delimiter_test_helper(self, has_final_delimiter: bool): + parser = FCSParser() + delimiter = '\t' + text_values = ['$BEGINDATA', '15', '$ENDDATA', '500'] + raw_text = delimiter + delimiter.join(text_values) + if has_final_delimiter: + raw_text = raw_text + delimiter + text = parser._extract_text_dict(raw_text) + self.assertDictEqual(text, {'$BEGINDATA': '15', '$ENDDATA': '500'}) + + def test_whitespace_delimited_text_extraction(self): + TestFCSReader.whitespace_delimiter_test_helper(self, has_final_delimiter=True) + + + def test_whitespace_delimited_text_extraction_no_final_delimiter(self): + TestFCSReader.whitespace_delimiter_test_helper(self, has_final_delimiter=False) + def test_mq_FCS_2_0_data_segment(self): """Test DATA segment parsed from FCS (2.0 format) file from a MACSQuant flow cytometer""" values = np.array([[1.60764902830123901367e-03, 1.46554875373840332031e+00,