If the FCS file uses whitespace delimiters (e.g. \x0c), avoid stripping (#37)

* If the FCS file uses whitespace delimiters (e.g. \x0c), avoid stripping; otherwise the $BEGINDATA could be cut. In my case I had headers without 'data start', where the first data row had: \x0c$BEGINDATA. So doing both the trim and the `raw_text = raw_text[1:]` means that we cut off the '$', which results in an error on line 381: `self._data_start = int(text['$BEGINDATA'])`
* Added unit tests for whitespace delimiter

Co-authored-by: Gergely Csegzi <[email protected]>
eyurtsev · Oct 24, 2021 · c32cb06 · c32cb06
1 parent dc82c22
commit c32cb06
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 1 deletion.
diff --git a/fcsparser/api.py b/fcsparser/api.py
@@ -280,7 +280,9 @@ def _extract_text_dict(raw_text):
         delimiter = raw_text[0]
 
         if raw_text[-1] != delimiter:
-            raw_text = raw_text.strip()
+            # Avoid stripping whitespace delimiter
+            if delimiter.strip() == delimiter:
+                raw_text = raw_text.strip()
             if raw_text[-1] != delimiter:
                 msg = (u'The first two characters were:\n {}. The last two characters were: {}\n'
                        u'Parser expects the same delimiter character in beginning '

diff --git a/fcsparser/tests/test_fcs_reader.py b/fcsparser/tests/test_fcs_reader.py
@@ -84,6 +84,23 @@ def test_repeated_delimiter_text_segment(self):
         text = parser._extract_text_dict(raw_text)
         self.assertDictEqual(text, {'flow_speed': '3 m/s', 'x': 'a/', 'y': 'b//'})
 
+    def whitespace_delimiter_test_helper(self, has_final_delimiter: bool):
+        parser = FCSParser()
+        delimiter = '\t'
+        text_values = ['$BEGINDATA', '15', '$ENDDATA', '500']
+        raw_text = delimiter + delimiter.join(text_values)
+        if has_final_delimiter:
+            raw_text = raw_text + delimiter
+        text = parser._extract_text_dict(raw_text)
+        self.assertDictEqual(text, {'$BEGINDATA': '15', '$ENDDATA': '500'})
+
+    def test_whitespace_delimited_text_extraction(self):
+        TestFCSReader.whitespace_delimiter_test_helper(self, has_final_delimiter=True)
+
+
+    def test_whitespace_delimited_text_extraction_no_final_delimiter(self):
+        TestFCSReader.whitespace_delimiter_test_helper(self, has_final_delimiter=False)
+
     def test_mq_FCS_2_0_data_segment(self):
         """Test DATA segment parsed from FCS (2.0 format) file from a MACSQuant flow cytometer"""
         values = np.array([[1.60764902830123901367e-03, 1.46554875373840332031e+00,