fixed bug in embedded strings

michaelkamprath · Apr 21, 2024 · 4cd5252 · 4cd5252
1 parent 8ae1696
commit 4cd5252
Show file tree

Hide file tree

Showing 2 changed files with 38 additions and 10 deletions.
diff --git a/src/bespokeasm/assembler/line_object/emdedded_string.py b/src/bespokeasm/assembler/line_object/emdedded_string.py
@@ -16,7 +16,7 @@
 class EmbeddedString(LineWithBytes):
     QUOTED_STRING_PATTERN = re.compile(
         rf'^{EMBEDDED_STRING_PATTERN}',
-        flags=re.IGNORECASE | re.MULTILINE
+        flags=re.IGNORECASE | re.MULTILINE | re.DOTALL
     )
 
     @classmethod
@@ -45,19 +45,18 @@ def __init__(
             cstr_terminator: int = 0,
     ) -> None:
         super().__init__(line_id, instruction, comment, current_memzone)
-        self._quoted_string = quoted_string
-        self._cstr_terminator = cstr_terminator
+        self._string_bytes = \
+            [ord(x) for x in list(bytes(quoted_string, 'utf-8').decode('unicode_escape'))] \
+            + [cstr_terminator]
+
+    def __str__(self):
+        return f'EmbeddedString<{self.instruction}, size={self.byte_size}, chars={self._string_bytes}>'
 
     @property
     def byte_size(self) -> int:
         """Returns the number of bytes this data line will generate"""
-        return len(self._quoted_string) + 1
+        return len(self._string_bytes)
 
     def generate_bytes(self) -> None:
-        # convert the quoted string to bytes
-        converted_str = bytes(self._quoted_string, 'utf-8').decode('unicode_escape')
-        values_list = [ord(x) for x in list(converted_str)]
-        # add the terminator
-        values_list.append(self._cstr_terminator)
         # set the bytes
-        self._bytes.extend(values_list)
+        self._bytes.extend(self._string_bytes)
diff --git a/test/test_line_objects.py b/test/test_line_objects.py
@@ -843,6 +843,35 @@ def test_embedded_string_lines(self):
                 0,
             )
 
+    def test_embedded_string_bugs(self):
+        fp = pkg_resources.files(config_files).joinpath('test_operand_features.yaml')
+        isa_model = AssemblerModel(str(fp), 0)
+        # force embedded strings to be allowed
+        isa_model._config['general']['allow_embedded_strings'] = True
+        memzone_mngr = MemoryZoneManager(
+            isa_model.address_size,
+            isa_model.default_origin,
+            isa_model.predefined_memory_zones,
+        )
+
+        lineid = LineIdentifier(67, 'test_embedded_string_bugs')
+        # Test the bug where the length of an embedded string was calculated incorrectly.
+        # Escape sequences in code files are double-escaped when read in, so the
+        # string "\n" is read as "\\n". The byte conversion correctly turns the escape
+        # sequence into its single character, but the bug was that the length was taken
+        # before the escape sequences were converted, so the reported size of the string
+        # was 3 instead of 2 (both counts include the terminator byte).
+        t1 = EmbeddedString.factory(
+            lineid,
+            '"\\n"',
+            'embedded string',
+            memzone_mngr.global_zone,
+            0,
+        )
+        self.assertIsNotNone(t1, 'embedded string object created')
+        self.assertIsInstance(t1, EmbeddedString)
+        self.assertEqual(t1.byte_size, 2, 'string has 2 bytes')
+
 
 if __name__ == '__main__':
     unittest.main()