Skip to content

Commit

Permalink
fixed bug in embedded strings
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelkamprath committed Apr 21, 2024
1 parent 8ae1696 commit 4cd5252
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
19 changes: 9 additions & 10 deletions src/bespokeasm/assembler/line_object/emdedded_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
class EmbeddedString(LineWithBytes):
QUOTED_STRING_PATTERN = re.compile(
rf'^{EMBEDDED_STRING_PATTERN}',
flags=re.IGNORECASE | re.MULTILINE
flags=re.IGNORECASE | re.MULTILINE | re.DOTALL
)

@classmethod
Expand Down Expand Up @@ -45,19 +45,18 @@ def __init__(
cstr_terminator: int = 0,
) -> None:
super().__init__(line_id, instruction, comment, current_memzone)
self._quoted_string = quoted_string
self._cstr_terminator = cstr_terminator
self._string_bytes = \
[ord(x) for x in list(bytes(quoted_string, 'utf-8').decode('unicode_escape'))] \
+ [cstr_terminator]

def __str__(self):
    """Return a debug description: instruction text, byte size and byte values."""
    instruction = self.instruction
    size = self.byte_size
    chars = self._string_bytes
    return f'EmbeddedString<{instruction}, size={size}, chars={chars}>'

@property
def byte_size(self) -> int:
    """Return the number of bytes this data line will generate.

    The count comes from the byte values prepared in the constructor, i.e.
    after escape sequences have been decoded and the terminator appended.
    Measuring the raw quoted text instead over-counts every escape sequence
    (a backslash followed by ``n`` is two characters of source text but only
    one emitted byte).
    """
    return len(self._string_bytes)

def generate_bytes(self) -> None:
    """Append this string's byte values to ``self._bytes``.

    The values (decoded characters followed by the terminator) are computed
    once in the constructor and stored in ``self._string_bytes``; they are
    emitted here exactly once. Re-converting the quoted text in this method
    as well would append the string and its terminator a second time and
    disagree with ``byte_size``.
    """
    self._bytes.extend(self._string_bytes)
29 changes: 29 additions & 0 deletions test/test_line_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,35 @@ def test_embedded_string_lines(self):
0,
)

def test_embedded_string_bugs(self):
    """Regression test: byte size must be measured after escape decoding."""
    config_path = pkg_resources.files(config_files).joinpath('test_operand_features.yaml')
    model = AssemblerModel(str(config_path), 0)
    # Switch embedded-string support on regardless of the loaded configuration.
    model._config['general']['allow_embedded_strings'] = True
    zone_manager = MemoryZoneManager(
        model.address_size,
        model.default_origin,
        model.predefined_memory_zones,
    )

    line_id = LineIdentifier(67, 'test_embedded_string_bugs')
    # Escape sequences in code files arrive double escaped, so a source "\n"
    # is read as the two characters backslash + n. The byte conversion turns
    # that pair into a single newline value, but the size used to be taken
    # from the text before conversion, giving 3 (two characters plus the
    # terminator) instead of the correct 2 (newline plus the terminator).
    embedded = EmbeddedString.factory(
        line_id,
        '"\\n"',
        'embedded string',
        zone_manager.global_zone,
        0,
    )
    self.assertIsNotNone(embedded, 'embedded string object created')
    self.assertIsInstance(embedded, EmbeddedString)
    self.assertEqual(embedded.byte_size, 2, 'string has 2 bytes')


if __name__ == '__main__':
unittest.main()

0 comments on commit 4cd5252

Please sign in to comment.