Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[stdlib] Fix atol parsing for prefixed integer literals with leading underscores #3180

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,13 @@ future and `StringSlice.__len__` now does return the Unicode codepoints length.

- `SIMD.load/store` now supports `UnsafePointer` overloads.

- The `atol` function now correctly supports leading underscores,
(e.g.`atol("0x_ff", 0)`), when the appropriate base is specified or inferred
(base 0). non-base-10 integer literals as per Python's [Integer Literals](\
<https://docs.python.org/3/reference/lexical_analysis.html#integers>).
([PR #3180](https://github.com/modularml/mojo/pull/3180)
by [@jjvraw](https://github.com/jjvraw))

### ❌ Removed

- It is no longer possible to cast (implicitly or explicitly) from `Reference`
Expand Down
12 changes: 8 additions & 4 deletions stdlib/src/builtin/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
var ord_letter_max = (-1, -1)
var result = 0
var is_negative: Bool = False
var has_prefix: Bool = False
var start: Int = 0
var str_len = len(str_ref)
var buff = str_ref.unsafe_ptr()
Expand All @@ -250,14 +251,17 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
str_ref[start + 1] == "b" or str_ref[start + 1] == "B"
):
start += 2
has_prefix = True
elif base == 8 and (
str_ref[start + 1] == "o" or str_ref[start + 1] == "O"
):
start += 2
has_prefix = True
elif base == 16 and (
str_ref[start + 1] == "x" or str_ref[start + 1] == "X"
):
start += 2
has_prefix = True

alias ord_0 = ord("0")
# FIXME:
Expand All @@ -269,6 +273,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
var real_base_new_start = _identify_base(str_ref, start)
real_base = real_base_new_start[0]
start = real_base_new_start[1]
has_prefix = real_base != 10
if real_base == -1:
raise Error(_atol_error(base, str_ref))
else:
Expand All @@ -285,10 +290,9 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:

var found_valid_chars_after_start = False
var has_space_after_number = False
# single underscores are only allowed between digits
# starting "was_last_digit_undescore" to true such that
# if the first digit is an undesrcore an error is raised
var was_last_digit_undescore = True
# Prefixed integer literals with real_base 2, 8, 16 may begin with leading
# underscores under the conditions they have a prefix
var was_last_digit_undescore = not (real_base in (2, 8, 16) and has_prefix)
for pos in range(start, str_len):
var ord_current = int(buff[pos])
if ord_current == ord_underscore:
Expand Down
41 changes: 36 additions & 5 deletions stdlib/test/builtin/test_string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,9 @@ def test_atol():
assert_equal(10, atol("0o12", 8))
assert_equal(10, atol("0O12", 8))
assert_equal(35, atol("Z", 36))
assert_equal(255, atol("0x_00_ff", 16))
assert_equal(18, atol("0b0001_0010", 2))
assert_equal(18, atol("0b_000_1001_0", 2))

# Negative cases
with assert_raises(
Expand Down Expand Up @@ -398,12 +401,37 @@ def test_atol():
):
_ = atol("5", 5)

with assert_raises(
contains="String is not convertible to integer with base 10: '0x_ff'"
):
_ = atol("0x_ff")

with assert_raises(
contains="String is not convertible to integer with base 3: '_12'"
):
_ = atol("_12", 3)

with assert_raises(contains="Base must be >= 2 and <= 36, or 0."):
_ = atol("0", 1)

with assert_raises(contains="Base must be >= 2 and <= 36, or 0."):
_ = atol("0", 37)

with assert_raises(
contains="String is not convertible to integer with base 16: '_ff'"
):
_ = atol("_ff", base=16)

with assert_raises(
contains="String is not convertible to integer with base 2: ' _01'"
):
_ = atol(" _01", base=2)

with assert_raises(
contains="String is not convertible to integer with base 10: '0x_ff'"
):
_ = atol("0x_ff")

with assert_raises(
contains="String is not convertible to integer with base 10: ''"
):
Expand Down Expand Up @@ -433,6 +461,14 @@ def test_atol_base_0():

assert_equal(0, atol("0X0", base=0))

assert_equal(255, atol("0x_00_ff", base=0))

assert_equal(18, atol("0b_0001_0010", base=0))
assert_equal(18, atol("0b000_1001_0", base=0))

assert_equal(10, atol("0o_000_12", base=0))
assert_equal(10, atol("0o00_12", base=0))

with assert_raises(
contains="String is not convertible to integer with base 0: ' 0x'"
):
Expand All @@ -453,11 +489,6 @@ def test_atol_base_0():
):
_ = atol("0r100", base=0)

with assert_raises(
contains="String is not convertible to integer with base 0: '0b_0'"
):
_ = atol("0b_0", base=0)

with assert_raises(
contains="String is not convertible to integer with base 0: '0xf__f'"
):
Expand Down