From 52e16dde8fc48b11fdf1d95471ac775fbb8fb18a Mon Sep 17 00:00:00 2001 From: Shiro Kawai Date: Sat, 4 Jan 2025 18:26:06 -1000 Subject: [PATCH] Support repeating decimal notation --- ChangeLog | 8 ++ lib/gauche/numioutil.scm | 37 ++++++--- src/number.c | 159 +++++++++++++++++++++++++++++++++------ test/number.scm | 28 +++++-- 4 files changed, 190 insertions(+), 42 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3206929d2..b5398944c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2025-01-04 Shiro Kawai + + * src/number.c, lib/gauche/numioutil.scm: Support repeating decimal + notation. Delegate R7RS padding '#' reading to Scheme routine. + Technically, this turns a previously non-number token into a number + (e.g. '0.#1 was read as a symbol before, but now it's a number). + I expect such weird tokens have been |-escaped in practice. + 2024-12-31 Shiro Kawai * src/number.c (read_real): Allow much larger exponent for explicitly diff --git a/lib/gauche/numioutil.scm b/lib/gauche/numioutil.scm index 8ccc65d7e..70cba8dc6 100644 --- a/lib/gauche/numioutil.scm +++ b/lib/gauche/numioutil.scm @@ -78,22 +78,32 @@ (display (substring digits (- diglen k) diglen)) (+ diglen 3 (if (negative? number) 1 0))))) -;; Repeating decimals -;; We support notation of 1.2#34 as 1.23434343434... +;; Read hashsign number literals. ;; -;; - The caller deals with numeric prefixes and exponent part, so -;; the 'main' numeric part, which consists of digits, '#', '.', -;; and '_', is passed. -;; - Returns either a rational or #f. +;; We support 2 kinds of syntax. +;; 1. Insignificant digits (R5RS) - If a portion of numeric literal +;; has '#'s up to the end, it designates insignificant digits. +;; We read it as if it's '0'. +;; 123##.## == 12300.00 +;; 2. Repeating decimal (Gauche) - If any digits (including decimal point) +;; follow a single '#', it designates a repeating decimal, e.g. +;; 0.5#12 == 0.512121212...
+;; +;; When the main number reader detects '#', it cuts out the ureal portion +;; including '#', and calls read-hashsign-numeric. +;; +;; It returns a rational number, or #f if the syntax is invalid. -(define (read-repeating-decimal word) +(define (read-hashsign-numeric word) (define (digits&scale deci) ; "12.3#4" -> "123#4" & 2 (if-let1 m (#/\./ deci) (let* ([integ (m 'before)] [frac (m 'after)] [digits (string-append integ frac)]) (if (string-scan frac #\#) - (values digits (- (string-length frac) 1)) + (if (#/\d$/ frac) + (values digits (- (string-length frac) 1)) + (values digits (string-length frac))) (values digits (string-length frac)))) (values deci 0))) (define (split-repeats digits) ; "123#45" -> (* (+ 123 45/99) 100) @@ -119,6 +129,11 @@ (assume-type word ) (and (not (#/__/ word)) ;don't allow consecutive '_' (<= (strcount word #\.) 1) - (= (strcount word #\#) 1) - (receive (digits scale) (digits&scale (regexp-replace-all #/_/ word "")) - (* (split-repeats digits) (expt 10 (- scale)))))) + (let1 purified (regexp-replace-all #/_/ word "") ;remove '_' + (receive (digits scale) (digits&scale purified) + (cond [(#/^\d+#+$/ digits) + (* (string->number (regexp-replace-all #/#/ digits "0")) + (expt 10 (- scale)))] + [(= (strcount purified #\#) 1) + (* (split-repeats digits) (expt 10 (- scale)))] + [else #f]))))) diff --git a/src/number.c b/src/number.c index c4200574b..cfd0b51d8 100644 --- a/src/number.c +++ b/src/number.c @@ -4438,15 +4438,12 @@ static ScmObj read_uint(const char **strp, int *lenp, continue; } - if (ctx->padread) { - if (c == '#') digval = 0; - else break; - } else if (digread && c == '#') { - digval = 0; + if (digread && c == '#') { ctx->padread = TRUE; if (ctx->exactness == NOEXACT) { ctx->exactness = INEXACT; } + break; /* We let read-hashsign-numeric parse it.
*/ } else { for (const char *ptab = tab; ptab < tab+radix; ptab++) { if (c == *ptab) { @@ -4476,7 +4473,12 @@ static ScmObj read_uint(const char **strp, int *lenp, /* integer literal can't end with '_' */ return numerr("Invalid use of '_' in numeric literal", ctx); } - + if (ctx->padread) { + if (ctx->strict) { + return numerr("'#' in numeric literal isn't allowed in the strict mode", ctx); + } + return SCM_FALSE; /* caller will handle this */ + } if (value_big == NULL) return Scm_MakeInteger(value_int); if (digits > 0) { value_big = Scm_BignumAccMultAddUI(value_big, @@ -4599,6 +4601,76 @@ static double algorithmR(ScmObj f, int e, double z) /*NOTREACHED*/ } +/* When read_real detects potential repeating decimal notation, this is called. + START points to the beginning of digit sequence (after prefixes and sign), + STRP is a reference to the pointer where a character after '#' resides, + LENP is a reference to the remaining length of input. Those references + are updated to the consumed input. + If the input is successfully parsed, returns a rational number. *STRP + may point to a remaining input (exponent part of the real, imaginary + part of a complex, or angular part of a complex). + If the input can't be parsed, it either returns #f or throws an error, + depending on ctx->throwerror. + */ +static ScmObj read_repeating_decimal(const char *start, + const char **strp, + int *lenp, + int decimal_point_read, + struct numread_packet *ctx) +{ + /* possibly repeating decimal. We don't need performance here, + so we delegate parsing to a Scheme routine.
*/ + int remaining = *lenp; + for (;remaining > 0; --remaining) { + switch (**strp) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '_': case '#': + (*strp)++; + continue; + case '.': + if (decimal_point_read) { + return numerr("Invalid use of '#'", ctx); + } else { + (*strp)++; + continue; + } + case '+': case '-': case '@': + case 'e': case 'E': /* exponent suffix */ + case 'i': case 'I': /* imaginary suffix */ + case 'p': case 'P': /* 'pi' suffix of angular part */ + break; + default: + return numerr("Invalid use of '#'", ctx); + } + break; + } + *lenp = remaining; + ScmObj word = Scm_MakeString(start, *strp - start, *strp - start, 0); + static ScmObj read_hashsign_numeric_proc = SCM_UNDEFINED; + SCM_BIND_PROC(read_hashsign_numeric_proc, + "read-hashsign-numeric", + gauche_numioutil_module()); + ScmObj r = Scm_ApplyRec1(read_hashsign_numeric_proc, word); + if (SCM_FALSEP(r)) { + return numerr("Invalid use of '#'", ctx); + } else { + return r; + } +} + +static ScmObj scale_exact(ScmObj exactnum, _Bool minusp, int scale) +{ + ScmObj e = Scm_Mul(exactnum, + Scm_ExactIntegerExpt(SCM_MAKE_INT(10), + Scm_MakeInteger(scale))); + if (minusp) return Scm_Negate(e); + else return e; +} + +/* Read one real number, including sign. Update *strp and *lenp. + Stops at the end of word, or additional part of complex number. + Returns a real number, or #f. */ static ScmObj read_real(const char **strp, int *lenp, struct numread_packet *ctx) { @@ -4637,10 +4709,29 @@ static ScmObj read_real(const char **strp, int *lenp, /* Read integral part */ if (**strp != '.') { intpart = read_uint(strp, lenp, ctx, SCM_FALSE); + if (ctx->padread) { + /* hash sign in numeric literal. We don't need performance, + so we delegate parsing to a Scheme routine. 
*/ + (*strp)++; /* read past '#' */ + (*lenp)--; + const char *save = *strp; + ScmObj r = read_repeating_decimal(mark, strp, lenp, FALSE, ctx); + if (SCM_FALSEP(r) && save < *strp) return r; + if (*lenp <= 0) { + if (minusp) r = Scm_Negate(r); + if (ctx->exactness == INEXACT) { + return Scm_Inexact(r); + } else { + return r; + } + } + intpart = r; + /* fallthrough */ + } if (SCM_FALSEP(intpart)) { - return numerr("Stray period", ctx); + return numerr("Invalid numeric literal", ctx); } - if ((*lenp) <= 0) { + if (*lenp <= 0) { if (minusp) intpart = Scm_Negate(intpart); if (ctx->exactness == INEXACT) { return Scm_Inexact(intpart); @@ -4648,7 +4739,9 @@ static ScmObj read_real(const char **strp, int *lenp, return intpart; } } - if (**strp == '/') { + + /* See if it's a rational */ + if (!ctx->padread && **strp == '/') { /* possibly rational */ ScmObj denom; int lensave; @@ -4686,29 +4779,45 @@ static ScmObj read_real(const char **strp, int *lenp, } /* Read fractional part. - At this point, simple integer is already eliminated. */ + At this point, simple integer is already eliminated. + Note: If the repeating decimal notation appeared in the intpart, + the decimal point and subsequent digits are already taken care of. */ if (**strp == '.') { if (ctx->radix != 10) { return numerr("(only 10-based fraction is supported)", ctx); } + if (*lenp == 1 && SCM_FALSEP(intpart)) { + return SCM_FALSE; /* input is '.' */ + } + (*strp)++; (*lenp)--; const char *fracp = *strp; fraction = read_uint(strp, lenp, ctx, intpart); + + if (ctx->padread) { + /* hash sign in fractional part. */ + SCM_ASSERT(**strp == '#'); + (*strp)++; + (*lenp)--; + ScmObj r = read_repeating_decimal(mark, strp, lenp, TRUE, ctx); + if (SCM_FALSEP(r)) return r; + fraction = r; + fracdigs = 0; /* scaling is already done */ + } else { + /* Count fraction digits. We can't simply do *strp - fracp, + for fraction part may contain '_' (srfi-169).
*/ + for (; fracp < *strp; fracp++) { + if (*fracp != '_') fracdigs++; + } + } + if (SCM_FALSEP(fraction)) { return numerr("Incomplete decimal point number", ctx); } - /* Count fraction digits. we can't simply do *strp - fracp, - for fraction part may contain '_' (srfi-169). */ - for (; fracp < *strp; fracp++) { - if (*fracp != '_') fracdigs++; - } } else { fraction = intpart; } - if (SCM_FALSEP(intpart)) { - if (fracdigs == 0) return SCM_FALSE; /* input was "." */ - } if (mark == *strp) return SCM_FALSE; /* Read exponent. */ @@ -4765,13 +4874,13 @@ static ScmObj read_real(const char **strp, int *lenp, /* Compose the number. */ if (ctx->exactness == EXACT) { - /* Explicit exact number. We can continue exact arithmetic - (it may end up ratnum) */ - ScmObj e = Scm_Mul(fraction, - Scm_ExactIntegerExpt(SCM_MAKE_INT(10), - Scm_MakeInteger(exponent-fracdigs))); - if (minusp) return Scm_Negate(e); - else return e; + return scale_exact(fraction, minusp, exponent-fracdigs); + } + + if (SCM_RATNUMP(fraction)) { + /* Repeating decimal case. + Scale, then inexactify, to avoid rounding error (though slow) */ + return Scm_Inexact(scale_exact(fraction, minusp, exponent-fracdigs)); } /* Get double approximaiton of fraction. 
If fraction >= 2^53 we'll diff --git a/test/number.scm b/test/number.scm index 5e58c2efc..bc0acb003 100644 --- a/test/number.scm +++ b/test/number.scm @@ -409,17 +409,11 @@ (test* "padding" '(100.0 #t) (flonum-test "100.0#")) (test* "padding" '(1.0 #t) (flonum-test "1.#")) -(test* "padding" (test-error) (flonum-test "1#1")) -(test* "padding" (test-error) (flonum-test "1##1")) -(test* "padding" (test-error) (flonum-test "1#.1")) -(test* "padding" (test-error) (flonum-test "1.#1")) - (test* "padding" (test-error) (flonum-test ".#")) (test* "padding" '(0.0 #t) (flonum-test "0.#")) (test* "padding" '(0.0 #t) (flonum-test ".0#")) (test* "padding" '(0.0 #t) (flonum-test "0#")) (test* "padding" '(0.0 #t) (flonum-test "0#.#")) -(test* "padding" (test-error) (flonum-test "0#.0")) (test* "padding" '(1000.0 #t) (flonum-test "1#e2")) (test* "padding" '(1000.0 #t) (flonum-test "1##e1")) @@ -545,6 +539,28 @@ (test* "complex reader (padding)" '(0.0 1.2) (decompose-complex (string->number "1.2##@.5###pi"))) +;;------------------------------------------------------------------ +(test-section "repeating decimals") + +(define (test-repeating-real input expect) + (test* #"repeating decimal ~|input|" (inexact expect) + (string->number input)) + (test* #"repeating decimal ~|input|" expect + (string->number (string-append "#e" input))) + ) + +(test-repeating-real "0.#1" 1/9) +(test-repeating-real "0.#12" 12/99) +(test-repeating-real "1.#142857" (+ 1 1/7)) +(test-repeating-real "0.1#12" (+ 1/10 12/990)) +(test-repeating-real "0#1" 10/9) +(test-repeating-real "12.3#456e-2" (/ (+ 123/10 456/9990) 100)) +(test-repeating-real "12.3#456e3" (* (+ 123/10 456/9990) 1000)) + +(test* "bad repeating decimal 1" #f (string->number "0.##1")) +(test* "bad repeating decimal 2" #f (string->number "0.#1#")) +(test* "bad repeating decimal 3" #f (string->number "0#.#1")) + ;;------------------------------------------------------------------ (test-section "integer writer syntax")