Support repeating decimal notation

shirok · Jan 5, 2025 · 52e16dd · 52e16dd
1 parent 5736730
commit 52e16dd
Show file tree

Hide file tree

Showing 4 changed files with 190 additions and 42 deletions.
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,11 @@
+2025-01-04  Shiro Kawai  <[email protected]>
+
+	* src/number.c, lib/gauche/numioutil.scm: Support repeating decimal
+	  notation.  Delegate R7RS padding '#' reading to Scheme routine.
+	  Technically, this turns some tokens that were previously not numbers
+	  into numbers (e.g. '0.#1 was read as a symbol before, but now it's a
+	  number).  I expect such weird tokens have been |-escaped in practice.
+
 2024-12-31  Shiro Kawai  <[email protected]>
 
 	* src/number.c (read_real): Allow much larger exponent for explicitly

diff --git a/lib/gauche/numioutil.scm b/lib/gauche/numioutil.scm
@@ -78,22 +78,32 @@
       (display (substring digits (- diglen k) diglen))
       (+ diglen 3 (if (negative? number) 1 0)))))
 
-;; Repeating decimals
-;;   We support notation of 1.2#34 as 1.23434343434...
+;; Read hashsign number literals.
 ;;
-;; - The caller deals with numeric prefixes and exponent part, so
-;;   the 'main' numeric part, which consists of digits, '#', '.',
-;;   and '_', is passed.
-;; - Returns either a rational or #f.
+;;   We support 2 kinds of syntax.
+;;   1. Insignificant digits (R5RS) - If a <ureal> portion of numeric literal
+;;        has '#'s up to the end, it designates insignificant digits.
+;;        We read it as if it's '0'.
+;;          123##.## == 12300.00
+;;   2. Repeating decimal (Gauche) - If any digits (including decimal point)
+;;        follow a single '#', it designates a repeating decimal, e.g.
+;;          0.5#12  == 0.512121212...
+;;
+;;   When the main number reader detects '#', it cuts out the ureal portion
+;;   including '#', and calls read-hashsign-numeric.
+;;
+;;   It returns a rational number, or #f if the syntax is invalid.
 
-(define (read-repeating-decimal word)
+(define (read-hashsign-numeric word)
   (define (digits&scale deci) ; "12.3#4" -> "123#4" & 2
     (if-let1 m (#/\./ deci)
       (let* ([integ (m 'before)]
              [frac (m 'after)]
              [digits (string-append integ frac)])
         (if (string-scan frac #\#)
-          (values digits (- (string-length frac) 1))
+          (if (#/\d$/ frac)
+            (values digits (- (string-length frac) 1))
+            (values digits (string-length frac)))
           (values digits (string-length frac))))
       (values deci 0)))
   (define (split-repeats digits) ; "123#45" -> (* (+ 123 45/99) 100)
@@ -119,6 +129,11 @@
   (assume-type word <string>)
   (and (not (#/__/ word))               ;don't allow consecutive '_'
        (<= (strcount word #\.) 1)
-       (= (strcount word #\#) 1)
-       (receive (digits scale) (digits&scale (regexp-replace-all #/_/ word ""))
-         (* (split-repeats digits) (expt 10 (- scale))))))
+       (let1 purified (regexp-replace-all #/_/ word "") ;remove '_'
+         (receive (digits scale) (digits&scale purified)
+           (cond [(#/^\d+#+$/ digits)
+                  (* (string->number (regexp-replace-all #/#/ digits "0"))
+                     (expt 10 (- scale)))]
+                 [(= (strcount purified #\#) 1)
+                  (* (split-repeats digits) (expt 10 (- scale)))]
+                 [else #f])))))
diff --git a/src/number.c b/src/number.c
@@ -4438,15 +4438,12 @@ static ScmObj read_uint(const char **strp, int *lenp,
             continue;
         }
 
-        if (ctx->padread) {
-            if (c == '#') digval = 0;
-            else break;
-        } else if (digread && c == '#') {
-            digval = 0;
+        if (digread && c == '#') {
             ctx->padread = TRUE;
             if (ctx->exactness == NOEXACT) {
                 ctx->exactness = INEXACT;
             }
+            break;              /* We let read-hashsign-numeric to parse it. */
         } else {
             for (const char *ptab = tab; ptab < tab+radix; ptab++) {
                 if (c == *ptab) {
@@ -4476,7 +4473,12 @@ static ScmObj read_uint(const char **strp, int *lenp,
         /* integer literal can't end with '_' */
         return numerr("Invalid use of '_' in numeric literal", ctx);
     }
-
+    if (ctx->padread) {
+        if (ctx->strict) {
+            return numerr("'#' in numeric literal isn't allowed in the strict mode", ctx);
+        }
+        return SCM_FALSE;       /* caller will handle this */
+    }
     if (value_big == NULL) return Scm_MakeInteger(value_int);
     if (digits > 0) {
         value_big = Scm_BignumAccMultAddUI(value_big,
@@ -4599,6 +4601,76 @@ static double algorithmR(ScmObj f, int e, double z)
     /*NOTREACHED*/
 }
 
+/* Called by read_real after read_uint has stopped at a '#' in the literal.
+   START points to the beginning of digit sequence (after prefixes and sign),
+   STRP is a reference to the pointer to the character right after that '#',
+   LENP is a reference to the remaining length of input.
+   Scans over digits, '_', '#' and '.' (an error if DECIMAL_POINT_READ), and
+   stops before a sign, '@' or an e/i/p suffix char, so on return *STRP may
+   point to remaining input (exponent part of the real, imaginary part of a
+   complex, or angular part of a complex).  START..*STRP is then parsed by
+   read-hashsign-numeric; on success its result is returned and *LENP updated.
+   On failure the result of numerr() is returned (#f or a thrown error).
+ */
+static ScmObj read_repeating_decimal(const char *start,
+                                     const char **strp,
+                                     int *lenp,
+                                     int decimal_point_read,
+                                     struct numread_packet *ctx)
+{
+    /* Possibly a repeating decimal.  We don't need performance here,
+       so we delegate parsing to a Scheme routine. */
+    int remaining = *lenp;
+    for (;remaining > 0; --remaining) {
+        switch (**strp) {
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+        case '_': case '#':
+            (*strp)++;          /* belongs to the word handed to Scheme */
+            continue;
+        case '.':
+            if (decimal_point_read) {
+                return numerr("Invalid use of '#'", ctx);
+            } else {
+                (*strp)++;      /* a repeated '.' is rejected by the Scheme side */
+                continue;
+            }
+        case '+': case '-': case '@':
+        case 'e': case 'E':     /* exponent suffix */
+        case 'i': case 'I':     /* imaginary suffix */
+        case 'p': case 'P':     /* 'pi' suffix of angular part */
+            break;              /* leaves the switch only; not consumed */
+        default:
+            return numerr("Invalid use of '#'", ctx);
+        }
+        break;                  /* reached only from the suffix cases: end scan */
+    }
+    *lenp = remaining;          /* error returns above leave *lenp untouched */
+    ScmObj word = Scm_MakeString(start, *strp - start, *strp - start, 0);
+    static ScmObj read_hashsign_numeric_proc = SCM_UNDEFINED;
+    SCM_BIND_PROC(read_hashsign_numeric_proc,
+                  "read-hashsign-numeric",
+                  gauche_numioutil_module());
+    ScmObj r = Scm_ApplyRec1(read_hashsign_numeric_proc, word);
+    if (SCM_FALSEP(r)) {        /* the Scheme routine rejected the syntax */
+        return numerr("Invalid use of '#'", ctx);
+    } else {
+        return r;
+    }
+}
+
+static ScmObj scale_exact(ScmObj exactnum, _Bool minusp, int scale)
+{   /* Returns EXACTNUM * 10^SCALE in exact arithmetic, negated if MINUSP. */
+    ScmObj e = Scm_Mul(exactnum,
+                       Scm_ExactIntegerExpt(SCM_MAKE_INT(10),
+                                            Scm_MakeInteger(scale)));
+    if (minusp) return Scm_Negate(e);   /* sign is applied after scaling */
+    else return e;
+}
+
+/* Read one real number, including sign.  Update *strp and *lenp.
+   Stops at the end of word, or additional part of complex number.
+   Returns a real number, or #f. */
 static ScmObj read_real(const char **strp, int *lenp,
                         struct numread_packet *ctx)
 {
@@ -4637,18 +4709,39 @@ static ScmObj read_real(const char **strp, int *lenp,
     /* Read integral part */
     if (**strp != '.') {
         intpart = read_uint(strp, lenp, ctx, SCM_FALSE);
+        if (ctx->padread) {
+            /* hash sign in numeric literal.  We don't need performance,
+               so we delegate parsing to a Scheme routine. */
+            (*strp)++;          /* read past '#' */
+            (*lenp)--;
+            const char *save = *strp;
+            ScmObj r = read_repeating_decimal(mark, strp, lenp, FALSE, ctx);
+            if (SCM_FALSEP(r) && save < *strp) return r;
+            if (*lenp <= 0) {
+                if (minusp) r = Scm_Negate(r);
+                if (ctx->exactness == INEXACT) {
+                    return Scm_Inexact(r);
+                } else {
+                    return r;
+                }
+            }
+            intpart = r;
+            /* fallthrough */
+        }
         if (SCM_FALSEP(intpart)) {
-            return numerr("Stray period", ctx);
+            return numerr("Invalid numeric literal", ctx);
         }
-        if ((*lenp) <= 0) {
+        if (*lenp <= 0) {
             if (minusp) intpart = Scm_Negate(intpart);
             if (ctx->exactness == INEXACT) {
                 return Scm_Inexact(intpart);
             } else {
                 return intpart;
             }
         }
-        if (**strp == '/') {
+
+        /* See if it's a rational */
+        if (!ctx->padread && **strp == '/') {
             /* possibly rational */
             ScmObj denom;
             int lensave;
@@ -4686,29 +4779,45 @@ static ScmObj read_real(const char **strp, int *lenp,
     }
 
     /* Read fractional part.
-       At this point, simple integer is already eliminated. */
+       At this point, simple integer is already eliminated.
+       Note: If the repeating decimal notation appeared in the intpart,
+       the decimal point and subsequent digits are already taken care of. */
     if (**strp == '.') {
         if (ctx->radix != 10) {
             return numerr("(only 10-based fraction is supported)", ctx);
         }
+        if (*lenp == 1 && SCM_FALSEP(intpart)) {
+            return SCM_FALSE;   /* input is '.' */
+        }
+
         (*strp)++; (*lenp)--;
         const char *fracp = *strp;
         fraction = read_uint(strp, lenp, ctx, intpart);
+
+        if (ctx->padread) {
+            /* hash sign in fractional part. */
+            SCM_ASSERT(**strp == '#');
+            (*strp)++;
+            (*lenp)--;
+            ScmObj r = read_repeating_decimal(mark, strp, lenp, TRUE, ctx);
+            if (SCM_FALSEP(r)) return r;
+            fraction = r;
+            fracdigs = 0;       /* scaling is already done */
+        } else {
+            /* Count fraction digits.  we can't simply do *strp - fracp,
+               for fraction part may contain '_' (srfi-169). */
+            for (; fracp < *strp; fracp++) {
+                if (*fracp != '_') fracdigs++;
+            }
+        }
+
         if (SCM_FALSEP(fraction)) {
             return numerr("Incomplete decimal point number", ctx);
         }
-        /* Count fraction digits.  we can't simply do *strp - fracp,
-           for fraction part may contain '_' (srfi-169). */
-        for (; fracp < *strp; fracp++) {
-            if (*fracp != '_') fracdigs++;
-        }
     } else {
         fraction = intpart;
     }
 
-    if (SCM_FALSEP(intpart)) {
-        if (fracdigs == 0) return SCM_FALSE; /* input was "." */
-    }
     if (mark == *strp) return SCM_FALSE;
 
     /* Read exponent.  */
@@ -4765,13 +4874,13 @@ static ScmObj read_real(const char **strp, int *lenp,
 
     /* Compose the number. */
     if (ctx->exactness == EXACT) {
-        /* Explicit exact number.  We can continue exact arithmetic
-           (it may end up ratnum) */
-        ScmObj e = Scm_Mul(fraction,
-                           Scm_ExactIntegerExpt(SCM_MAKE_INT(10),
-                                                Scm_MakeInteger(exponent-fracdigs)));
-        if (minusp) return Scm_Negate(e);
-        else        return e;
+        return scale_exact(fraction, minusp, exponent-fracdigs);
+    }
+
+    if (SCM_RATNUMP(fraction)) {
+        /* Repeating decimal case.
+           Scale, then inexactify, to avoid rounding error (though slow) */
+        return Scm_Inexact(scale_exact(fraction, minusp, exponent-fracdigs));
     }
 
     /* Get double approximaiton of fraction.  If fraction >= 2^53 we'll

diff --git a/test/number.scm b/test/number.scm
@@ -409,17 +409,11 @@
 (test* "padding" '(100.0 #t) (flonum-test "100.0#"))
 (test* "padding" '(1.0 #t) (flonum-test "1.#"))
 
-(test* "padding" (test-error) (flonum-test "1#1"))
-(test* "padding" (test-error) (flonum-test "1##1"))
-(test* "padding" (test-error) (flonum-test "1#.1"))
-(test* "padding" (test-error) (flonum-test "1.#1"))
-
 (test* "padding" (test-error) (flonum-test ".#"))
 (test* "padding" '(0.0 #t) (flonum-test "0.#"))
 (test* "padding" '(0.0 #t) (flonum-test ".0#"))
 (test* "padding" '(0.0 #t) (flonum-test "0#"))
 (test* "padding" '(0.0 #t) (flonum-test "0#.#"))
-(test* "padding" (test-error) (flonum-test "0#.0"))
 
 (test* "padding" '(1000.0 #t) (flonum-test "1#e2"))
 (test* "padding" '(1000.0 #t) (flonum-test "1##e1"))
@@ -545,6 +539,28 @@
 (test* "complex reader (padding)" '(0.0 1.2)
        (decompose-complex (string->number "1.2##@.5###pi")))
 
+;;------------------------------------------------------------------
+(test-section "repeating decimals")
+
+(define (test-repeating-real input expect)
+  (test* #"repeating decimal ~|input|" (inexact expect)
+         (string->number input))
+  (test* #"repeating decimal ~|input|" expect
+         (string->number (string-append "#e" input)))
+  )
+
+(test-repeating-real "0.#1" 1/9)
+(test-repeating-real "0.#12" 12/99)
+(test-repeating-real "1.#142857" (+ 1 1/7))
+(test-repeating-real "0.1#12" (+ 1/10 12/990))
+(test-repeating-real "0#1" 10/9)
+(test-repeating-real "12.3#456e-2" (/ (+ 123/10 456/9990) 100))
+(test-repeating-real "12.3#456e3" (* (+ 123/10 456/9990) 1000))
+
+(test* "bad repeating decimal 1" #f (string->number "0.##1"))
+(test* "bad repeating decimal 2" #f (string->number "0.#1#"))
+(test* "bad repeating decimal 3" #f (string->number "0#.#1"))
+
 ;;------------------------------------------------------------------
 (test-section "integer writer syntax")