Skip to content

Commit

Permalink
Support repeating decimal notation
Browse files Browse the repository at this point in the history
  • Loading branch information
shirok committed Jan 5, 2025
1 parent 5736730 commit 52e16dd
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 42 deletions.
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
2025-01-04 Shiro Kawai <[email protected]>

* src/number.c, lib/gauche/numioutil.scm: Support repeating decimal
notation. Delegate R7RS padding '#' reading to Scheme routine.
Technically, this turns some tokens that were previously not numbers
into numbers (e.g. '0.#1 was read as a symbol before, but now it's a
number). I expect such weird tokens have been |-escaped in practice.

2024-12-31 Shiro Kawai <[email protected]>

* src/number.c (read_real): Allow much larger exponent for explicitly
Expand Down
37 changes: 26 additions & 11 deletions lib/gauche/numioutil.scm
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,32 @@
(display (substring digits (- diglen k) diglen))
(+ diglen 3 (if (negative? number) 1 0)))))

;; Repeating decimals
;; We support notation of 1.2#34 as 1.23434343434...
;; Read hashsign number literals.
;;
;; - The caller deals with numeric prefixes and exponent part, so
;; the 'main' numeric part, which consists of digits, '#', '.',
;; and '_', is passed.
;; - Returns either a rational or #f.
;; We support 2 kinds of syntax.
;; 1. Insignificant digits (R5RS) - If a <ureal> portion of numeric literal
;; has '#'s up to the end, it designates insignificant digits.
;; We read it as if it's '0'.
;; 123##.## == 12300.00
;; 2. Repeating decimal (Gauche) - If any digits (including decimal point)
;; follow a single '#', it designates a repeating decimal, e.g.
;; 0.5#12 == 0.512121212...
;;
;; When the main number reader detects '#', it cuts out the ureal portion
;; including '#', and calls read-hashsign-numeric.
;;
;; It returns a rational number, or #f if the syntax is invalid.

(define (read-repeating-decimal word)
(define (read-hashsign-numeric word)
(define (digits&scale deci) ; "12.3#4" -> "123#4" & 2
(if-let1 m (#/\./ deci)
(let* ([integ (m 'before)]
[frac (m 'after)]
[digits (string-append integ frac)])
(if (string-scan frac #\#)
(values digits (- (string-length frac) 1))
(if (#/\d$/ frac)
(values digits (- (string-length frac) 1))
(values digits (string-length frac)))
(values digits (string-length frac))))
(values deci 0)))
(define (split-repeats digits) ; "123#45" -> (* (+ 123 45/99) 100)
Expand All @@ -119,6 +129,11 @@
(assume-type word <string>)
(and (not (#/__/ word)) ;don't allow consecutive '_'
(<= (strcount word #\.) 1)
(= (strcount word #\#) 1)
(receive (digits scale) (digits&scale (regexp-replace-all #/_/ word ""))
(* (split-repeats digits) (expt 10 (- scale))))))
(let1 purified (regexp-replace-all #/_/ word "") ;remove '_'
(receive (digits scale) (digits&scale purified)
(cond [(#/^\d+#+$/ digits)
(* (string->number (regexp-replace-all #/#/ digits "0"))
(expt 10 (- scale)))]
[(= (strcount purified #\#) 1)
(* (split-repeats digits) (expt 10 (- scale)))]
[else #f])))))
159 changes: 134 additions & 25 deletions src/number.c
Original file line number Diff line number Diff line change
Expand Up @@ -4438,15 +4438,12 @@ static ScmObj read_uint(const char **strp, int *lenp,
continue;
}

if (ctx->padread) {
if (c == '#') digval = 0;
else break;
} else if (digread && c == '#') {
digval = 0;
if (digread && c == '#') {
ctx->padread = TRUE;
if (ctx->exactness == NOEXACT) {
ctx->exactness = INEXACT;
}
break; /* We let read-hashsign-numeric to parse it. */
} else {
for (const char *ptab = tab; ptab < tab+radix; ptab++) {
if (c == *ptab) {
Expand Down Expand Up @@ -4476,7 +4473,12 @@ static ScmObj read_uint(const char **strp, int *lenp,
/* integer literal can't end with '_' */
return numerr("Invalid use of '_' in numeric literal", ctx);
}

if (ctx->padread) {
if (ctx->strict) {
return numerr("'#' in numeric literal isn't allowed in the strict mode", ctx);
}
return SCM_FALSE; /* caller will handle this */
}
if (value_big == NULL) return Scm_MakeInteger(value_int);
if (digits > 0) {
value_big = Scm_BignumAccMultAddUI(value_big,
Expand Down Expand Up @@ -4599,6 +4601,76 @@ static double algorithmR(ScmObj f, int e, double z)
/*NOTREACHED*/
}

/* When read_real detects potential repeating decimal notation, this is called.
START points to the beginning of digit sequence (after prefixes and sign),
STRP is a reference to the pointer where a character after '#' resides,
LENP is a reference to the remaining length of input. Those references
are updated to the consumed input.
If the input is successfully parsed, returns a rational number. *STRP
may point to a remaining input (exponent part of the real, imaginary
part of a complex, or angular part of a complex).
IF the input can't be parsed, it either returns #f or throws an error,
depending on ctx->throwerror.
*/
static ScmObj read_repeating_decimal(const char *start,
const char **strp,
int *lenp,
int decimal_point_read,
struct numread_packet *ctx)
{
/* possibly repeating decimal. We don't need performance here,
so we delegate parsing to a Scheme routine. */
int remaining = *lenp;
for (;remaining > 0; --remaining) {
switch (**strp) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '_': case '#':
(*strp)++;
continue;
case '.':
if (decimal_point_read) {
return numerr("Invalid use of '#'", ctx);
} else {
(*strp)++;
continue;
}
case '+': case '-': case '@':
case 'e': case 'E': /* exponent suffix */
case 'i': case 'I': /* imaginary suffix */
case 'p': case 'P': /* 'pi' suffix of angular part */
break;
default:
return numerr("Invalid use of '#'", ctx);
}
break;
}
*lenp = remaining;
ScmObj word = Scm_MakeString(start, *strp - start, *strp - start, 0);
static ScmObj read_hashsign_numeric_proc = SCM_UNDEFINED;
SCM_BIND_PROC(read_hashsign_numeric_proc,
"read-hashsign-numeric",
gauche_numioutil_module());
ScmObj r = Scm_ApplyRec1(read_hashsign_numeric_proc, word);
if (SCM_FALSEP(r)) {
return numerr("Invalid use of '#'", ctx);
} else {
return r;
}
}

/* Compute EXACTNUM * 10^SCALE with exact arithmetic, and negate the
   product when MINUSP is true.  SCALE may be any int; the power of ten
   is obtained from Scm_ExactIntegerExpt. */
static ScmObj scale_exact(ScmObj exactnum, _Bool minusp, int scale)
{
    ScmObj power_of_ten = Scm_ExactIntegerExpt(SCM_MAKE_INT(10),
                                               Scm_MakeInteger(scale));
    ScmObj scaled = Scm_Mul(exactnum, power_of_ten);

    return minusp ? Scm_Negate(scaled) : scaled;
}

/* Read one real number, including sign. Update *strp and *lenp.
Stops at the end of word, or additional part of complex number.
Returns a real number, or #f. */
static ScmObj read_real(const char **strp, int *lenp,
struct numread_packet *ctx)
{
Expand Down Expand Up @@ -4637,18 +4709,39 @@ static ScmObj read_real(const char **strp, int *lenp,
/* Read integral part */
if (**strp != '.') {
intpart = read_uint(strp, lenp, ctx, SCM_FALSE);
if (ctx->padread) {
/* hash sign in numeric literal. We don't need performance,
so we delegate parsing to a Scheme routine. */
(*strp)++; /* read past '#' */
(*lenp)--;
const char *save = *strp;
ScmObj r = read_repeating_decimal(mark, strp, lenp, FALSE, ctx);
if (SCM_FALSEP(r) && save < *strp) return r;
if (*lenp <= 0) {
if (minusp) r = Scm_Negate(r);
if (ctx->exactness == INEXACT) {
return Scm_Inexact(r);
} else {
return r;
}
}
intpart = r;
/* fallthrough */
}
if (SCM_FALSEP(intpart)) {
return numerr("Stray period", ctx);
return numerr("Invalid numeric literal", ctx);
}
if ((*lenp) <= 0) {
if (*lenp <= 0) {
if (minusp) intpart = Scm_Negate(intpart);
if (ctx->exactness == INEXACT) {
return Scm_Inexact(intpart);
} else {
return intpart;
}
}
if (**strp == '/') {

/* See if it's a rational */
if (!ctx->padread && **strp == '/') {
/* possibly rational */
ScmObj denom;
int lensave;
Expand Down Expand Up @@ -4686,29 +4779,45 @@ static ScmObj read_real(const char **strp, int *lenp,
}

/* Read fractional part.
At this point, simple integer is already eliminated. */
At this point, simple integer is already eliminated.
Note: If the repeating decimal notation appeared in the intpart,
the decimal point and subsequent digits are already taken care of. */
if (**strp == '.') {
if (ctx->radix != 10) {
return numerr("(only 10-based fraction is supported)", ctx);
}
if (*lenp == 1 && SCM_FALSEP(intpart)) {
return SCM_FALSE; /* input is '.' */
}

(*strp)++; (*lenp)--;
const char *fracp = *strp;
fraction = read_uint(strp, lenp, ctx, intpart);

if (ctx->padread) {
/* hash sign in fractinal part. */
SCM_ASSERT(**strp == '#');
(*strp)++;
(*lenp)--;
ScmObj r = read_repeating_decimal(mark, strp, lenp, TRUE, ctx);
if (SCM_FALSEP(r)) return r;
fraction = r;
fracdigs = 0; /* scaling is already done */
} else {
/* Count fraction digits. we can't simply do *strp - fracp,
for fraction part may contain '_' (srfi-169). */
for (; fracp < *strp; fracp++) {
if (*fracp != '_') fracdigs++;
}
}

if (SCM_FALSEP(fraction)) {
return numerr("Incomplete decimal point number", ctx);
}
/* Count fraction digits. we can't simply do *strp - fracp,
for fraction part may contain '_' (srfi-169). */
for (; fracp < *strp; fracp++) {
if (*fracp != '_') fracdigs++;
}
} else {
fraction = intpart;
}

if (SCM_FALSEP(intpart)) {
if (fracdigs == 0) return SCM_FALSE; /* input was "." */
}
if (mark == *strp) return SCM_FALSE;

/* Read exponent. */
Expand Down Expand Up @@ -4765,13 +4874,13 @@ static ScmObj read_real(const char **strp, int *lenp,

/* Compose the number. */
if (ctx->exactness == EXACT) {
/* Explicit exact number. We can continue exact arithmetic
(it may end up ratnum) */
ScmObj e = Scm_Mul(fraction,
Scm_ExactIntegerExpt(SCM_MAKE_INT(10),
Scm_MakeInteger(exponent-fracdigs)));
if (minusp) return Scm_Negate(e);
else return e;
return scale_exact(fraction, minusp, exponent-fracdigs);
}

if (SCM_RATNUMP(fraction)) {
/* Repeating decimal case.
Scale, then inexactify, to avoid rounding error (though slow) */
return Scm_Inexact(scale_exact(fraction, minusp, exponent-fracdigs));
}

/* Get double approximaiton of fraction. If fraction >= 2^53 we'll
Expand Down
28 changes: 22 additions & 6 deletions test/number.scm
Original file line number Diff line number Diff line change
Expand Up @@ -409,17 +409,11 @@
(test* "padding" '(100.0 #t) (flonum-test "100.0#"))
(test* "padding" '(1.0 #t) (flonum-test "1.#"))

(test* "padding" (test-error) (flonum-test "1#1"))
(test* "padding" (test-error) (flonum-test "1##1"))
(test* "padding" (test-error) (flonum-test "1#.1"))
(test* "padding" (test-error) (flonum-test "1.#1"))

(test* "padding" (test-error) (flonum-test ".#"))
(test* "padding" '(0.0 #t) (flonum-test "0.#"))
(test* "padding" '(0.0 #t) (flonum-test ".0#"))
(test* "padding" '(0.0 #t) (flonum-test "0#"))
(test* "padding" '(0.0 #t) (flonum-test "0#.#"))
(test* "padding" (test-error) (flonum-test "0#.0"))

(test* "padding" '(1000.0 #t) (flonum-test "1#e2"))
(test* "padding" '(1000.0 #t) (flonum-test "1##e1"))
Expand Down Expand Up @@ -545,6 +539,28 @@
(test* "complex reader (padding)" '(0.0 1.2)
(decompose-complex (string->number "1.2##@.5###pi")))

;;------------------------------------------------------------------
(test-section "repeating decimals")

;; Test helper for repeating-decimal literals.
;; INPUT is the literal as a string and EXPECT is the exact value it denotes.
;; Runs two tests: reading INPUT as-is must yield (inexact EXPECT), and
;; reading INPUT with an "#e" prefix must yield EXPECT itself.
(define (test-repeating-real input expect)
(test* #"repeating decimal ~|input|" (inexact expect)
(string->number input))
(test* #"repeating decimal ~|input|" expect
(string->number (string-append "#e" input)))
)

(test-repeating-real "0.#1" 1/9)
(test-repeating-real "0.#12" 12/99)
(test-repeating-real "1.#142857" (+ 1 1/7))
(test-repeating-real "0.1#12" (+ 1/10 12/990))
(test-repeating-real "0#1" 10/9)
(test-repeating-real "12.3#456e-2" (/ (+ 123/10 456/9990) 100))
(test-repeating-real "12.3#456e3" (* (+ 123/10 456/9990) 1000))

(test* "bad repeating decimal 1" #f (string->number "0.##1"))
(test* "bad repeating decimal 2" #f (string->number "0.#1#"))
(test* "bad repeating decimal 3" #f (string->number "0#.#1"))

;;------------------------------------------------------------------
(test-section "integer writer syntax")

Expand Down

0 comments on commit 52e16dd

Please sign in to comment.