-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathre-low.scm
123 lines (106 loc) · 4.35 KB
/
re-low.scm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
;;; Regular expression matching for scsh
;;; Copyright (c) 1994 by Olin Shivers.
;;; Match data for regexp matches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record-type :regexp-match
(make-regexp-match string submatches)
regexp-match?
(string regexp-match:string)
(submatches regexp-match:submatches))
(define (match:start match . maybe-index)
(let ((index (:optional maybe-index 0))
(submatches (regexp-match:submatches match)))
(and (<= 0 index (- (vector-length submatches) 1))
(match-start (vector-ref submatches index)))))
(define (match:end match . maybe-index)
(let ((index (:optional maybe-index 0))
(submatches (regexp-match:submatches match)))
(and (<= 0 index (- (vector-length submatches) 1))
(match-end (vector-ref submatches index)))))
(define (match:substring match . maybe-index)
(let ((index (:optional maybe-index 0))
(submatches (regexp-match:submatches match)))
(and (<= 0 index (- (vector-length submatches) 1))
(let ((submatch (vector-ref submatches index)))
(substring (regexp-match:string match)
(match-start submatch)
(match-end submatch))))))
;;; Compiling regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; There's no legal Posix string expressing the empty match (e.g., (|))
;;; that will never match anything. So when we have one of these, we set
;;; the STRING field to #f. The matchers will spot this case and handle it
;;; specially.
;;; We compile the string two ways, on demand -- one for cre-search, and
;;; one for cre-search?.
;(define-record cre ; A compiled regular expression
; string ; The Posix string form of the regexp or #F.
; max-paren ; Max paren in STRING needed for submatches.
; (regexp #f) ; Compiled form or #F.
; (regexp/nm #f) ; Same as REGEXP, but compiled with no-submatch.
; tvec ; Translation vector for the submatches
; ((disclose self) (list "cre" (cre:string self))))
(define-record-type :cre
(really-make-cre string max-paren regexp regexp/nm tvec debug)
cre?
(string cre:string set-cre:string)
(max-paren cre:max-paren set-cre:max-paren)
(regexp cre:regexp set-cre:regexp)
(regexp/nm cre:regexp/nm set-cre:regexp/nm)
(tvec cre:tvec set-cre:tvec)
(debug cre:debug set-cre:debug))
(define-record-discloser :cre
(lambda (self) (list "cre" (cre:string self))))
(define (make-cre str max-paren tvec)
(really-make-cre str max-paren #f #f tvec #f))
(define (new-cre str tvec)
(make-cre str (max-live-posix-submatch tvec) tvec))
(define (max-live-posix-submatch tvec)
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
;;; Searching with compiled regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; cre-search returns match info; cre-search? is just a predicate.
(define (cre-search cre match-vec str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
(if (not re-str)
#f
(begin
(if (not (cre:regexp cre))
(set-cre:regexp cre (make-regexp re-str
(regexp-option extended)
(regexp-option submatches))))
(let ((ret (regexp-match (cre:regexp cre) str start #t #t #t)))
(if (not ret)
#f
(make-regexp-match str
(translate-submatches ret
(cre:tvec cre)
match-vec))))))))
(define (translate-submatches matches trans-vec match-vec)
(let ((n-virtual-submatches (vector-length trans-vec)))
(let loop ((virtual-index 0)
(match-index 0)
(matches matches))
(cond
((> virtual-index n-virtual-submatches)
match-vec)
((if (zero? virtual-index)
0
(vector-ref trans-vec (- virtual-index 1)))
=> (lambda (actual-index)
(if (= match-index actual-index)
(begin
(vector-set! match-vec virtual-index (car matches))
(loop (+ 1 virtual-index) (+ 1 match-index) (cdr matches)))
(loop virtual-index (+ 1 match-index) (cdr matches)))))
(else
(loop (+ 1 virtual-index) match-index matches))))))
(define (cre-search? cre str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
(if (not re-str)
#f
(begin
(if (not (cre:regexp/nm cre))
(set-cre:regexp/nm cre (make-regexp re-str
(regexp-option extended))))
(regexp-match (cre:regexp/nm cre) str start #f #t #t)))))