-
-
Notifications
You must be signed in to change notification settings - Fork 353
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
review: feat: java lexer for better position detection #5753
base: master
Are you sure you want to change the base?
Changes from all commits
0d1d4b2
477c754
08c995d
a788646
92d2bcd
6961a97
e40f495
12fc594
e878ee4
c43e09b
df4db75
cd2fbff
b3c879f
e793462
3146df6
338a46f
854542e
6a992e3
27ecdba
ec7b663
a3a5714
ebcf55c
bce0e21
7b75693
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/* | ||
* SPDX-License-Identifier: (MIT OR CECILL-C) | ||
* | ||
* Copyright (C) 2006-2023 INRIA and contributors | ||
* | ||
* Spoon is available either under the terms of the MIT License (see LICENSE-MIT.txt) or the Cecill-C License (see LICENSE-CECILL-C.txt). You as the user are entitled to choose the terms under which to adopt Spoon. | ||
*/ | ||
package spoon.support.util.internal.lexer; | ||
|
||
import java.util.Arrays; | ||
|
||
/**
 * A helper class to deal with unicode escapes.
 * <p>
 * It copies the region {@code [start, end)} of a char array while replacing every unicode escape
 * (a backslash, the letter 'u' and exactly four hex digits) by the single char it denotes. Because
 * the copy is then shorter than the original region, it also records how an index in the copy maps
 * back to an index in the original array.
 * <p>
 * NOTE(review): the Java Language Specification also allows more than one 'u' after the backslash;
 * such escapes are not recognized here and are copied verbatim.
 */
class CharRemapper {
    /** Number of original chars covered by one unicode escape: backslash + 'u' + four hex digits. */
    private static final int UNICODE_ESCAPE_LENGTH = 6;
    /** Number of hex digits in a unicode escape. */
    private static final int HEX_DIGIT_COUNT = 4;
    /** Returned by {@link #parseHex(int)} if one of the four chars is not a hex digit. */
    private static final int INVALID_HEX = -1;

    private final char[] content;
    private final int start;
    private final int end;
    /**
     * {@code null} while no unicode escape was found. Otherwise, entry {@code k} holds the index in
     * the original array of the char that follows the {@code k}-th char of the remapped content.
     */
    private int[] positionRemap;

    /**
     * @param content the original chars; the array is not copied
     * @param start the first index (inclusive) of the region to remap
     * @param end the last index (exclusive) of the region to remap
     */
    CharRemapper(char[] content, int start, int end) {
        this.content = content;
        this.start = start;
        this.end = end;
    }

    /**
     * {@return the sub-array from start to end of the original char array with unicode escapes replaced}
     * <p>
     * While copying, a table is built that stores for every produced char how many original chars
     * it consumed (1 for a plain char, {@value #UNICODE_ESCAPE_LENGTH} for an escape). At the end,
     * the table is turned into running sums, offset by {@code start}, so that
     * {@link #remapPosition(int)} is a plain lookup. The table is only allocated once the first
     * escape is seen. Calling this method repeatedly yields the same result every time.
     */
    char[] remapContent() {
        char[] chars = new char[this.end - this.start]; // upper bound, escapes shrink the result
        int[] widths = null;
        int t = 0;
        // true if the previous char was a backslash that was not itself escaped; a backslash
        // following it is an escaped backslash and therefore cannot start a unicode escape
        boolean escaped = false;
        for (int i = this.start; i < this.end; i++, t++) {
            char c = this.content[i];
            if (c == '\\' && !escaped && this.end - i >= UNICODE_ESCAPE_LENGTH && this.content[i + 1] == 'u') {
                int utf16 = parseHex(i + 2);
                if (utf16 >= 0) {
                    chars[t] = (char) utf16;
                    if (widths == null) {
                        widths = createPositionRemap(chars);
                    }
                    widths[t] = UNICODE_ESCAPE_LENGTH;
                    // skip the rest of the escape; the loop increment consumes the backslash itself
                    i += UNICODE_ESCAPE_LENGTH - 1;
                    continue;
                }
            }
            // only a backslash that is not escaped itself escapes the following char
            escaped = c == '\\' && !escaped;
            chars[t] = c;
        }
        if (widths == null) {
            // no unicode escape: positions only differ by the start offset
            this.positionRemap = null;
            return chars;
        }
        // otherwise, we encountered a unicode sequence: accumulate the widths into positions
        widths[0] += this.start;
        Arrays.parallelPrefix(widths, Integer::sum);
        this.positionRemap = widths;
        return Arrays.copyOf(chars, t);
    }

    /**
     * Maps an index of the remapped content back to the original char array.
     * Expects {@link #remapContent()} to have been called before.
     *
     * @param index an index into the array returned by {@link #remapContent()}
     * @return the index in the original array at which the char at {@code index} starts
     */
    int remapPosition(int index) {
        if (this.positionRemap == null) {
            return index + this.start;
        }
        if (index == 0) {
            return this.start;
        }
        // the running sum up to the previous char is where this char starts
        return this.positionRemap[index - 1];
    }

    /**
     * Creates the width table with the default of one original char per remapped char.
     */
    private int[] createPositionRemap(char[] chars) {
        int[] remap = new int[chars.length];
        Arrays.fill(remap, 1);
        return remap;
    }

    /**
     * Parses the four chars starting at {@code start} as a hexadecimal number.
     * The caller must make sure that {@code start + 3} is a valid index.
     *
     * @param start the index of the first hex digit in the original array
     * @return the parsed value (0 to 0xFFFF), or a negative value if any of the four chars is not
     *         one of 0-9, a-f or A-F
     */
    private int parseHex(int start) {
        int result = 0;
        for (int i = start; i < start + HEX_DIGIT_COUNT; i++) {
            char c = this.content[i];
            int digit;
            if (c >= '0' && c <= '9') {
                digit = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                digit = c - 'a' + 10;
            } else if (c >= 'A' && c <= 'F') {
                digit = c - 'A' + 10;
            } else {
                // stop right away: carrying a marker value through the remaining
                // multiplications would overflow and hide the invalid digit
                return INVALID_HEX;
            }
            result = result * 16 + digit;
        }
        return result;
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* SPDX-License-Identifier: (MIT OR CECILL-C) | ||
* | ||
* Copyright (C) 2006-2023 INRIA and contributors | ||
* | ||
* Spoon is available either under the terms of the MIT License (see LICENSE-MIT.txt) or the Cecill-C License (see LICENSE-CECILL-C.txt). You as the user are entitled to choose the terms under which to adopt Spoon. | ||
*/ | ||
package spoon.support.util.internal.lexer; | ||
|
||
/**
 * Java keywords and contextual keywords, as recognized by the lexer.
 * <p>
 * Besides keywords, this enum also contains {@code true}, {@code false} and {@code null}, which the
 * Java Language Specification classifies as literals rather than keywords. It is not a complete
 * list of all reserved words of the language.
 * <p>
 * {@link #toString()} returns the spelling of the keyword as it appears in source code.
 */
enum JavaKeyword {
    ABSTRACT,
    ASSERT,
    BOOLEAN,
    BREAK,
    BYTE,
    CASE,
    CATCH,
    CHAR,
    CLASS,
    CONTINUE,
    DEFAULT,
    DO,
    DOUBLE,
    ELSE,
    EXTENDS,
    FALSE,
    FINAL,
    FINALLY,
    FLOAT,
    FOR,
    IF,
    IMPLEMENTS,
    IMPORT,
    INSTANCEOF,
    INT,
    INTERFACE,
    LONG,
    NATIVE,
    NEW,
    // the only keyword whose spelling is not the lower-cased constant name
    NON_SEALED("non-sealed"),
    NULL,
    PACKAGE,
    PERMITS,
    PRIVATE,
    PROTECTED,
    PUBLIC,
    RECORD,
    RETURN,
    SEALED,
    SHORT,
    STATIC,
    STRICTFP,
    SUPER,
    SWITCH,
    SYNCHRONIZED,
    THIS,
    THROW,
    THROWS,
    TRANSIENT,
    TRUE,
    TRY,
    VOID,
    VOLATILE,
    WHILE,
    YIELD;

    /** The keyword exactly as it is written in Java source code. */
    private final String text;

    /**
     * Creates a keyword whose spelling is the lower-cased constant name.
     */
    JavaKeyword() {
        // Locale.ROOT keeps the result independent of the default locale; with e.g. a Turkish
        // default locale, lower-casing "IF" would otherwise produce a dotless i
        this.text = name().toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Creates a keyword with an explicit spelling.
     *
     * @param text the keyword as written in source code
     */
    JavaKeyword(String text) {
        this.text = text;
    }

    /**
     * {@return the keyword as it is written in Java source code}
     */
    @Override
    public String toString() {
        return this.text;
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a comment stating why this is `6`? And maybe a short comment at the top saying that you are first building a map from index -> skip value to the next char and then accumulating it at the bottom, or something in that spirit?