Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

review: feat: java lexer for better position detection #5753

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 18 additions & 34 deletions src/main/java/spoon/support/compiler/jdt/PositionBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
*/
package spoon.support.compiler.jdt;

import org.apache.commons.lang3.ArrayUtils;
import org.eclipse.jdt.internal.compiler.ast.ASTNode;
import org.eclipse.jdt.internal.compiler.ast.AbstractMethodDeclaration;
import org.eclipse.jdt.internal.compiler.ast.AbstractVariableDeclaration;
Expand Down Expand Up @@ -43,12 +42,13 @@
import spoon.reflect.declaration.CtElement;
import spoon.reflect.declaration.CtModifiable;
import spoon.reflect.declaration.CtPackage;
import spoon.reflect.declaration.ModifierKind;
import spoon.reflect.factory.CoreFactory;
import spoon.reflect.reference.CtTypeReference;
import spoon.support.compiler.jdt.ContextBuilder.CastInfo;
import spoon.support.reflect.CtExtendedModifier;
import spoon.support.util.internal.lexer.ModifierExtractor;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
Expand All @@ -62,10 +62,12 @@
*/
public class PositionBuilder {

private final ModifierExtractor extractor;
private final JDTTreeBuilder jdtTreeBuilder;

public PositionBuilder(JDTTreeBuilder jdtTreeBuilder) {
this.jdtTreeBuilder = jdtTreeBuilder;
this.extractor = new ModifierExtractor();
}

SourcePosition buildPosition(int sourceStart, int sourceEnd) {
Expand Down Expand Up @@ -320,8 +322,8 @@ SourcePosition buildPositionCtElement(CtElement e, ASTNode node) {
modifiersSourceEnd = findPrevNonWhitespace(contents, modifiersSourceStart - 1,
findPrevWhitespace(contents, modifiersSourceStart - 1,
findPrevNonWhitespace(contents, modifiersSourceStart - 1, sourceStart - 1)));
if (e instanceof CtModifiable) {
setModifiersPosition((CtModifiable) e, modifiersSourceStart, modifiersSourceEnd);
if (e instanceof CtModifiable modifiable) {
setModifiersPosition(modifiable, modifiersSourceStart, modifiersSourceEnd);
}
if (modifiersSourceEnd < modifiersSourceStart) {
//there is no modifier
Expand Down Expand Up @@ -570,44 +572,26 @@ private void setModifiersPosition(CtModifiable e, int start, int end) {
char[] contents = jdtTreeBuilder.getContextBuilder().getCompilationUnitContents();

Set<CtExtendedModifier> modifiers = e.getExtendedModifiers();
Map<String, CtExtendedModifier> explicitModifiersByName = new HashMap<>();
Map<ModifierKind, CtExtendedModifier> explicitModifiersByKind = new HashMap<>();
for (CtExtendedModifier modifier: modifiers) {
if (modifier.isImplicit()) {
modifier.setPosition(cf.createPartialSourcePosition(cu));
continue;
}
if (explicitModifiersByName.put(modifier.getKind().toString(), modifier) != null) {
throw new SpoonException("The modifier " + modifier.getKind().toString() + " found twice");
if (explicitModifiersByKind.put(modifier.getKind(), modifier) != null) {
throw new SpoonException("The modifier " + modifier.getKind().toString() + " was found twice");
}
}

//move end after the last char
end++;
while (start < end && explicitModifiersByName.size() > 0) {
int o1 = findNextNonWhitespace(contents, end - 1, start);
if (o1 == -1) {
break;
}
int o2 = findNextWhitespace(contents, end - 1, o1);
if (o2 == -1) {
o2 = end;
}

// this is the index into the modifier char array snippet, so must be +o1 if >-1
int chevronIndex = ArrayUtils.indexOf(Arrays.copyOfRange(contents, o1, o2), '<');
if (chevronIndex > 0) {
o2 = o1 + chevronIndex;
}

String modifierName = String.valueOf(contents, o1, o2 - o1);
CtExtendedModifier modifier = explicitModifiersByName.remove(modifierName);
if (modifier != null) {
modifier.setPosition(cf.createSourcePosition(cu, o1, o2 - 1, jdtTreeBuilder.getContextBuilder().getCompilationUnitLineSeparatorPositions()));
}
start = o2;
}
if (explicitModifiersByName.size() > 0) {
throw new SpoonException("Position of CtExtendedModifiers: [" + String.join(", ", explicitModifiersByName.keySet()) + "] not found in " + String.valueOf(contents, start, end - start));
extractor.collectModifiers(
contents,
start,
Math.max(start, end) + 1, // move end after the last char, fixup weird end positions
explicitModifiersByKind,
(modStart, modEnd) -> cf.createSourcePosition(cu, modStart, modEnd, cu.getLineSeparatorPositions())
);
if (!explicitModifiersByKind.isEmpty()) {
throw new SpoonException("Position of CtExtendedModifiers: " + explicitModifiersByKind.keySet() + " not found in " + String.valueOf(contents, start, end - start));
}
}

Expand Down
100 changes: 100 additions & 0 deletions src/main/java/spoon/support/util/internal/lexer/CharRemapper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* SPDX-License-Identifier: (MIT OR CECILL-C)
*
* Copyright (C) 2006-2023 INRIA and contributors
*
* Spoon is available either under the terms of the MIT License (see LICENSE-MIT.txt) or the Cecill-C License (see LICENSE-CECILL-C.txt). You as the user are entitled to choose the terms under which to adopt Spoon.
*/
package spoon.support.util.internal.lexer;

import java.util.Arrays;

/**
* A helper class to deal with unicode escapes.
*/
class CharRemapper {
private final char[] content;
private final int start;
private final int end;
private int[] positionRemap;

CharRemapper(char[] content, int start, int end) {
this.content = content;
this.start = start;
this.end = end;
}

/**
* {@return the sub-array from start to end of the original char array with unicode escapes replaced}
*/
char[] remapContent() {
char[] chars = new char[this.end - this.start]; // approximate
int t = 0;
boolean escape = false;
for (int i = this.start; i < this.end; i++, t++) {
if (!escape && this.content[i] == '\\' && this.end > i + 5 && this.content[i + 1] == 'u') {
int utf16 = parseHex(i + 2);
if (utf16 >= 0) {
chars[t] = (char) utf16;
i += 5;
if (this.positionRemap == null) {
this.positionRemap = createPositionRemap(chars);
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a comment stating why this is 6? And maybe a short comment at the top saying that you are first building a map from index -> skip value to next char and then accumulate it at the bottom or something in that spirit?

this.positionRemap[t] = 6;
continue;
}
}
if (this.content[i] == '\\') {
if (escape) {
escape = false;
} else if (this.end > i + 1 && this.content[i + 1] == '\\') {
escape = true;
}
}
Comment on lines +47 to +53
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not quite sure what this is doing. Maybe rename escape to escapeBackslash or just handle it inline and increment i by one in that branch and skip the loop iteration? At present I am not exactly sure what this is doing.

chars[t] = this.content[i];
}
if (t == chars.length) {
return chars;
}
// otherwise, we encountered a unicode sequence
this.positionRemap[0] += this.start;
Arrays.parallelPrefix(this.positionRemap, Integer::sum);
return Arrays.copyOf(chars, t);
}

int remapPosition(int index) {
if (this.positionRemap == null) {
return index + this.start;
}
if (index == 0) {
return this.start;
}
return this.positionRemap[index - 1];
}

private int[] createPositionRemap(char[] chars) {
int[] remap = new int[chars.length];
Arrays.fill(remap, 1);
return remap;
}

private int parseHex(int start) {
int result = 0;
for (int i = start; i < start + 4; i++) {
result *= 16;
char c = this.content[i];
if ('0' <= c && '9' >= c) {
result += c - '0';
} else {
c |= ' '; // lowercase potential letter
if ('a' <= c && 'f' >= c) {
result += (c - 'a') + 10;
continue;
}
// not a valid symbol, mark result
result = Integer.MIN_VALUE;
SirYwell marked this conversation as resolved.
Show resolved Hide resolved
}
}
return result;
}
}
79 changes: 79 additions & 0 deletions src/main/java/spoon/support/util/internal/lexer/JavaKeyword.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* SPDX-License-Identifier: (MIT OR CECILL-C)
*
* Copyright (C) 2006-2023 INRIA and contributors
*
* Spoon is available either under the terms of the MIT License (see LICENSE-MIT.txt) or the Cecill-C License (see LICENSE-CECILL-C.txt). You as the user are entitled to choose the terms under which to adopt Spoon.
*/
package spoon.support.util.internal.lexer;

/**
* Valid Java (contextual) keywords
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But VAR for example is missing, so not all contextual?

*/
enum JavaKeyword {
ABSTRACT,
ASSERT,
BOOLEAN,
BREAK,
BYTE,
CASE,
CATCH,
CHAR,
CLASS,
CONTINUE,
DEFAULT,
DO,
DOUBLE,
ELSE,
EXTENDS,
FALSE,
FINAL,
FINALLY,
FLOAT,
FOR,
IF,
IMPLEMENTS,
IMPORT,
INSTANCEOF,
INT,
INTERFACE,
LONG,
NATIVE,
NEW,
NON_SEALED {
@Override
public String toString() {
return "non-sealed";
}
},
NULL,
PACKAGE,
PERMITS,
PRIVATE,
PROTECTED,
PUBLIC,
RECORD,
RETURN,
SEALED,
SHORT,
STATIC,
STRICTFP,
SUPER,
SWITCH,
SYNCHRONIZED,
THIS,
THROW,
THROWS,
TRANSIENT,
TRUE,
TRY,
VOID,
VOLATILE,
WHILE,
YIELD;

@Override
public String toString() {
return name().toLowerCase();
}
}
Loading