Skip to content

Commit

Permalink
Pad slashes in /regex/ syntax to provide parity with $regex syntax (#933
Browse files Browse the repository at this point in the history
)
  • Loading branch information
PrashanthCorp authored Nov 13, 2018
1 parent 14a59f0 commit c7ef1f1
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 10 deletions.
32 changes: 22 additions & 10 deletions src/mongo/MongoScrapbook.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,7 @@ class FindMongoCommandsVisitor extends MongoVisitor<MongoCommand[]> {
const argAsObject = this.contextToObject(ctx);
const argText = EJSON.stringify(argAsObject);
lastCommand.arguments.push(argText);
let removeDuplicatedBackslash = /\\{4}(?=[0-9bwds.*])/gi;
/*
We remove duplicate backslashes due the behavior of '\b' - \b in a regex denotes word boundary, while \b in a string denotes backspace.
$regex syntax uses a string. Strings require slashes to be escaped, while /regex/ does not. Eg. /abc+\b/ is equivalent to {$regex: "abc+\\b"}.
{$regex: "abc+\b"} with an unescaped slash gets parsed as {$regex: <EOF>}. The user can only type '\\b' (which is encoded as '\\\\b').
We need to convert this appropriately. Other special characters (\n, \t, \r) don't carry significance in regexes - we don't handle those
What the regex does: '\\{4}' looks for the escaped slash 4 times. Lookahead checks if the character being escaped has a special meaning.
*/
let escapeHandled = argText.replace(removeDuplicatedBackslash, `\\\\`);
let escapeHandled = this.deduplicateEscapesForRegex(argText);
let ejsonParsed = {};
try {
ejsonParsed = EJSON.parse(escapeHandled);
Expand Down Expand Up @@ -404,7 +396,8 @@ class FindMongoCommandsVisitor extends MongoVisitor<MongoCommand[]> {
// It is intended for the errors thrown here to be handled by the catch block.
let tokenObject = new RegExp(pattern, flags);
tokenObject = tokenObject;
return { $regex: pattern, $options: flags };
// we are passing back a $regex annotation, hence we ensure parity with the $regex syntax
return { $regex: this.regexToStringNotation(pattern), $options: flags };
} catch (error) { //User may not have finished typing
let err: IParsedError = parseError(error);
this.addErrorToCommand(err, ctx);
Expand All @@ -419,4 +412,23 @@ class FindMongoCommandsVisitor extends MongoVisitor<MongoCommand[]> {
command.errors.push(currentErrorDesc);
}

private regexToStringNotation(pattern: string): string {
// The equivalence:
// /ker\b/ <=> $regex: "ker\\b", /ker\\b/ <=> "ker\\\\b"
return pattern.replace(/\\([0-9a-z.*])/i, '\\\\$1');
}

private deduplicateEscapesForRegex(argAsString: string) {
let removeDuplicatedBackslash = /\\{4}([0-9a-z.*])/gi;
/*
We remove duplicate backslashes due the behavior of '\b' - \b in a regex denotes word boundary, while \b in a string denotes backspace.
$regex syntax uses a string. Strings require slashes to be escaped, while /regex/ does not. Eg. /abc+\b/ is equivalent to {$regex: "abc+\\b"}.
{$regex: "abc+\b"} with an unescaped slash gets parsed as {$regex: <EOF>}. The user can only type '\\b' (which is encoded as '\\\\b').
We need to convert this appropriately. Other special characters (\n, \t, \r) don't carry significance in regexes - we don't handle those
What the regex does: '\\{4}' looks for the escaped slash 4 times. Lookahead checks if the character being escaped has a special meaning.
*/
let escapeHandled = argAsString.replace(removeDuplicatedBackslash, `\\\\$1`);
return escapeHandled;
}

}
32 changes: 32 additions & 0 deletions test/mongoGetCommand.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,38 @@ suite("scrapbook parsing Tests", () => {
assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "^(hello?= world).*[^0-9]+|(world\\b\\*){0,2}$"]);
});

test("test regular expression parsing interoperability - word break", () => {
let text1 = `db.test1.beep.find({ sku: /ker\\b/g })`; // equivalent to user typing out /ker\b/
let command1 = getCommandFromTextAtLocation(text1, new Position(0, 0));
let generatedRegExp1 = (<any>command1.argumentObjects[0]).sku;
let text2 = `db.test1.beep.find({ sku: {$regex: "ker\\\\b", $options: "g"} })`;
let command2 = getCommandFromTextAtLocation(text2, new Position(0, 0));
let generatedRegExp2 = (<any>command2.argumentObjects[0]).sku;
assert.deepEqual([generatedRegExp1.flags, generatedRegExp1.source], ["g", "ker\\b"]);
assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "ker\\b"]);
});

test("test regular expression parsing interoperability - newline", () => {
let text1 = `db.test1.beep.find({ sku: /ker\\n/g })`; // equivalent to user typing out /ker\n/
let command1 = getCommandFromTextAtLocation(text1, new Position(0, 0));
let generatedRegExp1 = (<any>command1.argumentObjects[0]).sku;
let text2 = `db.test1.beep.find({ sku: {$regex: "ker\\\\n", $options: "g"} })`;
let command2 = getCommandFromTextAtLocation(text2, new Position(0, 0));
let generatedRegExp2 = (<any>command2.argumentObjects[0]).sku;
assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "ker\\n"]);
assert.deepEqual([generatedRegExp1.flags, generatedRegExp1.source], ["g", "ker\\n"]);
});
test("test regular expression parsing interoperability - carriage return", () => {
let text1 = `db.test1.beep.find({ sku: /ker\\r/g })`; // equivalent to user typing out /ker\r/
let command1 = getCommandFromTextAtLocation(text1, new Position(0, 0));
let generatedRegExp1 = (<any>command1.argumentObjects[0]).sku;
let text2 = `db.test1.beep.find({ sku: {$regex: "ker\\\\r", $options: "g"} })`;
let command2 = getCommandFromTextAtLocation(text2, new Position(0, 0));
let generatedRegExp2 = (<any>command2.argumentObjects[0]).sku;
assert.deepEqual([generatedRegExp1.flags, generatedRegExp1.source], ["g", "ker\\r"]);
assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "ker\\r"]);
});

test("test regular expressions - only pattern, no flags", () => {
let text = `db.test1.beep.find({ sku: { $regex: "789$" } })`;
let command = getCommandFromTextAtLocation(text, new Position(0, 0));
Expand Down

0 comments on commit c7ef1f1

Please sign in to comment.