diff --git a/src/mongo/MongoScrapbook.ts b/src/mongo/MongoScrapbook.ts
index be056b78..1951d9aa 100644
--- a/src/mongo/MongoScrapbook.ts
+++ b/src/mongo/MongoScrapbook.ts
@@ -247,15 +247,7 @@ class FindMongoCommandsVisitor extends MongoVisitor {
                 const argAsObject = this.contextToObject(ctx);
                 const argText = EJSON.stringify(argAsObject);
                 lastCommand.arguments.push(argText);
-                let removeDuplicatedBackslash = /\\{4}(?=[0-9bwds.*])/gi;
-                /*
-                We remove duplicate backslashes due the behavior of '\b' - \b in a regex denotes word boundary, while \b in a string denotes backspace.
-                $regex syntax uses a string. Strings require slashes to be escaped, while /regex/ does not. Eg. /abc+\b/ is equivalent to {$regex: "abc+\\b"}.
-                {$regex: "abc+\b"} with an unescaped slash gets parsed as {$regex: }. The user can only type '\\b' (which is encoded as '\\\\b').
-                We need to convert this appropriately. Other special characters (\n, \t, \r) don't carry significance in regexes - we don't handle those
-                What the regex does: '\\{4}' looks for the escaped slash 4 times. Lookahead checks if the character being escaped has a special meaning.
-                */
-                let escapeHandled = argText.replace(removeDuplicatedBackslash, `\\\\`);
+                let escapeHandled = this.deduplicateEscapesForRegex(argText);
                 let ejsonParsed = {};
                 try {
                     ejsonParsed = EJSON.parse(escapeHandled);
@@ -404,7 +396,8 @@ class FindMongoCommandsVisitor extends MongoVisitor {
             // It is intended for the errors thrown here to be handled by the catch block.
             let tokenObject = new RegExp(pattern, flags);
             tokenObject = tokenObject;
-            return { $regex: pattern, $options: flags };
+            // we are passing back a $regex annotation, hence we ensure parity with the $regex syntax
+            return { $regex: this.regexToStringNotation(pattern), $options: flags };
         } catch (error) { //User may not have finished typing
             let err: IParsedError = parseError(error);
             this.addErrorToCommand(err, ctx);
@@ -419,4 +412,23 @@ class FindMongoCommandsVisitor extends MongoVisitor {
         command.errors.push(currentErrorDesc);
     }
 
+    private regexToStringNotation(pattern: string): string {
+        // Doubles the backslash of EVERY escape (global flag) so that deduplicateEscapesForRegex, which is also global, restores each one. The equivalence:
+        // /ker\b/ <=> $regex: "ker\\b", /ker\\b/ <=> "ker\\\\b"
+        return pattern.replace(/\\([0-9a-z.*])/gi, '\\\\$1');
+    }
+
+    private deduplicateEscapesForRegex(argAsString: string): string {
+        let removeDuplicatedBackslash = /\\{4}([0-9a-z.*])/gi;
+        /*
+        We remove duplicate backslashes due to the behavior of '\b' - \b in a regex denotes word boundary, while \b in a string denotes backspace.
+        $regex syntax uses a string. Strings require slashes to be escaped, while /regex/ does not. Eg. /abc+\b/ is equivalent to {$regex: "abc+\\b"}.
+        {$regex: "abc+\b"} with an unescaped slash gets parsed as {$regex: }. The user can only type '\\b' (which is encoded as '\\\\b').
+        We need to convert this appropriately. The same conversion is applied to every escaped letter, digit, '.' and '*' (so \n, \t, \r, \d, \w, ... are covered as well).
+        What the regex does: '\\{4}' looks for the escaped slash 4 times. The capture group holds the escaped character so that the replacement can put it back after the two remaining slashes.
+        */
+        let escapeHandled = argAsString.replace(removeDuplicatedBackslash, `\\\\$1`);
+        return escapeHandled;
+    }
+
 }
diff --git a/test/mongoGetCommand.test.ts b/test/mongoGetCommand.test.ts
index 8d7f7605..cc0d2e1c 100644
--- a/test/mongoGetCommand.test.ts
+++ b/test/mongoGetCommand.test.ts
@@ -746,6 +746,38 @@ suite("scrapbook parsing Tests", () => {
         assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "^(hello?= world).*[^0-9]+|(world\\b\\*){0,2}$"]);
     });
 
+    test("test regular expression parsing interoperability - word break", () => {
+        let text1 = `db.test1.beep.find({ sku: /ker\\b/g })`; // equivalent to user typing out /ker\b/
+        let command1 = getCommandFromTextAtLocation(text1, new Position(0, 0));
+        let generatedRegExp1 = (command1.argumentObjects[0]).sku;
+        let text2 = `db.test1.beep.find({ sku: {$regex: "ker\\\\b", $options: "g"} })`;
+        let command2 = getCommandFromTextAtLocation(text2, new Position(0, 0));
+        let generatedRegExp2 = (command2.argumentObjects[0]).sku;
+        assert.deepEqual([generatedRegExp1.flags, generatedRegExp1.source], ["g", "ker\\b"]);
+        assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "ker\\b"]);
+    });
+
+    test("test regular expression parsing interoperability - newline", () => {
+        let text1 = `db.test1.beep.find({ sku: /ker\\n/g })`; // equivalent to user typing out /ker\n/
+        let command1 = getCommandFromTextAtLocation(text1, new Position(0, 0));
+        let generatedRegExp1 = (command1.argumentObjects[0]).sku;
+        let text2 = `db.test1.beep.find({ sku: {$regex: "ker\\\\n", $options: "g"} })`;
+        let command2 = getCommandFromTextAtLocation(text2, new Position(0, 0));
+        let generatedRegExp2 = (command2.argumentObjects[0]).sku;
+        assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "ker\\n"]);
+        assert.deepEqual([generatedRegExp1.flags, generatedRegExp1.source], ["g", "ker\\n"]);
+    });
+    test("test regular expression parsing interoperability - carriage return", () => {
+        let text1 = `db.test1.beep.find({ sku: /ker\\r/g })`; // equivalent to user typing out /ker\r/
+        let command1 = getCommandFromTextAtLocation(text1, new Position(0, 0));
+        let generatedRegExp1 = (command1.argumentObjects[0]).sku;
+        let text2 = `db.test1.beep.find({ sku: {$regex: "ker\\\\r", $options: "g"} })`;
+        let command2 = getCommandFromTextAtLocation(text2, new Position(0, 0));
+        let generatedRegExp2 = (command2.argumentObjects[0]).sku;
+        assert.deepEqual([generatedRegExp1.flags, generatedRegExp1.source], ["g", "ker\\r"]);
+        assert.deepEqual([generatedRegExp2.flags, generatedRegExp2.source], ["g", "ker\\r"]);
+    });
+
     test("test regular expressions - only pattern, no flags", () => {
         let text = `db.test1.beep.find({ sku: { $regex: "789$" } })`;
         let command = getCommandFromTextAtLocation(text, new Position(0, 0));