diff --git a/CHANGELOG.md b/CHANGELOG.md index c298411e432..5bf9a766b4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue where the file path is invisible in dark theme. [#7382](https://github.com/JabRef/jabref/issues/7382) - We fixed an issue where the secondary sorting is not working for some special fields. [#7015](https://github.com/JabRef/jabref/issues/7015) - We fixed an issue where changing the font size makes the font size field too small. [#7085](https://github.com/JabRef/jabref/issues/7085) +- We fixed an issue where the Unicode to LaTeX formatter produced wrong results for characters with a codepoint higher than Character.MAX_VALUE. [#7387](https://github.com/JabRef/jabref/issues/7387) ### Removed diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java index cc5fc9f350e..ee7a663da76 100644 --- a/src/main/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java @@ -38,7 +38,11 @@ public String format(String text) { Integer cpNext = result.codePointAt(i + 1); String code = HTMLUnicodeConversionMaps.ESCAPED_ACCENTS.get(cpNext); if (code == null) { - sb.append((char) cpCurrent); + // skip next index to avoid reading surrogate as a separate char + if (!Character.isBmpCodePoint(cpCurrent)) { + i++; + } + sb.appendCodePoint(cpCurrent); } else { sb.append("{\\").append(code).append('{').append((char) cpCurrent).append("}}"); consumed = true; diff --git a/src/main/java/org/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java b/src/main/java/org/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java index e1b0ec49016..f26952c9e0a 100644 --- a/src/main/java/org/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java +++ 
b/src/main/java/org/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java @@ -760,7 +760,8 @@ public class HTMLUnicodeConversionMaps { {"64259", "", "ffi"}, // ffi ligature (which LaTeX solves by itself) {"64260", "", "ffl"}, // ffl ligature (which LaTeX solves by itself) {"119978", "Oscr", "$\\mathcal{O}$"}, // script capital O -- possibly use \mathscr - {"119984", "Uscr", "$\\mathcal{U}$"} // script capital U -- possibly use \mathscr + {"119984", "Uscr", "$\\mathcal{U}$"}, // script capital U -- possibly use \mathscr + {"120598", "", "$\\epsilon$"}, // mathematical italic epsilon U+1D716 -- requires amsmath }; diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatterTest.java index 5b87c87661d..865c4a8da79 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatterTest.java @@ -24,6 +24,11 @@ void formatMultipleUnicodeCharacters() { assertEquals("{{\\aa}}{\\\"{a}}{\\\"{o}}", formatter.format("\u00E5\u00E4\u00F6")); } + @Test + void formatHighCodepointUnicodeCharacter() { + assertEquals("$\\epsilon$", formatter.format("\uD835\uDF16")); + } + @Test void formatExample() { assertEquals("M{\\\"{o}}nch", formatter.format(formatter.getExampleInput()));