Skip to content

Commit

Permalink
Fix wrong conversion of unicode chars (#7419)
Browse files Browse the repository at this point in the history
* fix wrong character conversion in unicode to latex formatter

* fix checkstyle issue

* fix typo, add test for unicode to latex formatter

* add issue link to changelog
  • Loading branch information
tmrd993 authored Feb 3, 2021
1 parent bdfd8c8 commit 242a494
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where the file path is invisible in dark theme. [#7382](https://github.com/JabRef/jabref/issues/7382)
- We fixed an issue where the secondary sorting is not working for some special fields. [#7015](https://github.com/JabRef/jabref/issues/7015)
- We fixed an issue where changing the font size makes the font size field too small. [#7085](https://github.com/JabRef/jabref/issues/7085)
- We fixed an issue where the Unicode to LaTeX formatter produced wrong results for characters with a code point higher than `Character.MAX_VALUE`. [#7387](https://github.com/JabRef/jabref/issues/7387)

### Removed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ public String format(String text) {
Integer cpNext = result.codePointAt(i + 1);
String code = HTMLUnicodeConversionMaps.ESCAPED_ACCENTS.get(cpNext);
if (code == null) {
sb.append((char) cpCurrent);
// skip next index to avoid reading surrogate as a separate char
if (!Character.isBmpCodePoint(cpCurrent)) {
i++;
}
sb.appendCodePoint(cpCurrent);
} else {
sb.append("{\\").append(code).append('{').append((char) cpCurrent).append("}}");
consumed = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,8 @@ public class HTMLUnicodeConversionMaps {
{"64259", "", "ffi"}, // ffi ligature (which LaTeX solves by itself)
{"64260", "", "ffl"}, // ffl ligature (which LaTeX solves by itself)
{"119978", "Oscr", "$\\mathcal{O}$"}, // script capital O -- possibly use \mathscr
{"119984", "Uscr", "$\\mathcal{U}$"} // script capital U -- possibly use \mathscr
{"119984", "Uscr", "$\\mathcal{U}$"}, // script capital U -- possibly use \mathscr
{"120598", "", "$\\epsilon$"}, // mathematical italic epsilon U+1D716 -- requires amsmath

};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ void formatMultipleUnicodeCharacters() {
assertEquals("{{\\aa}}{\\\"{a}}{\\\"{o}}", formatter.format("\u00E5\u00E4\u00F6"));
}

@Test
void formatHighCodepointUnicodeCharacter() {
    // The input is a single supplementary code point written as a UTF-16
    // surrogate pair (high surrogate D835, low surrogate DF16); the formatter
    // must treat the pair as one character rather than two separate chars.
    String surrogatePairInput = "\uD835\uDF16";
    String expectedLatex = "$\\epsilon$";

    String actualLatex = formatter.format(surrogatePairInput);

    assertEquals(expectedLatex, actualLatex);
}

@Test
void formatExample() {
assertEquals("M{\\\"{o}}nch", formatter.format(formatter.getExampleInput()));
Expand Down

0 comments on commit 242a494

Please sign in to comment.