From b97d1f9e1786c40e9801aad0a1937edb2200b626 Mon Sep 17 00:00:00 2001 From: Mercury13 Date: Sun, 23 Apr 2023 14:27:25 +0300 Subject: [PATCH] #172 finally decapped --- AutoBuilder/data.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/AutoBuilder/data.cpp b/AutoBuilder/data.cpp index 188f7b000..b0162b500 100644 --- a/AutoBuilder/data.cpp +++ b/AutoBuilder/data.cpp @@ -993,6 +993,7 @@ const std::unordered_map dictionary { enum class Exf { CPONLY = 1, ///< Works if codepoint is present (Sun=star, sun=weather) MIXCASE = 2, ///< Mixed case: for lowercase letter convert to small + LEAVE_BY_CONDITION = 4, ///< Leave as is under a special condition (see charsConditionalLeave) }; struct Exception { @@ -1287,7 +1288,6 @@ const std::unordered_map exceptions{ // Yiii EX("Yi syllable iteration mark") - /// @todo [textbase] A9BC is “ĕ” // Misc letters EX2("A", Exf::MIXCASE) EX2("B", Exf::MIXCASE) @@ -1298,7 +1298,7 @@ const std::unordered_map exceptions{ EX2("C", Exf::MIXCASE) EX("Ca") EX("Cha") - /// @todo [textbase] “Chi”, “Pi” in Kana upper, in Newa lower + EX2("Chi", Exf::LEAVE_BY_CONDITION) EX("Chha") EX("soft Da") EX("soft Dda") @@ -1309,6 +1309,7 @@ const std::unordered_map exceptions{ EX("Dda") EX("Ddha") EX("Dha") + { "ĕ", Exception{ "Ĕ", {} } }, // A9BC EX("Ei") EX2("F", Exf::MIXCASE) EX("Fu") @@ -1362,8 +1363,7 @@ const std::unordered_map exceptions{ EX2("M", Exf::MIXCASE) EX("Ma") EX("subjoined Ma") - /// @todo [textbase] in Indics “Maa” is upper, in Taml supp lower - EX("Maa") + EX2("Maa", Exf::LEAVE_BY_CONDITION) EX("logosyllabic Muwa") EX2("N", Exf::MIXCASE) EX("hard Na") @@ -1375,6 +1375,7 @@ const std::unordered_map exceptions{ EX("Nna") EX2("P", Exf::MIXCASE) EX("Pha") + EX2("Pi", Exf::LEAVE_BY_CONDITION) EX2("Q", Exf::MIXCASE) EX("Qa") EX2("R", Exf::MIXCASE) @@ -1948,6 +1949,14 @@ const std::unordered_set charsEgyptianHatch { }; +/// One method of homonym disambiguation: these chars are left as is +/// while the rest are decapped 
by dictionary under Exf::LEAVE_BY_CONDITION +const std::unordered_set charsConditionalLeave { + 0x11451, // Newa digit One = chi + 0x11454, // Newa digit Four = pi + 0x11fc8, // Tamil fraction One twentieth = maa +}; + /// @todo [langs] Stopped at Canadian syllabics const std::set langNames { "also Cornish", // корнский (Великобритания) @@ -2348,6 +2357,10 @@ std::string decapitalize( if (itEx->second.flags.have(Exf::MIXCASE) && flags.have(Dcfg::LOCASE)) { return str::toLower(itEx->second.r); } + if (itEx->second.flags.have(Exf::LEAVE_BY_CONDITION) + && charsConditionalLeave.contains(cp)) { + return std::string{x}; + } return std::string(itEx->second.r); } }