forked from osm-fr/osmose-backend
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathName_UpperCase.py
104 lines (91 loc) · 4.87 KB
/
Name_UpperCase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#-*- coding: utf-8 -*-
###########################################################################
## ##
## Copyrights Frédéric Rodrigo 2016 ##
## ##
## This program is free software: you can redistribute it and/or modify ##
## it under the terms of the GNU General Public License as published by ##
## the Free Software Foundation, either version 3 of the License, or ##
## (at your option) any later version. ##
## ##
## This program is distributed in the hope that it will be useful, ##
## but WITHOUT ANY WARRANTY; without even the implied warranty of ##
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ##
## GNU General Public License for more details. ##
## ##
## You should have received a copy of the GNU General Public License ##
## along with this program. If not, see <http://www.gnu.org/licenses/>. ##
## ##
###########################################################################
from modules.OsmoseTranslation import T_
from plugins.Plugin import Plugin
import regex as re
from plugins.modules.name_suggestion_index import whitelist_from_nsi
# Fully uppercase words that are accepted inside a name, grouped by the
# country code they apply to (the key is looked up with the first two
# characters of the configured country).
UpperCase_WhiteList = {
    "FR": [
        "CNFPT",
        "COSEC",
        "EHPAD",
        "MEDEF",
        "URSSAF",
    ],
}
class Name_UpperCase(Plugin):
    """Report `name` tags that contain a run of five or more capital letters.

    A name is a candidate when it contains at least five consecutive
    uppercase or titlecase letters and does NOT contain five consecutive
    letters taken from ``IVXCDLM`` (treated as a Roman numeral).  Candidates
    are then compared against a per-country whitelist built from
    ``UpperCase_WhiteList`` and the name-suggestion-index before an issue of
    class 803 is emitted.
    """

    not_for = ["CU", "JP"]

    def init(self, logger):
        """Register issue class 803, compile the patterns and build the whitelist.

        :param logger: forwarded unchanged to ``Plugin.init``.
        """
        Plugin.init(self, logger)
        self.errors[803] = self.def_class(item = 5010, level = 1, tags = ['name', 'fix:chair'],
            title = T_('Name with uppercase'),
            detail = T_(
'''This feature is tagged with a name which contains a fully uppercase word (or words).
This is not expected for the majority of named features.'''),
            trap = T_(
'''While uncommon, it is possible for a name to have uppercase words.
This is particularly the case for corporate/branded locations as well as acronyms.''')
        )

        # Both patterns start with ".*" and are used with match(), so they
        # succeed when the run occurs anywhere on the first line of the name.
        self.UpperTitleCase = re.compile(r".*[\p{Lu}\p{Lt}]{5,}")
        self.RomanNumber = re.compile(r".*[IVXCDLM]{5,}")

        # The "country" key may be absent or explicitly set to None; in both
        # cases there is no country-specific whitelist to build.
        country = self.father.config.options.get("country")
        if country:
            country = country[:2]
            self.whitelist = set(UpperCase_WhiteList.get(country, []))
            # Only NSI names that would themselves be flagged are useful here.
            self.whitelist.update(
                name for name in whitelist_from_nsi(country.lower())
                if self._is_flaggable(name))
        else:
            self.whitelist = set()

        # Individual words of every whitelist entry, computed once so that
        # node() does not rebuild this for each word of each name.
        self._whitelist_words = self._split_whitelist(self.whitelist)

    @staticmethod
    def _split_whitelist(whitelist):
        """Return the set of whitespace-separated words found in *whitelist*."""
        return frozenset(word for entry in whitelist for word in entry.split())

    def _is_flaggable(self, name):
        """Return True if *name* has an uppercase run and no Roman-numeral run."""
        return bool(self.UpperTitleCase.match(name)) and not self.RomanNumber.match(name)

    def node(self, data, tags):
        """Check the `name` tag of an object.

        :param data: unused by this check.
        :param tags: mapping of tag key to tag value.
        :return: a list with one issue dict (class 803) when the name is
            reported, otherwise an empty list.
        """
        if "name" not in tags:
            return []

        name = tags["name"]
        # Cheap first pass: most names have no uppercase run at all.
        if not self._is_flaggable(name):
            return []

        if any(entry in name for entry in self.whitelist):
            # Some whitelist entry occurs in the name: drop every word that
            # belongs to a whitelist entry and test what is left.
            words = getattr(self, "_whitelist_words", None)
            if words is None:
                # init() was bypassed; derive the words from the whitelist.
                words = self._split_whitelist(self.whitelist)
            remainder = " ".join(word for word in name.split() if word not in words)
            if not self._is_flaggable(remainder):
                return []

        return [{"class": 803, "text": T_("Concerns tag: `{0}`", '='.join(['name', name])) }]

    def way(self, data, tags, nds):
        """Apply the same check as :meth:`node`; *nds* is not used."""
        return self.node(data, tags)
###########################################################################
from plugins.Plugin import TestPluginCommon
class Test(TestPluginCommon):
    """Exercise Name_UpperCase with the country option set to "FR"."""

    def test(self):
        """Check names that must be reported and names that must be accepted."""
        a = Name_UpperCase(None)
        # Minimal stand-in for the analyser object: init() only reads
        # father.config.options.
        class _config:
            options = {"country": "FR"}
        class father:
            config = _config()
        a.father = father()
        a.init(None)

        # Names that must raise an issue, through both node() and way().
        for t in [{u"name": u"COL TRÈS HAUTTT"},
                  {u"name": u"EHPAD MAGEUSCULE"},
                  {u"name": u"ICI PARIS XL"}, # in NSI, but not for FR
                  {u"name": u"AÇDZÞΣSSὩΙST"},
                  {u"name": u"NORMA PARIS"},
                 ]:
            self.check_err(a.node(None, t), t)
            self.check_err(a.way(None, t, None), t)

        # Names that must be accepted; way() is checked too so that both
        # entry points are covered symmetrically with the loop above.
        for t in [{u"name": u"Col des Champs XIIVVVIM"},
                  {u"name": u"EHPAD La Madelon"},
                  {u"name": u"NORMA"}, # in NSI
                  {u"name": u"NORMA Paris"},
                  {u"name": u"ƻאᎯᚦ京"},
                 ]:
            assert not a.node(None, t), t
            assert not a.way(None, t, None), t