From 14d60fad98a7287f4a4ea451a800d6c80249adf5 Mon Sep 17 00:00:00 2001 From: Sascha Brawer Date: Sat, 25 May 2024 15:27:59 +0200 Subject: [PATCH] Resolve language prefixes in interwiki links https://github.com/brawer/wikidata-qrank/issues/10 --- cmd/qrank-builder/interwikimap.go | 25 ---------------- cmd/qrank-builder/interwikimap_test.go | 40 -------------------------- cmd/qrank-builder/wikisites.go | 30 ++++++++++++++++++- cmd/qrank-builder/wikisites_test.go | 8 ++++-- 4 files changed, 34 insertions(+), 69 deletions(-) delete mode 100644 cmd/qrank-builder/interwikimap_test.go diff --git a/cmd/qrank-builder/interwikimap.go b/cmd/qrank-builder/interwikimap.go index 079067d..400349e 100644 --- a/cmd/qrank-builder/interwikimap.go +++ b/cmd/qrank-builder/interwikimap.go @@ -82,28 +82,3 @@ func FetchInterwikiMap(client *http.Client) (InterwikiMap, error) { return result, nil } - -// Build returns the interwikimap for a wiki site such as `rmwikibooks`. -// The returned map is useful for resolving links across wikis, -// for example when interpreting the SQL dump of the `iwlinks` table. -func (m InterwikiMap) Build(siteid string) map[string]string { - result := make(map[string]string, 200) - - prefixes := make([]string, 0, 3) - prefixes = append(prefixes, "__global:") - if siteType, ok := m["__sites:"+siteid]; ok { - prefix := fmt.Sprintf("_%s:", siteType) - prefixes = append(prefixes, prefix) - } - prefixes = append(prefixes, siteid+":") - for key, value := range m { - for _, prefix := range prefixes { - if strings.HasPrefix(key, prefix) { - k := key[len(prefix):len(key)] - result[k] = value - } - } - } - - return result -} diff --git a/cmd/qrank-builder/interwikimap_test.go b/cmd/qrank-builder/interwikimap_test.go deleted file mode 100644 index e685f8b..0000000 --- a/cmd/qrank-builder/interwikimap_test.go +++ /dev/null @@ -1,40 +0,0 @@ -// SPDX-FileCopyrightText: 2022 Sascha Brawer -// SPDX-License-Identifier: MIT - -package main - -import ( - "net/http" - "reflect" - "testing" -) - -func TestInterwikiMap(t *testing.T) { - client := &http.Client{Transport: &FakeWikiSite{}} - iwm, err := FetchInterwikiMap(client) - if err != nil { - t.Fatal(err) - } - - got := iwm.Build("rmwikibooks") - want := map[string]string{ - "advisory": "advisory.wikimedia.org", - "c": "commons.wikimedia.org", - "chapter": "rm.wikimedia.org", - "commons": "commons.wikimedia.org", - "d": "www.wikidata.org", - "de": "de.wikibooks.org", - "gsw": "als.wikibooks.org", - "metawiki": "meta.wikimedia.org", - "metawikimedia": "meta.wikimedia.org", - "rm": "rm.wikibooks.org", - "s": "rm.wikisource.org", - "v": "rm.wikiversity.org", - "voy": "rm.wikivoyage.org", - "w": "rm.wikipedia.org", - "wikt": "rm.wiktionary.org", - } - if !reflect.DeepEqual(got, want) { - t.Errorf("got %v, want %v", got, want) - } -} diff --git a/cmd/qrank-builder/wikisites.go b/cmd/qrank-builder/wikisites.go index e979601..389584b 100644 --- a/cmd/qrank-builder/wikisites.go +++ b/cmd/qrank-builder/wikisites.go @@ -112,6 +112,30 @@ func ReadWikiSites(dumps string, iwmap *InterwikiMap) (*WikiSites, error) { } } + projectInterwikiMaps := make(map[string]map[string]*WikiSite, 20) + for key, project := range *iwmap { + // '__sites:rmwikibooks' => 'wikibooks' + if wiki, found := strings.CutPrefix(key, "__sites:"); found { + if _, siteFound := sites.Sites[wiki]; siteFound { + pm, pmFound := projectInterwikiMaps[project] + if !pmFound { + pm = make(map[string]*WikiSite, 200) + projectInterwikiMaps[project] = pm + } + } + } + } + for project, langMap := range projectInterwikiMaps { + prefix := "_" + project + ":" // match eg "_wikibooks:rm" + for key, domain := range *iwmap { + if lang, found := strings.CutPrefix(key, prefix); found { + if site, siteFound := sites.Domains[domain]; siteFound { + langMap[lang] = site + } + } + } + } + for _, site := range sites.Sites { localInterwikiMap := make(map[string]*WikiSite, 10) k := site.Key + ":" // eg "rmwiktionary:" @@ -123,8 +147,12 @@ func ReadWikiSites(dumps string, iwmap *InterwikiMap) (*WikiSites, error) { } } - // TODO: also add interwikimap for _wiki, _wiktionary etc. site.InterwikiMaps = append(site.InterwikiMaps, localInterwikiMap) + if project, found := (*iwmap)["__sites:"+site.Key]; found { + if langMap, langMapFound := projectInterwikiMaps[project]; langMapFound { + site.InterwikiMaps = append(site.InterwikiMaps, langMap) + } + } site.InterwikiMaps = append(site.InterwikiMaps, globalInterwikiMap) } } diff --git a/cmd/qrank-builder/wikisites_test.go b/cmd/qrank-builder/wikisites_test.go index a397465..8d30624 100644 --- a/cmd/qrank-builder/wikisites_test.go +++ b/cmd/qrank-builder/wikisites_test.go @@ -50,9 +50,11 @@ func TestReadWikiSites(t *testing.T) { prefix string want string }{ - {"rmwiki", "d", "wikidatawiki"}, // __global:d => wikidatawiki - {"rmwiki", "b", "rmwikibooks"}, // rmwiki:b => rmwikibooks - {"rmwiki", "unknown", ""}, // no such prefix + {"rmwiki", "d", "wikidatawiki"}, // __global:d => wikidatawiki + {"rmwiki", "b", "rmwikibooks"}, // rmwiki:b => rmwikibooks + {"rmwiki", "unknown", ""}, // no such prefix + {"rmwiki", "rm", "rmwiki"}, // _wiki:rm => rmwiki + {"rmwikibooks", "rm", "rmwikibooks"}, // _wikibooks:rm => rmwikibooks } { got := "" if target := sites.Sites[tc.wiki].ResolveInterwikiPrefix(tc.prefix); target != nil {