Skip to content

Commit

Permalink
Resolve language prefixes in interwiki links
Browse files Browse the repository at this point in the history
  • Loading branch information
brawer committed May 25, 2024
1 parent a176d08 commit 14d60fa
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 69 deletions.
25 changes: 0 additions & 25 deletions cmd/qrank-builder/interwikimap.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,28 +82,3 @@ func FetchInterwikiMap(client *http.Client) (InterwikiMap, error) {

return result, nil
}

// Build returns the interwiki map for a wiki site such as `rmwikibooks`.
// The returned map is useful for resolving links across wikis,
// for example when interpreting the SQL dump of the `iwlinks` table.
//
// Keys of m are matched against up to three scopes, and the matched
// scope prefix is stripped from the key in the result:
//
//   - "__global:" entries;
//   - "_<type>:" entries, where <type> is the value stored in m under
//     "__sites:<siteid>" (this scope is skipped if m has no such entry);
//   - "<siteid>:" entries.
//
// When the same interwiki prefix is defined in more than one scope, the
// more specific scope wins: site-specific over site type over global.
// Scopes are applied one after another in that order, so the result does
// not depend on Go's randomized map iteration order.
func (m InterwikiMap) Build(siteid string) map[string]string {
	result := make(map[string]string, 200)

	// Ordered from least to most specific; later scopes overwrite
	// entries written by earlier ones.
	scopes := make([]string, 0, 3)
	scopes = append(scopes, "__global:")
	if siteType, ok := m["__sites:"+siteid]; ok {
		scopes = append(scopes, fmt.Sprintf("_%s:", siteType))
	}
	scopes = append(scopes, siteid+":")

	// The scope loop must be the outer one: iterating over m first would
	// let map iteration order decide which scope wins on a conflict.
	for _, scope := range scopes {
		for key, value := range m {
			if k, found := strings.CutPrefix(key, scope); found {
				result[k] = value
			}
		}
	}

	return result
}
40 changes: 0 additions & 40 deletions cmd/qrank-builder/interwikimap_test.go

This file was deleted.

30 changes: 29 additions & 1 deletion cmd/qrank-builder/wikisites.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,30 @@ func ReadWikiSites(dumps string, iwmap *InterwikiMap) (*WikiSites, error) {
}
}

projectInterwikiMaps := make(map[string]map[string]*WikiSite, 20)
for key, project := range *iwmap {
// '__sites:rmwikibooks' => 'wikibooks'
if wiki, found := strings.CutPrefix(key, "__sites:"); found {
if _, siteFound := sites.Sites[wiki]; siteFound {
pm, pmFound := projectInterwikiMaps[project]
if !pmFound {
pm = make(map[string]*WikiSite, 200)
projectInterwikiMaps[project] = pm
}
}
}
}
for project, langMap := range projectInterwikiMaps {
prefix := "_" + project + ":" // match eg "_wikibooks:rm"
for key, domain := range *iwmap {
if lang, found := strings.CutPrefix(key, prefix); found {
if site, siteFound := sites.Domains[domain]; siteFound {
langMap[lang] = site
}
}
}
}

for _, site := range sites.Sites {
localInterwikiMap := make(map[string]*WikiSite, 10)
k := site.Key + ":" // eg "rmwiktionary:"
Expand All @@ -123,8 +147,12 @@ func ReadWikiSites(dumps string, iwmap *InterwikiMap) (*WikiSites, error) {
}
}

// TODO: also add interwikimap for _wiki, _wiktionary etc.
site.InterwikiMaps = append(site.InterwikiMaps, localInterwikiMap)
if project, found := (*iwmap)["__sites:"+site.Key]; found {
if langMap, langMapFound := projectInterwikiMaps[project]; langMapFound {
site.InterwikiMaps = append(site.InterwikiMaps, langMap)
}
}
site.InterwikiMaps = append(site.InterwikiMaps, globalInterwikiMap)
}
}
Expand Down
8 changes: 5 additions & 3 deletions cmd/qrank-builder/wikisites_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@ func TestReadWikiSites(t *testing.T) {
prefix string
want string
}{
{"rmwiki", "d", "wikidatawiki"}, // __global:d => wikidatawiki
{"rmwiki", "b", "rmwikibooks"}, // rmwiki:b => rmwikibooks
{"rmwiki", "unknown", ""}, // no such prefix
{"rmwiki", "d", "wikidatawiki"}, // __global:d => wikidatawiki
{"rmwiki", "b", "rmwikibooks"}, // rmwiki:b => rmwikibooks
{"rmwiki", "unknown", ""}, // no such prefix
{"rmwiki", "rm", "rmwiki"}, // _wiki:rm => rmwiki
{"rmwikibooks", "rm", "rmwikibooks"}, // _wikibooks:rm => rmwikibooks
} {
got := ""
if target := sites.Sites[tc.wiki].ResolveInterwikiPrefix(tc.prefix); target != nil {
Expand Down

0 comments on commit 14d60fa

Please sign in to comment.