From 5cc8b8586d2f3c36ab1e082d29b9effc658faf9a Mon Sep 17 00:00:00 2001
From: Sascha Brawer
Date: Fri, 14 Jun 2024 16:12:56 +0200
Subject: [PATCH] Handle pagelinks dumps without namespace column

https://github.com/brawer/wikidata-qrank/issues/10
---
 cmd/qrank-builder/pagelinks.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/cmd/qrank-builder/pagelinks.go b/cmd/qrank-builder/pagelinks.go
index 65e81a8..5b54979 100644
--- a/cmd/qrank-builder/pagelinks.go
+++ b/cmd/qrank-builder/pagelinks.go
@@ -140,12 +140,18 @@ func readPageLinks(ctx context.Context, site *WikiSite, property string, dumps s
 		}
 
 		fromPage := row[fromPageCol]
-		namespace := row[namespaceCol]
 		title := row[titleCol]
 
+		// Depending on the Wikimedia software version and the wiki project,
+		// the pagelinks dump may not always have a namespace column.
+		var namespace string
+		if namespaceCol >= 0 {
+			namespace = row[namespaceCol]
+		}
+
 		var nsPrefix string
-		if namespace != "0" {
-			if ns, found := site.Namespaces[row[namespaceCol]]; found && ns.Localized != "" {
+		if len(namespace) > 0 && namespace != "0" {
+			if ns, found := site.Namespaces[namespace]; found && ns.Localized != "" {
 				nsPrefix = ns.Localized + ":"
 			}
 		}