From 545226757a4e65a383aa187e56a5e671ecfe5e52 Mon Sep 17 00:00:00 2001
From: Sascha Brawer
Date: Fri, 17 May 2024 15:51:03 +0200
Subject: [PATCH] Add temporary test on LineMerger within production pipeline

This is an experiment, trying to find the cause of this bug:
https://github.com/brawer/wikidata-qrank/issues/40
---
 cmd/qrank-builder/itemsignals.go | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/cmd/qrank-builder/itemsignals.go b/cmd/qrank-builder/itemsignals.go
index 3691c30..805ed42 100644
--- a/cmd/qrank-builder/itemsignals.go
+++ b/cmd/qrank-builder/itemsignals.go
@@ -192,6 +192,32 @@ func buildItemSignals(ctx context.Context, pageviews []string, sites *map[string
 		scannerNames = append(scannerNames, pv)
 	}
 
+	// TODO: This is just a hack to investigate a bug. Remove it.
+	// https://github.com/brawer/wikidata-qrank/issues/40
+	if true {
+		merg := NewLineMerger(scanners, scannerNames)
+		logger.Printf("BuildItemSignals(): start testing LineMerger")
+		var lastLine string
+		var numOrderErrors int64
+		var numLines int64
+		for merg.Advance() {
+			numLines += 1
+			line := merg.Line()
+			// Report only the first 10 ordering violations to keep the log short.
+			if lastLine >= line && numOrderErrors < 10 {
+				logger.Printf(`LineMerger broken: "%s" after "%s"`, line, lastLine)
+				numOrderErrors += 1
+			}
+			lastLine = line
+		}
+		if err := merg.Err(); err != nil {
+			logger.Printf("LineMerger failed: %v", err)
+			return time.Time{}, err
+		}
+		logger.Printf("BuildItemSignals(): finished testing LineMerger, returned %d lines", numLines)
+		return time.Time{}, nil
+	}
+
 	// Produce a stream of ItemSignals, sorted by Wikidata item ID.
 	sigChan := make(chan extsort.SortType, 10000)
 	config := extsort.DefaultConfig()