Skip to content

Commit

Permalink
PageSignalMerger: Log how many records were processed and generated
Browse files — browse the repository at this point in the history
  • Loading branch information
brawer committed May 14, 2024
1 parent 87f5781 commit 6da7b75
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
17 changes: 12 additions & 5 deletions cmd/qrank-builder/pagesignals.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,10 @@ type pageSignalMerger struct {
numClaims int64
numIdentifiers int64
numSiteLinks int64

// Stats for logging.
inputRecords int64
outputRecords int64
}

func NewPageSignalMerger(w io.WriteCloser) *pageSignalMerger {
Expand All @@ -444,12 +448,13 @@ func NewPageSignalMerger(w io.WriteCloser) *pageSignalMerger {
// Input must be grouped by page (such as by sorting lines).
// Recognized line formats:
//
// "200,Q72": wikipage 200 is for Wikidata entity Q72
// "200,c=8": wikipage 200 has 8 claims in wikidatawiki
// "200,i=17": wikipage 200 has 17 identifiers in wikidatawiki
// "200,l=23": wikipage 200 has 23 sitelinks in wikidatawiki
// "200,s=830167": wikipage 200 has 830167 bytes in wikitext format
// "200,Q72": wikipage 200 is for Wikidata entity Q72
// "200,c=8": wikipage 200 has 8 claims in wikidatawiki
// "200,i=17": wikipage 200 has 17 identifiers in wikidatawiki
// "200,l=23": wikipage 200 has 23 sitelinks in wikidatawiki
// "200,s=830167": wikipage 200 has 830167 bytes in wikitext format
func (m *pageSignalMerger) Process(line string) error {
m.inputRecords += 1
pos := strings.IndexByte(line, ',')
page := line[0:pos]
if page != m.page {
Expand Down Expand Up @@ -493,6 +498,7 @@ func (m *pageSignalMerger) Close() error {
return err
}

logger.Printf("PageSignalMerger: processed %d → %d records", m.inputRecords, m.outputRecords)
return nil
}

Expand Down Expand Up @@ -523,6 +529,7 @@ func (m *pageSignalMerger) write() error {
}
buf.WriteByte('\n')
_, err = m.writer.Write(buf.Bytes())
m.outputRecords += 1
}

m.page = ""
Expand Down
1 change: 1 addition & 0 deletions cmd/qrank-builder/pagesignals_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ func storeFakePageSignals(id string, content string, s3 *FakeS3, t *testing.T) {
}

func TestPageSignalMerger(t *testing.T) {
logger = log.New(&bytes.Buffer{}, "", log.Lshortfile)
var buf strings.Builder
m := NewPageSignalMerger(NopWriteCloser(&buf))
for _, line := range []string{
Expand Down

0 comments on commit 6da7b75

Please sign in to comment.