Skip to content

Commit

Permalink
Make LineMerger work on an interface
Browse files Browse the repository at this point in the history
Our own PageSignalsScanner is essentially the same as a bufio.Scanner,
but without serialization. We could either write (quite a bit of) code
to serialize the lines to a buffer and then consume it right away in
a bufio.Scanner, or we abstract the functionality into a common interface.
Opting for the latter, because it's conceptually simpler. Probably also
more efficient (no serialization), but that's not the reason for the choice.
  • Loading branch information
brawer committed May 14, 2024
1 parent 2ad50cb commit f9622ad
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 14 deletions.
16 changes: 11 additions & 5 deletions cmd/qrank-builder/linemerger.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
package main

import (
"bufio"
"bytes"
"container/heap"
"io"
)

// Merges the lines of multiple io.Readers whose content is in sorted order.
Expand All @@ -17,11 +15,19 @@ type LineMerger struct {
inited bool
}

func NewLineMerger(r []io.Reader) *LineMerger {
// LineScanner abstracts a source of lines that is consumed one line at a
// time. Its method set is a subset of bufio.Scanner's, so a *bufio.Scanner
// satisfies it directly; our own pageSignalsScanner implements it as well.
// The method semantics are expected to follow those of bufio.Scanner.
type LineScanner interface {
	// Scan advances to the next line and reports whether one is available.
	Scan() bool

	// Text returns the current line as a string.
	Text() string

	// Bytes returns the current line as a byte slice.
	Bytes() []byte

	// Err returns the error, if any, that caused scanning to stop.
	Err() error
}

func NewLineMerger(r []LineScanner) *LineMerger {
m := &LineMerger{}
m.heap = make(lineMergerHeap, 0, len(r))
for _, rr := range r {
item := &mergee{scanner: bufio.NewScanner(rr)}
item := &mergee{scanner: rr}
if item.scanner.Scan() {
m.heap = append(m.heap, item)
}
Expand Down Expand Up @@ -72,7 +78,7 @@ func (m *LineMerger) Line() string {
}

// mergee holds the per-input state used while merging: the scanner that
// supplies that input's lines, plus an index.
// NOTE(review): this is a rendered diff, so the two scanner lines below are
// presumably the old (*bufio.Scanner) and new (LineScanner) declaration of a
// single field rather than two fields — confirm against the committed file.
type mergee struct {
scanner *bufio.Scanner
scanner LineScanner
index int // presumably this item's position in lineMergerHeap — TODO confirm
}

Expand Down
17 changes: 9 additions & 8 deletions cmd/qrank-builder/linemerger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@
package main

import (
"bufio"
"errors"
"io"
"strings"
"testing"
)

func TestLineMerger(t *testing.T) {
m := NewLineMerger([]io.Reader{
strings.NewReader("C1\nD1"),
strings.NewReader("B2\nE2"),
strings.NewReader("A3\nB3"),
strings.NewReader(""),
strings.NewReader("B5"),
m := NewLineMerger([]LineScanner{
bufio.NewScanner(strings.NewReader("C1\nD1")),
bufio.NewScanner(strings.NewReader("B2\nE2")),
bufio.NewScanner(strings.NewReader("A3\nB3")),
bufio.NewScanner(strings.NewReader("")),
bufio.NewScanner(strings.NewReader("B5")),
})
result := make([]string, 0, 5)
for m.Advance() {
Expand All @@ -42,7 +42,8 @@ func (e *errReader) Read(p []byte) (n int, err error) {
}

func TestLineMergerError(t *testing.T) {
m := NewLineMerger([]io.Reader{&errReader{}})
reader := &errReader{}
m := NewLineMerger([]LineScanner{bufio.NewScanner(reader)})
if m.Advance() {
t.Error("expected m.Advance()=false, got true")
return
Expand Down
7 changes: 6 additions & 1 deletion cmd/qrank-builder/qviews.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package main

import (
"bufio"
"bytes"
"context"
"encoding/binary"
Expand Down Expand Up @@ -158,7 +159,11 @@ func writeQViewCount(w io.Writer, entity int64, count int64) error {

func readQViewInputs(testRun bool, inputs []io.Reader, ch chan<- extsort.SortType, ctx context.Context) error {
defer close(ch)
merger := NewLineMerger(inputs)
scanners := make([]LineScanner, 0, len(inputs))
for _, input := range inputs {
scanners = append(scanners, bufio.NewScanner(input))
}
merger := NewLineMerger(scanners)
var lastKey string
var entity, numViews, numLinesRead int64
for merger.Advance() {
Expand Down

0 comments on commit f9622ad

Please sign in to comment.