Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix reading past 'endstream' token when parsing #54

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 32 additions & 15 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ func (this *PdfReader) skipComments(r *bufio.Reader) error {
return nil
}

// Advance reader so that whitespace is ignored
func (this *PdfReader) skipWhitespace(r *bufio.Reader) error {
// skip advances the reader past any leading run of bytes that appear in skipBytes.
func (this *PdfReader) skip(r *bufio.Reader, skipBytes []byte) error {
var err error
var b byte

Expand All @@ -139,12 +139,20 @@ func (this *PdfReader) skipWhitespace(r *bufio.Reader) error {
return errors.Wrap(err, "Failed to read byte")
}

if b == ' ' || b == '\n' || b == '\r' || b == '\t' {
skipFound := false
for _, skipByte := range skipBytes {
if skipByte == b {
skipFound = true
break
}
}

if skipFound {
continue
} else {
r.UnreadByte()
break
}

r.UnreadByte()
break
}

return nil
Expand All @@ -154,14 +162,15 @@ func (this *PdfReader) skipWhitespace(r *bufio.Reader) error {
func (this *PdfReader) readToken(r *bufio.Reader) (string, error) {
var err error

var buffer bytes.Buffer
// If there is a token available on the stack, pop it out and return it.
if len(this.stack) > 0 {
var popped string
popped, this.stack = this.stack[len(this.stack)-1], this.stack[:len(this.stack)-1]
return popped, nil
}

err = this.skipWhitespace(r)
err = this.skip(r, whitespaceBytes())
if err != nil {
return "", errors.Wrap(err, "Failed to skip whitespace")
}
Expand Down Expand Up @@ -201,9 +210,7 @@ func (this *PdfReader) readToken(r *bufio.Reader) (string, error) {
return this.readToken(r)

default:
// FIXME this may not be performant to create new strings for each byte
// Is it probably better to create a buffer and then convert to a string at the end.
str := string(b)
buffer.WriteByte(b)

loop:
for {
Expand All @@ -216,10 +223,10 @@ func (this *PdfReader) readToken(r *bufio.Reader) (string, error) {
r.UnreadByte()
break loop
default:
str += string(b)
buffer.WriteByte(b)
}
}
return str, nil
return buffer.String(), nil
}

return "", nil
Expand Down Expand Up @@ -668,7 +675,8 @@ func (this *PdfReader) resolveObject(objSpec *PdfValue) (*PdfValue, error) {
if token == "stream" {
result.Type = PDF_TYPE_STREAM

err = this.skipWhitespace(r)
// we just want to skip until after first CRLF
err = this.skip(r, newlineBytes())
if err != nil {
return nil, errors.Wrap(err, "Failed to skip whitespace")
}
Expand Down Expand Up @@ -808,6 +816,7 @@ func (this *PdfReader) findXref() error {
func (this *PdfReader) readXref() error {
var err error

whitespace := whitespaceBytes()
// Create new bufio.Reader
r := bufio.NewReader(this.f)

Expand Down Expand Up @@ -908,7 +917,7 @@ func (this *PdfReader) readXref() error {

startObject := index[0]

err = this.skipWhitespace(r)
err = this.skip(r, whitespace)
if err != nil {
return errors.Wrap(err, "Failed to skip whitespace")
}
Expand Down Expand Up @@ -939,7 +948,7 @@ func (this *PdfReader) readXref() error {
return errors.New("Expected next token to be: stream, got: " + t)
}

err = this.skipWhitespace(r)
err = this.skip(r, whitespace)
if err != nil {
return errors.Wrap(err, "Failed to skip whitespace")
}
Expand Down Expand Up @@ -1630,3 +1639,11 @@ func (this *PdfReader) read() error {

return nil
}

// whitespaceBytes returns the bytes that are skipped as whitespace when
// passed to skip. A newly allocated slice is returned on every call, so
// callers may modify the result without affecting each other.
func whitespaceBytes() []byte {
	return []byte{
		' ',  // 0x20 space
		'\n', // 0x0A line feed
		'\f', // 0x0C form feed
		'\r', // 0x0D carriage return
		'\t', // 0x09 horizontal tab
		0,    // 0x00 NUL
	}
}

// newlineBytes returns the two end-of-line bytes, carriage return followed
// by line feed, in a newly allocated slice.
func newlineBytes() []byte {
	return []byte{
		0x0D, // carriage return
		0x0A, // line feed
	}
}