Skip to content

Commit

Permalink
tools/internal/parser: refactor to separate text processing from pars…
Browse files Browse the repository at this point in the history
…er main logic (publicsuffix#1999)

* tools/internal/parser: add a text processing helper

This helper contains no parsing "business logic", it just provides some
text processing helpers while keeping track of source locations for
future reporting.

This commit just adds the text helper, but doesn't use it yet.

* tools/internal/parser: rewrite parsing logic using the text processing helper

* tools/internal/parser: fix documentation inconsistency for newSource.

* tools/internal/parser: make Source use 0-indexed half open intervals

This matches the semantics of Go slices, and makes all code that uses
Source easier to reason about. Source.LocationString() still prints
1-indexed closed intervals, which is the convention for telling a
human where errors are.

* tools/internal/parser: merge source and Source into one.
  • Loading branch information
danderson authored Jun 19, 2024
1 parent 45d3d06 commit 7eb6ada
Show file tree
Hide file tree
Showing 7 changed files with 725 additions and 298 deletions.
4 changes: 2 additions & 2 deletions tools/internal/parser/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type UnknownSectionMarker struct {
}

// Error implements the error interface. The message quotes the text
// of the unrecognized section marker and reports its source location
// via Line.LocationString.
//
// NOTE(review): this is a diff view rendered without +/- markers, so
// the two return statements below appear to be the before and after
// forms of one changed line, not two live statements.
func (e UnknownSectionMarker) Error() string {
// Presumably the removed line: passes Line.Raw through trimComment.
return fmt.Sprintf("unknown kind of section marker %q at %s", trimComment(e.Line.Raw), e.Line.LocationString())
// Presumably the added line: takes the text from Line.Text() instead.
return fmt.Sprintf("unknown kind of section marker %q at %s", e.Line.Text(), e.Line.LocationString())
}

// UnterminatedSectionMarker reports that a section marker is missing
Expand All @@ -65,7 +65,7 @@ type UnterminatedSectionMarker struct {
}

// Error implements the error interface. The message quotes the text
// of the section marker that lacks its trailing "===" and reports its
// source location via Line.LocationString.
//
// NOTE(review): this is a diff view rendered without +/- markers, so
// the two return statements below appear to be the before and after
// forms of one changed line, not two live statements.
func (e UnterminatedSectionMarker) Error() string {
// Presumably the removed line: passes Line.Raw through trimComment.
return fmt.Sprintf(`section marker %q at %s is missing trailing "==="`, trimComment(e.Line.Raw), e.Line.LocationString())
// Presumably the added line: takes the text from Line.Text() instead.
return fmt.Sprintf(`section marker %q at %s is missing trailing "==="`, e.Line.Text(), e.Line.LocationString())
}

// MissingEntityName reports that a block of suffixes does not have a
Expand Down
2 changes: 1 addition & 1 deletion tools/internal/parser/exceptions.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import "strings"
// downgradeToWarning inspects the concrete type of e. For a
// MissingEntityEmail it returns the result of sourceIsExempted applied
// to the missingEmail list and the text of the error's Suffixes; for
// every other error type it returns false.
//
// NOTE(review): judging by the name, a true result means the error is
// reported as a warning instead — confirm against the caller.
func downgradeToWarning(e error) bool {
switch v := e.(type) {
case MissingEntityEmail:
// NOTE(review): diff view without +/- markers — the two returns below
// appear to be the before (Suffixes.Raw) and after (Suffixes.Text())
// forms of one changed line.
return sourceIsExempted(missingEmail, v.Suffixes.Raw)
return sourceIsExempted(missingEmail, v.Suffixes.Text())
}
// Any error type not listed above is never downgraded.
return false
}
Expand Down
24 changes: 0 additions & 24 deletions tools/internal/parser/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,30 +64,6 @@ func (f *File) SuffixBlocksInSection(name string) []Suffixes {
return ret
}

// Source couples a chunk of source text with the range of lines it
// occupies in the original file.
type Source struct {
	// StartLine is the number of the first line covered by this
	// chunk. Numbering is 1-based (the first line of a file is line
	// 1, not 0), matching how text editors conventionally number
	// lines.
	StartLine int
	// EndLine is the number of the last line covered by this chunk.
	// The range is inclusive: the line named by EndLine is part of
	// the chunk.
	EndLine int
	// Raw holds the unparsed source text of the chunk.
	Raw string
}

// LocationString renders the location of s as a short string:
// "line N" when the chunk sits on a single line, "lines N-M" when it
// spans several.
func (s Source) LocationString() string {
	if first, last := s.StartLine, s.EndLine; first != last {
		return fmt.Sprintf("lines %d-%d", first, last)
	}
	return fmt.Sprintf("line %d", s.StartLine)
}

// A Block is a parsed chunk of a PSL file.
// In Parse's output, a Block is one of the following concrete types:
// Comment, StartSection, EndSection, Suffixes.
Expand Down
Loading

0 comments on commit 7eb6ada

Please sign in to comment.