Skip to content

Commit

Permalink
tools/internal/parser: detect and report section markers within suffi…
Browse files Browse the repository at this point in the history
…x blocks (publicsuffix#2011)
  • Loading branch information
danderson authored Jun 25, 2024
1 parent 3b1b0d0 commit d6e0e8a
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 4 deletions.
10 changes: 10 additions & 0 deletions tools/internal/parser/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ func (e LeadingWhitespaceError) Error() string {
return fmt.Sprintf("%s has leading whitespace", e.Line.LocationString())
}

// SectionInSuffixBlock reports that a comment within a block of
// suffixes contains a section delimiter.
type SectionInSuffixBlock struct {
Line Source
}

func (e SectionInSuffixBlock) Error() string {
return fmt.Sprintf("section delimiters are not allowed in suffix block comment at %s", e.Line.LocationString())
}

// UnclosedSectionError reports that a file section was not closed
// properly before EOF.
type UnclosedSectionError struct {
Expand Down
17 changes: 13 additions & 4 deletions tools/internal/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,25 @@ func (p *parser) processSuffixes(block, header, rest Source) {
// TODO: s.Header should be a single Source for the entire
// comment.
s.Header = append(s.Header, line)
// Trim the comment prefix in two steps, because some PSL
// comments don't have whitepace between the // and the
// following text.
metadataSrc = append(metadataSrc, strings.TrimSpace(strings.TrimPrefix(line.Text(), "//")))
if strings.HasPrefix(line.Text(), sectionMarkerPrefix) {
p.addError(SectionInSuffixBlock{line})
} else {
// Trim the comment prefix in two steps, because some PSL
// comments don't have whitepace between the // and the
// following text.
metadataSrc = append(metadataSrc, strings.TrimSpace(strings.TrimPrefix(line.Text(), "//")))
}
}

// rest consists of suffixes and possibly inline comments.
commentLine := func(line Source) bool { return strings.HasPrefix(line.Text(), "//") }
rest.forEachRun(commentLine, func(block Source, isComment bool) {
if isComment {
for _, line := range block.lineSources() {
if strings.HasPrefix(line.Text(), sectionMarkerPrefix) {
p.addError(SectionInSuffixBlock{line})
}
}
s.InlineComments = append(s.InlineComments, block)
} else {
// TODO: parse entries properly, for how we just
Expand Down
112 changes: 112 additions & 0 deletions tools/internal/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,118 @@ func TestParser(t *testing.T) {
},
},

{
name: "suffixes_with_section_markers_in_header",
psl: byteLines(
"// Just some suffixes",
"// ===BEGIN ICANN DOMAINS===",
"com",
"org",
"",
"// ===END ICANN DOMAINS===",
),
want: File{
Blocks: []Block{
Suffixes{
Source: mkSrc(0,
"// Just some suffixes",
"// ===BEGIN ICANN DOMAINS===",
"com",
"org",
),
Header: []Source{
mkSrc(0, "// Just some suffixes"),
mkSrc(1, "// ===BEGIN ICANN DOMAINS==="),
},
Entries: []Source{
mkSrc(2, "com"),
mkSrc(3, "org"),
},
Entity: "Just some suffixes",
},
EndSection{
Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
Name: "ICANN DOMAINS",
},
},
Errors: []error{
SectionInSuffixBlock{
Line: mkSrc(1, "// ===BEGIN ICANN DOMAINS==="),
},
// Note: trying to gracefully parse the
// StartSection would require splitting the suffix
// block in two, which would need more code and
// also result in additional spurious validation
// errors. Instead this tests that section markers
// within suffix blocks are ignored for section
// validation.
UnstartedSectionError{
End: EndSection{
Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
Name: "ICANN DOMAINS",
},
},
},
},
},

{
name: "suffixes_with_section_markers_inline",
psl: byteLines(
"// Just some suffixes",
"com",
"// ===BEGIN ICANN DOMAINS===",
"org",
"",
"// ===END ICANN DOMAINS===",
),
want: File{
Blocks: []Block{
Suffixes{
Source: mkSrc(0,
"// Just some suffixes",
"com",
"// ===BEGIN ICANN DOMAINS===",
"org",
),
Header: []Source{
mkSrc(0, "// Just some suffixes"),
},
Entries: []Source{
mkSrc(1, "com"),
mkSrc(3, "org"),
},
InlineComments: []Source{
mkSrc(2, "// ===BEGIN ICANN DOMAINS==="),
},
Entity: "Just some suffixes",
},
EndSection{
Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
Name: "ICANN DOMAINS",
},
},
Errors: []error{
SectionInSuffixBlock{
Line: mkSrc(2, "// ===BEGIN ICANN DOMAINS==="),
},
// Note: trying to gracefully parse the
// StartSection would require splitting the suffix
// block in two, which would need more code and
// also result in additional spurious validation
// errors. Instead this tests that section markers
// within suffix blocks are ignored for section
// validation.
UnstartedSectionError{
End: EndSection{
Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
Name: "ICANN DOMAINS",
},
},
},
},
},

{
name: "suffixes_with_unstructured_header",
psl: byteLines(
Expand Down

0 comments on commit d6e0e8a

Please sign in to comment.