tools/internal/parser: detect and report section markers within suffi…

…x blocks (publicsuffix#2011)
tawk · Jun 25, 2024 · d6e0e8a · d6e0e8a
1 parent 3b1b0d0
commit d6e0e8a
Show file tree

Hide file tree

Showing 3 changed files with 135 additions and 4 deletions.
diff --git a/tools/internal/parser/errors.go b/tools/internal/parser/errors.go
@@ -59,6 +59,16 @@ func (e LeadingWhitespaceError) Error() string {
 	return fmt.Sprintf("%s has leading whitespace", e.Line.LocationString())
 }
 
+// SectionInSuffixBlock reports that a comment within a block of
+// suffixes contains a section delimiter.
+type SectionInSuffixBlock struct {
+	Line Source
+}
+
+func (e SectionInSuffixBlock) Error() string {
+	return fmt.Sprintf("section delimiters are not allowed in suffix block comment at %s", e.Line.LocationString())
+}
+
 // UnclosedSectionError reports that a file section was not closed
 // properly before EOF.
 type UnclosedSectionError struct {

diff --git a/tools/internal/parser/parser.go b/tools/internal/parser/parser.go
@@ -93,16 +93,25 @@ func (p *parser) processSuffixes(block, header, rest Source) {
 		// TODO: s.Header should be a single Source for the entire
 		// comment.
 		s.Header = append(s.Header, line)
-		// Trim the comment prefix in two steps, because some PSL
-		// comments don't have whitepace between the // and the
-		// following text.
-		metadataSrc = append(metadataSrc, strings.TrimSpace(strings.TrimPrefix(line.Text(), "//")))
+		if strings.HasPrefix(line.Text(), sectionMarkerPrefix) {
+			p.addError(SectionInSuffixBlock{line})
+		} else {
+			// Trim the comment prefix in two steps, because some PSL
+			// comments don't have whitepace between the // and the
+			// following text.
+			metadataSrc = append(metadataSrc, strings.TrimSpace(strings.TrimPrefix(line.Text(), "//")))
+		}
 	}
 
 	// rest consists of suffixes and possibly inline comments.
 	commentLine := func(line Source) bool { return strings.HasPrefix(line.Text(), "//") }
 	rest.forEachRun(commentLine, func(block Source, isComment bool) {
 		if isComment {
+			for _, line := range block.lineSources() {
+				if strings.HasPrefix(line.Text(), sectionMarkerPrefix) {
+					p.addError(SectionInSuffixBlock{line})
+				}
+			}
 			s.InlineComments = append(s.InlineComments, block)
 		} else {
 			// TODO: parse entries properly, for how we just

diff --git a/tools/internal/parser/parser_test.go b/tools/internal/parser/parser_test.go
@@ -238,6 +238,118 @@ func TestParser(t *testing.T) {
 			},
 		},
 
+		{
+			name: "suffixes_with_section_markers_in_header",
+			psl: byteLines(
+				"// Just some suffixes",
+				"// ===BEGIN ICANN DOMAINS===",
+				"com",
+				"org",
+				"",
+				"// ===END ICANN DOMAINS===",
+			),
+			want: File{
+				Blocks: []Block{
+					Suffixes{
+						Source: mkSrc(0,
+							"// Just some suffixes",
+							"// ===BEGIN ICANN DOMAINS===",
+							"com",
+							"org",
+						),
+						Header: []Source{
+							mkSrc(0, "// Just some suffixes"),
+							mkSrc(1, "// ===BEGIN ICANN DOMAINS==="),
+						},
+						Entries: []Source{
+							mkSrc(2, "com"),
+							mkSrc(3, "org"),
+						},
+						Entity: "Just some suffixes",
+					},
+					EndSection{
+						Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
+						Name:   "ICANN DOMAINS",
+					},
+				},
+				Errors: []error{
+					SectionInSuffixBlock{
+						Line: mkSrc(1, "// ===BEGIN ICANN DOMAINS==="),
+					},
+					// Note: trying to gracefully parse the
+					// StartSection would require splitting the suffix
+					// block in two, which would need more code and
+					// also result in additional spurious validation
+					// errors. Instead this tests that section markers
+					// within suffix blocks are ignored for section
+					// validation.
+					UnstartedSectionError{
+						End: EndSection{
+							Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
+							Name:   "ICANN DOMAINS",
+						},
+					},
+				},
+			},
+		},
+
+		{
+			name: "suffixes_with_section_markers_inline",
+			psl: byteLines(
+				"// Just some suffixes",
+				"com",
+				"// ===BEGIN ICANN DOMAINS===",
+				"org",
+				"",
+				"// ===END ICANN DOMAINS===",
+			),
+			want: File{
+				Blocks: []Block{
+					Suffixes{
+						Source: mkSrc(0,
+							"// Just some suffixes",
+							"com",
+							"// ===BEGIN ICANN DOMAINS===",
+							"org",
+						),
+						Header: []Source{
+							mkSrc(0, "// Just some suffixes"),
+						},
+						Entries: []Source{
+							mkSrc(1, "com"),
+							mkSrc(3, "org"),
+						},
+						InlineComments: []Source{
+							mkSrc(2, "// ===BEGIN ICANN DOMAINS==="),
+						},
+						Entity: "Just some suffixes",
+					},
+					EndSection{
+						Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
+						Name:   "ICANN DOMAINS",
+					},
+				},
+				Errors: []error{
+					SectionInSuffixBlock{
+						Line: mkSrc(2, "// ===BEGIN ICANN DOMAINS==="),
+					},
+					// Note: trying to gracefully parse the
+					// StartSection would require splitting the suffix
+					// block in two, which would need more code and
+					// also result in additional spurious validation
+					// errors. Instead this tests that section markers
+					// within suffix blocks are ignored for section
+					// validation.
+					UnstartedSectionError{
+						End: EndSection{
+							Source: mkSrc(5, "// ===END ICANN DOMAINS==="),
+							Name:   "ICANN DOMAINS",
+						},
+					},
+				},
+			},
+		},
+
 		{
 			name: "suffixes_with_unstructured_header",
 			psl: byteLines(