Skip to content

Commit

Permalink
Merge branch 'open-dev' into open
Browse files Browse the repository at this point in the history
  • Loading branch information
StJudeWasHere committed Oct 3, 2024
2 parents 0633ac0 + 1a8bf4a commit 382d964
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 9 deletions.
42 changes: 42 additions & 0 deletions internal/services/html_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,45 @@ func TestRobotsNone(t *testing.T) {
t.Error("NewPageReport Nofollow should be true")
}
}

// TestSrcset checks that image URLs are collected from both the src and the
// srcset attributes of an <img> element, in document order. The srcset value
// used here contains URLs with embedded commas as well as stray leading,
// trailing and doubled commas between candidates.
func TestSrcset(t *testing.T) {
	u, err := url.Parse(testURL)
	if err != nil {
		// Without a valid base URL the rest of the test is meaningless.
		t.Fatal(err)
	}

	images := []string{
		"https://example.com/logo.png",
		"https://example.com/image,c_fill,w_576.jpg",
		"https://example.com/image,c_fill,w_276.jpg",
		"https://example.com/image,c_fill,w_76.jpg",
	}
	body := []byte(
		`<html>
	<head></head>
	<body>
		<img src="` + images[0] + `"
			srcset=",` + images[1] + ` 576w, ,` + images[2] + ` 276w,` + images[3] + `,">
	</body>
</html>`)
	statusCode := 200
	headers := &http.Header{
		"Content-Type": []string{"text/html"},
	}

	pageReport, _, err := services.NewHTMLParser(u, statusCode, headers, body, int64(len(body)))
	if err != nil {
		t.Fatal(err)
	}

	// Stop here on a length mismatch: the loop below indexes
	// pageReport.Images by position and would panic on a shorter slice.
	if len(pageReport.Images) != len(images) {
		t.Fatalf("pagereport images len want: %d Got: %d", len(images), len(pageReport.Images))
	}

	for n, want := range images {
		if got := pageReport.Images[n].URL; got != want {
			t.Errorf("pageReport image %d should be %s. Got: %s", n, want, got)
		}
	}
}
31 changes: 25 additions & 6 deletions internal/services/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"net/http"
"net/url"
"strings"
"unicode"

"github.com/stjudewashere/seonaut/internal/models"

Expand Down Expand Up @@ -586,19 +587,37 @@ func (p *Parser) headersLocation() string {
func (p *Parser) parseSrcSet(srcset string) []string {
var imageURLs []string

srcset = strings.Trim(srcset, " ,")
if srcset == "" {
return imageURLs
}

imageSet := strings.Split(srcset, ",")
for _, s := range imageSet {
i := strings.Split(s, " ")

if len(i) > 0 {
imageURLs = append(imageURLs, strings.TrimSpace(i[0]))
// URLs in srcset strings can contain an optional descriptor.
// Also take into account URLs with commas in them.
parsingURL := true
var currentURL strings.Builder
for _, char := range srcset {
if parsingURL {
if unicode.IsSpace(char) {
if currentURL.Len() > 0 {
parsingURL = false
}
} else if currentURL.Len() > 0 || char != ',' {
currentURL.WriteRune(char)
}
} else {
if char == ',' {
parsingURL = true
imageURLs = append(imageURLs, strings.TrimSpace(currentURL.String()))
currentURL.Reset()
}
}
}

if currentURL.Len() > 0 {
imageURLs = append(imageURLs, strings.TrimSpace(currentURL.String()))
}

return imageURLs
}

Expand Down
4 changes: 2 additions & 2 deletions web/static/echarts.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion web/templates/dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ <h2>Analyze Raw Data</h2>
</div>
</div>

<script src="/resources/echarts.min.js?v=5.5.0"></script>
<script src="/resources/echarts.min.js?v=5.5.1"></script>
<script type="text/javascript">

// ISSUES TIMELINE CHART
Expand Down

0 comments on commit 382d964

Please sign in to comment.