Skip to content

Commit

Permalink
Check if URLs are using underscore character
Browse files Browse the repository at this point in the history
  • Loading branch information
StJudeWasHere committed May 8, 2024
1 parent 5c58f87 commit 9a26d06
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 1 deletion.
1 change: 1 addition & 0 deletions internal/issues/errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,5 @@ const (
ErrorExternalLinkRedirect // Pages with external links to redirect URLs
ErrorExternalLinkBroken // Pages with broken external links
ErrorTimeout // Pages that timed out
ErrorUnderscoreURL // Pages whose URL has underscore characters
)
3 changes: 3 additions & 0 deletions internal/issues/page/reporters.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,8 @@ func GetAllReporters() []*models.PageIssueReporter {

// Add timeout issue reporter
NewTimeoutReporter(),

// Add URL issue reporters
NewUnderscoreURLReporter(),
}
}
24 changes: 24 additions & 0 deletions internal/issues/page/url.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package page

import (
"net/http"
"strings"

"github.com/stjudewashere/seonaut/internal/issues/errors"
"github.com/stjudewashere/seonaut/internal/models"

"golang.org/x/net/html"
)

// NewUnderscoreURLReporter builds the models.PageIssueReporter for the
// errors.ErrorUnderscoreURL issue type. Its callback reports the issue when
// the page report's URL contains at least one underscore character; the HTML
// node and the response header arguments are not consulted.
func NewUnderscoreURLReporter() *models.PageIssueReporter {
	reporter := &models.PageIssueReporter{ErrorType: errors.ErrorUnderscoreURL}

	// The whole URL string is searched, not only its path component.
	reporter.Callback = func(pageReport *models.PageReport, htmlNode *html.Node, header *http.Header) bool {
		return strings.Contains(pageReport.URL, "_")
	}

	return reporter
}
58 changes: 58 additions & 0 deletions internal/issues/page/url_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package page_test

import (
"net/http"
"testing"

"github.com/stjudewashere/seonaut/internal/issues/errors"
"github.com/stjudewashere/seonaut/internal/issues/page"
"github.com/stjudewashere/seonaut/internal/models"

"golang.org/x/net/html"
)

// TestNoUnderscoreURL checks the UnderscoreURL reporter against a URL that
// does not contain an underscore character.
// The reporter should not report the issue.
func TestNoUnderscoreURL(t *testing.T) {
	pageReport := &models.PageReport{
		URL:        "https://example.com/some-url",
		Crawled:    true,
		MediaType:  "text/html",
		StatusCode: 200,
		Title:      "not empty description",
	}

	reporter := page.NewUnderscoreURLReporter()
	if reporter.ErrorType != errors.ErrorUnderscoreURL {
		t.Errorf("TestNoUnderscoreURL: error type is not correct")
	}

	// Empty node and header values are passed to satisfy the callback signature.
	reportsIssue := reporter.Callback(pageReport, &html.Node{}, &http.Header{})

	if reportsIssue {
		t.Errorf("TestNoUnderscoreURL: reportsIssue should be false")
	}
}

// TestUnderscoreURL checks the UnderscoreURL reporter against a URL that
// contains an underscore character.
// The reporter should report the issue.
func TestUnderscoreURL(t *testing.T) {
	pageReport := &models.PageReport{
		URL:        "https://example.com/some_url",
		Crawled:    true,
		MediaType:  "text/html",
		StatusCode: 200,
		Title:      "not empty description",
	}

	reporter := page.NewUnderscoreURLReporter()
	if reporter.ErrorType != errors.ErrorUnderscoreURL {
		t.Errorf("TestUnderscoreURL: error type is not correct")
	}

	// Empty node and header values are passed to satisfy the callback signature.
	reportsIssue := reporter.Callback(pageReport, &html.Node{}, &http.Header{})

	if !reportsIssue {
		t.Errorf("TestUnderscoreURL: reportsIssue should be true")
	}
}
1 change: 1 addition & 0 deletions migrations/0054_underscore_url_issue.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Down migration: remove the issue type row with id 63 from issue_types.
DELETE FROM issue_types WHERE id = 63;
1 change: 1 addition & 0 deletions migrations/0054_underscore_url_issue.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Up migration: register the ERROR_UNDERSCORE_URL issue type (id 63, priority 3).
-- The string literal uses single quotes so it is never parsed as an identifier
-- (double quotes denote identifiers when the ANSI_QUOTES SQL mode is enabled).
INSERT INTO issue_types (id, type, priority) VALUES (63, 'ERROR_UNDERSCORE_URL', 3);
5 changes: 4 additions & 1 deletion translations/translation.en.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,7 @@ ERROR_EXTERNAL_LINK_BROKEN: Pages with broken external links
ERROR_EXTERNAL_LINK_BROKEN_DESC: Broken external links don't impact your site's search engine rankings, but they can frustrate users by leading them to non-existent pages. Fix this by removing or updating the link with the correct URL.

ERROR_TIMEOUT: Timeout
ERROR_TIMEOUT_DESC: Pages that timed out when our crawler attempted to access them. When this happens, search engine crawlers may fail to access and index the content, suggesting potential server issues or temporary problems hindering visibility in search results.
ERROR_TIMEOUT_DESC: Pages that timed out when our crawler attempted to access them. When this happens, search engine crawlers may fail to access and index the content, suggesting potential server issues or temporary problems hindering visibility in search results.

ERROR_UNDERSCORE_URL: URLs with underscore characters
ERROR_UNDERSCORE_URL_DESC: It is usually not recommended to use underscore characters in URLs, as some search engines will ignore them and treat that part as a single word. Use a dash instead as a word separator character.

0 comments on commit 9a26d06

Please sign in to comment.