# Broken Links Crawler #60
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that crawls keymapdb.com and reports broken links.
# Triggers: manual dispatch, every push to `master`, and a cron schedule.
name: Broken Links Crawler
on:
  workflow_dispatch:
  push:
    branches:
      - master
  schedule:
    # * is a special character in YAML so you have to quote this string.
    # Fires at minute 35 of hour 13 on the 5th and 19th day of every month
    # (GitHub evaluates schedule expressions in UTC).
    - cron: '35 13 5,19 * *'
jobs:
  crawl_for_broken_links:
    runs-on: ubuntu-latest
    name: Broken-Links-Crawler
    steps:
      - name: Debug Action
        # NOTE(review): the original reference was mangled to
        # `hmarr/[email protected]` by e-mail obfuscation, which is not a valid
        # `owner/repo@ref`. Action name and major tag are restored on a
        # best-effort basis -- confirm the exact pinned version before merging.
        uses: hmarr/debug-action@v2
      - name: Checking broken links
        id: check-broken-links
        # NOTE(review): same obfuscation damage as above
        # (`ScholliYT/[email protected]`). The inputs used below presumably match
        # the v3 line of this action -- confirm the exact pinned version.
        uses: ScholliYT/Broken-Links-Crawler-Action@v3
        with:
          # We would need a high `max_depth` for the crawler to naturally find
          # all the keymapdb pages, but we do not want the crawler to go deep
          # into unrelated websites. To keep `max_depth` low while keeping
          # keymapdb page coverage high, the pages are listed manually here
          # as a single comma-separated string (each page listed once).
          website_url: "https://keymapdb.com,http://keymapdb.com/page/2/,http://keymapdb.com/page/3/,http://keymapdb.com/page/4/,http://keymapdb.com/page/5/,http://keymapdb.com/page/6/,http://keymapdb.com/page/7/"
          # Comma-separated URL prefixes that the crawler should skip.
          exclude_url_prefix: "/assets,https://mechdb.net"
          max_retries: 2
          max_depth: 3
          verbose: debug