# Workflow: Broken Links Crawler
# (Header reconstructed — the original lines were web-page scrape residue,
#  "Skip to content" / run-number chrome, which is not valid YAML.)
# Periodically crawls keymapdb.com and fails if any outbound link is broken.
name: Broken Links Crawler

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  push:
    branches:
      - master
  schedule:
    # * is a special character in YAML so you have to quote this string
    # Runs at 13:35 UTC on the 5th and 19th of every month.
    - cron: '35 13 5,19 * *'

jobs:
  crawl_for_broken_links:
    runs-on: ubuntu-latest
    name: Broken-Links-Crawler
    steps:
      # Dumps the event payload/context — useful when diagnosing why a run triggered.
      - name: Debug Action
        uses: hmarr/[email protected]
      - name: Checking broken links
        id: check-broken-links
        uses: ScholliYT/[email protected]
        with:
          # We would need a high `max_depth` for the crawler to naturally find all the keymapdb pages but we do not want the crawler to go deep
          # in unrelated websites, so in order to keep the `max_depth` low but the keymapdb page coverage high, we manually list them here.
          # NOTE(review): removed a duplicated /page/3/ entry and normalized all
          # pagination URLs to https to match the root URL (avoids a redirect
          # hop per page).
          website_url: "https://keymapdb.com,https://keymapdb.com/page/2/,https://keymapdb.com/page/3/,https://keymapdb.com/page/4/,https://keymapdb.com/page/5/,https://keymapdb.com/page/6/,https://keymapdb.com/page/7/"
          # Comma-separated prefixes the crawler must not follow: site assets
          # and an external site we do not want to descend into.
          exclude_url_prefix: "/assets,https://mechdb.net"
          max_retries: 2
          max_depth: 3
          verbose: debug