generated from fregante/browser-extension-template
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #20 from kishore881/feature/web-archive
adding support for web.archive.org
- Loading branch information
Showing
6 changed files
with
65 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import moment from 'moment-timezone'; | ||
import Fixer from '../fixer.js'; | ||
|
||
/**
 * Uniform timestamps for web.archive.org (the WayBack Machine).
 *
 * Links whose capture time is handled here:
 * - `div.captures-range-info a[href^="/web/"]`
 *   start and end date in "saved n times between <start date> and <end date>"
 * - `a.capture-link[href^="/web/"]`
 *   time of capture shown at the top of the calendar view
 * - `a.snapshot-link[href^="/web/"]`
 *   time of crawl in the calendar view
 */
const fixer = new Fixer('WayBackMachine', [
	{
		name: 'Time of Crawl',
		selector: 'div.captures-range-info a[href^="/web/"], a.capture-link[href^="/web/"], a.snapshot-link[href^="/web/"]',
		// The matched link itself is the element the fixed timestamp is attached to.
		attachTo(node) {
			return node;
		},
		timestamp(node) {
			// The capture time is the digit run right after "/web/",
			// e.g. href = "/web/20230304105925/example.com"
			const href = node.getAttribute('href');
			const [, captureDigits] = href.match(/\/web\/(\d+)\//) ?? [];
			if (!captureDigits) {
				return null;
			}

			return parseWayBackMachineDateString(captureDigits);
		},
		url(node) {
			// Hrefs are site-relative; prefix the origin to get
			// `https://web.archive.org/web/20230304105925/example.com`
			const snapshotPath = node.getAttribute('href');
			return `https://web.archive.org${snapshotPath}`;
		},
		label: 'snapshot',
	},
]);

fixer.start();
|
||
/**
 * Parse a WayBack Machine capture timestamp into a moment in UTC mode.
 *
 * Written as a hoisted function declaration so that the `timestamp()` callback
 * defined earlier in this file can call it no matter when `fixer.start()`
 * triggers that callback (a `const` declared after `fixer.start()` would still
 * be uninitialized during a synchronous call).
 *
 * @param {string} dateString - 14-digit capture timestamp in `YYYYMMDDHHmmss` form, e.g. "20230304105925".
 * @returns {?object} A moment in UTC mode, or `null` when the input is not a
 *   14-digit string or does not describe a real calendar date/time.
 */
function parseWayBackMachineDateString(dateString) {
	// DateString = "20230304105925"
	if (typeof dateString !== 'string' || !/^\d{14}$/.test(dateString)) {
		return null;
	}

	// Capture timestamps in WayBack Machine URLs are UTC, so the digits must be
	// interpreted as UTC. `moment(string, format).utc()` would read them as
	// local time first and shift the instant by the viewer's UTC offset.
	const parsed = moment.utc(dateString, 'YYYYMMDDHHmmss', true);

	// 14 digits can still spell an impossible date (e.g. month 13).
	return parsed.isValid() ? parsed : null;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters