Skip to content

Commit

Permalink
Merge pull request #22 from kishore881/fix/utc_web_archive
Browse files Browse the repository at this point in the history
fix: web.archive.org bugs
  • Loading branch information
msramalho authored Jan 10, 2024
2 parents a69ebef + 10d00da commit e9dc66e
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 25 deletions.
14 changes: 12 additions & 2 deletions source/js/fixer.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ import HoverPopup from './hover-popup.js';
* @returns {HTMLElement}
*/

/**
* @callback MutationObserverResolver
* @param {HTMLElement} node
* @param {HoverPopup} popup
* @returns {void}
*/

/**
* @typedef Target
* @type {object}
Expand All @@ -33,6 +40,7 @@ import HoverPopup from './hover-popup.js';
* @property {URLResolver} url - Function that resolves the canonical URL for a timestamp.
* @property {TimestampResolver} timestamp - Function that resolves the ISO timestamp for a node.
* @property {AttachToResolver} attachTo - Function that resolves where the popup should attach to.
* @property {MutationObserverResolver} [observe] - Optional function called once with the node and its popup right after the popup is created; it can register a MutationObserver that updates the popup information when the HTML of the hovered element changes.
*/

/**
Expand All @@ -58,8 +66,10 @@ class Fixer {
for (const target of this.targets) {
for (const node of this.getNodes(target)) {
try {
// eslint-disable-next-line no-new
new HoverPopup(target.attachTo(node), target.timestamp(node), target.label ?? 'post', target.url(node));
const popup = new HoverPopup(target.attachTo(node), target.timestamp(node), target.label ?? 'post', target.url(node));
if (target.observe) {
target.observe(node, popup);
}
} catch (error) {
console.error('failed to process node:', error, node);
} finally {
Expand Down
59 changes: 37 additions & 22 deletions source/js/timezone-fixers/web-archive.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,48 @@ import Fixer from '../fixer.js';
const fixer = new Fixer('WayBackMachine', [
{
name: 'Time of Crawl',
selector: 'div.captures-range-info a[href^="/web/"], a.capture-link[href^="/web/"], a.snapshot-link[href^="/web/"]',
selector: 'div.captures-range-info a[href^="/web/"], a.snapshot-link[href^="/web/"]',
attachTo: node => node,
timestamp(node) {
// Href = "/web/20230304105925/example.com"
const timestamp = node.getAttribute('href').match(/\/web\/(\d+)\//);
if (timestamp && timestamp[1]) {
return parseWayBackMachineDateString(timestamp[1]);
}

return null;
},
url(node) {
// `https://web.archive.org/web/20230304105925/example.com`
return `https://web.archive.org${node.getAttribute('href')}`;
},
timestamp: getTimestampFromHref,
url: getResourceUrlFromHref,
label: 'snapshot',
},
{
name: 'Last Hovered Time of Crawl',
selector: 'a.capture-link[href^="/web/"]',
attachTo: node => node,
timestamp: getTimestampFromHref,
url: getResourceUrlFromHref,
label: 'snapshot',
observe(node, popup) {
const observer = new MutationObserver(mutationsList => {
// If href attribute changed, update timestamp and resourceUrl in the popup
for (const mutation of mutationsList) {
if (mutation.type === 'attributes' && mutation.attributeName === 'href') {
popup.moment = getTimestampFromHref(node);
popup.resourceUrl = `https://web.archive.org${node.getAttribute('href')}`;
}
}
});
observer.observe(node, {attributes: true});
},
},
]);

fixer.start();

const parseWayBackMachineDateString = dateString => {
// DateString = "20230304105925"
if (typeof dateString !== 'string' || !/^\d{14}$/.test(dateString)) {
return null;
function getTimestampFromHref(node) {
// Href = "/web/20230304105925/example.com"
const timestamp = node.getAttribute('href').match(/\/web\/(\d+)\//);
if (timestamp && timestamp[1] && /^\d{14}$/.test(timestamp[1])) {
return moment.utc(timestamp[1], 'YYYYMMDDHHmmss');
}

return moment(dateString, 'YYYYMMDDHHmmss').utc();
};
return null;
}

/**
 * Builds the absolute web.archive.org URL for a link node by prefixing the
 * archive origin to the node's `href` attribute.
 *
 * Example: href "/web/20230304105925/example.com" becomes
 * "https://web.archive.org/web/20230304105925/example.com".
 *
 * @param {HTMLElement} node - Element whose `href` attribute is read.
 * @returns {string} The origin followed by the raw `href` attribute value.
 */
function getResourceUrlFromHref(node) {
	const archiveOrigin = 'https://web.archive.org';
	const relativePath = node.getAttribute('href');

	return `${archiveOrigin}${relativePath}`;
}

fixer.start();
2 changes: 1 addition & 1 deletion source/manifest.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "Uniform Timezone Extension",
"version": "0.0.11",
"version": "0.0.12",
"description": "Brings standardization to social media posts' dates and times.",
"homepage_url": "https://github.com/bellingcat/uniform-timezone",
"manifest_version": 3,
Expand Down

0 comments on commit e9dc66e

Please sign in to comment.