This repository has been archived by the owner on Mar 28, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
82 lines (66 loc) · 1.82 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
'use strict'
const got = require('got')
const cheerio = require('cheerio')
const getInline = require('get-inline-styles')
const stripComments = require('strip-html-comments')
const stripWayback = require('strip-wayback-toolbar')
module.exports = (url, timestamp) => {
let waybackUrl = null
return getAvailableUrl(url, timestamp)
.then(url => waybackUrl = url && got(url))
.then(res => stripWayback(res.body))
.then(getCss)
.then(getCssFromLinks)
.then(aggregateCss)
}
/**
 * Join the linked stylesheet text and the embedded stylesheet text into a
 * single space-separated string (linked entries first).
 *
 * The string is stored on the given object as `css`; the same object is
 * returned.
 *
 * @param {{links: string[], styles: string[]}} css - Collected CSS parts.
 * @returns {Object} The object that was passed in, now carrying `css`.
 */
const aggregateCss = css => {
  const { links, styles } = css
  const combined = links.concat(styles)
  css.css = combined.join(' ')
  return css
}
/**
 * Turn a stylesheet href into an absolute URL.
 *
 * - `http://` / `https://` links (any letter case) are returned unchanged.
 * - Protocol-relative links (`//host/path`) get an `http:` prefix.
 * - Everything else is appended to `baseUrl` with exactly one `/` between
 *   the two parts.
 *
 * @param {string} baseUrl - Origin to prepend to non-absolute links.
 * @param {string} link - The href value to normalize.
 * @returns {string} The absolute URL.
 */
const normalizeLink = (baseUrl, link) => {
  // Require the full scheme separator so relative paths that merely start
  // with "http" (e.g. "httpdocs/a.css") are not mistaken for absolute URLs.
  if (/^https?:\/\//i.test(link)) {
    return link
  }
  if (/^\/\//.test(link)) {
    return `http:${link}`
  }
  const path = `${link}`
  const separator = path.startsWith('/') ? '' : '/'
  return `${baseUrl.replace(/\/+$/, '')}${separator}${path}`
}
/**
 * Download every linked stylesheet and replace the hrefs with their contents.
 *
 * Each href is made absolute against `http://web.archive.org` and fetched in
 * parallel. A failed download is logged with `console.log` and left out of
 * the result; it does not reject the returned promise.
 *
 * @param {{links: string[], styles: string[], inline: *}} css - Result of
 *   `getCss`; `links` holds stylesheet hrefs.
 * @returns {Promise<{styles: string[], inline: *, links: string[]}>} The same
 *   `styles` and `inline`, with `links` now holding the downloaded stylesheet
 *   bodies in the order the hrefs were given.
 */
const getCssFromLinks = css => {
  const baseUrl = 'http://web.archive.org'
  const downloads = css.links.map(link =>
    got(normalizeLink(baseUrl, link))
      .then(res => res.body)
      .catch(err => {
        console.log(err)
        // Marker for "no content"; filtered out below.
        return null
      })
  )
  // Promise.all keeps input order, so the stylesheets stay in document order
  // regardless of which response arrives first (order matters for the cascade).
  return Promise.all(downloads)
    .then(bodies => ({
      styles: css.styles,
      inline: css.inline,
      links: bodies.filter(body => body !== null)
    }))
}
/**
 * Collect the CSS sources found in an HTML document.
 *
 * @param {string} html - Markup to inspect.
 * @returns {{html: string, links: Array, styles: string[], inline: *}}
 *   `html` is the input, `links` the `href` attribute of every
 *   `link[rel=stylesheet]` element, `styles` the comment-stripped text of every
 *   `<style>` element, and `inline` whatever `getInline(html)` returns.
 */
const getCss = html => {
  const $ = cheerio.load(html)
  const inline = getInline(html)
  const styles = []
  const links = []

  $('style').each((index, element) => {
    const text = $(element).text()
    styles.push(stripComments(text))
  })

  $('link[rel=stylesheet]').each((index, element) => {
    const href = $(element).attr('href')
    links.push(href)
  })

  return { html, links, styles, inline }
}
/**
 * Ask the archive.org availability endpoint for the snapshot closest to the
 * requested timestamp.
 *
 * @param {string} url - Address of the page to look up; percent-encoded before
 *   being placed in the query string.
 * @param {string|number} [timestamp] - Sent as the `timestamp` query
 *   parameter; left out of the request when null or undefined.
 * @returns {Promise<string|boolean|undefined>} Resolves with the `url` of the
 *   closest snapshot when it is marked available, otherwise with a falsy value.
 */
const getAvailableUrl = (url, timestamp) => {
  const query = [`url=${encodeURIComponent(url)}`]
  if (timestamp != null) {
    query.push(`timestamp=${encodeURIComponent(timestamp)}`)
  }
  const availabilityUrl = `http://archive.org/wayback/available?${query.join('&')}`
  return got(availabilityUrl, { json: true })
    .then(res => {
      const snapshots = res.body && res.body.archived_snapshots
      // `closest` can be missing from the response; treat that as
      // "not available" instead of throwing.
      const closest = snapshots && snapshots.closest
      return closest && closest.available && closest.url
    })
}