-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01adjustMetadata.js
69 lines (55 loc) · 2.7 KB
/
01adjustMetadata.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import fs from 'fs'
import Parser from 'xml2js'
import https from 'https'
// Attach a small helper to the imported fs object: read a file synchronously
// and return its content parsed as JSON.
fs.readJsonSync = (path) => {
  const raw = fs.readFileSync(path)
  return JSON.parse(raw)
}
// Directory the per-source JSON results are written into.
const folderPath = 'sources/adjusted'
// `sources.json` is expected to carry a top-level `sources` property.
const sources = fs.readJsonSync('sources.json').sources
/**
 * Walk a nested structure along `keys`: at every step look up the key on the
 * current node and continue with element `[0]` of the value found there.
 * With no keys the input itself is returned.
 *
 * @param {object} o - Node to start from.
 * @param {...string} keys - Property names to follow, in order.
 * @returns {*} Whatever the last step yields.
 */
function get(o, ...keys) {
  return keys.reduce((node, key) => node[key][0], o)
}
// Make sure the output directory exists before any result file is written.
// `recursive: true` creates every missing parent directory and does nothing
// when the directory is already present, so no per-segment existence check
// is needed.
fs.mkdirSync(folderPath, { recursive: true })
/**
 * Download the English JSON description of a theme and return its annex.
 *
 * The document is requested from `<theme>/<last path segment of theme>.en.json`
 * and the promise resolves with the `theme.annex` value of the parsed body.
 *
 * @param {string} theme - https URL of the theme, e.g. `https://host/theme/au`.
 * @returns {Promise<*>} Resolves with `theme.annex` from the response JSON.
 *   Rejects with an Error on a network failure, a non-200 status or a body
 *   that cannot be parsed / lacks the `theme` object.
 */
const annex = theme => new Promise((resolve, reject) => {
  // Tolerate a trailing slash so the last path segment is never empty.
  const base = theme.replace(/\/+$/, '')
  // split (not slice) is required to obtain the last path segment.
  const url = base + '/' + base.split('/').at(-1) + '.en.json'
  https.get(url, (res) => {
    if (res.statusCode !== 200) {
      // Discard the body so the socket is released.
      res.resume()
      return reject(new Error('GET ' + url + ' failed: ' + res.statusCode + ' ' + res.statusMessage))
    }
    const chunks = []
    res.on('data', chunk => chunks.push(chunk))
    res.on('error', reject)
    res.on('end', () => {
      // Concatenate the raw buffers before decoding: joining decoded chunks
      // with the default separator would insert commas into the payload.
      try {
        resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')).theme.annex)
      } catch (err) {
        reject(err)
      }
    })
  }).on('error', reject)
})
// For every configured source: parse its metadata XML, extract the fields of
// interest, look up the annex of its theme online and write the result as
// JSON into the output folder.
sources.forEach(source => {
  // The last path segment of the source names both the input XML and the output JSON.
  const name = source.split('/').at(-1)
  Parser.parseString(fs.readFileSync('sources/metadata/' + name + '.xml'), (err, data) => {
    if (err) return console.error(err)
    const metadata = data['gmd:MD_Metadata']
    const identificationInfo = get(metadata, 'gmd:identificationInfo', 'gmd:MD_DataIdentification')
    const citation = get(identificationInfo, 'gmd:citation', 'gmd:CI_Citation')
    const title = get(citation, 'gmd:title', 'gco:CharacterString')
    const doi = get(citation, 'gmd:identifier', 'gmd:MD_Identifier', 'gmd:code', 'gmx:Anchor')['$']['xlink:href']
    // Take the first keyword of each keyword group and keep only those that
    // are anchors, reduced to their link target.
    const descriptiveKeywords = identificationInfo['gmd:descriptiveKeywords']
      .map(keyword => get(keyword, 'gmd:MD_Keywords', 'gmd:keyword'))
      .filter(keyword => keyword['gmx:Anchor'])
      .map(keyword => get(keyword, 'gmx:Anchor')['$']['xlink:href'])
    const resourceConstraints = identificationInfo['gmd:resourceConstraints']
      .map(keyword => get(keyword, 'gmd:MD_LegalConstraints', 'gmd:otherConstraints', 'gmx:Anchor')['$']['xlink:href'])
    const httpTheme = descriptiveKeywords.find(keyword => keyword.startsWith('http://inspire.ec.europa.eu/theme/'))
    // Without a theme link there is nothing to look the annex up with; report
    // it the same way parse errors are reported instead of failing on undefined.
    if (!httpTheme) return console.error('No theme keyword found in metadata of ' + source)
    // manually verified that all links support https
    const theme = httpTheme.replace('http', 'https')
    const onLines = get(metadata, 'gmd:distributionInfo', 'gmd:MD_Distribution', 'gmd:transferOptions', 'gmd:MD_DigitalTransferOptions')['gmd:onLine']
      .map(onLine => get(onLine, 'gmd:CI_OnlineResource', 'gmd:linkage', 'gmd:URL'))
    // Description document: a PDF link that mentions gpkg. Data file: the .gpkg link itself.
    const desc = onLines.find(onLine => onLine.endsWith('.pdf') && onLine.includes('gpkg'))
    const gpkg = onLines.find(onLine => onLine.endsWith('.gpkg'))
    annex(theme)
      .then(themeAnnex => {
        fs.writeFileSync(folderPath + '/' + name + '.json', JSON.stringify({
          title, doi,
          theme,
          annex: themeAnnex,
          desc,
          gpkg,
          resourceConstraints,
        }, null, 2))
      })
      // A failed download or write must not surface as an unhandled rejection.
      .catch(error => console.error('Could not write adjusted metadata for ' + source + ':', error))
  })
})