-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathwikipedia.rb
executable file
·42 lines (32 loc) · 1.3 KB
/
wikipedia.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env ruby
# frozen_string_literal: true
# Figures out the URLs for the Wikipedia biography pages of Representatives and Senators
$LOAD_PATH.unshift "#{File.dirname(__FILE__)}/lib"
require "name"
require "people"
require "mechanize"
require "configuration"
require "extract_wikipedia_links"
# Writes Wikipedia links to an XML file.
#
# The document has a single <peopleinfo> root containing one <personinfo>
# element per link, carrying `id` and `wikipedia_url` attributes.
#
# @param links [Enumerable] entries indexable with [0] (used as the `id`
#   attribute) and [1] (used as the `wikipedia_url` attribute)
# @param filename [String] path of the XML file to create or overwrite
# @return [nil]
def write_links(links, filename)
  # Block form guarantees the file is flushed and closed even when building
  # the XML raises part-way through.
  File.open(filename, "w") do |xml|
    x = Builder::XmlMarkup.new(target: xml, indent: 1)
    x.instruct!
    x.peopleinfo do
      links.each { |link| x.personinfo(id: link[0], wikipedia_url: link[1]) }
    end
  end
  nil
end
conf = Configuration.new

puts "Reading member data..."
people = PeopleCSVReader.read_members

agent = Mechanize.new
# Slightly naughty because Wikipedia specifically blocks Ruby Mechanize
agent.user_agent_alias = "Mac Safari"

puts "Wikipedia links for Representatives..."
links = extract_all_representative_wikipedia_links(people, agent)
write_links(links, "#{conf.members_xml_path}/wikipedia-commons.xml")
# For Representatives, just for curiosity's sake, find out which have a link back to OpenAustralia
links.each { |link| check_wikipedia_page(link[1], agent) }

puts "Wikipedia links for Senators..."
senator_links = extract_all_senator_wikipedia_links(people, agent)
write_links(senator_links, "#{conf.members_xml_path}/wikipedia-lords.xml")

# Run mpinfoin.pl with the "links" argument (presumably it loads the XML files
# written above - confirm against that script).
# The multi-argument form of system bypasses the shell, so a web_root that
# contains spaces or shell metacharacters is passed through verbatim.
# A failed or missing loader aborts with a non-zero exit status instead of
# being silently ignored.
loader = "#{conf.web_root}/twfy/scripts/mpinfoin.pl"
abort "#{loader} links failed" unless system(loader, "links")