-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpolls.py
73 lines (58 loc) · 1.54 KB
/
polls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os

import lxml.html as lh
import pandas as pd
import requests
# Base URL of the poll pages; an institute name plus '.htm' is appended to it.
root = "https://www.wahlrecht.de/umfragen/"

# Institute names, used both as the page name in the URL and as the CSV
# file name when the scraped data is saved.
insts = [
    "allensbach",
    "emnid",
    "politbarometer",
    "gms",
    "dimap",
    "insa",
    "yougov",
]
def getUrl(inst):
    """Return the poll page URL for *inst*: module-level ``root`` + inst + '.htm'."""
    pieces = (root, inst, '.htm')
    return ''.join(pieces)
def getData(inst):
    """Scrape the poll table of one institute into a DataFrame.

    Downloads the page returned by ``getUrl(inst)``, takes the column names
    from the first header row of the table and turns every row of the first
    table body into one record.

    Args:
        inst: Institute name as used in the page URL.

    Returns:
        pandas.DataFrame with a ``'date'`` column, one column per kept header
        cell and an ``'Institut'`` column holding ``inst``. Rows that are
        missing a value in any column are dropped.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page has no table header row or table body at the
            expected XPath location.
    """
    url = getUrl(inst)
    # A timeout keeps the scraper from hanging forever on a stalled
    # connection; raise_for_status prevents parsing an HTTP error page as if
    # it were poll data.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    doc = lh.fromstring(page.content)

    header_rows = doc.xpath('/html/body/table/thead/tr')
    bodies = doc.xpath('/html/body/table/tbody')
    if not header_rows or not bodies:
        # Same exception type the bare [0] lookup would raise, but with context.
        raise IndexError('no poll table found at ' + url)

    # The first column is always named 'date'; the page's own 'Datum' header
    # and empty / one-character cells are skipped.
    headers = ['date']
    for cell in header_rows[0]:
        name = cell.text_content()
        if len(name) > 1 and name != 'Datum':
            headers.append(name)

    data = []
    for table_row in bodies[0]:
        # Keep cells with more than one character plus the '?' and '–'
        # placeholders; other empty / one-character cells are dropped so the
        # remaining values line up with ``headers``.
        row = [
            text
            for text in (cell.text_content() for cell in table_row)
            if len(text) > 1 or text in ('?', '–')
        ]
        # zip stops at the shorter sequence, so a short row simply yields a
        # record with missing keys (NaN in the DataFrame).
        data.append(dict(zip(headers, row)))

    polls = pd.DataFrame(data)
    polls['Institut'] = inst
    # Drop incomplete rows (per the original comment these are the Bundestag
    # election rows). Defaults are axis=0, how='any'; passing both ``how`` and
    # ``thresh`` explicitly raises TypeError on pandas 2.x.
    polls = polls.dropna()
    return polls
# Save the polling data of every institute to its own ';'-separated CSV file.
# The output directory is created first so the first write cannot fail just
# because './data' does not exist yet.
os.makedirs('./data', exist_ok=True)
for i in insts:
    results = getData(i)
    results.to_csv(
        r'./data/' + i + '.csv',
        # index is a boolean flag; False omits the row index from the file.
        index=False, header=True, sep=";")