diff --git a/extractors/cyclocity.py b/extractors/cyclocity.py deleted file mode 100644 index 171fcc195..000000000 --- a/extractors/cyclocity.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2010-2012, eskerda -# Distributed under the AGPL license, see LICENSE.txt - -import os -import sys -import time -import json -import argparse - -from slugify import slugify -import pybikes - -api_key = 'ace81338b73283277ddfe54c217ab965ac93cb50' - -description = 'Extract cyclocity instances' - -parser = argparse.ArgumentParser(description = description) - -parser.add_argument('-o', metavar = "output", dest = "output", - type = argparse.FileType('w'), default = sys.stdout, - help="Output file") - -parser.add_argument('-v', action="store_true", dest = 'verbose', - default = False, help="Verbose output for debugging (no progress)") - -parser.add_argument('--proxy', metavar = "host:proxy", dest = 'proxy', - default = None, help="Use host:port as a proxy for site calls") - -parser.add_argument('--httpsproxy', metavar = "host:proxy", dest = 'httpsproxy', - default = None, help="Use host:port as an HTTPS proxy for site calls") - -args = parser.parse_args() - -scraper = pybikes.utils.PyBikesScraper() - -proxies = {} - -sysdef = { - "system": "cyclocity", - "class": "Cyclocity", - "instances": [] -} - -def clearline(length): - clearline = "\r" + "".join([" " for i in range(length)]) - sys.stderr.flush() - sys.stderr.write(clearline) - sys.stderr.flush() - -def print_status(i, total, status): - progress = "".join(["#" for step in range(i)]) + \ - "".join([" " for step in range(total-i)]) - status_pattern = "\r{0}/{1}: [{2}] {3}" - output = status_pattern.format(i, total, progress, status) - sys.stderr.flush() - sys.stderr.write(unicode(output)) - sys.stderr.flush() - if (i == total): - sys.stderr.write('\n') - return len(output) - -def main(): - if args.proxy is not None: - proxies['http'] = args.proxy - scraper.enableProxy() - - if args.httpsproxy is not None: - proxies['https'] = args.httpsproxy - scraper.enableProxy() - - scraper.setProxies(proxies) - - services = pybikes.Cyclocity.get_contracts(api_key, scraper) - lastlen = 0 - for i, service in enumerate(services): - sysdef['instances'].append( - { - 'tag': slugify(service['commercial_name']), - 'contract': service['name'], - 'meta': { - 'name': service['commercial_name'], - 'country': service['country_code'] - } - } - ) - clearline(lastlen) - lastlen = print_status(i+1, len(services), \ - "Testing %s" % repr(service['name'])) - - output = json.dumps(sysdef, sort_keys = False, indent = 4) - args.output.write(output) - args.output.write('\n') - args.output.close() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/extractors/cyclocity_navigator.py b/extractors/cyclocity_navigator.py deleted file mode 100644 index e53ae4fc8..000000000 --- a/extractors/cyclocity_navigator.py +++ /dev/null @@ -1,137 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2010-2012, eskerda -# Distributed under the AGPL license, see LICENSE.txt - -import string - -import requests - -import sys - -MAIN = 'https://gw.cyclocity.fr/3311a6cea2e49b10/' - -ACTIONS = { - 'cities': 'contracts/full?token={token}', - 'token': 'token/key/b885ab926fdca7dbfbf717084fb36b5f', - 'availability': 'availability/{city}/stations/state/?token={token}', - 'availabitity_by_geo': 'availability/{city}/stations/proximity/{what}?lat={lat}&lng={lng}&maxRes=10000&min=1&token={token}' -} - -TOKEN = None - -def getUrl(action, **args): - return '%s%s' % (MAIN, ACTIONS[action].format(**args)) - -def call(action, **args): - if 'token' not in args and TOKEN is not None: - args['token'] = TOKEN - url = getUrl(action, **args) - r = requests.get(url) - return r.json() - -def getToken(): - data = call('token') - return data['token'] - -def listCities(): - cities = call('cities', token = TOKEN) - print '--- %d Cities ---' % len(cities) - for idx, city in enumerate(cities): - print '[%d] %s - %s' % (idx, city['name'], city['code']) - return cities - -def listActions(): - print '--- Actions ---' - res = [] - for idx, action in enumerate(MENU_ACTIONS): - print '%d %s' % (idx, action) - res.append(action) - number = input('>> Select an action: ') - action = res[number] - return MENU_ACTIONS[action] - -def getParams(action): - iterparams = string.Formatter().parse(ACTIONS[action]) - params = [x[1] for x in iterparams if x[1] is not None and x[1] != 'token' and x[1] != 'city'] - user_input = {} - for p in params: - stuff = str(raw_input('>> %s: ' % p)) - user_input[p] = stuff - return user_input - -def quit(** args): - sys.exit(0) - -def get_everything(city): - n_stations = count_stations(city) - minLat = city['viewPort']['minLat'] - minLng = city['viewPort']['minLng'] - maxLat = city['viewPort']['maxLat'] - maxLng = city['viewPort']['maxLng'] - square = [0.001, 0.001] - square[0] = float(raw_input('>> Select latitude box: ')) - square[1] = float(raw_input('>> Select longitude box: ')) - if (minLat > maxLat): - square[0] = square[0] * -1 - if (minLng > maxLng): - square[1] = square[1] * -1 - - c_square_lat = minLat - c_square_lng = minLng - inRange = True - geosquares = [] - all_stations = {} - print 'From %s to %s' % ([minLat, minLng], [maxLat, maxLng]) - print 'Using %s' % square - inc = 0 - print 'Recalculating splines...' - while(inRange): - geosquares.append([c_square_lat, c_square_lng]) - c_square_lng = c_square_lng + square[1] - if (c_square_lng > maxLng + square[1]): - c_square_lng = minLng - c_square_lat = c_square_lat + square[0] - inRange = c_square_lat < maxLat + square[0] - print "%d Geo Squares calculated" % len(geosquares) - nothing = raw_input('Is it ok?') - if nothing == 'no': - return - for idx, gsquare in enumerate(geosquares): - stations = call('availabitity_by_geo', city = city['code'], what = 'bike', lat = gsquare[0], lng = gsquare[1]) - added = 0 - for station in stations: - if station['station']['nb'] not in all_stations: - all_stations[station['station']['nb']] = station - added = added + 1 - if (added > 0): - sys.stdout.flush() - sys.stdout.write('\r[%d%%] Got %d stations of %d' % (idx * 100 / len(geosquares), len(all_stations), n_stations)) - sys.stdout.write('.') - - print len(all_stations) - -def count_stations(city): - stations = call('availability', city = city['code']) - print '%d stations in %s' % (len(stations['ststates']), city['name']) - print stations - return len(stations['ststates']) - -MENU_ACTIONS = { - 'quit': quit, - 'get_everything': get_everything, - 'count_stations': count_stations, -} - - -if TOKEN is None: - TOKEN = getToken() - -cities = listCities() -number = input('>> Please, select your city: ') -city = cities[number] -print('%s selected' % city['name']) -while (True): - action = listActions() - action(city) - - diff --git a/extractors/domoblue.py b/extractors/domoblue.py deleted file mode 100644 index bd2b072ad..000000000 --- a/extractors/domoblue.py +++ /dev/null @@ -1,178 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2010-2012, eskerda -# Distributed under the AGPL license, see LICENSE.txt - -import os -import sys -import time -import json -import argparse -from collections import namedtuple -import re - -from lxml import etree - -from googlegeocoder import GoogleGeocoder -from slugify import slugify -from pybikes.utils import PyBikesScraper -from pybikes.domoblue import Domoblue - -MAIN = 'http://clientes.domoblue.es/onroll/' -TOKEN_URL = 'generaMapa.php?cliente={service}&ancho=500&alto=700' -XML_URL = 'generaXml.php?token={token}&cliente={service}' -TOKEN_RE = 'generaXml\.php\?token\=(.*?)\&cliente' - -geocoder = GoogleGeocoder() - -CityRecord = namedtuple('CityRecord', 'city, country, lat, lng') - -description = 'Extract DomoBlue instances from the main site' - -parser = argparse.ArgumentParser(description = description) - -parser.add_argument('-o', metavar = "file", dest = 'outfile', default = None, - help="Save output to the specified file") -parser.add_argument('-g','--geocode', action="store_true", - help="Use Google GeoCoder for lat/lng and better names") - -parser.add_argument('--proxy', metavar = "host:proxy", dest = 'proxy', - default = None, help="Use host:port as a proxy for site calls") - -parser.add_argument('-v', action="store_true", dest = 'verbose', - default = False, help="Verbose output for debugging (no progress)") - -args = parser.parse_args() - -outfile = args.outfile - -proxies = {} - -user_agent = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19' - -scraper = PyBikesScraper() -scraper.setUserAgent(user_agent) - -sysdef = { - "system": "domoblue", - "class": "Domoblue", - "instances": [] -} - -if args.proxy is not None: - proxies['http'] = args.proxy - scraper.setProxies(proxies) - scraper.enableProxy() - - -def get_token(client_id): - if 'Referer' in scraper.headers: - del(scraper.headers['Referer']) - url = MAIN + TOKEN_URL.format(service = client_id) - data = scraper.request(url) - token = re.findall(TOKEN_RE, data) - scraper.headers['Referer'] = url - return token[0] - -def get_xml(client_id): - token = get_token(client_id) - url = MAIN + XML_URL.format(token = token, service = client_id) - return scraper.request(url).encode('raw_unicode_escape').decode('utf-8') - -def test_system_health(domo_sys): - online = False - for s in domo_sys.stations: - online = s.extra['status']['online'] - if online: - break - return online - -def google_reverse_geocode(lat, lng): - country_info = lambda lst: lst[len(lst) - 1].short_name - target = 'locality' - - if args.verbose: - print "--- Javascript code for debugging output ---" - print " var geocoder = new google.maps.Geocoder()" - print " latlng = new google.maps.LatLng(%s,%s)" % (str(lat), str(lng)) - print " geocoder.geocode({latLng:latlng}, function(res){console.log(res)})" - - info = geocoder.get((lat, lng),language = 'es') - city_info = [i for i in info if target in i.types] - if len(city_info) == 0: - target = 'political' - city_info = [i for i in info if target in i.types] - if len(city_info) == 0: - raise Exception - else: - city_info = city_info[0] - - city = city_info.address_components[0].long_name - - country = country_info(city_info.address_components) - latitude = city_info.geometry.location.lat - longitude = city_info.geometry.location.lng - - return CityRecord(city, country, latitude, longitude) - -def extract_systems(): - xml_data = get_xml('todos') - xml_dom = etree.fromstring(xml_data) - systems = [] - for marker in xml_dom.xpath('//marker'): - if marker.get('tipo') == 'pendiente': - continue - sys = Domoblue('foo', {}, int(marker.get('codigoCliente'))) - sys.update() - online = True #test_system_health(sys) - if args.verbose: - print "--- %s --- " % repr(marker.get('nombre')) - print " Total stations: %d" % len(sys.stations) - print " Health: %s" % (lambda b: 'Online' if b else 'Offline')(online) - if not online: - if args.verbose: - print " %s is Offline, ignoring!\n" % repr(marker.get('nombre')) - continue - - name = 'Onroll %s' % marker.get('nombre') - slug = slugify(name) - city = marker.get('nombre') - latitude = marker.get('lat') - longitude = marker.get('lng') - country = 'ES' - - if args.geocode: - time.sleep(1) - try: - city, country, latitude, longitude = google_reverse_geocode(latitude, longitude) - name = 'Onroll %s' % city - except Exception: - print " No geocoding results for %s!!" % repr(name) - system = { - 'tag': slug, - 'system_id': int(marker.get('codigoCliente')), - 'meta': { - 'name': name, - 'latitude': latitude, - 'longitude': longitude, - 'city': city, - 'country': 'ES' - } - } - systems.append(system) - if args.verbose: - print " Appended!\n" - return systems - -instances = extract_systems() -sysdef['instances'] = sorted(instances, key = lambda inst: inst['tag']) - -data = json.dumps(sysdef, sort_keys = False, indent = 4) - -if outfile is not None: - f = open(outfile, 'w') - f.write(data) - f.close() - print "%s file written" % outfile -else: - print "---- OUTPUT ----" - print data diff --git a/extractors/nextbike.py b/extractors/nextbike.py deleted file mode 100644 index 33b6e191c..000000000 --- a/extractors/nextbike.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2010-2014, eskerda -# Distributed under the AGPL license, see LICENSE.txt - -import argparse -import codecs -import json -import sys - -from lxml import etree -import requests -from slugify import slugify - - -FEEDS = "https://nextbike.net/maps/nextbike-live.xml?domains={domain}" - -parser = argparse.ArgumentParser() - -parser.add_argument('--domain', type=str, default="", dest="domain") -parser.add_argument('-o', metavar="file", dest="out", - type=argparse.FileType('w'), default=sys.stdout) -parser.add_argument('-v', action="store_true", dest="verbose", default=False) -parser.add_argument('-vv', action="store_true", dest="ultraverbose", - default=False) -parser.add_argument('--base', metavar="file", dest="baseinst", - default=None, type=argparse.FileType('r')) - -args = parser.parse_args() - -UTF8Writer = codecs.getwriter('utf8') -sys.stderr = UTF8Writer(sys.stderr) -filewriter = UTF8Writer(args.out) - -if args.baseinst: - sysdef = json.loads(args.baseinst.read()) -else: - sysdef = { - "system": "nextbike", - "class": "Nextbike", - "instances": [] - } - - -class Nextfeed(object): - def __new__(cls, city_tree, domain=""): - stations = city_tree.xpath('place') - suspected_bikes = [st for st in stations if Nextfeed.is_flexbike(st)] - net_stations = [st for st in stations if not Nextfeed.is_flexbike(st)] - if args.ultraverbose: - sys.stderr.write("%s\n" % city_tree.attrib['name']) - sys.stderr.write("=" * len(city_tree.attrib['name'])) - sys.stderr.write("\n") - sys.stderr.write(u"├ Stations: %d\n" % len(stations)) - sys.stderr.write(u"├ Flex Bikes: %d\n" % len(suspected_bikes)) - sys.stderr.write(u"├ Net Stations: %d\n" % len(net_stations)) - sys.stderr.write("\n\n") - return super(Nextfeed, cls).__new__(cls, city_tree, domain) - - def __init__(self, city_tree, domain=""): - self.domain = domain - self.tag = slugify(city_tree.attrib['name']) - self.name = city_tree.attrib['name'] - self.city_uid = int(city_tree.attrib['uid']) - - def out(self): - return { - "domain": self.domain, - "tag": self.tag, - "meta": { - "name": self.name - }, - "city_uid": self.city_uid - } - - @staticmethod - def is_flexbike(station_tree): - if 'bike' in station_tree.attrib: - if station_tree.attrib['bikes'] == "1": - if station_tree.attrib['bike'] == "1": - return True - if 'spot' in station_tree.attrib: - if station_tree.attrib['spot'] == "1": - return False - else: - return True - if 'BIKE' in station_tree.attrib['name']: - return True - return False - - @staticmethod - def print_station(station_tree, prefix=""): - for attrib in station_tree.attrib: - sys.stderr.write(prefix) - sys.stderr.write(u"├ %s: %s\n" % ( - attrib, station_tree.attrib[attrib]) - ) - - -def get_systems(domain=""): - nextfeed = etree.fromstring( - requests.get(FEEDS.format(domain=domain)).text.encode('utf-8')) - cities = nextfeed.xpath("/markers/country/city") - new_cities = [] - for c in cities: - if 'city_uid' in c.attrib: - uid = int(c.attrib['city_uid']) - elif 'uid' in c.attrib: - uid = int(c.attrib['uid']) - else: - raise Exception("This city has no uid") - found = next( - (i for i in sysdef['instances'] if i['city_uid'] == uid), None) - if not found: - new_cities.append(c) - if args.verbose: - sys.stderr.write(">> Found %d new cities in %s\n" % ( - len(new_cities), domain)) - systems = map(lambda c: Nextfeed(c, domain), new_cities) - return systems - -for domain in args.domain.split(','): - systems = get_systems(domain) - sysdef['instances'] += map(lambda sys: sys.out(), systems) - -filewriter.write(json.dumps(sysdef, indent=4, separators=(',', ':'))) -filewriter.write("\n") diff --git a/extractors/filler.py b/utils/filler.py similarity index 96% rename from extractors/filler.py rename to utils/filler.py index b1e9de374..408efe274 100644 --- a/extractors/filler.py +++ b/utils/filler.py @@ -2,6 +2,9 @@ # Copyright (C) 2010-2012, eskerda # Distributed under the AGPL license, see LICENSE.txt +""" This is a really ugly and nasty script to ease filling up instance files +without cities, latitudes and longitudes. Does more than it needs to """ + import os import sys, traceback import time @@ -22,42 +25,42 @@ parser = argparse.ArgumentParser(description = description) -parser.add_argument('input', metavar = "input", +parser.add_argument('input', metavar = "input", type = argparse.FileType('r'), default = sys.stdin, help="Input file") -parser.add_argument('-o', metavar = "output", dest = "output", - default = sys.stdout, +parser.add_argument('-o', metavar = "output", dest = "output", + default = sys.stdout, help="Output file") -parser.add_argument('-v', action="store_true", dest = 'verbose', +parser.add_argument('-v', action="store_true", dest = 'verbose', default = False, help="Verbose output for debugging (no progress)") -parser.add_argument('--proxy', metavar = "host:proxy", dest = 'proxy', +parser.add_argument('--proxy', metavar = "host:proxy", dest = 'proxy', default = None, help="Use host:port as a proxy for site calls") -parser.add_argument('--httpsproxy', metavar = "host:proxy", dest = 'httpsproxy', +parser.add_argument('--httpsproxy', metavar = "host:proxy", dest = 'httpsproxy', default = None, help="Use host:port as an HTTPS proxy for site calls") -parser.add_argument('--slugify', action="store_true", dest = 'slugify', +parser.add_argument('--slugify', action="store_true", dest = 'slugify', default = False, help="Correct slugs, using the name as input") -parser.add_argument('--geocode', action="store_true", dest = 'geocode', +parser.add_argument('--geocode', action="store_true", dest = 'geocode', default = False, help="Correct geodata using Google GeoCoder") parser.add_argument('--correct_name', action="store_true", dest = "geoname", default = False, help="Correct just the name using geodata") -parser.add_argument('-f', action="store_true", dest = 'overwrite', +parser.add_argument('-f', action="store_true", dest = 'overwrite', default = False, help="Overwrite already set variables") -parser.add_argument('-i', action="store_true", dest = 'interactive', +parser.add_argument('-i', action="store_true", dest = 'interactive', default = False, help="Interactive prompt to select between results") -parser.add_argument('-c', action="store_true", dest = 'continuous', +parser.add_argument('-c', action="store_true", dest = 'continuous', default = False, help="Continuous write output file") -parser.add_argument('-s', action="store_true", dest = 'skip', +parser.add_argument('-s', action="store_true", dest = 'skip', default = False, help="Skip complete instances") args = parser.parse_args() @@ -125,7 +128,7 @@ def geocode(instance, systemCls, language, address = None): sys.stderr.write("latlng = new google.maps.LatLng(%s,%s)\n" % (str(latitude), str(longitude))) sys.stderr.write("geocoder.geocode({latLng:latlng}, function(res){console.log(res)})\n") query = (latitude, longitude) - try: + try: info = geocoder.get(query, language = language) except Exception as e: print e @@ -214,7 +217,7 @@ def handle_System(schema, cls, instances): raise Exception("name not set in instance %s" % str(instance)) if args.skip and is_complete(instance): if args.verbose: - sys.stderr.write("%s Looks complete, passing by\n" % + sys.stderr.write("%s Looks complete, passing by\n" % repr(instance['meta']['name']) ) continue diff --git a/extractors/requirements.txt b/utils/requirements.txt similarity index 100% rename from extractors/requirements.txt rename to utils/requirements.txt