[gbfs] add configurable cache deltas for feeds #626

Closed · wants to merge 3 commits
15 changes: 14 additions & 1 deletion pybikes/contrib.py
@@ -34,9 +34,14 @@ def __getitem__(self, key):
            raise KeyError('%s' % key)
        if key not in self.store:
            raise KeyError('%s' % key)
+
        ts_value = self.store[key]
-       if time.time() - ts_value['ts'] > self.delta:
+       the_time = time.time()
+       delta = ts_value.get('delta', self.delta)
+
+       if the_time - ts_value['ts'] > delta:
            raise KeyError('%s' % key)
+
        return ts_value['value']

    def __contains__(self, key):
@@ -58,3 +63,11 @@ def __test_key__(self, key):

    def __transform_key__(self, key):
        return key
+
+   def set_with_delta(self, key, value, delta):
+       """ Set a key-value with a specific delta """
+       self.store[key] = {
+           'value': value,
+           'ts': time.time(),
+           'delta': delta,
+       }
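Taken together, the TSTCache changes above let one cache hold entries with different lifetimes: __getitem__ now prefers an entry-level delta over the cache-wide default, and set_with_delta is how such an entry gets stored. A minimal sketch of the intended behaviour (keys and values are made up):

from pybikes.contrib import TSTCache

cache = TSTCache(delta=60)  # cache-wide default: entries live for 60 seconds

# stored through plain assignment, so it expires after the default 60s
cache['station_status'] = '{"num_bikes_available": 5}'

# stored with an entry-level delta, so it stays valid for 12 hours
cache.set_with_delta('vehicle_types', '{"vehicle_types": []}', delta=12 * 60 * 60)

# within 60 seconds both lookups succeed; afterwards only the long-lived
# entry does, because an expired key raises KeyError on access
value = cache['vehicle_types']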
19 changes: 12 additions & 7 deletions pybikes/deutschebahn.py
@@ -14,6 +14,18 @@
class DB(Gbfs):
    authed = True

+   cache = True
+   cache_deltas = {
+       # 12 hours
+       'gbfs': 12 * 60 * 60,
+       # 1 hour
+       'station_information': 60 * 60,
+       # 60 seconds
+       'station_status': 60,
+       # 12 hours
+       'vehicle_types': 12 * 60 * 60,
+   }
+
    meta = {
        'company': ['Deutsche Bahn AG'],
        'system': 'deutschebahn',
@@ -52,12 +64,5 @@ class Callabike(DB):

    provider = 'CallABike'

-   # caches the feed for 60s
-   cache = TSTCache(delta=60)
-
    def __init__(self, * args, ** kwargs):
        super(Callabike, self).__init__(* args, provider=Callabike.provider, ** kwargs)
-
-   def update(self, scraper=None):
-       scraper = scraper or PyBikesScraper(self.cache)
-       super(Callabike, self).update(scraper)
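Any other GBFS system can opt in the same way: set cache = True at class level and override only the feeds whose freshness requirements differ from the default. A hypothetical sketch (the class and the chosen deltas are invented for illustration):

from pybikes.gbfs import Gbfs

class ExampleSystem(Gbfs):
    # hypothetical system that turns on the shared response cache
    cache = True
    cache_deltas = {
        # the discovery document almost never changes
        'gbfs': 24 * 60 * 60,
        # stations are added or moved rarely
        'station_information': 6 * 60 * 60,
        # None falls back to the scraper's default delta
        'station_status': None,
        'vehicle_types': 24 * 60 * 60,
    }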
50 changes: 40 additions & 10 deletions pybikes/gbfs.py
@@ -12,6 +12,7 @@

from pybikes import BikeShareSystem, BikeShareStation, exceptions
from pybikes.utils import PyBikesScraper, filter_bounds
+from pybikes.contrib import TSTCache

try:
    # Python 2
@@ -25,6 +26,18 @@ class Gbfs(BikeShareSystem):

    station_cls = None

+   # Specific deltas can be configured here to cache parts of the feed that do
+   # not change so often, like vehicle_types or station_information
+   # XXX: Additionally, some responses come with a ttl, which could be
+   # respected too
+   cache = False
+   cache_deltas = {
+       'gbfs': None,
+       'station_information': None,
+       'station_status': None,
+       'vehicle_types': None,
+   }
+
    def __init__(
        self,
        tag,
@@ -34,6 +47,9 @@ def __init__(
        station_information=False,
        station_status=False,
        ignore_errors=False,
+       cache=False,
+       cache_default_delta=60,
+       cache_deltas=None,
        retry=None,
        bbox=None,
@@ -46,6 +62,9 @@
        self.retry = retry
        self.bbox = bbox

+       self.cache = (self.cache or cache) and TSTCache(delta=cache_default_delta)
+       self.cache_deltas.update(cache_deltas or {})
+
        # Allow hardcoding feed urls on initialization
        self.feeds = {}
        if station_information:
@@ -80,7 +99,11 @@ def get_feeds(self, url, scraper, force_https):
        if self.feeds:
            return self.feeds

-       feed_data = scraper.request(url, raw=True)
+       feed_data = scraper.request(
+           url,
+           raw=True,
+           cache_with_delta=self.cache_deltas['gbfs'],
+       )

        # do not hide Unauthorized or Too many requests status codes
        if scraper.last_request.status_code in [401, 429]:
@@ -114,23 +137,30 @@


    def update(self, scraper=None):
-       scraper = scraper or PyBikesScraper()
+       scraper = scraper or PyBikesScraper(self.cache or None)
        if self.retry:
            scraper.retry = True
            scraper.retry_opts.update(self.retry)

        feeds = self.get_feeds(self.feed_url, scraper, self.force_https)

        # Station Information and Station Status data retrieval
-       station_information = json.loads(
-           scraper.request(feeds['station_information'])
-       )['data']['stations']
-       station_status = json.loads(
-           scraper.request(feeds['station_status'])
-       )['data']['stations']
+       cache_d = self.cache_deltas
+
+       info_rq = scraper.request(feeds['station_information'],
+           cache_with_delta=cache_d['station_information'],
+       )
+       station_information = json.loads(info_rq)['data']['stations']
+
+       status_rq = scraper.request(feeds['station_status'],
+           cache_with_delta=cache_d['station_status'],
+       )
+       station_status = json.loads(status_rq)['data']['stations']

        if 'vehicle_types' in feeds:
-           vehicle_info = json.loads(scraper.request(feeds['vehicle_types']))
+           vehicle_rq = scraper.request(feeds['vehicle_types'],
+               cache_with_delta=cache_d['vehicle_types'],
+           )
+           vehicle_info = json.loads(vehicle_rq)
            # map vehicle id to vehicle info AND extra info resolver
            # for direct access
            vehicles = {
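The same knobs are exposed at construction time, so caching can be enabled per instance without subclassing. A sketch of the wiring, assuming the usual tag, meta and feed-url positional arguments (all values here are placeholders):

from pybikes.gbfs import Gbfs

system = Gbfs(
    'example-tag',
    {'company': ['Example Co']},
    'https://example.com/gbfs/gbfs.json',
    cache=True,                  # self.cache becomes a TSTCache
    cache_default_delta=60,      # lifetime of entries without a specific delta
    cache_deltas={'station_information': 60 * 60},  # override a single feed
)

# update() builds its PyBikesScraper around self.cache, so a second call
# inside the delta window reuses cached responses instead of re-fetching
system.update()
system.update()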
14 changes: 10 additions & 4 deletions pybikes/utils.py
@@ -57,7 +57,8 @@ def setUserAgent(self, user_agent):
        self.headers['User-Agent'] = user_agent

    def request(self, url, method='GET', params=None, data=None, raw=False,
-               headers=None, default_encoding='UTF-8', skip_cache=False):
+               headers=None, default_encoding='UTF-8', skip_cache=False,
+               cache_with_delta=None):

        if self.retry:
            retries = Retry(** self.retry_opts)
@@ -66,8 +67,10 @@ def request(self, url, method='GET', params=None, data=None, raw=False,
        _headers = self.headers.copy()
        _headers.update(headers or {})

+       cached = self.cachedict and url in self.cachedict and not skip_cache
+
        # XXX proper encode arguments for proper call args -> response
-       if self.cachedict and url in self.cachedict and not skip_cache:
+       if cached:
            response = self.cachedict[url]
        else:
            response = self.session.request(
@@ -99,8 +102,11 @@ def request(self, url, method='GET', params=None, data=None, raw=False,
            self.headers['Cookie'] = response.headers['set-cookie']
        self.last_request = response

-       if self.cachedict is not None:
-           self.cachedict[url] = response
+       if not cached and self.cachedict is not None and response.status_code in [200, 206]:
+           if cache_with_delta:
+               self.cachedict.set_with_delta(url, response, delta=cache_with_delta)
+           else:
+               self.cachedict[url] = response

        return data

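At the scraper level the new argument only changes how a successful response is written back to the cache; reads are untouched. A sketch, assuming a TSTCache-backed cachedict and a placeholder URL:

from pybikes.contrib import TSTCache
from pybikes.utils import PyBikesScraper

scraper = PyBikesScraper(cachedict=TSTCache(delta=60))

# the first request hits the network; since the status code is 200/206 the
# response is stored with a one-hour delta instead of the cache default
body = scraper.request(
    'https://example.com/gbfs/station_information.json',
    cache_with_delta=60 * 60,
)

# a repeat request within that hour is answered from the cache
body_again = scraper.request('https://example.com/gbfs/station_information.json')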
2 changes: 1 addition & 1 deletion tests/test_instances.py
@@ -60,7 +60,7 @@ def test_uses_scraper(self, instance, i_data, cls, mod):
    def test_update(self, instance, i_data, cls, mod, record_property):
        scraper = pybikes.PyBikesScraper(
            # use a simple dict cache for systems that use a single endpoint
-           cachedict=cache if instance.unifeed else None,
+           cachedict=cache if (instance.unifeed or instance.cache) else None,
            # reuse headers per mod
            headers=headers.setdefault(mod, {}),
        )
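The intent of this one-line change appears to be that systems which enable the new class-level cache get the same treatment in the test suite as unifeed systems already do: they share the per-module cachedict, so repeated instances of the same network do not re-fetch identical feeds during test_update.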