Skip to content

Commit

Permalink
Implemented last_harvest
Browse files Browse the repository at this point in the history
`rialto_airflow.utils.last_harvest()` will inspect the snapshots in the DATA_DIR and return the
datetime for the latest one, or None if there are none there. Also moved
the utils/__init__.py to utils.py until we need to create more
submodules for shared utilities.

Closes #15
  • Loading branch information
edsu committed Jun 19, 2024
1 parent 9fa6a6e commit 737b8d1
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 16 deletions.
2 changes: 1 addition & 1 deletion rialto_airflow/dags/update_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def setup():
Setup the data directory to write to and determine the last harvest.
"""
return {
"last_harvest": last_harvest(),
"last_harvest": last_harvest(data_dir),
"snapshot_dir": create_snapshot_dir(data_dir),
}

Expand Down
33 changes: 33 additions & 0 deletions rialto_airflow/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os
import datetime

snap_dir_format = '%Y%m%d%H%M%S'


def last_harvest(data_dir):
"""
Determine the last time a harvest was run using the most recent snapshot
directory in the supplied data directory.
"""
snapshot_dirs = sorted(os.listdir(data_dir))

if '.keep' in snapshot_dirs:
snapshot_dirs.remove('.keep')

if len(snapshot_dirs) == 0:
return None

snapshot_time = datetime.datetime.strptime(snapshot_dirs[-1], snap_dir_format)
snapshot_time = snapshot_time.replace(tzinfo=datetime.timezone.utc)
return snapshot_time


def create_snapshot_dir(data_dir):
"""
Create a snapshot directory in the given data directory.
"""
now = datetime.datetime.now()
snapshot_dir = os.path.join(data_dir, now.strftime(snap_dir_format))
os.mkdir(snapshot_dir)

return snapshot_dir
15 changes: 0 additions & 15 deletions rialto_airflow/utils/__init__.py

This file was deleted.

27 changes: 27 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import os
import re
import time

from rialto_airflow.utils import last_harvest, create_snapshot_dir

def test_no_last_harvest(tmpdir):
assert last_harvest(tmpdir) is None

def test_create_snapshot_dir(tmpdir):
snap_dir = create_snapshot_dir(tmpdir)
assert os.path.isdir(snap_dir)
assert re.match(r'^\d{14}$', os.path.basename(snap_dir))

def test_last_harvest(tmpdir):
create_snapshot_dir(tmpdir)
last_harvest_1 = last_harvest(tmpdir)
assert last_harvest_1

time.sleep(3)

create_snapshot_dir(tmpdir)
last_harvest_2 = last_harvest(tmpdir)
assert last_harvest_2

assert last_harvest_2 > last_harvest_1

0 comments on commit 737b8d1

Please sign in to comment.