Skip to content

Commit

Permalink
Retry Dimensions pubs query
Browse files: browse the repository at this point in the history
  • Loading branch information
lwrubel committed Jul 16, 2024
1 parent fe61779 commit 0536631
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 4 deletions.
3 changes: 2 additions & 1 deletion rialto_airflow/harvest/dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def dois_from_orcid(orcid):
""".format(orcid)

# The Dimensions API can flake out sometimes, so try to catch & retry.
# TODO: Consider using retry param in query() instead
try_count = 0
while try_count < 20:
try_count += 1
Expand Down Expand Up @@ -82,7 +83,7 @@ def publications_from_dois(dois: list, batch_size=200):
limit 1000
"""

result = dsl().query(q)
result = dsl().query(q, retry=5)

for pub in result["publications"]:
yield normalize_publication(pub)
Expand Down
6 changes: 3 additions & 3 deletions test/harvest/test_merge_pubs.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def sul_pubs_csv(tmp_path):

def test_dimensions_pubs_df(dimensions_pubs_csv):
lazy_df = merge_pubs.dimensions_pubs_df(dimensions_pubs_csv)
assert type(lazy_df) == pl.lazyframe.frame.LazyFrame
assert isinstance(lazy_df, pl.lazyframe.frame.LazyFrame)
df = lazy_df.collect()
assert df.shape[0] == 2
assert "bogus" not in df.columns, "Unneeded columns have been dropped"
Expand All @@ -147,7 +147,7 @@ def test_dimensions_pubs_df(dimensions_pubs_csv):

def test_openalex_pubs_df(openalex_pubs_csv):
lazy_df = merge_pubs.openalex_pubs_df(openalex_pubs_csv)
assert type(lazy_df) == pl.lazyframe.frame.LazyFrame
assert isinstance(lazy_df, pl.lazyframe.frame.LazyFrame)
df = lazy_df.collect()
assert df.shape[0] == 2
assert "bogus" not in df.columns, "Unneeded columns have been dropped"
Expand All @@ -156,7 +156,7 @@ def test_openalex_pubs_df(openalex_pubs_csv):

def test_sulpub_df(sul_pubs_csv):
lazy_df = merge_pubs.sulpub_df(sul_pubs_csv)
assert type(lazy_df) == pl.lazyframe.frame.LazyFrame
assert isinstance(lazy_df, pl.lazyframe.frame.LazyFrame)
df = lazy_df.collect()
assert df.shape[0] == 2, "Row without a doi has been dropped"
assert df.columns == [
Expand Down
1 change: 1 addition & 0 deletions test/harvest/test_openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def test_pyalex_urlencoding():
), "we handle url URL encoding DOIs until pyalex does"


@pytest.mark.skip(reason="This record no longer exhibits the problem")
def test_pyalex_varnish_bug():
# it seems like this author has a few records that are so big they blow out
# OpenAlex's Varnish index. See https://groups.google.com/u/1/g/openalex-community/c/hl09WRF3Naw
Expand Down

0 comments on commit 0536631

Please sign in to comment.