diff --git a/ConversionContainer/pyproject.toml b/ConversionContainer/pyproject.toml
index 5e87fe0..fc02ae7 100644
--- a/ConversionContainer/pyproject.toml
+++ b/ConversionContainer/pyproject.toml
@@ -18,6 +18,7 @@ google-cloud-logging = "^3.5.0"
 bs4 = "^0.0.1"
 filelock = "^3.12.2"
 mysqlclient = "*"
+requests = "^2.31.0"
 
 [tool.poetry.dev-dependencies]
 pytest-mock = "*"
diff --git a/ConversionContainer/source/config.py b/ConversionContainer/source/config.py
index 4656648..74791ca 100644
--- a/ConversionContainer/source/config.py
+++ b/ConversionContainer/source/config.py
@@ -2,7 +2,7 @@
 
 import os
 
-CLASSIC_DATABASE_URI = os.environ.get('CLASSIC_DATABASE_URI')
+CLASSIC_DATABASE_URI = os.environ['CLASSIC_DATABASE_URI']
 OUT_BUCKET_ARXIV_ID = os.environ['DOCUMENT_CONVERTED_BUCKET'] # Startup failure on miss
 IN_BUCKET_SUB_ID = os.environ['SUBMISSION_SOURCE_BUCKET'] # Startup failure on miss
 
@@ -22,4 +22,9 @@ SQLALCHEMY_DATABASE_URI = CLASSIC_DATABASE_URI
 SQLALCHEMY_BINDS = {
     'latexml': LATEXML_DB_URI
 }
+FASTLY_PURGE_KEY = os.environ.get('FASTLY_PURGE_KEY', 'no-key-dev')
+# Environment values are strings, so parse explicitly: a bare truthiness
+# check would treat IS_DEV=false (or 0) as dev mode and skip the purge.
+IS_DEV = os.environ.get('IS_DEV', 'true').strip().lower() not in ('', '0', 'false', 'no', 'off')
+
 LOCK_DIR = '/arxiv/locks'
\ No newline at end of file
diff --git a/ConversionContainer/source/publish/__init__.py b/ConversionContainer/source/publish/__init__.py
index 0fa44a2..d71a61e 100644
--- a/ConversionContainer/source/publish/__init__.py
+++ b/ConversionContainer/source/publish/__init__.py
@@ -18,6 +18,7 @@
     move_sub_qa_to_doc_qa
 )
 from .watermark import make_published_watermark, insert_watermark
+from .fastly_purge import fastly_purge_abs
 
 logger = logging.getLogger()
 
@@ -88,9 +89,11 @@ def _publish (submission_id: int, paper_id: str, version: int):
 
         # Move log output from sub bucket to published bucket
         move_sub_qa_to_doc_qa (submission_id, paper_idv)
-        logger.info(f'Successfully wrote {submission_id}/{paper_idv} qa to doc bucket')
+        logger.info(f'Successfully wrote {submission_id}/{paper_idv} qa to doc bucket')
 
-        
+        # Purge abs page from fastly so we can see it
+        if not current_app.config['IS_DEV']:
+            fastly_purge_abs(paper_id, version, current_app.config['FASTLY_PURGE_KEY'])
 
     except Exception as e:
         try:
diff --git a/ConversionContainer/source/publish/fastly_purge.py b/ConversionContainer/source/publish/fastly_purge.py
new file mode 100644
index 0000000..1d33171
--- /dev/null
+++ b/ConversionContainer/source/publish/fastly_purge.py
@@ -0,0 +1,35 @@
+import logging
+import requests
+
+from flask import current_app
+
+def fastly_purge_abs (paper_id: str, version: int, fastly_key: str):
+    """Purge the cached abs pages for one paper from Fastly.
+
+    For every listed domain, a PURGE request is sent for both the
+    unversioned URL (/abs/<paper_id>) and the versioned URL
+    (/abs/<paper_id>v<version>). Failures are logged by _purge_url
+    and never raised.
+    """
+    headers = {
+        "Fastly-Key": fastly_key,
+        "Accept": "application/json",
+    }
+    domains = ["arxiv.org", "web3.arxiv.org", "www.arxiv.org"]
+    for domain in domains:
+        _purge_url (f"https://{ domain }/abs/{ paper_id }", headers)
+        _purge_url (f"https://{ domain }/abs/{ paper_id }v{ version }", headers)
+
+def _purge_url (url: str, headers: dict, timeout: float = 10):
+    """Send a PURGE request for url and log the outcome without raising."""
+    try:
+        response = requests.request("PURGE", url, headers=headers, timeout=timeout)
+    except requests.RequestException as exc:
+        # The caller invokes this inside _publish's try block; swallow network
+        # errors so a cache purge problem is not handled as a publish error.
+        logging.warning(f'failed to purge { url }: { exc }')
+        return
+    if response.status_code == 200:
+        logging.info(f'successfully purged { url }')
+    else:
+        logging.warning(f'failed to purge { url }')
\ No newline at end of file