Skip to content

Commit

Permalink
IIIF v3 manifest support (#29)
Browse files Browse the repository at this point in the history
  • Loading branch information
moltude authored Jun 27, 2024
1 parent 291838b commit 9d49662
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 28 deletions.
18 changes: 16 additions & 2 deletions wikimedia/executors/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ def download(self, source, destination):
destination, size = self._save_to_s3(source=source, bucket=bucket, key=key)
self.tracker.increment(Result.DOWNLOADED, size=size)
return destination, size
except Exception as exec:
except Exception as err:
self.tracker.increment(Result.FAILED)
raise DownloadException(f"Failed download {source} - {str(exec)}") from exec
raise err
# DownloadException(f"Failed download {source} - {str(exec)}") from exec

# TODO: This may be better in the FileSystem class
def save_to_local(self, source, file):
Expand All @@ -73,10 +74,23 @@ def save_to_local(self, source, file):
"""
try:
response = requests.get(source, timeout=30)
invalid_types = [
"text/html",
"application/json",
"application/xml",
"text/plain",
]
for invalid_type in invalid_types:
if invalid_type in response.headers["content-type"]:
raise DownloadException(
f"Invalid content-type: {response.headers['content-type']}"
)
with open(file, "wb") as f:
f.write(response.content)
file_size = os.path.getsize(file)
return file, file_size
except DownloadException as de:
raise de
except Exception as exec:
raise DownloadException(f"Failed saving to local {str(exec)}") from exec

Expand Down
76 changes: 50 additions & 26 deletions wikimedia/utilities/iiif.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,38 @@ def __init__(self):
def iiif_v2_urls(self, iiif):
    """
    Extract the image URLs from an IIIF Presentation v2 manifest.

    Walks the canvases of the manifest's first sequence and collects the
    ``@id`` of every image resource, in document order.

    :param iiif: Parsed v2 manifest as a dict.
    :return: List of image URLs. Empty when the manifest has no sequences,
        no canvases, or no image resources carrying an ``@id``.
    """
    # Only the first sequence is read. Per Presentation API 2.1 it is the
    # default sequence embedded in the manifest; additional sequences are
    # listed by reference only, so their presence must not prevent the
    # first one from being processed.
    # `or []` / `or {}` also cover keys that are present but set to null.
    sequences = iiif.get("sequences") or []
    if not sequences:
        return []

    urls = []
    for canvas in sequences[0].get("canvases") or []:
        for image in canvas.get("images") or []:
            url = (image.get("resource") or {}).get("@id")
            if url:
                urls.append(url)
    return urls

def iiif__v3_urls(self, iiif):
    """
    Extract the image URLs from an IIIF Presentation v3 manifest.

    For every canvas in the manifest's ``items`` the URL is read from
    ``items[0] -> items[0] -> body -> id``, i.e. the body of the first
    annotation on the first annotation page.

    :param iiif: Parsed v3 manifest as a dict.
    :return: List of image URLs, one per canvas that has a body id.
        Canvases without a body or id are skipped. If any canvas is
        structurally malformed the whole result is an empty list.
    """
    # Suffix appended when the body id does not already end in
    # "default.jpg"; this works around ids that omit the
    # region/size/rotation/quality part of the image request.
    # NOTE(review): Image API 3.0 names the full size "max" rather than
    # "full" - confirm the target image servers accept this suffix.
    resolution = "/full/full/0/default.jpg"
    urls = []
    # `or []` covers an "items" key that is present but set to null.
    for canvas in iiif.get("items") or []:
        try:
            annotation = canvas["items"][0]["items"][0]
            # A null body is treated like a missing one: skip the canvas.
            body = annotation.get("body") or {}
            url = body.get("id")
            if url:
                if url.endswith("default.jpg"):
                    urls.append(url)
                else:
                    urls.append(f"{url}{resolution}")
        except (IndexError, TypeError, KeyError, AttributeError):
            # AttributeError covers an annotation, body or id of an
            # unexpected type, so those are handled like every other
            # malformed shape instead of escaping to the caller.
            return []
    return urls

def get_iiif_urls(self, iiif):
"""
Expand All @@ -46,16 +69,19 @@ def get_iiif_urls(self, iiif):

manifest = self._get_iiif_manifest(iiif)

urls = []
sequences = manifest.get("sequences", [])
sequence = sequences[0:1] if len(sequences) == 1 else None
canvases = sequence[0].get("canvases", []) if sequence else []
for canvase in canvases:
for image in canvase.get("images", []):
url = image.get("resource", {}).get("@id", None)
if url:
urls.append(url)
return urls
# v2 or v3?
if (
manifest.get("@context", None)
== "http://iiif.io/api/presentation/3/context.json"
):
return self.iiif__v3_urls(manifest)
elif (
manifest.get("@context", None)
== "http://iiif.io/api/presentation/2/context.json"
):
return self.iiif_v2_urls(manifest)
else:
raise IIIFException("Unknown IIIF version")

def _get_iiif_manifest(self, url):
"""
Expand All @@ -64,18 +90,16 @@ def _get_iiif_manifest(self, url):
if not validators.url(url):
raise IIIFException(f"Invalid url {url}")
try:
request = requests.get(url, timeout=30, headers=self.HEADERS)
request = requests.get(
url, timeout=30, allow_redirects=True, headers=self.HEADERS
)
if request.status_code not in [200, 301, 302]:
raise IIIFException(
f"Unable to request: {url} - \
Status code {request.status_code}"
)
raise IIIFException(f"Invalid response code: {request.status_code}")
data = request.content
return json.loads(data)
except json.decoder.JSONDecodeError as jdex:
raise IIIFException(
f"Unable to decode JSON: {url} - \
{str(jdex)}"
) from jdex
raise IIIFException(f"Unable to decode JSON: {url} - {str(jdex)}") from jdex
except requests.exceptions.RequestException as re:
raise IIIFException(f"Unable to request: {url} - {str(re)}") from re
raise IIIFException(f"{str(re)}") from re
except Exception as ex:
raise Exception(f"Unknown error: {str(ex)}") from ex

0 comments on commit 9d49662

Please sign in to comment.