Skip to content

Commit

Permalink
Allow 6 digit dossier number in url parsing and add test
Browse files Browse the repository at this point in the history
  • Loading branch information
TerryvanWalen committed Jul 29, 2024
1 parent d86ea72 commit 4ba7e0f
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/iiif/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def get_info_from_iiif_url(iiif_url, source_file):
}
if source == "edepot": # aka pre-wabo
# ST-00015-ST00000126_00001.jpg=relevant_url_part ST=stadsdeel 00015=dossier ST00000126=document_barcode 00001=file/bestand
# SQ1452-SQ-01452%20(2)-SQ10079651_00001.jpg=relevant_url_part SQ=stadsdeel 01425=dossier SQ100796511=document_barcode 00001=file/bestand
# SQ1452-SQ-01452%20(2)-SQ10079651_00001.jpg=relevant_url_part SQ=stadsdeel 01452=dossier SQ10079651=document_barcode 00001=file/bestand
try:
stadsdeel, dossier, document_barcode, file = re.match(
r"^.*?([A-Z]{2})-(\d{5}).*?-(.+)_(.*?)\.\w+$", relevant_url_part
r"^.*?([A-Z]{2})-(\d*).*?-(.+)_(.*?)\.\w+$", relevant_url_part

Check failure

Code scanning / CodeQL

Polynomial regular expression used on uncontrolled data High

This regular expression that depends on a user-provided value may run slow on strings with many repetitions of '9'.
This regular expression that depends on a user-provided value may run slow on strings with many repetitions of '-a'.
This regular expression that depends on a user-provided value may run slow on strings with many repetitions of 'a_a'.
This regular expression that depends on a user-provided value may run slow on strings with many repetitions of '9'.
This regular expression that depends on a user-provided value may run slow on strings with many repetitions of '-a'.
This regular expression that depends on a user-provided value may run slow on strings with many repetitions of 'a_a'.
).groups()
except Exception as e:
raise InvalidIIIFUrlError(
Expand Down
16 changes: 10 additions & 6 deletions tests/test_settings.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from django.conf import settings

PRE_WABO_IMG_URL_BASE = "2/edepot:ST-00015-ST00000126_00001.jpg/"
EDEPOT_PREFIX = "2/edepot:"
WABO_PREFIX = "2/wabo:"

PRE_WABO_IMG_URL_BASE = EDEPOT_PREFIX + "ST-00015-ST00000126_00001.jpg/"
PRE_WABO_INFO_JSON_URL = PRE_WABO_IMG_URL_BASE + "info.json"

PRE_WABO_IMG_URL_WITH_SCALING = PRE_WABO_IMG_URL_BASE + "full/50,50/0/default.jpg"
Expand All @@ -12,19 +14,21 @@
PRE_WABO_IMG_URL_BASE + "10000,10000,48,48/full/0/default.jpg"
)


PRE_WABO_IMG_URL_SOURCE_FILE = (
"2/edepot:ST-00015-ST00000126_00001.jpg/?source_file=true&"
EDEPOT_PREFIX + "ST-00015-ST00000126_00001.jpg/?source_file=true&"
)
PRE_WABO_IMG_URL_NO_SCALING = (
"2/edepot:ST-00015-ST00000126_00001.jpg/full/full/0/default.jpg"
EDEPOT_PREFIX + "ST-00015-ST00000126_00001.jpg/full/full/0/default.jpg"
)

PRE_WABO_IMG_URL_WITH_EXTRA_DOSSIER_DIGIT = (
EDEPOT_PREFIX + "SA-100732-SA00509506_00003.jpg/info.json"
)
PRE_WABO_IMG_URL_WITH_EXTRA_REFERENCE = (
"2/edepot:SQ1452-SQ-01452%20(2)-SQ10079651_00001.jpg/full/full/0/default.jpg"
EDEPOT_PREFIX + "SQ1452-SQ-01452%20(2)-SQ10079651_00001.jpg/full/full/0/default.jpg"
)

WABO_IMG_URL = "2/wabo:SDZ-38657-4900487_628547/full/1000,900/0/default.jpg"
WABO_IMG_URL = WABO_PREFIX + "SDZ-38657-4900487_628547/full/1000,900/0/default.jpg"

with open("test-images/test-image-96x85.jpg", "rb") as file:
IMAGE_BINARY_DATA = file.read()
Expand Down
35 changes: 35 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from main.utils import ImmediateHttpResponse
from tests.test_settings import (
PRE_WABO_IMG_URL_NO_SCALING,
PRE_WABO_IMG_URL_WITH_EXTRA_DOSSIER_DIGIT,
PRE_WABO_IMG_URL_WITH_EXTRA_REFERENCE,
PRE_WABO_IMG_URL_WITH_REGION,
PRE_WABO_IMG_URL_WITH_SCALING,
Expand Down Expand Up @@ -49,6 +50,40 @@ def test_get_info_json_from_pre_wabo_url(self):
assert url_info["formatting"] is None
assert url_info["info_json"] is True

def test_get_info_json_from_pre_wabo_url_with_extra_digit(self):
"""Parse an edepot info.json URL whose dossier number has six digits.

URL under test: 2/edepot:SA-100732-SA00509506_00003.jpg/info.json
"""
url_info = get_info_from_iiif_url(
PRE_WABO_IMG_URL_WITH_EXTRA_DOSSIER_DIGIT, False
)
assert url_info["source"] == "edepot"
assert url_info["stadsdeel"] == "SA"
# The dossier has six digits here instead of the five seen in the other fixtures.
assert url_info["dossier"] == "100732"
assert url_info["document_barcode"] == "SA00509506"
assert url_info["file"] == "00003"
# An info.json request carries no region/scaling/formatting segments.
assert url_info["region"] is None
assert url_info["scaling"] is None
assert url_info["source_filename"] == "SA/100732/SA00509506_00003.jpg"
assert url_info["filename"] == "SA-100732-SA00509506_00003.jpg"
assert url_info["formatting"] is None
assert url_info["info_json"] is True

def test_get_info_json_from_pre_wabo_url_with_extra_reference(self):
"""Parse an edepot image URL that has an extra reference before the stadsdeel.

URL under test:
2/edepot:SQ1452-SQ-01452%20(2)-SQ10079651_00001.jpg/full/full/0/default.jpg
"""
# NOTE(review): the test name mentions "info_json", but the URL is an image
# request and info_json is asserted to be False below - consider renaming.
url_info = get_info_from_iiif_url(PRE_WABO_IMG_URL_WITH_EXTRA_REFERENCE, False)
assert url_info["source"] == "edepot"
assert url_info["stadsdeel"] == "SQ"
# The "%20(2)" suffix after the dossier digits is not part of the dossier value.
assert url_info["dossier"] == "01452"
assert url_info["document_barcode"] == "SQ10079651"
assert url_info["file"] == "00001"
assert url_info["region"] == "full"
assert url_info["scaling"] == "full"
assert (
url_info["source_filename"] == "SQ1452/SQ/01452%20(2)-SQ10079651_00001.jpg"
)
assert url_info["filename"] == "SQ1452-SQ-01452%20(2)-SQ10079651_00001.jpg"
assert url_info["formatting"] == "full/full/0/default.jpg"
assert url_info["info_json"] is False

def test_get_info_from_pre_wabo_url_vanilla(self):
url_info = get_info_from_iiif_url(PRE_WABO_IMG_URL_WITH_SCALING, False)
assert url_info["source"] == "edepot"
Expand Down

0 comments on commit 4ba7e0f

Please sign in to comment.