diff --git a/common.py b/common.py index f29815f..1aeec7a 100644 --- a/common.py +++ b/common.py @@ -1,65 +1,5 @@ import csv -import logging -import os -import re -import sys -import tempfile -from datetime import datetime -from enum import Enum from typing import IO -from urllib.parse import urlparse -from tqdm import tqdm - -import boto3 -import requests -import validators -from botocore.config import Config -from mypy_boto3_s3.service_resource import S3ServiceResource -from requests.adapters import HTTPAdapter -from urllib3.util import Retry -from botocore.exceptions import ClientError - -from constants import ( - AUTHORIZATION_HEADER, - CONTENT_DM_ISSHOWNAT_REGEX, - CONTENTDM_IIIF_INFO, - CONTENTDM_IIIF_MANIFEST_JSON, - DATA_PROVIDER_FIELD_NAME, - DPLA_API_DOCS, - DPLA_API_URL_BASE, - DPLA_PARTNERS, - EDM_AGENT_NAME, - HTTP_REQUEST_HEADERS, - IIIF_BODY, - IIIF_CANVASES, - IIIF_DEFAULT_JPG_SUFFIX, - IIIF_FULL_RES_JPG_SUFFIX, - IIIF_ID, - IIIF_IMAGES, - IIIF_ITEMS, - IIIF_MANIFEST_FIELD_NAME, - IIIF_PRESENTATION_API_MANIFEST_V2, - IIIF_PRESENTATION_API_MANIFEST_V3, - IIIF_RESOURCE, - IIIF_SEQUENCES, - INSTITUTIONS_FIELD_NAME, - INSTITUTIONS_URL, - JSON_LD_AT_CONTEXT, - JSON_LD_AT_ID, - LOGS_DIR_BASE, - MEDIA_MASTER_FIELD_NAME, - PROVIDER_FIELD_NAME, - RIGHTS_CATEGORY_FIELD_NAME, - S3_RETRIES, - UNLIMITED_RE_USE, - UPLOAD_FIELD_NAME, - WIKIDATA_FIELD_NAME, - S3_BUCKET, - EDM_IS_SHOWN_AT, -) - -__http_session: requests.Session | None = None -__temp_dir: tempfile.TemporaryDirectory | None = None def load_ids(ids_file: IO) -> list[str]: @@ -70,30 +10,6 @@ def load_ids(ids_file: IO) -> list[str]: return dpla_ids -def get_http_session() -> requests.Session: - global __http_session - if __http_session is not None: - return __http_session - retry_strategy = Retry( - connect=3, - read=3, - redirect=5, - status=5, - other=5, - backoff_factor=1, - status_forcelist=[429, 500, 502, 503, 504], - allowed_methods=["HEAD", "GET", "OPTIONS"], - respect_retry_after_header=True, - raise_on_status=True, - raise_on_redirect=True, - ) - adapter = HTTPAdapter(max_retries=retry_strategy) - __http_session = requests.Session() - __http_session.mount("https://", adapter) - __http_session.mount("http://", adapter) - return __http_session - - def null_safe[T](data: dict, field_name: str, identity_element: T) -> T: if data is not None: return data.get(field_name, identity_element) @@ -114,319 +30,3 @@ def get_str(data: dict, field_name: str) -> str: def get_dict(data: dict, field_name: str) -> dict: """Null safe shortcut for getting a dict from a dict.""" return null_safe(data, field_name, {}) - - -def check_partner(partner: str) -> None: - if partner not in DPLA_PARTNERS: - sys.exit("Unrecognized partner.") - - -def get_item_metadata(dpla_id: str, api_key: str) -> dict: - url = DPLA_API_URL_BASE + dpla_id - headers = {AUTHORIZATION_HEADER: api_key} - response = get_http_session().get(url, headers=headers) - response_json = response.json() - return response_json.get(DPLA_API_DOCS)[0] - - -def extract_urls(item_metadata: dict) -> list[str]: - if MEDIA_MASTER_FIELD_NAME in item_metadata: - return get_list(item_metadata, MEDIA_MASTER_FIELD_NAME) - - elif IIIF_MANIFEST_FIELD_NAME in item_metadata: - return get_iiif_urls(get_str(item_metadata, IIIF_MANIFEST_FIELD_NAME)) - - else: - raise NotImplementedError( - f"No {MEDIA_MASTER_FIELD_NAME} or {IIIF_MANIFEST_FIELD_NAME}" - ) - - -def iiif_v2_urls(iiif: dict) -> list[str]: - """ - Extracts image URLs from a v2 IIIF manifest and returns them as a list - """ - urls = [] - 
sequences = get_list(iiif, IIIF_SEQUENCES) - sequence = sequences[0:1] if len(sequences) == 1 else None - canvases = get_list(sequence[0], IIIF_CANVASES) - - for canvas in canvases: - for image in get_list(canvas, IIIF_IMAGES): - resource = get_dict(image, IIIF_RESOURCE) - url = get_str(resource, JSON_LD_AT_ID) - if url: - urls.append(url) - return urls - - -def iiif_v3_urls(iiif: dict) -> list[str]: - """ - Extracts image URLs from a v3 IIIF manifest and returns them as a list - """ - urls = [] - for item in get_list(iiif, IIIF_ITEMS): - try: - url = get_str( - get_dict(item[IIIF_ITEMS][0][IIIF_ITEMS][0], IIIF_BODY), IIIF_ID - ) - # This is a hack to get around that v3 presumes the user supplies the - # resolution in the URL - if url: - # This condition may not be necessary but I'm leaving it in for now - # TODO does this end up giving us smaller resources than we want? - if url.endswith(IIIF_DEFAULT_JPG_SUFFIX): - urls.append(url) - else: - urls.append(url + IIIF_FULL_RES_JPG_SUFFIX) - except (IndexError, TypeError, KeyError) as e: - logging.warning("Unable to parse IIIF manifest.", e) - return [] - return urls - - -def get_iiif_urls(iiif_presentation_api_url: str) -> list[str]: - """ - Extracts image URLs from IIIF manifest and returns them as a list - Currently only supports IIIF v2 and v3 - """ - manifest = _get_iiif_manifest(iiif_presentation_api_url) - # v2 or v3? - if get_str(manifest, JSON_LD_AT_CONTEXT) == IIIF_PRESENTATION_API_MANIFEST_V3: - return iiif_v3_urls(manifest) - elif get_str(manifest, JSON_LD_AT_CONTEXT) == IIIF_PRESENTATION_API_MANIFEST_V2: - return iiif_v2_urls(manifest) - else: - raise Exception("Unimplemented IIIF version") - - -def _get_iiif_manifest(url: str) -> dict: - """ - :return: parsed JSON - """ - if not validators.url(url): - raise Exception(f"Invalid url {url}") - try: - request = get_http_session().get(url, headers=HTTP_REQUEST_HEADERS) - request.raise_for_status() - return request.json() - - except Exception as ex: - # todo maybe this should return None? - raise Exception(f"Error getting IIIF manifest at {url}") from ex - - -def contentdm_iiif_url(is_shown_at: str) -> str | None: - """ - Creates a IIIF presentation API manifest URL from the - link to the object in ContentDM - - We want to go from - http://www.ohiomemory.org/cdm/ref/collection/p16007coll33/id/126923 - to - http://www.ohiomemory.org/iiif/info/p16007coll33/126923/manifest.json - - """ - parsed_url = urlparse(is_shown_at) - match_result = re.match(CONTENT_DM_ISSHOWNAT_REGEX, parsed_url.path) - if not match_result: - return None - else: - return ( - parsed_url.scheme - + "://" - + parsed_url.netloc - + CONTENTDM_IIIF_INFO - + match_result.group(1) - + "/" - + match_result.group(2) - + CONTENTDM_IIIF_MANIFEST_JSON - ) - - -def get_s3_path(dpla_id: str, ordinal: int, partner: str) -> str: - return ( - f"{partner}/images/{dpla_id[0]}/{dpla_id[1]}/" - f"{dpla_id[2]}/{dpla_id[3]}/{dpla_id}/{ordinal}_{dpla_id}" - ).strip() - - -def s3_file_exists(path: str, s3: S3ServiceResource): - try: - s3.Object(S3_BUCKET, path).load() - return True - except ClientError as e: - if e.response["Error"]["Code"] == "404": - # The object does not exist. - return False - else: - # Something else has gone wrong. 
- raise - - -def setup_temp_dir() -> None: - global __temp_dir - if __temp_dir is None: - __temp_dir = tempfile.TemporaryDirectory( - "tmp", "wiki", dir=".", ignore_cleanup_errors=True, delete=False - ) - - -def cleanup_temp_dir() -> None: - global __temp_dir - if __temp_dir is not None: - __temp_dir.cleanup() - - -def get_temp_file(): - global __temp_dir - if __temp_dir is None: - raise Exception("Temp dir not initialized.") - return tempfile.NamedTemporaryFile(delete=False, dir=__temp_dir.name) - - -def clean_up_tmp_file(temp_file) -> None: - try: - if temp_file: - os.unlink(temp_file.name) - except Exception as e: - logging.warning("Temp file unlink failed.", exc_info=e) - - -def get_s3() -> S3ServiceResource: - config = Config( - signature_version="s3v4", - max_pool_connections=25, - retries={"max_attempts": S3_RETRIES}, - ) - - return boto3.resource("s3", config=config) - - -class TqdmLoggingHandler(logging.Handler): - def __init__(self, level=logging.NOTSET): - super().__init__(level) - - def emit(self, record): - try: - msg = self.format(record) - tqdm.write(msg) - self.flush() - except Exception: - self.handleError(record) - - -def setup_logging(partner: str, event_type: str, level: int = logging.INFO) -> None: - os.makedirs(LOGS_DIR_BASE, exist_ok=True) - time_str = datetime.now().strftime("%Y%m%d-%H%M%S") - log_file_name = f"{time_str}-{partner}-{event_type}.log" - filename = f"{LOGS_DIR_BASE}/{log_file_name}" - logging.basicConfig( - level=level, - datefmt="%H:%M:%S", - handlers=[ - TqdmLoggingHandler(), - logging.FileHandler(filename=filename, mode="w"), - ], - format="[%(levelname)s] " "%(asctime)s: " "%(message)s", - ) - logging.info(f"Logging to {filename}.") - for d in logging.Logger.manager.loggerDict: - if d.startswith("pywiki"): - logging.getLogger(d).setLevel(logging.ERROR) - - -Result = Enum("Result", ["DOWNLOADED", "FAILED", "SKIPPED", "UPLOADED", "BYTES"]) - - -class Tracker: - def __init__(self): - self.data = {} - - def increment(self, status: Result, amount=1) -> None: - if status not in self.data: - self.data[status] = 0 - self.data[status] = self.data[status] + amount - - def count(self, status: Result) -> int: - if status not in self.data: - return 0 - else: - return self.data[status] - - def __str__(self) -> str: - result = "COUNTS:\n" - for key in self.data: - value = self.data[key] - result += f"{key.name}: {value}\n" - return result - - -def is_wiki_eligible(item_metadata: dict, provider: dict, data_provider: dict) -> bool: - provider_ok = null_safe(provider, UPLOAD_FIELD_NAME, False) or null_safe( - data_provider, UPLOAD_FIELD_NAME, False - ) - - rights_category_ok = ( - get_str(item_metadata, RIGHTS_CATEGORY_FIELD_NAME) == UNLIMITED_RE_USE - ) - - is_shown_at = get_str(item_metadata, EDM_IS_SHOWN_AT) - media_master = len(get_list(item_metadata, MEDIA_MASTER_FIELD_NAME)) > 0 - iiif_manifest = null_safe(item_metadata, IIIF_MANIFEST_FIELD_NAME, False) - - if not iiif_manifest and not media_master: - iiif_url = contentdm_iiif_url(is_shown_at) - if iiif_url is not None: - response = get_http_session().head(iiif_url, allow_redirects=True) - if response.status_code < 400: - item_metadata[IIIF_MANIFEST_FIELD_NAME] = iiif_url - iiif_manifest = True - - asset_ok = media_master or iiif_manifest - - # todo create banlist. item based? sha based? local id based? all three? 
- # todo don't reupload if deleted - - id_ok = True - - logging.info( - f"Rights: {rights_category_ok}, Asset: {asset_ok}, Provider: {provider_ok}, ID: {id_ok}" - ) - - return rights_category_ok and asset_ok and provider_ok and id_ok - - -def get_provider_and_data_provider( - item_metadata: dict, providers_json: dict -) -> tuple[dict, dict]: - """ - Loads metadata about the provider and data provider from the providers json file. - """ - - provider_name = get_str( - get_dict(item_metadata, PROVIDER_FIELD_NAME), EDM_AGENT_NAME - ) - data_provider_name = get_str( - get_dict(item_metadata, DATA_PROVIDER_FIELD_NAME), EDM_AGENT_NAME - ) - provider = get_dict(providers_json, provider_name) - data_provider = get_dict( - get_dict(provider, INSTITUTIONS_FIELD_NAME), data_provider_name - ) - return provider, data_provider - - -def get_providers_data() -> dict: - """Loads the institutions file from ingestion3 in GitHub.""" - return get_http_session().get(INSTITUTIONS_URL).json() - - -def provider_str(provider: dict) -> str: - if provider is None: - return "Provider: None" - else: - return ( - f"Provider: {provider.get(WIKIDATA_FIELD_NAME, "")}, " - f"{provider.get(UPLOAD_FIELD_NAME, "")}" - ) diff --git a/constants.py b/constants.py deleted file mode 100644 index 49efe33..0000000 --- a/constants.py +++ /dev/null @@ -1,167 +0,0 @@ -DPLA_PARTNERS = [ - "bpl", - "georgia", - "il", - "indiana", - "nara", - "northwest-heritage", - "ohio", - "p2p", - "pa", - "texas", - "minnesota", -] - -LOGS_DIR_BASE = "./logs" - -# For temporarily storing local downloads. -TMP_DIR_BASE = "./tmp" - -# Wikimedia constants -WIKIDATA_URL_BASE = "http://www.wikidata.org/entity/" -COMMONS_URL_PREFIX = "https://commons.wikimedia.org/wiki/File:" -ERROR_FILEEXISTS = "fileexists-shared-forbidden" -ERROR_MIME = "filetype-badmime" -ERROR_BANNED = "filetype-banned" -ERROR_DUPLICATE = "duplicate" -ERROR_NOCHANGE = "no-change" -COMMONS_SITE_NAME = "commons" -WMC_UPLOAD_CHUNK_SIZE = 20_000_000 # 20 MB -VALUE_JOIN_DELIMITER = "; " -RESERVED_WIKITEXT_STRINGS = ["|", "=", "[[", "]]", "{{", "}}", "''"] - - -# This list exists mainly to exclude 'duplicate' records/images from being uploaded -# Full list of warnings: -# https://doc.wikimedia.org/pywikibot/master/_modules/pywikibot/site/_upload.html -IGNORE_WIKIMEDIA_WARNINGS = [ - # Target filename has a bad prefix {msg}. - "bad-prefix", - # Target filename is invalid. - "badfilename", - # The file is a duplicate of a deleted file {msg}. - "duplicate-archive", - # The upload is an exact duplicate of older version(s) of this file - "duplicate-version", - # File {msg} is empty. - "empty-file", - # File [Page] {msg} already exists - "exists", - # File exists with different extension as {msg}. - "exists-normalized", - # File {msg} type is unwanted type. - "filetype-unwanted-type", - # Target filename exists but with a different file {msg} - "page-exists", - # The file {msg} was previously deleted. 
- "was-deleted", - # Not ignored: - # Uploaded file is a duplicate of {msg} - # 'duplicate', - # The upload is an exact duplicate of the current version of this file - # 'no-change', -] -INVALID_CONTENT_TYPES = [ - "text/html", - "application/json", - "application/xml", - "text/plain", -] - -# API documentation: https://www.mediawiki.org/wiki/API:Allimages -FIND_BY_HASH_URL_PREFIX: str = ( - "https://commons.wikimedia.org/w/api.php?action=query&format=json" - "&list=allimages&aisha1=" -) - -FIND_BY_HASH_QUERY_FIELD_NAME = "query" -FIND_BY_HASH_ALLIMAGES_FIELD_NAME = "allimages" - -# API documentation: https://www.mediawiki.org/wiki/API:Imageinfo -FIND_BY_TITLE_URL_PREFIX: str = ( - "https://commons.wikimedia.org/w/api.php?action=query&format=json&prop=imageinfo" - "&iiprop=sha1&titles=" -) - -# rights statements -RIGHTS_STATEMENTS_URL_BASE = "http://rightsstatements.org" -CC_URL_BASE = "http://creativecommons.org" -CC_URL_REGEX = "^http://creativecommons.org/licenses/(.*)" -RS_NKC_URL_BASE = RIGHTS_STATEMENTS_URL_BASE + "/vocab/NKC/" -RS_NKC_TEMPLATE = "NKC" -RS_NOC_URL_BASE = RIGHTS_STATEMENTS_URL_BASE + "/vocab/NoC-US/" -NOC_US_TEMPLATE = "NoC-US" -CC_PD_URL_BASE = CC_URL_BASE + "/publicdomain/mark/" -PD_US_TEMPLATE = "PD-US" -CC_ZERO_URL_BASE = CC_URL_BASE + "/publicdomain/zero/" -CC_ZERO_TEMPLATE = "cc-zero" -CC_BY_URL_BASE = CC_URL_BASE + "/licenses/by/" -CC_BY_SA_URL_BASE = CC_URL_BASE + "/licenses/by-sa/" - - -# DPLA API -DPLA_API_URL_BASE = "https://api.dp.la/v2/items/" -DPLA_API_DOCS = "docs" - -# DPLA MAP field names -SOURCE_RESOURCE_FIELD_NAME = "sourceResource" -MEDIA_MASTER_FIELD_NAME = "mediaMaster" -IIIF_MANIFEST_FIELD_NAME = "iiifManifest" -PROVIDER_FIELD_NAME = "provider" -DATA_PROVIDER_FIELD_NAME = "dataProvider" -EXACT_MATCH_FIELD_NAME = "exactMatch" -EDM_AGENT_NAME = "name" -EDM_IS_SHOWN_AT = "isShownAt" -RIGHTS_CATEGORY_FIELD_NAME = "rightsCategory" -EDM_RIGHTS_FIELD_NAME = "rights" -EDM_TIMESPAN_PREF_LABEL = "prefLabel" -UNLIMITED_RE_USE = "Unlimited Re-Use" -DC_CREATOR_FIELD_NAME = "creator" -DC_DATE_FIELD_NAME = "date" -DC_DESCRIPTION_FIELD_NAME = "description" -DC_TITLE_FIELD_NAME = "title" -DC_IDENTIFIER_FIELD_NAME = "identifier" - -# Institutions file constants -INSTITUTIONS_URL = ( - "https://raw.githubusercontent.com/dpla/ingestion3" - "/refs/heads/develop/src/main/resources/wiki/institutions_v2.json" -) -UPLOAD_FIELD_NAME = "upload" -INSTITUTIONS_FIELD_NAME = "institutions" -WIKIDATA_FIELD_NAME = "Wikidata" - -# AWS constants -S3_RETRIES = 3 -S3_BUCKET = "dpla-mdpdb" # TODO change for prod -# we use sha1 because that's what commons uses for identifying files -S3_KEY_CHECKSUM = "sha1" -S3_KEY_METADATA = "Metadata" -S3_KEY_CONTENT_TYPE = "ContentType" - - -# http -HTTP_REQUEST_HEADERS = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \ - (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" -} -AUTHORIZATION_HEADER = "Authorization" - -# IIIF -JSON_LD_AT_CONTEXT = "@context" -JSON_LD_AT_ID = "@id" -IIIF_DEFAULT_JPG_SUFFIX = "default.jpg" -IIIF_ID = "id" -IIIF_BODY = "body" -IIIF_ITEMS = "items" -IIIF_RESOURCE = "resource" -IIIF_IMAGES = "images" -IIIF_CANVASES = "canvases" -IIIF_SEQUENCES = "sequences" -IIIF_FULL_RES_JPG_SUFFIX = "/full/full/0/default.jpg" -IIIF_PRESENTATION_API_MANIFEST_V2 = "http://iiif.io/api/presentation/2/context.json" -IIIF_PRESENTATION_API_MANIFEST_V3 = "http://iiif.io/api/presentation/3/context.json" - -CONTENTDM_IIIF_MANIFEST_JSON = "/manifest.json" -CONTENTDM_IIIF_INFO = "/iiif/info/" 
-CONTENT_DM_ISSHOWNAT_REGEX = r"^/cdm/ref/collection/(.*)/id/(.*)$" # todo diff --git a/downloader.py b/downloader.py index 9530456..c8932d8 100644 --- a/downloader.py +++ b/downloader.py @@ -2,6 +2,7 @@ import logging import os import time +from web import get_http_session from typing import IO import click @@ -11,32 +12,30 @@ from tqdm import tqdm from common import ( - Result, - Tracker, + load_ids, +) +from wikimedia import INVALID_CONTENT_TYPES +from dpla import ( check_partner, - cleanup_temp_dir, - extract_urls, - get_http_session, get_item_metadata, get_provider_and_data_provider, get_providers_data, - get_s3, - get_s3_path, - get_temp_file, is_wiki_eligible, provider_str, - s3_file_exists, - setup_logging, - setup_temp_dir, - load_ids, + extract_urls, ) -from constants import ( - INVALID_CONTENT_TYPES, +from logs import setup_logging +from s3 import ( + get_s3, + get_s3_path, + s3_file_exists, S3_BUCKET, S3_KEY_CHECKSUM, - S3_KEY_CONTENT_TYPE, S3_KEY_METADATA, + S3_KEY_CONTENT_TYPE, ) +from temp import cleanup_temp_dir, get_temp_file, setup_temp_dir +from tracker import Result, Tracker def download_media( @@ -101,6 +100,7 @@ def upload_temp_file( unit="B", unit_divisor=1024, unit_scale=True, + delay=2, ) as t: obj.upload_fileobj( Fileobj=file, @@ -140,6 +140,7 @@ def download_file_to_temp_path(media_url: str): unit="B", unit_divisor=1024, unit_scale=True, + delay=2, ) as t: with open(temp_file.name, "wb") as f: for chunk in response.iter_content(None): @@ -184,7 +185,7 @@ def main( dpla_ids = load_ids(ids_file) - for dpla_id in tqdm(dpla_ids, desc="Downloading Items", unit=" Items"): + for dpla_id in tqdm(dpla_ids, desc="Downloading Items", unit="Item"): logging.info(f"DPLA ID: {dpla_id}") try: item_metadata = get_item_metadata(dpla_id, api_key) @@ -212,7 +213,7 @@ def main( logging.info(f"Data Provider: {provider_str(data_provider)}") for media_url in tqdm( - media_urls, desc="Downloading Files", leave=False, unit=" Files" + media_urls, desc="Downloading Files", leave=False, unit="File" ): count += 1 # hack to fix bad nara data diff --git a/dpla.py b/dpla.py new file mode 100644 index 0000000..4b08712 --- /dev/null +++ b/dpla.py @@ -0,0 +1,265 @@ +import logging +import re +import sys +from urllib.parse import urlparse + +import validators + +from common import null_safe, get_str, get_list, get_dict +from web import get_http_session, HTTP_REQUEST_HEADERS + + +def check_partner(partner: str) -> None: + if partner not in DPLA_PARTNERS: + sys.exit("Unrecognized partner.") + + +def get_item_metadata(dpla_id: str, api_key: str) -> dict: + url = DPLA_API_URL_BASE + dpla_id + headers = {AUTHORIZATION_HEADER: api_key} + response = get_http_session().get(url, headers=headers) + response_json = response.json() + return response_json.get(DPLA_API_DOCS)[0] + + +def is_wiki_eligible(item_metadata: dict, provider: dict, data_provider: dict) -> bool: + provider_ok = null_safe(provider, UPLOAD_FIELD_NAME, False) or null_safe( + data_provider, UPLOAD_FIELD_NAME, False + ) + + rights_category_ok = ( + get_str(item_metadata, RIGHTS_CATEGORY_FIELD_NAME) == UNLIMITED_RE_USE + ) + + is_shown_at = get_str(item_metadata, EDM_IS_SHOWN_AT) + media_master = len(get_list(item_metadata, MEDIA_MASTER_FIELD_NAME)) > 0 + iiif_manifest = null_safe(item_metadata, IIIF_MANIFEST_FIELD_NAME, False) + + if not iiif_manifest and not media_master: + iiif_url = contentdm_iiif_url(is_shown_at) + if iiif_url is not None: + response = get_http_session().head(iiif_url, allow_redirects=True) + if response.status_code < 400: 
+                item_metadata[IIIF_MANIFEST_FIELD_NAME] = iiif_url
+                iiif_manifest = True
+
+    asset_ok = media_master or iiif_manifest
+
+    # todo create banlist. item based? sha based? local id based? all three?
+    # todo don't reupload if deleted
+
+    id_ok = True
+
+    logging.info(
+        f"Rights: {rights_category_ok}, Asset: {asset_ok}, Provider: {provider_ok}, ID: {id_ok}"
+    )
+
+    return rights_category_ok and asset_ok and provider_ok and id_ok
+
+
+def get_provider_and_data_provider(
+    item_metadata: dict, providers_json: dict
+) -> tuple[dict, dict]:
+    """
+    Loads metadata about the provider and data provider from the providers json file.
+    """
+
+    provider_name = get_str(
+        get_dict(item_metadata, PROVIDER_FIELD_NAME), EDM_AGENT_NAME
+    )
+    data_provider_name = get_str(
+        get_dict(item_metadata, DATA_PROVIDER_FIELD_NAME), EDM_AGENT_NAME
+    )
+    provider = get_dict(providers_json, provider_name)
+    data_provider = get_dict(
+        get_dict(provider, INSTITUTIONS_FIELD_NAME), data_provider_name
+    )
+    return provider, data_provider
+
+
+def get_providers_data() -> dict:
+    """Loads the institutions file from ingestion3 in GitHub."""
+    return get_http_session().get(INSTITUTIONS_URL).json()
+
+
+def provider_str(provider: dict | None) -> str:
+    if provider is None:
+        return "Provider: None"
+    else:
+        return (
+            f"Provider: {provider.get(WIKIDATA_FIELD_NAME, "")}, "
+            f"{provider.get(UPLOAD_FIELD_NAME, "")}"
+        )
+
+
+def extract_urls(item_metadata: dict) -> list[str]:
+    if MEDIA_MASTER_FIELD_NAME in item_metadata:
+        return get_list(item_metadata, MEDIA_MASTER_FIELD_NAME)
+
+    elif IIIF_MANIFEST_FIELD_NAME in item_metadata:
+        return get_iiif_urls(get_str(item_metadata, IIIF_MANIFEST_FIELD_NAME))
+
+    else:
+        raise NotImplementedError(
+            f"No {MEDIA_MASTER_FIELD_NAME} or {IIIF_MANIFEST_FIELD_NAME}"
+        )
+
+
+def iiif_v2_urls(iiif: dict) -> list[str]:
+    """
+    Extracts image URLs from a v2 IIIF manifest and returns them as a list
+    """
+    urls = []
+    sequences = get_list(iiif, IIIF_SEQUENCES)
+    # Use the first sequence, if any; a manifest with no sequences yields no URLs.
+    canvases = get_list(sequences[0], IIIF_CANVASES) if sequences else []
+
+    for canvas in canvases:
+        for image in get_list(canvas, IIIF_IMAGES):
+            resource = get_dict(image, IIIF_RESOURCE)
+            url = get_str(resource, JSON_LD_AT_ID)
+            if url:
+                urls.append(url)
+    return urls
+
+
+def iiif_v3_urls(iiif: dict) -> list[str]:
+    """
+    Extracts image URLs from a v3 IIIF manifest and returns them as a list
+    """
+    urls = []
+    for item in get_list(iiif, IIIF_ITEMS):
+        try:
+            url = get_str(
+                get_dict(item[IIIF_ITEMS][0][IIIF_ITEMS][0], IIIF_BODY), IIIF_ID
+            )
+            # Work around the v3 API's presumption that the caller supplies the
+            # resolution in the URL
+            if url:
+                # This condition may not be necessary but I'm leaving it in for now
+                # TODO does this end up giving us smaller resources than we want?
+                if url.endswith(IIIF_DEFAULT_JPG_SUFFIX):
+                    urls.append(url)
+                else:
+                    urls.append(url + IIIF_FULL_RES_JPG_SUFFIX)
+        except (IndexError, TypeError, KeyError) as e:
+            logging.warning("Unable to parse IIIF manifest.", exc_info=e)
+            return []
+    return urls
+
+
+def get_iiif_urls(iiif_presentation_api_url: str) -> list[str]:
+    """
+    Extracts image URLs from an IIIF manifest and returns them as a list
+    Currently only supports IIIF v2 and v3
+    """
+    manifest = _get_iiif_manifest(iiif_presentation_api_url)
+    # v2 or v3?
+ if get_str(manifest, JSON_LD_AT_CONTEXT) == IIIF_PRESENTATION_API_MANIFEST_V3: + return iiif_v3_urls(manifest) + elif get_str(manifest, JSON_LD_AT_CONTEXT) == IIIF_PRESENTATION_API_MANIFEST_V2: + return iiif_v2_urls(manifest) + else: + raise Exception("Unimplemented IIIF version") + + +def _get_iiif_manifest(url: str) -> dict: + """ + :return: parsed JSON + """ + if not validators.url(url): + raise Exception(f"Invalid url {url}") + try: + request = get_http_session().get(url, headers=HTTP_REQUEST_HEADERS) + request.raise_for_status() + return request.json() + + except Exception as ex: + # todo maybe this should return None? + raise Exception(f"Error getting IIIF manifest at {url}") from ex + + +def contentdm_iiif_url(is_shown_at: str) -> str | None: + """ + Creates a IIIF presentation API manifest URL from the + link to the object in ContentDM + + We want to go from + http://www.ohiomemory.org/cdm/ref/collection/p16007coll33/id/126923 + to + http://www.ohiomemory.org/iiif/info/p16007coll33/126923/manifest.json + + """ + parsed_url = urlparse(is_shown_at) + match_result = re.match(CONTENT_DM_ISSHOWNAT_REGEX, parsed_url.path) + if not match_result: + return None + else: + return ( + parsed_url.scheme + + "://" + + parsed_url.netloc + + CONTENTDM_IIIF_INFO + + match_result.group(1) + + "/" + + match_result.group(2) + + CONTENTDM_IIIF_MANIFEST_JSON + ) + + +DPLA_API_URL_BASE = "https://api.dp.la/v2/items/" +DPLA_API_DOCS = "docs" +INSTITUTIONS_URL = ( + "https://raw.githubusercontent.com/dpla/ingestion3" + "/refs/heads/develop/src/main/resources/wiki/institutions_v2.json" +) +UPLOAD_FIELD_NAME = "upload" +INSTITUTIONS_FIELD_NAME = "institutions" +SOURCE_RESOURCE_FIELD_NAME = "sourceResource" +MEDIA_MASTER_FIELD_NAME = "mediaMaster" +IIIF_MANIFEST_FIELD_NAME = "iiifManifest" +PROVIDER_FIELD_NAME = "provider" +DATA_PROVIDER_FIELD_NAME = "dataProvider" +EXACT_MATCH_FIELD_NAME = "exactMatch" +EDM_AGENT_NAME = "name" +EDM_IS_SHOWN_AT = "isShownAt" +RIGHTS_CATEGORY_FIELD_NAME = "rightsCategory" +EDM_RIGHTS_FIELD_NAME = "rights" +EDM_TIMESPAN_PREF_LABEL = "prefLabel" +UNLIMITED_RE_USE = "Unlimited Re-Use" +DC_CREATOR_FIELD_NAME = "creator" +DC_DATE_FIELD_NAME = "date" +DC_DESCRIPTION_FIELD_NAME = "description" +DC_TITLE_FIELD_NAME = "title" +DC_IDENTIFIER_FIELD_NAME = "identifier" +WIKIDATA_FIELD_NAME = "Wikidata" +AUTHORIZATION_HEADER = "Authorization" +JSON_LD_AT_CONTEXT = "@context" +JSON_LD_AT_ID = "@id" +IIIF_DEFAULT_JPG_SUFFIX = "default.jpg" +IIIF_ID = "id" +IIIF_BODY = "body" +IIIF_ITEMS = "items" +IIIF_RESOURCE = "resource" +IIIF_IMAGES = "images" +IIIF_CANVASES = "canvases" +IIIF_SEQUENCES = "sequences" +IIIF_FULL_RES_JPG_SUFFIX = "/full/full/0/default.jpg" +IIIF_PRESENTATION_API_MANIFEST_V2 = "http://iiif.io/api/presentation/2/context.json" +IIIF_PRESENTATION_API_MANIFEST_V3 = "http://iiif.io/api/presentation/3/context.json" +CONTENTDM_IIIF_MANIFEST_JSON = "/manifest.json" +CONTENTDM_IIIF_INFO = "/iiif/info/" +CONTENT_DM_ISSHOWNAT_REGEX = r"^/cdm/ref/collection/(.*)/id/(.*)$" # todo +DPLA_PARTNERS = [ + "bpl", + "georgia", + "il", + "indiana", + "nara", + "northwest-heritage", + "ohio", + "p2p", + "pa", + "texas", + "minnesota", +] diff --git a/logs.py b/logs.py new file mode 100644 index 0000000..55b15c0 --- /dev/null +++ b/logs.py @@ -0,0 +1,46 @@ +import os +import logging +from datetime import datetime + +from tqdm import tqdm + + +class TqdmLoggingHandler(logging.Handler): + """ + This class redirects logging's console output through tqdm so the progress + bars don't get mangled. 
+    """
+
+    def __init__(self, level=logging.NOTSET):
+        super().__init__(level)
+
+    def emit(self, record):
+        try:
+            msg = self.format(record)
+            tqdm.write(msg)
+            self.flush()
+        except Exception:
+            self.handleError(record)
+
+
+def setup_logging(partner: str, event_type: str, level: int = logging.INFO) -> None:
+    os.makedirs(LOGS_DIR_BASE, exist_ok=True)
+    time_str = datetime.now().strftime("%Y%m%d-%H%M%S")
+    log_file_name = f"{time_str}-{partner}-{event_type}.log"
+    filename = f"{LOGS_DIR_BASE}/{log_file_name}"
+    logging.basicConfig(
+        level=level,
+        datefmt="%H:%M:%S",
+        handlers=[
+            TqdmLoggingHandler(),
+            logging.FileHandler(filename=filename, mode="w"),
+        ],
+        format="[%(levelname)s] " "%(asctime)s: " "%(message)s",
+    )
+    logging.info(f"Logging to {filename}.")
+    for d in logging.Logger.manager.loggerDict:
+        if d.startswith("pywiki"):
+            logging.getLogger(d).setLevel(logging.ERROR)
+
+
+LOGS_DIR_BASE = "logs"
diff --git a/s3.py b/s3.py
new file mode 100644
index 0000000..3a7dd2a
--- /dev/null
+++ b/s3.py
@@ -0,0 +1,41 @@
+import boto3
+from botocore.config import Config
+from botocore.exceptions import ClientError
+from mypy_boto3_s3 import S3ServiceResource
+
+
+def get_s3_path(dpla_id: str, ordinal: int, partner: str) -> str:
+    return (
+        f"{partner}/images/{dpla_id[0]}/{dpla_id[1]}/"
+        f"{dpla_id[2]}/{dpla_id[3]}/{dpla_id}/{ordinal}_{dpla_id}"
+    ).strip()
+
+
+def s3_file_exists(path: str, s3: S3ServiceResource):
+    try:
+        s3.Object(S3_BUCKET, path).load()
+        return True
+    except ClientError as e:
+        if e.response["Error"]["Code"] == "404":
+            # The object does not exist.
+            return False
+        else:
+            # Something else has gone wrong.
+            raise
+
+
+def get_s3() -> S3ServiceResource:
+    config = Config(
+        signature_version="s3v4",
+        max_pool_connections=25,
+        retries={"max_attempts": S3_RETRIES},
+    )
+
+    return boto3.resource("s3", config=config)
+
+
+S3_RETRIES = 3
+S3_BUCKET = "dpla-mdpdb"  # TODO change for prod
+S3_KEY_CHECKSUM = "sha1"
+S3_KEY_METADATA = "Metadata"
+S3_KEY_CONTENT_TYPE = "ContentType"
diff --git a/temp.py b/temp.py
new file mode 100644
index 0000000..e8bb92a
--- /dev/null
+++ b/temp.py
@@ -0,0 +1,34 @@
+import logging
+import os
+import tempfile
+
+__temp_dir: tempfile.TemporaryDirectory | None = None
+
+
+def setup_temp_dir() -> None:
+    global __temp_dir
+    if __temp_dir is None:
+        __temp_dir = tempfile.TemporaryDirectory(
+            "tmp", "wiki", dir="", ignore_cleanup_errors=True, delete=False
+        )
+
+
+def cleanup_temp_dir() -> None:
+    global __temp_dir
+    if __temp_dir is not None:
+        __temp_dir.cleanup()
+
+
+def get_temp_file():
+    global __temp_dir
+    if __temp_dir is None:
+        raise Exception("Temp dir not initialized.")
+    return tempfile.NamedTemporaryFile(delete=False, dir=__temp_dir.name)
+
+
+def clean_up_tmp_file(temp_file) -> None:
+    try:
+        if temp_file:
+            os.unlink(temp_file.name)
+    except Exception as e:
+        logging.warning("Temp file unlink failed.", exc_info=e)
diff --git a/tracker.py b/tracker.py
new file mode 100644
index 0000000..9676b70
--- /dev/null
+++ b/tracker.py
@@ -0,0 +1,26 @@
+from enum import Enum
+
+Result = Enum("Result", ["DOWNLOADED", "FAILED", "SKIPPED", "UPLOADED", "BYTES"])
+
+
+class Tracker:
+    def __init__(self):
+        self.data = {}
+
+    def increment(self, status: Result, amount=1) -> None:
+        if status not in self.data:
+            self.data[status] = 0
+        self.data[status] = self.data[status] + amount
+
+    def count(self, status: Result) -> int:
+        if status not in self.data:
+            return 0
+        else:
+            return self.data[status]
+
+    def __str__(self) -> str:
+        result = "COUNTS:\n"
+        for key
in self.data: + value = self.data[key] + result += f"{key.name}: {value}\n" + return result diff --git a/uploader.py b/uploader.py index 33c2601..0e6bc70 100644 --- a/uploader.py +++ b/uploader.py @@ -12,70 +12,69 @@ from pywikibot.tools.chars import replace_invisible from common import ( - get_item_metadata, - extract_urls, - get_s3_path, - get_temp_file, - setup_temp_dir, - cleanup_temp_dir, - get_s3, - setup_logging, - clean_up_tmp_file, - Tracker, - Result, - is_wiki_eligible, - get_provider_and_data_provider, - get_providers_data, - check_partner, - provider_str, get_str, get_list, get_dict, - get_http_session, load_ids, ) -from constants import ( - COMMONS_SITE_NAME, - WMC_UPLOAD_CHUNK_SIZE, - IGNORE_WIKIMEDIA_WARNINGS, - S3_BUCKET, - S3_KEY_CHECKSUM, - INVALID_CONTENT_TYPES, - WIKIDATA_FIELD_NAME, - EDM_RIGHTS_FIELD_NAME, - RESERVED_WIKITEXT_STRINGS, +from logs import setup_logging +from s3 import get_s3_path, get_s3, S3_BUCKET, S3_KEY_CHECKSUM +from tracker import Result, Tracker +from temp import setup_temp_dir, cleanup_temp_dir, get_temp_file, clean_up_tmp_file +from dpla import ( + check_partner, + get_item_metadata, + is_wiki_eligible, + get_provider_and_data_provider, + get_providers_data, + provider_str, SOURCE_RESOURCE_FIELD_NAME, - VALUE_JOIN_DELIMITER, + EDM_IS_SHOWN_AT, + EDM_RIGHTS_FIELD_NAME, + EDM_TIMESPAN_PREF_LABEL, DC_CREATOR_FIELD_NAME, - DC_TITLE_FIELD_NAME, - DC_DESCRIPTION_FIELD_NAME, DC_DATE_FIELD_NAME, - EDM_TIMESPAN_PREF_LABEL, - EDM_IS_SHOWN_AT, + DC_DESCRIPTION_FIELD_NAME, + DC_TITLE_FIELD_NAME, DC_IDENTIFIER_FIELD_NAME, - CC_URL_REGEX, - CC_BY_SA_URL_BASE, - CC_BY_URL_BASE, - CC_ZERO_URL_BASE, - CC_PD_URL_BASE, - RS_NOC_URL_BASE, - RS_NKC_URL_BASE, - RS_NKC_TEMPLATE, - NOC_US_TEMPLATE, - PD_US_TEMPLATE, - CC_ZERO_TEMPLATE, - RIGHTS_STATEMENTS_URL_BASE, + WIKIDATA_FIELD_NAME, + extract_urls, +) +from web import get_http_session +from wikimedia import ( + INVALID_CONTENT_TYPES, COMMONS_URL_PREFIX, - FIND_BY_HASH_URL_PREFIX, - FIND_BY_HASH_QUERY_FIELD_NAME, - FIND_BY_HASH_ALLIMAGES_FIELD_NAME, ERROR_FILEEXISTS, ERROR_MIME, ERROR_BANNED, ERROR_DUPLICATE, ERROR_NOCHANGE, + COMMONS_SITE_NAME, + WMC_UPLOAD_CHUNK_SIZE, + VALUE_JOIN_DELIMITER, + RESERVED_WIKITEXT_STRINGS, + IGNORE_WIKIMEDIA_WARNINGS, + FIND_BY_HASH_URL_PREFIX, + FIND_BY_HASH_QUERY_FIELD_NAME, + FIND_BY_HASH_ALLIMAGES_FIELD_NAME, ) +CC_URL_REGEX = "^http://creativecommons.org/licenses/(.*)" + +RIGHTS_STATEMENTS_URL_BASE = "http://rightsstatements.org" +RS_NKC_URL_BASE = RIGHTS_STATEMENTS_URL_BASE + "/vocab/NKC/" +RS_NOC_URL_BASE = RIGHTS_STATEMENTS_URL_BASE + "/vocab/NoC-US/" +CC_URL_BASE = "http://creativecommons.org" +CC_PD_URL_BASE = CC_URL_BASE + "/publicdomain/mark/" +CC_ZERO_URL_BASE = CC_URL_BASE + "/publicdomain/zero/" +CC_BY_URL_BASE = CC_URL_BASE + "/licenses/by/" +CC_BY_SA_URL_BASE = CC_URL_BASE + "/licenses/by-sa/" + +CC_ZERO_TEMPLATE = "cc-zero" +RS_NKC_TEMPLATE = "NKC" +NOC_US_TEMPLATE = "NoC-US" +PD_US_TEMPLATE = "PD-US" + def get_page(site: pywikibot.Site, title: str) -> FilePage: """ @@ -288,7 +287,7 @@ def main(ids_file, partner: str, api_key: str, dry_run: bool, verbose: bool) -> dpla_ids = load_ids(ids_file) - for dpla_id in tqdm(dpla_ids, desc="Uploading Items", unit=" Items"): + for dpla_id in tqdm(dpla_ids, desc="Uploading Items", unit="Item"): logging.info(f"DPLA ID: {dpla_id}") item_metadata = get_item_metadata(dpla_id, api_key) @@ -313,7 +312,7 @@ def main(ids_file, partner: str, api_key: str, dry_run: bool, verbose: bool) -> # todo manifest of files? 
files = extract_urls(item_metadata) - for file in tqdm(files, desc="Uploading Files", leave=False, unit=" Files"): + for file in tqdm(files, desc="Uploading Files", leave=False, unit="File"): ordinal += 1 # todo if we're walking s3, this comes from the name logging.info(f"Page {ordinal}") # one-pagers don't have page numbers in their titles @@ -385,6 +384,7 @@ def main(ids_file, partner: str, api_key: str, dry_run: bool, verbose: bool) -> unit="B", unit_scale=1024, unit_divisor=True, + delay=2, ) as t: s3_object.download_file( temp_file.name, diff --git a/web.py b/web.py new file mode 100644 index 0000000..ace8804 --- /dev/null +++ b/web.py @@ -0,0 +1,35 @@ +import requests +from requests.adapters import HTTPAdapter +from urllib3 import Retry + +__http_session: requests.Session | None = None + + +def get_http_session() -> requests.Session: + global __http_session + if __http_session is not None: + return __http_session + retry_strategy = Retry( + connect=3, + read=3, + redirect=5, + status=5, + other=5, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504], + allowed_methods=["HEAD", "GET", "OPTIONS"], + respect_retry_after_header=True, + raise_on_status=True, + raise_on_redirect=True, + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + __http_session = requests.Session() + __http_session.mount("https://", adapter) + __http_session.mount("http://", adapter) + return __http_session + + +HTTP_REQUEST_HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \ + (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" +} diff --git a/wikimedia.py b/wikimedia.py new file mode 100644 index 0000000..b0e72d5 --- /dev/null +++ b/wikimedia.py @@ -0,0 +1,49 @@ +INVALID_CONTENT_TYPES = [ + "text/html", + "application/json", + "application/xml", + "text/plain", +] +COMMONS_URL_PREFIX = "https://commons.wikimedia.org/wiki/File:" +ERROR_FILEEXISTS = "fileexists-shared-forbidden" +ERROR_MIME = "filetype-badmime" +ERROR_BANNED = "filetype-banned" +ERROR_DUPLICATE = "duplicate" +ERROR_NOCHANGE = "no-change" +COMMONS_SITE_NAME = "commons" +WMC_UPLOAD_CHUNK_SIZE = 20_000_000 # 20 MB +VALUE_JOIN_DELIMITER = "; " +RESERVED_WIKITEXT_STRINGS = ["|", "=", "[[", "]]", "{{", "}}", "''"] +IGNORE_WIKIMEDIA_WARNINGS = [ + # Target filename has a bad prefix {msg}. + "bad-prefix", + # Target filename is invalid. + "badfilename", + # The file is a duplicate of a deleted file {msg}. + "duplicate-archive", + # The upload is an exact duplicate of older version(s) of this file + "duplicate-version", + # File {msg} is empty. + "empty-file", + # File [Page] {msg} already exists + "exists", + # File exists with different extension as {msg}. + "exists-normalized", + # File {msg} type is unwanted type. + "filetype-unwanted-type", + # Target filename exists but with a different file {msg} + "page-exists", + # The file {msg} was previously deleted. + "was-deleted", + # Not ignored: + # Uploaded file is a duplicate of {msg} + # 'duplicate', + # The upload is an exact duplicate of the current version of this file + # 'no-change', +] +FIND_BY_HASH_URL_PREFIX: str = ( + "https://commons.wikimedia.org/w/api.php?action=query&format=json" + "&list=allimages&aisha1=" +) +FIND_BY_HASH_QUERY_FIELD_NAME = "query" +FIND_BY_HASH_ALLIMAGES_FIELD_NAME = "allimages"
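The refactor keeps the public call pattern intact; roughly, a caller wires the relocated pieces together as sketched below. This is an illustrative sketch only (not part of the diff), mirroring the sequence already used in downloader.py; the API key and DPLA ID are hypothetical placeholders, and "ohio" is one of the keys in dpla.DPLA_PARTNERS.

# Illustrative usage of the new modules; placeholder credentials and IDs.
from dpla import (
    check_partner,
    extract_urls,
    get_item_metadata,
    get_provider_and_data_provider,
    get_providers_data,
    is_wiki_eligible,
)
from logs import setup_logging
from tracker import Result, Tracker

partner = "ohio"
check_partner(partner)              # exits if the partner is unrecognized
setup_logging(partner, "download")  # tqdm-aware console handler plus a log file

tracker = Tracker()
item = get_item_metadata("00000000000000000000000000000000", "DPLA_API_KEY")
provider, data_provider = get_provider_and_data_provider(item, get_providers_data())

if is_wiki_eligible(item, provider, data_provider):
    for url in extract_urls(item):  # mediaMaster URLs or IIIF manifest image URLs
        tracker.increment(Result.DOWNLOADED)
print(tracker)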