Skip to content

Commit

Permalink
adds sign and nuke (#45)
Browse files Browse the repository at this point in the history
* adds sign and nuke

* fixed early break for signed files

* fixed tqdm unit handling
  • Loading branch information
mdellabitta authored Nov 18, 2024
1 parent d1bf070 commit 40ed627
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 50 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,6 @@ build-backend = "hatchling.build"
[project.scripts]
uploader = "tools.uploader:main"
downloader = "tools.downloader:main"

sign = "tools.sign:main"
nuke = "tools.nuke:main"

File renamed without changes.
111 changes: 62 additions & 49 deletions tools/sign.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import sys
import logging
import time

import click

from ingest_wikimedia.logs import setup_logging
from ingest_wikimedia.metadata import check_partner
from ingest_wikimedia.s3 import get_s3
from ingest_wikimedia.common import CHECKSUM
Expand All @@ -14,54 +18,63 @@
from tqdm import tqdm


s3 = get_s3()

setup_temp_dir()
bucket = s3.Bucket("dpla-wikimedia")
provider = sys.argv[1]
check_partner(provider)
@click.command()
@click.argument("partner")
def main(partner: str):
start_time = time.time()
setup_logging(partner, "sign", logging.INFO)
check_partner(partner)
logging.info(f"Starting signing for {partner}")
s3 = get_s3()
setup_temp_dir()
bucket = s3.Bucket("dpla-wikimedia")

try:
for object_summary in tqdm(
bucket.objects.filter(Prefix=f"{provider}/images/").all(),
"Signing files",
unit="File",
):
temp_file = get_temp_file()
temp_file_name = temp_file.name
try:
tqdm.write(object_summary.key)
obj = object_summary.Object()
sha1 = obj.metadata.get(CHECKSUM, "")
if sha1 != "":
pass
try:
for object_summary in tqdm(
bucket.objects.filter(Prefix=f"{partner}/images/").all(),
"Signing files",
unit="File",
):
temp_file = get_temp_file()
temp_file_name = temp_file.name
try:
tqdm.write(object_summary.key)
obj = object_summary.Object()
sha1 = obj.metadata.get(CHECKSUM, "")
if sha1 != "":
continue

with tqdm(
total=obj.content_length,
leave=False,
desc="S3 Download",
unit="B",
unit_scale=1024,
unit_divisor=True,
delay=2,
) as t:
obj.download_file(
temp_file_name,
Callback=lambda bytes_xfer: t.update(bytes_xfer),
with tqdm(
total=obj.content_length,
leave=False,
desc="S3 Download",
unit="B",
unit_divisor=1024,
unit_scale=True,
delay=2,
) as t:
obj.download_file(
temp_file_name,
Callback=lambda bytes_xfer: t.update(bytes_xfer),
)
sha1 = get_file_hash(temp_file_name)
content_type = get_content_type(temp_file_name)
tqdm.write(f"{obj.key} {content_type} {sha1}")
obj.metadata.update({CHECKSUM: sha1})
obj.copy_from(
CopySource={"Bucket": bucket.name, "Key": obj.key},
ContentType=content_type,
Metadata=obj.metadata,
MetadataDirective="REPLACE",
)
sha1 = get_file_hash(temp_file_name)
content_type = get_content_type(temp_file_name)
tqdm.write(f"{obj.key} {content_type} {sha1}")
obj.metadata.update({CHECKSUM: sha1})
obj.copy_from(
CopySource={"Bucket": bucket.name, "Key": obj.key},
ContentType=content_type,
Metadata=obj.metadata,
MetadataDirective="REPLACE",
)
except Exception as e:
tqdm.write(str(e))
finally:
clean_up_tmp_file(temp_file)
finally:
cleanup_temp_dir()
except Exception as e:
tqdm.write(str(e))
finally:
clean_up_tmp_file(temp_file)
finally:
logging.info(f"{time.time() - start_time} seconds.")
cleanup_temp_dir()


if __name__ == "__main__":
main()

0 comments on commit 40ed627

Please sign in to comment.