From 42be47c5679253d103f969e1b1281c11122f6b6e Mon Sep 17 00:00:00 2001 From: Michael Della Bitta Date: Wed, 18 Dec 2024 15:48:23 -0500 Subject: [PATCH 1/2] More tests for local.py --- tests/test_local.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/test_local.py b/tests/test_local.py index 8bbfb35..2e31fd0 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -1,5 +1,6 @@ import os import hashlib +from unittest.mock import patch import pytest from ingest_wikimedia.local import ( setup_temp_dir, @@ -16,7 +17,6 @@ def setup_and_teardown_temp_dir(): setup_temp_dir() yield - print("cleanup") cleanup_temp_dir() @@ -52,3 +52,37 @@ def test_get_content_type(tmp_path): test_file = tmp_path / "test_file.txt" test_file.write_bytes(SPACER_GIF) assert get_content_type(str(test_file)) == "image/gif" + + +def test_clean_up_tmp_file_none_does_not_raise(): + clean_up_tmp_file(None) # Should not raise any errors + + +def test_get_file_hash_file_not_found(): + with pytest.raises(FileNotFoundError): + get_file_hash("non_existent_file.txt") + + +def test_get_file_hash_empty_file(tmp_path): + test_file = tmp_path / "empty_file.txt" + test_file.write_text("") + expected_hash = hashlib.sha1(b"").hexdigest() + assert get_file_hash(str(test_file)) == expected_hash + + +@patch("ingest_wikimedia.local.magic.from_file") +def test_get_content_type_magic_called(mock_magic_from_file, tmp_path): + mock_magic_from_file.return_value = "application/octet-stream" + test_file = tmp_path / "test.bin" + test_file.write_bytes(b"some data") + content_type = get_content_type(str(test_file)) + assert content_type == "application/octet-stream" + + +def test_get_content_type_file_not_found(): + with pytest.raises(FileNotFoundError): + get_content_type("non_existent_file.txt") + + +def test_get_bytes_hash_empty_string(): + assert get_bytes_hash("") == "da39a3ee5e6b4b0d3255bfef95601890afd80709" From cfd00c3b144c07619cb611dc1f149259bfed96c1 Mon Sep 17 00:00:00 2001 From: Michael Della Bitta Date: Wed, 18 Dec 2024 15:55:43 -0500 Subject: [PATCH 2/2] Added usedforsecurity=False to sha1 calls --- tests/test_local.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_local.py b/tests/test_local.py index 2e31fd0..4c87ab4 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -31,13 +31,17 @@ def test_get_and_cleanup_temp_file(): def test_get_file_hash(tmp_path): test_file = tmp_path / "test_file.txt" test_file.write_text("test content") - expected_hash = hashlib.sha1(test_file.read_bytes()).hexdigest() + expected_hash = hashlib.sha1( + test_file.read_bytes(), usedforsecurity=False + ).hexdigest() assert get_file_hash(str(test_file)) == expected_hash def test_get_bytes_hash(): data = "test content" - expected_hash = hashlib.sha1(data.encode("utf-8")).hexdigest() + expected_hash = hashlib.sha1( + data.encode("utf-8"), usedforsecurity=False + ).hexdigest() assert get_bytes_hash(data) == expected_hash @@ -66,7 +70,7 @@ def test_get_file_hash_file_not_found(): def test_get_file_hash_empty_file(tmp_path): test_file = tmp_path / "empty_file.txt" test_file.write_text("") - expected_hash = hashlib.sha1(b"").hexdigest() + expected_hash = hashlib.sha1(b"", usedforsecurity=False).hexdigest() assert get_file_hash(str(test_file)) == expected_hash