From 093ab3f7d594c6e2429f290943b438ee5060d7c7 Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Fri, 6 Dec 2024 20:01:42 -0800 Subject: [PATCH] store and retrieve extra hashes --- Cargo.lock | 25 ++++++++++++++++ Cargo.toml | 4 +++ src/database.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++- src/model.rs | 12 ++++++-- 4 files changed, 116 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bcdefd7..4c903a8 100755 --- a/Cargo.lock +++ b/Cargo.lock @@ -1935,6 +1935,16 @@ dependencies = [ "rayon", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.4" @@ -2190,6 +2200,7 @@ dependencies = [ "chrono", "clap", "criterion", + "digest", "directories", "dunce", "futures", @@ -2203,11 +2214,14 @@ dependencies = [ "kamadak-exif", "keepawake", "libheif-rs", + "md-5", "ntapi", "rayon", "rusqlite", "rusqlite_migration", "self_cell", + "sha1", + "sha2", "static_assertions", "thiserror 2.0.0", "tokio", @@ -2812,6 +2826,17 @@ dependencies = [ "digest", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index 84ebf48..fffe1e3 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,10 @@ vcpkg = "0.2.15" [dev-dependencies] criterion = { version = "0", features = ["html_reports"] } +digest = "0.10.7" +md-5 = "0.10.6" +sha1 = "0.10.6" +sha2 = "0.10.8" [[bench]] name = "databasebench" diff --git a/src/database.rs b/src/database.rs index 0854c4c..249909c 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,4 +1,5 @@ use crate::model::ContentMetadata; +use crate::model::ExtraHashes; use crate::model::FileInfo; use crate::model::Hash32; use crate::model::ImageMetadata; @@ -87,6 +88,18 @@ INSERT OR REPLACE INTO images VALUES (?, ?, ?, ?, ?) "; +const GET_EXTRA_HASHES: &str = "\ +SELECT md5, sha1, sha256 +FROM extra_hashes +WHERE blake3 = ? +"; + +const ADD_EXTRA_HASHES: &str = "\ +INSERT OR REPLACE INTO extra_hashes +(blake3, md5, sha1, sha256) +VALUES (?, ?, ?, ?) +"; + pub struct CachedStatements<'conn> { begin_tx: Statement<'conn>, commit_tx: Statement<'conn>, @@ -96,6 +109,8 @@ pub struct CachedStatements<'conn> { add_file: Statement<'conn>, get_image: Statement<'conn>, add_image: Statement<'conn>, + get_extra_hashes: Statement<'conn>, + add_extra_hashes: Statement<'conn>, } unsafe impl Send for CachedStatements<'_> {} @@ -111,6 +126,8 @@ fn cache_statements(conn: &Connection) -> CachedStatements<'_> { add_file: conn.prepare(ADD_FILE).unwrap(), get_image: conn.prepare(GET_IMAGE).unwrap(), add_image: conn.prepare(ADD_IMAGE).unwrap(), + get_extra_hashes: conn.prepare(GET_EXTRA_HASHES).unwrap(), + add_extra_hashes: conn.prepare(ADD_EXTRA_HASHES).unwrap(), } } @@ -344,6 +361,39 @@ impl Database { jpegrothash: row.get(3)?, }) } + + pub fn get_extra_hashes(&mut self, blake3: &Hash32) -> anyhow::Result> { + self.with_statement(|stmt| { + Ok(stmt + .get_extra_hashes + .query_row((blake3,), Self::extra_hashes_from_single_row) + .optional()?) + }) + } + + fn extra_hashes_from_single_row(row: &rusqlite::Row) -> rusqlite::Result { + Ok(ExtraHashes { + md5: row.get(0)?, + sha1: row.get(1)?, + sha256: row.get(2)?, + }) + } + + pub fn add_extra_hashes( + &mut self, + blake3: &Hash32, + extra_hashes: &ExtraHashes, + ) -> anyhow::Result<()> { + self.with_statement(|stmt| { + stmt.add_extra_hashes.execute(( + &blake3, + &extra_hashes.md5, + &extra_hashes.sha1, + &extra_hashes.sha256, + ))?; + Ok(()) + }) + } } pub fn get_database_path() -> anyhow::Result { @@ -456,7 +506,33 @@ mod tests { let im = ImageMetadata::invalid(); db.add_image_metadata(&blake3, &im)?; - assert_eq!(true, db.get_image_metadata(&blake3)?.unwrap().is_invalid()); + assert!(db.get_image_metadata(&blake3)?.unwrap().is_invalid()); + + Ok(()) + } + + #[test] + fn record_and_retrieve_empty_hashes() -> anyhow::Result<()> { + use digest::Digest; + + let mut db = Database::open_memory()?; + let data = b"hello world"; + let b3_storage = blake3::hash(data); + let b3 = b3_storage.as_bytes(); + + assert_eq!(None, db.get_extra_hashes(b3)?); + + db.add_extra_hashes(b3, &Default::default())?; + assert_eq!(Some(Default::default()), db.get_extra_hashes(b3)?); + + let extra_hashes = ExtraHashes { + md5: Some(md5::Md5::digest(data).into()), + sha1: Some(sha1::Sha1::digest(data).into()), + sha256: Some(sha2::Sha256::digest(data).into()), + }; + + db.add_extra_hashes(b3, &extra_hashes)?; + assert_eq!(Some(extra_hashes), db.get_extra_hashes(b3)?); Ok(()) } diff --git a/src/model.rs b/src/model.rs index 9d0e64d..12e233a 100644 --- a/src/model.rs +++ b/src/model.rs @@ -5,8 +5,9 @@ use std::os::unix::fs::MetadataExt; use std::time::SystemTime; pub type Hash = [u8; N]; -pub type Hash20 = [u8; 20]; -pub type Hash32 = [u8; 32]; +pub type Hash16 = Hash<16>; +pub type Hash20 = Hash<20>; +pub type Hash32 = Hash<32>; /// Only support unicode filenames for efficient conversion into and /// out of SQLite. @@ -98,3 +99,10 @@ impl ImageMetadata { self.dimensions.is_none() } } + +#[derive(Debug, Default, PartialEq)] +pub struct ExtraHashes { + pub md5: Option, + pub sha1: Option, + pub sha256: Option, +}