From 47f61ccfec8835f65e218da32785f3d3b6388315 Mon Sep 17 00:00:00 2001 From: "Taro Matsuzawa aka. btm" Date: Tue, 17 Dec 2024 00:39:26 +0900 Subject: [PATCH] to compress bin data in temprary directory --- Cargo.lock | 238 +++++++++++++++++++++++++++++++++++++++++++++++- app/Cargo.toml | 1 + app/src/main.rs | 19 ++-- 3 files changed, 250 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1cba836..bb335c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,12 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" @@ -107,6 +113,7 @@ dependencies = [ "clap", "env_logger", "glob", + "gzp", "itertools", "kv-extsort", "log", @@ -307,6 +314,15 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "cmake" +version = "0.1.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.3" @@ -319,6 +335,27 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core_affinity" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f" +dependencies = [ + "kernel32-sys", + "libc", + "num_cpus", + "winapi", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.13" @@ -431,6 +468,42 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flate2" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +dependencies = [ + "crc32fast", + "libz-sys", + "miniz_oxide", +] + +[[package]] +name = "flume" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" +dependencies = [ + "futures-core", + "futures-sink", + "nanorand", + "pin-project", + "spin", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + [[package]] name = "getrandom" version = "0.2.15" @@ -438,8 +511,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -454,6 +529,23 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "gzp" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcaa63633f99fe42694dbc0004cdc2ba160d9a303f0d08d2e50380a500a19cf3" +dependencies = [ + "byteorder", + "bytes", + "core_affinity", + "flate2", + "flume", + "libdeflater", + "libz-sys", + "num_cpus", + "thiserror", +] + [[package]] name = "hashbrown" version = "0.15.1" @@ -466,6 +558,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "humantime" version = "2.1.0" @@ -545,6 +643,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi", + "winapi-build", +] + [[package]] name = "kv-extsort" version = "0.1.0" @@ -590,12 +698,53 @@ version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +[[package]] +name = "libdeflate-sys" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cceaf6e335d658ec0602685bfe50d0f35248d6d8848b194058bfda37a9eb728" +dependencies = [ + "cc", +] + +[[package]] +name = "libdeflater" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f0c115fd181333eb7a82cf42e2ce2d9fac0ad06babd3ab79a9ec5bd66352fe" +dependencies = [ + "libdeflate-sys", +] + +[[package]] +name = "libz-sys" +version = "1.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" +dependencies = [ + "cc", + "cmake", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.22" @@ -614,12 +763,30 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "multimap" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" +dependencies = [ + "getrandom", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -629,6 +796,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.20.2" @@ -694,6 +871,32 @@ dependencies = [ "indexmap", ] +[[package]] +name = "pin-project" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + [[package]] name = "prettyplease" version = "0.2.25" @@ -851,6 +1054,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.215" @@ -901,6 +1110,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + [[package]] name = "strsim" version = "0.11.1" @@ -981,6 +1199,12 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -1048,6 +1272,18 @@ version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + [[package]] name = "windows-core" version = "0.52.0" diff --git a/app/Cargo.toml b/app/Cargo.toml index 1ed95c9..772f3f9 100644 --- a/app/Cargo.toml +++ b/app/Cargo.toml @@ -28,3 +28,4 @@ bytemuck = "1.20.0" tinymvt = "0.0.1" kv-extsort = { git = "https://github.com/MIERUNE/kv-extsort-rs.git" } itertools = "0.13.0" +gzp = "0.10.1" diff --git a/app/src/main.rs b/app/src/main.rs index 01da90f..c54096d 100644 --- a/app/src/main.rs +++ b/app/src/main.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::convert::Infallible; use std::ffi::OsStr; use std::fs::File; -use std::io::{BufWriter, Read as _, Write}; +use std::io::{Read as _, Write}; use std::sync::{mpsc, Arc}; use std::thread; use std::{ @@ -16,6 +16,8 @@ use chrono::Local; use clap::Parser; use env_logger::Builder; use glob::glob; +use gzp::MgzipSyncReader; +use gzp::{deflate::Mgzip, par::compress::{ParCompress, ParCompressBuilder}}; use itertools::Itertools as _; use log::LevelFilter; use pcd_parser::reader::csv::CsvPointReader; @@ -114,7 +116,7 @@ fn write_points_to_tile( fs::create_dir_all(tile_path.parent().unwrap())?; let file = File::create(tile_path)?; - let mut writer = BufWriter::new(file); + let mut writer: ParCompress = ParCompressBuilder::new().from_writer(file); let encoded = bitcode::encode(points); writer.write_all(&encoded)?; @@ -123,8 +125,11 @@ fn write_points_to_tile( } fn read_points_from_tile(file_path: &Path) -> std::io::Result> { - let buf = std::fs::read(file_path)?; - let points = bitcode::decode(&buf).unwrap(); + let file = File::open(file_path)?; + let mut buf_reader = MgzipSyncReader::new(file); + let mut buffer = Vec::new(); + buf_reader.read_to_end(&mut buffer).unwrap(); + let points = bitcode::decode(&buffer).unwrap(); Ok(points) } @@ -294,7 +299,7 @@ impl RunFileIterator { fn read_run_file(path: PathBuf) -> Result, Infallible> { let file = File::open(path).unwrap(); - let mut buf_reader = std::io::BufReader::new(file); + let mut buf_reader = MgzipSyncReader::new(file); let mut buffer = Vec::new(); buf_reader.read_to_end(&mut buffer).unwrap(); let data: Vec<(SortKey, Point)> = bitcode::decode(&buffer[..]).unwrap(); @@ -433,7 +438,7 @@ fn main() { .path() .join(format!("run_{}.bin", current_run_index)); let file = fs::File::create(run_file_path).unwrap(); - let mut writer = std::io::BufWriter::new(file); + let mut writer: ParCompress = ParCompressBuilder::new().from_writer(file); let encoded = bitcode::encode(&keyed_points); writer.write_all(&encoded).unwrap(); @@ -486,7 +491,7 @@ fn main() { fs::create_dir_all(tile_path.parent().unwrap()).unwrap(); let file = fs::File::create(tile_path).unwrap(); - let mut writer = BufWriter::new(file); + let mut writer: ParCompress = ParCompressBuilder::new().from_writer(file); let encoded = bitcode::encode(&points); writer.write_all(&encoded).unwrap();