From 65891d17877d48c2df9bf0fb90dce9d1cc315c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Ko=C5=82aczkowski?= Date: Sat, 28 Oct 2023 13:44:15 +0200 Subject: [PATCH] Add --one-fs option to stay within one filesystem when walking file tree Fixes #229 --- Cargo.lock | 2 +- fclones/Cargo.toml | 2 +- fclones/src/config.rs | 7 ++++++ fclones/src/group.rs | 1 + fclones/src/walk.rs | 57 +++++++++++++++++++++++++++++++++++++------ 5 files changed, 60 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 84dbb77..f60da55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -492,7 +492,7 @@ checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "fclones" -version = "0.33.1" +version = "0.34.0" dependencies = [ "assert_matches", "bincode", diff --git a/fclones/Cargo.toml b/fclones/Cargo.toml index 5aa4460..d5c2203 100644 --- a/fclones/Cargo.toml +++ b/fclones/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fclones" -version = "0.33.1" +version = "0.34.0" description = "Finds and removes duplicate files" authors = ["Piotr Kołaczkowski "] homepage = "https://github.com/pkolaczk/fclones" diff --git a/fclones/src/config.rs b/fclones/src/config.rs index 83fbaa9..78a4452 100644 --- a/fclones/src/config.rs +++ b/fclones/src/config.rs @@ -197,6 +197,13 @@ pub struct GroupConfig { #[arg(short('I'), long, conflicts_with("follow_links"))] pub isolate: bool, + /// Don't match files on different filesystems or devices + /// + /// Does not follow symbolic links crossing filesystems or devices. + /// Skips nested mount-points. + #[arg(short('1'), long)] + pub one_fs: bool, + /// Transform each file by the specified program before matching. /// /// The value of this parameter should contain a command: the path to the program diff --git a/fclones/src/group.rs b/fclones/src/group.rs index 978f42d..441cef2 100644 --- a/fclones/src/group.rs +++ b/fclones/src/group.rs @@ -707,6 +707,7 @@ fn scan_files(ctx: &GroupCtx<'_>) -> Vec> { walk.follow_links = config.follow_links; walk.report_links = config.symbolic_links; walk.no_ignore = config.no_ignore; + walk.same_fs = config.one_fs; walk.path_selector = ctx.path_selector.clone(); walk.log = Some(ctx.log); walk.on_visit = spinner_tick; diff --git a/fclones/src/walk.rs b/fclones/src/walk.rs index 9d83beb..50440bb 100644 --- a/fclones/src/walk.rs +++ b/fclones/src/walk.rs @@ -4,6 +4,7 @@ use std::fs::{read_link, symlink_metadata, DirEntry, FileType, ReadDir}; use std::sync::Arc; use std::{fs, io}; +use crate::FileId; use dashmap::DashSet; use ignore::gitignore::{Gitignore, GitignoreBuilder}; use rayon::Scope; @@ -120,6 +121,12 @@ impl IgnoreStack { } } +#[cfg(unix)] +type DeviceId = u64; + +#[cfg(windows)] +type DeviceId = u128; + /// Describes walk configuration. /// Many walks can be initiated from the same instance. pub struct Walk<'a> { @@ -136,6 +143,8 @@ pub struct Walk<'a> { pub report_links: bool, /// Don't honor .gitignore and .fdignore. pub no_ignore: bool, + /// Don't leave the fs of the root paths. + pub same_fs: bool, /// Controls selecting or ignoring files by matching file and path names with regexes / globs. pub path_selector: PathSelector, /// The function to call for each visited file. The directories are not reported. @@ -161,6 +170,7 @@ impl<'a> Walk<'a> { follow_links: false, report_links: false, no_ignore: false, + same_fs: false, path_selector: PathSelector::new(base_dir), on_visit: &|_| {}, log: None, @@ -197,7 +207,14 @@ impl<'a> Walk<'a> { "Skipping directory {} because recursive scan is disabled.", p.display() )), - _ => scope.spawn(|scope| self.visit_path(p, scope, 0, ignore, &state)), + Ok(metadata) => { + let dev = FileId::from_metadata(&metadata).device; + let state = &state; + scope.spawn(move |scope| self.visit_path(p, dev, scope, 0, ignore, state)) + } + Err(err) => { + self.log_warn(format!("Cannot stat {}: {}", p.display(), err)); + } } } }); @@ -207,6 +224,7 @@ impl<'a> Walk<'a> { fn visit_path<'s, 'w, F>( &'s self, path: Path, + dev: DeviceId, scope: &Scope<'w>, level: usize, gitignore: IgnoreStack, @@ -219,7 +237,9 @@ impl<'a> Walk<'a> { Entry::from_path(path.clone()) .map_err(|e| self.log_warn(format!("Failed to stat {}: {}", path.display(), e))) .into_iter() - .for_each(|entry| self.visit_entry(entry, scope, level, gitignore.clone(), state)) + .for_each(|entry| { + self.visit_entry(entry, dev, scope, level, gitignore.clone(), state) + }) } } @@ -228,6 +248,7 @@ impl<'a> Walk<'a> { fn visit_entry<'s, 'w, F>( &'s self, entry: Entry, + dev: DeviceId, scope: &Scope<'w>, level: usize, gitignore: IgnoreStack, @@ -261,8 +282,8 @@ impl<'a> Walk<'a> { match entry.tpe { EntryType::File => self.visit_file(entry.path, state), - EntryType::Dir => self.visit_dir(entry.path, scope, level, gitignore, state), - EntryType::SymLink => self.visit_link(entry.path, scope, level, gitignore, state), + EntryType::Dir => self.visit_dir(entry.path, dev, scope, level, gitignore, state), + EntryType::SymLink => self.visit_link(entry.path, dev, scope, level, gitignore, state), EntryType::Other => {} } } @@ -282,6 +303,7 @@ impl<'a> Walk<'a> { fn visit_link<'s, 'w, F>( &'s self, path: Path, + dev: DeviceId, scope: &Scope<'w>, level: usize, gitignore: IgnoreStack, @@ -294,8 +316,8 @@ impl<'a> Walk<'a> { match self.resolve_link(&path) { Ok((_, EntryType::File)) if self.report_links => self.visit_file(path, state), Ok((target, _)) => { - if self.follow_links { - self.visit_path(target, scope, level, gitignore, state) + if self.follow_links && (!self.same_fs || self.same_fs(&target, dev)) { + self.visit_path(target, dev, scope, level, gitignore, state); } } Err(e) => self.log_warn(format!("Failed to read link {}: {}", path.display(), e)), @@ -308,6 +330,7 @@ impl<'a> Walk<'a> { fn visit_dir<'s, 'w, F>( &'s self, path: Path, + dev: DeviceId, scope: &Scope<'w>, level: usize, gitignore: IgnoreStack, @@ -322,6 +345,9 @@ impl<'a> Walk<'a> { if !self.path_selector.matches_dir(&path) { return; } + if self.same_fs && !self.same_fs(&path, dev) { + return; + } let gitignore = if self.no_ignore { gitignore @@ -333,7 +359,9 @@ impl<'a> Walk<'a> { Ok(rd) => { for entry in Self::sorted_entries(path, rd) { let gitignore = gitignore.clone(); - scope.spawn(move |s| self.visit_entry(entry, s, level + 1, gitignore, state)) + scope.spawn(move |s| { + self.visit_entry(entry, dev, s, level + 1, gitignore, state) + }) } } Err(e) => self.log_warn(format!("Failed to read dir {}: {}", path.display(), e)), @@ -389,6 +417,21 @@ impl<'a> Walk<'a> { Ok((self.absolute(resolved), entry_type)) } + /// Returns true if the file belongs to the given filesystem + fn same_fs(&self, path: &Path, device: DeviceId) -> bool { + match FileId::new(path) { + Ok(file_id) => file_id.device == device, + Err(err) => { + self.log_warn(format!( + "Cannot read device id of {}: {}", + path.display(), + err + )); + false + } + } + } + /// Returns absolute path with removed `.` and `..` components. /// Relative paths are resolved against `self.base_dir`. /// Symbolic links to directories are resolved.