Skip to content

Commit

Permalink
Add --one-fs option to stay within one filesystem when walking file tree
Browse files Browse the repository at this point in the history
Fixes #229
  • Loading branch information
pkolaczk committed Oct 29, 2023
1 parent d524619 commit 65891d1
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion fclones/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fclones"
version = "0.33.1"
version = "0.34.0"
description = "Finds and removes duplicate files"
authors = ["Piotr Kołaczkowski <[email protected]>"]
homepage = "https://github.com/pkolaczk/fclones"
Expand Down
7 changes: 7 additions & 0 deletions fclones/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,13 @@ pub struct GroupConfig {
#[arg(short('I'), long, conflicts_with("follow_links"))]
pub isolate: bool,

/// Don't match files on different filesystems or devices
///
/// Does not follow symbolic links crossing filesystems or devices.
/// Skips nested mount-points.
#[arg(short('1'), long)]
pub one_fs: bool,

/// Transform each file by the specified program before matching.
///
/// The value of this parameter should contain a command: the path to the program
Expand Down
1 change: 1 addition & 0 deletions fclones/src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,7 @@ fn scan_files(ctx: &GroupCtx<'_>) -> Vec<Vec<FileInfo>> {
walk.follow_links = config.follow_links;
walk.report_links = config.symbolic_links;
walk.no_ignore = config.no_ignore;
walk.same_fs = config.one_fs;
walk.path_selector = ctx.path_selector.clone();
walk.log = Some(ctx.log);
walk.on_visit = spinner_tick;
Expand Down
57 changes: 50 additions & 7 deletions fclones/src/walk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::fs::{read_link, symlink_metadata, DirEntry, FileType, ReadDir};
use std::sync::Arc;
use std::{fs, io};

use crate::FileId;
use dashmap::DashSet;
use ignore::gitignore::{Gitignore, GitignoreBuilder};
use rayon::Scope;
Expand Down Expand Up @@ -120,6 +121,12 @@ impl IgnoreStack {
}
}

/// Identifier of the device (filesystem) a file resides on, as used on Unix targets.
///
/// Values of this type are taken from the `device` field of `FileId` and are
/// compared against each other when the walk is restricted to one filesystem.
#[cfg(unix)]
type DeviceId = u64;

/// Identifier of the device (filesystem) a file resides on, as used on Windows targets.
///
/// NOTE(review): presumably the wider integer mirrors the width of
/// `FileId::device` on Windows — confirm against the `FileId` definition.
#[cfg(windows)]
type DeviceId = u128;

/// Describes walk configuration.
/// Many walks can be initiated from the same instance.
pub struct Walk<'a> {
Expand All @@ -136,6 +143,8 @@ pub struct Walk<'a> {
pub report_links: bool,
/// Don't honor .gitignore and .fdignore.
pub no_ignore: bool,
/// Don't leave the fs of the root paths.
pub same_fs: bool,
/// Controls selecting or ignoring files by matching file and path names with regexes / globs.
pub path_selector: PathSelector,
/// The function to call for each visited file. The directories are not reported.
Expand All @@ -161,6 +170,7 @@ impl<'a> Walk<'a> {
follow_links: false,
report_links: false,
no_ignore: false,
same_fs: false,
path_selector: PathSelector::new(base_dir),
on_visit: &|_| {},
log: None,
Expand Down Expand Up @@ -197,7 +207,14 @@ impl<'a> Walk<'a> {
"Skipping directory {} because recursive scan is disabled.",
p.display()
)),
_ => scope.spawn(|scope| self.visit_path(p, scope, 0, ignore, &state)),
Ok(metadata) => {
let dev = FileId::from_metadata(&metadata).device;
let state = &state;
scope.spawn(move |scope| self.visit_path(p, dev, scope, 0, ignore, state))
}
Err(err) => {
self.log_warn(format!("Cannot stat {}: {}", p.display(), err));
}
}
}
});
Expand All @@ -207,6 +224,7 @@ impl<'a> Walk<'a> {
fn visit_path<'s, 'w, F>(
&'s self,
path: Path,
dev: DeviceId,
scope: &Scope<'w>,
level: usize,
gitignore: IgnoreStack,
Expand All @@ -219,7 +237,9 @@ impl<'a> Walk<'a> {
Entry::from_path(path.clone())
.map_err(|e| self.log_warn(format!("Failed to stat {}: {}", path.display(), e)))
.into_iter()
.for_each(|entry| self.visit_entry(entry, scope, level, gitignore.clone(), state))
.for_each(|entry| {
self.visit_entry(entry, dev, scope, level, gitignore.clone(), state)
})
}
}

Expand All @@ -228,6 +248,7 @@ impl<'a> Walk<'a> {
fn visit_entry<'s, 'w, F>(
&'s self,
entry: Entry,
dev: DeviceId,
scope: &Scope<'w>,
level: usize,
gitignore: IgnoreStack,
Expand Down Expand Up @@ -261,8 +282,8 @@ impl<'a> Walk<'a> {

match entry.tpe {
EntryType::File => self.visit_file(entry.path, state),
EntryType::Dir => self.visit_dir(entry.path, scope, level, gitignore, state),
EntryType::SymLink => self.visit_link(entry.path, scope, level, gitignore, state),
EntryType::Dir => self.visit_dir(entry.path, dev, scope, level, gitignore, state),
EntryType::SymLink => self.visit_link(entry.path, dev, scope, level, gitignore, state),
EntryType::Other => {}
}
}
Expand All @@ -282,6 +303,7 @@ impl<'a> Walk<'a> {
fn visit_link<'s, 'w, F>(
&'s self,
path: Path,
dev: DeviceId,
scope: &Scope<'w>,
level: usize,
gitignore: IgnoreStack,
Expand All @@ -294,8 +316,8 @@ impl<'a> Walk<'a> {
match self.resolve_link(&path) {
Ok((_, EntryType::File)) if self.report_links => self.visit_file(path, state),
Ok((target, _)) => {
if self.follow_links {
self.visit_path(target, scope, level, gitignore, state)
if self.follow_links && (!self.same_fs || self.same_fs(&target, dev)) {
self.visit_path(target, dev, scope, level, gitignore, state);
}
}
Err(e) => self.log_warn(format!("Failed to read link {}: {}", path.display(), e)),
Expand All @@ -308,6 +330,7 @@ impl<'a> Walk<'a> {
fn visit_dir<'s, 'w, F>(
&'s self,
path: Path,
dev: DeviceId,
scope: &Scope<'w>,
level: usize,
gitignore: IgnoreStack,
Expand All @@ -322,6 +345,9 @@ impl<'a> Walk<'a> {
if !self.path_selector.matches_dir(&path) {
return;
}
if self.same_fs && !self.same_fs(&path, dev) {
return;
}

let gitignore = if self.no_ignore {
gitignore
Expand All @@ -333,7 +359,9 @@ impl<'a> Walk<'a> {
Ok(rd) => {
for entry in Self::sorted_entries(path, rd) {
let gitignore = gitignore.clone();
scope.spawn(move |s| self.visit_entry(entry, s, level + 1, gitignore, state))
scope.spawn(move |s| {
self.visit_entry(entry, dev, s, level + 1, gitignore, state)
})
}
}
Err(e) => self.log_warn(format!("Failed to read dir {}: {}", path.display(), e)),
Expand Down Expand Up @@ -389,6 +417,21 @@ impl<'a> Walk<'a> {
Ok((self.absolute(resolved), entry_type))
}

/// Checks whether `path` resides on the filesystem identified by `device`.
///
/// The device id of `path` is obtained through `FileId::new`. If that lookup
/// fails, a warning is logged and `false` is returned, so the path is treated
/// as not belonging to the given filesystem.
fn same_fs(&self, path: &Path, device: DeviceId) -> bool {
    FileId::new(path)
        .map(|id| id.device == device)
        .unwrap_or_else(|e| {
            // Unreadable device id: report it and treat the path as foreign.
            let message = format!("Cannot read device id of {}: {}", path.display(), e);
            self.log_warn(message);
            false
        })
}

/// Returns absolute path with removed `.` and `..` components.
/// Relative paths are resolved against `self.base_dir`.
/// Symbolic links to directories are resolved.
Expand Down

0 comments on commit 65891d1

Please sign in to comment.