diff --git a/Cargo.toml b/Cargo.toml
index 8f8df90..9b83536 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,8 +7,8 @@ description = "Tools for finding and manipulating differences between files"
 documentation = "https://docs.rs/diffy"
 repository = "https://github.com/bmwill/diffy"
 readme = "README.md"
-keywords = ["diff", "patch", "merge"]
-categories = ["text-processing"]
+keywords = ["diff", "patch", "merge", "3-way", "myers"]
+categories = ["text-processing", "algorithms"]
 rust-version = "1.62.1"
 edition = "2021"
 
diff --git a/src/diff/mod.rs b/src/diff/mod.rs
index a456c41..d6f2842 100644
--- a/src/diff/mod.rs
+++ b/src/diff/mod.rs
@@ -96,8 +96,8 @@ impl DiffOptions {
     /// Produce a Patch between two texts based on the configured options
     pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> {
         let mut classifier = Classifier::default();
-        let (old_lines, old_ids) = classifier.classify_lines(original);
-        let (new_lines, new_ids) = classifier.classify_lines(modified);
+        let (old_lines, old_ids) = classifier.classify_text(original);
+        let (new_lines, new_ids) = classifier.classify_text(modified);
 
         let solution = self.diff_slice(&old_ids, &new_ids);
 
@@ -112,8 +112,8 @@ impl DiffOptions {
         modified: &'a [u8],
     ) -> Patch<'a, [u8]> {
         let mut classifier = Classifier::default();
-        let (old_lines, old_ids) = classifier.classify_lines(original);
-        let (new_lines, new_ids) = classifier.classify_lines(modified);
+        let (old_lines, old_ids) = classifier.classify_text(original);
+        let (new_lines, new_ids) = classifier.classify_text(modified);
 
         let solution = self.diff_slice(&old_ids, &new_ids);
 
diff --git a/src/lib.rs b/src/lib.rs
index 2d4b0dc..d10afa2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,15 +1,27 @@
 //! Tools for finding and manipulating differences between files
 //!
-//! ## Overview
+//! # Overview
 //!
 //! This library is intended to be a collection of tools used to find and
-//! manipulate differences between files inspired by [LibXDiff] and [GNU
-//! Diffutils]. Version control systems like [Git] and [Mercurial] generally
-//! communicate differences between two versions of a file using a `diff` or
-//! `patch`.
+//! manipulate differences between files or arbitrary data, inspired by
+//! [LibXDiff] and [GNU Diffutils]. Version control systems like [Git] and
+//! [Mercurial] generally communicate differences between two versions of a
+//! file using a `diff` or `patch`.
 //!
 //! The current diff implementation is based on the [Myers' diff algorithm].
 //!
+//! ## Supported features
+//!
+//! | Feature          | UTF-8 strings | Non-UTF-8 strings | Arbitrary types (that implement `Eq + Hash`) |
+//! |------------------|---------------|-------------------|----------------------------------------------|
+//! | Creating a patch | ✅            | ✅                |                                              |
+//! | Applying a patch | ✅            | ✅                |                                              |
+//! | 3-way merge      | ✅            | ✅                | ✅                                           |
+//!
+//! "Arbitrary types" means "any type that implements `Eq + Hash`".
+//! Supporting patches for arbitrary types would not be very helpful, since
+//! there is no standardized way of formatting them.
+//!
 //! ## UTF-8 and Non-UTF-8
 //!
 //! This library has support for working with both utf8 and non-utf8 texts.
@@ -198,6 +210,22 @@
 //! assert_eq!(merge(original, a, b).unwrap_err(), expected);
 //! ```
 //!
+//! It is possible to perform 3-way merges between collections of arbitrary
+//! types `T` as long as `T: Eq + Hash`.
+//! ```
+//! use diffy::merge_custom;
+//!
+//! let original = [1,2,3,4,5,    6];
+//! let a        = [1,2,3,4,5,100,6];
+//! let b        = [1,  3,4,5,    6];
+//! let expected = [1,  3,4,5,100,6];
+//!
+//! let result = merge_custom(&original, &a, &b).unwrap();
+//! let result_owned: Vec<i32> = result.into_iter().copied().collect();
+//! assert_eq!(result_owned, expected);
+//! ```
+//!
+//!
 //! [LibXDiff]: http://www.xmailserver.org/xdiff-lib.html
 //! [Myers' diff algorithm]: http://www.xmailserver.org/diff2.pdf
 //! [GNU Diffutils]: https://www.gnu.org/software/diffutils/
@@ -221,5 +249,5 @@ mod utils;
 
 pub use apply::{apply, apply_bytes, ApplyError};
 pub use diff::{create_patch, create_patch_bytes, DiffOptions};
-pub use merge::{merge, merge_bytes, ConflictStyle, MergeOptions};
+pub use merge::{merge, merge_bytes, merge_custom, ConflictStyle, MergeConflicts, MergeOptions};
 pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter};
diff --git a/src/merge/mod.rs b/src/merge/mod.rs
index 83b99fb..4ffc12c 100644
--- a/src/merge/mod.rs
+++ b/src/merge/mod.rs
@@ -3,6 +3,7 @@ use crate::{
     range::{DiffRange, Range, SliceLike},
     utils::Classifier,
 };
+use std::hash::Hash;
 use std::{cmp, fmt};
 
 #[cfg(test)]
@@ -152,9 +153,9 @@ impl MergeOptions {
         theirs: &'a str,
     ) -> Result<String, String> {
         let mut classifier = Classifier::default();
-        let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor);
-        let (our_lines, our_ids) = classifier.classify_lines(ours);
-        let (their_lines, their_ids) = classifier.classify_lines(theirs);
+        let (ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor);
+        let (our_lines, our_ids) = classifier.classify_text(ours);
+        let (their_lines, their_ids) = classifier.classify_text(theirs);
 
         let opts = DiffOptions::default();
         let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
@@ -183,9 +184,9 @@ impl MergeOptions {
         theirs: &'a [u8],
     ) -> Result<Vec<u8>, Vec<u8>> {
         let mut classifier = Classifier::default();
-        let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor);
-        let (our_lines, our_ids) = classifier.classify_lines(ours);
-        let (their_lines, their_ids) = classifier.classify_lines(theirs);
+        let (ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor);
+        let (our_lines, our_ids) = classifier.classify_text(ours);
+        let (their_lines, their_ids) = classifier.classify_text(theirs);
 
         let opts = DiffOptions::default();
         let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
@@ -205,6 +206,34 @@ impl MergeOptions {
             self.style,
         )
     }
+
+    /// Merge two slices of arbitrary items, given a common ancestor
+    ///
+    /// The configured conflict marker length and [`ConflictStyle`] are not
+    /// used: on conflict no merged output is produced, only the number of
+    /// conflicts is reported via [`MergeConflicts`].
+    pub fn merge_custom<'a, T: Eq + Hash>(
+        &self,
+        ancestor: &'a [T],
+        ours: &'a [T],
+        theirs: &'a [T],
+    ) -> Result<Vec<&'a T>, MergeConflicts> {
+        let mut classifier = Classifier::default();
+        let (ancestor_lines, ancestor_ids) = classifier.classify(ancestor);
+        let (our_lines, our_ids) = classifier.classify(ours);
+        let (their_lines, their_ids) = classifier.classify(theirs);
+
+        let opts = DiffOptions::default();
+        let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
+        let their_solution = opts.diff_slice(&ancestor_ids, &their_ids);
+
+        let merged = merge_solutions(&our_solution, &their_solution);
+        let mut merge = diff3_range_to_merge_range(&merged);
+
+        cleanup_conflicts(&mut merge);
+
+        output_result_custom(&ancestor_lines, &our_lines, &their_lines, &merge)
+    }
 }
 
 impl Default for MergeOptions {
@@ -277,6 +306,35 @@ pub fn merge_bytes<'a>(
     MergeOptions::default().merge_bytes(ancestor, ours, theirs)
 }
 
+/// Information about a merge that ended with unresolved conflicts
+#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
+pub struct MergeConflicts {
+    /// How many conflicts have occurred
+    pub count: usize,
+}
+
+impl fmt::Display for MergeConflicts {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{} merge conflicts", self.count)
+    }
+}
+
+impl std::error::Error for MergeConflicts {}
+
+/// Perform a 3-way merge between slices of any type implementing `Eq + Hash`
+///
+/// # Errors
+///
+/// Returns [`MergeConflicts`] with the number of conflicts if the merge
+/// could not be completed cleanly.
+pub fn merge_custom<'a, T: Eq + Hash>(
+    ancestor: &'a [T],
+    ours: &'a [T],
+    theirs: &'a [T],
+) -> Result<Vec<&'a T>, MergeConflicts> {
+    MergeOptions::default().merge_custom(ancestor, ours, theirs)
+}
+
 fn merge_solutions<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>(
     our_solution: &[DiffRange<'ancestor, 'ours, T>],
     their_solution: &[DiffRange<'ancestor, 'theirs, T>],
@@ -490,11 +548,11 @@ fn cleanup_conflicts<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike + PartialE
     }
 }
 
-fn output_result<'a, T: ?Sized>(
+fn output_result<'a>(
     ancestor: &[&'a str],
     ours: &[&'a str],
     theirs: &[&'a str],
-    merge: &[MergeRange<T>],
+    merge: &[MergeRange<[u64]>],
     marker_len: usize,
     style: ConflictStyle,
 ) -> Result<String, String> {
@@ -556,11 +614,11 @@ fn add_conflict_marker(
     output.push('\n');
 }
 
-fn output_result_bytes<'a, T: ?Sized>(
+fn output_result_bytes<'a>(
     ancestor: &[&'a [u8]],
     ours: &[&'a [u8]],
     theirs: &[&'a [u8]],
-    merge: &[MergeRange<T>],
+    merge: &[MergeRange<[u64]>],
     marker_len: usize,
     style: ConflictStyle,
 ) -> Result<Vec<u8>, Vec<u8>> {
@@ -635,3 +693,42 @@ fn add_conflict_marker_bytes(
     }
     output.push(b'\n');
 }
+
+/// Assemble the merged sequence from the merge ranges, or report how many
+/// conflicts were found
+fn output_result_custom<'a, T>(
+    ancestor: &[&'a T],
+    ours: &[&'a T],
+    theirs: &[&'a T],
+    merge: &[MergeRange<[u64]>],
+) -> Result<Vec<&'a T>, MergeConflicts> {
+    let mut conflicts = 0;
+    let mut output = Vec::new();
+
+    for merge_range in merge {
+        match merge_range {
+            MergeRange::Equal(range, ..) => {
+                output.extend(ancestor[range.range()].iter().copied());
+            }
+            MergeRange::Conflict(..) => {
+                // Conflicts are only counted; no conflict markers are emitted for custom types
+                conflicts += 1;
+            }
+            MergeRange::Ours(range) => {
+                output.extend(ours[range.range()].iter().copied());
+            }
+            MergeRange::Theirs(range) => {
+                output.extend(theirs[range.range()].iter().copied());
+            }
+            MergeRange::Both(range, _) => {
+                output.extend(ours[range.range()].iter().copied());
+            }
+        }
+    }
+
+    if conflicts == 0 {
+        Ok(output)
+    } else {
+        Err(MergeConflicts { count: conflicts })
+    }
+}
diff --git a/src/merge/tests.rs b/src/merge/tests.rs
index ff40860..e447e8f 100644
--- a/src/merge/tests.rs
+++ b/src/merge/tests.rs
@@ -215,6 +215,19 @@ salt
     );
 }
 
+#[test]
+#[rustfmt::skip]
+fn test_merge_arbitrary_type() {
+    let original = [1,2,3,4,5,    6];
+    let ours     = [1,2,3,4,5,100,6];
+    let theirs   = [1,  3,4,5,    6];
+    let expected = [1,  3,4,5,100,6];
+
+    let result = merge_custom(&original, &ours, &theirs).unwrap();
+    let result_owned: Vec<i32> = result.into_iter().copied().collect();
+    assert_eq!(result_owned, expected);
+}
+
 #[test]
 fn myers_diffy_vs_git() {
     let original = "\
diff --git a/src/utils.rs b/src/utils.rs
index 9b3e70d..aa6d69c 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -12,7 +12,7 @@ pub struct Classifier<'a, T: ?Sized> {
 }
 
 impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> {
-    fn classify(&mut self, record: &'a T) -> u64 {
+    fn classify_item(&mut self, record: &'a T) -> u64 {
         match self.unique_ids.entry(record) {
             Entry::Occupied(o) => *o.get(),
             Entry::Vacant(v) => {
@@ -25,9 +25,18 @@ impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> {
 }
 
 impl<'a, T: ?Sized + Text> Classifier<'a, T> {
-    pub fn classify_lines(&mut self, text: &'a T) -> (Vec<&'a T>, Vec<u64>) {
+    pub fn classify_text(&mut self, text: &'a T) -> (Vec<&'a T>, Vec<u64>) {
         LineIter::new(text)
-            .map(|line| (line, self.classify(line)))
+            .map(|line| (line, self.classify_item(line)))
+            .unzip()
+    }
+}
+
+impl<'a, T: Eq + Hash> Classifier<'a, T> {
+    /// Classify each item of a slice, returning the items and their ids
+    pub fn classify(&mut self, data: &'a [T]) -> (Vec<&'a T>, Vec<u64>) {
+        data.iter()
+            .map(|item| (item, self.classify_item(item)))
             .unzip()
     }
 }
@@ -227,3 +236,24 @@ fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
 fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> {
     haystack.iter().position(|&b| b == byte)
 }
+
+#[cfg(test)]
+mod test {
+    use super::Classifier;
+
+    #[test]
+    fn classify() {
+        let input = vec![10, 11, 12, 13];
+        let mut classifier = Classifier::default();
+        let (lines, _ids) = classifier.classify(&input);
+        assert_eq!(lines, vec![&10, &11, &12, &13]);
+    }
+
+    #[test]
+    fn classify_string() {
+        let input = "abc\ndef";
+        let mut classifier = Classifier::default();
+        let (lines, _ids) = classifier.classify_text(input);
+        assert_eq!(lines, vec!["abc\n", "def"]);
+    }
+}