diff --git a/Cargo.toml b/Cargo.toml
index 8f8df90..9b83536 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,8 +7,8 @@ description = "Tools for finding and manipulating differences between files"
documentation = "https://docs.rs/diffy"
repository = "https://github.com/bmwill/diffy"
readme = "README.md"
-keywords = ["diff", "patch", "merge"]
-categories = ["text-processing"]
+keywords = ["diff", "patch", "merge", "3-way", "myers"]
+categories = ["text-processing", "algorithms"]
rust-version = "1.62.1"
edition = "2021"
diff --git a/src/diff/mod.rs b/src/diff/mod.rs
index a456c41..d6f2842 100644
--- a/src/diff/mod.rs
+++ b/src/diff/mod.rs
@@ -96,8 +96,8 @@ impl DiffOptions {
/// Produce a Patch between two texts based on the configured options
pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> {
let mut classifier = Classifier::default();
- let (old_lines, old_ids) = classifier.classify_lines(original);
- let (new_lines, new_ids) = classifier.classify_lines(modified);
+ let (old_lines, old_ids) = classifier.classify_text(original);
+ let (new_lines, new_ids) = classifier.classify_text(modified);
let solution = self.diff_slice(&old_ids, &new_ids);
@@ -112,8 +112,8 @@ impl DiffOptions {
modified: &'a [u8],
) -> Patch<'a, [u8]> {
let mut classifier = Classifier::default();
- let (old_lines, old_ids) = classifier.classify_lines(original);
- let (new_lines, new_ids) = classifier.classify_lines(modified);
+ let (old_lines, old_ids) = classifier.classify_text(original);
+ let (new_lines, new_ids) = classifier.classify_text(modified);
let solution = self.diff_slice(&old_ids, &new_ids);
diff --git a/src/lib.rs b/src/lib.rs
index 2d4b0dc..d10afa2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,15 +1,27 @@
//! Tools for finding and manipulating differences between files
//!
-//! ## Overview
+//! # Overview
//!
//! This library is intended to be a collection of tools used to find and
-//! manipulate differences between files inspired by [LibXDiff] and [GNU
-//! Diffutils]. Version control systems like [Git] and [Mercurial] generally
-//! communicate differences between two versions of a file using a `diff` or
-//! `patch`.
+//! manipulate differences between files or arbitrary data inspired by
+//! [LibXDiff] and [GNU Diffutils]. Version control systems like [Git] and
+//! [Mercurial] generally communicate differences between two versions of a
+//! file using a `diff` or `patch`.
//!
//! The current diff implementation is based on the [Myers' diff algorithm].
//!
+//! ## Supported features
+//!
+//! | Feature | UTF-8 strings | non-UTF-8 strings | Arbitrary types (that implement `Eq + Hash`) |
+//! |------------------|---------------|------------------|----------------------------------------------|
+//! | Creating a patch | ✅ | ✅ | |
+//! | Applying a patch | ✅ | ✅ | |
+//! | 3-way merge | ✅ | ✅ | ✅ |
+//!
+//! "Arbitrary types" means "any type that implements `Eq + Hash`".
+//! Supporting patches for arbitrary types would not be very helpful, since
+//! there is no standardized way of formatting them.
+//!
//! ## UTF-8 and Non-UTF-8
//!
//! This library has support for working with both utf8 and non-utf8 texts.
@@ -198,6 +210,22 @@
//! assert_eq!(merge(original, a, b).unwrap_err(), expected);
//! ```
//!
+//! It is possible to perform 3-way merges between slices of an arbitrary
+//! type `T` as long as `T: Eq + Hash`:
+//! ```
+//! use diffy::merge_custom;
+//!
+//! let original = [1,2,3,4,5, 6];
+//! let a = [1,2,3,4,5,100,6];
+//! let b = [1, 3,4,5, 6];
+//! let expected = [1, 3,4,5,100,6];
+//!
+//! let result = merge_custom(&original, &a, &b).unwrap();
+//! let result_owned: Vec<i32> = result.iter().map(|r| **r).collect();
+//! assert_eq!(result_owned, expected);
+//! ```
+//!
+//!
//! [LibXDiff]: http://www.xmailserver.org/xdiff-lib.html
//! [Myers' diff algorithm]: http://www.xmailserver.org/diff2.pdf
//! [GNU Diffutils]: https://www.gnu.org/software/diffutils/
@@ -221,5 +249,5 @@ mod utils;
pub use apply::{apply, apply_bytes, ApplyError};
pub use diff::{create_patch, create_patch_bytes, DiffOptions};
-pub use merge::{merge, merge_bytes, ConflictStyle, MergeOptions};
+pub use merge::{merge, merge_bytes, merge_custom, ConflictStyle, MergeConflicts, MergeOptions};
pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter};
diff --git a/src/merge/mod.rs b/src/merge/mod.rs
index 83b99fb..4ffc12c 100644
--- a/src/merge/mod.rs
+++ b/src/merge/mod.rs
@@ -3,6 +3,7 @@ use crate::{
range::{DiffRange, Range, SliceLike},
utils::Classifier,
};
+use std::hash::Hash;
use std::{cmp, fmt};
#[cfg(test)]
@@ -152,9 +153,9 @@ impl MergeOptions {
theirs: &'a str,
) -> Result {
let mut classifier = Classifier::default();
- let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor);
- let (our_lines, our_ids) = classifier.classify_lines(ours);
- let (their_lines, their_ids) = classifier.classify_lines(theirs);
+ let (ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor);
+ let (our_lines, our_ids) = classifier.classify_text(ours);
+ let (their_lines, their_ids) = classifier.classify_text(theirs);
let opts = DiffOptions::default();
let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
@@ -183,9 +184,9 @@ impl MergeOptions {
theirs: &'a [u8],
) -> Result, Vec> {
let mut classifier = Classifier::default();
- let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor);
- let (our_lines, our_ids) = classifier.classify_lines(ours);
- let (their_lines, their_ids) = classifier.classify_lines(theirs);
+ let (ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor);
+ let (our_lines, our_ids) = classifier.classify_text(ours);
+ let (their_lines, their_ids) = classifier.classify_text(theirs);
let opts = DiffOptions::default();
let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
@@ -205,6 +206,29 @@ impl MergeOptions {
self.style,
)
}
+
+ pub fn merge_custom<'a, T: Eq + Hash>(
+ &self,
+ ancestor: &'a [T],
+ ours: &'a [T],
+ theirs: &'a [T],
+ ) -> Result, MergeConflicts> {
+ let mut classifier = Classifier::default();
+ let (ancestor_lines, ancestor_ids) = classifier.classify(ancestor);
+ let (our_lines, our_ids) = classifier.classify(ours);
+ let (their_lines, their_ids) = classifier.classify(theirs);
+
+ let opts = DiffOptions::default();
+ let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
+ let their_solution = opts.diff_slice(&ancestor_ids, &their_ids);
+
+ let merged = merge_solutions(&our_solution, &their_solution);
+ let mut merge = diff3_range_to_merge_range(&merged);
+
+ cleanup_conflicts(&mut merge);
+
+ output_result_custom(&ancestor_lines, &our_lines, &their_lines, &merge)
+ }
}
impl Default for MergeOptions {
@@ -277,6 +301,30 @@ pub fn merge_bytes<'a>(
MergeOptions::default().merge_bytes(ancestor, ours, theirs)
}
+/// Information about a merge that failed because of conflicts
+#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
+pub struct MergeConflicts {
+ /// Number of conflicting regions encountered during the merge
+ pub count: usize,
+}
+
+impl std::fmt::Display for MergeConflicts {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+ write!(f, "{} merge conflicts", self.count)
+ }
+}
+
+impl std::error::Error for MergeConflicts {}
+
+/// Perform a 3-way merge of slices of any `T: Eq + Hash`; conflicts yield `Err(MergeConflicts)`
+pub fn merge_custom<'a, T: Eq + Hash>(
+ ancestor: &'a [T],
+ ours: &'a [T],
+ theirs: &'a [T],
+) -> Result, MergeConflicts> {
+ MergeOptions::default().merge_custom(ancestor, ours, theirs)
+}
+
fn merge_solutions<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>(
our_solution: &[DiffRange<'ancestor, 'ours, T>],
their_solution: &[DiffRange<'ancestor, 'theirs, T>],
@@ -490,11 +538,11 @@ fn cleanup_conflicts<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike + PartialE
}
}
-fn output_result<'a, T: ?Sized>(
+fn output_result<'a>(
ancestor: &[&'a str],
ours: &[&'a str],
theirs: &[&'a str],
- merge: &[MergeRange],
+ merge: &[MergeRange<[u64]>],
marker_len: usize,
style: ConflictStyle,
) -> Result {
@@ -556,11 +604,11 @@ fn add_conflict_marker(
output.push('\n');
}
-fn output_result_bytes<'a, T: ?Sized>(
+fn output_result_bytes<'a>(
ancestor: &[&'a [u8]],
ours: &[&'a [u8]],
theirs: &[&'a [u8]],
- merge: &[MergeRange],
+ merge: &[MergeRange<[u64]>],
marker_len: usize,
style: ConflictStyle,
) -> Result, Vec> {
@@ -635,3 +683,39 @@ fn add_conflict_marker_bytes(
}
output.push(b'\n');
}
+
+fn output_result_custom<'a, T: Eq + Hash>(
+ ancestor: &[&'a T],
+ ours: &[&'a T],
+ theirs: &[&'a T],
+ merge: &[MergeRange<[u64]>],
+) -> Result, MergeConflicts> {
+ let mut conflicts = 0;
+ let mut output = Vec::new();
+
+ for merge_range in merge {
+ match merge_range {
+ MergeRange::Equal(range, ..) => {
+ output.extend(ancestor[range.range()].iter().copied());
+ }
+ MergeRange::Conflict(_ancestor_range, _ours_range, _theirs_range) => {
+ conflicts += 1;
+ }
+ MergeRange::Ours(range) => {
+ output.extend(ours[range.range()].iter().copied());
+ }
+ MergeRange::Theirs(range) => {
+ output.extend(theirs[range.range()].iter().copied());
+ }
+ MergeRange::Both(range, _) => {
+ output.extend(ours[range.range()].iter().copied());
+ }
+ }
+ }
+
+ if conflicts != 0 {
+ Err(MergeConflicts { count: conflicts })
+ } else {
+ Ok(output)
+ }
+}
diff --git a/src/merge/tests.rs b/src/merge/tests.rs
index ff40860..e447e8f 100644
--- a/src/merge/tests.rs
+++ b/src/merge/tests.rs
@@ -215,6 +215,19 @@ salt
);
}
+#[test]
+#[rustfmt::skip]
+fn test_merge_arbitrary_type() {
+ let original = [1,2,3,4,5, 6];
+ let ours = [1,2,3,4,5,100,6];
+ let theirs = [1, 3,4,5, 6];
+ let expected = [1, 3,4,5,100,6];
+
+ let result = merge_custom(&original, &ours, &theirs).unwrap();
+ let result_owned: Vec = result.iter().map(|r| **r).collect();
+ assert_eq!(result_owned, expected);
+}
+
#[test]
fn myers_diffy_vs_git() {
let original = "\
diff --git a/src/utils.rs b/src/utils.rs
index 9b3e70d..aa6d69c 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -12,7 +12,7 @@ pub struct Classifier<'a, T: ?Sized> {
}
impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> {
- fn classify(&mut self, record: &'a T) -> u64 {
+ fn classify_item(&mut self, record: &'a T) -> u64 {
match self.unique_ids.entry(record) {
Entry::Occupied(o) => *o.get(),
Entry::Vacant(v) => {
@@ -25,9 +25,17 @@ impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> {
}
impl<'a, T: ?Sized + Text> Classifier<'a, T> {
- pub fn classify_lines(&mut self, text: &'a T) -> (Vec<&'a T>, Vec) {
+ pub fn classify_text(&mut self, text: &'a T) -> (Vec<&'a T>, Vec) {
LineIter::new(text)
- .map(|line| (line, self.classify(line)))
+ .map(|line| (line, self.classify_item(line)))
+ .unzip()
+ }
+}
+
+impl<'a, T: Eq + Hash> Classifier<'a, T> {
+ pub fn classify(&mut self, data: &'a [T]) -> (Vec<&'a T>, Vec) {
+ data.iter()
+ .map(|item| (item, self.classify_item(item)))
.unzip()
}
}
@@ -227,3 +235,24 @@ fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option {
fn find_byte(haystack: &[u8], byte: u8) -> Option {
haystack.iter().position(|&b| b == byte)
}
+
+#[cfg(test)]
+mod test {
+ use super::Classifier;
+
+ #[test]
+ fn classify() {
+ let input = vec![10, 11, 12, 13];
+ let mut classifier = Classifier::default();
+ let (lines, _ids) = classifier.classify(&input);
+ assert_eq!(lines, vec![&10, &11, &12, &13]);
+ }
+
+ #[test]
+ fn classify_string() {
+ let input = "abc\ndef";
+ let mut classifier = Classifier::default();
+ let (lines, _ids) = classifier.classify_text(input);
+ assert_eq!(lines, vec!["abc\n", "def"]);
+ }
+}