Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge arbitrary types #25

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ description = "Tools for finding and manipulating differences between files"
documentation = "https://docs.rs/diffy"
repository = "https://github.com/bmwill/diffy"
readme = "README.md"
keywords = ["diff", "patch", "merge"]
categories = ["text-processing"]
keywords = ["diff", "patch", "merge", "3-way", "myers"]
categories = ["text-processing", "algorithms"]
rust-version = "1.62.1"
edition = "2021"

Expand Down
8 changes: 4 additions & 4 deletions src/diff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ impl DiffOptions {
/// Produce a Patch between two texts based on the configured options
pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> {
let mut classifier = Classifier::default();
let (old_lines, old_ids) = classifier.classify_lines(original);
let (new_lines, new_ids) = classifier.classify_lines(modified);
let (old_lines, old_ids) = classifier.classify_text(original);
let (new_lines, new_ids) = classifier.classify_text(modified);

let solution = self.diff_slice(&old_ids, &new_ids);

Expand All @@ -112,8 +112,8 @@ impl DiffOptions {
modified: &'a [u8],
) -> Patch<'a, [u8]> {
let mut classifier = Classifier::default();
let (old_lines, old_ids) = classifier.classify_lines(original);
let (new_lines, new_ids) = classifier.classify_lines(modified);
let (old_lines, old_ids) = classifier.classify_text(original);
let (new_lines, new_ids) = classifier.classify_text(modified);

let solution = self.diff_slice(&old_ids, &new_ids);

Expand Down
40 changes: 34 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
//! Tools for finding and manipulating differences between files
//!
//! ## Overview
//! # Overview
//!
//! This library is intended to be a collection of tools used to find and
//! manipulate differences between files inspired by [LibXDiff] and [GNU
//! Diffutils]. Version control systems like [Git] and [Mercurial] generally
//! communicate differences between two versions of a file using a `diff` or
//! `patch`.
//! manipulate differences between files or arbitrary data inspired by
//! [LibXDiff] and [GNU Diffutils]. Version control systems like [Git] and
//! [Mercurial] generally communicate differences between two versions of a
//! file using a `diff` or `patch`.
//!
//! The current diff implementation is based on the [Myers' diff algorithm].
//!
//! ## Supported features
//!
//! | Feature | UTF-8 strings | Non-UTF-8 strings | Arbitrary types (that implement `Eq + Hash`) |
//! |------------------|---------------|------------------|----------------------------------------------|
//! | Creating a patch | ✅ | ✅ | |
//! | Applying a patch | ✅ | ✅ | |
//! | 3-way merge | ✅ | ✅ | ✅ |
//!
//! "Arbitrary types" means "any type that implements `Eq + Hash`".<br/>
//! Supporting patches for arbitrary types would not be very helpful, since
//! there is no standardized way of formatting them.
//!
//! ## UTF-8 and Non-UTF-8
//!
//! This library has support for working with both UTF-8 and non-UTF-8 texts.
Expand Down Expand Up @@ -198,6 +210,22 @@
//! assert_eq!(merge(original, a, b).unwrap_err(), expected);
//! ```
//!
//! It is possible to perform 3-way merges between collections of arbitrary
//! types `T` as long as `T: Eq + Hash`.
//! ```
//! use diffy::merge_custom;
//!
//! let original = [1,2,3,4,5, 6];
//! let a = [1,2,3,4,5,100,6];
//! let b = [1, 3,4,5, 6];
//! let expected = [1, 3,4,5,100,6];
//!
//! let result = merge_custom(&original, &a, &b).unwrap();
//! let result_owned: Vec<i32> = result.iter().map(|r| **r).collect();
//! assert_eq!(result_owned, expected);
//! ```
//!
//!
//! [LibXDiff]: http://www.xmailserver.org/xdiff-lib.html
//! [Myers' diff algorithm]: http://www.xmailserver.org/diff2.pdf
//! [GNU Diffutils]: https://www.gnu.org/software/diffutils/
Expand All @@ -221,5 +249,5 @@ mod utils;

pub use apply::{apply, apply_bytes, ApplyError};
pub use diff::{create_patch, create_patch_bytes, DiffOptions};
pub use merge::{merge, merge_bytes, ConflictStyle, MergeOptions};
pub use merge::{merge, merge_bytes, merge_custom, ConflictStyle, MergeConflicts, MergeOptions};
pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter};
104 changes: 94 additions & 10 deletions src/merge/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::{
range::{DiffRange, Range, SliceLike},
utils::Classifier,
};
use std::hash::Hash;
use std::{cmp, fmt};

#[cfg(test)]
Expand Down Expand Up @@ -152,9 +153,9 @@ impl MergeOptions {
theirs: &'a str,
) -> Result<String, String> {
let mut classifier = Classifier::default();
let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor);
let (our_lines, our_ids) = classifier.classify_lines(ours);
let (their_lines, their_ids) = classifier.classify_lines(theirs);
let (ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor);
let (our_lines, our_ids) = classifier.classify_text(ours);
let (their_lines, their_ids) = classifier.classify_text(theirs);

let opts = DiffOptions::default();
let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
Expand Down Expand Up @@ -183,9 +184,9 @@ impl MergeOptions {
theirs: &'a [u8],
) -> Result<Vec<u8>, Vec<u8>> {
let mut classifier = Classifier::default();
let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor);
let (our_lines, our_ids) = classifier.classify_lines(ours);
let (their_lines, their_ids) = classifier.classify_lines(theirs);
let (ancestor_lines, ancestor_ids) = classifier.classify_text(ancestor);
let (our_lines, our_ids) = classifier.classify_text(ours);
let (their_lines, their_ids) = classifier.classify_text(theirs);

let opts = DiffOptions::default();
let our_solution = opts.diff_slice(&ancestor_ids, &our_ids);
Expand All @@ -205,6 +206,29 @@ impl MergeOptions {
self.style,
)
}

/// Perform a 3-way merge over slices of arbitrary items.
///
/// Every item of `ancestor`, `ours` and `theirs` is mapped to a numeric id by
/// one shared `Classifier`, so equal items receive the same id across all
/// three inputs. The id sequences of `ours` and `theirs` are each diffed
/// against the ancestor with default `DiffOptions`, and the two solutions are
/// combined into merge ranges.
///
/// Note that this body does not read any field of `self`.
///
/// # Errors
///
/// Returns [`MergeConflicts`] when at least one conflicting range remains
/// after conflict cleanup.
pub fn merge_custom<'a, T: Eq + Hash>(
    &self,
    ancestor: &'a [T],
    ours: &'a [T],
    theirs: &'a [T],
) -> Result<Vec<&'a T>, MergeConflicts> {
    // One classifier for all three inputs keeps the ids comparable.
    let mut classifier = Classifier::default();
    let (base_items, base_ids) = classifier.classify(ancestor);
    let (our_items, our_ids) = classifier.classify(ours);
    let (their_items, their_ids) = classifier.classify(theirs);

    // Diff each side against the common ancestor.
    let differ = DiffOptions::default();
    let ours_vs_base = differ.diff_slice(&base_ids, &our_ids);
    let theirs_vs_base = differ.diff_slice(&base_ids, &their_ids);

    // Fold both diffs into a single list of merge ranges and tidy conflicts.
    let combined = merge_solutions(&ours_vs_base, &theirs_vs_base);
    let mut ranges = diff3_range_to_merge_range(&combined);
    cleanup_conflicts(&mut ranges);

    output_result_custom(&base_items, &our_items, &their_items, &ranges)
}
}

impl Default for MergeOptions {
Expand Down Expand Up @@ -277,6 +301,30 @@ pub fn merge_bytes<'a>(
MergeOptions::default().merge_bytes(ancestor, ours, theirs)
}

/// Information about a 3-way merge that could not be completed cleanly.
///
/// This is the error value produced when merging collections of arbitrary
/// items. It only carries the number of conflicts.
#[derive(Debug, Clone, Copy, Hash, Ord, PartialOrd, Eq, PartialEq)]
pub struct MergeConflicts {
    /// How many conflicts have occurred
    pub count: usize,
}

impl std::fmt::Display for MergeConflicts {
    /// Writes `"<count> merge conflict"` for exactly one conflict and
    /// `"<count> merge conflicts"` for every other count.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let noun = if self.count == 1 {
            "merge conflict"
        } else {
            "merge conflicts"
        };
        write!(f, "{} {}", self.count, noun)
    }
}

impl std::error::Error for MergeConflicts {}

/// Perform a 3-way merge between any list of values that support it
///
/// Items only need to implement `Eq + Hash`. This is a convenience wrapper
/// that calls [`MergeOptions::merge_custom`] on `MergeOptions::default()`.
///
/// On success the returned vector holds references borrowed from the input
/// slices.
///
/// # Errors
///
/// Returns a [`MergeConflicts`] value when the merge cannot be completed
/// without conflicts.
pub fn merge_custom<'a, T: Eq + Hash>(
ancestor: &'a [T],
ours: &'a [T],
theirs: &'a [T],
) -> Result<Vec<&'a T>, MergeConflicts> {
MergeOptions::default().merge_custom(ancestor, ours, theirs)
}

fn merge_solutions<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>(
our_solution: &[DiffRange<'ancestor, 'ours, T>],
their_solution: &[DiffRange<'ancestor, 'theirs, T>],
Expand Down Expand Up @@ -490,11 +538,11 @@ fn cleanup_conflicts<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike + PartialE
}
}

fn output_result<'a, T: ?Sized>(
fn output_result<'a>(
ancestor: &[&'a str],
ours: &[&'a str],
theirs: &[&'a str],
merge: &[MergeRange<T>],
merge: &[MergeRange<[u64]>],
marker_len: usize,
style: ConflictStyle,
) -> Result<String, String> {
Expand Down Expand Up @@ -556,11 +604,11 @@ fn add_conflict_marker(
output.push('\n');
}

fn output_result_bytes<'a, T: ?Sized>(
fn output_result_bytes<'a>(
ancestor: &[&'a [u8]],
ours: &[&'a [u8]],
theirs: &[&'a [u8]],
merge: &[MergeRange<T>],
merge: &[MergeRange<[u64]>],
marker_len: usize,
style: ConflictStyle,
) -> Result<Vec<u8>, Vec<u8>> {
Expand Down Expand Up @@ -635,3 +683,39 @@ fn add_conflict_marker_bytes(
}
output.push(b'\n');
}

/// Assemble the merged item list from a sequence of merge ranges.
///
/// `ancestor`, `ours` and `theirs` are the per-item references of the three
/// inputs; each entry of `merge` selects a span from one of them:
///
/// * `Equal` spans are copied from `ancestor`,
/// * `Ours` and `Both` spans are copied from `ours`,
/// * `Theirs` spans are copied from `theirs`,
/// * `Conflict` spans contribute nothing to the output and are only counted.
///
/// Returns the merged references when no conflict was seen, otherwise a
/// `MergeConflicts` carrying the number of conflicting ranges.
fn output_result_custom<'a, T: Eq + Hash>(
    ancestor: &[&'a T],
    ours: &[&'a T],
    theirs: &[&'a T],
    merge: &[MergeRange<[u64]>],
) -> Result<Vec<&'a T>, MergeConflicts> {
    let mut merged = Vec::new();
    let mut count = 0;

    for segment in merge {
        match segment {
            // Arbitrary items have no conflict-marker representation, so a
            // conflict is only tallied.
            MergeRange::Conflict(..) => count += 1,
            MergeRange::Equal(span, ..) => merged.extend_from_slice(&ancestor[span.range()]),
            MergeRange::Ours(span) => merged.extend_from_slice(&ours[span.range()]),
            MergeRange::Theirs(span) => merged.extend_from_slice(&theirs[span.range()]),
            // Both sides made the same change; take it from `ours`.
            MergeRange::Both(span, _) => merged.extend_from_slice(&ours[span.range()]),
        }
    }

    if count == 0 {
        Ok(merged)
    } else {
        Err(MergeConflicts { count })
    }
}
13 changes: 13 additions & 0 deletions src/merge/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,19 @@ salt
);
}

#[test]
#[rustfmt::skip]
fn test_merge_arbitrary_type() {
    // `ours` inserts 100 before the trailing 6, `theirs` removes the 2;
    // the two edits touch different places and must both be applied.
    let ancestor = [1,2,3,4,5,    6];
    let ours     = [1,2,3,4,5,100,6];
    let theirs   = [1,  3,4,5,    6];
    let expected = [1,  3,4,5,100,6];

    let merged: Vec<i32> = merge_custom(&ancestor, &ours, &theirs)
        .unwrap()
        .into_iter()
        .copied()
        .collect();
    assert_eq!(merged, expected);
}

#[test]
fn myers_diffy_vs_git() {
let original = "\
Expand Down
35 changes: 32 additions & 3 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub struct Classifier<'a, T: ?Sized> {
}

impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> {
fn classify(&mut self, record: &'a T) -> u64 {
fn classify_item(&mut self, record: &'a T) -> u64 {
match self.unique_ids.entry(record) {
Entry::Occupied(o) => *o.get(),
Entry::Vacant(v) => {
Expand All @@ -25,9 +25,17 @@ impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> {
}

impl<'a, T: ?Sized + Text> Classifier<'a, T> {
pub fn classify_lines(&mut self, text: &'a T) -> (Vec<&'a T>, Vec<u64>) {
pub fn classify_text(&mut self, text: &'a T) -> (Vec<&'a T>, Vec<u64>) {
LineIter::new(text)
.map(|line| (line, self.classify(line)))
.map(|line| (line, self.classify_item(line)))
.unzip()
}
}

impl<'a, T: Eq + Hash> Classifier<'a, T> {
    /// Classify every element of `data`.
    ///
    /// Returns two parallel vectors in input order: a reference to each
    /// element, and the id that `classify_item` assigned to it.
    pub fn classify(&mut self, data: &'a [T]) -> (Vec<&'a T>, Vec<u64>) {
        let mut items = Vec::new();
        let mut ids = Vec::new();
        for item in data {
            ids.push(self.classify_item(item));
            items.push(item);
        }
        (items, ids)
    }
}
Expand Down Expand Up @@ -227,3 +235,24 @@ fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> {
haystack.iter().position(|&b| b == byte)
}

#[cfg(test)]
mod test {
    use super::Classifier;

    /// Classifying a slice yields one reference per element, in input order.
    #[test]
    fn classify() {
        let input = vec![10, 11, 12, 13];
        let expected: Vec<&i32> = input.iter().collect();

        let mut classifier = Classifier::default();
        let (items, _) = classifier.classify(&input);

        assert_eq!(items, expected);
    }

    /// Classifying text splits it into lines, keeping each line terminator.
    #[test]
    fn classify_string() {
        let input = "abc\ndef";

        let mut classifier = Classifier::default();
        let (lines, _) = classifier.classify_text(input);

        assert_eq!(lines, ["abc\n", "def"]);
    }
}
Loading