From 12543674e95664e465478c59b60eaa8fd0f7344e Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Sun, 11 Jul 2021 18:14:37 +0900 Subject: [PATCH 1/9] add apply_all --- src/apply.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++--- src/diff/tests.rs | 3 +++ src/lib.rs | 2 +- 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/apply.rs b/src/apply.rs index 884e7cc..b8acf1d 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -1,6 +1,6 @@ use crate::{ patch::{Hunk, Line, Patch}, - utils::LineIter, + utils::{LineIter, Text}, }; use std::{fmt, iter}; @@ -51,6 +51,14 @@ impl Clone for ImageLine<'_, T> { } } +pub struct ApplyOptions {} + +impl ApplyOptions { + pub fn new() -> Self { + ApplyOptions {} + } +} + /// Apply a `Patch` to a base image /// /// ``` @@ -94,7 +102,7 @@ pub fn apply(base_image: &str, patch: &Patch<'_, str>) -> Result) -> Result .collect(); for (i, hunk) in patch.hunks().iter().enumerate() { - apply_hunk(&mut image, hunk).map_err(|_| ApplyError(i + 1))?; + apply_hunk(&mut image, hunk, &ApplyOptions::new()).map_err(|_| ApplyError(i + 1))?; } Ok(image @@ -117,9 +125,44 @@ pub fn apply_bytes(base_image: &[u8], patch: &Patch<'_, [u8]>) -> Result .collect()) } +/// Try applying all hunks a `Patch` to a base image +pub fn apply_all<'a, 'b, T, R, I>( + base_image: &'a T, + patch: &'a Patch<'_, T>, + options: ApplyOptions, +) -> (R, Vec) +where + T: 'a + Text + ToOwned + ?Sized, + I: 'b + Copy, + &'a T: IntoIterator, + R: std::iter::FromIterator, +{ + let mut image: Vec<_> = LineIter::new(base_image) + .map(ImageLine::Unpatched) + .collect(); + + let mut failed_indices = Vec::new(); + + for (i, hunk) in patch.hunks().iter().enumerate() { + if let Some(_) = apply_hunk(&mut image, hunk, &options).err() { + failed_indices.push(i); + } + } + + ( + image + .into_iter() + .flat_map(ImageLine::into_inner) + .copied() + .collect(), + failed_indices, + ) +} + fn apply_hunk<'a, T: PartialEq + ?Sized>( image: &mut Vec>, hunk: &Hunk<'a, T>, + _options: &ApplyOptions, ) -> Result<(), ()> { // Find position let pos = find_position(image, hunk).ok_or(())?; diff --git a/src/diff/tests.rs b/src/diff/tests.rs index aeb1558..8fa425b 100644 --- a/src/diff/tests.rs +++ b/src/diff/tests.rs @@ -341,6 +341,9 @@ macro_rules! assert_patch { crate::apply_bytes($old.as_bytes(), &bpatch).unwrap(), $new.as_bytes() ); + let patched_all: Vec = + crate::apply_all($old.as_bytes(), &bpatch, crate::ApplyOptions::new()).0; + assert_eq!(patched_all, $new.as_bytes()); }; ($old:ident, $new:ident, $expected:ident $(,)?) => { assert_patch!(DiffOptions::default(), $old, $new, $expected); diff --git a/src/lib.rs b/src/lib.rs index 2d4b0dc..906f6ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -219,7 +219,7 @@ mod patch; mod range; mod utils; -pub use apply::{apply, apply_bytes, ApplyError}; +pub use apply::{apply, apply_all, apply_bytes, ApplyError, ApplyOptions}; pub use diff::{create_patch, create_patch_bytes, DiffOptions}; pub use merge::{merge, merge_bytes, ConflictStyle, MergeOptions}; pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter}; From 8a8190088fa228666233610e8d711292ca2f7a50 Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Sun, 11 Jul 2021 19:33:50 +0900 Subject: [PATCH 2/9] fuzzy --- src/apply.rs | 110 +++++++++++++++++++++++++++++++++++++++------- src/diff/tests.rs | 7 +++ 2 files changed, 102 insertions(+), 15 deletions(-) diff --git a/src/apply.rs b/src/apply.rs index b8acf1d..4459c1d 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -2,6 +2,7 @@ use crate::{ patch::{Hunk, Line, Patch}, utils::{LineIter, Text}, }; +use std::collections::VecDeque; use std::{fmt, iter}; /// An error returned when [`apply`]ing a `Patch` fails @@ -51,11 +52,18 @@ impl Clone for ImageLine<'_, T> { } } -pub struct ApplyOptions {} +pub struct ApplyOptions { + max_fuzzy: usize, +} impl ApplyOptions { pub fn new() -> Self { - ApplyOptions {} + ApplyOptions { max_fuzzy: 0 } + } + + pub fn with_max_fuzzy(mut self, max_fuzzy: usize) -> Self { + self.max_fuzzy = max_fuzzy; + self } } @@ -162,15 +170,20 @@ where fn apply_hunk<'a, T: PartialEq + ?Sized>( image: &mut Vec>, hunk: &Hunk<'a, T>, - _options: &ApplyOptions, + options: &ApplyOptions, ) -> Result<(), ()> { // Find position - let pos = find_position(image, hunk).ok_or(())?; + let (pos, fuzzy) = find_position(image, hunk, options.max_fuzzy).ok_or(())?; + let begin = pos + fuzzy; + let end = pos + + pre_image_line_count(hunk.lines()) + .checked_sub(fuzzy) + .unwrap_or(0); // update image image.splice( - pos..pos + pre_image_line_count(hunk.lines()), - post_image(hunk.lines()).map(ImageLine::Patched), + begin..end, + skip_last(post_image(hunk.lines()).skip(fuzzy), fuzzy).map(ImageLine::Patched), ); Ok(()) @@ -185,16 +198,19 @@ fn apply_hunk<'a, T: PartialEq + ?Sized>( fn find_position( image: &[ImageLine], hunk: &Hunk<'_, T>, -) -> Option { + max_fuzzy: usize, +) -> Option<(usize, usize)> { let pos = hunk.new_range().start().saturating_sub(1); - // Create an iterator that starts with 'pos' and then interleaves - // moving pos backward/foward by one. - let backward = (0..pos).rev(); - let forward = pos + 1..image.len(); - for pos in iter::once(pos).chain(interleave(backward, forward)) { - if match_fragment(image, hunk.lines(), pos) { - return Some(pos); + for fuzzy in 0..=max_fuzzy { + // Create an iterator that starts with 'pos' and then interleaves + // moving pos backward/foward by one. + let backward = (0..pos).rev(); + let forward = pos + 1..image.len(); + for pos in iter::once(pos).chain(interleave(backward, forward)) { + if match_fragment(image, hunk.lines(), pos, fuzzy) { + return Some((pos, fuzzy)); + } } } @@ -223,10 +239,13 @@ fn match_fragment( image: &[ImageLine], lines: &[Line<'_, T>], pos: usize, + fuzzy: usize, ) -> bool { let len = pre_image_line_count(lines); + let begin = pos + fuzzy; + let end = pos + len.checked_sub(fuzzy).unwrap_or(0); - let image = if let Some(image) = image.get(pos..pos + len) { + let image = if let Some(image) = image.get(begin..end) { image } else { return false; @@ -284,3 +303,64 @@ where } } } + +fn skip_last(iter: I, count: usize) -> SkipLast { + SkipLast { + iter: iter.fuse(), + buffer: VecDeque::with_capacity(count), + count, + } +} + +#[derive(Debug)] +struct SkipLast, Item> { + iter: iter::Fuse, + buffer: VecDeque, + count: usize, +} + +impl, Item> Iterator for SkipLast { + type Item = Item; + + fn next(&mut self) -> Option { + if self.count == 0 { + return self.iter.next(); + } + while self.buffer.len() != self.count { + self.buffer.push_front(self.iter.next()?); + } + let next = self.iter.next()?; + let res = self.buffer.pop_back()?; + self.buffer.push_front(next); + Some(res) + } +} + +#[cfg(test)] +mod skip_last_test { + use crate::apply::skip_last; + + #[test] + fn skip_last_test() { + let a = [1, 2, 3, 4, 5, 6, 7]; + + assert_eq!( + skip_last(a.iter().copied(), 0) + .collect::>() + .as_slice(), + &[1, 2, 3, 4, 5, 6, 7] + ); + assert_eq!( + skip_last(a.iter().copied(), 5) + .collect::>() + .as_slice(), + &[1, 2] + ); + assert_eq!( + skip_last(a.iter().copied(), 7) + .collect::>() + .as_slice(), + &[] + ); + } +} diff --git a/src/diff/tests.rs b/src/diff/tests.rs index 8fa425b..75b2772 100644 --- a/src/diff/tests.rs +++ b/src/diff/tests.rs @@ -343,7 +343,14 @@ macro_rules! assert_patch { ); let patched_all: Vec = crate::apply_all($old.as_bytes(), &bpatch, crate::ApplyOptions::new()).0; + let patched_fuzzy: Vec = crate::apply_all( + $old.as_bytes(), + &bpatch, + crate::ApplyOptions::new().with_max_fuzzy(1), + ) + .0; assert_eq!(patched_all, $new.as_bytes()); + assert_eq!(patched_fuzzy, $new.as_bytes()); }; ($old:ident, $new:ident, $expected:ident $(,)?) => { assert_patch!(DiffOptions::default(), $old, $new, $expected); From 9349417fa28e657e24d69d73c1900c8d95af840e Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Sun, 11 Jul 2021 19:43:55 +0900 Subject: [PATCH 3/9] implement default for ApplyOptions --- src/apply.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/apply.rs b/src/apply.rs index 4459c1d..36dc096 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -56,6 +56,12 @@ pub struct ApplyOptions { max_fuzzy: usize, } +impl Default for ApplyOptions { + fn default() -> Self { + ApplyOptions::new() + } +} + impl ApplyOptions { pub fn new() -> Self { ApplyOptions { max_fuzzy: 0 } From 0753f0b0ebd565753d740bb90b2ecb2fb8761a5d Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Sun, 11 Jul 2021 19:44:13 +0900 Subject: [PATCH 4/9] implement Debug for ApplyOptions --- src/apply.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/apply.rs b/src/apply.rs index 36dc096..63f78be 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -52,6 +52,7 @@ impl Clone for ImageLine<'_, T> { } } +#[derive(Debug)] pub struct ApplyOptions { max_fuzzy: usize, } From 1d18b258476e281de538f01ee34e07ee0bdaa7c3 Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Sun, 11 Jul 2021 19:54:23 +0900 Subject: [PATCH 5/9] make everything type specific --- src/apply.rs | 40 +++++++++++++++++++++++++++++----------- src/diff/tests.rs | 31 +++++++++++++++++++++---------- src/lib.rs | 2 +- 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/src/apply.rs b/src/apply.rs index 63f78be..8c872e8 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -1,6 +1,6 @@ use crate::{ patch::{Hunk, Line, Patch}, - utils::{LineIter, Text}, + utils::LineIter, }; use std::collections::VecDeque; use std::{fmt, iter}; @@ -141,17 +141,11 @@ pub fn apply_bytes(base_image: &[u8], patch: &Patch<'_, [u8]>) -> Result } /// Try applying all hunks a `Patch` to a base image -pub fn apply_all<'a, 'b, T, R, I>( - base_image: &'a T, - patch: &'a Patch<'_, T>, +pub fn apply_all_bytes( + base_image: &[u8], + patch: &Patch<'_, [u8]>, options: ApplyOptions, -) -> (R, Vec) -where - T: 'a + Text + ToOwned + ?Sized, - I: 'b + Copy, - &'a T: IntoIterator, - R: std::iter::FromIterator, -{ +) -> (Vec, Vec) { let mut image: Vec<_> = LineIter::new(base_image) .map(ImageLine::Unpatched) .collect(); @@ -174,6 +168,30 @@ where ) } +/// Try applying all hunks a `Patch` to a base image +pub fn apply_all( + base_image: &str, + patch: &Patch<'_, str>, + options: ApplyOptions, +) -> (String, Vec) { + let mut image: Vec<_> = LineIter::new(base_image) + .map(ImageLine::Unpatched) + .collect(); + + let mut failed_indices = Vec::new(); + + for (i, hunk) in patch.hunks().iter().enumerate() { + if let Some(_) = apply_hunk(&mut image, hunk, &options).err() { + failed_indices.push(i); + } + } + + ( + image.into_iter().map(ImageLine::into_inner).collect(), + failed_indices, + ) +} + fn apply_hunk<'a, T: PartialEq + ?Sized>( image: &mut Vec>, hunk: &Hunk<'a, T>, diff --git a/src/diff/tests.rs b/src/diff/tests.rs index 75b2772..22373c3 100644 --- a/src/diff/tests.rs +++ b/src/diff/tests.rs @@ -341,16 +341,27 @@ macro_rules! assert_patch { crate::apply_bytes($old.as_bytes(), &bpatch).unwrap(), $new.as_bytes() ); - let patched_all: Vec = - crate::apply_all($old.as_bytes(), &bpatch, crate::ApplyOptions::new()).0; - let patched_fuzzy: Vec = crate::apply_all( - $old.as_bytes(), - &bpatch, - crate::ApplyOptions::new().with_max_fuzzy(1), - ) - .0; - assert_eq!(patched_all, $new.as_bytes()); - assert_eq!(patched_fuzzy, $new.as_bytes()); + assert_eq!( + crate::apply_all_bytes($old.as_bytes(), &bpatch, crate::ApplyOptions::new()).0, + $new.as_bytes() + ); + assert_eq!( + crate::apply_all_bytes( + $old.as_bytes(), + &bpatch, + crate::ApplyOptions::new().with_max_fuzzy(1) + ) + .0, + $new.as_bytes() + ); + assert_eq!( + crate::apply_all($old, &patch, crate::ApplyOptions::new()).0, + $new + ); + assert_eq!( + crate::apply_all($old, &patch, crate::ApplyOptions::new().with_max_fuzzy(1)).0, + $new + ); }; ($old:ident, $new:ident, $expected:ident $(,)?) => { assert_patch!(DiffOptions::default(), $old, $new, $expected); diff --git a/src/lib.rs b/src/lib.rs index 906f6ca..2c8e70f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -219,7 +219,7 @@ mod patch; mod range; mod utils; -pub use apply::{apply, apply_all, apply_bytes, ApplyError, ApplyOptions}; +pub use apply::{apply, apply_all, apply_all_bytes, apply_bytes, ApplyError, ApplyOptions}; pub use diff::{create_patch, create_patch_bytes, DiffOptions}; pub use merge::{merge, merge_bytes, ConflictStyle, MergeOptions}; pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter}; From 67a1c80d1680e1a2ce873453d5463e43b994ecb1 Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Thu, 15 Jul 2021 15:57:51 +0900 Subject: [PATCH 6/9] fix: it may apply to failed position --- src/apply.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/apply.rs b/src/apply.rs index 8c872e8..f98ce4d 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -198,7 +198,11 @@ fn apply_hunk<'a, T: PartialEq + ?Sized>( options: &ApplyOptions, ) -> Result<(), ()> { // Find position - let (pos, fuzzy) = find_position(image, hunk, options.max_fuzzy).ok_or(())?; + + let max_fuzzy = pre_context_line_count(hunk.lines()) + .min(post_context_line_count(hunk.lines())) + .min(options.max_fuzzy); + let (pos, fuzzy) = find_position(image, hunk, max_fuzzy).ok_or(())?; let begin = pos + fuzzy; let end = pos + pre_image_line_count(hunk.lines()) @@ -242,6 +246,21 @@ fn find_position( None } +fn pre_context_line_count(lines: &[Line<'_, T>]) -> usize { + lines + .iter() + .take_while(|x| matches!(x, Line::Context(_))) + .count() +} + +fn post_context_line_count(lines: &[Line<'_, T>]) -> usize { + lines + .iter() + .rev() + .take_while(|x| matches!(x, Line::Context(_))) + .count() +} + fn pre_image_line_count(lines: &[Line<'_, T>]) -> usize { pre_image(lines).count() } From 18b1ebdbe074063fdbe4df686969a1bc40aeaa8d Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Wed, 4 Aug 2021 21:33:48 +0900 Subject: [PATCH 7/9] fix: hunks overlap (cherry picked from commit b367d53f88a3fc755281212ed2eae91385bc5b06) --- src/diff/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/mod.rs b/src/diff/mod.rs index a456c41..8a2ea8f 100644 --- a/src/diff/mod.rs +++ b/src/diff/mod.rs @@ -235,7 +235,7 @@ fn to_hunks<'a, T: ?Sized>( // Check to see if we can merge the hunks let start1_next = cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len); - if start1_next < end1 { + if start1_next <= end1 { // Context lines between hunks for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start) { From b1ca69e9ea87fcbb7f6d924720cb30b9eb255ec7 Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Tue, 14 Sep 2021 15:11:00 +0900 Subject: [PATCH 8/9] Revert "fix: hunks overlap" This reverts commit 18b1ebdbe074063fdbe4df686969a1bc40aeaa8d. --- src/diff/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/mod.rs b/src/diff/mod.rs index 8a2ea8f..a456c41 100644 --- a/src/diff/mod.rs +++ b/src/diff/mod.rs @@ -235,7 +235,7 @@ fn to_hunks<'a, T: ?Sized>( // Check to see if we can merge the hunks let start1_next = cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len); - if start1_next <= end1 { + if start1_next < end1 { // Context lines between hunks for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start) { From 44b79a97b08ab957dae59e63ab27d1ef62782dcc Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Tue, 14 Sep 2021 16:05:15 +0900 Subject: [PATCH 9/9] add test and fix --- src/apply.rs | 2 +- src/diff/tests.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/apply.rs b/src/apply.rs index f98ce4d..0431823 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -300,7 +300,7 @@ fn match_fragment( return false; } - pre_image(lines).eq(image.iter().map(ImageLine::inner)) + pre_image(&lines[fuzzy..len - fuzzy]).eq(image.iter().map(ImageLine::inner)) } #[derive(Debug)] diff --git a/src/diff/tests.rs b/src/diff/tests.rs index 22373c3..daee0e0 100644 --- a/src/diff/tests.rs +++ b/src/diff/tests.rs @@ -632,3 +632,30 @@ void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size "; assert_patch!(original, a, expected_diffy); } + +#[test] +fn fuzzy_patch() { + let diff = Patch::from_str( + "\ +--- original ++++ modified +@@ -1,6 +1,6 @@ + A + B +-C +-D ++E ++F + G + H +", + ) + .unwrap(); + let newer = "0\nB\nC\nD\nG\nH\n"; + let expected = "0\nB\nE\nF\nG\nH\n"; + println!("{}", diff); + assert_eq!( + crate::apply_all(newer, &diff, crate::ApplyOptions::new().with_max_fuzzy(2)), + (expected.to_owned(), vec![]), + ) +}