From 90b8197bf0b4d1864ce53b05c05af287dd03c411 Mon Sep 17 00:00:00 2001 From: Matthew Craven Date: Sat, 30 Mar 2024 23:27:35 -0400 Subject: [PATCH 1/2] Make D.B.Lazy.zipWith properly lazy As a bonus, the new code is easier to read and doesn't trigger a spurious incomplete-pattern-match warning. (Or finding the bug can be seen as a bonus for cleaning up that messy code.) Fixes #667. --- Data/ByteString/Lazy.hs | 25 ++++++++++++++----------- tests/Properties/ByteString.hs | 25 +++++++++++++++++++++++++ tests/QuickCheckUtils.hs | 2 ++ 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index 9d4de3484..10da78d35 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -1,4 +1,3 @@ -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} {-# OPTIONS_HADDOCK prune #-} {-# LANGUAGE Trustworthy #-} @@ -1095,6 +1094,7 @@ splitWith p (Chunk c0 cs0) = comb [] (S.splitWith p c0) cs0 comb acc [s] Empty = [revChunks (s:acc)] comb acc [s] (Chunk c cs) = comb (s:acc) (S.splitWith p c) cs comb acc (s:ss) cs = revChunks (s:acc) : comb [] ss cs + comb _ [] _ = error "Strict splitWith returned [] for nonempty input" {-# INLINE splitWith #-} -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte @@ -1122,6 +1122,7 @@ split w (Chunk c0 cs0) = comb [] (S.split w c0) cs0 comb acc [s] Empty = [revChunks (s:acc)] comb acc [s] (Chunk c cs) = comb (s:acc) (S.split w c) cs comb acc (s:ss) cs = revChunks (s:acc) : comb [] ss cs + comb _ [] _ = error "Strict split returned [] for nonempty input" {-# INLINE split #-} -- | The 'group' function takes a ByteString and returns a list of @@ -1441,16 +1442,18 @@ zipWith _ Empty _ = [] zipWith _ _ Empty = [] zipWith f (Chunk a as) (Chunk b bs) = go a as b bs where - go x xs y ys = f (S.unsafeHead x) (S.unsafeHead y) - : to (S.unsafeTail x) xs (S.unsafeTail y) ys - - to x Empty _ _ | S.null x = [] - to _ _ y Empty | S.null y = [] - to x xs y ys | not (S.null x) - && not (S.null y) = go x xs y ys - to x xs _ (Chunk y' ys) | not (S.null x) = go x xs y' ys - to _ (Chunk x' xs) y ys | not (S.null y) = go x' xs y ys - to _ (Chunk x' xs) _ (Chunk y' ys) = go x' xs y' ys + -- This loop is written in a slightly awkward way but ensures we + -- don't have to allocate any 'Chunk' objects to pass to a recursive + -- call. We have in some sense performed SpecConstr manually. + go !x xs !y ys + = f (S.unsafeHead x) (S.unsafeHead y) + : to (S.unsafeTail x) xs (S.unsafeTail y) ys + + to !x xs !y ys + | Chunk x' xs' <- chunk x xs + , Chunk y' ys' <- chunk y ys + = go x' xs' y' ys' + | otherwise = [] -- | A specialised version of `zipWith` for the common case of a -- simultaneous map over two ByteStrings, to build a 3rd. diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs index fec5d7f0a..b8d13e6ea 100644 --- a/tests/Properties/ByteString.hs +++ b/tests/Properties/ByteString.hs @@ -4,6 +4,7 @@ -- License : BSD-style {-# LANGUAGE CPP #-} + {-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -603,6 +604,16 @@ tests = \f x y -> (B.zipWith f x y :: [Int]) === zipWith f (B.unpack x) (B.unpack y) , testProperty "packZipWith" $ \f x y -> B.unpack (B.packZipWith ((toElem .) . f) x y) === zipWith ((toElem .) . f) (B.unpack x) (B.unpack y) +# ifdef BYTESTRING_LAZY + -- Don't use (===) in these laziness tests: + -- We don't want printing the test case to fail! + , testProperty "zip is lazy" $ lazyZipTest $ + \x y -> B.zip x y == zip (B.unpack x) (B.unpack y) + , testProperty "zipWith is lazy" $ \f -> lazyZipTest $ + \x y -> (B.zipWith f x y :: [Int]) == zipWith f (B.unpack x) (B.unpack y) + , testProperty "packZipWith is lazy" $ \f -> lazyZipTest $ + \x y -> B.unpack (B.packZipWith ((toElem .) . f) x y) == zipWith ((toElem .) . f) (B.unpack x) (B.unpack y) +# endif , testProperty "unzip" $ \(fmap (toElem *** toElem) -> xs) -> (B.unpack *** B.unpack) (B.unzip xs) === unzip xs #endif @@ -796,3 +807,17 @@ readIntegerUnsigned xs = case readMaybe ys of where (ys, zs) = span isDigit xs #endif + +#ifdef BYTESTRING_LAZY +lazyZipTest + :: Testable prop + => (BYTESTRING_TYPE -> BYTESTRING_TYPE -> prop) + -> BYTESTRING_TYPE -> BYTESTRING_TYPE -> Property +lazyZipTest fun = \x0 y0 -> let + msg = "Input chunks are: " ++ show (B.toChunks x0, B.toChunks y0) + (x, y) | B.length x0 <= B.length y0 + = (x0, y0 <> error "too strict") + | otherwise + = (x0 <> error "too strict", y0) + in counterexample msg (fun x y) +#endif diff --git a/tests/QuickCheckUtils.hs b/tests/QuickCheckUtils.hs index 64bb1d59a..80e7ee64b 100644 --- a/tests/QuickCheckUtils.hs +++ b/tests/QuickCheckUtils.hs @@ -56,6 +56,8 @@ instance Arbitrary L.ByteString where (sizedByteString (n `div` numChunks)) + shrink = map L.fromChunks . shrink . L.toChunks + instance CoArbitrary L.ByteString where coarbitrary s = coarbitrary (L.unpack s) From c8e18d672ad402813f8781c4acf4763b713494bf Mon Sep 17 00:00:00 2001 From: Matthew Craven Date: Tue, 2 Apr 2024 16:02:51 -0400 Subject: [PATCH 2/2] Lazy.zipWith: evaluate the 'unsafeHead' calls eagerly --- Data/ByteString/Lazy.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index 10da78d35..53577b327 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -1445,9 +1445,13 @@ zipWith f (Chunk a as) (Chunk b bs) = go a as b bs -- This loop is written in a slightly awkward way but ensures we -- don't have to allocate any 'Chunk' objects to pass to a recursive -- call. We have in some sense performed SpecConstr manually. - go !x xs !y ys - = f (S.unsafeHead x) (S.unsafeHead y) - : to (S.unsafeTail x) xs (S.unsafeTail y) ys + go !x xs !y ys = let + -- Creating a thunk for reading one byte would + -- be wasteful, so we evaluate these eagerly. + -- See also #558 for a similar issue with uncons. + !xHead = S.unsafeHead x + !yHead = S.unsafeHead y + in f xHead yHead : to (S.unsafeTail x) xs (S.unsafeTail y) ys to !x xs !y ys | Chunk x' xs' <- chunk x xs