From 921d0e4d9474c1e837cb72097e0c7f777aa48da7 Mon Sep 17 00:00:00 2001 From: Matthew Craven Date: Wed, 10 Apr 2024 22:46:00 -0400 Subject: [PATCH] Fix several bugs around the 'byteString' family of Builders --- Data/ByteString/Builder/Internal.hs | 38 +++++++------- tests/Properties/ByteString.hs | 39 +++++++++------ tests/QuickCheckUtils.hs | 6 +++ .../builder/Data/ByteString/Builder/Tests.hs | 50 +++++++++++++++++-- 4 files changed, 94 insertions(+), 39 deletions(-) diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs index 109c762e2..083e39b4a 100644 --- a/Data/ByteString/Builder/Internal.hs +++ b/Data/ByteString/Builder/Internal.hs @@ -133,6 +133,7 @@ import Data.Semigroup (Semigroup(..)) import Data.List.NonEmpty (NonEmpty(..)) import qualified Data.ByteString as S +import qualified Data.ByteString.Unsafe as S import qualified Data.ByteString.Internal.Type as S import qualified Data.ByteString.Lazy.Internal as L import qualified Data.ByteString.Short.Internal as Sh @@ -796,24 +797,23 @@ ensureFree minFree = | ope `minusPtr` op < minFree = return $ bufferFull minFree op k | otherwise = k br --- | Copy the bytes from a 'BufferRange' into the output stream. -wrappedBytesCopyStep :: BufferRange -- ^ Input 'BufferRange'. +-- | Copy the bytes from a 'S.StrictByteString' into the output stream. +wrappedBytesCopyStep :: S.StrictByteString -- ^ Input 'S.StrictByteString'. -> BuildStep a -> BuildStep a -wrappedBytesCopyStep (BufferRange ip0 ipe) k = - go ip0 +wrappedBytesCopyStep bs0 k = + go bs0 where - go !ip (BufferRange op ope) + go !bs@(S.BS ifp inpRemaining) (BufferRange op ope) | inpRemaining <= outRemaining = do - copyBytes op ip inpRemaining + S.unsafeWithForeignPtr ifp $ \ip -> copyBytes op ip inpRemaining let !br' = BufferRange (op `plusPtr` inpRemaining) ope k br' | otherwise = do - copyBytes op ip outRemaining - let !ip' = ip `plusPtr` outRemaining - return $ bufferFull 1 ope (go ip') + S.unsafeWithForeignPtr ifp $ \ip -> copyBytes op ip outRemaining + let !bs' = S.unsafeDrop outRemaining bs + return $ bufferFull 1 ope (go bs') where outRemaining = ope `minusPtr` op - inpRemaining = ipe `minusPtr` ip -- Strict ByteStrings @@ -834,7 +834,7 @@ byteStringThreshold :: Int -> S.StrictByteString -> Builder byteStringThreshold maxCopySize = \bs -> builder $ step bs where - step bs@(S.BS _ len) !k br@(BufferRange !op _) + step bs@(S.BS _ len) k br@(BufferRange !op _) | len <= maxCopySize = byteStringCopyStep bs k br | otherwise = return $ insertChunk op bs k @@ -850,19 +850,17 @@ byteStringCopy = \bs -> builder $ byteStringCopyStep bs {-# INLINE byteStringCopyStep #-} byteStringCopyStep :: S.StrictByteString -> BuildStep a -> BuildStep a -byteStringCopyStep (S.BS ifp isize) !k0 br0@(BufferRange op ope) +byteStringCopyStep bs@(S.BS ifp isize) k br@(BufferRange op ope) -- Ensure that the common case is not recursive and therefore yields -- better code. - | op' <= ope = do copyBytes op ip isize - touchForeignPtr ifp - k0 (BufferRange op' ope) - | otherwise = wrappedBytesCopyStep (BufferRange ip ipe) k br0 + -- What's the reasoning here, more concretely? + | isize <= osize = do + S.unsafeWithForeignPtr ifp $ \ip -> copyBytes op ip isize + k (BufferRange op' ope) + | otherwise = wrappedBytesCopyStep bs k br where + osize = ope `minusPtr` op op' = op `plusPtr` isize - ip = unsafeForeignPtrToPtr ifp - ipe = ip `plusPtr` isize - k br = do touchForeignPtr ifp -- input consumed: OK to release here - k0 br -- | Construct a 'Builder' that always inserts the 'S.StrictByteString' -- directly as a chunk. diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs index cf1bc8634..7b36aaa37 100644 --- a/tests/Properties/ByteString.hs +++ b/tests/Properties/ByteString.hs @@ -367,7 +367,7 @@ tests = , testProperty "toChunks . fromChunks" $ \xs -> B.toChunks (B.fromChunks xs) === filter (/= mempty) xs , testProperty "append lazy" $ - \(toElem -> c) -> B.head (B.singleton c <> undefined) === c + \(toElem -> c) -> B.head (B.singleton c <> tooStrictErr) === c , testProperty "compareLength 1" $ \x -> B.compareLength x (B.length x) === EQ , testProperty "compareLength 2" $ @@ -379,13 +379,13 @@ tests = , testProperty "compareLength 5" $ \x (intToIndexTy -> n) -> B.compareLength x n === compare (B.length x) n , testProperty "dropEnd lazy" $ - \(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> undefined)) === B.singleton c + \(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> tooStrictErr)) === B.singleton c , testProperty "dropWhileEnd lazy" $ - \(toElem -> c) -> B.take 1 (B.dropWhileEnd (const False) (B.singleton c <> undefined)) === B.singleton c + \(toElem -> c) -> B.take 1 (B.dropWhileEnd (const False) (B.singleton c <> tooStrictErr)) === B.singleton c , testProperty "breakEnd lazy" $ - \(toElem -> c) -> B.take 1 (fst $ B.breakEnd (const True) (B.singleton c <> undefined)) === B.singleton c + \(toElem -> c) -> B.take 1 (fst $ B.breakEnd (const True) (B.singleton c <> tooStrictErr)) === B.singleton c , testProperty "spanEnd lazy" $ - \(toElem -> c) -> B.take 1 (fst $ B.spanEnd (const False) (B.singleton c <> undefined)) === B.singleton c + \(toElem -> c) -> B.take 1 (fst $ B.spanEnd (const False) (B.singleton c <> tooStrictErr)) === B.singleton c #endif , testProperty "length" $ @@ -604,12 +604,21 @@ tests = # ifdef BYTESTRING_LAZY -- Don't use (===) in these laziness tests: -- We don't want printing the test case to fail! - , testProperty "zip is lazy" $ lazyZipTest $ - \x y -> B.zip x y == zip (B.unpack x) (B.unpack y) - , testProperty "zipWith is lazy" $ \f -> lazyZipTest $ - \x y -> (B.zipWith f x y :: [Int]) == zipWith f (B.unpack x) (B.unpack y) - , testProperty "packZipWith is lazy" $ \f -> lazyZipTest $ - \x y -> B.unpack (B.packZipWith ((toElem .) . f) x y) == zipWith ((toElem .) . f) (B.unpack x) (B.unpack y) + , testProperty "zip is lazy in the longer input" $ zipLazyInLongerInputTest $ + \x y -> B.zip x y == zip (B.unpack x) (B.unpack y) + , testProperty "zipWith is lazy in the longer input" $ + \f -> zipLazyInLongerInputTest $ + \x y -> (B.zipWith f x y :: [Int]) == zipWith f (B.unpack x) (B.unpack y) + , testProperty "packZipWith is lazy in the longer input" $ + \f -> zipLazyInLongerInputTest $ + \x y -> B.unpack (B.packZipWith ((toElem .) . f) x y) == zipWith ((toElem .) . f) (B.unpack x) (B.unpack y) + , testProperty "zip is maximally lazy" $ \x y -> + zip (B.unpack x) (B.unpack y) `List.isPrefixOf` + B.zip (x <> tooStrictErr) (y <> tooStrictErr) + , testProperty "zipWith is maximally lazy" $ \f x y -> + zipWith f (B.unpack x) (B.unpack y) `List.isPrefixOf` + B.zipWith @Int f (x <> tooStrictErr) (y <> tooStrictErr) + -- (It's not clear if packZipWith is required to be maximally lazy.) # endif , testProperty "unzip" $ \(fmap (toElem *** toElem) -> xs) -> (B.unpack *** B.unpack) (B.unzip xs) === unzip xs @@ -806,15 +815,15 @@ readIntegerUnsigned xs = case readMaybe ys of #endif #ifdef BYTESTRING_LAZY -lazyZipTest +zipLazyInLongerInputTest :: Testable prop => (BYTESTRING_TYPE -> BYTESTRING_TYPE -> prop) -> BYTESTRING_TYPE -> BYTESTRING_TYPE -> Property -lazyZipTest fun = \x0 y0 -> let +zipLazyInLongerInputTest fun = \x0 y0 -> let msg = "Input chunks are: " ++ show (B.toChunks x0, B.toChunks y0) (x, y) | B.length x0 <= B.length y0 - = (x0, y0 <> error "too strict") + = (x0, y0 <> tooStrictErr) | otherwise - = (x0 <> error "too strict", y0) + = (x0 <> tooStrictErr, y0) in counterexample msg (fun x y) #endif diff --git a/tests/QuickCheckUtils.hs b/tests/QuickCheckUtils.hs index 7328aed5d..13e3ae0e2 100644 --- a/tests/QuickCheckUtils.hs +++ b/tests/QuickCheckUtils.hs @@ -7,6 +7,7 @@ module QuickCheckUtils , CByteString(..) , Sqrt(..) , int64OK + , tooStrictErr ) where import Test.Tasty.QuickCheck @@ -19,6 +20,7 @@ import Data.Int import System.IO import Foreign.C (CChar) import GHC.TypeLits (TypeError, ErrorMessage(..)) +import GHC.Stack (withFrozenCallStack, HasCallStack) import qualified Data.ByteString.Short as SB import qualified Data.ByteString as P @@ -134,3 +136,7 @@ instance {-# OVERLAPPING #-} -- defined in "QuickCheckUtils". int64OK :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property int64OK f = propertyForAllShrinkShow arbitrary shrink (\v -> [show v]) f + +tooStrictErr :: forall a. HasCallStack => a +tooStrictErr = withFrozenCallStack $ + error "A lazy sub-expression was unexpectedly evaluated" diff --git a/tests/builder/Data/ByteString/Builder/Tests.hs b/tests/builder/Data/ByteString/Builder/Tests.hs index a40c3fd2f..224f27531 100644 --- a/tests/builder/Data/ByteString/Builder/Tests.hs +++ b/tests/builder/Data/ByteString/Builder/Tests.hs @@ -18,7 +18,7 @@ import Control.Monad.Trans.State (StateT, evalStateT, evalState, put, import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.Writer (WriterT, execWriterT, tell) -import Foreign (minusPtr) +import Foreign (minusPtr, castPtr, ForeignPtr, withForeignPtr, Int64) import Data.Char (chr) import Data.Bits ((.|.), shiftL) @@ -40,7 +40,6 @@ import Data.ByteString.Builder.Prim.TestUtils import Control.Exception (evaluate) import System.IO (openTempFile, hPutStr, hClose, hSetBinaryMode, hSetEncoding, utf8, hSetNewlineMode, noNewlineTranslation) -import Foreign (ForeignPtr, withForeignPtr, castPtr) import Foreign.C.String (withCString) import Numeric (showFFloat) import System.Posix.Internals (c_unlink) @@ -50,7 +49,8 @@ import Test.Tasty.QuickCheck ( Arbitrary(..), oneof, choose, listOf, elements , counterexample, ioProperty, Property, testProperty , (===), (.&&.), conjoin, forAll, forAllShrink - , UnicodeString(..), NonNegative(..) + , UnicodeString(..), NonNegative(..), Positive(..) + , mapSize, (==>) ) import QuickCheckUtils @@ -70,7 +70,8 @@ tests = testsASCII ++ testsFloating ++ testsChar8 ++ - testsUtf8 + testsUtf8 ++ + [testLaziness] ------------------------------------------------------------------------------ @@ -981,3 +982,44 @@ testsUtf8 = [ testBuilderConstr "charUtf8" charUtf8_list charUtf8 , testBuilderConstr "stringUtf8" (foldMap charUtf8_list) stringUtf8 ] + +testLaziness :: TestTree +testLaziness = testGroup "Builder laziness" + [ testProperty "byteString" $ mapSize (+ 10) $ + \bs (Positive chunkSize) -> + let strategy = safeStrategy chunkSize chunkSize + lbs = toLazyByteStringWith strategy L.empty + (byteString bs <> tooStrictErr) + in (S.length bs > max chunkSize 8) ==> L.head lbs == S.head bs + , testProperty "byteStringCopy" $ mapSize (+ 10) $ + \bs (Positive chunkSize) -> + let strategy = safeStrategy chunkSize chunkSize + lbs = toLazyByteStringWith strategy L.empty + (byteStringCopy bs <> tooStrictErr) + in (S.length bs > max chunkSize 8) ==> L.head lbs == S.head bs + , testProperty "byteStringInsert" $ mapSize (+ 10) $ + \bs (Positive chunkSize) -> + let strategy = safeStrategy chunkSize chunkSize + lbs = toLazyByteStringWith strategy L.empty + (byteStringInsert bs <> tooStrictErr) + in L.take (fromIntegral @Int @Int64 (S.length bs)) lbs + == L.fromStrict bs + , testProperty "lazyByteString" $ mapSize (+ 10) $ + \bs (Positive chunkSize) -> + let strategy = safeStrategy chunkSize chunkSize + lbs = toLazyByteStringWith strategy L.empty + (lazyByteString bs <> tooStrictErr) + in (L.length bs > fromIntegral @Int @Int64 (max chunkSize 8)) + ==> L.head lbs == L.head bs + , testProperty "shortByteString" $ mapSize (+ 10) $ + \bs (Positive chunkSize) -> + let strategy = safeStrategy chunkSize chunkSize + lbs = toLazyByteStringWith strategy L.empty + (shortByteString bs <> tooStrictErr) + in (Sh.length bs > max chunkSize 8) ==> L.head lbs == Sh.head bs + , testProperty "flush" $ \recipe -> let + !(b, toLBS) = recipeComponents recipe + !lbs1 = toLazyByteString b + !lbs2 = L.take (L.length lbs1) (toLBS $ b <> flush <> tooStrictErr) + in lbs1 == lbs2 + ]