Skip to content

Commit

Permalink
Check the no-empty-chunks invariant in CI (#565)
Browse files Browse the repository at this point in the history
* Thoroughly check the no-empty-chunks invariant in CI

Fixes #564.

* Avoid even temporary empty chunks in lazy foldl1

* Fiddle with formatting and documentation

This also removes deriving Typeable, which does nothing since ghc-7.10.

* Re-insert reference to invariant-checking functions
  • Loading branch information
clyring authored Feb 8, 2023
1 parent 50b07d9 commit 0bd68ca
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ jobs:
dist-newstyle
key: ${{ runner.os }}-latest
- name: Test
run: cabal test --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts'
run: cabal test --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts -DHS_BYTESTRING_ASSERTIONS'

old-gcc:
needs: build
Expand Down
4 changes: 1 addition & 3 deletions Data/ByteString/Internal/Type.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-}
{-# LANGUAGE UnliftedFFITypes, MagicHash,
UnboxedTuples, DeriveDataTypeable #-}
UnboxedTuples #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
Expand Down Expand Up @@ -135,7 +135,6 @@ import Data.Bits ((.&.))
import Data.Char (ord)
import Data.Word

import Data.Typeable (Typeable)
import Data.Data (Data(..), mkNoRepType)

import GHC.Base (nullAddr#,realWorld#,unsafeChr)
Expand Down Expand Up @@ -242,7 +241,6 @@ pokeFpByteOff fp off val = unsafeWithForeignPtr fp $ \p ->
data ByteString = BS {-# UNPACK #-} !(ForeignPtr Word8) -- payload
{-# UNPACK #-} !Int -- length
-- ^ @since 0.11.0.0
deriving (Typeable)

-- | Type synonym for the strict flavour of 'ByteString'.
--
Expand Down
14 changes: 12 additions & 2 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -495,12 +495,22 @@ foldr' f a = go
-- argument, and thus must be applied to non-empty 'ByteString's.
foldl1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1 _ Empty = errorEmptyList "foldl1"
foldl1 f (Chunk c cs) = foldl f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
foldl1 f (Chunk c cs) = go (S.unsafeHead c) (S.unsafeTail c) cs
where
go v x xs = let v' = S.foldl f v x
in case xs of
Empty -> v'
Chunk x' xs' -> go v' x' xs'

-- | 'foldl1'' is like 'foldl1', but strict in the accumulator.
foldl1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1' _ Empty = errorEmptyList "foldl1'"
foldl1' f (Chunk c cs) = foldl' f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
foldl1' f (Chunk c cs) = go (S.unsafeHead c) (S.unsafeTail c) cs
where
go !v x xs = let v' = S.foldl' f v x
in case xs of
Empty -> v'
Chunk x' xs' -> go v' x' xs'

-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
-- and thus must be applied to non-empty 'ByteString's
Expand Down
51 changes: 42 additions & 9 deletions Data/ByteString/Lazy/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveLift #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE Unsafe #-}

#ifdef HS_BYTESTRING_ASSERTIONS
{-# LANGUAGE PatternSynonyms #-}
#endif

{-# OPTIONS_HADDOCK not-home #-}

-- |
Expand All @@ -24,7 +29,7 @@
module Data.ByteString.Lazy.Internal (

-- * The lazy @ByteString@ type and representation
ByteString(..),
ByteString(Empty, Chunk),
LazyByteString,
chunk,
foldrChunks,
Expand Down Expand Up @@ -64,23 +69,45 @@ import Control.DeepSeq (NFData, rnf)

import Data.String (IsString(..))

import Data.Typeable (Typeable)
import Data.Data (Data(..), mkNoRepType)

import GHC.Exts (IsList(..))

import qualified Language.Haskell.TH.Syntax as TH

#ifdef HS_BYTESTRING_ASSERTIONS
import Control.Exception (assert)
#endif


-- | A space-efficient representation of a 'Word8' vector, supporting many
-- efficient operations.
--
-- A lazy 'ByteString' contains 8-bit bytes, or by using the operations
-- from "Data.ByteString.Lazy.Char8" it can be interpreted as containing
-- 8-bit characters.
--
data ByteString = Empty | Chunk {-# UNPACK #-} !S.ByteString ByteString
deriving (Typeable, TH.Lift)
-- See 'invariant' function later in this module for internal invariants.
#ifndef HS_BYTESTRING_ASSERTIONS
data ByteString = Empty | Chunk {-# UNPACK #-} !S.ByteString ByteString
-- INVARIANT: The S.ByteString field of any Chunk is not empty.
-- (See also the 'invariant' and 'checkInvariant' functions.)

-- To make testing of this invariant convenient, we add an
-- assertion to that effect when the HS_BYTESTRING_ASSERTIONS
-- preprocessor macro is defined, by renaming the actual constructor
-- and providing a pattern synonym that does the checking:
#else
data ByteString = Empty | Chunk_ {-# UNPACK #-} !S.ByteString ByteString

pattern Chunk :: S.ByteString -> ByteString -> ByteString
pattern Chunk c cs <- Chunk_ c cs where
Chunk c@(S.BS _ len) cs = assert (len > 0) Chunk_ c cs

{-# COMPLETE Empty, Chunk #-}
#endif

deriving instance TH.Lift ByteString


-- | Type synonym for the lazy flavour of 'ByteString'.
--
Expand Down Expand Up @@ -158,15 +185,21 @@ unpackChars (Chunk c cs) = S.unpackAppendCharsLazy c (unpackChars cs)

------------------------------------------------------------------------

-- We no longer use these invariant-checking functions internally,
-- preferring an assertion on `Chunk` itself, controlled by the
-- HS_BYTESTRING_ASSERTIONS preprocessor macro.

-- | The data type invariant:
-- Every ByteString is either 'Empty' or consists of non-null 'S.ByteString's.
-- All functions must preserve this, and the QC properties must check this.
-- Every ByteString is either 'Empty' or consists of non-null
-- 'S.StrictByteString's. All functions must preserve this.
--
invariant :: ByteString -> Bool
invariant Empty = True
invariant (Chunk (S.BS _ len) cs) = len > 0 && invariant cs

-- | In a form that checks the invariant lazily.
-- | Lazily checks that the given 'ByteString' satisfies the data type's
-- "no empty chunks" invariant, raising an exception in place of the
-- first chunk that does not satisfy the invariant.
checkInvariant :: ByteString -> ByteString
checkInvariant Empty = Empty
checkInvariant (Chunk c@(S.BS _ len) cs)
Expand Down
4 changes: 0 additions & 4 deletions tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ import GHC.IO.Encoding
module Properties.ByteStringLazy (tests) where
#define BYTESTRING_TYPE B.ByteString
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B (invariant)
#endif

#else
Expand All @@ -55,7 +54,6 @@ import qualified Data.ByteString.Char8 as B
#else
module Properties.ByteStringLazyChar8 (tests) where
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.ByteString.Lazy.Internal as B (invariant)
#define BYTESTRING_TYPE B.ByteString
#endif

Expand Down Expand Up @@ -353,8 +351,6 @@ tests =
\f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))

#ifdef BYTESTRING_LAZY
, testProperty "invariant" $
\x -> B.invariant x
, testProperty "fromChunks . toChunks" $
\x -> B.fromChunks (B.toChunks x) === x
, testProperty "toChunks . fromChunks" $
Expand Down
4 changes: 1 addition & 3 deletions tests/QuickCheckUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import Foreign.C (CChar)
import qualified Data.ByteString.Short as SB
import qualified Data.ByteString as P
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as L (checkInvariant,ByteString(..))

import qualified Data.ByteString.Char8 as PC
import qualified Data.ByteString.Lazy.Char8 as LC
Expand All @@ -46,8 +45,7 @@ instance Arbitrary L.ByteString where
arbitrary = sized $ \n -> do numChunks <- choose (0, n)
if numChunks == 0
then return L.empty
else fmap (L.checkInvariant .
L.fromChunks .
else fmap (L.fromChunks .
filter (not . P.null)) $
vectorOf numChunks
(sizedByteString
Expand Down

0 comments on commit 0bd68ca

Please sign in to comment.