From 96880aa5050c434b54626b73cc6837a9bb250654 Mon Sep 17 00:00:00 2001 From: Viktor Dukhovni Date: Thu, 12 Jan 2023 21:59:41 -0500 Subject: [PATCH] Avoid per-byte loop in cstring{,Utf8} builders Copy chunks of the input to the output buffer with 'memcpy', up to the shorter of the available buffer space and the "null-free" portion of the remaining string. For the UTF8 version, encoded NUL bytes are located via strstr(3). --- Data/ByteString/Builder/Prim.hs | 116 ++++++++++++------ Data/ByteString/Internal.hs | 1 + Data/ByteString/Internal/Type.hs | 4 + .../Data/ByteString/Builder/Prim/Tests.hs | 7 +- 4 files changed, 87 insertions(+), 41 deletions(-) diff --git a/Data/ByteString/Builder/Prim.hs b/Data/ByteString/Builder/Prim.hs index e08e69e03..894347869 100644 --- a/Data/ByteString/Builder/Prim.hs +++ b/Data/ByteString/Builder/Prim.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} +{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} {-# LANGUAGE MagicHash, UnboxedTuples, PatternGuards #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} {-# LANGUAGE Trustworthy #-} @@ -469,6 +469,7 @@ import Data.ByteString.Builder.Prim.ASCII import Foreign import Foreign.C.Types import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) +import GHC.Int (Int (..)) import GHC.Word (Word8 (..)) import GHC.Exts import GHC.IO @@ -672,50 +673,87 @@ primMapLazyByteStringBounded w = -- -- @since 0.11.0.0 cstring :: Addr# -> Builder -cstring = - \addr0 -> builder $ step addr0 - where - step :: Addr# -> BuildStep r -> BuildStep r - step !addr !k br@(BufferRange op0@(Ptr op0#) ope) - | W8# ch == 0 = k br - | op0 == ope = - return $ bufferFull 1 op0 (step addr k) - | otherwise = do - IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of - s' -> (# s', () #) - let br' = BufferRange (op0 `plusPtr` 1) ope - step (addr `plusAddr#` 1#) k br' - where - !ch = indexWord8OffAddr# addr 0# +cstring = \addr0 -> builder $ \k br -> do +#if __GLASGOW_HASKELL__ >= 811 + let len = cstringLength# addr0 +#else + (I# len) <- fromIntegral <$> c_strlen (Ptr addr0) +#endif + cstring_step addr0 len k br +{-# INLINE cstring #-} + +cstring_step :: Addr# -> Int# -> BuildStep r -> BuildStep r +cstring_step !addr !len !k br@(BufferRange op0 ope) + -- String is empty, process the continuation + | (I# len) == 0 = k br + -- Buffer is full, allocate some more... We ask for just one more + -- byte, but the builder allocation strategy will in practice give + -- us more space, which we'll consume in a single step. + | op0 == ope = + return $ bufferFull 1 op0 (cstring_step addr len k) + -- Copy as much of the string as fits into the available buffer space. + -- If the string is long enough, we may have asked for less than its + -- full length, filling the buffer with the rest will go into the next + -- builder step. + | otherwise = do + let !avail@(I# avail#) = min (I# len) (ope `minusPtr` op0) + br' = BufferRange (op0 `plusPtr` avail) ope + addr' = addr `plusAddr#` avail# + len' = len -# avail# + S.memcpy op0 (Ptr addr) avail + cstring_step addr' len' k br' -- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'. -- Null characters can be encoded as @0xc0 0x80@. -- -- @since 0.11.0.0 cstringUtf8 :: Addr# -> Builder -cstringUtf8 = - \addr0 -> builder $ step addr0 - where - step :: Addr# -> BuildStep r -> BuildStep r - step !addr !k br@(BufferRange op0@(Ptr op0#) ope) - | W8# ch == 0 = k br - | op0 == ope = - return $ bufferFull 1 op0 (step addr k) - -- NULL is encoded as 0xc0 0x80 - | W8# ch == 0xc0 - , W8# (indexWord8OffAddr# addr 1#) == 0x80 = do - let !(W8# nullByte#) = 0 - IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of - s' -> (# s', () #) - let br' = BufferRange (op0 `plusPtr` 1) ope - step (addr `plusAddr#` 2#) k br' - | otherwise = do - IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of - s' -> (# s', () #) - let br' = BufferRange (op0 `plusPtr` 1) ope - step (addr `plusAddr#` 1#) k br' - where - !ch = indexWord8OffAddr# addr 0# +cstringUtf8 = \addr0 -> builder $ \k br -> do +#if __GLASGOW_HASKELL__ >= 811 + let len = cstringLength# addr0 +#else + (I# len) <- fromIntegral <$> c_strlen (Ptr addr0) +#endif + nullAt <- S.c_strstr (Ptr addr0) (Ptr "\xc0\x80"#) + cstringUtf8_step addr0 len nullAt k br +{-# INLINE cstringUtf8 #-} + +cstringUtf8_step :: Addr# -> Int# -> Ptr Word8 -> BuildStep r -> BuildStep r +cstringUtf8_step !addr !len !nullAt !k br@(BufferRange op0@(Ptr op0#) ope) + -- String is empty, process the continuation + | (I# len) == 0 = k br + -- Contains no encoded nulls, use simpler 'cstring' code + | nullPtr == nullAt = + cstring_step addr len k br + -- Buffer is full, allocate some more... We ask for just one more + -- byte, but the builder allocation strategy will in practice give + -- us more space, which we'll consume in a single step. + | op0 == ope = + return $ bufferFull 1 op0 (cstringUtf8_step addr len nullAt k) + -- We're at the encoded null-byte, append a '\0' to the buffer and + -- continue with the rest of the input string, after locating the + -- next encoded null-byte, if any. + | (Ptr addr) == nullAt = do + let !(W8# nullByte#) = 0 + IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of + s' -> (# s', () #) + let br' = BufferRange (op0 `plusPtr` 1) ope + addr' = addr `plusAddr#` 2# + len' = len -# 2# + nullAt' <- S.c_strstr (Ptr addr') (Ptr "\xc0\x80"#) + cstringUtf8_step addr' len' nullAt' k br' + -- Copy as much of the null-free portion of the string as fits into the + -- available buffer space. If the string is long enough, we may have asked + -- for less than its full length, filling the buffer with the rest will go + -- into the next builder step. + | otherwise = do + let !nullFree = nullAt `minusPtr` (Ptr addr) + !avail@(I# avail#) = min nullFree (ope `minusPtr` op0) + br' = BufferRange (op0 `plusPtr` avail) ope + addr' = addr `plusAddr#` avail# + len' = len -# avail# + S.memcpy op0 (Ptr addr) avail + cstringUtf8_step addr' len' nullAt k br' ------------------------------------------------------------------------------ -- Char8 encoding diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs index b4481a833..a0fd23d55 100644 --- a/Data/ByteString/Internal.hs +++ b/Data/ByteString/Internal.hs @@ -65,6 +65,7 @@ module Data.ByteString.Internal ( -- * Standard C Functions c_strlen, + c_strstr, c_free_finalizer, memchr, diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs index 24596c4f6..eb1d2b316 100644 --- a/Data/ByteString/Internal/Type.hs +++ b/Data/ByteString/Internal/Type.hs @@ -82,6 +82,7 @@ module Data.ByteString.Internal.Type ( -- * Standard C Functions c_strlen, + c_strstr, c_free_finalizer, memchr, @@ -1001,6 +1002,9 @@ accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r foreign import ccall unsafe "string.h strlen" c_strlen :: CString -> IO CSize +foreign import ccall unsafe "string.h strstr" c_strstr + :: CString -> CString -> IO (Ptr Word8) + foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer :: FunPtr (Ptr Word8 -> IO ()) diff --git a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs index 230882335..9d499b80d 100644 --- a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs +++ b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs @@ -33,8 +33,11 @@ testCString = testProperty "cstring" $ testCStringUtf8 :: TestTree testCStringUtf8 = testProperty "cstringUtf8" $ - toLazyByteString (BP.cstringUtf8 "hello\xc0\x80world!"#) == - LC.pack "hello" `L.append` L.singleton 0x00 `L.append` LC.pack "world!" + toLazyByteString (BP.cstringUtf8 "hello\xc0\x80world\xc0\x80!"#) == + LC.pack "hello" `L.append` L.singleton 0x00 + `L.append` LC.pack "world" + `L.append` L.singleton 0x00 + `L.append` LC.singleton '!' ------------------------------------------------------------------------------ -- Binary