diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 2d8b337c5..071b9b0d9 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -1816,31 +1816,48 @@ hGetLine h = else ioe_EOF else haveBuf h_ buf' len xss - haveBuf h_@Handle__{haByteBuffer} + haveBuf h_@Handle__{haByteBuffer, haInputNL} buf@Buffer{ bufRaw=raw, bufR=w, bufL=r } len xss = do - off <- findEOL r w raw + (off, sizeNewline) <- findEOL haInputNL r w raw let new_len = len + off - r xs <- mkPS raw r off -- if eol == True, then off is the offset of the '\n' -- otherwise off == w and the buffer is now empty. if off /= w - then do if w == off + 1 - then writeIORef haByteBuffer buf{ bufL=0, bufR=0 } - else writeIORef haByteBuffer buf{ bufL = off + 1 } - mkBigPS new_len (xs:xss) + then do + -- If off + sizeNewline == w then the remaining buffer is empty + if (off + sizeNewline) == w + then writeIORef haByteBuffer buf{ bufL=0, bufR=0 } + else writeIORef haByteBuffer buf{ bufL = off + sizeNewline } + mkBigPS new_len (xs:xss) else fill h_ buf{ bufL=0, bufR=0 } new_len (xs:xss) -- find the end-of-line character, if there is one - findEOL r w raw - | r == w = return w + findEOL haInputNL r w raw + | r == w = return (w, 0) | otherwise = do c <- readWord8Buf raw r if c == fromIntegral (ord '\n') - then return r -- NB. not r+1: don't include the '\n' - else findEOL (r+1) w raw + then do + -- NB. not r+1: don't include the '\n' + -- Also, it is important that it ends the line in both modes + -- To match System.IO.hGetLine's behavior + return (r, 1) + else if haInputNL == CRLF && c == fromIntegral (ord '\r') && r+1 < w + then do + c' <- readWord8Buf raw (r+1) + if c' == fromIntegral (ord '\n') + then return (r, 2) -- NB. not r+1 or r+2: don't include the '\r\n' + else do + -- We cannot jump 2 characters ahead + -- because if we encountered '\r\r\n' + -- We would miss the pattern starting on the second '\r' + findEOL haInputNL (r+1) w raw + else findEOL haInputNL (r+1) w raw + mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString mkPS buf start end = diff --git a/tests/Properties.hs b/tests/Properties.hs index 5d954cdeb..bb41e8899 100644 --- a/tests/Properties.hs +++ b/tests/Properties.hs @@ -1614,6 +1614,27 @@ prop_read_write_file_D x = unsafePerformIO $ do (const $ do y <- D.readFile f return (x==y)) +prop_hgetline_like_s8_hgetline (LinedASCII filetext) (crlfIn, crlfOut) = idempotentIOProperty $ do + (fn, h) <- openTempFile "." "hgetline-prop-test.tmp" + hSetNewlineMode h noNewlineTranslation -- This is to ensure strings like \n are covered on Windows. + hPutStr h filetext + hClose h + bsLines <- readFileByLines C.hGetLine fn + sLines <- readFileByLines System.IO.hGetLine fn + removeFile fn + return (map C.unpack bsLines === sLines) + where + newlineMode = NewlineMode (if crlfIn then CRLF else LF) (if crlfOut then CRLF else LF) + readFileByLines getLine fn = withFile fn ReadMode $ \h -> do + hSetNewlineMode h newlineMode + readByLines getLine h + readByLines getLine h = do + isEnd <- hIsEOF h + if isEnd + then return [] + else (:) <$> getLine h <*> readByLines getLine h + + ------------------------------------------------------------------------ prop_append_file_P x y = unsafePerformIO $ do @@ -1791,7 +1812,8 @@ io_tests = , testProperty "appendFile " prop_append_file_D , testProperty "packAddress " prop_packAddress - + + , testProperty "pack.hGetLine=hGetLine" prop_hgetline_like_s8_hgetline ] misc_tests = diff --git a/tests/QuickCheckUtils.hs b/tests/QuickCheckUtils.hs index a5ff8ccfc..22d710a3f 100644 --- a/tests/QuickCheckUtils.hs +++ b/tests/QuickCheckUtils.hs @@ -93,6 +93,22 @@ instance Arbitrary String8 where toChar :: Word8 -> Char toChar = toEnum . fromIntegral +-- | Strings, but each char is ASCII and there are a lot of newlines generated +-- +newtype LinedASCII = LinedASCII String + deriving (Eq, Ord, Show) + +instance Arbitrary LinedASCII where + arbitrary = fmap LinedASCII . listOf . oneof $ + [ arbitraryASCIIChar + , elements ['\n', '\r'] + ] + + shrink (LinedASCII s) = fmap LinedASCII (shrink s) + +instance CoArbitrary LinedASCII where + coarbitrary (LinedASCII s) = coarbitrary s + ------------------------------------------------------------------------ -- -- We're doing two forms of testing here. Firstly, model based testing.