Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Text.IO.Utf8 module #503

Merged
merged 10 commits into from
Mar 11, 2023
Merged
5 changes: 4 additions & 1 deletion src/Data/Text/IO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@
-- The functions in this module obey the runtime system's locale,
-- character set encoding, and line ending conversion settings.
--
-- If you want to do I\/O using the UTF-8 encoding, use "Data.Text.IO.Utf8",
-- which is faster than this module.
--
-- If you know in advance that you will be working with data that has
-- a specific encoding (e.g. UTF-8), and your application is highly
-- a specific encoding, and your application is highly
-- performance sensitive, you may find that it is faster to perform
-- I\/O with bytestrings and to encode and decode yourself than to use
-- the functions in this module.
Expand Down
93 changes: 93 additions & 0 deletions src/Data/Text/IO/Utf8.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
-- |
-- Module : Data.Text.IO.Utf8
-- License : BSD-style
-- Portability : GHC
--
-- Efficient UTF-8 support for text I\/O.
-- Unlike "Data.Text.IO", these functions do not depend on the locale
-- and do not do line ending conversion.
module Data.Text.IO.Utf8
(
-- * File-at-a-time operations
readFile
, writeFile
, appendFile
-- * Operations on handles
, hGetContents
, hGetLine
, hPutStr
, hPutStrLn
-- * Special cases for standard input and output
, interact
, getContents
, getLine
, putStr
, putStrLn
) where

import Prelude hiding (readFile, writeFile, appendFile, interact, getContents, getLine, putStr, putStrLn)
import Control.Exception (evaluate)
import Control.Monad ((<=<))
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import GHC.IO.Handle (Handle)
import qualified Data.ByteString.Char8 as B.Char8

-- | Decode a UTF-8 'ByteString' from within 'IO'.
--
-- The result of 'decodeUtf8' is forced with 'evaluate', so an exception
-- raised while decoding surfaces when this action runs rather than when
-- the resulting 'Text' is first inspected.
decodeUtf8IO :: ByteString -> IO Text
decodeUtf8IO bytes = evaluate (decodeUtf8 bytes)
oberblastmeister marked this conversation as resolved.
Show resolved Hide resolved

-- | The 'readFile' function reads a file and returns the contents of
-- the file as a string. The entire file is read strictly, as with
oberblastmeister marked this conversation as resolved.
Show resolved Hide resolved
-- 'getContents'.
--
-- The bytes read from the file are decoded as UTF-8.
readFile :: FilePath -> IO Text
readFile path = B.readFile path >>= decodeUtf8IO
oberblastmeister marked this conversation as resolved.
Show resolved Hide resolved

-- | Encode a string as UTF-8 and write it to a file. Existing contents
-- are discarded: the file is truncated to zero length before writing
-- begins.
writeFile :: FilePath -> Text -> IO ()
writeFile path contents = B.writeFile path (encodeUtf8 contents)

-- | Encode a string as UTF-8 and add it to the end of a file.
appendFile :: FilePath -> Text -> IO ()
appendFile path contents = B.appendFile path (encodeUtf8 contents)

-- | Read everything that remains on a 'Handle' and decode it as UTF-8.
hGetContents :: Handle -> IO Text
hGetContents h = B.hGetContents h >>= decodeUtf8IO

-- | Read one line from a handle and decode it as UTF-8.
hGetLine :: Handle -> IO Text
hGetLine h = B.hGetLine h >>= decodeUtf8IO

-- | Encode a string as UTF-8 and write it to a handle.
hPutStr :: Handle -> Text -> IO ()
hPutStr h t = B.hPutStr h (encodeUtf8 t)

-- | Write a string to a handle, followed by a newline.
--
-- The UTF-8 encoded text is handed to 'B.Char8.hPutStrLn', which appends
-- the newline itself, instead of this module issuing a second, separate
-- write for it. The bytes written are the same as before; for short
-- strings the library emits the text and its newline together, which
-- makes it less likely that output from another thread lands between a
-- line and its terminator.
hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn h = B.Char8.hPutStrLn h . encodeUtf8

-- | Run a @Text -> Text@ transformation over standard input.
--
-- All of the standard input device is read (and decoded as UTF-8) by
-- 'getContents', the supplied function is applied to it, and the
-- resulting string is written to the standard output device with
-- 'putStr'.
interact :: (Text -> Text) -> IO ()
interact transform = do
  input <- getContents
  putStr (transform input)

-- | Read all of 'stdin' and decode it as UTF-8 into a single string.
getContents :: IO Text
getContents = B.getContents >>= decodeUtf8IO

-- | Read one line of user input from 'stdin' and decode it as UTF-8.
getLine :: IO Text
getLine = B.getLine >>= decodeUtf8IO

-- | Encode a string as UTF-8 and write it to 'stdout'.
putStr :: Text -> IO ()
putStr t = B.putStr (encodeUtf8 t)

-- | Write a string to 'stdout', followed by a newline.
--
-- The UTF-8 encoded text is handed to 'B.Char8.putStrLn', which appends
-- the newline itself, instead of this module issuing a second, separate
-- write for it. The bytes written are the same as before; for short
-- strings the library emits the text and its newline together, which
-- makes it less likely that output from another thread lands between a
-- line and its terminator.
putStrLn :: Text -> IO ()
putStrLn = B.Char8.putStrLn . encodeUtf8
8 changes: 7 additions & 1 deletion tests/Tests/Properties/LowLevel.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.IO as TL
import qualified Data.Text.IO.Utf8 as TU
import qualified System.IO as IO

#ifdef MIN_VERSION_tasty_inspection_testing
Expand Down Expand Up @@ -107,6 +108,9 @@ t_write_read_line m b t = write_read (T.concat . take 1) T.filter T.hPutStrLn
-- Lazy-text line variant: hands 'TL.hPutStrLn' as the writer and
-- 'TL.hGetLine' as the reader to 'write_read', together with
-- @TL.concat . take 1@, 'TL.filter' and the singleton list @[t]@.
-- NOTE(review): 'write_read' is defined outside this view; presumably it
-- performs a write-then-read round trip -- confirm against its definition.
tl_write_read_line m b t = write_read (TL.concat . take 1) TL.filter TL.hPutStrLn
TL.hGetLine m b [t]

-- UTF-8 variant of the write/read property: hands 'TU.hPutStr' as the
-- writer and 'TU.hGetContents' as the reader to 'write_read', together
-- with 'T.unlines' and 'T.filter'.
-- NOTE(review): 'write_read' is defined outside this view; presumably it
-- performs a write-then-read round trip -- confirm against its definition.
utf8_write_read = write_read T.unlines T.filter TU.hPutStr TU.hGetContents
-- UTF-8 line variant: hands 'TU.hPutStrLn' as the writer and 'TU.hGetLine'
-- as the reader to 'write_read', together with @T.concat . take 1@,
-- 'T.filter' and the singleton list @[t]@.
-- NOTE(review): 'write_read' is defined outside this view; presumably it
-- performs a write-then-read round trip -- confirm against its definition.
utf8_write_read_line m b t = write_read (T.concat . take 1) T.filter TU.hPutStrLn
TU.hGetLine m b [t]

testLowLevel :: TestTree
testLowLevel =
Expand Down Expand Up @@ -142,7 +146,9 @@ testLowLevel =
testProperty "t_write_read" t_write_read,
testProperty "tl_write_read" tl_write_read,
testProperty "t_write_read_line" t_write_read_line,
testProperty "tl_write_read_line" tl_write_read_line
testProperty "tl_write_read_line" tl_write_read_line,
testProperty "utf8_write_read" utf8_write_read,
testProperty "utf8_write_read_line" utf8_write_read_line
-- These tests are subject to I/O race conditions
-- testProperty "t_put_get" t_put_get,
-- testProperty "tl_put_get" tl_put_get
Expand Down
1 change: 1 addition & 0 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ library
Data.Text.Encoding.Error
Data.Text.Foreign
Data.Text.IO
Data.Text.IO.Utf8
Data.Text.Internal
Data.Text.Internal.Builder
Data.Text.Internal.Builder.Functions
Expand Down