From a2c73244a4508ea82cc78b9ba3ac9b4294840019 Mon Sep 17 00:00:00 2001 From: William Rusnack Date: Tue, 16 Jan 2024 05:52:47 -0500 Subject: [PATCH] added zero padded exponent for scientific floating builder --- Data/ByteString/Builder/RealFloat.hs | 47 +++++++++++----- Data/ByteString/Builder/RealFloat/Internal.hs | 55 +++++++++++++++---- bench/BenchAll.hs | 12 ++++ bytestring.cabal | 3 +- .../builder/Data/ByteString/Builder/Tests.hs | 36 +++++++++++- 5 files changed, 124 insertions(+), 29 deletions(-) diff --git a/Data/ByteString/Builder/RealFloat.hs b/Data/ByteString/Builder/RealFloat.hs index 1d6f89d18..2d33f00d5 100644 --- a/Data/ByteString/Builder/RealFloat.hs +++ b/Data/ByteString/Builder/RealFloat.hs @@ -8,6 +8,7 @@ {-# LANGUAGE TypeApplications #-} {-# LANGUAGE BlockArguments #-} {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE AllowAmbiguousTypes #-} -- | -- Module : Data.ByteString.Builder.RealFloat -- Copyright : (c) Lawrence Wu 2021 @@ -77,6 +78,7 @@ module Data.ByteString.Builder.RealFloat , standard , standardDefaultPrecision , scientific + , scientificZeroPaddedExponent , generic ) where @@ -117,7 +119,7 @@ doubleDec = formatFloating generic -- | Standard notation with `n` decimal places -- -- @since 0.11.2.0 -standard :: Int -> FloatFormat +standard :: Int -> FloatFormat a standard n = FStandard { precision = Just n , specials = standardSpecialStrings {positiveZero, negativeZero} @@ -131,7 +133,7 @@ standard n = FStandard -- | Standard notation with the \'default precision\' (decimal places matching `show`) -- -- @since 0.11.2.0 -standardDefaultPrecision :: FloatFormat +standardDefaultPrecision :: FloatFormat a standardDefaultPrecision = FStandard { precision = Nothing , specials = standardSpecialStrings @@ -140,8 +142,24 @@ standardDefaultPrecision = FStandard -- | Scientific notation with \'default precision\' (decimal places matching `show`) -- -- @since 0.11.2.0 -scientific :: FloatFormat -scientific = fScientific 'e' scientificSpecialStrings +scientific :: FloatFormat a +scientific = fScientific 'e' scientificSpecialStrings False + +-- | Like @scientific@ but has a zero padded exponent. +scientificZeroPaddedExponent :: forall a. ZeroPadCount a => FloatFormat a +scientificZeroPaddedExponent = scientific + { expoZeroPad = True + , specials = scientificSpecialStrings + { positiveZero + , negativeZero = '-' : positiveZero + } + } + where + positiveZero = "0.0e" <> replicate (zeroPadCount @a) '0' + +class ZeroPadCount a where zeroPadCount :: Int +instance ZeroPadCount Float where zeroPadCount = 2 +instance ZeroPadCount Double where zeroPadCount = 3 scientificSpecialStrings, standardSpecialStrings :: R.SpecialStrings scientificSpecialStrings = R.SpecialStrings @@ -159,8 +177,8 @@ standardSpecialStrings = scientificSpecialStrings -- | Standard or scientific notation depending on the exponent. Matches `show` -- -- @since 0.11.2.0 -generic :: FloatFormat -generic = fGeneric 'e' Nothing (0,7) standardSpecialStrings +generic :: FloatFormat a +generic = fGeneric 'e' Nothing (0,7) standardSpecialStrings False -- TODO: support precision argument for FGeneric and FScientific -- | Returns a rendered Float. Returns the \'shortest\' representation in @@ -187,7 +205,7 @@ generic = fGeneric 'e' Nothing (0,7) standardSpecialStrings -- -- @since 0.11.2.0 {-# INLINABLE formatFloat #-} -formatFloat :: FloatFormat -> Float -> Builder +formatFloat :: FloatFormat Float -> Float -> Builder formatFloat = formatFloating -- TODO: support precision argument for FGeneric and FScientific @@ -215,12 +233,12 @@ formatFloat = formatFloating -- -- @since 0.11.2.0 {-# INLINABLE formatDouble #-} -formatDouble :: FloatFormat -> Double -> Builder +formatDouble :: FloatFormat Double -> Double -> Builder formatDouble = formatFloating {-# INLINABLE formatFloating #-} -{-# SPECIALIZE formatFloating :: FloatFormat -> Float -> Builder #-} -{-# SPECIALIZE formatFloating :: FloatFormat -> Double -> Builder #-} +{-# SPECIALIZE formatFloating :: FloatFormat Float -> Float -> Builder #-} +{-# SPECIALIZE formatFloating :: FloatFormat Double -> Double -> Builder #-} formatFloating :: forall a mw ew ei. -- a --( ToS a @@ -230,6 +248,7 @@ formatFloating :: forall a mw ew ei. , R.MantissaBits a , R.CastToWord a , R.MaxEncodedLength a + , R.WriteZeroPaddedExponent a -- mantissa , mw ~ R.MantissaWord a , R.Mantissa mw @@ -243,16 +262,16 @@ formatFloating :: forall a mw ew ei. , R.ToInt ei , Integral ei , R.FromInt ei - ) => FloatFormat -> a -> Builder + ) => FloatFormat a -> a -> Builder formatFloating fmt f = case fmt of FGeneric {stdExpoRange = (minExpo,maxExpo), ..} -> specialsOr specials if e' >= minExpo && e' <= maxExpo then std precision - else sci eE - FScientific {..} -> specialsOr specials $ sci eE + else sci expoZeroPad eE + FScientific {..} -> specialsOr specials $ sci expoZeroPad eE FStandard {..} -> specialsOr specials $ std precision where - sci eE = BP.primBounded (R.toCharsScientific @a Proxy eE sign m e) () + sci expoZeroPad eE = BP.primBounded (R.toCharsScientific @a Proxy expoZeroPad eE sign m e) () std precision = printSign f `mappend` showStandard (toWord64 m) e' precision e' = R.toInt e + R.decimalLength m R.FloatingDecimal m e = toD @a mantissa expo diff --git a/Data/ByteString/Builder/RealFloat/Internal.hs b/Data/ByteString/Builder/RealFloat/Internal.hs index dae38a50a..4b9ec0081 100644 --- a/Data/ByteString/Builder/RealFloat/Internal.hs +++ b/Data/ByteString/Builder/RealFloat/Internal.hs @@ -48,6 +48,7 @@ module Data.ByteString.Builder.RealFloat.Internal , trimNoTrailing , closestCorrectlyRounded , MaxEncodedLength(..) + , WriteZeroPaddedExponent , toCharsScientific , asciiRaw -- hand-rolled division and remainder for f2s and d2s @@ -870,11 +871,11 @@ writeMantissa ptr olength = go (ptr `plusAddr#` olength) in (# ptr `plusAddr#` 3#, s4 #) -- | Write the exponent into the given address. -writeExponent :: forall ei. +writeUnpaddedExponent :: forall ei. ( Integral ei , ToInt ei ) => Addr# -> ei -> State# RealWorld -> (# Addr#, State# RealWorld #) -writeExponent ptr !expo s1 +writeUnpaddedExponent ptr !expo s1 | expo >= 100 = let !(e1, e0) = fquotRem10 (fromIntegral expo) -- TODO s2 = copyWord16 (digit_table `unsafeAt` word2Int# (unsafeRaw e1)) ptr s1 @@ -888,6 +889,28 @@ writeExponent ptr !expo s1 in (# ptr `plusAddr#` 1#, s2 #) where !(I# e) = toInt expo +-- | Write the zero padded exponent into the given address. +class WriteZeroPaddedExponent a where + writeZeroPaddedExponent :: Addr# -> ExponentInt a -> State# RealWorld -> (# Addr#, State# RealWorld #) +instance WriteZeroPaddedExponent Float where + writeZeroPaddedExponent ptr !expo s1 = + let s2 = copyWord16 (digit_table `unsafeAt` e) ptr s1 + in (# ptr `plusAddr#` 2#, s2 #) + where + !(I# e) = toInt expo +instance WriteZeroPaddedExponent Double where + writeZeroPaddedExponent ptr !expo s1 + | expo >= 100 = + let !(e1, e0) = fquotRem10 (fromIntegral expo) -- TODO + s2 = copyWord16 (digit_table `unsafeAt` word2Int# (unsafeRaw e1)) ptr s1 + s3 = poke (ptr `plusAddr#` 2#) (wordToWord8# (toAscii (unsafeRaw e0))) s2 + in (# ptr `plusAddr#` 3#, s3 #) + | otherwise = + let s2 = poke ptr (asciiRaw asciiZero) s1 + s3 = copyWord16 (digit_table `unsafeAt` e) (ptr `plusAddr#` 1#) s2 + in (# ptr `plusAddr#` 3#, s3 #) + where !(I# e) = toInt expo + -- | Write the sign into the given address. writeSign :: Addr# -> Bool -> State# d -> (# Addr#, State# d #) writeSign ptr True s1 = @@ -898,17 +921,19 @@ writeSign ptr False s = (# ptr, s #) -- | Returns the decimal representation of a floating point number in -- scientific (exponential) notation {-# INLINABLE toCharsScientific #-} -{-# SPECIALIZE toCharsScientific :: Proxy Float -> Word8# -> Bool -> Word32 -> Int32 -> BoundedPrim () #-} -{-# SPECIALIZE toCharsScientific :: Proxy Double -> Word8# -> Bool -> Word64 -> Int32 -> BoundedPrim () #-} +{-# SPECIALIZE toCharsScientific :: Proxy Float -> Bool -> Word8# -> Bool -> Word32 -> Int32 -> BoundedPrim () #-} +{-# SPECIALIZE toCharsScientific :: Proxy Double -> Bool -> Word8# -> Bool -> Word64 -> Int32 -> BoundedPrim () #-} toCharsScientific :: forall a mw ei. ( MaxEncodedLength a + , WriteZeroPaddedExponent a , Mantissa mw , DecimalLength mw + , ei ~ ExponentInt a , Integral ei , ToInt ei , FromInt ei - ) => Proxy a -> Word8# -> Bool -> mw -> ei -> BoundedPrim () -toCharsScientific _ eE !sign !mantissa !expo = boundedPrim (maxEncodedLength @a) $ \_ !(Ptr p0)-> do + ) => Proxy a -> Bool -> Word8# -> Bool -> mw -> ei -> BoundedPrim () +toCharsScientific _ expoZeroPad eE !sign !mantissa !expo = boundedPrim (maxEncodedLength @a) $ \_ !(Ptr p0)-> do let !olength@(I# ol) = decimalLength mantissa !expo' = expo + fromInt olength - 1 IO $ \s1 -> @@ -918,6 +943,10 @@ toCharsScientific _ eE !sign !mantissa !expo = boundedPrim (maxEncodedLength @a) !(# p3, s5 #) = writeSign (p2 `plusAddr#` 1#) (expo' < 0) s4 !(# p4, s6 #) = writeExponent p3 (abs expo') s5 in (# s6, (Ptr p4) #) + where + writeExponent = if expoZeroPad + then writeZeroPaddedExponent @a + else writeUnpaddedExponent data FloatingDecimal a = FloatingDecimal { fmantissa :: !(MantissaWord a) @@ -983,11 +1012,12 @@ instance ExponentBits Double where exponentBits = 11 -- | Format type for use with `formatFloat` and `formatDouble`. -- -- @since 0.11.2.0 -data FloatFormat +data FloatFormat a -- | scientific notation = FScientific { eE :: Word8# , specials :: SpecialStrings + , expoZeroPad :: Bool -- ^ pad the exponent with zeros } -- | standard notation with `Maybe Int` digits after the decimal | FStandard @@ -1000,15 +1030,16 @@ data FloatFormat , precision :: Maybe Int , stdExpoRange :: (Int, Int) , specials :: SpecialStrings + , expoZeroPad :: Bool -- ^ pad the exponent with zeros } deriving Show -fScientific :: Char -> SpecialStrings -> FloatFormat -fScientific eE specials = FScientific +fScientific :: Char -> SpecialStrings -> Bool -> FloatFormat a +fScientific eE specials expoZeroPad = FScientific { eE = asciiRaw $ ord eE - , specials + , .. } -fGeneric :: Char -> Maybe Int -> (Int, Int) -> SpecialStrings -> FloatFormat -fGeneric eE precision stdExpoRange specials = FGeneric +fGeneric :: Char -> Maybe Int -> (Int, Int) -> SpecialStrings -> Bool -> FloatFormat a +fGeneric eE precision stdExpoRange specials expoZeroPad = FGeneric { eE = asciiRaw $ ord eE , .. } diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs index 41950dd35..534da8b76 100644 --- a/bench/BenchAll.hs +++ b/bench/BenchAll.hs @@ -372,6 +372,18 @@ main = do [ benchB "Float Average" floatSpecials $ foldMap (formatFloat scientific) , benchB "Double Average" doubleSpecials $ foldMap (formatDouble scientific) ] + , bgroup "Zero Padded" + [ bgroup "Positive" + [ benchB "Float" floatPosData $ foldMap (formatFloat $ scientificZeroPaddedExponent) + , benchB "Double" doublePosData $ foldMap (formatDouble $ scientificZeroPaddedExponent) + , benchB "DoubleSmall" doublePosSmallData $ foldMap (formatDouble $ scientificZeroPaddedExponent) + ] + , bgroup "Negative" + [ benchB "Float" floatNegData $ foldMap (formatFloat $ scientificZeroPaddedExponent) + , benchB "Double" doubleNegData $ foldMap (formatDouble $ scientificZeroPaddedExponent) + , benchB "DoubleSmall" doubleNegSmallData $ foldMap (formatDouble $ scientificZeroPaddedExponent) + ] + ] ] , bgroup "FStandard" [ bgroup "Positive" diff --git a/bytestring.cabal b/bytestring.cabal index 28a4d338a..5d1cbe3f8 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -92,6 +92,7 @@ library Data.ByteString.Builder.Extra Data.ByteString.Builder.Prim Data.ByteString.Builder.RealFloat + Data.ByteString.Builder.RealFloat.Internal -- perhaps only exposed temporarily Data.ByteString.Builder.Internal @@ -103,7 +104,6 @@ library Data.ByteString.Builder.Prim.Internal.Floating Data.ByteString.Builder.RealFloat.F2S Data.ByteString.Builder.RealFloat.D2S - Data.ByteString.Builder.RealFloat.Internal Data.ByteString.Builder.RealFloat.TableGenerator Data.ByteString.Internal.Type Data.ByteString.Lazy.Internal.Deque @@ -181,6 +181,7 @@ test-suite bytestring-tests deepseq, ghc-prim, QuickCheck, + quickcheck-assertions, tasty, tasty-hunit, tasty-quickcheck >= 0.8.1, diff --git a/tests/builder/Data/ByteString/Builder/Tests.hs b/tests/builder/Data/ByteString/Builder/Tests.hs index f35bcfc3f..90d9177f7 100644 --- a/tests/builder/Data/ByteString/Builder/Tests.hs +++ b/tests/builder/Data/ByteString/Builder/Tests.hs @@ -3,6 +3,8 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE BlockArguments #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE TypeApplications #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | @@ -27,7 +29,7 @@ import Control.Monad.Trans.Writer (WriterT, execWriterT, tell) import Foreign (minusPtr) -import Data.Char (chr) +import Data.Char (chr, isDigit) import Data.Bits ((.|.), shiftL) import Data.Foldable import Data.Semigroup (Semigroup(..)) @@ -45,6 +47,7 @@ import Data.ByteString.Builder.Internal (Put, putBuilder, fromPut) import qualified Data.ByteString.Builder.Internal as BI import qualified Data.ByteString.Builder.Prim as BP import Data.ByteString.Builder.Prim.TestUtils +import Data.ByteString.Builder.RealFloat.Internal (FloatFormat(FScientific), expoZeroPad, positiveZero, negativeZero, specials) import Control.Exception (evaluate) import System.IO (openTempFile, hPutStr, hClose, hSetBinaryMode, hSetEncoding, utf8, hSetNewlineMode, noNewlineTranslation) @@ -58,10 +61,11 @@ import Test.Tasty.HUnit (testCase, (@?=), Assertion) import Test.Tasty.QuickCheck ( Arbitrary(..), oneof, choose, listOf, elements, forAll , counterexample, ioProperty, Property, testProperty - , (===), (.&&.), conjoin + , (===), (.&&.), (.||.), conjoin , UnicodeString(..), NonNegative(..) ) import QuickCheckUtils +import Test.QuickCheck.Assertions (binAsrt) tests :: [TestTree] @@ -743,6 +747,20 @@ testsFloating = testGroup "RealFloat" , ( 1.2345678 , "1.2345678" ) , ( 1.23456735e-36 , "1.23456735e-36" ) ] + , testProperty "zero padded exponent" \d -> let + padLen = 2 + bs = toLazyByteString $ formatFloat (scientificZeroPaddedExponent @Float) $ d + s = LC.unpack bs + indexEnd i = bs `LC.index` (LC.length bs - i) + in conjoin + [ binAsrt (s <> " does not read to the value " <> show d) $ + read (LC.unpack bs) == d + , binAsrt (s <> " does not have " <> show padLen <> " exponent digits") $ + LC.all isDigit (LC.takeEnd padLen bs) + , binAsrt (s <> " does not have a proper prefix to exponent digits") + $ indexEnd (padLen + 1) == 'e' + || indexEnd (padLen + 1) == '-' && indexEnd (padLen + 2) == 'e' + ] , testMatches "f2sPowersOf10" floatDec show $ fmap asShowRef [read ("1.0e" ++ show x) :: Float | x <- [-46..39 :: Int]] ] @@ -973,6 +991,20 @@ testsFloating = testGroup "RealFloat" , ( 549755813888.0e+3 , "5.49755813888e14" ) , ( 8796093022208.0e+3 , "8.796093022208e15" ) ] + , testProperty "zero padded exponent" \d -> let + padLen = 3 + bs = toLazyByteString $ formatDouble (scientificZeroPaddedExponent @Double) $ d + s = LC.unpack bs + indexEnd i = bs `LC.index` (LC.length bs - i) + in conjoin + [ binAsrt (s <> " does not read to the value " <> show d) $ + read (LC.unpack bs) == d + , binAsrt (s <> " does not have " <> show padLen <> " exponent digits") $ + LC.all isDigit (LC.takeEnd padLen bs) + , binAsrt (s <> " does not have a proper prefix to exponent digits") + $ indexEnd (padLen + 1) == 'e' + || indexEnd (padLen + 1) == '-' && indexEnd (padLen + 2) == 'e' + ] , testMatches "d2sPowersOf10" doubleDec show $ fmap asShowRef [read ("1.0e" ++ show x) :: Double | x <- [-324..309 :: Int]] ]