From a2c73244a4508ea82cc78b9ba3ac9b4294840019 Mon Sep 17 00:00:00 2001
From: William Rusnack <williamrusnack@gmail.com>
Date: Tue, 16 Jan 2024 05:52:47 -0500
Subject: [PATCH] added zero padded exponent for scientific floating builder

---
 Data/ByteString/Builder/RealFloat.hs          | 47 +++++++++++-----
 Data/ByteString/Builder/RealFloat/Internal.hs | 55 +++++++++++++++----
 bench/BenchAll.hs                             | 12 ++++
 bytestring.cabal                              |  3 +-
 .../builder/Data/ByteString/Builder/Tests.hs  | 36 +++++++++++-
 5 files changed, 124 insertions(+), 29 deletions(-)

diff --git a/Data/ByteString/Builder/RealFloat.hs b/Data/ByteString/Builder/RealFloat.hs
index 1d6f89d18..2d33f00d5 100644
--- a/Data/ByteString/Builder/RealFloat.hs
+++ b/Data/ByteString/Builder/RealFloat.hs
@@ -8,6 +8,7 @@
 {-# LANGUAGE TypeApplications #-}
 {-# LANGUAGE BlockArguments #-}
 {-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE AllowAmbiguousTypes #-}
 -- |
 -- Module      : Data.ByteString.Builder.RealFloat
 -- Copyright   : (c) Lawrence Wu 2021
@@ -77,6 +78,7 @@ module Data.ByteString.Builder.RealFloat
   , standard
   , standardDefaultPrecision
   , scientific
+  , scientificZeroPaddedExponent
   , generic
   ) where
 
@@ -117,7 +119,7 @@ doubleDec = formatFloating generic
 -- | Standard notation with `n` decimal places
 --
 -- @since 0.11.2.0
-standard :: Int -> FloatFormat
+standard :: Int -> FloatFormat a
 standard n = FStandard
   { precision = Just n
   , specials = standardSpecialStrings {positiveZero, negativeZero}
@@ -131,7 +133,7 @@ standard n = FStandard
 -- | Standard notation with the \'default precision\' (decimal places matching `show`)
 --
 -- @since 0.11.2.0
-standardDefaultPrecision :: FloatFormat
+standardDefaultPrecision :: FloatFormat a
 standardDefaultPrecision = FStandard
   { precision = Nothing
   , specials = standardSpecialStrings
@@ -140,8 +142,24 @@ standardDefaultPrecision = FStandard
 -- | Scientific notation with \'default precision\' (decimal places matching `show`)
 --
 -- @since 0.11.2.0
-scientific :: FloatFormat
-scientific = fScientific 'e' scientificSpecialStrings
+scientific :: FloatFormat a
+scientific = fScientific 'e' scientificSpecialStrings False
+
+-- | Like @scientific@ but has a zero padded exponent.
+scientificZeroPaddedExponent :: forall a. ZeroPadCount a => FloatFormat a
+scientificZeroPaddedExponent = scientific
+  { expoZeroPad = True
+  , specials = scientificSpecialStrings
+    { positiveZero
+    , negativeZero = '-' : positiveZero
+    }
+  }
+  where
+  positiveZero = "0.0e" <> replicate (zeroPadCount @a) '0'
+
+class ZeroPadCount a where zeroPadCount :: Int
+instance ZeroPadCount Float where zeroPadCount = 2
+instance ZeroPadCount Double where zeroPadCount = 3
 
 scientificSpecialStrings, standardSpecialStrings :: R.SpecialStrings
 scientificSpecialStrings = R.SpecialStrings
@@ -159,8 +177,8 @@ standardSpecialStrings = scientificSpecialStrings
 -- | Standard or scientific notation depending on the exponent. Matches `show`
 --
 -- @since 0.11.2.0
-generic :: FloatFormat
-generic = fGeneric 'e' Nothing (0,7) standardSpecialStrings
+generic :: FloatFormat a
+generic = fGeneric 'e' Nothing (0,7) standardSpecialStrings False
 
 -- TODO: support precision argument for FGeneric and FScientific
 -- | Returns a rendered Float. Returns the \'shortest\' representation in
@@ -187,7 +205,7 @@ generic = fGeneric 'e' Nothing (0,7) standardSpecialStrings
 --
 -- @since 0.11.2.0
 {-# INLINABLE formatFloat #-}
-formatFloat :: FloatFormat -> Float -> Builder
+formatFloat :: FloatFormat Float -> Float -> Builder
 formatFloat = formatFloating
 
 -- TODO: support precision argument for FGeneric and FScientific
@@ -215,12 +233,12 @@ formatFloat = formatFloating
 --
 -- @since 0.11.2.0
 {-# INLINABLE formatDouble #-}
-formatDouble :: FloatFormat -> Double -> Builder
+formatDouble :: FloatFormat Double -> Double -> Builder
 formatDouble = formatFloating
 
 {-# INLINABLE formatFloating #-}
-{-# SPECIALIZE formatFloating :: FloatFormat -> Float -> Builder #-}
-{-# SPECIALIZE formatFloating :: FloatFormat -> Double -> Builder #-}
+{-# SPECIALIZE formatFloating :: FloatFormat Float  -> Float  -> Builder #-}
+{-# SPECIALIZE formatFloating :: FloatFormat Double -> Double -> Builder #-}
 formatFloating :: forall a mw ew ei.
   -- a
   --( ToS a
@@ -230,6 +248,7 @@ formatFloating :: forall a mw ew ei.
   , R.MantissaBits a
   , R.CastToWord a
   , R.MaxEncodedLength a
+  , R.WriteZeroPaddedExponent a
   -- mantissa
   , mw ~ R.MantissaWord a
   , R.Mantissa mw
@@ -243,16 +262,16 @@ formatFloating :: forall a mw ew ei.
   , R.ToInt ei
   , Integral ei
   , R.FromInt ei
-  ) => FloatFormat -> a -> Builder
+  ) => FloatFormat a -> a -> Builder
 formatFloating fmt f = case fmt of
   FGeneric {stdExpoRange = (minExpo,maxExpo), ..} -> specialsOr specials
     if e' >= minExpo && e' <= maxExpo
        then std precision
-       else sci eE
-  FScientific {..} -> specialsOr specials $ sci eE
+       else sci expoZeroPad eE
+  FScientific {..} -> specialsOr specials $ sci expoZeroPad eE
   FStandard {..} -> specialsOr specials $ std precision
   where
-  sci eE = BP.primBounded (R.toCharsScientific @a Proxy eE sign m e) ()
+  sci expoZeroPad eE = BP.primBounded (R.toCharsScientific @a Proxy expoZeroPad eE sign m e) ()
   std precision = printSign f `mappend` showStandard (toWord64 m) e' precision
   e' = R.toInt e + R.decimalLength m
   R.FloatingDecimal m e = toD @a mantissa expo
diff --git a/Data/ByteString/Builder/RealFloat/Internal.hs b/Data/ByteString/Builder/RealFloat/Internal.hs
index dae38a50a..4b9ec0081 100644
--- a/Data/ByteString/Builder/RealFloat/Internal.hs
+++ b/Data/ByteString/Builder/RealFloat/Internal.hs
@@ -48,6 +48,7 @@ module Data.ByteString.Builder.RealFloat.Internal
     , trimNoTrailing
     , closestCorrectlyRounded
     , MaxEncodedLength(..)
+    , WriteZeroPaddedExponent
     , toCharsScientific
     , asciiRaw
     -- hand-rolled division and remainder for f2s and d2s
@@ -870,11 +871,11 @@ writeMantissa ptr olength = go (ptr `plusAddr#` olength)
            in (# ptr `plusAddr#` 3#, s4 #)
 
 -- | Write the exponent into the given address.
-writeExponent :: forall ei.
+writeUnpaddedExponent :: forall ei.
   ( Integral ei
   , ToInt ei
   ) => Addr# -> ei -> State# RealWorld -> (# Addr#, State# RealWorld #)
-writeExponent ptr !expo s1
+writeUnpaddedExponent ptr !expo s1
   | expo >= 100 =
       let !(e1, e0) = fquotRem10 (fromIntegral expo) -- TODO
           s2 = copyWord16 (digit_table `unsafeAt` word2Int# (unsafeRaw e1)) ptr s1
@@ -888,6 +889,28 @@ writeExponent ptr !expo s1
        in (# ptr `plusAddr#` 1#, s2 #)
   where !(I# e) = toInt expo
 
+-- | Write the zero padded exponent into the given address.
+class WriteZeroPaddedExponent a where
+  writeZeroPaddedExponent :: Addr# -> ExponentInt a -> State# RealWorld -> (# Addr#, State# RealWorld #)
+instance WriteZeroPaddedExponent Float where
+  writeZeroPaddedExponent ptr !expo s1 =
+    let s2 = copyWord16 (digit_table `unsafeAt` e) ptr s1
+     in (# ptr `plusAddr#` 2#, s2 #)
+    where
+    !(I# e) = toInt expo
+instance WriteZeroPaddedExponent Double where
+  writeZeroPaddedExponent ptr !expo s1
+    | expo >= 100 =
+        let !(e1, e0) = fquotRem10 (fromIntegral expo) -- TODO
+            s2 = copyWord16 (digit_table `unsafeAt` word2Int# (unsafeRaw e1)) ptr s1
+            s3 = poke (ptr `plusAddr#` 2#) (wordToWord8# (toAscii (unsafeRaw e0))) s2
+         in (# ptr `plusAddr#` 3#, s3 #)
+    | otherwise =
+        let s2 = poke ptr (asciiRaw asciiZero) s1
+            s3 = copyWord16 (digit_table `unsafeAt` e) (ptr `plusAddr#` 1#) s2
+         in (# ptr `plusAddr#` 3#, s3 #)
+    where !(I# e) = toInt expo
+
 -- | Write the sign into the given address.
 writeSign :: Addr# -> Bool -> State# d -> (# Addr#, State# d #)
 writeSign ptr True s1 =
@@ -898,17 +921,19 @@ writeSign ptr False s = (# ptr, s #)
 -- | Returns the decimal representation of a floating point number in
 -- scientific (exponential) notation
 {-# INLINABLE toCharsScientific #-}
-{-# SPECIALIZE toCharsScientific :: Proxy Float -> Word8# -> Bool -> Word32 -> Int32 -> BoundedPrim () #-}
-{-# SPECIALIZE toCharsScientific :: Proxy Double -> Word8# -> Bool -> Word64 -> Int32 -> BoundedPrim () #-}
+{-# SPECIALIZE toCharsScientific :: Proxy Float  -> Bool -> Word8# -> Bool -> Word32 -> Int32 -> BoundedPrim () #-}
+{-# SPECIALIZE toCharsScientific :: Proxy Double -> Bool -> Word8# -> Bool -> Word64 -> Int32 -> BoundedPrim () #-}
 toCharsScientific :: forall a mw ei.
   ( MaxEncodedLength a
+  , WriteZeroPaddedExponent a
   , Mantissa mw
   , DecimalLength mw
+  , ei ~ ExponentInt a
   , Integral ei
   , ToInt ei
   , FromInt ei
-  ) => Proxy a -> Word8# -> Bool -> mw -> ei -> BoundedPrim ()
-toCharsScientific _ eE !sign !mantissa !expo = boundedPrim (maxEncodedLength @a) $ \_ !(Ptr p0)-> do
+  ) => Proxy a -> Bool -> Word8# -> Bool -> mw -> ei -> BoundedPrim ()
+toCharsScientific _ expoZeroPad eE !sign !mantissa !expo = boundedPrim (maxEncodedLength @a) $ \_ !(Ptr p0)-> do
   let !olength@(I# ol) = decimalLength mantissa
       !expo' = expo + fromInt olength - 1
   IO $ \s1 ->
@@ -918,6 +943,10 @@ toCharsScientific _ eE !sign !mantissa !expo = boundedPrim (maxEncodedLength @a)
         !(# p3, s5 #) = writeSign (p2 `plusAddr#` 1#) (expo' < 0) s4
         !(# p4, s6 #) = writeExponent p3 (abs expo') s5
      in (# s6, (Ptr p4) #)
+  where
+  writeExponent = if expoZeroPad
+    then writeZeroPaddedExponent @a
+    else writeUnpaddedExponent
 
 data FloatingDecimal a = FloatingDecimal
   { fmantissa :: !(MantissaWord a)
@@ -983,11 +1012,12 @@ instance ExponentBits Double where exponentBits = 11
 -- | Format type for use with `formatFloat` and `formatDouble`.
 --
 -- @since 0.11.2.0
-data FloatFormat
+data FloatFormat a
   -- | scientific notation
   = FScientific
     { eE :: Word8#
     , specials :: SpecialStrings
+    , expoZeroPad :: Bool -- ^ pad the exponent with zeros
     }
   -- | standard notation with `Maybe Int` digits after the decimal
   | FStandard
@@ -1000,15 +1030,16 @@ data FloatFormat
     , precision :: Maybe Int
     , stdExpoRange :: (Int, Int)
     , specials :: SpecialStrings
+    , expoZeroPad :: Bool -- ^ pad the exponent with zeros
     }
   deriving Show
-fScientific :: Char -> SpecialStrings -> FloatFormat
-fScientific eE specials = FScientific
+fScientific :: Char -> SpecialStrings -> Bool -> FloatFormat a
+fScientific eE specials expoZeroPad = FScientific
   { eE = asciiRaw $ ord eE
-  , specials
+  , ..
   }
-fGeneric :: Char -> Maybe Int -> (Int, Int) -> SpecialStrings -> FloatFormat
-fGeneric eE precision stdExpoRange specials = FGeneric
+fGeneric :: Char -> Maybe Int -> (Int, Int) -> SpecialStrings -> Bool -> FloatFormat a
+fGeneric eE precision stdExpoRange specials expoZeroPad = FGeneric
   { eE = asciiRaw $ ord eE
   , ..
   }
diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs
index 41950dd35..534da8b76 100644
--- a/bench/BenchAll.hs
+++ b/bench/BenchAll.hs
@@ -372,6 +372,18 @@ main = do
               [ benchB "Float  Average" floatSpecials  $ foldMap (formatFloat  scientific)
               , benchB "Double Average" doubleSpecials $ foldMap (formatDouble scientific)
               ]
+            , bgroup "Zero Padded"
+              [ bgroup "Positive"
+                [ benchB "Float"       floatPosData       $ foldMap (formatFloat  $ scientificZeroPaddedExponent)
+                , benchB "Double"      doublePosData      $ foldMap (formatDouble $ scientificZeroPaddedExponent)
+                , benchB "DoubleSmall" doublePosSmallData $ foldMap (formatDouble $ scientificZeroPaddedExponent)
+                ]
+              , bgroup "Negative"
+                [ benchB "Float"       floatNegData       $ foldMap (formatFloat  $ scientificZeroPaddedExponent)
+                , benchB "Double"      doubleNegData      $ foldMap (formatDouble $ scientificZeroPaddedExponent)
+                , benchB "DoubleSmall" doubleNegSmallData $ foldMap (formatDouble $ scientificZeroPaddedExponent)
+                ]
+              ]
             ]
           , bgroup "FStandard"
             [ bgroup "Positive"
diff --git a/bytestring.cabal b/bytestring.cabal
index 28a4d338a..5d1cbe3f8 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -92,6 +92,7 @@ library
                      Data.ByteString.Builder.Extra
                      Data.ByteString.Builder.Prim
                      Data.ByteString.Builder.RealFloat
+                     Data.ByteString.Builder.RealFloat.Internal
 
                      -- perhaps only exposed temporarily
                      Data.ByteString.Builder.Internal
@@ -103,7 +104,6 @@ library
                      Data.ByteString.Builder.Prim.Internal.Floating
                      Data.ByteString.Builder.RealFloat.F2S
                      Data.ByteString.Builder.RealFloat.D2S
-                     Data.ByteString.Builder.RealFloat.Internal
                      Data.ByteString.Builder.RealFloat.TableGenerator
                      Data.ByteString.Internal.Type
                      Data.ByteString.Lazy.Internal.Deque
@@ -181,6 +181,7 @@ test-suite bytestring-tests
                     deepseq,
                     ghc-prim,
                     QuickCheck,
+                    quickcheck-assertions,
                     tasty,
                     tasty-hunit,
                     tasty-quickcheck >= 0.8.1,
diff --git a/tests/builder/Data/ByteString/Builder/Tests.hs b/tests/builder/Data/ByteString/Builder/Tests.hs
index f35bcfc3f..90d9177f7 100644
--- a/tests/builder/Data/ByteString/Builder/Tests.hs
+++ b/tests/builder/Data/ByteString/Builder/Tests.hs
@@ -3,6 +3,8 @@
 {-# LANGUAGE CPP              #-}
 {-# LANGUAGE BlockArguments   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE TypeApplications #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 
 -- |
@@ -27,7 +29,7 @@ import           Control.Monad.Trans.Writer (WriterT, execWriterT, tell)
 
 import           Foreign (minusPtr)
 
-import           Data.Char (chr)
+import           Data.Char (chr, isDigit)
 import           Data.Bits ((.|.), shiftL)
 import           Data.Foldable
 import           Data.Semigroup (Semigroup(..))
@@ -45,6 +47,7 @@ import           Data.ByteString.Builder.Internal (Put, putBuilder, fromPut)
 import qualified Data.ByteString.Builder.Internal   as BI
 import qualified Data.ByteString.Builder.Prim       as BP
 import           Data.ByteString.Builder.Prim.TestUtils
+import           Data.ByteString.Builder.RealFloat.Internal (FloatFormat(FScientific), expoZeroPad, positiveZero, negativeZero, specials)
 
 import           Control.Exception (evaluate)
 import           System.IO (openTempFile, hPutStr, hClose, hSetBinaryMode, hSetEncoding, utf8, hSetNewlineMode, noNewlineTranslation)
@@ -58,10 +61,11 @@ import           Test.Tasty.HUnit (testCase, (@?=), Assertion)
 import           Test.Tasty.QuickCheck
                    ( Arbitrary(..), oneof, choose, listOf, elements, forAll
                    , counterexample, ioProperty, Property, testProperty
-                   , (===), (.&&.), conjoin
+                   , (===), (.&&.), (.||.), conjoin
                    , UnicodeString(..), NonNegative(..)
                    )
 import           QuickCheckUtils
+import           Test.QuickCheck.Assertions (binAsrt)
 
 
 tests :: [TestTree]
@@ -743,6 +747,20 @@ testsFloating = testGroup "RealFloat"
           , ( 1.2345678      , "1.2345678" )
           , ( 1.23456735e-36 , "1.23456735e-36" )
           ]
+    , testProperty "zero padded exponent" \d -> let
+      padLen = 2
+      bs = toLazyByteString $ formatFloat (scientificZeroPaddedExponent @Float) $ d
+      s = LC.unpack bs
+      indexEnd i = bs `LC.index` (LC.length bs - i)
+      in conjoin
+        [ binAsrt (s <> " does not read to the value " <> show d) $
+            read (LC.unpack bs) == d
+        , binAsrt (s <> " does not have " <> show padLen <> " exponent digits") $
+            LC.all isDigit (LC.takeEnd padLen bs)
+        , binAsrt (s <> " does not have a proper prefix to exponent digits")
+          $  indexEnd (padLen + 1) == 'e'
+          || indexEnd (padLen + 1) == '-' && indexEnd (padLen + 2) == 'e'
+        ]
     , testMatches "f2sPowersOf10" floatDec show $
           fmap asShowRef [read ("1.0e" ++ show x) :: Float | x <- [-46..39 :: Int]]
     ]
@@ -973,6 +991,20 @@ testsFloating = testGroup "RealFloat"
           , ( 549755813888.0e+3  , "5.49755813888e14" )
           , ( 8796093022208.0e+3 , "8.796093022208e15" )
           ]
+    , testProperty "zero padded exponent" \d -> let
+      padLen = 3
+      bs = toLazyByteString $ formatDouble (scientificZeroPaddedExponent @Double) $ d
+      s = LC.unpack bs
+      indexEnd i = bs `LC.index` (LC.length bs - i)
+      in conjoin
+        [ binAsrt (s <> " does not read to the value " <> show d) $
+            read (LC.unpack bs) == d
+        , binAsrt (s <> " does not have " <> show padLen <> " exponent digits") $
+            LC.all isDigit (LC.takeEnd padLen bs)
+        , binAsrt (s <> " does not have a proper prefix to exponent digits")
+          $  indexEnd (padLen + 1) == 'e'
+          || indexEnd (padLen + 1) == '-' && indexEnd (padLen + 2) == 'e'
+        ]
     , testMatches "d2sPowersOf10" doubleDec show $
           fmap asShowRef [read ("1.0e" ++ show x) :: Double | x <- [-324..309 :: Int]]
     ]