1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
|
{-# LANGUAGE CPP #-}
{-# LANGUAGE TemplateHaskell #-}
-- |
-- Copyright : (c) 2016 Harendra Kumar
--
-- License : BSD-3-Clause
-- Maintainer : harendra.kumar@gmail.com
-- Stability : experimental
-- Portability : GHC
--
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*>))
#endif
import Control.DeepSeq (NFData)
import Data.Text (Text)
import Path (Dir, Path, Rel, mkRelDir, toFilePath, (</>))
import Path.IO (listDir)
import System.FilePath (dropExtensions, takeFileName)
import Gauge.Main (Benchmark, bench, bgroup, defaultMain, env, nf)
import qualified Data.Text as T
import qualified Data.Text.Normalize as UTText
#ifdef BENCH_ICU
import qualified Data.Text.ICU as TI
-- | The text-icu normalizers under benchmark, each paired with the name of
-- the normalization form it applies. The name becomes the first component
-- of the benchmark label.
textICUFuncs :: [(String, Text -> Text)]
textICUFuncs =
    [ (formName, TI.normalize mode)
    | (formName, mode) <-
        [ ("NFD", TI.NFD)
        , ("NFKD", TI.NFKD)
        , ("NFC", TI.NFC)
        , ("NFKC", TI.NFKC)
        ]
    ]
#endif
-- | The unicode-transforms 'Text' normalizers under benchmark, each paired
-- with the name of the normalization form it applies. The name becomes the
-- first component of the benchmark label.
unicodeTransformTextFuncs :: [(String, Text -> Text)]
unicodeTransformTextFuncs =
    [ (formName, UTText.normalize mode)
    | (formName, mode) <-
        [ ("NFD", UTText.NFD)
        , ("NFKD", UTText.NFKD)
        , ("NFC", UTText.NFC)
        , ("NFKC", UTText.NFKC)
        ]
    ]
-- | Relative directory (@benchmark/data@) whose files are used as benchmark
-- inputs. NOTE(review): presumably resolved against the working directory
-- the benchmark is launched from -- confirm.
dataDir :: Path Rel Dir
dataDir = benchmarkDir </> dataSubDir
  where
    benchmarkDir = $(mkRelDir "benchmark")
    dataSubDir = $(mkRelDir "data")
-- | Number of characters every dataset is truncated or expanded to.
--
-- Using one fixed size gives a normalized measurement view across all
-- datasets and reduces the effect of noise caused by datasets that would
-- otherwise be too small.
dataSetSize :: Int
dataSetSize = 1000 * 1000
-- | Build one benchmark from a named function and a named input.
--
-- The benchmark is labelled @\<implName\>/\<dataName\>@. The input is
-- obtained by handing the @setup@ action to 'env', and the function's result
-- on that input is measured with 'nf'.
makeBench :: (NFData a, NFData b) => (String, a -> b) -> (String, IO a) -> Benchmark
makeBench (implName, func) (dataName, setup) = env setup measure
  where
    label = implName ++ "/" ++ dataName
    -- The label never looks at the input, so the benchmark's name does not
    -- depend on the environment value.
    measure input = bench label (nf func input)
-- | Build a named 'String' benchmark input from a data file.
--
-- The name is the file's base name with all extensions dropped. The action
-- reads the file and repeats its contents cyclically, truncated to exactly
-- 'dataSetSize' characters.
--
-- An empty file cannot be expanded to 'dataSetSize' characters, so the
-- action fails with an 'IOError' that names the offending file, rather than
-- the opaque @Prelude.cycle: empty list@ error.
strInput :: FilePath -> (String, IO String)
strInput file = (dataName file, readFile file >>= resize)
  where
    dataName = dropExtensions . takeFileName

    -- 'cycle' is partial: guard against empty contents before calling it.
    resize contents
        | null contents =
            ioError (userError ("strInput: empty data file: " ++ file))
        | otherwise = return (take dataSetSize (cycle contents))
-- | Same as 'strInput', except that the loaded data is packed into 'Text'.
-- The input's name is passed through unchanged.
txtInput :: FilePath -> (String, IO Text)
txtInput file =
    case strInput file of
        (name, load) -> (name, fmap T.pack load)
-- | Benchmark entry point.
--
-- Lists 'dataDir', turns every file reported there into a 'Text' input, and
-- benchmarks each normalization function against each input. When
-- @BENCH_ICU@ is defined, a @text-icu@ group is emitted ahead of the
-- @unicode-transforms-text@ group.
main :: IO ()
main = do
    -- Only the second component of the listing (the files) is used.
    (_, files) <- listDir dataDir
    let inputs = map (txtInput . toFilePath) files
        -- Functions vary in the outer loop and inputs in the inner loop.
        suite name funcs =
            bgroup name [makeBench f input | f <- funcs, input <- inputs]
    defaultMain
        [
#ifdef BENCH_ICU
          suite "text-icu" textICUFuncs
        ,
#endif
          suite "unicode-transforms-text" unicodeTransformTextFuncs
        ]
|