1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
|
{-# LANGUAGE TemplateHaskell #-}
-- |
-- Copyright : (c) 2016 Harendra Kumar
--
-- License : BSD-3-Clause
-- Maintainer : harendra.kumar@gmail.com
-- Stability : experimental
-- Portability : GHC
--
import Control.DeepSeq (deepseq)
import System.Environment (getArgs)
import qualified Data.Text.Normalize as UT
import Data.Text (pack, Text)
-- Number of characters every data set is normalized to before measuring.
-- Inputs longer than this are truncated and shorter ones are repeated up to
-- this length, so that results are comparable across data sets and small
-- inputs do not drown in measurement noise.
dataSetSize :: Int
dataSetSize = 10 ^ (6 :: Int)
-- | Read @file@ and build a 'Text' from its contents, repeated cyclically
-- and cut to 'dataSetSize' characters.
--
-- The file is read as a 'String' with 'readFile' and converted with 'pack'.
--
-- Raises an 'IOError' (via 'userError') when the file is empty: 'cycle' is
-- undefined on an empty list, and an empty data set cannot be expanded to
-- the requested size.
txtInput :: FilePath -> IO Text
txtInput file = do
    contents <- readFile file
    if null contents
        -- Report the offending file instead of letting 'cycle' abort with
        -- its generic "empty list" error.
        then ioError (userError ("txtInput: input file is empty: " ++ file))
        else return (pack (take dataSetSize (cycle contents)))
-- | Entry point: expects exactly one command-line argument, the path of the
-- input text file. The data set built by 'txtInput' is normalized to NFD and
-- the result is fully evaluated with 'deepseq'; nothing is printed.
--
-- Raises an 'IOError' (via 'userError') with a usage message when the number
-- of arguments is not exactly one.
main :: IO ()
main = do
    args <- getArgs
    case args of
        [file] -> do
            input <- txtInput file
            -- 'deepseq' forces the whole normalized text, so the
            -- normalization work is actually performed even though the
            -- result is discarded.
            UT.normalize UT.NFD input `deepseq` return ()
        _ -> ioError (userError
                "usage: expected exactly one argument: the input file path")
|