File: bench.hs

package info (click to toggle)
haskell-doclayout 0.4.0.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 252 kB
  • sloc: haskell: 1,179; makefile: 3
file content (98 lines) | stat: -rw-r--r-- 4,153 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
{-# LANGUAGE CPP               #-}
{-# LANGUAGE OverloadedStrings #-}
import Text.DocLayout
import Text.Emoji

import Control.DeepSeq (force)
import Control.Exception (evaluate)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as T (readFile)
import Criterion.Main
import Criterion.Types (Config (..))
#if MIN_VERSION_base(4,11,0)
#else
import Data.Semigroup
#endif

main :: IO ()
main = do
    udhrEng <- udhrLang "eng"
    udhrFrn <- udhrLang "frn"
    udhrVie <- udhrLang "vie"
    udhrChn <- udhrLang "chn"
    udhrArz <- udhrLang "arz"
    udhrHnd <- udhrLang "hnd"
    udhrBng <- udhrLang "bng"
    udhrRus <- udhrLang "rus"
    udhrJpn <- udhrLang "jpn"
    udhrKkn <- udhrLang "kkn"
    udhrTcw <- udhrLang "tcw"
    udhrTcv <- udhrLang "tcv"
    udhrThj <- udhrLang "thj"
    udhrGrk <- udhrLang "grk"
    emojiTxt <- evaluate . force . T.replicate 1000 $ mconcat baseEmojis <> mconcat zwjEmojis
    defaultMainWith defaultConfig{ timeLimit = 5.0 } $
      [ bench "sample document 2" $
          nf (render Nothing :: Doc Text -> Text)
             (nest 3 $ cblock 20 $ vcat $ replicate 15 $
               hsep $ map text $ words bigtext)

      , bench "reflow English" $
          nf (render (Just 20) :: Doc Text -> Text) $ flowedDoc udhrEng

      , bench "reflow Greek" $
          nf (render (Just 20) :: Doc Text -> Text) $ flowedDoc udhrGrk

      , bench "tabular English" $
          nf (render (Just 80) :: Doc Text -> Text)
             (let blah = flowedDoc udhrEng
              in  cblock 20 blah <> lblock 30 blah <> rblock 10 blah $$
                  cblock 50 (nest 5 blah) <> rblock 10 blah)

      , bench "tabular Greek" $
          nf (render (Just 80) :: Doc Text -> Text)
             (let blah = flowedDoc udhrGrk
              in  cblock 20 blah <> lblock 30 blah <> rblock 10 blah $$
                  cblock 50 (nest 5 blah) <> rblock 10 blah)

      , bench "soft spaces at end of line" $
          nf (render Nothing :: Doc Text -> Text)
             ("a" <> mconcat (replicate 50 (space <> lblock 1 mempty)))
      ] ++

      -- Benchmarks for languages using all scripts used by more than 50 million people
      -- https://en.wikipedia.org/wiki/List_of_writing_systems#List_of_writing_systems_by_adoption
      -- https://www.unicode.org/udhr/translations.html
      [ bench "UDHR English"    $ nf realLengthNarrowContext udhrEng  -- Plain ASCII
      , bench "UDHR French"     $ nf realLengthNarrowContext udhrFrn  -- Latin with some diacritics
      , bench "UDHR Vietnamese" $ nf realLengthNarrowContext udhrVie  -- Latin with more diacritics
      , bench "UDHR Mandarin"   $ nf realLengthWideContext   udhrChn  -- Mandarin
      , bench "UDHR Arabic"     $ nf realLengthNarrowContext udhrArz  -- Arabic
      , bench "UDHR Hindi"      $ nf realLengthNarrowContext udhrHnd  -- Hindi
      , bench "UDHR Bengali"    $ nf realLengthNarrowContext udhrBng  -- Bengali
      , bench "UDHR Russian"    $ nf realLengthNarrowContext udhrRus  -- Russian
      , bench "UDHR Japanese"   $ nf realLengthWideContext   udhrJpn  -- Japanese
      , bench "UDHR Korean"     $ nf realLengthWideContext   udhrKkn  -- Korean
      , bench "UDHR Telugu"     $ nf realLengthNarrowContext udhrTcw  -- Telugu
      , bench "UDHR Tamil"      $ nf realLengthNarrowContext udhrTcv  -- Tamil
      -- Benchmarks for other languages
      , bench "UDHR Thai"       $ nf realLengthNarrowContext udhrThj  -- Thai
      , bench "UDHR Greek"      $ nf realLengthNarrowContext udhrGrk  -- Greek
      , bench "Emoji"           $ nf realLengthNarrowContext emojiTxt -- Emoji
      , bench "UDHR Mandarin (no shortcuts)" $
          nf realLengthWideContextNoShortcut udhrChn -- No shortcuts
      ]

-- | The Universal declaration of human rights in a given language, repeated 1000 times.
udhrLang :: String -> IO Text
udhrLang lang = do
    txt <- T.readFile ("udhr/txt/" ++ lang ++ ".txt")
    evaluate . force $ T.replicate 10000 txt

bigtext :: String
bigtext = "Hello there. This is a big text."

flowedDoc :: Text -> Doc Text
flowedDoc txt = hsep $ map literal . T.words . T.take 5000 $ txt