File: Main.hs

package info (click to toggle)
haskell-unicode-collation 0.1.3.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,004 kB
  • sloc: haskell: 1,566; makefile: 3
file content (60 lines) | stat: -rw-r--r-- 3,323 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{-# LANGUAGE OverloadedStrings    #-}
{-# LANGUAGE ScopedTypeVariables  #-}
module Main (main) where

import Test.Tasty.Bench
import Test.QuickCheck
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.ICU as ICU
import Data.Text.ICU.Collate (Attribute(..), Strength(..))
import Text.Collate
import Text.Collate.Normalize (toNFD)
import Test.QuickCheck.Instances.Text ()
import Data.List (sortBy)
import Data.Char (isAscii, ord, chr)

main :: IO ()
main = do
  (randomTexts :: [Text]) <- generate (infiniteListOf arbitrary)
  (randomLatinStrings :: [String]) <-
      generate (infiniteListOf (listOf (elements latinChars)))
  (randomAsciiTexts :: [Text]) <-
    generate (infiniteListOf (arbitrary `suchThat` T.all isAscii))
  let tenThousand = take 10000 randomTexts
  let tenThousandLatin = map T.pack $ take 10000 randomLatinStrings
  let tenThousandLatinNFD = map (T.pack . map chr . toNFD . map ord . T.unpack)
                              tenThousandLatin
  let tenThousandString = map T.unpack tenThousand
  let tenThousandAscii = take 10000 randomAsciiTexts
  let tenThousandLong = map ("A bcd efgh ijklmnop qrs tuv WxyZ" <>) tenThousand
  let icuCollator lang = ICU.collatorWith (ICU.Locale lang)
                          [NormalizationMode True, Strength Quaternary]
  let collateString = collateWithUnpacker (collatorFor "en") id
  defaultMain
    [ bench "sort a list of 10000 random Texts (en)"
        (whnf (sortBy (collate (collatorFor "en"))) tenThousand)
    , bench "sort same list with text-icu (en)"
        (whnf (sortBy (ICU.collate (icuCollator "en"))) tenThousand)
    , bench "sort a list of 10000 Texts (composed latin) (en)"
        (whnf (sortBy (collate (collatorFor "en"))) tenThousandLatin)
    , bench "sort same list with text-icu (en)"
        (whnf (sortBy (ICU.collate (icuCollator "en"))) tenThousandLatin)
    , bench "sort same list but pre-normalized (en-u-kk-false)"
        (whnf (sortBy (collate (collatorFor "en-u-kk-false"))) tenThousandLatinNFD)
    , bench "sort a list of 10000 ASCII Texts (en)"
        (whnf (sortBy (collate (collatorFor "en"))) tenThousandAscii)
    , bench "sort same list with text-icu (en)"
        (whnf (sortBy (ICU.collate (icuCollator "en"))) tenThousandAscii)
    , bench "sort a list of 10000 random Texts that agree in first 32 chars"
        (whnf (sortBy (collate (collatorFor "en"))) tenThousandLong)
    , bench "sort same list with text-icu (en)"
        (whnf (sortBy (ICU.collate (icuCollator "en"))) tenThousandLong)
    , bench "sort a list of 10000 identical Texts (en)"
        (whnf (sortBy collateString) (replicate 10000 "ḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔ"))
    , bench "sort a list of 10000 random Strings (en)"
        (whnf (sortBy collateString) tenThousandString)
    ]

latinChars :: [Char]
latinChars = "ḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔḕḖḗḘḙḚḛḜḝḞḟḠḡḢḣḤḥḦḧḨḩḪḫḬḭḮḯḰḱḲḳḴḵḶḷḸḹḺḻḼḽḾḿṀṁṂṃṄṅṆṇṈṉṊṋṌṍṎṏṐṑṒṓṔṕṖṗṘṙṚṛṜṝṞṟṠṡṢṣṤṥṦṧṨṩṪṫṬṭṮṯṰṱṲṳṴṵṶṷṸṹṺṻṼṽṾṿ‐‑‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…"