File: Bench.hs

package info (click to toggle)
haskell-xeno 0.6-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212 kB
  • sloc: haskell: 1,324; xml: 120; makefile: 7
file content (45 lines) | stat: -rw-r--r-- 1,586 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE QuasiQuotes  #-}
import Control.Monad
import Data.Time.Clock
import System.IO.Posix.MMap
import System.Mem
import Xeno.DOM
import qualified Data.ByteString as BS


-- | Benchmark driver: parses each of the listed XML files into a Xeno DOM
-- five times, printing the time taken per file and the total at the end.
main :: IO ()
main = do
    let prefix = "ex-data/"
        files' = map (prefix ++)
                [ {- 921 Mb -} "1htq.xml"
                , {- 190 Mb -} "enwiki-20190901-abstract10.xml"
                , {- 1.6 Gb -} "enwiki-20190901-pages-logging1.xml"
                , {- 4.0 Gb -} "enwiki-20190901-pages-meta-current24.xml-p30503451p32003451.xml"
                -- , {-  21 Gb -} "enwiki-20190901-pages-meta-history2.xml"
                ]
        -- The whole file list is repeated five times, in order.
        files = concat $ replicate 5 files'
    --
    deltas <- forM files benchFile
    --
    putStrLn "------"
    putStrLn $ "Total: " ++ show (sum deltas)

-- | Parse one file with 'parse' and return the wall-clock time spent on it.
--
-- Aborts the benchmark with an 'IOError' (via 'fail') if the file does not
-- parse, since a timing for a failed parse would be meaningless.
benchFile :: FilePath -> IO NominalDiffTime
benchFile fn = do
    putStrLn $ "Processing file '" ++ fn ++ "'"
    --
    -- NOTE: The file needs to be cached in memory BEFORE the test starts.
    --       This can be done with the `vmtouch` utility, for example (`vmtouch -vtL *`).
    --
    bs <- unsafeMMapFile fn
    -- bs <- BS.readFile fn
    putStrLn $ "  size: " ++ show (BS.length bs `div` (1024*1024)) ++ " Mb"
    -- Collect garbage left over from earlier iterations before the clock starts.
    performGC
    start <- getCurrentTime
    -- SAX:
    -- let res = validate bs
    -- putStrLn [qc|  process result: {res}|]
    -- DOM:
    case parse bs of
        -- The bang forces the parsed node while the clock is still running.
        Right !_node -> putStrLn "  processed!"
        -- Only a prefix of the error is shown: the exception value may carry
        -- a large slice of the input (NOTE(review): confirm against Xeno.Types).
        Left err -> fail $ "parsing '" ++ fn ++ "' failed: " ++ take 200 (show err)
    finish <- getCurrentTime
    let delta = finish `diffUTCTime` start
    putStrLn $ "  processing time: " ++ show delta
    return delta