1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
|
{-
Copyright 2010-2016 Mario Blazevic
This file is part of the Streaming Component Combinators (SCC) project.
The SCC project is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later
version.
SCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with SCC. If not, see
<http://www.gnu.org/licenses/>.
-}
{-# LANGUAGE Haskell2010, BangPatterns, ExistentialQuantification, FlexibleContexts, OverloadedStrings,
ScopedTypeVariables #-}
module Main (main, parseWhole, parseChunked) where
import Prelude hiding (null, splitAt)
import Control.Applicative (Alternative, (<$>), (<*>), (<*), (*>), (<|>), many, pure)
import Control.Monad (void)
import Data.Foldable (foldl')
import Data.Monoid (Monoid, (<>), mappend, mempty)
import Data.Monoid.Textual (TextualMonoid)
import Data.Monoid.Factorial (FactorialMonoid (splitAt))
import Data.Monoid.Null (MonoidNull (null))
import Text.ParserCombinators.Incremental.LeftBiasedLocal hiding (record)
import Control.DeepSeq (NFData(..))
import Criterion.Main (bench, bgroup, defaultMain, nf)
import qualified Data.ByteString as B
import qualified Data.Text.IO as T
import Data.Monoid.Instances.ByteString.UTF8 (ByteStringUTF8(ByteStringUTF8))
import Data.Monoid.Instances.Concat (Concat, extract)
instance NFData ByteStringUTF8 where
rnf (ByteStringUTF8 b) = rnf b
instance NFData a => NFData (Concat a) where
rnf s = rnf (extract s)
endOfInput :: MonoidNull s => Parser s ()
endOfInput = eof
char :: TextualMonoid t => Char -> Parser t t
char = satisfyChar . (==)
sepBy1 :: Alternative f => f a -> f s -> f [a]
sepBy1 p q = (:) <$> p <*> many (q *> p)
lineEnd :: TextualMonoid t => Parser t ()
lineEnd = void (char '\n') <|> void (string "\r\n")
<|> void (char '\r')
<?> "end of line"
unquotedField :: TextualMonoid t => Parser t t
unquotedField = takeCharsWhile (`notElem` (",\n\r\"" :: [Char]))
<?> "unquoted field"
insideQuotes :: TextualMonoid t => Parser t t
insideQuotes = mappend <$> takeCharsWhile (/= '"')
<*> concatMany (mappend <$> dquotes <*> insideQuotes)
<?> "inside of double quotes"
where dquotes = string "\"\"" >> return "\""
<?> "paired double quotes"
quotedField :: TextualMonoid t => Parser t t
quotedField = char '"' *> insideQuotes <* char '"'
<?> "quoted field"
field :: TextualMonoid t => Parser t t
field = quotedField <|> unquotedField
<?> "field"
record :: TextualMonoid t => Parser t [t]
record = field `sepBy1` char ','
-- file1 is not incremental because it's fallible
file1 :: TextualMonoid t => Parser t [[t]]
file1 = (:) <$> record
+<*> manyTill (lineEnd *> ((:[]) <$> record))
(moptional lineEnd *> endOfInput)
<?> "file"
file2 :: forall t. TextualMonoid t => Parser t [[t]]
file2 = (:) <$> record
+<*> many ((notFollowedBy (moptional lineEnd *> endOfInput) :: Parser t ())
*> lineEnd *> record)
<?> "file"
parseWhole :: TextualMonoid t => Parser t [[t]] -> t -> [([[t]], t)]
parseWhole p s = completeResults (feedEof $ feed s p)
parseChunked :: TextualMonoid t => Parser t [[t]] -> [t] -> [([[t]], t)]
parseChunked p chunks = completeResults (feedEof $ foldl' (flip feed) p $ chunks)
parseIncremental :: (Monoid r, TextualMonoid t) => Parser t [[t]] -> ([[t]] -> r) -> [t] -> r
parseIncremental p0 process chunks = let (inc, p') = foldl' feedInc (mempty, p0) chunks
in case completeResults (feedEof p')
of [] -> inc
(r,_):_ -> inc <> process r
where feedInc (!inc, !p) chunk = let !(prefix, p') = resultPrefix (feed chunk p)
in (inc <> process prefix, p')
chunksOf :: FactorialMonoid t => Int -> t -> [t]
chunksOf len t
| null t = []
| (h, t') <- splitAt len t = h : chunksOf len t'
chunkSize :: Int
chunkSize = 1024
data Input = forall t. (NFData t, TextualMonoid t) => Input String t
main :: IO ()
main = do
airportsS <- readFile "Benchmarks/airports.dat"
airportsT <- T.readFile "Benchmarks/airports.dat"
airportsB <- B.readFile "Benchmarks/airports.dat"
let inputs = [Input "UTF8" (ByteStringUTF8 airportsB),
Input "Text" airportsT,
Input "Concat String" (pure airportsS :: Concat String)]
defaultMain [
let chunks = chunksOf chunkSize i
in nf id chunks `seq` bgroup inputName [
bgroup parserName [
bench "whole" $ nf (parseWhole p) i,
bench "chunked" $ nf (parseChunked p) chunks,
bench "incremental" $ nf (parseIncremental p id) chunks,
bench "incremental'" $ nf (parseIncremental p (pure :: a -> Concat a)) chunks]
| (parserName, p) <- [("manyTill", file1), ("many", file2)]]
| Input inputName i <- inputs]
|