1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
|
-- This program is intended for performance analysis. It simply
-- builds a Bloom filter from a list of words, one per line, and
-- queries it exhaustively.
module Main (main) where
import Control.Monad (forM_, mapM_)
import qualified Data.BloomFilter as BF
import Data.BloomFilter.Hash (cheapHashes)
import Data.BloomFilter.Easy (easyList, suggestSizing)
import qualified Data.ByteString.Lazy.Char8 as B
import Data.Time.Clock (diffUTCTime, getCurrentTime)
import System.Environment (getArgs)
-- | Build a Bloom filter by delegating directly to 'easyList'. The
-- 'Double' is handed through unchanged as its first argument.
conservative :: Double -> [B.ByteString] -> BF.Bloom B.ByteString
conservative = easyList

-- | Build a Bloom filter that deviates from the 'suggestSizing'
-- recommendation for @length xs@ elements at rate @fpr@: it asks for @k@
-- fewer hash functions and requests @k@ times the suggested size.
--
-- The hash count is clamped to at least one. Without the clamp, any
-- @fpr@ for which 'suggestSizing' recommends @k@ or fewer hashes would
-- hand 'cheapHashes' a zero or negative count.
aggressive :: Double -> [B.ByteString] -> BF.Bloom B.ByteString
aggressive fpr xs =
  let (size, numHashes) = suggestSizing (length xs) fpr
      k = 3
      -- Never drop below a single hash function.
      hashCount = max 1 (numHashes - k)
   in BF.fromList (cheapHashes hashCount) (size * k) xs
-- | The filter constructor exercised by 'main'. Currently bound to
-- 'conservative'; 'aggressive' has the same type and can be substituted
-- here to profile the other sizing strategy.
testFunction :: Double -> [B.ByteString] -> BF.Bloom B.ByteString
testFunction = conservative
-- | For every file named on the command line (or
-- @\/usr\/share\/dict\/words@ when no arguments are given): read its
-- lines, report how many there are, build a filter from them with
-- 'testFunction' at a rate of 0.01, then query the filter with every
-- line. The elapsed wall-clock time of each phase is printed, along with
-- any line the filter does not report as a member.
main :: IO ()
main = do
  args <- getArgs
  let files
        | null args = ["/usr/share/dict/words"]
        | otherwise = args
  forM_ files $ \file -> do
    a <- getCurrentTime
    -- Named 'ws' rather than 'words' so Prelude's 'words' is not shadowed.
    ws <- B.lines `fmap` B.readFile file
    putStrLn $ {-# SCC "words/length" #-} show (length ws) ++ " words"
    b <- getCurrentTime
    reportElapsed a b "count words"
    let filt = {-# SCC "construct" #-} testFunction 0.01 ws
    -- NOTE(review): printing the filter presumably serves to force its
    -- construction before the clock is read again -- confirm.
    print filt
    c <- getCurrentTime
    reportElapsed b c "construct filter"
    -- Print every input line that the filter fails to report as present.
    {-# SCC "query" #-} mapM_ print $ filter (not . (`BF.elem` filt)) ws
    d <- getCurrentTime
    reportElapsed c d "query every element"
  where
    -- 'show' on the result of 'diffUTCTime' already ends in "s", so the
    -- label must not add another unit suffix.
    reportElapsed start end what =
      putStrLn $ show (diffUTCTime end start) ++ " to " ++ what
|