File: Words.hs

package info (click to toggle)
haskell-bloomfilter 2.0.1.2-3
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 200 kB
  • sloc: ansic: 852; haskell: 709; makefile: 13
file content (41 lines) | stat: -rw-r--r-- 1,490 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
-- This program is intended for performance analysis.  It simply
-- builds a Bloom filter from a list of words, one per line, and
-- queries it exhaustively.

module Main (main) where

import Control.Monad (forM_, mapM_)
import qualified Data.BloomFilter as BF
import Data.BloomFilter.Hash (cheapHashes)
import Data.BloomFilter.Easy (easyList, suggestSizing)
import qualified Data.ByteString.Lazy.Char8 as B
import Data.Time.Clock (diffUTCTime, getCurrentTime)
import System.Environment (getArgs)

-- Two ways of building a filter from a requested false positive rate
-- and the list of elements to insert.
conservative, aggressive :: Double -> [B.ByteString] -> BF.Bloom B.ByteString

-- Delegate sizing and hashing entirely to 'easyList'.
conservative fpr xs = easyList fpr xs

-- Build a filter that spends more space to do less hashing: start
-- from the sizing 'suggestSizing' proposes for the input length and
-- false positive rate, then use k fewer hash functions and a size k
-- times as large (k = 3).
--
-- The hash count is clamped to at least one.  If 'suggestSizing'
-- proposes k or fewer hashes (presumably possible for lenient false
-- positive rates -- confirm against its documentation), the plain
-- subtraction would request zero or a negative number of hashes,
-- which cannot yield a meaningful filter.
aggressive fpr xs
    = let (size, numHashes) = suggestSizing (length xs) fpr
          k = 3
          hashCount = max 1 (numHashes - k)
      in BF.fromList (cheapHashes hashCount) (size * k) xs

-- The filter constructor that 'main' profiles.  Rebind this to
-- 'aggressive' to measure the alternative sizing strategy instead.
testFunction :: Double -> [B.ByteString] -> BF.Bloom B.ByteString
testFunction = conservative

-- Benchmark driver.  For each file named on the command line (or
-- /usr/share/dict/words when no arguments are given) it:
--
--   1. reads the file and counts its lines,
--   2. builds a filter from those lines with 'testFunction', asking
--      for a false positive rate of 0.01,
--   3. looks every line up in the filter, printing any line that the
--      filter does not report as a member.
--
-- The elapsed wall-clock time of each phase is printed after it.
main :: IO ()
main = do
  args <- getArgs
  let files | null args = ["/usr/share/dict/words"]
            | otherwise = args
  forM_ files $ \file -> do
    a <- getCurrentTime
    -- Bound as 'ws' so the Prelude's 'words' is not shadowed.
    ws <- B.lines `fmap` B.readFile file
    -- The read is lazy; 'length' has to walk the whole list, so the
    -- input is fully consumed before the next timestamp is taken.
    putStrLn $ {-# SCC "words/length" #-} show (length ws) ++ " words"
    b <- getCurrentTime
    -- 'show' on a NominalDiffTime already ends with the unit suffix
    -- "s", so the message must not add a second one.
    putStrLn $ show (diffUTCTime b a) ++ " to count words"
    let filt = {-# SCC "construct" #-} testFunction 0.01 ws
    -- The let binding above is lazy: printing the filter is what
    -- forces it to be evaluated, so this has to precede the timestamp.
    print filt
    c <- getCurrentTime
    putStrLn $ show (diffUTCTime c b) ++ " to construct filter"
    -- Every line was inserted, so anything printed here is a line the
    -- filter failed to recognise.
    {-# SCC "query" #-} mapM_ print $ filter (not . (`BF.elem` filt)) ws
    d <- getCurrentTime
    putStrLn $ show (diffUTCTime d c) ++ " to query every element"