1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
|
{-# LANGUAGE BangPatterns #-}
import qualified Data.HashTable as T
import System.Environment
import Control.Applicative
import Data.List
import Data.Ord
import qualified Data.Map as M
import qualified Data.ByteString.Char8 as S
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Unsafe as S
import Foreign
type Table = T.HashTable S.ByteString Int
newTable :: IO Table
newTable = T.new (==) hash
update :: Table -> S.ByteString -> IO ()
update t w = do
old <- T.lookup t w
case old of
Just c -> do T.update t w $! c + 1; return ()
_ -> T.insert t w 1
main = do
[name] <- getArgs
t <- newTable
S.words <$> S.readFile name >>= mapM_ (update t)
xs <- sortBy (flip (comparing snd)) <$> T.toList t
print (take 20 xs)
------------------------------------------------------------------------
-- a different Ord
hash :: S.ByteString -> Int32
hash ps = go 0 golden
where
len = S.length ps
go :: Int -> Int32 -> Int32
go !n !acc
| n == len = fromIntegral acc
| otherwise = go (n+1)
((fromIntegral (ps `S.unsafeIndex` n))
* 0xdeadbeef + hashInt32 acc)
golden :: Int32
golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32
hashInt32 :: Int32 -> Int32
hashInt32 x = mulHi x golden + x
-- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply
mulHi :: Int32 -> Int32 -> Int32
mulHi a b = fromIntegral (r `shiftR` 32)
where r :: Int64
r = fromIntegral a * fromIntegral b
------------------------------------------------------------------------
newtype OrdString = OrdString S.ByteString
deriving Show
eq a@(S.PS p s l) b@(S.PS p' s' l')
| l /= l' = False -- short cut on length
| p == p' && s == s' = True -- short cut for the same string
| otherwise = compare a b == EQ
where
compare (S.PS fp1 off1 len1) (S.PS fp2 off2 len2) = S.inlinePerformIO $
withForeignPtr fp1 $ \p1 ->
withForeignPtr fp2 $ \p2 ->
cmp (p1 `plusPtr` off1)
(p2 `plusPtr` off2) 0 len1 len2
cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
cmp !p1 !p2 !n len1 len2
| n == len1 = if n == len2 then return EQ else return LT
| n == len2 = return GT
| otherwise = do
a <- peekByteOff p1 n :: IO Word8
b <- peekByteOff p2 n
case a `compare` b of
EQ -> cmp p1 p2 (n+1) len1 len2
LT -> return LT
GT -> return GT
|