File: Hash.hs

package info (click to toggle)
ghc 9.0.2-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 177,780 kB
  • sloc: haskell: 494,441; ansic: 70,262; javascript: 9,423; sh: 8,537; python: 2,646; asm: 1,725; makefile: 1,333; xml: 196; cpp: 167; perl: 143; ruby: 84; lisp: 7
file content (88 lines) | stat: -rw-r--r-- 2,612 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
{-# LANGUAGE BangPatterns #-}

import qualified Data.HashTable as T
import System.Environment
import Control.Applicative
import Data.List
import Data.Ord
import qualified Data.Map as M
import qualified Data.ByteString.Char8 as S

import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Unsafe   as S
import Foreign


type Table = T.HashTable S.ByteString Int

newTable :: IO Table
newTable = T.new (==) hash

update :: Table -> S.ByteString -> IO ()
update t w = do
    old <- T.lookup t w
    case old of
        Just c -> do T.update t w $! c + 1; return ()
        _      -> T.insert t w 1

main = do
    [name] <- getArgs
    t <- newTable
    S.words <$> S.readFile name >>= mapM_ (update t)
    xs <- sortBy (flip (comparing snd)) <$> T.toList t
    print (take 20 xs)

------------------------------------------------------------------------
-- a different Ord

hash :: S.ByteString -> Int32
hash ps = go 0 golden
  where
    len = S.length ps

    go :: Int -> Int32 -> Int32
    go !n !acc
             | n == len  = fromIntegral acc
             | otherwise = go (n+1)
                              ((fromIntegral (ps `S.unsafeIndex` n))
                                   * 0xdeadbeef + hashInt32 acc)

golden :: Int32
golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32

hashInt32 :: Int32 -> Int32
hashInt32 x = mulHi x golden + x

-- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply
mulHi :: Int32 -> Int32 -> Int32
mulHi a b = fromIntegral (r `shiftR` 32)
   where r :: Int64
         r = fromIntegral a * fromIntegral b

------------------------------------------------------------------------

newtype OrdString = OrdString S.ByteString
     deriving Show

eq a@(S.PS p s l) b@(S.PS p' s' l')
         | l /= l'            = False    -- short cut on length
         | p == p' && s == s' = True     -- short cut for the same string
         | otherwise          = compare a b == EQ
  where
    compare (S.PS fp1 off1 len1) (S.PS fp2 off2 len2) = S.inlinePerformIO $
        withForeignPtr fp1 $ \p1 ->
            withForeignPtr fp2 $ \p2 ->
                cmp (p1 `plusPtr` off1)
                    (p2 `plusPtr` off2) 0 len1 len2

cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
cmp !p1 !p2 !n len1 len2
      | n == len1 = if n == len2 then return EQ else return LT
      | n == len2 = return GT
      | otherwise = do
          a <- peekByteOff p1 n :: IO Word8
          b <- peekByteOff p2 n
          case a `compare` b of
                EQ -> cmp p1 p2 (n+1) len1 len2
                LT -> return LT
                GT -> return GT