File: Create.hs

package info (click to toggle)
haskell-publicsuffixlist 0.1-13
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 1,064 kB
  • sloc: haskell: 222; makefile: 4
file content (73 lines) | stat: -rw-r--r-- 2,618 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE CPP #-}

{-|
This script parses the public suffix list, and constructs a data structure which can
be used with the isSuffix function in Lookup.hs. It exports a GSink which produces
the opaque 'DataStructure' and can be fed any Source as input.

This makes an few assumption about the information in the public suffix list:
namely, that no rule is a suffix of another rule. For example, if there is a rule
abc.def.ghi
then then is no other rule
def.ghi
or
!def.ghi

The actual data structure involved here is a tree where the nodes have no value and
the edges are DNS labels. There are two trees: one to handle the exception rules,
and one to handle the regular rules.
-}

module Network.PublicSuffixList.Create (PublicSuffixListException, sink) where

import           Control.Exception
import           Control.Monad.Catch (MonadThrow)
import qualified Data.ByteString      as BS
import qualified Data.Conduit         as C
import qualified Data.Conduit.List    as CL
import qualified Data.Conduit.Text    as CT
import           Data.Default
import qualified Data.Map             as M
import qualified Data.Text            as T
import           Data.Typeable
import           Text.IDNA

import           Network.PublicSuffixList.Types

data PublicSuffixListException = PublicSuffixListException
  deriving (Show, Typeable)

instance Exception PublicSuffixListException

insert :: (Ord e) => Tree e -> [e] -> Tree e
insert _ [] = def
insert t (p : ps) = case M.lookup p $ children t of
  Nothing -> t { children = M.insert p (insert def ps) $ children t }
  Just l -> t { children = M.insert p (insert l ps) $ children t }

foldingFunction :: DataStructure -> T.Text -> DataStructure
foldingFunction d@(rules, exceptions) s'
  | T.null s = d
  | T.take 2 s == "//" = d
  | T.head s == '!' = (rules, insert exceptions $ labelList $ T.tail s)
  | otherwise = (insert rules $ labelList s, exceptions)
  where ss = filter (not . T.null) $ T.words s'
        s
          | null ss = ""
          | otherwise = head ss
        labelList = reverse . map internationalize . T.split (== '.')
        internationalize str
          | str == "*" = str
          | otherwise = case toASCII False True $ T.toLower str of
                          Just x -> x
                          Nothing -> throw PublicSuffixListException

{-
Generate the opaque 'DataStructure'
-}
sink :: MonadThrow m => C.Sink BS.ByteString m DataStructure
sink = CT.decode CT.utf8 C.=$ CT.lines C.=$ CL.fold foldingFunction def