File: GlobPattern.hs

package info (click to toggle)
haskell-filemanip 0.3.6.3-13
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 96 kB
  • sloc: haskell: 520; makefile: 3
file content (177 lines) | stat: -rw-r--r-- 6,733 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
-- |
-- Module:      System.FilePath.GlobPattern
-- Copyright:   Bryan O'Sullivan
-- License:     BSD3
-- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
-- Stability:   unstable
-- Portability: everywhere
module System.FilePath.GlobPattern (
    -- * Glob patterns
    -- $syntax
      GlobPattern
    -- * Matching functions
    , (~~)
    , (/~)
    ) where

import Control.Arrow (second)
import Control.Monad (msum)
import Data.Ix (Ix, inRange)
import Data.List (nub)
import Data.Maybe (isJust)
import System.FilePath (pathSeparator)

-- $syntax
--
-- Basic glob pattern syntax is the same as for the Unix shell
-- environment.
-- 
-- * @*@ matches everything up to a directory separator or end of
-- string.
--
-- * @[/range/]@ matches any character in /range/.
-- 
-- * @[!/range/]@ matches any character /not/ in /range/.
-- 
-- There are three extensions to the traditional glob syntax, taken
-- from modern Unix shells.
--
-- * @\\@ escapes a character that might otherwise have special
-- meaning.  For a literal @\"\\\"@ character, use @\"\\\\\"@.
-- 
-- * @**@ matches everything, including a directory separator.
-- 
-- * @(/s1/|/s2/|/.../)@ matches any of the strings /s1/, /s2/, etc.

-- | Glob pattern type.
type GlobPattern = String

spanClass :: Char -> String -> (String, String)

spanClass c = gs []
    where gs _ [] = error "unterminated character class"
          gs acc (d:ds) | d == c = (reverse acc, ds)
                        | d == '\\' = case ds of
                                     (e:es) -> gs (e:'\\':acc) es
                                     _ -> error "unterminated escape"
                        | otherwise = gs (d:acc) ds

data Ix a => SRange a = SRange [a] [(a, a)]
                      deriving (Show)

inSRange :: Ix a => a -> SRange a -> Bool

inSRange c (SRange d s) = c `elem` d || any (flip inRange c) s

type CharClass = SRange Char

makeClass :: String -> CharClass

makeClass = makeClass' [] []
    where makeClass' :: [(Char, Char)] -> [Char] -> String -> CharClass
          makeClass' dense sparse [] = SRange sparse dense
          makeClass' dense sparse (a:'-':b:cs) =
              makeClass' ((a,b):dense) sparse cs
          makeClass' dense sparse (c:cs) = makeClass' dense (c:sparse) cs

data MatchTerm = MatchLiteral String
               | MatchAny
               | MatchDir
               | MatchChar
               | MatchClass Bool CharClass
               | MatchGroup [String]
                 deriving (Show)

parseGlob :: GlobPattern -> [MatchTerm]
             
parseGlob [] = []
parseGlob ('*':'*':cs) = MatchAny : parseGlob cs
parseGlob ('*':cs) = MatchDir : parseGlob cs
parseGlob ('?':cs) = MatchChar : parseGlob cs
parseGlob ('[':cs) = let (cc, ccs) = spanClass ']' cs
                         cls = case cc of
                               ('!':ccs') -> MatchClass False $ makeClass ccs'
                               _ -> MatchClass True $ makeClass cc
                     in cls : parseGlob ccs
parseGlob ('(':cs) = let (gg, ggs) = spanClass ')' cs
                     in MatchGroup (breakGroup [] gg) : parseGlob ggs
    where breakGroup :: String -> String -> [String]
          breakGroup acc [] = [reverse acc]
          breakGroup _ ['\\'] = error "group: unterminated escape"
          breakGroup acc ('\\':c:cs') = breakGroup (c:acc) cs'
          breakGroup acc ('|':cs') = reverse acc : breakGroup [] cs'
          breakGroup acc (c:cs') = breakGroup (c:acc) cs'
parseGlob ['\\'] = error "glob: unterminated escape"
parseGlob ('\\':c:cs) = MatchLiteral [c] : parseGlob cs
parseGlob (c:cs) = MatchLiteral [c] : parseGlob cs

simplifyTerms :: [MatchTerm] -> [MatchTerm]
simplifyTerms [] = []
simplifyTerms (MatchLiteral []:as) = simplifyTerms as
simplifyTerms (m@(MatchLiteral a):as) =
    case simplifyTerms as of
    (MatchLiteral b:bs) -> MatchLiteral (a ++ b) : bs
    bs -> m : bs
simplifyTerms (MatchClass True (SRange [] []):as) = simplifyTerms as
simplifyTerms (MatchClass True (SRange a@[_] []):as) =
    simplifyTerms $ MatchLiteral a : as
simplifyTerms (MatchGroup []:as) = simplifyTerms as
simplifyTerms (MatchGroup gs:as) =
    case commonPrefix gs of
    (p ,[]) -> simplifyTerms (MatchLiteral p : as)
    ("",ss) -> MatchGroup ss : simplifyTerms as
    (p ,ss) -> simplifyTerms (MatchLiteral p : MatchGroup ss : as)
simplifyTerms (a:as) = a:simplifyTerms as

commonPrefix :: [String] -> (String, [String])
commonPrefix = second nub . pfx ""
    where pfx _ [] = ("", [])
          pfx acc ss | any null ss = (reverse acc, ss)
                     | otherwise = let hs = map head ss
                                       h = head hs
                                   in if all (h==) $ tail hs
                                      then pfx (h:acc) $ map tail ss
                                      else (reverse acc, ss)

matchTerms :: [MatchTerm] -> String -> Maybe ()

matchTerms [] [] = return ()
matchTerms [] _ = fail "residual string"
matchTerms (MatchLiteral m:ts) cs = matchLiteral m cs >>= matchTerms ts
    where matchLiteral (a:as) (b:bs) | a == b = matchLiteral as bs
          matchLiteral [] as = return as
          matchLiteral _ _ = fail "not a prefix"
matchTerms (MatchClass k c:ts) cs = matchClass cs >>= matchTerms ts
    where matchClass (b:bs) | (inClass && k) || not (inClass || k) = return bs
                            where inClass = b `inSRange` c
          matchClass _ = fail "no match"
matchTerms (MatchGroup g:ts) cs = msum (map matchGroup g)
    where matchGroup g = matchTerms (MatchLiteral g : ts) cs
matchTerms [MatchAny] _ = return ()
matchTerms (MatchAny:ts) cs = matchAny cs >>= matchTerms ts
    where matchAny [] = fail "no match"
          matchAny cs' = case matchTerms ts cs' of
                          Nothing -> matchAny (tail cs')
                          _ -> return cs'
matchTerms [MatchDir] cs | pathSeparator `elem` cs = fail "path separator"
                         | otherwise = return ()
matchTerms (MatchDir:ts) cs = matchDir cs >>= matchTerms ts
    where matchDir [] = fail "no match"
          matchDir (c:_) | c == pathSeparator = fail "path separator"
          matchDir cs' = case matchTerms ts cs' of
                         Nothing -> matchDir $ tail cs'
                         _ -> return cs'
matchTerms (MatchChar:_) [] = fail "end of input"
matchTerms (MatchChar:ts) (_:cs) = matchTerms ts cs

-- | Match a file name against a glob pattern.
(~~) :: FilePath -> GlobPattern -> Bool

name ~~ pat = let terms = simplifyTerms (parseGlob pat)
              in (isJust . matchTerms terms) name

-- | Match a file name against a glob pattern, but return 'True' if
-- the match /fail/s.
(/~) :: FilePath -> GlobPattern -> Bool

(/~) = (not . ) . (~~)