1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
|
-- |
-- Module: System.FilePath.GlobPattern
-- Copyright: Bryan O'Sullivan
-- License: BSD3
-- Maintainer: Bryan O'Sullivan <bos@serpentine.com>
-- Stability: unstable
-- Portability: everywhere
module System.FilePath.GlobPattern (
-- * Glob patterns
-- $syntax
GlobPattern
-- * Matching functions
, (~~)
, (/~)
) where
import Control.Arrow (second)
import Control.Monad (msum)
import Data.Ix (Ix, inRange)
import Data.List (nub)
import Data.Maybe (isJust)
import System.FilePath (pathSeparator)
-- $syntax
--
-- Basic glob pattern syntax is the same as for the Unix shell
-- environment.
--
-- * @*@ matches everything up to a directory separator or end of
-- string.
--
-- * @[/range/]@ matches any character in /range/.
--
-- * @[!/range/]@ matches any character /not/ in /range/.
--
-- There are three extensions to the traditional glob syntax, taken
-- from modern Unix shells.
--
-- * @\\@ escapes a character that might otherwise have special
-- meaning. For a literal @\"\\\"@ character, use @\"\\\\\"@.
--
-- * @**@ matches everything, including a directory separator.
--
-- * @(/s1/|/s2/|/.../)@ matches any of the strings /s1/, /s2/, etc.
-- | Glob pattern type.
type GlobPattern = String
spanClass :: Char -> String -> (String, String)
spanClass c = gs []
where gs _ [] = error "unterminated character class"
gs acc (d:ds) | d == c = (reverse acc, ds)
| d == '\\' = case ds of
(e:es) -> gs (e:'\\':acc) es
_ -> error "unterminated escape"
| otherwise = gs (d:acc) ds
data Ix a => SRange a = SRange [a] [(a, a)]
deriving (Show)
inSRange :: Ix a => a -> SRange a -> Bool
inSRange c (SRange d s) = c `elem` d || any (flip inRange c) s
type CharClass = SRange Char
makeClass :: String -> CharClass
makeClass = makeClass' [] []
where makeClass' :: [(Char, Char)] -> [Char] -> String -> CharClass
makeClass' dense sparse [] = SRange sparse dense
makeClass' dense sparse (a:'-':b:cs) =
makeClass' ((a,b):dense) sparse cs
makeClass' dense sparse (c:cs) = makeClass' dense (c:sparse) cs
data MatchTerm = MatchLiteral String
| MatchAny
| MatchDir
| MatchChar
| MatchClass Bool CharClass
| MatchGroup [String]
deriving (Show)
parseGlob :: GlobPattern -> [MatchTerm]
parseGlob [] = []
parseGlob ('*':'*':cs) = MatchAny : parseGlob cs
parseGlob ('*':cs) = MatchDir : parseGlob cs
parseGlob ('?':cs) = MatchChar : parseGlob cs
parseGlob ('[':cs) = let (cc, ccs) = spanClass ']' cs
cls = case cc of
('!':ccs') -> MatchClass False $ makeClass ccs'
_ -> MatchClass True $ makeClass cc
in cls : parseGlob ccs
parseGlob ('(':cs) = let (gg, ggs) = spanClass ')' cs
in MatchGroup (breakGroup [] gg) : parseGlob ggs
where breakGroup :: String -> String -> [String]
breakGroup acc [] = [reverse acc]
breakGroup _ ['\\'] = error "group: unterminated escape"
breakGroup acc ('\\':c:cs') = breakGroup (c:acc) cs'
breakGroup acc ('|':cs') = reverse acc : breakGroup [] cs'
breakGroup acc (c:cs') = breakGroup (c:acc) cs'
parseGlob ['\\'] = error "glob: unterminated escape"
parseGlob ('\\':c:cs) = MatchLiteral [c] : parseGlob cs
parseGlob (c:cs) = MatchLiteral [c] : parseGlob cs
simplifyTerms :: [MatchTerm] -> [MatchTerm]
simplifyTerms [] = []
simplifyTerms (MatchLiteral []:as) = simplifyTerms as
simplifyTerms (m@(MatchLiteral a):as) =
case simplifyTerms as of
(MatchLiteral b:bs) -> MatchLiteral (a ++ b) : bs
bs -> m : bs
simplifyTerms (MatchClass True (SRange [] []):as) = simplifyTerms as
simplifyTerms (MatchClass True (SRange a@[_] []):as) =
simplifyTerms $ MatchLiteral a : as
simplifyTerms (MatchGroup []:as) = simplifyTerms as
simplifyTerms (MatchGroup gs:as) =
case commonPrefix gs of
(p ,[]) -> simplifyTerms (MatchLiteral p : as)
("",ss) -> MatchGroup ss : simplifyTerms as
(p ,ss) -> simplifyTerms (MatchLiteral p : MatchGroup ss : as)
simplifyTerms (a:as) = a:simplifyTerms as
commonPrefix :: [String] -> (String, [String])
commonPrefix = second nub . pfx ""
where pfx _ [] = ("", [])
pfx acc ss | any null ss = (reverse acc, ss)
| otherwise = let hs = map head ss
h = head hs
in if all (h==) $ tail hs
then pfx (h:acc) $ map tail ss
else (reverse acc, ss)
matchTerms :: [MatchTerm] -> String -> Maybe ()
matchTerms [] [] = return ()
matchTerms [] _ = fail "residual string"
matchTerms (MatchLiteral m:ts) cs = matchLiteral m cs >>= matchTerms ts
where matchLiteral (a:as) (b:bs) | a == b = matchLiteral as bs
matchLiteral [] as = return as
matchLiteral _ _ = fail "not a prefix"
matchTerms (MatchClass k c:ts) cs = matchClass cs >>= matchTerms ts
where matchClass (b:bs) | (inClass && k) || not (inClass || k) = return bs
where inClass = b `inSRange` c
matchClass _ = fail "no match"
matchTerms (MatchGroup g:ts) cs = msum (map matchGroup g)
where matchGroup g = matchTerms (MatchLiteral g : ts) cs
matchTerms [MatchAny] _ = return ()
matchTerms (MatchAny:ts) cs = matchAny cs >>= matchTerms ts
where matchAny [] = fail "no match"
matchAny cs' = case matchTerms ts cs' of
Nothing -> matchAny (tail cs')
_ -> return cs'
matchTerms [MatchDir] cs | pathSeparator `elem` cs = fail "path separator"
| otherwise = return ()
matchTerms (MatchDir:ts) cs = matchDir cs >>= matchTerms ts
where matchDir [] = fail "no match"
matchDir (c:_) | c == pathSeparator = fail "path separator"
matchDir cs' = case matchTerms ts cs' of
Nothing -> matchDir $ tail cs'
_ -> return cs'
matchTerms (MatchChar:_) [] = fail "end of input"
matchTerms (MatchChar:ts) (_:cs) = matchTerms ts cs
-- | Match a file name against a glob pattern.
(~~) :: FilePath -> GlobPattern -> Bool
name ~~ pat = let terms = simplifyTerms (parseGlob pat)
in (isJust . matchTerms terms) name
-- | Match a file name against a glob pattern, but return 'True' if
-- the match /fail/s.
(/~) :: FilePath -> GlobPattern -> Bool
(/~) = (not . ) . (~~)
|