1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
|
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
-- |
-- Module: Filesystem.Path.Internal
-- Copyright: 2010 John Millikin
-- License: MIT
--
-- Maintainer: jmillikin@gmail.com
-- Portability: portable
--
module Filesystem.Path.Internal where
import Prelude hiding (FilePath)
import Control.DeepSeq (NFData, rnf)
import qualified Control.Exception as Exc
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Data.Char (chr, ord)
import Data.Data (Data)
import Data.List (intersperse)
import Data.Ord (comparing)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Text.Encoding.Error (UnicodeException)
import Data.Typeable (Typeable)
-------------------------------------------------------------------------------
-- File Paths
-------------------------------------------------------------------------------
-- | A single path component, kept as a plain 'String'. Characters in the
-- range U+DC80..U+DCFF are treated specially by 'unescape' and
-- @unescapeBytes'@, which map them back to the values 0x80..0xFF.
type Chunk = String
-- | A 'Chunk' naming one directory of a path (see 'pathDirectories').
type Directory = Chunk
-- | A 'Chunk' holding a filename without its extensions (see 'pathBasename').
type Basename = Chunk
-- | A 'Chunk' holding one extension, without its dot (see 'pathExtensions').
type Extension = Chunk
-- | The root of an absolute path. 'rootChunk' gives the exact textual
-- rendering of every constructor.
--
-- The constructor order is significant: the derived 'Ord' instance follows it.
data Root
  = RootPosix
  -- ^ The POSIX root, rendered as a single forward slash.
  | RootWindowsVolume Char Bool
  -- ^ A Windows drive letter. The flag is 'True' when the root is written in
  -- its extended (question-mark prefixed) form.
  | RootWindowsCurrentVolume
  -- ^ The root of the current Windows volume, rendered as a single backslash.
  | RootWindowsUnc String String Bool
  -- ^ A Windows UNC root made of a host and a share. The flag is 'True' when
  -- the root is written in its extended (question-mark prefixed) form.
  | RootWindowsDoubleQMark
  -- ^ The Windows root written as backslash, two question marks, backslash.
  deriving (Eq, Ord, Data, Typeable, Show)
-- | A file path broken into its components.
--
-- 'Eq' and 'Ord' are not derived: the hand-written instances in this module
-- compare the unescaped text of every chunk.
data FilePath = FilePath
  { pathRoot :: Maybe Root
  -- ^ The root, if the path has one.
  , pathDirectories :: [Directory]
  -- ^ The directory chunks, in order.
  , pathBasename :: Maybe Basename
  -- ^ The filename without its extensions, if any.
  , pathExtensions :: [Extension]
  -- ^ The extensions without their dots; 'filenameChunk' puts a dot in front
  -- of each one when rendering.
  }
  deriving (Data, Typeable)
-- | Two paths are equal exactly when the 'Ord' instance ranks them as 'EQ'.
instance Eq FilePath where
  lhs == rhs = case compare lhs rhs of
    EQ -> True
    _ -> False
-- | Paths are ordered by root, then directories, then basename, then
-- extensions; every chunk is compared in its unescaped 'T.Text' form.
instance Ord FilePath where
  compare = comparing sortKey
    where
      -- The tuple whose lexicographic order defines the order of paths.
      sortKey path =
        ( pathRoot path
        , map unescape' (pathDirectories path)
        , fmap unescape' (pathBasename path)
        , map unescape' (pathExtensions path)
        )
-- | Fully evaluates the fields of the constructors that carry data; the
-- nullary constructors have nothing left to force.
instance NFData Root where
  rnf root = case root of
    RootWindowsVolume letter extended ->
      rnf letter `seq` rnf extended
    RootWindowsUnc host share extended ->
      rnf host `seq` rnf share `seq` rnf extended
    RootPosix -> ()
    RootWindowsCurrentVolume -> ()
    RootWindowsDoubleQMark -> ()
-- | Fully evaluates all four components of the path, in field order.
instance NFData FilePath where
  rnf (FilePath root dirs base exts) =
    rnf root `seq` rnf dirs `seq` rnf base `seq` rnf exts
-- | The path that has no root, no directories, no basename and no
-- extensions.
empty :: FilePath
empty = FilePath
  { pathRoot = Nothing
  , pathDirectories = []
  , pathBasename = Nothing
  , pathExtensions = []
  }
-- | The single-dot chunk. 'filenameChunk' uses it as the separator written
-- in front of every extension.
dot :: Chunk
dot = "."
-- | The two-dot chunk.
--
-- NOTE(review): presumably the parent-directory entry; it is not used in
-- this section, so confirm against its callers.
dots :: Chunk
dots = ".."
-- | Render the filename of a path: the basename (or nothing when it is
-- absent) followed by every extension, each preceded by a dot.
filenameChunk :: FilePath -> Chunk
filenameChunk p = concat (basename : intersperse dot ("" : pathExtensions p))
  where
    -- The leading "" above makes 'intersperse' emit a dot before the first
    -- extension; with no extensions it contributes nothing to the result.
    basename = case pathBasename p of
      Nothing -> ""
      Just name -> name
-- | Render an optional root as the text that starts a path. A missing root
-- renders as the empty chunk.
rootChunk :: Maybe Root -> Chunk
rootChunk Nothing = ""
rootChunk (Just root) = render root
  where
    render RootPosix = "/"
    render (RootWindowsVolume letter False) = letter : ":\\"
    render (RootWindowsVolume letter True) = "\\\\?\\" ++ (letter : ":\\")
    render RootWindowsCurrentVolume = "\\"
    render (RootWindowsUnc host share False) = "\\\\" ++ host ++ "\\" ++ share
    render (RootWindowsUnc host share True) = "\\\\?\\UNC\\" ++ host ++ "\\" ++ share
    render RootWindowsDoubleQMark = "\\??\\"
-- | Like 'rootChunk', but with the rendering packed into 'T.Text'.
rootText :: Maybe Root -> T.Text
rootText root = T.pack (rootChunk root)
-- | All directory chunks of a path followed by its rendered filename, which
-- is always present as the last element (possibly as the empty chunk).
directoryChunks :: FilePath -> [Chunk]
directoryChunks path = dirs ++ [filename]
  where
    dirs = pathDirectories path
    filename = filenameChunk path
-------------------------------------------------------------------------------
-- Rules
-------------------------------------------------------------------------------
-- | The type of @platformFormat@ for 'Rules' is conditionally selected at
-- compilation time. As such it is only intended for direct use with external OS
-- functions and code that expects @platformFormat@ to be stable across platforms
-- may fail to subsequently compile on a differing platform.
--
-- For example: on Windows or OSX @platformFormat@ will be 'T.Text',
-- and on Linux it will be 'B.ByteString'.
--
-- If portability is a concern, restrict usage to functions which do not expose
-- @platformFormat@ directly.
--
-- The 'rulesName' field names the rule set; it is the only field printed by
-- the 'Show' instance.
data Rules platformFormat = Rules
  { rulesName :: T.Text
  -- | Check if a 'FilePath' is valid; it must not contain any illegal
  -- characters, and must have a root appropriate to the current
  -- 'Rules'.
  , valid :: FilePath -> Bool
  -- | Split a search path, such as @$PATH@ or @$PYTHONPATH@, into
  -- a list of 'FilePath's.
  --
  -- Note: The type of @platformFormat@ can change depending upon the
  -- underlying compilation platform. Consider using 'splitSearchPathString'
  -- instead. See 'Rules' for more information.
  , splitSearchPath :: platformFormat -> [FilePath]
  -- | splitSearchPathString is like 'splitSearchPath', but takes a string
  -- encoded in the format used by @System.IO@.
  , splitSearchPathString :: String -> [FilePath]
  -- | Attempt to convert a 'FilePath' to human‐readable text.
  --
  -- If the path is decoded successfully, the result is a 'Right'
  -- containing the decoded text. Successfully decoded text can be
  -- converted back to the original path using 'fromText'.
  --
  -- If the path cannot be decoded, the result is a 'Left' containing an
  -- approximation of the original path. If displayed to the user, this
  -- value should be accompanied by some warning that the path has an
  -- invalid encoding. Approximated text cannot be converted back to the
  -- original path.
  --
  -- This function ignores the user’s locale, and assumes all
  -- file paths are encoded in UTF8. If you need to display file paths
  -- with an unusual or obscure encoding, use 'encode' and then decode
  -- them manually.
  --
  -- Since: 0.2
  , toText :: FilePath -> Either T.Text T.Text
  -- | Convert human‐readable text into a 'FilePath'.
  --
  -- This function ignores the user’s locale, and assumes all
  -- file paths are encoded in UTF8. If you need to create file paths
  -- with an unusual or obscure encoding, encode them manually and then
  -- use 'decode'.
  --
  -- Since: 0.2
  , fromText :: T.Text -> FilePath
  -- | Convert a 'FilePath' to a platform‐specific format,
  -- suitable for use with external OS functions.
  --
  -- Note: The type of @platformFormat@ can change depending upon the
  -- underlying compilation platform. Consider using 'toText' or
  -- 'encodeString' instead. See 'Rules' for more information.
  --
  -- Since: 0.3
  , encode :: FilePath -> platformFormat
  -- | Convert a 'FilePath' from a platform‐specific format,
  -- suitable for use with external OS functions.
  --
  -- Note: The type of @platformFormat@ can change depending upon the
  -- underlying compilation platform. Consider using 'fromText' or
  -- 'decodeString' instead. See 'Rules' for more information.
  --
  -- Since: 0.3
  , decode :: platformFormat -> FilePath
  -- | Attempt to convert a 'FilePath' to a string suitable for use with
  -- functions in @System.IO@. The contents of this string are
  -- platform‐dependent, and are not guaranteed to be
  -- human‐readable. For converting 'FilePath's to a
  -- human‐readable format, use 'toText'.
  --
  -- Since: 0.3.1
  , encodeString :: FilePath -> String
  -- | Attempt to parse a 'FilePath' from a string suitable for use
  -- with functions in @System.IO@. Do not use this function for parsing
  -- human‐readable paths, as the character set decoding is
  -- platform‐dependent. For converting human‐readable
  -- text to a 'FilePath', use 'fromText'.
  --
  -- Since: 0.3.1
  , decodeString :: String -> FilePath
  }
-- | Shows only the name of the rule set, in constructor-application style
-- (parenthesised when the surrounding precedence is above 10). The function
-- fields are not shown.
instance Show (Rules a) where
  showsPrec prec rules = showParen (prec > 10) body
    where
      body = showString "Rules " . shows (rulesName rules)
-- | Turn text into a 'Chunk' by unpacking it character by character.
escape :: T.Text -> Chunk
escape = T.unpack
-- | Convert a 'Chunk' to text, reporting whether the conversion was exact.
--
-- If the chunk contains no character in U+DC80..U+DCFF it is packed as is
-- and the flag is 'True'. Otherwise every such character is shifted down by
-- 0xDC00 (landing in U+0080..U+00FF) and the flag is 'False'.
unescape :: Chunk -> (T.Text, Bool)
unescape cs
  | any isEscaped cs = (T.pack (map restore cs), False)
  | otherwise = (T.pack cs, True)
  where
    -- Does this character lie in the escape range?
    isEscaped c = code >= 0xDC80 && code <= 0xDCFF
      where
        code = ord c
    -- Shift an escaped character down; leave any other character alone.
    restore c
      | isEscaped c = chr (ord c - 0xDC00)
      | otherwise = c
-- | The text produced by 'unescape', without the exactness flag.
unescape' :: Chunk -> T.Text
unescape' cs = fst (unescape cs)
-- | Convert a 'Chunk' to bytes.
--
-- A chunk with no character in U+DC80..U+DCFF is encoded as UTF-8 in one go.
-- Otherwise each character in that range becomes the single byte obtained by
-- subtracting 0xDC00, and every other character is UTF-8 encoded on its own.
unescapeBytes' :: Chunk -> B.ByteString
unescapeBytes' cs
  | any isEscaped cs = B8.concat (map toBytes cs)
  | otherwise = TE.encodeUtf8 (T.pack cs)
  where
    -- Does this character lie in the escape range?
    isEscaped c = code >= 0xDC80 && code <= 0xDCFF
      where
        code = ord c
    -- The bytes contributed by one character of a chunk that needs escaping.
    toBytes c
      | isEscaped c = B8.singleton (chr (ord c - 0xDC00))
      | otherwise = TE.encodeUtf8 (T.singleton c)
-- | Split a list at every element satisfying the predicate. The separators
-- themselves are dropped.
--
-- The result is never empty: an empty input yields a single empty piece, and
-- a leading, trailing or doubled separator yields an empty piece at that
-- position.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy p = loop
  where
    -- Matching on the result of 'break' makes the "separator found" case
    -- explicit, so no partial function such as 'tail' is needed.
    loop xs = case break p xs of
      (chunk, []) -> [chunk]
      (chunk, _ : rest) -> chunk : loop rest
-- | Split text at every character satisfying the predicate.
--
-- The implementation is picked by CPP from the version of the text package:
-- from 0.11.0 on it is @T.split@, before that it is @T.splitBy@.
textSplitBy :: (Char -> Bool) -> T.Text -> [T.Text]
#if MIN_VERSION_text(0,11,0)
textSplitBy = T.split
#else
textSplitBy = T.splitBy
#endif
-- | Break a filename into its basename and its extensions.
--
-- An empty filename has neither. Otherwise any run of leading dots stays
-- attached to the basename, and the remainder is split at every dot: the
-- first piece completes the basename and the other pieces are the
-- extensions.
parseFilename :: Chunk -> (Maybe Basename, [Extension])
parseFilename "" = (Nothing, [])
parseFilename filename =
  case splitBy (== '.') remainder of
    [] -> (withLeadingDots "", [])
    (name : exts) -> (withLeadingDots name, exts)
  where
    (leadingDots, remainder) = span (== '.') filename
    -- Re-attach the leading dots; a completely empty result means there is
    -- no basename.
    withLeadingDots base
      | null joined = Nothing
      | otherwise = Just joined
      where
        joined = leadingDots ++ base
-- | Decode bytes as UTF-8, giving 'Nothing' instead of the decoding error
-- when the bytes are not valid UTF-8.
maybeDecodeUtf8 :: B.ByteString -> Maybe T.Text
maybeDecodeUtf8 = either (const Nothing) Just . TE.decodeUtf8'
|