1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
|
-- (c) The GHC Team
--
-- Functions to evaluate whether or not a string is a valid identifier.
-- There is considerable overlap between the logic here and the logic
-- in GHC.Parser.Lexer, but sadly there seems to be no way to merge them.
module GHC.Utils.Lexeme (
-- * Lexical characteristics of Haskell names
-- | Use these functions to figure out what kind of name a 'FastString'
-- represents; these functions do /not/ check that the identifier
-- is valid.
isLexCon, isLexVar, isLexId, isLexSym,
isLexConId, isLexConSym, isLexVarId, isLexVarSym,
startsVarSym, startsVarId, startsConSym, startsConId,
-- * Validating identifiers
-- | These functions (working over plain old 'String's) check
-- to make sure that the identifier is valid.
okVarOcc, okConOcc, okTcOcc,
okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
-- Some of the exports above are not used within GHC, but may
-- be of value to GHC API users.
) where
import GHC.Prelude
import GHC.Data.FastString
import Data.Char
import qualified Data.Set as Set
import GHC.Lexeme
{-
************************************************************************
* *
Lexical categories
* *
************************************************************************
These functions test strings to see if they fit the lexical categories
defined in the Haskell report.
Note [Classification of generated names]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some names generated for internal use can show up in debugging output,
e.g. when using -ddump-simpl. These generated names start with a $
but should still be pretty-printed using prefix notation. We make sure
this is the case in isLexVarSym by only classifying a name as a symbol
if all its characters are symbols, not just its first one.
-}
isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- A constructor name: either the prefix (alphanumeric) or the infix
-- (symbolic) form.
isLexCon fs
  | isLexConId fs = True
  | otherwise = isLexConSym fs

-- A variable name: either the prefix (alphanumeric) or the infix
-- (symbolic) form.
isLexVar fs
  | isLexVarId fs = True
  | otherwise = isLexVarSym fs

-- A prefix (alphanumeric) name: constructor or variable.
isLexId fs
  | isLexConId fs = True
  | otherwise = isLexVarId fs

-- An infix (symbolic) name: constructor or variable.
isLexSym fs
  | isLexConSym fs = True
  | otherwise = isLexVarSym fs
-------------
-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)".
-- The empty name is never a constructor; "[]" is accepted by name,
-- anything else is decided by its first character.
isLexConId name
  | (c : _) <- unpackFS name = name == fsLit "[]" || startsConId c
  | otherwise = False
-- Ordinary prefix identifiers, e.g. "x", "_x".
-- The empty name is rejected; otherwise only the first character is
-- inspected.
isLexVarId name
  | (c : _) <- unpackFS name = startsVarId c
  | otherwise = False
-- Infix type or data constructors, e.g. ":-:", ":", "->".
-- The empty name is rejected; "->" is accepted by name, anything else
-- is decided by its first character.
isLexConSym name
  | (c : _) <- unpackFS name = name == fsLit "->" || startsConSym c
  | otherwise = False
-- Infix identifiers, e.g. "+".
-- "~R#" is accepted by name. Otherwise the first character must be able
-- to start a symbol and every remaining character must be a symbol
-- character; see Note [Classification of generated names].
isLexVarSym fs
  | fs == fsLit "~R#" = True
  | nullFS fs = False
  | otherwise =
      case unpackFS fs of
        [] -> False
        c : rest -> startsVarSym c && all isVarSymChar rest
{-
************************************************************************
* *
Detecting valid names for Template Haskell
* *
************************************************************************
-}
----------------------
-- External interface
----------------------
-- | Is this an acceptable variable name?
--
-- The first character selects the check: an identifier start defers to
-- 'okVarIdOcc', a symbol start defers to 'okVarSymOcc'. Anything else,
-- including the empty string, is rejected.
okVarOcc :: String -> Bool
okVarOcc str = case str of
  c : _
    | startsVarId c -> okVarIdOcc str
    | startsVarSym c -> okVarSymOcc str
  _ -> False
-- | Is this an acceptable constructor name?
--
-- The first character selects the check: 'okConIdOcc' for an
-- alphanumeric constructor start, 'okConSymOcc' for a symbolic one.
-- The list constructor @[]@ is accepted by name. Anything else,
-- including the empty string, is rejected.
okConOcc :: String -> Bool
okConOcc str = case str of
  c : _
    | startsConId c -> okConIdOcc str
    | startsConSym c -> okConSymOcc str
    | str == "[]" -> True
  _ -> False
-- | Is this an acceptable type name?
--
-- The names @[]@, @->@ and @~@ are accepted outright. Otherwise the
-- first character selects between 'okConIdOcc', 'okConSymOcc' and
-- 'okVarSymOcc'; anything else, including the empty string, is rejected.
okTcOcc :: String -> Bool
okTcOcc str
  | str `elem` ["[]", "->", "~"] = True
  | otherwise =
      case str of
        c : _
          | startsConId c -> okConIdOcc str
          | startsConSym c -> okConSymOcc str
          | startsVarSym c -> okVarSymOcc str
        _ -> False
-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
--
-- The name must pass 'okIdOcc' and must not be a reserved identifier.
okVarIdOcc :: String -> Bool
okVarIdOcc str = okIdOcc str && (isWildcard || notReserved)
  where
    -- "_" is admitted as a valid identifier even though it is listed in
    -- 'reservedIds'. Required to support typed holes in Template
    -- Haskell. See #10267
    isWildcard = str == "_"
    notReserved = str `Set.notMember` reservedIds
-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
--
-- Every character must satisfy 'okSymChar', the name must not be a
-- reserved operator, and it must not consist solely of two or more
-- dashes (see 'isDashes').
okVarSymOcc :: String -> Bool
okVarSymOcc str =
  and
    [ all okSymChar str
    , str `Set.notMember` reservedOps
    , not (isDashes str)
    ]
-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
--
-- Besides ordinary identifiers (see 'okIdOcc'), this accepts the names
-- of boxed tuples, unboxed tuples (#12407) and unboxed sums (#12514).
okConIdOcc :: String -> Bool
okConIdOcc str =
  or
    [ okIdOcc str
    , boxedTupleName str
    , unboxedTupleName str
    , unboxedSumName str
    ]
  where
    -- A boxed tuple name opens with "(".
    boxedTupleName ('(' : rest) = tupleTail True rest
    boxedTupleName _ = False

    -- An unboxed tuple name opens with "(#".
    unboxedTupleName ('(' : '#' : rest) = tupleTail False rest
    unboxedTupleName _ = False

    -- The rest of a tuple name: commas and whitespace, ended by ")" for a
    -- boxed tuple or by "#)" for an unboxed one.
    tupleTail boxed s = case s of
      ")" | boxed -> True
      "#)" | not boxed -> True
      ',' : rest -> tupleTail boxed rest
      ch : rest | isSpace ch -> tupleTail boxed rest
      _ -> False

    -- An unboxed sum name opens with "(#".
    unboxedSumName ('(' : '#' : rest) = sumTail False rest
    unboxedSumName _ = False

    -- The rest of a sum name: bars and whitespace, with at most one
    -- underscore, ended by "#)". The flag records whether an underscore
    -- has already been consumed.
    sumTail seenUnderscore s = case s of
      "#)" -> True
      '|' : rest -> sumTail seenUnderscore rest
      '_' : rest | not seenUnderscore -> sumTail True rest
      ch : rest | isSpace ch -> sumTail seenUnderscore rest
      _ -> False
-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
--
-- The name @:@ is accepted outright even though it is listed in
-- 'reservedOps'. Any other name must consist only of 'okSymChar'
-- characters and must not be a reserved operator.
okConSymOcc :: String -> Bool
okConSymOcc str
  | str == ":" = True
  | otherwise = all okSymChar str && str `Set.notMember` reservedOps
----------------------
-- Internal functions
----------------------
-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
--
-- The leading run of 'okIdChar' characters is skipped; whatever remains
-- must consist only of @#@ (-XMagicHash allows a suffix of hashes).
-- An empty remainder is fine, since 'all' holds for the empty list.
okIdOcc :: String -> Bool
okIdOcc = all (== '#') . dropWhile okIdChar
-- | Is this character acceptable in an identifier (after the first letter)?
-- See alexGetByte in GHC.Parser.Lexer
--
-- Accepts the apostrophe, the underscore, and any character whose
-- Unicode general category is one of the letter, mark or number
-- categories listed below.
okIdChar :: Char -> Bool
okIdChar c = c == '\'' || c == '_' || generalCategory c `elem` idCategories
  where
    idCategories =
      [ UppercaseLetter
      , LowercaseLetter
      , TitlecaseLetter
      , ModifierLetter -- See #10196
      , OtherLetter -- See #1103
      , NonSpacingMark -- See #7650
      , DecimalNumber
      , OtherNumber -- See #4373
      ]
-- | The set of reserved identifiers. Taken from section 2.4 of the 2010
-- Report.
reservedIds :: Set.Set String
reservedIds =
  Set.fromList
    [ "_"
    , "case"
    , "class"
    , "data"
    , "default"
    , "deriving"
    , "do"
    , "else"
    , "foreign"
    , "if"
    , "import"
    , "in"
    , "infix"
    , "infixl"
    , "infixr"
    , "instance"
    , "let"
    , "module"
    , "newtype"
    , "of"
    , "then"
    , "type"
    , "where"
    ]
-- | The set of reserved operators. Taken from section 2.4 of the 2010
-- Report, excluding @\@@ and @~@ that are allowed by GHC (see GHC
-- Proposal #229).
reservedOps :: Set.Set String
reservedOps =
  Set.fromList
    [ ".."
    , ":"
    , "::"
    , "="
    , "\\"
    , "|"
    , "<-"
    , "->"
    , "=>"
    ]
-- | Does this string contain only dashes and has at least 2 of them?
isDashes :: String -> Bool
isDashes str = all (== '-') str && not (null (drop 1 str))
|