1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
|
{-# LANGUAGE PatternGuards, CPP #-}
{-# OPTIONS_GHC -O2 #-}
-- {-# OPTIONS_GHC -ddump-simpl #-}
-- | Lexing is a slow point, the code below is optimised
module Development.Ninja.Lexer(Lexeme(..), lexer, lexerFile) where
import Control.Arrow
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Unsafe as BS
import Development.Ninja.Type
import qualified Data.ByteString.Internal as Internal
import Foreign
import GHC.Exts
---------------------------------------------------------------------
-- LIBRARY BITS
newtype Str0 = Str0 Str -- null terminated
type S = Ptr Word8
chr :: S -> Char
chr x = Internal.w2c $ Internal.inlinePerformIO $ peek x
inc :: S -> S
inc x = x `plusPtr` 1
{-# INLINE dropWhile0 #-}
dropWhile0 :: (Char -> Bool) -> Str0 -> Str0
dropWhile0 f x = snd $ span0 f x
{-# INLINE span0 #-}
span0 :: (Char -> Bool) -> Str0 -> (Str, Str0)
span0 f x = break0 (not . f) x
{-# INLINE break0 #-}
break0 :: (Char -> Bool) -> Str0 -> (Str, Str0)
break0 f (Str0 bs) = (BS.unsafeTake i bs, Str0 $ BS.unsafeDrop i bs)
where
i = Internal.inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> do
let start = castPtr ptr :: S
let end = go start
return $! Ptr end `minusPtr` start
go s@(Ptr a) | c == '\0' || f c = a
| otherwise = go (inc s)
where c = chr s
{-# INLINE break00 #-}
-- The predicate must return true for '\0'
break00 :: (Char -> Bool) -> Str0 -> (Str, Str0)
break00 f (Str0 bs) = (BS.unsafeTake i bs, Str0 $ BS.unsafeDrop i bs)
where
i = Internal.inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> do
let start = castPtr ptr :: S
let end = go start
return $! Ptr end `minusPtr` start
go s@(Ptr a) | f c = a
| otherwise = go (inc s)
where c = chr s
head0 :: Str0 -> Char
head0 (Str0 x) = Internal.w2c $ BS.unsafeHead x
tail0 :: Str0 -> Str0
tail0 (Str0 x) = Str0 $ BS.unsafeTail x
list0 :: Str0 -> (Char, Str0)
list0 x = (head0 x, tail0 x)
take0 :: Int -> Str0 -> Str
take0 i (Str0 x) = BS.takeWhile (/= '\0') $ BS.take i x
---------------------------------------------------------------------
-- ACTUAL LEXER
-- Lex each line separately, rather than each lexeme
data Lexeme
= LexBind Str Expr -- [indent]foo = bar
| LexBuild [Expr] Str [Expr] -- build foo: bar | baz || qux (| and || are represented as Expr)
| LexInclude Str -- include file
| LexSubninja Str -- include file
| LexRule Str -- rule name
| LexPool Str -- pool name
| LexDefault [Expr] -- default foo bar
| LexDefine Str Expr -- foo = bar
deriving Show
isVar, isVarDot :: Char -> Bool
isVar x = x == '-' || x == '_' || (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || (x >= '0' && x <= '9')
isVarDot x = x == '.' || isVar x
endsDollar :: Str -> Bool
endsDollar x = BS.isSuffixOf (BS.singleton '$') x
dropN :: Str0 -> Str0
dropN x = if head0 x == '\n' then tail0 x else x
dropSpace :: Str0 -> Str0
dropSpace x = dropWhile0 (== ' ') x
lexerFile :: Maybe FilePath -> IO [Lexeme]
lexerFile file = fmap lexer $ maybe BS.getContents BS.readFile file
lexer :: Str -> [Lexeme]
lexer x = lexerLoop $ Str0 $ x `BS.append` BS.pack "\n\n\0"
lexerLoop :: Str0 -> [Lexeme]
lexerLoop c_x | (c,x) <- list0 c_x = case c of
'\r' -> lexerLoop x
'\n' -> lexerLoop x
' ' -> lexBind $ dropSpace x
'#' -> lexerLoop $ dropWhile0 (/= '\n') x
'b' | Just x <- strip "uild " x -> lexBuild $ dropSpace x
'r' | Just x <- strip "ule " x -> lexRule $ dropSpace x
'd' | Just x <- strip "efault " x -> lexDefault $ dropSpace x
'p' | Just x <- strip "ool " x -> lexPool $ dropSpace x
'i' | Just x <- strip "nclude " x -> lexInclude $ dropSpace x
's' | Just x <- strip "ubninja " x -> lexSubninja $ dropSpace x
'\0' -> []
_ -> lexDefine c_x
where
strip str (Str0 x) = if b `BS.isPrefixOf` x then Just $ Str0 $ BS.drop (BS.length b) x else Nothing
where b = BS.pack str
lexBind c_x | (c,x) <- list0 c_x = case c of
'\r' -> lexerLoop x
'\n' -> lexerLoop x
'#' -> lexerLoop $ dropWhile0 (/= '\n') x
'\0' -> []
_ -> lexxBind LexBind c_x
lexBuild x
| (outputs,x) <- lexxExprs True x
, (rule,x) <- span0 isVar $ dropSpace x
, (deps,x) <- lexxExprs False $ dropSpace x
= LexBuild outputs rule deps : lexerLoop x
lexDefault x
| (files,x) <- lexxExprs False x
= LexDefault files : lexerLoop x
lexRule x = lexxName LexRule x
lexPool x = lexxName LexPool x
lexInclude x = lexxFile LexInclude x
lexSubninja x = lexxFile LexSubninja x
lexDefine x = lexxBind LexDefine x
lexxBind ctor x
| (var,x) <- span0 isVarDot x
, ('=',x) <- list0 $ dropSpace x
, (exp,x) <- lexxExpr False False $ dropSpace x
= ctor var exp : lexerLoop x
lexxBind _ x = error $ show ("parse failed when parsing binding", take0 100 x)
lexxFile ctor x
| (file,rest) <- splitLineCont x
= ctor file : lexerLoop rest
lexxName ctor x
| (name,rest) <- splitLineCont x
= ctor name : lexerLoop rest
lexxExprs :: Bool -> Str0 -> ([Expr], Str0)
lexxExprs stopColon x = case lexxExpr stopColon True x of
(a,c_x) | c <- head0 c_x, x <- tail0 c_x -> case c of
' ' -> first (a:) $ lexxExprs stopColon $ dropSpace x
':' | stopColon -> ([a], x)
_ | stopColon -> error "expected a colon"
'\r' -> a $: dropN x
'\n' -> a $: x
'\0' -> a $: c_x
where
Exprs [] $: x = ([], x)
a $: x = ([a], x)
{-# NOINLINE lexxExpr #-}
lexxExpr :: Bool -> Bool -> Str0 -> (Expr, Str0) -- snd will start with one of " :\n\r" or be empty
lexxExpr stopColon stopSpace = first exprs . f
where
exprs [x] = x
exprs xs = Exprs xs
special = case (stopColon, stopSpace) of
(True , True ) -> \x -> x <= ':' && (x == ':' || x == ' ' || x == '$' || x == '\r' || x == '\n' || x == '\0')
(True , False) -> \x -> x <= ':' && (x == ':' || x == '$' || x == '\r' || x == '\n' || x == '\0')
(False, True ) -> \x -> x <= '$' && ( x == ' ' || x == '$' || x == '\r' || x == '\n' || x == '\0')
(False, False) -> \x -> x <= '$' && ( x == '$' || x == '\r' || x == '\n' || x == '\0')
f x = case break00 special x of (a,x) -> if BS.null a then g x else Lit a $: g x
x $: (xs,y) = (x:xs,y)
g x | head0 x /= '$' = ([], x)
g x | c_x <- tail0 x, (c,x) <- list0 c_x = case c of
'$' -> Lit (BS.singleton '$') $: f x
' ' -> Lit (BS.singleton ' ') $: f x
':' -> Lit (BS.singleton ':') $: f x
'\n' -> f $ dropSpace x
'\r' -> f $ dropSpace $ dropN x
'{' | (name,x) <- span0 isVarDot x, not $ BS.null name, ('}',x) <- list0 x -> Var name $: f x
_ | (name,x) <- span0 isVar c_x, not $ BS.null name -> Var name $: f x
_ -> error $ "Unexpect $ followed by unexpected stuff"
splitLineCont :: Str0 -> (Str, Str0)
splitLineCont x = first BS.concat $ f x
where
f x = if not $ endsDollar a then ([a], b) else let (c,d) = f $ dropSpace b in (BS.init a : c, d)
where (a,b) = splitLineCR x
splitLineCR :: Str0 -> (Str, Str0)
splitLineCR x = if BS.singleton '\r' `BS.isSuffixOf` a then (BS.init a, dropN b) else (a, dropN b)
where (a,b) = break0 (== '\n') x
|