File: Lexer.hs

package info (click to toggle)
haskell-shake 0.13.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 888 kB
  • ctags: 127
  • sloc: haskell: 6,388; makefile: 35; ansic: 25; sh: 2
file content (219 lines) | stat: -rw-r--r-- 7,458 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
{-# LANGUAGE PatternGuards, CPP #-}
{-# OPTIONS_GHC -O2 #-}
-- {-# OPTIONS_GHC -ddump-simpl #-}

-- | Lexing is a slow point, the code below is optimised
module Development.Ninja.Lexer(Lexeme(..), lexer, lexerFile) where

import Control.Arrow
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Unsafe as BS
import Development.Ninja.Type
import qualified Data.ByteString.Internal as Internal
import Foreign
import GHC.Exts

---------------------------------------------------------------------
-- LIBRARY BITS

newtype Str0 = Str0 Str -- null terminated

type S = Ptr Word8

chr :: S -> Char
chr x = Internal.w2c $ Internal.inlinePerformIO $ peek x

inc :: S -> S
inc x = x `plusPtr` 1

{-# INLINE dropWhile0 #-}
dropWhile0 :: (Char -> Bool) -> Str0 -> Str0
dropWhile0 f x = snd $ span0 f x

{-# INLINE span0 #-}
span0 :: (Char -> Bool) -> Str0 -> (Str, Str0)
span0 f x = break0 (not . f) x

{-# INLINE break0 #-}
break0 :: (Char -> Bool) -> Str0 -> (Str, Str0)
break0 f (Str0 bs) = (BS.unsafeTake i bs, Str0 $ BS.unsafeDrop i bs)
    where
        i = Internal.inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> do
            let start = castPtr ptr :: S
            let end = go start
            return $! Ptr end `minusPtr` start

        go s@(Ptr a) | c == '\0' || f c = a
                     | otherwise = go (inc s)
            where c = chr s

{-# INLINE break00 #-}
-- The predicate must return true for '\0'
break00 :: (Char -> Bool) -> Str0 -> (Str, Str0)
break00 f (Str0 bs) = (BS.unsafeTake i bs, Str0 $ BS.unsafeDrop i bs)
    where
        i = Internal.inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> do
            let start = castPtr ptr :: S
            let end = go start
            return $! Ptr end `minusPtr` start

        go s@(Ptr a) | f c = a
                     | otherwise = go (inc s)
            where c = chr s

head0 :: Str0 -> Char
head0 (Str0 x) = Internal.w2c $ BS.unsafeHead x

tail0 :: Str0 -> Str0
tail0 (Str0 x) = Str0 $ BS.unsafeTail x

list0 :: Str0 -> (Char, Str0)
list0 x = (head0 x, tail0 x)

take0 :: Int -> Str0 -> Str
take0 i (Str0 x) = BS.takeWhile (/= '\0') $ BS.take i x


---------------------------------------------------------------------
-- ACTUAL LEXER

-- Lex each line separately, rather than each lexeme
data Lexeme
    = LexBind Str Expr -- [indent]foo = bar
    | LexBuild [Expr] Str [Expr] -- build foo: bar | baz || qux (| and || are represented as Expr)
    | LexInclude Str -- include file
    | LexSubninja Str -- include file
    | LexRule Str -- rule name
    | LexPool Str -- pool name
    | LexDefault [Expr] -- default foo bar
    | LexDefine Str Expr -- foo = bar
      deriving Show

isVar, isVarDot :: Char -> Bool
isVar x = x == '-' || x == '_' || (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || (x >= '0' && x <= '9')
isVarDot x = x == '.' || isVar x

endsDollar :: Str -> Bool
endsDollar x = BS.isSuffixOf (BS.singleton '$') x

dropN :: Str0 -> Str0
dropN x = if head0 x == '\n' then tail0 x else x

dropSpace :: Str0 -> Str0
dropSpace x = dropWhile0 (== ' ') x


lexerFile :: Maybe FilePath -> IO [Lexeme]
lexerFile file = fmap lexer $ maybe BS.getContents BS.readFile file

lexer :: Str -> [Lexeme]
lexer x = lexerLoop $ Str0 $ x `BS.append` BS.pack "\n\n\0"

lexerLoop :: Str0 -> [Lexeme]
lexerLoop c_x | (c,x) <- list0 c_x = case c of
    '\r' -> lexerLoop x
    '\n' -> lexerLoop x
    ' ' -> lexBind $ dropSpace x
    '#' -> lexerLoop $ dropWhile0 (/= '\n') x
    'b' | Just x <- strip "uild " x -> lexBuild $ dropSpace x
    'r' | Just x <- strip "ule " x -> lexRule $ dropSpace x
    'd' | Just x <- strip "efault " x -> lexDefault $ dropSpace x
    'p' | Just x <- strip "ool " x -> lexPool $ dropSpace x
    'i' | Just x <- strip "nclude " x -> lexInclude $ dropSpace x
    's' | Just x <- strip "ubninja " x -> lexSubninja $ dropSpace x
    '\0' -> []
    _ -> lexDefine c_x
    where
        strip str (Str0 x) = if b `BS.isPrefixOf` x then Just $ Str0 $ BS.drop (BS.length b) x else Nothing
            where b = BS.pack str

lexBind c_x | (c,x) <- list0 c_x = case c of
    '\r' -> lexerLoop x
    '\n' -> lexerLoop x
    '#' -> lexerLoop $ dropWhile0 (/= '\n') x
    '\0' -> []
    _ -> lexxBind LexBind c_x

lexBuild x
    | (outputs,x) <- lexxExprs True x
    , (rule,x) <- span0 isVar $ dropSpace x
    , (deps,x) <- lexxExprs False $ dropSpace x
    = LexBuild outputs rule deps : lexerLoop x

lexDefault x
    | (files,x) <- lexxExprs False x
    = LexDefault files : lexerLoop x

lexRule x = lexxName LexRule x
lexPool x = lexxName LexPool x
lexInclude x = lexxFile LexInclude x
lexSubninja x = lexxFile LexSubninja x
lexDefine x = lexxBind LexDefine x

lexxBind ctor x
    | (var,x) <- span0 isVarDot x
    , ('=',x) <- list0 $ dropSpace x
    , (exp,x) <- lexxExpr False False $ dropSpace x
    = ctor var exp : lexerLoop x
lexxBind _ x = error $ show ("parse failed when parsing binding", take0 100 x)

lexxFile ctor x
    | (file,rest) <- splitLineCont x
    = ctor file : lexerLoop rest

lexxName ctor x
    | (name,rest) <- splitLineCont x
    = ctor name : lexerLoop rest


lexxExprs :: Bool -> Str0 -> ([Expr], Str0)
lexxExprs stopColon x = case lexxExpr stopColon True x of
    (a,c_x) | c <- head0 c_x, x <- tail0 c_x -> case c of
        ' ' -> first (a:) $ lexxExprs stopColon $ dropSpace x
        ':' | stopColon -> ([a], x)
        _ | stopColon -> error "expected a colon"
        '\r' -> a $: dropN x
        '\n' -> a $: x
        '\0' -> a $: c_x
    where
        Exprs [] $: x = ([], x)
        a $: x = ([a], x)


{-# NOINLINE lexxExpr #-}
lexxExpr :: Bool -> Bool -> Str0 -> (Expr, Str0) -- snd will start with one of " :\n\r" or be empty
lexxExpr stopColon stopSpace = first exprs . f
    where
        exprs [x] = x
        exprs xs = Exprs xs

        special = case (stopColon, stopSpace) of
            (True , True ) -> \x -> x <= ':' && (x == ':' || x == ' ' || x == '$' || x == '\r' || x == '\n' || x == '\0')
            (True , False) -> \x -> x <= ':' && (x == ':'             || x == '$' || x == '\r' || x == '\n' || x == '\0')
            (False, True ) -> \x -> x <= '$' && (            x == ' ' || x == '$' || x == '\r' || x == '\n' || x == '\0')
            (False, False) -> \x -> x <= '$' && (                        x == '$' || x == '\r' || x == '\n' || x == '\0')
        f x = case break00 special x of (a,x) -> if BS.null a then g x else Lit a $: g x

        x $: (xs,y) = (x:xs,y)

        g x | head0 x /= '$' = ([], x)
        g x | c_x <- tail0 x, (c,x) <- list0 c_x = case c of
            '$' -> Lit (BS.singleton '$') $: f x
            ' ' -> Lit (BS.singleton ' ') $: f x
            ':' -> Lit (BS.singleton ':') $: f x
            '\n' -> f $ dropSpace x
            '\r' -> f $ dropSpace $ dropN x
            '{' | (name,x) <- span0 isVarDot x, not $ BS.null name, ('}',x) <- list0 x -> Var name $: f x
            _ | (name,x) <- span0 isVar c_x, not $ BS.null name -> Var name $: f x
            _ -> error $ "Unexpect $ followed by unexpected stuff"


splitLineCont :: Str0 -> (Str, Str0)
splitLineCont x = first BS.concat $ f x
    where
        f x = if not $ endsDollar a then ([a], b) else let (c,d) = f $ dropSpace b in (BS.init a : c, d)
            where (a,b) = splitLineCR x

splitLineCR :: Str0 -> (Str, Str0)
splitLineCR x = if BS.singleton '\r' `BS.isSuffixOf` a then (BS.init a, dropN b) else (a, dropN b)
    where (a,b) = break0 (== '\n') x