File: Heuristics.hs

package info (click to toggle)
haskell-cabal-install 1.20.0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,324 kB
  • ctags: 10
  • sloc: haskell: 18,563; sh: 225; ansic: 36; makefile: 6
file content (381 lines) | stat: -rw-r--r-- 14,016 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
-----------------------------------------------------------------------------
-- |
-- Module      :  Distribution.Client.Init.Heuristics
-- Copyright   :  (c) Benedikt Huber 2009
-- License     :  BSD-like
--
-- Maintainer  :  cabal-devel@haskell.org
-- Stability   :  provisional
-- Portability :  portable
--
-- Heuristics for creating initial cabal files.
--
-----------------------------------------------------------------------------
module Distribution.Client.Init.Heuristics (
    guessPackageName,
    scanForModules,     SourceFileEntry(..),
    neededBuildPrograms,
    guessMainFileCandidates,
    guessAuthorNameMail,
    knownCategories,
) where
import Distribution.Text         (simpleParse)
import Distribution.Simple.Setup (Flag(..), flagToMaybe)
import Distribution.ModuleName
    ( ModuleName, toFilePath )
import Distribution.Client.PackageIndex
    ( allPackagesByName )
import qualified Distribution.Package as P
import qualified Distribution.PackageDescription as PD
    ( category, packageDescription )
import Distribution.Simple.Utils
         ( intercalate )
import Distribution.Client.Utils
         ( tryCanonicalizePath )
import Language.Haskell.Extension ( Extension )

import Distribution.Client.Types ( packageDescription, SourcePackageDb(..) )
import Control.Applicative ( pure, (<$>), (<*>) )
import Control.Arrow ( first )
import Control.Monad ( liftM )
import Data.Char   ( isAlphaNum, isNumber, isUpper, isLower, isSpace )
import Data.Either ( partitionEithers )
import Data.List   ( isInfixOf, isPrefixOf, isSuffixOf, sortBy )
import Data.Maybe  ( mapMaybe, catMaybes, maybeToList )
import Data.Monoid ( mempty, mappend, mconcat, )
import Data.Ord    ( comparing )
import qualified Data.Set as Set ( fromList, toList )
import System.Directory ( getCurrentDirectory, getDirectoryContents,
                          doesDirectoryExist, doesFileExist, getHomeDirectory, )
import Distribution.Compat.Environment ( getEnvironment )
import System.FilePath ( takeExtension, takeBaseName, dropExtension,
                         (</>), (<.>), splitDirectories, makeRelative )

import Distribution.Client.Init.Types     ( InitFlags(..) )
import Distribution.Client.Compat.Process ( readProcessWithExitCode )
import System.Exit ( ExitCode(..) )

-- | Return a list of candidate main files for this executable: top-level
-- modules including the word 'Main' in the file name. The list is sorted in
-- order of preference, shorter file names are preferred. 'Right's are existing
-- candidates and 'Left's are those that do not yet exist.
guessMainFileCandidates :: InitFlags -> IO [Either FilePath FilePath]
guessMainFileCandidates flags = do
  dir <-
    maybe getCurrentDirectory return (flagToMaybe $ packageDir flags)
  files <- getDirectoryContents dir
  let existingCandidates = filter isMain files
      -- We always want to give the user at least one default choice.  If either
      -- Main.hs or Main.lhs has already been created, then we don't want to
      -- suggest the other; however, if neither has been created, then we
      -- suggest both.
      newCandidates =
        if any (`elem` existingCandidates) ["Main.hs", "Main.lhs"]
        then []
        else ["Main.hs", "Main.lhs"]
      candidates =
        sortBy (\x y -> comparing (length . either id id) x y
                        `mappend` compare x y)
               (map Left newCandidates ++ map Right existingCandidates)
  return candidates

  where
    isMain f =    (isInfixOf "Main" f || isInfixOf  "main" f)
               && (isSuffixOf ".hs" f || isSuffixOf ".lhs" f)

-- | Guess the package name based on the given root directory.
guessPackageName :: FilePath -> IO P.PackageName
guessPackageName = liftM (P.PackageName . repair . last . splitDirectories)
                 . tryCanonicalizePath
  where
    -- Treat each span of non-alphanumeric characters as a hyphen. Each
    -- hyphenated component of a package name must contain at least one
    -- alphabetic character. An arbitrary character ('x') will be prepended if
    -- this is not the case for the first component, and subsequent components
    -- will simply be run together. For example, "1+2_foo-3" will become
    -- "x12-foo3".
    repair = repair' ('x' :) id
    repair' invalid valid x = case dropWhile (not . isAlphaNum) x of
        "" -> repairComponent ""
        x' -> let (c, r) = first repairComponent $ break (not . isAlphaNum) x'
              in c ++ repairRest r
      where
        repairComponent c | all isNumber c = invalid c
                          | otherwise      = valid c
    repairRest = repair' id ('-' :)

-- |Data type of source files found in the working directory
data SourceFileEntry = SourceFileEntry
    { relativeSourcePath :: FilePath
    , moduleName         :: ModuleName
    , fileExtension      :: String
    , imports            :: [ModuleName]
    , extensions         :: [Extension]
    } deriving Show

sfToFileName :: FilePath -> SourceFileEntry -> FilePath
sfToFileName projectRoot (SourceFileEntry relPath m ext _ _)
  = projectRoot </> relPath </> toFilePath m <.> ext

-- |Search for source files in the given directory
-- and return pairs of guessed Haskell source path and
-- module names.
scanForModules :: FilePath -> IO [SourceFileEntry]
scanForModules rootDir = scanForModulesIn rootDir rootDir

scanForModulesIn :: FilePath -> FilePath -> IO [SourceFileEntry]
scanForModulesIn projectRoot srcRoot = scan srcRoot []
  where
    scan dir hierarchy = do
        entries <- getDirectoryContents (projectRoot </> dir)
        (files, dirs) <- liftM partitionEithers (mapM (tagIsDir dir) entries)
        let modules = catMaybes [ guessModuleName hierarchy file
                                | file <- files
                                , isUpper (head file) ]
        modules' <- mapM (findImportsAndExts projectRoot) modules
        recMods <- mapM (scanRecursive dir hierarchy) dirs
        return $ concat (modules' : recMods)
    tagIsDir parent entry = do
        isDir <- doesDirectoryExist (parent </> entry)
        return $ (if isDir then Right else Left) entry
    guessModuleName hierarchy entry
        | takeBaseName entry == "Setup" = Nothing
        | ext `elem` sourceExtensions   =
            SourceFileEntry <$> pure relRoot <*> modName <*> pure ext <*> pure [] <*> pure []
        | otherwise = Nothing
      where
        relRoot       = makeRelative projectRoot srcRoot
        unqualModName = dropExtension entry
        modName       = simpleParse
                      $ intercalate "." . reverse $ (unqualModName : hierarchy)
        ext           = case takeExtension entry of '.':e -> e; e -> e
    scanRecursive parent hierarchy entry
      | isUpper (head entry) = scan (parent </> entry) (entry : hierarchy)
      | isLower (head entry) && not (ignoreDir entry) =
          scanForModulesIn projectRoot $ foldl (</>) srcRoot (reverse (entry : hierarchy))
      | otherwise = return []
    ignoreDir ('.':_)  = True
    ignoreDir dir      = dir `elem` ["dist", "_darcs"]

findImportsAndExts :: FilePath -> SourceFileEntry -> IO SourceFileEntry
findImportsAndExts projectRoot sf = do
  s <- readFile (sfToFileName projectRoot sf)

  let modules = mapMaybe
                ( getModName
                . drop 1
                . filter (not . null)
                . dropWhile (/= "import")
                . words
                )
              . filter (not . ("--" `isPrefixOf`)) -- poor man's comment filtering
              . lines
              $ s

      -- XXX we should probably make a better attempt at parsing
      -- comments above.  Unfortunately we can't use a full-fledged
      -- Haskell parser since cabal's dependencies must be kept at a
      -- minimum.

      -- A poor man's LANGUAGE pragma parser.
      exts = mapMaybe simpleParse
           . concatMap getPragmas
           . filter isLANGUAGEPragma
           . map fst
           . drop 1
           . takeWhile (not . null . snd)
           . iterate (takeBraces . snd)
           $ ("",s)

      takeBraces = break (== '}') . dropWhile (/= '{')

      isLANGUAGEPragma = ("{-# LANGUAGE " `isPrefixOf`)

      getPragmas = map trim . splitCommas . takeWhile (/= '#') . drop 13

      splitCommas "" = []
      splitCommas xs = x : splitCommas (drop 1 y)
        where (x,y) = break (==',') xs

  return sf { imports    = modules
            , extensions = exts
            }

 where getModName :: [String] -> Maybe ModuleName
       getModName []               = Nothing
       getModName ("qualified":ws) = getModName ws
       getModName (ms:_)           = simpleParse ms



-- Unfortunately we cannot use the version exported by Distribution.Simple.Program
knownSuffixHandlers :: [(String,String)]
knownSuffixHandlers =
  [ ("gc",     "greencard")
  , ("chs",    "chs")
  , ("hsc",    "hsc2hs")
  , ("x",      "alex")
  , ("y",      "happy")
  , ("ly",     "happy")
  , ("cpphs",  "cpp")
  ]

sourceExtensions :: [String]
sourceExtensions = "hs" : "lhs" : map fst knownSuffixHandlers

neededBuildPrograms :: [SourceFileEntry] -> [String]
neededBuildPrograms entries =
    [ handler
    | ext <- nubSet (map fileExtension entries)
    , handler <- maybeToList (lookup ext knownSuffixHandlers)
    ]

-- | Guess author and email using darcs and git configuration options. Use
-- the following in decreasing order of preference:
--
-- 1. vcs env vars ($DARCS_EMAIL, $GIT_AUTHOR_*)
-- 2. Local repo configs
-- 3. Global vcs configs
-- 4. The generic $EMAIL
--
-- Name and email are processed separately, so the guess might end up being
-- a name from DARCS_EMAIL and an email from git config.
--
-- Darcs has preference, for tradition's sake.
guessAuthorNameMail :: IO (Flag String, Flag String)
guessAuthorNameMail = fmap authorGuessPure authorGuessIO

-- Ordered in increasing preference, since Flag-as-monoid is identical to
-- Last.
authorGuessPure :: AuthorGuessIO -> AuthorGuess
authorGuessPure (AuthorGuessIO env darcsLocalF darcsGlobalF gitLocal gitGlobal)
    = mconcat
        [ emailEnv env
        , gitGlobal
        , darcsCfg darcsGlobalF
        , gitLocal
        , darcsCfg darcsLocalF
        , gitEnv env
        , darcsEnv env
        ]

authorGuessIO :: IO AuthorGuessIO
authorGuessIO = AuthorGuessIO
    <$> getEnvironment
    <*> (maybeReadFile $ "_darcs" </> "prefs" </> "author")
    <*> (maybeReadFile =<< liftM (</> (".darcs" </> "author")) getHomeDirectory)
    <*> gitCfg Local
    <*> gitCfg Global

-- Types and functions used for guessing the author are now defined:

type AuthorGuess   = (Flag String, Flag String)
type Enviro        = [(String, String)]
data GitLoc        = Local | Global
data AuthorGuessIO = AuthorGuessIO
    Enviro         -- ^ Environment lookup table
    (Maybe String) -- ^ Contents of local darcs author info
    (Maybe String) -- ^ Contents of global darcs author info
    AuthorGuess    -- ^ Git config --local
    AuthorGuess    -- ^ Git config --global

darcsEnv :: Enviro -> AuthorGuess
darcsEnv = maybe mempty nameAndMail . lookup "DARCS_EMAIL"

gitEnv :: Enviro -> AuthorGuess
gitEnv env = (name, mail)
  where
    name = maybeFlag "GIT_AUTHOR_NAME" env
    mail = maybeFlag "GIT_AUTHOR_EMAIL" env

darcsCfg :: Maybe String -> AuthorGuess
darcsCfg = maybe mempty nameAndMail

emailEnv :: Enviro -> AuthorGuess
emailEnv env = (mempty, mail)
  where
    mail = maybeFlag "EMAIL" env

gitCfg :: GitLoc -> IO AuthorGuess
gitCfg which = do
  name <- gitVar which "user.name"
  mail <- gitVar which "user.email"
  return (name, mail)

gitVar :: GitLoc -> String -> IO (Flag String)
gitVar which = fmap happyOutput . gitConfigQuery which

happyOutput :: (ExitCode, a, t) -> Flag a
happyOutput v = case v of
  (ExitSuccess, s, _) -> Flag s
  _                   -> mempty

gitConfigQuery :: GitLoc -> String -> IO (ExitCode, String, String)
gitConfigQuery which key =
    fmap trim' $ readProcessWithExitCode "git" ["config", w, key] ""
  where
    w = case which of
        Local  -> "--local"
        Global -> "--global"
    trim' (a, b, c) = (a, trim b, c)

maybeFlag :: String -> Enviro -> Flag String
maybeFlag k = maybe mempty Flag . lookup k

maybeReadFile :: String -> IO (Maybe String)
maybeReadFile f = do
    exists <- doesFileExist f
    if exists
        then fmap Just $ readFile f
        else return Nothing

-- |Get list of categories used in Hackage. NOTE: Very slow, needs to be cached
knownCategories :: SourcePackageDb -> [String]
knownCategories (SourcePackageDb sourcePkgIndex _) = nubSet
    [ cat | pkg <- map head (allPackagesByName sourcePkgIndex)
          , let catList = (PD.category . PD.packageDescription . packageDescription) pkg
          , cat <- splitString ',' catList
    ]

-- Parse name and email, from darcs pref files or environment variable
nameAndMail :: String -> (Flag String, Flag String)
nameAndMail str
  | all isSpace nameOrEmail = mempty
  | null erest = (mempty, Flag $ trim nameOrEmail)
  | otherwise  = (Flag $ trim nameOrEmail, Flag mail)
  where
    (nameOrEmail,erest) = break (== '<') str
    (mail,_)            = break (== '>') (tail erest)

trim :: String -> String
trim = removeLeadingSpace . reverse . removeLeadingSpace . reverse
  where
    removeLeadingSpace  = dropWhile isSpace

-- split string at given character, and remove whitespace
splitString :: Char -> String -> [String]
splitString sep str = go str where
    go s = if null s' then [] else tok : go rest where
      s' = dropWhile (\c -> c == sep || isSpace c) s
      (tok,rest) = break (==sep) s'

nubSet :: (Ord a) => [a] -> [a]
nubSet = Set.toList . Set.fromList

{-
test db testProjectRoot = do
  putStrLn "Guessed package name"
  (guessPackageName >=> print) testProjectRoot
  putStrLn "Guessed name and email"
  guessAuthorNameMail >>= print

  mods <- scanForModules testProjectRoot

  putStrLn "Guessed modules"
  mapM_ print mods
  putStrLn "Needed build programs"
  print (neededBuildPrograms mods)

  putStrLn "List of known categories"
  print $ knownCategories db
-}