File: Git.hs

package info (click to toggle)
haskell-filestore 0.6.5-4
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 224 kB
  • sloc: haskell: 1,604; makefile: 4
file content (458 lines) | stat: -rw-r--r-- 20,122 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DoAndIfThenElse #-}

{- |
   Module      : Data.FileStore.Git
   Copyright   : Copyright (C) 2009 John MacFarlane
   License     : BSD 3

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : GHC 6.10 required

   A versioned filestore implemented using git.
   Normally this module should not be imported: import
   "Data.FileStore" instead.

   It is assumed that git >= 1.7.2 is available on
   the system path.
-}

module Data.FileStore.Git
           ( gitFileStore
           )
where
import Data.FileStore.Types
import Data.Maybe (fromMaybe, mapMaybe)
import Data.List.Split (endByOneOf)
import System.Exit
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
import Data.FileStore.Utils (withSanityCheck, hashsMatch, runShellCommand, escapeRegexSpecialChars, withVerifyDir, encodeArg)
import Data.ByteString.Lazy.UTF8 (toString)
import qualified Data.ByteString.Lazy.Char8 as B
import Control.Monad (when)
import System.FilePath ((</>), splitFileName)
import System.Directory (createDirectoryIfMissing, doesDirectoryExist, executable, getPermissions, setPermissions)
import Control.Exception (throwIO)
import qualified Control.Exception as E

-- | Return a filestore implemented using the git distributed revision control system
-- (<http://git-scm.com/>).
gitFileStore :: FilePath -> FileStore
gitFileStore repo = FileStore {
    initialize        = gitInit repo
  , save              = gitSave repo 
  , retrieve          = gitRetrieve repo
  , delete            = gitDelete repo
  , rename            = gitMove repo
  , history           = gitLog repo
  , latest            = gitLatestRevId repo
  , revision          = gitGetRevision repo
  , index             = gitIndex repo
  , directory         = gitDirectory repo
  , search            = gitSearch repo 
  , idsMatch          = const hashsMatch repo
  }

-- | Run a git command and return error status, error output, standard output.  The repository
-- is used as working directory.
runGitCommand :: FilePath -> String -> [String] -> IO (ExitCode, String, B.ByteString)
runGitCommand = runGitCommandWithEnv []

-- | Run a git command with the given environment and return error status, error output, standard
-- output.  The repository is used as working directory.
runGitCommandWithEnv :: [(String, String)] -> FilePath -> String -> [String] -> IO (ExitCode, String, B.ByteString)
runGitCommandWithEnv givenEnv repo command args = do
  let env = Just ([("GIT_DIFF_OPTS", "-u100000")] ++ givenEnv)
  (status, err, out) <- runShellCommand repo env "git" (command : args)
  return (status, toString err, out)

-- | Initialize a repository, creating the directory if needed.
gitInit :: FilePath -> IO ()
gitInit repo = do
  exists <- doesDirectoryExist repo
  when exists $ withVerifyDir repo $ throwIO RepositoryExists
  createDirectoryIfMissing True repo
  (status, err, _) <- runGitCommand repo "init" []
  if status == ExitSuccess
     then do
       -- Add the post-update hook, so that changes made remotely via git
       -- will be reflected in the working directory.
       let postupdatedir = repo </> ".git" </> "hooks"
       createDirectoryIfMissing True postupdatedir
       let postupdate = postupdatedir </> "post-update"
       B.writeFile postupdate postUpdate
       perms <- getPermissions postupdate
       setPermissions postupdate (perms {executable = True})
       -- Set up repo to allow push to current branch
       (status', err', _) <- runGitCommand repo "config" ["receive.denyCurrentBranch","ignore"]
       if status' == ExitSuccess
          then return ()
          else throwIO $ UnknownError $ "git config failed:\n" ++ err'
     else throwIO $ UnknownError $ "git-init failed:\n" ++ err 

-- | Commit changes to a resource.  Raise 'Unchanged' exception if there were
-- no changes.
gitCommit :: FilePath -> [FilePath] -> Author -> String -> IO ()
gitCommit repo names author logMsg = do
  let env = [("GIT_COMMITTER_NAME", authorName author),
             ("GIT_COMMITTER_EMAIL", authorEmail author)]
  (statusCommit, errCommit, _) <- runGitCommandWithEnv env repo "commit" $ ["--author", authorName author ++ " <" ++
                                    authorEmail author ++ ">", "-m", logMsg] ++ names
  if statusCommit == ExitSuccess
     then return ()
     else throwIO $ if null errCommit
                       then Unchanged
                       else UnknownError $ "Could not git commit " ++ unwords names ++ "\n" ++ errCommit

-- | Save changes (creating file and directory if needed), add, and commit.
gitSave :: Contents a => FilePath -> FilePath -> Author -> Description -> a -> IO ()
gitSave repo name author logMsg contents = do
  withSanityCheck repo [".git"] name $ B.writeFile (repo </> encodeArg name) $ toByteString contents
  (statusAdd, errAdd, _) <- runGitCommand repo "add" [name]
  if statusAdd == ExitSuccess
     then gitCommit repo [name] author logMsg
     else throwIO $ UnknownError $ "Could not git add '" ++ name ++ "'\n" ++ errAdd

isSymlink :: FilePath -> FilePath -> Maybe RevisionId -> IO Bool
isSymlink repo name revid = do
  (_, _, out) <- runGitCommand repo "ls-tree" [fromMaybe "HEAD" revid, name]
  -- see http://stackoverflow.com/questions/737673
  return $ (take 6 $ B.unpack out) == "120000"

targetContents :: Contents a => FilePath -> FilePath -> a -> IO (Maybe a)
targetContents repo linkName linkContent = do
  let (dirName, _) = splitFileName linkName
      targetName   = repo </> dirName </> (B.unpack $ toByteString linkContent)
  result <- E.try $ B.readFile targetName
  case result of
    Left (_ :: E.SomeException) -> return Nothing
    Right contents -> return $ Just (fromByteString contents)

-- | Retrieve contents from resource.
gitRetrieve :: Contents a
            => FilePath
            -> FilePath
            -> Maybe RevisionId    -- ^ @Just@ revision ID, or @Nothing@ for latest
            -> IO a
gitRetrieve repo name revid = do
  let objectName = case revid of
                        Nothing  -> "HEAD:" ++ name
                        Just rev -> rev ++ ":" ++ name
  -- Check that the object is a file (blob), not a directory (tree)
  (_, _, output) <- runGitCommand repo "cat-file" ["-t", objectName]
  when (take 4 (toString output) /= "blob") $ throwIO NotFound
  (status', err', output') <- runGitCommand repo "cat-file" ["-p", objectName]
  if status' == ExitSuccess
     then do
       isLink <- isSymlink repo name revid
       if isLink
        then do
          contents <- targetContents repo name output'
          case contents of
            -- ideal output on Nothing would be something like
            -- "broken symlink: <output'>", but I couldn't figure
            -- out the bytestring types to do that.
            -- also didn't bother trying to get the browser
            -- to display the error as text if the symlink is to some
            -- other format.
            Nothing -> return $ fromByteString output'
            Just bs -> return $ fromByteString bs
        else return $ fromByteString output'
     else throwIO $ UnknownError $ "Error in git cat-file:\n" ++ err'

-- | Delete a resource from the repository.
gitDelete :: FilePath -> FilePath -> Author -> Description -> IO ()
gitDelete repo name author logMsg = withSanityCheck repo [".git"] name $ do
  (statusAdd, errRm, _) <- runGitCommand repo "rm" [name]
  if statusAdd == ExitSuccess
     then gitCommit repo [name] author logMsg
     else throwIO $ UnknownError $ "Could not git rm '" ++ name ++ "'\n" ++ errRm

-- | Change the name of a resource.
gitMove :: FilePath -> FilePath -> FilePath -> Author -> Description -> IO ()
gitMove repo oldName newName author logMsg = do
  _ <- gitLatestRevId repo oldName   -- will throw a NotFound error if oldName doesn't exist
  (statusAdd, err, _) <- withSanityCheck repo [".git"] newName $ runGitCommand repo "mv" [oldName, newName] 
  if statusAdd == ExitSuccess
     then gitCommit repo [oldName, newName] author logMsg
     else throwIO $ UnknownError $ "Could not git mv " ++ oldName ++ " " ++ newName ++ "\n" ++ err

-- | Return revision ID for latest commit for a resource.
gitLatestRevId :: FilePath -> FilePath -> IO RevisionId
gitLatestRevId repo name = do
  (revListStatus, _, output) <- runGitCommand repo "rev-list" ["--max-count=1", "HEAD", "--", name]
  -- we need to check separately to make sure the resource hasn't been removed
  -- from the repository:
  (catStatus,_, _) <- runGitCommand repo "cat-file" ["-e", "HEAD:" ++ name]
  if revListStatus == ExitSuccess && catStatus == ExitSuccess
     then do
       let result = takeWhile (`notElem` "\n\r \t") $ toString output
       if null result
          then throwIO NotFound
          else return result
     else throwIO NotFound

-- | Get revision information for a particular revision ID, or latest revision.
gitGetRevision :: FilePath -> RevisionId -> IO Revision
gitGetRevision repo revid = do
  (status, _, output) <- runGitCommand repo "whatchanged" ["-z","--pretty=format:" ++ gitLogFormat, "--max-count=1", revid]
  if status == ExitSuccess
     then parseLogEntry $ B.drop 1 output -- drop initial \1
     else throwIO NotFound

-- | Get a list of all known files inside and managed by a repository.
gitIndex :: FilePath ->IO [FilePath]
gitIndex repo = withVerifyDir repo $ do
  (status, _err, output) <- runGitCommand repo "ls-tree" ["-r","-t","-z","HEAD"]
  if status == ExitSuccess
     then return $ mapMaybe (lineToFilename . words) . endByOneOf ['\0'] . toString $ output
     else return [] -- if error, will return empty list
                    -- note:  on a newly initialized repo, 'git ls-tree HEAD' returns an error
   where lineToFilename (_:"blob":_:rest) = Just $ unwords rest
         lineToFilename _                 = Nothing

-- | Get list of resources in one directory of the repository.
gitDirectory :: FilePath -> FilePath -> IO [Resource]
gitDirectory repo dir = withVerifyDir (repo </> dir) $ do
  (status, _err, output) <- runGitCommand repo "ls-tree" ["-z","HEAD:" ++ dir]
  if status == ExitSuccess
     then return $ map (lineToResource . words) $ endByOneOf ['\0'] $ toString output
     else return []   -- if error, this will return empty list
                      -- note:  on a newly initialized repo, 'git ls-tree HEAD:' returns an error
   where lineToResource (_:"blob":_:rest) = FSFile $ unwords rest
         lineToResource (_:"tree":_:rest) = FSDirectory $ unwords rest
         lineToResource _                 = error "Encountered an item that is neither blob nor tree in git ls-tree"

-- | Uses git-grep to search repository.  Escape regex special characters, so the pattern
-- is interpreted as an ordinary string.
gitSearch :: FilePath -> SearchQuery -> IO [SearchMatch]
gitSearch repo query = do
  let opts = ["-I","-n","--null"] ++
             ["--ignore-case" | queryIgnoreCase query] ++
             ["--all-match" | queryMatchAll query] ++
             ["--word-regexp" | queryWholeWords query]
  (status, errOutput, output) <- runGitCommand repo "grep" (opts ++
                                   concatMap (\term -> ["-e", escapeRegexSpecialChars term]) (queryPatterns query))
  case status of
     ExitSuccess   -> return $ map parseMatchLine $ lines $ toString output
     ExitFailure 1 -> return []  -- status of 1 means no matches in recent versions of git
     ExitFailure _ -> throwIO $ UnknownError $ "git grep returned error status.\n" ++ errOutput

-- Auxiliary function for searchResults
parseMatchLine :: String -> SearchMatch
parseMatchLine str =
  SearchMatch{ matchResourceName = fname
             , matchLineNumber = if not (null ln)
                                    then read ln
                                    else error $ "parseMatchLine: " ++ str
             , matchLine = cont}
    where (fname,xs) = break (== '\NUL') str
          rest = drop 1 xs 
          -- for some reason, NUL is used after line number instead of
          -- : when --match-all is passed to git-grep.
          (ln,ys) = span (`elem` ['0'..'9']) rest
          cont = drop 1 ys   -- drop : or NUL after line number

{-
-- | Uses git-diff to get a dif between two revisions.
gitDiff :: FilePath -> FilePath -> RevisionId -> RevisionId -> IO String
gitDiff repo name from to = do
  (status, _, output) <- runGitCommand repo "diff" [from, to, name]
  if status == ExitSuccess
     then return $ toString output
     else do
       -- try it without the path, since the error might be "not in working tree" for a deleted file
       (status', err', output') <- runGitCommand repo "diff" [from, to]
       if status' == ExitSuccess
          then return $ toString output'
          else throwIO $ UnknownError $ "git diff returned error:\n" ++ err'
-}

gitLogFormat :: String
gitLogFormat = "%x01%H%x00%ct%x00%an%x00%ae%x00%B%n%x00"

-- | Return list of log entries for the given time frame and list of resources.
-- If list of resources is empty, log entries for all resources are returned.
gitLog :: FilePath -> [FilePath] -> TimeRange -> Maybe Int -> IO [Revision]
gitLog repo names (TimeRange mbSince mbUntil) mblimit = do
  (status, err, output) <- runGitCommand repo "whatchanged" $
                           ["-z","--pretty=format:" ++ gitLogFormat] ++
                           (case mbSince of
                                 Just since   -> ["--since='" ++ show since ++ "'"]
                                 Nothing      -> []) ++
                           (case mbUntil of
                                 Just til   -> ["--until='" ++ show til ++ "'"]
                                 Nothing      -> []) ++
                           (case mblimit of
                                 Just lim   -> ["-n", show lim]
                                 Nothing    -> []) ++
                           ["--"] ++ names
  if status == ExitSuccess
     then parseGitLog output
     else throwIO $ UnknownError $ "git whatchanged returned error status.\n" ++ err

--
-- Parsers to parse git log into Revisions.
--

parseGitLog :: B.ByteString -> IO [Revision]
parseGitLog = mapM parseLogEntry . splitEntries

splitEntries :: B.ByteString -> [B.ByteString]
splitEntries = dropWhile B.null . B.split '\1' -- occurs just before each hash

parseLogEntry :: B.ByteString -> IO Revision
parseLogEntry entry = do
  let (rev : date' : author : email : subject : rest) = B.split '\0' entry
  date <- case B.readInteger date' of
               Just (x,_) -> return x
               Nothing    -> throwIO $ UnknownError $ "Could not read date"
  changes <- parseChanges $ takeWhile (not . B.null) rest
  return Revision {
              revId          = toString rev
            , revDateTime    = posixSecondsToUTCTime $ realToFrac date
            , revAuthor      = Author{ authorName = toString author
                                     , authorEmail = toString email }
            , revDescription = toString $ stripTrailingNewlines subject
            , revChanges     = changes }

stripTrailingNewlines :: B.ByteString -> B.ByteString
stripTrailingNewlines = B.reverse . B.dropWhile (=='\n') . B.reverse

-- | This function converts the git "log" %B (raw body) format into a
-- list of Change items (e.g. `Added FilePath`, `Modified FilePath`,
-- or `Deleted FilePath`).  The raw body format is normally pairs of
-- ByteStrings, like:
--
--    ":000000 100644 0000000... 9cf8bba... A", "path/to/file.foo"
--
-- where the last letter of the first element is the type of change.
-- Git can track renames however, and those are noted by a triple of
-- ByteStrings; for example:
--
--   ":100644 100644 6c2c6e2... d333ad0... R063",
--   "old/file/path/name.foo",
--   "new/file/path/newname.bar"
--
-- Since filestore does not track renames, these are converted to
-- a remove of the first file and an add of the second.
--
-- n.b. without reading git sources, it's not clear what the raw body
-- format details are; specifically, the three digits following the R
-- are ignored.
parseChanges :: [B.ByteString] -> IO [Change]
parseChanges (x:y:zs) = do
  when (B.null x) $ pcErr "found empty change description"
  let changeType = B.head $ last $ B.words x
  let file' = toString y
  if changeType == 'R'
  then parseChanges (tail zs) >>=
       return . (++) (Deleted file' : Added (toString $ head zs) : [])
  else
      do next <- case changeType of
                   'A'  -> return $ Added file'
                   'M'  -> return $ Modified file'
                   'D'  -> return $ Deleted file'
                   _    -> pcErr ("found unknown changeType '" ++
                                  (show changeType) ++
                                  "' in: " ++ (show x) ++
                                  " on " ++ (show y))
         rest <- parseChanges zs
         return (next:rest)
parseChanges [_] =
  pcErr "encountered odd number of fields"
parseChanges [] = return []

pcErr :: forall a. String -> IO a
pcErr = throwIO . UnknownError . (++) "filestore parseChanges "

postUpdate :: B.ByteString
postUpdate =
  B.pack 
    "#!/bin/bash\n\
    \#\n\
    \# This hook does two things:\n\
    \#\n\
    \#  1. update the \"info\" files that allow the list of references to be\n\
    \#     queries over dumb transports such as http\n\
    \#\n\
    \#  2. if this repository looks like it is a non-bare repository, and\n\
    \#     the checked-out branch is pushed to, then update the working copy.\n\
    \#     This makes \"push\" function somewhat similarly to darcs and bzr.\n\
    \#\n\
    \# To enable this hook, make this file executable by \"chmod +x post-update\".\n\
    \\n\
    \git-update-server-info\n\
    \\n\
    \is_bare=$(git-config --get --bool core.bare)\n\
    \\n\
    \if [ -z \"$is_bare\" ]\n\
    \then\n\
    \    # for compatibility's sake, guess\n\
    \    git_dir_full=$(cd $GIT_DIR; pwd)\n\
    \    case $git_dir_full in */.git) is_bare=false;; *) is_bare=true;; esac\n\
    \fi\n\
    \\n\
    \update_wc() {\n\
    \    ref=$1\n\
    \    echo \"Push to checked out branch $ref\" >&2\n\
    \    if [ ! -f $GIT_DIR/logs/HEAD ]\n\
    \    then\n\
    \        echo \"E:push to non-bare repository requires a HEAD reflog\" >&2\n\
    \        exit 1\n\
    \    fi\n\
    \    if (cd $GIT_WORK_TREE; git-diff-files -q --exit-code >/dev/null)\n\
    \    then\n\
    \        wc_dirty=0\n\
    \    else\n\
    \        echo \"W:unstaged changes found in working copy\" >&2\n\
    \        wc_dirty=1\n\
    \        desc=\"working copy\"\n\
    \    fi\n\
    \    if git diff-index --cached HEAD@{1} >/dev/null\n\
    \    then\n\
    \        index_dirty=0\n\
    \    else\n\
    \        echo \"W:uncommitted, staged changes found\" >&2\n\
    \        index_dirty=1\n\
    \        if [ -n \"$desc\" ]\n\
    \        then\n\
    \            desc=\"$desc and index\"\n\
    \        else\n\
    \            desc=\"index\"\n\
    \        fi\n\
    \    fi\n\
    \    if [ \"$wc_dirty\" -ne 0 -o \"$index_dirty\" -ne 0 ]\n\
    \    then\n\
    \        new=$(git rev-parse HEAD)\n\
    \        echo \"W:stashing dirty $desc - see git-stash(1)\" >&2\n\
    \        ( trap 'echo trapped $$; git symbolic-ref HEAD \"'\"$ref\"'\"' 2 3 13 15 ERR EXIT\n\
    \        git-update-ref --no-deref HEAD HEAD@{1}\n\
    \        cd $GIT_WORK_TREE\n\
    \        git stash save \"dirty $desc before update to $new\";\n\
    \        git-symbolic-ref HEAD \"$ref\"\n\
    \        )\n\
    \    fi\n\
    \\n\
    \    # eye candy - show the WC updates :)\n\
    \    echo \"Updating working copy\" >&2\n\
    \    (cd $GIT_WORK_TREE\n\
    \    git-diff-index -R --name-status HEAD >&2\n\
    \    git-reset --hard HEAD)\n\
    \}\n\
    \\n\
    \if [ \"$is_bare\" = \"false\" ]\n\
    \then\n\
    \    active_branch=`git-symbolic-ref HEAD`\n\
    \    export GIT_DIR=$(cd $GIT_DIR; pwd)\n\
    \    GIT_WORK_TREE=${GIT_WORK_TREE-..}\n\
    \    for ref\n\
    \    do\n\
    \        if [ \"$ref\" = \"$active_branch\" ]\n\
    \        then\n\
    \            update_wc $ref\n\
    \        fi\n\
    \    done\n\
    \fi"