File: FilterProcess.hs

package info (click to toggle)
git-annex 10.20230126-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 69,344 kB
  • sloc: haskell: 74,654; javascript: 9,103; sh: 1,304; makefile: 203; perl: 136; ansic: 44
file content (89 lines) | stat: -rw-r--r-- 2,823 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
{- git-annex command
 -
 - Copyright 2021 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

module Command.FilterProcess where

import Command
import qualified Command.Smudge
import Git.FilterProcess
import Git.PktLine
import Annex.Link

import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L

cmd :: Command
cmd = noCommit $ noMessages $
	command "filter-process" SectionPlumbing 
		"long running git filter process"
		paramNothing (withParams seek)

seek :: CmdParams -> CommandSeek
seek _ = liftIO longRunningFilterProcessHandshake >>= \case
	Left err -> giveup err
	Right () -> go
  where
	go = liftIO getFilterRequest >>= \case
		Just (Smudge f) -> do
			smudge f
			go
		Just (Clean f) -> do
			clean f
			go
		Nothing -> return ()

smudge :: FilePath -> Annex ()
smudge file = do
	{- The whole git file content is necessarily buffered in memory,
	 - because we have to consume everything git is sending before
	 - we can respond to it. An annexed file will be only a pointer
	 - though. -}
	b <- B.concat . map pktLineToByteString <$> liftIO readUntilFlushPkt
	Command.Smudge.smudge' file (L.fromStrict b)
	{- Git expects us to output the content of unlocked annexed files,
	 - but if we got a pointer, we output only the pointer.
	 - See Command.Smudge.smudge for details of how this works. -}
	liftIO $ respondFilterRequest b

clean :: FilePath -> Annex ()
clean file = do
	{- We have to consume everything git is sending before we can
	 - respond to it. But it can be an arbitrarily large file,
	 - which is being added to the annex, and we do not want to buffer
	 - all that in memory. 
	 -
	 - Start by reading enough to determine if the file is an annex
	 - pointer.
	 -}
	let conv b l = (B.concat (map pktLineToByteString l), b)
	(b, readcomplete) <- 
		either (conv False) (conv True)
			<$> liftIO (readUntilFlushPktOrSize maxPointerSz)
	
	let passthrough
		| readcomplete = liftIO $ respondFilterRequest b
		| otherwise = liftIO $ do
			-- Have to buffer the file content in memory here,
			-- but it's not an annexed file, so not typically
			-- large, and it's all stored in git, which also
			-- buffers files in memory.
			b' <- B.concat . (map pktLineToByteString)
				<$> readUntilFlushPkt
			respondFilterRequest (b <> b')
	let discardreststdin
		| readcomplete = return ()
		| otherwise = liftIO discardUntilFlushPkt
	let emitpointer = liftIO . respondFilterRequest . formatPointer
	-- This does not incrementally hash, so both git and git-annex
	-- read from the file. It may be less expensive to incrementally
	-- hash the content provided by git, but Backend does not currently
	-- have an interface to do so.
	Command.Smudge.clean' (toRawFilePath file)
		(parseLinkTargetOrPointer' b)
		passthrough
		discardreststdin
		emitpointer