File: FilterBranch.hs

package info (click to toggle)
git-annex 10.20230126-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 69,344 kB
  • sloc: haskell: 74,654; javascript: 9,103; sh: 1,304; makefile: 203; perl: 136; ansic: 44
file content (195 lines) | stat: -rw-r--r-- 6,097 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
{- git-annex command
 -
 - Copyright 2021 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings #-}

module Command.FilterBranch where

import Command
import qualified Annex
import qualified Annex.Branch
import Annex.Branch.Transitions
import Types.Transitions
import Annex.HashObject
import Annex.Tmp
import Annex.SpecialRemote.Config
import Types.ProposedAccepted
import Logs
import Logs.Remote
import Git.Types
import Git.FilePath
import Git.Index
import Git.Env
import Git.UpdateIndex
import qualified Git.LsTree as LsTree
import qualified Git.Branch as Git
import Utility.RawFilePath

import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Builder
import qualified System.FilePath.ByteString as P

cmd :: Command
cmd = noMessages $ withAnnexOptions [annexedMatchingOptions] $ 
	command "filter-branch" SectionMaintenance 
		"filter information from the git-annex branch"
		paramPaths (seek <$$> optParser)

data FilterBranchOptions = FilterBranchOptions
	{ includeFiles :: CmdParams
	, keyOptions :: Maybe KeyOptions
	, keyInformation :: [IncludeExclude (DeferredParse UUID)]
	, repoConfig :: [IncludeExclude (DeferredParse UUID)]
	, includeGlobalConfig :: Bool
	}

optParser :: CmdParamsDesc -> Parser FilterBranchOptions
optParser desc = FilterBranchOptions
	<$> cmdParams desc
	<*> optional parseKeyOptions
	<*> many (parseIncludeExclude "key-information")
	<*> many (parseIncludeExclude "repo-config")
	<*> switch
		( long "include-global-config"
		<> help "include global configuration"
		)

data IncludeExclude t
	= Include t
	| Exclude t
	| IncludeAll
	deriving (Show, Eq, Ord)

isInclude :: IncludeExclude t -> Bool
isInclude (Include _) = True
isInclude IncludeAll = True
isInclude (Exclude _) = False

parseIncludeExclude :: String -> Parser (IncludeExclude (DeferredParse UUID))
parseIncludeExclude s = 
	( Include <$> parseRepositoryOption
		("include-" ++ s ++ "-for")
		"include information about a repository"
	) <|>
	( Exclude <$> parseRepositoryOption
		("exclude-" ++ s ++ "-for")
		"exclude information about a repository"
	) <|>
	( flag' IncludeAll 
		( long ("include-all-" ++ s)
		<> help "include information about all non-excluded repositories"
		)
	)

parseRepositoryOption :: String -> String -> Parser (DeferredParse UUID)
parseRepositoryOption s h = parseUUIDOption <$> strOption
	( long s
	<> help h
	<> metavar (paramRemote `paramOr` paramDesc `paramOr` paramUUID)
	<> completeRemotes
	)

mkUUIDMatcher :: [IncludeExclude (DeferredParse UUID)] -> Annex (UUID -> Bool)
mkUUIDMatcher l = do
	sameasmap <- M.mapMaybe
		(toUUID . fromProposedAccepted <$$> M.lookup sameasUUIDField)
		<$> remoteConfigMap
	mkUUIDMatcher' sameasmap <$> mapM get l
  where
	get (Include v) = Include <$> getParsed v
	get (Exclude v) = Exclude <$> getParsed v
	get IncludeAll = pure IncludeAll

mkUUIDMatcher' :: M.Map UUID UUID -> [IncludeExclude UUID] -> (UUID -> Bool)
mkUUIDMatcher' sameasmap l = \u -> 
	let sameas = M.lookup u sameasmap
	in ( S.member (Include u) includes
		|| S.member IncludeAll includes
		|| maybe False (\u' -> S.member (Include u') includes) sameas
		)
		&& S.notMember (Exclude u) excludes
		&& maybe True (\u' -> S.notMember (Exclude u') excludes) sameas
  where
	(includes, excludes) = (S.partition isInclude (S.fromList l))

seek :: FilterBranchOptions -> CommandSeek
seek o = withOtherTmp $ \tmpdir -> do
	let tmpindex = tmpdir P.</> "index"
	gc <- Annex.getGitConfig
	tmpindexrepo <- Annex.inRepo $ \r ->
		addGitEnv r indexEnv (fromRawFilePath tmpindex)
	withUpdateIndex tmpindexrepo $ \h -> do
		keyinfomatcher <- mkUUIDMatcher (keyInformation o)
		repoconfigmatcher <- mkUUIDMatcher (repoConfig o)

		let addtoindex f sha = liftIO $ streamUpdateIndex' h $
			pureStreamer $ L.fromStrict $ LsTree.formatLsTree $ LsTree.TreeItem
				{ LsTree.mode = fromTreeItemType TreeFile
				, LsTree.typeobj = fmtObjectType BlobObject
				, LsTree.sha = sha
				, LsTree.size = Nothing
				, LsTree.file = asTopFilePath f
				}
		
		let filterbanch matcher f c
			| L.null c = noop
			| otherwise = case filterBranch matcher gc f c of
				ChangeFile builder -> do
					let c' = toLazyByteString builder
					unless (L.null c') $
						addtoindex f =<< hashBlob c'
				-- This could perhaps be optimised by looking
				-- up the sha of the file in the branch.
				PreserveFile -> addtoindex f =<< hashBlob c

		-- Add information for all keys that are being included,
		-- filtering out information for repositories that are not
		-- being included.
		let addkeyinfo k = startingCustomOutput k $ do
			forM_ (keyLogFiles gc k) $ \f ->
				filterbanch keyinfomatcher f
					=<< Annex.Branch.get f
			next (return True)
		let seeker = AnnexedFileSeeker
			{ startAction = \_ _ k -> addkeyinfo k
			, checkContentPresent = Nothing
			, usesLocationLog = True
			}
		-- Avoid the usual default of all files in the current
		-- directory and below, because this command is documented
		-- as only including the information it has explicitly been
		-- told to include.
		when (not (null (includeFiles o)) || isJust (keyOptions o)) $
			withKeyOptions (keyOptions o) False seeker
				(commandAction . \(_, k, _) -> addkeyinfo k)
				(withFilesInGitAnnex ww seeker)
				=<< workTreeItems ww (includeFiles o)
	
		-- Add repository configs for all repositories that are
		-- being included.
		forM_ topLevelUUIDBasedLogs $ \f ->
			filterbanch repoconfigmatcher f
				=<< Annex.Branch.get f

		-- Add global configs when included.
		when (includeGlobalConfig o) $
			forM_ otherTopLevelLogs $ \f -> do
				c <- Annex.Branch.get f
				unless (L.null c) $
					addtoindex f =<< hashBlob c

	-- Commit the temporary index, and output the result.
	t <- liftIO $ Git.writeTree tmpindexrepo
	liftIO $ removeWhenExistsWith removeLink tmpindex
	cmode <- annexCommitMode <$> Annex.getGitConfig
	cmessage <- Annex.Branch.commitMessage
	c <- inRepo $ Git.commitTree cmode cmessage [] t
	liftIO $ putStrLn (fromRef c)
  where
	ww = WarnUnmatchLsFiles