File: Recompute.hs

package info (click to toggle)
git-annex 10.20250416-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 73,572 kB
  • sloc: haskell: 90,656; javascript: 9,103; sh: 1,469; makefile: 211; perl: 137; ansic: 44
file content (218 lines) | stat: -rw-r--r-- 7,529 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
{- git-annex command
 -
 - Copyright 2025 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings #-}

module Command.Recompute where

import Command
import qualified Remote.Compute
import qualified Remote
import qualified Types.Remote as Remote
import qualified Git.Ref as Git
import Annex.Content
import Annex.UUID
import Annex.CatFile
import Annex.GitShaKey
import Git.FilePath
import Logs.Location
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
import Backend (maybeLookupBackendVariety, unknownBackendVarietyMessage, chooseBackend)
import Types.Key
import qualified Utility.RawFilePath as R

import qualified Data.Map as M
import System.PosixCompat.Files (isSymbolicLink)

cmd :: Command
cmd = notBareRepo $ withAnnexOptions [jsonOptions] $
	command "recompute" SectionCommon "recompute computed files"
		paramPaths (seek <$$> optParser)

data RecomputeOptions = RecomputeOptions
	{ recomputeThese :: CmdParams
	, originalOption :: Bool
	, reproducible :: Maybe Reproducible
	, computeRemote :: Maybe (DeferredParse Remote)
	}

optParser :: CmdParamsDesc -> Parser RecomputeOptions
optParser desc = RecomputeOptions
	<$> cmdParams desc
	<*> switch
		( long "original"
		<> help "recompute using original content of input files"
		)
	<*> parseReproducible
	<*> optional (mkParseRemoteOption <$> parseRemoteOption)

seek :: RecomputeOptions -> CommandSeek
seek o = startConcurrency commandStages (seek' o)

seek' :: RecomputeOptions -> CommandSeek
seek' o = do
	computeremote <- maybe (pure Nothing) (Just <$$> getParsed)
		(computeRemote o)
	let seeker = AnnexedFileSeeker
		{ startAction = const $ start o computeremote
		, checkContentPresent = Nothing
		, usesLocationLog = True
		}
	withFilesInGitAnnex ww seeker
		=<< workTreeItems ww (recomputeThese o)
  where
	ww = WarnUnmatchLsFiles "recompute"

start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
start o (Just computeremote) si file key = 
	stopUnless (elem (Remote.uuid computeremote) <$> loggedLocations key) $
		start' o computeremote si file key		
start o Nothing si file key = do
	rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
	case sortOn Remote.cost $ filter Remote.Compute.isComputeRemote rs of
		[] -> stop
		(r:_) -> start' o r si file key

start' :: RecomputeOptions -> Remote -> SeekInput -> OsPath -> Key -> CommandStart
start' o r si file key =
	Remote.Compute.getComputeState
		(Remote.remoteStateHandle r) key >>= \case
			Nothing -> stop
			Just state -> shouldrecompute state >>= \case
				Nothing -> stop
				Just mreason -> starting "recompute" ai si $ do
					maybe noop showNote mreason
					perform o r file key state
  where
	ai = mkActionItem (key, file)

	shouldrecompute state
		| originalOption o = return (Just Nothing)
		| otherwise = firstM (inputchanged state)
			(M.toList (Remote.Compute.computeInputs state))
			>>= return . \case
				Nothing -> Nothing
				Just (inputfile, _) -> Just $ Just $
					QuotedPath inputfile <> " changed"

	inputchanged state (inputfile, inputkey) = do
		-- Note that the paths from the remote state are not to be
		-- trusted to point to a file in the repository, but using
		-- the path with git cat-file will only succeed if it
		-- is checked into the repository.
		p <- fromRepo $ fromTopFilePath $ asTopFilePath $
			Remote.Compute.computeSubdir state </> inputfile
		case keyGitSha inputkey of
			Nothing -> 
				catKeyFile p >>= return . \case
					Just k -> k /= inputkey
					Nothing -> inputfilemissing
			Just inputgitsha -> inRepo (Git.fileRef p) >>= \case
				Just fileref -> catObjectMetaData fileref >>= return . \case
					Just (sha, _, _) -> sha /= inputgitsha
					Nothing -> inputfilemissing
				Nothing -> return inputfilemissing
	  where
		-- When an input file is missing, go ahead and recompute.
		-- This way, the user will see the computation fail,
		-- with an error message that explains the problem.
		-- Or, if the input file is only optionally used by the
		-- computation, it might succeed.
		inputfilemissing = True

perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
perform o r file origkey origstate = do
	program <- Remote.Compute.getComputeProgram r
	reproducibleconfig <- getreproducibleconfig
	originallocked <- liftIO $ isSymbolicLink
		<$> R.getSymbolicLinkStatus (fromOsPath file)
	showOutput
	Remote.Compute.runComputeProgram program origstate
		(Remote.Compute.ImmutableState False)
		(getinputcontent program)
		Nothing
		(go program reproducibleconfig originallocked)
	next cleanup
  where
	go program reproducibleconfig originallocked result tmpdir ts = do
		checkbehaviorchange program
			(Remote.Compute.computeState result)
		addComputed Nothing r reproducibleconfig
			choosebackend destfile False (Left originallocked)
			result tmpdir ts

	checkbehaviorchange program state = do
		let check s w a b = forM_ (M.keys (w a)) $ \f ->
			unless (M.member f (w b)) $
				Remote.Compute.computationBehaviorChangeError program s f
		
		check "not using input file"
			Remote.Compute.computeInputs origstate state
		check "outputting"
			Remote.Compute.computeOutputs state origstate
		check "not outputting"
			Remote.Compute.computeOutputs origstate state

	getinputcontent program p required
		| originalOption o =
			case M.lookup p (Remote.Compute.computeInputs origstate) of
				Just inputkey -> getInputContent' False inputkey required
					(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
				Nothing -> Remote.Compute.computationBehaviorChangeError program
					"requesting a new input file" p
		| otherwise = getInputContent False p required
	
	destfile outputfile
		| Just outputfile == origfile = Just file
		| otherwise = Nothing
	
	origfile = headMaybe $ M.keys $ M.filter (== Just origkey)
		(Remote.Compute.computeOutputs origstate)

	origbackendvariety = fromKey keyVariety origkey
	
	recomputingvurl = case origbackendvariety of
		VURLKey -> True
		_ -> False

	getreproducibleconfig = case reproducible o of
		Just (Reproducible True) -> return (Just (Reproducible True))
		-- A VURL key is used when the computation was
		-- unreproducible. So recomputing should too, but that
		-- will result in the same VURL key. Since moveAnnex
		-- will prefer the current annex object to a new one,
		-- delete the annex object first, so that if recomputing
		-- generates a new version of the file, it replaces
		-- the old version.
		v -> if recomputingvurl
			then do
				lockContentForRemoval origkey noop removeAnnex
				return (Just (Reproducible False))
			else return v
	
	cleanup = do
		case reproducible o of
			Just (Reproducible True) -> noop
			-- in case computation failed, update
			-- location log for removal done earlier
			_ -> when recomputingvurl $ do
				u <- getUUID
				unlessM (elem u <$> loggedLocations origkey) $
					logStatus NoLiveUpdate origkey InfoMissing
		return True

	choosebackend outputfile
		-- When converting a VURL to reproducible, can't use
		-- the VURL backend.
		| recomputingvurl && reproducible o == Just (Reproducible True) =
			chooseBackend outputfile
		-- Use the same backend as was used to compute it before,
		-- so if the computed file is the same, there will be
		-- no change.
		| otherwise = maybeLookupBackendVariety origbackendvariety >>= \case
			Just b -> return b
			Nothing -> giveup $ unknownBackendVarietyMessage origbackendvariety