1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
|
{- git-annex command
-
- Copyright 2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.Recompute where
import Command
import qualified Remote.Compute
import qualified Remote
import qualified Types.Remote as Remote
import qualified Git.Ref as Git
import Annex.Content
import Annex.UUID
import Annex.CatFile
import Annex.GitShaKey
import Git.FilePath
import Logs.Location
import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent, getInputContent', addComputed)
import Backend (maybeLookupBackendVariety, unknownBackendVarietyMessage, chooseBackend)
import Types.Key
import qualified Utility.RawFilePath as R
import qualified Data.Map as M
import System.PosixCompat.Files (isSymbolicLink)
cmd :: Command
cmd = notBareRepo $ withAnnexOptions [jsonOptions] $
command "recompute" SectionCommon "recompute computed files"
paramPaths (seek <$$> optParser)
data RecomputeOptions = RecomputeOptions
{ recomputeThese :: CmdParams
, originalOption :: Bool
, reproducible :: Maybe Reproducible
, computeRemote :: Maybe (DeferredParse Remote)
}
optParser :: CmdParamsDesc -> Parser RecomputeOptions
optParser desc = RecomputeOptions
<$> cmdParams desc
<*> switch
( long "original"
<> help "recompute using original content of input files"
)
<*> parseReproducible
<*> optional (mkParseRemoteOption <$> parseRemoteOption)
seek :: RecomputeOptions -> CommandSeek
seek o = startConcurrency commandStages (seek' o)
seek' :: RecomputeOptions -> CommandSeek
seek' o = do
computeremote <- maybe (pure Nothing) (Just <$$> getParsed)
(computeRemote o)
let seeker = AnnexedFileSeeker
{ startAction = const $ start o computeremote
, checkContentPresent = Nothing
, usesLocationLog = True
}
withFilesInGitAnnex ww seeker
=<< workTreeItems ww (recomputeThese o)
where
ww = WarnUnmatchLsFiles "recompute"
start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
start o (Just computeremote) si file key =
stopUnless (elem (Remote.uuid computeremote) <$> loggedLocations key) $
start' o computeremote si file key
start o Nothing si file key = do
rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
case sortOn Remote.cost $ filter Remote.Compute.isComputeRemote rs of
[] -> stop
(r:_) -> start' o r si file key
start' :: RecomputeOptions -> Remote -> SeekInput -> OsPath -> Key -> CommandStart
start' o r si file key =
Remote.Compute.getComputeState
(Remote.remoteStateHandle r) key >>= \case
Nothing -> stop
Just state -> shouldrecompute state >>= \case
Nothing -> stop
Just mreason -> starting "recompute" ai si $ do
maybe noop showNote mreason
perform o r file key state
where
ai = mkActionItem (key, file)
shouldrecompute state
| originalOption o = return (Just Nothing)
| otherwise = firstM (inputchanged state)
(M.toList (Remote.Compute.computeInputs state))
>>= return . \case
Nothing -> Nothing
Just (inputfile, _) -> Just $ Just $
QuotedPath inputfile <> " changed"
inputchanged state (inputfile, inputkey) = do
-- Note that the paths from the remote state are not to be
-- trusted to point to a file in the repository, but using
-- the path with git cat-file will only succeed if it
-- is checked into the repository.
p <- fromRepo $ fromTopFilePath $ asTopFilePath $
Remote.Compute.computeSubdir state </> inputfile
case keyGitSha inputkey of
Nothing ->
catKeyFile p >>= return . \case
Just k -> k /= inputkey
Nothing -> inputfilemissing
Just inputgitsha -> inRepo (Git.fileRef p) >>= \case
Just fileref -> catObjectMetaData fileref >>= return . \case
Just (sha, _, _) -> sha /= inputgitsha
Nothing -> inputfilemissing
Nothing -> return inputfilemissing
where
-- When an input file is missing, go ahead and recompute.
-- This way, the user will see the computation fail,
-- with an error message that explains the problem.
-- Or, if the input file is only optionally used by the
-- computation, it might succeed.
inputfilemissing = True
perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
perform o r file origkey origstate = do
program <- Remote.Compute.getComputeProgram r
reproducibleconfig <- getreproducibleconfig
originallocked <- liftIO $ isSymbolicLink
<$> R.getSymbolicLinkStatus (fromOsPath file)
showOutput
Remote.Compute.runComputeProgram program origstate
(Remote.Compute.ImmutableState False)
(getinputcontent program)
Nothing
(go program reproducibleconfig originallocked)
next cleanup
where
go program reproducibleconfig originallocked result tmpdir ts = do
checkbehaviorchange program
(Remote.Compute.computeState result)
addComputed Nothing r reproducibleconfig
choosebackend destfile False (Left originallocked)
result tmpdir ts
checkbehaviorchange program state = do
let check s w a b = forM_ (M.keys (w a)) $ \f ->
unless (M.member f (w b)) $
Remote.Compute.computationBehaviorChangeError program s f
check "not using input file"
Remote.Compute.computeInputs origstate state
check "outputting"
Remote.Compute.computeOutputs state origstate
check "not outputting"
Remote.Compute.computeOutputs origstate state
getinputcontent program p required
| originalOption o =
case M.lookup p (Remote.Compute.computeInputs origstate) of
Just inputkey -> getInputContent' False inputkey required
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
Nothing -> Remote.Compute.computationBehaviorChangeError program
"requesting a new input file" p
| otherwise = getInputContent False p required
destfile outputfile
| Just outputfile == origfile = Just file
| otherwise = Nothing
origfile = headMaybe $ M.keys $ M.filter (== Just origkey)
(Remote.Compute.computeOutputs origstate)
origbackendvariety = fromKey keyVariety origkey
recomputingvurl = case origbackendvariety of
VURLKey -> True
_ -> False
getreproducibleconfig = case reproducible o of
Just (Reproducible True) -> return (Just (Reproducible True))
-- A VURL key is used when the computation was
-- unreproducible. So recomputing should too, but that
-- will result in the same VURL key. Since moveAnnex
-- will prefer the current annex object to a new one,
-- delete the annex object first, so that if recomputing
-- generates a new version of the file, it replaces
-- the old version.
v -> if recomputingvurl
then do
lockContentForRemoval origkey noop removeAnnex
return (Just (Reproducible False))
else return v
cleanup = do
case reproducible o of
Just (Reproducible True) -> noop
-- in case computation failed, update
-- location log for removal done earlier
_ -> when recomputingvurl $ do
u <- getUUID
unlessM (elem u <$> loggedLocations origkey) $
logStatus NoLiveUpdate origkey InfoMissing
return True
choosebackend outputfile
-- When converting a VURL to reproducible, can't use
-- the VURL backend.
| recomputingvurl && reproducible o == Just (Reproducible True) =
chooseBackend outputfile
-- Use the same backend as was used to compute it before,
-- so if the computed file is the same, there will be
-- no change.
| otherwise = maybeLookupBackendVariety origbackendvariety >>= \case
Just b -> return b
Nothing -> giveup $ unknownBackendVarietyMessage origbackendvariety
|