File: Import.hs

package info (click to toggle)
git-annex 10.20251029-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 75,300 kB
  • sloc: haskell: 91,492; javascript: 9,103; sh: 1,593; makefile: 216; perl: 137; ansic: 44
file content (403 lines) | stat: -rw-r--r-- 14,499 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
{- git-annex command
 -
 - Copyright 2012-2024 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE ApplicativeDo, OverloadedStrings #-}

module Command.Import where

import Command
import qualified Git
import qualified Annex
import qualified Command.Add
import qualified Command.Reinject
import qualified Types.Remote as Remote
import qualified Git.Ref
import Utility.CopyFile
import Utility.OptParse
import Backend
import Types.KeySource
import Annex.CheckIgnore
import Annex.NumCopies
import Annex.FileMatcher
import Annex.Ingest
import Annex.InodeSentinal
import Annex.Import
import Annex.Perms
import Annex.RemoteTrackingBranch
import Utility.InodeCache
import Logs.Location
import Git.FilePath
import Git.Types
import Types.Import
import Utility.Metered
import qualified Utility.RawFilePath as R

import Control.Concurrent.STM
import System.PosixCompat.Files (isDirectory, isSymbolicLink, isRegularFile)

cmd :: Command
cmd = notBareRepo $
	withAnnexOptions opts $
		command "import" SectionCommon 
			"add a tree of files to the repository"
			(paramPaths ++ "|BRANCH")
			(seek <$$> optParser)
  where
	opts =
		[ backendOption
		, jobsOption
		, jsonOptions
		, jsonProgressOption
		-- These options are only used when importing from a
		-- directory, not from a special remote. So it's ok
		-- to use LimitDiskFiles.
		, fileMatchingOptions LimitDiskFiles
		]

data ImportOptions 
	= LocalImportOptions
		{ importFiles :: CmdParams
		, duplicateMode :: DuplicateMode
		, checkGitIgnoreOption :: CheckGitIgnore
		}
	| RemoteImportOptions
		{ importFromRemote :: DeferredParse Remote
		, importToBranch :: Branch
		, importToSubDir :: Maybe FilePath
		, importContent :: Bool
		, checkGitIgnoreOption :: CheckGitIgnore
		, messageOption :: [String]
		}

optParser :: CmdParamsDesc -> Parser ImportOptions
optParser desc = do
	ps <- cmdParams desc
	mfromremote <- optional $ mkParseRemoteOption <$> parseFromOption
	content <- invertableSwitch "content" True
		( help "do not get contents of imported files"
		)
	dupmode <- fromMaybe Default <$> optional duplicateModeParser
	ic <- Command.Add.checkGitIgnoreSwitch
	message <- many (strOption
		( long "message" <> short 'm' <> metavar "MSG"
		<> help "commit message"
		))
	pure $ case mfromremote of
		Nothing -> LocalImportOptions ps dupmode ic
		Just r -> case ps of
			[bs] -> 
				let (branch, subdir) = separate (== ':') bs
				in RemoteImportOptions r
					(Ref (encodeBS branch))
					(if null subdir then Nothing else Just subdir)
					content
					ic
					message
			_ -> giveup "expected BRANCH[:SUBDIR]"

data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
	deriving (Eq)

duplicateModeParser :: Parser DuplicateMode
duplicateModeParser = 
	flag' Duplicate
		( long "duplicate" 
		<> help "do not delete source files"
		)
	<|> flag' DeDuplicate
		( long "deduplicate"
		<> help "delete source files whose content was imported before"
		)
	<|> flag' CleanDuplicates
		( long "clean-duplicates"
		<> help "delete duplicate source files (import nothing)"
		)
	<|> flag' SkipDuplicates
		( long "skip-duplicates"
		<> help "import only new files (do not delete source files)"
		)
	<|> flag' ReinjectDuplicates
		( long "reinject-duplicates"
		<> help "import new files, and reinject the content of files that were imported before"
		)

seek :: ImportOptions -> CommandSeek
seek o@(LocalImportOptions {}) = startConcurrency commandStages $ do
	repopath <- liftIO . absPath =<< fromRepo Git.repoPath
	inrepops <- liftIO $ filter (dirContains repopath)
		<$> mapM (absPath . toOsPath) (importFiles o)
	unless (null inrepops) $ do
		qp <- coreQuotePath <$> Annex.getGitConfig
		giveup $ decodeBS $ quote qp $ 
			"cannot import files from inside the working tree (use git annex add instead): "
				<> quotedPaths inrepops
	largematcher <- largeFilesMatcher
	addunlockedmatcher <- addUnlockedMatcher
	(commandAction . startLocal o addunlockedmatcher largematcher (duplicateMode o))
		`withPathContents` importFiles o
seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
	r <- getParsed (importFromRemote o)
	unlessM (Remote.isImportSupported r) $
		giveup "That remote does not support imports."
	subdir <- maybe
		(pure Nothing)
		(Just <$$> inRepo . toTopFilePath . toOsPath)
		(importToSubDir o)
	addunlockedmatcher <- addUnlockedMatcher
	seekRemote r (importToBranch o) subdir (importContent o) 
		(checkGitIgnoreOption o)
		addunlockedmatcher
		(messageOption o)

startLocal :: ImportOptions -> AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (OsPath, OsPath) -> CommandStart
startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
	ifM (liftIO $ isRegularFile <$> R.getSymbolicLinkStatus (fromOsPath srcfile))
		( starting "import" ai si pickaction
		, stop
		)
  where
 	ai = ActionItemTreeFile destfile
	si = SeekInput []

	deletedup k = do
		showNote $ UnquotedString $ "duplicate of " ++ serializeKey k
		verifyExisting k destfile
			( do
				liftIO $ removeFile srcfile
				next $ return True
			, do
				warning "Could not verify that the content is still present in the annex; not removing from the import location."
				stop
			)
	reinject k = do
		showNote "reinjecting"
		Command.Reinject.perform srcfile k
	importfile ld k = checkdestdir $ do
		ignored <- checkIgnored (checkGitIgnoreOption o) destfile
		if ignored
			then do
				warning $ "not importing " <> QuotedPath destfile <> " which is .gitignored (use --no-check-gitignore to override)"
				stop
			else do
				existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus (fromOsPath destfile))
				case existing of
					Nothing -> importfilechecked ld k
					Just s
						| isDirectory s -> notoverwriting "(is a directory)"
						| isSymbolicLink s -> ifM (Annex.getRead Annex.force)
							( do
								liftIO $ removeWhenExistsWith removeFile destfile
								importfilechecked ld k
							, notoverwriting "(is a symlink)"
							)
						| otherwise -> ifM (Annex.getRead Annex.force)
							( do
								liftIO $ removeWhenExistsWith removeFile destfile
								importfilechecked ld k
							, notoverwriting "(use --force to override, or a duplication option such as --deduplicate to clean up)"
							)
	checkdestdir cont = do
		let destdir = parentDir destfile
		existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus (fromOsPath destdir))
		case existing of
			Nothing -> cont
			Just s
				| isDirectory s -> cont
				| otherwise -> do
					warning $ "not importing " <> QuotedPath destfile <> " because " <> QuotedPath destdir <> " is not a directory"
					stop

	importfilechecked ld k = do
		-- Move or copy the src file to the dest file.
		-- The dest file is what will be ingested.
		createWorkTreeDirectory (parentDir destfile)
		unwind <- liftIO $ if mode == Duplicate || mode == SkipDuplicates
			then do
				void $ copyFileExternal CopyAllMetaData srcfile destfile
				return $ removeWhenExistsWith removeFile destfile
			else do
				moveFile srcfile destfile
				return $ moveFile destfile srcfile
		-- Make sure that the dest file has its write permissions
		-- removed; the src file normally already did, but may
		-- have imported it from a filesystem that does not allow
		-- removing write permissions, to a repo on a filesystem
		-- that does.
		when (lockingFile (lockDownConfig ld)) $ do
			freezeContent destfile
			checkLockedDownWritePerms destfile srcfile >>= \case
				Just err -> do
					liftIO unwind
					qp <- coreQuotePath <$> Annex.getGitConfig
					giveup (decodeBS $ quote qp err)
				Nothing -> noop
		-- Get the inode cache of the dest file. It should be
		-- weakly the same as the originally locked down file's
		-- inode cache. (Since the file may have been copied,
		-- its inodes may not be the same.)
		s <- liftIO $ R.getSymbolicLinkStatus (fromOsPath destfile)
		newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
		let unchanged = case (newcache, inodeCache (keySource ld)) of
			(_, Nothing) -> True
			(Just newc, Just c) | compareWeak c newc -> True
			_ -> False
		unless unchanged $
			giveup "changed while it was being added"
		-- The LockedDown needs to be adjusted, since the destfile
		-- is what will be ingested.
		let ld' = ld
			{ keySource = KeySource
				{ keyFilename = destfile
				, contentLocation = destfile
				, inodeCache = newcache
				}
			}
		ifM (checkFileMatcher NoLiveUpdate largematcher destfile)
			( ingestAdd' nullMeterUpdate (Just ld') (Just k)
				>>= maybe
					stop
					(\addedk -> next $ Command.Add.cleanup addedk True)
			, Command.Add.addSmall False (DryRun False) destfile s
			)
	notoverwriting why = do
		warning $ "not overwriting existing " <> QuotedPath destfile <> " " <> UnquotedString why
		stop
	lockdown a = do
		let mi = MatchingFile $ FileInfo
			{ contentFile = srcfile
			, matchFile = destfile
			, matchKey = Nothing
			}
		lockingfile <- not <$> addUnlocked addunlockedmatcher mi True
		-- Minimal lock down with no hard linking so nothing
		-- has to be done to clean up from it.
		let cfg = LockDownConfig
			{ lockingFile = lockingfile
			, hardlinkFileTmpDir = Nothing
			-- The write perms of the file may not be able to be
			-- removed, if it's being imported from a crippled
			-- filesystem. So lockDown is asked to not check
			-- the write perms. They will be checked later, after
			-- the file gets copied into the repository.
			, checkWritePerms = False
			}
		v <- lockDown cfg srcfile
		case v of
			Just ld -> do
				backend <- chooseBackend destfile
				k <- fst <$> genKey (keySource ld) nullMeterUpdate backend
				a (ld, k)
			Nothing -> stop
	checkdup k dupa notdupa = ifM (isKnownKey k)
		( dupa
		, notdupa
		)
	pickaction = lockdown $ \(ld, k) -> case mode of
		DeDuplicate -> checkdup k (deletedup k) (importfile ld k)
		CleanDuplicates -> checkdup k
			(deletedup k)
			(skipbecause "not duplicate")
		SkipDuplicates -> checkdup k 
			(skipbecause "duplicate")
			(importfile ld k)
		ReinjectDuplicates -> checkdup k
			(reinject k)
			(importfile ld k)
		_ -> importfile ld k
	skipbecause s = do
		showNote (s <> "; skipping")
		next (return True)

verifyExisting :: Key -> OsPath -> (CommandPerform, CommandPerform) -> CommandPerform
verifyExisting key destfile (yes, no) = do
	-- Look up the numcopies setting for the file that it would be
	-- imported to, if it were imported.
	(needcopies, mincopies) <- getFileNumMinCopies destfile

	(tocheck, preverified) <- verifiableCopies key []
	verifyEnoughCopiesToDrop [] key Nothing Nothing needcopies mincopies [] preverified tocheck
		(const yes) no

seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> AddUnlockedMatcher -> [String] -> CommandSeek
seekRemote remote branch msubdir importcontent ci addunlockedmatcher importmessages = do
	importtreeconfig <- case msubdir of
		Nothing -> return ImportTree
		Just subdir ->
			let mk tree = pure $ ImportSubTree subdir tree
			in fromtrackingbranch Git.Ref.tree >>= \case
				Just tree -> mk tree
				Nothing -> inRepo (Git.Ref.tree branch) >>= \case
					Just tree -> mk tree
					Nothing -> giveup $ "Unable to find base tree for branch " ++ fromRef branch
	
	trackingcommit <- fromtrackingbranch Git.Ref.sha
	cmode <- annexCommitMode <$> Annex.getGitConfig
	let importcommitconfig = ImportCommitConfig trackingcommit cmode importmessages'
	let commitimport = commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig addunlockedmatcher

	importabletvar <- liftIO $ newTVarIO Nothing
	void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar)
	liftIO (atomically (readTVar importabletvar)) >>= \case
		Nothing -> return ()
		Just importable -> importChanges remote importtreeconfig importcontent False importable >>= \case
			ImportUnfinished -> warning $ UnquotedString $ concat
				[ "Failed to import some files from "
				, Remote.name remote
				, ". Re-run command to resume import."
				]
			ImportFinished postexportlogupdate imported ->
				void $ includeCommandAction $ 
					commitimport imported postexportlogupdate
  where
	importmessages'
		| null importmessages = ["import from " ++ Remote.name remote]
		| otherwise = importmessages

	tb = mkRemoteTrackingBranch remote branch

	fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)

listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize))) -> CommandStart
listContents remote importtreeconfig ci tvar = starting "list" ai si $
	listContents' remote importtreeconfig ci $ \importable -> do
		liftIO $ atomically $ writeTVar tvar importable
		next $ return True
  where
	ai = ActionItemOther (Just (UnquotedString (Remote.name remote)))
	si = SeekInput []

listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
listContents' remote importtreeconfig ci a = 
	makeImportMatcher remote >>= \case
		Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case
			Right importable -> a importable
			Left e -> giveup $ "Unable to list contents of " ++ Remote.name remote ++ ": " ++ show e
		Left err -> giveup $ unwords 
			[ "Cannot import from"
			, Remote.name remote
			, "because of a problem with its configuration:"
			, err
			]

commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> AddUnlockedMatcher -> Imported -> PostExportLogUpdate -> CommandStart
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig addunlockedmatcher imported postexportlogupdate =
	starting "update" ai si $ do
		importcommit <- buildImportCommit remote importtreeconfig importcommitconfig addunlockedmatcher imported postexportlogupdate
		next $ updateremotetrackingbranch importcommit
  where
	ai = ActionItemOther (Just $ UnquotedString $ fromRef $ fromRemoteTrackingBranch tb)
	si = SeekInput []
	-- Update the tracking branch. Done even when there
	-- is nothing new to import, to make sure it exists.
	updateremotetrackingbranch importcommit =
		case importcommit <|> trackingcommit of
			Just c -> do
				setRemoteTrackingBranch tb c
				return True
			Nothing -> do
				warning $ UnquotedString $ "Nothing to import and " ++ fromRef branch ++ " does not exist."
				return False