File: ThirdPartyPopulated.hs

package info (click to toggle)
git-annex 10.20230126-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 69,344 kB
  • sloc: haskell: 74,654; javascript: 9,103; sh: 1,304; makefile: 203; perl: 136; ansic: 44
file content (85 lines) | stat: -rw-r--r-- 3,362 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{- Helpers for thirdPartyPopulated remotes
 -
 - Copyright 2020 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings #-}

module Remote.Helper.ThirdPartyPopulated where

import Annex.Common
import Types.Remote
import Types.Import
import Crypto (isEncKey)
import Utility.Metered

import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as S

-- The interesting files on a thirdPartyPopulated remote usually live
-- inside a .git directory. Git does not really support .git as a path
-- component inside a tree (such a tree can be constructed, but it leads
-- to problems), so anything under .git is prevented from being imported.
--
-- To work around that, each path component is escaped when building the
-- ImportLocation:
--   * ".git" becomes "dotgit"
--   * any component already ending in "dotgit" gets an extra "dot"
--     prepended, so the escaping stays reversible
--   * every other component is left unchanged
mkThirdPartyImportLocation :: RawFilePath -> ImportLocation
mkThirdPartyImportLocation path = mkImportLocation (P.joinPath escaped)
  where
	escaped = [ escapeComponent c | c <- P.splitDirectories path ]

	escapeComponent c
		| c == ".git" = "dotgit"
		| "dotgit" `S.isSuffixOf` c = "dot" <> c
		| otherwise = c

-- Recovers the original path from an ImportLocation, undoing the
-- per-component escaping applied by mkThirdPartyImportLocation:
--   * "dotgit" is turned back into ".git"
--   * any other component ending in "dotgit" has its first 3 bytes
--     removed (the "dot" that the escaping prepended)
--   * every other component is left unchanged
fromThirdPartyImportLocation :: ImportLocation -> RawFilePath
fromThirdPartyImportLocation loc =
	P.joinPath (map unescapeComponent components)
  where
	components = P.splitDirectories (fromImportLocation loc)

	unescapeComponent c
		| c == "dotgit" = ".git"
		| "dotgit" `S.isSuffixOf` c = S.drop 3 c
		| otherwise = c

-- For a thirdPartyPopulated remote that holds a backup of a git-annex
-- repository (or of some special remotes), this picks out only those
-- ImportLocations that are annex object files. Every other
-- ImportLocation yields Nothing and so is ignored.
--
-- The content identifier and the meter update are not used; the decision
-- is made purely from the location's path and the reported size.
importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
importKey loc _cid sz _meterupdate = pure (importKey' path (Just sz))
  where
	path = fromImportLocation loc

-- Decides whether a path looks like an annex object file, and if so
-- returns its Key. The optional size is that of the file as stored in
-- the backup; when it is not provided, the size check is skipped.
importKey' :: RawFilePath -> Maybe ByteSize -> Maybe Key
importKey' p msz = fileKey name >>= accept
  where
	name = P.takeFileName p

	-- Name of the directory that directly contains the file.
	parent = lastMaybe (P.splitDirectories (P.dropFileName p))

	accept k
		-- Annex objects always are in a subdirectory with the same
		-- name as the filename. When the backed up file is not laid
		-- out that way, it is probably not a valid annex object.
		-- Eg, it could be something in annex/bad/ or annex/tmp/,
		-- or a file that only happens to be named like an annex
		-- object.
		-- (Unfortunately this also prevents recognizing files that
		-- belong to special remotes that don't use that layout. The
		-- special remote most likely to be in a backup, the directory
		-- special remote, does use that layout at least.)
		| parent /= Just name = Nothing
		-- Chunked or encrypted keys used in special remotes are not
		-- supported.
		| isChunkKey k || isEncKey k = Nothing
		| sizeMatches k = Just k
		| otherwise = Nothing

	-- Compare the size recorded in the key with the size of the file
	-- stored in the backup. This is a cheap way to make sure it's
	-- probably the actual content of the file. The content is not
	-- fully verified here because that could be a very expensive
	-- operation for a large repository; a user who wants to detect
	-- every possible data corruption problem (eg, wrong data read off
	-- disk during backup, or the object was already corrupt in the
	-- git-annex repo and that bad object got backed up) can fsck the
	-- remote.
	-- When either size is unknown, there is nothing to compare, and
	-- the key is accepted.
	sizeMatches k = case (msz, fromKey keySize k) of
		(Just sz, Just sz') -> sz' == sz
		_ -> True