1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
{- Sqlite database of ContentIdentifiers imported from special remotes.
-
- Copyright 2019-2023 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TypeOperators, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DataKinds, FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
module Database.ContentIdentifier (
ContentIdentifierHandle,
databaseIsEmpty,
openDb,
closeDb,
flushDbQueue,
recordContentIdentifier,
getContentIdentifiers,
getContentIdentifierKeys,
recordAnnexBranchTree,
getAnnexBranchTree,
needsUpdateFromLog,
updateFromLog,
ContentIdentifiersId,
AnnexBranchId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Database.Utility
import Annex.Locations
import Annex.Common hiding (delete)
import qualified Annex.Branch
import Types.Import
import Types.RemoteState
import Git.Types
import Git.Sha
import Git.FilePath
import qualified Git.DiffTree as DiffTree
import Logs
import qualified Logs.ContentIdentifier as Log
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
#if MIN_VERSION_persistent_sqlite(2,13,3)
import Database.RawFilePath
#else
import Database.Persist.Sqlite (runSqlite)
import qualified Data.Text as T
#endif
data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue Bool
databaseIsEmpty :: ContentIdentifierHandle -> Bool
databaseIsEmpty (ContentIdentifierHandle _ b) = b
-- Note on indexes: ContentIndentifiersKeyRemoteCidIndex etc are really
-- uniqueness constraints, which cause sqlite to automatically add indexes.
-- So when adding indexes, have to take care to only add ones that work as
-- uniqueness constraints. (Unfortunately persistent does not support indexes
-- that are not uniqueness constraints;
-- https://github.com/yesodweb/persistent/issues/109)
--
-- ContentIndentifiersKeyRemoteCidIndex speeds up queries like
-- getContentIdentifiers, but it is not used for
-- getContentIdentifierKeys. ContentIndentifiersCidRemoteKeyIndex was
-- addedto speed that up.
share [mkPersist sqlSettings, mkMigrate "migrateContentIdentifier"] [persistLowerCase|
ContentIdentifiers
remote UUID
cid ContentIdentifier
key Key
ContentIndentifiersKeyRemoteCidIndex key remote cid
ContentIndentifiersCidRemoteKeyIndex cid remote key
-- The last git-annex branch tree sha that was used to update
-- ContentIdentifiers
AnnexBranch
tree SSha
UniqueTree tree
|]
{- Opens the database, creating it if it doesn't exist yet.
-
- Only a single process should write to the database at a time, so guard
- any writes with the gitAnnexContentIdentifierLock.
-}
openDb :: Annex ContentIdentifierHandle
openDb = do
dbdir <- calcRepo' gitAnnexContentIdentifierDbDir
let db = dbdir </> literalOsPath "db"
isnew <- liftIO $ not <$> doesFileExist db
if isnew
then initDb db $ void $
runMigrationSilent migrateContentIdentifier
-- Migrate from old versions of database, which had buggy
-- and suboptimal uniqueness constraints.
#if MIN_VERSION_persistent_sqlite(2,13,3)
else liftIO $ runSqlite' (fromOsPath db) $ void $
runMigrationSilent migrateContentIdentifier
#else
else liftIO $ runSqlite (T.pack (fromRawFilePath db)) $ void $
runMigrationSilent migrateContentIdentifier
#endif
h <- liftIO $ H.openDbQueue db "content_identifiers"
return $ ContentIdentifierHandle h isnew
closeDb :: ContentIdentifierHandle -> Annex ()
closeDb (ContentIdentifierHandle h _) = liftIO $ H.closeDbQueue h
queueDb :: ContentIdentifierHandle -> SqlPersistM () -> IO ()
queueDb (ContentIdentifierHandle h _) = H.queueDb h checkcommit
where
-- commit queue after 1000 changes
checkcommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
flushDbQueue :: ContentIdentifierHandle -> IO ()
flushDbQueue (ContentIdentifierHandle h _) = H.flushDbQueue h
-- Be sure to also update the git-annex branch when using this.
recordContentIdentifier :: ContentIdentifierHandle -> RemoteStateHandle -> ContentIdentifier -> Key -> IO ()
recordContentIdentifier h (RemoteStateHandle u) cid k = queueDb h $ do
void $ insertUniqueFast $ ContentIdentifiers u cid k
getContentIdentifiers :: ContentIdentifierHandle -> RemoteStateHandle -> Key -> IO [ContentIdentifier]
getContentIdentifiers (ContentIdentifierHandle h _) (RemoteStateHandle u) k =
H.queryDbQueue h $ do
l <- selectList
[ ContentIdentifiersKey ==. k
, ContentIdentifiersRemote ==. u
] []
return $ map (contentIdentifiersCid . entityVal) l
getContentIdentifierKeys :: ContentIdentifierHandle -> RemoteStateHandle -> ContentIdentifier -> IO [Key]
getContentIdentifierKeys (ContentIdentifierHandle h _) (RemoteStateHandle u) cid =
H.queryDbQueue h $ do
l <- selectList
[ ContentIdentifiersCid ==. cid
, ContentIdentifiersRemote ==. u
] []
return $ map (contentIdentifiersKey . entityVal) l
recordAnnexBranchTree :: ContentIdentifierHandle -> Sha -> IO ()
recordAnnexBranchTree h s = queueDb h $ do
deleteWhere ([] :: [Filter AnnexBranch])
void $ insertUniqueFast $ AnnexBranch $ toSSha s
getAnnexBranchTree :: ContentIdentifierHandle -> IO Sha
getAnnexBranchTree (ContentIdentifierHandle h _) = H.queryDbQueue h $ do
l <- selectList ([] :: [Filter AnnexBranch]) []
case l of
(s:[]) -> return $ fromSSha $ annexBranchTree $ entityVal s
_ -> return emptyTree
{- Check if the git-annex branch has been updated and the database needs
- to be updated with any new content identifiers in it. -}
needsUpdateFromLog :: ContentIdentifierHandle -> Annex (Maybe (Sha, Sha))
needsUpdateFromLog db = do
oldtree <- liftIO $ getAnnexBranchTree db
Annex.Branch.updatedFromTree oldtree
{- The database should be locked for write when calling this. -}
updateFromLog :: ContentIdentifierHandle -> (Sha, Sha) -> Annex ContentIdentifierHandle
updateFromLog db@(ContentIdentifierHandle h _) (oldtree, currtree) = do
(l, cleanup) <- inRepo $
DiffTree.diffTreeRecursive oldtree currtree
mapM_ go l
void $ liftIO $ cleanup
liftIO $ do
recordAnnexBranchTree db currtree
flushDbQueue db
return (ContentIdentifierHandle h False)
where
go ti = case extLogFileKey remoteContentIdentifierExt (getTopFilePath (DiffTree.file ti)) of
Nothing -> return ()
Just k -> do
l <- Log.getContentIdentifiers k
liftIO $ forM_ l $ \(rs, cids) ->
forM_ cids $ \cid ->
recordContentIdentifier db rs cid k
|