1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
|
{- git-annex general metadata
-
- Copyright 2014-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
module Types.MetaData (
MetaData(..),
MetaField(..),
MetaValue(..),
CurrentlySet(..),
serialize,
deserialize,
MetaSerializable,
toMetaField,
mkMetaField,
mkMetaFieldUnchecked,
fromMetaField,
toMetaValue,
mkMetaValue,
unsetMetaValue,
unsetMetaData,
fromMetaValue,
fromMetaData,
emptyMetaData,
updateMetaData,
unionMetaData,
combineMetaData,
differenceMetaData,
isSet,
currentMetaData,
currentMetaDataValues,
metaDataValues,
ModMeta(..),
modMeta,
RemoteMetaData(..),
extractRemoteMetaData,
splitRemoteMetaDataField,
fromRemoteMetaData,
prop_metadata_sane,
prop_metadata_serialize
) where
import Common
import Utility.Base64
import Utility.QuickCheck
import Utility.Aeson
import Types.UUID
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import qualified Data.Set as S
import qualified Data.Map.Strict as M
import qualified Data.HashMap.Strict as HM
import Data.Char
import qualified Data.CaseInsensitive as CI
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
deriving (Show, Eq, Ord)
instance ToJSON' MetaData where
toJSON' (MetaData m) = object $ map go (M.toList m)
where
go (MetaField f, s) = (CI.original f, toJSON' s)
instance FromJSON MetaData where
parseJSON (Object o) = do
l <- HM.toList <$> parseJSON (Object o)
MetaData . M.fromList <$> mapM go l
where
go (t, l) = case mkMetaField t of
Left e -> fail e
Right f -> (,) <$> pure f <*> parseJSON l
parseJSON _ = fail "expected an object"
{- A metadata value can be currently be set (True), or may have been
- set before and we're remembering it no longer is (False). -}
newtype CurrentlySet = CurrentlySet Bool
deriving (Read, Show, Eq, Ord, Arbitrary)
{- Fields are case insensitive. -}
newtype MetaField = MetaField (CI.CI T.Text)
deriving (Read, Show, Eq, Ord)
data MetaValue = MetaValue CurrentlySet B.ByteString
deriving (Read, Show)
instance ToJSON' MetaValue where
toJSON' (MetaValue _ v) = toJSON' v
instance FromJSON MetaValue where
parseJSON (String v) = return $
MetaValue (CurrentlySet True) (E.encodeUtf8 v)
parseJSON _ = fail "expected a string"
{- Metadata values compare and order the same whether currently set or not. -}
instance Eq MetaValue where
MetaValue _ a == MetaValue _ b = a == b
instance Ord MetaValue where
compare (MetaValue _ x) (MetaValue _ y) = compare x y
{- MetaData is serialized to a format like:
-
- field1 +val1 +val2 -val3 field2 +val4 +val5
-}
class MetaSerializable v where
serialize :: v -> B.ByteString
deserialize :: B.ByteString -> Maybe v
instance MetaSerializable MetaData where
serialize (MetaData m) = B8.unwords $ concatMap go $ M.toList m
where
go (f, vs) = serialize f : map serialize (S.toList vs)
-- Note that B8.words cannot be used here, because UTF-8 encoded
-- field names may contain bytes such as \160 that are whitespace.
deserialize = Just . getfield emptyMetaData . B8.split ' '
where
getfield m [] = m
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
getvalues m [] _ = m
getvalues m l@(w:ws) f = case deserialize w of
Just v -> getvalues (updateMetaData f v m) ws f
Nothing -> getfield m l
instance MetaSerializable MetaField where
serialize (MetaField f) = E.encodeUtf8 (CI.original f)
deserialize = MetaField . CI.mk <$$> eitherToMaybe . E.decodeUtf8'
{- Base64 problematic values. -}
instance MetaSerializable MetaValue where
serialize (MetaValue isset v) =
serialize isset <>
if B8.any (== ' ') v || "!" `B8.isPrefixOf` v
then "!" <> toB64' v
else v
deserialize b = do
(isset, b') <- B8.uncons b
case B8.uncons b' of
Just ('!', b'') -> MetaValue
<$> deserialize (B8.singleton isset)
<*> fromB64Maybe' b''
_ -> MetaValue
<$> deserialize (B8.singleton isset)
<*> pure b'
instance MetaSerializable CurrentlySet where
serialize (CurrentlySet True) = "+"
serialize (CurrentlySet False) = "-"
deserialize "+" = Just (CurrentlySet True)
deserialize "-" = Just (CurrentlySet False)
deserialize _ = Nothing
mkMetaField :: T.Text -> Either String MetaField
mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
badField :: T.Text -> String
badField f = "Illegal metadata field name, \"" ++ T.unpack f ++ "\""
{- Does not check that the field name is valid. Use with caution. -}
mkMetaFieldUnchecked :: T.Text -> MetaField
mkMetaFieldUnchecked = MetaField . CI.mk
toMetaField :: T.Text -> Maybe MetaField
toMetaField f
| legalField f = Just $ MetaField $ CI.mk f
| otherwise = Nothing
{- Fields cannot be empty, contain whitespace, or start with "+-" as
- that would break the serialization.
-
- Additionally, fields should not contain any form of path separator, as
- that would break views.
-
- And, fields need to be valid JSON keys.
-
- So, require they have an alphanumeric first letter, with the remainder
- being either alphanumeric or a small set of whitelisted common punctuation.
-}
legalField :: T.Text -> Bool
legalField t = case T.uncons t of
Nothing -> False
Just (c1, t')
| not (isAlphaNum c1) -> False
| otherwise -> T.all legalchars t'
where
legalchars c
| isAlphaNum c = True
| otherwise = c `elem` legalFieldWhiteList
legalFieldWhiteList :: [Char]
legalFieldWhiteList = "_-."
toMetaValue :: B.ByteString -> MetaValue
toMetaValue = MetaValue (CurrentlySet True)
mkMetaValue :: CurrentlySet -> B.ByteString -> MetaValue
mkMetaValue = MetaValue
unsetMetaValue :: MetaValue -> MetaValue
unsetMetaValue (MetaValue _ s) = MetaValue (CurrentlySet False) s
{- Marks all MetaValues as no longer currently set. -}
unsetMetaData :: MetaData -> MetaData
unsetMetaData (MetaData m) = MetaData $ M.map (S.map unsetMetaValue) m
fromMetaField :: MetaField -> T.Text
fromMetaField (MetaField f) = CI.original f
fromMetaValue :: MetaValue -> B.ByteString
fromMetaValue (MetaValue _ f) = f
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
fromMetaData (MetaData m) = M.toList m
emptyMetaData :: MetaData
emptyMetaData = MetaData M.empty
{- Can be used to set a value, or to unset it, depending on whether
- the MetaValue has CurrentlySet or not. -}
updateMetaData :: MetaField -> MetaValue -> MetaData -> MetaData
updateMetaData f v = updateMetaData' f (S.singleton v)
updateMetaData' :: MetaField -> S.Set MetaValue -> MetaData -> MetaData
updateMetaData' f s (MetaData m) = MetaData $ M.insertWith S.union f s m
{- New metadata overrides old._-}
unionMetaData :: MetaData -> MetaData -> MetaData
unionMetaData (MetaData old) (MetaData new) = MetaData $
M.unionWith S.union new old
combineMetaData :: [MetaData] -> MetaData
combineMetaData = foldl' unionMetaData emptyMetaData
differenceMetaData :: MetaData -> MetaData -> MetaData
differenceMetaData (MetaData m) (MetaData excludem) = MetaData $
M.differenceWith diff m excludem
where
diff sl sr =
let s = S.difference sl sr
in if S.null s then Nothing else Just s
isSet :: MetaValue -> Bool
isSet (MetaValue (CurrentlySet isset) _) = isset
{- Gets only currently set values -}
currentMetaDataValues :: MetaField -> MetaData -> S.Set MetaValue
currentMetaDataValues f m = S.filter isSet (metaDataValues f m)
currentMetaData :: MetaData -> MetaData
currentMetaData (MetaData m) = removeEmptyFields $ MetaData $
M.map (S.filter isSet) m
removeEmptyFields :: MetaData -> MetaData
removeEmptyFields (MetaData m) = MetaData $ M.filter (not . S.null) m
{- Gets currently set values, but also values that have been unset. -}
metaDataValues :: MetaField -> MetaData -> S.Set MetaValue
metaDataValues f (MetaData m) = fromMaybe S.empty (M.lookup f m)
mapMetaData :: (S.Set MetaValue -> S.Set MetaValue) -> MetaData -> MetaData
mapMetaData f (MetaData m) = MetaData (M.map f m)
{- Ways that existing metadata can be modified -}
data ModMeta
= AddMeta MetaField MetaValue
| DelMeta MetaField (Maybe MetaValue)
-- ^ delete value of a field. With Just, only that specific value
-- is deleted; with Nothing, all current values are deleted.
| DelAllMeta
-- ^ delete all currently set metadata
| SetMeta MetaField (S.Set MetaValue)
-- ^ removes any existing values
| MaybeSetMeta MetaField MetaValue
-- ^ set when field has no existing value
| ComposeModMeta ModMeta ModMeta
-- ^ composing multiple modifications
deriving (Show)
{- Applies a ModMeta, generating the new MetaData.
- Note that the new MetaData does not include all the
- values set in the input metadata. It only contains changed values. -}
modMeta :: MetaData -> ModMeta -> MetaData
modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData
modMeta _ (DelMeta f (Just oldv)) =
updateMetaData f (unsetMetaValue oldv) emptyMetaData
modMeta m (DelMeta f Nothing) = MetaData $ M.singleton f $
S.fromList $ map unsetMetaValue $ S.toList $ currentMetaDataValues f m
modMeta m DelAllMeta = mapMetaData
(S.fromList . map unsetMetaValue . S.toList)
(currentMetaData m)
modMeta m (SetMeta f s) = updateMetaData' f s $
foldr (updateMetaData f) emptyMetaData $
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
modMeta m (MaybeSetMeta f v)
| S.null (currentMetaDataValues f m) = updateMetaData f v emptyMetaData
| otherwise = emptyMetaData
modMeta m (ComposeModMeta a b) = unionMetaData (modMeta m a) (modMeta m b)
data RemoteMetaData = RemoteMetaData UUID MetaData
deriving (Show, Eq, Ord)
{- Extracts only the fields prefixed with "uuid:", which belong to that
- remote. -}
extractRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
where
belongsremote (MetaField f) _v = prefix `T.isPrefixOf` CI.original f
removeprefix (MetaField f) = MetaField $
CI.mk $ T.drop prefixlen $ CI.original f
prefix = remoteMetaDataPrefix u
prefixlen = T.length prefix
splitRemoteMetaDataField :: MetaField -> Maybe (UUID, MetaField)
splitRemoteMetaDataField (MetaField f) = do
let (su, sf) = T.break (== ':') (CI.original f)
f' <- toMetaField ((T.drop 1 sf))
return $ (toUUID (T.unpack su), f')
remoteMetaDataPrefix :: UUID -> T.Text
remoteMetaDataPrefix u = T.pack (fromUUID u) <> ":"
fromRemoteMetaData :: RemoteMetaData -> MetaData
fromRemoteMetaData (RemoteMetaData u (MetaData m)) = MetaData $
M.mapKeys addprefix m
where
addprefix (MetaField f) = MetaField $ CI.mk $ prefix <> CI.original f
prefix = remoteMetaDataPrefix u
{- Avoid putting too many fields in the map; extremely large maps make
- the seriaization test slow due to the sheer amount of data.
- It's unlikely that more than 100 fields of metadata will be used. -}
instance Arbitrary MetaData where
arbitrary = MetaData . M.filterWithKey legal . M.fromList
<$> resize 10 (listOf arbitrary)
where
legal k _v = legalField $ fromMetaField k
instance Arbitrary MetaValue where
arbitrary = MetaValue
<$> arbitrary
-- Avoid non-ascii metavalues because fully arbitrary
-- strings may not be encoded using the filesystem
-- encoding, which is norally applied to all input.
<*> (encodeBS <$> arbitrary `suchThat` all isAscii)
instance Arbitrary MetaField where
arbitrary = MetaField . CI.mk
<$> (T.pack <$> arbitrary) `suchThat` legalField
prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_metadata_sane m f v = and
[ S.member v $ metaDataValues f m'
, not (isSet v) || S.member v (currentMetaDataValues f m')
, differenceMetaData m' emptyMetaData == m'
]
where
m' = updateMetaData f v m
prop_metadata_serialize :: MetaField -> MetaValue -> MetaData -> Bool
prop_metadata_serialize f v m = and
[ deserialize (serialize f) == Just f
, deserialize (serialize v) == Just v
, deserialize (serialize m') == Just m'
]
where
m' = removeEmptyFields m
|