1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
--------------------------------------------------------------------------------
-- | Internal module to parse metadata
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RecordWildCards #-}
module Hakyll.Core.Provider.Metadata
( loadMetadata
, parsePage
, MetadataException (..)
) where
--------------------------------------------------------------------------------
import Control.Arrow (second)
import Control.Exception (Exception, throwIO)
import Control.Monad (guard)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import Data.List.Extended (breakWhen)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Yaml as Yaml
import Hakyll.Core.Identifier
import Hakyll.Core.Metadata
import Hakyll.Core.Provider.Internal
import System.IO as IO
import System.IO.Error (modifyIOError, ioeSetLocation)
--------------------------------------------------------------------------------
loadMetadata :: Provider -> Identifier -> IO (Metadata, Maybe String)
loadMetadata p identifier = do
hasHeader <- probablyHasMetadataHeader fp
(md, body) <- if hasHeader
then second Just <$> loadMetadataHeader fp
else return (mempty, Nothing)
emd <- case mi of
Nothing -> return mempty
Just mi' -> loadMetadataFile $ resourceFilePath p mi'
return (md <> emd, body)
where
normal = setVersion Nothing identifier
fp = resourceFilePath p identifier
mi = M.lookup normal (providerFiles p) >>= resourceInfoMetadata
--------------------------------------------------------------------------------
loadMetadataHeader :: FilePath -> IO (Metadata, String)
loadMetadataHeader fp = do
fileContent <- modifyIOError (`ioeSetLocation` "loadMetadataHeader") $ readFile fp
case parsePage fileContent of
Right x -> return x
Left err -> throwIO $ MetadataException fp err
--------------------------------------------------------------------------------
loadMetadataFile :: FilePath -> IO Metadata
loadMetadataFile fp = do
fileContent <- modifyIOError (`ioeSetLocation` "loadMetadataFile") $ B.readFile fp
let errOrMeta = Yaml.decodeEither' fileContent
either (fail . show) return errOrMeta
--------------------------------------------------------------------------------
-- | Check if a file "probably" has a metadata header. The main goal of this is
-- to exclude binary files (which are unlikely to start with "---").
probablyHasMetadataHeader :: FilePath -> IO Bool
probablyHasMetadataHeader fp = do
handle <- IO.openFile fp IO.ReadMode
bs <- BC.hGet handle 1024
IO.hClose handle
return $ isMetadataHeader bs
where
isMetadataHeader bs =
let pre = BC.takeWhile (\x -> x /= '\n' && x /= '\r') bs
in BC.length pre >= 3 && BC.all (== '-') pre
--------------------------------------------------------------------------------
-- | Parse the page metadata and body.
splitMetadata :: String -> (Maybe String, String)
splitMetadata str0 = fromMaybe (Nothing, str0) $ do
guard $ leading >= 3
let !str1 = drop leading str0
guard $ all isNewline (take 1 str1)
let !(!meta, !content0) = breakWhen isTrailing str1
guard $ not $ null content0
let !content1 = drop (leading + 1) content0
!content2 = dropWhile isNewline $ dropWhile isInlineSpace content1
-- Adding this newline fixes the line numbers reported by the YAML parser.
-- It's a bit ugly but it works.
return (Just ('\n' : meta), content2)
where
-- Parse the leading "---"
!leading = length $ takeWhile (== '-') str0
-- Predicate to recognize the trailing "---" or "..."
isTrailing [] = False
isTrailing (x : xs) =
isNewline x && length (takeWhile isDash xs) == leading
-- Characters
isNewline c = c == '\n' || c == '\r'
isDash c = c == '-' || c == '.'
isInlineSpace c = c == '\t' || c == ' '
--------------------------------------------------------------------------------
parseMetadata :: String -> Either Yaml.ParseException Metadata
parseMetadata = Yaml.decodeEither' . T.encodeUtf8 . T.pack
--------------------------------------------------------------------------------
parsePage :: String -> Either Yaml.ParseException (Metadata, String)
parsePage fileContent = case mbMetaBlock of
Nothing -> return (mempty, content)
Just metaBlock -> case parseMetadata metaBlock of
Left err -> Left err
Right meta -> return (meta, content)
where
!(!mbMetaBlock, !content) = splitMetadata fileContent
--------------------------------------------------------------------------------
-- | Thrown in the IO monad if things go wrong. Provides a nice-ish error
-- message.
data MetadataException = MetadataException FilePath Yaml.ParseException
--------------------------------------------------------------------------------
instance Exception MetadataException
--------------------------------------------------------------------------------
instance Show MetadataException where
show (MetadataException fp err) =
fp ++ ": " ++ Yaml.prettyPrintParseException err ++ hint
where
hint = case err of
Yaml.InvalidYaml (Just (Yaml.YamlParseException {..}))
| yamlProblem == "mapping values are not allowed in this context" -> "\n" ++
"Hint: if the metadata value contains characters such\n" ++
"as ':' or '-', try enclosing it in quotes."
Yaml.AesonException "Error in $: parsing HashMap ~Text failed, expected Object, but encountered String"
-> "\nHint: in metadata, keys and values are separated by a colon *and* a space."
_ -> ""
|