Description: fix a security vulnerability in MediaBag and T.P.Class.IO.writeMedia
 This vulnerability, discovered by Entroy C,
 allows users to write arbitrary files to any location
 by feeding pandoc a specially crafted URL in an image element.
 The vulnerability is serious
 for anyone using pandoc to process untrusted input.
 The vulnerability does not affect pandoc
 when run with the `--sandbox` flag.
Origin: upstream, https://github.com/jgm/pandoc/commit/5e381e3
Author: John MacFarlane <jgm@berkeley.edu>
Bug: https://github.com/jgm/pandoc/security/advisories/GHSA-xj5q-fv23-575g
Bug-Debian: https://security-tracker.debian.org/tracker/CVE-2023-35936
Forwarded: yes
Last-Update: 2023-07-25
---
This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -49,7 +49,7 @@
 import Network.HTTP.Client.TLS (mkManagerSettings)
 import Network.HTTP.Types.Header ( hContentType )
 import Network.Socket (withSocketsDo)
-import Network.URI (unEscapeString)
+import Network.URI (URI(..), parseURI)
 import System.Directory (createDirectoryIfMissing)
 import System.Environment (getEnv)
 import System.FilePath ((</>), takeDirectory, normalise)
@@ -120,11 +120,11 @@
 
 openURL :: (PandocMonad m, MonadIO m) => Text -> m (B.ByteString, Maybe MimeType)
 openURL u
- | Just u'' <- T.stripPrefix "data:" u = do
-     let mime     = T.takeWhile (/=',') u''
-     let contents = UTF8.fromString $
-                     unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
-     return (decodeLenient contents, Just mime)
+ | Just (URI{ uriScheme = "data:",
+              uriPath = upath }) <- parseURI (T.unpack u) = do
+     let (mime, rest) = break (== '.') upath
+     let contents = UTF8.fromString $ drop 1 rest
+     return (decodeLenient contents, Just (T.pack mime))
  | otherwise = do
      let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
      customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
@@ -222,7 +222,7 @@
            -> m ()
 writeMedia dir (fp, _mt, bs) = do
   -- we normalize to get proper path separators for the platform
-  let fullpath = normalise $ dir </> unEscapeString fp
+  let fullpath = normalise $ dir </> fp
   liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
   logIOError $ BL.writeFile fullpath bs
 
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -28,12 +28,13 @@
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing)
 import Data.Typeable (Typeable)
+import Network.URI (unEscapeString)
 import System.FilePath
 import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.Digest.Pure.SHA (sha1, showDigest)
-import Network.URI (URI (..), parseURI)
+import Network.URI (URI (..), parseURI, isURI)
 
 data MediaItem =
   MediaItem
@@ -52,9 +53,12 @@
 instance Show MediaBag where
   show bag = "MediaBag " ++ show (mediaDirectory bag)
 
--- | We represent paths with /, in normalized form.
+-- | We represent paths with /, in normalized form.  Percent-encoding
+-- is resolved.
 canonicalize :: FilePath -> Text
-canonicalize = T.replace "\\" "/" . T.pack . normalise
+canonicalize fp
+  | isURI fp = T.pack fp
+  | otherwise = T.replace "\\" "/" . T.pack . normalise . unEscapeString $ fp
 
 -- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds
 -- to the given path.
@@ -77,22 +81,22 @@
                              , mediaContents = contents
                              , mediaMimeType = mt }
         fp' = canonicalize fp
+        fp'' = T.unpack fp'
         uri = parseURI fp
-        newpath = if isRelative fp
+        newpath = if isRelative fp''
                        && isNothing uri
-                       && ".." `notElem` splitDirectories fp
-                     then T.unpack fp'
+                       && not (".." `T.isInfixOf` fp')
+                     then fp''
                      else showDigest (sha1 contents) <> "." <> ext
-        fallback = case takeExtension fp of
-                        ".gz" -> getMimeTypeDef $ dropExtension fp
-                        _     -> getMimeTypeDef fp
+        fallback = case takeExtension fp'' of
+                        ".gz" -> getMimeTypeDef $ dropExtension fp''
+                        _     -> getMimeTypeDef fp''
         mt = fromMaybe fallback mbMime
-        path = maybe fp uriPath uri
+        path = maybe fp'' (unEscapeString . uriPath) uri
         ext = case takeExtension path of
                 '.':e -> e
                 _ -> maybe "" T.unpack $ extensionFromMimeType mt
 
-
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
 lookupMedia :: FilePath
             -> MediaBag
