1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
|
-- | When we try to read a file that is encoded in UTF-8, and the system locale
-- is not set to UTF-8, the GHC runtime system will throw an error:
--
-- <https://github.com/jaspervdj/patat/issues/127>
--
-- However, we don't want to force people to use UTF-8 for everything. So what
-- we do is provide a replacement readFile, which first tries to read the file
-- in the system locale, and then falls back to forcing UTF-8.
--
-- If we forced UTF-8, we also want to propagate that to the output handle;
-- otherwise will get errors when we try to display these characters; so
-- withHandle should be used on the output handle (typically stdout).
module Patat.EncodingFallback
( EncodingFallback (..)
, readFile
, withHandle
) where
--------------------------------------------------------------------------------
import Control.Exception (bracket, throwIO)
import Control.Monad (when)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Prelude hiding (readFile)
import qualified System.IO as IO
import qualified System.IO.Error as IO
--------------------------------------------------------------------------------
data EncodingFallback = NoFallback | Utf8Fallback
deriving (Eq, Show)
--------------------------------------------------------------------------------
readFile :: FilePath -> IO (EncodingFallback, T.Text)
readFile path = IO.catchIOError readSystem $ \ioe -> do
when (IO.isDoesNotExistError ioe) $ throwIO ioe -- Don't retry on these
readUtf8
where
readSystem = ((,) NoFallback <$> T.readFile path)
readUtf8 = IO.withFile path IO.ReadMode $ \h -> do
IO.hSetEncoding h IO.utf8_bom
(,) Utf8Fallback <$> T.hGetContents h
--------------------------------------------------------------------------------
withHandle :: IO.Handle -> EncodingFallback -> IO a -> IO a
withHandle _ NoFallback mx = mx
withHandle h Utf8Fallback mx = bracket
(do
mbOld <- IO.hGetEncoding h
IO.hSetEncoding h IO.utf8
pure mbOld)
(\mbOld -> traverse (IO.hSetEncoding h) mbOld)
(\_ -> mx)
|