1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
#!/usr/bin/env cabal
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{- cabal:
build-depends: base, text, xml, texmath, containers, parsec
-}
import Text.Parsec hiding (optional, (<|>))
import Control.Applicative hiding (many)
import Data.List
import Text.TeXMath.Types
import Data.Maybe
import Data.Char
import Debug.Trace
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
type Parser = Parsec String ()
c9001 = Record '\9001' [("base", "\\langle"), ("unicode", "\\langle")] Open "Left angle bracket"
c9002 = Record '\9002' [("base", "\\rangle"), ("unicode", "\\rangle")] Close "Right angle bracket"
c8220 = Record '\8220' [("base", "``")] Pun "Opening curly quote"
c8221 = Record '\8221' [("base", "\"")] Pun "Closing curly quote"
-- Insert updates to mapping here
updates :: [M.Map Char Record -> M.Map Char Record]
updates =
[ M.adjust (addCommand ("base", "-")) '-'
, M.insert '\9001' c9001
, M.insert '\9002' c9002
, M.adjust (addCommand ("base", "\\blacksquare")) '\9632'
, M.adjust (addCommand ("base", "\\square")) '\9633'
, M.adjust (addCommand ("base", "\\hat{}")) '\710'
, M.insert '\8220' c8220
, M.insert '\8221' c8221
, M.adjust (addCommand ("base", "\\hat{}")) '\94'
, M.adjust (addCommand ("base", "\\,")) '\8201'
, M.adjust (addCommand ("base", "\\:")) '\8287' ]
-- DO NOT ALTER
addCommand :: (Text, Text) -> Record -> Record
addCommand newcmd@(pkg,_) r@(filter ((/= pkg) . fst ) . commands -> cs) =
r {commands = newcmd : cs}
recordsMap :: [Record] -> M.Map Char Record
recordsMap records = M.fromList (map f records)
where
f r = (uchar r, r)
getSymbolType :: String -> TeXSymbolType
getSymbolType s =
case s of
"mathpunct" -> Pun
"mathord" -> Ord
"mathbin" -> Bin
"mathopen" -> Open
"mathclose" -> Close
"mathaccent" -> Accent
"mathfence" -> Fence
"mathover" -> TOver
"mathunder" -> TUnder
"mathbotaccent" -> BotAccent
"mathop" -> Op
"mathrel" -> Rel
"mathalpha" -> Alpha
"mathradical" -> Rad
_ -> Ord -- default to Ordinary
main :: IO ()
main = do
f <- readFile "unimathsymbols.txt"
let applyUpdates = foldr (.) id updates
let r = applyUpdates . recordsMap $ (either (error .show) id (parse document "" f))
let header = "records :: [Record]\nrecords =\n [ "
let footer = "]"
writeFile "UnicodeToLatex.hs" (header ++ concat (intersperse "\n , " (map show (map snd $ M.toAscList r))) ++ footer)
document :: Parser [Record]
document = do
skipMany comment
manyTill row eof
comment :: Parser String
comment = char '#' *> manyTill anyChar (char '\n')
row :: Parser Record
row = do
hex <- field
_ <- string "^^" <|> field
defcmd <- T.pack <$> field
unicmd <- T.pack <$> field
_uniclass <- field
texclass <- field
reqs <- filter (\z -> head z /= '-') . words <$> field
let reqs' = if null reqs then ["base"] else map T.pack reqs
(alts, comment) <- parseComment
let cmds = filter (\(_,x) -> not (T.null x)) $
zip reqs' (repeat defcmd) ++ alts ++
[("unicode-math", unicmd)]
return (Record (readHex hex) cmds (getSymbolType texclass) comment)
readHex :: String -> Char
readHex = fst . head . readLitChar . ("\\x" ++)
field :: Parser String
field = manyTill anyChar (char '^')
parseComment :: Parsec String () ([(Text, Text)], Text)
parseComment = (,) <$> (catMaybes <$> sepBy command (char ',')) <* optional (many $ char ' ') <*> (T.pack <$> manyTill anyChar (char '\n'))
command :: Parsec String () (Maybe (Text, Text))
command = do
first <- lookAhead anyChar
case first of
'='-> Just <$> cmd
'#'-> Just <$> cmd
'x'-> Nothing <$ skip
't'-> Nothing <$ skip
_ -> Nothing <$ return ()
cmd :: Parsec String () (Text, Text)
cmd = do
anyChar
optional spaces
alt <- many1 (noneOf " ,\t")
optional spaces
package <- option "" (between (char '(') (char ')')
(many1 (satisfy (/= ')'))))
optional (many $ char ' ')
let package' = if null package then "base" else package
return (T.pack package', T.pack alt)
skip :: Parsec String () ()
skip = try $ do
lookAhead (notFollowedBy (many (noneOf ",")) *> newline)
skipMany (satisfy (/= ','))
|