1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
|
{-# LANGUAGE ForeignFunctionInterface #-}
-- |
-- Module : Data.Text.ICU.BiDi
-- Copyright : (c) 2018 Ondrej Palkovsky
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : GHC
--
-- Implementation of the Unicode Bidirectional Algorithm. See the documentation
-- of the libicu library for additional details.
--
-- -- /Note/: this module is not thread safe. /Do not/ call the
-- functions on one BiDi object from more than one thread!
module Data.Text.ICU.BiDi
(
BiDi
-- ** Basic functions
, open
, openSized
-- ** Set data
, setPara
, setLine
-- ** Access the BiDi object
, countParagraphs
, getParagraphByIndex
, getProcessedLength
-- ** Output text
, writeReordered
, WriteOption(..)
-- ** High-level functions
, reorderParagraphs
) where
#include <unicode/ubidi.h>
import Data.Text.ICU.BiDi.Internal
import Foreign.Marshal.Utils (with)
import Foreign.Storable (peek)
import Foreign.Ptr (FunPtr, Ptr)
import Data.Int (Int32, Int16)
import Data.Text.ICU.Error.Internal (UErrorCode, handleError, handleOverflowError)
import Data.Text (Text)
import Data.Text.ICU.Internal (UChar, useAsUCharPtr, fromUCharPtr, newICUPtr)
import Foreign.C.Types (CInt(..))
import Data.List (foldl')
import Data.Bits ((.|.))
import System.IO.Unsafe (unsafePerformIO)
import Data.Traversable (for)
-- | Allocate a BiDi structure.
open :: IO BiDi
open = newICUPtr BiDi ubidi_close ubidi_open
-- | Allocate a BiDi structure with preallocated memory for internal structures.
openSized ::
Int32 -- ^ is the maximum text or line length that internal memory will be preallocated for.
-- An attempt to associate this object with a longer text will fail, unless this value is 0.
-> Int32 -- ^ is the maximum anticipated number of same-level runs that internal memory will be preallocated for.
-- An attempt to access visual runs on an object that was not preallocated for as many runs as the text was actually resolved to will fail, unless this value is 0.
-> IO BiDi
openSized maxlen maxruncount =
newICUPtr BiDi ubidi_close $ handleError (ubidi_openSized maxlen maxruncount)
-- | Perform the Unicode Bidi algorithm. It is defined in the Unicode Standard Annex #9, version 13,
-- also described in The Unicode Standard, Version 4.0.
-- This function takes a piece of plain text containing one or more paragraphs,
-- with or without externally specified embedding levels from styled text and
-- computes the left-right-directionality of each character.
setPara ::
BiDi
-> Text
-> Int32 -- ^ specifies the default level for the text; it is typically 0 (LTR) or 1 (RTL)
-> IO ()
setPara bidi t paraLevel =
withBiDi bidi $ \bptr ->
useAsUCharPtr t $ \sptr slen -> handleError (ubidi_setPara bptr sptr (fromIntegral slen) paraLevel)
-- | Sets a BiDi to contain the reordering information, especially the resolved levels,
-- for all the characters in a line of text
setLine ::
BiDi -- ^ the parent paragraph object. It must have been set by a successful call to 'setPara'.
-> Int32 -- ^ is the line's first index into the text
-> Int32 -- ^ is just behind the line's last index into the text (its last index +1).
-> BiDi -- ^ is the object that will now represent a line of the text
-> IO ()
setLine paraBidi start limit lineBidi =
withBiDi paraBidi $ \paraptr ->
withBiDi lineBidi $ \lineptr ->
handleError (ubidi_setLine paraptr start limit lineptr)
-- | Get the number of paragraphs.
countParagraphs :: BiDi -> IO Int32
countParagraphs bidi = withBiDi bidi ubidi_countParagraphs
-- | Get a paragraph, given the index of this paragraph.
getParagraphByIndex ::
BiDi
-> Int32 -- ^ is the number of the paragraph, in the range [0..ubidi_countParagraphs(pBiDi)-1].
-> IO (Int32, Int32) -- ^ index of the first character of the paragraph in the text and limit of the paragraph
getParagraphByIndex bidi paraIndex =
withBiDi bidi $ \bptr ->
with 0 $ \pstart ->
with 0 $ \pend -> do
handleError (ubidi_getParagraphByIndex bptr paraIndex pstart pend)
(,) <$> (fromIntegral <$> peek pstart)
<*> (fromIntegral <$> peek pend)
-- | Get the length of the source text processed by the last call to 'setPara'.
getProcessedLength :: BiDi -> IO Int32
getProcessedLength bidi = withBiDi bidi ubidi_getProcessedLength
data WriteOption =
DoMirroring
-- ^ replace characters with the "mirrored" property in RTL runs by their mirror-image mappings
| InsertLrmForNumeric
-- ^ surround the run with LRMs if necessary; this is part of the approximate "inverse Bidi" algorithm
| KeepBaseCombining
-- ^ keep combining characters after their base characters in RTL runs
| OutputReverse
-- ^ write the output in reverse order
| RemoveBidiControls
-- ^ remove Bidi control characters (this does not affect InsertLrmForNumeric)
deriving (Show)
reduceWriteOpts :: [WriteOption] -> Int16
reduceWriteOpts = foldl' orO 0
where a `orO` b = a .|. fromWriteOption b
fromWriteOption :: WriteOption -> Int16
fromWriteOption DoMirroring = #const UBIDI_DO_MIRRORING
fromWriteOption InsertLrmForNumeric = #const UBIDI_INSERT_LRM_FOR_NUMERIC
fromWriteOption KeepBaseCombining = #const UBIDI_KEEP_BASE_COMBINING
fromWriteOption OutputReverse = #const UBIDI_OUTPUT_REVERSE
fromWriteOption RemoveBidiControls = #const UBIDI_REMOVE_BIDI_CONTROLS
-- | Take a BiDi object containing the reordering information for a piece of text
-- (one or more paragraphs) set by 'setPara' or for a line of text set by 'setLine'
-- and write a reordered string to the destination buffer.
writeReordered :: BiDi -> [WriteOption] -> IO Text
writeReordered bidi opts = do
destLen <- getProcessedLength bidi
let options' = reduceWriteOpts opts
withBiDi bidi $ \bptr ->
handleOverflowError (fromIntegral destLen)
(\dptr dlen -> ubidi_writeReordered bptr dptr (fromIntegral dlen) options')
(\dptr dlen -> fromUCharPtr dptr (fromIntegral dlen))
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_open" ubidi_open
:: IO (Ptr UBiDi)
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_openSized" ubidi_openSized
:: Int32 -> Int32 -> Ptr UErrorCode -> IO (Ptr UBiDi)
foreign import ccall unsafe "hs_text_icu.h &__hs_ubidi_close" ubidi_close
:: FunPtr (Ptr UBiDi -> IO ())
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_setPara" ubidi_setPara
:: Ptr UBiDi -> Ptr UChar -> Int32 -> Int32 -> Ptr UErrorCode -> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_countParagraphs" ubidi_countParagraphs
:: Ptr UBiDi -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_getParagraphByIndex" ubidi_getParagraphByIndex
:: Ptr UBiDi -> Int32 -> Ptr CInt -> Ptr CInt -> Ptr UErrorCode -> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_getProcessedLength" ubidi_getProcessedLength
:: Ptr UBiDi -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_writeReordered" ubidi_writeReordered
:: Ptr UBiDi -> Ptr UChar -> Int32 -> Int16 -> Ptr UErrorCode -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubidi_setLine" ubidi_setLine
:: Ptr UBiDi -> Int32 -> Int32 -> Ptr UBiDi -> Ptr UErrorCode -> IO ()
-- | Helper function to reorder a text to a series of paragraphs.
reorderParagraphs :: [WriteOption] -> Text -> [Text]
reorderParagraphs options input =
unsafePerformIO $ do
bidi <- open
setPara bidi input 0
pcount <- countParagraphs bidi
lineBidi <- open
for [0..pcount-1] $ \pidx -> do
(start,limit) <- getParagraphByIndex bidi pidx
setLine bidi start limit lineBidi
writeReordered lineBidi options
|