1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
|
/* Pango
* break-indic.c:
*
* Copyright (C) 2006 Red Hat Software
* Author: Akira TAGOH <tagoh@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "config.h"
#include "pango-break.h"
/*
 * Code points tested by IS_COMPOSITE_WITH_BRAHMI_NUKTA() and
 * IS_SPLIT_MATRA_BRAHMI() below, grouped by script.  Where a trailing
 * comment has the form "xxxx + yyyy" it lists the two code points the
 * constant is composed of.
 */
/* Devanagari */
#define DEV_RRA 0x0931 /* 0930 + 093c */
#define DEV_QA 0x0958 /* 0915 + 093c */
#define DEV_YA 0x095F /* 092f + 093c */
#define DEV_KHHA 0x0959
#define DEV_GHHA 0x095A
#define DEV_ZA 0x095B
#define DEV_DDDHA 0x095C
#define DEV_RHA 0x095D
#define DEV_FA 0x095E
#define DEV_YYA 0x095F /* same value as DEV_YA */
/* Bengali */
/* The two BENGALI_SIGN_* values are the split matras checked by IS_SPLIT_MATRA_BRAHMI() */
#define BENGALI_SIGN_O 0x09CB /* 09c7 + 09be */
#define BENGALI_SIGN_AU 0x09CC /* 09c7 + 09d7 */
#define BENGALI_RRA 0x09DC
#define BENGALI_RHA 0x09DD
#define BENGALI_YYA 0x09DF
/* Gurumukhi */
#define GURUMUKHI_LLA 0x0A33
#define GURUMUKHI_SHA 0x0A36
#define GURUMUKHI_KHHA 0x0A59
#define GURUMUKHI_GHHA 0x0A5A
#define GURUMUKHI_ZA 0x0A5B
#define GURUMUKHI_RRA 0x0A5C
#define GURUMUKHI_FA 0x0A5E
/* Oriya */
#define ORIYA_AI 0x0B48
#define ORIYA_O 0x0B4B
#define ORIYA_AU 0x0B4C
/* Telugu */
#define TELUGU_EE 0x0C47
#define TELUGU_AI 0x0C48
/* Tamil */
#define TAMIL_O 0x0BCA
#define TAMIL_OO 0x0BCB
#define TAMIL_AU 0x0BCC
/* Kannada */
#define KNDA_EE 0x0CC7
#define KNDA_AI 0x0CC8
#define KNDA_O 0x0CCA
#define KNDA_OO 0x0CCB
/* Malayalam */
#define MLYM_O 0x0D4A
#define MLYM_OO 0x0D4B
#define MLYM_AU 0x0D4C
/*
 * IS_COMPOSITE_WITH_BRAHMI_NUKTA:
 * @c: a Unicode code point
 *
 * Evaluates to non-zero when @c is one of the precomposed code points
 * listed below: the Bengali, Devanagari and Gurumukhi letters, plus the
 * Kannada, Tamil, Telugu and Oriya vowel signs named by the constants.
 *
 * Every use of @c is parenthesized so that an argument containing a
 * low-precedence operator (for example "cond ? a : b") is compared as a
 * whole.  @c is still evaluated many times, so it must not have side
 * effects.
 */
#define IS_COMPOSITE_WITH_BRAHMI_NUKTA(c) ( \
  ((c) >= BENGALI_RRA && (c) <= BENGALI_YYA) || \
  ((c) >= DEV_QA && (c) <= DEV_YA) || ((c) == DEV_RRA) || ((c) >= DEV_KHHA && (c) <= DEV_YYA) || \
  ((c) >= KNDA_EE && (c) <= KNDA_AI) || ((c) >= KNDA_O && (c) <= KNDA_OO) || \
  ((c) == TAMIL_O) || ((c) == TAMIL_OO) || ((c) == TAMIL_AU) || \
  ((c) == TELUGU_EE) || ((c) == TELUGU_AI) || \
  ((c) == ORIYA_AI) || ((c) == ORIYA_O) || ((c) == ORIYA_AU) || \
  ((c) >= GURUMUKHI_KHHA && (c) <= GURUMUKHI_RRA) || ((c) == GURUMUKHI_FA) || ((c) == GURUMUKHI_LLA) || ((c) == GURUMUKHI_SHA) || \
  FALSE)
/*
 * IS_SPLIT_MATRA_BRAHMI:
 * @c: a Unicode code point
 *
 * Evaluates to non-zero when @c is BENGALI_SIGN_O, BENGALI_SIGN_AU, or
 * lies in the range MLYM_O..MLYM_AU.
 *
 * Every use of @c is parenthesized so that an argument containing a
 * low-precedence operator is compared as a whole.  @c is evaluated more
 * than once, so it must not have side effects.
 */
#define IS_SPLIT_MATRA_BRAHMI(c) ( \
  ((c) == BENGALI_SIGN_O) || ((c) == BENGALI_SIGN_AU) || \
  ((c) >= MLYM_O && (c) <= MLYM_AU) || \
  FALSE)
/*
 * not_cursor_position:
 * @attr: log attribute entry to update in place
 *
 * Clears the is_cursor_position, is_char_break and is_line_break flags
 * of @attr.  An entry whose is_mandatory_break flag is set is left
 * completely untouched.
 */
static void
not_cursor_position (PangoLogAttr *attr)
{
  if (attr->is_mandatory_break)
    return;

  /* is_mandatory_break is known to be FALSE on this path, so it is not
   * written again. */
  attr->is_cursor_position = FALSE;
  attr->is_char_break = FALSE;
  attr->is_line_break = FALSE;
}
/*
 * break_indic:
 * @text: UTF-8 text to scan; nothing is done when this is NULL
 * @length: number of bytes of @text to scan
 * @analysis: analysis for the run; only its script field is read
 * @attrs: log attribute array, indexed by character offset, updated in place
 * @attrs_len: not consulted
 *
 * Walks @text one character at a time, looking at the previous character
 * and up to two characters ahead, and adjusts @attrs:
 *
 *  - a character matching IS_COMPOSITE_WITH_BRAHMI_NUKTA() or
 *    IS_SPLIT_MATRA_BRAHMI() clears backspace_deletes_character on the
 *    entry that follows it;
 *  - for Sinhala, the sequences
 *      SINHALA CONS + 0x0DCA + 0x200D + SINHALA CONS
 *      SINHALA CONS + 0x200D + 0x0DCA + SINHALA CONS
 *    are treated as a single unit for the cursor, while a plain 0x0DCA
 *    followed by something other than 0x200D gets a cursor position;
 *  - for every other script, a ZWJ/ZWNJ that is not the first character
 *    removes the cursor position at itself, at the next character, and,
 *    when that next character is one of the listed viramas and another
 *    character follows, at the one after that.
 *
 * A look-ahead value of 0 means "no such character inside @length".
 *
 * NOTE(review): entries at offsets +1 and +2 are written without
 * consulting @attrs_len; presumably the caller provides at least one
 * entry more than there are characters -- confirm against the caller.
 *
 * TODO (carried over from the original): the Sinhala cursor handling
 * should be driven by the state table rather than done here.
 */
static void
break_indic (const char *text,
             int length,
             const PangoAnalysis *analysis,
             PangoLogAttr *attrs,
             int attrs_len G_GNUC_UNUSED)
{
  enum
  {
    U_ZWNJ = 0x200C,
    U_ZWJ = 0x200D,
    U_SINHALA_CONS_FIRST = 0x0D9A,
    U_SINHALA_CONS_LAST = 0x0DC6,
    U_SINHALA_AL_LAKUNA = 0x0DCA
  };

  const gchar *cursor = text;
  const gchar *limit;
  gunichar before = 0;
  gboolean pending_conjunct = FALSE;
  int pos = 0;

  if (text == NULL)
    return;

  limit = text + length;

  while (cursor < limit)
    {
      const gunichar here = g_utf8_get_char (cursor);
      const gchar *const after = g_utf8_next_char (cursor);
      gunichar ahead = 0;
      gunichar ahead2 = 0;

      if (G_UNLIKELY (IS_COMPOSITE_WITH_BRAHMI_NUKTA (here) ||
                      IS_SPLIT_MATRA_BRAHMI (here)))
        attrs[pos + 1].backspace_deletes_character = FALSE;

      /* Fetch up to two characters of look-ahead; 0 stands for "none". */
      if (after < limit)
        {
          const gchar *const after2 = g_utf8_next_char (after);

          ahead = g_utf8_get_char (after);
          if (after2 < limit)
            ahead2 = g_utf8_get_char (after2);
        }

      if (analysis->script == PANGO_SCRIPT_SINHALA)
        {
          const gboolean joiner_pair =
            (here == U_SINHALA_AL_LAKUNA && ahead == U_ZWJ) ||
            (here == U_ZWJ && ahead == U_SINHALA_AL_LAKUNA);
          const gboolean follows_joiner =
            (before == U_ZWJ || before == U_SINHALA_AL_LAKUNA);
          const gboolean is_consonant =
            (here >= U_SINHALA_CONS_FIRST && here <= U_SINHALA_CONS_LAST);

          if (joiner_pair)
            {
              /* First half of an explicit conjunct: glue both marks. */
              not_cursor_position (&attrs[pos]);
              not_cursor_position (&attrs[pos + 1]);
              pending_conjunct = TRUE;
            }
          else if (pending_conjunct && follows_joiner && is_consonant)
            {
              /* Consonant that completes the conjunct. */
              not_cursor_position (&attrs[pos]);
              pending_conjunct = FALSE;
            }
          else if (!pending_conjunct &&
                   before == U_SINHALA_AL_LAKUNA &&
                   here != U_ZWJ)
            {
              /* Consonant clusters do NOT form implicit conjuncts in
               * Sinhala orthography. */
              attrs[pos].is_cursor_position = TRUE;
            }
        }
      else if (before != 0 && (here == U_ZWJ || here == U_ZWNJ))
        {
          not_cursor_position (&attrs[pos]);

          if (ahead != 0)
            {
              gboolean virama_follows;

              not_cursor_position (&attrs[pos + 1]);

              switch (ahead)
                {
                case 0x09CD: /* Bengali */
                case 0x0ACD: /* Gujarati */
                case 0x094D: /* Hindi */
                case 0x0CCD: /* Kannada */
                case 0x0D4D: /* Malayalam */
                case 0x0B4D: /* Oriya */
                case 0x0A4D: /* Punjabi */
                case 0x0BCD: /* Tamil */
                case 0x0C4D: /* Telugu */
                  virama_follows = TRUE;
                  break;
                default:
                  virama_follows = FALSE;
                  break;
                }

              if (ahead2 != 0 && virama_follows)
                not_cursor_position (&attrs[pos + 2]);
            }
        }

      before = here;
      cursor = after;
      pos++;
    }
}
|