// Arabic stemming algorithm, written in Snowball.
/*
* Authors:
* - Assem Chelli, < assem [dot] ch [at] gmail >
* - Abdelkrim Aries <ab [underscore] aries [at] esi [dot] dz>
*
*/
// Inside string literals, {name} inserts the string bound to `name` by a
// stringdef below.
stringescapes { }
/* The Arabic letters in Unicode. Each stringdef binds a short ASCII name to
 * one code point (given in hexadecimal). */
// Hamza
stringdef o hex '621' // Hamza
stringdef ao hex '623' // Hamza above Alef
stringdef ao_ hex '625' // Hamza below Alef
stringdef a~ hex '622' // Alef madda
stringdef wo hex '624' // Hamza above waw
stringdef yo hex '626' // Hamza above yeh
// Letters
stringdef a hex '627' // Alef
stringdef a_ hex '649' // Alef Maksura
stringdef b hex '628' // Beh
stringdef t_ hex '629' // Teh_Marbuta
stringdef t hex '62a' // Teh
stringdef th hex '62b' // Theh
stringdef j hex '62c' // Jeem
stringdef h hex '62d' // Hah
stringdef x hex '62e' // Khah
stringdef d hex '62f' // Dal
stringdef dz hex '630' // Thal
stringdef r hex '631' // Reh
stringdef z hex '632' // Zain
stringdef s hex '633' // Seen
stringdef sh hex '634' // Sheen
stringdef c hex '635' // Sad
stringdef dh hex '636' // Dad
stringdef tt hex '637' // Tah
stringdef zh hex '638' // Zah
stringdef i hex '639' // Ain
stringdef gh hex '63a' // Ghain
stringdef f hex '641' // Feh
stringdef q hex '642' // Qaf
stringdef k hex '643' // Kaf
stringdef l hex '644' // Lam
stringdef m hex '645' // Meem
stringdef n hex '646' // Noon
stringdef e hex '647' // Heh
stringdef w hex '648' // Waw
stringdef y hex '64a' // Yeh
// Diacritics (vocalization marks; Normalize_pre deletes all of them)
stringdef aan hex '64b' // FatHatan
stringdef uun hex '64c' // Dammatan
stringdef iin hex '64d' // Kasratan
stringdef aa hex '64e' // FatHa
stringdef uu hex '64f' // Damma
stringdef ii hex '650' // Kasra
stringdef oo hex '652' // Sukun
stringdef ~ hex '651' // Shadda
// Arabic-Indic digits (U+0660..U+0669); Normalize_pre rewrites them as ASCII digits
stringdef 0 hex '0660'
stringdef 1 hex '0661'
stringdef 2 hex '0662'
stringdef 3 hex '0663'
stringdef 4 hex '0664'
stringdef 5 hex '0665'
stringdef 6 hex '0666'
stringdef 7 hex '0667'
stringdef 8 hex '0668'
stringdef 9 hex '0669'
// Arabic numeric punctuation (deleted by Normalize_pre)
stringdef % hex '066a' // Arabic percent sign
stringdef . hex '066b' // Arabic decimal separator
// NOTE(review): this binds the name ' so that {'} yields U+066C in this file;
// if {'} is otherwise expected to mean a literal apostrophe, confirm nothing relies on that.
stringdef ' hex '066c' // Arabic thousands separator
// Kasheeda
stringdef _ hex '640' // Kasheeda, Tatweel
// Punctuation marks (Arabic-specific; deleted by Normalize_pre)
stringdef , hex '060C' // Arabic comma
stringdef ; hex '061B' // Arabic semicolon
stringdef ? hex '061F' // Arabic question mark
// Shaped forms: code points U+FE80..U+FEF4 that encode a letter in one
// particular shape. Normalize_pre maps every name below back to the base
// letter named in the trailing comment.
stringdef o1 hex 'fe80' // HAMZA
stringdef ao1 hex 'fe83' // ALEF_HAMZA_ABOVE
stringdef ao2 hex 'fe84' // ALEF_HAMZA_ABOVE
stringdef ao_1 hex 'fe87' // ALEF_HAMZA_BELOW
stringdef ao_2 hex 'fe88' // ALEF_HAMZA_BELOW
stringdef yo1 hex 'fe8b' // YEH_HAMZA
stringdef yo2 hex 'fe8c' // YEH_HAMZA
stringdef yo3 hex 'fe89' // YEH_HAMZA
stringdef yo4 hex 'fe8a' // YEH_HAMZA
stringdef a~1 hex 'fe81' // ALEF_MADDA
stringdef a~2 hex 'fe82' // ALEF_MADDA
stringdef wo1 hex 'fe85' // WAW_HAMZA
stringdef wo2 hex 'fe86' // WAW_HAMZA
stringdef a1 hex 'fe8d' // ALEF
stringdef a2 hex 'fe8e' // ALEF
stringdef b1 hex 'fe8f' // BEH
stringdef b2 hex 'fe90' // BEH
stringdef b3 hex 'fe91' // BEH
stringdef b4 hex 'fe92' // BEH
stringdef t_1 hex 'fe93' // TEH_MARBUTA
stringdef t_2 hex 'fe94' // TEH_MARBUTA
stringdef t1 hex 'fe97' // TEH
stringdef t2 hex 'fe98' // TEH
stringdef t3 hex 'fe95' // TEH
stringdef t4 hex 'fe96' // TEH
stringdef th1 hex 'fe9b' // THEH
stringdef th2 hex 'fe9c' // THEH
stringdef th3 hex 'fe9a' // THEH
stringdef th4 hex 'fe99' // THEH
stringdef j1 hex 'fe9f' // JEEM
stringdef j2 hex 'fea0' // JEEM
stringdef j3 hex 'fe9d' // JEEM
stringdef j4 hex 'fe9e' // JEEM
stringdef h1 hex 'fea3' // HAH
stringdef h2 hex 'fea4' // HAH
stringdef h3 hex 'fea1' // HAH
stringdef h4 hex 'fea2' // HAH
stringdef x1 hex 'fea7' // KHAH
stringdef x2 hex 'fea8' // KHAH
stringdef x3 hex 'fea5' // KHAH
stringdef x4 hex 'fea6' // KHAH
stringdef d1 hex 'fea9' // DAL
stringdef d2 hex 'feaa' // DAL
stringdef dz1 hex 'feab' // THAL
stringdef dz2 hex 'feac' // THAL
stringdef r1 hex 'fead' // REH
stringdef r2 hex 'feae' // REH
stringdef z1 hex 'feaf' // ZAIN
stringdef z2 hex 'feb0' // ZAIN
stringdef s1 hex 'feb3' // SEEN
stringdef s2 hex 'feb4' // SEEN
stringdef s3 hex 'feb1' // SEEN
stringdef s4 hex 'feb2' // SEEN
stringdef sh1 hex 'feb7' // SHEEN
stringdef sh2 hex 'feb8' // SHEEN
stringdef sh3 hex 'feb5' // SHEEN
stringdef sh4 hex 'feb6' // SHEEN
stringdef c1 hex 'febb' // SAD
stringdef c2 hex 'febc' // SAD
stringdef c3 hex 'feb9' // SAD
stringdef c4 hex 'feba' // SAD
stringdef dh1 hex 'febf' // DAD
stringdef dh2 hex 'fec0' // DAD
stringdef dh3 hex 'febd' // DAD
stringdef dh4 hex 'febe' // DAD
stringdef tt1 hex 'fec3' // TAH
stringdef tt2 hex 'fec4' // TAH
stringdef tt3 hex 'fec1' // TAH
stringdef tt4 hex 'fec2' // TAH
stringdef zh1 hex 'fec7' // ZAH
stringdef zh2 hex 'fec8' // ZAH
stringdef zh3 hex 'fec5' // ZAH
stringdef zh4 hex 'fec6' // ZAH
stringdef i1 hex 'fecb' // AIN
stringdef i2 hex 'fecc' // AIN
stringdef i3 hex 'fec9' // AIN
stringdef i4 hex 'feca' // AIN
stringdef gh1 hex 'fecf' // GHAIN
stringdef gh2 hex 'fed0' // GHAIN
stringdef gh3 hex 'fecd' // GHAIN
stringdef gh4 hex 'fece' // GHAIN
stringdef f1 hex 'fed3' // FEH
stringdef f2 hex 'fed4' // FEH
stringdef f3 hex 'fed1' // FEH
stringdef f4 hex 'fed2' // FEH
stringdef q1 hex 'fed7' // QAF
stringdef q2 hex 'fed8' // QAF
stringdef q3 hex 'fed5' // QAF
stringdef q4 hex 'fed6' // QAF
stringdef k1 hex 'fedb' // KAF
stringdef k2 hex 'fedc' // KAF
stringdef k3 hex 'fed9' // KAF
stringdef k4 hex 'feda' // KAF
stringdef l1 hex 'fedf' // LAM
stringdef l2 hex 'fee0' // LAM
stringdef l3 hex 'fedd' // LAM
stringdef l4 hex 'fede' // LAM
stringdef m1 hex 'fee3' // MEEM
stringdef m2 hex 'fee4' // MEEM
stringdef m3 hex 'fee1' // MEEM
stringdef m4 hex 'fee2' // MEEM
stringdef n1 hex 'fee7' // NOON
stringdef n2 hex 'fee8' // NOON
stringdef n3 hex 'fee5' // NOON
stringdef n4 hex 'fee6' // NOON
stringdef e1 hex 'feeb' // HEH
stringdef e2 hex 'feec' // HEH
stringdef e3 hex 'fee9' // HEH
stringdef e4 hex 'feea' // HEH
stringdef w1 hex 'feed' // WAW
stringdef w2 hex 'feee' // WAW
stringdef a_1 hex 'feef' // ALEF_MAKSURA
stringdef a_2 hex 'fef0' // ALEF_MAKSURA
stringdef y1 hex 'fef3' // YEH
stringdef y2 hex 'fef4' // YEH
stringdef y3 hex 'fef1' // YEH
stringdef y4 hex 'fef2' // YEH
// Lam-Alef ligatures (U+FEF5..U+FEFC): single code points that Normalize_pre
// expands into the two-letter sequence Lam + the Alef variant named in the comment.
stringdef la hex 'fefb' // LAM_ALEF
stringdef la2 hex 'fefc' // LAM_ALEF
stringdef lao hex 'fef7' // LAM_ALEF_HAMZA_ABOVE
stringdef lao2 hex 'fef8' // LAM_ALEF_HAMZA_ABOVE
stringdef lao_ hex 'fef9' // LAM_ALEF_HAMZA_BELOW
stringdef lao_2 hex 'fefa' // LAM_ALEF_HAMZA_BELOW
stringdef la~ hex 'fef5' // LAM_ALEF_MADDA_ABOVE
stringdef la~2 hex 'fef6' // LAM_ALEF_MADDA_ABOVE
// Scratch integer: every affix routine stores the current word length here
// before comparing it with that routine's minimum-length guard.
integers ( word_len )

// Word-class flags. `stem` initialises them; Checks1 and Prefix_Step4_Verb
// update them, and `stem` consults them to pick which routines to try.
booleans ( is_noun is_verb is_defined )

routines (
    // character normalization
    Normalize_pre
    Normalize_post

    // word-class guess
    Checks1

    // prefix removal (forward direction)
    Prefix_Step1
    Prefix_Step2
    Prefix_Step3a_Noun
    Prefix_Step3b_Noun
    Prefix_Step3_Verb
    Prefix_Step4_Verb

    // suffix removal (defined in backward mode)
    Suffix_Noun_Step1a
    Suffix_Noun_Step1b
    Suffix_Noun_Step2a
    Suffix_Noun_Step2b
    Suffix_Noun_Step2c1
    Suffix_Noun_Step2c2
    Suffix_Noun_Step3
    Suffix_Verb_Step1
    Suffix_Verb_Step2a
    Suffix_Verb_Step2b
    Suffix_Verb_Step2c
    Suffix_All_alef_maqsura
)

// The only routine visible to callers of the generated stemmer.
externals ( stem )

// No character groupings are used by this algorithm.
groupings ( )
// Normalizations
// Pre-stemming normalization. Walks the word from the start; at each position
// the character is either rewritten/deleted according to the table below, or
// skipped with `next` when it has no entry.
define Normalize_pre as (
    loop len (
        (
            [substring] among (

                // ---- characters removed outright ----
                // vocalization marks
                '{aan}' '{uun}' '{iin}' '{aa}' '{uu}' '{ii}' '{oo}' '{~}'
                // kasheeda
                '{_}'
                // general punctuation
                '.' ',' ';' ':' '?' '!' '/' '*' '%' '\' '"'
                // Arabic-specific punctuation
                '{,}' '{;}' '{?}'
                // Arabic percent sign, decimal and thousands separators
                '{%}' '{.}' '{'}'
                    ( delete )

                // ---- Arabic-Indic digits become ASCII digits ----
                '{0}'   ( <- '0' )
                '{1}'   ( <- '1' )
                '{2}'   ( <- '2' )
                '{3}'   ( <- '3' )
                '{4}'   ( <- '4' )
                '{5}'   ( <- '5' )
                '{6}'   ( <- '6' )
                '{7}'   ( <- '7' )
                '{8}'   ( <- '8' )
                '{9}'   ( <- '9' )

                // ---- shaped forms fold back to the base letter ----
                '{o1}'
                    ( <- '{o}' )        // HAMZA
                '{ao1}' '{ao2}'
                    ( <- '{ao}' )       // ALEF_HAMZA_ABOVE
                '{ao_1}' '{ao_2}'
                    ( <- '{ao_}' )      // ALEF_HAMZA_BELOW
                '{yo1}' '{yo2}' '{yo3}' '{yo4}'
                    ( <- '{yo}' )       // YEH_HAMZA
                '{a~1}' '{a~2}'
                    ( <- '{a~}' )       // ALEF_MADDA
                '{wo1}' '{wo2}'
                    ( <- '{wo}' )       // WAW_HAMZA
                '{a1}' '{a2}'
                    ( <- '{a}' )        // ALEF
                '{b1}' '{b2}' '{b3}' '{b4}'
                    ( <- '{b}' )        // BEH
                '{t_1}' '{t_2}'
                    ( <- '{t_}' )       // TEH_MARBUTA
                '{t1}' '{t2}' '{t3}' '{t4}'
                    ( <- '{t}' )        // TEH
                '{th1}' '{th2}' '{th3}' '{th4}'
                    ( <- '{th}' )       // THEH
                '{j1}' '{j2}' '{j3}' '{j4}'
                    ( <- '{j}' )        // JEEM
                '{h1}' '{h2}' '{h3}' '{h4}'
                    ( <- '{h}' )        // HAH
                '{x1}' '{x2}' '{x3}' '{x4}'
                    ( <- '{x}' )        // KHAH
                '{d1}' '{d2}'
                    ( <- '{d}' )        // DAL
                '{dz1}' '{dz2}'
                    ( <- '{dz}' )       // THAL
                '{r1}' '{r2}'
                    ( <- '{r}' )        // REH
                '{z1}' '{z2}'
                    ( <- '{z}' )        // ZAIN
                '{s1}' '{s2}' '{s3}' '{s4}'
                    ( <- '{s}' )        // SEEN
                '{sh1}' '{sh2}' '{sh3}' '{sh4}'
                    ( <- '{sh}' )       // SHEEN
                '{c1}' '{c2}' '{c3}' '{c4}'
                    ( <- '{c}' )        // SAD
                '{dh1}' '{dh2}' '{dh3}' '{dh4}'
                    ( <- '{dh}' )       // DAD
                '{tt1}' '{tt2}' '{tt3}' '{tt4}'
                    ( <- '{tt}' )       // TAH
                '{zh1}' '{zh2}' '{zh3}' '{zh4}'
                    ( <- '{zh}' )       // ZAH
                '{i1}' '{i2}' '{i3}' '{i4}'
                    ( <- '{i}' )        // AIN
                '{gh1}' '{gh2}' '{gh3}' '{gh4}'
                    ( <- '{gh}' )       // GHAIN
                '{f1}' '{f2}' '{f3}' '{f4}'
                    ( <- '{f}' )        // FEH
                '{q1}' '{q2}' '{q3}' '{q4}'
                    ( <- '{q}' )        // QAF
                '{k1}' '{k2}' '{k3}' '{k4}'
                    ( <- '{k}' )        // KAF
                '{l1}' '{l2}' '{l3}' '{l4}'
                    ( <- '{l}' )        // LAM
                '{m1}' '{m2}' '{m3}' '{m4}'
                    ( <- '{m}' )        // MEEM
                '{n1}' '{n2}' '{n3}' '{n4}'
                    ( <- '{n}' )        // NOON
                '{e1}' '{e2}' '{e3}' '{e4}'
                    ( <- '{e}' )        // HEH
                '{w1}' '{w2}'
                    ( <- '{w}' )        // WAW
                '{a_1}' '{a_2}'
                    ( <- '{a_}' )       // ALEF_MAKSURA
                '{y1}' '{y2}' '{y3}' '{y4}'
                    ( <- '{y}' )        // YEH

                // ---- Lam-Alef ligatures expand to two letters ----
                '{la}' '{la2}'
                    ( <- '{l}{a}' )
                '{lao}' '{lao2}'
                    ( <- '{l}{ao}' )
                '{lao_}' '{lao_2}'
                    ( <- '{l}{ao_}' )
                '{la~}' '{la~2}'
                    ( <- '{l}{a~}' )
            )
        )
        or
        next    // no table entry: step over this character
    )
)
// Post-stemming normalization of the hamza carrier letters
// (alef with hamza above/below, alef madda, waw with hamza, yeh with hamza).
define Normalize_post as (
    // Pass 1: a carrier that is the last character of the word becomes a bare
    // hamza. Wrapped in `do` so that a word ending in any other character
    // simply leaves this pass without failing the routine.
    do (
        backwards (
            [substring] among (
                '{ao}' '{ao_}' '{a~}' '{wo}' '{yo}' ( <- '{o}' )
            )
        )
    )
    // Pass 2: every carrier still present is reduced to its underlying letter.
    // `repeat` keeps going until neither the table nor `next` can advance,
    // i.e. until the end of the word, so the scan no longer relies on
    // `word_len` (a scratch value left behind by whichever affix routine ran
    // last) being at least as large as the current word length.
    do repeat (
        (
            [substring] among (
                '{ao}' '{ao_}' '{a~}' ( <- '{a}' )
                '{wo}'                ( <- '{w}' )
                '{yo}'                ( <- '{y}' )
            )
        )
        or
        next
    )
)
// Checks
// Word-class guess. When the word starts with one of the listed prefixes and
// is longer than the stated minimum, it is flagged as a defined noun and
// excluded from the verb rules. Nothing is removed from the word here.
define Checks1 as (
    $word_len = len
    [substring] among (
        // two-letter prefixes
        '{a}{l}' '{l}{l}'
            ( $word_len > 3
              unset is_verb
              set is_noun
              set is_defined )
        // three-letter prefixes
        '{k}{a}{l}' '{b}{a}{l}'
            ( $word_len > 4
              unset is_verb
              set is_noun
              set is_defined )
    )
)
//prefixes
// Prefix step 1: a leading hamza-above-alef followed by a second alef/hamza
// letter is collapsed into a single letter. Applies only to words longer
// than 3 characters.
define Prefix_Step1 as (
    $word_len = len
    [substring] among (
        // result is hamza-above-alef
        '{ao}{ao}'
            ( $word_len > 3  <- '{ao}' )
        '{ao}{wo}'
            ( $word_len > 3  <- '{ao}' )
        // result is the second letter of the pair
        '{ao}{a~}'
            ( $word_len > 3  <- '{a~}' )
        '{ao}{a}'
            ( $word_len > 3  <- '{a}' )
        '{ao}{ao_}'
            ( $word_len > 3  <- '{ao_}' )
        // disabled (rare case): '{ao}' ($word_len > 3 delete)
    )
)
// Prefix step 2: drop a leading feh or waw from words longer than 3
// characters, unless that letter is immediately followed by alef.
define Prefix_Step2 as (
    $word_len = len
    // guards: `not` leaves the cursor where it was
    not '{w}{a}'
    not '{f}{a}'
    [substring] among (
        '{f}' '{w}'
            ( $word_len > 3  delete )
    )
)
// Prefix step 3a (noun, defined): delete the same leading prefixes that
// Checks1 looks for, using stricter minimum lengths than Checks1 does.
define Prefix_Step3a_Noun as (
    $word_len = len
    [substring] among (
        // two-letter prefixes
        '{a}{l}' '{l}{l}'
            ( $word_len > 4  delete )
        // three-letter prefixes
        '{k}{a}{l}' '{b}{a}{l}'
            ( $word_len > 5  delete )
    )
)
// Prefix step 3b (probably a defined noun): delete a leading beh, or reduce
// a doubled leading beh/kaf to a single letter. Words longer than 3 only.
define Prefix_Step3b_Noun as (
    $word_len = len
    // exception: words starting with beh + alef are left alone
    not '{b}{a}'
    [substring] among (
        '{b}{b}'
            ( $word_len > 3  <- '{b}' )
        '{k}{k}'
            ( $word_len > 3  <- '{k}' )
        '{b}'
            ( $word_len > 3  delete )
        // disabled, BUG: cause confusion
        // '{k}' '{l}' ($word_len > 3 delete)
    )
)
// Prefix step 3 (verb): remove a leading seen when it is followed by yeh,
// teh, noon or hamza-above-alef; the following letter is kept. Applies to
// words longer than 4 characters.
define Prefix_Step3_Verb as (
    $word_len = len
    [substring] among (
        '{s}{ao}'
            ( $word_len > 4  <- '{ao}' )
        '{s}{n}'
            ( $word_len > 4  <- '{n}' )
        '{s}{t}'
            ( $word_len > 4  <- '{t}' )
        '{s}{y}'
            ( $word_len > 4  <- '{y}' )
        // disabled, BUG: cause confusion
        // '{s}' ($word_len > 4 delete)
    )
)
// Prefix step 4 (verb): a leading yeh/noon/teh + seen + teh is rewritten as
// alef + seen + teh, and the word is marked as a verb and no longer a noun.
// Applies to words longer than 4 characters.
define Prefix_Step4_Verb as (
    $word_len = len
    [substring] among (
        '{t}{s}{t}' '{n}{s}{t}' '{y}{s}{t}'
            ( $word_len > 4
              unset is_noun
              set is_verb
              <- '{a}{s}{t}' )
    )
)
// suffixes
// Suffix routines. Everything in this group matches from the end of the word
// towards its start. Each routine records the current word length in
// word_len and removes a suffix only when the length guard on that entry holds.
backwardmode (

    // ---------------- noun suffixes ----------------

    define Suffix_Noun_Step1a as (
        $word_len = len
        [substring] among (
            // one letter
            '{e}' '{k}' '{y}'
                ( $word_len >= 4  delete )
            // two letters
            '{e}{m}' '{e}{n}' '{e}{a}' '{k}{m}' '{n}{a}'
                ( $word_len >= 5  delete )
            // three letters
            '{e}{m}{a}' '{k}{m}{a}'
                ( $word_len >= 6  delete )
        )
    )

    define Suffix_Noun_Step1b as (
        $word_len = len
        [substring] among (
            '{n}'
                ( $word_len > 5  delete )
        )
    )

    define Suffix_Noun_Step2a as (
        $word_len = len
        [substring] among (
            '{w}' '{y}' '{a}'
                ( $word_len > 4  delete )
        )
    )

    define Suffix_Noun_Step2b as (
        $word_len = len
        [substring] among (
            '{a}{t}'
                ( $word_len >= 5  delete )
        )
    )

    define Suffix_Noun_Step2c1 as (
        $word_len = len
        [substring] among (
            '{t}'
                ( $word_len >= 4  delete )
        )
    )

    // feminine t_
    define Suffix_Noun_Step2c2 as (
        $word_len = len
        [substring] among (
            '{t_}'
                ( $word_len >= 4  delete )
        )
    )

    // ya' nisbiya
    define Suffix_Noun_Step3 as (
        $word_len = len
        [substring] among (
            '{y}'
                ( $word_len >= 3  delete )
        )
    )

    // ---------------- verb suffixes ----------------

    define Suffix_Verb_Step1 as (
        $word_len = len
        [substring] among (
            // one letter
            '{k}' '{e}'
                ( $word_len >= 4  delete )
            // two letters
            '{k}{n}' '{k}{m}' '{e}{n}' '{e}{m}' '{e}{a}' '{n}{a}' '{n}{y}'
                ( $word_len >= 5  delete )
            // three letters
            '{k}{m}{w}' '{k}{m}{a}' '{e}{m}{a}'
                ( $word_len >= 6  delete )
        )
    )

    define Suffix_Verb_Step2a as (
        $word_len = len
        [substring] among (
            // one letter
            '{t}' '{a}' '{n}' '{y}'
                ( $word_len >= 4  delete )
            // two letters, past
            '{t}{n}' '{t}{a}' '{n}{a}'
                ( $word_len >= 5  delete )
            // two letters, present
            '{y}{n}' '{w}{n}' '{a}{n}'
                ( $word_len > 5  delete )
            // three letters
            '{t}{m}{a}'
                ( $word_len >= 6  delete )
        )
    )

    define Suffix_Verb_Step2b as (
        $word_len = len
        [substring] among (
            '{t}{m}' '{w}{a}'
                ( $word_len >= 5  delete )
        )
    )

    define Suffix_Verb_Step2c as (
        $word_len = len
        [substring] among (
            '{t}{m}{w}'
                ( $word_len >= 6  delete )
            '{w}'
                ( $word_len >= 4  delete )
        )
    )

    // ---------------- any word class ----------------

    // A final alef maqsura is rewritten as yeh (treated as a spelling error).
    define Suffix_All_alef_maqsura as (
        $word_len = len
        [substring] among (
            '{a_}'
                ( <- '{y}' )
            // disabled alternatives:
            // '{a_}' ( delete ) // if noun > 3
            // '{a_}' ( <- '{a}') // if verb
        )
    )
)
// Entry point of the stemmer (the only external routine).
// Order of work: reset the word-class flags, guess the word class, normalize
// characters, strip suffixes, strip prefixes, normalize hamza forms.
// Every stage is wrapped in `do`, so a stage that matches nothing does not
// make `stem` fail.
define stem as (
// set initial values
// (a word starts out eligible for both the noun rules and the verb rules)
set is_noun
set is_verb
unset is_defined
// guess type and properties
// NOTE: Checks1 runs before Normalize_pre, i.e. on the word as it was passed in
do Checks1
// normalization pre-stemming
do Normalize_pre
// Suffix stage: processed from the end of the word.
// Three alternatives are tried in order: verb suffixes, noun suffixes,
// alef maqsura; the first alternative that succeeds ends the stage.
backwards (
do (
//Suffixes for verbs
(
// skipped when Checks1 has cleared is_verb
is_verb
(
(
// one or more Step1 removals, then Step2a, else Step2c, else `next`
(atleast 1 Suffix_Verb_Step1)
( Suffix_Verb_Step2a or Suffix_Verb_Step2c or next)
)
// no Step1 suffix was removed: try Step2b, then Step2a on its own
or Suffix_Verb_Step2b
or Suffix_Verb_Step2a
)
)
//Suffixes for nouns
or (
is_noun
(
// `try`: the noun branch continues to Suffix_Noun_Step3 whether or not
// any of the alternatives below matched
try (
Suffix_Noun_Step2c2
// Step1a is attempted only for words Checks1 did not mark as defined
or (not is_defined Suffix_Noun_Step1a (
Suffix_Noun_Step2a
or Suffix_Noun_Step2b
or Suffix_Noun_Step2c1
or next))
// after Step1b one of the Step2 removals is required (no `next` fallback here)
or (Suffix_Noun_Step1b (
Suffix_Noun_Step2a
or Suffix_Noun_Step2b
or Suffix_Noun_Step2c1))
or (not is_defined Suffix_Noun_Step2a)
or (Suffix_Noun_Step2b)
)
// Step3 must succeed for the noun alternative to count as matched;
// otherwise the alef maqsura alternative below is tried
Suffix_Noun_Step3
)
)
// Suffixes for alef maqsura
or Suffix_All_alef_maqsura
)
)
//Prefixes
do (
// Steps 1 and 2 are optional
try Prefix_Step1
try Prefix_Step2
// then exactly one of: defined-noun prefix, other noun prefix, verb prefix
( Prefix_Step3a_Noun
or (is_noun Prefix_Step3b_Noun)
// Prefix_Step3_Verb is optional; Prefix_Step4_Verb runs either way
or (is_verb try Prefix_Step3_Verb Prefix_Step4_Verb)
)
)
// normalization post-stemming
do Normalize_post
)
// End of the Arabic stemmer.