File: arabic.sbl

package info (click to toggle)
rust-rust-stemmers 1.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,832 kB
  • sloc: makefile: 4
file content (597 lines) | stat: -rw-r--r-- 18,854 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
/*
 * Authors:
 * - Assem Chelli, < assem [dot] ch [at] gmail >
 * - Abdelkrim Aries <ab [underscore] aries [at] esi [dot] dz>
 *
*/

// With these escape characters, `{name}` inside a string literal is replaced
// by the string defined for `name` by the stringdef lines below.
stringescapes { }

/* the Arabic letters in Unicode */
// Hamza
stringdef o     hex '621' // Hamza
stringdef ao    hex '623' // Hamza above Alef
stringdef ao_   hex '625' // Hamza below Alef
stringdef a~    hex '622' // Alef madda
stringdef wo    hex '624' // Hamza above waw
stringdef yo    hex '626' // Hamza above yeh

// Letters
stringdef a     hex '627' // Alef
stringdef a_    hex '649' // Alef Maksura
stringdef b     hex '628' // Beh
stringdef t_    hex '629' // Teh_Marbuta
stringdef t     hex '62a' // Teh
stringdef th    hex '62b' // Theh
stringdef j     hex '62c' // Jeem
stringdef h     hex '62d' // Hah
stringdef x     hex '62e' // Khah
stringdef d     hex '62f' // Dal
stringdef dz    hex '630' // Thal
stringdef r     hex '631' // Reh
stringdef z     hex '632' // Zain
stringdef s     hex '633' // Seen
stringdef sh    hex '634' // Sheen
stringdef c     hex '635' // Sad
stringdef dh    hex '636' // Dad
stringdef tt    hex '637' // Tah
stringdef zh    hex '638' // Zah
stringdef i     hex '639' // Ain
stringdef gh    hex '63a' // Ghain
stringdef f     hex '641' // Feh
stringdef q     hex '642' // Qaf
stringdef k     hex '643' // Kaf
stringdef l     hex '644' // Lam
stringdef m     hex '645' // Meem
stringdef n     hex '646' // Noon
stringdef e     hex '647' // Heh
stringdef w     hex '648' // Waw
stringdef y     hex '64a' // Yeh

// Diacritics (Normalize_pre deletes all of these)
stringdef aan   hex '64b' // FatHatan
stringdef uun   hex '64c' // Dammatan
stringdef iin   hex '64d' // Kasratan
stringdef aa    hex '64e' // FatHa
stringdef uu    hex '64f' // Damma
stringdef ii    hex '650' // Kasra
stringdef oo    hex '652' // Sukun
stringdef ~     hex '651' // Shadda

// Hindu–Arabic numerals
// Normalize_pre rewrites the ten digits as ASCII '0'..'9' and deletes the
// percent sign and the two separators.
stringdef 0     hex '0660'
stringdef 1     hex '0661'
stringdef 2     hex '0662'
stringdef 3     hex '0663'
stringdef 4     hex '0664'
stringdef 5     hex '0665'
stringdef 6     hex '0666'
stringdef 7     hex '0667'
stringdef 8     hex '0668'
stringdef 9     hex '0669'
stringdef %     hex '066a'  // PERCENT
stringdef .     hex '066b'  // DECIMAL
stringdef '     hex '066c'  // THOUSANDS

// Kasheeda (deleted by Normalize_pre)
stringdef _     hex '640' // Kasheeda, Tatweel

// Punctuation marks (deleted by Normalize_pre)
stringdef ,   hex '060C' // COMMA
stringdef ;   hex '061B' // SEMICOLON
stringdef ?   hex '061F' // QUESTION

// Shaped forms
// Presentation-form code points (U+FE80..U+FEF4). The digit at the end of
// each name only distinguishes the variants of one letter; Normalize_pre maps
// every variant back to the base letter defined above.
stringdef o1     hex  'fe80'  // HAMZA
stringdef ao1    hex  'fe83'  // ALEF_HAMZA_ABOVE
stringdef ao2    hex  'fe84'  // ALEF_HAMZA_ABOVE
stringdef ao_1   hex  'fe87'  // ALEF_HAMZA_BELOW
stringdef ao_2   hex  'fe88'  // ALEF_HAMZA_BELOW
stringdef yo1    hex  'fe8b'  // YEH_HAMZA
stringdef yo2    hex  'fe8c'  // YEH_HAMZA
stringdef yo3    hex  'fe89'  // YEH_HAMZA
stringdef yo4    hex  'fe8a'  // YEH_HAMZA
stringdef a~1    hex  'fe81'  // ALEF_MADDA
stringdef a~2    hex  'fe82'  // ALEF_MADDA
stringdef wo1    hex  'fe85'  // WAW_HAMZA
stringdef wo2    hex  'fe86'  // WAW_HAMZA
stringdef a1     hex  'fe8d'  // ALEF
stringdef a2     hex  'fe8e'  // ALEF
stringdef b1     hex  'fe8f'  // BEH
stringdef b2     hex  'fe90'  // BEH
stringdef b3     hex  'fe91'  // BEH
stringdef b4     hex  'fe92'  // BEH
stringdef t_1    hex  'fe93'  // TEH_MARBUTA
stringdef t_2    hex  'fe94'  // TEH_MARBUTA
stringdef t1     hex  'fe97'  // TEH
stringdef t2     hex  'fe98'  // TEH
stringdef t3     hex  'fe95'  // TEH
stringdef t4     hex  'fe96'  // TEH
stringdef th1    hex  'fe9b'  // THEH
stringdef th2    hex  'fe9c'  // THEH
stringdef th3    hex  'fe9a'  // THEH
stringdef th4    hex  'fe99'  // THEH
stringdef j1     hex  'fe9f'  // JEEM
stringdef j2     hex  'fea0'  // JEEM
stringdef j3     hex  'fe9d'  // JEEM
stringdef j4     hex  'fe9e'  // JEEM
stringdef h1     hex  'fea3'  // HAH
stringdef h2     hex  'fea4'  // HAH
stringdef h3     hex  'fea1'  // HAH
stringdef h4     hex  'fea2'  // HAH
stringdef x1     hex  'fea7'  // KHAH
stringdef x2     hex  'fea8'  // KHAH
stringdef x3     hex  'fea5'  // KHAH
stringdef x4     hex  'fea6'  // KHAH
stringdef d1     hex  'fea9'  // DAL
stringdef d2     hex  'feaa'  // DAL
stringdef dz1    hex  'feab'  // THAL
stringdef dz2    hex  'feac'  // THAL
stringdef r1     hex  'fead'  // REH
stringdef r2     hex  'feae'  // REH
stringdef z1     hex  'feaf'  // ZAIN
stringdef z2     hex  'feb0'  // ZAIN
stringdef s1     hex  'feb3'  // SEEN
stringdef s2     hex  'feb4'  // SEEN
stringdef s3     hex  'feb1'  // SEEN
stringdef s4     hex  'feb2'  // SEEN
stringdef sh1    hex  'feb7'  // SHEEN
stringdef sh2    hex  'feb8'  // SHEEN
stringdef sh3    hex  'feb5'  // SHEEN
stringdef sh4    hex  'feb6'  // SHEEN
stringdef c1     hex  'febb'  // SAD
stringdef c2     hex  'febc'  // SAD
stringdef c3     hex  'feb9'  // SAD
stringdef c4     hex  'feba'  // SAD
stringdef dh1    hex  'febf'  // DAD
stringdef dh2    hex  'fec0'  // DAD
stringdef dh3    hex  'febd'  // DAD
stringdef dh4    hex  'febe'  // DAD
stringdef tt1    hex  'fec3'  // TAH
stringdef tt2    hex  'fec4'  // TAH
stringdef tt3    hex  'fec1'  // TAH
stringdef tt4    hex  'fec2'  // TAH
stringdef zh1    hex  'fec7'  // ZAH
stringdef zh2    hex  'fec8'  // ZAH
stringdef zh3    hex  'fec5'  // ZAH
stringdef zh4    hex  'fec6'  // ZAH
stringdef i1     hex  'fecb'  // AIN
stringdef i2     hex  'fecc'  // AIN
stringdef i3     hex  'fec9'  // AIN
stringdef i4     hex  'feca'  // AIN
stringdef gh1    hex  'fecf'  // GHAIN
stringdef gh2    hex  'fed0'  // GHAIN
stringdef gh3    hex  'fecd'  // GHAIN
stringdef gh4    hex  'fece'  // GHAIN
stringdef f1     hex  'fed3'  // FEH
stringdef f2     hex  'fed4'  // FEH
stringdef f3     hex  'fed1'  // FEH
stringdef f4     hex  'fed2'  // FEH
stringdef q1     hex  'fed7'  // QAF
stringdef q2     hex  'fed8'  // QAF
stringdef q3     hex  'fed5'  // QAF
stringdef q4     hex  'fed6'  // QAF
stringdef k1     hex  'fedb'  // KAF
stringdef k2     hex  'fedc'  // KAF
stringdef k3     hex  'fed9'  // KAF
stringdef k4     hex  'feda'  // KAF
stringdef l1     hex  'fedf'  // LAM
stringdef l2     hex  'fee0'  // LAM
stringdef l3     hex  'fedd'  // LAM
stringdef l4     hex  'fede'  // LAM
stringdef m1     hex  'fee3'  // MEEM
stringdef m2     hex  'fee4'  // MEEM
stringdef m3     hex  'fee1'  // MEEM
stringdef m4     hex  'fee2'  // MEEM
stringdef n1     hex  'fee7'  // NOON
stringdef n2     hex  'fee8'  // NOON
stringdef n3     hex  'fee5'  // NOON
stringdef n4     hex  'fee6'  // NOON
stringdef e1     hex  'feeb'  // HEH
stringdef e2     hex  'feec'  // HEH
stringdef e3     hex  'fee9'  // HEH
stringdef e4     hex  'feea'  // HEH
stringdef w1     hex  'feed'  // WAW
stringdef w2     hex  'feee'  // WAW
stringdef a_1    hex  'feef'  // ALEF_MAKSURA
stringdef a_2    hex  'fef0'  // ALEF_MAKSURA
stringdef y1     hex  'fef3'  // YEH
stringdef y2     hex  'fef4'  // YEH
stringdef y3     hex  'fef1'  // YEH
stringdef y4     hex  'fef2'  // YEH

// Ligatures Lam-Alef
// Single code points (U+FEF5..U+FEFC) that Normalize_pre expands into Lam
// followed by the matching Alef variant.
stringdef la      hex  'fefb' // LAM_ALEF
stringdef la2     hex  'fefc' // LAM_ALEF
stringdef lao     hex  'fef7' // LAM_ALEF_HAMZA_ABOVE
stringdef lao2    hex  'fef8' // LAM_ALEF_HAMZA_ABOVE
stringdef lao_    hex  'fef9' // LAM_ALEF_HAMZA_BELOW
stringdef lao_2   hex  'fefa' // LAM_ALEF_HAMZA_BELOW
stringdef la~     hex  'fef5' // LAM_ALEF_MADDA_ABOVE
stringdef la~2    hex  'fef6' // LAM_ALEF_MADDA_ABOVE



// Scratch integer: each prefix/suffix routine stores the current word length
// here before comparing it with that routine's minimum length.
integers ( word_len )

// Word-class flags. `stem` initialises them; Checks1 and Prefix_Step4_Verb
// update them when a telling prefix is found.
booleans ( is_noun  is_verb  is_defined )

routines (
    // prefix removal (forward mode)
    Prefix_Step1
    Prefix_Step2
    Prefix_Step3a_Noun
    Prefix_Step3b_Noun
    Prefix_Step3_Verb
    Prefix_Step4_Verb

    // suffix removal (defined inside the backwardmode section)
    Suffix_All_alef_maqsura
    Suffix_Noun_Step1a
    Suffix_Noun_Step1b
    Suffix_Noun_Step2a
    Suffix_Noun_Step2b
    Suffix_Noun_Step2c1
    Suffix_Noun_Step2c2
    Suffix_Noun_Step3
    Suffix_Verb_Step1
    Suffix_Verb_Step2a
    Suffix_Verb_Step2b
    Suffix_Verb_Step2c

    // character-level normalization before / after stemming
    Normalize_post
    Normalize_pre

    // word-class guess from the leading characters
    Checks1
)

// The only routine exported to callers.
externals ( stem )

// No character groupings are used by this stemmer.
groupings ( )


// Normalizations
// Pre-stemming normalization. Walks the word from the cursor, one character
// per loop pass: a character listed in the table below is deleted or
// rewritten, any other character is stepped over with `next`.
define Normalize_pre as (
    // The pass count is the word length (in characters) at loop entry. Every
    // table entry is a single character, so each pass handles exactly one
    // character of the original word.
    loop len (
        (
            [substring] among (
                '{aan}' '{uun}' '{iin}' '{aa}' '{uu}' '{ii}' '{oo}' '{~}'( delete ) // strip vocalization
                '{_}' ( delete ) // strip kasheeda

                // Punctuation marks
                '.' ',' ';' ':'  '?' '!' '/' '*' '%' '\' '"' ( delete) // General
                '{,}' '{;}' '{?}'  ( delete ) // Arabic-specific

                // Hindu–Arabic numerals (rewritten as ASCII digits)
                '{0}' ( <- '0')
                '{1}' ( <- '1')
                '{2}' ( <- '2')
                '{3}' ( <- '3')
                '{4}' ( <- '4')
                '{5}' ( <- '5')
                '{6}' ( <- '6')
                '{7}' ( <- '7')
                '{8}' ( <- '8')
                '{9}' ( <- '9')
                // Arabic percent sign, decimal separator, thousands separator
                '{%}' '{.}' '{'}' ( delete )

                // Shaped forms (each variant is replaced by its base letter)
                '{o1}' ( <- '{o}' ) // HAMZA
                '{ao1}' '{ao2}'  ( <- '{ao}' ) // ALEF_HAMZA_ABOVE
                '{ao_1}' '{ao_2}' ( <- '{ao_}' ) // ALEF_HAMZA_BELOW
                '{yo1}'  '{yo2}' '{yo3}'  '{yo4}'  ( <- '{yo}' ) // YEH_HAMZA
                '{a~1}'  '{a~2}'( <- '{a~}' ) // ALEF_MADDA
                '{wo1}' '{wo2}'( <- '{wo}' ) // WAW_HAMZA
                '{a1}' '{a2}' ( <- '{a}' ) // ALEF
                '{b1}' '{b2}' '{b3}'  '{b4}'  ( <- '{b}' ) // BEH
                '{t_1}'  '{t_2}' ( <- '{t_}' ) // TEH_MARBUTA
                '{t1}'   '{t2}' '{t3}' '{t4}'  ( <- '{t}' ) // TEH
                '{th1}' '{th2}' '{th3}' '{th4}' ( <- '{th}' ) // THEH
                '{j1}' '{j2}'  '{j3}' '{j4}'(  <- '{j}' ) // JEEM
                '{h1}' '{h2}' '{h3}' '{h4}' ( <- '{h}' ) // HAH
                '{x1}' '{x2}' '{x3}' '{x4}'( <- '{x}' ) // KHAH
                '{d1}'  '{d2}'  ( <- '{d}' ) // DAL
                '{dz1}''{dz2}' ( <- '{dz}' ) // THAL
                '{r1}' '{r2}'( <- '{r}' ) // REH
                '{z1}' '{z2}'  ( <- '{z}' ) // ZAIN
                '{s1}'  '{s2}'   '{s3}' '{s4}'( <- '{s}' ) // SEEN
                '{sh1}' '{sh2}' '{sh3}' '{sh4}' ( <- '{sh}' ) // SHEEN
                '{c1}' '{c2}' '{c3}' '{c4}'( <- '{c}' ) // SAD
                '{dh1}'    '{dh2}'   '{dh3}'  '{dh4}'( <- '{dh}' ) // DAD
                '{tt1}'  '{tt2}'  '{tt3}'  '{tt4}' ( <- '{tt}' ) // TAH
                '{zh1}' '{zh2}' '{zh3}'    '{zh4}'( <- '{zh}' ) // ZAH
                '{i1}' '{i2}' '{i3}'  '{i4}'( <- '{i}' ) // AIN
                '{gh1}' '{gh2}' '{gh3}'  '{gh4}'( <- '{gh}' ) // GHAIN
                '{f1}'  '{f2}' '{f3}'  '{f4}' ( <- '{f}' ) // FEH
                '{q1}' '{q2}' '{q3}' '{q4}' ( <- '{q}' ) // QAF
                '{k1}' '{k2}' '{k3}'  '{k4}'( <- '{k}' ) // KAF
                '{l1}' '{l2}' '{l3}' '{l4}'( <- '{l}' ) // LAM
                '{m1}' '{m2}'  '{m3}' '{m4}'   ( <- '{m}' ) // MEEM
                '{n1}'  '{n2}' '{n3}'  '{n4}'( <- '{n}' ) // NOON
                '{e1}' '{e2}' '{e3}' '{e4}' ( <- '{e}' ) // HEH
                '{w1}'  '{w2}'  ( <- '{w}' ) // WAW
                '{a_1}' '{a_2}' ( <- '{a_}' ) // ALEF_MAKSURA
                '{y1}' '{y2}' '{y3}' '{y4}' ( <- '{y}' ) // YEH

                // Ligatures Lam-Alef (one character expands to two letters)
                '{la}'  '{la2}'     (<- '{l}{a}')
                '{lao}'  '{lao2}'   (<- '{l}{ao}')
                '{lao_}'  '{lao_2}' (<- '{l}{ao_}')
                '{la~}'  '{la~2}'    (<- '{l}{a~}')

            )
        )
        or
        // not in the table: leave the character as it is and move past it
        next
    )
)

// Post-stemming normalization of hamza carriers, in two passes:
//   1. a carrier at the very end of the word becomes a bare hamza;
//   2. every carrier still present anywhere in the word is folded to its
//      plain base letter (alef, waw or yeh).
// Both passes are wrapped in `do`, so the routine signals success and leaves
// the cursor where it found it whether or not anything was rewritten.
define Normalize_post as (

    do (
        // normalize last hamza: `backwards` matches from the end of the word
        backwards (
            [substring] among (
                '{ao}' '{ao_}' '{a~}' ( <- '{o}' )
                '{wo}' ( <- '{o}' )
                '{yo}' ( <- '{o}' )
            )
        )
    )

    // normalize other hamza's
    // `repeat` keeps going until `next` fails at the end of the word, so the
    // whole word is scanned without depending on `word_len`. That variable is
    // never assigned in this routine and would only hold whatever length the
    // last prefix/suffix routine happened to store.
    do repeat (
        (
            [substring] among (
                '{ao}' '{ao_}' '{a~}' ( <- '{a}' )
                '{wo}' ( <- '{w}' )
                '{yo}' ( <- '{y}' )
            )
        )
        or
        next
    )
)

// Checks
// Guess the word class from the leading characters. When one of the listed
// prefixes starts the word and the word is long enough, flag it as a defined
// noun and clear the verb flag. Fails (changing no flags) otherwise.
define Checks1 as (
    // length is captured first; the tests below compare against it
    $word_len = len

    [substring] among (
        // two-letter prefixes: word must be longer than 3 characters
        '{a}{l}'
        '{l}{l}'
            ( $word_len > 3  set is_noun  unset is_verb  set is_defined )

        // three-letter prefixes: word must be longer than 4 characters
        '{k}{a}{l}'
        '{b}{a}{l}'
            ( $word_len > 4  set is_noun  unset is_verb  set is_defined )
    )
)


//prefixes
// Collapse a leading alef-with-hamza-above that is followed by another
// alef/waw hamza form: the two characters are replaced by a single one.
// Applies only to words longer than 3 characters.
define Prefix_Step1 as (
    $word_len = len
    [substring] among (
        '{ao}{ao}'
        '{ao}{wo}'    ( $word_len > 3  <- '{ao}' )

        '{ao}{a~}'    ( $word_len > 3  <- '{a~}' )
        '{ao}{a}'     ( $word_len > 3  <- '{a}' )
        '{ao}{ao_}'   ( $word_len > 3  <- '{ao_}' )

        // disabled (rare case):
        // '{ao}' ( $word_len > 3  delete )
    )
)

// Drop a leading feh or waw from words longer than 3 characters, unless the
// letter is immediately followed by alef (then the routine fails and the word
// is left alone).
define Prefix_Step2 as (
    $word_len = len

    // guards: both are pure look-ahead tests, the cursor does not move
    not '{w}{a}'
    not '{f}{a}'

    [substring] among (
        '{w}' '{f}'   ( $word_len > 3  delete )
    )
)

// Noun prefixes (it is noun and defined): remove a leading two-letter prefix
// from words longer than 4 characters, or a leading three-letter prefix from
// words longer than 5.
define Prefix_Step3a_Noun as (
    $word_len = len
    [substring] among (
        '{a}{l}'
        '{l}{l}'
            ( $word_len > 4  delete )

        '{k}{a}{l}'
        '{b}{a}{l}'
            ( $word_len > 5  delete )
    )
)

// Noun prefixes (probably noun and defined): strip a single leading beh, or
// reduce a doubled leading beh/kaf to one letter. Applies only to words
// longer than 3 characters.
define Prefix_Step3b_Noun as (
    $word_len = len

    // exception: words starting with beh + alef are not touched
    not '{b}{a}'

    [substring] among (
        '{k}{k}'   ( $word_len > 3  <- '{k}' )
        '{b}{b}'   ( $word_len > 3  <- '{b}' )
        '{b}'      ( $word_len > 3  delete )

        // disabled, BUG: cause confusion
        // '{k}' '{l}' ( $word_len > 3  delete )
    )
)

// Verb prefixes: for words longer than 4 characters, drop a leading seen when
// it is followed by yeh, teh, noon or alef-with-hamza-above. The second
// letter is kept.
define Prefix_Step3_Verb as (
    $word_len = len
    [substring] among (
        '{s}{ao}'   ( $word_len > 4  <- '{ao}' )
        '{s}{n}'    ( $word_len > 4  <- '{n}' )
        '{s}{t}'    ( $word_len > 4  <- '{t}' )
        '{s}{y}'    ( $word_len > 4  <- '{y}' )

        // disabled, BUG: cause confusion
        // '{s}' ( $word_len > 4  delete )
    )
)

// Verb prefixes: for words longer than 4 characters that begin with
// yeh/noon/teh + seen + teh, mark the word as a verb (clearing the noun flag)
// and replace the first letter of the prefix with alef.
define Prefix_Step4_Verb as (
    $word_len = len
    [substring] among (
        '{t}{s}{t}'
        '{n}{s}{t}'
        '{y}{s}{t}'
            ( $word_len > 4  set is_verb  unset is_noun  <- '{a}{s}{t}' )
    )
)

// suffixes
backwardmode (

    // Every routine in this section runs in backward mode, so its strings are
    // matched against the end of the word. Each one records the current word
    // length first and removes the matched ending only if that length meets
    // the minimum given beside the ending.

    // ---- noun endings ----

    // One-, two- and three-letter endings; minimum lengths 4, 5 and 6.
    define Suffix_Noun_Step1a as (
        $word_len = len
        [substring] among (
            '{e}' '{k}' '{y}'
                ( $word_len >= 4  delete )
            '{e}{m}' '{e}{n}' '{e}{a}' '{k}{m}' '{n}{a}'
                ( $word_len >= 5  delete )
            '{e}{m}{a}' '{k}{m}{a}'
                ( $word_len >= 6  delete )
        )
    )

    // Final noon, only in words longer than 5 characters.
    define Suffix_Noun_Step1b as (
        $word_len = len
        [substring] among (
            '{n}'   ( $word_len > 5  delete )
        )
    )

    // Final alef, yeh or waw, only in words longer than 4 characters.
    define Suffix_Noun_Step2a as (
        $word_len = len
        [substring] among (
            '{w}' '{y}' '{a}'   ( $word_len > 4  delete )
        )
    )

    // Final alef + teh, in words of at least 5 characters.
    define Suffix_Noun_Step2b as (
        $word_len = len
        [substring] among (
            '{a}{t}'   ( $word_len >= 5  delete )
        )
    )

    // Final teh, in words of at least 4 characters.
    define Suffix_Noun_Step2c1 as (
        $word_len = len
        [substring] among (
            '{t}'   ( $word_len >= 4  delete )
        )
    )

    // feminine t_ : final teh marbuta, in words of at least 4 characters.
    define Suffix_Noun_Step2c2 as (
        $word_len = len
        [substring] among (
            '{t_}'   ( $word_len >= 4  delete )
        )
    )

    // ya' nisbiya : final yeh, in words of at least 3 characters.
    define Suffix_Noun_Step3 as (
        $word_len = len
        [substring] among (
            '{y}'   ( $word_len >= 3  delete )
        )
    )

    // ---- verb endings ----

    // One-, two- and three-letter endings; minimum lengths 4, 5 and 6.
    define Suffix_Verb_Step1 as (
        $word_len = len
        [substring] among (
            '{k}' '{e}'
                ( $word_len >= 4  delete )
            '{k}{n}' '{k}{m}' '{e}{n}' '{e}{m}' '{e}{a}' '{n}{a}' '{n}{y}'
                ( $word_len >= 5  delete )
            '{k}{m}{w}' '{k}{m}{a}' '{e}{m}{a}'
                ( $word_len >= 6  delete )
        )
    )

    define Suffix_Verb_Step2a as (
        $word_len = len
        [substring] among (
            // single-letter endings
            '{t}' '{a}' '{n}' '{y}'
                ( $word_len >= 4  delete )
            // past
            '{t}{n}' '{t}{a}' '{n}{a}'
                ( $word_len >= 5  delete )
            // present
            '{y}{n}' '{w}{n}' '{a}{n}'
                ( $word_len > 5  delete )
            '{t}{m}{a}'
                ( $word_len >= 6  delete )
        )
    )

    // Final waw + alef or teh + meem, in words of at least 5 characters.
    define Suffix_Verb_Step2b as (
        $word_len = len
        [substring] among (
            '{t}{m}' '{w}{a}'   ( $word_len >= 5  delete )
        )
    )

    // Final waw (length >= 4) or teh + meem + waw (length >= 6).
    define Suffix_Verb_Step2c as (
        $word_len = len
        [substring] among (
            '{t}{m}{w}'   ( $word_len >= 6  delete )
            '{w}'         ( $word_len >= 4  delete )
        )
    )

    // ---- any word ----

    // A final alef maksura is rewritten as yeh (spell error); no length test.
    define Suffix_All_alef_maqsura as (
        $word_len = len
        [substring] among (
            '{a_}'   ( <- '{y}' )
            // disabled alternatives:
            // '{a_}' ( delete )      // if noun > 3
            // '{a_}' ( <- '{a}' )    // if verb
        )
    )
)

// Entry point of the stemmer. Order of operations: reset the word-class
// flags, guess the class (Checks1), normalize characters (Normalize_pre),
// strip suffixes in backward mode, strip prefixes in forward mode, then
// normalize hamza forms (Normalize_post). Each stage is wrapped in `do`, so a
// stage that fails leaves the cursor in place and the next stage still runs.
define stem as (
    // set initial values
    // (a word starts out as possibly noun and possibly verb, not defined)
    set is_noun
    set is_verb
    unset is_defined

    // guess type and properties
    // NOTE: this runs before Normalize_pre, so it inspects the raw input.
    do Checks1

    // normalization pre-stemming
    do Normalize_pre


    backwards (

       // Three alternatives joined by `or`: verb suffixes, noun suffixes,
       // alef maqsura. The first alternative that succeeds ends the stage.
       do (
              //Suffixes for verbs
            (
           // skipped when Checks1 cleared is_verb
           is_verb
           (
               (
                  // step 1 must apply at least once, and is repeated while it
                  // keeps applying
                  (atleast 1 Suffix_Verb_Step1)
                  // then one step 2 routine; `next` is the fallback when
                  // neither of them applies
                  ( Suffix_Verb_Step2a or Suffix_Verb_Step2c  or next)
                )
                // step 1 did not apply: try the step 2 routines on their own
                or Suffix_Verb_Step2b
                or Suffix_Verb_Step2a
            )
           )
            //Suffixes for nouns
          or (
               is_noun
                (

                 // `try` makes this whole group optional; Suffix_Noun_Step3
                 // below is attempted whether or not it removed anything.
                 try (
                     Suffix_Noun_Step2c2
                     // Step1a is only attempted when the word was not
                     // flagged as defined
                     or (not is_defined Suffix_Noun_Step1a (
                            Suffix_Noun_Step2a
                            or Suffix_Noun_Step2b
                            or Suffix_Noun_Step2c1
                            or next))
                     // Step1b must be followed by one of the step 2 routines
                     // (no `next` fallback in this group)
                     or (Suffix_Noun_Step1b (
                            Suffix_Noun_Step2a
                            or Suffix_Noun_Step2b
                            or Suffix_Noun_Step2c1))
                     or (not is_defined Suffix_Noun_Step2a)
                     or (Suffix_Noun_Step2b)
                 )
                 // the noun alternative succeeds only if this step applies
                 Suffix_Noun_Step3
                 )

            )

            // Suffixes for alef maqsura
            or  Suffix_All_alef_maqsura
        )
    )

    //Prefixes
    do (
       // steps 1 and 2 are optional
       try Prefix_Step1
       try Prefix_Step2
       // then the first of these that succeeds: definite-noun prefix, other
       // noun prefix, or verb prefix (verb step 3 optional, step 4 required)
       ( Prefix_Step3a_Noun
         or (is_noun Prefix_Step3b_Noun)
         or (is_verb try Prefix_Step3_Verb Prefix_Step4_Verb)
         )
    )

    // normalization post-stemming
    do Normalize_post

)