File: bidi.py

package info (click to toggle)
fpdf2 2.8.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 114,352 kB
  • sloc: python: 50,410; sh: 133; makefile: 12
file content (811 lines) | stat: -rw-r--r-- 32,784 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
# This is an implementation of the Unicode Standard Annex #9
# Unicode bidirectional algorithm - Revision 48 for Unicode 15.1.0
# https://unicode.org/reports/tr9/

import unicodedata
from collections import deque
from dataclasses import dataclass, replace
from operator import itemgetter
from typing import Optional, TypedDict

from .enums import TextDirection

# Highest embedding level an explicit embedding, override or isolate may set
# (the "max_depth" of UAX #9, BD2). Requests for a deeper level are counted as
# overflows by BidiParagraph.get_bidi_characters instead of being applied.
MAX_DEPTH: int = 125

# BidiBrackets 15.1.0 2023-01-18
# Loaded from https://www.unicode.org/Public/UNIDATA/BidiBrackets.txt
# This table can be dropped once this information is exposed by "unicodedata"
BIDI_BRACKETS: dict[str, dict[str, str]] = {
    "(": {"pair": ")", "type": "o"},
    ")": {"pair": "(", "type": "c"},
    "[": {"pair": "]", "type": "o"},
    "]": {"pair": "[", "type": "c"},
    "{": {"pair": "}", "type": "o"},
    "}": {"pair": "{", "type": "c"},
    "༺": {"pair": "༻", "type": "o"},
    "༻": {"pair": "༺", "type": "c"},
    "༼": {"pair": "༽", "type": "o"},
    "༽": {"pair": "༼", "type": "c"},
    "᚛": {"pair": "᚜", "type": "o"},
    "᚜": {"pair": "᚛", "type": "c"},
    "⁅": {"pair": "⁆", "type": "o"},
    "⁆": {"pair": "⁅", "type": "c"},
    "⁽": {"pair": "⁾", "type": "o"},
    "⁾": {"pair": "⁽", "type": "c"},
    "₍": {"pair": "₎", "type": "o"},
    "₎": {"pair": "₍", "type": "c"},
    "⌈": {"pair": "⌉", "type": "o"},
    "⌉": {"pair": "⌈", "type": "c"},
    "⌊": {"pair": "⌋", "type": "o"},
    "⌋": {"pair": "⌊", "type": "c"},
    "〈": {"pair": "〉", "type": "o"},
    "〉": {"pair": "〈", "type": "c"},
    "❨": {"pair": "❩", "type": "o"},
    "❩": {"pair": "❨", "type": "c"},
    "❪": {"pair": "❫", "type": "o"},
    "❫": {"pair": "❪", "type": "c"},
    "❬": {"pair": "❭", "type": "o"},
    "❭": {"pair": "❬", "type": "c"},
    "❮": {"pair": "❯", "type": "o"},
    "❯": {"pair": "❮", "type": "c"},
    "❰": {"pair": "❱", "type": "o"},
    "❱": {"pair": "❰", "type": "c"},
    "❲": {"pair": "❳", "type": "o"},
    "❳": {"pair": "❲", "type": "c"},
    "❴": {"pair": "❵", "type": "o"},
    "❵": {"pair": "❴", "type": "c"},
    "⟅": {"pair": "⟆", "type": "o"},
    "⟆": {"pair": "⟅", "type": "c"},
    "⟦": {"pair": "⟧", "type": "o"},
    "⟧": {"pair": "⟦", "type": "c"},
    "⟨": {"pair": "⟩", "type": "o"},
    "⟩": {"pair": "⟨", "type": "c"},
    "⟪": {"pair": "⟫", "type": "o"},
    "⟫": {"pair": "⟪", "type": "c"},
    "⟬": {"pair": "⟭", "type": "o"},
    "⟭": {"pair": "⟬", "type": "c"},
    "⟮": {"pair": "⟯", "type": "o"},
    "⟯": {"pair": "⟮", "type": "c"},
    "⦃": {"pair": "⦄", "type": "o"},
    "⦄": {"pair": "⦃", "type": "c"},
    "⦅": {"pair": "⦆", "type": "o"},
    "⦆": {"pair": "⦅", "type": "c"},
    "⦇": {"pair": "⦈", "type": "o"},
    "⦈": {"pair": "⦇", "type": "c"},
    "⦉": {"pair": "⦊", "type": "o"},
    "⦊": {"pair": "⦉", "type": "c"},
    "⦋": {"pair": "⦌", "type": "o"},
    "⦌": {"pair": "⦋", "type": "c"},
    "⦍": {"pair": "⦐", "type": "o"},
    "⦎": {"pair": "⦏", "type": "c"},
    "⦏": {"pair": "⦎", "type": "o"},
    "⦐": {"pair": "⦍", "type": "c"},
    "⦑": {"pair": "⦒", "type": "o"},
    "⦒": {"pair": "⦑", "type": "c"},
    "⦓": {"pair": "⦔", "type": "o"},
    "⦔": {"pair": "⦓", "type": "c"},
    "⦕": {"pair": "⦖", "type": "o"},
    "⦖": {"pair": "⦕", "type": "c"},
    "⦗": {"pair": "⦘", "type": "o"},
    "⦘": {"pair": "⦗", "type": "c"},
    "⧘": {"pair": "⧙", "type": "o"},
    "⧙": {"pair": "⧘", "type": "c"},
    "⧚": {"pair": "⧛", "type": "o"},
    "⧛": {"pair": "⧚", "type": "c"},
    "⧼": {"pair": "⧽", "type": "o"},
    "⧽": {"pair": "⧼", "type": "c"},
    "⸢": {"pair": "⸣", "type": "o"},
    "⸣": {"pair": "⸢", "type": "c"},
    "⸤": {"pair": "⸥", "type": "o"},
    "⸥": {"pair": "⸤", "type": "c"},
    "⸦": {"pair": "⸧", "type": "o"},
    "⸧": {"pair": "⸦", "type": "c"},
    "⸨": {"pair": "⸩", "type": "o"},
    "⸩": {"pair": "⸨", "type": "c"},
    "⹕": {"pair": "⹖", "type": "o"},
    "⹖": {"pair": "⹕", "type": "c"},
    "⹗": {"pair": "⹘", "type": "o"},
    "⹘": {"pair": "⹗", "type": "c"},
    "⹙": {"pair": "⹚", "type": "o"},
    "⹚": {"pair": "⹙", "type": "c"},
    "⹛": {"pair": "⹜", "type": "o"},
    "⹜": {"pair": "⹛", "type": "c"},
    "〈": {"pair": "〉", "type": "o"},
    "〉": {"pair": "〈", "type": "c"},
    "《": {"pair": "》", "type": "o"},
    "》": {"pair": "《", "type": "c"},
    "「": {"pair": "」", "type": "o"},
    "」": {"pair": "「", "type": "c"},
    "『": {"pair": "』", "type": "o"},
    "』": {"pair": "『", "type": "c"},
    "【": {"pair": "】", "type": "o"},
    "】": {"pair": "【", "type": "c"},
    "〔": {"pair": "〕", "type": "o"},
    "〕": {"pair": "〔", "type": "c"},
    "〖": {"pair": "〗", "type": "o"},
    "〗": {"pair": "〖", "type": "c"},
    "〘": {"pair": "〙", "type": "o"},
    "〙": {"pair": "〘", "type": "c"},
    "〚": {"pair": "〛", "type": "o"},
    "〛": {"pair": "〚", "type": "c"},
    "﹙": {"pair": "﹚", "type": "o"},
    "﹚": {"pair": "﹙", "type": "c"},
    "﹛": {"pair": "﹜", "type": "o"},
    "﹜": {"pair": "﹛", "type": "c"},
    "﹝": {"pair": "﹞", "type": "o"},
    "﹞": {"pair": "﹝", "type": "c"},
    "(": {"pair": ")", "type": "o"},
    ")": {"pair": "(", "type": "c"},
    "[": {"pair": "]", "type": "o"},
    "]": {"pair": "[", "type": "c"},
    "{": {"pair": "}", "type": "o"},
    "}": {"pair": "{", "type": "c"},
    "⦅": {"pair": "⦆", "type": "o"},
    "⦆": {"pair": "⦅", "type": "c"},
    "「": {"pair": "」", "type": "o"},
    "」": {"pair": "「", "type": "c"},
}


class BidiCharacter:
    """
    A single character of the text together with its bidi resolution state.

    `bidi_class` is the working class, rewritten while the rules are applied;
    `original_bidi_class` keeps the class assigned at construction time.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ) -> None:
        # In debug mode every upper-case character is handled as right-to-left,
        # otherwise the class comes from the Unicode character database.
        initial_class = (
            "R"
            if debug and character.isupper()
            else unicodedata.bidirectional(character)
        )
        self.character_index = character_index
        self.character = character
        self.bidi_class = initial_class
        self.original_bidi_class = initial_class
        self.embedding_level = embedding_level
        self.direction = None

    def get_direction_from_level(self) -> str:
        """Return "R" for an odd embedding level and "L" for an even one."""
        if self.embedding_level % 2:
            return "R"
        return "L"

    def set_class(self, cls: str) -> None:
        """Replace the working bidi class (the original class is kept)."""
        self.bidi_class = cls

    def __repr__(self) -> str:
        labelled_values = (
            ("character_index", self.character_index),
            ("character", self.character),
            ("bidi_class", self.bidi_class),
            ("original_bidi_class", self.original_bidi_class),
            ("embedding_level", self.embedding_level),
            ("direction", self.direction),
        )
        return " ".join(f"{label}: {value}" for label, value in labelled_values)


@dataclass
class DirectionalStatus:
    """
    Entry of the directional status stack used while the explicit embeddings,
    overrides and isolates are processed (see BidiParagraph.get_bidi_characters).
    """
    __slots__ = [
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    ]
    embedding_level: int  # between 0 and MAX_DEPTH
    directional_override_status: str  # "N" (Neutral), "L" (Left) or "R" (Right)
    # True when the entry was pushed by an isolate initiator, False when it was
    # pushed by an embedding or an override (and for the initial entry)
    directional_isolate_status: bool


class IsolatingRun:
    """
    An isolating run sequence (UAX #9, BD13) whose characters are resolved on creation.

    Building an instance applies, in place and in this order, the weak type rules
    (W1-W7), the neutral type rules (N0-N2) and the implicit level rules (I1-I2)
    to the characters it receives.
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: list[BidiCharacter], sos: str, eos: str) -> None:
        """
        characters: the characters of the sequence in logical order; their
                    `bidi_class` and `embedding_level` are updated in place.
        sos: direction ("L" or "R") assumed before the first character.
        eos: direction ("L" or "R") assumed after the last character.
        """
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        # The order matters: each group of rules works on the output of the previous one
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1 to W7 to the characters of the run, in place."""
        chars = self.characters
        count = len(chars)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class != "NSM":
                continue
            if i == 0:
                bidi_char.set_class(self.previous_direction)
                continue
            previous_class = chars[i - 1].bidi_class
            bidi_char.set_class(
                "ON"
                if previous_class in ("LRI", "RLI", "FSI", "PDI")
                else previous_class
            )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                # remembered before W3 rewrites AL, so that W2 can still see it
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        #     The first and the last characters have only one neighbour and are skipped.
        for i in range(1, count - 1):
            bidi_char = chars[i]
            before = chars[i - 1].bidi_class
            after = chars[i + 1].bidi_class
            if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
                bidi_char.set_class("EN")
            if (
                bidi_char.bidi_class == "CS"
                and before in ("AN", "EN")
                and after == before
            ):
                bidi_char.set_class(before)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # Each run of terminators is handled as a whole with plain loops: looking
        # for the end of the run recursively, once per character, exhausted the
        # interpreter stack on long runs and made the scan quadratic.
        index = 0
        while index < count:
            current_class = chars[index].bidi_class
            if current_class == "ET":
                run_end = index + 1
                while run_end < count and chars[run_end].bidi_class == "ET":
                    run_end += 1
                touches_en = (index > 0 and chars[index - 1].bidi_class == "EN") or (
                    run_end < count and chars[run_end].bidi_class == "EN"
                )
                for position in range(index, run_end):
                    chars[position].set_class("EN" if touches_en else "ON")
                index = run_end
                continue
            if current_class in ("ES", "CS"):
                chars[index].set_class("ON")
            index += 1

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> list[tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns the (opening index, closing index) pairs sorted by opening index,
        or an empty list when more than 63 opening brackets are pending at once.
        """
        # U+232A and U+3009 are canonically equivalent closing angle brackets and
        # must match each other; escapes are used because the glyphs look identical
        closing_angle_brackets = ("\u232a", "\u3009")
        pending: list[tuple[str, int]] = []  # stack of (opening bracket, index)
        bracket_pairs: list[tuple[int, int]] = []
        for index, char in enumerate(self.characters):
            symbol = char.character
            if char.bidi_class != "ON" or symbol not in BIDI_BRACKETS:
                continue
            bracket_type = BIDI_BRACKETS[symbol]["type"]
            if bracket_type == "o":
                if len(pending) >= 63:
                    return []
                pending.append((symbol, index))
            elif bracket_type == "c":
                # search the stack from its top for the innermost matching opener
                for depth in range(len(pending) - 1, -1, -1):
                    open_char, open_index = pending[depth]
                    expected = BIDI_BRACKETS[open_char]["pair"]
                    if expected == symbol or (
                        expected in closing_angle_brackets
                        and symbol in closing_angle_brackets
                    ):
                        bracket_pairs.append((open_index, index))
                        # the matched opener and everything opened after it are discarded
                        del pending[depth:]
                        break
        bracket_pairs.sort(key=itemgetter(0))
        return bracket_pairs

    def resolve_neutral_types(self) -> None:
        """Apply rules N0 to N2 to the characters of the run, in place."""
        chars = self.characters
        count = len(chars)
        # For these rules, European and Arabic numbers act as if they were R
        strong_directions = {"L": "L", "R": "R", "AN": "R", "EN": "R"}

        def previous_strong(index: int) -> str:
            # direction of the closest strong character before `index`, or sos
            for position in range(index - 1, -1, -1):
                direction = strong_directions.get(chars[position].bidi_class)
                if direction:
                    return direction
            return self.previous_direction

        def find_next_strong(index: int) -> tuple[int, str]:
            # position and direction of the closest strong character after `index`,
            # or (count, eos) when there is none
            for position in range(index + 1, count):
                direction = strong_directions.get(chars[position].bidi_class)
                if direction:
                    return position, direction
            return count, self.next_direction

        # N0. Process bracket pairs in logical order of their opening brackets
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = chars[0].get_direction_from_level()
            opposite_direction = "L" if embedding_direction == "R" else "R"
            for opening, closing in brackets:
                enclosed = {
                    strong_directions.get(chars[index].bidi_class)
                    for index in range(opening, closing)
                }
                if embedding_direction in enclosed:
                    resulting_direction = embedding_direction
                elif opposite_direction in enclosed:
                    # only opposite-direction content: the context before the
                    # opening bracket decides
                    if previous_strong(opening) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                else:
                    # no strong type inside the pair: left for N1-N2
                    continue
                for bracket_index in (opening, closing):
                    chars[bracket_index].bidi_class = resulting_direction
                    # Any number of characters that were NSM before W1 and that
                    # immediately follow a bracket changed by N0 take its type
                    follower = bracket_index + 1
                    while (
                        follower < count
                        and chars[follower].original_bidi_class == "NSM"
                    ):
                        chars[follower].bidi_class = resulting_direction
                        follower += 1

        # N1-N2. Neutrals and isolate formatting characters take the direction of
        # the surrounding strong text when both sides agree, and the embedding
        # direction otherwise.
        # Characters after the current one are still unresolved, so one forward
        # scan stays valid for the whole run of neutrals it crossed.
        next_strong_position = -1
        next_direction = self.next_direction
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class not in (
                "B",
                "S",
                "WS",
                "ON",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                continue
            if i >= next_strong_position:
                next_strong_position, next_direction = find_next_strong(i)
            preceding_direction = previous_strong(i)
            if preceding_direction == next_direction:
                bidi_char.bidi_class = preceding_direction
            else:
                bidi_char.bidi_class = bidi_char.get_direction_from_level()

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2: raise the embedding levels according to the resolved types."""
        for bidi_char in self.characters:
            if bidi_char.embedding_level % 2 == 0:
                # I1. For all characters with an even (left-to-right) embedding level,
                #     those of type R go up one level and those of type AN or EN go up two levels.
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                elif bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2
            elif bidi_char.bidi_class in ("L", "EN", "AN"):
                # I2. For all characters with an odd (right-to-left) embedding level,
                #     those of type L, EN or AN go up one level.
                bidi_char.embedding_level += 1


def auto_detect_base_direction(
    string: str, stop_at_pdi: bool = False, debug: bool = False
) -> TextDirection:
    """
    This function applies rules P2 and P3 to detect the direction of a paragraph, returning
    the first strong direction and skipping over isolate sequences.
    P1 must be applied before calling this function (breaking into paragraphs)
    stop_at_pdi can be set to True to get the direction of a single isolate sequence:
    the search then ends, with LTR, at the first PDI that closes no isolate opened in `string`
    debug can be set to True to handle upper-case characters as right-to-left, the same
    convention BidiCharacter applies in debug mode
    """
    # Auto-LTR (standard BIDI) uses the first L/R/AL character, and is LTR if none is found.
    isolate = 0  # number of isolates currently open
    for char in string:
        # The case of the character itself is what matters in debug mode: the
        # names of the bidi classes are always upper-case.
        if debug and char.isupper():
            bidi_class = "R"
        else:
            bidi_class = unicodedata.bidirectional(char)

        if bidi_class in ("LRI", "RLI", "FSI"):
            isolate += 1
        elif bidi_class == "PDI":
            if isolate > 0:
                isolate -= 1
            elif stop_at_pdi:
                return TextDirection.LTR
        elif isolate == 0:
            # strong characters inside an isolate are ignored
            if bidi_class in ("R", "AL"):
                return TextDirection.RTL
            if bidi_class == "L":
                return TextDirection.LTR
    return TextDirection.LTR


def calculate_isolate_runs(paragraph: list[BidiCharacter]) -> list[IsolatingRun]:
    """
    Split a paragraph into isolating run sequences (BD13 and X10).

    The characters are first grouped into level runs (maximal sequences sharing
    the same embedding level). A level run ending with an isolate initiator is
    then joined with the following level runs of the same level that start with
    a PDI. One IsolatingRun is built for every resulting sequence, which
    resolves its characters in place.

    An empty paragraph produces an empty list.
    """
    if not paragraph:
        return []

    # BD13 and X10
    class LevelRun(TypedDict):
        level: int
        text: list[BidiCharacter]
        complete: bool  # True once the run belongs to an isolating run sequence
        sos: str
        eos: str

    isolate_initiators = ("LRI", "RLI", "FSI")

    def new_level_run(level: int, text: list[BidiCharacter]) -> LevelRun:
        return {"level": level, "text": text, "complete": False, "sos": "", "eos": ""}

    def level_to_direction(level: int) -> str:
        return "L" if level % 2 == 0 else "R"

    # split the paragraph every time the embedding level changes
    level_run: list[LevelRun] = []
    lr: list[BidiCharacter] = []
    lr_embedding_level: int = paragraph[0].embedding_level
    for bidi_char in paragraph:
        if bidi_char.embedding_level != lr_embedding_level:
            level_run.append(new_level_run(lr_embedding_level, lr))
            lr = []
            lr_embedding_level = bidi_char.embedding_level
        lr.append(bidi_char)
    level_run.append(new_level_run(lr_embedding_level, lr))

    # compute sos, eos for each level run: the direction of the higher of the
    # levels found on both sides of the boundary
    last_index = len(level_run) - 1
    for index, lr1 in enumerate(level_run):
        if index == 0:
            sos_level = lr1["level"]
        else:
            sos_level = max(lr1["level"], level_run[index - 1]["level"])
        if (
            index == last_index
            or lr1["text"][-1].original_bidi_class in isolate_initiators
        ):
            # X10 - last char is an isolator without matching PDI - set EOS to embedding level
            # (when a matching PDI exists, eos is replaced below by the one of
            # the level run that closes the sequence)
            eos_level = lr1["level"]
        else:
            eos_level = max(lr1["level"], level_run[index + 1]["level"])
        lr1["sos"] = level_to_direction(sos_level)
        lr1["eos"] = level_to_direction(eos_level)

    # combine levels runs to create isolate runs
    isolate_runs: list[IsolatingRun] = []
    for index, lr2 in enumerate(level_run):
        if lr2["complete"]:
            # already absorbed by an earlier sequence
            continue
        lr2["complete"] = True
        ir_chars = lr2["text"]
        eos = lr2["eos"]
        if ir_chars[-1].original_bidi_class in isolate_initiators:
            for nlr in level_run[index + 1 :]:
                if (
                    nlr["level"] == lr2["level"]
                    and nlr["text"][0].original_bidi_class == "PDI"
                ):
                    ir_chars.extend(nlr["text"])
                    nlr["complete"] = True
                    eos = nlr["eos"]
                    # keep going only while the sequence ends with another isolate initiator
                    if nlr["text"][-1].original_bidi_class not in isolate_initiators:
                        break
        isolate_runs.append(IsolatingRun(characters=ir_chars, sos=lr2["sos"], eos=eos))

    return isolate_runs


class BidiParagraph:
    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self,
        text: str,
        base_direction: Optional[TextDirection] = None,
        debug: bool = False,
    ) -> None:
        self.text = text
        self.base_direction = (
            auto_detect_base_direction(self.text, debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: list[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> list[BidiCharacter]:
        return self.characters

    def get_characters_with_embedding_level(self) -> list[BidiCharacter]:
        # Calculate embedding level for each character after breaking isolating runs.
        # Only used on conformance testing
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> tuple[BidiCharacter, ...]:
        return self.reorder_resolved_levels()

    def get_all(self) -> tuple[list[BidiCharacter], tuple[BidiCharacter, ...]]:
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self) -> str:
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self) -> tuple[tuple[str, TextDirection], ...]:
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> None:
        # Explicit levels and directions. Rule X1

        stack: deque[DirectionalStatus] = deque()
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results: list[BidiCharacter] = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )

            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even
                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status
                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    while True:
                        if not stack[-1].directional_isolate_status:
                            stack.pop()
                            continue
                        break
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class
            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return
        self.characters = results
        calculate_isolate_runs(results)

    def split_bidi_fragments(self) -> tuple[tuple[str, TextDirection], ...]:
        bidi_fragments: list[tuple[str, TextDirection]] = []
        if len(self.characters) == 0:
            return ()
        current_fragment = ""
        current_direction = ""
        for c in self.characters:
            if c.get_direction_from_level() != current_direction:
                if current_fragment:
                    bidi_fragments.append(
                        (
                            current_fragment,
                            (
                                TextDirection.RTL
                                if current_direction == "R"
                                else TextDirection.LTR
                            ),
                        )
                    )
                current_fragment = ""
                current_direction = c.get_direction_from_level()
            current_fragment += c.character
        if current_fragment:
            bidi_fragments.append(
                (
                    current_fragment,
                    (
                        TextDirection.RTL
                        if current_direction == "R"
                        else TextDirection.LTR
                    ),
                )
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self) -> tuple[BidiCharacter, ...]:
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results: list[BidiCharacter] = []
            rev: list[BidiCharacter] = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)