File: bidi.py

package info (click to toggle)
fpdf2 2.8.7-2
links: PTS, VCS
area: main
in suites: forky, sid
size: 114,352 kB
sloc: python: 50,410; sh: 133; makefile: 12
file content (811 lines) | stat: -rw-r--r-- 32,784 bytes
# This is an implementation of the Unicode Standard Annex #9
# Unicode bidirectional algorithm - Revision 48 for Unicode 15.1.0
# https://unicode.org/reports/tr9/

import unicodedata
from collections import deque
from dataclasses import dataclass, replace
from operator import itemgetter
from typing import Optional, TypedDict

from .enums import TextDirection

# Highest embedding level an explicit embedding, override or isolate may reach.
# BidiParagraph.get_bidi_characters compares every newly computed level against
# this bound and counts the formatting character as an overflow (leaving the
# current level unchanged) when the bound would be exceeded.
MAX_DEPTH: int = 125

# BidiBrackets 15.1.0 2023-01-18
# Loaded from https://www.unicode.org/Public/UNIDATA/BidiBrackets.txt
# This table can be dropped once the standard "unicodedata" module exposes this information.
BIDI_BRACKETS: dict[str, dict[str, str]] = {
    "(": {"pair": ")", "type": "o"},
    ")": {"pair": "(", "type": "c"},
    "[": {"pair": "]", "type": "o"},
    "]": {"pair": "[", "type": "c"},
    "{": {"pair": "}", "type": "o"},
    "}": {"pair": "{", "type": "c"},
    "༺": {"pair": "༻", "type": "o"},
    "༻": {"pair": "༺", "type": "c"},
    "༼": {"pair": "༽", "type": "o"},
    "༽": {"pair": "༼", "type": "c"},
    "᚛": {"pair": "᚜", "type": "o"},
    "᚜": {"pair": "᚛", "type": "c"},
    "⁅": {"pair": "⁆", "type": "o"},
    "⁆": {"pair": "⁅", "type": "c"},
    "⁽": {"pair": "⁾", "type": "o"},
    "⁾": {"pair": "⁽", "type": "c"},
    "₍": {"pair": "₎", "type": "o"},
    "₎": {"pair": "₍", "type": "c"},
    "⌈": {"pair": "⌉", "type": "o"},
    "⌉": {"pair": "⌈", "type": "c"},
    "⌊": {"pair": "⌋", "type": "o"},
    "⌋": {"pair": "⌊", "type": "c"},
    "〈": {"pair": "〉", "type": "o"},
    "〉": {"pair": "〈", "type": "c"},
    "❨": {"pair": "❩", "type": "o"},
    "❩": {"pair": "❨", "type": "c"},
    "❪": {"pair": "❫", "type": "o"},
    "❫": {"pair": "❪", "type": "c"},
    "❬": {"pair": "❭", "type": "o"},
    "❭": {"pair": "❬", "type": "c"},
    "❮": {"pair": "❯", "type": "o"},
    "❯": {"pair": "❮", "type": "c"},
    "❰": {"pair": "❱", "type": "o"},
    "❱": {"pair": "❰", "type": "c"},
    "❲": {"pair": "❳", "type": "o"},
    "❳": {"pair": "❲", "type": "c"},
    "❴": {"pair": "❵", "type": "o"},
    "❵": {"pair": "❴", "type": "c"},
    "⟅": {"pair": "⟆", "type": "o"},
    "⟆": {"pair": "⟅", "type": "c"},
    "⟦": {"pair": "⟧", "type": "o"},
    "⟧": {"pair": "⟦", "type": "c"},
    "⟨": {"pair": "⟩", "type": "o"},
    "⟩": {"pair": "⟨", "type": "c"},
    "⟪": {"pair": "⟫", "type": "o"},
    "⟫": {"pair": "⟪", "type": "c"},
    "⟬": {"pair": "⟭", "type": "o"},
    "⟭": {"pair": "⟬", "type": "c"},
    "⟮": {"pair": "⟯", "type": "o"},
    "⟯": {"pair": "⟮", "type": "c"},
    "⦃": {"pair": "⦄", "type": "o"},
    "⦄": {"pair": "⦃", "type": "c"},
    "⦅": {"pair": "⦆", "type": "o"},
    "⦆": {"pair": "⦅", "type": "c"},
    "⦇": {"pair": "⦈", "type": "o"},
    "⦈": {"pair": "⦇", "type": "c"},
    "⦉": {"pair": "⦊", "type": "o"},
    "⦊": {"pair": "⦉", "type": "c"},
    "⦋": {"pair": "⦌", "type": "o"},
    "⦌": {"pair": "⦋", "type": "c"},
    "⦍": {"pair": "⦐", "type": "o"},
    "⦎": {"pair": "⦏", "type": "c"},
    "⦏": {"pair": "⦎", "type": "o"},
    "⦐": {"pair": "⦍", "type": "c"},
    "⦑": {"pair": "⦒", "type": "o"},
    "⦒": {"pair": "⦑", "type": "c"},
    "⦓": {"pair": "⦔", "type": "o"},
    "⦔": {"pair": "⦓", "type": "c"},
    "⦕": {"pair": "⦖", "type": "o"},
    "⦖": {"pair": "⦕", "type": "c"},
    "⦗": {"pair": "⦘", "type": "o"},
    "⦘": {"pair": "⦗", "type": "c"},
    "⧘": {"pair": "⧙", "type": "o"},
    "⧙": {"pair": "⧘", "type": "c"},
    "⧚": {"pair": "⧛", "type": "o"},
    "⧛": {"pair": "⧚", "type": "c"},
    "⧼": {"pair": "⧽", "type": "o"},
    "⧽": {"pair": "⧼", "type": "c"},
    "⸢": {"pair": "⸣", "type": "o"},
    "⸣": {"pair": "⸢", "type": "c"},
    "⸤": {"pair": "⸥", "type": "o"},
    "⸥": {"pair": "⸤", "type": "c"},
    "⸦": {"pair": "⸧", "type": "o"},
    "⸧": {"pair": "⸦", "type": "c"},
    "⸨": {"pair": "⸩", "type": "o"},
    "⸩": {"pair": "⸨", "type": "c"},
    "⹕": {"pair": "⹖", "type": "o"},
    "⹖": {"pair": "⹕", "type": "c"},
    "⹗": {"pair": "⹘", "type": "o"},
    "⹘": {"pair": "⹗", "type": "c"},
    "⹙": {"pair": "⹚", "type": "o"},
    "⹚": {"pair": "⹙", "type": "c"},
    "⹛": {"pair": "⹜", "type": "o"},
    "⹜": {"pair": "⹛", "type": "c"},
    "〈": {"pair": "〉", "type": "o"},
    "〉": {"pair": "〈", "type": "c"},
    "《": {"pair": "》", "type": "o"},
    "》": {"pair": "《", "type": "c"},
    "「": {"pair": "」", "type": "o"},
    "」": {"pair": "「", "type": "c"},
    "『": {"pair": "』", "type": "o"},
    "』": {"pair": "『", "type": "c"},
    "【": {"pair": "】", "type": "o"},
    "】": {"pair": "【", "type": "c"},
    "〔": {"pair": "〕", "type": "o"},
    "〕": {"pair": "〔", "type": "c"},
    "〖": {"pair": "〗", "type": "o"},
    "〗": {"pair": "〖", "type": "c"},
    "〘": {"pair": "〙", "type": "o"},
    "〙": {"pair": "〘", "type": "c"},
    "〚": {"pair": "〛", "type": "o"},
    "〛": {"pair": "〚", "type": "c"},
    "﹙": {"pair": "﹚", "type": "o"},
    "﹚": {"pair": "﹙", "type": "c"},
    "﹛": {"pair": "﹜", "type": "o"},
    "﹜": {"pair": "﹛", "type": "c"},
    "﹝": {"pair": "﹞", "type": "o"},
    "﹞": {"pair": "﹝", "type": "c"},
    "（": {"pair": "）", "type": "o"},
    "）": {"pair": "（", "type": "c"},
    "［": {"pair": "］", "type": "o"},
    "］": {"pair": "［", "type": "c"},
    "｛": {"pair": "｝", "type": "o"},
    "｝": {"pair": "｛", "type": "c"},
    "｟": {"pair": "｠", "type": "o"},
    "｠": {"pair": "｟", "type": "c"},
    "｢": {"pair": "｣", "type": "o"},
    "｣": {"pair": "｢", "type": "c"},
}


class BidiCharacter:
    """
    A single character of the paragraph together with the state the
    bidirectional algorithm keeps for it.

    ``bidi_class`` is the working class that the resolution rules rewrite, while
    ``original_bidi_class`` keeps the class assigned at construction time.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ) -> None:
        """
        Args:
            character_index: position of the character in the source text.
            character: the character itself.
            embedding_level: embedding level in force when the character is read.
            debug: when true, upper-case characters are given class "R" instead
                of the class reported by ``unicodedata``.
        """
        self.character_index = character_index
        self.character = character
        forced_rtl = debug and character.isupper()
        self.bidi_class = "R" if forced_rtl else unicodedata.bidirectional(character)
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        self.direction = None

    def get_direction_from_level(self) -> str:
        """Return "R" for an odd embedding level and "L" for an even one."""
        if self.embedding_level % 2:
            return "R"
        return "L"

    def set_class(self, cls: str) -> None:
        """Replace the working bidi class; the original class is left untouched."""
        self.bidi_class = cls

    def __repr__(self) -> str:
        described = (
            ("character_index", self.character_index),
            ("character", self.character),
            ("bidi_class", self.bidi_class),
            ("original_bidi_class", self.original_bidi_class),
            ("embedding_level", self.embedding_level),
            ("direction", self.direction),
        )
        return " ".join(f"{label}: {value}" for label, value in described)


@dataclass
class DirectionalStatus:
    """
    One entry of the directional status stack kept while explicit embedding
    levels are computed: a level plus the override and isolate flags.
    """

    __slots__ = [
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    ]

    # Embedding level, between 0 and MAX_DEPTH.
    embedding_level: int
    # Override in force: "N" (Neutral), "L" (Left) or "R" (Right).
    directional_override_status: str
    # Isolate flag of the entry.
    directional_isolate_status: bool


class IsolatingRun:
    """
    An isolating run sequence (BD13) with its start/end-of-sequence directions.

    Creating an instance immediately resolves the sequence in place: the weak
    type rules (W1-W7), the neutral type rules (N0-N2) and the implicit level
    rules (I1-I2) are applied to the given characters, rewriting their
    ``bidi_class`` and ``embedding_level``.

    All scans are iterative, so arbitrarily long runs of terminators or
    neutrals cannot exhaust the interpreter's recursion limit.
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: list[BidiCharacter], sos: str, eos: str) -> None:
        """
        Args:
            characters: characters of the sequence, in logical order.
            sos: direction ("L" or "R") assumed before the first character.
            eos: direction ("L" or "R") assumed after the last character.
        """
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1 to W7 to the characters of the sequence, in place."""
        chars = self.characters
        count = len(chars)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class != "NSM":
                continue
            if i == 0:
                bidi_char.set_class(self.previous_direction)
                continue
            previous_class = chars[i - 1].bidi_class
            bidi_char.set_class(
                "ON"
                if previous_class in ("LRI", "RLI", "FSI", "PDI")
                else previous_class
            )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        # The last strong type is recorded before an AL is rewritten to R, so that
        # W2 can still tell AL and R apart.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        # The first and last characters have only one neighbour and are skipped.
        for i in range(1, count - 1):
            bidi_char = chars[i]
            before = chars[i - 1].bidi_class
            after = chars[i + 1].bidi_class
            if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
                bidi_char.set_class("EN")
            if (
                bidi_char.bidi_class == "CS"
                and before in ("AN", "EN")
                and after == before
            ):
                bidi_char.set_class(before)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # Each maximal run of ET is handled as a whole: it becomes EN when the
        # character just before or just after the run is EN, and ON otherwise.
        i = 0
        while i < count:
            current_class = chars[i].bidi_class
            if current_class == "ET":
                run_end = i
                while run_end < count and chars[run_end].bidi_class == "ET":
                    run_end += 1
                touches_en = (i > 0 and chars[i - 1].bidi_class == "EN") or (
                    run_end < count and chars[run_end].bidi_class == "EN"
                )
                resolved_class = "EN" if touches_en else "ON"
                for position in range(i, run_end):
                    chars[position].set_class(resolved_class)
                i = run_end
                continue
            if current_class in ("ES", "CS"):
                chars[i].set_class("ON")
            i += 1

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> list[tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns:
            (opening index, closing index) tuples sorted by opening index, or an
            empty list when more than 63 brackets are open at the same time.
        """
        # U+232A and U+3009 are two closing angle brackets that must match the
        # same openers. They are written as escapes because the two glyphs are
        # visually identical and would not survive Unicode normalisation.
        angle_closers = ("\u232a", "\u3009")
        open_brackets: list[tuple[str, int]] = []
        bracket_pairs: list[tuple[int, int]] = []
        for index, char in enumerate(self.characters):
            # Only characters still typed ON can act as paired brackets.
            if char.bidi_class != "ON":
                continue
            bracket = BIDI_BRACKETS.get(char.character)
            if bracket is None:
                continue
            if bracket["type"] == "o":
                if len(open_brackets) >= 63:
                    return []
                open_brackets.append((char.character, index))
            elif bracket["type"] == "c":
                # Look for the innermost opener this closer can match; every
                # opener stacked above the match is discarded with it.
                for depth in range(len(open_brackets) - 1, -1, -1):
                    open_char, open_index = open_brackets[depth]
                    expected = BIDI_BRACKETS[open_char]["pair"]
                    if expected == char.character or (
                        expected in angle_closers and char.character in angle_closers
                    ):
                        bracket_pairs.append((open_index, index))
                        del open_brackets[depth:]
                        break
        return sorted(bracket_pairs, key=itemgetter(0))

    def resolve_neutral_types(self) -> None:
        """Apply rules N0 to N2 to the characters of the sequence, in place."""
        chars = self.characters
        count = len(chars)

        def strong_direction(bidi_class: str) -> str:
            # For the N rules, numbers (AN/EN) count as R. "" means "not strong".
            if bidi_class == "L":
                return "L"
            if bidi_class in ("R", "AN", "EN"):
                return "R"
            return ""

        def previous_strong(index: int) -> str:
            # Direction of the closest strong character before index, or sos.
            for position in range(index - 1, -1, -1):
                direction = strong_direction(chars[position].bidi_class)
                if direction:
                    return direction
            return self.previous_direction

        # N0-N2: Resolving neutral types
        # N0
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = chars[0].get_direction_from_level()
            opposite_direction = "L" if embedding_direction == "R" else "R"
            for open_index, close_index in brackets:
                strong_same_direction = False
                strong_opposite_direction = False
                for index in range(open_index, close_index):
                    direction = strong_direction(chars[index].bidi_class)
                    if direction == embedding_direction:
                        strong_same_direction = True
                        break
                    if direction:
                        strong_opposite_direction = True
                if strong_same_direction:
                    resulting_direction = embedding_direction
                elif strong_opposite_direction:
                    # The opposite direction only wins when the context before
                    # the opening bracket is also in the opposite direction.
                    if previous_strong(open_index) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                else:
                    # No strong type inside the pair: left for N1/N2.
                    continue
                for bracket_index in (open_index, close_index):
                    chars[bracket_index].bidi_class = resulting_direction
                    # Any number of characters that were NSM before W1 and that
                    # immediately follow a resolved bracket take its direction.
                    follower = bracket_index + 1
                    while (
                        follower < count
                        and chars[follower].original_bidi_class == "NSM"
                        and chars[follower].bidi_class == "ON"
                    ):
                        chars[follower].bidi_class = resulting_direction
                        follower += 1

        # N1-N2
        # Resolving position i never changes the characters after i, so the
        # "next strong direction" of every position can be computed up front in
        # a single backward pass.
        following_strong = [self.next_direction] * count
        upcoming = self.next_direction
        for position in range(count - 1, -1, -1):
            following_strong[position] = upcoming
            direction = strong_direction(chars[position].bidi_class)
            if direction:
                upcoming = direction

        neutral_classes = ("B", "S", "WS", "ON", "FSI", "LRI", "RLI", "PDI")
        # Already resolved neutrals count as strong for the ones that follow,
        # so the "previous strong direction" is tracked while moving forward.
        preceding = self.previous_direction
        for position, bidi_char in enumerate(chars):
            if bidi_char.bidi_class in neutral_classes:
                if preceding == following_strong[position]:
                    bidi_char.bidi_class = preceding
                else:
                    bidi_char.bidi_class = bidi_char.get_direction_from_level()
            direction = strong_direction(bidi_char.bidi_class)
            if direction:
                preceding = direction

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2: raise embedding levels according to the resolved types."""
        for bidi_char in self.characters:
            # I1. For all characters with an even (left-to-right) embedding level,
            #     those of type R go up one level and those of type AN or EN go up two levels.
            if bidi_char.embedding_level % 2 == 0:
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                if bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2

            # I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
            else:
                if bidi_char.bidi_class in ("L", "EN", "AN"):
                    bidi_char.embedding_level += 1


def auto_detect_base_direction(
    string: str, stop_at_pdi: bool = False, debug: bool = False
) -> TextDirection:
    """
    This function applies rules P2 and P3 to detect the direction of a paragraph, returning
    the first strong direction and skipping over isolate sequences.
    P1 must be applied before calling this function (breaking into paragraphs)
    stop_at_pdi can be set to True to get the direction of a single isolate sequence

    Args:
        string: text to inspect.
        stop_at_pdi: when true, a PDI met outside any nested isolate ends the
            search and the result is LTR.
        debug: when true, upper-case characters are treated as class "R", the
            same convention BidiCharacter applies in debug mode.

    Returns:
        TextDirection.RTL if the first strong character found outside isolates
        is R or AL; TextDirection.LTR if it is L or if no strong character is found.
    """
    # Auto-LTR (standard BIDI) uses the first L/R/AL character, and is LTR if none is found.
    isolate_depth = 0
    for char in string:
        # The debug convention looks at the character itself: bidi class names
        # are always upper-case and cannot be used for that test.
        if debug and char.isupper():
            bidi_class = "R"
        else:
            bidi_class = unicodedata.bidirectional(char)

        if bidi_class in ("LRI", "RLI", "FSI"):
            isolate_depth += 1
        elif bidi_class == "PDI":
            if isolate_depth > 0:
                isolate_depth -= 1
            elif stop_at_pdi:
                return TextDirection.LTR
        elif isolate_depth == 0:
            if bidi_class in ("R", "AL"):
                return TextDirection.RTL
            if bidi_class == "L":
                return TextDirection.LTR
    return TextDirection.LTR


def calculate_isolate_runs(paragraph: list[BidiCharacter]) -> list[IsolatingRun]:
    """
    Split a paragraph into level runs and combine them into isolating run
    sequences (BD13), computing sos and eos for each one (X10).

    Every sequence is wrapped in an IsolatingRun, whose construction resolves
    the characters of the sequence in place.

    Args:
        paragraph: characters of one paragraph, with their embedding levels set.

    Returns:
        The isolating run sequences in order of their first character; an empty
        list when the paragraph is empty.
    """
    if not paragraph:
        return []

    # BD13 and X10
    class LevelRun(TypedDict):
        level: int
        text: list[BidiCharacter]
        complete: bool
        sos: str
        eos: str

    isolate_initiators = ("LRI", "RLI", "FSI")

    def level_to_direction(level: int) -> str:
        return "R" if level % 2 else "L"

    # A level run is a maximal stretch of consecutive characters that share
    # the same embedding level.
    level_runs: list[LevelRun] = []
    for bidi_char in paragraph:
        if not level_runs or level_runs[-1]["level"] != bidi_char.embedding_level:
            level_runs.append(
                {
                    "level": bidi_char.embedding_level,
                    "text": [],
                    "complete": False,
                    "sos": "",
                    "eos": "",
                }
            )
        level_runs[-1]["text"].append(bidi_char)

    # compute sos, eos for each level run
    last_index = len(level_runs) - 1
    for index, run in enumerate(level_runs):
        level = run["level"]
        sos_level = level
        if index > 0:
            sos_level = max(level, level_runs[index - 1]["level"])
        eos_level = level
        # X10 - when the last char is an isolate initiator, eos stays at the
        # run's own level; it is replaced below if a matching PDI run is found.
        ends_with_initiator = (
            run["text"][-1].original_bidi_class in isolate_initiators
        )
        if index < last_index and not ends_with_initiator:
            eos_level = max(level, level_runs[index + 1]["level"])
        run["sos"] = level_to_direction(sos_level)
        run["eos"] = level_to_direction(eos_level)

    # combine levels runs to create isolate runs
    isolate_runs: list[IsolatingRun] = []
    for index, run in enumerate(level_runs):
        if run["complete"]:
            continue
        run["complete"] = True
        sequence = run["text"]
        eos = run["eos"]
        if sequence[-1].original_bidi_class in isolate_initiators:
            # Chain the later level runs of the same level that start with a
            # PDI, for as long as each appended run itself ends with an
            # isolate initiator.
            for later in level_runs[index + 1 :]:
                if (
                    later["level"] == run["level"]
                    and later["text"][0].original_bidi_class == "PDI"
                ):
                    sequence.extend(later["text"])
                    later["complete"] = True
                    eos = later["eos"]
                    if later["text"][-1].original_bidi_class not in isolate_initiators:
                        break
        isolate_runs.append(IsolatingRun(characters=sequence, sos=run["sos"], eos=eos))

    return isolate_runs


class BidiParagraph:
    """
    Runs the bidirectional algorithm on one paragraph of text.

    The explicit levels (X1-X9) and the isolating run sequences are resolved
    when the instance is created; the ``get_*`` methods expose the resulting
    characters, their visual order and the directional fragments.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self,
        text: str,
        base_direction: Optional[TextDirection] = None,
        debug: bool = False,
    ) -> None:
        """
        Args:
            text: the paragraph to process.
            base_direction: paragraph direction; when not given it is detected
                from the text with auto_detect_base_direction.
            debug: when true, upper-case characters are treated as
                right-to-left characters.
        """
        self.text = text
        # debug must be passed by keyword: the second positional parameter of
        # auto_detect_base_direction is stop_at_pdi, not debug.
        self.base_direction = (
            auto_detect_base_direction(self.text, debug=debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: list[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> list[BidiCharacter]:
        """Return the resolved characters in logical order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> list[BidiCharacter]:
        """Return the characters in logical order after rule L1 has been applied to their levels."""
        # Calculate embedding level for each character after breaking isolating runs.
        # Only used on conformance testing
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> tuple[BidiCharacter, ...]:
        """Return the characters in visual order."""
        return self.reorder_resolved_levels()

    def get_all(self) -> tuple[list[BidiCharacter], tuple[BidiCharacter, ...]]:
        """Return the characters in logical order and in visual order."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self) -> str:
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self) -> tuple[tuple[str, TextDirection], ...]:
        """Return the text split into (fragment, direction) pairs, in logical order."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> None:
        """
        Apply rules X1 to X9 to ``self.text`` and store the surviving characters
        in ``self.characters``, then resolve them per isolating run sequence.
        """
        # Explicit levels and directions. Rule X1

        stack: deque[DirectionalStatus] = deque()
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        # The stack stores copies, so changing current_status never alters an
        # entry that was already pushed.
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results: list[BidiCharacter] = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            # Class forced by an active directional override, if any.
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # An FSI behaves as LRI or RLI depending on the direction
                # detected in the text that follows it, up to its PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )

            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even
                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    # The isolate initiator itself is still subject to the
                    # override that was active before it.
                    new_bidi_class = current_status.directional_override_status
                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    # Overflow: the level is left unchanged, only the counters move.
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Drop the embeddings opened inside the isolate, then the
                    # isolate entry itself.
                    while True:
                        if not stack[-1].directional_isolate_status:
                            stack.pop()
                            continue
                        break
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class
            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    # Isolate initiators keep the level they were created with,
                    # i.e. the level in force before they were processed.
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return
        self.characters = results
        # The isolating runs resolve the characters in place while being built,
        # so the returned list is not needed here.
        calculate_isolate_runs(results)

    def split_bidi_fragments(self) -> tuple[tuple[str, TextDirection], ...]:
        """
        Group consecutive characters (logical order) that share the direction
        given by their embedding level.

        Returns:
            (fragment text, direction) pairs; an empty tuple when there are no
            characters.
        """
        bidi_fragments: list[tuple[str, TextDirection]] = []
        current_fragment = ""
        current_direction = ""

        def flush() -> None:
            # Store the fragment being built, if it holds any text.
            if current_fragment:
                bidi_fragments.append(
                    (
                        current_fragment,
                        (
                            TextDirection.RTL
                            if current_direction == "R"
                            else TextDirection.LTR
                        ),
                    )
                )

        for c in self.characters:
            direction = c.get_direction_from_level()
            if direction != current_direction:
                flush()
                current_fragment = ""
                current_direction = direction
            current_fragment += c.character
        flush()
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self) -> tuple[BidiCharacter, ...]:
        """
        Apply rules L1 and L2.

        L1 rewrites the embedding level of the affected characters in place;
        L2 works on a copy, so ``self.characters`` stays in logical order.

        Returns:
            The characters in visual order.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999
        # The text is walked backwards so that whitespace can be recognised as
        # trailing (end of line) or as directly preceding a separator.
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        # When no odd level exists, min_odd_level keeps its initial value and
        # the range below is empty.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results: list[BidiCharacter] = []
            pending: list[BidiCharacter] = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    pending.append(bidi_char)
                    continue
                if pending:
                    temp_results.extend(reversed(pending))
                    pending = []
                temp_results.append(bidi_char)
            if pending:
                temp_results.extend(reversed(pending))
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)