"""Common routines for the Python zchunk tests."""

from __future__ import annotations

import argparse
import dataclasses
import functools
import os
import subprocess  # noqa: S404
import sys
import typing

import pyparsing as pyp

from pychunk import defs


if typing.TYPE_CHECKING:
    import pathlib
    from typing import Final


@dataclasses.dataclass(frozen=True)
class Config:
    """Common runtime configuration settings."""

    # Directory containing the zchunk tools (`zck`, `zck_read_header`).
    bindir: pathlib.Path
    # Environment passed to the spawned zchunk programs; see get_runenv().
    env: dict[str, str]

    # Path to the original, uncompressed input file.
    orig: pathlib.Path
    # Path where the compressed output file is (or will be) created.
    compressed: pathlib.Path


@dataclasses.dataclass(frozen=True)
class Chunk:
    """A single chunk descriptor.

    Offsets and sizes are tracked in both the compressed stream
    (`cstart`/`csize`/`cend`) and the uncompressed data
    (`start`/`size`/`end`); see `_parse_all_chunks` for how the
    cumulative values are derived.
    """

    # Offset of the chunk within the compressed file.
    cstart: int
    # Offset of the chunk within the uncompressed data.
    start: int
    # Compressed size of the chunk in bytes.
    csize: int
    # Uncompressed size of the chunk in bytes.
    size: int
    # End offset within the compressed file (cstart + csize).
    cend: int
    # End offset within the uncompressed data (start + size).
    end: int


def get_runenv() -> dict[str, str]:
    """Build the environment for running the zchunk programs.

    Return a copy of the current process environment with the locale
    forced to C.UTF-8 and any message-language override cleared, so the
    tools' output is stable enough to parse.
    """
    return {**os.environ, "LC_ALL": "C.UTF-8", "LANGUAGE": ""}


def base_parser(prog: str) -> argparse.ArgumentParser:
    """Create a parser with the common options.

    Both tests take the same two required options: the directory holding
    the zchunk tools and the file to compress.
    """
    res: Final = argparse.ArgumentParser(prog=prog)
    common_opts = (
        ("-d", "--bindir", "path to the directory containing the zchunk tools"),
        ("-f", "--filename", "path to the filename to compress"),
    )
    for short_opt, long_opt, help_text in common_opts:
        res.add_argument(short_opt, long_opt, type=str, required=True, help=help_text)

    return res


def do_compress(cfg: Config, orig_size: int) -> int:
    """Compress the original file and sanity-check the result.

    Run the `zck` tool to compress `cfg.orig` into `cfg.compressed`, then
    verify that the output file was created, that it is smaller than the
    original, and that it starts with the zchunk magic bytes.

    Returns the size of the compressed file in bytes; exits the program
    (via `sys.exit`) on any validation failure.
    """
    print(f"About to compress {cfg.orig} to {cfg.compressed}")
    if cfg.compressed.exists():
        sys.exit(f"Did not expect {cfg.compressed} to exist")
    subprocess.check_call(
        [cfg.bindir / "zck", "-o", cfg.compressed, "--", cfg.orig],
        shell=False,
        env=cfg.env,
    )
    if not cfg.compressed.is_file():
        sys.exit(f"zck did not create the {cfg.compressed} file")
    comp_size: Final = cfg.compressed.stat().st_size
    print(f"{cfg.compressed} size is {comp_size} bytes long")
    if comp_size >= orig_size:
        sys.exit(f"sizeof({cfg.compressed}) == {comp_size} : sizeof({cfg.orig}) == {orig_size}")
    # Use a context manager so the file handle is closed deterministically
    # (the previous code leaked it), and derive the prefix length from the
    # magic string itself instead of hard-coding 5.
    with cfg.compressed.open(mode="rb") as compf:
        start: Final = compf.read(len(defs.MAGIC))
    print(f"{cfg.compressed} starts with {start!r}")
    if start != defs.MAGIC:
        sys.exit(f"{cfg.compressed} does not start with {defs.MAGIC!r}: {start!r}")

    return comp_size


@dataclasses.dataclass(frozen=True)
class PChunk:
    """A description of a single chunk as parsed from a table line."""

    # Index of the chunk within the chunks table.
    idx: int
    # The chunk's checksum as a lowercase hexadecimal string.
    cksum: str
    # Start offset of the chunk within the compressed file.
    start: int
    # Compressed size of the chunk in bytes.
    comp_size: int
    # Uncompressed size of the chunk in bytes.
    size: int


@dataclasses.dataclass(frozen=True)
class PChunks:
    """All the parsed chunks, still in `PChunk` format."""

    # The chunk lines in table order; the first entry is the fake
    # header chunk (see `_parse_all_chunks`).
    chunks: list[PChunk]


# Zero-or-more horizontal-whitespace characters, dropped from the results.
# NOTE(review): the class includes "\b" (backspace), which is unusual in a
# whitespace set — was "\v" (vertical tab) intended?  Confirm against the
# real `zck_read_header` output.
_p_ws = pyp.Char(" \t\f\b")[...].suppress()
"""Skip whitespace within a line."""

# A "Data size: <N>" line; only the integer survives, named "data_size".
# (The extra `.suppress()` calls on `_p_ws` below are redundant — it is
# already suppressed at its definition — but harmless.)
_p_total_size = (
    pyp.Literal("Data size:").suppress()
    + _p_ws.suppress()
    + pyp.common.integer("data_size")
    + _p_ws.suppress()
    + pyp.Char("\n").suppress()
)
"""Match the header line specifying the total size of the compressed data."""

# A "Chunk count: <N>" line; only the integer survives, named "count".
_p_chunk_count = (
    pyp.Literal("Chunk count:").suppress()
    + _p_ws.suppress()
    + pyp.common.integer("count")
    + _p_ws.suppress()
    + pyp.Char("\n").suppress()
)
"""Match the header line specifying the number of chunks."""

# The column-headers row of the chunks table; nothing from it is kept
# (the whole element is suppressed at its point of use).
_p_chunks_header = (
    _p_ws.suppress()
    + pyp.Literal("Chunk")
    + _p_ws.suppress()
    + pyp.Literal("Checksum")
    + _p_ws.suppress()
    + pyp.Literal("Start")
    + _p_ws.suppress()
    + pyp.Literal("Comp size")
    + _p_ws.suppress()
    + pyp.Literal("Size")
    + _p_ws.suppress()
    + pyp.Char("\n").suppress()
)
"""Match the header line of the chunks table itself."""

# One data row of the chunks table: index, hex checksum, start offset,
# compressed size, uncompressed size.  The named results feed the
# `_parse_chunk` parse action below, which builds a `PChunk`.
_p_chunk: Final[pyp.ParserElement] = (
    _p_ws.suppress()
    + pyp.common.integer("idx")
    + _p_ws.suppress()
    + pyp.Word("0123456789abcdef")("cksum")
    + _p_ws.suppress()
    + pyp.common.integer("start")
    + _p_ws.suppress()
    + pyp.common.integer("comp_size")
    + _p_ws.suppress()
    + pyp.common.integer("size")
    + _p_ws.suppress()
    + pyp.Char("\n").suppress()
)
"""Match a single chunk line within the chunks table."""


@_p_chunk.set_parse_action
def _parse_chunk(tokens: pyp.ParseResults) -> PChunk:
    """Turn a matched chunk table line into a `PChunk` object."""
    field_names = ("idx", "cksum", "start", "comp_size", "size")
    return PChunk(**{name: tokens[name] for name in field_names})


# `_p_chunk()` (a no-argument call) returns a copy of the parser element —
# presumably so the repetition does not modify `_p_chunk` itself; the
# `[1, ...]` slice means "match one or more times".
_p_chunks: Final[pyp.ParserElement] = _p_chunk()[1, ...]
"""Match all the chunks in the chunks table."""


@_p_chunks.set_parse_action
def _parse_chunks(tokens: pyp.ParseResults) -> PChunks:
    """Gather the parsed chunk lines into a `PChunks` container."""
    parsed: Final[list[PChunk]] = tokens.as_list()
    # Sanity check: every element should have been built by _parse_chunk.
    unexpected: Final = [item for item in parsed if not isinstance(item, PChunk)]
    if unexpected:
        raise ValueError(repr(unexpected))
    return PChunks(chunks=parsed)


# In pyparsing, a bare `...` in a concatenation means "skip over any text up
# to the next element's match", so this grammar tolerates arbitrary output
# between the header lines and the chunks table.
_p_all_chunks: Final[pyp.ParserElement] = (
    ...
    + _p_total_size("total_size")
    + ...
    + _p_chunk_count("chunk_count")
    + ...
    + _p_chunks_header.suppress()
    + _p_chunks("chunks")
)
"""Match all the chunks along with the data from the header."""


@_p_all_chunks.set_parse_action
def _parse_all_chunks(tokens: pyp.ParseResults) -> list[Chunk]:
    """Parse and validate all the chunks.

    Cross-check the chunk list against the header's chunk count and data
    size, verify the leading fake header chunk, and convert each `PChunk`
    into a `Chunk` carrying cumulative compressed/uncompressed offsets.
    """
    total_size: Final[int] = tokens["total_size"].as_list()[0]
    chunk_count: Final[int] = tokens["chunk_count"].as_list()[0]
    parsed: Final[PChunks] = tokens["chunks"]
    if len(parsed.chunks) != chunk_count:
        raise ValueError(repr((parsed.chunks, chunk_count)))

    # The first fake chunk should always represent the header, right?
    header: Final = parsed.chunks[0]
    header_ok = (
        header.start >= 1
        and header.size == 0
        and header.comp_size == 0
        and all(char == "0" for char in header.cksum)
    )
    if not header_ok:
        raise ValueError(repr(header))

    # Walk the remaining chunks, keeping running offsets in both the
    # compressed stream (cend) and the uncompressed data (end).
    result: Final = [
        Chunk(
            cstart=0,
            start=0,
            csize=0,
            size=0,
            cend=header.start,
            end=0,
        ),
    ]
    for cur in parsed.chunks[1:]:
        prev = result[-1]
        # Each chunk must start exactly where the previous one ended.
        if cur.start != prev.cend:
            raise ValueError(repr((result, cur)))
        result.append(
            Chunk(
                cstart=cur.start,
                start=prev.end,
                csize=cur.comp_size,
                size=cur.size,
                cend=prev.cend + cur.comp_size,
                end=prev.end + cur.size,
            ),
        )

    if len(result) != chunk_count or result[-1].cend != header.start + total_size:
        raise ValueError(repr((chunk_count, total_size, parsed, result)))
    return result


# `leave_whitespace()` keeps pyparsing from silently skipping the newlines
# that the grammar above matches explicitly.
_p_all_chunks_complete: Final = _p_all_chunks.leave_whitespace()
"""Parse the full output of the `zck_read_header` utility."""


def read_chunks(cfg: Config, orig_size: int, comp_size: int) -> Chunk:
    """Parse the chunks of the compressed file.

    Run `zck_read_header` on the compressed file, parse its output into
    `Chunk` objects, validate the totals and the first few chunks, and
    return the second real chunk.  Raises `ValueError` on any mismatch.
    """
    raw_output: Final = subprocess.check_output(
        [cfg.bindir / "zck_read_header", "-c", "--", cfg.compressed],
        encoding="UTF-8",
        env=cfg.env,
    )
    parsed: Final[list[Chunk]] = _p_all_chunks_complete.parse_string(
        raw_output,
        parse_all=True,
    ).as_list()
    if not all(isinstance(item, Chunk) for item in parsed):
        raise ValueError(repr(parsed))

    # The final chunk's cumulative offsets must match the known file sizes.
    if parsed[-1].end != orig_size or parsed[-1].cend != comp_size:
        raise ValueError(repr(parsed))

    try:
        hdr_chunk, first_chunk, second_chunk = parsed[:3]
    except ValueError as err:
        raise ValueError(repr(parsed)) from err

    # The header pseudo-chunk holds no data, the first real chunk follows it
    # immediately and is non-empty, and the second chunk is contiguous.
    shape_ok = (
        hdr_chunk.size == 0
        and hdr_chunk.end == 0
        and first_chunk.start == hdr_chunk.end
        and first_chunk.size != 0
        and first_chunk.end != 0
        and second_chunk.start == first_chunk.end
    )
    if not shape_ok:
        raise ValueError(repr(parsed))

    return second_chunk
