# (C) Copyright 2025- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation nor
# does it submit to any jurisdiction.

import logging
from collections.abc import Collection, Generator
from pathlib import Path
from typing import Any

from pyfdb._internal import (
    _FDB,
    Config,
    ConfigMapper,
    FDBToolRequest,
    init_bindings,
)
from pyfdb.pyfdb_iterator import (
    ControlElement,
    IndexAxis,
    ListElement,
    PurgeElement,
    StatsElement,
    StatusElement,
    WipeElement,
)
from pyfdb.pyfdb_type import (
    ControlAction,
    ControlIdentifier,
    DataHandle,
    MarsIdentifier,
    MarsSelection,
    UserInputMapper,
)


class FDB:
    def __init__(
        self,
        config: str | dict | Path | None = None,
        user_config: str | dict | Path | None = None,
    ) -> None:
        """
        Constructor for FDB object.

        Parameters
        ----------
        `config`: `str` | `dict` | `Path` | `None`, *optional*
            Config object for setting up the FDB. See Notes.
        `user_config`: `str` | `dict` | `Path` | `None`, *optional*
            Config object for setting up user specific options, e.g., enabling sub-TOCs. See Notes.

        Returns
        -------
        :returns: FDB object

        Note
        ----
        Every config parameter but is converted accordingly depending on its type:
            - `str` is used as a yaml representation to parse the config
            - `dict` is interpreted as hierarchical format to represent a config, see example
            - `Path` is interpreted as a location of the config and read as a YAML file
            - `None` is the fallback. The default config in `$FDB_HOME` is loaded

        Using a single PyFDB instance per individual threads is safe. Sharing the instances across threads isn't supported.
        However, the underlying FDB and its methods are thread-safe; the caller needs to be aware that flush acts on all archive calls,
        including archived messages from other threads. A call to flush will persist all archived messages regardless
        from which thread the message has been archived. In case the caller wants a finer control it is advised to
        instantiate one FDB object per thread to ensure only messages are flushed that have been archived on the same FDB
        object.

        Examples
        --------
        >>> fdb = pyfdb.FDB(fdb_config_path)
        >>> config = {
        ...     "type":"local",
        ...     "engine":"toc",
        ...     "schema":"<schema_path>",
        ...     "spaces":[
        ...         {
        ...             "handler":"Default",
        ...             "roots":[
        ...                 {"path": "<db_store_path>"},
        ...             ],
        ...         }
        ...     ],
        ... }
        >>> fdb = pyfdb.FDB(config)

        Or leveraging the context manager:

        >>> with pyfdb.FDB(fdb_config_path) as fdb:
        ...     # Call methods of fdb
        ...     pass
        """

        init_bindings()
        self.logger = logging.getLogger(__name__ + ".FDB")

        # Convert to JSON if set
        config = ConfigMapper.to_json(config)
        user_config = ConfigMapper.to_json(user_config)

        if config is not None and user_config is not None:
            internal_config = Config(config, user_config)
            self.FDB = _FDB(internal_config)
        elif config is not None:
            internal_config = Config(config, None)
            self.FDB = _FDB(internal_config)
        else:
            self.FDB = _FDB()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.flush()

    def archive(self, data: bytes, identifier: MarsIdentifier | None = None):
        """
        Archive binary data into the underlying FDB.
        In case an identifier is supplied, that identifier is used to archive the data.
        *No consistency checks are applied. The caller needs to ensure the provided identifier matches metadata present in data.*

        If no identifier is supplied, `data` is interpreted as GRIB data and the metadata is taken from
        the GRIB messages.

        In any case, the supplied or derived metadata needs to provide values for all required keys of the FDB schema.

        Parameters
        ----------
        `data`: `bytes`
            The binary data to be archived. If no key is provided this is interpreted by `eccodes` and may contain multiple GRIB messages.
        `identifier` : `Identifier` | None, optional
            A unique identifier for the archived data.
            - If provided, the data will be stored under this identifier.
            - If None, the data will be archived without an explicit identifier, metadata has to be derivable from the data, which is interpreted as GRIB data.

        Note
        ----
        Sometimes an identifier is also referred to as a Key.


        Returns
        -------
        None

        Examples
        --------
        >>> fdb = pyfdb.FDB()
        >>> filename = data_path / "x138-300.grib"
        >>> fdb.archive(data=filename.read_bytes()) # Archive
        >>> fdb.archive(identifier=Identifier([("key-1", "value-1")]), data=filename.read_bytes())
        >>> fdb.flush() # Sync the archive call
        """
        if identifier is None:
            self.FDB.archive(data, len(data))
        else:
            mapped_identifier = UserInputMapper.map_identifier_to_internal(identifier)
            self.FDB.archive(mapped_identifier, data, len(data))

    def flush(self):
        """
        Flush all buffers and close all data handles of the underlying FDB into a consistent DB state.
        *Always safe to call*

        Parameters
        ----------
        None

        Returns
        -------
        None

        Examples
        --------
        >>> fdb = pyfdb.FDB()
        >>> filename = data_path / "x138-300.grib"
        >>> fdb.archive(bytes=filename.read_bytes()) # Archive
        >>> fdb.flush() # Data is synced
        """
        self.FDB.flush()

    def retrieve(self, mars_selection: MarsSelection) -> DataHandle:
        """
        Retrieve data which is specified by a MARS selection.

        Parameters
        ----------
        `mars_selection`
            MARS selection which describes the data which should be retrieved

        Note
        ----
        The returned data handle doesn't guarantee the order of the GRIB messages.

        Returns
        -------
        DataHandle
            A data handle which contains unordered GRIB messages and can be read like a `BytesLike` object.

        Examples
        --------
        >>> mars_selection = {"key-1": "value-1", ...}
        >>> data_handle = pyfdb.retrieve(mars_selection)
        >>> data_handle.open()
        >>> data_handle.read(4)
        >>> data_handle.close()

        Or leveraging the context manager:

        >>> with pyfdb.retrieve(selection) as data_handle:
        >>>     assert data_handle
        >>>     assert data_handle.read(4) == b"GRIB"
        """
        if len(mars_selection) == 0:
            raise TypeError("FDB.retrieve: Wildcard selection aren't support for retrieving.")

        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        return DataHandle(self.FDB.retrieve(internal_mars_selection), _internal=True)

    def list(
        self,
        mars_selection: MarsSelection,
        include_masked: bool = False,
        level: int = 3,
    ) -> Generator[ListElement, None, None]:
        """
        List data present at the underlying fdb archive and which can be retrieved.

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            A MARS selection which describes the data which can be listed. If `None` is given, all data will be listed.
        `include_masked` : bool, *optional*
            If True, the returned iterator lists masked data, if False the elements are unique.
        `level` : int [1-3], *optional*
            Specifies the FDB schema level of the elements which are matching the selection.
            A level of 1 means return a level 1 key (of the FDB schema) which is matching the MARS selection.

        Returns
        -------
        Generator[ListElement, None, None]
            A generator for `ListElement` describing FDB entries containing data of the MARS selection

        Note
        ----
        *this call lists masked elements if `include_masked` is `True`.*

        Examples
        --------
        >>> selection = {
        >>>     "type": "an",
        >>>     "class": "ea",
        >>>     "domain": "g",
        >>>     "expver": "0001",
        >>>     "stream": "oper",
        >>>     "date": "20200101",
        >>>     "levtype": "sfc",
        >>>     "step": "0",
        >>>     "time": "1800",
        >>> }
        >>> list_iterator = pyfdb.list(selection) # level == 3
        >>> elements = list(list_iterator)
        >>> print(elements[0])

        {class=ea,expver=0001,stream=oper,date=20200101,time=1800,domain=g}
        {type=an,levtype=sfc}
        {step=0,param=131},
        tocfieldlocation[uri=uri[scheme=file,name=<location>],offset=10732,length=10732,remapkey={}],
        length=10732,
        timestamp=176253515

        >>> list_iterator = pyfdb.list(selection, level=2)
        >>> elements = list(list_iterator)
        >>> print(elements[0])

        {class=ea,expver=0001,stream=oper,date=20200101,time=1800,domain=g}
        {type=an,levtype=sfc},
        length=0,
        timestamp=0

        >>> list_iterator = pyfdb.list(selection, level=1)
        >>> elements = list(list_iterator)
        >>> print(elements[0])

        {class=ea,expver=0001,stream=oper,date=20200101,time=1800,domain=g},
        length=0,
        timestamp=0
        """

        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        fdb_tool_request = FDBToolRequest.from_internal_mars_selection(internal_mars_selection)
        iterator = self.FDB.list(fdb_tool_request.tool_request, not include_masked, level)
        for list_element in iterator:
            try:
                yield ListElement(list_element, _internal=True)
            except StopIteration:
                return

    def inspect(self, mars_selection: MarsSelection) -> Generator[ListElement, None, None]:
        """
        Inspects the content of the underlying FDB and returns a generator of list elements
        describing which field was part of the MARS selection.

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            An MARS selection for which the inspect should be executed

        Returns
        -------
        Generator[ListElement, None, None]
            A generator for `ListElement` describing FDB entries containing data of the MARS selection


        Examples
        --------
        >>> selection = {
        >>>         "type": "an",
        >>>         "class": "ea",
        >>>         "domain": "g",
        >>>         "expver": "0001",
        >>>         "stream": "oper",
        >>>         "date": "20200101",
        >>>         "levtype": "sfc",
        >>>         "step": "0",
        >>>         "param": "167",
        >>>         "time": "1800",
        >>>     }
        >>> list_iterator = pyfdb.inspect(selection)
        >>> elements = list(list_iterator) # single element in iterator
        >>> elements[0]
        {class=ea,expver=0001,stream=oper,date=20200101,time=1800,domain=g}
        {type=an,levtype=sfc}
        {param=167,step=0},
        TocFieldLocation[
            uri=URI[scheme=<location>],
            offset=0,
            length=10732,
            remapKey={}
        ],
        length=10732,
        timestamp=1762537447
        """
        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        iterator = self.FDB.inspect(internal_mars_selection)
        for inspect_element in iterator:
            try:
                yield ListElement(inspect_element, _internal=True)
            except StopIteration:
                return

    def status(self, mars_selection: MarsSelection) -> Generator[StatusElement, None, None]:
        """
        List the status of all FDB entries with their control identifiers, e.g., whether a certain
        database was locked for retrieval.

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            An MARS selection which specifies the queried data

        Returns
        -------
        Generator[StatusElement, None, None]
            A generator for `StatusElement` describing FDB entries and their control identifier


        Examples
        --------
        >>> selection = {
        >>>         "type": "an",
        >>>         "class": "ea",
        >>>         "domain": "g",
        >>>     },
        >>> )
        >>> status_iterator = pyfdb.status(selection)
        >>> elements = list(status_iterator)
        >>> elements[0]
        StatusElement(
            control_identifiers=[],
            key={
                'class': ['ea'],
                'type': ['an'],
                'date': ['20200104'],
                'domain': ['g'],
                'expver': ['0001'],
                'stream': ['oper'],
                'time': ['2100']
                },
            location=/<path_to_root>/ea:0001:oper:20200104:2100:g
        )
        """
        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        fdb_tool_request = FDBToolRequest.from_internal_mars_selection(internal_mars_selection)
        iterator = self.FDB.status(fdb_tool_request.tool_request)
        while True:
            try:
                yield StatusElement(next(iterator), _internal=True)
            except StopIteration:
                return

    def wipe(
        self,
        mars_selection: MarsSelection,
        doit: bool = False,
        porcelain: bool = False,
        unsafe_wipe_all: bool = False,
    ) -> Generator[WipeElement, None, None]:
        """
        Wipe data from the database.

        Delete FDB databases and the data therein contained. Use the passed
        selection to identify the database to delete. This is equivalent to a UNIX rm command.
        This function deletes either whole databases, or whole indexes within databases

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            An MARS selection which specifies the affected data
        `doit` : `bool`, *optional*
            If true the wipe command is executed, per default there are only dry-run
        `porcelain` : `bool`, *optional*
            Restricts the output to the wiped files
        `unsafe_wipe_all` : `bool`, *optional*
            Flag for disabling all security checks and force a wipe

        Returns
        -------
        Generator[WipeElement, None, None]
            A generator for `WipeElement`

        Note
        ----
        Wipe elements are not directly corresponding to the wiped files. This can be a cause for confusion.
        The individual wipe elements strings of the wipe output.

        Examples
        --------
        >>> fdb = pyfdb.FDB(fdb_config_path)
        >>> wipe_iterator = fdb.wipe({"class": "ea"})
        >>> wiped_elements = list(wipe_iterator)
        ...
        Toc files to delete:
        <path_to_database>/toc
        ...
        """
        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        fdb_tool_request = FDBToolRequest.from_internal_mars_selection(internal_mars_selection)
        iterator = self.FDB.wipe(fdb_tool_request.tool_request, doit, porcelain, unsafe_wipe_all)
        for wipe_element in iterator:
            try:
                yield WipeElement(wipe_element, _internal=True)
            except StopIteration:
                return

    def purge(
        self,
        mars_selection: MarsSelection,
        doit: bool = False,
        porcelain: bool = False,
    ) -> Generator[PurgeElement, None, None]:
        """
        Remove duplicate data from the database.

        Purge duplicate entries from the database and remove the associated data if the data is owned and not adopted.
        Data in the FDB5 is immutable. It is masked, but not removed, when overwritten with new data using the same key.
        Masked data can no longer be accessed. Indexes and data files that only contains masked data may be removed.

        If an index refers to data that is not owned by the FDB (in particular data which has been adopted from an
        existing FDB5), this data will not be removed.

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            A MARS selection which describes the data which is purged.
        `doit` : `bool`, *optional*
            If true the wipe command is executed, per default there are only dry-run
        `porcelain` : `bool`, *optional*
            Restricts the output to the wiped files

        Returns
        -------
        Generator[PurgeElement, None, None]
            A generator for `PurgeElement`

        Examples
        --------
        >>> fdb = pyfdb.FDB(fdb_config_path)
        >>> purge_iterator = fdb.purge({"class": "ea"}), doit=True)
        >>> purged_elements = list(purge_iterator)
        >>> print(purged_elements[0])
        {class=ea,expver=0001,stream=oper,date=20200104,time=1800,domain=g}
        {type=an,levtype=sfc}
        {step=0,param=167},
        TocFieldLocation[
            uri=URI[
                scheme=file,
                name=<location>
            ],
            offset=32196,
            length=10732,
            remapKey={}
        ],
        length=10732,
        timestamp=176253976
        """
        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        fdb_tool_request = FDBToolRequest.from_internal_mars_selection(internal_mars_selection)
        iterator = self.FDB.purge(fdb_tool_request.tool_request, doit, porcelain)
        for purge_element in iterator:
            try:
                yield PurgeElement(purge_element, _internal=True)
            except StopIteration:
                return

    def stats(self, mars_selection: MarsSelection) -> Generator[StatsElement, None, None]:
        """
        Print information about FDB databases, aggregating the
        information over all the databases visited into a final summary.

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            A MARS selection which specifies the affected data.

        Returns
        -------
        Generator[StatsElement, None, None]
            A generator for `StatsElement`

        Examples
        --------
        >>> fdb = pyfdb.FDB(fdb_config_path)
        >>> stats_iterator = fdb.stats(selection)
        >>> for el list(stats_iterator):
        >>>     print(el)
        Index Statistics:
        Fields                          : 3
        Size of fields                  : 32,196 (31.4414 Kbytes)
        Reacheable fields               : 3
        Reachable size                  : 32,196 (31.4414 Kbytes)

        DB Statistics:
        Databases                       : 1
        TOC records                     : 2
        Size of TOC files               : 2,048 (2 Kbytes)
        Size of schemas files           : 228 (228 bytes)
        TOC records                     : 2
        Owned data files                : 1
        Size of owned data files        : 32,196 (31.4414 Kbytes)
        Index files                     : 1
        Size of index files             : 131,072 (128 Kbytes)
        Size of TOC files               : 2,048 (2 Kbytes)
        Total owned size                : 165,544 (161.664 Kbytes)
        Total size                      : 165,544 (161.664 Kbytes)
        """
        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        fdb_tool_request = FDBToolRequest.from_internal_mars_selection(internal_mars_selection)
        iterator = self.FDB.stats(fdb_tool_request.tool_request)
        for stats_element in iterator:
            try:
                yield StatsElement(stats_element, _internal=True)
            except StopIteration:
                return

    def control(
        self,
        mars_selection: MarsSelection,
        control_action: ControlAction,
        control_identifiers: Collection[ControlIdentifier],
    ) -> Generator[ControlElement, None, None]:
        """
        Enable certain features of FDB databases, e.g., disables or enables retrieving, list, etc.

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            A MARS selection which specifies the affected data.
        `control_action` : `ControlAction`
            Which action should be modified, e.g., ControlAction.RETRIEVE
        `control_identifiers` : `list[ControlIdentifier]`
            Should an action be enabled or disabled, e.g., ControlIdentifier.ENABLE or ControlIdentifier.DISABLE

        Returns
        -------
        Generator[ControlElement, None, None]
            A generator for `ControlElement`

        Note
        ----
        Disabling of an ControlAction, e.g., ControlAction.RETRIEVE leads to the creation
        of a `retrieve.lock` in the corresponding FDB database. This is true for all actions.
        The file is removed after the Action has been disabled.

        **It's important to consume the iterator, otherwise the lock file isn't deleted which
        can cause unexpected behavior. Also, due to internal reuse of databases, create a new FDB
        object before relying on the newly set control_identifier, to propagate the status.**

        Examples
        --------
        >>> fdb = pyfdb.FDB(fdb_config_path)
        >>> selection = {
        >>>         "class": "ea",
        >>>         "domain": "g",
        >>>         "expver": "0001",
        >>>         "stream": "oper",
        >>>         "date": "20200101",
        >>>         "time": "1800",
        >>> }
        >>> control_iterator = fdb.control(
        >>>     selection,
        >>>     ControlAction.DISABLE,
        >>>     [ControlIdentifier.RETRIEVE],
        >>> )
        >>> elements = list(control_iterator)
        >>> print(elements[0])
        ControlElement(
            control_identifiers=[RETRIEVE],
            key={
                'class': ['ea'],
                'date': ['20200104'],
                'domain': ['g'],
                'expver': ['0001'],
                'stream': ['oper'],
                'time': ['2100']
                },
            location=/<path_to_root>/ea:0001:oper:20200104:2100:g
        )
        """
        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        fdb_tool_request = FDBToolRequest.from_internal_mars_selection(internal_mars_selection)
        raw_control_identifiers = [control_identifier._to_raw() for control_identifier in control_identifiers]
        iterator = self.FDB.control(
            fdb_tool_request.tool_request,
            control_action._to_raw(),
            raw_control_identifiers,
        )
        for control_element in iterator:
            try:
                yield ControlElement(control_element, _internal=True)
            except StopIteration:
                return

    def axes(self, mars_selection: MarsSelection, level: int = 3) -> IndexAxis:
        """
        Return the 'axes' and their extent of a MARS selection for a given level of the schema in
        an IndexAxis object.

        If a key isn't specified the entire extent (all values) are returned.

        Parameters
        ----------
        `mars_selection` : `MarsSelection`
            A MARS selection which specifies the affected data.
        `level` : int [1-3], *optional*
            Level of the FDB Schema. Only keys of the given level are returned.

        Returns
        -------
        IndexAxis
            A map containing Key-Value pairs of the axes and their extent

        Examples
        --------
        >>> fdb = pyfdb.FDB(fdb_config_path)
        >>> selection = {
        ...         "type": "an",
        ...         "class": "ea",
        ...         "domain": "g",
        ...         "expver": "0001",
        ...         "stream": "oper",
        ...         "levtype": "sfc",
        ...         "step": "0",
        ...         "time": "1800",
        ... }
        >>> index_axis: IndexAxis = fdb.axes(selection) # level == 3
        >>> for k, v in index_axis.items():
        ...     print(f"k={k} \t| v={v}")
        k=class    | v=['ea']
        k=date     | v=['20200101', '20200102', '20200103', '20200104']
        k=domain   | v=['g']
        k=expver   | v=['0001']
        k=levelist | v=['']
        k=levtype  | v=['sfc']
        k=param    | v=['131', '132', '167']
        k=step     | v=['0']
        k=stream   | v=['oper']
        k=time     | v=['1800']
        k=type     | v=['an']
        """
        internal_mars_selection = UserInputMapper.map_selection_to_internal(mars_selection)
        fdb_tool_request = FDBToolRequest.from_internal_mars_selection(internal_mars_selection)
        return IndexAxis(self.FDB.axes(fdb_tool_request.tool_request, level))

    def enabled(self, control_identifier: ControlIdentifier) -> bool:
        """
        Check whether a specific control identifier is enabled

        Parameters
        ----------
        `control_identifier` : `ControlIdentifier`
            A given control identifier

        Returns
        -------
        `bool`
            `True` if the given control identifier is set, `False` otherwise.

        Examples
        --------
        >>> fdb_config = yaml.safe_load(fdb_config_path)
        >>> fdb_config["writable"] = False
        >>> fdb = pyfdb.FDB(fdb_config)
        >>> fdb.enabled(ControlIdentifier.NONE) # == True
        >>> fdb.enabled(ControlIdentifier.LIST) # == True
        >>> fdb.enabled(ControlIdentifier.RETRIEVE) # == True
        >>> fdb.enabled(ControlIdentifier.ARCHIVE) # == False, default True
        >>> fdb.enabled(ControlIdentifier.WIPE) # == False, default True
        >>> fdb.enabled(ControlIdentifier.UNIQUEROOT) # == True

        """
        return self.FDB.enabled(control_identifier._to_raw())

    def dirty(self):
        """
        Return whether a flush of the FDB is needed, for example if data was archived since the last flush.

        Parameters
        ----------
        None

        Returns
        -------
        `bool`
            `True` if an archive happened and a flush is needed, `False` otherwise.


        Examples
        --------
        >>> fdb = FDB(fdb_config_file)
        >>> filename = <data_path>
        >>> fdb.archive(open(filename, "rb").read())
        >>> fdb.dirty()                         # == True
        >>> fdb.flush()
        >>> fdb.dirty()                         # == False

        """
        return self.FDB.dirty()

    def config(self) -> tuple[dict[str, Any], dict[str, Any]]:
        """
        Return the system and user configuration of the underlying FDB.

        Parameters
        ----------
        None

        Returns
        -------
        `tuple[dict[str, Any], dict[str, Any]]`
            Python dictionaries describing the system and user configuration


        Examples
        --------
        >>> fdb = FDB(config_file)
        >>> system_config, user_config = fdb.config()
        >>> print(system_config)
        >>> print(user_config)

        """
        system_config = ConfigMapper.from_json(self.FDB.config().json())
        user_config = ConfigMapper.from_json(self.FDB.config().userConfig().json())

        return system_config, user_config

    def __repr__(self) -> str:
        return repr(self.FDB)
