# -*- coding: utf-8 -*-

# (c) Jérôme Laheurte 2015-2026
# See LICENSE.txt

import inspect
import re
import collections
import warnings

from ptk.utils import Singleton, callback_by_name, chars


# Position in the input stream, as 1-based (column, line) — see restart_lexer().
LexerPosition = collections.namedtuple('_LexerPosition', ['column', 'line'])

# Module-level staging area: the @token decorator appends (func, rx, types)
# tuples here and _LexerMeta drains it when the lexer class is created.
_TOKREGISTER = []


class _LexerMeta(type):
    """
    Metaclass collecting the methods registered via @token into the new
    class's ``__tokens__`` attribute.
    """
    def __new__(mcs, name, bases, attrs):
        global _TOKREGISTER # pylint: disable=W0603
        # Drain the registry up front so it is empty for the next class
        # definition even if creation fails below.
        pending, _TOKREGISTER = _TOKREGISTER, []
        # Set of token names, list of (rx, callback, defaultType)
        attrs['__tokens__'] = (set(), [])
        klass = super().__new__(mcs, name, bases, attrs)
        for method, rx, toktypes in pending:
            # Look the callback up by name so subclass overrides are honored.
            klass.add_token_type(method.__name__, callback_by_name(method.__name__), rx, toktypes)
        return klass


def token(rx, types=None):
    """
    Method decorator registering a lexer method as a token recognizer
    for the regular expression *rx*. *types*, when given, lists the
    token type names the method may produce instead of its own name.
    """
    def _wrap(func):
        # Reject two distinct functions sharing a name; re-decorating the
        # very same function object is allowed.
        for registered, _, _ in _TOKREGISTER:
            if registered.__name__ == func.__name__ and registered is not func:
                raise TypeError(f'Duplicate token method name "{func.__name__}"')
        _TOKREGISTER.append((func, rx, types))
        return func
    return _wrap


class SkipToken(Exception):
    """
    Consumers raise this from their *feed* method to tell the lexer that
    the current token should be discarded instead of emitted.
    """


class LexerError(Exception):
    """
    Unrecognized token in input

    :ivar lineno: Line in input
    :ivar colno: Column in input
    """

    def __init__(self, char, pos):
        super().__init__(f'Unrecognized token {char!r}')
        self.position = pos

    # Getters kept for compatibility with <1.3.8

    @property
    def colno(self):
        """Column where the unrecognized token starts"""
        return self.position.column

    @property
    def lineno(self):
        """Line where the unrecognized token starts"""
        return self.position.line


class EOF(metaclass=Singleton):
    """
    End symbol. Duck-types as a Token whose *type* and *value* are the
    EOF singleton itself (see LexerBase._MutableToken.token).
    """

    # Textual representation of the symbol — presumably consumed by the
    # Singleton metaclass for repr; TODO confirm against ptk.utils.
    __reprval__ = '$'

    @property
    def type(self):
        """Read-only attribute for Token duck-typing"""
        return self
    @property
    def value(self):
        """Read-only attribute for Token duck-typing"""
        return self


class LexerBase(metaclass=_LexerMeta):
    """
    This defines the interface for lexer classes. For concrete
    implementations, see :py:class:`ProgressiveLexer` and
    :py:class:`ReLexer`.
    """

    Token = collections.namedtuple('Token', ['type', 'value', 'position'])

    # Shut up pychecker. Those are actually set by the metaclass.
    __tokens__ = ()

    class _MutableToken: # pylint: disable=too-few-public-methods
        """Mutable token handed to user callbacks, which may rewrite its type or value."""
        def __init__(self, type_, value, position):
            self.type = type_
            self.value = value
            self.position = position

        def token(self):
            """Returns the immutable equivalent"""
            # EOF as type or value marks end of input; EOF is its own token.
            return EOF if EOF in [self.type, self.value] else LexerBase.Token(self.type, self.value, self.position)

    def __init__(self):
        super().__init__()

        self._pos = None
        self._consumer = None

        self.restart_lexer()

    def restart_lexer(self, reset_pos=True):
        """
        Resets the lexer state: clears any active consumer and, when
        *reset_pos* is true, rewinds the position to line 1, column 1.
        """
        if reset_pos:
            self._pos = LexerPosition(column=1, line=1)
            self._input = []
        self._consumer = None

    def restartLexer(self, resetPos=True): # pylint: disable=invalid-name
        """Deprecated alias for :py:meth:`restart_lexer`."""
        # stacklevel=2 makes the warning point at the caller, not this shim.
        warnings.warn('restartLexer is deprecated in favor of restart_lexer', DeprecationWarning, stacklevel=2)
        self.restart_lexer(reset_pos=resetPos)

    def position(self):
        """
        :return: The current position in stream as a 2-tuple (column, line).
        """
        return self._pos

    def advance_column(self, count=1):
        """
        Advances the current position by *count* columns.
        """
        self._pos = self._pos._replace(column=self._pos.column + count)

    def advanceColumn(self, count=1): # pylint: disable=invalid-name
        """Deprecated alias for :py:meth:`advance_column`."""
        warnings.warn('advanceColumn is deprecated in favor of advance_column', DeprecationWarning, stacklevel=2)
        self.advance_column(count=count)

    def advance_line(self, count=1):
        """
        Advances the current position by *count* lines, resetting the column to 1.
        """
        self._pos = self._pos._replace(column=1, line=self._pos.line + count)

    def advanceLine(self, count=1): # pylint: disable=invalid-name
        """Deprecated alias for :py:meth:`advance_line`."""
        warnings.warn('advanceLine is deprecated in favor of advance_line', DeprecationWarning, stacklevel=2)
        self.advance_line(count=count)

    @staticmethod
    def ignore(char):
        """
        Override this to ignore characters in input stream. The
        default is to ignore spaces and tabs.

        :param char: The character to test
        :return: True if *char* should be ignored
        """
        return char in chars(' ') + chars('\t')

    def set_consumer(self, consumer):
        """
        Sets the current consumer. A consumer is an object with a
        *feed* method; all characters seen on the input stream after
        the consumer is set are passed directly to it. When the *feed*
        method returns a 2-tuple (type, value), the corresponding
        token is generated and the consumer reset to None. This may be
        handy to parse tokens that are not easily recognized by a
        regular expression but easily by code; for instance the
        following lexer recognizes C strings without having to use
        negative lookahead:

        .. code-block:: python

           class MyLexer(ReLexer):
               @token('"')
               def cstring(self, tok):
                   class CString:
                       def __init__(self):
                           self.state = 0
                           self.value = io.StringIO()
                       def feed(self, char):
                           if self.state == 0:
                               if char == '"':
                                   return 'cstring', self.value.getvalue()
                               if char == '\\\\':
                                   self.state = 1
                               else:
                                   self.value.write(char)
                           elif self.state == 1:
                               self.value.write(char)
                               self.state = 0
                   self.set_consumer(CString())

        You can also raise SkipToken instead of returning a token if it
        is to be ignored (comments).
        """
        self._consumer = consumer

    def setConsumer(self, consumer): # pylint: disable=invalid-name
        """Deprecated alias for :py:meth:`set_consumer`."""
        warnings.warn('setConsumer is deprecated in favor of set_consumer', DeprecationWarning, stacklevel=2)
        self.set_consumer(consumer)

    def consumer(self):
        """
        :return: the currently active consumer, or None (see :py:meth:`set_consumer`).
        """
        return self._consumer

    def parse(self, string): # pragma: no cover
        """
        Parses the whole *string*; returns the start symbol semantic value
        """
        raise NotImplementedError

    def new_token(self, tok): # pragma: no cover
        """
        This method will be invoked as soon as a token is recognized on input.

        :param tok: The token. This is a named tuple with *type* and *value* attributes.
        """
        raise NotImplementedError

    def newToken(self, tok): # pylint: disable=invalid-name
        """Deprecated alias for :py:meth:`new_token`."""
        warnings.warn('newToken is deprecated in favor of new_token', DeprecationWarning, stacklevel=2)
        self.new_token(tok)

    @classmethod
    def add_token_type(cls, name, callback, regex, types=None):
        """
        Registers a token type on this class.

        :param name: Name of the recognizing method; also the default token
            type when *types* is None.
        :param callback: Callable invoked with (lexer, token) when *regex* matches.
        :param regex: The regular expression recognizing the token.
        :param types: Optional list of token type names the method may produce.
        """
        for type_name in [name] if types is None else types:
            if type_name is not EOF:
                cls.__tokens__[0].add(type_name)
        cls.__tokens__[1].append((regex, callback, name if types is None else None))

    @classmethod
    def _all_tokens(cls):
        # Aggregates __tokens__ declared on this class and every LexerBase
        # ancestor, so subclasses inherit their parents' token types.
        tokens = (set(), [])
        for base in inspect.getmro(cls):
            if issubclass(base, LexerBase):
                tokens[0].update(base.__tokens__[0])
                tokens[1].extend(base.__tokens__[1])
        return tokens

    @classmethod
    def token_types(cls):
        """
        :return: the set of all token names, as strings.
        """
        return cls._all_tokens()[0]

    @classmethod
    def tokenTypes(cls): # pylint: disable=invalid-name
        """Deprecated alias for :py:meth:`token_types`."""
        warnings.warn('tokenTypes is deprecated in favor of token_types', DeprecationWarning, stacklevel=2)
        return cls.token_types()


class ReLexer(LexerBase): # pylint: disable=W0223
    """
    Concrete lexer based on Python regular expressions.
    """
    def __init__(self):
        # Pre-compile every registered token regex, anchored at the start:
        # _find_match and _guess_token always match against string[pos:].
        self._regexes = []
        for rx, callback, default_type in self._all_tokens()[1]:
            # Token patterns may be str or bytes; the anchor prefix must be of the same type.
            crx = re.compile((b'^' if isinstance(rx, bytes) else '^') + rx)
            self._regexes.append((crx, callback, default_type))
        super().__init__()

    def _parse(self, string, pos):
        # Scans *string* from *pos*, emitting tokens via new_token(), and
        # returns the position reached. Raises LexerError on unrecognized input.
        # pylint: disable=too-many-nested-blocks
        while pos < len(string):
            char = string[pos]
            try:
                if self.consumer() is None:
                    if self.ignore(char):
                        pos += 1
                        continue
                    pos = self._find_match(string, pos)
                else:
                    # A consumer is active: feed it characters until it returns
                    # a (type, value) tuple or raises SkipToken.
                    try:
                        tok = self.consumer().feed(char)
                    except SkipToken:
                        # Consumer asked for the whole token to be dropped.
                        self.set_consumer(None)
                    else:
                        if tok is not None:
                            self.set_consumer(None)
                            # A None type means "recognized but not emitted".
                            if tok[0] is not None:
                                self.new_token(self.Token(*tok, self.position()))
                    pos += 1
            finally:
                # Keep line/column tracking up to date even when _find_match
                # raises LexerError; only *char* (one character) is accounted
                # for here — _find_match advances the rest of a match itself.
                if char in chars('\n'):
                    self.advance_line()
                else:
                    self.advance_column()
        return pos

    def parse(self, string):
        """
        Parses the whole *string* and emits an EOF token at the end.
        On LexerError the lexer state is reset before re-raising.
        """
        try:
            self._parse(string, 0)
            return self.new_token(EOF)
        except LexerError:
            self.restart_lexer()
            raise

    def _find_match(self, string, pos):
        # Tries every token regex at *pos* and keeps the longest match; ties
        # go to the earliest-registered pattern (strict '>' below). Returns
        # the position past the match, or raises LexerError if nothing matches.
        match = None
        matchlen = 0
        pos2d = self.position()

        # NOTE(review): string[pos:] copies the tail of the input for every
        # candidate pattern — quadratic on long inputs. rx.match(string, pos)
        # would avoid the copy but subtly changes '^'/lookbehind/\b semantics
        # at the match boundary; confirm before changing.
        for rx, callback, default_type in self._regexes:
            mtc = rx.match(string[pos:])
            if mtc:
                value = mtc.group(0)
                if len(value) > matchlen:
                    match = value, callback, default_type
                    matchlen = len(value)

        if match:
            value, callback, default_type = match
            tok = self._MutableToken(default_type, value, pos2d)
            callback(self, tok) # user callback may rewrite tok.type/value or install a consumer
            pos += matchlen
            # Suppress the token when a consumer was installed or the callback
            # set tok.type to None.
            if self.consumer() is None and tok.type is not None:
                self.new_token(tok.token())
            # -1 because _parse's finally clause advances one more for *char*.
            self.advance_column(matchlen - 1)
            return pos

        raise LexerError(self._guess_token(string, pos), pos2d)

    def _guess_token(self, string, pos):
        # Best-effort extraction of the offending text for the error message:
        # extends from *pos* until end of input, an ignorable character, or a
        # position where some token regex matches again.
        start = pos
        while True:
            pos += 1
            if pos == len(string) or self.ignore(string[pos]):
                break

            for rx, _, _ in self._regexes:
                mtc = rx.match(string[pos:])
                if mtc:
                    break
            else:
                continue # no pattern matches here; keep extending
            break # a pattern matched: the bad token ends at pos
        return string[start:pos]
