#! /usr/bin/env python
"""
Amply: a GNU MathProg data-parser

This module implements a parser for a subset of the GNU MathProg language,
namely parameter and set data records.

Amply uses the Pyparsing library to parse input:
    http://pyparsing.wikispaces.com

Usage:
    Create an Amply object, optionally passing in a string to parse.

    >>> a = Amply("param T := 3;")

    Symbols that are defined can be accessed as attributes or items.

    >>> print(a.T)
    3.0
    >>> print(a['T'])
    3.0

    The load_string and load_file methods can be used to parse additional data

    >>> a.load_string("set N := 1 2 3;")
    >>> a.load_file(open('some_file.dat'))

    An Amply object can be constructed from a file using Amply.from_file

    >>> a = Amply.from_file(open('some_file.dat'))


How it works:
    The Amply class parses the input using Pyparsing. This results in a list
    of Stmt objects, each representing a MathProg statement. The statements
    are then evaluated by calling their eval() method.
"""
from pyparsing import (
    Combine,
    Group,
    Keyword,
    Literal,
    NotAny,
    OneOrMore,
    Optional,
    ParseException,
    ParserElement,
    ParseResults,
    QuotedString,
    SkipTo,
    StringEnd,
    Suppress,
    Word,
    ZeroOrMore,
    alphanums,
    delimitedList,
    lineEnd,
    nums,
    oneOf,
)

ParserElement.enablePackrat()


__all__ = ["Amply", "AmplyError"]


class AmplyObject(object):
    """
    Represents the value of some object (e.g. a Set object
    or Parameter object
    """


class AmplyStmt(object):
    """
    Represents a statement that has been parsed

    Statements implement an eval method. When the eval method is called, the
    Stmt object is responsible for modifying the Amply object that
    gets passed in appropriately (i.e. by adding or modifying a symbol)
    """

    def eval(self, amply):  # pragma: no coverage
        raise NotImplementedError()


class NoDefault(object):
    """
    Sentinel
    """


class AmplyError(Exception):
    """
    Amply Exception Class
    """


def chunk(it, n):
    """
    Yields n-tuples from iterator
    """
    c = []
    for i, x in enumerate(it):
        c.append(x)
        if (i + 1) % n == 0:
            yield tuple(c)
            c = []
    if c:
        yield tuple(c)


def access_data(curr_dict, keys, default=NoDefault):
    """
    Convenience method for walking down a series of nested dictionaries

    keys is a tuple of strings

    access_data(dict, ('key1', 'key2', 'key3') is equivalent to
    dict['key1']['key2']['key3']

    All dictionaries must exist, but the last dictionary in the hierarchy
    does not have to contain the final key, if default is set.
    """

    if keys in curr_dict:
        return curr_dict[keys]

    if isinstance(keys, tuple):
        for sym in keys[:-1]:
            curr_dict = curr_dict[sym]
        r = curr_dict.get(keys[-1], default)
        if r is not NoDefault:
            return r

    if default is not NoDefault:
        return default

    raise KeyError()


def transpose(data):
    """
    Transpose a matrix represented as a dict of dicts
    """

    rows = list(data.keys())
    cols = set()
    for d in list(data.values()):
        cols.update(list(d.keys()))

    d = {}

    for col in cols:
        d[col] = {}
        for row in rows:
            d[col][row] = data[row][col]
    return d


class SetDefStmt(AmplyStmt):
    """
    Represents a set definition statement
    """

    def __init__(self, tokens):
        assert tokens[0] == "set"
        self.name = tokens[1]
        self.dimen = tokens.get("dimen", None)
        self.subscripts = len(tokens.get("subscripts", ()))

    def __repr__(self):  # pragma: no cover
        return "<%s: %s[%s]>" % (self.__class__.__name__, self.name, self.dimen)

    def eval(self, amply):
        set_obj = SetObject(subscripts=self.subscripts, dimen=self.dimen)
        amply._addSymbol(self.name, set_obj)


class SetStmt(AmplyStmt):
    """
    Represents a set statement
    """

    def __init__(self, tokens):
        assert tokens[0] == "set"
        self.name = tokens[1]
        self.records = tokens.get("records")
        self.member = tokens.get("member", None)

    def __repr__(self):
        return "<%s: %s[%s] = %s>" % (
            self.__class__.__name__,
            self.name,
            self.member,
            self.records,
        )

    def eval(self, amply):
        if self.name in amply.symbols:
            obj = amply.symbols[self.name]
            assert isinstance(obj, SetObject)
        else:
            obj = SetObject()

        obj.addData(self.member, self.records)
        amply._addSymbol(self.name, obj)


class SliceRecord(object):
    """
    Represents a parameter or set slice record
    """

    def __init__(self, tokens):
        self.components = tuple(tokens)

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.components)


class TabularRecord(object):
    """
    Represents a parameter tabular record
    """

    def __init__(self, tokens):
        self._columns = tokens.columns
        self._data = tokens.data
        self.transposed = False

    def setTransposed(self, t):
        self.transposed = t

    def _rows(self):
        c = Chunker(self._data)
        while c.notEmpty():
            row_label = c.chunk()
            data = c.chunk(len(self._columns))
            yield row_label, data

    def data(self):
        d = {}
        for row, data in self._rows():
            d[row] = {}
            for col, value in zip(self._columns, data):
                d[row][col] = value
        if self.transposed:
            return transpose(d)
        else:
            return d

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.data())


class MatrixData(TabularRecord):
    """
    Represents a set matrix data record
    """

    def _rows(self):
        for row in self._data:
            yield row[0], row[1:]

    def data(self):
        d = []
        for row_label, data in self._rows():
            for col, value in zip(self._columns, data):
                if value == "+":
                    if self.transposed:
                        d.append((col, row_label))
                    else:
                        d.append((row_label, col))
        return d


class ParamStmt(AmplyStmt):
    """
    Represents a parameter statement
    """

    def __init__(self, tokens):
        assert tokens[0] == "param"
        self.name = tokens.name
        self.records = tokens.records
        self.default = tokens.get("default", 0)
        self.tokens = tokens

    def __repr__(self):
        return "<%s: %s = %s>" % (self.__class__.__name__, self.name, self.records)

    def eval(self, amply):
        if self.name in amply.symbols:
            obj = amply.symbols[self.name]
            assert isinstance(obj, ParamObject)
        else:
            obj = ParamObject()

        if obj.subscripts == 0:
            if len(self.records) != 1:
                raise AmplyError(
                    "Error in number of records of {} when reading {}".format(
                        self.name, self.tokens
                    )
                )
            assert len(self.records[0]) == 1
            amply._addSymbol(self.name, self.records[0][0])
        else:
            obj.addData(self.records.asList(), default=self.default)

            amply._addSymbol(self.name, obj)


class Chunker(object):
    """
    Chunker class - used to consume tuples from
    an iterator
    """

    def __init__(self, it):
        """
        it is a sequence or iterator
        """

        self.it = iter(it)
        self.empty = False
        self.next = None
        self._getNext()

    def _getNext(self):
        """
        basically acts as a 1 element buffer so that
        we can detect if we've reached the end of the
        iterator
        """

        old = self.next
        try:
            self.next = next(self.it)
        except StopIteration:
            self.empty = True
        return old

    def notEmpty(self):
        """
        Test if the iterator has reached the end
        """

        return not self.empty

    def chunk(self, n=None):
        """
        Return a list with the next n elements from the iterator,
        or the next element if n is None
        """
        if n is None:
            return self._getNext()
        return [self._getNext() for i in range(n)]


class ParamTabbingStmt(AmplyStmt):
    """
    Represents a parameter tabbing data statement
    """

    def __init__(self, tokens):
        assert tokens[0] == "param"

        self.default = tokens.get("default", 0)
        self.params = tokens.params
        self.data = tokens.data

    def eval(self, amply):
        for i, param_name in enumerate(self.params):
            if param_name in amply.symbols:
                obj = amply.symbols[param_name]
            else:
                raise AmplyError("Param %s not previously defined" % param_name)

            for subs, data in self._rows(obj.subscripts):
                obj.setValue(subs, data[i])

    def _rows(self, n_subscripts):
        c = Chunker(self.data)
        while c.notEmpty():
            subscripts = c.chunk(n_subscripts)
            data = c.chunk(len(self.params))
            yield (subscripts, data)


class ParamDefStmt(AmplyStmt):
    """
    Represents a parameter definition
    """

    def __init__(self, tokens):
        assert tokens[0] == "param"
        self.name = tokens.get("name")
        self.subscripts = tokens.get("subscripts")
        self.default = tokens.get("default", NoDefault)

    def eval(self, amply):
        def _getDimen(symbol):
            s = amply[symbol]
            if s is None or s.dimen is None:
                return 1
            return s.dimen

        try:
            num_subscripts = sum(_getDimen(s) for s in self.subscripts)
        except TypeError:
            num_subscripts = 1
        amply._addSymbol(self.name, ParamObject(num_subscripts, self.default))


class ParamObject(AmplyObject):
    def __init__(self, subscripts=0, default=NoDefault):
        self.subscripts = subscripts
        self.default = default

        self.data = {}

        # initial slice is all *'s
        self._setSlice(SliceRecord(["*"] * self.subscripts))

    def addData(self, data, default=0):
        def _v(v):
            if v == ".":
                return default
            return v

        for record in data:
            if isinstance(record, SliceRecord):
                self._setSlice(record)
            elif isinstance(record, list):
                # a plain data record
                rec_len = len(self.free_indices) + 1
                if len(record) % rec_len != 0:
                    raise AmplyError(
                        "Incomplete data record, expecting %d"
                        " subscripts per value" % len(self.free_indices)
                    )
                for c in chunk(record, len(self.free_indices) + 1):
                    self.setValue(c[:-1], _v(c[-1]))
            elif isinstance(record, TabularRecord):
                record_data = record.data()
                for row_symbol in record_data:
                    for col_symbol, value in list(record_data[row_symbol].items()):
                        self.setValue((row_symbol, col_symbol), _v(value))

    def _setSlice(self, slice):
        self.current_slice = list(slice.components)  # copy
        self.free_indices = [i for i, v in enumerate(self.current_slice) if v == "*"]

    def setValue(self, symbols, value):
        if value == ".":
            value = self.default

        assert len(symbols) == len(self.free_indices)
        symbol_path = self.current_slice
        for index, symbol in zip(self.free_indices, symbols):
            symbol_path[index] = symbol

        curr_dict = self.data
        for symbol in symbol_path[:-1]:
            if symbol not in curr_dict:
                curr_dict[symbol] = {}
            curr_dict = curr_dict[symbol]
        curr_dict[symbol_path[-1]] = value

    def __getitem__(self, key):
        return access_data(self.data, key, self.default)

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.data)

    def __eq__(self, other):
        return self.data == other

    def __ne__(self, other):
        return self.data != other


class SetObject(AmplyObject):
    def __init__(self, subscripts=0, dimen=None):
        self.dimen = dimen
        self.subscripts = subscripts

        if self.subscripts == 0:
            self.data = []
        else:
            self.data = {}

        self.current_slice = None

    def addData(self, member, data):
        dest_list = self._memberList(member)

        if self.dimen is not None and self.current_slice is None:
            self._setSlice(["*"] * self.dimen)

        for record in data:
            if isinstance(record, SliceRecord):
                self._setSlice(record.components)
            elif isinstance(record, MatrixData):
                if self.dimen is None:
                    self.dimen = 2
                    self._setSlice(["*"] * 2)
                d = record.data()
                for v in d:
                    self._addValue(dest_list, v)

            else:  # simple-data
                self._addSimpleData(dest_list, record)

    def _setSlice(self, slice):
        self.current_slice = slice
        self.free_indices = [i for i, v in enumerate(self.current_slice) if v == "*"]

    def _memberList(self, member):
        if member is None:
            return self.data
        assert len(member) == self.subscripts

        curr_dict = self.data
        for symbol in member[:-1]:
            if symbol not in curr_dict:
                curr_dict[symbol] = {}
            curr_dict = curr_dict[symbol]
        if member[-1] not in curr_dict:
            curr_dict[member[-1]] = []
        return curr_dict[member[-1]]

    def _dataLen(self, d):
        if isinstance(d, (tuple, list)):
            return len(d)
        return 1

    def _addSimpleData(self, data_list, data):
        if isinstance(data[0], ParseResults):
            inferred_dimen = len(data[0])
        else:
            inferred_dimen = 1

        if self.dimen is None:
            # infer dimension from records
            self.dimen = inferred_dimen

        if self.current_slice is None:
            self._setSlice(tuple(["*"] * self.dimen))

        if len(self.free_indices) == inferred_dimen:
            for d in data.asList():
                self._addValue(data_list, d)
        elif len(self.free_indices) > 1 and inferred_dimen:
            for c in chunk(data, len(self.free_indices)):
                self._addValue(data_list, c)
        else:
            raise AmplyError(
                "Dimension of elements (%d) does not match "
                "declared dimension, (%d)" % (inferred_dimen, self.dimen)
            )

    def _addValue(self, data_list, item):
        if self.dimen == 1:
            data_list.append(item)
        else:
            assert len(self.free_indices) == self._dataLen(item)

            to_add = list(self.current_slice)
            if isinstance(item, (tuple, list)):
                for index, value in zip(self.free_indices, item):
                    to_add[index] = value
            else:
                assert len(self.free_indices) == 1
                to_add[self.free_indices[0]] = item
            data_list.append(tuple(to_add))

    def __getitem__(self, key):
        if not self.subscripts:
            return self.data[key]
        return access_data(self.data, key)

    def __len__(self):
        return len(self.data)

    def __iter__(self):
        return iter(self.data)

    def __contains__(self, item):
        return item in self.data

    def __eq__(self, other):
        return self.data == other

    def __ne__(self, other):
        return self.data != other

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.data)


def mark_transposed(tokens):
    tokens[0].setTransposed(True)
    return tokens


# What follows is a Pyparsing description of the grammar

index = Word(alphanums, exact=1)
symbol = Word(alphanums, bodyChars=alphanums + "_", min=1)
sign = Optional(oneOf("+ -"))
integer = Combine(sign + Word(nums)).setParseAction(lambda t: int(t[0]))
number = Combine(
    Word("+-" + nums, nums)
    + Optional("." + Optional(Word(nums)))
    + Optional(oneOf("e E") + Word("+-" + nums, nums))
).setParseAction(lambda t: float(t[0]))

LPAREN = Suppress("(")
RPAREN = Suppress(")")
LBRACE = Suppress("{")
RBRACE = Suppress("}")
LBRACKET = Suppress("[")
RBRACKET = Suppress("]")
END = Suppress(";")

PLUS = Literal("+")
MINUS = Literal("-")

# Keywords
KW_PARAM = Keyword("param")
KW_SET = Keyword("set")
KW_DEFAULT = Keyword("default")

single = number ^ symbol | QuotedString('"') | QuotedString("'")
tuple_ = Group(LPAREN + delimitedList(single) + RPAREN)

domain_index = Suppress(index + Keyword("in"))
subscript_domain = (
    LBRACE
    + delimitedList(Optional(domain_index) + symbol).setResultsName("subscripts")
    + RBRACE
)

data = single | tuple_

# should not match a single (tr)
simple_data = Group(NotAny("(tr)") + data + ZeroOrMore(Optional(Suppress(",")) + data))
# the first element of a set data record  cannot be 'dimen', or else
# these would match set_def_stmts
non_dimen_simple_data = ~Keyword("dimen") + simple_data

matrix_row = Group(single + OneOrMore(PLUS | MINUS))
matrix_data = (
    ":"
    + OneOrMore(single).setResultsName("columns")
    + ":="
    + OneOrMore(matrix_row).setResultsName("data")
)
matrix_data.setParseAction(MatrixData)

tr_matrix_data = Suppress("(tr)") + matrix_data
tr_matrix_data.setParseAction(mark_transposed)

set_slice_component = number | symbol | "*"
set_slice_record = LPAREN + NotAny("tr") + delimitedList(set_slice_component) + RPAREN
set_slice_record.setParseAction(SliceRecord)

_set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(":=")
set_record = simple_data | _set_record
non_dimen_set_record = non_dimen_simple_data | _set_record

set_def_stmt = (
    KW_SET
    + symbol
    + Optional(subscript_domain)
    + Optional(Keyword("dimen") + integer.setResultsName("dimen"))
    + END
)
set_def_stmt.setParseAction(SetDefStmt)

set_member = LBRACKET + delimitedList(data) + RBRACKET

set_stmt = (
    KW_SET
    + symbol
    + Optional(set_member).setResultsName("member")
    + Group(
        non_dimen_set_record + ZeroOrMore(Optional(Suppress(",")) + set_record)
    ).setResultsName("records")
    + END
)
set_stmt.setParseAction(SetStmt)

subscript = single

param_data = data | "."
plain_data = (
    param_data
    | subscript + ZeroOrMore(Optional(Suppress(",")) + subscript) + param_data
)
# should not match a single (tr)
plain_data_record = Group(
    NotAny("(tr)") + plain_data + NotAny(plain_data)
    | plain_data + OneOrMore(plain_data) + NotAny(plain_data)
)

tabular_record = (
    ":"
    + OneOrMore(single).setResultsName("columns")
    + ":="
    + OneOrMore(single | ".").setResultsName("data")
)
tabular_record.setParseAction(TabularRecord)

tr_tabular_record = Suppress("(tr)") + tabular_record
tr_tabular_record.setParseAction(mark_transposed)

param_slice_component = number | symbol | "*"
param_slice_record = LBRACKET + delimitedList(param_slice_component) + RBRACKET
param_slice_record.setParseAction(SliceRecord)

param_record = (
    param_slice_record
    | plain_data_record
    | tabular_record
    | tr_tabular_record
    | Suppress(":=")
)

param_default = Optional(KW_DEFAULT + single.setResultsName("default"))

param_stmt = (
    KW_PARAM
    + ~KW_DEFAULT
    + symbol.setResultsName("name")
    + param_default
    + Group(OneOrMore(param_record)).setResultsName("records")
    + END
)
param_stmt.setParseAction(ParamStmt)

param_tabbing_stmt = (
    KW_PARAM
    + param_default
    + ":"
    + Optional(symbol + ": ")
    + OneOrMore(data).setResultsName("params")
    + ":="
    + ZeroOrMore(single).setResultsName("data")
    + END
)
param_tabbing_stmt.setParseAction(ParamTabbingStmt)

param_def_stmt = (
    KW_PARAM
    + symbol.setResultsName("name")
    + Optional(subscript_domain)
    + param_default
    + END
)
param_def_stmt.setParseAction(ParamDefStmt)

stmts = set_stmt | set_def_stmt | param_def_stmt | param_stmt | param_tabbing_stmt
grammar = ZeroOrMore(stmts) + StringEnd()
grammar.ignore("#" + SkipTo(lineEnd))
grammar.ignore("end;" + SkipTo(lineEnd))


class Amply(object):
    """
    Data parsing interface
    """

    def __init__(self, s=""):
        """
        Create an Amply parser instance

        @param s (default ""): initial string to parse
        """

        self.symbols = {}

        self.load_string(s)

    def __getitem__(self, key):
        """
        Override so that symbols can be accessed using
        [] subscripts
        """
        if key in self.symbols:
            return self.symbols[key]

    def __getattr__(self, name):
        """
        Override so that symbols can be accessed as attributes
        """
        if name in self.symbols:
            return self.symbols[name]
        return super(Amply, self).__getattr__(name)

    def _addSymbol(self, name, value):
        """
        Adds a symbol to this instance.

        Typically, this class is called by objects created by
        the parser, and should not need to be called by users
        directly
        """

        self.symbols[name] = value

    def load_string(self, string):
        """
        Load and parse string

        @param string string to parse
        """
        try:
            for obj in grammar.parseString(string):
                obj.eval(self)
        except ParseException as ex:
            print(string)
            raise ParseException(ex)

    def load_file(self, f):
        """
        Load and parse file

        @param f file-like object
        """
        self.load_string(f.read())

    @staticmethod
    def from_file(f):
        """
        Create a new Amply instance from file (factory method)

        @param f file-like object
        """
        return Amply(f.read())


if __name__ == "__main__":

    grammar.create_diagram("parser_rr_diag.html")