# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""An extensible ASCII table reader and writer.

fixedwidth.py:
  Read or write a table with fixed width columns.

:Copyright: Smithsonian Astrophysical Observatory (2011)
:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu)
"""

from __future__ import absolute_import, division, print_function

from ...extern.six.moves import zip

from . import core
from .core import InconsistentTableError


class FixedWidthSplitter(core.BaseSplitter):
    """Split line based on fixed start and end positions for each ``col`` in
    ``self.cols``.

    This class requires that the Header class will have defined ``col.start``
    and ``col.end`` for each column.  The reference to the ``header.cols`` gets
    put in the splitter object by the base Reader.read() function just in time
    for splitting data lines by a ``data`` object.

    Note that the ``start`` and ``end`` positions are defined in the pythonic
    style so line[start:end] is the desired substring for a column.  This splitter
    class does not have a hook for ``process_lines`` since that is generally not
    useful for fixed-width input.
    """
    delimiter_pad = ''
    bookend = False

    def __call__(self, lines):
        for line in lines:
            vals = [line[x.start:x.end] for x in self.cols]
            if self.process_val:
                yield [self.process_val(x) for x in vals]
            else:
                yield vals

    def join(self, vals, widths):
        pad = self.delimiter_pad or ''
        delimiter = self.delimiter or ''
        padded_delim = pad + delimiter + pad
        if self.bookend:
            bookend_left = delimiter + pad
            bookend_right = pad + delimiter
        else:
            bookend_left = ''
            bookend_right = ''
        vals = [' ' * (width - len(val)) + val for val, width in zip(vals, widths)]
        return bookend_left + padded_delim.join(vals) + bookend_right


class FixedWidthHeader(core.BaseHeader):
    """Fixed width table header reader.

    The key settable class attributes are:

    :param auto_format: format string for auto-generating column names
    :param start_line: None, int, or a function of ``lines`` that returns None or int
    :param comment: regular expression for comment lines
    :param splitter_class: Splitter class for splitting data lines into columns
    :param position_line: row index of line that specifies position (default = 1)
    :param position_char: character used to write the position line (default = "-")
    :param col_starts: list of start positions for each column (0-based counting)
    :param col_ends: list of end positions (inclusive) for each column
    :param delimiter_pad: padding around delimiter when writing (default = None)
    :param bookend: put the delimiter at start and end of line when writing (default = False)
    """

    position_line = None   # secondary header line position

    def get_line(self, lines, index):
        for i, line in enumerate(self.process_lines(lines)):
            if i == index:
                break
        else:  # No header line matching
            raise InconsistentTableError('No header line found in table')
        return line

    def get_cols(self, lines):
        """Initialize the header Column objects from the table ``lines``.

        Based on the previously set Header attributes find or create the column names.
        Sets ``self.cols`` with the list of Columns. 

        :param lines: list of table lines
        :returns: None
        """

        # See "else" clause below for explanation of start_line and position_line
        start_line = core._get_line_index(self.start_line, self.process_lines(lines))
        position_line = core._get_line_index(self.position_line, self.process_lines(lines))

        # If start_line is none then there is no header line.  Column positions are
        # determined from first data line and column names are either supplied by user
        # or auto-generated.
        if start_line is None:
            if position_line is not None:
                raise ValueError("Cannot set position_line without also setting header_start")
            data_lines = self.data.process_lines(lines)
            if not data_lines:
                raise InconsistentTableError(
                    'No data lines found so cannot autogenerate column names')
            vals, starts, ends = self.get_fixedwidth_params(data_lines[0])

            self.names = [self.auto_format % i for i in range(1, len(vals) + 1)]

        else:
            # This bit of code handles two cases:
            # start_line = <index> and position_line = None
            #    Single header line where that line is used to determine both the
            #    column positions and names.
            # start_line = <index> and position_line = <index2>
            #    Two header lines where the first line defines the column names and
            #    the second line defines the column positions

            if position_line is not None:
                # Define self.col_starts and self.col_ends so that the call to
                # get_fixedwidth_params below will use those to find the header
                # column names.  Note that get_fixedwidth_params returns Python
                # slice col_ends but expects inclusive col_ends on input (for
                # more intuitive user interface).
                line = self.get_line(lines, position_line)
                vals, self.col_starts, col_ends = self.get_fixedwidth_params(line)
                self.col_ends = [x - 1 for x in col_ends]

            # Get the header column names and column positions
            line = self.get_line(lines, start_line)
            vals, starts, ends = self.get_fixedwidth_params(line)

            self.names = vals

        self._set_cols_from_names()

        # Set column start and end positions. 
        for i, col in enumerate(self.cols):
            col.start = starts[i]
            col.end = ends[i]

    def get_fixedwidth_params(self, line):
        """Split ``line`` on the delimiter and determine column values and
        column start and end positions.  This might include null columns with
        zero length (e.g. for ``header row = "| col1 || col2 | col3 |"`` or
        ``header2_row = "----- ------- -----"``).  The null columns are
        stripped out.  Returns the values between delimiters and the
        corresponding start and end positions.

        :param line: input line
        :returns: (vals, starts, ends)
        """

        # If column positions are already specified then just use those, otherwise
        # figure out positions between delimiters.
        if self.col_starts is not None and self.col_ends is not None:
            starts = list(self.col_starts)  # could be any iterable, e.g. np.array
            ends = [x + 1 for x in self.col_ends]  # user supplies inclusive endpoint
            if len(starts) != len(ends):
                raise ValueError('Fixed width col_starts and col_ends must have the same length')
            vals = [line[start:end].strip() for start, end in zip(starts, ends)]
        else:
            # There might be a cleaner way to do this but it works...
            vals = line.split(self.splitter.delimiter)
            starts = [0]
            ends = []
            for val in vals:
                if val:
                    ends.append(starts[-1] + len(val))
                    starts.append(ends[-1] + 1)
                else:
                    starts[-1] += 1
            starts = starts[:-1]
            vals = [x.strip() for x in vals if x]
            if len(vals) != len(starts) or len(vals) != len(ends):
                raise InconsistentTableError('Error parsing fixed width header')

        return vals, starts, ends

    def write(self, lines):
        # Header line not written until data are formatted.  Until then it is
        # not known how wide each column will be for fixed width.
        pass


class FixedWidthData(core.BaseData):
    """Base table data reader.

    :param start_line: None, int, or a function of ``lines`` that returns None or int
    :param end_line: None, int, or a function of ``lines`` that returns None or int
    :param comment: Regular expression for comment lines
    :param splitter_class: Splitter class for splitting data lines into columns
    """

    splitter_class = FixedWidthSplitter

    def write(self, lines):

        self._set_fill_values(self.cols)
        self._set_col_formats()
        for col in self.cols:
            col.str_vals = list(col.iter_str_vals())
        self._replace_vals(self.cols)
        col_str_iters = [col.str_vals for col in self.cols]

        vals_list = []
        # Col iterator does the formatting defined above so each val is a string
        # and vals is a tuple of strings for all columns of each row
        for vals in zip(*col_str_iters):
            vals_list.append(vals)

        for i, col in enumerate(self.cols):
            col.width = max([len(vals[i]) for vals in vals_list])
            if self.header.start_line is not None:
                col.width = max(col.width, len(col.name))

        widths = [col.width for col in self.cols]

        if self.header.start_line is not None:
            lines.append(self.splitter.join([col.name for col in self.cols], widths))

        if self.header.position_line is not None:
            char = self.header.position_char
            if len(char) != 1:
                raise ValueError('Position_char="%s" must be a single character' % char)
            vals = [char * col.width for col in self.cols]
            lines.append(self.splitter.join(vals, widths))

        for vals in vals_list:
            lines.append(self.splitter.join(vals, widths))

        return lines


class FixedWidth(core.BaseReader):
    """Read or write a fixed width table with a single header line that defines column
    names and positions.  Examples::

      # Bar delimiter in header and data

      |  Col1 |   Col2      |  Col3 |
      |  1.2  | hello there |     3 |
      |  2.4  | many words  |     7 |

      # Bar delimiter in header only

      Col1 |   Col2      | Col3
      1.2    hello there    3
      2.4    many words     7

      # No delimiter with column positions specified as input

      Col1       Col2Col3
       1.2hello there   3
       2.4many words    7

    See the :ref:`fixed_width_gallery` for specific usage examples.

    :param col_starts: list of start positions for each column (0-based counting)
    :param col_ends: list of end positions (inclusive) for each column
    :param delimiter_pad: padding around delimiter when writing (default = None)
    :param bookend: put the delimiter at start and end of line when writing (default = False)
    """
    _format_name = 'fixed_width'
    _description = 'Fixed width'

    def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
        core.BaseReader.__init__(self)

        self.header = FixedWidthHeader()
        self.data = FixedWidthData()
        self.data.header = self.header
        self.header.data = self.data

        self.header.splitter.delimiter = '|'
        self.data.splitter.delimiter = '|'
        self.data.splitter.delimiter_pad = delimiter_pad
        self.data.splitter.bookend = bookend
        self.header.start_line = 0
        self.data.start_line = 1
        self.header.comment = r'\s*#'
        self.header.write_comment = '# '
        self.data.comment = r'\s*#'
        self.data.write_comment = '# '
        self.header.col_starts = col_starts
        self.header.col_ends = col_ends


class FixedWidthNoHeader(FixedWidth):
    """Read or write a fixed width table which has no header line.  Column
    names are either input (``names`` keyword) or auto-generated.  Column
    positions are determined either by input (``col_starts`` and ``col_stops``
    keywords) or by splitting the first data line.  In the latter case a
    ``delimiter`` is required to split the data line.

    Examples::

      # Bar delimiter in header and data

      |  1.2  | hello there |     3 |
      |  2.4  | many words  |     7 |

      # Compact table having no delimiter and column positions specified as input

      1.2hello there3
      2.4many words 7

    This class is just a convenience wrapper around the ``FixedWidth`` reader
    but with ``header.start_line = None`` and ``data.start_line = 0``.

    See the :ref:`fixed_width_gallery` for specific usage examples.

    :param col_starts: list of start positions for each column (0-based counting)
    :param col_ends: list of end positions (inclusive) for each column
    :param delimiter_pad: padding around delimiter when writing (default = None)
    :param bookend: put the delimiter at start and end of line when writing (default = False)
    """
    _format_name = 'fixed_width_no_header'
    _description = 'Fixed width with no header'

    def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
        FixedWidth.__init__(self, col_starts, col_ends,
                            delimiter_pad=delimiter_pad, bookend=bookend)
        self.header.start_line = None
        self.data.start_line = 0


class FixedWidthTwoLine(FixedWidth):
    """Read or write a fixed width table which has two header lines.  The first
    header line defines the column names and the second implicitly defines the
    column positions.  Examples::

      # Typical case with column extent defined by ---- under column names.

       col1    col2         <== header_start = 0
      -----  ------------   <== position_line = 1, position_char = "-"
        1     bee flies     <== data_start = 2
        2     fish swims

      # Pretty-printed table

      +------+------------+
      | Col1 |   Col2     |
      +------+------------+
      |  1.2 | "hello"    |
      |  2.4 | there world|
      +------+------------+

    See the :ref:`fixed_width_gallery` for specific usage examples.

    :param position_line: row index of line that specifies position (default = 1)
    :param position_char: character used to write the position line (default = "-")
    :param delimiter_pad: padding around delimiter when writing (default = None)
    :param bookend: put the delimiter at start and end of line when writing (default = False)
    """
    _format_name = 'fixed_width_two_line'
    _description = 'Fixed width with second header line'

    def __init__(self, position_line=1, position_char='-', delimiter_pad=None, bookend=False):
        FixedWidth.__init__(self, delimiter_pad=delimiter_pad, bookend=bookend)
        self.header.position_line = position_line
        self.header.position_char = position_char
        self.data.start_line = position_line + 1
        self.header.splitter.delimiter = ' '
        self.data.splitter.delimiter = ' '
