import json
import random
import hashlib

import pandas as pd
from toolz.curried import curry, pipe  # noqa
from typing import Callable

from .core import sanitize_dataframe, sanitize_geo_interface
from .plugin_registry import PluginRegistry


# ==============================================================================
# Data transformer registry
# ==============================================================================
DataTransformerType = Callable

class DataTransformerRegistry(PluginRegistry[DataTransformerType]):
    _global_settings = {'consolidate_datasets': True}

    @property
    def consolidate_datasets(self):
        return self._global_settings['consolidate_datasets']

    @consolidate_datasets.setter
    def consolidate_datasets(self, value):
        self._global_settings['consolidate_datasets'] = value
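
# A hedged usage sketch of the registry. The instance name and the assumption
# that PluginRegistry exposes ``register`` and ``enable`` (as it does elsewhere
# in Altair) are illustrative; the real instance is created in the API layer:
#
#     data_transformers = DataTransformerRegistry()
#     data_transformers.register('default', limit_rows(max_rows=5000))
#     data_transformers.enable('default')
#     data_transformers.consolidate_datasets = False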


# ==============================================================================
# Data model transformers
#
# A data model transformer is a pure function that takes a dict or DataFrame
# and returns a transformed dict or DataFrame. The dict objects will be the
# Data portion of the VegaLite schema. The idea is that users can pipe a
# sequence of these data transformers together to prepare the data before it
# hits the renderer.
#
# In this version of Altair, renderers only deal with the dict form of a
# VegaLite spec, after the Data model has been put into a schema compliant
# form.
#
# A data model transformer has the following type signature:
# DataModelType = Union[dict, pd.DataFrame]
# DataModelTransformerType = Callable[[DataModelType, KwArgs], DataModelType]
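#
# For example (a sketch only; ``df`` stands for an arbitrary DataFrame and the
# row limit is illustrative), the transformers defined below can be composed as:
#
#     pipe(df, limit_rows(max_rows=1000), to_values)
#
# which returns a dict of the form {'values': [...]} ready to be placed in the
# spec.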
# ==============================================================================


class MaxRowsError(Exception):
    """Raised when a data model has too many rows."""
    pass


@curry
def limit_rows(data, max_rows=5000):
    """Raise MaxRowsError if the data model has more than max_rows.

    If max_rows is None, then do not perform any check.
    """
    check_data_type(data)
    if hasattr(data, '__geo_interface__'):
        if data.__geo_interface__['type'] == 'FeatureCollection':
            values = data.__geo_interface__['features']
        else:
            values = data.__geo_interface__
    elif isinstance(data, pd.DataFrame):
        values = data
    elif isinstance(data, dict):
        if 'values' in data:
            values = data['values']
        else:
            return data
    if max_rows is not None and len(values) > max_rows:
        raise MaxRowsError('The number of rows in your dataset is greater '
                           'than the maximum allowed ({}). '
                           'For information on how to plot larger datasets '
                           'in Altair, see the documentation'.format(max_rows))
    return data
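
# A hedged usage sketch: because ``limit_rows`` is curried, the row limit can
# be bound first and the resulting callable applied later, on its own or in a
# pipeline (``df`` and the limit of 10 are illustrative):
#
#     limit_rows(df, max_rows=10)
#     pipe(df, limit_rows(max_rows=10), to_values)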


@curry
def sample(data, n=None, frac=None):
    """Reduce the size of the data model by sampling without replacement."""
    check_data_type(data)
    if isinstance(data, pd.DataFrame):
        return data.sample(n=n, frac=frac)
    elif isinstance(data, dict):
        if 'values' in data:
            values = data['values']
            n = n if n else int(frac * len(values))
            values = random.sample(values, n)
            return {'values': values}
        else:
            # Data dicts without inline values (e.g. url-based data) cannot
            # be sampled, so pass them through unchanged, as limit_rows does.
            return data
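
# A hedged usage sketch: drawing a 10% sample from a DataFrame, or five
# records from a values-style dict (the fraction and count are illustrative):
#
#     sample(df, frac=0.1)
#     sample({'values': [{'x': i} for i in range(100)]}, n=5)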


@curry
def to_json(data, prefix='altair-data', extension='json',
            filename="{prefix}-{hash}.{extension}"):
    """
    Write the data model to a .json file and return a url based data model.
    """
    data_json = _data_to_json_string(data)
    data_hash = _compute_data_hash(data_json)
    filename = filename.format(prefix=prefix, hash=data_hash,
                               extension=extension)
    with open(filename, 'w') as f:
        f.write(data_json)
    return {
        'url': filename,
        'format': {'type': 'json'}
    }
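
# A hedged usage sketch: ``to_json`` writes the data to a file in the current
# working directory and hands back a url-based data model (the hash in the
# file name below is illustrative):
#
#     to_json(df)
#     # -> {'url': 'altair-data-<md5 hash>.json', 'format': {'type': 'json'}}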


@curry
def to_csv(data, prefix='altair-data', extension='csv',
           filename="{prefix}-{hash}.{extension}"):
    """Write the data model to a .csv file and return a url based data model."""
    data_csv = _data_to_csv_string(data)
    data_hash = _compute_data_hash(data_csv)
    filename = filename.format(prefix=prefix, hash=data_hash,
                               extension=extension)
    with open(filename, 'w') as f:
        f.write(data_csv)
    return {
        'url': filename,
        'format': {'type': 'csv'}
    }


@curry
def to_values(data):
    """Replace a DataFrame by a data model with values."""
    check_data_type(data)
    if hasattr(data, '__geo_interface__'):
        if isinstance(data, pd.DataFrame):
            data = sanitize_dataframe(data)
        data = sanitize_geo_interface(data.__geo_interface__)
        return {'values': data}
    elif isinstance(data, pd.DataFrame):
        data = sanitize_dataframe(data)
        return {'values': data.to_dict(orient='records')}
    elif isinstance(data, dict):
        if 'values' not in data:
            raise KeyError('values expected in data dict, but not present.')
        return data
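
# A hedged usage sketch of ``to_values`` (the tiny DataFrame is illustrative):
#
#     to_values(pd.DataFrame({'x': [1, 2]}))
#     # -> {'values': [{'x': 1}, {'x': 2}]}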


def check_data_type(data):
    """Raise a TypeError if data is not a dict, DataFrame, or an object with
    a __geo_interface__ attribute."""
    if not isinstance(data, (dict, pd.DataFrame)) and not hasattr(data, '__geo_interface__'):
        raise TypeError('Expected dict, DataFrame or a __geo_interface__ '
                        'attribute, got: {}'.format(type(data)))


# ==============================================================================
# Private utilities
# ==============================================================================

def _compute_data_hash(data_str):
    return hashlib.md5(data_str.encode()).hexdigest()


def _data_to_json_string(data):
    """Return a JSON string representation of the input data"""
    check_data_type(data)
    if hasattr(data, '__geo_interface__'):
        if isinstance(data, pd.DataFrame):
            data = sanitize_dataframe(data)
        data = sanitize_geo_interface(data.__geo_interface__)
        return json.dumps(data)
    elif isinstance(data, pd.DataFrame):
        data = sanitize_dataframe(data)
        return data.to_json(orient='records')
    elif isinstance(data, dict):
        if 'values' not in data:
            raise KeyError('values expected in data dict, but not present.')
        return json.dumps(data['values'], sort_keys=True)
    else:
        raise NotImplementedError("to_json only works with data expressed as "
                                  "a DataFrame or as a dict")


def _data_to_csv_string(data):
    """Return a CSV string representation of the input data."""
    check_data_type(data)
    if hasattr(data, '__geo_interface__'):
        raise NotImplementedError("to_csv does not work with data that "
                                  "contains the __geo_interface__ attribute")
    elif isinstance(data, pd.DataFrame):
        data = sanitize_dataframe(data)
        return data.to_csv(index=False)
    elif isinstance(data, dict):
        if 'values' not in data:
            raise KeyError('values expected in data dict, but not present.')
        return pd.DataFrame.from_dict(data['values']).to_csv(index=False)
    else:
        raise NotImplementedError("to_csv only works with data expressed as "
                                  "a DataFrame or as a dict")
