# -*- coding: utf-8 -*-
"""
Implementation of the C{@property} value handling.

RDFa 1.0 and RDFa 1.1 are fairly different. RDFa 1.0 generates only literals, see
U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>} for the details.
On the other hand, RDFa 1.1, beyond literals, can also generate URI references. Hence the duplicate method in the L{ProcessProperty} class, one for RDFa 1.0 and the other for RDFa 1.1.

@summary: RDFa Literal generation
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
"""

"""
$Id: property.py,v 1.14 2013-07-26 16:10:16 ivan Exp $
$Date: 2013-07-26 16:10:16 $
"""

from rdflib import BNode
from rdflib import Literal, URIRef
from rdflib import RDF as ns_rdf
from rdflib.term import XSDToPython

from . import IncorrectBlankNodeUsage, IncorrectLiteral, err_no_blank_node
from .utils import has_one_of_attributes, return_XML

import re

# Datatype term for XML literals, looked up as "XMLLiteral" in the RDF namespace object
XMLLiteral  = ns_rdf["XMLLiteral"]
# Datatype term for HTML literals, given by its full URI
HTMLLiteral = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML")

class ProcessProperty:
    """Generate the value for C{@property} taking into account datatype, etc.
    Note: this class is created only if the C{@property} is indeed present, no need to check.
    
    @ivar node: DOM element node
    @ivar graph: the (RDF) graph to add the properties to
    @ivar subject: the RDFLib URIRef serving as a subject for the generated triples
    @ivar state: the current state to be used for the CURIE-s
    @type state: L{state.ExecutionContext}
    @ivar typed_resource: Typically the bnode generated by a @typeof
    """
    def __init__(self, node, graph, subject, state, typed_resource=None):
        """
        Store the processing context of one element carrying C{@property}.

        @param node: DOM element node
        @param graph: the (RDF) graph to add the properties to
        @param subject: the RDFLib URIRef serving as a subject for the generated triples
        @param state: the current state to be used for the CURIE-s
        @type state: L{state.ExecutionContext}
        @param typed_resource: Typically the bnode generated by a @typeof; in RDFa 1.1, that becomes the object for C{@property}
        """
        # Plain attribute storage; nothing is computed at construction time.
        self.typed_resource = typed_resource
        self.state = state
        self.subject = subject
        self.graph = graph
        self.node = node
        
    def generate(self):
        """
        Common entry point for the RDFa 1.0 and RDFa 1.1 versions.

        The RDFa version is read from the state object (C{rdfa_version}); a value
        that compares greater than or equal to the string "1.1" selects
        L{generate_1_1}, anything else selects L{generate_1_0}.
        """
        is_1_1_or_later = self.state.rdfa_version >= "1.1"
        handler = self.generate_1_1 if is_1_1_or_later else self.generate_1_0
        handler()

    def generate_1_1(self):
        """Generate the property object, 1.1 version.

        The object of the generated triples is chosen as follows:
          - if the node has one of C{@resource}, C{@href}, C{@src} and none of
            C{@content}, C{@datatype}, C{@rel}, C{@rev}, the object is whatever the
            state's C{getResource} returns for those attributes;
          - otherwise, if the node has C{@typeof}, none of C{@content}, C{@datatype},
            C{@rel}, C{@rev}, C{@about}, C{about_pruned}, and a typed resource was
            handed over at construction time, the object is that typed resource;
          - otherwise a literal is built from C{@content} or from the element
            content, honouring C{@datatype} and the current language.

        For every value of C{@property} that is not a blank node, the object is
        either added to the state's list mapping (if C{@inlist} is present) or a
        triple is added to the graph. A blank node used as a property yields a warning.
        Nothing is generated if the object turns out to be C{None}.
        """
        node = self.node
        state = self.state

        #########################################################################
        # See if the target is _not_ a literal
        irirefs =    ("resource", "href", "src")
        noiri =      ("content", "datatype", "rel", "rev")
        notypediri = noiri + ("about", "about_pruned")

        if has_one_of_attributes(node, irirefs) and not has_one_of_attributes(node, noiri):
            # @href/@resource/@src takes the lead here...
            obj = state.getResource(irirefs)
        elif node.hasAttribute("typeof") and not has_one_of_attributes(node, notypediri) and self.typed_resource is not None:
            # a @typeof creates a special branch in case the typed resource was set during parsing
            obj = self.typed_resource
        else:
            # We have to generate a literal

            # Get, if exists, the value of @datatype. 'dtset' records that the attribute
            # is present at all: an empty @datatype is meaningful, too.
            dtset = node.hasAttribute("datatype")
            datatype = ''
            if dtset and node.getAttribute("datatype") != "":
                datatype = state.getURI("datatype")

            # supress_lang is set in case some elements explicitly want to suppress the effect of language.
            # There were discussions, for example, that the <time> element should do so. Although,
            # after all, this was reversed, the functionality is kept in the code in case another
            # element might need it...
            # NOTE(review): the explicit comparison with False is kept on purpose, the type of
            # supress_lang is not visible from this module.
            if state.lang is not None and state.supress_lang == False:
                lang = state.lang
            else:
                lang = ''

            if node.hasAttribute("content"):
                # The simple case: separate @content attribute
                val = node.getAttribute("content")
                if dtset:
                    obj = self._create_Literal(val, datatype=datatype, lang=lang)
                else:
                    obj = Literal(val, lang=lang)
            elif dtset:
                # there *is* a datatype (even if it is empty!)
                if datatype == XMLLiteral:
                    obj = Literal(self._get_XML_literal(node), datatype=XMLLiteral)
                elif datatype == HTMLLiteral:
                    obj = Literal(self._get_HTML_literal(node), datatype=HTMLLiteral)
                else:
                    obj = self._create_Literal(self._get_literal(node), datatype=datatype, lang=lang)
            else:
                # no @content, no @datatype: the text content of the element is the value
                obj = self._create_Literal(self._get_literal(node), lang=lang)

        if obj is not None:
            for prop in state.getURI("property"):
                if isinstance(prop, BNode):
                    # blank nodes are not acceptable as predicates
                    state.options.add_warning(err_no_blank_node % "property", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)
                    continue
                if node.hasAttribute("inlist"):
                    state.add_to_list_mapping(prop, obj)
                else:
                    self.graph.add( (self.subject, prop, obj) )

    def generate_1_0(self):
        """Generate the property object, 1.0 version.

        In RDFa 1.0 the object is always a literal:
          - with C{@content}: the attribute value, typed through L{_create_Literal}
            if C{@datatype} is present (even if empty), a plain literal otherwise;
          - without C{@content} but with C{@datatype}: an XML literal, an HTML literal,
            or a literal built from the text content, depending on the datatype;
          - with neither: an XML literal if the element has element children,
            otherwise a literal built from the text content.

        A triple is added to the graph for every value of C{@property} that is not a
        blank node; a blank node used as a property yields a warning.
        """
        node = self.node
        state = self.state

        #########################################################################
        # We have to generate a literal indeed.
        # Get, if exists, the value of @datatype. 'dtset' records that the attribute
        # is present at all: an empty @datatype is meaningful, too.
        dtset = node.hasAttribute("datatype")
        datatype = ''
        if dtset and node.getAttribute("datatype") != "":
            datatype = state.getURI("datatype")

        lang = state.lang if state.lang is not None else ''

        if node.hasAttribute("content"):
            # The simple case: separate @content attribute
            val = node.getAttribute("content")
            if dtset:
                obj = self._create_Literal(val, datatype=datatype, lang=lang)
            else:
                obj = Literal(val, lang=lang)
        elif dtset:
            # There *is* a datatype (even if it is empty!). We have to check whether the
            # specified datatype is, in fact, an explicit XML (or HTML) Literal; otherwise
            # the Literal content is the pure text part of the current element.
            if datatype == XMLLiteral:
                obj = Literal(self._get_XML_literal(node), datatype=XMLLiteral)
            elif datatype == HTMLLiteral:
                obj = Literal(self._get_HTML_literal(node), datatype=HTMLLiteral)
            else:
                obj = self._create_Literal(self._get_literal(node), datatype=datatype, lang=lang)
        elif any(n.nodeType == node.ELEMENT_NODE for n in node.childNodes):
            # no controlling @datatype, but there is markup in the contained
            # element: an XML Literal should be generated
            obj = self._create_Literal(self._get_XML_literal(node), datatype=XMLLiteral)
        else:
            # no controlling @datatype and text only content
            obj = self._create_Literal(self._get_literal(node), lang=lang)

        for prop in state.getURI("property"):
            if isinstance(prop, BNode):
                # blank nodes are not acceptable as predicates
                state.options.add_warning(err_no_blank_node % "property", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)
            else:
                self.graph.add( (self.subject,prop,obj) )
    
    ######################################################################################################################################
    
    
    def _putBackEntities(self, data):
        """Replace the '&', '<', and '>' characters by the corresponding entities, to
        produce a proper XML string. Used by the XML Literal extraction.
        @param data: string to be converted
        @return: string with entities
        @rtype: string
        """
        # The ampersand must come first: otherwise the '&' of the entities
        # inserted for '<' and '>' would be escaped a second time.
        substitutions = (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'))
        escaped = data
        for char, entity in substitutions:
            escaped = escaped.replace(char, entity)
        return escaped
        
    def _get_literal(self, Pnode):
        """
        Collect (recursively) the text content of a DOM Node.

        Text children contribute their data, element children contribute the
        result of the recursive call; all other node types are ignored.

        @param Pnode: DOM Node
        @return: the collected text; unless the C{space_preserve} option is set, runs
        of white space are replaced by a single space and the result is stripped
        @rtype: string
        """
        chunks = []
        for child in Pnode.childNodes:
            kind = child.nodeType
            if kind == child.TEXT_NODE:
                chunks.append(child.data)
            elif kind == child.ELEMENT_NODE:
                chunks.append(self._get_literal(child))
        text = "".join(chunks)

        # The decision of the group in February 2008 is not to normalize the result by default.
        # This is reflected in the default value of the option
        if not self.state.options.space_preserve:
            text = re.sub(r'(\r| |\n|\t)+'," ",text).strip()
        return text
    # end getLiteral
    
    def _get_XML_literal(self, Pnode):
        """
        Build the XML Literal content from the children of a DOM Node.

        Text children are added with their '&', '<', '>' characters turned back into
        entities; element children are serialized through C{return_XML}. All other
        node types are ignored.

        @param Pnode: DOM Node
        @return: string
        """
        pieces = []
        for child in Pnode.childNodes:
            kind = child.nodeType
            if kind == child.TEXT_NODE:
                pieces.append(self._putBackEntities(child.data))
            elif kind == child.ELEMENT_NODE:
                pieces.append(return_XML(self.state, child, base = False))
        return "".join(pieces)
    # end getXMLLiteral

    def _get_HTML_literal(self, Pnode):
        """
        Build the HTML Literal content from the children of a DOM Node.

        Text children are added with their '&', '<', '>' characters turned back into
        entities; element children are serialized through C{return_XML}, called with
        C{xmlns = False}. All other node types are ignored.

        @param Pnode: DOM Node
        @return: string
        """
        pieces = []
        for child in Pnode.childNodes:
            kind = child.nodeType
            if kind == child.TEXT_NODE:
                pieces.append(self._putBackEntities(child.data))
            elif kind == child.ELEMENT_NODE:
                pieces.append(return_XML(self.state, child, base = False, xmlns = False ))
        return "".join(pieces)
    # end getHTMLLLiteral
    
    def _create_Literal(self, val, datatype = '', lang = ''):
        """
        Create a literal, taking into account the datatype and language.

        Without a datatype (C{None} or empty string) a plain literal with the given
        language is returned. With a datatype, the language is ignored and a typed
        literal is returned; if a conversion function is registered for the datatype
        in C{XSDToPython}, the value is first run through it, and a warning is
        recorded if that conversion fails. The literal is created even in that case.

        @param val: the lexical value of the literal
        @param datatype: the datatype of the literal, if any
        @param lang: the language tag, used only when no datatype is set
        @return: Literal
        """
        if datatype is None or datatype == '':
            return Literal(val, lang=lang)

        # This is a bit convoluted... the default setup of rdflib does not gracefully react if the
        # datatype cannot properly be converted to Python, so the conversion is tried out here
        # first, only to be able to issue a warning on failure.
        convFunc = XSDToPython.get(datatype, None)
        if convFunc:
            try:
                # Only the success or failure of the conversion matters, not its result
                convFunc(val)
            except Exception:
                # Any conversion failure means that the value and its datatype do not match.
                # KeyboardInterrupt and SystemExit are deliberately not caught.
                self.state.options.add_warning("Incompatible value (%s) and datatype (%s) in Literal definition." % (val, datatype), warning_type=IncorrectLiteral, node=self.node.nodeName)
        return Literal(val, datatype=datatype)