#!/usr/bin/python
# CSS Test Source Manipulation Library
# Initial code by fantasai, joint copyright 2010 W3C and Microsoft
# Licensed under BSD 3-Clause: <http://www.w3.org/Consortium/Legal/2008/03-bsd-license>

from __future__ import print_function
from os.path import basename, exists, join
import os
import filecmp
import shutil
import re
import codecs
import collections
from xml import dom
import html5lib
from html5lib import treebuilders, inputstream
from lxml import etree
from lxml.etree import ParseError
from Utils import getMimeFromExt, escapeToNamedASCII, basepath, isPathInsideBase, relativeURL, assetName
import HTMLSerializer
import warnings
import hashlib

class SourceTree(object):
  """Class that manages structure of test repository source.
     Temporarily hard-coded path and filename rules, this should be configurable.

     Files are classified purely by name: the public is*() predicates
     lower-case the path, split it into a list of directory names plus a
     file name, and match those against the rules below.
  """

  # Directory names that cause a path to be ignored at any depth.
  _ignoredDirNames = ('.hg', '.git', '.svn', 'cvs', 'incoming', 'work-in-progress',
                      'data', 'archive', 'reports', 'test-plan', 'test-plans')

  # File names / file name prefixes that are always ignored.
  _ignoredFileNames = ('lock', '.ds_store', 'sections.dat', 'get-spec-sections.pl')
  _ignoredFilePrefixes = ('.directory', '.hg', '.git')

  def __init__(self, repository = None):
    self.mTestExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.svg']
    self.mReferenceExtensions = ['.xht', '.html', '.xhtml', '.htm', '.xml', '.png', '.svg']
    self.mRepository = repository

  def _splitDirs(self, dir):
    """Split a directory path into a list of directory names.
       An empty path yields an empty list; '/' is preferred as separator
       when present, otherwise the os separator is used.
    """
    if ('' == dir):
      return []
    if ('/' in dir):
      return dir.split('/')
    return dir.split(os.path.sep)

  def _splitPath(self, filePath):
    """split a path into a list of directory names and the file name
       paths may come from the os or mercurial, which always uses '/' as the
       directory separator

       The path is lower-cased first, so all rules match case-insensitively.
    """
    dir, fileName = os.path.split(filePath.lower())
    return (self._splitDirs(dir), fileName)

  def isTracked(self, filePath):
    """True when the file is not ignored."""
    pathList, fileName = self._splitPath(filePath)
    return (not self._isIgnored(pathList, fileName))

  def _isApprovedPath(self, pathList):
    return ((1 < len(pathList)) and ('approved' == pathList[0]) and (('support' == pathList[1]) or ('src' in pathList)))

  def isApprovedPath(self, filePath):
    pathList, fileName = self._splitPath(filePath)
    return (not self._isIgnored(pathList, fileName)) and self._isApprovedPath(pathList)

  def _isIgnoredPath(self, pathList):
    """True when the directory list contains an ignored directory name, or
       when its first entry is 'tools'. An empty list is never ignored here.
    """
    if (any(name in pathList for name in self._ignoredDirNames)):
      return True
    # guard the index: pathList is empty for the repository root
    return (bool(pathList) and ('tools' == pathList[0]))

  def _isIgnored(self, pathList, fileName):
    if (not pathList):  # ignore files in root
      return True
    return (self._isIgnoredPath(pathList) or
            fileName.startswith(self._ignoredFilePrefixes) or
            (fileName in self._ignoredFileNames))

  def isIgnored(self, filePath):
    pathList, fileName = self._splitPath(filePath)
    return self._isIgnored(pathList, fileName)

  def isIgnoredDir(self, dir):
    """True when directory `dir` is on an ignored path. Unlike the file
       predicates, `dir` is not lower-cased before matching.
    """
    pathList = self._splitDirs(dir)
    return self._isIgnoredPath(pathList)

  def _isToolPath(self, pathList):
    return ('tools' in pathList)

  def _isTool(self, pathList, fileName):
    return self._isToolPath(pathList)

  def isTool(self, filePath):
    pathList, fileName = self._splitPath(filePath)
    return (not self._isIgnored(pathList, fileName)) and self._isTool(pathList, fileName)

  def _isSupportPath(self, pathList):
    return ('support' in pathList)

  def _isSupport(self, pathList, fileName):
    """A file is support if it lives under a 'support' directory, or if it
       is neither a tool, a reference, nor a test case.
    """
    return (self._isSupportPath(pathList) or
            ((not self._isTool(pathList, fileName)) and
             (not self._isReference(pathList, fileName)) and
             (not self._isTestCase(pathList, fileName))))

  def isSupport(self, filePath):
    pathList, fileName = self._splitPath(filePath)
    return (not self._isIgnored(pathList, fileName)) and self._isSupport(pathList, fileName)

  def _isReferencePath(self, pathList):
    return (('reftest' in pathList) or ('reference' in pathList))

  def _isReference(self, pathList, fileName):
    """True when the file has a reference extension and either its base name
       carries a ref/notref marker or it lives under a reference directory.
       Files on support or tool paths are never references.
    """
    if (self._isSupportPath(pathList) or self._isToolPath(pathList)):
      return False
    baseName, fileExt = os.path.splitext(fileName)[:2]
    namedAsReference = (bool(re.search('(^ref-|^notref-).+', baseName)) or
                        bool(re.search('.+(-ref[0-9]*$|-notref[0-9]*$)', baseName)) or
                        ('-ref-' in baseName) or ('-notref-' in baseName))
    if (namedAsReference or self._isReferencePath(pathList)):
      return (fileExt in self.mReferenceExtensions)
    return False

  def isReference(self, filePath):
    pathList, fileName = self._splitPath(filePath)
    return (not self._isIgnored(pathList, fileName)) and self._isReference(pathList, fileName)

  def isReferenceAnywhere(self, filePath):
    """Like isReference(), but without the ignored-path check."""
    pathList, fileName = self._splitPath(filePath)
    return self._isReference(pathList, fileName)

  def _isTestCase(self, pathList, fileName):
    if ((not self._isToolPath(pathList)) and (not self._isSupportPath(pathList)) and (not self._isReference(pathList, fileName))):
      fileExt = os.path.splitext(fileName)[1]
      return (fileExt in self.mTestExtensions)
    return False

  def isTestCase(self, filePath):
    pathList, fileName = self._splitPath(filePath)
    return (not self._isIgnored(pathList, fileName)) and self._isTestCase(pathList, fileName)

  def getAssetName(self, filePath):
    """Return assetName() of the file name for references and test cases;
       any other file keeps its full (lower-cased) file name.
    """
    pathList, fileName = self._splitPath(filePath)
    if (self._isReference(pathList, fileName) or self._isTestCase(pathList, fileName)):
      return assetName(fileName)
    return fileName.lower() # support files keep full name

  def getAssetType(self, filePath):
    """Return the interned type name of the file: 'reference', 'testcase',
       'tool' or 'support', checked in that order.
    """
    pathList, fileName = self._splitPath(filePath)
    if (self._isReference(pathList, fileName)):
      return intern('reference')
    if (self._isTestCase(pathList, fileName)):
      return intern('testcase')
    if (self._isTool(pathList, fileName)):
      return intern('tool')
    return intern('support')


class SourceCache:
  """Cache for FileSource objects. Supports one FileSource object
     per sourcepath.
  """
  def __init__(self, sourceTree):
    self.__cache = {}   # sourcepath -> FileSource
    self.sourceTree = sourceTree

  def generateSource(self, sourcepath, relpath, data = None):
    """Return a FileSource or derivative chosen by the MIME type that
       getMimeFromExt() reports for `sourcepath`.

       Uses a cache to avoid creating more than one of the same object:
       does not support creating two FileSources with the same sourcepath;
       asserts if this is tried. (.htaccess files are not cached.)

       Cache is bypassed if loading from a change context, i.e. whenever
       `data` is supplied: the cache is neither consulted nor updated.
    """
    if ((data is None) and (sourcepath in self.__cache)):
      source = self.__cache[sourcepath]
      assert relpath == source.relpath
      return source

    if basename(sourcepath) == '.htaccess':
      # data must go by keyword: the fourth positional parameter of
      # ConfigSource is mimetype, not data
      return ConfigSource(self.sourceTree, sourcepath, relpath, data = data)
    mime = getMimeFromExt(sourcepath)
    if (mime == 'application/xhtml+xml'):
      source = XHTMLSource(self.sourceTree, sourcepath, relpath, data)
    elif (mime == 'text/html'):
      source = HTMLSource(self.sourceTree, sourcepath, relpath, data)
    elif (mime == 'image/svg+xml'):
      source = SVGSource(self.sourceTree, sourcepath, relpath, data)
    elif (mime == 'application/xml'):
      source = XMLSource(self.sourceTree, sourcepath, relpath, data)
    else:
      source = FileSource(self.sourceTree, sourcepath, relpath, mime, data)
    if (data is None):
      self.__cache[sourcepath] = source
    return source

class SourceSet:
  """Set of FileSource objects. No two FileSources of the same type in the set may
     have the same name (except .htaccess files, which are merged).
  """
  def __init__(self, sourceCache):
    self.sourceCache = sourceCache
    self.pathMap = {} # type/name -> source

  def __len__(self):
    return len(self.pathMap)

  def _keyOf(self, source):
    """Return the pathMap key for `source`: '<type>/<keyName>'."""
    return source.type() + '/' + source.keyName()

  def __contains__(self, source):
    return self._keyOf(source) in self.pathMap


  def iter(self):
    """Iterate over FileSource objects in SourceSet.
    """
    return iter(self.pathMap.values())

  def addSource(self, source, ui):
    """Add FileSource `source`. If a different source is already stored
       under the same type/name key, a ConfigSource is appended to the
       stored one; for anything else a warning is sent to `ui.warn` and the
       stored source is kept.
    """
    key = self._keyOf(source)
    cachedSource = self.pathMap.get(key)
    if cachedSource is None:
      self.pathMap[key] = source
    elif source != cachedSource:
      if isinstance(source, ConfigSource):
        cachedSource.append(source)
      else:
        ui.warn("File merge mismatch %s vs %s for %s\n" % \
              (cachedSource.sourcepath, source.sourcepath, source.name()))

  def add(self, sourcepath, relpath, ui):
    """Generate and add FileSource from sourceCache. Return the resulting
       FileSource.

       A conflict with an already stored source is handled as described
       in addSource().
    """
    source = self.sourceCache.generateSource(sourcepath, relpath)
    self.addSource(source, ui)
    return source

  @staticmethod
  def combine(a, b, ui):
    """Merges a and b, and returns whichever one contains the merger (which
       one is chosen based on merge efficiency). Can accept None as an argument.

       An empty SourceSet is falsy (via __len__), so when either argument is
       None or empty the other one is returned unchanged.
    """
    if not (a and b):
      return a or b
    # merge the smaller set into the larger one
    if len(a) < len(b):
      return b.merge(a, ui)
    return a.merge(b, ui)

  def merge(self, other, ui):
    """Merge sourceSet's contents into this SourceSet.

       Throws a RuntimeError if there's a sourceCache mismatch.
       Mismatching files are handled as described in addSource().
       Returns merge result (i.e. self)
    """
    if self.sourceCache is not other.sourceCache:
      raise RuntimeError('cannot merge SourceSets built on different SourceCaches')

    for source in other.pathMap.values():
      self.addSource(source, ui)
    return self

  def adjustContentPaths(self, format):
    """Call adjustContentPaths(format) on every source in the set."""
    for source in self.pathMap.values():
      source.adjustContentPaths(format)

  def write(self, format):
    """Write files out through OutputFormat `format`.
    """
    for source in self.pathMap.values():
      format.write(source)


class StringReader(object):
  """Wrapper around a string to give it a file-like api
  """
  def __init__(self, string):
    self.mString = string
    self.mIndex = 0   # offset of the next unread character

  def read(self, maxSize = None):
    """Return up to `maxSize` unread characters and advance past them.
       With no (or a non-positive) `maxSize`, return everything that is
       still unread. Returns '' once the string is exhausted.
    """
    start = self.mIndex
    if (start >= len(self.mString)):
      return ''
    if (maxSize and (0 < maxSize)):
      chunk = self.mString[start:start + maxSize]
    else:
      # only the unread remainder, not the whole string again
      chunk = self.mString[start:]
    self.mIndex = start + len(chunk)
    return chunk


class NamedDict(object):
    """Mixin giving a dict-like read/write API to classes whose fields are
       listed in `__slots__`.

       Subclasses must define `__slots__` (the ordered key names) together
       with `__getitem__` and `__setitem__`; everything here is built on
       those three.
    """

    def get(self, key, default = None):
        """Return self[key] when `key` is a known key, else `default`."""
        if (key in self):
            return self[key]
        return default

    def __eq__(self, other):
        """Equal when every key of self compares equal to other[key]."""
        for key in self.__slots__:
            if (self[key] != other[key]):
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def __len__(self):
        return len(self.__slots__)

    def __iter__(self):
        return iter(self.__slots__)

    def __contains__(self, key):
        return (key in self.__slots__)

    def copy(self):
        """Return a shallow copy: a new default-constructed instance of the
           same class with every key assigned from self.
        """
        clone = self.__class__()
        for key in self.__slots__:
            clone[key] = self[key]
        return clone

    def keys(self):
        return self.__slots__

    def has_key(self, key):
        return (key in self)

    def items(self):
        return [(key, self[key]) for key in self.__slots__]

    def iteritems(self):
        return iter(self.items())

    def iterkeys(self):
        return self.__iter__()

    def itervalues(self):
        # yield the values themselves, not (key, value) pairs
        return iter([self[key] for key in self.__slots__])

    def __str__(self):
        return '{ ' + ', '.join([key + ': ' + str(self[key]) for key in self.__slots__]) + ' }'


class Metadata(NamedDict):
    """Test metadata record with attribute and dict-style access.

       The list-valued fields (asserts, credits, reviewers, flags, links,
       references) default to a fresh empty list per instance. An explicit
       value, including None, is stored as given.
    """
    __slots__ = ('name', 'title', 'asserts', 'credits', 'reviewers', 'flags', 'links', 'references', 'revision', 'selftest', 'scripttest')

    # Marks "argument not supplied", so that list defaults are not shared
    # between instances and an explicit None is still honoured.
    _unset = object()

    def __init__(self, name = None, title = None, asserts = _unset, credits = _unset, reviewers = _unset,
                 flags = _unset, links = _unset, references = _unset, revision = None, selftest = True,
                 scripttest = False):
        unset = self._unset
        self.name = name
        self.title = title
        self.asserts = [] if (asserts is unset) else asserts
        self.credits = [] if (credits is unset) else credits
        self.reviewers = [] if (reviewers is unset) else reviewers
        self.flags = [] if (flags is unset) else flags
        self.links = [] if (links is unset) else links
        self.references = [] if (references is unset) else references
        self.revision = revision
        self.selftest = selftest
        self.scripttest = scripttest

    def __getitem__(self, key):
        """Return the field named `key`, or None for an unknown key."""
        if (key in self.__slots__):
            return getattr(self, key)
        return None

    def __setitem__(self, key, value):
        """Set the field named `key`; raises KeyError for an unknown key."""
        if (key not in self.__slots__):
            raise KeyError(key)
        setattr(self, key, value)


class ReferenceData(NamedDict):
    """Record describing one reference file: its name, match type, relative
       path and repository path, with attribute and dict-style access.
    """
    __slots__ = ('name', 'type', 'relpath', 'repopath')

    def __init__(self, name = None, type = None, relpath = None, repopath = None):
        self.name, self.type = name, type
        self.relpath, self.repopath = relpath, repopath

    def __getitem__(self, key):
        # unknown keys read as None rather than raising
        if (key in self.__slots__):
            return getattr(self, key)
        return None

    def __setitem__(self, key, value):
        # unknown keys cannot be written
        if (key not in self.__slots__):
            raise KeyError()
        setattr(self, key, value)

# Immutable (name, link) pair; FileSource.getMetadata() builds these for the
# 'credits' and 'reviewers' metadata entries.
UserData = collections.namedtuple('UserData', ('name', 'link'))

class LineString(str):
    """A str that also carries the line number it was taken from, in `line`."""

    def __new__(cls, value, line):
        instance = str.__new__(cls, value)
        instance.line = line
        return instance

    def lineValue(self):
        """Return the text prefixed with 'Line <n>: ' when `line` is truthy,
           otherwise just the text.
        """
        text = str.__str__(self)
        if (self.line):
            return 'Line ' + str(self.line) + ': ' + text
        return text


class FileSource:
  """Object representing a file. Two FileSources are equal if they represent
     the same file contents. It is recommended to use a SourceCache to generate
     FileSources.
  """

  def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None):
    """Init FileSource from source path. Give it relative path relpath.

       `mimetype` should be the canonical MIME type for the file, if known.
        If `mimetype` is None, guess type from file extension via
        getMimeFromExt().

       `data` if provided, is the contents of the file. Otherwise the file is read
        from disk on first use.
    """
    self.sourceTree = sourceTree
    self.sourcepath = sourcepath
    self.relpath    = relpath
    self.mimetype   = mimetype or getMimeFromExt(sourcepath)
    self._data      = data
    self.errors     = None
    self.encoding   = 'utf-8'
    self.refs       = {}    # refName -> (refType, refPath, refNode, refSource)
    self.scripts    = {}    # script src -> script node
    self.metadata   = None
    self.metaSource = None

  def __eq__(self, other):
    if not isinstance(other, FileSource):
      return False
    return self.sourcepath == other.sourcepath or \
           filecmp.cmp(self.sourcepath, other.sourcepath)

  def __ne__(self, other):
    return not self == other

  def __cmp__(self, other):
    return cmp(self.name(), other.name())

  def name(self):
    return self.sourceTree.getAssetName(self.sourcepath)

  def keyName(self):
    """Name used to key this source in a SourceSet: support files use their
       relpath relative to 'support', everything else uses name().
    """
    if ('support' == self.type()):
      return os.path.relpath(self.relpath, 'support')
    return self.name()

  def type(self):
    return self.sourceTree.getAssetType(self.sourcepath)

  def relativeURL(self, other):
    return relativeURL(self.relpath, other.relpath)

  def data(self):
    """Return file contents as a byte string.
       The file is read once and kept; a leading UTF-8 BOM switches
       `self.encoding` to 'utf-8-sig'.
    """
    if (self._data is None):
      # binary mode: contents may be an image, and must not be subject to
      # newline translation
      with open(self.sourcepath, 'rb') as f:
        self._data = f.read()
    if (self._data.startswith(codecs.BOM_UTF8)):
      self.encoding = 'utf-8-sig' # XXX look for other unicode BOMs
    return self._data

  def unicode(self):
    """Return the contents decoded with `self.encoding`, or None if they
       cannot be decoded.
    """
    try:
      return self.data().decode(self.encoding)
    except UnicodeDecodeError:
      return None

  def parse(self):
    """Parses and validates FileSource data from sourcepath."""
    self.loadMetadata()

  def validate(self):
    """Ensure data is loaded from sourcepath."""
    self.parse()

  def adjustContentPaths(self, format):
    """Adjust any paths in file content for output format
       XXX need to account for group paths"""
    if (self.refs):
      seenRefs = {}
      seenRefs[self.sourcepath] = '=='
      def adjustReferences(source):
        newRefs = {}
        for refName in source.refs:
          refType, refPath, refNode, refSource = source.refs[refName]
          if refSource:
            refPath = relativeURL(format.dest(self.relpath), format.dest(refSource.relpath))
            if (refSource.sourcepath not in seenRefs):
              seenRefs[refSource.sourcepath] = refType
              adjustReferences(refSource)
          else:
            refPath = relativeURL(format.dest(self.relpath), format.dest(refPath))
          # addReference() may have stored the reference without a node
          if ((refNode is not None) and (refPath != refNode.get('href'))):
            refNode.set('href', refPath)
          newRefs[refName] = (refType, refPath, refNode, refSource) # update path in metadata
        source.refs = newRefs
      adjustReferences(self)

    if (self.scripts):   # force testharness.js scripts to absolute path
      for src in self.scripts:
        if (src.endswith('/resources/testharness.js')):   # accept relative paths to testharness.js
          self.scripts[src].set('src', '/resources/testharness.js')
        elif (src.endswith('/resources/testharnessreport.js')):
          self.scripts[src].set('src', '/resources/testharnessreport.js')


  def write(self, format):
    """Writes FileSource.data() out to `self.relpath` through Format `format`."""
    data = self.data()
    # binary mode to match data(): bytes are written back unchanged
    with open(format.dest(self.relpath), 'wb') as f:
      f.write(data)
    if (self.metaSource):
      self.metaSource.write(format) # XXX need to get output path from format, but not let it choose actual format

  def compact(self):
    """Clears all cached data, preserves computed data."""
    pass

  def revision(self):
    """Returns hash of the contents of this file and any related file, references, support files, etc.
       XXX also needs to account for .meta file

       The result is the SHA-1 hex digest over this file's data followed by
       the data of every reference source reachable through `refs`, each
       hashed at most once.
    """
    sha = hashlib.sha1()
    sha.update(self.data())
    # Seed with this file's own path so a reference cycle does not hash it
    # twice. sourcepath is a list of paths in ConfigSource.
    if (isinstance(self.sourcepath, (list, tuple))):
      seenRefs = set(self.sourcepath)
    else:
      seenRefs = set([self.sourcepath])
    def hashReference(source):
      for refName in source.refs:
        refSource = source.refs[refName][3]
        if (refSource and (refSource.sourcepath not in seenRefs)):
          sha.update(refSource.data())
          seenRefs.add(refSource.sourcepath)
          hashReference(refSource)
    hashReference(self)
    return sha.hexdigest()

  def loadMetadata(self):
    """Look for .meta file and load any metadata from it if present
    """
    pass

  def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None):
    """Forward to the metaSource's augmentMetadata(); returns None when
       there is no metaSource.
    """
    if (self.metaSource):
      return self.metaSource.augmentMetadata(next, prev, reference, notReference)
    return None

  # See http://wiki.csswg.org/test/css2.1/format for more info on metadata
  def getMetadata(self, asUnicode = False):
    """Return dictionary of test metadata. Stores list of errors
       in self.errors if there are parse or metadata errors.
       Data fields include:
         - asserts [list of strings]
         - credits [list of (name string, url string) tuples]
         - reviewers [ list of (name string, url string) tuples]
         - flags   [list of token strings]
         - links   [list of url strings]
         - name    [string]
         - title   [string]
         - references [list of ReferenceData per reference; None if not reftest]
         - revision   [revision id of last commit]
         - selftest [bool]
         - scripttest [bool]
       Strings are given in ascii unless asUnicode==True.
       Returns None when the source has no metadata.
    """

    self.validate()

    def encode(text):
      # values carrying a `line` attribute (LineString) pass through as-is
      return text if (hasattr(text, 'line')) else intern(text.encode('utf-8'))

    def escape(text, andIntern = True):
      if (asUnicode):
        return text.encode('utf-8')
      if (andIntern):
        return intern(escapeToNamedASCII(text))
      return escapeToNamedASCII(text)

    def repoPathOf(source, refSource, refRelPath):
      # path of the reference file: taken from its source when resolved,
      # otherwise derived from the referring file's directory
      if (refSource):
        return refSource.sourcepath
      return os.path.normpath(join(basepath(source.sourcepath), refRelPath))

    def referenceData(refType, refRelPath, refSource, refSourcePath):
      if (refSource):
        relpath = refRelPath
      else:
        relpath = relativeURL(self.sourcepath, refSourcePath)
      return ReferenceData(name = self.sourceTree.getAssetName(refSourcePath), type = refType,
                           relpath = relpath, repopath = refSourcePath)

    def listReferences(source, seen):
      """Return a list of reference groups (lists of ReferenceData) for
         `source`. `seen` holds the paths already visited.
      """
      refGroups = []
      # '==' references: one group per chain of matching references
      for refType, refRelPath, refNode, refSource in source.refs.values():
        if ('==' != refType):
          continue
        refSourcePath = repoPathOf(source, refSource, refRelPath)
        if (refSourcePath in seen):
          continue
        seen.add(refSourcePath)
        sourceData = referenceData(refType, refRelPath, refSource, refSourcePath)
        subRefLists = None
        if (refSource and refSource.refs):
          # each chain gets its own copy of the visited set
          subRefLists = listReferences(refSource, seen.copy())
        if (subRefLists):
          for subRefList in subRefLists:
            refGroups.append([sourceData] + subRefList)
        else:
          refGroups.append([sourceData])

      # '!=' references, together with everything they reference in turn
      notRefs = {}
      for refType, refRelPath, refNode, refSource in source.refs.values():
        if ('!=' != refType):
          continue
        refSourcePath = repoPathOf(source, refSource, refRelPath)
        if (refSourcePath in seen):
          continue
        seen.add(refSourcePath)
        sourceData = referenceData(refType, refRelPath, refSource, refSourcePath)
        notRefs[sourceData.name] = sourceData
        if (refSource and refSource.refs):
          for subRefList in listReferences(refSource, seen):
            for subRefData in subRefList:
              notRefs[subRefData.name] = subRefData

      if (notRefs):
        for refData in notRefs.values():
          refData.type = '!='
        if (refGroups):
          # add every mismatch reference to each group that lacks it
          for refGroup in refGroups:
            for notRef in notRefs.values():
              for ref in refGroup:
                if (ref.name == notRef.name):
                  break
              else:
                refGroup.append(notRef)
        else:
          refGroups.append(list(notRefs.values()))
      return refGroups

    references = listReferences(self, set([self.sourcepath])) if (self.refs) else None

    if (self.metadata):
      data = Metadata(
              name       = encode(self.name()),
              title      = escape(self.metadata['title'], False),
              asserts    = [escape(assertion, False) for assertion in self.metadata['asserts']],
              credits    = [UserData(escape(name), encode(link)) for name, link in self.metadata['credits']],
              reviewers  = [UserData(escape(name), encode(link)) for name, link in self.metadata['reviewers']],
              flags      = [encode(flag) for flag in self.metadata['flags']],
              links      = [encode(link) for link in self.metadata['links']],
              references = references,
              revision   = self.revision(),
              selftest   = self.isSelftest(),
              scripttest = self.isScripttest()
             )
      return data
    return None

  def addReference(self, referenceSource, match = None):
    """Add reference source.
       `match` is '==' or '!=' (or None to keep the type of an already
       known reference). The entry in `self.refs` is created or updated;
       the stored node is None when augmentMetadata() provides none.
    """
    self.validate()
    refName = referenceSource.name()
    refPath = self.relativeURL(referenceSource)
    if refName not in self.refs:
      node = None
      if match == '==':
        nodes = self.augmentMetadata(reference=referenceSource)
        node = nodes.reference if (nodes is not None) else None
      elif match == '!=':
        nodes = self.augmentMetadata(notReference=referenceSource)
        node = nodes.notReference if (nodes is not None) else None
      self.refs[refName] = (match, refPath, node, referenceSource)
    else:
      node = self.refs[refName][2]
      if (node is not None):
        node.set('href', refPath)
        if (match):
          node.set('rel', 'mismatch' if ('!=' == match) else 'match')
      if (not match):
        match = self.refs[refName][0]
      self.refs[refName] = (match, refPath, node, referenceSource)

  def getReferencePaths(self):
    """Get list of paths to references as tuple(path, relPath, refType)."""
    self.validate()
    return [(os.path.join(os.path.dirname(self.sourcepath), ref[1]),
             os.path.join(os.path.dirname(self.relpath), ref[1]),
             ref[0])
            for ref in self.refs.values()]

  def isTest(self):
    """True when the source has metadata with at least one link."""
    self.validate()
    return bool(self.metadata) and bool(self.metadata.get('links'))

  def isReftest(self):
    return self.isTest() and bool(self.refs)

  def isSelftest(self):
    return self.isTest() and (not bool(self.refs))

  def isScripttest(self):
    """True when this is a test that includes a testharness.js script."""
    if (self.isTest() and self.scripts):
      for src in self.scripts:
        if (src.endswith('/resources/testharness.js')):   # accept relative paths to testharness.js
          return True
    return False

  def hasFlag(self, flag):
    data = self.getMetadata()
    if data:
      return flag in data['flags']
    return False


class ConfigSource(FileSource):
  """Object representing a text-based configuration file.
     Capable of merging multiple config-file contents.

     Unlike a plain FileSource, `sourcepath` is a list: one path per
     config file merged into this source.
  """

  def __init__(self, sourceTree, sourcepath, relpath, mimetype = None, data = None):
    """Init ConfigSource from source path. Give it relative path relpath.
    """
    FileSource.__init__(self, sourceTree, sourcepath, relpath, mimetype, data)
    self.sourcepath = [sourcepath]

  def __eq__(self, other):
    """Equal when both are ConfigSources with the same path list, or with
       path lists of equal length whose files compare equal pairwise.
    """
    if not isinstance(other, ConfigSource):
      return False
    if (self is other) or (self.sourcepath == other.sourcepath):
      return True
    if len(self.sourcepath) != len(other.sourcepath):
      return False
    return all(filecmp.cmp(mine, theirs)
               for mine, theirs in zip(self.sourcepath, other.sourcepath))

  def __ne__(self, other):
    return not (self == other)

  def name(self):
    return '.htaccess'

  def type(self):
    return intern('support')

  def data(self):
    """Merge contents of all config files represented by this source."""
    pieces = []
    for path in self.sourcepath:
      with open(path) as f:
        pieces.append(f.read())
      pieces.append('\n')   # each file's contents is followed by a newline
    return ''.join(pieces)

  def getMetadata(self, asUnicode = False):
    return None

  def append(self, other):
    """Appends contents of ConfigSource `other` to this source.
       Asserts if self.relpath != other.relpath.
    """
    assert isinstance(other, ConfigSource)
    assert self != other and self.relpath == other.relpath
    self.sourcepath.extend(other.sourcepath)

class ReftestFilepathError(Exception):
  """Raised while iterating a ReftestManifest when a listed file does not
     exist or a relpath falls outside the relpath root.
  """

class ReftestManifest(ConfigSource):
  """Object representing a reftest manifest file.
     Iterating the ReftestManifest returns (testpath, refpath) tuples
     with paths relative to the manifest.
  """
  def __init__(self, sourceTree, sourcepath, relpath, data = None):
    """Init ReftestManifest from source path. Give it relative path `relpath`
       and load its .htaccess file.
    """
    ConfigSource.__init__(self, sourceTree, sourcepath, relpath, mimetype = 'config/reftest', data = data)

  def basepath(self):
    """Returns the base relpath of this reftest manifest path, i.e.
       the parent of the manifest file.
    """
    return basepath(self.relpath)

  baseRE = re.compile(r'^#\s*relstrip\s+(\S+)\s*')
  stripRE = re.compile(r'#.*')
  parseRE = re.compile(r'^\s*([=!]=)\s*(\S+)\s+(\S+)')

  def __iter__(self):
    """Parse the reftest manifest files represented by this ReftestManifest
       and return path information about each reftest pair as
         ((test-sourcepath, ref-sourcepath), (test-relpath, ref-relpath), reftype)
       Raises a ReftestFilepathError if any sources file do not exist or
       if any relpaths point higher than the relpath root.
    """
    striplist = []
    for src in self.sourcepath:
      relbase = basepath(self.relpath)
      srcbase = basepath(src)
      with open(src) as f:
        for line in f:
          strip = self.baseRE.search(line)
          if strip:
            striplist.append(strip.group(1))
          line = self.stripRE.sub('', line)
          m = self.parseRE.search(line)
          if m:
            record = ((join(srcbase, m.group(2)), join(srcbase, m.group(3))), \
                      (join(relbase, m.group(2)), join(relbase, m.group(3))), \
                      m.group(1))
  #          for strip in striplist:
              # strip relrecord
            if not exists(record[0][0]):
              raise ReftestFilepathError("Manifest Error in %s: "
                                         "Reftest test file %s does not exist." \
                                          % (src, record[0][0]))
            elif not exists(record[0][1]):
              raise ReftestFilepathError("Manifest Error in %s: "
                                         "Reftest reference file %s does not exist." \
                                         % (src, record[0][1]))
            elif not isPathInsideBase(record[1][0]):
              raise ReftestFilepathError("Manifest Error in %s: "
                                         "Reftest test replath %s not within relpath root." \
                                         % (src, record[1][0]))
            elif not isPathInsideBase(record[1][1]):
              raise ReftestFilepathError("Manifest Error in %s: "
                                         "Reftest test replath %s not within relpath root." \
                                         % (src, record[1][1]))
            yield record

import Utils # set up XML catalog
# Namespace URIs wrapped in braces ('{uri}'), ready to be prepended to a
# local name to build the qualified tag / attribute names used with lxml
# in this file (e.g. xhtmlns+'head', svgns+'g', xlinkns+'href').
xhtmlns = '{http://www.w3.org/1999/xhtml}'
svgns = '{http://www.w3.org/2000/svg}'
xmlns = '{http://www.w3.org/XML/1998/namespace}'
xlinkns = '{http://www.w3.org/1999/xlink}'

class XMLSource(FileSource):
  """FileSource object with support reading XML trees.

     The parsed lxml tree is cached in `self.tree` (None until parsed, and
     again after `compact()`). Elements added with `injectMetadataLink` are
     recorded in `self.injectedTags` so `clearInjectedTags` can remove them.
  """

  # Return type of augmentMetadata(); one field per augmentMetadata argument.
  NodeTuple = collections.namedtuple('NodeTuple', ['next', 'prev', 'reference', 'notReference'])

  # Public Data

  # Template for the replacement document built by cacheAsParseError();
  # the two %s slots receive the file name and the parser's error.
  syntaxErrorDoc = \
  u"""
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  <html xmlns="http://www.w3.org/1999/xhtml">
    <head><title>Syntax Error</title></head>
    <body>
      <p>The XML file <![CDATA[%s]]> contains a syntax error and could not be parsed.
      Please correct it and try again.</p>
      <p>The parser's error report was:</p>
      <pre><![CDATA[%s]]></pre>
    </body>
  </html>
  """

  # Private Data and Methods
  __parser = etree.XMLParser(no_network=True,
  # perf nightmare           dtd_validation=True,
                             remove_comments=False,
                             strip_cdata=False,
                             resolve_entities=False)

  # Public Methods

  def __init__(self, sourceTree, sourcepath, relpath, data = None):
    """Initialize XMLSource by loading from XML file `sourcepath`.
      Parse errors are reported in `self.errors`,
      and the source is replaced with an XHTML error message.
      The tree itself is not parsed here; see `parse` / `validate`.
    """
    FileSource.__init__(self, sourceTree, sourcepath, relpath, data = data)
    self.tree = None
    self.injectedTags = {}

  def cacheAsParseError(self, filename, e):
    """Replace document with an error message.

       `filename` and the error `e` are substituted into `syntaxErrorDoc`;
       the result is parsed and stored in `self.tree`.
    """
    errorDoc = self.syntaxErrorDoc % (filename, e)
    try:
      from StringIO import StringIO  # Python 2
    except ImportError:
      from io import StringIO        # Python 3: the StringIO module is gone
    self.tree = etree.parse(StringIO(errorDoc), parser=self.__parser)

  def parse(self):
    """Parse file and store any parse errors in self.errors

       On success `self.tree` and `self.encoding` are set and the record of
       injected tags is reset. Empty data yields no tree and an
       'Empty source file' error. Metadata is then loaded through
       FileSource.loadMetadata, falling back to extractMetadata on the tree
       when none was loaded and there are no errors. On an lxml ParseError
       the tree is replaced by the syntax-error document.
    """
    self.errors = None
    try:
      data = self.data()
      if (data):
        self.tree = etree.parse(StringReader(data), parser=self.__parser)
        self.encoding = self.tree.docinfo.encoding or 'utf-8'
        self.injectedTags = {}
      else:
        self.tree = None
        self.errors = ['Empty source file']
        self.encoding = 'utf-8'

      FileSource.loadMetadata(self)
      if ((not self.metadata) and self.tree and (not self.errors)):
        self.extractMetadata(self.tree)
    except etree.ParseError as e:
      print("PARSE ERROR: " + self.sourcepath)
      self.cacheAsParseError(self.sourcepath, e)
      e.W3CTestLibErrorLocation = self.sourcepath
      self.errors = [str(e)]
      self.encoding = 'utf-8'

  def validate(self):
    """Parse file if not parsed, and store any parse errors in self.errors"""
    if self.tree is None:
      self.parse()

  def getMeatdataContainer(self):
    """Return the XHTML <head> child of the root element, or None.
       (The misspelled name is kept: subclasses override it.)
    """
    return self.tree.getroot().find(xhtmlns+'head')

  def injectMetadataLink(self, rel, href, tagCode = None):
    """Inject (prepend) <link> with data given inside metadata container.
       Injected element is tagged with `tagCode`, which can be
       used to clear it with clearInjectedTags later.
       Returns the new element, or None if there is no metadata container.
    """
    self.validate()
    container = self.getMeatdataContainer()
    # Compare against None explicitly: an lxml element without children
    # is falsy, so a plain truth test would skip an empty container.
    if container is None:
      return None
    node = etree.Element(xhtmlns+'link', {'rel': rel, 'href': href})
    # Reuse the container's leading text as the text following the new node.
    node.tail = container.text
    container.insert(0, node)
    self.injectedTags[node] = tagCode or True
    return node

  def clearInjectedTags(self, tagCode = None):
    """Clears all injected elements from the tree, or clears injected
       elements tagged with `tagCode` if `tagCode` is given.
    """
    if (not self.injectedTags) or (self.tree is None):
      return
    # Iterate over a snapshot: entries are deleted from the dict as we go.
    for node, code in list(self.injectedTags.items()):
      if (tagCode is not None) and (code != tagCode):
        continue
      parent = node.getparent()
      if parent is not None:
        parent.remove(node)
      del self.injectedTags[node]

  def serializeXML(self):
    """Return the (parsed if necessary) tree serialized as a unicode string."""
    self.validate()
    return etree.tounicode(self.tree)

  def data(self):
    """Return the source as encoded bytes.
       Uses FileSource.data when there is no tree or `self.metaSource` is
       set; otherwise serializes the tree in `self.encoding`.
    """
    if ((not self.tree) or (self.metaSource)):
      return FileSource.data(self)
    return self.serializeXML().encode(self.encoding, 'xmlcharrefreplace')

  def unicode(self):
    """Return the source as unicode text.
       Uses FileSource.unicode when there is no tree or `self.metaSource`
       is set; otherwise serializes the tree.
    """
    if ((not self.tree) or (self.metaSource)):
      return FileSource.unicode(self)
    return self.serializeXML()

  def write(self, format, output=None):
    """Write Source through OutputFormat `format`.
       Write contents as string `output` instead if specified.
       The text is encoded with `self.encoding`; characters that cannot be
       encoded are written as XML character references.
    """
    if not output:
      output = self.unicode()

    # write
    with open(format.dest(self.relpath), 'w') as f:
      f.write(output.encode(self.encoding, 'xmlcharrefreplace'))

  def compact(self):
    """Drop the cached tree."""
    self.tree = None

  def getMetadataElements(self, tree):
    """Return the children of the metadata container as a list, or None
       if there is no container. `tree` is unused; the container is looked
       up on `self.tree` via getMeatdataContainer.
    """
    container = self.getMeatdataContainer()
    if container is not None:
      return list(container)
    return None

  def extractMetadata(self, tree):
    """Extract metadata from tree.

       Scans the children of the metadata container and records:
         * `self.refs[name] = (reftype, href, node, None)` for
           match/reference ('==') and mismatch/not-reference ('!=') links
         * `self.scripts[src] = node` for <script> elements with a src
         * `self.metadata`, a dict with keys asserts, credits, reviewers,
           flags, links and title (only set if at least one is non-empty)
       Problems found are added to `self.errors`.
       `tree` is only passed on to getMetadataElements.
    """
    links = []; credits = []; reviewers = []; flags = []; asserts = []; title = ''
    errors = []
    readFlags = False

    def tokenMatch(token, string):
      """True if `token` occurs in `string` delimited by whitespace or the string ends."""
      return bool(re.search(r'(^|\s+)%s($|\s+)' % token, string)) if (string) else False

    def strippedAttr(node, attrName):
      """Stripped value of attribute `attrName`, or None if missing or empty."""
      value = node.get(attrName)
      return value.strip() if value else None

    def addHelpLink(node):
      """Record a rel=help link; it must be a unique absolute http(s) URL."""
      link = strippedAttr(node, 'href')
      if (not link):
        errors.append(LineString("Help link missing href value.", node.sourceline))
      elif (not link.startswith(('http://', 'https://'))):
        errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline))
      elif (link in links):
        errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline))
      else:
        links.append(LineString(link, node.sourceline))

    def addReference(node, refType):
      """Record a reference link of type `refType` ('==' or '!=') in self.refs."""
      refPath = strippedAttr(node, 'href')
      if (not refPath):
        errors.append(LineString("Reference link missing href value.", node.sourceline))
        return
      refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
      if (refName in self.refs):
        errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
      else:
        self.refs[refName] = (refType, refPath, node, None)

    def addPerson(node, role, people):
      """Append (name, link) from an author/reviewer link to `people`.
         `role` ('Author' or 'Reviewer') only selects the error wording.
      """
      name = strippedAttr(node, 'title')
      if (not name):
        errors.append(LineString(role + " link missing name (title attribute).", node.sourceline))
        return
      link = strippedAttr(node, 'href')
      if (not link):
        errors.append(LineString(role + " link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
      else:
        people.append((name, link))

    metaElements = self.getMetadataElements(tree)
    if (not metaElements):
      errors.append("Missing <head> element")
    else:
      # Scan and cache metadata
      for node in metaElements:
        if (node.tag == xhtmlns+'link'):
          rel = node.get('rel')
          # help links
          if tokenMatch('help', rel):
            addHelpLink(node)
          # == references
          elif tokenMatch('match', rel) or tokenMatch('reference', rel):
            addReference(node, '==')
          # != references
          elif tokenMatch('mismatch', rel) or tokenMatch('not-reference', rel):
            addReference(node, '!=')
          else: # may have both author and reviewer in the same link
            # credits
            if tokenMatch('author', rel):
              addPerson(node, 'Author', credits)
            # reviewers
            if tokenMatch('reviewer', rel):
              addPerson(node, 'Reviewer', reviewers)
        elif (node.tag == xhtmlns+'meta'):
          metatype = node.get('name')
          metatype = metatype.strip() if metatype else metatype
          content = node.get('content')
          # requirement flags
          if ('flags' == metatype):
            if (readFlags):
              errors.append(LineString("Flags must only be specified once.", node.sourceline))
            else:
              readFlags = True
              if (content is None):
                errors.append(LineString("Flags meta missing content attribute.", node.sourceline))
              else:
                flags.extend(sorted(content.split()))
          # test assertions
          elif ('assert' == metatype):
            if (content is None):
              errors.append(LineString("Assert meta missing content attribute.", node.sourceline))
            else:
              asserts.append(content.strip().replace('\t', ' '))
        # title
        elif (node.tag == xhtmlns+'title'):
          title = node.text.strip() if node.text else ''
          # Keep only what follows a "...Test...:" style prefix, if present.
          match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL)
          if (match):
            title = match.group(1)
          title = title.strip()
        # script
        elif (node.tag == xhtmlns+'script'):
          src = strippedAttr(node, 'src')
          if (src):
            self.scripts[src] = node

    if (asserts or credits or reviewers or flags or links or title):
      self.metadata = {'asserts'   : asserts,
                       'credits'   : credits,
                       'reviewers' : reviewers,
                       'flags'     : flags,
                       'links'     : links,
                       'title'     : title
                      }

    if (errors):
      if (self.errors):
        self.errors += errors
      else:
        self.errors = errors


  def augmentMetadata(self, next=None, prev=None, reference=None, notReference=None):
    """Add extra useful metadata to the head. All arguments are optional.
         * Adds next/prev links to  next/prev Sources given
         * Adds reference (rel=match) link to reference Source given
         * Adds rel=mismatch link to notReference Source given
       Returns a NodeTuple holding, per argument, the result of
       injectMetadataLink, or the argument itself if it was falsy.
    """
    self.validate()
    if next:
      next = self.injectMetadataLink('next', self.relativeURL(next), 'next')
    if prev:
      prev = self.injectMetadataLink('prev', self.relativeURL(prev), 'prev')
    if reference:
      reference = self.injectMetadataLink('match', self.relativeURL(reference), 'ref')
    if notReference:
      notReference = self.injectMetadataLink('mismatch', self.relativeURL(notReference), 'not-ref')
    return self.NodeTuple(next, prev, reference, notReference)


class XHTMLSource(XMLSource):
  """XMLSource for XHTML files that can also be serialized as HTML."""

  # Public Methods

  def __init__(self, sourceTree, sourcepath, relpath, data = None):
    """Set up an XHTMLSource for the XHTML file `sourcepath`.
      All arguments are handed to XMLSource unchanged.
      Parse errors are stored in `self.errors`,
      and the source is replaced with an XHTML error message.
    """
    XMLSource.__init__(self, sourceTree, sourcepath, relpath, data)

  def serializeXHTML(self, doctype = None):
    """Return the XML serialization of the tree; `doctype` is not used."""
    xhtml = self.serializeXML()
    return xhtml

  def serializeHTML(self, doctype = None):
    """Return the tree serialized as HTML by HTMLSerializer,
       parsing the source first if that has not happened yet.
    """
    self.validate()
    return HTMLSerializer.HTMLSerializer().serializeHTML(self.tree, doctype)


class SVGSource(XMLSource):
  """FileSource object with support for extracting metadata from SVG."""

  def __init__(self, sourceTree, sourcepath, relpath, data = None):
    """Initialize SVGSource by loading from SVG file `sourcepath`.
      Parse errors are stored in `self.errors`,
      and the source is replaced with an XHTML error message.
    """
    XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)

  def getMeatdataContainer(self):
    """Return the <g id="testmeta"> child of the root element, or None.
       Only direct children of the root are considered.
    """
    groups = self.tree.getroot().findall(svgns+'g')
    for group in groups:
      if ('testmeta' == group.get('id')):
        return group
    return None

  def extractMetadata(self, tree):
    """Extract metadata from tree.

       Scans the children of the metadata container and records:
         * `self.refs[name] = (reftype, href, node, None)` for XHTML
           match/reference ('==') and mismatch/not-reference ('!=') links
         * `self.scripts[src] = node` for SVG <script> elements with a src
         * `self.metadata`, a dict with keys asserts, credits, reviewers,
           flags, links and title (only set if at least one is non-empty)
       Flags come from <metadata class="flags">, assertions from
       <desc class="assert">. Problems found are added to `self.errors`.
       `tree` is only passed on to getMetadataElements.
    """
    links = []; credits = []; reviewers = []; flags = []; asserts = []; title = ''
    errors = []
    readFlags = False

    def tokenMatch(token, string):
      """True if `token` occurs in `string` delimited by whitespace or the string ends."""
      return bool(re.search(r'(^|\s+)%s($|\s+)' % token, string)) if (string) else False

    def strippedAttr(node, attrName):
      """Stripped value of attribute `attrName`, or None if missing or empty."""
      value = node.get(attrName)
      return value.strip() if value else None

    def addHelpLink(node):
      """Record a rel=help link; it must be a unique absolute http(s) URL."""
      link = strippedAttr(node, 'href')
      if (not link):
        errors.append(LineString("Help link missing href value.", node.sourceline))
      elif (not link.startswith(('http://', 'https://'))):
        errors.append(LineString("Help link " + link.encode('utf-8') + " must be absolute URL.", node.sourceline))
      elif (link in links):
        errors.append(LineString("Duplicate help link " + link.encode('utf-8') + ".", node.sourceline))
      else:
        links.append(LineString(link, node.sourceline))

    def addReference(node, refType):
      """Record a reference link of type `refType` ('==' or '!=') in self.refs."""
      refPath = strippedAttr(node, 'href')
      if (not refPath):
        errors.append(LineString("Reference link missing href value.", node.sourceline))
        return
      refName = self.sourceTree.getAssetName(join(self.sourcepath, refPath))
      if (refName in self.refs):
        errors.append(LineString("Reference " + refName.encode('utf-8') + " already specified.", node.sourceline))
      else:
        self.refs[refName] = (refType, refPath, node, None)

    def addPerson(node, role, people):
      """Append (name, link) from an author/reviewer link to `people`.
         `role` ('Author' or 'Reviewer') only selects the error wording.
      """
      name = strippedAttr(node, 'title')
      if (not name):
        errors.append(LineString(role + " link missing name (title attribute).", node.sourceline))
        return
      link = strippedAttr(node, 'href')
      if (not link):
        errors.append(LineString(role + " link for \"" + name.encode('utf-8') + "\" missing contact URL (http or mailto).", node.sourceline))
      else:
        people.append((name, link))

    metaElements = self.getMetadataElements(tree)
    if (not metaElements):
      errors.append("Missing <g id='testmeta'> element")
    else:
      # Scan and cache metadata
      for node in metaElements:
        if (node.tag == xhtmlns+'link'):
          rel = node.get('rel')
          # help links
          if tokenMatch('help', rel):
            addHelpLink(node)
          # == references
          elif tokenMatch('match', rel) or tokenMatch('reference', rel):
            addReference(node, '==')
          # != references
          elif tokenMatch('mismatch', rel) or tokenMatch('not-reference', rel):
            addReference(node, '!=')
          else: # may have both author and reviewer in the same link
            # credits
            if tokenMatch('author', rel):
              addPerson(node, 'Author', credits)
            # reviewers
            if tokenMatch('reviewer', rel):
              addPerson(node, 'Reviewer', reviewers)
        elif (node.tag == svgns+'metadata'):
          metatype = node.get('class')
          metatype = metatype.strip() if metatype else metatype
          # requirement flags
          if ('flags' == metatype):
            if (readFlags):
              errors.append(LineString("Flags must only be specified once.", node.sourceline))
            else:
              readFlags = True
              text = node.find(svgns+'text')
              # Test against None: an lxml element with no children is
              # falsy, so a truth test would ignore a found <text> child.
              flagString = text.text if (text is not None) else node.text
              if (flagString):
                flags.extend(sorted(flagString.split()))
        elif (node.tag == svgns+'desc'):
          metatype = node.get('class')
          metatype = metatype.strip() if metatype else metatype
          # test assertions
          if ('assert' == metatype):
            if (node.text is None):
              # No text at all: report it instead of failing on None.strip()
              errors.append(LineString("Assert desc missing text content.", node.sourceline))
            else:
              asserts.append(node.text.strip().replace('\t', ' '))
        # test title
        elif (node.tag == svgns+'title'):
          title = node.text.strip() if node.text else ''
          # Keep only what follows a "...Test...:" style prefix, if present.
          match = re.match('(?:[^:]*)[tT]est(?:[^:]*):(.*)', title, re.DOTALL)
          if (match):
            title = match.group(1)
          title = title.strip()
        # script tag (XXX restricted to metadata container?)
        elif (node.tag == svgns+'script'):
          src = strippedAttr(node, 'src')
          if (src):
            self.scripts[src] = node

    if (asserts or credits or reviewers or flags or links or title):
      self.metadata = {'asserts'   : asserts,
                       'credits'   : credits,
                       'reviewers' : reviewers,
                       'flags'     : flags,
                       'links'     : links,
                       'title'     : title
                      }
    if (errors):
      if (self.errors):
        self.errors += errors
      else:
        self.errors = errors


class HTMLSource(XMLSource):
  """FileSource object with support for HTML metadata and HTML->XHTML conversions (untested)."""

  # Private Data and Methods

  # html5lib parser producing lxml trees; defined on the class, so one
  # parser object is shared by all instances.
  __parser = html5lib.HTMLParser(tree = treebuilders.getTreeBuilder('lxml'))

  # Public Methods

  def __init__(self, sourceTree, sourcepath, relpath, data = None):
    """Initialize HTMLSource by loading from HTML file `sourcepath`.
    """
    XMLSource.__init__(self, sourceTree, sourcepath, relpath, data = data)

  def parse(self):
    """Parse file and store any parse errors in self.errors

       On success `self.tree` holds the html5lib result and
       `self.encoding` the encoding html5lib reports for the document.
       Empty data yields no tree and an 'Empty source file' error.
       Metadata is then loaded through FileSource.loadMetadata, falling
       back to extractMetadata when none was loaded and there are no
       errors. Any exception is recorded in `self.errors` rather than
       propagated.
    """
    self.errors = None
    try:
      data = self.data()
      if data:
        with warnings.catch_warnings():
          warnings.simplefilter("ignore")
          self.tree = self.__parser.parse(data)
          # Let html5lib report which encoding it used; fall back to
          # utf-8 (as XMLSource.parse does) if it reports none.
          self.encoding = self.__parser.documentEncoding or 'utf-8'
          self.injectedTags = {}
      else:
        self.tree = None
        self.errors = ['Empty source file']
        self.encoding = 'utf-8'

      FileSource.loadMetadata(self)
      if ((not self.metadata) and self.tree and (not self.errors)):
        self.extractMetadata(self.tree)
    except Exception as e:
      print("PARSE ERROR: " + self.sourcepath)
      e.W3CTestLibErrorLocation = self.sourcepath
      self.errors = [str(e)]
      self.encoding = 'utf-8'

  def _injectXLinks(self, element, nodeList):
    """Move XLink-style attributes of `element` and its descendants into
       the xlink namespace.

       Only elements carrying an href (plain or xlink) are touched. Each
       moved attribute is appended to `nodeList` as
       (element, newName, oldName) so removeNamespaces can undo it.
       Subtrees rooted at a foreignObject element are not descended into.
       Returns True if any xlink attribute was found or created.
    """
    injected = False

    xlinkAttrs = ['href', 'type', 'role', 'arcrole', 'title', 'show', 'actuate']
    if (element.get('href') or element.get(xlinkns + 'href')):
      for attr in xlinkAttrs:
        if (element.get(xlinkns + attr)):
          injected = True
        if (element.get(attr)):
          injected = True
          value = element.get(attr)
          del element.attrib[attr]
          element.set(xlinkns + attr, value)
          nodeList.append((element, xlinkns + attr, attr))

    for child in element:
      # Only element nodes have a string tag (comments and the like do not).
      if isinstance(child.tag, (str, type(u''))):
        qName = etree.QName(child.tag)
        if ('foreignobject' != qName.localname.lower()):
          if self._injectXLinks(child, nodeList):
            injected = True
    return injected


  def _findElements(self, namespace, elementName):
    """Return all `elementName` elements in `namespace` found below the
       root, preceded by the root itself if it matches.
    """
    qualifiedName = '{' + namespace + '}' + elementName
    elements = self.tree.findall('.//' + qualifiedName)
    if (self.tree.getroot().tag == qualifiedName):
      elements.insert(0, self.tree.getroot())
    return elements

  def _injectNamespace(self, elementName, prefix, namespace, doXLinks, nodeList):
    """Set a namespace attribute on every `elementName` element in
       `namespace` that does not already have it.

       The attribute name is 'xmlns' when `prefix` is empty, otherwise
       the module-level `xmlns` string followed by `prefix`. With
       `doXLinks`, XLink attributes below the element are converted too
       and an xlink attribute is added when any were found. Every change
       is appended to `nodeList` for removeNamespaces.
    """
    attr = (xmlns + prefix) if (prefix) else 'xmlns'
    for element in self._findElements(namespace, elementName):
      if element.get(attr):
        continue
      element.set(attr, namespace)
      nodeList.append((element, attr, None))
      if (doXLinks):
        if (self._injectXLinks(element, nodeList)):
          element.set(xmlns + 'xlink', 'http://www.w3.org/1999/xlink')
          nodeList.append((element, xmlns + 'xlink', None))

  def injectNamespaces(self):
    """Add namespace attributes to html, svg and math elements (and XLink
       attributes inside svg and math). Returns the list of changes made,
       to be passed to removeNamespaces.
    """
    nodeList = []
    self._injectNamespace('html', None, 'http://www.w3.org/1999/xhtml', False, nodeList)
    self._injectNamespace('svg', None, 'http://www.w3.org/2000/svg', True, nodeList)
    self._injectNamespace('math', None, 'http://www.w3.org/1998/Math/MathML', True, nodeList)
    return nodeList

  def removeNamespaces(self, nodeList):
    """Undo the changes recorded in `nodeList` by injectNamespaces:
       renamed attributes get their old name back, added ones are deleted.
    """
    if nodeList:
      for element, attr, oldAttr in nodeList:
        if (oldAttr):
          value = element.get(attr)
          del element.attrib[attr]
          element.set(oldAttr, value)
        else:
          del element.attrib[attr]

  def serializeXHTML(self, doctype = None):
    """Return the tree serialized as XHTML by HTMLSerializer.
       Namespace attributes are injected for the duration of the
       serialization and removed again afterwards, even if it fails.
    """
    self.validate()
    # Serialize
    nodeList = self.injectNamespaces()
    try:
      serializer = HTMLSerializer.HTMLSerializer()
      return serializer.serializeXHTML(self.tree, doctype)
    finally:
      # Never leave the temporary namespace attributes in the cached tree.
      self.removeNamespaces(nodeList)

  def serializeHTML(self, doctype = None):
    """Return the tree serialized as HTML by HTMLSerializer."""
    self.validate()
    # Serialize
    serializer = HTMLSerializer.HTMLSerializer()
    o = serializer.serializeHTML(self.tree, doctype)

    return o

  def data(self):
    """Return the source as encoded bytes.
       Uses FileSource.data when there is no tree or `self.metaSource` is
       set; otherwise serializes the tree as HTML in `self.encoding`.
    """
    if ((not self.tree) or (self.metaSource)):
      return FileSource.data(self)
    return self.serializeHTML().encode(self.encoding, 'xmlcharrefreplace')

  def unicode(self):
    """Return the source as unicode text.
       Uses FileSource.unicode when there is no tree or `self.metaSource`
       is set; otherwise serializes the tree as HTML.
    """
    if ((not self.tree) or (self.metaSource)):
      return FileSource.unicode(self)
    return self.serializeHTML()