import os, sys, sgmllib, cookielib, urllib, htmlentitydefs

# First argument: the HTML file whose links are checked (defaults to the
# G-code reference page).
ref = sys.argv[1] if len(sys.argv) > 1 else "../html/gcode.html"

# Remaining arguments: the files that links are allowed to point at.
# None (rather than an empty list) means "no restriction was given".
targets = sys.argv[2:] or None

def get(attr, attrs, default=""):
    """Look up `attr` case-insensitively in a sequence of (key, value) pairs.

    Returns the value of the first pair whose key matches, or `default`
    when no key matches.
    """
    wanted = attr.lower()
    matches = (value for key, value in attrs if key.lower() == wanted)
    return next(matches, default)

class MetaHandler:
    """Mixin that records the document charset announced by a <meta> tag.

    Intended to be combined with an sgmllib.SGMLParser subclass, which
    dispatches <meta ...> tags to do_meta().  When a
    <meta http-equiv="content-type" content="...; charset=X"> tag is seen,
    self.encoding is set to X.
    """

    def do_meta(self, attrs):
        """Handle a <meta> tag given its list of (name, value) attributes."""
        # HTML treats http-equiv values case-insensitively ("Content-Type"
        # is the usual spelling), so compare in lower case.
        equiv = get("http-equiv", attrs)
        if equiv.lower() != "content-type": return

        # The content value looks like an HTTP header ("text/html;
        # charset=utf-8"); reuse cookielib's header splitter to pick it
        # apart into (key, value) pairs.
        content = get("content", attrs)
        parsed = cookielib.split_header_words([content])
        # Nothing parseable (e.g. missing or empty content attribute).
        if not parsed: return

        encoding = get("charset", parsed[0])
        if not encoding: return

        # Documents that claim plain ASCII are decoded as ISO-8859-1
        # instead, so that a stray 8-bit byte does not abort the run.
        if encoding.upper() in ("ASCII", "US-ASCII"): encoding = "ISO-8859-1"
        self.encoding = encoding

class get_refs(sgmllib.SGMLParser, MetaHandler):
    """Parser that collects the target of every <a href="..."> it is fed.

    After feed(), self.refs holds the set of link targets, decoded with
    the charset announced by the document (if any) and URL-unquoted.
    """
    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.refs = set()       # link targets seen so far
        self.encoding = None    # set by MetaHandler.do_meta when announced

    def do_a(self, attrs):
        """Record the href of an <a> tag, if it has one."""
        href = get('href', attrs, None)
        # An <a> without href (e.g. <a name="...">) is an anchor, not a
        # link; recording it as "" would count it as a good reference.
        # An explicit href="" is still a (same-document) link and is kept.
        if href is None:
            return
        if self.encoding:
            href = href.decode(self.encoding)
        self.refs.add(urllib.unquote(href))

class get_anchors(sgmllib.SGMLParser, MetaHandler):
    """Parser that collects every anchor name defined in the HTML it is fed.

    An anchor is the name= or id= of an <a> tag, or the id= of any tag
    that has no dedicated handler.  After feed(), self.anchors holds the
    set of names, decoded with the charset announced by the document
    (if any) and URL-unquoted.
    """
    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.anchors = set()    # anchor names seen so far
        self.encoding = None    # set by MetaHandler.do_meta when announced

    def unknown_starttag(self, tag, attrs):
        """Treat an id= attribute on any other tag as an anchor."""
        ident = get('id', attrs)
        if ident:
            self.do_a([('name', ident)])

    def unknown_endtag(self, tag): pass

    def do_a(self, attrs):
        """Record the name= and id= values of an <a> tag as anchors."""
        # Both attributes are valid link targets; when a tag carries both
        # with different values, each one must be registered.
        for key in ('name', 'id'):
            value = get(key, attrs)
            if not value:
                continue
            if self.encoding:
                value = value.decode(self.encoding)
            value = urllib.unquote(value)
            if value:
                self.anchors.add(value)

# Cache of already-parsed files: filename -> set of anchor names.
_anchors = {}
def get_anchors_cached(filename):
    """Return the set of anchors defined in `filename`.

    The file is parsed on first request only; later calls for the same
    filename return the cached set.
    """
    if filename not in _anchors:
        parser = get_anchors()
        # Close the file deterministically instead of relying on garbage
        # collection to release the handle.
        with open(filename) as f:
            parser.feed(f.read())
        _anchors[filename] = parser.anchors
    return _anchors[filename]

def resolve_file(src, target):
    """Split a link into (path of the linked file, anchor).

    `src` is the path of the document containing the link and `target`
    is the link text.  The part after the first "#" is the anchor; it is
    None when the link has no "#".  A link with an empty file part
    ("#anchor" or "") refers to `src` itself; any other file part is
    taken relative to the directory of `src`.
    """
    if "#" in target:
        path, anchor = target.split("#", 1)
    else:
        path, anchor = target, None

    # Same-document link: `src` is already a complete path, so it must not
    # be joined onto its own directory again (that would turn
    # "html/x.html" into "html/html/x.html").
    if not path:
        return src, anchor

    return os.path.join(os.path.dirname(src), path), anchor

def resolve(target, anchor):
    """Tell whether `anchor` can be found in the file `target`.

    A link without an anchor (None or empty) always resolves; the target
    file is only parsed when there is an anchor to look for.
    """
    return (not anchor) or anchor in get_anchors_cached(target)

# Collect every link found in the reference document.
_parser = get_refs()
with open(ref) as _f:
    _parser.feed(_f.read())
refs = _parser.refs

# Classify each link into exactly one of these buckets.
missing_anchor = set()      # file exists but does not define the anchor
missing_file = set()        # linked file does not exist
unlisted_targets = set()    # linked file is not among the given targets
good = set()                # link fully resolved

# Set for O(1) membership tests; None when no target list was given.
_listed = set(targets) if targets else None

for r in refs:
    target, anchor = resolve_file(ref, r)
    if _listed is not None and target not in _listed:
        unlisted_targets.add(target)
    elif not os.path.exists(target):
        missing_file.add(r)
    elif not resolve(target, anchor):
        missing_anchor.add(r)
    else:
        good.add(r)

# Report the findings, one section per non-empty problem bucket.
if missing_file:
    print("Files linked to in %s but could not be found:" % (
        os.path.basename(ref),))
    for i in sorted(missing_file):
        print("\t%r" % i)
if missing_anchor:
    print("Anchors used in %s but not defined in linked file:" % (
        os.path.basename(ref),))
    for i in sorted(missing_anchor):
        print("\t%r" % i)
if unlisted_targets:
    print("Links to files not listed as targets:")
    for i in sorted(unlisted_targets):
        print("\t%r" % i)
    print("If all link targets are not listed in the Submakefile, then the results of this program is unreliable.")
print("Good links: %d/%d" % (len(good), len(refs)))

# Any problem makes the run fail with exit status 1.
if missing_anchor or missing_file or unlisted_targets:
    raise SystemExit(1)
