#
#  $Id: HTML.py,v 1.2 1999/05/30 09:21:22 rob Exp $
#
#  Copyright 1999 Rob Tillotson <robt@debian.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Library General Public License, version 2,
#  as published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  Library General Public License for more details.
#
#  You should have received a copy of the GNU Library General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
"""
"""

__version__ = '$Id: HTML.py,v 1.2 1999/05/30 09:21:22 rob Exp $'

__copyright__ = 'Copyright 1999 Rob Tillotson <robt@debian.org>'

import formatter, htmllib, string

from Sulfur.Options import Boolean

import DTKInput

class HTMLDTKInput(DTKInput.DTKInput):
    """Input plugin that parses HTML and sends the result to a writer.

    The plugin metadata (name, version, author, ...) and the option list
    are class attributes; the actual work is done by convert().
    """
    name = 'HTML'
    version = '1.1.1'
    author = 'Rob Tillotson <robt@debian.org>'
    url = ''
    description = 'HTML input.'
    options = [
        Boolean('no-link-footnotes', 0, 'Do not mark or footnote links'),
        Boolean('no-anchor-bookmarks', 0, 'Do not bookmark local anchor targets'),
        ]

    def convert(self, f, w):
        """Parse HTML read from f and emit it through the writer w.

        f -- input object; only its readline() method is used.
        w -- writer object; it is wrapped in a formatter.AbstractFormatter
             and is also called directly for bookmarks and headings.

        Unless the 'no-link-footnotes' option is set, a "Links:" section
        listing every footnoted link target is appended after the document.
        """
        fmt = formatter.AbstractFormatter(w)
        p = DocHTMLParser(fmt)
        p.no_anchor_bookmarks = self.get_option('no-anchor-bookmarks')
        no_footnotes = self.get_option('no-link-footnotes')
        p.no_link_footnotes = no_footnotes

        while 1:
            l = f.readline()
            if not l: break
            # Drop carriage returns so DOS-style line endings do not end
            # up in the output.
            p.feed(string.replace(l, '\r', ''))

        # Force the parser to process whatever it is still buffering
        # (e.g. an unterminated tag or entity at the very end of the input);
        # without this that trailing data would be lost.
        p.close()

        if p.anchorlist and not no_footnotes:
            fmt.end_paragraph(1)
            fmt.add_hor_rule()
            # chr(187) is the right-pointing double angle quote in Latin-1.
            w.set_bookmark('%s Links' % chr(187))
            w.send_heading('Links:', 3)
            for x in range(0, len(p.anchorlist)):
                # The '1' in the label format is replaced by the counter,
                # producing "[1] ", "[2] ", ... to match the in-text markers.
                fmt.add_label_data('[1] ', x+1)
                fmt.add_flowing_data(p.anchorlist[x])
                fmt.add_line_break()


class DocHTMLParser(htmllib.HTMLParser):
    """A HTML parser with some support for Doc-format e-texts.

    Extends htmllib.HTMLParser so that titles, headings and bookmarks are
    passed straight to the formatter's writer, links are collected for a
    footnote list, and tables are flattened to one line per row.

    Attributes set by the caller after construction:
      no_anchor_bookmarks -- if true, <a name=...> targets are not bookmarked.
      no_link_footnotes   -- if true, links are neither marked nor recorded.
    """

    def __init__(self, *a, **kw):
        """Initialise the base parser; arguments are passed through as-is."""
        apply(htmllib.HTMLParser.__init__, (self,)+a, kw)
        self.writer = self.formatter.writer
        self.tcol = 0                   # cells seen so far in the current row
        self.no_anchor_bookmarks = 0
        self.no_link_footnotes = 0

    def end_title(self):
        """Hand the document title to the writer.

        The title is set unconditionally, replacing any title the writer
        may already have.
        """
        htmllib.HTMLParser.end_title(self)
        self.writer.set_title(self.title)

    # entities
    from DocToolkit.entitydefs import entitydefs

    # headings
    def _end_heading(self, level):
        """Finish a heading: send the saved text to the writer at `level`."""
        text = self.save_end()
        self.formatter.end_paragraph(1)
        self.writer.send_heading(text, level)

    def start_h1(self, attr):
        self.save_bgn()

    def end_h1(self):
        self._end_heading(1)

    def start_h2(self, attr):
        self.save_bgn()

    def end_h2(self):
        self._end_heading(2)

    def start_h3(self, attr):
        self.save_bgn()

    def end_h3(self):
        self._end_heading(3)

    def start_h4(self, attr):
        self.save_bgn()

    def end_h4(self):
        self._end_heading(4)

    def start_h5(self, attr):
        self.save_bgn()

    def end_h5(self):
        self._end_heading(5)

    def start_h6(self, attr):
        self.save_bgn()

    def end_h6(self):
        self._end_heading(6)

    # anchors.
    def anchor_bgn(self, href, name, type):
        """Start an anchor: bookmark its name and/or record its link target.

        href -- link target; recorded for footnoting unless it is a local
                ('#...') reference or link footnotes are disabled.
        name -- anchor name; bookmarked (minus a leading '#') unless anchor
                bookmarks are disabled.
        type -- accepted for compatibility with the base class; unused.
        """
        if name and not self.no_anchor_bookmarks:
            if name[0] == '#': name = name[1:]
            self.writer.set_bookmark(name)

        # NOTE: emitting TealDoc LABEL/LINK tags for local links (the old
        # 'teal-links' idea) is not implemented here.

        # The name and href are handled independently, so an anchor that
        # carries both gets a bookmark and a footnote.
        if href and href[0] != '#' and not self.no_link_footnotes:
            self.anchor = href
            self.anchorlist.append(href)

        self.formatter.push_style('link')

    def anchor_end(self):
        """End an anchor, appending a "[n]" marker if it was footnoted."""
        self.formatter.pop_style()
        if self.anchor and not self.no_link_footnotes:
            # n is the 1-based position of this link in anchorlist.
            self.handle_data('[%d]' % len(self.anchorlist))
            self.anchor = None

    # now, let's see what we can do about tables.
    # the simplest thing to do is to treat each table row as a separate line;
    def do_tr(self, attrs):
        """Start a table row on a fresh line and reset the cell counter."""
        self.tcol = 0
        self.formatter.end_paragraph(0)

    def do_td(self, attrs):
        """Start a table cell; cells after the first are preceded by a space."""
        if self.tcol: self.formatter.add_flowing_data(' ')
        self.tcol = self.tcol+1

    def start_table(self, attrs):
        """Tables need no setup; rows and cells do all the work."""
        pass

    def end_table(self):
        """End the table with a paragraph break."""
        self.formatter.end_paragraph(1)

    #-- Lists, mostly cribbed from htmllib.
    def start_ul(self, attrs):
        """Start an unordered list, choosing the bullet from the type attribute.

        'square' and 'circle' select their own labels; anything else
        (including the default 'disc') uses chr(0x95).  The attribute value
        is compared case-insensitively.
        """
        bullet_type = 'disc'
        for a, v in attrs:
            if a == 'type': bullet_type = string.lower(v)
        # chr(0x8d)/chr(0x95) are presumably glyphs in the target device's
        # character set -- TODO confirm against the writer's encoding.
        if bullet_type == 'square': label = chr(0x8d)
        elif bullet_type == 'circle': label = 'o'
        else: label = chr(0x95)
        # Only the outermost list is separated by a blank line.
        self.formatter.end_paragraph(not self.list_stack)
        self.formatter.push_margin('ul')
        self.list_stack.append(['ul', label, 0])

    def do_li(self, attrs):
        """Start a list item, labelled according to the innermost open list.

        Outside of any list the item gets a chr(0x95) label and counter 0.
        """
        self.formatter.end_paragraph(0)
        if self.list_stack:
            # Entries are [kind, label, counter]; bump the counter in place
            # so the next item in the same list continues the numbering.
            top = self.list_stack[-1]
            label = top[1]
            counter = top[2] + 1
            top[2] = counter
        else:
            label, counter = chr(0x95), 0
        self.formatter.add_label_data(label, counter)

	    
