/*
 * XMLWriter.java
 * Copyright (C) 1999,2000,2001 The Free Software Foundation
 * 
 * This file is part of GNU JAXP, a library.
 *
 * GNU JAXP is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * GNU JAXP is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * As a special exception, if you link this library with other files to
 * produce an executable, this library does not by itself cause the
 * resulting executable to be covered by the GNU General Public License.
 * This exception does not however invalidate any other reasons why the
 * executable file might be covered by the GNU General Public License. 
 */

package gnu.xml.util;

import java.io.BufferedWriter;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Stack;

import org.xml.sax.*;
import org.xml.sax.ext.*;
import org.xml.sax.helpers.*;


/**
 * This class is a SAX handler which writes all its input as a well formed
 * XML or XHTML document.  If driven using SAX2 events, this output may
 * include a recreated document type declaration, subject to limitations
 * of SAX (no internal subset exposed) or DOM (the important declarations,
 * with their documentation, are discarded).
 *
 * <p> By default, text is generated "as-is", but some optional modes
 * are supported.  Pretty-printing is supported, to make life easier
 * for people reading the output.  XHTML (1.0) output can be made
 * particularly pretty; all the built-in character entities are known.
 * Canonical XML can also be generated, assuming the input is properly
 * formed.
 *
 * <hr>
 *
 * <p> Some of the methods on this class are intended for applications to
 * use directly, rather than as pure SAX2 event callbacks.  Some of those
 * methods access the JavaBeans properties (used to tweak output formats,
 * for example canonicalization and pretty printing).  Subclasses
 * are expected to add new behaviors, not to modify current behavior, so
 * many such methods are final.</p>
 *
 * <p> The <em>write*()</em> methods may be slightly simpler for some
 * applications to use than direct callbacks.  For example, they support
 * a simple policy for encoding data items as the content of a single element.
 *
 * <p> To reuse an XMLWriter you must provide it with a new Writer, since
 * this handler closes the writer it was given as part of its endDocument()
 * handling.  (XML documents have an end of input, and the way to encode
 * that on a stream is to close it.) </p>
 *
 * <hr>
 *
 * <p> Note that any relative URIs in the source document, as found in
 * entity and notation declarations, ought to have been fully resolved by
 * the parser providing events to this handler.  This means that the
 * output text should only have fully resolved URIs, which may not be
 * the desired behavior in cases where later binding is desired. </p>
 *
 * <p> <em>Note that due to SAX2 defaults, you may need to manually
 * ensure that the input events are XML-conformant with respect to namespace
 * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
 * one solution to this problem, in the context of processing pipelines.</em>
 * Something as simple as connecting this handler to a parser might not
 * generate the correct output.  Another workaround is to ensure that the
 * <em>namespace-prefixes</em> feature is always set to true, if you're
 * hooking this directly up to some XMLReader implementation.
 *
 * @see gnu.xml.pipeline.TextConsumer
 *
 * @author David Brownell
 */
public class XMLWriter
    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
{
    // Text prints/escapes differently depending on the context in which
    // it is written.  One of these codes is handed to the escaping and
    // quoting helpers (see characters() and writeStartTag()):
    //	CTX_ENTITY ... entity literal value
    //	CTX_ATTRIBUTE ... attribute literal value
    //	CTX_CONTENT ... content of an element
    //	CTX_UNPARSED ... CDATA, comment, PI, names, etc
    //  CTX_NAME ... name or nmtoken, no escapes possible
    private static final int	CTX_ENTITY = 1;
    private static final int	CTX_ATTRIBUTE = 2;
    private static final int	CTX_CONTENT = 3;
    private static final int	CTX_UNPARSED = 4;
    private static final int	CTX_NAME = 5;

// FIXME: names (element, attribute, PI, notation, etc) are not
// currently written out with range checks (escapeChars).
// In non-XHTML, some names can't be directly written; panic!

    // Default line ending:  the platform's "line.separator" when it can
    // be read and is a legal XML line end, else a plain newline.
    private static String	sysEOL;

    static {
	String	platformEOL;

	try {
	    platformEOL = System.getProperty ("line.separator", "\n");
	} catch (SecurityException e) {
	    // not permitted to look at system properties
	    platformEOL = "\n";
	}

	// don't use the system's EOL if it's illegal XML.
	sysEOL = isLineEnd (platformEOL) ? platformEOL : "\n";
    }

    private static boolean isLineEnd (String eol)
    {
	// Only LF, CR, and CRLF count; anything else (null included)
	// is reported as not being a line end.
	if ("\n".equals (eol))
	    return true;
	if ("\r".equals (eol))
	    return true;
	return "\r\n".equals (eol);
    }

    // Destination for all output.  Assigned by setWriter() and set back
    // to null by endDocument() once the writer has been closed.
    private Writer		out;
    // When true, characters() writes its text using CTX_UNPARSED
    // instead of CTX_CONTENT.
    private boolean		inCDATA;
    // Incremented by startElement(), decremented by endElement().
    private int			elementNestLevel;
    // Line ending for output; see setEOL() and setCanonical().
    private String		eol = sysEOL;

    // Set by setEncoding():  0xff80 for US-ASCII, 0xff00 for ISO-8859-1,
    // left alone for other encodings.  Presumably a character with any
    // of these bits set can't be written directly -- confirm against
    // the escaping code.
    private short		dangerMask;
    // Allocated by setEncoding() only when dangerMask is nonzero.
    private StringBuffer	stringBuf;
    // From setDocumentLocator(), else a LocatorImpl created by the first
    // event seen.  The property setters treat non-null as "started
    // parsing"; endDocument() clears it.
    private Locator		locator;
    // Optional handler offered each fatal error before it's thrown.
    private ErrorHandler	errHandler;

    private boolean		expandingEntities = false;
    // While nonzero, startElement(), endElement() and characters()
    // write nothing.  Reset to zero by startDocument().
    private int			entityNestLevel;
    private boolean		xhtml;
    // Set by startDocument() after it writes the XHTML doctype, and
    // cleared by startElement().
    private boolean		startedDoctype;
    // Encoding name for the XML declaration, as normalized by
    // setEncoding(); null if unknown.
    private String		encoding;

    private boolean		canonical;
    private boolean		inDoctype;
    // Set by endElement() once the element nesting level is back at zero.
    private boolean		inEpilogue;

    // pretty printing controls
    private boolean		prettyPrinting;
    private int			column;
    private boolean		noWrap;
    // Stack of xml:space values ("default" or "preserve"); startElement()
    // pushes and endElement() pops while pretty printing is enabled.
    private Stack		space = new Stack ();

    // this is not a hard'n'fast rule -- longer lines are OK,
    // but are to be avoided.  Here, prettyprinting is more to
    // show structure "cleanly" than to be precise about it.
    // better to have ragged layout than one line 24Kb long.
    private static final int	lineLength = 75;


    /**
     * Constructs this handler with System.out used to write SAX events
     * using the UTF-8 encoding.  Avoid using this except when you know
     * it's safe to close System.out at the end of the document.
     */
    public XMLWriter () throws IOException
    {
	// delegate to the stream constructor, which picks UTF-8
	this (System.out);
    }

    /**
     * Constructs a handler which writes all input to the output stream
     * in the UTF-8 encoding, and closes it when endDocument is called.
     * (Yes it's annoying that this throws an exception -- but there's
     * really no way around it, since it's barely possible a JDK may
     * exist somewhere that doesn't know how to emit UTF-8.)
     */
    public XMLWriter (OutputStream out)
    throws IOException
    {
	// "UTF8" is the historical Java name for the UTF-8 encoding
	this (new OutputStreamWriter (out, "UTF8"));
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, and this class can determine the name of
     * the character encoding for this writer, that encoding name will be
     * included in the XML declaration.
     *
     * <P> See the description of the constructor which takes an encoding
     * name for important information about selection of encodings.
     *
     * @param writer XML text is written to this writer.
     */
    public XMLWriter (Writer writer)
    {
	// no explicit encoding name; setEncoding() may still infer one
	this (writer, null);
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, this class will use the specified encoding
     * name in that declaration.  If no encoding name is specified, no
     * encoding name will be declared unless this class can otherwise
     * determine the name of the character encoding for this writer.
     *
     * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
     * output encodings are fully lossless with respect to XML data.  If you
     * use any other encoding you risk having your data be silently mangled
     * on output, as the standard Java character encoding subsystem silently
     * maps non-encodable characters to a question mark ("?") and will not
     * report such errors to applications.
     *
     * <p> For a few other encodings the risk can be reduced. If the writer is
     * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
     * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
     * can't be encoded in those encodings will be written safely.  Where
     * relevant, the XHTML entity names will be used; otherwise, numeric
     * character references will be emitted.
     *
     * <P> However, there remain a number of cases where substituting such
     * entity or character references is not an option.  Such references are
     * not usable within a DTD, comment, PI, or CDATA section.  Neither may
     * they be used when element, attribute, entity, or notation names have
     * the problematic characters.
     *
     * @param writer XML text is written to this writer.
     * @param encoding if non-null, and an XML declaration is written,
     *	this is the name that will be used for the character encoding.
     */
    public XMLWriter (Writer writer, String encoding)
    {
	// all other constructors funnel through here; setWriter() is
	// final, so no subclass code runs during construction
	setWriter (writer, encoding);
    }
    
    /**
     * Records the encoding name to be declared for the current writer,
     * and selects the matching escaping policy.  If no name is given and
     * the writer is an OutputStreamWriter, the writer is asked for its
     * encoding.  Java-specific names are mapped to their official
     * equivalents where those are known.
     *
     * @param encoding the encoding name supplied by the application,
     *	or null if none was provided.
     */
    private void setEncoding (String encoding)
    {
	// Forget the policy chosen for any previous document; otherwise
	// a handler reused with (say) a UTF-8 writer after a US-ASCII one
	// would keep escaping as if it were still writing ASCII.
	dangerMask = 0;

	if (encoding == null && out instanceof OutputStreamWriter)
	    encoding = ((OutputStreamWriter)out).getEncoding ();

	if (encoding != null) {
	    // Encoding names are ASCII, so use a fixed locale:  default
	    // locale rules (Turkish dotted/dotless "i", for example)
	    // would stop names like "iso-8859-1" from matching below.
	    encoding = encoding.toUpperCase (java.util.Locale.ENGLISH);

	    // Use official encoding names where we know them,
	    // avoiding the Java-only names.  When using common
	    // encodings where we can easily tell if characters
	    // are out of range, we'll escape out-of-range
	    // characters using character refs for safety.

	    // I _think_ these are all the main synonyms for these!
	    if ("UTF8".equals (encoding)) {
		encoding = "UTF-8";
	    } else if ("US-ASCII".equals (encoding)
		    || "ASCII".equals (encoding)) {
		dangerMask = (short) 0xff80;
		encoding = "US-ASCII";
	    } else if ("ISO-8859-1".equals (encoding)
		    || "8859_1".equals (encoding)
		    || "ISO8859_1".equals (encoding)) {
		dangerMask = (short) 0xff00;
		encoding = "ISO-8859-1";
	    } else if ("UNICODE".equals (encoding)
		    || "UNICODE-BIG".equals (encoding)
		    || "UNICODE-LITTLE".equals (encoding)) {
		encoding = "UTF-16";

		// TODO: UTF-16BE, UTF-16LE ... no BOM; what
		// release of JDK supports those Unicode names?
	    }

	    // scratch buffer is only needed when escaping is in force
	    if (dangerMask != 0)
		stringBuf = new StringBuffer ();
	}

	this.encoding = encoding;
    }


    /**
     * Resets the handler to write a new text document.
     *
     * <p> A null writer is accepted here; the problem is reported by
     * {@link #startDocument} if no writer has been provided by then.
     *
     * @param writer XML text is written to this writer.
     * @param encoding if non-null, and an XML declaration is written,
     *	this is the name that will be used for the character encoding.
     *
     * @exception IllegalStateException if the current
     *	document hasn't yet ended (with {@link #endDocument})
     */
    final public void setWriter (Writer writer, String encoding)
    {
	if (out != null)
	    throw new IllegalStateException (
		"can't change stream in mid course");

	// Each document starts with a fresh xml:space stack, so that
	// reusing the handler doesn't pile up "default" entries.
	space.clear ();
	space.push ("default");

	// Wrapping a null writer in a BufferedWriter would throw a
	// NullPointerException; leave "out" null instead.
	if (writer == null)
	    return;

	out = writer;

	// must look at the unwrapped writer:  setEncoding() asks an
	// OutputStreamWriter for its encoding name
	setEncoding (encoding);
	if (!(out instanceof BufferedWriter))
	    out = new BufferedWriter (out);
    }

    /**
     * Assigns the line ending style to be used on output.
     * @param eolString null to use the system default; else
     *	"\n", "\r", or "\r\n".
     * @exception IllegalArgumentException if the string is neither
     *	null nor one of those three line endings.
     */
    final public void setEOL (String eolString)
    {
	if (eolString == null)
	    eol = sysEOL;
	else if (isLineEnd (eolString))
	    // legal XML line end:  accept it (this test used to be
	    // inverted, rejecting exactly the documented values)
	    eol = eolString;
	else
	    throw new IllegalArgumentException (eolString);
    }

    /**
     * Assigns the error handler to be used to present most fatal
     * errors.
     */
    public void setErrorHandler (ErrorHandler handler)
    {
	// null is allowed:  fatal() then throws without notifying anyone
	this.errHandler = handler;
    }

    /**
     * Used internally and by subclasses, this encapsulates the logic
     * involved in reporting fatal errors.  It uses locator information
     * for good diagnostics, if available, and gives the application's
     * ErrorHandler the opportunity to handle the error before throwing
     * an exception.
     */
    protected void fatal (String message, Exception e)
    throws SAXException
    {
	// include position information whenever a locator is known;
	// otherwise report "unknown" IDs and line/column numbers
	final SAXParseException	problem = (locator != null)
	    ? new SAXParseException (message, locator, e)
	    : new SAXParseException (message, null, null, -1, -1, e);

	// the application's handler gets first look (and may throw) ...
	if (errHandler != null)
	    errHandler.fatalError (problem);

	// ... but a fatal error always ends in an exception
	throw problem;
    }


    // JavaBeans properties

    /**
     * Controls whether the output should attempt to follow the "transitional"
     * XHTML rules so that it meets the "HTML Compatibility Guidelines"
     * appendix in the XHTML specification.  A "transitional" Document Type
     * Declaration (DTD) is placed near the beginning of the output document,
     * instead of whatever DTD would otherwise have been placed there, and
     * XHTML empty elements are printed specially.  When writing text in
     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
     * entity names are used (in preference to character references) when
     * writing content characters which can't be expressed in those encodings.
     *
     * <p> When this option is enabled, it is the caller's responsibility
     * to ensure that the input is otherwise valid as XHTML.  Things to
     * be careful of in all cases, as described in the appendix referenced
     * above, include:  <ul>
     *
     *	<li> Element and attribute names must be in lower case, both
     *		in the document and in any CSS style sheet.
     *	<li> All XML constructs must be valid as defined by the XHTML
     *		"transitional" DTD (including all familiar constructs,
     *		even deprecated ones).
     *	<li> The root element must be "html".
     *	<li> Elements that must be empty (such as <em>&lt;br&gt;</em>)
     *		must have no content.
     *	<li> Use both <em>lang</em> and <em>xml:lang</em> attributes
     *		when specifying language.
     *	<li> Similarly, use both <em>id</em> and <em>name</em> attributes
     *		when defining elements that may be referred to through
     *		URI fragment identifiers ... and make sure that the
     *		value is a legal NMTOKEN, since not all such HTML 4.0
     *		identifiers are valid in XML.
     *	<li> Be careful with character encodings; make sure you provide
     *		a <em>&lt;meta http-equiv="Content-type"
     *		content="text/xml;charset=..." /&gt;</em> element in
     *		the HTML "head" element, naming the same encoding
     *		used to create this handler.  Also, if that encoding
     *		is anything other than US-ASCII, make sure that if
     *		the document is given a MIME content type, it has
     *		a <em>charset=...</em> attribute with that encoding.
     *	</ul>
     *
     * <p> Additionally, some of the oldest browsers have additional
     * quirks, to address with guidelines such as: <ul>
     *
     *	<li> Processing instructions may be rendered, so avoid them.
     *		(Similarly for an XML declaration.)
     *	<li> Embedded style sheets and scripts should not contain XML
     *		markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
     *	<li> Attribute values should not have line breaks or multiple
     *		consecutive white space characters.
     *	<li> Use no more than one of the deprecated (transitional)
     *		<em>&lt;isindex&gt;</em> elements.
     *	<li> Some boolean attributes (such as <em>compact, checked,
     *		disabled, readonly, selected,</em> and more) confuse
     *		some browsers, since they only understand minimized
     *		versions which are illegal in XML.
     *	</ul>
     *
     * <p> Also, some characteristics of the resulting output may be
     * a function of whether the document is later given a MIME
     * content type of <em>text/html</em> rather than one indicating
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
     * some browsers ignore MIME content types and prefer to rely on URI
     * name suffixes -- so an "index.xml" could always be XML, never
     * XHTML, no matter its MIME type.
     */
    final public void setXhtml (boolean value)
    {
	// a non-null locator means events have already been seen
	if (locator != null)
	    throw new IllegalStateException ("started parsing");

	xhtml = value;

	// XHTML output and canonical output exclude each other
	canonical = canonical && !value;
    }

    /**
     * Returns true if the output attempts to echo the input following
     * "transitional" XHTML rules and matching the "HTML Compatibility
     * Guidelines" so that an HTML version 3 browser can read the output
     * as HTML; returns false (the default) otherwise.
     */
    final public boolean isXhtml ()
    {
	// plain accessor for the flag assigned by setXhtml()
	return this.xhtml;
    }

    /**
     * Controls whether the output text contains references to
     * entities (the default), or instead contains the expanded
     * values of those entities.
     */
    final public void setExpandingEntities (boolean value)
    {
	// a non-null locator means events have already been seen
	if (locator != null)
	    throw new IllegalStateException ("started parsing");

	expandingEntities = value;

	// canonical output requires expansion, so turning expansion
	// off also turns canonical mode off
	canonical = canonical && value;
    }

    /**
     * Returns true if the output will have no entity references;
     * returns false (the default) otherwise.
     */
    final public boolean isExpandingEntities ()
    {
	// plain accessor for the flag assigned by setExpandingEntities()
	return this.expandingEntities;
    }

    /**
     * Controls pretty-printing, which by default is not enabled
     * (and currently is most useful for XHTML output).
     * Pretty printing enables structural indentation, sorting of attributes
     * by name, line wrapping, and potentially other mechanisms for making
     * output more or less readable.
     *
     * <p> At this writing, structural indentation and line wrapping are
     * enabled when pretty printing is enabled and the <em>xml:space</em>
     * attribute has the value <em>default</em> (its other legal value is
     * <em>preserve</em>, as defined in the XML specification).  The three
     * XHTML element types which use another value are recognized by their
     * names (namespaces are ignored).
     *
     * <p> Also, for the record, the "pretty" aspect of printing here
     * is more to provide basic structure on outputs that would otherwise
     * risk being a single long line of text.  For now, expect the
     * structure to be ragged ... unless you'd like to submit a patch
     * to make this be more strictly formatted!
     *
     * @exception IllegalStateException thrown if this method is invoked
     *	after output has begun.
     */
    final public void setPrettyPrinting (boolean value)
    {
	// a non-null locator means events have already been seen
	if (locator != null)
	    throw new IllegalStateException ("started parsing");

	prettyPrinting = value;

	// pretty printing and canonical output exclude each other
	canonical = canonical && !value;
    }

    /**
     * Returns value of flag controlling pretty printing.
     */
    final public boolean isPrettyPrinting ()
    {
	// plain accessor for the flag assigned by setPrettyPrinting()
	return this.prettyPrinting;
    }


    /**
     * Sets the output style to be canonicalized.  Input events must
     * meet requirements that are slightly more stringent than the
     * basic well-formedness ones, and include:  <ul>
     *
     *	<li> Namespace prefixes must not have been changed from those
     *	in the original document.  (This may only be ensured by setting
     *	the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
     *	by default, it is cleared.)
     *
     *	<li> Redundant namespace declaration attributes have been
     *	removed.  (If an ancestor element defines a namespace prefix
     *	and that declaration hasn't been overridden, an element must
     *	not redeclare it.)
     *
     *	<li> If comments are not to be included in the canonical output,
     *	they must first be removed from the input event stream; this
     *	writes <em>Canonical XML with comments</em> by default.
     *
     *	<li> If the input character encoding was not UCS-based, the
     *	character data must have been normalized using Unicode
     *	Normalization Form C.  (UTF-8 and UTF-16 are UCS-based.)
     *
     *	<li> Attribute values must have been normalized, as is done
     *	by any conformant XML processor which processes all external
     *	parameter entities.
     *
     *	<li> Similarly, attribute value defaulting has been performed.
     *
     *	</ul>
     *
     * <p> Note that fragments of XML documents, as specified by an XPath
     * node set, may be canonicalized.  In such cases, elements may need
     * some fixup (for <em>xml:*</em> attributes and application-specific
     * context).
     *
     * @exception IllegalArgumentException if the output encoding
     *	is anything other than UTF-8.
     */
    final public void setCanonical (boolean value)
    {
	// switching canonical mode off needs no checks and leaves the
	// other output options as they are
	if (!value) {
	    canonical = false;
	    return;
	}

	if (!"UTF-8".equals (encoding))
	    throw new IllegalArgumentException ("encoding != UTF-8");

	// canonical form overrides the other output options
	canonical = true;
	prettyPrinting = false;
	xhtml = false;
	expandingEntities = true;
	eol = "\n";
    }


    /**
     * Returns value of flag controlling canonical output.
     */
    final public boolean isCanonical ()
    {
	// plain accessor for the flag assigned by setCanonical()
	return this.canonical;
    }


    /**
     * Flushes the output stream.  When this handler is used in long lived
     * pipelines, it can be important to flush buffered state, for example
     * so that it can reach the disk as part of a state checkpoint.
     */
    final public void flush ()
    throws IOException
    {
	// with no writer (not yet set, or document ended) there is
	// nothing buffered to push out
	if (out == null)
	    return;
	out.flush ();
    }


    // convenience routines

// FIXME:  probably want a subclass that holds a lot of these...
// and maybe more!
    
    /**
     * Writes the string as if characters() had been called on the contents
     * of the string.  This is particularly useful when applications act as
     * producers and write data directly to event consumers.
     */
    final public void write (String data)
    throws SAXException
    {
	// hand the whole string to characters() as a single chunk
	characters (data.toCharArray (), 0, data.length ());
    }


    /**
     * Writes an element that has content consisting of a single string.
     * @see #writeEmptyElement
     * @see #startElement
     */
    public void writeElement (
	String uri,
	String localName,
	String qName,
	Attributes atts,
	String content
    ) throws SAXException
    {
	final boolean	hasText = content != null && content.length () != 0;

	if (hasText) {
	    // start tag, the text as one characters() call, end tag
	    startElement (uri, localName, qName, atts);
	    characters (content.toCharArray (), 0, content.length ());
	    endElement (uri, localName, qName);
	} else {
	    // null and "" are both written as an empty element
	    writeEmptyElement (uri, localName, qName, atts);
	}
    }


    /**
     * Writes an element that has content consisting of a single integer,
     * encoded as a decimal string.
     * @see #writeEmptyElement
     * @see #startElement
     */
    public void writeElement (
	String uri,
	String localName,
	String qName,
	Attributes atts,
	int content
    ) throws SAXException
    {
	// decimal text form of the integer, then the String overload
	final String	digits = String.valueOf (content);

	writeElement (uri, localName, qName, atts, digits);
    }


    // SAX1 ContentHandler
    /** <b>SAX1</b>:  provides parser status information */
    final public void setDocumentLocator (Locator l)
    {
	// kept for fatal() diagnostics; a non-null value also tells
	// the property setters that parsing has started
	this.locator = l;
    }


    // URL for dtd that validates against all normal HTML constructs.
    // startDocument() writes it as the system identifier of the
    // document type declaration when XHTML output is enabled.
    private static final String xhtmlFullDTD =
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";

    
    /**
     * <b>SAX1</b>:  indicates the beginning of a document parse.
     * If you're writing (well formed) fragments of XML, neither
     * this nor endDocument should be called.
     */
    // NOT final
    public void startDocument ()
    throws SAXException
    {
	// a missing writer is a usage error, not an I/O problem
	if (out == null)
	    throw new IllegalStateException (
		"null Writer given to XMLWriter");

	// Not all parsers provide the locator we want; this also
	// flags whether events are being sent to this object yet.
	// We could only have this one call if we only printed whole
	// documents ... but we also print fragments, so most of the
	// callbacks here replicate this test.
	if (locator == null)
	    locator = new LocatorImpl ();

	// Unless the data is in US-ASCII or we're canonicalizing, write
	// the XML declaration if we know the encoding.  US-ASCII won't
	// normally get mangled by web server confusion about the
	// character encodings used.  Plus, it's an easy way to
	// ensure we can write ASCII that's unlikely to confuse
	// elderly HTML parsers.
	final boolean	usAscii = (dangerMask == (short) 0xff80);
	final boolean	declare = !canonical && !usAscii && encoding != null;

	try {
	    if (declare) {
		rawWrite ("<?xml version='1.0'");
		rawWrite (" encoding='" + encoding + "'");
		rawWrite ("?>");
		newline ();
	    }

	    if (xhtml) {
		// public ID on its own line, then the system ID
		rawWrite ("<!DOCTYPE html PUBLIC");
		newline ();
		rawWrite ("  '-//W3C//DTD XHTML 1.0 Transitional//EN'");
		newline ();
		rawWrite ("  '");
		// NOTE:  URL matches the REC
		rawWrite (xhtmlFullDTD);
		rawWrite ("'>");
		newline ();
		newline ();

		// fake the rest of the handler into ignoring
		// everything until the root element, so any
		// XHTML DTD comments, PIs, etc are ignored
		startedDoctype = true;
	    }

	    entityNestLevel = 0;

	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX1</b>:  indicates the completion of a parse.
     * Note that all complete SAX event streams make this call, even
     * if an error is reported during a parse.
     *
     * <p> The writer is closed and forgotten whether or not the final
     * output succeeds, so that a new writer can always be provided
     * afterwards.  Calling this with no writer in place does nothing
     * beyond discarding the locator.
     *
     * @exception SAXException if the end of the document can't be
     *	written or the writer can't be closed.
     */
    // NOT final
    public void endDocument ()
    throws SAXException
    {
	// never opened, or already closed by an earlier call
	if (out == null) {
	    locator = null;
	    return;
	}

	try {
	    try {
		if (!canonical) {
		    newline ();
		    newline ();
		}
	    } finally {
		// close even if the trailing newlines couldn't be
		// written, so the underlying stream isn't leaked
		out.close ();
	    }
	} catch (IOException e) {
	    fatal ("can't write", e);
	} finally {
	    // Runs after fatal() has used the locator for its
	    // diagnostic.  Without this, a failed close would leave
	    // "out" set and setWriter() would refuse any new writer.
	    out = null;
	    locator = null;
	}
    }

    // XHTML elements declared as EMPTY print differently:  in XHTML
    // mode startElement() closes them in the start tag and endElement()
    // writes no end tag.
    final private static boolean isEmptyElementTag (String tag)
    {
	// dispatch on the first letter to limit the string compares
	final char	initial = tag.charAt (0);

	if (initial == 'a')
	    return "area".equals (tag);
	if (initial == 'b')
	    return "base".equals (tag)
		|| "basefont".equals (tag)
		|| "br".equals (tag);
	if (initial == 'c')
	    return "col".equals (tag);
	if (initial == 'f')
	    return "frame".equals (tag);
	if (initial == 'h')
	    return "hr".equals (tag);
	if (initial == 'i')
	    return "img".equals (tag)
		|| "input".equals (tag)
		|| "isindex".equals (tag);
	if (initial == 'l')
	    return "link".equals (tag);
	if (initial == 'm')
	    return "meta".equals (tag);
	if (initial == 'p')
	    return "param".equals (tag);
	return false;
    }

    // XHTML element names whose start tag gets indentation in front of
    // it when pretty printing (see startElement()).
    private static boolean indentBefore (String tag)
    {
	boolean	indent;

	// basically indent before block content
	// and within structure like tables, lists
	switch (tag.charAt (0)) {
	  case 'a':
	    indent = "applet".equals (tag);
	    break;
	  case 'b':
	    indent = "body".equals (tag) || "blockquote".equals (tag);
	    break;
	  case 'c':
	    indent = "center".equals (tag);
	    break;
	  case 'f':
	    indent = "frame".equals (tag) || "frameset".equals (tag);
	    break;
	  case 'h':
	    indent = "head".equals (tag);
	    break;
	  case 'm':
	    indent = "meta".equals (tag);
	    break;
	  case 'o':
	    indent = "object".equals (tag);
	    break;
	  case 'p':
	    indent = "param".equals (tag) || "pre".equals (tag);
	    break;
	  case 's':
	    indent = "style".equals (tag);
	    break;
	  case 't':
	    indent = "title".equals (tag)
		|| "td".equals (tag)
		|| "th".equals (tag);
	    break;
	  default:
	    // ... but not inline elements like "em", "b", "font"
	    indent = false;
	    break;
	}
	return indent;
    }

    // XHTML element names whose start tag gets a newline and then
    // indentation in front of it when pretty printing (see
    // startElement()).
    private static boolean spaceBefore (String tag)
    {
	// blank line AND INDENT before certain structural content
	final char	initial = tag.charAt (0);

	if (initial == 'h')
	    return "h1".equals (tag)
		|| "h2".equals (tag)
		|| "h3".equals (tag)
		|| "h4".equals (tag)
		|| "h5".equals (tag)
		|| "h6".equals (tag)
		|| "hr".equals (tag);
	if (initial == 'l')
	    return "li".equals (tag);
	if (initial == 'o')
	    return "ol".equals (tag);
	if (initial == 'p')
	    return "p".equals (tag);
	if (initial == 't')
	    return "table".equals (tag) || "tr".equals (tag);
	if (initial == 'u')
	    return "ul".equals (tag);
	return false;
    }

    // XHTML DTDs say these three have xml:space="preserve"
    private static boolean spacePreserve (String tag)
    {
	if ("pre".equals (tag))
	    return true;
	if ("style".equals (tag))
	    return true;
	return "script".equals (tag);
    }

    /**
     * <b>SAX2</b>:  ignored.
     */
    final public void startPrefixMapping (String prefix, String uri)
    {
	// intentionally empty:  this event produces no output
    }

    /**
     * <b>SAX2</b>:  ignored.
     */
    final public void endPrefixMapping (String prefix)
    {
	// intentionally empty:  this event produces no output
    }

    private void writeStartTag (
	String name,
	Attributes atts,
	boolean isEmpty
    ) throws SAXException, IOException
    {
	rawWrite ('<');
	rawWrite (name);

	// write out attributes ... sorting is particularly useful
	// with output that's been heavily defaulted.
	if (atts != null && atts.getLength () != 0) {

	    // order [k] is the index of the k-th attribute to write;
	    // it starts out as the order the attributes were given in
	    final int	order [] = new int [atts.getLength ()];

	    for (int k = 0; k < order.length; k++)
		order [k] = k;

// FIXME:  canon xml demands xmlns nodes go first,
// and sorting by URI first (empty first) then localname
// it should maybe use a different sort

	    if (canonical || prettyPrinting) {

		// insertion sort by attribute name; equal names
		// keep their original relative order
		for (int next = 1; next < order.length; next++) {
		    final int		moving = order [next];
		    final String	key = atts.getQName (moving);
		    int			slot = next;

		    while (slot > 0
			    && key.compareTo (
				atts.getQName (order [slot - 1])) < 0) {
			order [slot] = order [slot - 1];
			slot--;
		    }
		    order [slot] = moving;
		}
	    }

	    // write, sorted or no
	    for (int k = 0; k < order.length; k++) {
		final String	attName = atts.getQName (order [k]);

		if (attName == null || "".equals (attName))
		    throw new IllegalArgumentException ("no XML name");
		rawWrite (" ");
		rawWrite (attName);
		rawWrite ("=");
		writeQuotedValue (atts.getValue (order [k]),
		    CTX_ATTRIBUTE);
	    }
	}

	// an empty element is closed within its own start tag
	if (isEmpty)
	    rawWrite (" /");
	rawWrite ('>');
    }

    /**
     * <b>SAX2</b>:  indicates the start of an element.
     * When XHTML is in use, avoid attribute values with
     * line breaks or multiple whitespace characters, since
     * not all user agents handle them correctly.
     */
    final public void startElement (
	String uri,
	String localName,
	String qName,
	Attributes atts
    ) throws SAXException
    {
	// any element ends the "ignore until the root" state that
	// startDocument() sets up for XHTML
	startedDoctype = false;

	if (locator == null)
	    locator = new LocatorImpl ();

	if (qName == null || "".equals (qName))
	    throw new IllegalArgumentException ("no XML name");

	// nothing is written while entityNestLevel is nonzero
	if (entityNestLevel != 0)
	    return;

	try {
	    if (prettyPrinting) {
		// Work out the xml:space mode for this element:
		// forced for certain XHTML elements, else taken from
		// the attribute, else inherited from the parent.
		String	mode = null;

		if (xhtml && spacePreserve (qName))
		    mode = "preserve";
		else if (atts != null)
		    mode = atts.getValue ("xml:space");
		if (mode == null)
		    mode = (String) space.peek ();
		space.push (mode);

		if ("default".equals (mode)) {
		    if (!xhtml)
			doIndent ();
		    else if (spaceBefore (qName)) {
			newline ();
			doIndent ();
		    } else if (indentBefore (qName))
			doIndent ();
		    // else it's inlined, modulo line length
		    // FIXME: incrementing element nest level
		    // for inlined elements causes ugliness
		}
	    }

	    elementNestLevel++;
	    writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));

// FIXME: if this is an XHTML "pre" element, turn
// off automatic wrapping.

	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * Writes an element that has no content.  In canonical mode this
     * is reported as a start tag immediately followed by an end tag;
     * otherwise a single empty-element tag is written directly.
     *
     * @param uri namespace URI of the element
     * @param localName local name of the element
     * @param qName name to write in the tag
     * @param atts attributes to write
     * @see #startElement
     */
    public void writeEmptyElement (
	String uri,
	String localName,
	String qName,
	Attributes atts
    ) throws SAXException
    {
	if (!canonical) {
	    try {
		writeStartTag (qName, atts, true);
	    } catch (IOException e) {
		fatal ("can't write", e);
	    }
	    return;
	}

	// canonical form has no empty-element tags
	startElement (uri, localName, qName, atts);
	endElement (uri, localName, qName);
    }


    /**
     * <b>SAX2</b>:  indicates the end of an element.  The end tag is
     * written unless the element is inside an entity that was written
     * as a reference, or it is an XHTML element already written as an
     * empty-element tag.  The bookkeeping done by
     * {@link #startElement} (nesting level, and the whitespace mode
     * stack when pretty-printing) is undone here.
     *
     * @param uri namespace URI of the element; not used here
     * @param localName local name of the element; not used here
     * @param qName name to write in the tag; must not be null or empty
     * @exception IllegalArgumentException if no element name is given
     */
    final public void endElement (String uri, String localName, String qName)
    throws SAXException
    {
	if (qName == null || "".equals (qName))
	    throw new IllegalArgumentException ("no XML name");

	try {
	    // startElement() returns before touching any state when it
	    // is inside such an entity, so there is nothing to undo;
	    // decrementing here would make the nesting level drift.
	    if (entityNestLevel != 0)
		return;
	    elementNestLevel--;

	    // XHTML empty elements were closed by their own start tag
	    if (!(xhtml && isEmptyElementTag (qName))) {
		rawWrite ("</");
		rawWrite (qName);
		rawWrite ('>');
	    }

	    // startElement() pushed a whitespace mode for every element,
	    // including XHTML empty ones; always balance that push or the
	    // enclosing element's mode leaks into what follows.
	    if (prettyPrinting) {
		if (!space.empty ())
		    space.pop ();
		else
		    fatal ("stack discipline", null);
	    }
	    if (elementNestLevel == 0)
		inEpilogue = true;

	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX1</b>:  reports content characters.  They are written with
     * content escaping, or unescaped while inside a CDATA section;
     * nothing is written inside an entity that was written as a
     * reference.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character to write
     * @param length number of characters to write
     */
    final public void characters (char ch [], int start, int length)
    throws SAXException
    {
	if (locator == null)
	    locator = new LocatorImpl ();

	try {
	    if (entityNestLevel == 0) {
		int	context = inCDATA ? CTX_UNPARSED : CTX_CONTENT;

		escapeChars (ch, start, length, context);
	    }
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX1</b>:  reports ignorable whitespace, which is written the
     * same way as other element content.
     *
     * @param ch buffer holding the whitespace
     * @param start index of the first character to write
     * @param length number of characters to write
     */
    final public void ignorableWhitespace (char ch [], int start, int length)
    throws SAXException
    {
	if (locator == null)
	    locator = new LocatorImpl ();

	try {
	    // escapeChars() takes care of mapping NL to CRLF, CR, etc
	    if (entityNestLevel == 0)
		escapeChars (ch, start, length, CTX_CONTENT);
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX1</b>:  reports a PI.
     * This doesn't check for illegal target names, such as "xml" or "XML",
     * or namespace-incompatible ones like "big:dog"; the caller is
     * responsible for ensuring those names are legal.
     *
     * <p> Nothing is written for PIs in the internal subset when XHTML
     * is in use, or inside an entity that was written as a reference.
     *
     * @param target the PI target
     * @param data the PI data; may be null or empty, in which case
     *	only the target is written
     */
    final public void processingInstruction (String target, String data)
    throws SAXException
    {
	if (locator == null)
	    locator = new LocatorImpl ();

	// don't print internal subset for XHTML
	if (xhtml && startedDoctype)
	    return;

	// ancient HTML browsers might render these ... their loss.
	// to prevent:  "if (xhtml) return;".

	try {
	    if (entityNestLevel != 0)
		return;
	    if (canonical && inEpilogue)
		newline ();
	    rawWrite ("<?");
	    rawWrite (target);

	    // SAX reports "no data" as null; dereferencing it would
	    // fail.  The separating space is only needed (and, in
	    // canonical XML, only permitted) when there is data.
	    if (data != null && data.length () != 0) {
		rawWrite (' ');
		escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
	    }
	    rawWrite ("?>");
	    if (elementNestLevel == 0 && !(canonical && inEpilogue))
		newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX2</b>:  indicates an entity reference which was not
     * expanded; it is written back out as <code>&amp;name;</code>.
     *
     * @param name name of the entity that was skipped
     */
    public void skippedEntity (String name)
    throws SAXException
    {
	try {
	    // three separate writes:  reference open, name, close
	    rawWrite ("&");
	    rawWrite (name);
	    rawWrite (";");
	}
	catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    // SAX2 LexicalHandler

    /**
     * <b>SAX2</b>:  called before parsing CDATA characters.  Except in
     * canonical mode, this marks following character data as CDATA and
     * writes the section opener.
     */
    final public void startCDATA ()
    throws SAXException
    {
	if (locator == null)
	    locator = new LocatorImpl ();

	// canonical output has no CDATA sections
	if (!canonical) {
	    try {
		inCDATA = true;
		if (entityNestLevel == 0)
		    rawWrite ("<![CDATA[");
	    } catch (IOException e) {
		fatal ("can't write", e);
	    }
	}
    }

    /**
     * <b>SAX2</b>:  called after parsing CDATA characters.  Except in
     * canonical mode, this clears the CDATA flag and writes the
     * section terminator.
     */
    final public void endCDATA ()
    throws SAXException
    {
	// canonical output has no CDATA sections
	if (!canonical) {
	    try {
		inCDATA = false;
		if (entityNestLevel == 0)
		    rawWrite ("]]>");
	    } catch (IOException e) {
		fatal ("can't write", e);
	    }
	}
    }

    /**
     * <b>SAX2</b>:  called when the doctype is partially parsed.
     * Note that this, like other doctype related calls, is ignored
     * when XHTML is in use.  In canonical mode the doctype is noted
     * but nothing is written.  The external identifier is written
     * only when entities are not being expanded; the start of the
     * internal subset is always written.
     *
     * @param name the name in the document type declaration
     * @param publicId public identifier of the external subset, or null
     * @param systemId system identifier of the external subset, or null
     */
    final public void startDTD (String name, String publicId, String systemId)
    throws SAXException
    {
	if (locator == null)
	    locator = new LocatorImpl ();
	if (xhtml)
	    return;
	try {
	    inDoctype = startedDoctype = true;
	    if (canonical)
		return;
	    rawWrite ("<!DOCTYPE ");
	    rawWrite (name);
	    rawWrite (' ');

	    if (!expandingEntities) {
		// Literals are normally single-quoted, but both public
		// and system identifiers may legally contain apostrophes;
		// such a literal must be double-quoted or the declaration
		// is malformed.
		char	pubQuote = (publicId != null
				&& publicId.indexOf ('\'') >= 0) ? '"' : '\'';
		char	sysQuote = (systemId != null
				&& systemId.indexOf ('\'') >= 0) ? '"' : '\'';

		if (publicId != null)
		    rawWrite ("PUBLIC " + pubQuote + publicId + pubQuote
			+ ' ' + sysQuote + systemId + sysQuote + ' ');
		else if (systemId != null)
		    rawWrite ("SYSTEM " + sysQuote + systemId + sysQuote + ' ');
	    }

	    rawWrite ('[');
	    newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX2</b>:  called after the doctype is parsed.  Closes the
     * internal subset and the declaration, unless the declaration was
     * suppressed (canonical or XHTML output).
     */
    final public void endDTD ()
    throws SAXException
    {
	inDoctype = false;

	if (!canonical && !xhtml) {
	    try {
		rawWrite ("]>");
		newline ();
	    } catch (IOException e) {
		fatal ("can't write", e);
	    }
	}
    }

    /**
     * <b>SAX2</b>:  called before parsing an entity.  Unless XHTML is
     * in use or entities are being expanded, the entity nesting level
     * is raised so that the entity's content is not written, and a
     * reference to an outermost entity is written in its place.
     *
     * @param name name of the entity; parameter entity names start
     *	with "%", and "[dtd]" names the external subset
     */
    final public void startEntity (String name)
    throws SAXException
    {
	try {
	    // Predefined XHTML entities (for characters) will get
	    // mapped back later.
	    if (xhtml || expandingEntities)
		return;

	    entityNestLevel++;

	    // no reference for the external subset, or for entities
	    // nested within one that already got a reference
	    if (name.equals ("[dtd]") || entityNestLevel != 1)
		return;

	    boolean	parameterEntity = name.startsWith ("%");

	    // the name of a parameter entity already carries its "%"
	    if (!parameterEntity)
		rawWrite ('&');
	    rawWrite (name);
	    rawWrite (';');

	    // parameter entity references get a line of their own
	    if (parameterEntity)
		newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX2</b>:  called after parsing an entity.  Undoes the
     * nesting level change made by {@link #startEntity}, which is
     * only made when neither XHTML output nor entity expansion
     * is enabled.
     *
     * @param name name of the entity; not used here
     */
    final public void endEntity (String name)
    throws SAXException
    {
	if (!xhtml && !expandingEntities)
	    entityNestLevel--;
    }

    /**
     * <b>SAX2</b>:  called when comments are parsed.
     * When XHTML is used, the old HTML tradition of using comments
     * for inline CSS, or for JavaScript code, is discouraged.
     * This is because XML processors are encouraged to discard
     * comments, on the grounds that they are for users (and perhaps
     * text editors) not programs.  Instead, use external scripts.
     *
     * <p> Comments in the doctype are not written for XHTML or
     * canonical output.
     *
     * @param ch buffer holding the comment text
     * @param start index of the first character of the comment
     * @param length number of characters in the comment
     */
    final public void comment (char ch [], int start, int length)
    throws SAXException
    {
	if (locator == null)
	    locator = new LocatorImpl ();

	// don't print internal subset for XHTML, nor comments
	// in the doctype for canonical XML
	if ((xhtml && startedDoctype) || (canonical && inDoctype))
	    return;

	try {
	    if (prettyPrinting && space.empty ())
		fatal ("stack discipline", null);

	    boolean	indenting
		    = prettyPrinting && "default".equals (space.peek ());

	    if (entityNestLevel != 0)
		return;

	    boolean	canonicalEpilogue = canonical && inEpilogue;

	    if (indenting)
		doIndent ();
	    if (canonicalEpilogue)
		newline ();
	    rawWrite ("<!--");
	    escapeChars (ch, start, length, CTX_UNPARSED);
	    rawWrite ("-->");
	    if (indenting)
		doIndent ();
	    if (elementNestLevel == 0 && !canonicalEpilogue)
		newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    // SAX1 DTDHandler

    /**
     * <b>SAX1</b>:  called on notation declarations.  The declaration
     * is written only as part of a doctype that was started, outside
     * of entities written as references, and never for XHTML.
     *
     * @param name name of the notation
     * @param publicId public identifier of the notation, or null
     * @param systemId system identifier of the notation, or null
     */
    final public void notationDecl (String name,
    	String publicId, String systemId)
    throws SAXException
    {
	if (xhtml)
	    return;
	try {
	    // At this time, only SAX2 callbacks start these.
	    if (!startedDoctype)
		return;

	    if (entityNestLevel != 0)
		return;
	    rawWrite ("<!NOTATION " + name + " ");
	    if (publicId != null)
		rawWrite ("PUBLIC \"" + publicId + '"');
	    else
		rawWrite ("SYSTEM");

	    // XML requires whitespace before the system literal, also
	    // when it follows a public literal
	    if (systemId != null)
		rawWrite (" \"" + systemId + '"');
	    rawWrite (">");
	    newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX1</b>:  called on unparsed entity declarations.  The
     * declaration is written only as part of a doctype that was
     * started, outside of entities written as references, and never
     * for XHTML.
     *
     * @param name name of the entity
     * @param publicId public identifier of the entity, or null
     * @param systemId system identifier of the entity
     * @param notationName name of the notation for the entity's data
     */
    final public void unparsedEntityDecl (String name,
	String publicId, String systemId,
	String notationName)
    throws SAXException
    {
	if (xhtml)
	    return;
	try {
	    // At this time, only SAX2 callbacks start these.
	    if (!startedDoctype)  {
		// FIXME: write to temporary buffer, and make the start
		// of the root element write these declarations.
		return;
	    }

	    if (entityNestLevel != 0)
		return;
	    rawWrite ("<!ENTITY " + name + " ");

	    // both keywords end up followed by one space, so the system
	    // literal is always separated from what precedes it
	    if (publicId != null)
		rawWrite ("PUBLIC \"" + publicId + "\" ");
	    else
		rawWrite ("SYSTEM ");
	    rawWrite ('"' + systemId + '"');
	    rawWrite (" NDATA " + notationName + ">");
	    newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    // SAX2 DeclHandler

    /**
     * <b>SAX2</b>:  called on attribute declarations; each is written
     * as its own ATTLIST declaration.  Nothing is written for XHTML,
     * before a doctype was started, or inside an entity written as
     * a reference.
     *
     * @param eName name of the element the attribute belongs to
     * @param aName name of the attribute
     * @param type the attribute type, as it should be written
     * @param mode the default mode, or null if there is none
     * @param value the default value, or null if there is none
     */
    final public void attributeDecl (String eName, String aName,
	    String type, String mode, String value)
    throws SAXException
    {
	if (xhtml)
	    return;
	try {
	    // At this time, only SAX2 callbacks start these.
	    if (!startedDoctype || entityNestLevel != 0)
		return;

	    rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
	    rawWrite (type);
	    rawWrite (' ');

	    // both parts of the default declaration are optional
	    if (mode != null)
		rawWrite (mode + ' ');
	    if (value != null)
		writeQuotedValue (value, CTX_ATTRIBUTE);

	    rawWrite ('>');
	    newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX2</b>:  called on element declarations.  Nothing is
     * written for XHTML, before a doctype was started, or inside an
     * entity written as a reference.
     *
     * @param name name of the element type
     * @param model the content model, as it should be written
     */
    final public void elementDecl (String name, String model)
    throws SAXException
    {
	if (xhtml)
	    return;
	try {
	    // At this time, only SAX2 callbacks start these.
	    if (!startedDoctype || entityNestLevel != 0)
		return;

	    rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
	    newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX2</b>:  called on external entity declarations.  Nothing
     * is written for XHTML, before a doctype was started, or inside
     * an entity written as a reference.
     *
     * @param name name of the entity; parameter entity names start
     *	with "%"
     * @param publicId public identifier of the entity, or null
     * @param systemId system identifier of the entity
     */
    final public void externalEntityDecl (
	String name,
	String publicId,
	String systemId)
    throws SAXException
    {
	if (xhtml)
	    return;
	try {
	    // At this time, only SAX2 callbacks start these.
	    if (!startedDoctype)
		return;
	    if (entityNestLevel != 0)
		return;
	    rawWrite ("<!ENTITY ");
	    if (name.startsWith ("%")) {
		// in declarations the "%" is a separate token
		rawWrite ("% ");
		rawWrite (name.substring (1));
	    } else
		rawWrite (name);

	    // both keywords end up followed by one space, so the system
	    // literal is always separated from what precedes it
	    if (publicId != null)
		rawWrite (" PUBLIC \"" + publicId + "\" ");
	    else
		rawWrite (" SYSTEM ");
	    rawWrite ('"' + systemId + "\">");
	    newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * <b>SAX2</b>:  called on internal entity declarations.  Nothing
     * is written for XHTML, before a doctype was started, or inside
     * an entity written as a reference.
     *
     * @param name name of the entity; parameter entity names start
     *	with "%"
     * @param value replacement text of the entity
     */
    final public void internalEntityDecl (String name, String value)
    throws SAXException
    {
	if (xhtml)
	    return;
	try {
	    // At this time, only SAX2 callbacks start these.
	    if (!startedDoctype || entityNestLevel != 0)
		return;

	    rawWrite ("<!ENTITY ");
	    if (!name.startsWith ("%"))
		rawWrite (name);
	    else {
		// in declarations the "%" is a separate token
		rawWrite ("% ");
		rawWrite (name.substring (1));
	    }
	    rawWrite (' ');
	    writeQuotedValue (value, CTX_ENTITY);
	    rawWrite ('>');
	    newline ();
	} catch (IOException e) {
	    fatal ("can't write", e);
	}
    }

    /**
     * Writes a value surrounded by double quotes, escaping its
     * characters as needed for the given context.  Line wrapping is
     * disabled while the value is being written.
     *
     * @param value the text to quote; must not be null
     * @param code the escaping context (one of the CTX_* codes)
     */
    private void writeQuotedValue (String value, int code)
    throws SAXException, IOException
    {
	char	buf [] = value.toCharArray ();

	// we can't add line breaks to attribute/entity/... values
	noWrap = true;
	try {
	    rawWrite ('"');
	    escapeChars (buf, 0, buf.length, code);
	    rawWrite ('"');
	} finally {
	    // a failure while escaping must not leave wrapping
	    // disabled for everything written later
	    noWrap = false;
	}
    }
    
    // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
    // (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).
    // Codes 128-159 have no assigned values.
    // escapeChars() indexes this table with (character code - 160);
    // the "// NNN" markers give the code of the first name in a group.
    private static final String HTMLlat1x [] = {
	// 160
	"nbsp", "iexcl", "cent", "pound", "curren",
	"yen", "brvbar", "sect", "uml", "copy",

	// 170
	"ordf", "laquo", "not", "shy", "reg",
	"macr", "deg", "plusmn", "sup2", "sup3",

	// 180
	"acute", "micro", "para", "middot", "cedil",
	"sup1", "ordm", "raquo", "frac14", "frac12",

	// 190
	"frac34", "iquest", "Agrave", "Aacute", "Acirc",
	"Atilde", "Auml", "Aring", "AElig", "Ccedil",

	// 200
	"Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
	"Iacute", "Icirc", "Iuml", "ETH", "Ntilde",

	// 210
	"Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
	"times", "Oslash", "Ugrave", "Uacute", "Ucirc",

	// 220
	"Uuml", "Yacute", "THORN", "szlig", "agrave",
	"aacute", "acirc", "atilde", "auml", "aring",

	// 230
	"aelig", "ccedil", "egrave", "eacute", "ecirc",
	"euml", "igrave", "iacute", "icirc", "iuml",

	// 240
	"eth", "ntilde", "ograve", "oacute", "ocirc",
	"otilde", "ouml", "divide", "oslash", "ugrave",

	// 250
	"uacute", "ucirc", "uuml", "yacute", "thorn",
	"yuml"
    };

    // From "HTMLsymbolx.ent" ... some of the symbols that
    // we can conveniently handle.  Entities for the Greek
    // alphabet (upper and lower cases) are compact.
    // Upper case:  codes 913-937, indexed by (character code - 913).
    // The null entry (code 930) has no entity name; escapeChars()
    // writes a numeric character reference when it finds no name.
    private static final String HTMLsymbolx_GR [] = {
	// 913
	"Alpha", "Beta", "Gamma", "Delta", "Epsilon",
	"Zeta", "Eta", "Theta", "Iota", "Kappa",

	// 923
	"Lambda", "Mu", "Nu", "Xi", "Omicron",
	"Pi", "Rho", null, "Sigma", "Tau",

	// 933
	"Upsilon", "Phi", "Chi", "Psi", "Omega"
    };

    // Lower case Greek:  codes 945-969, indexed by
    // (character code - 945).
    private static final String HTMLsymbolx_gr [] = {
	// 945
	"alpha", "beta", "gamma", "delta", "epsilon",
	"zeta", "eta", "theta", "iota", "kappa",

	// 955
	"lambda", "mu", "nu", "xi", "omicron",
	"pi", "rho", "sigmaf", "sigma", "tau",

	// 965
	"upsilon", "phi", "chi", "psi", "omega"
    };


    /**
     * General routine to write text and substitute predefined
     * entities (XML, and a special case for XHTML) as needed.
     * Characters needing no substitution are written in runs;
     * each other character is replaced by an entity or character
     * reference, and each newline by the configured line end.
     *
     * @param buf buffer holding the text
     * @param off index of the first character to write; when negative,
     *	the whole buffer is written and <em>len</em> is ignored
     * @param len number of characters to write
     * @param code the context the text is written in (one of the
     *	CTX_* codes), which controls what gets escaped
     * @exception CharConversionException if a character is not legal
     *	in XML, or can't be written in an unparsed context
     */
    private void escapeChars (char buf [], int off, int len, int code)
    throws SAXException, IOException
    {
	// index (relative to "off") of the first character which
	// has been scanned but not yet written
	int	first = 0;

	if (off < 0) {
	    off = 0;
	    len = buf.length;
	}
	for (int i = 0; i < len; i++) {
	    String	esc;
	    char 	c = buf [off + i];

	    // Within this switch, "continue" means the character is
	    // written as-is, as part of the pending run; leaving the
	    // switch with "break" means "esc" replaces the character.
	    switch (c) {
	      // Note that CTX_ATTRIBUTE isn't explicitly tested here;
	      // all syntax delimiters are escaped in CTX_ATTRIBUTE,
	      // otherwise it's similar to CTX_CONTENT

	      // ampersand flags entity references; entity replacement
	      // text has unexpanded references, other text doesn't.
	      case '&':
		if (code == CTX_ENTITY || code == CTX_UNPARSED)
		    continue;
		esc = "amp";
		break;

	      // attributes and text may NOT have literal '<', but
	      // entities may have markup constructs
	      case '<':
		if (code == CTX_ENTITY || code == CTX_UNPARSED)
		    continue;
		esc = "lt";
		break;

	      // as above re markup constructs; but otherwise
	      // except when canonicalizing, this is for consistency
	      case '>':
		if (code == CTX_ENTITY || code == CTX_UNPARSED)
		    continue;
	        esc = "gt";
		break;
	      case '\'':
		if (code == CTX_CONTENT || code == CTX_UNPARSED)
		    continue;
		if (canonical)
		    continue;
		// "apos" is predefined by XML but unknown to HTML 4
		// user agents, so XHTML output needs the numeric form
		esc = xhtml ? "#39" : "apos";
		break;

	      // needed when printing quoted attribute/entity values
	      case '"':
		if (code == CTX_CONTENT || code == CTX_UNPARSED)
		    continue;
		esc = "quot";
		break;

	      // make line ends work per host OS convention
	      case '\n':
		esc = eol;
		break;

	      //
	      // No other characters NEED special treatment ... except
	      // for encoding-specific issues, like whether the character
	      // can really be represented in that encoding.
	      //
	      default:
		//
		// There are characters we can never write safely; getting
		// them is an error.
		//
		//   (a) They're never legal in XML ... detected by range 
		//	checks, and (eventually) by remerging surrogate
		//	pairs on output.  (Easy error for apps to prevent.)
		//
		//   (b) This encoding can't represent them, and we
		//	can't make reference substitution (e.g. inside
		//	CDATA sections, names, PI data, etc).  (Hard for
		//	apps to prevent, except by using UTF-8 or UTF-16
		//	as their output encoding.)
		//
		// We know a very little bit about what characters
		// the US-ASCII and ISO-8859-1 encodings support.  For
		// other encodings we can't detect the second type of
		// error at all.  (Never an issue for UTF-8 or UTF-16.)
		//

// FIXME:  CR in CDATA is an error; in text, turn to a char ref

// FIXME:  CR/LF/TAB in attributes should become char refs

		if ((c > 0xfffd)
			|| ((c < 0x0020) && !((c == 0x0009)
				|| (c == 0x000A) || (c == 0x000D)))
			|| (((c & dangerMask) != 0)
			    && (code == CTX_UNPARSED))) {

		    // if case (b) in CDATA, we might end the section,
		    // write a reference, then restart ... possible
		    // in one DOM L3 draft.

		    throw new CharConversionException (
			    "Illegal or non-writable character: U+"
			    + Integer.toHexString (c));
		}

		//
		// If the output encoding represents the character
		// directly, let it do so!  Else we'll escape it.
		//
		if ((c & dangerMask) == 0)
		    continue;
		esc = null;

		// Avoid numeric refs where symbolic ones exist, as
		// symbolic ones make more sense to humans reading!
		if (xhtml) {
		    // all the HTMLlat1x.ent entities
		    // (all the "ISO-8859-1" characters)
		    if (c >= 160 && c <= 255)
			esc = HTMLlat1x [c - 160];

		    // not quite half the HTMLsymbolx.ent entities
		    else if (c >= 913 && c <= 937)
			esc = HTMLsymbolx_GR [c - 913];
		    else if (c >= 945 && c <= 969)
			esc = HTMLsymbolx_gr [c - 945];

		    else switch (c) {
			// all of the HTMLspecialx.ent entities
			case  338: esc = "OElig";	break;
			case  339: esc = "oelig";	break;
			case  352: esc = "Scaron";	break;
			case  353: esc = "scaron";	break;
			case  376: esc = "Yuml";	break;
			case  710: esc = "circ";	break;
			case  732: esc = "tilde";	break;
			case 8194: esc = "ensp";	break;
			case 8195: esc = "emsp";	break;
			case 8201: esc = "thinsp";	break;
			case 8204: esc = "zwnj";	break;
			case 8205: esc = "zwj";		break;
			case 8206: esc = "lrm";		break;
			case 8207: esc = "rlm";		break;
			case 8211: esc = "ndash";	break;
			case 8212: esc = "mdash";	break;
			case 8216: esc = "lsquo";	break;
			case 8217: esc = "rsquo";	break;
			case 8218: esc = "sbquo";	break;
			case 8220: esc = "ldquo";	break;
			case 8221: esc = "rdquo";	break;
			case 8222: esc = "bdquo";	break;
			case 8224: esc = "dagger";	break;
			case 8225: esc = "Dagger";	break;
			case 8240: esc = "permil";	break;
			case 8249: esc = "lsaquo";	break;
			case 8250: esc = "rsaquo";	break;
			case 8364: esc = "euro";	break;

			// the other HTMLsymbolx.ent entities
			case  402: esc = "fnof";	break;
			case  977: esc = "thetasym";	break;
			case  978: esc = "upsih";	break;
			case  982: esc = "piv";		break;
			case 8226: esc = "bull";	break;
			case 8230: esc = "hellip";	break;
			case 8242: esc = "prime";	break;
			case 8243: esc = "Prime";	break;
			case 8254: esc = "oline";	break;
			case 8260: esc = "frasl";	break;
			case 8472: esc = "weierp";	break;
			case 8465: esc = "image";	break;
			case 8476: esc = "real";	break;
			case 8482: esc = "trade";	break;
			case 8501: esc = "alefsym";	break;
			case 8592: esc = "larr";	break;
			case 8593: esc = "uarr";	break;
			case 8594: esc = "rarr";	break;
			case 8595: esc = "darr";	break;
			case 8596: esc = "harr";	break;
			case 8629: esc = "crarr";	break;
			case 8656: esc = "lArr";	break;
			case 8657: esc = "uArr";	break;
			case 8658: esc = "rArr";	break;
			case 8659: esc = "dArr";	break;
			case 8660: esc = "hArr";	break;
			case 8704: esc = "forall";	break;
			case 8706: esc = "part";	break;
			case 8707: esc = "exist";	break;
			case 8709: esc = "empty";	break;
			case 8711: esc = "nabla";	break;
			case 8712: esc = "isin";	break;
			case 8713: esc = "notin";	break;
			case 8715: esc = "ni";		break;
			case 8719: esc = "prod";	break;
			case 8721: esc = "sum";		break;
			case 8722: esc = "minus";	break;
			case 8727: esc = "lowast";	break;
			case 8730: esc = "radic";	break;
			case 8733: esc = "prop";	break;
			case 8734: esc = "infin";	break;
			case 8736: esc = "ang";		break;
			case 8743: esc = "and";		break;
			case 8744: esc = "or";		break;
			case 8745: esc = "cap";		break;
			case 8746: esc = "cup";		break;
			case 8747: esc = "int";		break;
			case 8756: esc = "there4";	break;
			case 8764: esc = "sim";		break;
			case 8773: esc = "cong";	break;
			case 8776: esc = "asymp";	break;
			case 8800: esc = "ne";		break;
			case 8801: esc = "equiv";	break;
			case 8804: esc = "le";		break;
			case 8805: esc = "ge";		break;
			case 8834: esc = "sub";		break;
			case 8835: esc = "sup";		break;
			case 8836: esc = "nsub";	break;
			case 8838: esc = "sube";	break;
			case 8839: esc = "supe";	break;
			case 8853: esc = "oplus";	break;
			case 8855: esc = "otimes";	break;
			case 8869: esc = "perp";	break;
			case 8901: esc = "sdot";	break;
			case 8968: esc = "lceil";	break;
			case 8969: esc = "rceil";	break;
			case 8970: esc = "lfloor";	break;
			case 8971: esc = "rfloor";	break;
			case 9001: esc = "lang";	break;
			case 9002: esc = "rang";	break;
			case 9674: esc = "loz";		break;
			case 9824: esc = "spades";	break;
			case 9827: esc = "clubs";	break;
			case 9829: esc = "hearts";	break;
			case 9830: esc = "diams";	break;
		    }
		}

		// else escape with numeric char refs
		if (esc == null) {
		    stringBuf.setLength (0);
		    stringBuf.append ("#x");
		    stringBuf.append (Integer.toHexString (c).toUpperCase ());
		    esc = stringBuf.toString ();

		    // FIXME:  We don't write surrogate pairs correctly.
		    // They should work as one ref per character, since
		    // each pair is one character.  For reading back into
		    // Unicode, it matters beginning in Unicode 3.1 ...
		}
		break;
	    }

	    // flush the run of ordinary characters before this one
	    if (i != first)
		rawWrite (buf, off + first, i - first);
	    first = i + 1;

	    // identity test is intended:  "esc" is the very same object
	    // as "eol" only when it was assigned for a newline above
	    if (esc == eol)
		newline ();
	    else {
		rawWrite ('&');
		rawWrite (esc);
		rawWrite (';');
	    }
	}

	// flush whatever follows the last substitution
	if (first < len)
	    rawWrite (buf, off + first, len - first);
    }



    // Writes the configured end-of-line string directly to the
    // output, and records that the next character written will
    // be in column zero.
    private void newline ()
    throws SAXException, IOException
    {
	out.write (eol);
	column = 0;
    }

    /**
     * Starts a new line and indents it by two columns per level of
     * element nesting.  Indentation is written as tabs (counted as
     * eight columns each) followed by pairs of spaces.
     */
    private void doIndent ()
    throws SAXException, IOException
    {
	final int	width = elementNestLevel * 2;
	int		remaining = width;

	newline ();
	column = width;

	// track tabs only at line starts
	for (; remaining > 8; remaining -= 8)
	    out.write ("\t");
	for (; remaining > 0; remaining -= 2)
	    out.write ("  ");
    }

    /**
     * Writes one character without any escaping or line wrapping,
     * advancing the current column by one.
     *
     * @param c the character to write
     */
    private void rawWrite (char c)
    throws IOException
    {
	out.write (c);
	column += 1;
    }

    /**
     * Writes a string without escaping.  When pretty-printing with
     * "default" whitespace handling in effect, the text goes through
     * the line-wrapping variant of this method; otherwise it is
     * written directly and the column is advanced by its length.
     *
     * @param s the text to write
     */
    private void rawWrite (String s)
    throws SAXException, IOException
    {
	boolean	wrapping = prettyPrinting && "default".equals (space.peek ());

	if (!wrapping) {
	    out.write (s);
	    column += s.length ();
	    return;
	}

	char	chars [] = s.toCharArray ();

	rawWrite (chars, 0, chars.length);
    }

    // NOTE:  if xhtml, the REC gives some rules about whitespace
    // which we could follow ... notably, many places where conformant
    // agents "must" consolidate/normalize whitespace.  Line ends can
    // be removed there, etc.  This may not be the right place to do
    // such mappings though.

    // Line buffering may help clarify algorithms and improve results.

    // It's likely xml:space needs more attention.

    /**
     * Writes characters without escaping.  When pretty-printing with
     * "default" whitespace handling in effect (and wrapping is not
     * disabled), lines are filled only up to the desired line length:
     * the text is broken after a space or tab, and each continuation
     * line is indented.
     *
     * @param buf buffer holding the text
     * @param offset index of the first character to write
     * @param length number of characters to write
     */
    private void rawWrite (char buf [], int offset, int length)
    throws SAXException, IOException
    {
	if (prettyPrinting && space.empty ())
	    fatal ("stack discipline", null);

	// line breaks may only be added with "default" whitespace
	if (!(prettyPrinting && "default".equals (space.peek ()))) {
	    out.write (buf, offset, length);
	    column += length;
	    return;
	}

	while (length > 0) {
	    // number of columns left on the current line
	    int		room = lineLength - column;

	    // Do we even have a problem?
	    if (room > length || noWrap) {
		out.write (buf, offset, length);
		column += length;
		return;
	    }

	    int		breakAt = -1;
	    char	c;

	    // prefer the last space or tab that still fits, so that
	    // as much of the line as possible gets filled
	    for (int i = room - 1; i >= 0; i--) {
		c = buf [offset + i];
		if (c == ' ' || c == '\t') {
		    breakAt = i;
		    break;
		}
	    }

	    // else settle for the first one past the line length
	    if (breakAt < 0) {
		for (int i = Math.max (room, 0); i < length; i++) {
		    c = buf [offset + i];
		    if (c == ' ' || c == '\t') {
			breakAt = i;
			break;
		    }
		}
	    }

	    // no such luck:  the rest can't be broken at all
	    if (breakAt < 0) {
		out.write (buf, offset, length);
		column += length;
		return;
	    }

	    // write through the whitespace character, then go on
	    // with the remainder on a new, indented line
	    int		count = breakAt + 1;

	    out.write (buf, offset, count);
	    doIndent ();
	    offset += count;
	    length -= count;
	}
    }
}
