/*
 * Copyright (c) 2008, Aaron Digulla
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Aaron Digulla nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package de.pdark.decentxml;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;

/**
 * Create Java code for HTML entities. Download the source for the Wikipedia page
 * http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
 * and run it through this code (filename is the first argument).
 * 
 * @author DIGULAA
 *
 */
public class CreateHtmlEntities
{
    /** Trimmed content of the line that starts a new table row in the wiki markup. */
    private static final String ROW_SEPARATOR = "|-";

    /** Entities whose code point is below this value are not emitted. */
    private static final int FIRST_CODE_POINT = 0x00A0;

    /**
     * Reads the wiki source named by the first argument and prints one
     * <code>add ("name", "escape"); // description</code> line per entity to
     * standard output.
     *
     * <p>Every row is expected to start with a <code>|-</code> line followed by
     * seven cell lines: name, character, Unicode code point, standard, DTD,
     * old ISO subset and description. The name and description are taken
     * from column 2 onwards of their lines; the code point is the four hex
     * digits at columns 4 to 7 of its line.
     *
     * <p>Rows with a code point below 00A0, an empty name or a cell that does
     * not hold four hex digits at the expected position are skipped. The
     * lines following a skipped row are scanned for the next row marker like
     * any other line. If the file ends in the middle of a row, that row is
     * dropped and processing stops.
     *
     * <p>I/O failures are reported as a stack trace on standard error.
     *
     * @param args the file name of the downloaded wiki source is expected in
     *        <code>args[0]</code>; without it a usage message is printed to
     *        standard error
     */
    public static void main (String[] args)
    {
        if (args == null || args.length == 0)
        {
            System.err.println ("Usage: CreateHtmlEntities <wiki-source-file>");
            return;
        }

        try
        {
            File f = new File (args[0]);
            // NOTE(review): the input is decoded as ISO-8859-1; confirm this matches
            // the encoding of the downloaded page if non-ASCII text shows up in the output.
            BufferedReader r = new BufferedReader (new InputStreamReader (new FileInputStream (f), "iso-8859-1"));
            try
            {
                String line;

                while ((line = r.readLine ()) != null)
                {
                    if (!ROW_SEPARATOR.equals (line.trim ()))
                        continue;

                    String nameCell = r.readLine (); // Name
                    r.readLine (); // Character (not needed)
                    String codeCell = r.readLine (); // Unicode code point
                    // Once the end of the file is reached every further read returns
                    // null as well, so checking the last line read covers all three.
                    if (codeCell == null)
                        break;

                    String name = cellText (nameCell);
                    String unicode = codePoint (codeCell);
                    if (name.length () == 0 || unicode == null)
                        continue; // Not an entity row
                    if (Integer.parseInt (unicode, 16) < FIRST_CODE_POINT)
                        continue;

                    r.readLine (); // Standard
                    r.readLine (); // DTD
                    r.readLine (); // Old ISO subset
                    String descCell = r.readLine (); // Description
                    if (descCell == null)
                        break;

                    String desc = stripWikiMarkup (cellText (descCell));
                    System.out.println ("        add (\""+name+"\", \"\\u"+unicode.toLowerCase ()+"\"); // "+desc);
                }
            }
            finally
            {
                // Release the file even if a row could not be processed
                r.close ();
            }
        }
        catch (Exception e)
        {
            e.printStackTrace ();
        }
    }

    /** Returns the trimmed text after the two leading characters of a cell line, or "" if the line is shorter. */
    private static String cellText (String cell)
    {
        if (cell.length () < 2)
            return "";
        return cell.substring (2).trim ();
    }

    /** Returns the four hex digits at columns 4 to 7 of the cell line, or null if they are missing or not hex. */
    private static String codePoint (String cell)
    {
        if (cell.length () < 8)
            return null;

        String hex = cell.substring (4, 8);
        for (int i = 0; i < hex.length (); i++)
        {
            if (Character.digit (hex.charAt (i), 16) < 0)
                return null;
        }
        return hex;
    }

    /** Removes italic quotes, double-brace templates and the brackets of wiki links from a description. */
    private static String stripWikiMarkup (String text)
    {
        return text
            .replace ("''", "")
            .replaceAll ("\\{\\{[^}]+\\}\\}", "")
            .replace ("[[", "")
            .replace ("]]", "");
    }
}
