/*
 * Copyright (c) 2008-2009 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.htmlparser.generator;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import nu.validator.htmlparser.annotation.CharacterName;
import nu.validator.htmlparser.annotation.NoLength;

/**
 * Command-line generator that reads lines from standard input, extracts
 * named character entries matching {@link #LINE_PATTERN} (a name followed by
 * one or two hexadecimal code points) and prints three Java array
 * declarations to standard output: <code>NAMES</code>, <code>VALUES</code>
 * and <code>HILO_ACCEL</code>.
 */
public class GenerateNamedCharacters {

    /**
     * Added to <code>codePoint &gt;&gt; 10</code> to obtain the UTF-16 lead
     * surrogate of a code point above U+FFFF.
     */
    private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);

    /**
     * Matches one table row: group 1 is the name, group 2 the first code
     * point in hex, group 3 the optional second code point in hex.
     */
    private static final Pattern LINE_PATTERN = Pattern.compile("<td> <code title=\"\">([^<]*)</code> </td> <td> U\\+(\\S*) (?:U\\+(\\S*) )?</td>");

    /** Number of rows in the hi/lo table; rows are indexed by the raw second character of a name. */
    private static final int HILO_ROWS = 'z' + 1;

    /** Number of columns in the hi/lo table; columns are indexed by {@link #charToIndex(char)}. */
    private static final int HILO_COLUMNS = 'Z' - 'A' + 1 + 'z' - 'a' + 1;

    /**
     * Formats a UTF-16 code unit as a backslash, the letter u and exactly
     * four lowercase hex digits.
     *
     * @param c the value to format; must fit in four hex digits
     * @return the six-character escape text
     * @throws RuntimeException if <code>c</code> needs more than four hex digits
     */
    private static String toUString(int c) {
        String hexString = Integer.toHexString(c);
        if (hexString.length() > 4) {
            throw new RuntimeException("Unreachable.");
        }
        // toHexString never returns an empty string, so the length is 1..4 here.
        return "\\u" + "0000".substring(hexString.length()) + hexString;
    }

    /**
     * Renders a code unit as a Java character literal (including the
     * surrounding single quotes) that is safe to paste into Java source.
     * <p>
     * The apostrophe, backslash, line feed and carriage return cannot be
     * written as Unicode escapes inside a character literal, because Java
     * translates Unicode escapes before it tokenizes the source; they are
     * emitted as ordinary escape sequences instead.
     *
     * @param c the code unit to render
     * @return the literal text
     */
    private static String toCharLiteral(char c) {
        switch (c) {
            case '\'':
                return "\'\\\'\'";
            case '\n':
                return "\'\\n\'";
            case '\r':
                return "\'\\r\'";
            case '\\':
                return "\'\\\\\'";
            default:
                return "\'" + toUString(c) + "\'";
        }
    }

    /**
     * Maps an ASCII letter to its column in the hi/lo table: upper case
     * letters map to 0..25 and lower case letters to 26..51.
     *
     * @param c the first character of a name
     * @return the column index
     * @throws IllegalArgumentException if <code>c</code> is not an ASCII letter
     */
    private static int charToIndex(char c) {
        if (c >= 'A' && c <= 'Z') {
            return c - 'A';
        }
        if (c >= 'a' && c <= 'z') {
            return c - 'a' + 26;
        }
        throw new IllegalArgumentException("Bad char in named character name: "
                + c);
    }

    /**
     * Tells whether every element of the array is zero.
     *
     * @param arr the array to inspect
     * @return <code>true</code> if no element is non-zero
     */
    private static boolean allZero(int[] arr) {
        for (int element : arr) {
            if (element != 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Converts the hex code points captured from one table row into the
     * UTF-16 string they denote.
     *
     * @param firstHex hex digits of the first code point
     * @param secondHex hex digits of the second code point, or
     *        <code>null</code> if the row has only one
     * @return a string of one or two UTF-16 code units
     */
    private static String decodeValue(String firstHex, String secondHex) {
        int first = Integer.parseInt(firstHex, 16);
        if (secondHex != null) {
            // Two code points: each is narrowed to a single char.
            int second = Integer.parseInt(secondHex, 16);
            return ("" + (char) first) + (char) second;
        }
        if (first <= 0xFFFF) {
            return "" + (char) first;
        }
        // Above U+FFFF: split into a lead/trail surrogate pair.
        int high = LEAD_OFFSET + (first >> 10);
        int low = 0xDC00 + (first & 0x3FF);
        return ("" + (char) high) + (char) low;
    }

    /**
     * Reads the whole input and collects every entry matching
     * {@link #LINE_PATTERN}, sorted by name. A later entry with the same name
     * replaces an earlier one.
     *
     * @param reader the source of lines
     * @return name-to-value map in natural string order
     * @throws IOException if reading fails
     */
    private static TreeMap<String, String> readEntities(BufferedReader reader)
            throws IOException {
        TreeMap<String, String> entities = new TreeMap<String, String>();
        String line;
        while ((line = reader.readLine()) != null) {
            Matcher m = LINE_PATTERN.matcher(line);
            while (m.find()) {
                entities.put(m.group(1), decodeValue(m.group(2), m.group(3)));
            }
        }
        return entities;
    }

    /**
     * Prints the <code>NAMES</code> array (each name with its first two
     * characters removed) and builds the hi/lo table while doing so.
     * <p>
     * For each distinct two-character prefix, the table cell at
     * <code>[second char][charToIndex(first char)]</code> receives
     * <code>(index of last name with that prefix &lt;&lt; 16) | index of
     * first name with that prefix</code>. Cells for unused prefixes stay zero.
     *
     * @param entities the non-empty, sorted entries
     * @return the populated hi/lo table
     */
    private static int[][] printNames(TreeMap<String, String> entities) {
        // Java initializes arrays to zero. Zero is the magic value meaning
        // "no hi/lo value".
        int[][] hiLoTable = new int[HILO_ROWS][HILO_COLUMNS];

        String firstName = entities.firstKey();
        int firstKey = charToIndex(firstName.charAt(0));
        int secondKey = firstName.charAt(1);
        int row = 0;
        int lo = 0;

        System.out.print("static final @NoLength @CharacterName String[] NAMES = {\n");
        for (String name : entities.keySet()) {
            int newFirst = charToIndex(name.charAt(0));
            int newSecond = name.charAt(1);
            // NOTE(review): newSecond is a raw character code, so this can
            // only fire for a NUL second character; it presumably was meant
            // to protect the zero sentinel of the hi/lo table - confirm.
            assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA";
            if (firstKey != newFirst || secondKey != newSecond) {
                // Prefix changed: close the run that ended on the previous row.
                hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo;
                lo = row;
                firstKey = newFirst;
                secondKey = newSecond;
            }
            System.out.print("\"");
            System.out.print(name.substring(2));
            System.out.print("\",\n");
            row++;
        }
        System.out.print("};\n");

        // Close the final run, which no prefix change terminated.
        hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo;
        return hiLoTable;
    }

    /**
     * Prints the <code>VALUES</code> array: one row of one or two character
     * literals per entry, in the same order as the names.
     *
     * @param entities the sorted entries
     */
    private static void printValues(TreeMap<String, String> entities) {
        System.out.print("static final @NoLength char[][] VALUES = {\n");
        for (String value : entities.values()) {
            System.out.print("{");
            System.out.print(toCharLiteral(value.charAt(0)));
            if (value.length() != 1) {
                System.out.print(", ");
                System.out.print(toCharLiteral(value.charAt(1)));
            }
            System.out.print("},\n");
        }
        System.out.print("};\n");
    }

    /**
     * Prints the <code>HILO_ACCEL</code> array; rows that contain only zeros
     * are printed as <code>null</code>.
     *
     * @param hiLoTable the table built by {@link #printNames(TreeMap)}
     */
    private static void printHiLoTable(int[][] hiLoTable) {
        System.out.print("static final @NoLength int[][] HILO_ACCEL = {\n");
        for (int[] tableRow : hiLoTable) {
            if (allZero(tableRow)) {
                System.out.print("null,\n");
                continue;
            }
            System.out.print("{");
            for (int cell : tableRow) {
                System.out.print(cell);
                System.out.print(", ");
            }
            System.out.print("},\n");
        }
        System.out.print("};\n");
    }

    /**
     * Reads UTF-8 text from standard input and prints the generated array
     * declarations to standard output.
     *
     * @param args ignored
     * @throws IOException if standard input cannot be read
     * @throws IllegalStateException if no line of the input contains a
     *         recognizable entry
     */
    public static void main(String[] args) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                System.in, "utf-8"));
        TreeMap<String, String> entities = readEntities(reader);
        if (entities.isEmpty()) {
            // Fail with a clear message instead of an opaque iterator error.
            throw new IllegalStateException(
                    "No named character entries found on standard input.");
        }

        int[][] hiLoTable = printNames(entities);
        printValues(entities);
        printHiLoTable(hiLoTable);
    }

}
