1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
/*
* Copyright (C) 2001-2013 Michael Fuchs
*
* This file is part of herold.
*
* herold is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* herold is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with herold. If not, see <http://www.gnu.org/licenses/>.
*/
package org.dbdoclet.html.tokenizer;
import java.text.ParseException;
import java.util.ArrayList;
import org.dbdoclet.html.tokenizer.parser.HtmlTokenizer;
import org.dbdoclet.html.tokenizer.parser.TokenMgrError;
import org.dbdoclet.progress.ProgressListener;
/**
 * The class <code>Tokenizer</code> implements a tokenizer for HTML.
 *
 * <p>
 * Typical usage: construct the tokenizer with the HTML text, optionally
 * register progress listeners, call {@link #tokenize()} and then walk the
 * result with {@link #hasNext()} and {@link #next()}.
 *
 * @author Michael Fuchs
 * @version 1.0
 */
public class Tokenizer {

    /** Message used whenever the token list is accessed before it exists. */
    private static final String TOKENS_MISSING = "Variable tokens is null!";

    /** The text to be tokenized; never null and never modified. */
    private final String data;

    /** Result of the last successful {@link #tokenize()} call, or null. */
    private ArrayList<Token> tokens;

    /** Index of the token that the next call to {@link #next()} returns. */
    private int tokenPosition = 0;

    /** Listeners handed to the parser; may be null if none were set. */
    private ArrayList<ProgressListener> listeners;

    /**
     * Creates a new <code>Tokenizer</code> instance.
     *
     * @param data
     *            the text to be tokenized
     * @throws NullPointerException
     *             if <code>data</code> is null
     */
    public Tokenizer(String data) {

        // Fail at construction time for a null argument instead of much
        // later inside the parser.
        if (data == null) {
            throw new NullPointerException("data");
        }

        this.data = data;
    }

    /* ======================================================================== */
    /* PUBLIC METHODS */
    /* ======================================================================== */

    /**
     * Parses the text given to the constructor and resets the read position
     * to the first token.
     *
     * @throws TokenizerException
     *             if the text could not be parsed
     */
    public void tokenize() throws TokenizerException {
        tokenPosition = 0;
        tokens = parse();
    }

    /**
     * Sets the progress listeners that are passed on to the parser by the
     * next call to {@link #tokenize()}.
     *
     * @param listeners
     *            the listeners to pass on
     */
    public void setProgressListeners(ArrayList<ProgressListener> listeners) {
        this.listeners = listeners;
    }

    /**
     * The method <code>hasNext</code> returns true if there are still tokens to
     * be fetched.
     *
     * @return True if another token exists.
     * @throws IllegalStateException
     *             if no token list is available, i.e. {@link #tokenize()} has
     *             not completed successfully yet
     */
    public boolean hasNext() {
        return tokenPosition < requireTokens().size();
    }

    /**
     * The method <code>next</code> returns the next token and advances the
     * read position.
     *
     * If there are no more tokens left, a null value is returned.
     *
     * @return The next token or null.
     * @throws IllegalStateException
     *             if no token list is available, i.e. {@link #tokenize()} has
     *             not completed successfully yet
     */
    public Token next() {

        ArrayList<Token> available = requireTokens();

        if (tokenPosition >= available.size()) {
            return null;
        }

        Token token = available.get(tokenPosition);
        tokenPosition++;
        return token;
    }

    /**
     * Returns the total number of tokens produced by {@link #tokenize()}.
     *
     * @return the number of tokens
     * @throws IllegalStateException
     *             if no token list is available, i.e. {@link #tokenize()} has
     *             not completed successfully yet
     */
    public int size() {
        return requireTokens().size();
    }

    /**
     * Returns the current read position.
     *
     * @return the index of the token that {@link #next()} would return next
     */
    public int position() {
        return tokenPosition;
    }

    /* ======================================================================== */
    /* PRIVATE METHODS */
    /* ======================================================================== */

    /**
     * Returns the token list, rejecting access before it has been created.
     *
     * @return the token list, never null
     * @throws IllegalStateException
     *             if the token list is null
     */
    private ArrayList<Token> requireTokens() {

        if (tokens == null) {
            throw new IllegalStateException(TOKENS_MISSING);
        }

        return tokens;
    }

    /**
     * The method <code>parse</code> runs the HTML parser over the text and
     * returns the list of tokens it produced.
     *
     * @return the tokens produced by the parser
     * @exception TokenizerException
     *                if anything is thrown while creating or running the
     *                parser; the original throwable is kept as the cause
     */
    private ArrayList<Token> parse() throws TokenizerException {

        try {

            HtmlTokenizer parser = new HtmlTokenizer(data, "UTF-8");
            parser.setProgressListeners(listeners);
            return parser.parse();

        } catch (Throwable oops) {

            // NOTE(review): catching Throwable also wraps unrelated errors
            // into a TokenizerException - confirm that this is intended.
            String msg = "Tokenizer Error";

            // NOTE(review): ParseException here is java.text.ParseException
            // (see the imports) - confirm that the parser really throws this
            // type and not a parser-specific one.
            if (oops instanceof TokenMgrError || oops instanceof ParseException) {
                msg = "Parse error " + oops.getMessage() + " while parsing \""
                        + data + "\".";
            }

            throw new TokenizerException(msg, oops);
        }
    }
}
|