1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
package nokogiri.internals;
import org.apache.xerces.xni.parser.XMLParseException;
import org.jruby.Ruby;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
* Error handler for XML documents, used when recover is true (the default).
*
* @author sergio
* @author Yoko Harada <yokolet@gmail.com>
*/
public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler
{
  /**
   * Lower-case message fragments that mark a fatal error as one the parser
   * cannot get past. Matched as substrings of the lower-cased error message.
   */
  private static final String[] FATAL_MESSAGE_FRAGMENTS = {
    "in prolog",
    "limit",
    "preceding the root element must be well-formed",
    "following the root element must be well-formed"
  };

  /**
   * Creates the handler; all three arguments are passed unchanged to the
   * superclass constructor.
   *
   * @param runtime   forwarded to the superclass
   * @param noerror   forwarded to the superclass
   * @param nowarning forwarded to the superclass
   */
  public
  NokogiriNonStrictErrorHandler(Ruby runtime, boolean noerror, boolean nowarning)
  {
    super(runtime, noerror, nowarning);
  }

  /**
   * Records a SAX warning via {@code addError} without throwing it.
   *
   * @param ex the warning reported by the parser
   */
  public void
  warning(SAXParseException ex) throws SAXException
  {
    addError(ex);
  }

  /**
   * Records a SAX error via {@code addError} without throwing it.
   *
   * @param ex the error reported by the parser
   */
  public void
  error(SAXParseException ex) throws SAXException
  {
    addError(ex);
  }

  /**
   * Records a SAX fatal error and rethrows it when its message identifies a
   * condition that parsing should not continue past.
   *
   * @param ex the fatal error reported by the parser
   * @throws SAXException the same {@code ex}, when its message is non-null
   *         and {@link #isFatal(String)} accepts it
   */
  public void
  fatalError(SAXParseException ex) throws SAXException
  {
    // fix #837
    // For some errors (e.g. entity references and other invalid constructs
    // found in the prolog) Xerces does not advance the reader. It keeps
    // reporting the same error to this method, and we would keep appending
    // it to the document errors array until we run out of memory. Throwing
    // the exception is what breaks that loop.
    addError(ex);

    String message = ex.getMessage();
    if (message != null && isFatal(message)) {
      throw ex;
    }
  }

  /**
   * Records an XNI error via {@code addError}.
   *
   * @param domain not used by this handler
   * @param key    not used by this handler
   * @param e      the error reported by the parser
   */
  public void
  error(String domain, String key, XMLParseException e)
  {
    addError(e);
  }

  /**
   * Records an XNI fatal error via {@code addError}. Unlike the SAX variant,
   * this overload never rethrows.
   *
   * @param domain not used by this handler
   * @param key    not used by this handler
   * @param e      the fatal error reported by the parser
   */
  public void
  fatalError(String domain, String key, XMLParseException e)
  {
    addError(e);
  }

  /**
   * Records an XNI warning via {@code addError}.
   *
   * @param domain not used by this handler
   * @param key    not used by this handler
   * @param e      the warning reported by the parser
   */
  public void
  warning(String domain, String key, XMLParseException e)
  {
    addError(e);
  }

  /**
   * Determines whether this is a fatal error that should cause the parsing
   * to stop, or an error that can be ignored.
   *
   * @param msg the error message; must not be null
   * @return true when the lower-cased message contains any of the known
   *         fatal fragments
   */
  private static boolean
  isFatal(String msg)
  {
    // Locale.ROOT keeps the case mapping independent of the JVM default
    // locale; e.g. under a Turkish locale "I" would lower-case to a dotless
    // "ı" and the ASCII fragments would no longer match.
    String msgLowerCase = msg.toLowerCase(java.util.Locale.ROOT);
    for (String fragment : FATAL_MESSAGE_FRAGMENTS) {
      if (msgLowerCase.contains(fragment)) {
        return true;
      }
    }
    return false;
  }
}
|