1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
#! /usr/local/bin/ruby -Ku
require 'xml/dom/builder'
require 'nkf'
#require 'uconv'
# Control-flow signal used by this script: it is raised from the builder's
# unknownEncoding hook (with the encoding name as its message) and rescued
# around builder.parse, where the input may be transcoded and parsed again.
# It derives directly from Exception, so a bare `rescue` (StandardError)
# does not catch it.
class XMLRetry < Exception
end
#def Uconv.unknown_unicode_handler(u)
# return '??'
## return "#[#{format('%04x', u)}]"
#end
# XML::DOM::Builder subclass that this script instantiates to parse its input.
# It currently adds no behavior of its own: the two converter overrides below
# are commented out (as is the `require 'uconv'` line they depend on).
# NOTE(review): judging by the Uconv.u8toeuc calls, they were presumably meant
# to convert names and character data from UTF-8 to EUC-JP -- confirm before
# re-enabling.
class EUCTreeBuilder < XML::DOM::Builder
# def nameConverter(str)
# Uconv.u8toeuc(str)
# end
# def cdataConverter(str)
# Uconv.u8toeuc(str)
# end
end
# Builder used for the first parse attempt.
builder = EUCTreeBuilder.new

# Install an unknownEncoding hook on this one builder object only (a singleton
# method, not a method of EUCTreeBuilder). It turns the callback into an
# XMLRetry exception whose message is the value the hook was called with, so
# the code around builder.parse can react to it.
class << builder
  def unknownEncoding(e)
    raise XMLRetry, e
  end
end
# Read the entire input: the files named on the command line, or standard
# input when none are given.
xml = ARGF.read

begin
  tree = builder.parse(xml)
rescue XMLRetry => retry_signal
  # The unknownEncoding hook fired; the exception message carries the value
  # it was given, which is matched against the one encoding handled here.
  newencoding = nil
  case retry_signal.to_s
  when /^iso-2022-jp$/i
    # Convert the ISO-2022-JP (JIS) text to EUC-JP with NKF and pass the new
    # encoding name to the replacement builder.
    xml = NKF.nkf("-Je", xml)
    newencoding = "EUC-JP"
  end
  # The replacement builder is a plain EUCTreeBuilder: it does not carry the
  # singleton unknownEncoding hook defined on the first builder, so it will
  # not raise XMLRetry again and this retry cannot loop on the same signal.
  # NOTE(review): the leading 0 is presumably the builder's level argument --
  # confirm against XML::DOM::Builder#initialize.
  builder = EUCTreeBuilder.new(0, newencoding)
  retry
rescue XML::Parser::Error
  # Report the parser's message together with the line the builder reports,
  # then exit with a failure status.
  line = builder.line
  print "#{$0}: #{$!} (in line #{line})\n"
  exit 1
end

#print tree.to_s.gsub(/\#\[([0-9a-f]{4})\]/, "&#x\\1;"), "\n"
tree.dump
|