File: xmlconv.rb

package info (click to toggle)
libuconv-ruby 0.4.9-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 3,752 kB
  • ctags: 135
  • sloc: ansic: 160,433; xml: 13,060; ruby: 396; makefile: 51
file content (62 lines) | stat: -rwxr-xr-x 1,515 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#! /usr/local/bin/ruby

## XML encoding converter
## 1998 by yoshidam
##
## Converts the encoding of an XML file to EUC-JP.
## Characters that cannot be converted are replaced with character references.

require 'kconv'
include Kconv
require 'uconv'
include Uconv

## Builds an XML numeric character reference for the code point u:
## "&#x" followed by u in lower-case hex (zero-padded to at least four
## digits) and a closing ";".
## NOTE(review): presumably uconv calls this hook for characters it cannot
## map to the target encoding -- confirm against the uconv documentation.
def Uconv.unknown_unicode_handler(u)
  format('&#x%04x;', u)
end

## Read the first input line; this is where an XML declaration would be.
## Empty input (gets returns nil) leaves nothing to convert: exit status 1.
xml = $<.gets
exit 1 if xml.nil?

## NOTE(review): presumably disables multibyte-aware regexp matching so the
## patterns below work on raw bytes -- confirm for the target Ruby version.
$KCODE="NONE"

## Work out the source encoding from the first line and rewrite the XML
## declaration so that it announces EUC-JP.
##   exit 1 -- the declaration already says EUC-JP
##   exit 2 -- the line starts with "<?xml" but matches none of the cases
if xml =~ /^<\?xml\sversion=.+\sencoding=.EUC-JP./i
  $stderr.print "This file is already EUC-JP.\n"
  exit 1
elsif xml =~ /^<\?xml\sversion=.+\sencoding=.Shift_JIS./i
  xml.sub!(/Shift_JIS/i, "EUC-JP")
  encoding = "Shift_JIS"
elsif xml =~ /^<\?xml\sversion=.+\sencoding=.ISO-2022-JP./i
  xml.sub!(/ISO-2022-JP/i, "EUC-JP")
  encoding = "ISO-2022-JP"
elsif xml =~ /^<\?xml\sversion=.+\sencoding=.UTF-8./i
  xml.sub!(/UTF-8/i, "EUC-JP")
  encoding = "UTF-8"
elsif xml =~ /^<\?xml(\sversion=.+)?\s*\?>/i
  ## Declaration without a recognised encoding: treat the input as UTF-8.
  ## NOTE(review): ".+" can also swallow an encoding= naming some other
  ## charset, so such files are treated as UTF-8 here as well.
  ## The match is lazy so that only the declaration itself is replaced;
  ## a greedy ".*" would run to the last "?>" on the line and delete any
  ## markup that follows the declaration on the same line.
  xml.sub!(/^<\?(.*?)\?>/, "<?xml version='1.0' encoding='EUC-JP'?>")
  encoding = "UTF-8"
elsif xml =~ /^\377\376/
  ## Bytes FF FE at the start: little-endian UTF-16 byte order mark.
  ## The first line is left untouched in this case.
  encoding = "UTF-16-LE"
elsif xml !~ /^<\?xml/i
  ## No XML declaration at all: prepend one and treat the input as UTF-8.
  xml.sub!(/^/, "<?xml version='1.0' encoding='EUC-JP'?>\n")
  encoding = "UTF-8"
else
  $stderr.print "Unknown encoding\n"
  exit 2
end

## Report the detected encoding on stderr. Standard output carries the
## converted document, which must begin with the XML declaration, so the
## diagnostic must not be written there.
$stderr.print encoding.inspect, "\n"

## Append the rest of the input to the first line read earlier.
## String() turns a nil result from read into an empty string.
remainder = $<.read
xml = xml + String(remainder)

## Shift_JIS and ISO-2022-JP input is first run through kconv to EUC and
## then through euctou8, so that every non-UTF-16 input reaches the final
## step via the same converter.
case encoding
when "Shift_JIS"
  xml = euctou8(kconv(xml, EUC, SJIS))
when "ISO-2022-JP"
  xml = euctou8(kconv(xml, EUC, JIS))
end

## Write the result to standard output: u2toeuc for UTF-16-LE input,
## u8toeuc for everything else.
print(encoding == "UTF-16-LE" ? u2toeuc(xml) : u8toeuc(xml))