File: xmlgrep.rb.old

package info (click to toggle)
libxml-parser-ruby 0.5.16-1
  • links: PTS
  • area: main
  • in suites: potato
  • size: 596 kB
  • ctags: 702
  • sloc: ruby: 4,474; ansic: 1,254; xml: 542; makefile: 53
file content (133 lines) | stat: -rwxr-xr-x 2,622 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#! /usr/local/bin/ruby

## Expat for Ruby sample
## 1998 by yoshidam
##
## Searches the character data of XML files for a pattern and prints the matches

require 'parsearg'
require 'xmlparser'
require 'uconv'
include Uconv
require 'kconv'
include Kconv

## Search the character data of an XML document for pattern, grep-style.
##
## Every character-data chunk reported by XMLParser that is not made up
## solely of whitespace is converted with u8toeuc and matched against
## pattern.  On a hit the optional "<file>:" / "<line>:" prefixes are
## printed, parser.defaultCurrent is called, and the next event that is not
## START_ELEM / END_ELEM / CDATA / PI is printed with its newlines escaped
## as "\n" (text converted with u8toeuc).
##
## pattern   - Regexp to look for.
## file      - path of the XML file to read; nil means read $stdin.
## printfile - 1 to prefix each hit with "<file>:" (only when file is given).
## printline - 1 to prefix each hit with "<line>:" taken from parser.line.
##
## A file that cannot be opened and XML parse errors are reported on
## $stderr instead of being raised.  The return value is not meaningful.
def xmlgrep(pattern, file = nil, printfile = 0, printline = 0)

  ## file open
  if file
    begin
      ## File.open rather than Kernel#open, so that a name starting with
      ## "|" is treated as a path and is never run as a command.
      f = File.open(file, "r")
    rescue
      $stderr.print "#{$0}: #{$!}\n"
      return
    end
  else
    f = $stdin
  end

  encoding = nil
  begin
    ## empty file
    xml = f.gets
    return if xml.nil?

    ## rewrite encoding in XML decl.
    if xml =~ /^<\?xml\sversion=.+\sencoding=.EUC-JP./i
      xml.sub!(/EUC-JP/i, "UTF-8")
      encoding = "EUC-JP"
    elsif xml =~ /^<\?xml\sversion=.+\sencoding=.Shift_JIS./i
      xml.sub!(/Shift_JIS/i, "UTF-8")
      encoding = "Shift_JIS"
    elsif xml =~ /^<\?xml\sversion=.+\sencoding=.ISO-2022-JP./i
      xml.sub!(/ISO-2022-JP/i, "UTF-8")
      encoding = "ISO-2022-JP"
    end

    ## read body
    xml << String(f.read)
  ensure
    ## close on every path, including the early return for empty input
    f.close
  end

  ## convert body encoding
  if encoding == "EUC-JP"
    xml = euctou8(xml)
  elsif encoding == "Shift_JIS"
    xml = euctou8(kconv(xml, EUC, SJIS))
  elsif encoding == "ISO-2022-JP"
    xml = euctou8(kconv(xml, EUC, JIS))
  end

  ## dummy default handler
  parser = XMLParser.new
  def parser.default
  end

  ## start to parse
  ## current is 1 between a hit and the event that gets printed for it
  current = 0
  begin
    parser.parse(xml) do |type, _name, data|
      case type
      when XMLParser::START_ELEM, XMLParser::END_ELEM, XMLParser::PI
        ## markup events are not searched
      when XMLParser::CDATA
        ## \A..\z: skip only chunks that are whitespace from start to end;
        ## ^..$ would also skip any chunk that merely contains a blank line
        next if data =~ /\A\s*\z/
        data = u8toeuc(data)

        if data =~ pattern
          line = parser.line
          print "#{file}:" if file && printfile == 1
          print "#{line}:" if printline == 1
          parser.defaultCurrent
          current = 1
        end
      else
        next if current == 0
        escaped = data.gsub("\n", "\\n")
        print u8toeuc("#{escaped}\n")
        current = 0
      end
    end
  rescue XMLParserError
    ## diagnostics go to $stderr, like the open failure above
    line = parser.line
    $stderr.print "#{$0}: #{$!} (#{file}:#{line})\n"
  end
end

## Write the command-line synopsis to standard error and exit with status 1.
def usage
  synopsis = "Usage: #{$0} [-n] [-e] <expr> <files...>\n"
  $stderr.print(synopsis)
  exit(1)
end

## Command-line driver.
## NOTE(review): parseArgs comes from the parsearg library; it is presumed
## to set $OPT_n / $OPT_e and strip the options from ARGV - confirm.
parseArgs(0, nil, "n", "e:")

## no expression at all: neither -e nor a positional argument
usage if ARGV.length == 0 && $OPT_e == nil

## the expression is the -e value when present, else the first argument
expr = $OPT_e || ARGV.shift

begin
  pattern = Regexp.new(expr)
rescue
  $stderr.print "#{$0}: #{$!}\n"
  exit 1
end

## whatever is left in ARGV is the list of files to search
show_line = $OPT_n ? 1 : 0
case ARGV.length
when 0
  ## no files: read standard input
  xmlgrep(pattern, nil, 0, show_line)
when 1
  ## a single file: no file-name prefix
  xmlgrep(pattern, ARGV[0], 0, show_line)
else
  ## several files: prefix every hit with its file name
  ARGV.each { |file| xmlgrep(pattern, file, 1, show_line) }
end

__END__
## Local variables:
## mode: ruby
## End: