1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
|
#!/usr/bin/env ruby
# DocDiff: word/character-oriented text comparison utility
# Copyright (C) 2002-2011 Hisashi MORITA
# Requirements: Ruby (>= 1.8)
require 'docdiff'
require 'optparse'
# do_config_stuff
# Built-in default settings. Each key is a setting name; the values here are
# the ones in effect when nothing else supplies a value for that setting.
default_config = {
  :resolution => "word",   # comparison granularity
  :encoding   => "auto",   # "auto" means: guess from the documents
  :eol        => "auto",   # "auto" means: guess from the documents
  :format     => "html",   # output format
  :cache      => true,
  :digest     => false,
  :verbose    => false
}
# Settings given on the command line are collected in their own hash
# (`clo` is just a short alias for `command_line_options`).
clo = command_line_options = {}

# if invoked as "worddiff" or "chardiff",
# appropriate resolution is set respectively.
# A resolution option parsed below overwrites this preset.
case File.basename($0, ".*")
when "worddiff" then clo[:resolution] = "word"
when "chardiff" then clo[:resolution] = "char"
end

# Declare all command line options and parse ARGV in place: recognized
# options are removed from ARGV, leaving the remaining arguments behind.
# ARGV.options returns nil after reporting a parse error, in which case the
# script exits with status 1.
ARGV.options {|o|
  # --- resolution (granularity) ---
  possible_resolutions = ['line', 'word', 'char']
  o.def_option('--resolution=RESOLUTION',
               possible_resolutions,
               'specify resolution (granularity)',
               possible_resolutions.join('|') + ' (default is word)'
              ){|s| clo[:resolution] = (s || "word")}
  o.def_option('--line', 'set resolution to line'){clo[:resolution] = "line"}
  o.def_option('--word', 'set resolution to word'){clo[:resolution] = "word"}
  o.def_option('--char', 'set resolution to char'){clo[:resolution] = "char"}

  # --- character encoding ---
  possible_encodings = ['ASCII','EUC-JP','Shift_JIS','CP932','UTF-8','auto']
  o.def_option('--encoding=ENCODING',
               possible_encodings,
               'specify character encoding',
               possible_encodings.join('|'), "(default is auto. try ASCII for single byte encodings such as ISO-8859-X)"
              ){|s| clo[:encoding] = (s || "auto")}
  o.def_option('--ascii', 'same as --encoding=ASCII'){clo[:encoding] = "ASCII"}
  o.def_option('--iso8859x', 'same as --encoding=ASCII'){clo[:encoding] = "ASCII"}
  o.def_option('--eucjp', 'same as --encoding=EUC-JP'){clo[:encoding] = "EUC-JP"}
  o.def_option('--sjis', 'same as --encoding=Shift_JIS'){clo[:encoding] = "Shift_JIS"}
  o.def_option('--cp932', 'same as --encoding=CP932'){clo[:encoding] = "CP932"}
  o.def_option('--utf8', 'same as --encoding=UTF-8'){clo[:encoding] = "UTF-8"}

  # --- end-of-line character ---
  possible_eols = ['CR','LF','CRLF','auto']
  o.def_option('--eol=EOL',
               possible_eols,
               'specify end-of-line character',
               possible_eols.join('|') + ' (default is auto)'
              ){|s| clo[:eol] = (s || "auto")}
  o.def_option('--cr', 'same as --eol=CR'){clo[:eol] = "CR"}
  o.def_option('--lf', 'same as --eol=LF'){clo[:eol] = "LF"}
  o.def_option('--crlf', 'same as --eol=CRLF'){clo[:eol] = "CRLF"}

  # --- output format ---
  # The fallback value matches the default announced in the help text.
  possible_formats = ['tty','manued','html','wdiff','stat','user']
  o.def_option('--format=FORMAT',
               possible_formats,
               'specify output format',
               possible_formats.join('|'),
               "(default is html)",
               '(user tags can be defined in config file)'
              ){|s| clo[:format] = (s || "html")}
  o.def_option('--tty', 'same as --format=tty'){clo[:format] = "tty"}
  o.def_option('--manued', 'same as --format=manued'){clo[:format] = "manued"}
  o.def_option('--html', 'same as --format=html'){clo[:format] = "html"}
  o.def_option('--wdiff', 'same as --format=wdiff'){clo[:format] = "wdiff"}
  o.def_option('--stat', 'same as --format=stat (not supported yet)'){clo[:format] = "stat"}

  # --- labels ---
  # The handler receives one label per occurrence of the option, so the
  # first occurrence fills :label1 and a later one fills :label2
  # (any further occurrence replaces :label2).
  o.def_option('--label LABEL', '-L LABEL',
               'Use label instead of filename (not supported yet)'
              ){|s|
    if clo[:label1].nil?
      clo[:label1] = s
    else
      clo[:label2] = s
    end
  }

  # --- digest / presentation ---
  o.def_option('--digest', 'digest output, do not show all'){clo[:digest] = true}
  o.def_option('--summary', 'same as --digest'){clo[:digest] = true}
  possible_types = ['inline', 'multi']
  # `||=` keeps the first --display value when the option is repeated.
  o.def_option('--display=DISPLAY',
               possible_types,
               'specify presentation type (effective only with digest; experimental feature)',
               possible_types.join('|'),
               '(default is inline)'){|s| clo[:display] ||= s.downcase}

  # --- miscellaneous switches ---
  o.def_option('--cache', 'use file cache (not supported yet)'){clo[:cache] = true}
  o.def_option('--no-config-file',
               'do not read config files'){clo[:no_config_file] = true}
  o.def_option('--verbose', 'run verbosely (not supported yet)'){clo[:verbose] = true}

  # --- informational options: print and exit successfully ---
  o.def_option('--help', 'show this message'){puts o; exit(0)}
  o.def_option('--version', 'show version'){puts DocDiff::AppVersion; exit(0)}
  o.def_option('--license', 'show license'){puts DocDiff::License; exit(0)}
  o.def_option('--author', 'show author(s)'){puts DocDiff::Author; exit(0)}
  o.on_tail("When invoked as worddiff or chardiff, resolution will be set accordingly.",
            "Config files: /etc/docdiff/docdiff.conf, ~/etc/docdiff/docdiff.conf")
  o.parse!
} or exit(1)
# Create the application object and fill in its configuration step by step:
# built-in defaults first, then config files (unless disabled on the command
# line), and finally the command line options themselves.
docdiff = DocDiff.new()
docdiff.config.update(default_config)
unless clo[:no_config_file] == true
  # Verbosity is checked afresh after each config file, using both the
  # command line flag and the current docdiff.config value.
  verbose = lambda { clo[:verbose] == true || docdiff.config[:verbose] == true }

  # System-wide config file.
  message = docdiff.process_config_file(DocDiff::SystemConfigFileName)
  STDERR.print message if verbose.call

  # Per-user config file. Reset the message first so that, when no user
  # config file exists, the system config message is not printed again.
  message = nil
  case
  when File.exist?(DocDiff::UserConfigFileName) && File.exist?(DocDiff::AltUserConfigFileName)
    # The two user config locations are mutually exclusive.
    raise "#{DocDiff::UserConfigFileName} and #{DocDiff::AltUserConfigFileName} cannot be used at the same time. Remove or rename either one."
  when File.exist?(DocDiff::UserConfigFileName)
    message = docdiff.process_config_file(DocDiff::UserConfigFileName)
  when File.exist?(DocDiff::AltUserConfigFileName)
    message = docdiff.process_config_file(DocDiff::AltUserConfigFileName)
  end
  STDERR.print message if message && verbose.call
end
# Command line options are applied last.
docdiff.config.update(clo)
# config stuff done
# config stuff done
# process the documents
# The first two arguments left in ARGV name the documents to compare.
raise "Try `#{File.basename($0)} --help' for more information." if ARGV[0].nil?
raise "Specify at least 2 target files." if ARGV[1].nil?

# Both paths must exist, and both must be regular files. Existence of both
# is checked before the file-type check of either.
targets = [ARGV[0], ARGV[1]]
targets.each {|target| raise "No such file: #{target}." unless FileTest.exist?(target)}
targets.each {|target| raise "#{target} is not a file." unless FileTest.file?(target)}

# Read both documents completely.
file1_content = File.open(targets[0], "r") {|io| io.read}
file2_content = File.open(targets[1], "r") {|io| io.read}

# Start from the configured encoding and eol for both documents.
encoding1 = encoding2 = docdiff.config[:encoding]
eol1 = eol2 = docdiff.config[:eol]

# With "auto", guess each document's encoding; the guesses must be known
# and must agree.
if docdiff.config[:encoding] == "auto"
  encoding1 = DocDiff::CharString.guess_encoding(file1_content)
  encoding2 = DocDiff::CharString.guess_encoding(file2_content)
  if encoding1 == "UNKNOWN" || encoding2 == "UNKNOWN"
    raise "Document encoding unknown (#{encoding1}, #{encoding2})."
  elsif encoding1 != encoding2
    raise "Document encoding mismatch (#{encoding1}, #{encoding2})."
  end
end

# With "auto", guess each document's end-of-line; the guesses must be
# present, known, and must agree.
if docdiff.config[:eol] == "auto"
  eol1 = DocDiff::CharString.guess_eol(file1_content)
  eol2 = DocDiff::CharString.guess_eol(file2_content)
  if eol1.nil? || eol2.nil?
    raise "Document eol is nil (#{eol1.inspect}, #{eol2.inspect}). The document might be empty."
  elsif eol1 == 'UNKNOWN' || eol2 == 'UNKNOWN'
    raise "Document eol unknown (#{eol1.inspect}, #{eol2.inspect})."
  elsif eol1 != eol2
    raise "Document eol mismatch (#{eol1}, #{eol2})."
  end
end

doc1 = DocDiff::Document.new(file1_content, encoding1, eol1)
doc2 = DocDiff::Document.new(file2_content, encoding2, eol2)

# Pass only the comparison/presentation settings on to the run.
run_options = {
  :resolution => docdiff.config[:resolution],
  :format => docdiff.config[:format],
  :digest => docdiff.config[:digest],
  :display => docdiff.config[:display]
}
output = docdiff.run(doc1, doc2, run_options)
print output
|