1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
#!/usr/bin/env tclsh
## -*- tcl -*-
# Perform a diff on two CSV files.
# The result is a CSV file
package require csv
package require cmdline
# ----------------------------------------------------
# csvdiff ?-n? ?-sep sepchar? ?-key LIST? file1 file2
#
# Argument processing and checks.
#
# Options:
#   -n            write line numbers as part of the output
#   -sep sepchar  separator character (default ",")
#   -key LIST     comma separated list of column ranges which form the
#                 key of a record. Every item is one of
#                   n    - column n only
#                   -m   - columns 0 to m
#                   n-   - columns n to the last column
#                   n-m  - columns n to m
#                 Default is "0-", i.e. all columns.
#
# Results of this section, used by the processing below:
#   sepChar, lineout, fileA, fileB, inA, inB and keySpec, the latter
#   converted into a list of {from to} index pairs usable with lrange.

set sepChar ,
set usage "Usage: $argv0 ?-n? ?-sep sepchar? ?-key LIST? file1 file2\n\tLIST=idx,...\n\tidx in \{n, -m, n-, n-m\}"
set keySpec "0-"

# lineout = boolean flag, indicates whether line numbers have to be
# written as part of the output (1) or not (0). Defaults to 0.
set lineout 0

while {[set ok [cmdline::getopt argv {sep.arg key.arg n} opt val]] > 0} {
    switch -exact -- $opt {
        sep {set sepChar $val}
        key {set keySpec $val}
        n   {set lineout 1}
    }
}

# A negative result signals an option error; exactly two file names
# must remain after the options were removed from argv.
if {($ok < 0) || ([llength $argv] != 2)} {
    puts stderr $usage
    exit -1
}

foreach {fileA fileB} $argv break

# Convert the textual key specification into {from to} pairs.
#
# The usage text documents LIST as comma separated, so the value is
# split at commas. Whitespace is accepted as a separator too, because
# the value used to be iterated over as a plain Tcl list. The patterns
# are anchored so that items with stray characters are rejected here
# instead of causing a "bad index" error later in lrange.
set idx [list]
foreach i [split $keySpec ", \t\n"] {
    if {[string length $i] == 0} {
        # Empty piece caused by adjacent separators.
        continue
    }
    if {[regexp -- {^([0-9]+)-([0-9]+)$} $i -> f t]} {
        lappend idx [list $f $t]
    } elseif {[regexp -- {^([0-9]+)-$} $i -> f]} {
        lappend idx [list $f end]
    } elseif {[regexp -- {^-([0-9]+)$} $i -> t]} {
        lappend idx [list 0 $t]
    } elseif {[regexp -- {^[0-9]+$} $i]} {
        lappend idx [list $i $i]
    } else {
        puts stderr $usage
        exit -1
    }
}

# A key specification without any usable item is an error.
if {[llength $idx] == 0} {
    puts stderr $usage
    exit -1
}

set keySpec $idx

set inA [open $fileA r]
set inB [open $fileB r]
# ----------------------------------------------------
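# Actual processing, uses the following information from the
# command line:
#
# inA     - channel for input A (file1)
# inB     - channel for input B (file2)
# fileA   - name of file1, used in warnings
# fileB   - name of file2, used in warnings
# sepChar - separator character
# keySpec - list of {from to} column ranges forming the record key
# lineout - boolean flag, write line numbers as part of the output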
# We read file2 completely and then go through the records of
# file1. For any record we don't find we write a "deleted" record. If
# we find the matching record we remove it from the internal
# storage. In a second sweep through the internal array we write
# "added" records for the remaining data as that was not in file1 but
# is in file2.
# keyof --
#
#	Compute the key of a single record.
#
# Arguments:
#	data	List holding the fields of one record.
#
# Results:
#	A flat list containing the fields selected by each {from to}
#	pair found in the global variable keySpec, in the order of
#	the pairs.

proc keyof {data} {
    global keySpec

    set fields {}
    foreach range $keySpec {
        set from [lindex $range 0]
        set to   [lindex $range 1]
        # Append the selected fields one by one, keeping the key flat.
        foreach field [lrange $data $from $to] {
            lappend fields $field
        }
    }
    return $fields
}
# Pass 1: read file2 (channel inB) completely.
#
#   order - list of {linenum data} pairs, one per record of file2, in
#           file order. The line number is kept per record so that
#           "added" records can report where they are in file2.
#   map   - array, key -> line number in file2 of the last record with
#           that key (always > 0). Pass 2 replaces the value with the
#           negated line number in file1 once the key was seen there.

set order [list]
array set map {}
set linenum 0
while {[gets $inB line] >= 0} {
    incr linenum
    set data [::csv::split $line $sepChar]
    set key  [keyof $data]
    if {[info exists map($key)]} {
        puts stderr "warning: $key occurs multiple times in $fileB (lines $linenum and $map($key))"
    }
    set map($key) $linenum
    lappend order [list $linenum $data]
}
close $inB

# Pass 2: go through file1 (channel inA). Records whose key is known
# from file2 are marked as matched (negative value in map), all other
# records are written as "deleted" (-), optionally with their line
# number in file1.

set linenum 0
while {[gets $inA line] >= 0} {
    incr linenum
    set data [::csv::split $line $sepChar]
    set key  [keyof $data]
    if {[info exists map($key)]} {
        if {$map($key) < 0} {
            # Already matched by an earlier record of file1.
            puts stderr "warning: $key occurs multiple times in $fileA (lines $linenum and [expr {-$map($key)}])"
        } else {
            set map($key) [expr {-$linenum}]
        }
        continue
    }
    if {$lineout} {
        puts stdout [::csv::join [linsert $data 0 - $linenum] $sepChar]
    } else {
        puts stdout [::csv::join [linsert $data 0 -] $sepChar]
    }
}
close $inA

# Pass 3: every record of file2 whose key is still unmatched (value in
# map still positive) is written as "added" (+). Such a record does not
# exist in file1, therefore the line number reported with -n is the
# record's own line number in file2.

foreach item $order {
    foreach {lineB data} $item break
    if {$map([keyof $data]) > 0} {
        if {$lineout} {
            puts stdout [::csv::join [linsert $data 0 + $lineB] $sepChar]
        } else {
            puts stdout [::csv::join [linsert $data 0 +] $sepChar]
        }
    }
}
exit
|