1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
#!/usr/bin/perl
# Original code: $Id: valid,v 2.8 1993/06/10 16:26:32 hobbs
#
# Script-wide settings and the help text printed by the -help option.

# Revision string; it is echoed near the end of the help text.
$RCS_ID = '$Id: nsq-valid,v 0.9 1998/03/04 09:12:49 carlos Exp $';

# Strip any leading directory part so messages show the bare program name.
$0 =~ s{.*/}{};

# Default delimiter placed between values when lines are listed.
$SEP = "|";

# Help text. $0, $SEP and $RCS_ID are interpolated at this point, so the
# delimiter shown is the default one set just above.
$HelpInfo = <<"EOH";
NoSQL operator: $0
Usage: $0 [options] [rdbtable ...]
Options:
-help Print this help info.
-l[x] List exact data values with visible delimiters, using 'x' as the
delimiter. The value of 'x' may be multi-char, default is "$SEP".
-nw (Default) No warning messages shown. The total number of any
warning conditions that exist is shown.
-size Report max size of actual data in each column.
-templ Generate a template file from the header of the table, on STDOUT.
Does NOT check the body of the table.
-w Print all warning messages.
Validates the structure of the rdbtable. Checks for consistent number of data
values per line, max width of column names and data values, and checks data
values for content type in defined numeric columns. The first type of error
above is a serious structure error; the others are only warnings.
Reads from STDIN if rdbtables are not given. Options may be abbreviated.
$RCS_ID
----------------------
NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.
This program comes with ABSOLUTELY NO WARRANTY; for details
refer to the GNU General Public License.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave., Cambridge, MA 02139, USA.
----------------------
EOH
# Parse the leading command-line options. Parsing stops at the first
# argument that does not start with '-', leaving any table names in @ARGV.
#
# Flags set here (all package globals read by the rest of the script):
#   $LST   - list every line with visible delimiters ($SEP between values)
#   $SEP   - delimiter for that listing; replaced by the text after -l
#   $NOW   - non-zero: individual warning messages are suppressed
#   $MAXSZ - report the max data size per column
#   $TPL   - print a template from the header only
#
# Every option pattern is anchored at the start of the argument, so an
# option letter buried inside another argument is no longer picked up.
# A doubled leading dash is tolerated so forms such as "--size" keep working.
$NOW++ ;                                    # default: no warning messages
while ( @ARGV && $ARGV[0] =~ /^-/ ) {
    $_ = shift( @ARGV ) ;
    if( /^--?h/ ){ print $HelpInfo ; exit 1 ; }
    if( /^--?l(.*)/ ){
        $LST++ ;
        # Test the length, not the truth, of the delimiter so that a
        # delimiter of "0" is honoured instead of being silently dropped.
        $SEP = $1 if length( $1 ) ;
        next ;
    }
    if( /^--?n/ ){ $NOW++ ; next ; }
    if( /^--?s/ ){ $MAXSZ++ ; next ; }
    if( /^--?t/ ){ $TPL++ ; next ; }
    if( /^--?w/ ){ $NOW = 0 ; next ; }
    die "\nBad arg: $_\n", "For help type \"$0 -help\".\n" ;
}
# Main driver: validate STDIN when no table names were given, otherwise
# validate each named file in turn, then exit with the total number of
# structure errors as the status.
if( @ARGV == 0 ){
    _scan_rdbtable( \*STDIN ) ;
    fin() ;
}
else{
    $ARGCNT = @ARGV ;
    # defined() so that a file literally named "0" is not taken as the end
    # of the argument list. $file stays a global because fin() prints it.
    while( defined( $file = shift( @ARGV ) ) ){
        # Three-arg open: the name is always treated as a plain file to
        # read, never as a mode prefix or a pipe command.
        open( IN, '<', $file ) || die "\nCan't open file: $file\n" ;

        # Start every file from a clean slate. The per-column max sizes,
        # column names, definitions and field count are cleared as well,
        # so nothing from the previous table shows up in this one's report.
        $dis = $lln = $wrn1 = $wrn2 = $err = 0 ;
        $nrf = 0 ;
        @hdrv = @wdth = @dtyp = @hdrs = @defn = @maxsz = () ;

        print "==== Checking: $file ...\n" if $ARGCNT > 1 ;
        _scan_rdbtable( \*IN ) ;

        # An explicit close resets $. ; re-opening a still-open handle does
        # not, which made reported line numbers run on across files.
        close( IN ) ;
        fin() ;
    }
}
# Exit statuses are truncated to 8 bits by the system; cap the value so
# that, for example, exactly 256 errors cannot come out as status 0.
exit( $toterr > 255 ? 255 : $toterr ) ;

# Feed the lines of the open handle $fh to chk(), one at a time via $_.
# Lines starting with '#' (after optional blanks) are collected in @hdrv
# and not checked. In template mode reading stops once two lines have
# been checked.
sub _scan_rdbtable {
    my( $fh ) = @_ ;
    while( <$fh> ){
        if( /^\s*#/ ){              # comment
            push( @hdrv, $_ ) ;
            next ;
        }
        last if $TPL && $lln >= 2 ;
        chk() ;
    }
}
sub chk{ # check current line, $_
    # Validate one non-comment line of the table, passed in $_.
    #
    # The 1st line checked is taken as the column names, the 2nd as the
    # column definitions, and every later one as a data row.
    #
    # Globals updated: $lln (lines checked so far), @F (fields of this line),
    # $y (tab count + 1), @hdrs and $nrf (line 1), @defn, @wdth, @dtyp and
    # $dis (line 2), @maxsz (data rows, only with $MAXSZ), and the counters
    # $err (structure errors), $wrn1 (width warnings) and $wrn2 (numeric
    # warnings).
    # Globals read: $LST, $TPL, $MAXSZ, $NOW, @hdrv and $. (for messages).
    $lln++ ;

    # chomp, not chop: a final line without a trailing newline must not
    # lose its last data character.
    chomp ;

    # Field count as implied by the tabs; unlike split() this also counts
    # empty fields at the end of the line.
    $y = ( tr/\t// ) + 1 ;

    if( $lln == 1 ){                            # column names
        @F = split( /\t/, $_ ) ;
        @hdrs = @F ;
        $nrf = $y ;                             # nr fields per line
        # split() drops trailing empty fields, so a mismatch with the tab
        # count means the line ends in one or more empty names.
        if( $y != @F ){
            print ">>> NULL field in COLUMN NAME line of header\n" ;
            $err++ ;
        }
        for my $name ( @hdrs ){
            if( $name =~ /^[#-]/ ){
                print ">>> Bad COLUMN NAME: $name\n" ;
                $wrn1++ ;
            }
        }
        println() if $LST ;
        return ;
    }

    if( $lln == 2 ){                            # column definitions
        @F = split( /\t/, $_ ) ;
        @defn = @F ;
        if( $y != @F ){
            print ">>> NULL field in DEFINITION line of header\n" ;
            $err++ ;
        }
        if( $y != $nrf ){
            $err++ ;
            print ">>> Bad nr fields in DEFINITION line of header: $y\n" ;
        }
        for my $def ( @F ){
            # Width: the first run of digits in the definition, or the
            # length of the definition text when it has no digits.
            my $width = ( $def =~ /(\d+)/ ) ? $1 : length( $def ) ;
            push( @wdth, $width ) ;
            $dis += $width + 1 ;
            # Type: numeric when the first non-blank word contains N or n.
            my $numeric = ( $def =~ /(\S+)/ && $1 =~ /N/i ) ;
            push( @dtyp, $numeric ? "N" : "S" ) ;
        }
        $dis-- ;                                # one separator per gap only
        for my $i ( 0 .. $#hdrs ){
            if( length( $hdrs[$i] ) > $wdth[$i] ){
                print " Column name too long: $hdrs[$i]\n" if ! $NOW ;
                $wrn1++ ;
            }
        }
        println() if $LST ;
        if( $TPL ){                             # template: header only
            print @hdrv ;
            for my $k ( 0 .. $#hdrs ){
                printf( "%2d %20s %s\n", $k, $hdrs[$k], $defn[$k] ) ;
            }
        }
        return ;
    }

    # data lines
    @F = split( /\t/, $_, $nrf ) ;
    if( $y != $nrf ){
        print ">>> Line: $., Bad nr fields: $y\n" ;
        $err++ ;
    }
    for my $i ( 0 .. $#F ){
        my $wd = length( $F[$i] ) ;
        if( $MAXSZ && $wd > $maxsz[$i] ){
            $maxsz[$i] = $wd ;
        }
        if( $wd > $wdth[$i] ){
            printf( " Line: %d, Column: %s, Data too long(%d) \"%s\"\n",
                $., $hdrs[$i], $wd, $F[$i] ) if ! $NOW ;
            $wrn1++ ;
        }
        if( $dtyp[$i] eq "N" ){                 # chk type numeric
            # Allowed: digits, E/e, '+', ',', '-', '.' and blank. The class
            # was previously written "+-.", a range that also covers ',';
            # it is spelled out here so the accepted input is unchanged.
            if( $F[$i] =~ /[^0-9Ee+,\-. ]/ ){
                printf( " Line: %d, Column: %s, Bad Numeric data \"%s\"\n",
                    $., $hdrs[$i], $F[$i] ) if ! $NOW ;
                $wrn2++ ;
            }
        }
    }
    println() if $LST ;
}
sub fin{ # finish up
    # Print the summary for the table just read and add its structure
    # errors to the running total in $toterr.
    #
    # Does nothing in template mode ($TPL). With $MAXSZ set, first prints
    # the collected comment lines and, for each column, its index, name,
    # the first word of its definition, the largest data size seen and the
    # rest of the definition.
    #
    # Globals read: $TPL, $MAXSZ, @hdrv, @hdrs, @defn, @maxsz, $lln, $nrf,
    # $dis, $wrn1, $wrn2, $file. Globals updated: $err, $toterr.
    return if $TPL ;

    if( $MAXSZ ){
        print @hdrv ;
        for my $k ( 0 .. $#hdrs ){
            my @t = split( ' ', $defn[$k], 2 ) ;
            printf( "%2d %20s %-3s %s %s\n",
                $k, $hdrs[$k], $t[0], $maxsz[$k], $t[1] ) ;
        }
    }

    # A table needs its two header lines (names and definitions). Input
    # that ends before both were seen is a structure error; without this
    # check it was reported as "rdbtable ok" with a negative body count.
    my $checked = $lln || 0 ;
    my $hdr_seen = $checked < 2 ? $checked : 2 ;
    if( $hdr_seen < 2 ){
        print ">>> Incomplete header: $hdr_seen of 2 header lines present\n" ;
        $err++ ;
    }

    my $hdr_lines  = @hdrv + $hdr_seen ;        # nr lines in header
    my $body_lines = $checked - $hdr_seen ;     # nr lines in body
    print "Columns: $nrf/($dis), Rows: $hdr_lines/$body_lines, " ;
    if( ! $err && ! $wrn1 && ! $wrn2 ){ print "rdbtable ok" ; }
    else{
        if( $wrn1 || $wrn2 ){ print "WARNINGS: $wrn1/$wrn2, " ; }
        if( $err ){ print ">>> STRUCTURE ERRORS: $err, <<< " ; }
    }
    print ": $file\n" ;
    $toterr += $err ;                           # total errors
}
sub println {
    # Emit the fields of the current line (@F) on one output line, with
    # the delimiter $SEP between neighbouring values.
    my $row = join( $SEP, @F ) ;
    print $row, "\n" ;
}
|