File: rmcolumn

package info (click to toggle)
nosql 3.1-4
  • links: PTS
  • area: main
  • in suites: woody
  • size: 1,448 kB
  • ctags: 267
  • sloc: cpp: 1,028; ansic: 915; awk: 732; perl: 502; tcl: 292; sh: 289; makefile: 44
file content (121 lines) | stat: -rwxr-xr-x 3,405 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/mawk -We
# *********************************************************************
#  Written by and copyright Carlo Strozzi <carlos@linux.it>.
#
#  rmcolumn: remove selected columns from a NoSQL table.
#  Copyright (C) 1998-2001 Carlo Strozzi <carlos@linux.it>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  2001-01-03 Ported to NoSQL v3
#  2001-04-17 Added inline help
#  2001-08-17 Added stdio portability
#
#  $Id$
# *********************************************************************

# Parse the command line.
#
# Recognised options:
#   -N, --no-header     do not print the header and dashline
#   -i, --input  FILE   read the table from FILE instead of stdin
#   -o, --output FILE   write the result to FILE instead of stdout
#   -h, --help          show the help text and exit with status 1
# Every other non-empty argument is taken as the name of a column to remove.
BEGIN {
  NULL = ""; FS = OFS = "\t"

  # Walk the arguments by count instead of stopping at the first empty
  # string, so that an empty argument does not silently end the parsing
  # and cause the options and column names that follow it to be ignored.
  for (i = 1; i < ARGC; i++) {
    if (ARGV[i] == "-N" || ARGV[i] == "--no-header") no_hdr = 1
    else if (ARGV[i] == "-i" || ARGV[i] == "--input") i_file = ARGV[++i]
    else if (ARGV[i] == "-o" || ARGV[i] == "--output") o_file = ARGV[++i]
    else if (ARGV[i] == "-h" || ARGV[i] == "--help") {
       system("grep -v '^#' @DOCPATH@/rmcolumn.txt")
       # exit in BEGIN still runs the END rule, which returns rc.
       rc = 1
       exit(rc)
    }
    else if (ARGV[i] != NULL) command_cols[++j] = ARGV[i]
  }

  # Hide all arguments from awk, so that column names are not mistaken
  # for input files.
  ARGC = 1

  if (o_file == NULL) o_file = "@STDOUT@"
  # Re-expose the input file, if any, as the only file operand.
  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }
}

# Header line: work out which columns survive, print the new header and
# dashline, and build the back-end mawk(1) command that will print only
# the surviving fields of each data row.
NR == 1 {
  # Start building the back-end awk program.

  awkpgm = "'BEGIN{FS=OFS=\"\\t\";}"

  # Load the column position array.
  while (++p <= NF) {
    # Make sure we pick the first occurrence of duplicated column
    # names (it may happen after a join).

    if (P[$p] == NULL) { P[$p] = p; good_cols++ }
  }

  # Now remove unwanted columns.
  for (i in command_cols) {
    if (P[command_cols[i]] != NULL) {
      delete P[command_cols[i]]
      # Exit if all columns have been removed.
      if (--good_cols == 0)  exit
    }
  }

  # Build the list of columns, in awk format.  Fields are referenced by
  # position rather than through variables named after the columns, so
  # that a column called e.g. NF, FS, in or length cannot corrupt or
  # break the generated program.
  for (i = 1; i <= NF; i++) {
    if (P[$i] != NULL) {
      out_rec = out_rec OFS $i
      out_list = out_list ",$(" P[$i] ")"
    }
  }

  # Remove leading extra comma from out_list.
  sub(/^,/, NULL, out_list)

  # Remove leading extra OFS from out_rec, then print header and dashline.
  if (!no_hdr) {
    sub(/^\t/, "", out_rec); print out_rec > o_file
    gsub(/[^\t]/, "-", out_rec); print out_rec > o_file
  }

  # Make sure the header is printed before calling mawk(1) again.
  fflush()

  # Close output file before the back-end mawk(1) appends to it.
  if (o_file != "@STDOUT@") close(o_file)

  awkpgm = awkpgm "{print " out_list ";}'"

  unix_cmd = "mawk " awkpgm
  next
}

# Dashline: it has already been regenerated together with the header, so
# the input one is dropped.  The back-end command line is finalised here.
NR == 2 {
  # Let's save one concurrent process and a couple of msec.
  unix_cmd = "exec " unix_cmd

  if (o_file != "@STDOUT@") {
    # Single-quote the file name for the shell, turning each embedded
    # single quote into '\'' so that names containing blanks or shell
    # metacharacters refer to the same file the header was written to.
    n_parts = split(o_file, name_parts, "'")
    quoted = name_parts[1]
    for (k = 2; k <= n_parts; k++) quoted = quoted "'\\''" name_parts[k]
    unix_cmd = unix_cmd " >> '" quoted "'"
  }

  next
}

# Data rows: hand every remaining record to the back-end mawk(1) process.
{
  print $0 | unix_cmd
}

# Exit status: rc if it was set earlier (e.g. after showing the help),
# otherwise the exit status of the back-end mawk(1) process.
END {
   if (rc) exit(rc)

   # With at most a header and a dashline read, no record was ever piped
   # to the back-end, so there is no process to close; closing a pipe
   # that was never opened would report a spurious failure.
   if (NR <= 2) exit(0)

   exit(close(unix_cmd))	  # Return back-end mawk(1) exit status
}

#
# End of program
#