File: ddawk

package info (click to toggle)
nosql 3.1-4
links: PTS
area: main
in suites: woody
size: 1,448 kB
ctags: 267
sloc: cpp: 1,028; ansic: 915; awk: 732; perl: 502; tcl: 292; sh: 289; makefile: 44
file content (148 lines) | stat: -rwxr-xr-x 4,780 bytes
#!/usr/bin/awk -f
#****************************************************************************
#  ##   ##         #####   #####  ##     **       NoSQL RDBMS - ddawk       *
#  ###  ##        ####### ####### ##     **        $Revision: 2.4.0 $       *
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **   Carlo Strozzi (c) 1998-2000   *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Written by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
#       to print, pipe through 'pr -t -e8'
#****************************************************************************
#
# NAME
#        ddawk - print column positions in the requested order
#
# SYNOPSIS
#        ddawk < table
#
#        Note: options must be passed through the environment
#	 variable _awk_args, i.e.:
#
#        _awk_args='[options] [-H|--header file] column ...'
#
#
# DESCRIPTION
#
#     Selects columns by name (and order) and outputs the list of their
#     respective positions in the table, in a format suitable for use
#     by the awk(1) utility. If a column name does not match any of the
#     columns in the table, it is silently ignored. If no columns are
#     specified, then the current sequence of fields as found in the
#     table header is printed.
#
# OPTIONS
#      -l|--last
#          If the input table contains duplicated column names
#          pick the last occurrence of each. The default is to
#          pick the first one. This is sometimes useful after
#          the 'join' operator.
#
#     -e|--eval
#          The output string is to be eval'ed by the invoking shell
#          program. The output field list is enclosed in double quotes
#          and any '$' signs in it are prepended with a backslash.
#
#     -a|--add-missing
#          If a column name does not match any of the columns in the
#          table, instead of ignoring it print a non-existent field,
#          i.e. a field positioned past the last one: NF+1 for the first
#          missing name, NF+2 for the next distinct one, and so on.
#
#     -H|--header file
#          Specifies a file where to store the header information
#          corresponding to the requested columns.
#          Warning: if 'file' already exists it is overwritten.
#
########################################################################

# Startup: set the field separators and decode the option string that the
# caller passes in the _awk_args environment variable.
#
# Sets: eval, pick_last, add_missing (flags), o_file (header file name),
# cols[1..j] (requested column names, in order) and NULL (empty string).
BEGIN {
  NULL = ""
  FS = OFS = "\t"

  # A single-blank separator makes split() break on runs of whitespace,
  # so no element of word[] is ever empty.
  n_args = split( ENVIRON["_awk_args"], word, " " )

  for ( w = 1; w <= n_args; w++ )
  {
    opt = word[w]

    if ( opt == "-H" || opt == "--header" )
    {
      # The following word is the file name, not a column name.
      o_file = word[++w]
    }
    else if ( opt == "-a" || opt == "--add-missing" ) add_missing = 1
    else if ( opt == "-l" || opt == "--last" ) pick_last = 1
    else if ( opt == "-e" || opt == "--eval" ) eval = 1
    else cols[++j] = opt
  }

  # Arguments arrive through the environment, not ARGV, so ARGC is left alone.
}

########################################################################
# Main loop
########################################################################

# Header record: translate the requested column names into field
# positions, print them on stdout as an awk field list ("$(1),$(3)", no
# trailing newline) and, if a header file was requested, write the
# matching two-line table header (names, then dashes) to it.
#
# Reads, as set by the BEGIN rule: cols[1..j], pick_last, add_missing,
# eval, o_file and NULL.
NR == 1 {
  nf = NF

  # P[name] is the position chosen for a column name, N[position] the name
  # stored there.  The first occurrence of a duplicated name wins unless
  # pick_last is set, in which case later occurrences overwrite it.
  for ( i = 1; i <= NF; i++ )
  {
    if ( pick_last || !( $i in P ) ) { P[$i] = i ; N[i] = $i }
  }

  # If no columns were specified, then use all columns in table order.
  # A position that lost to an earlier duplicate has no N[] entry; it is
  # left out so that no empty name is looked up (or, with add_missing,
  # given a phantom position) below.
  if ( !j )
  {
    for ( i = 1; i <= NF; i++ ) if ( i in N ) cols[++j] = N[i]
  }

  # Build the position list and the header names together, so the header
  # file always describes exactly the fields that are printed.
  for ( i = 1; i <= j; i++ )
  {
    name = cols[i]

    if ( !( name in P ) )
    {
      if ( !add_missing ) continue

      # Each distinct missing name gets the next position past the real
      # fields, which awk evaluates as an empty field.
      P[name] = ++nf
    }

    if ( k++ ) { out = out "," ; o_names = o_names OFS }
    out = out sprintf("$(%d)", P[name])
    o_names = o_names name
  }

  if ( k )
  {
    if ( eval )
    {
      # Protect the '$' signs and quote the list for the invoking shell.
      gsub(/\$/,"\\$",out) ; out = "\"" out "\""
    }
    printf("%s", out)

    # Write the output header file, if any.
    if ( o_file != NULL )
    {
      # The dashed line mirrors the names: every non-tab becomes '-'.
      o_dash = o_names ; gsub( /[^\t]/, "-", o_dash )
      printf("%s\n%s\n", o_names, o_dash) > o_file
    }
  }
}

# Only the header record is needed: stop at the second record and skip
# the rest of the input table.
NR >= 2 {
  exit
}

########################################################################
# End of program.
########################################################################