File: gbfilternear.c

package info (click to toggle)
gbutils 6.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,508 kB
  • sloc: ansic: 24,219; sh: 4,723; makefile: 155
file content (147 lines) | stat: -rw-r--r-- 4,204 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
  gbfilternear (ver. 5.6) -- Filter out data points that are too near each other in the Euclidean metric
  Copyright (C) 2008-2018 Giulio Bottazzi

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  (version 2) as published by the Free Software Foundation;
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#include "tools.h"

/*
  Entry point: thin out a set of points so that no two retained points
  are closer than a minimal Euclidean distance.

  Each input row is a point and each column a Cartesian coordinate.
  Rows are scanned in order; a row that has not been deleted causes the
  deletion of every later row lying at a distance strictly smaller than
  the threshold set with '-d' (default 1). Earlier rows therefore take
  precedence over later ones. The surviving rows are printed on standard
  output; verbose diagnostics go to standard error.

  Returns 0 on success; exits with a non-zero status on an unrecognized
  option, on a non-numeric '-d' argument or if memory for the separator
  string cannot be obtained.
*/
int main(int argc,char* argv[]){

  /* data storage variables; the table is addressed as vals[col][row] */
  size_t rows=0,columns=0;
  double **vals=NULL;

  /* minimum allowed distance between two retained points */
  double mindist = 1;
  /* deleted[i]==1 marks row i as removed from the output */
  char *deleted;

  /* options */
  int o_verbose=0;

  char *splitstring = strdup(" \t");

  /* variables for reading command line options */
  /* ------------------------------------------ */
  int opt;
  /* ------------------------------------------ */

  if(splitstring==NULL){
    fprintf(stderr,"%s: unable to allocate memory\n",argv[0]);
    exit(-1);
  }

  /* COMMAND LINE PROCESSING */
  /* getopt_long signals the end of the options with -1 */
  while((opt=getopt_long(argc,argv,"vhF:d:",gb_long_options, &gb_option_index))!=-1){
    if(opt==0){
      gbutils_header(argv[0],stdout);
      exit(0);
    }
    else if(opt=='?'){
      fprintf(stderr,"option %c not recognized\n",optopt);
      exit(-1);
    }
    else if(opt=='h'){
      /*print help*/
      fprintf(stdout,"Filter out too near points. Each row represents Cartesian coordinates\n");
      fprintf(stdout,"of a point in an Euclidean space, whose dimension is set by the number\n");
      fprintf(stdout,"of columns. Rows are removed which are nearer than a minimal distance\n");
      fprintf(stdout,"set with the option '-d'. The order is relevant as first entries are\n");
      fprintf(stdout,"the last to be removed.\n");
      fprintf(stdout,"\nUsage: %s [options]\n\n",argv[0]);
      fprintf(stdout,"Options:\n");
      fprintf(stdout," -d  minimal allowed distance (default 1)\n");
      fprintf(stdout," -F  specify the input fields separators (default \" \\t\")\n");
      fprintf(stdout," -h  this help\n");
      fprintf(stdout," -v  verbose mode\n");
      exit(0);
    }
    else if(opt=='F'){
      /*set the fields separator string*/
      free(splitstring);
      splitstring = strdup(optarg);
      if(splitstring==NULL){
        fprintf(stderr,"%s: unable to allocate memory\n",argv[0]);
        exit(-1);
      }
    }
    else if(opt=='d'){
      /*set the minimal allowed distance*/
      char *endptr=NULL;

      mindist=strtod(optarg,&endptr);
      /* reject arguments that are not entirely a number instead of
         silently falling back to 0, which would disable the filter */
      if(endptr==optarg || *endptr!='\0'){
        fprintf(stderr,"option -d requires a numeric argument, got '%s'\n",optarg);
        exit(-1);
      }
    }
    else if(opt=='v'){
      /*set the verbose mode*/
      o_verbose=1;
    }
  }
  /* END OF COMMAND LINE PROCESSING */

  /* initialize global variables */
  initialize_program(argv[0]);

  /* read data vals[col][row] */
  loadtable(&vals,&rows,&columns,0,splitstring);

  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
  if(o_verbose){
    fprintf(stderr,"loaded points:      %zu\n",rows);
    fprintf(stderr,"dimension of space: %zu\n",columns);
  }
  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */


  /* allocate space for array of deleted entries (zero = kept) */
  deleted = (char *) my_calloc(rows,sizeof(char));

  /* compute deleted entries */
  {
    size_t i,j,h;
    double distance;

    for(i=0;i<rows;i++){
      /* a row already removed cannot remove other rows */
      if(deleted[i]==1) continue;
      for(j=i+1;j<rows;j++){
        if(deleted[j]==1) continue;
        /* Euclidean distance between rows i and j */
        distance = 0;
        for(h=0;h<columns;h++)
          distance += pow(vals[h][i] - vals[h][j],2);
        distance=sqrt(distance);
        /* only the later row is dropped, so the input order matters */
        if(distance < mindist) deleted[j]=1;
      }
    }

  }

  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
  if(o_verbose){
    size_t i,delnum=0;

    for(i=0;i<rows;i++)
      if(deleted[i]==1) delnum++;

    fprintf(stderr,"minimum radius:     %g\n",mindist);
    fprintf(stderr,"deleted entries:    %zu\n",delnum);

  }
  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */

  /* print remaining entries; with no columns there is nothing to
     print and columns-1 would wrap around for an unsigned type */
  if(columns>0){
    size_t i,j;

    for(i=0;i<rows;i++)
      if(deleted[i]==0){
        for(j=0;j<columns-1;j++)
          printf(FLOAT_SEP,vals[j][i]);
        printf(FLOAT_NL,vals[columns-1][i]);
      }
  }

  /* release the buffers owned by this function */
  free(deleted);
  free(splitstring);

  return 0;

}