File: cluster.h

package info (click to toggle)
cluster 2.0.7-1-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,496 kB
  • sloc: ansic: 2,981; fortran: 123; sh: 18; makefile: 2
file content (197 lines) | stat: -rw-r--r-- 8,122 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
/* Declare everything, Fortran & C -- so we can register them */

#include <R.h>
#include <Rinternals.h>
/* -> Rconfig.h, but also Boolean.h RS.h */

#ifdef ENABLE_NLS
#include <libintl.h>
#define _(String) dgettext ("cluster", String)
#else
#define _(String) (String)
#endif

// These codes must match those in ../R/clara.q  <==>  'diss_kind'
typedef enum {
    EUCLIDEAN = 1,
    MANHATTAN = 2,
    JACCARD = 3
} DISS_KIND;

/* --------- ./clara.c ------------------*/

double randm(int *nrun);

void cl_clara(int *n,  /* = number of objects */
	      int *jpp,/* = number of variables */
	      int *kk, /* = number of clusters, 1 <= kk <= n-1 */
	      double *x,	/* Input:  the data x[n, jpp] _rowwise_ (transposed)
				 * Output: the first `n' values are the `clustering'
				 *	   (integers in 1,2,..,kk) */
	      int *nran,	/* = #{random samples} drawn	   (= `samples' in R)*/
	      int *nsam,	/* = #{objects} drawn from data set (`sampsize' in R) */
	      double *dys,/* [1:(1 + (nsam * (nsam - 1))/2)]
			   * Output: to contain the distances */
	      int *mdata,	/*= {0,1}; 1: min(x) is missing value (NA);  0: no NA */
	      double *valmd,/*[j]= missing value code (instead of NA) for x[,j]*/
	      int *jtmd,	/* [j]= {-1,1};	 -1: x[,j] has NA; 1: no NAs in x[,j] */
	      DISS_KIND *diss_kind, // = {EUCLIDEAN, MANHATTAN, JACCARD}
	      int/*logical*/ *rng_R,/*= {0,1};  0 : use clara's internal weak RNG;
				     *	        1 : use R's RNG (and seed) */
	      int/*logical*/ *pam_like,/* if (1), we do "swap()" as in pam(), otherwise
					  use the code as it was in clara() "forever"
					  upto 2011-04 */
	      int *correct_d,/* option for dist.computation: if (0), use the "fishy"
			     formula to update distances in the NA-case,
			     if (1), use a dysta2()-compatible formula */
	      int *nrepr, /* logical (0/1): 1 = "is representative object"  */
	      int *nsel,
	      int *nbest,/* x[nbest[j],] : the j-th obs in the final sample */
	      int *nr, int *nrx,/* prov. and final "medoids" aka representatives */
	      double *radus, double *ttd, double *ratt,
	      double *ttbes, double *rdbes, double *rabes,
	      int *mtt, double *obj,
	      double *avsyl, double *ttsyl, double *sylinf,
	      int *jstop, int *trace_lev,
	      double *tmp, /* = double [ 3 * nsam ] */
	      int *itmp	/* = integer[ 6 * nsam ] */
    );

void dysta2(int nsam, int jpp, int *nsel,
	    double *x, int n, double *dys, DISS_KIND diss_kind,
	    int *jtmd, double *valmd, Rboolean has_NA, Rboolean *toomany_NA);


void bswap2(int kk, int nsam, double s, const double dys[],
	    Rboolean pam_like, int trace_lev,
	    // result:
	    double *sky, int *nrepr,
	    double *dysma, double *dysmb, double *beter);

void selec(int kk, int n, int jpp, DISS_KIND diss_kind,
	   double *zb, int nsam, Rboolean has_NA, int *jtmd, double *valmd,
	   int trace_lev,
	   int *nrepr, int *nsel, double *dys, double *x, int *nr,
	   Rboolean *nafs, double *ttd, double *radus, double *ratt,
	   int *nrnew, int *nsnew, int *npnew, int *ns, int *np, int *new,
	   double *ttnew, double *rdnew, int correct_d);

void resul(int kk, int n, int jpp, DISS_KIND diss_kind, Rboolean has_NA,
	   int *jtmd, double *valmd, double *x, int *nrx, int *mtt, int correct_d);

void black(int kk, int jpp, int nsam, int *nbest,
	   double *dys, double s, double *x,
	   /* --> Output : */
	   double *avsyl, double *ttsyl, double *sylinf,
	   int *ncluv, int *nsend, int *nelem, int *negbr,
	   double *syl, double *srank);

/* -------- ./dysta.f --- (was in pam.f) -------------------- */
int F77_NAME(dysta)(int *nn, int *jpp, double *x, double *dys, int *ndyst,
		    int *jtmd, double *valmd, int *jhalt);
/* --------- ./pam.c ------------------*/

#ifdef _UNUSED_C_pam
void cl_pam(int *nn, int *jpp, int *kk, double *x, double *dys,
	    int *jdyss, /* jdyss = 0 : compute distances from x
			 *	      = 1 : distances provided	in x */
	    double *valmd, int *jtmd,
	    int *ndyst, int *nsend, int *nrepr, int *nelem,
	    double *radus, double *damer, double *ttd, double *separ,
	    double *ttsyl, double *obj, int *med, int *ncluv,
	    double *clusinf, double *sylinf, int *nisol, int* optim);
#endif

SEXP cl_Pam(SEXP k_, SEXP n_,
	    SEXP do_diss_, /* == !diss;  if true, compute distances from x (= x_or_diss);
			      otherwise distances provided by x_or_diss */
	    SEXP x_or_diss,// this "is"  if(do_diss) "x[]" (n x p) else "dys[]"
	    SEXP all_stats_, // all_stats == !cluster.only
	    SEXP medoids, // NULL or integer(k) subset {1:n}
	    SEXP do_swap_, SEXP trace_lev_,
	    SEXP keep_diss_, SEXP pam_once_,

	    // the next 3 are only needed  if(do_diss)
	    SEXP val_md, SEXP j_md, // "md" := [m]issing [d]ata
	    SEXP dist_kind); // = 1 ("euclidean")  or 2 ("manhattan")

void bswap(int kk, int nsam, int *nrepr,
	   /* nrepr[]: here is boolean (0/1): 1 = "is representative object"  */
	   Rboolean med_given, Rboolean do_swap, int trace_lev,
	   double *dysma, double *dysmb, double *beter,
	   const double dys[], double s, double *obj, int pamonce);

void cstat(int kk, int nn, int *nsend, int *nrepr, Rboolean all_stats,
	   double *radus, double *damer, double *ttd, double *separ, double *s,
	   double *dys, int *ncluv, int *nelem, int *med, int *nisol);

void dark(int kk, int nn, const int ncluv[], const double dys[], double s,
	  int *nsend, int *nelem, int *negbr,
	  double *syl, double *srank, double *avsyl, double *ttsyl,
	  double *sylinf);


/* --------- ./spannel.c ------------------*/

void cl_sweep(double *, int *, int *, int *, double *);

void spannel(int *ncas, /* = number of objects */
	     int *ndep, /* = number of variables */
	     double *dat,/* [ncas, 0:ndep] */
	     double *dstopt, /* = squared distances [1:ncas] */
	     double *cov,/* matrix [0:ndep, 0:ndep] */
	     double *varsum,	/* [1:ndep] */
	     double *varss,	/* [1:ndep] */
	     double *prob,	/* [1:ncas] */
	     double *work,	/* [0:ndep] */
	     double *eps,
	     int *maxit, /* = maximal # iterations (and returns #{iter.})*/
	     int *ierr);

void sildist(double *d,		/* distance : in matrix or dist format; i.e.,
				   of length n^2 or n*(n-1)/2; see 'ismat' */
	     int    *n,		/* number of Subjects (attr(d,'Size')) */
	     int    *clustering,/* clustering vector, values from {1..k} */
	     int    *k,		/* number of clusters */
	     double *diC,	/* diC */
	     int    *counts,	/* counts[k] :=  #{cluster k} */
	     double *si,	/* (a_i - b_i) / max(ai,bi) */
	     int    *neighbor,	/* neighbor */
	     int    *ismat);	/* boolean : is 'd' a matrix or 'dist' ? */

void cl_fanny(int *nn, int *jpp, int *kk,
	      double *x, double *dss, int *jdyss, double *valmd,
	      int *jtmd, int *ndyst, int *nsend, int *nelem,
	      int *negbr, double *syl, double *p, double *dp,
	      double *pt, int *nfuzz, double *esp, double *ef,
	      double *dvec, double *ttsyl, double *obj,
	      int *ncluv, double *sylinf, double *r, double *tol, int *maxit);


/* ================= Fortran things (remainder) ======================== */

/* -------- ./daisy.f ---------------------------------- */
int F77_NAME(cldaisy)(int *nn, int *jpp, double *x,
		       double *valmd, int *jtmd, int *jdat, int *vtype,
		       int *ndyst, int *mdata, double *disv);

/* -------- ./fanny.c ---------------------------------- */
/* R-level: called only from ../tests/dysta-ex.R  (now via .C()): */
void dysta3(int *nn, int *p, double *x, double *dys,
	    int *ndyst, int *jtmd, double *valmd, int *jhalt);

/* -------- ./mona.f ---------------------------------- */
int F77_NAME(clmona)(int *nn, int *pp, int *x, int *jerr,
		      int *nban, int *ner, int *kwan, int *lava, int *jlack);

/* -------- ./twins.c ---------------------------------- */
void R_bncoef(int *nn, double *ban, double *cf);
double bncoef(int  nn, double *ban);

void twins(int *nn, int *jpp, double *x,
	   double *dys, double *dys2, int *jdyss, double *valmd,
	   int *jtmd, int *ndyst, int *jalg, int *method,
	   int *kwan, int *ner, double *ban, double *coef,
	   double *alpha, int *merge, int *trace_lev);