File: esl_quicksort.c

package info (click to toggle)
hmmer 3.2.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 23,380 kB
  • sloc: ansic: 119,305; perl: 8,791; sh: 3,266; makefile: 1,871; python: 598
file content (235 lines) | stat: -rw-r--r-- 7,480 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/* Quicksort.
 * 
 * Differs from stdlib's qsort() in that Easel provides a standardized
 * and reentrant way to sort arbitrary data arrays/structures by
 * sorting an array of unique integer identifiers.
 * 
 * Contents:
 *    1. Quicksort algorithm
 *    2. Unit tests
 *    3. Test driver
 *    4. Example
 */
#include "esl_config.h"

#include "easel.h"
#include "esl_quicksort.h"

static void partition(const void *data, int (*comparison)(const void *data, int o1, int o2), int *ord, int lo, int hi);


/*****************************************************************
 * 1. Quicksort
 *****************************************************************/

/* Function:  esl_quicksort()
 * Synopsis:  Quicksort algorithm.
 * Incept:    SRE, Wed Nov 11 16:34:05 2015
 *
 * Purpose:   <data> points to an array or structure that consists of
 *            <n> elements that we can identify with unique integer
 *            identifiers 0..<n-1>. Sort these elements, using a
 *            <*comparison()> function that returns -1, 0, or 1 when
 *            element <o1> should be sorted before, equal to, or after
 *            element <o2>. Caller provides an allocated array
 *            <sorted_at> to hold the result, allocated for at least
 *            <n> integers. The result in <sorted_at> consists of the
 *            ordered array of identifiers. For example,
 *            <sorted_at[0]> is the index of the best (first) element
 *            in the original <data> array, and <sorted_at[n-1]> is
 *            the id of the worst (last) element; <data[sorted_at[0]]>
 *            and <data[sorted_at[n-1]]> are the best and worst
 *            elements themselves. The original <data> array is 
 *            unaltered.
 *            
 *            Compared to standard <qsort()>, the main advantage of
 *            <esl_qsort()> is that it is reentrant: it passes an
 *            arbitrary <data> pointer to the comparison function, so
 *            it can sort the elements of arbitrary structures or
 *            arrays without having to globally or statically expose
 *            those data. (A reentrant <qsort_r()> function is
 *            available in the libc of some platforms, but is not
 *            standard POSIX ANSI C, so we cannot rely on it.)  The
 *            main disadvantage is that <esl_quicksort()> takes
 *            additional memory (the <sorted_at> array), whereas
 *            <qsort()> sorts in place.
 *            
 * Args:      data         - generic pointer to the data to be sorted.
 *            n            - <data> consists of <n> elements numbered 0..n-1
 *            comparison() - returns -1,0,1 if o1 is better, equal, worse than o2
 *            sorted_at    - sorted array of the data[] idx's 0..n-1.
 *            
 * Returns:   <eslOK> on success.
 */
int
esl_quicksort(const void *data, int n, int (*comparison)(const void *data, int o1, int o2), int *sorted_at)
{
  int i;
  for (i = 0; i < n; i++) sorted_at[i] = i;
  partition(data, comparison, sorted_at, 0, n-1);
  return eslOK;
}



/* 
 * We're sorting a subrange of <ord>, from <ord[lo]..ord[hi]>
 *   Values in <ord> are unique identifiers for <data>.
 *   We're sorting <ord> by data[ord[i]] better than data[ord[i+1]]
 */
static void
partition(const void *data, int (*comparison)(const void *data, int o1, int o2), int *ord, int lo, int hi)
{
  int i = lo;
  int j = hi+1;
  int swap, pivot;

  /* Select pivot position from lo, hi, lo + (hi-lo)/2 by median-of-three */
  if (comparison(data, ord[hi], ord[lo]) < 0) { swap = ord[hi]; ord[hi] = ord[lo]; ord[hi] = swap; }
  pivot = lo + (hi-lo)/2;
  if      (comparison(data, ord[pivot], ord[lo]) < 0) pivot = lo;
  else if (comparison(data, ord[pivot], ord[hi]) > 0) pivot = hi;
  swap = ord[pivot]; ord[pivot] = ord[lo]; ord[lo] = swap;

  /* Partition */
  while (1) 
    {
      do { i++; } while (i <= hi && comparison(data, ord[i], ord[lo]) < 0);
      do { j--; } while (           comparison(data, ord[j], ord[lo]) > 0);

      if (j > i) { swap = ord[j]; ord[j] = ord[i]; ord[i] = swap; }
      else break;
    }
  swap = ord[lo]; ord[lo] = ord[j]; ord[j] = swap;

  /* Recurse, doing the smaller partition first */
  if (j-lo < hi-j) {  
    if (j-lo > 1) partition(data, comparison, ord, lo, j-1);
    if (hi-j > 1) partition(data, comparison, ord, j+1, hi);
  } else {
    if (hi-j > 1) partition(data, comparison, ord, j+1, hi);
    if (j-lo > 1) partition(data, comparison, ord, lo, j-1);
  }
}
  



/*****************************************************************
 * 2. Unit tests
 *****************************************************************/
#ifdef eslQUICKSORT_TESTDRIVE

#include "esl_random.h"

int 
sort_floats_ascending(const void *data, int o1, int o2)
{
  float *x = (float *) data;
  if (x[o1] < x[o2]) return -1;
  if (x[o1] > x[o2]) return 1;
  return 0;
}

static void 
utest_floatsort(ESL_RANDOMNESS *rng, int N, int K)
{
  char   msg[]     = "esl_quicksort: float sort test failed";
  float *x         = malloc(sizeof(float) * N);
  int   *sorted_at = malloc(sizeof(int)   * N);
  int    i,r;

  for (i = 0; i < N; i++)
    x[i] = esl_random(rng) * K;
  esl_quicksort(x, N, sort_floats_ascending, sorted_at);
  
  for (r = 1; r < N; r++)
    if (x[sorted_at[r]] < x[sorted_at[r-1]]) esl_fatal(msg);
       
  free(x);
  free(sorted_at);
}

#endif /*eslQUICKSORT_TESTDRIVE*/

/*****************************************************************
 * 3. Test driver
 *****************************************************************/
#ifdef eslQUICKSORT_TESTDRIVE

#include "easel.h"
#include "esl_getopts.h"
#include "esl_random.h"
#include "esl_quicksort.h"

#include <stdio.h>

static ESL_OPTIONS options[] = {
   /* name  type         default  env   range togs  reqs  incomp  help                docgrp */
  {"-h",  eslARG_NONE,    FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage",           0},
  {"-s",  eslARG_INT,       "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0},

  { 0,0,0,0,0,0,0,0,0,0},
};
static char usage[]  = "[-options]";
static char banner[] = "test driver for quicksort module";

int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go  = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
  int             N   = 100;
  int             K   = 10;

  utest_floatsort(rng, N, K);
  
  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
#endif /*eslQUICKSORT_TESTDRIVE*/


/*****************************************************************
 * 4. Example
 *****************************************************************/
#ifdef eslQUICKSORT_EXAMPLE

#include "easel.h"
#include "esl_random.h"
#include "esl_quicksort.h"

int 
sort_floats_descending(const void *data, int o1, int o2)
{
  float *x = (float *) data;
  if (x[o1] > x[o2]) return -1;
  if (x[o1] < x[o2]) return 1;
  return 0;
}

int
main(int argc, char **argv)
{
  ESL_RANDOMNESS *rng = esl_randomness_Create(0);
  int    N            = 100;
  float  K            = 10.0;        
  float *x            = malloc(sizeof(float) * N);
  int   *sorted_at    = malloc(sizeof(int)   * N);
  int    i,r;
  
  for (i = 0; i < N; i++)
    x[i] = esl_random(rng) * K;
  
  esl_quicksort(x, N, sort_floats_descending, sorted_at);

  for (r = 0; r < N; r++)
    printf("%.4f\n", x[sorted_at[r]]);

  return 0;
}

#endif /*eslQUICKSORT_EXAMPLE*/