| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 
 | #ifndef ALGORITHMS_SORTING_KARKKAINEN_HPP_
#define ALGORITHMS_SORTING_KARKKAINEN_HPP_
#include <pbdata/DNASequence.hpp>
inline bool leq(DNALength a1, DNALength a2, DNALength b1, DNALength b2)  // lexicographic order
{
    return ((a1 < b1) || ((a1 == b1) && (a2 <= b2)));
}  // for pairs
inline bool leq(DNALength a1, DNALength a2, DNALength a3, DNALength b1, DNALength b2, DNALength b3)
{
    return ((a1 < b1) || ((a1 == b1) && leq(a2, a3, b2, b3)));
}  // and triples
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
template <typename T_R>
void radixPass(DNALength* a, DNALength* b, T_R* r, DNALength n, DNALength K)
{                                         // count occurrences
    DNALength* c = new DNALength[K + 1];  // counter array
    for (DNALength i = 0; i <= K; i++)
        c[i] = 0;  // reset counters
    for (DNALength i = 0; i < n; i++)
        c[r[a[i]]]++;  // count occurrences
    for (DNALength i = 0, sum = 0; i <= K; i++) {  // exclusive prefix sums
        DNALength t = c[i];
        c[i] = sum;
        sum += t;
    }
    for (DNALength i = 0; i < n; i++)
        b[c[r[a[i]]]++] = a[i];  // sort
    delete[] c;
}
// find the suffix array SA of T[0..n-1] in {1..K}^n
// require T[n]=T[n+1]=T[n+2]=0, n>=2
template <typename T_T>
void KarkkainenBuildSuffixArray(T_T* T, DNALength* SA, DNALength n, int K)
{
    DNALength n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2;
    DNALength* R = new DNALength[n02 + 3];
    R[n02] = R[n02 + 1] = R[n02 + 2] = 0;
    DNALength* SA12 = new DNALength[n02 + 3];
    SA12[n02] = SA12[n02 + 1] = SA12[n02 + 2] = 0;
    DNALength* R0 = new DNALength[n0];
    DNALength* SA0 = new DNALength[n0];
    //******* Step 0: Construct sample ********
    // generate positions of mod 1 and mod 2 suffixes
    // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
    for (DNALength i = 0; i < n02 + 3; i++) {
        R[i] = 0;
    }
    for (DNALength i = 0, j = 0; i < n + (n0 - n1); i++)
        if (i % 3 != 0) R[j++] = i;
    //******* Step 1: Sort sample suffixes ********
    // lsb radix sort the mod 1 and mod 2 triples
    radixPass(R, SA12, T + 2, n02, K);
    radixPass(SA12, R, T + 1, n02, K);
    radixPass(R, SA12, T, n02, K);
    // find lexicographic names of triples and
    // write them to correct places in R
    T_T name = 0, c0 = -1, c1 = -1, c2 = -1;
    for (DNALength i = 0; i < n02; i++) {
        if (T[SA12[i]] != c0 || T[SA12[i] + 1] != c1 || T[SA12[i] + 2] != c2) {
            name++;
            c0 = T[SA12[i]];
            c1 = T[SA12[i] + 1];
            c2 = T[SA12[i] + 2];
        }
        if (SA12[i] % 3 == 1) {
            R[SA12[i] / 3] = name;
        }  // write to R1
        else {
            R[SA12[i] / 3 + n0] = name;
        }  // write to R2
    }
    // recurse if names are not yet unique
    if (static_cast<DNALength>(name) < n02) {
        KarkkainenBuildSuffixArray<DNALength>(R, SA12, n02, name);
        // store unique names in R using the suffix array
        for (DNALength i = 0; i < n02; i++)
            R[SA12[i]] = i + 1;
    } else {  // generate the suffix array of R directly
        for (DNALength i = 0; i < n02; i++)
            SA12[R[i] - 1] = i;
    }
    //******* Step 2: Sort nonsample suffixes ********
    // stably sort the mod 0 suffixes from SA12 by their first character
    for (DNALength i = 0, j = 0; i < n02; i++)
        if (SA12[i] < n0) R0[j++] = 3 * SA12[i];
    radixPass(R0, SA0, T, n0, K);
    //******* Step 3: Merge ********
    // merge sorted SA0 suffixes and sorted SA12 suffixes
    for (DNALength p = 0, t = n0 - n1, k = 0; k < n; k++) {
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
        DNALength i = GetI();  // pos of current offset 12 suffix
        DNALength j = SA0[p];  // pos of current offset 0 suffix
        if (SA12[t] < n0 ?     // different compares for mod 1 and mod 2 suffixes
                leq(T[i], R[SA12[t] + n0], T[j], R[j / 3])
                         : leq(T[i], T[i + 1], R[SA12[t] - n0 + 1], T[j], T[j + 1],
                               R[j / 3 + n0])) {  // suffix from SA12 is smaller
            SA[k] = i;
            t++;
            if (t == n02)  // done --- only SA0 suffixes left
                for (k++; p < n0; p++, k++)
                    SA[k] = SA0[p];
        } else {  // suffix from SA0 is smaller
            SA[k] = j;
            p++;
            if (p == n0)  // done --- only SA12 suffixes left
                for (k++; t < n02; t++, k++)
                    SA[k] = GetI();
        }
    }
    delete[] R;
    delete[] SA12;
    delete[] SA0;
    delete[] R0;
}
#endif
 |