File: SimpleEditDistance.h

package info (click to toggle)
libedlib 1.2.7-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 14,520 kB
  • sloc: cpp: 2,002; sh: 304; python: 131; makefile: 89; ansic: 7
file content (114 lines) | stat: -rw-r--r-- 3,133 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#ifndef SIMPLE_EDIT_DISTANCE_H
#define SIMPLE_EDIT_DISTANCE_H

#include <algorithm>
#include <cstdio>
#include <vector>
#include "edlib.h"

using namespace std;

#ifdef __cplusplus
extern "C" {
#endif


int min(int x, int y) {
    return x < y ? x : y;
}

int min3(int x, int y, int z) {
    return min(x, min(y, z));
}

int calcEditDistanceSimple(const char* query, int queryLength,
                           const char* target, int targetLength,
                           const EdlibAlignMode mode, int* score,
                           int** positions_, int* numPositions_) {
    int bestScore = -1;
    vector<int> positions;

    // Handle as a special situation when one of the sequences has length 0.
    if (queryLength == 0 || targetLength == 0) {
        if (mode == EDLIB_MODE_NW) {
            *score = std::max(queryLength, targetLength);
            *positions_ = new int[1];
            *positions_[0] = targetLength - 1;
            *numPositions_ = 1;
        } else if (mode == EDLIB_MODE_SHW || mode == EDLIB_MODE_HW) {
            *score = queryLength;
            *positions_ = new int[1];
            *positions_[0] = -1;
            *numPositions_ = 1;
        } else {
            return EDLIB_STATUS_ERROR;
        }
        return EDLIB_STATUS_OK;
    }

    int* C = new int[queryLength];
    int* newC = new int[queryLength];

    // set first column (column zero)
    for (int i = 0; i < queryLength; i++) {
        C[i] = i + 1;
    }
    /*
    for (int i = 0; i < queryLength; i++)
        printf("%3d ", C[i]);
    printf("\n");
    */
    for (int c = 0; c < targetLength; c++) { // for each column
        newC[0] = min3((mode == EDLIB_MODE_HW ? 0 : c + 1) + 1, // up
                       (mode == EDLIB_MODE_HW ? 0 : c)
                       + (target[c] == query[0] ? 0 : 1), // up left
                       C[0] + 1); // left
        for (int r = 1; r < queryLength; r++) {
            newC[r] = min3(newC[r-1] + 1, // up
                           C[r-1] + (target[c] == query[r] ? 0 : 1), // up left
                           C[r] + 1); // left
        }

        /*  for (int i = 0; i < queryLength; i++)
            printf("%3d ", newC[i]);
            printf("\n");*/

        if (mode != EDLIB_MODE_NW || c == targetLength - 1) { // For NW check only last column
            int newScore = newC[queryLength - 1];
            if (bestScore == -1 || newScore <= bestScore) {
                if (newScore < bestScore) {
                    positions.clear();
                }
                bestScore = newScore;
                positions.push_back(c);
            }
        }

        int *tmp = C;
        C = newC;
        newC = tmp;
    }

    delete[] C;
    delete[] newC;

    *score = bestScore;
    if (positions.size() > 0) {
        *positions_ = new int[positions.size()];
        *numPositions_ = static_cast<int>(positions.size());
        copy(positions.begin(), positions.end(), *positions_);
    } else {
        *positions_ = NULL;
        *numPositions_ = 0;
    }

    return EDLIB_STATUS_OK;
}



#ifdef __cplusplus
}
#endif

#endif // SIMPLE_EDIT_DISTANCE_H