1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
|
#ifndef __aspeller_leditdist_hh__
#define __aspeller_leditdist_hh__
#include "weights.hpp"
namespace aspeller {
// limit_edit_distance finds the shortest edit distance but will
// stop and return a number at least as large as LARGE_NUM if it has
// to do more edits than a set limit.
// Note that this does NOT mean that the score returned is <= limit*w.max
// as "sub" vs "submarine" will return 6*(cost of insertion) no matter what
// the limit is.
// The edit distance is
// (cost of swap)(# of swaps) + (cost of deletion)(# of deletions)
// + (cost of insertion)(# of insertions)
// + (cost of substitutions)(# of substitutions)
// Preconditions:
// max(strlen(a), strlen(b))*max(of the edit weights) <= 2^15
// if violated than an incorrect result may be returned (which may be negative)
// due to overflow of a short integer
// (limit+1)*w.min < limit*w.max
// limit <= 5 (use edit_distance if limit > 5)
// where w.min and w.max is the minimum and maximum cost of an edit
// respectfully.
// The running time is asymptotically bounded above by
// (3^l)*n where l is the limit and n is the maximum of strlen(a),strlen(b)
// Based on my informal tests, however, the n does not really matter
// and the running time is more like (3^l).
// limit_edit_distance, based on my informal tests, turns out to be
// faster than edit_dist for l < 5. For l == 5 it is about the
// smaller for short strings (<= 5) and less than for longer strings
// limit2_edit_distance(a,b,w) = limit_edit_distance(a,b,2,w)
// but is roughly 2/3's faster
struct EditDist {
int score;
const char * stopped_at;
EditDist() {}
EditDist(int s, const char * p)
: score(s), stopped_at(p) {}
operator int () const {return score;}
};
static const int LARGE_NUM = 0xFFFFF;
// this needs to be SMALLER than INT_MAX since it may be incremented
// a few times
int limit_edit_distance(const char * a, const char * b, int limit,
const EditDistanceWeights & w
= EditDistanceWeights());
EditDist limit0_edit_distance(const char * a, const char * b,
const EditDistanceWeights & w
= EditDistanceWeights());
EditDist limit1_edit_distance(const char * a, const char * b,
const EditDistanceWeights & w
= EditDistanceWeights());
EditDist limit2_edit_distance(const char * a, const char * b,
const EditDistanceWeights & w
= EditDistanceWeights());
}
#endif
|