File: levenshtein.py

package info (click to toggle)
sagemath 9.5-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid
  • size: 144,264 kB
  • sloc: python: 1,113,444; cpp: 37,499; ansic: 4,606; sh: 4,105; makefile: 1,097; javascript: 326; lisp: 5
file content (98 lines) | stat: -rw-r--r-- 2,963 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# -*- coding: utf-8 -*-
u"""
Levenshtein Distance

The Levenshtein distance between two words is the minimal number of
edits that turn one word into the other. Here, "edit" means a
single-letter addition, single-letter deletion, or exchange of a
letter with another letter.

http://en.wikipedia.org/wiki/Levenshtein_distance

EXAMPLES::

    >>> from sage_bootstrap.levenshtein import Levenshtein
    >>> Levenshtein(5)(u'Queensryche', u'Queensrÿche')
    1
"""


#*****************************************************************************
#       Copyright (C) 2015 Volker Braun <vbraun.name@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#                  http://www.gnu.org/licenses/
#*****************************************************************************


class DistanceExceeded(Exception):
    pass


class Levenshtein(object):

    def __init__(self, limit):
        """
        Levenshtein Distance with Maximum Distance Cutoff
        
        Args:
            limit (int): if the distance exceeds the limit, a
                :class:`DistanceExceeded` is raised and the
                computation is aborted.

        EXAMPLES::

            >>> from sage_bootstrap.levenshtein import Levenshtein
            >>> lev3 = Levenshtein(3)
            >>> lev3(u'saturday', u'sunday')
            3
            >>> lev3(u'kitten', u'sitting')
            3
            >>> lev2 = Levenshtein(2)
            >>> lev2(u'kitten', u'sitting')
            Traceback (most recent call last):
            ...
            DistanceExceeded
        """
        self._limit = limit
        
    def __call__(self, a, b):
        """
        calculate the levenshtein distance

        args: 
            a,b (str): the two strings to compare

        returns:
            int: the Levenshtein distance if it is less or equal to
                the distance limit.

        Example::

            >>> from app.scoring.levenshtein import Levenshtein
            >>> lev3 = Levenshtein(3)
            >>> lev3(u'Saturday', u'Sunday')
            3
        """
        n, m = len(a), len(b)
        if n > m:
            # Optimization to use O(min(n,m)) space
            a, b, n, m = b, a, m, n
        curr = range(n+1)
        for i in range(1, m+1):
            prev, curr = curr, [i]+[0]*n
            for j in range(1, n+1):
                cost_add, cost_del = prev[j]+1, curr[j-1]+1
                cost_change = prev[j-1]
                if a[j-1] != b[i-1]:
                    cost_change += 1
                curr[j] = min(cost_add, cost_del, cost_change)
            if min(curr) > self._limit:
                raise DistanceExceeded
        if curr[n] > self._limit:
            raise DistanceExceeded
        return curr[n]