File: editdistance.m

package info (click to toggle)
octave-strings 1.0.5-1
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 452 kB
  • ctags: 29
  • sloc: makefile: 79; cpp: 67; sh: 25
file content (82 lines) | stat: -rw-r--r-- 2,569 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
## Copyright (C) 2006 Muthiah Annamalai <muthiah.annamalai@uta.edu>
## 
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see <http://www.gnu.org/licenses/>.
##

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{dist}, @var{L}] =} editdistance (@var{string1}, @var{string2}, @var{weights})
## Compute the Levenshtein edit distance between the two strings
## @var{string1} and @var{string2}. This operation is symmetrical.
## The optional argument @var{weights} is a vector of at least three
## elements giving, in order, the cost of a gap (one inserted or one
## deleted character), the cost of a matched character pair, and the
## cost of a mismatched character pair (a substitution); by default it
## is set to +1, 0, +1 respectively, so that a smaller edit distance
## means a closer match between the two strings. This function implements
## the Levenshtein edit distance as presented in the Wikipedia article,
## accessed Nov 2006. The edit distance of a string with an empty string
## is its length multiplied by the gap cost; with the default weights
## this is simply its length.
## 
## The first return value, @var{dist}, is the edit distance. The second
## return value, @var{L}, is the full distance matrix, which has
## length(@var{string1})+1 rows and length(@var{string2})+1 columns.
##
## @example
## @group  
##          editdistance('marry','marie') 
##          ##returns value +2 for the distance.
## @end group
## @end example
##
## @end deftypefn
##

function [dist,L]=editdistance(str1,str2,weights)
    ## Weighted edit distance between STR1 and STR2, computed with the
    ## usual dynamic-programming table.
    ##
    ## Inputs:
    ##   str1, str2 - the two strings to compare (either may be empty).
    ##   weights    - optional vector with at least three elements:
    ##                  weights(1)  gap cost, charged for every inserted
    ##                              or deleted character,
    ##                  weights(2)  cost of aligning two equal characters,
    ##                  weights(3)  cost of aligning two different
    ##                              characters (substitution).
    ##                Defaults to [1 0 1].
    ## Outputs:
    ##   dist - the edit distance, i.e. the bottom-right entry of L.
    ##   L    - the (length(str1)+1) x (length(str2)+1) table; L(i,j) is
    ##          the cost of turning the first i-1 characters of str1 into
    ##          the first j-1 characters of str2.
    if(nargin < 2 || (nargin == 3 && length(weights)  < 3) )
      print_usage();
    end

    if(nargin < 3)
      g=+1;  % gap: one insertion or one deletion
      m=+0;  % match
      d=+1;  % mismatch (substitution)
    else
      g=weights(1);
      m=weights(2);
      d=weights(3);
    end

    L1=length(str1)+1;
    L2=length(str2)+1;
    L=zeros(L1,L2);

    ## Border cells: reaching or leaving the empty prefix takes one gap
    ## per character.
    L(:,1)=[0:L1-1]'*g;
    L(1,:)=[0:L2-1]*g;

    for idx=2:L1
        for idy=2:L2
            ## Cost of aligning the current pair of characters.
            if(str1(idx-1)==str2(idy-1))
                score=m;
            else
                score=d;
            end
            m1=L(idx-1,idy-1) + score;  % align the two characters
            m2=L(idx-1,idy) + g;        % drop a character of str1
            m3=L(idx,idy-1) + g;        % add a character of str2
            L(idx,idy)=min(m1,min(m2,m3));
        end
    end

    dist=L(L1,L2);
end