<?xml version="1.0" encoding="utf-8"?>
<!-- $Revision$ -->
<refentry xml:id="function.similar-text" xmlns="http://docbook.org/ns/docbook">
<refnamediv>
<refname>similar_text</refname>
<refpurpose>Calculate the similarity between two strings</refpurpose>
</refnamediv>
<refsect1 role="description">
&reftitle.description;
<methodsynopsis>
<type>int</type><methodname>similar_text</methodname>
<methodparam><type>string</type><parameter>string1</parameter></methodparam>
<methodparam><type>string</type><parameter>string2</parameter></methodparam>
<methodparam choice="opt"><type>float</type><parameter role="reference">percent</parameter><initializer>&null;</initializer></methodparam>
</methodsynopsis>
<para>
This calculates the similarity between two strings as described in
&book.programming.classics;. Note that this implementation does not use a
stack as in Oliver's pseudocode, but recursive calls, which may or may not
speed up the whole process. Note also that the complexity of this algorithm
is O(N**3), where N is the length of the longest string.
</para>
</refsect1>
<refsect1 role="parameters">
&reftitle.parameters;
<para>
<variablelist>
<varlistentry>
<term><parameter>string1</parameter></term>
<listitem>
<para>
The first string.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>string2</parameter></term>
<listitem>
<para>
The second string.
</para>
<note>
<para>
Swapping the <parameter>string1</parameter> and
<parameter>string2</parameter> may yield a different result; see the
example below.
</para>
</note>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>percent</parameter></term>
<listitem>
<para>
By passing a reference as the third argument,
<function>similar_text</function> will calculate the similarity in
percent, by dividing the result of <function>similar_text</function> by
the average of the lengths of the given strings and multiplying the
quotient by <literal>100</literal>.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect1>
<refsect1 role="returnvalues">
&reftitle.returnvalues;
<para>
Returns the number of matching characters in both strings.
</para>
<para>
The number of matching characters is calculated by finding the first longest
common substring, and then doing the same for the prefixes and the suffixes
(the parts of both strings before and after that substring), recursively.
The lengths of all common substrings found are added together.
</para>
</refsect1>
<refsect1 role="examples">
&reftitle.examples;
<example xml:id="similar_text.example.swapping">
<title><function>similar_text</function> argument swapping example</title>
<para>
This example shows that swapping the <parameter>string1</parameter> and
<parameter>string2</parameter> arguments may yield different results.
</para>
<programlisting role="php">
<![CDATA[
<?php
$sim = similar_text('bafoobar', 'barfoo', $perc);
echo "similarity: $sim ($perc %)\n";
$sim = similar_text('barfoo', 'bafoobar', $perc);
echo "similarity: $sim ($perc %)\n";
]]>
</programlisting>
&example.outputs.similar;
<screen>
<![CDATA[
similarity: 5 (71.428571428571 %)
similarity: 3 (42.857142857143 %)
]]>
</screen>
</example>
</refsect1>
<refsect1 role="seealso">
&reftitle.seealso;
<para>
<simplelist>
<member><function>levenshtein</function></member>
<member><function>soundex</function></member>
</simplelist>
</para>
</refsect1>
</refentry>
<!-- Keep this comment at the end of the file
Local variables:
mode: sgml
sgml-omittag:t
sgml-shorttag:t
sgml-minimize-attributes:nil
sgml-always-quote-attributes:t
sgml-indent-step:1
sgml-indent-data:t
indent-tabs-mode:nil
sgml-parent-document:nil
sgml-default-dtd-file:"~/.phpdoc/manual.ced"
sgml-exposed-tags:nil
sgml-local-catalogs:nil
sgml-local-ecat-files:nil
End:
vim600: syn=xml fen fdm=syntax fdl=2 si
vim: et tw=78 syn=sgml
vi: ts=1 sw=1
-->