File: Keywords.php

package info (click to toggle)
horde3 3.1.3-4etch7
  • links: PTS
  • area: main
  • in suites: etch
  • size: 22,876 kB
  • ctags: 18,071
  • sloc: php: 75,151; xml: 2,979; sql: 1,069; makefile: 79; sh: 64
file content (182 lines) | stat: -rw-r--r-- 6,490 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
<?php
/**
 * This class provides a parser which can construct an SQL WHERE
 * clause from a Google-like search expression.
 *
 * $Horde: framework/SQL/SQL/Keywords.php,v 1.2.10.4 2006/01/01 21:28:33 jan Exp $
 *
 * Copyright 2004-2006 Cronosys, LLC <http://www.cronosys.com/>
 *
 * See the enclosed file COPYING for license information (LGPL).  If you
 * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
 *
 * The expression recognizes boolean "AND", "OR", and "NOT" (providing
 * no operator between keywords implies "AND"), like so:
 *
 *   cat and dog
 *   cat or dog
 *   cat and not dog
 *
 * If no operator appears between keywords or quoted strings, "AND" is
 * assumed.  A comma can be used instead of "OR":
 *
 *   cat dog
 *   cat, dog
 *   cat not dog
 *
 * The parser recognizes parentheses, so complex expressions can be
 * created:
 *
 *   cat and not (dog or puppy)
 *
 * Quoted strings are also recognized, and are taken as literal
 * keywords:
 *
 *   "cat and dog"
 *
 * Parsing is designed to be as fuzzy as possible, so it shouldn't
 * error unless people search for "AND", "OR", or "NOT" without
 * quoting them or use unbalanced parentheses.
 *
 * @author  Jason M. Felice <jfelice@cronosys.com>
 * @since   Horde 3.0
 * @package Horde_SQL
 */
class Horde_SQL_Keywords {

    /**
     * Parse a keyword expression into an SQL boolean expression.
     *
     * The expression is first split into tokens and then handed to a small
     * recursive-descent parser (OR binds loosest, then implicit AND, then
     * NOT, then parentheses / search terms).
     *
     * NOTE: $column is interpolated into the result verbatim; it must be a
     * trusted SQL identifier, never user input.
     *
     * @param string $column        This is the SQL field name the resulting
     *                              expression should test against.
     * @param string $expr          This is the keyword expression we want to
     *                              parse.
     * @return mixed the query expression (a string) or a PEAR_Error on
     *               failure.
     */
    public static function parse($column, $expr)
    {
        /* First pass - scan the string for tokens.  Bare words are
         * tokens, or the user can quote strings to have embedded
         * spaces, keywords, or parentheses.  Parentheses can be used
         * for grouping boolean operators, and the boolean operators
         * AND, OR, and NOT are all recognized.
         *
         * The tokens are collected in the $tokens array -- an array of
         * strings.  Each string starts with either a `!' or a `='.
         * `=' is a bare word or quoted string we are searching for,
         * and `!' indicates a boolean operator or parenthesis. */
        $tokens = array();
        $expr = (string)$expr;

        while (true) {
            $expr = (string)preg_replace('/^\s+/', '', $expr);
            /* Compare against '' explicitly: empty() would also treat the
             * perfectly valid search term "0" as "nothing left". */
            if ($expr === '') {
                break;
            }

            $first = $expr[0];
            if ($first === '(' || $first === ')') {
                $token = '!' . $first;
                $consumed = 1;
            } elseif ($first === ',') {
                /* A comma is shorthand for OR. */
                $token = '!OR';
                $consumed = 1;
            } elseif (preg_match('/^(AND|OR|NOT)(?![a-z])/i', $expr, $matches)) {
                /* Operator keyword not followed by a letter (so "android"
                 * stays a bare word).  The lookahead, unlike a trailing
                 * ".*$", also works when the input spans several lines. */
                $token = '!' . strtoupper($matches[1]);
                $consumed = strlen($matches[1]);
            } elseif (preg_match('/^"((?:[^"\\\\]|\\\\.)*)"/s', $expr, $matches)) {
                /* Quoted string: any run of non-quote, non-backslash
                 * characters or backslash escapes (including \").  The
                 * escapes are decoded by stripcslashes(). */
                $token = '=' . stripcslashes($matches[1]);
                $consumed = strlen($matches[0]);
            } elseif (preg_match('/^[^\s(),]+/', $expr, $matches)) {
                /* Bare word: everything up to whitespace, a parenthesis
                 * or a comma. */
                $token = '=' . $matches[0];
                $consumed = strlen($matches[0]);
            } else {
                return PEAR::raiseError(_("Syntax error in search terms"));
            }
            $expr = (string)substr($expr, $consumed);

            if ($token === '!AND') {
                /* !AND is implied by concatenation. */
                continue;
            }
            $tokens[] = $token;
        }

        /* Call the expression parser. */
        $result = self::_parseKeywords1($column, $tokens);
        if (is_a($result, 'PEAR_Error')) {
            return $result;
        }

        /* The parser only stops early on an unmatched ')'.  Report it
         * instead of silently dropping the rest of the search terms. */
        if (count($tokens) > 0) {
            return PEAR::raiseError(_("Syntax error in search terms"));
        }
        return $result;
    }

    /**
     * Parse an OR-expression:  and-expr [ OR or-expr ].
     *
     * @param string $column   SQL field name to test against.
     * @param array  &$tokens  Remaining tokens; consumed tokens are removed.
     * @return mixed SQL expression string or a PEAR_Error on failure.
     */
    public static function _parseKeywords1($column, &$tokens)
    {
        if (count($tokens) == 0) {
            return PEAR::raiseError(_("Empty search terms"));
        }
        $lhs = self::_parseKeywords2($column, $tokens);
        if (is_a($lhs, 'PEAR_Error')) {
            return $lhs;
        }
        if (count($tokens) == 0 || $tokens[0] != '!OR') {
            return $lhs;
        }
        array_shift($tokens);
        $rhs = self::_parseKeywords1($column, $tokens);
        if (is_a($rhs, 'PEAR_Error')) {
            return $rhs;
        }
        return "( $lhs OR $rhs )";
    }

    /**
     * Parse an AND-expression: a sequence of NOT-expressions joined by
     * implicit AND, ending at ')' , OR, or the end of the token list.
     *
     * @param string $column   SQL field name to test against.
     * @param array  &$tokens  Remaining tokens; consumed tokens are removed.
     * @return mixed SQL expression string or a PEAR_Error on failure.
     */
    public static function _parseKeywords2($column, &$tokens)
    {
        $lhs = self::_parseKeywords3($column, $tokens);
        if (is_a($lhs, 'PEAR_Error')) {
            return $lhs;
        }
        if (count($tokens) == 0 || $tokens[0] == '!)' || $tokens[0] == '!OR') {
            return $lhs;
        }
        $rhs = self::_parseKeywords2($column, $tokens);
        if (is_a($rhs, 'PEAR_Error')) {
            return $rhs;
        }
        return "( $lhs AND $rhs )";
    }

    /**
     * Parse an optionally negated primary:  [ NOT ] primary.
     *
     * @param string $column   SQL field name to test against.
     * @param array  &$tokens  Remaining tokens; consumed tokens are removed.
     * @return mixed SQL expression string or a PEAR_Error on failure.
     */
    public static function _parseKeywords3($column, &$tokens)
    {
        if (isset($tokens[0]) && $tokens[0] == '!NOT') {
            array_shift($tokens);
            $lhs = self::_parseKeywords4($column, $tokens);
            if (is_a($lhs, 'PEAR_Error')) {
                return $lhs;
            }
            return "( NOT $lhs )";
        }
        return self::_parseKeywords4($column, $tokens);
    }

    /**
     * Parse a primary: a parenthesised sub-expression or a single search
     * term, which becomes a case-insensitive LIKE '%term%' test.
     *
     * @param string $column   SQL field name to test against.
     * @param array  &$tokens  Remaining tokens; consumed tokens are removed.
     * @return mixed SQL expression string or a PEAR_Error on failure.
     */
    public static function _parseKeywords4($column, &$tokens)
    {
        /* The token list can be exhausted here, e.g. for a trailing NOT. */
        if (!isset($tokens[0])) {
            return PEAR::raiseError(_("Expected bare word or quoted search term"));
        }
        if ($tokens[0] == '!(') {
            array_shift($tokens);
            $lhs = self::_parseKeywords1($column, $tokens);
            if (is_a($lhs, 'PEAR_Error')) {
                return $lhs;
            }
            if (count($tokens) == 0 || $tokens[0] != '!)') {
                return PEAR::raiseError(_("Expected ')'"));
            }
            array_shift($tokens);
            return $lhs;
        }
        if (substr($tokens[0], 0, 1) != '=') {
            return PEAR::raiseError(_("Expected bare word or quoted search term"));
        }
        $val = strtolower((string)substr(array_shift($tokens), 1));

        /* Escape the LIKE metacharacters (% and _) and the escape character
         * itself so the term is matched literally, then escape the value
         * for use inside a quoted SQL string literal.
         *
         * NOTE(review): addslashes() is not driver- or charset-aware.  It
         * is kept because this method has no database handle to quote
         * with; callers needing strict safety should confirm it is
         * adequate for their backend. */
        $val = addslashes(addcslashes($val, '\\%_'));
        return "( LOWER($column) LIKE '%$val%' )";
    }

}