1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace Wikimedia\Rdbms;
use Wikimedia\Rdbms\Platform\SQLPlatform;
/**
* This is to contain any regex on SQL work and get rid of them eventually
*
* This is a radioactive swamp and an extremely flawed and buggy last resort
* for when the information has not been provided via Query object.
* Bugs are to be expected in the regexes here.
*
* @ingroup Database
* @internal
* @since 1.41
*/
class QueryBuilderFromRawSql {
/** All the bits of QUERY_WRITE_* flags */
private const QUERY_CHANGE_MASK = (
SQLPlatform::QUERY_CHANGE_NONE |
SQLPlatform::QUERY_CHANGE_TRX |
SQLPlatform::QUERY_CHANGE_ROWS |
SQLPlatform::QUERY_CHANGE_SCHEMA |
SQLPlatform::QUERY_CHANGE_LOCKS
);
private const SCHEMA_CHANGE_VERBS = [
'CREATE',
'CREATE TEMPORARY',
'CREATE INDEX',
'CREATE DATABASE',
'ALTER',
'ALTER DATABASE',
'DROP',
'DROP INDEX',
'DROP DATABASE',
];
private const TRX_VERBS = [
'BEGIN',
'COMMIT',
'ROLLBACK',
'SAVEPOINT',
'RELEASE SAVEPOINT',
'ROLLBACK TO SAVEPOINT',
];
private static string $queryVerbRegex;
/**
* @param string $sql
* @param int $flags
* @param string $tablePrefix
* @return Query
*/
public static function buildQuery( string $sql, $flags, string $tablePrefix = '' ) {
$verb = self::getQueryVerb( $sql );
if ( ( $flags & self::QUERY_CHANGE_MASK ) == 0 ) {
$isWriteQuery = self::isWriteQuery( $sql );
if ( $isWriteQuery ) {
if ( in_array( $verb, self::SCHEMA_CHANGE_VERBS, true ) ) {
$flags |= SQLPlatform::QUERY_CHANGE_SCHEMA;
} else {
$flags |= SQLPlatform::QUERY_CHANGE_ROWS;
}
} else {
if ( in_array( $verb, self::TRX_VERBS, true ) ) {
$flags |= SQLPlatform::QUERY_CHANGE_TRX;
} else {
$flags |= SQLPlatform::QUERY_CHANGE_NONE;
}
}
}
return new Query(
$sql,
$flags,
$verb,
self::getWriteTable( $sql, $tablePrefix )
);
}
private static function isWriteQuery( $rawSql ) {
// Treat SELECT queries without FOR UPDATE queries as non-writes. This matches
// how MySQL enforces read_only (FOR SHARE and LOCK IN SHADE MODE are allowed).
// Handle (SELECT ...) UNION (SELECT ...) queries in a similar fashion.
if ( preg_match( '/^\s*\(?SELECT\b/i', $rawSql ) ) {
return (bool)preg_match( '/\bFOR\s+UPDATE\)?\s*$/i', $rawSql );
}
// BEGIN and COMMIT queries are considered non-write queries here.
// Database backends and drivers (MySQL, MariaDB, php-mysqli) generally
// treat these as write queries, in that their results have "affected rows"
// as meta data as from writes, instead of "num rows" as from reads.
// But, we treat them as non-write queries because when reading data (from
// either replica or primary DB) we use transactions to enable repeatable-read
// snapshots, which ensures we get consistent results from the same snapshot
// for all queries within a request. Use cases:
// - Treating these as writes would trigger ChronologyProtector (see method doc).
// - We use this method to reject writes to replicas, but we need to allow
// use of transactions on replicas for read snapshots. This is fine given
// that transactions by themselves don't make changes, only actual writes
// within the transaction matter, which we still detect.
return !preg_match(
'/^\s*(BEGIN|ROLLBACK|COMMIT|SAVEPOINT|RELEASE|SET|SHOW|EXPLAIN|USE)\b/i',
$rawSql
);
}
/**
* @param string $sql SQL query
* @return string
*/
private static function getQueryVerb( $sql ) {
// @phan-suppress-next-line PhanRedundantCondition https://github.com/phan/phan/issues/4720
if ( !isset( self::$queryVerbRegex ) ) {
$multiwordVerbsRegex = implode( '|', array_map(
fn ( $words ) => str_replace( ' ', '\s+', $words ),
Query::MULTIWORD_VERBS
) );
self::$queryVerbRegex = "/^\s*($multiwordVerbsRegex|[a-z]+)/i";
}
return preg_match( self::$queryVerbRegex, $sql, $m ) ? strtoupper( $m[1] ) : '';
}
/**
* @param string $sql
* @param string $tablePrefix
* @return string|null
*/
private static function getWriteTable( $sql, $tablePrefix ) {
// Regex for basic queries that can create/change/drop temporary tables.
// For simplicity, this only looks for tables with sensible alphanumeric names.
// Temporary tables only need simple programming names anyway.
$regex = <<<REGEX
/^
(?:
(?:INSERT|REPLACE)\s+(?:\w+\s+)*?INTO
| UPDATE(?:\s+OR\s+\w+|\s+IGNORE|\s+ONLY)?
| DELETE\s+(?:\w+\s+)*?FROM(?:\s+ONLY)?
| CREATE\s+(?:TEMPORARY\s+)?TABLE(?:\s+IF\s+NOT\s+EXISTS)?
| DROP\s+(?:TEMPORARY\s+)?TABLE(?:\s+IF\s+EXISTS)?
| TRUNCATE\s+(?:TEMPORARY\s+)?TABLE
| ALTER\s+TABLE
) \s+
(\w+|`\w+`|'\w+'|"\w+")
/ix
REGEX;
if ( preg_match( $regex, $sql, $m ) ) {
$tableName = trim( $m[1], "\"'`" );
if ( str_starts_with( $tableName, $tablePrefix ) ) {
$tableName = substr( $tableName, strlen( $tablePrefix ) );
}
return $tableName;
}
return null;
}
/**
* Removes most variables from an SQL query and replaces them with X or N for numbers.
* It's only slightly flawed. Don't use for anything important.
*
* @param string $sql A SQL Query
*
* @return string
*/
public static function generalizeSQL( $sql ) {
# This does the same as the regexp below would do, but in such a way
# as to avoid crashing php on some large strings.
# $sql = preg_replace( "/'([^\\\\']|\\\\.)*'|\"([^\\\\\"]|\\\\.)*\"/", "'X'", $sql );
$sql = str_replace( "\\\\", '', $sql );
$sql = str_replace( "\\'", '', $sql );
$sql = str_replace( "\\\"", '', $sql );
$sql = preg_replace( "/'.*'/s", "'X'", $sql );
$sql = preg_replace( '/".*"/s', "'X'", $sql );
# All newlines, tabs, etc replaced by single space
$sql = preg_replace( '/\s+/', ' ', $sql );
# All numbers => N,
# except the ones surrounded by characters, e.g. l10n
$sql = preg_replace( '/-?\d++(,-?\d++)+/', 'N,...,N', $sql );
$sql = preg_replace( '/(?<![a-zA-Z])-?\d+(?![a-zA-Z])/', 'N', $sql );
return $sql;
}
}
|