1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
|
<?php
use Wikimedia\Rdbms\IReadableDatabase;
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
*/
/**
* Allows iterating a large number of rows in batches transparently.
* By default when iterated over returns the full query result as an
* array of rows. Can be wrapped in RecursiveIteratorIterator to
* collapse those arrays into a single stream of rows queried in batches.
*
* @newable
*/
class BatchRowIterator implements RecursiveIterator {

	/**
	 * @var IReadableDatabase Connection the batch queries are run against
	 */
	protected $db;

	/**
	 * @var string|array The name or names of the table to read from
	 */
	protected $table;

	/**
	 * @var array The name of the primary key(s). An entry with a non-numeric
	 *  key is treated as alias => column; the alias is then the field name
	 *  read back from result rows.
	 */
	protected $primaryKey;

	/**
	 * @var int The number of rows to fetch per iteration
	 */
	protected $batchSize;

	/**
	 * @var array Array of strings containing SQL conditions to add to the query
	 */
	protected $conditions = [];

	/**
	 * @var array Join conditions passed to the query builder for every batch
	 */
	protected $joinConditions = [];

	/**
	 * @var array List of column names to select from the table suitable for use
	 *  with IDatabase::select()
	 */
	protected $fetchColumns;

	/**
	 * @var string SQL Order by condition generated from $this->primaryKey
	 */
	protected $orderBy;

	/**
	 * @var array The current iterator value
	 */
	private $current = [];

	/**
	 * @var int 0-indexed number of pages fetched since self::rewind();
	 *  -1 while nothing has been fetched yet
	 */
	private $key = -1;

	/**
	 * @var array Additional query options
	 */
	protected $options = [];

	/**
	 * @var string|null For debugging which method is using this class.
	 */
	protected $caller;

	/**
	 * @stable to call
	 *
	 * @param IReadableDatabase $db
	 * @param string|array $table The name or names of the table to read from
	 * @param string|array $primaryKey The name or names of the primary key columns
	 * @param int $batchSize The number of rows to fetch per iteration
	 * @throws InvalidArgumentException When $batchSize is smaller than 1
	 */
	public function __construct( IReadableDatabase $db, $table, $primaryKey, $batchSize ) {
		if ( $batchSize < 1 ) {
			throw new InvalidArgumentException( 'Batch size must be at least 1 row.' );
		}
		$this->db = $db;
		$this->table = $table;
		$this->primaryKey = (array)$primaryKey;
		// Until setFetchColumns() is called only the primary key is selected,
		// which is the minimum needed to page through the table.
		$this->fetchColumns = $this->primaryKey;
		$this->orderBy = implode( ' ASC,', $this->primaryKey ) . ' ASC';
		$this->batchSize = $batchSize;
	}

	/**
	 * Append conditions to the ones applied to every batch query.
	 *
	 * @param array $conditions Query conditions suitable for use with
	 *  IDatabase::select
	 */
	public function addConditions( array $conditions ) {
		$this->conditions = array_merge( $this->conditions, $conditions );
	}

	/**
	 * Merge additional options into the ones handed to the query builder
	 * for every batch query.
	 *
	 * @param array $options Query options suitable for use with
	 *  IDatabase::select
	 */
	public function addOptions( array $options ) {
		$this->options = array_merge( $this->options, $options );
	}

	/**
	 * Merge additional join conditions into the ones used for every batch query.
	 *
	 * @param array $conditions Query join conditions suitable for use
	 *  with IDatabase::select
	 */
	public function addJoinConditions( array $conditions ) {
		$this->joinConditions = array_merge( $this->joinConditions, $conditions );
	}

	/**
	 * Choose which columns each batch query selects.
	 *
	 * @param array $columns List of column names to select from the
	 *  table suitable for use with IDatabase::select()
	 */
	public function setFetchColumns( array $columns ) {
		// If it's not the all column selector merge in the primary keys we need
		if ( count( $columns ) === 1 && reset( $columns ) === '*' ) {
			$this->fetchColumns = $columns;
		} else {
			$this->fetchColumns = array_unique( array_merge(
				$this->primaryKey,
				$columns
			) );
		}
	}

	/**
	 * Use ->setCaller( __METHOD__ ) to indicate which code is using this
	 * class. Only used in debugging output.
	 * @since 1.36
	 *
	 * @param string $caller
	 * @return self
	 */
	public function setCaller( $caller ) {
		$this->caller = $caller;

		return $this;
	}

	/**
	 * Extracts the primary key(s) from a database row.
	 *
	 * @param stdClass $row An individual database row from this iterator
	 * @return array Map of primary key field name (the alias when one was
	 *  given, otherwise the column) to its value within the row
	 */
	public function extractPrimaryKeys( $row ) {
		$pk = [];
		foreach ( $this->primaryKey as $alias => $column ) {
			// Numeric keys mean the entry is a plain column name without alias
			$name = is_numeric( $alias ) ? $column : $alias;
			$pk[$name] = $row->{$name};
		}

		return $pk;
	}

	/**
	 * @return array The most recently fetched set of rows from the database
	 */
	public function current(): array {
		return $this->current;
	}

	/**
	 * @return int 0-indexed count of the page number fetched
	 */
	public function key(): int {
		return $this->key;
	}

	/**
	 * Reset the iterator to the beginning of the table.
	 */
	public function rewind(): void {
		$this->key = -1; // self::next() will turn this into 0
		$this->current = [];
		$this->next();
	}

	/**
	 * @return bool True when the iterator is in a valid state, i.e. the
	 *  most recent fetch returned at least one row
	 */
	public function valid(): bool {
		return (bool)$this->current;
	}

	/**
	 * @return bool True when this result set has rows
	 */
	public function hasChildren(): bool {
		// $current is always an array, so a bool cast is the emptiness check
		return (bool)$this->current;
	}

	/**
	 * @return null|RecursiveIterator Iterator over the rows of the current batch
	 */
	public function getChildren(): ?RecursiveIterator {
		return new NotRecursiveIterator( new ArrayIterator( $this->current ) );
	}

	/**
	 * Fetch the next set of rows from the database.
	 *
	 * Does nothing once a fetch has returned no rows; call self::rewind()
	 * to start over.
	 */
	public function next(): void {
		// An empty batch after at least one fetch means the table is
		// exhausted. Without this guard self::buildConditions() would see an
		// empty $current, drop the paging condition and silently restart
		// from the first batch. self::rewind() resets $key to -1 beforehand,
		// so it is not affected.
		if ( $this->key >= 0 && !$this->current ) {
			return;
		}

		$caller = __METHOD__;
		if ( (string)$this->caller !== '' ) {
			$caller .= " (for {$this->caller})";
		}

		$res = $this->db->newSelectQueryBuilder()
			->tables( is_array( $this->table ) ? $this->table : [ $this->table ] )
			->fields( $this->fetchColumns )
			->where( $this->buildConditions() )
			->caller( $caller )
			->limit( $this->batchSize )
			->orderBy( $this->orderBy )
			->options( $this->options )
			->joinConds( $this->joinConditions )
			->fetchResultSet();

		// The iterator is converted to an array because in addition to
		// returning it in self::current() we need to use the end value
		// in self::buildConditions()
		$this->current = iterator_to_array( $res );
		$this->key++;
	}

	/**
	 * Uses the primary key list and the maximal result row from the
	 * previous iteration to build an SQL condition sufficient for
	 * selecting the next page of results.
	 *
	 * @return array The SQL conditions necessary to select the next set
	 *  of rows in the batched query
	 */
	protected function buildConditions() {
		if ( !$this->current ) {
			// Nothing fetched yet: only the caller supplied conditions apply
			return $this->conditions;
		}

		// Rows are ordered by primary key ascending, so the last row of the
		// previous batch carries the largest key seen so far.
		$maxRow = end( $this->current );
		$maximumValues = [];
		foreach ( $this->primaryKey as $alias => $column ) {
			// The row exposes the value under the alias (if any), while the
			// comparison has to be made against the real column.
			$name = is_numeric( $alias ) ? $column : $alias;
			$maximumValues[$column] = $maxRow->$name;
		}

		$conditions = $this->conditions;
		$conditions[] = $this->db->buildComparison( '>', $maximumValues );

		return $conditions;
	}
}
|