File: DataCollection.php

package info (click to toggle)
matomo 5.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 95,068 kB
  • sloc: php: 289,425; xml: 127,249; javascript: 112,130; python: 202; sh: 178; makefile: 20; sql: 10
file content (425 lines) | stat: -rw-r--r-- 13,685 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
<?php

/**
 * Matomo - free/libre analytics platform
 *
 * @link    https://matomo.org
 * @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
 */

namespace Piwik\Archive;

use Exception;
use Piwik\DataTable;

/**
 * This class is used to hold and transform archive data for the Archive class.
 *
 * Archive data is loaded into an instance of this type, can be indexed by archive
 * metadata (such as the site ID, period string, etc.), and can be transformed into
 * DataTable and Map instances.
 */
class DataCollection
{
    /**
     * Key of the special element inside a data row that holds the values
     * treated as DataTable metadata.
     */
    public const METADATA_CONTAINER_ROW_KEY = '_metadata';

    /**
     * The archive data, indexed first by site ID and then by period date range. Eg,
     *
     * array(
     *     '0' => array(
     *         '2012-01-01,2012-01-01' => array(...),
     *         '2012-01-02,2012-01-02' => array(...),
     *     ),
     *     '1' => array(
     *         '2012-01-01,2012-01-01' => array(...),
     *     )
     * )
     *
     * Archive data can be either a numeric value or a serialized string blob. Every
     * piece of archive data is associated by its archive name. For example,
     * the array(...) above could look like:
     *
     * array(
     *    'nb_visits' => 1,
     *    'nb_actions' => 2
     * )
     *
     * There is a special element '_metadata' in data rows that holds values treated
     * as DataTable metadata.
     *
     * @var array
     */
    private $data = [];

    /**
     * The whole list of metric/record names that were used in the archive query.
     *
     * @var array
     */
    private $dataNames;

    /**
     * The type of data that was queried for (ie, "blob" or "numeric").
     *
     * @var string
     */
    private $dataType;

    /**
     * The default values to use for each metric/record name that's being queried
     * for.
     *
     * @var array
     */
    private $defaultRow;

    /**
     * The list of all site IDs that were queried for.
     *
     * @var array
     */
    private $sitesId;

    /**
     * The list of all periods that were queried for. Each period is associated with
     * the period's range string. Eg,
     *
     * array(
     *     '2012-01-01,2012-01-31' => new Period(...),
     *     '2012-02-01,2012-02-28' => new Period(...),
     * )
     *
     * Always an array, even when no period was supplied to the constructor.
     *
     * @var \Piwik\Period[]
     */
    private $periods = [];

    /**
     * The segment that was queried
     *
     * @var \Piwik\Segment
     */
    private $segment;

    /**
     * Flag exposed through setAsBuiltWithoutArchives()/wasBuiltWithoutArchives().
     * Defaults to true until a caller changes it.
     *
     * @var bool
     */
    private $isBuiltWithoutArchives = true;

    /**
     * Constructor.
     *
     * @param array $dataNames @see $this->dataNames
     * @param string $dataType @see $this->dataType
     * @param array $sitesId @see $this->sitesId
     * @param \Piwik\Period[] $periods @see $this->periods
     * @param \Piwik\Segment $segment @see $this->segment
     * @param array|null $defaultRow @see $this->defaultRow. When null, every name in
     *                               $dataNames defaults to 0.
     */
    public function __construct($dataNames, $dataType, $sitesId, $periods, $segment, $defaultRow = null)
    {
        $this->dataNames = $dataNames;
        $this->dataType = $dataType;
        $this->sitesId = $sitesId;
        $this->segment = $segment;
        $this->defaultRow = $defaultRow ?? array_fill_keys($dataNames, 0);

        // Start from an empty array so that an empty $periods list never leaves the
        // property as null (array_keys() on it would otherwise fail when an index
        // by period is requested).
        $this->periods = [];
        foreach ($periods as $period) {
            $this->periods[$period->getRangeString()] = $period;
        }
    }

    /**
     * Sets the "built without archives" flag.
     *
     * @param bool $flag
     */
    public function setAsBuiltWithoutArchives(bool $flag): void
    {
        $this->isBuiltWithoutArchives = $flag;
    }

    /**
     * Returns the "built without archives" flag (true unless changed through
     * setAsBuiltWithoutArchives()).
     *
     * @return bool
     */
    public function wasBuiltWithoutArchives(): bool
    {
        return $this->isBuiltWithoutArchives;
    }

    /**
     * Returns a reference to the data for a specific site & period. If there is
     * no data for the given site ID & period, it is set to the default row.
     *
     * @param int $idSite
     * @param string $period eg, '2012-01-01,2012-01-31'
     * @return array The data row, returned by reference so callers can modify it in place.
     */
    public function &get($idSite, $period)
    {
        if (!isset($this->data[$idSite][$period])) {
            $this->data[$idSite][$period] = $this->defaultRow;
        }

        return $this->data[$idSite][$period];
    }

    /**
     * Set data for a specific site & period. If there is no data for the given site ID & period,
     * it is first set to the default row.
     *
     * @param int           $idSite
     * @param string        $period eg, '2012-01-01,2012-01-31'
     * @param string        $name   eg 'nb_visits'
     * @param string        $value  eg 5
     * @param array|null    $meta   Optional metadata to add to the row
     */
    public function set($idSite, $period, $name, $value, ?array $meta = null)
    {
        $row = & $this->get($idSite, $period);
        $row[$name] = $value;

        // Metadata entries are merged one by one so existing metadata keys that are
        // not part of $meta are kept.
        foreach ($meta ?? [] as $metaName => $metaValue) {
            $row[self::METADATA_CONTAINER_ROW_KEY][$metaName] = $metaValue;
        }
    }

    /**
     * Adds a new metadata to the data for specific site & period. If there is no
     * data for the given site ID & period, it is set to the default row.
     *
     * Note: Site ID and period range string are two special types of metadata. Since
     * the data stored in this class is indexed by site & period, this metadata is not
     * stored in individual data rows.
     *
     * @param int $idSite
     * @param string $period eg, '2012-01-01,2012-01-31'
     * @param string $name The metadata name.
     * @param mixed $value The metadata value.
     */
    public function addMetadata($idSite, $period, $name, $value)
    {
        $row = & $this->get($idSite, $period);
        $row[self::METADATA_CONTAINER_ROW_KEY][$name] = $value;
    }

    /**
     * Returns archive data as an array indexed by metadata.
     *
     * @param array $resultIndices An array mapping metadata names to pretty labels
     *                             for them. Each archive data row will be indexed
     *                             by the metadata specified here.
     *
     *                             Eg, array('site' => 'idSite', 'period' => 'Date')
     * @return array
     */
    public function getIndexedArray($resultIndices)
    {
        $indexKeys = array_keys($resultIndices);

        $result = $this->createOrderedIndex($indexKeys);
        foreach ($this->data as $idSite => $rowsByPeriod) {
            foreach ($rowsByPeriod as $period => $row) {
                // FIXME: This hack works around a strange bug that occurs when getting
                //         archive IDs through ArchiveProcessing instances. When a table
                //         does not already exist, for some reason the archive ID for
                //         today (or from two days ago) will be added to the Archive
                //         instances list. The Archive instance will then select data
                //         for periods outside of the requested set.
                //         working around the bug here, but ideally, we need to figure
                //         out why incorrect idarchives are being selected.
                if (empty($this->periods[$period])) {
                    continue;
                }

                $this->putRowInIndex($result, $indexKeys, $row, $idSite, $period);
            }
        }

        return $result;
    }

    /**
     * Returns archive data as a DataTable indexed by metadata. Indexed data will
     * be represented by Map instances.
     *
     * @param array $resultIndices An array mapping metadata names to pretty labels
     *                             for them. Each archive data row will be indexed
     *                             by the metadata specified here.
     *
     *                             Eg, array('site' => 'idSite', 'period' => 'Date')
     * @return DataTable|DataTable\Map
     */
    public function getDataTable($resultIndices)
    {
        $dataTableFactory = $this->createDataTableFactory($this->dataType);
        $index = $this->getIndexedArray($resultIndices);

        return $dataTableFactory->make($index, $resultIndices);
    }

    /**
     * See {@link DataTableFactory::makeMerged()}
     *
     * @param array $resultIndices
     * @return DataTable|DataTable\Map
     * @throws Exception
     */
    public function getMergedDataTable($resultIndices)
    {
        $dataTableFactory = $this->createDataTableFactory($this->dataType);
        $index = $this->getIndexedArray($resultIndices);

        return $dataTableFactory->makeMerged($index, $resultIndices);
    }

    /**
     * Returns archive data as a DataTable indexed by metadata. Indexed data will
     * be represented by Map instances. Each DataTable will have
     * its subtable IDs set.
     *
     * This function will only work if blob data was loaded and only one record
     * was loaded (not including subtables of the record).
     *
     * @param array $resultIndices An array mapping metadata names to pretty labels
     *                             for them. Each archive data row will be indexed
     *                             by the metadata specified here.
     *
     *                             Eg, array('site' => 'idSite', 'period' => 'Date')
     * @param int|null $idSubTable The subtable to return.
     * @param int|null $depth max depth for subtables.
     * @param bool $addMetadataSubTableId Whether to add the DB subtable ID as metadata
     *                                    to each datatable, or not.
     * @throws Exception if the data type is not 'blob' or the number of queried records is not one.
     * @return DataTable|DataTable\Map
     */
    public function getExpandedDataTable($resultIndices, $idSubTable = null, $depth = null, $addMetadataSubTableId = false)
    {
        // Must run before anything else: the factory below is always built for blobs.
        $this->checkExpandedMethodPrerequisites();

        $dataTableFactory = $this->createDataTableFactory('blob');
        $dataTableFactory->expandDataTable($depth, $addMetadataSubTableId);
        $dataTableFactory->useSubtable($idSubTable);

        $index = $this->getIndexedArray($resultIndices);

        return $dataTableFactory->make($index, $resultIndices);
    }

    /**
     * Returns metadata for a data row.
     *
     * @param array $data The data row.
     * @return array The row's metadata, or an empty array if the row has none.
     */
    public static function getDataRowMetadata($data)
    {
        return $data[self::METADATA_CONTAINER_ROW_KEY] ?? [];
    }

    /**
     * Removes all table metadata from a data row.
     *
     * @param array $data The data row (modified in place).
     */
    public static function removeMetadataFromDataRow(&$data)
    {
        unset($data[self::METADATA_CONTAINER_ROW_KEY]);
    }

    /**
     * Builds a DataTableFactory configured with this collection's data names, sites,
     * periods, segment and default row.
     *
     * @param string $dataType The data type to hand to the factory.
     * @return DataTableFactory
     */
    private function createDataTableFactory($dataType)
    {
        return new DataTableFactory(
            $this->dataNames,
            $dataType,
            $this->sitesId,
            $this->periods,
            $this->segment,
            $this->defaultRow
        );
    }

    /**
     * Creates an empty index using a list of metadata names. If the 'site' and/or
     * 'period' metadata names are supplied, empty rows are added for every site/period
     * that was queried for. Any other metadata name yields an empty level.
     *
     * Using this function ensures consistent ordering in the indexed result.
     *
     * @param array $metadataNamesToIndexBy List of metadata names to index archive data by.
     * @return array
     */
    private function createOrderedIndex(array $metadataNamesToIndexBy): array
    {
        if (empty($metadataNamesToIndexBy)) {
            return [];
        }

        // The first name describes the current level; the remaining names describe
        // the nested levels below it.
        $metadataName = array_shift($metadataNamesToIndexBy);

        if ($metadataName == DataTableFactory::TABLE_METADATA_SITE_INDEX) {
            $indexKeyValues = array_values($this->sitesId);
        } elseif ($metadataName == DataTableFactory::TABLE_METADATA_PERIOD_INDEX) {
            $indexKeyValues = array_keys($this->periods);
        } else {
            $indexKeyValues = [];
        }

        if (empty($metadataNamesToIndexBy)) {
            return array_fill_keys($indexKeyValues, []);
        }

        $result = [];
        foreach ($indexKeyValues as $key) {
            $result[$key] = $this->createOrderedIndex($metadataNamesToIndexBy);
        }

        return $result;
    }

    /**
     * Puts an archive data row in an index.
     *
     * Walks (and creates where missing) one index level per metadata name, then
     * stores the row at the innermost level. With no metadata names, the row
     * replaces the index itself.
     *
     * @param array $index The index to modify (by reference).
     * @param array $metadataNamesToIndexBy List of metadata names the index is keyed by.
     * @param array $row The data row to store.
     * @param int $idSite Site ID the row belongs to.
     * @param string $period Period range string the row belongs to.
     */
    private function putRowInIndex(&$index, $metadataNamesToIndexBy, $row, $idSite, $period): void
    {
        $currentLevel = & $index;

        foreach ($metadataNamesToIndexBy as $metadataName) {
            if ($metadataName == DataTableFactory::TABLE_METADATA_SITE_INDEX) {
                $key = $idSite;
            } elseif ($metadataName == DataTableFactory::TABLE_METADATA_PERIOD_INDEX) {
                $key = $period;
            } else {
                // Any other name is looked up in the row's own metadata container.
                $key = $row[self::METADATA_CONTAINER_ROW_KEY][$metadataName];
            }

            if (!isset($currentLevel[$key])) {
                $currentLevel[$key] = [];
            }

            $currentLevel = & $currentLevel[$key];
        }

        $currentLevel = $row;
    }

    /**
     * Ensures getExpandedDataTable() can be used with this collection.
     *
     * @throws Exception if the data type is not 'blob' or the number of queried
     *                   records is not exactly one.
     */
    private function checkExpandedMethodPrerequisites(): void
    {
        if ($this->dataType != 'blob') {
            throw new Exception("DataCollection: cannot call getExpandedDataTable with {$this->dataType} data types. Only works with blob data.");
        }

        if (count($this->dataNames) !== 1) {
            throw new Exception("DataCollection: cannot call getExpandedDataTable with more than one record.");
        }
    }
}