1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816
|
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import logging
from typing import (
TYPE_CHECKING,
Mapping,
TypedDict,
cast,
)
import attr
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import (
DatabasePool,
LoggingDatabaseConnection,
LoggingTransaction,
make_tuple_comparison_clause,
)
from synapse.storage.databases.main.monthly_active_users import (
MonthlyActiveUsersWorkerStore,
)
from synapse.types import JsonDict, UserID
from synapse.util.caches.lrucache import LruCache
from synapse.util.duration import Duration
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
# Number of msec of granularity to store the user IP 'last seen' time. Smaller
# times give more inserts into the database even for readonly API hits
# 120 seconds == 2 minutes
LAST_SEEN_GRANULARITY = 120 * 1000
@attr.s(slots=True, frozen=True, auto_attribs=True)
class DeviceLastConnectionInfo:
"""Metadata for the last connection seen for a user and device combination"""
# These types must match the columns in the `devices` table
user_id: str
device_id: str
ip: str | None
user_agent: str | None
last_seen: int | None
class LastConnectionInfo(TypedDict):
"""Metadata for the last connection seen for an access token and IP combination"""
# These types must match the columns in the `user_ips` table
access_token: str
ip: str
user_agent: str
last_seen: int
class ClientIpBackgroundUpdateStore(SQLBaseStore):
def __init__(
self,
database: DatabasePool,
db_conn: LoggingDatabaseConnection,
hs: "HomeServer",
):
super().__init__(database, db_conn, hs)
self.db_pool.updates.register_background_index_update(
"user_ips_device_index",
index_name="user_ips_device_id",
table="user_ips",
columns=["user_id", "device_id", "last_seen"],
)
self.db_pool.updates.register_background_index_update(
"user_ips_last_seen_index",
index_name="user_ips_last_seen",
table="user_ips",
columns=["user_id", "last_seen"],
)
self.db_pool.updates.register_background_index_update(
"user_ips_last_seen_only_index",
index_name="user_ips_last_seen_only",
table="user_ips",
columns=["last_seen"],
)
self.db_pool.updates.register_background_update_handler(
"user_ips_analyze", self._analyze_user_ip
)
self.db_pool.updates.register_background_update_handler(
"user_ips_remove_dupes", self._remove_user_ip_dupes
)
# Register a unique index
self.db_pool.updates.register_background_index_update(
"user_ips_device_unique_index",
index_name="user_ips_user_token_ip_unique_index",
table="user_ips",
columns=["user_id", "access_token", "ip"],
unique=True,
)
# Drop the old non-unique index
self.db_pool.updates.register_background_update_handler(
"user_ips_drop_nonunique_index", self._remove_user_ip_nonunique
)
# Update the last seen info in devices.
self.db_pool.updates.register_background_update_handler(
"devices_last_seen", self._devices_last_seen_update
)
async def _remove_user_ip_nonunique(
self, progress: JsonDict, batch_size: int
) -> int:
def f(conn: LoggingDatabaseConnection) -> None:
txn = conn.cursor()
txn.execute("DROP INDEX IF EXISTS user_ips_user_ip")
txn.close()
await self.db_pool.runWithConnection(f)
await self.db_pool.updates._end_background_update(
"user_ips_drop_nonunique_index"
)
return 1
async def _analyze_user_ip(self, progress: JsonDict, batch_size: int) -> int:
# Background update to analyze user_ips table before we run the
# deduplication background update. The table may not have been analyzed
# for ages due to the table locks.
#
# This will lock out the naive upserts to user_ips while it happens, but
# the analyze should be quick (28GB table takes ~10s)
def user_ips_analyze(txn: LoggingTransaction) -> None:
txn.execute("ANALYZE user_ips")
await self.db_pool.runInteraction("user_ips_analyze", user_ips_analyze)
await self.db_pool.updates._end_background_update("user_ips_analyze")
return 1
async def _remove_user_ip_dupes(self, progress: JsonDict, batch_size: int) -> int:
# This works function works by scanning the user_ips table in batches
# based on `last_seen`. For each row in a batch it searches the rest of
# the table to see if there are any duplicates, if there are then they
# are removed and replaced with a suitable row.
# Fetch the start of the batch
begin_last_seen: int = progress.get("last_seen", 0)
def get_last_seen(txn: LoggingTransaction) -> int | None:
txn.execute(
"""
SELECT last_seen FROM user_ips
WHERE last_seen > ?
ORDER BY last_seen
LIMIT 1
OFFSET ?
""",
(begin_last_seen, batch_size),
)
row = cast(tuple[int] | None, txn.fetchone())
if row:
return row[0]
else:
return None
# Get a last seen that has roughly `batch_size` since `begin_last_seen`
end_last_seen = await self.db_pool.runInteraction(
"user_ips_dups_get_last_seen", get_last_seen
)
# If it returns None, then we're processing the last batch
last = end_last_seen is None
logger.info(
"Scanning for duplicate 'user_ips' rows in range: %s <= last_seen < %s",
begin_last_seen,
end_last_seen,
)
def remove(txn: LoggingTransaction) -> None:
# This works by looking at all entries in the given time span, and
# then for each (user_id, access_token, ip) tuple in that range
# checking for any duplicates in the rest of the table (via a join).
# It then only returns entries which have duplicates, and the max
# last_seen across all duplicates, which can the be used to delete
# all other duplicates.
# It is efficient due to the existence of (user_id, access_token,
# ip) and (last_seen) indices.
# Define the search space, which requires handling the last batch in
# a different way
args: tuple[int, ...]
if last:
clause = "? <= last_seen"
args = (begin_last_seen,)
else:
assert end_last_seen is not None
clause = "? <= last_seen AND last_seen < ?"
args = (begin_last_seen, end_last_seen)
# (Note: The DISTINCT in the inner query is important to ensure that
# the COUNT(*) is accurate, otherwise double counting may happen due
# to the join effectively being a cross product)
txn.execute(
"""
SELECT user_id, access_token, ip,
MAX(device_id), MAX(user_agent), MAX(last_seen),
COUNT(*)
FROM (
SELECT DISTINCT user_id, access_token, ip
FROM user_ips
WHERE {}
) c
INNER JOIN user_ips USING (user_id, access_token, ip)
GROUP BY user_id, access_token, ip
HAVING count(*) > 1
""".format(clause),
args,
)
res = cast(
list[tuple[str, str, str, str | None, str, int, int]], txn.fetchall()
)
# We've got some duplicates
for i in res:
user_id, access_token, ip, device_id, user_agent, last_seen, count = i
# We want to delete the duplicates so we end up with only a
# single row.
#
# The naive way of doing this would be just to delete all rows
# and reinsert a constructed row. However, if there are a lot of
# duplicate rows this can cause the table to grow a lot, which
# can be problematic in two ways:
# 1. If user_ips is already large then this can cause the
# table to rapidly grow, potentially filling the disk.
# 2. Reinserting a lot of rows can confuse the table
# statistics for postgres, causing it to not use the
# correct indices for the query above, resulting in a full
# table scan. This is incredibly slow for large tables and
# can kill database performance. (This seems to mainly
# happen for the last query where the clause is simply `? <
# last_seen`)
#
# So instead we want to delete all but *one* of the duplicate
# rows. That is hard to do reliably, so we cheat and do a two
# step process:
# 1. Delete all rows with a last_seen strictly less than the
# max last_seen. This hopefully results in deleting all but
# one row the majority of the time, but there may be
# duplicate last_seen
# 2. If multiple rows remain, we fall back to the naive method
# and simply delete all rows and reinsert.
#
# Note that this relies on no new duplicate rows being inserted,
# but if that is happening then this entire process is futile
# anyway.
# Do step 1:
txn.execute(
"""
DELETE FROM user_ips
WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ?
""",
(user_id, access_token, ip, last_seen),
)
if txn.rowcount == count - 1:
# We deleted all but one of the duplicate rows, i.e. there
# is exactly one remaining and so there is nothing left to
# do.
continue
elif txn.rowcount >= count:
raise Exception(
"We deleted more duplicate rows from 'user_ips' than expected"
)
# The previous step didn't delete enough rows, so we fallback to
# step 2:
# Drop all the duplicates
txn.execute(
"""
DELETE FROM user_ips
WHERE user_id = ? AND access_token = ? AND ip = ?
""",
(user_id, access_token, ip),
)
# Add in one to be the last_seen
txn.execute(
"""
INSERT INTO user_ips
(user_id, access_token, ip, device_id, user_agent, last_seen)
VALUES (?, ?, ?, ?, ?, ?)
""",
(user_id, access_token, ip, device_id, user_agent, last_seen),
)
self.db_pool.updates._background_update_progress_txn(
txn, "user_ips_remove_dupes", {"last_seen": end_last_seen}
)
await self.db_pool.runInteraction("user_ips_dups_remove", remove)
if last:
await self.db_pool.updates._end_background_update("user_ips_remove_dupes")
return batch_size
async def _devices_last_seen_update(
self, progress: JsonDict, batch_size: int
) -> int:
"""Background update to insert last seen info into devices table"""
last_user_id: str = progress.get("last_user_id", "")
last_device_id: str = progress.get("last_device_id", "")
def _devices_last_seen_update_txn(txn: LoggingTransaction) -> int:
# This consists of two queries:
#
# 1. The sub-query searches for the next N devices and joins
# against user_ips to find the max last_seen associated with
# that device.
# 2. The outer query then joins again against user_ips on
# user/device/last_seen. This *should* hopefully only
# return one row, but if it does return more than one then
# we'll just end up updating the same device row multiple
# times, which is fine.
where_args: list[str | int]
where_clause, where_args = make_tuple_comparison_clause(
[("user_id", last_user_id), ("device_id", last_device_id)],
)
sql = """
SELECT
last_seen, ip, user_agent, user_id, device_id
FROM (
SELECT
user_id, device_id, MAX(u.last_seen) AS last_seen
FROM devices
INNER JOIN user_ips AS u USING (user_id, device_id)
WHERE %(where_clause)s
GROUP BY user_id, device_id
ORDER BY user_id ASC, device_id ASC
LIMIT ?
) c
INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
""" % {"where_clause": where_clause}
txn.execute(sql, where_args + [batch_size])
rows = cast(list[tuple[int, str, str, str, str]], txn.fetchall())
if not rows:
return 0
sql = """
UPDATE devices
SET last_seen = ?, ip = ?, user_agent = ?
WHERE user_id = ? AND device_id = ?
"""
txn.execute_batch(sql, rows)
_, _, _, user_id, device_id = rows[-1]
self.db_pool.updates._background_update_progress_txn(
txn,
"devices_last_seen",
{"last_user_id": user_id, "last_device_id": device_id},
)
return len(rows)
updated = await self.db_pool.runInteraction(
"_devices_last_seen_update", _devices_last_seen_update_txn
)
if not updated:
await self.db_pool.updates._end_background_update("devices_last_seen")
return updated
class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorkerStore):
def __init__(
self,
database: DatabasePool,
db_conn: LoggingDatabaseConnection,
hs: "HomeServer",
):
super().__init__(database, db_conn, hs)
self.server_name = hs.hostname
if hs.config.redis.redis_enabled:
# If we're using Redis, we can shift this update process off to
# the background worker
self._update_on_this_worker = hs.config.worker.run_background_tasks
else:
# If we're NOT using Redis, this must be handled by the master
self._update_on_this_worker = hs.get_instance_name() == "master"
self.user_ips_max_age = hs.config.server.user_ips_max_age
# (user_id, access_token, ip,) -> last_seen
self.client_ip_last_seen = LruCache[tuple[str, str, str], int](
cache_name="client_ip_last_seen",
server_name=self.server_name,
max_size=50000,
clock=hs.get_clock(),
)
if hs.config.worker.run_background_tasks and self.user_ips_max_age:
self.clock.looping_call(self._prune_old_user_ips, Duration(seconds=5))
if self._update_on_this_worker:
# This is the designated worker that can write to the client IP
# tables.
# (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
self._batch_row_update: dict[
tuple[str, str, str], tuple[str, str | None, int]
] = {}
self.clock.looping_call(self._update_client_ips_batch, Duration(seconds=5))
hs.register_async_shutdown_handler(
phase="before",
eventType="shutdown",
shutdown_func=self._update_client_ips_batch,
)
@wrap_as_background_process("prune_old_user_ips")
async def _prune_old_user_ips(self) -> None:
"""Removes entries in user IPs older than the configured period."""
if self.user_ips_max_age is None:
# Nothing to do
return
if not await self.db_pool.updates.has_completed_background_update(
"devices_last_seen"
):
# Only start pruning if we have finished populating the devices
# last seen info.
return
# We do a slightly funky SQL delete to ensure we don't try and delete
# too much at once (as the table may be very large from before we
# started pruning).
#
# This works by finding the max last_seen that is less than the given
# time, but has no more than N rows before it, deleting all rows with
# a lesser last_seen time. (We use an `IN` clause to force postgres to
# use the index, otherwise it tends to do a seq scan).
sql = """
DELETE FROM user_ips
WHERE last_seen IN (
SELECT last_seen FROM user_ips
WHERE last_seen <= ?
ORDER BY last_seen ASC
LIMIT 5000
)
"""
timestamp = self.clock.time_msec() - self.user_ips_max_age
def _prune_old_user_ips_txn(txn: LoggingTransaction) -> None:
txn.execute(sql, (timestamp,))
await self.db_pool.runInteraction(
"_prune_old_user_ips", _prune_old_user_ips_txn
)
async def _get_last_client_ip_by_device_from_database(
self, user_id: str, device_id: str | None
) -> dict[tuple[str, str], DeviceLastConnectionInfo]:
"""For each device_id listed, give the user_ip it was last seen on.
The result might be slightly out of date as client IPs are inserted in batches.
Args:
user_id: The user to fetch devices for.
device_id: If None fetches all devices for the user
Returns:
A dictionary mapping a tuple of (user_id, device_id) to DeviceLastConnectionInfo.
"""
keyvalues = {"user_id": user_id}
if device_id is not None:
keyvalues["device_id"] = device_id
res = cast(
list[tuple[str, str | None, str | None, str, int | None]],
await self.db_pool.simple_select_list(
table="devices",
keyvalues=keyvalues,
retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"),
),
)
return {
(user_id, device_id): DeviceLastConnectionInfo(
user_id=user_id,
device_id=device_id,
ip=ip,
user_agent=user_agent,
last_seen=last_seen,
)
for user_id, ip, user_agent, device_id, last_seen in res
}
async def _get_user_ip_and_agents_from_database(
self, user: UserID, since_ts: int = 0
) -> list[LastConnectionInfo]:
"""Fetch the IPs and user agents for a user since the given timestamp.
The result might be slightly out of date as client IPs are inserted in batches.
Args:
user: The user for which to fetch IP addresses and user agents.
since_ts: The timestamp after which to fetch IP addresses and user agents,
in milliseconds.
Returns:
A list of dictionaries, each containing:
* `access_token`: The access token used.
* `ip`: The IP address used.
* `user_agent`: The last user agent seen for this access token and IP
address combination.
* `last_seen`: The timestamp at which this access token and IP address
combination was last seen, in milliseconds.
Only the latest user agent for each access token and IP address combination
is available.
"""
user_id = user.to_string()
def get_recent(txn: LoggingTransaction) -> list[tuple[str, str, str, int]]:
txn.execute(
"""
SELECT access_token, ip, user_agent, last_seen FROM user_ips
WHERE last_seen >= ? AND user_id = ?
ORDER BY last_seen
DESC
""",
(since_ts, user_id),
)
return cast(list[tuple[str, str, str, int]], txn.fetchall())
rows = await self.db_pool.runInteraction(
desc="get_user_ip_and_agents", func=get_recent
)
return [
{
"access_token": access_token,
"ip": ip,
"user_agent": user_agent,
"last_seen": last_seen,
}
for access_token, ip, user_agent, last_seen in rows
]
async def insert_client_ip(
self,
user_id: str,
access_token: str,
ip: str,
user_agent: str,
device_id: str | None,
now: int | None = None,
) -> None:
"""Record that `user_id` used `access_token` from this `ip` address.
This method does two things.
1. It queues up a row to be upserted into the `client_ips` table. These happen
periodically; see _update_client_ips_batch.
2. It immediately records this user as having taken action for the purposes of
MAU tracking.
Any DB writes take place on the background tasks worker, falling back to the
main process. If we're not that worker, this method emits a replication payload
to run this logic on that worker.
Two caveats to note:
- We only take action once per LAST_SEEN_GRANULARITY, to avoid spamming the
DB with writes.
- Requests using the sliding-sync proxy's user agent are excluded, as its
requests are not directly driven by end-users. This is a hack and we're not
very proud of it.
"""
# The sync proxy continuously triggers /sync even if the user is not
# present so should be excluded from user_ips entries.
if user_agent == "sync-v3-proxy-":
return
if not now:
now = int(self.clock.time_msec())
key = (user_id, access_token, ip)
try:
last_seen = self.client_ip_last_seen.get(key)
except KeyError:
last_seen = None
# Rate-limited inserts
if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY:
return
self.client_ip_last_seen.set(key, now)
if self._update_on_this_worker:
await self.populate_monthly_active_users(user_id)
self._batch_row_update[key] = (user_agent, device_id, now)
else:
# We are not the designated writer-worker, so stream over replication
self.hs.get_replication_command_handler().send_user_ip(
user_id, access_token, ip, user_agent, device_id, now
)
@wrap_as_background_process("update_client_ips")
async def _update_client_ips_batch(self) -> None:
assert self._update_on_this_worker, (
"This worker is not designated to update client IPs"
)
# If the DB pool has already terminated, don't try updating
if not self.db_pool.is_running():
return
to_update = self._batch_row_update
self._batch_row_update = {}
if to_update:
await self.db_pool.runInteraction(
"_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
)
def _update_client_ips_batch_txn(
self,
txn: LoggingTransaction,
to_update: Mapping[tuple[str, str, str], tuple[str, str | None, int]],
) -> None:
assert self._update_on_this_worker, (
"This worker is not designated to update client IPs"
)
# Keys and values for the `user_ips` upsert.
user_ips_keys = []
user_ips_values = []
# Keys and values for the `devices` update.
devices_keys = []
devices_values = []
for entry in to_update.items():
(user_id, access_token, ip), (user_agent, device_id, last_seen) = entry
user_ips_keys.append((user_id, access_token, ip))
user_ips_values.append((user_agent, device_id, last_seen))
# Technically an access token might not be associated with
# a device so we need to check.
if device_id:
devices_keys.append((user_id, device_id))
devices_values.append((user_agent, last_seen, ip))
self.db_pool.simple_upsert_many_txn(
txn,
table="user_ips",
key_names=("user_id", "access_token", "ip"),
key_values=user_ips_keys,
value_names=("user_agent", "device_id", "last_seen"),
value_values=user_ips_values,
)
if devices_values:
self.db_pool.simple_update_many_txn(
txn,
table="devices",
key_names=("user_id", "device_id"),
key_values=devices_keys,
value_names=("user_agent", "last_seen", "ip"),
value_values=devices_values,
)
async def get_last_client_ip_by_device(
self, user_id: str, device_id: str | None
) -> dict[tuple[str, str], DeviceLastConnectionInfo]:
"""For each device_id listed, give the user_ip it was last seen on
Args:
user_id: The user to fetch devices for.
device_id: If None fetches all devices for the user
Returns:
A dictionary mapping a tuple of (user_id, device_id) to DeviceLastConnectionInfo.
"""
ret = await self._get_last_client_ip_by_device_from_database(user_id, device_id)
if not self._update_on_this_worker:
# Only the writing-worker has additional in-memory data to enhance
# the result
return ret
# Update what is retrieved from the database with data which is pending
# insertion, as if it has already been stored in the database.
for key in self._batch_row_update:
uid, _access_token, ip = key
if uid == user_id:
user_agent, did, last_seen = self._batch_row_update[key]
if did is None:
# These updates don't make it to the `devices` table
continue
if not device_id or did == device_id:
ret[(user_id, did)] = DeviceLastConnectionInfo(
user_id=user_id,
ip=ip,
user_agent=user_agent,
device_id=did,
last_seen=last_seen,
)
return ret
async def get_user_ip_and_agents(
self, user: UserID, since_ts: int = 0
) -> list[LastConnectionInfo]:
"""Fetch the IPs and user agents for a user since the given timestamp.
Args:
user: The user for which to fetch IP addresses and user agents.
since_ts: The timestamp after which to fetch IP addresses and user agents,
in milliseconds.
Returns:
A list of dictionaries, each containing:
* `access_token`: The access token used.
* `ip`: The IP address used.
* `user_agent`: The last user agent seen for this access token and IP
address combination.
* `last_seen`: The timestamp at which this access token and IP address
combination was last seen, in milliseconds.
Only the latest user agent for each access token and IP address combination
is available.
"""
rows_from_db = await self._get_user_ip_and_agents_from_database(user, since_ts)
if not self._update_on_this_worker:
# Only the writing-worker has additional in-memory data to enhance
# the result
return rows_from_db
results: dict[tuple[str, str], LastConnectionInfo] = {
(connection["access_token"], connection["ip"]): connection
for connection in rows_from_db
}
# Overlay data that is pending insertion on top of the results from the
# database.
user_id = user.to_string()
for key in self._batch_row_update:
uid, access_token, ip = key
if uid == user_id:
user_agent, _, last_seen = self._batch_row_update[key]
if last_seen >= since_ts:
results[(access_token, ip)] = {
"access_token": access_token,
"ip": ip,
"user_agent": user_agent,
"last_seen": last_seen,
}
return list(results.values())
async def get_last_seen_for_user_id(self, user_id: str) -> int | None:
"""Get the last seen timestamp for a user, if we have it."""
return await self.db_pool.simple_select_one_onecol(
table="user_ips",
keyvalues={"user_id": user_id},
retcol="MAX(last_seen)",
allow_none=True,
desc="get_last_seen_for_user_id",
)
|