File: bookmark_update_preprocessing.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (267 lines) | stat: -rw-r--r-- 10,785 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/sync/engine/bookmark_update_preprocessing.h"

#include <array>

#include "base/base64.h"
#include "base/containers/span.h"
#include "base/hash/sha1.h"
#include "base/logging.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/uuid.h"
#include "components/sync/base/data_type.h"
#include "components/sync/base/hash_util.h"
#include "components/sync/base/unique_position.h"
#include "components/sync/protocol/bookmark_specifics.pb.h"
#include "components/sync/protocol/entity_specifics.pb.h"
#include "components/sync/protocol/sync_entity.pb.h"

namespace syncer {

namespace {

// Used in metric "Sync.BookmarkGUIDSource2". These values are persisted to
// logs. Entries should not be renumbered and numeric values should never be
// reused.
//
// Each value describes where AdaptGuidForBookmark() obtained the UUID for an
// incoming bookmark update (or why it could not obtain one).
// LINT.IfChange(BookmarkGUIDSource)
enum class BookmarkGuidSource {
  // UUID came from specifics, i.e. BookmarkSpecifics.guid was already
  // populated and no adaptation was needed.
  kSpecifics = 0,
  // UUID came from originator_client_item_id and is valid. The value is
  // lowercased before being written to the specifics.
  kValidOCII = 1,
  // UUID not found in the specifics and originator_client_item_id is invalid,
  // so field left empty (currently unused). Kept only so that the numeric
  // value 2 is never reused.
  kDeprecatedLeftEmpty = 2,
  // UUID not found in the specifics and originator_client_item_id is invalid,
  // so the UUID is inferred from combining originator_client_item_id and
  // originator_cache_guid.
  kInferred = 3,
  // UUID not found in the specifics and the update doesn't have enough
  // information to infer it. This is likely because the update contains a
  // client tag instead of originator information.
  kLeftEmptyPossiblyForClientTag = 4,
  // Must always alias the highest valid enumerator; update it when appending
  // a new entry. NOTE(review): presumably consumed by
  // base::UmaHistogramEnumeration() to size the histogram -- confirm.
  kMaxValue = kLeftEmptyPossiblyForClientTag,
};
// LINT.ThenChange(/tools/metrics/histograms/metadata/sync/enums.xml:BookmarkGUIDSource)

// Records `source` as one sample of the "Sync.BookmarkGUIDSource2"
// enumeration histogram.
inline void LogGuidSource(BookmarkGuidSource source) {
  constexpr char kHistogramName[] = "Sync.BookmarkGUIDSource2";
  base::UmaHistogramEnumeration(kHistogramName, source);
}

// Formats the first 16 bytes of `bytes` as a lowercase, hyphenated UUID string
// (8-4-4-4-12 hex digits). The version and variant bits are overridden so that
// the result is a well-formed version 4 UUID as described in RFC 4122,
// section 4.4; all other bits are taken verbatim from the input. Bytes beyond
// index 15 are ignored. `bytes` must contain at least 16 bytes.
std::string ComputeUuidFromBytes(base::span<const uint8_t> bytes) {
  DCHECK_GE(bytes.size(), 16U);

  // This implementation is based on the equivalent logic in base/uuid.cc.

  // Set the UUID to version 4 as described in RFC 4122, section 4.4.
  // The format of UUID version 4 must be xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx,
  // where y is one of [8, 9, A, B].

  // Clear the version bits (the high nibble of byte 6) and set the version to
  // 4. The low nibble is preserved from the input.
  const uint8_t byte6 = (bytes[6] & 0x0fU) | 0x40U;

  // Set the two most significant bits (bits 6 and 7) of the
  // clock_seq_hi_and_reserved to zero and one, respectively:
  const uint8_t byte8 = (bytes[8] & 0x3fU) | 0x80U;

  return base::StringPrintf(
      "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
      bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], byte6,
      bytes[7], byte8, bytes[9], bytes[10], bytes[11], bytes[12], bytes[13],
      bytes[14], bytes[15]);
}

// Bookmarks created before 2015 (https://codereview.chromium.org/1136953013)
// have an originator client item ID that is NOT a UUID. Hence, an alternative
// method must be used to infer a UUID deterministically from a combination of
// sync fields that is known to be a) immutable and b) unique per synced
// bookmark.
std::string InferGuidForLegacyBookmark(
    const std::string& originator_cache_guid,
    const std::string& originator_client_item_id) {
  DCHECK(
      !base::Uuid::ParseCaseInsensitive(originator_client_item_id).is_valid());

  const std::string unique_tag =
      base::StrCat({originator_cache_guid, originator_client_item_id});
  const base::SHA1Digest hash = base::SHA1Hash(base::as_byte_span(unique_tag));

  static_assert(base::kSHA1Length >= 16, "16 bytes needed to infer UUID");

  const std::string guid = ComputeUuidFromBytes(base::span(hash));
  DCHECK(base::Uuid::ParseLowercase(guid).is_valid());
  return guid;
}

// Legacy computation of the unique position suffix for bookmarks that did not
// have a client tag hash. The suffix is the Base64 encoding of the SHA-1 hash
// of: a serialized EntitySpecifics with only the default bookmark field set,
// followed by `originator_cache_guid`, followed by
// `originator_client_item_id`.
UniquePosition::Suffix GenerateUniquePositionSuffixForBookmark(
    const std::string& originator_cache_guid,
    const std::string& originator_client_item_id) {
  // A blank proto with just the bookmark field in it serializes with a
  // termination symbol, which doubles as a handy delimiter.
  sync_pb::EntitySpecifics bookmark_marker;
  AddDefaultFieldValue(BOOKMARKS, &bookmark_marker);

  std::string hash_input;
  bookmark_marker.AppendToString(&hash_input);
  hash_input.append(originator_cache_guid);
  hash_input.append(originator_client_item_id);

  const std::string encoded_hash =
      base::Base64Encode(base::SHA1Hash(base::as_byte_span(hash_input)));

  // The encoded hash is required to fill the suffix exactly.
  UniquePosition::Suffix suffix;
  CHECK_EQ(suffix.size(), encoded_hash.size());
  std::ranges::copy(encoded_hash, suffix.begin());
  return suffix;
}

// Returns the unique position to use for `update_entity`. If the entity
// already carries a unique position it is returned as is; otherwise one is
// synthesized from the legacy positioning fields, in this order of preference:
// `position_in_parent`, `insert_after_item_id`, and finally an initial
// position.
sync_pb::UniquePosition GetUniquePositionFromSyncEntity(
    const sync_pb::SyncEntity& update_entity) {
  if (update_entity.has_unique_position()) {
    return update_entity.unique_position();
  }

  // Use the deterministic legacy suffix when both originator fields are
  // present, and a random suffix otherwise.
  const bool has_originator_info =
      update_entity.has_originator_cache_guid() &&
      update_entity.has_originator_client_item_id();
  const UniquePosition::Suffix suffix =
      has_originator_info
          ? GenerateUniquePositionSuffixForBookmark(
                update_entity.originator_cache_guid(),
                update_entity.originator_client_item_id())
          : UniquePosition::RandomSuffix();

  if (update_entity.has_position_in_parent()) {
    return UniquePosition::FromInt64(update_entity.position_in_parent(), suffix)
        .ToProto();
  }

  // Only the predecessor ID is known: position the entity at integer 0.
  if (update_entity.has_insert_after_item_id()) {
    return UniquePosition::FromInt64(0, suffix).ToProto();
  }

  // No positioning information whatsoever, which should be unreachable today.
  // For future-compatibility in case the fields in SyncEntity get removed,
  // fall back to the initial position for the suffix computed above, which is
  // better than dropping the whole update.
  return UniquePosition::InitialPosition(suffix).ToProto();
}

}  // namespace

// Populates the bookmark's `unique_position` in `specifics` from the
// positioning information in `update_entity` when it is missing. Returns true
// if `specifics` was modified, and false if nothing had to be done (the field
// was already set, the update is a deletion, or the entity is a permanent
// folder). `specifics` must not be null.
bool AdaptUniquePositionForBookmark(const sync_pb::SyncEntity& update_entity,
                                    sync_pb::EntitySpecifics* specifics) {
  DCHECK(specifics);

  // Nothing to do if the field is set or if it's a deletion.
  const bool already_positioned = specifics->bookmark().has_unique_position();
  if (already_positioned || update_entity.deleted()) {
    return false;
  }

  // Permanent folders (folders with a server-defined unique tag) don't need
  // positioning information.
  const bool is_permanent_folder =
      update_entity.folder() &&
      !update_entity.server_defined_unique_tag().empty();
  if (is_permanent_folder) {
    return false;
  }

  *specifics->mutable_bookmark()->mutable_unique_position() =
      GetUniquePositionFromSyncEntity(update_entity);
  return true;
}

// Populates the bookmark's `type` (FOLDER or URL) in `specifics` when it is
// missing, based on legacy information in `update_entity`. Does nothing if
// the type is already set or the update is a deletion. `specifics` must not be
// null.
void AdaptTypeForBookmark(const sync_pb::SyncEntity& update_entity,
                          sync_pb::EntitySpecifics* specifics) {
  DCHECK(specifics);

  // Nothing to do if the field is set or if it's a deletion.
  if (specifics->bookmark().has_type() || update_entity.deleted()) {
    return;
  }
  DCHECK(specifics->has_bookmark());

  bool is_folder;
  if (update_entity.has_folder()) {
    // For legacy data, SyncEntity.folder is always populated.
    is_folder = update_entity.folder();
  } else {
    // Should be unreachable today. In case SyncEntity.folder gets removed in
    // the future, with legacy data still being around prior to M94, infer
    // folderness based on the presence of field `url` (only populated for URL
    // bookmarks).
    is_folder = !specifics->bookmark().has_url();
  }

  specifics->mutable_bookmark()->set_type(
      is_folder ? sync_pb::BookmarkSpecifics::FOLDER
                : sync_pb::BookmarkSpecifics::URL);
}

// Copies the legacy `SyncEntity.name` into the bookmark's
// `legacy_canonicalized_title` in `specifics` when the latter is missing and
// the name is non-empty. Does nothing for deletions or when
// `specifics_were_encrypted` is true. `specifics` must not be null.
void AdaptTitleForBookmark(const sync_pb::SyncEntity& update_entity,
                           sync_pb::EntitySpecifics* specifics,
                           bool specifics_were_encrypted) {
  DCHECK(specifics);

  // If encrypted, the name field is never populated (unencrypted) for privacy
  // reasons. Encryption was also introduced after moving the name out of
  // SyncEntity, so this adaptation is not needed at all in that case.
  if (specifics_were_encrypted || update_entity.deleted()) {
    return;
  }
  DCHECK(specifics->has_bookmark());

  // A title that is already present in the specifics always wins.
  if (specifics->bookmark().has_legacy_canonicalized_title()) {
    return;
  }

  // Legacy clients populate the name field in the SyncEntity instead of the
  // title field in the BookmarkSpecifics.
  const std::string& legacy_name = update_entity.name();
  if (legacy_name.empty()) {
    return;
  }
  specifics->mutable_bookmark()->set_legacy_canonicalized_title(legacy_name);
}

// Populates the bookmark's `guid` in `specifics` when it is missing, and logs
// the source of the UUID to "Sync.BookmarkGUIDSource2". In order of
// preference:
//   1. A guid already present in the specifics is kept untouched.
//   2. A valid UUID in originator_client_item_id is adopted, lowercased.
//   3. If both originator fields are empty, the guid is left empty.
//   4. Otherwise the guid is inferred from the originator fields.
// Tombstones and permanent entities are skipped without logging. `specifics`
// must not be null.
void AdaptGuidForBookmark(const sync_pb::SyncEntity& update_entity,
                          sync_pb::EntitySpecifics* specifics) {
  DCHECK(specifics);

  // Tombstones and permanent entities don't have a UUID.
  if (update_entity.deleted() ||
      !update_entity.server_defined_unique_tag().empty()) {
    return;
  }
  DCHECK(specifics->has_bookmark());

  if (specifics->bookmark().has_guid()) {
    LogGuidSource(BookmarkGuidSource::kSpecifics);
    return;
  }

  // Legacy clients don't populate the guid field in the BookmarkSpecifics, so
  // fall back to the originator information.
  const std::string& client_item_id = update_entity.originator_client_item_id();
  const std::string& cache_guid = update_entity.originator_cache_guid();

  if (base::Uuid::ParseCaseInsensitive(client_item_id).is_valid()) {
    // Bookmarks created around 2016, between [M44..M52) use an uppercase UUID
    // as originator client item ID, so it needs to be lowercased to adhere to
    // the invariant that UUIDs in specifics are canonicalized.
    specifics->mutable_bookmark()->set_guid(base::ToLowerASCII(client_item_id));
    DCHECK(base::Uuid::ParseLowercase(specifics->bookmark().guid()).is_valid());
    LogGuidSource(BookmarkGuidSource::kValidOCII);
    return;
  }

  if (cache_guid.empty() && client_item_id.empty()) {
    // There's no UUID that could be inferred from empty originator
    // information.
    LogGuidSource(BookmarkGuidSource::kLeftEmptyPossiblyForClientTag);
    return;
  }

  // The originator client item ID is not a UUID: derive one deterministically.
  specifics->mutable_bookmark()->set_guid(
      InferGuidForLegacyBookmark(cache_guid, client_item_id));
  DCHECK(base::Uuid::ParseLowercase(specifics->bookmark().guid()).is_valid());
  LogGuidSource(BookmarkGuidSource::kInferred);
}

// Test-only entry point: forwards both arguments to the file-local
// InferGuidForLegacyBookmark() and returns its result unchanged, so that the
// UUID inference can be exercised from outside this translation unit.
std::string InferGuidForLegacyBookmarkForTesting(
    const std::string& originator_cache_guid,
    const std::string& originator_client_item_id) {
  return InferGuidForLegacyBookmark(originator_cache_guid,
                                    originator_client_item_id);
}

}  // namespace syncer