File: passages_util_unittest.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (124 lines) | stat: -rw-r--r-- 4,487 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/history_embeddings/passages_util.h"

#include <memory>

#include "base/logging.h"
#include "base/rand_util.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/timer/elapsed_timer.h"
#include "components/history_embeddings/proto/history_embeddings.pb.h"
#include "components/os_crypt/async/browser/test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace history_embeddings {

namespace {

// Returns a passage of `kWordCount` words joined by single spaces. Each word
// is drawn independently, via base::RandInt(), from the words of the lorem
// ipsum text below, so the result differs from call to call.
std::string RandomPassage() {
  // `static` is required here: the cached pieces below are views into this
  // array and outlive any single call. With automatic storage the views would
  // dangle as soon as the first call returned.
  static constexpr char kLoremIpsum[] =
      "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "
      "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim "
      "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea "
      "commodo consequat. Duis aute irure dolor in reprehenderit in voluptate "
      "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint "
      "occaecat cupidatat non proident, sunt in culpa qui officia deserunt "
      "mollit anim id est laborum.";
  // Split once, on spaces, commas and periods, and reuse the word list for
  // every subsequent call. It is never modified after initialization.
  static const auto kLoremIpsumPieces = base::SplitStringPiece(
      kLoremIpsum, " ,.", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

  // base::RandInt() takes an inclusive [min, max] range of ints.
  const int last_index = static_cast<int>(kLoremIpsumPieces.size()) - 1;

  constexpr size_t kWordCount = 200u;
  std::vector<std::string> pieces;
  pieces.reserve(kWordCount);
  for (size_t i = 0; i < kWordCount; i++) {
    pieces.emplace_back(kLoremIpsumPieces[base::RandInt(0, last_index)]);
  }
  return base::JoinString(pieces, " ");
}

}  // namespace

// The tests below use the plain gtest base fixture under a descriptive name;
// no custom SetUp/TearDown is defined for them.
using HistoryEmbeddingsPassagesUtilTest = testing::Test;

// Encrypts and then decrypts `kPassageCount` random passages with the test
// encryptor, logging how long each phase took. Along the way it checks that
// every ciphertext differs from its plaintext, stays under twice the plaintext
// size, and decrypts back to the original passage.
//
// Note: Disabled by default so as to not burden the bots. Enable when needed.
TEST_F(HistoryEmbeddingsPassagesUtilTest,
       DISABLED_EncryptDecryptMicrobenchmark) {
  constexpr size_t kPassageCount = 1000u;
  std::vector<std::string> passages;
  passages.reserve(kPassageCount);
  for (size_t i = 0; i < kPassageCount; i++) {
    passages.push_back(RandomPassage());
  }

  // Finish all setup (encryptor creation, output allocation) before starting
  // the timer so that the logged duration covers only the encryption loop.
  const auto encryptor = os_crypt_async::GetTestEncryptorForTesting();
  std::vector<std::string> encrypted;
  encrypted.reserve(kPassageCount);

  base::ElapsedTimer encrypt_timer;
  for (const std::string& passage : passages) {
    std::string ciphertext;
    ASSERT_TRUE(encryptor.EncryptString(passage, &ciphertext));
    EXPECT_NE(passage, ciphertext);

    EXPECT_LT(ciphertext.size(), passage.size() * 2)
        << "Verify that the encryption doesn't expand the size of the original "
           "passage by more than 2x.";

    encrypted.push_back(std::move(ciphertext));
  }
  LOG(INFO) << "Encrypted " << kPassageCount << " passages in "
            << encrypt_timer.Elapsed();

  // `encrypted[i]` is the ciphertext of `passages[i]`; a failed ASSERT above
  // would have returned before reaching this point.
  base::ElapsedTimer decrypt_timer;
  for (size_t i = 0; i < kPassageCount; i++) {
    std::string decrypted_plaintext;
    ASSERT_TRUE(encryptor.DecryptString(encrypted[i], &decrypted_plaintext));
    EXPECT_EQ(decrypted_plaintext, passages[i]);
  }
  LOG(INFO) << "Decrypted " << kPassageCount << " passages in "
            << decrypt_timer.Elapsed();
}

// Round-trips a PassagesValue proto through PassagesProtoToBlob() and
// PassagesBlobToProto() with the test encryptor. Logs the duration of each
// direction plus the blob and plaintext sizes, and expects every passage to
// come back unchanged.
TEST_F(HistoryEmbeddingsPassagesUtilTest, ProtoToBlobAndBack) {
  constexpr int kPassageCount = 50;
  const auto encryptor = os_crypt_async::GetTestEncryptorForTesting();

  // Fill the source proto, tallying the plaintext bytes for the log below.
  proto::PassagesValue original_proto;
  size_t plaintext_bytes = 0;
  for (int added = 0; added < kPassageCount; ++added) {
    std::string text = RandomPassage();
    plaintext_bytes += text.size();
    original_proto.add_passages(std::move(text));
  }
  ASSERT_EQ(kPassageCount, original_proto.passages_size());

  // Proto -> blob.
  base::ElapsedTimer to_blob_timer;
  std::vector<uint8_t> blob = PassagesProtoToBlob(original_proto, encryptor);
  ASSERT_FALSE(blob.empty());

  LOG(INFO) << "Proto to Blob in: " << to_blob_timer.Elapsed();
  LOG(INFO) << "Blob size: " << blob.size();
  LOG(INFO) << "Total passages size: " << plaintext_bytes;

  // Blob -> proto.
  base::ElapsedTimer to_proto_timer;
  std::optional<proto::PassagesValue> read_proto =
      PassagesBlobToProto(blob, encryptor);
  ASSERT_TRUE(read_proto.has_value());

  LOG(INFO) << "Blob to Proto in: " << to_proto_timer.Elapsed();

  // Compare the restored passages with the originals, position by position.
  ASSERT_EQ(kPassageCount, read_proto->passages_size());
  for (int index = 0; index < kPassageCount; ++index) {
    EXPECT_EQ(read_proto->passages().at(index),
              original_proto.passages().at(index));
  }
}

}  // namespace history_embeddings