1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/history_embeddings/passages_util.h"
#include <memory>
#include "base/logging.h"
#include "base/rand_util.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/timer/elapsed_timer.h"
#include "components/history_embeddings/proto/history_embeddings.pb.h"
#include "components/os_crypt/async/browser/test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace history_embeddings {
namespace {
// Returns a pseudo-random passage of 200 words, each drawn (with replacement)
// from a fixed lorem ipsum text via base::RandInt() and joined with single
// spaces. Gives the tests below inputs of a realistic size.
std::string RandomPassage() {
  // `static` is load-bearing: `kLoremIpsumPieces` below is initialized only on
  // the first call and keeps views into this array for the lifetime of the
  // process. With automatic storage those views would dangle as soon as the
  // first call returned.
  static constexpr char kLoremIpsum[] =
      "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "
      "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim "
      "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea "
      "commodo consequat. Duis aute irure dolor in reprehenderit in voluptate "
      "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint "
      "occaecat cupidatat non proident, sunt in culpa qui officia deserunt "
      "mollit anim id est laborum.";
  // Split once on spaces, commas and periods, dropping empty pieces; later
  // calls reuse the result.
  static const auto kLoremIpsumPieces = base::SplitStringPiece(
      kLoremIpsum, " ,.", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
  constexpr size_t kWordCount = 200u;
  // base::RandInt() takes an inclusive [min, max] range of ints, so narrow the
  // piece count explicitly rather than relying on an implicit conversion.
  const int last_piece_index = static_cast<int>(kLoremIpsumPieces.size()) - 1;
  std::vector<std::string> pieces;
  pieces.reserve(kWordCount);
  for (size_t i = 0; i < kWordCount; i++) {
    pieces.emplace_back(kLoremIpsumPieces[base::RandInt(0, last_piece_index)]);
  }
  return base::JoinString(pieces, " ");
}
} // namespace
// Fixture name used by the TEST_F cases below; it is a plain alias of
// testing::Test and adds no setup or state of its own.
using HistoryEmbeddingsPassagesUtilTest = testing::Test;
// Note: Disabled by default so as to not burden the bots. Enable when needed.
//
// Encrypts and then decrypts 1000 random passages with the test encryptor and
// logs how long each phase took. Setup and result verification are kept
// outside the timed loops so the logged durations cover only the
// EncryptString()/DecryptString() calls and the bookkeeping needed to keep
// their outputs.
TEST_F(HistoryEmbeddingsPassagesUtilTest,
       DISABLED_EncryptDecryptMicrobenchmark) {
  constexpr size_t kPassageCount = 1000u;
  std::vector<std::string> passages;
  passages.reserve(kPassageCount);
  for (size_t i = 0; i < kPassageCount; i++) {
    passages.push_back(RandomPassage());
  }

  // Finish all setup before starting the timer so that creating the encryptor
  // and reserving storage do not count towards the encryption time.
  const auto encryptor = os_crypt_async::GetTestEncryptorForTesting();
  std::vector<std::string> encrypted;
  encrypted.reserve(kPassageCount);

  base::ElapsedTimer encrypt_timer;
  for (const std::string& passage : passages) {
    std::string ciphertext;
    ASSERT_TRUE(encryptor.EncryptString(passage, &ciphertext));
    encrypted.push_back(std::move(ciphertext));
  }
  // Sample the timer before logging or verifying anything.
  const auto encrypt_elapsed = encrypt_timer.Elapsed();
  LOG(INFO) << "Encrypted " << kPassageCount << " passages in "
            << encrypt_elapsed;

  // Untimed sanity checks on the ciphertexts.
  for (size_t i = 0; i < kPassageCount; i++) {
    EXPECT_NE(passages[i], encrypted[i]);
    EXPECT_LT(encrypted[i].size(), passages[i].size() * 2)
        << "Verify that the encryption doesn't expand the size of the original "
           "passage by more than 2x.";
  }

  std::vector<std::string> decrypted;
  decrypted.reserve(kPassageCount);

  base::ElapsedTimer decrypt_timer;
  for (const std::string& ciphertext : encrypted) {
    std::string decrypted_plaintext;
    ASSERT_TRUE(encryptor.DecryptString(ciphertext, &decrypted_plaintext));
    decrypted.push_back(std::move(decrypted_plaintext));
  }
  const auto decrypt_elapsed = decrypt_timer.Elapsed();
  LOG(INFO) << "Decrypted " << kPassageCount << " passages in "
            << decrypt_elapsed;

  // Untimed round-trip verification: every passage must decrypt to itself.
  for (size_t i = 0; i < kPassageCount; i++) {
    EXPECT_EQ(decrypted[i], passages[i]);
  }
}
// Round-trips a proto holding 50 random passages through PassagesProtoToBlob()
// and PassagesBlobToProto() using the test encryptor, and verifies that every
// passage comes back unchanged. Also logs the blob size, the total plaintext
// size and how long each conversion took.
TEST_F(HistoryEmbeddingsPassagesUtilTest, ProtoToBlobAndBack) {
  const auto encryptor = os_crypt_async::GetTestEncryptorForTesting();
  // Signed on purpose: it is compared against passages_size() and used as the
  // loop index into the repeated field below.
  constexpr int kPassageCount = 50;
  proto::PassagesValue original_proto;
  size_t total_passage_size = 0;
  for (int i = 0; i < kPassageCount; i++) {
    std::string passage = RandomPassage();
    // Record the size before the string is moved into the proto.
    total_passage_size += passage.size();
    original_proto.add_passages(std::move(passage));
  }
  ASSERT_EQ(kPassageCount, original_proto.passages_size());

  // Declared ahead of the timer so only the conversion call is measured.
  std::vector<uint8_t> blob;
  base::ElapsedTimer proto_to_blob_timer;
  blob = PassagesProtoToBlob(original_proto, encryptor);
  // Sample the timer right after the call, before any assertion runs.
  const auto proto_to_blob_elapsed = proto_to_blob_timer.Elapsed();
  ASSERT_FALSE(blob.empty());
  LOG(INFO) << "Proto to Blob in: " << proto_to_blob_elapsed;
  LOG(INFO) << "Blob size: " << blob.size();
  LOG(INFO) << "Total passages size: " << total_passage_size;

  std::optional<proto::PassagesValue> read_proto;
  base::ElapsedTimer blob_to_proto_timer;
  read_proto = PassagesBlobToProto(blob, encryptor);
  const auto blob_to_proto_elapsed = blob_to_proto_timer.Elapsed();
  ASSERT_TRUE(read_proto.has_value());
  LOG(INFO) << "Blob to Proto in: " << blob_to_proto_elapsed;

  // Now verify that every single passage is restored.
  ASSERT_EQ(kPassageCount, read_proto->passages_size());
  for (int i = 0; i < kPassageCount; i++) {
    EXPECT_EQ(read_proto->passages(i), original_proto.passages(i))
        << "Mismatch at passage index " << i;
  }
}
} // namespace history_embeddings
|