File: posting_list_table.cc

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (177 lines) | stat: -rw-r--r-- 6,441 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chromeos/ash/components/file_manager/indexing/posting_list_table.h"

#include "sql/statement.h"

namespace ash::file_manager {

namespace {

// Table, column and index names. These are macros (not constants) so that
// they can be spliced into the SQL text below through compile-time
// concatenation of adjacent string literals.
#define POSTING_LIST_TABLE "posting_list_table"
#define TERM_ID "term_id"
#define URL_ID "url_id"
#define POSTING_LIST_INDEX "posting_list_index"
#define URL_ID_INDEX "url_id_index"

// clang-format off

// Creates the posting list table. Each row is one (term ID, URL ID) pair;
// the pair is the primary key, and both columns are declared as foreign keys
// into term_table and file_info_table respectively.
constexpr char kCreatePostingListTableQuery[] =
    "CREATE TABLE IF NOT EXISTS " POSTING_LIST_TABLE
    "("
      TERM_ID " INTEGER NOT NULL,"
      URL_ID " INTEGER NOT NULL,"
      "FOREIGN KEY(" TERM_ID ") REFERENCES term_table(" TERM_ID "),"
      "FOREIGN KEY(" URL_ID ") REFERENCES file_info_table(" URL_ID "),"
      "PRIMARY KEY (" TERM_ID ", " URL_ID ")"
    ")";

// Creates the inverted index keyed by term ID. It allows quick retrieval of
// all URL IDs recorded for a given term.
constexpr char kCreatePostingIndexQuery[] =
    "CREATE INDEX IF NOT EXISTS " POSTING_LIST_INDEX
    " ON " POSTING_LIST_TABLE "(" TERM_ID ")";

// Creates a plain index keyed by URL ID. It allows quick retrieval of all
// terms associated with a given URL ID (and thus, a file).
constexpr char kCreateUrlIndexQuery[] =
    "CREATE INDEX IF NOT EXISTS " URL_ID_INDEX
    " ON " POSTING_LIST_TABLE "(" URL_ID ")";

// Inserts a (term ID, URL ID) association. Parameters, in order: term ID,
// URL ID. A conflicting insert is ignored rather than reported as an error.
constexpr char kInsertAssociationQuery[] =
    "INSERT OR IGNORE INTO " POSTING_LIST_TABLE
    "(" TERM_ID ", " URL_ID ") "
    "VALUES (?, ?)";

// Deletes a (term ID, URL ID) association. Parameters, in order: term ID,
// URL ID.
constexpr char kDeleteAssociationQuery[] =
    "DELETE FROM " POSTING_LIST_TABLE
    " WHERE " TERM_ID "=? AND " URL_ID "=?";

// Selects every URL ID stored for the term ID given as the only parameter.
// The lookup is explicitly routed through posting_list_index.
constexpr char kGetUrlIdsForTermQuery[] =
    "SELECT " URL_ID " FROM " POSTING_LIST_TABLE
    " INDEXED BY " POSTING_LIST_INDEX
    " WHERE " TERM_ID "=?";

// Selects every term ID stored for the URL ID given as the only parameter.
// The lookup is explicitly routed through url_id_index.
constexpr char kGetTermIdsForUrlQuery[] =
    "SELECT " TERM_ID " FROM " POSTING_LIST_TABLE
    " INDEXED BY " URL_ID_INDEX
    " WHERE " URL_ID "=?";

// clang-format on

}  // namespace

// Stores the given database pointer in `db_`; no SQL is executed here. The
// table and its indexes are only created when Init() is called.
// NOTE(review): presumably `db` is not owned and must outlive this object --
// confirm against the header and the callers.
PostingListTable::PostingListTable(sql::Database* db) : db_(db) {}
// Defaulted destructor: this file defines no custom teardown logic.
PostingListTable::~PostingListTable() = default;

// Creates the posting list table and its two indexes (by term ID and by URL
// ID) unless they already exist.
//
// Returns true when all three CREATE statements ran successfully. Returns
// false, after logging the reason, when the database is not open or when any
// of the statements fails; later steps are skipped after the first failure.
bool PostingListTable::Init() {
  if (!db_->is_open()) {
    LOG(WARNING) << "Failed to initialize " << POSTING_LIST_TABLE
                 << " due to closed database";
    return false;
  }
  // The table must exist before either index can be created on it.
  sql::Statement create_table(
      db_->GetCachedStatement(SQL_FROM_HERE, kCreatePostingListTableQuery));
  DCHECK(create_table.is_valid()) << "Invalid create table statement: \""
                                  << create_table.GetSQLStatement() << "\"";
  if (!create_table.Run()) {
    LOG(ERROR) << "Failed to create table " << POSTING_LIST_TABLE;
    return false;
  }
  // Index used when looking up URL IDs by term ID.
  sql::Statement create_posting_index(
      db_->GetCachedStatement(SQL_FROM_HERE, kCreatePostingIndexQuery));
  DCHECK(create_posting_index.is_valid())
      << "Invalid create index statement: \""
      << create_posting_index.GetSQLStatement() << "\"";
  if (!create_posting_index.Run()) {
    LOG(ERROR) << "Failed to create posting index";
    return false;
  }
  // Index used when looking up term IDs by URL ID.
  sql::Statement create_url_index(
      db_->GetCachedStatement(SQL_FROM_HERE, kCreateUrlIndexQuery));
  DCHECK(create_url_index.is_valid())
      << "Invalid create index statement: \""
      << create_url_index.GetSQLStatement() << "\"";
  if (!create_url_index.Run()) {
    LOG(ERROR) << "Failed to create url index";
    return false;
  }
  return true;
}

size_t PostingListTable::AddToPostingList(int64_t term_id, int64_t url_id) {
  sql::Statement add_association(
      db_->GetCachedStatement(SQL_FROM_HERE, kInsertAssociationQuery));
  DCHECK(add_association.is_valid())
      << "Invalid insert statement: \"" << add_association.GetSQLStatement()
      << "\"";
  add_association.BindInt64(0, term_id);
  add_association.BindInt64(1, url_id);
  if (!add_association.Run()) {
    LOG(ERROR) << "Failed to create association between term and URL";
    return 0;
  }
  return db_->GetLastChangeCount();
}

size_t PostingListTable::DeleteFromPostingList(int64_t term_id,
                                               int64_t url_id) {
  sql::Statement delete_association(
      db_->GetCachedStatement(SQL_FROM_HERE, kDeleteAssociationQuery));
  DCHECK(delete_association.is_valid())
      << "Invalid delete statement: \"" << delete_association.GetSQLStatement()
      << "\"";
  delete_association.BindInt64(0, term_id);
  delete_association.BindInt64(1, url_id);
  if (!delete_association.Run()) {
    LOG(ERROR) << "Failed to delete association between term and URL";
    return 0;
  }
  return db_->GetLastChangeCount();
}

std::set<int64_t> PostingListTable::GetUrlIdsForTerm(int64_t term_id) const {
  sql::Statement get_url_ids(
      db_->GetCachedStatement(SQL_FROM_HERE, kGetUrlIdsForTermQuery));
  DCHECK(get_url_ids.is_valid()) << "Invalid select statement: \""
                                 << get_url_ids.GetSQLStatement() << "\"";
  get_url_ids.BindInt64(0, term_id);
  std::set<int64_t> url_ids;
  while (get_url_ids.Step()) {
    url_ids.emplace(get_url_ids.ColumnInt64(0));
  }
  return url_ids;
}

const std::set<int64_t> PostingListTable::GetTermIdsForUrl(
    int64_t url_id) const {
  sql::Statement get_term_ids(
      db_->GetCachedStatement(SQL_FROM_HERE, kGetTermIdsForUrlQuery));
  DCHECK(get_term_ids.is_valid()) << "Invalid select statement: \""
                                  << get_term_ids.GetSQLStatement() << "\"";
  get_term_ids.BindInt64(0, url_id);
  std::set<int64_t> term_ids;
  while (get_term_ids.Step()) {
    term_ids.emplace(get_term_ids.ColumnInt64(0));
  }
  return term_ids;
}

}  // namespace ash::file_manager