// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <type_traits>
#include "block.h"
#include "block_cache.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/reader_common.h"
// This file contains some member functions of BlockBasedTable that
// cannot be implemented in block_based_table_reader.cc because they
// are templates and are called from other files
// (e.g. block_based_iterator.h).
namespace ROCKSDB_NAMESPACE {
namespace {
using IterPlaceholderCacheInterface =
    PlaceholderCacheInterface<CacheEntryRole::kMisc>;
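// IterTraits maps a block iterator type to the Block subtype that is used
// when retrieving the block (see block.As<IterBlocklike>() below), so that
// data and index blocks go through the appropriately typed block cache
// lookup.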
template <typename TBlockIter>
struct IterTraits {};
template <>
struct IterTraits<DataBlockIter> {
  using IterBlocklike = Block_kData;
};
template <>
struct IterTraits<IndexBlockIter> {
  using IterBlocklike = Block_kIndex;
};
} // namespace
// Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block.
// If input_iter is null, a new iterator is allocated and returned;
// otherwise input_iter is updated in place and returned.
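// The `s` parameter carries in any pre-existing error (the returned iterator
// is invalidated with it) and is updated with the status of the dictionary
// and block reads performed here.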
template <typename TBlockIter>
TBlockIter* BlockBasedTable::NewDataBlockIterator(
    const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter,
    BlockType block_type, GetContext* get_context,
    BlockCacheLookupContext* lookup_context,
    FilePrefetchBuffer* prefetch_buffer, bool for_compaction, bool async_read,
    Status& s, bool use_block_cache_for_lookup) const {
  using IterBlocklike = typename IterTraits<TBlockIter>::IterBlocklike;
  PERF_TIMER_GUARD(new_table_block_iter_nanos);
  TBlockIter* iter = input_iter != nullptr ? input_iter : new TBlockIter;
  if (!s.ok()) {
    iter->Invalidate(s);
    return iter;
  }
  CachableEntry<Block> block;
  if (rep_->uncompression_dict_reader && block_type == BlockType::kData) {
    CachableEntry<UncompressionDict> uncompression_dict;
    // For async scans, don't use the prefetch buffer, since an async prefetch
    // might already be under way and this would invalidate it. Also, the
    // uncompression dict is typically at the end of the file, so reading it
    // would most likely break the sequentiality of the access pattern.
    // The same applies with auto_readahead_size: it iterates over the index
    // to look up data blocks, which could also break the sequentiality of
    // the access pattern.
    s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
        ((ro.async_io || ro.auto_readahead_size) ? nullptr : prefetch_buffer),
        ro, get_context, lookup_context, &uncompression_dict);
    if (!s.ok()) {
      iter->Invalidate(s);
      return iter;
    }
    const UncompressionDict& dict = uncompression_dict.GetValue()
                                        ? *uncompression_dict.GetValue()
                                        : UncompressionDict::GetEmptyDict();
    s = RetrieveBlock(
        prefetch_buffer, ro, handle, dict, &block.As<IterBlocklike>(),
        get_context, lookup_context, for_compaction,
        /* use_cache */ true, async_read, use_block_cache_for_lookup);
  } else {
    s = RetrieveBlock(
        prefetch_buffer, ro, handle, UncompressionDict::GetEmptyDict(),
        &block.As<IterBlocklike>(), get_context, lookup_context, for_compaction,
        /* use_cache */ true, async_read, use_block_cache_for_lookup);
  }
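  // With async_read, a TryAgain status means the block read has been issued
  // asynchronously and has not completed yet; return so the caller can come
  // back in to build the iterator once the read finishes.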
  if (s.IsTryAgain() && async_read) {
    return iter;
  }
  if (!s.ok()) {
    assert(block.IsEmpty());
    iter->Invalidate(s);
    return iter;
  }
  assert(block.GetValue() != nullptr);
  // Block contents are pinned, and remain pinned after the iterator is
  // destroyed (as long as the cleanup functions are moved to another
  // object), when:
  // 1. the block cache handle is set to be released in a cleanup function, or
  // 2. the block points to an immortal source. If own_bytes is true then we
  //    are not reading data from the original source, whether immortal or
  //    not. Otherwise, the block is pinned iff the source is immortal.
  const bool block_contents_pinned =
      block.IsCached() ||
      (!block.GetValue()->own_bytes() && rep_->immortal_table);
  iter = InitBlockIterator<TBlockIter>(rep_, block.GetValue(), block_type, iter,
                                       block_contents_pinned);
  if (!block.IsCached()) {
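    // The block was read from the file and, when fill_cache is off, will not
    // be inserted into the block cache. Still charge its memory to the cache
    // via the placeholder entry below so that cache-based memory accounting
    // reflects this read.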
    if (!ro.fill_cache) {
      IterPlaceholderCacheInterface block_cache{
          rep_->table_options.block_cache.get()};
      if (block_cache) {
        // insert a dummy record to block cache to track the memory usage
        Cache::Handle* cache_handle = nullptr;
        CacheKey key =
            CacheKey::CreateUniqueForCacheLifetime(block_cache.get());
        s = block_cache.Insert(key.AsSlice(),
                               block.GetValue()->ApproximateMemoryUsage(),
                               &cache_handle);
        if (s.ok()) {
          assert(cache_handle != nullptr);
          iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache.get(),
                                cache_handle);
        }
      }
    }
  } else {
    iter->SetCacheHandle(block.GetCacheHandle());
  }
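  // Hand ownership of the block (and release of any cache handle) over to
  // the iterator's cleanup functions so the block stays valid for the
  // iterator's lifetime.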
  block.TransferTo(iter);
  return iter;
}
// Convert an uncompressed data block (i.e., a CachableEntry<Block>)
// into an iterator over the contents of the corresponding block.
// If input_iter is null, a new iterator is allocated and returned;
// otherwise input_iter is updated in place and returned.
template <typename TBlockIter>
TBlockIter* BlockBasedTable::NewDataBlockIterator(const ReadOptions& ro,
                                                  CachableEntry<Block>& block,
                                                  TBlockIter* input_iter,
                                                  Status s) const {
  PERF_TIMER_GUARD(new_table_block_iter_nanos);
  TBlockIter* iter = input_iter != nullptr ? input_iter : new TBlockIter;
  if (!s.ok()) {
    iter->Invalidate(s);
    return iter;
  }
  assert(block.GetValue() != nullptr);
  // Block contents are pinned, and remain pinned after the iterator is
  // destroyed (as long as the cleanup functions are moved to another
  // object), when:
  // 1. the block cache handle is set to be released in a cleanup function, or
  // 2. the block points to an immortal source. If own_bytes is true then we
  //    are not reading data from the original source, whether immortal or
  //    not. Otherwise, the block is pinned iff the source is immortal.
  const bool block_contents_pinned =
      block.IsCached() ||
      (!block.GetValue()->own_bytes() && rep_->immortal_table);
  iter = InitBlockIterator<TBlockIter>(rep_, block.GetValue(), BlockType::kData,
                                       iter, block_contents_pinned);
  if (!block.IsCached()) {
    if (!ro.fill_cache) {
      IterPlaceholderCacheInterface block_cache{
          rep_->table_options.block_cache.get()};
      if (block_cache) {
        // insert a dummy record to block cache to track the memory usage
        Cache::Handle* cache_handle = nullptr;
        CacheKey key =
            CacheKey::CreateUniqueForCacheLifetime(block_cache.get());
        s = block_cache.Insert(key.AsSlice(),
                               block.GetValue()->ApproximateMemoryUsage(),
                               &cache_handle);
        if (s.ok()) {
          assert(cache_handle != nullptr);
          iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache.get(),
                                cache_handle);
        }
      }
    }
  } else {
    iter->SetCacheHandle(block.GetCacheHandle());
  }
  block.TransferTo(iter);
  return iter;
}
} // namespace ROCKSDB_NAMESPACE