1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
|
// SPDX-License-Identifier: CDDL-1.0
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright (c) 2023, Klara Inc.
*/
#ifndef _SYS_DDT_IMPL_H
#define _SYS_DDT_IMPL_H
#include <sys/ddt.h>
#include <sys/bitops.h>
#ifdef __cplusplus
extern "C" {
#endif
/* DDT version numbers */
#define DDT_VERSION_LEGACY (0)
#define DDT_VERSION_FDT (1)
/* Dummy version to signal that configure is still necessary */
#define DDT_VERSION_UNCONFIGURED (UINT64_MAX)
/* Names of interesting objects in the DDT root dir */
#define DDT_DIR_VERSION "version"
#define DDT_DIR_FLAGS "flags"
/* Fill a lightweight entry from a live entry. */
#define DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, ddlwe) do { \
memset((ddlwe), 0, sizeof (*ddlwe)); \
(ddlwe)->ddlwe_key = (dde)->dde_key; \
(ddlwe)->ddlwe_type = (dde)->dde_type; \
(ddlwe)->ddlwe_class = (dde)->dde_class; \
memcpy(&(ddlwe)->ddlwe_phys, (dde)->dde_phys, DDT_PHYS_SIZE(ddt)); \
} while (0)
#define DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe) do { \
memset((ddlwe), 0, sizeof (*ddlwe)); \
(ddlwe)->ddlwe_key = (ddle)->ddle_key; \
(ddlwe)->ddlwe_type = (ddle)->ddle_type; \
(ddlwe)->ddlwe_class = (ddle)->ddle_class; \
memcpy(&(ddlwe)->ddlwe_phys, (ddle)->ddle_phys, DDT_PHYS_SIZE(ddt)); \
} while (0)
/*
* An entry on the log tree. These are "frozen", and a record of what's in
* the on-disk log. They can't be used in place, but can be "loaded" back into
* the live tree.
*/
typedef struct {
ddt_key_t ddle_key; /* ddt_log_tree key */
avl_node_t ddle_node; /* ddt_log_tree node */
ddt_type_t ddle_type; /* storage type */
ddt_class_t ddle_class; /* storage class */
/* extra allocation for flat/trad phys */
ddt_univ_phys_t ddle_phys[];
} ddt_log_entry_t;
/* On-disk log record types. */
typedef enum {
DLR_INVALID = 0, /* end of block marker */
DLR_ENTRY = 1, /* an entry to add or replace in the log tree */
} ddt_log_record_type_t;
/* On-disk log record header. */
typedef struct {
/*
* dlr_info is a packed u64, use the DLR_GET/DLR_SET macros below to
* access it.
*
* bits 0-7: record type (ddt_log_record_type_t)
* bits 8-15: length of record header+payload
* bits 16-47: reserved, all zero
* bits 48-55: if type==DLR_ENTRY, storage type (ddt_type)
* otherwise all zero
* bits 56-63: if type==DLR_ENTRY, storage class (ddt_class)
* otherwise all zero
*/
uint64_t dlr_info;
uint8_t dlr_payload[];
} ddt_log_record_t;
#define DLR_GET_TYPE(dlr) BF64_GET((dlr)->dlr_info, 0, 8)
#define DLR_SET_TYPE(dlr, v) BF64_SET((dlr)->dlr_info, 0, 8, v)
#define DLR_GET_RECLEN(dlr) BF64_GET((dlr)->dlr_info, 8, 16)
#define DLR_SET_RECLEN(dlr, v) BF64_SET((dlr)->dlr_info, 8, 16, v)
#define DLR_GET_ENTRY_TYPE(dlr) BF64_GET((dlr)->dlr_info, 48, 8)
#define DLR_SET_ENTRY_TYPE(dlr, v) BF64_SET((dlr)->dlr_info, 48, 8, v)
#define DLR_GET_ENTRY_CLASS(dlr) BF64_GET((dlr)->dlr_info, 56, 8)
#define DLR_SET_ENTRY_CLASS(dlr, v) BF64_SET((dlr)->dlr_info, 56, 8, v)
/* Payload for DLR_ENTRY. */
typedef struct {
ddt_key_t dlre_key;
ddt_univ_phys_t dlre_phys[];
} ddt_log_record_entry_t;
/* Log flags (ddl_flags, dlh_flags) */
#define DDL_FLAG_FLUSHING (1 << 0) /* this log is being flushed */
#define DDL_FLAG_CHECKPOINT (1 << 1) /* header has a checkpoint */
/* On-disk log header, stored in the bonus buffer. */
typedef struct {
/*
* dlh_info is a packed u64, use the DLH_GET/DLH_SET macros below to
* access it.
*
* bits 0-7: log version
* bits 8-15: log flags
* bits 16-63: reserved, all zero
*/
uint64_t dlh_info;
uint64_t dlh_length; /* log size in bytes */
uint64_t dlh_first_txg; /* txg this log went active */
ddt_key_t dlh_checkpoint; /* last checkpoint */
} ddt_log_header_t;
#define DLH_GET_VERSION(dlh) BF64_GET((dlh)->dlh_info, 0, 8)
#define DLH_SET_VERSION(dlh, v) BF64_SET((dlh)->dlh_info, 0, 8, v)
#define DLH_GET_FLAGS(dlh) BF64_GET((dlh)->dlh_info, 8, 8)
#define DLH_SET_FLAGS(dlh, v) BF64_SET((dlh)->dlh_info, 8, 8, v)
/* DDT log update state */
typedef struct {
dmu_tx_t *dlu_tx; /* tx the update is being applied to */
dnode_t *dlu_dn; /* log object dnode */
dmu_buf_t **dlu_dbp; /* array of block buffer pointers */
int dlu_ndbp; /* number of block buffer pointers */
uint16_t dlu_reclen; /* cached length of record */
uint64_t dlu_block; /* block for next entry */
uint64_t dlu_offset; /* offset for next entry */
} ddt_log_update_t;
/*
* Ops vector to access a specific DDT object type.
*/
typedef struct {
char ddt_op_name[32];
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
boolean_t prehash);
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
int (*ddt_op_lookup)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, void *phys, size_t psize);
int (*ddt_op_contains)(objset_t *os, uint64_t object,
const ddt_key_t *ddk);
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
const ddt_key_t *ddk);
void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object);
int (*ddt_op_update)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, const void *phys, size_t psize,
dmu_tx_t *tx);
int (*ddt_op_remove)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, dmu_tx_t *tx);
int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
ddt_key_t *ddk, void *phys, size_t psize);
int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
} ddt_ops_t;
extern const ddt_ops_t ddt_zap_ops;
/* Dedup log API */
extern void ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx,
ddt_log_update_t *dlu);
extern void ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *dde,
ddt_log_update_t *dlu);
extern void ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu);
extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,
ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
ddt_lightweight_entry_t *ddlwe);
extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,
const ddt_key_t *ddk);
extern void ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
dmu_tx_t *tx);
extern void ddt_log_truncate(ddt_t *ddt, dmu_tx_t *tx);
extern boolean_t ddt_log_swap(ddt_t *ddt, dmu_tx_t *tx);
extern void ddt_log_destroy(ddt_t *ddt, dmu_tx_t *tx);
extern int ddt_log_load(ddt_t *ddt);
extern void ddt_log_alloc(ddt_t *ddt);
extern void ddt_log_free(ddt_t *ddt);
extern void ddt_log_init(void);
extern void ddt_log_fini(void);
/*
* These are only exposed so that zdb can access them. Try not to use them
* outside of the DDT implementation proper, and if you do, consider moving
* them up.
*/
/*
* We use a histogram to convert a percentage request into a
* cutoff value where entries older than the cutoff get pruned.
*
* The histogram bins represent hours in power-of-two increments.
* 16 bins covers up to four years.
*/
#define HIST_BINS 16
typedef struct ddt_age_histo {
uint64_t dah_entries;
uint64_t dah_age_histo[HIST_BINS];
} ddt_age_histo_t;
void ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram);
#if defined(_KERNEL) || !defined(ZFS_DEBUG)
#define ddt_dump_age_histogram(histo, cutoff) ((void)0)
#else
static inline void
ddt_dump_age_histogram(ddt_age_histo_t *histogram, uint64_t cutoff)
{
if (histogram->dah_entries == 0)
return;
(void) printf("DDT prune unique class age, %llu hour cutoff\n",
(u_longlong_t)(gethrestime_sec() - cutoff)/3600);
(void) printf("%5s %9s %4s\n", "age", "blocks", "amnt");
(void) printf("%5s %9s %4s\n", "-----", "---------", "----");
for (int i = 0; i < HIST_BINS; i++) {
(void) printf("%5d %9llu %4d%%\n", 1<<i,
(u_longlong_t)histogram->dah_age_histo[i],
(int)((histogram->dah_age_histo[i] * 100) /
histogram->dah_entries));
}
}
#endif
/*
* Enough room to expand DMU_POOL_DDT format for all possible DDT
* checksum/class/type combinations.
*/
#define DDT_NAMELEN 32
extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt,
const ddt_univ_phys_t *ddp);
extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
char *name);
extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
uint64_t *walk, ddt_lightweight_entry_t *ddlwe);
extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
uint64_t *count);
extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
dmu_object_info_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DDT_H */
|