1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
|
// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
#include <stddef.h>
#include <deque>
#include <limits>
#include <vector>
#include "components/zucchini/image_index.h"
#include "components/zucchini/image_utils.h"
#include "components/zucchini/targets_affinity.h"
namespace zucchini {
constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
class EncodedView;
class EquivalenceSource;
// Returns similarity score between a token (raw byte or first byte of a
// reference) in |old_image_index| at |src| and a token in |new_image_index|
// at |dst|. |targets_affinities| describes affinities for each target pool and
// is used to evaluate similarity between references, hence it's size must be
// equal to the number of pools in both |old_image_index| and |new_image_index|.
// Both |src| and |dst| must refer to tokens in |old_image_index| and
// |new_image_index|.
double GetTokenSimilarity(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
offset_t src,
offset_t dst);
// Returns a similarity score between content in |old_image_index| and
// |new_image_index| at regions described by |equivalence|, using
// |targets_affinities| to evaluate similarity between references.
double GetEquivalenceSimilarity(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
const Equivalence& equivalence);
// Extends |equivalence| forward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceForward(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
const EquivalenceCandidate& equivalence,
double min_similarity);
// Extends |equivalence| backward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceBackward(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
const EquivalenceCandidate& equivalence,
double min_similarity);
// Creates an equivalence, starting with |src| and |dst| as offset hint, and
// extends it both forward and backward, trying to maximise similarity between
// |old_image_index| and |new_image_index|, and returns the result.
// |targets_affinities| is used to evaluate similarity between references.
// |min_similarity| describes the minimum acceptable similarity score and is
// used as threshold to discard bad equivalences.
EquivalenceCandidate VisitEquivalenceSeed(
const ImageIndex& old_image_index,
const ImageIndex& new_image_index,
const std::vector<TargetsAffinity>& targets_affinities,
offset_t src,
offset_t dst,
double min_similarity);
// Container of pruned equivalences used to map offsets from |old_image| to
// offsets in |new_image|. Equivalences are pruned by cropping smaller
// equivalences to avoid overlaps, to make the equivalence map (for covered
// bytes in |old_image| and |new_image|) one-to-one.
class OffsetMapper {
public:
using const_iterator = std::deque<Equivalence>::const_iterator;
// Constructors for various data sources. "Old" and "new" image sizes are
// needed for bounds checks and to handle dangling targets.
// - From a list of |equivalences|, already sorted (by |src_offset|) and
// pruned, useful for tests.
OffsetMapper(std::deque<Equivalence>&& equivalences,
offset_t old_image_size,
offset_t new_image_size);
// - From a generator, useful for Zucchini-apply.
OffsetMapper(EquivalenceSource&& equivalence_source,
offset_t old_image_size,
offset_t new_image_size);
// - From an EquivalenceMap that needs to be processed, useful for
// Zucchini-gen.
OffsetMapper(const EquivalenceMap& equivalence_map,
offset_t old_image_size,
offset_t new_image_size);
~OffsetMapper();
size_t size() const { return equivalences_.size(); }
const_iterator begin() const { return equivalences_.begin(); }
const_iterator end() const { return equivalences_.end(); }
// Returns naive extended forward-projection of "old" |offset| that follows
// |eq|'s delta. |eq| needs not cover |offset|.
// - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
// - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
// caller must to make the check (hence "naive").
offset_t NaiveExtendedForwardProject(const Equivalence& unit,
offset_t offset) const;
// Returns an offset in |new_image| corresponding to |offset| in |old_image|.
// Assumes |equivalences_| to be non-empty. Cases:
// - If |offset| is covered (i.e., in an "old" block), then use the delta of
// the (unique) equivalence unit that covers |offset|.
// - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
// nearest "old" block, use its delta, and avert underflow / overflow by
// clamping the result to |[0, new_image_size_)|.
// - If |offset| is >= |new_image_size_| (a "fake offset"), then use
// |new_image_size_ - old_image_size_| as the delta.
offset_t ExtendedForwardProject(offset_t offset) const;
// Given sorted |offsets|, applies a projection in-place of all offsets that
// are part of a pruned equivalence from |old_image| to |new_image|. Other
// offsets are removed from |offsets|.
void ForwardProjectAll(std::deque<offset_t>* offsets) const;
// Accessor for testing.
const std::deque<Equivalence> equivalences() const { return equivalences_; }
// Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
// source location that was covered by some Equivalence would become covered
// by exactly one Equivalence. Moreover, for the offset, the equivalence
// corresponds to the largest (pre-pruning) covering Equivalence, and in case
// of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
// change in size since empty Equivalences are removed.
static void PruneEquivalencesAndSortBySource(
std::deque<Equivalence>* equivalences);
private:
// |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
// block overlaps). Also, it is sorted by "old" offsets.
std::deque<Equivalence> equivalences_;
const offset_t old_image_size_;
const offset_t new_image_size_;
};
// Container of equivalences between |old_image_index| and |new_image_index|,
// sorted by |Equivalence::dst_offset|, only used during patch generation.
class EquivalenceMap {
public:
using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
EquivalenceMap();
// Initializes the object with |equivalences|.
explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
EquivalenceMap(EquivalenceMap&&);
EquivalenceMap(const EquivalenceMap&) = delete;
~EquivalenceMap();
// Finds relevant equivalences between |old_view| and |new_view|, using
// suffix array |old_sa| computed from |old_view| and using
// |targets_affinities| to evaluate similarity between references. This
// function is not symmetric. Equivalences might overlap in |old_view|, but
// not in |new_view|. It tries to maximize accumulated similarity within each
// equivalence, while maximizing |new_view| coverage. The minimum similarity
// of an equivalence is given by |min_similarity|.
void Build(const std::vector<offset_t>& old_sa,
const EncodedView& old_view,
const EncodedView& new_view,
const std::vector<TargetsAffinity>& targets_affinities,
double min_similarity);
size_t size() const { return candidates_.size(); }
const_iterator begin() const { return candidates_.begin(); }
const_iterator end() const { return candidates_.end(); }
private:
// Discovers equivalence candidates between |old_view| and |new_view| and
// stores them in the object. Note that resulting candidates are not sorted
// and might be overlapping in new image.
void CreateCandidates(const std::vector<offset_t>& old_sa,
const EncodedView& old_view,
const EncodedView& new_view,
const std::vector<TargetsAffinity>& targets_affinities,
double min_similarity);
// Sorts candidates by their offset in new image.
void SortByDestination();
// Visits |candidates_| (sorted by |dst_offset|) and remove all destination
// overlaps. Candidates with low similarity scores are more likely to be
// shrunken. Unfit candidates may be removed.
void Prune(const EncodedView& old_view,
const EncodedView& new_view,
const std::vector<TargetsAffinity>& targets_affinities,
double min_similarity);
std::vector<EquivalenceCandidate> candidates_;
};
} // namespace zucchini
#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
|