1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "sandbox/linux/bpf_dsl/codegen.h"
#include <stddef.h>
#include <stdint.h>
#include <limits>
#include <ostream>
#include <utility>
#include "base/check_op.h"
#include "sandbox/linux/system_headers/linux_filter.h"
// This CodeGen implementation strives for simplicity while still
// generating acceptable BPF programs under typical usage patterns
// (e.g., by PolicyCompiler).
//
// The key to its simplicity is that BPF programs only support forward
// jumps/branches, which allows constraining the DAG construction API
// to make instruction nodes immutable. Immutable nodes admit a
// simple greedy approach of emitting new instructions as needed and
// then reusing existing ones that have already been emitted. This
// cleanly avoids any need to compute basic blocks or apply
// topological sorting because the API effectively sorts instructions
// for us (e.g., before MakeInstruction() can be called to emit a
// branch instruction, it must have already been called for each
// branch path).
//
// This greedy algorithm is not without (theoretical) weakness though:
//
// 1. In the general case, we don't eliminate dead code. If needed,
// we could trace back through the program in Compile() and elide
// any unneeded instructions, but in practice we only emit live
// instructions anyway.
//
// 2. By not dividing instructions into basic blocks and sorting, we
// lose an opportunity to move non-branch/non-return instructions
// adjacent to their successor instructions, which means we might
// need to emit additional jumps. But in practice, they'll
// already be nearby as long as callers don't go out of their way
// to interleave MakeInstruction() calls for unrelated code
// sequences.
namespace sandbox {
// kBranchRange is the maximum value that can be stored in
// sock_filter's 8-bit jt and jf fields, i.e. the farthest a
// conditional branch can reach without an intermediate jump.
const size_t kBranchRange = std::numeric_limits<uint8_t>::max();
// Out-of-line definition for the sentinel node declared in codegen.h;
// it is compared against below to detect "no target supplied".
const CodeGen::Node CodeGen::kNullNode;
// All members are containers that value-initialize themselves, so the
// explicit member-initializer list was redundant; default the
// constructor instead (identical behavior, clearer intent).
CodeGen::CodeGen() = default;
// Defaulted: no members need manual teardown. Kept out-of-line so the
// destructor is emitted in this translation unit, matching the header's
// declaration.
CodeGen::~CodeGen() = default;
// Returns the finished BPF program beginning at |head|.
//
// Instructions are appended to |program_| in reverse execution order
// (a node's successors are always emitted before the node itself, see
// MakeInstruction), so the runnable program is the reverse of the
// emission order. Offset(head) is |head|'s distance from the end of
// |program_|, so rbegin() + Offset(head) points at |head| and the
// reverse-iterator range [rbegin() + Offset(head), rend()) yields the
// instructions from |head| onward in forward execution order. Any
// instructions emitted after |head| are skipped (not part of this
// program).
CodeGen::Program CodeGen::Compile(CodeGen::Node head) {
return Program(program_.rbegin() + Offset(head), program_.rend());
}
// Returns a node for the instruction (code, k, jt, jf), emitting it
// only if an identical instruction has not been requested before.
// Memoization keys on the full tuple, so structurally identical
// subprograms collapse into a single emitted sequence.
CodeGen::Node CodeGen::MakeInstruction(uint16_t code,
uint32_t k,
Node jt,
Node jf) {
  const MemoKey key(code, k, jt, jf);
  auto insertion = memos_.insert(std::make_pair(key, kNullNode));
  CodeGen::Node& cached = insertion.first->second;
  if (insertion.second) {
    // First request for this instruction: emit it and remember the
    // resulting node in the freshly inserted memo slot.
    cached = AppendInstruction(code, k, jt, jf);
  }
  return cached;
}
// Unconditionally emits a new instruction, inserting helper JA jumps
// as needed to keep |jt| and |jf| reachable within the 8-bit branch
// range. Called only for memo misses (see MakeInstruction).
CodeGen::Node CodeGen::AppendInstruction(uint16_t code,
uint32_t k,
Node jt,
Node jf) {
if (BPF_CLASS(code) == BPF_JMP) {
CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";
// Optimally adding jumps is rather tricky, so we use a quick
// approximation: by artificially reducing |jt|'s range, |jt| will
// stay within its true range even if we add a jump for |jf|.
//
// NOTE: the order matters. WithinRange(jf, ...) may append a JA
// instruction, which pushes every earlier node one slot farther from
// the end of |program_| — hence |jt| is resolved first with a range
// reduced by one to absorb that potential shift.
jt = WithinRange(jt, kBranchRange - 1);
jf = WithinRange(jf, kBranchRange);
return Append(code, k, Offset(jt), Offset(jf));
}
CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
if (BPF_CLASS(code) == BPF_RET) {
CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
} else {
// For non-branch/non-return instructions, execution always
// proceeds to the next instruction; so we need to arrange for
// that to be |jt|. WithinRange with range 0 either confirms |jt|
// is the most recently emitted instruction or appends a JA to it.
jt = WithinRange(jt, 0);
CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction";
}
return Append(code, k, 0, 0);
}
// Returns a node equivalent to |target| whose offset from the end of
// the program is at most |range|, appending a JA jump to |target| if
// no such node exists yet.
//
// |equivalent_[n]| records the most recent node known to have the same
// effect as |n| (initially |n| itself, per Append; later possibly a JA
// jump to |n| installed below).
CodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
// Just use |target| if it's already within range.
if (Offset(target) <= range) {
return target;
}
// Alternatively, look for an equivalent instruction within range.
if (Offset(equivalent_.at(target)) <= range) {
return equivalent_.at(target);
}
// Otherwise, fall back to emitting a jump instruction.
// Offset(target) is evaluated before Append() grows |program_|, which
// is the displacement the JA needs to reach |target|.
Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
// Remember the jump as the nearest known equivalent of |target| so
// later callers can reuse it instead of emitting another JA.
equivalent_.at(target) = jump;
return jump;
}
// Appends a raw sock_filter instruction to |program_| and returns its
// node index. Conditional branches must carry in-range jt/jf offsets;
// every other instruction (including JA, which encodes its target in
// |k|) must pass zero for both.
CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
  const bool conditional_jump =
      BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA;
  if (conditional_jump) {
    CHECK_LE(jt, kBranchRange);
    CHECK_LE(jf, kBranchRange);
  } else {
    CHECK_EQ(0U, jt);
    CHECK_EQ(0U, jf);
  }
  // BPF programs are limited to BPF_MAXINSNS instructions.
  CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
  // |equivalent_| is maintained in lockstep with |program_|.
  CHECK_EQ(program_.size(), equivalent_.size());
  const Node node = program_.size();
  const sock_filter insn = {
      code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k};
  program_.push_back(insn);
  // A node is initially its own best-known equivalent; WithinRange may
  // later replace this with a JA jump to it.
  equivalent_.push_back(node);
  return node;
}
// Returns |target|'s distance from the most recently emitted
// instruction (the last element of |program_|); 0 means |target| is
// that last instruction.
size_t CodeGen::Offset(Node target) const {
  CHECK_LT(target, program_.size()) << "Bogus offset target node";
  const size_t last = program_.size() - 1;
  return last - target;
}
} // namespace sandbox
|