1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
|
//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines basic, non-domain-specific mechanisms for tracking tainted values.
//
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
using namespace clang;
using namespace ento;
using namespace taint;
// Fully tainted symbols: maps a symbol to the taint tag attached to the symbol
// as a whole.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
// Partially tainted symbols: TaintedSubRegions maps a sub-region to its taint
// tag, and DerivedSymTaint maps a parent symbol to the TaintedSubRegions
// recorded for it (written by addPartialTaint, consulted by the SymbolDerived
// handling in isTainted).
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
TaintTagType)
REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
/// Writes the contents of the state's TaintMap to \p Out.
///
/// When the map is non-empty, a "Tainted symbols:" header is emitted followed
/// by one "<symbol> : <tag>" entry per map element, each terminated by \p NL.
/// Nothing is written for an empty map. \p Sep is accepted but not used by
/// this implementation.
void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
                       const char *Sep) {
  TaintMapTy Tainted = State->get<TaintMap>();

  // No entries means no output at all, not even the header.
  if (Tainted.isEmpty())
    return;

  Out << "Tainted symbols:" << NL;
  for (const auto &Entry : Tainted)
    Out << Entry.first << " : " << Entry.second << NL;
}
/// Prints the taint information of \p State to llvm::errs(), using
/// printTaint's default line-ending and separator arguments.
void taint::dumpTaint(ProgramStateRef State) {
  raw_ostream &Err = llvm::errs();
  printTaint(State, Err);
}
/// Taints the value that \p S evaluates to in \p LCtx.
///
/// The value is looked up with State->getSVal(S, LCtx) and handed to the SVal
/// overload of addTaint together with \p Kind.
ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
                                const LocationContext *LCtx,
                                TaintTagType Kind) {
  SVal StmtVal = State->getSVal(S, LCtx);
  return addTaint(State, StmtVal, Kind);
}
/// Taints the value \p V with tag \p Kind and returns the resulting state.
///
/// Three cases are tried in order: a value that is a symbol, a lazy compound
/// value whose default binding is a symbol (partial taint), and finally the
/// region the value refers to, if any.
ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
                                TaintTagType Kind) {
  // A value that is itself a symbol is tainted directly.
  if (SymbolRef DirectSym = V.getAsSymbol())
    return addTaint(State, DirectSym, Kind);

  // A structure value is handled by trying to mass-taint everything inside it.
  // For now this only works efficiently on lazy compound values conjured
  // during conservative evaluation of a function: either values returned by
  // value (structures or arrays), or structures/arrays passed in by reference,
  // directly or through pointer aliasing. Such values have exactly one binding
  // in their captured store within their parent region: a conjured symbol
  // default-bound to the base region of the parent region.
  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
    auto &StoreMgr = State->getStateManager().getStoreManager();
    Optional<SVal> DefaultVal = StoreMgr.getDefaultBinding(*LCV);
    if (DefaultVal) {
      if (SymbolRef DefaultSym = DefaultVal->getAsSymbol())
        return addPartialTaint(State, DefaultSym, LCV->getRegion(), Kind);
    }
  }

  // Otherwise fall back to the region overload; it tolerates a null region.
  const MemRegion *AsRegion = V.getAsRegion();
  return addTaint(State, AsRegion, Kind);
}
/// Taints the symbol behind \p R when \p R is a SymbolicRegion.
///
/// For a null region or any other kind of region, \p State is returned
/// unchanged.
ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
                                TaintTagType Kind) {
  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(R);
  if (!SymReg)
    return State;

  return addTaint(State, SymReg->getSymbol(), Kind);
}
/// Records \p Kind for \p Sym in the TaintMap and returns the new state.
///
/// Taint is cast agnostic, so any SymbolCast wrappers are peeled off first and
/// the tag is stored on the innermost operand.
ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
                                TaintTagType Kind) {
  SymbolRef Uncasted = Sym;
  while (const auto *Cast = dyn_cast<SymbolCast>(Uncasted))
    Uncasted = Cast->getOperand();

  ProgramStateRef NewState = State->set<TaintMap>(Uncasted, Kind);
  assert(NewState);
  return NewState;
}
/// Removes taint from the value \p V.
///
/// A value that is a symbol is forwarded to the symbol overload; otherwise the
/// region it refers to (possibly null) is forwarded to the region overload.
ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
  if (SymbolRef DirectSym = V.getAsSymbol())
    return removeTaint(State, DirectSym);

  const MemRegion *AsRegion = V.getAsRegion();
  return removeTaint(State, AsRegion);
}
/// Removes taint from the symbol behind \p R when \p R is a SymbolicRegion.
///
/// For a null region or any other kind of region, \p State is returned
/// unchanged.
ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(R);
  if (!SymReg)
    return State;

  return removeTaint(State, SymReg->getSymbol());
}
/// Erases the TaintMap entry for \p Sym and returns the new state.
///
/// Taint is cast agnostic, so any SymbolCast wrappers are peeled off before
/// removing the taint; the entry erased is the one for the innermost operand.
ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
  SymbolRef Uncasted = Sym;
  while (const auto *Cast = dyn_cast<SymbolCast>(Uncasted))
    Uncasted = Cast->getOperand();

  ProgramStateRef NewState = State->remove<TaintMap>(Uncasted);
  assert(NewState);
  return NewState;
}
/// Marks the part of \p ParentSym that corresponds to \p SubRegion as tainted
/// with \p Kind and returns the resulting state.
///
/// Returns \p State unchanged when \p ParentSym already carries \p Kind in the
/// TaintMap. When \p SubRegion is its own base region the request is treated
/// as full taint of \p ParentSym. Otherwise the (region, tag) pair is added to
/// the DerivedSymTaint entry of \p ParentSym.
ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
                                       SymbolRef ParentSym,
                                       const SubRegion *SubRegion,
                                       TaintTagType Kind) {
  // Partial taint is redundant if the whole parent already has this tag.
  const TaintTagType *WholeTag = State->get<TaintMap>(ParentSym);
  if (WholeTag && *WholeTag == Kind)
    return State;

  // Partial taint only applies to a proper portion of the symbol; a region
  // that is its own base is tainted through the parent symbol instead.
  if (SubRegion->getBaseRegion() == SubRegion)
    return addTaint(State, ParentSym, Kind);

  // Extend the regions already recorded for this parent, starting from an
  // empty map when there are none yet.
  TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
  const TaintedSubRegions *Existing = State->get<DerivedSymTaint>(ParentSym);
  TaintedSubRegions Updated =
      F.add(Existing ? *Existing : F.getEmptyMap(), SubRegion, Kind);

  ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Updated);
  assert(NewState);
  return NewState;
}
/// Reports whether the value that \p S evaluates to in \p LCtx is tainted with
/// \p Kind, by looking the value up in \p State and deferring to the SVal
/// overload.
bool taint::isTainted(ProgramStateRef State, const Stmt *S,
                      const LocationContext *LCtx, TaintTagType Kind) {
  return isTainted(State, State->getSVal(S, LCtx), Kind);
}
/// Reports whether \p V is tainted with \p Kind.
///
/// A value that is a symbol is checked as a symbol; otherwise a value that
/// refers to a region is checked as a region. Any other value is not tainted.
bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
  SymbolRef AsSym = V.getAsSymbol();
  if (AsSym)
    return isTainted(State, AsSym, Kind);

  const MemRegion *AsReg = V.getAsRegion();
  return AsReg && isTainted(State, AsReg, Kind);
}
/// Reports whether the region \p Reg is tainted with \p K.
///
/// A null region is never tainted. Element regions, symbolic regions and other
/// sub-regions are each resolved as described inline; any remaining kind of
/// region is not tainted.
bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
                      TaintTagType K) {
  if (!Reg)
    return false;

  // An array element is tainted if either its base or its index is tainted.
  if (const auto *Elem = dyn_cast<ElementRegion>(Reg)) {
    if (isTainted(State, Elem->getSuperRegion(), K))
      return true;
    return isTainted(State, Elem->getIndex(), K);
  }

  // A symbolic region is as tainted as the symbol it is based on.
  if (const auto *SymReg = dyn_cast<SymbolicRegion>(Reg))
    return isTainted(State, SymReg->getSymbol(), K);

  // Any other sub-region inherits the answer from its super region.
  if (const auto *Sub = dyn_cast<SubRegion>(Reg))
    return isTainted(State, Sub->getSuperRegion(), K);

  return false;
}
/// Reports whether \p Sym is tainted with \p Kind.
///
/// Every symbol that \p Sym depends on is visited; the result is true as soon
/// as one of them is found to be tainted, either directly through the
/// TaintMap or indirectly through a parent symbol, a partially tainted
/// region, or the region a symbol's value was loaded from. A null symbol is
/// never tainted.
bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
  if (!Sym)
    return false;

  SymExpr::symbol_iterator It = Sym->symbol_begin();
  SymExpr::symbol_iterator End = Sym->symbol_end();
  for (; It != End; ++It) {
    SymbolRef Sub = *It;

    // Sub-expressions that are not SymbolData are skipped entirely.
    if (!isa<SymbolData>(Sub))
      continue;

    // Direct hit: the symbol itself is recorded with the requested tag.
    const TaintTagType *Tag = State->get<TaintMap>(Sub);
    if (Tag && *Tag == Kind)
      return true;

    if (const auto *Derived = dyn_cast<SymbolDerived>(Sub)) {
      SymbolRef Parent = Derived->getParentSymbol();

      // A derived symbol with a tainted parent is tainted as well.
      if (isTainted(State, Parent, Kind))
        return true;

      // A derived symbol is also tainted when its region lies inside a region
      // that was partially tainted for the same parent symbol.
      if (const TaintedSubRegions *Partial =
              State->get<DerivedSymTaint>(Parent)) {
        const TypedValueRegion *DerivedReg = Derived->getRegion();
        for (const auto &RegAndTag : *Partial) {
          // FIXME: The logic to identify tainted regions could be more
          // complete. For example, overlapping fields in a union are not
          // currently identified as tainted; checking for overlapping/nested
          // byte offsets would cover that.
          if (RegAndTag.second == Kind &&
              DerivedReg->isSubRegionOf(RegAndTag.first))
            return true;
        }
      }
    }

    // Data read from a tainted memory region is tainted too.
    if (const auto *RegionVal = dyn_cast<SymbolRegionValue>(Sub)) {
      if (isTainted(State, RegionVal->getRegion(), Kind))
        return true;
    }

    // A cast of a tainted value is tainted too.
    // NOTE(review): this check runs only for symbols that passed the
    // SymbolData filter above; if SymbolCast is not a SymbolData it can never
    // fire - confirm against the SymExpr class hierarchy.
    if (const auto *Cast = dyn_cast<SymbolCast>(Sub)) {
      if (isTainted(State, Cast->getOperand(), Kind))
        return true;
    }
  }

  return false;
}
/// Emits a "Taint originated here" event at the node where the tracked value
/// V first becomes tainted.
///
/// Returns nullptr for every node that is not that transition point, for nodes
/// without a statement usable for diagnostics, and when no valid source
/// location can be built for the statement.
PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
                                                  BugReporterContext &BRC,
                                                  PathSensitiveBugReport &BR) {
  // The node of interest is the one where V is tainted while it was not yet
  // tainted in the first predecessor.
  // NOTE(review): assumes N->getFirstPred() is non-null here - it is
  // dereferenced without a check; confirm the caller guarantees it.
  if (!isTainted(N->getState(), V))
    return nullptr;
  if (isTainted(N->getFirstPred()->getState(), V))
    return nullptr;

  const Stmt *S = N->getStmtForDiagnostics();
  if (!S)
    return nullptr;

  const LocationContext *Ctx = N->getLocationContext();
  PathDiagnosticLocation Loc =
      PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), Ctx);

  // Both the diagnostic location and its underlying source location must be
  // valid for the note to be attached.
  const bool Usable = Loc.isValid() && Loc.asLocation().isValid();
  if (!Usable)
    return nullptr;

  return std::make_shared<PathDiagnosticEventPiece>(Loc,
                                                    "Taint originated here");
}
|