1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
|
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "priminfo.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_partition.h"
namespace embree
{
namespace isa
{
/*! Performs standard object binning */
struct HeuristicStrandSplit
{
typedef range<size_t> Set;
static const size_t PARALLEL_THRESHOLD = 10000;
static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;
/*! stores all information to perform some split */
struct Split
{
/*! construct an invalid split by default */
__forceinline Split()
: sah(inf), axis0(zero), axis1(zero) {}
/*! constructs specified split */
__forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
: sah(sah), axis0(axis0), axis1(axis1) {}
/*! calculates standard surface area heuristic for the split */
__forceinline float splitSAH() const { return sah; }
/*! test if this split is valid */
__forceinline bool valid() const { return sah != float(inf); }
public:
float sah; //!< SAH cost of the split
Vec3fa axis0, axis1; //!< axis the two strands are aligned into
};
__forceinline HeuristicStrandSplit () // FIXME: required?
: scene(nullptr), prims(nullptr) {}
/*! remember prim array */
__forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
: scene(scene), prims(prims) {}
__forceinline const Vec3fa direction(const PrimRef& prim) {
return scene->get(prim.geomID())->computeDirection(prim.primID());
}
__forceinline const BBox3fa bounds(const PrimRef& prim) {
return scene->get(prim.geomID())->vbounds(prim.primID());
}
__forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
return scene->get(prim.geomID())->vbounds(space,prim.primID());
}
/*! finds the best split */
const Split find(const range<size_t>& set, size_t logBlockSize)
{
Vec3fa axis0(0,0,1);
uint64_t bestGeomPrimID = -1;
/* curve with minimum ID determines first axis */
for (size_t i=set.begin(); i<set.end(); i++)
{
const uint64_t geomprimID = prims[i].ID64();
if (geomprimID >= bestGeomPrimID) continue;
const Vec3fa axis = direction(prims[i]);
if (sqr_length(axis) > 1E-18f) {
axis0 = normalize(axis);
bestGeomPrimID = geomprimID;
}
}
/* find 2nd axis that is most misaligned with first axis and has minimum ID */
float bestCos = 1.0f;
Vec3fa axis1 = axis0;
bestGeomPrimID = -1;
for (size_t i=set.begin(); i<set.end(); i++)
{
const uint64_t geomprimID = prims[i].ID64();
Vec3fa axisi = direction(prims[i]);
float leni = length(axisi);
if (leni == 0.0f) continue;
axisi /= leni;
float cos = abs(dot(axisi,axis0));
if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
bestCos = cos; axis1 = axisi;
bestGeomPrimID = geomprimID;
}
}
/* partition the two strands */
size_t lnum = 0, rnum = 0;
BBox3fa lbounds = empty, rbounds = empty;
const LinearSpace3fa space0 = frame(axis0).transposed();
const LinearSpace3fa space1 = frame(axis1).transposed();
for (size_t i=set.begin(); i<set.end(); i++)
{
PrimRef& prim = prims[i];
const Vec3fa axisi = normalize(direction(prim));
const float cos0 = abs(dot(axisi,axis0));
const float cos1 = abs(dot(axisi,axis1));
if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
else { rnum++; rbounds.extend(bounds(space1,prim)); }
}
/*! return an invalid split if we do not partition */
if (lnum == 0 || rnum == 0)
return Split(inf,axis0,axis1);
/*! calculate sah for the split */
const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
return Split(sah,axis0,axis1);
}
/*! array partitioning */
void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
{
if (!split.valid()) {
deterministic_order(set);
return splitFallback(set,lset,rset);
}
const size_t begin = set.begin();
const size_t end = set.end();
CentGeomBBox3fa local_left(empty);
CentGeomBBox3fa local_right(empty);
auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
const Vec3fa axisi = normalize(direction(prim));
const float cos0 = abs(dot(axisi,split.axis0));
const float cos1 = abs(dot(axisi,split.axis1));
return cos0 > cos1;
};
auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
pinfo.extend(bounds(ref));
};
size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
new (&lset) PrimInfoRange(begin,center,local_left);
new (&rset) PrimInfoRange(center,end,local_right);
assert(area(lset.geomBounds) >= 0.0f);
assert(area(rset.geomBounds) >= 0.0f);
}
void deterministic_order(const Set& set)
{
/* required as parallel partition destroys original primitive order */
std::sort(&prims[set.begin()],&prims[set.end()]);
}
void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
{
const size_t begin = set.begin();
const size_t end = set.end();
const size_t center = (begin + end)/2;
CentGeomBBox3fa left(empty);
for (size_t i=begin; i<center; i++)
left.extend(bounds(prims[i]));
new (&lset) PrimInfoRange(begin,center,left);
CentGeomBBox3fa right(empty);
for (size_t i=center; i<end; i++)
right.extend(bounds(prims[i]));
new (&rset) PrimInfoRange(center,end,right);
}
private:
Scene* const scene;
PrimRef* const prims;
};
}
}
|