1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847
|
// 2009 © Václav Šmilauer <eudoxos@arcig.cz>
// 2013 © Bruno Chareyre <bruno.chareyre@grenoble-inp.fr>
#include "InsertionSortCollider.hpp"
#include <lib/high-precision/Constants.hpp>
#include <core/Dispatching.hpp>
#include <core/Interaction.hpp>
#include <core/InteractionContainer.hpp>
#include <core/Scene.hpp>
#include <pkg/common/Sphere.hpp>
#include <boost/static_assert.hpp>
#ifdef YADE_OPENMP
#include <omp.h>
#endif
namespace yade { // Cannot have #include directive inside.
using math::max;
using math::min; // using inside .cpp file is ok.
YADE_PLUGIN((InsertionSortCollider))
CREATE_LOGGER(InsertionSortCollider);
// called by the insertion sort if 2 bodies swapped their bounds in such a way that a new overlap may appear
void InsertionSortCollider::handleBoundInversion(Body::id_t id1, Body::id_t id2, InteractionContainer* interactions, Scene*)
{
assert(!periodic);
assert(id1 != id2);
#ifdef YADE_MPI //Note #0: this #ifdef is painfull, there many others needed hereafter (see #1), compilation without MPI will fail atm
if (spatialOverlap(id1, id2) && Collider::mayCollide(Body::byId(id1, scene).get(), Body::byId(id2, scene).get(), scene->subdomain)
&& !interactions->found(id1, id2))
#else
if (spatialOverlap(id1, id2) && Collider::mayCollide(Body::byId(id1, scene).get(), Body::byId(id2, scene).get()) && !interactions->found(id1, id2))
#endif
interactions->insert(shared_ptr<Interaction>(new Interaction(id1, id2)));
}
void InsertionSortCollider::insertionSort(VecBounds& v, InteractionContainer* interactions, Scene*, bool doCollide)
{
assert(!periodic);
//assert(v.size()==v.vec.size());
for (size_t i = 1; i < v.size(); i++) {
const Bounds viInit = v[i];
long j = i - 1; /* cache hasBB; otherwise 1% overall performance hit */
const bool viInitBB = viInit.flags.hasBB;
const bool isMin = viInit.flags.isMin;
while (j >= 0 && v[j] > viInit) {
v[j + 1] = v[j];
// no collisions without bounding boxes
// also, do not collide body with itself; it sometimes happens for facets aligned perpendicular to an axis, for reasons that are not very clear
// see (old site, fixed bug) https://bugs.launchpad.net/yade/+bug/669095
// skip bounds with same isMin flags, since inversion doesn't imply anything in that case
if (isMin && !v[j].flags.isMin && doCollide && viInitBB && v[j].flags.hasBB && (viInit.id != v[j].id)) {
if (viInit.id < v[j].id) handleBoundInversion(viInit.id, v[j].id, interactions, scene);
else
handleBoundInversion(v[j].id, viInit.id, interactions, scene);
/*if (isMin)*/
// else handleBoundSplit(viInit.id,v[j].id,interactions,scene);
}
j--;
}
v[j + 1] = viInit;
}
}
#ifdef YADE_OPENMP
//Parallel version, only for non-periodic case at the moment (feel free to implement for the periodic case...)
void InsertionSortCollider::insertionSortParallel(VecBounds& v, InteractionContainer* interactions, Scene*, bool doCollide)
{
assert(!periodic);
///escape parallel sort if 1/ single thread or 2/ not at least 10 bounds per-thread
if ((ompThreads <= 1) or (v.size() < size_t(10 * ompThreads))) return insertionSort(v, interactions, scene, doCollide);
Real chunksVerlet = 4 * verletDist; //is 2* the theoretical requirement?
if (chunksVerlet <= 0) { LOG_ERROR("Parallel insertion sort needs verletDist>0"); }
///chunks defines subsets of the bounds lists, we make sure they are not too small wrt. verlet dist.
std::vector<Body::id_t> chunks;
unsigned nChunks = ompThreads;
unsigned chunkSize = unsigned(v.size() / nChunks) + 1;
for (unsigned n = 0; n < nChunks; n++)
chunks.push_back(n * chunkSize);
chunks.push_back(v.size());
while (nChunks > 1) {
bool changeChunks = false;
for (unsigned n = 1; n < nChunks; n++)
if (chunksVerlet > (v[chunks[n]].coord - v[chunks[n - 1]].coord)) changeChunks = true;
if (!changeChunks) break;
nChunks--;
chunkSize = unsigned(v.size() / nChunks) + 1;
chunks.clear();
for (unsigned n = 0; n < nChunks; n++)
chunks.push_back(n * chunkSize);
chunks.push_back(v.size());
}
///Define per-thread containers bufferizing the actual insertion of new interactions, since inserting is not thread-safe
std::vector<std::vector<std::pair<Body::id_t, Body::id_t>>> newInteractions;
newInteractions.resize(ompThreads, std::vector<std::pair<Body::id_t, Body::id_t>>());
for (int kk = 0; kk < ompThreads; kk++)
newInteractions[kk].reserve(long(chunkSize * 0.3));
/// First sort, independant in each chunk
#pragma omp parallel for schedule(dynamic, 1) num_threads(ompThreads > 0 ? min(ompThreads, omp_get_max_threads()) : omp_get_max_threads())
for (unsigned k = 0; k < nChunks; k++) {
int threadNum = omp_get_thread_num();
for (auto i = chunks[k] + 1; i < chunks[k + 1]; i++) {
const Bounds viInit = v[i];
auto j = i - 1;
if (not(j >= chunks[k] && v[j] > viInit)) continue; //else we need to assign v[j+1] after the 'while'
const bool viInitBB = viInit.flags.hasBB;
const bool isMin = viInit.flags.isMin;
while (j >= chunks[k] && v[j] > viInit) {
v[j + 1] = v[j];
if (isMin && !v[j].flags.isMin && doCollide && viInitBB && v[j].flags.hasBB && (viInit.id != v[j].id)) {
const Body::id_t& id1 = v[j].id;
const Body::id_t& id2 = viInit.id;
//(see #0 if compilation fails)
#ifdef YADE_MPI
if (spatialOverlap(id1, id2)
&& Collider::mayCollide(Body::byId(id1, scene).get(), Body::byId(id2, scene).get(), scene->subdomain)
&& !interactions->found(id1, id2))
#else
if (spatialOverlap(id1, id2) && Collider::mayCollide(Body::byId(id1, scene).get(), Body::byId(id2, scene).get())
&& !interactions->found(id1, id2))
#endif
newInteractions[threadNum].push_back(std::pair<Body::id_t, Body::id_t>(v[j].id, viInit.id));
}
j--;
}
v[j + 1] = viInit;
}
}
///In the second sort, the chunks are connected consistently.
///If sorting requires to move a bound past half-chunk, the algorithm is not thread safe,
/// if it happens we run the 1-thread sort at the end
bool parallelFailed = false;
#pragma omp parallel for schedule(dynamic, 1) num_threads(ompThreads > 0 ? min(ompThreads, omp_get_max_threads()) : omp_get_max_threads())
for (unsigned k = 1; k < nChunks; k++) {
int threadNum = omp_get_thread_num();
long i = chunks[k];
long halfChunkStart = long(i - chunkSize * 0.5);
long halfChunkEnd = long(i + chunkSize * 0.5);
for (; i < halfChunkEnd; i++) {
if (!(v[i] < v[i - 1])) break; //contiguous chunks now connected consistently
const Bounds viInit = v[i];
long j = i - 1; /* cache hasBB; otherwise 1% overall performance hit */
const bool viInitBB = viInit.flags.hasBB;
const bool isMin = viInit.flags.isMin;
while (j >= halfChunkStart && viInit < v[j]) {
v[j + 1] = v[j];
if (isMin && !v[j].flags.isMin && doCollide && viInitBB && v[j].flags.hasBB && (viInit.id != v[j].id)) {
const Body::id_t& id1 = v[j].id;
const Body::id_t& id2 = viInit.id;
//FIXME: do we need the check with found(id1,id2) here? It is checked again below...
#ifdef YADE_MPI
if (spatialOverlap(id1, id2)
&& Collider::mayCollide(Body::byId(id1, scene).get(), Body::byId(id2, scene).get(), scene->subdomain)
&& !interactions->found(id1, id2))
#else
if (spatialOverlap(id1, id2) && Collider::mayCollide(Body::byId(id1, scene).get(), Body::byId(id2, scene).get())
&& !interactions->found(id1, id2))
#endif
newInteractions[threadNum].push_back(std::pair<Body::id_t, Body::id_t>(v[j].id, viInit.id));
}
j--;
}
v[j + 1] = viInit;
if (j < halfChunkStart) parallelFailed = true;
}
if (i >= halfChunkEnd) parallelFailed = true;
}
/// Now insert interactions sequentially
for (int n = 0; n < ompThreads; n++)
for (size_t k = 0, kend = newInteractions[n].size(); k < kend; k++)
/*if (!interactions->found(newInteractions[n][k].first,newInteractions[n][k].second))*/ //Not needed, already checked above
if (newInteractions[n][k].first < newInteractions[n][k].second)
interactions->insert(shared_ptr<Interaction>(new Interaction(newInteractions[n][k].first, newInteractions[n][k].second)));
else
interactions->insert(shared_ptr<Interaction>(new Interaction(newInteractions[n][k].second, newInteractions[n][k].first)));
/// If some bounds traversed more than a half-chunk, we complete colliding with the sequential sort
if (parallelFailed) return insertionSort(v, interactions, scene, doCollide);
}
#endif
vector<Body::id_t> InsertionSortCollider::probeBoundingVolume(const Bound& bv)
{
if (periodic) { throw invalid_argument("InsertionSortCollider::probeBoundingVolume: handling periodic boundary not implemented."); }
vector<Body::id_t> ret;
for (vector<Bounds>::const_iterator it = BB[0].cbegin(), et = BB[0].cend(); it < et; ++it) {
if (it->coord > bv.max[0]) break;
if (!it->flags.isMin || !it->flags.hasBB) continue;
int offset = 3 * it->id;
const shared_ptr<Body>& b = Body::byId(it->id, scene);
if (!b || !b->bound) continue;
const Real& sweepLength = b->bound->sweepLength;
Vector3r disp = b->state->pos - b->bound->refPos;
if (!(maxima[offset] - sweepLength + disp[0] < bv.min[0] || minima[offset] + sweepLength + disp[0] > bv.max[0]
|| minima[offset + 1] + sweepLength + disp[1] > bv.max[1] || maxima[offset + 1] - sweepLength + disp[1] < bv.min[1]
|| minima[offset + 2] + sweepLength + disp[2] > bv.max[2] || maxima[offset + 2] - sweepLength + disp[2] < bv.min[2])) {
ret.push_back(it->id);
}
}
return ret;
}
// STRIDE
bool InsertionSortCollider::isActivated()
{
// activated if number of bodies changes (hence need to refresh collision information)
// or the time of scheduled run already came, or we were never scheduled yet
if (!strideActive) { return true; }
if (!newton) { return true; }
fastestBodyMaxDist = newton->maxVelocitySq;
if (fastestBodyMaxDist >= 1 || fastestBodyMaxDist == 0) { return true; }
if (BB[0].size() != 2 * scene->bodies->size() and not scene->bodies->useRedirection) { return true; }
if (scene->interactions->dirty) { return true; }
if (scene->doSort) { return true; }
return false;
}
void InsertionSortCollider::action()
{
#ifdef ISC_TIMING
timingDeltas->start();
#endif
numAction++;
const size_t nBodies = scene->bodies->size();
keepListsShort = scene->bodies->useRedirection;
InteractionContainer* interactions = scene->interactions.get();
scene->interactions->iterColliderLastRun = -1;
scene->doSort = false;
#ifdef YADE_OPENMP
if (ompThreads <= 0) ompThreads = omp_get_max_threads();
#endif
// periodicity changed, force reinit
if (scene->isPeriodic != periodic) {
for (int i = 0; i < 3; i++) {
BB[i].clear();
}
periodic = scene->isPeriodic;
}
// pre-conditions
// adjust storage size
bool doInitSort = false;
if (doSort) {
doInitSort = true;
doSort = false;
}
// ### Prepare lists of bounds. First approach: Bounds are from id=0 to id=nBodies, possibly with many null bounds after body erase
if (not keepListsShort) {
if (BB[0].size() != 2 * nBodies) {
// store previous size
size_t BBsize = BB[0].size();
LOG_DEBUG("Resize bounds containers from " << BBsize << " to " << nBodies * 2 << ", will std::sort.");
// bodies deleted; clear the container completely, and do as if all bodies were added (rather slow…)
// future possibility: insertion sort with such operator that deleted bodies would all go to the end, then just trim bounds
if (2 * nBodies < BBsize) {
for (int i = 0; i < 3; i++)
BB[i].clear();
}
// more than 20% new bodies, do initial sort again
if (BBsize == 0 || ((2 * nBodies - BBsize) / double(BBsize)) > 0.2) doInitSort = true;
assert((BBsize % 2) == 0);
for (int i = 0; i < 3; i++) {
BB[i].reserve(2 * nBodies);
// add lower and upper bounds; coord is not important, will be updated from bb shortly
for (size_t id = BBsize / 2; id < nBodies; id++) {
BB[i].push_back(Bounds(0, id, /*isMin=*/true));
BB[i].push_back(Bounds(0, id, /*isMin=*/false));
}
}
}
// ### Second approach with using body redirection: Bounds sizes match the number of real bodies in the scene
} else if (not scene->bodies->checkedByCollider) {
scene->bodies->updateRealBodies();
vector<Body::id_t>& insrts = scene->bodies->insertedBodies;
const vector<Body::id_t>& erased = scene->bodies->erasedBodies;
size_t nInsert = insrts.size();
size_t nErased = erased.size();
size_t nReal = scene->bodies->realBodies.size();
size_t BBsize = BB[0].size();
// BBsize==0 if collider runs for the first time, 1/ at iteration 0, or 2/ at iteration N after loading scene.
// In both cases it should be safe to insert all real bodies from scratch, hence the copy below.
// It will simply ignore any sequence of insert/erase that could have happen before collider execution
// Next collision detections will effectively insert/erase incrementaly after this initialisation
if (BBsize == 0) {
insrts = scene->bodies->realBodies;
nInsert = nReal;
}
// Handle erased bodies
int countNoBound = 0;
if (nErased > 0) {
for (const auto& id : erased)
if (3 * (id + 1) <= (int)minima.size())
minima[3 * id] = Mathr::MAX_REAL; // invalidate data so we can check what was erased in next loop and remove the bounds
size_t idxTarget = 0;
for (size_t i = 0; i < 3; i++) {
idxTarget = 0;
VecBounds& BBi = BB[i];
// move all bounds to the beginning of the vector
for (size_t idx = 0; idx < BBsize; idx++) {
const Body::id_t& id = BBi[idx].id;
if (Body::byId(id, scene) /*and Body::byId(id, scene)->isBounded()*/
and (3 * (id + 1) > (int)minima.size() or minima[3 * id] != Mathr::MAX_REAL)) {
if (idxTarget < idx) BBi[idxTarget] = BBi[idx];
idxTarget++;
} else {
if (i == 0 and Body::byId(id, scene) and not Body::byId(id, scene)->isBounded()) countNoBound++;
continue;
}
}
if (idxTarget < BBi.size()) BBi.resize(idxTarget);
else
break; // if first coordinate is unchanged no need to check the two others
}
}
// Now handle inserted bodies
if (not smartInsertErase) { // else we handle newly inserted ones at the end
if (BB[0].size() == 0 or nInsert / double(BB[0].size()) > 0.05) doInitSort = true;
std::sort(insrts.begin(), insrts.end()); // so we can identify duplicates more easily in case of multiple insert/erase with same id
for (int i = 0; i < 3; i++) {
for (size_t idx = 0; idx < nInsert; idx++) {
if (idx > 0 and insrts[idx] == insrts[idx - 1]) continue; // skip duplicate
if (Body::byId(insrts[idx], scene)
and Body::byId(insrts[idx], scene)->isBounded()) { //could have been inserted then erased
BB[i].push_back(Bounds(0, insrts[idx], /*isMin=*/true));
BB[i].push_back(Bounds(0, insrts[idx], /*isMin=*/false));
}
}
}
}
}
scene->bodies->checkedByCollider = true; // bounds lists and bodies list are in sync
if (minima.size() != (size_t)3 * nBodies) {
minima.resize(3 * nBodies);
maxima.resize(3 * nBodies);
}
//Increase the size of force container.
scene->forces.addMaxId(scene->bodies->size());
// update periodicity
assert(BB[0].axis == 0);
assert(BB[1].axis == 1);
assert(BB[2].axis == 2);
if (periodic) {
for (int i = 0; i < 3; i++)
BB[i].updatePeriodicity(scene);
invSizes = Vector3r(1. / scene->cell->getSize()[0], 1. / scene->cell->getSize()[1], 1. / scene->cell->getSize()[2]);
}
if (verletDist < 0) {
Real minR = std::numeric_limits<Real>::infinity();
for (const auto& b : *scene->bodies) {
if (!b || !b->shape) continue;
Sphere* s = dynamic_cast<Sphere*>(b->shape.get());
if (!s) continue;
minR = min(s->radius, minR);
}
if (math::isinf(minR))
LOG_WARN("verletDist is set to 0 because no spheres were found. It will result in suboptimal performances, consider setting a positive "
"verletDist in your script.");
// if no spheres, disable stride
verletDist = math::isinf(minR) ? 0 : math::abs(verletDist) * minR;
}
// if interactions are dirty, force reinitialization
if (scene->interactions->dirty) {
doInitSort = true;
scene->interactions->dirty = false;
}
// update bounds via boundDispatcher
boundDispatcher->scene = scene;
boundDispatcher->sweepDist = verletDist;
boundDispatcher->minSweepDistFactor = minSweepDistFactor;
boundDispatcher->targetInterv = targetInterv;
boundDispatcher->updatingDispFactor = updatingDispFactor;
boundDispatcher->action();
ISC_CHECKPOINT("boundDispatcher");
// STRIDE
if (verletDist > 0) {
// get NewtonIntegrator, to ask for the maximum velocity value
if (!newton) {
FOREACH(shared_ptr<Engine> & e, scene->engines)
{
newton = YADE_PTR_DYN_CAST<NewtonIntegrator>(e);
if (newton) break;
}
if (!newton) { throw runtime_error("InsertionSortCollider.verletDist>0, but unable to locate NewtonIntegrator within O.engines."); }
}
}
// STRIDE
// get us ready for strides, if they were deactivated
if (verletDist > 0) strideActive = true;
if (strideActive) {
assert(verletDist > 0);
assert(YADE_PTR_DYN_CAST<NewtonIntegrator>(newton));
assert(strideActive);
assert(newton->maxVelocitySq >= 0);
newton->updatingDispFactor = updatingDispFactor;
} else
boundDispatcher->sweepDist = 0;
ISC_CHECKPOINT("bound");
// copy bounds along given axis into our arrays
const size_t nBounds = BB[0].size();
const Vector3r maxVect(Mathr::MAX_REAL, Mathr::MAX_REAL, Mathr::MAX_REAL);
// #pragma omp parallel for schedule(guided) num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
for (int j = 0; j < 3; j++) {
// const long cacheIter = scene->iter;
VecBounds& BBj = BB[j];
for (size_t i = 0; i < nBounds; i++) {
Bounds& BBji = BBj[i];
const Body::id_t id = BBji.id;
const shared_ptr<Body>& b = Body::byId(id, scene);
if (b) {
const shared_ptr<Bound>& bv = b->bound;
// coordinate is min/max if has bounding volume, otherwise both are the position. Add periodic shift so that we are inside the cell
// watch out for the parentheses around ?: within ?: (there was unwanted conversion of the Reals to bools!)
// FIXME: the Mathr::MAX_REAL trick will not work very well for periodic BCs.
BBji.coord = ((BBji.flags.hasBB = ((bool)bv)) ? (BBji.flags.isMin ? bv->min[j] : bv->max[j])
: (keepListsShort ? Mathr::MAX_REAL : b->state->pos[j]))
- (periodic ? BBj.cellDim * BBji.period : 0.);
// if initializing periodic, shift coords & record the period into BBj[i].period
if (doInitSort && periodic) BBji.coord = cellWrap(BBji.coord, 0, BBj.cellDim, BBji.period);
// for each body, copy its minima and maxima, for quick checks of overlaps later
//bounds have been all updated when j==0, we can safely copy them here when j==1
#if (YADE_REAL_BIT <= 64)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
// this is to remove warning about manipulating raw memory
#pragma GCC diagnostic ignored "-Wclass-memaccess"
if (bv) {
if (BBji.flags.isMin && j == 1) memcpy(&minima[3 * id], &bv->min, 3 * sizeof(Real));
memcpy(&maxima[3 * id], &bv->max, 3 * sizeof(Real));
} else if (keepListsShort) {
memcpy(&minima[3 * id], &maxVect, 3 * sizeof(Real));
memcpy(&maxima[3 * id], &maxVect, 3 * sizeof(Real));
}
#pragma GCC diagnostic pop
#else
if (bv) {
if (BBji.flags.isMin && j == 1) {
minima[3 * id] = bv->min[0];
minima[3 * id + 1] = bv->min[1];
minima[3 * id + 2] = bv->min[2];
}
maxima[3 * id] = bv->max[0];
maxima[3 * id + 1] = bv->max[1];
maxima[3 * id + 2] = bv->max[2];
} else if (keepListsShort) {
minima[3 * id] = maxVect[0];
minima[3 * id + 1] = maxVect[1];
minima[3 * id + 2] = maxVect[2];
maxima[3 * id] = maxVect[0];
maxima[3 * id + 1] = maxVect[1];
maxima[3 * id + 2] = maxVect[2];
}
#endif
} else {
BBj[i].flags.hasBB = false; /* for vanished body, keep the coordinate as-is, to minimize inversions. */
#ifdef YADE_MPI
if (keepListsShort) LOG_ERROR("Shouldn't happen " << id << " in " << scene->subdomain);
#endif
}
}
}
ISC_CHECKPOINT("copy");
// remove interactions which have disconnected bounds and are not real (will run parallel if YADE_OPENMP)
interactions->conditionalyEraseNonReal(*this, scene);
ISC_CHECKPOINT("erase");
// sort
// the regular case
if (!doInitSort && !sortThenCollide) {
/* each inversion in insertionSort calls may add interaction */
//1000 bodies is heuristic minimum above which parallel sort is called
if (!periodic)
for (int i = 0; i < 3; i++)
#ifdef YADE_OPENMP
{
if (ompThreads <= 1 || nBodies < 1000 || verletDist == 0) insertionSort(BB[i], interactions, scene);
else
insertionSortParallel(BB[i], interactions, scene);
}
#else
{
insertionSort(BB[i], interactions, scene);
}
#endif
else
for (int i = 0; i < 3; i++)
insertionSortPeri(BB[i], interactions, scene);
if (keepListsShort and smartInsertErase
and scene->bodies->insertedBodies.size() > 0) { // Do the init sort on new bodies only, Precondition: initial bounds list is sorted
for (const auto& id1 : scene->bodies->insertedBodies) {
const shared_ptr<Body>& b = Body::byId(id1, scene);
if (b and b->isBounded()) {
for (const auto& id2 : probeBoundingVolume(*(b->bound))) { // Probe interactions with new body
if (Collider::mayCollide(
Body::byId(id1, scene).get(),
Body::byId(id2, scene).get()
#ifdef YADE_MPI
,
scene->subdomain
#endif
)
and !interactions->found(id1, id2))
interactions->insert(shared_ptr<Interaction>(new Interaction(id1, id2)));
}
// then insert bounds in the right place so that they remain sorted
for (int i = 0; i < 3; i++) {
auto bMin = Bounds(b->bound->min[i], id1, /*isMin=*/true);
auto bMax = Bounds(b->bound->max[i], id1, /*isMin=*/false);
auto it = std::lower_bound(BB[i].begin(), BB[i].end(), bMin);
it = BB[i].insert(it, bMin);
it = std::lower_bound(it, BB[i].end(), bMax);
BB[i].insert(it, bMax);
}
}
}
}
}
// create initial interactions (much slower)
else {
if (doInitSort) {
// the initial sort is in independent in 3 dimensions, may be run in parallel; it seems that there is no time gain running in parallel, though
// important to reset loInx for periodic simulation (!!)
// #pragma omp parallel for schedule(dynamic,1) num_threads(min(ompThreads,3))
for (int i = 0; i < 3; i++) {
BB[i].loIdx = 0;
BB[i].sort();
}
numReinit++;
} else { // sortThenCollide
if (!periodic)
for (int i = 0; i < 3; i++)
insertionSort(BB[i], interactions, scene, false);
else
for (int i = 0; i < 3; i++)
insertionSortPeri(BB[i], interactions, scene, false);
}
// traverse the container along requested axis and find overlaps
assert(sortAxis == 0 || sortAxis == 1 || sortAxis == 2);
VecBounds& V = BB[sortAxis];
findOverlaps(V);
}
scene->bodies->insertedBodies.clear();
scene->bodies->erasedBodies.clear();
ISC_CHECKPOINT("sort&collide");
}
void InsertionSortCollider::findOverlaps(VecBounds& V)
{
InteractionContainer* interactions = scene->interactions.get();
// go through potential aabb collisions, create interactions as necessary
if (!periodic) {
#ifdef YADE_OPENMP
std::vector<std::vector<std::pair<Body::id_t, Body::id_t>>> newInts;
newInts.resize(ompThreads, std::vector<std::pair<Body::id_t, Body::id_t>>());
for (int kk = 0; kk < ompThreads; kk++)
#pragma omp parallel for schedule(guided, 200) num_threads(ompThreads)
#endif
for (size_t i = 0; i < V.size(); i++) {
// start from the lower bound (i.e. skipping upper bounds)
// skip bodies without bbox, because they don't collide
if (!(V[i].flags.isMin && V[i].flags.hasBB)) continue;
const Body::id_t& iid = V[i].id;
// go up until we meet the upper bound
for (size_t j = i + 1; /* handle case 2. of swapped min/max */ j < V.size() && V[j].id != iid; j++) {
const Body::id_t& jid = V[j].id;
// take 2 of the same condition (only handle collision [min_i..max_i]+min_j, not [min_i..max_i]+min_i (symmetric)
if (!(V[j].flags.isMin && V[j].flags.hasBB)) continue;
if (spatialOverlap(iid, jid)
&& Collider::mayCollide(
Body::byId(iid, scene).get(),
Body::byId(jid, scene).get()
#ifdef YADE_MPI
,
scene->subdomain
#endif
)) {
#ifdef YADE_OPENMP
unsigned int threadNum = omp_get_thread_num();
newInts[threadNum].push_back(std::pair<Body::id_t, Body::id_t>(iid, jid));
#else
if (!interactions->found(iid, jid)) {
if (iid < jid) interactions->insert(shared_ptr<Interaction>(new Interaction(iid, jid)));
else
interactions->insert(shared_ptr<Interaction>(new Interaction(jid, iid)));
}
#endif
}
}
}
//go through newly created candidates sequentially, duplicates coming from different threads may exist so we check existence with found()
#ifdef YADE_OPENMP
for (int n = 0; n < ompThreads; n++)
for (size_t k = 0, kend = newInts[n].size(); k < kend; k++)
if (!interactions->found(newInts[n][k].first, newInts[n][k].second)) {
if (newInts[n][k].first < newInts[n][k].second)
interactions->insert(shared_ptr<Interaction>(new Interaction(newInts[n][k].first, newInts[n][k].second)));
else
interactions->insert(shared_ptr<Interaction>(new Interaction(newInts[n][k].second, newInts[n][k].first)));
}
#endif
} else { // periodic case: see comments above
for (size_t i = 0; i < V.size(); i++) {
if (!(V[i].flags.isMin && V[i].flags.hasBB)) continue;
const Body::id_t& iid = V[i].id;
// we might wrap over the periodic boundary here; that's why the condition is different from the aperiodic case
for (long j = V.norm(i + 1); V[j].id != iid; j = V.norm(j + 1)) {
const Body::id_t& jid = V[j].id;
if (!(V[j].flags.isMin && V[j].flags.hasBB)) continue;
if (iid < jid) handleBoundInversionPeri(iid, jid, interactions, scene);
else
handleBoundInversionPeri(jid, iid, interactions, scene);
}
}
}
}
// return floating value wrapped between x0 and x1 and saving period number to period
Real InsertionSortCollider::cellWrap(const Real x, const Real x0, const Real x1, int& period)
{
Real xNorm = (x - x0) / (x1 - x0);
period = (int)floor(xNorm); // some people say this is very slow; probably optimized by gcc, however (google suggests)
return x0 + (xNorm - period) * (x1 - x0);
}
// return coordinate wrapped to 0…x1, relative to x0; don't care about period
Real InsertionSortCollider::cellWrapRel(const Real x, const Real x0, const Real x1)
{
Real xNorm = (x - x0) / (x1 - x0);
return (xNorm - floor(xNorm)) * (x1 - x0);
}
//NOTE: possible improvements:
// 1) (not only periodic) keep a mask defining overlaps in directions 1,2,3, and compare the sum instead of checking overlap in three directions everytime there is an inversion. (maybe not possible? does it need a N² table?!!)
// 2) use norm() only when needed (first and last elements, mainly, can be treated as special cases)
void InsertionSortCollider::insertionSortPeri(VecBounds& v, InteractionContainer* interactions, Scene*, bool doCollide)
{
assert(periodic);
size_t& loIdx = v.loIdx;
const size_t& size = v.size();
// The condition in next "for" loop would segfault if size=0, escape here and warn
if (BB[0].size() == 0) {
LOG_WARN("no bounds where found, empty scene? Turn collider off if nothing to collide, else please report bug");
return;
}
/* We have to visit each bound at least once (first condition), but this is not enough. The correct ordering in the begining of the list needs a second pass to connect begin and end consistently (the second condition). Strictly the second condition should include "+ (v.norm(j+1)==loIdx ? v.cellDim : 0)" but it is ok as is since the shift is added inside the loop. */
for (size_t _i = 0; (_i < size) || (v[v.norm(_i)].coord < v[v.norm(_i - 1)].coord); _i++) {
const size_t i = v.norm(_i); //FIXME: useless, and many others can probably be removed
const size_t i_1 = v.norm(i - 1);
//switch period of (i) if the coord is below the lower edge cooridnate-wise and just above the split
if (i == loIdx && v[i].coord < 0) {
v[i].period -= 1;
v[i].coord += v.cellDim;
loIdx = v.norm(loIdx + 1);
}
// coordinate of v[i] used to check inversions
// if crossing the split, adjust by cellDim;
// if we get below the loIdx however, the v[i].coord will have been adjusted already, no need to do that here
const Real iCmpCoord = v[i].coord + (i == loIdx ? v.cellDim : 0);
// no inversion
if (v[i_1].coord <= iCmpCoord) continue;
// vi is the copy that will travel down the list, while other elts go up
// if will be placed in the list only at the end, to avoid extra copying
size_t j = i_1;
Bounds vi = v[i];
const bool viHasBB = vi.flags.hasBB;
const bool isMin = v[i].flags.isMin;
//For the first pass, the bounds are not travelling down past v[0] (j<_i above prevents that), otherwise we would not know which part of the list has been correctly sorted. Only after the first pass, we sort end vs. begining of the list.
while ((j < _i) and v[j].coord > (vi.coord + /* wrap for elt just below split */ (v.norm(j + 1) == loIdx ? v.cellDim : 0))) {
size_t j1 = v.norm(j + 1);
// OK, now if many bodies move at the same pace through the cell and at one point, there is inversion,
// this can happen without any side-effects
if (false && v[j].coord > 2 * v.cellDim) {
// this condition is not strictly necessary, but the loop of insertionSort would have to run more times.
// Since size of particle is required to be < .5*cellDim, this would mean simulation explosion anyway
LOG_FATAL("Body #" << v[j].id << " going faster than 1 cell in one step? Not handled.");
throw runtime_error(__FILE__ ": body moving too fast (skipped 1 cell).");
}
Bounds& vNew(v[j1]); // elt at j+1 being overwritten by the one at j and adjusted
vNew = v[j];
// inversions close to the split need special care
if (j == loIdx && vi.coord < 0) {
vi.period -= 1;
vi.coord += v.cellDim;
loIdx = v.norm(loIdx + 1);
} else if (j1 == loIdx) {
vNew.period += 1;
vNew.coord -= v.cellDim;
loIdx = v.norm(loIdx - 1);
}
if (isMin && !v[j].flags.isMin && (doCollide && viHasBB && v[j].flags.hasBB))
if (vi.id != vNew.id) {
if (vi.id < vNew.id) handleBoundInversionPeri(vi.id, vNew.id, interactions, scene);
else
handleBoundInversionPeri(vNew.id, vi.id, interactions, scene);
}
j = v.norm(j - 1);
}
v[v.norm(j + 1)] = vi;
}
//Keep coord's in [0,cellDim] by clamping the largest values
for (size_t i = v.norm(loIdx - 1); v[i].coord > v.cellDim; i = v.norm(--i)) {
v[i].period += 1;
v[i].coord -= v.cellDim;
loIdx = i;
}
}
// called by the insertion sort if 2 bodies swapped their bounds
void InsertionSortCollider::handleBoundInversionPeri(Body::id_t id1, Body::id_t id2, InteractionContainer* interactions, Scene*)
{
assert(periodic);
if (interactions->found(id1, id2)) return; // we want to _create_ new ones, we don't care about existing ones
Vector3i periods(Vector3i::Zero());
bool overlap = spatialOverlapPeri(id1, id2, scene, periods);
if (overlap
&& Collider::mayCollide(
Body::byId(id1, scene).get(),
Body::byId(id2, scene).get()
#ifdef YADE_MPI
,
scene->subdomain
#endif
)) {
shared_ptr<Interaction> newI = shared_ptr<Interaction>(new Interaction(id1, id2));
newI->cellDist = periods;
interactions->insert(newI);
}
}
/* Performance hint
================
Since this function is called (most the time) from insertionSort,
we actually already know what is the overlap status in that one dimension, including
periodicity information; both values could be passed down as a parameters, avoiding 1 of 3 loops here.
We do some floats math here, so the speedup could noticeable; doesn't concertn the non-periodic variant,
where it is only plain comparisons taking place.
*/
//! return true if bodies bb overlap in all 3 dimensions
bool InsertionSortCollider::spatialOverlapPeri(Body::id_t id1, Body::id_t id2, Scene* scene2, Vector3i& periods) const
{
// scene shadows a member ‘yade::InsertionSortCollider::scene’
assert(periodic);
assert(id1 != id2); // programming error, or weird bodies (too large?)
for (int axis = 0; axis < 3; axis++) {
Real dim = scene2->cell->getSize()[axis];
// LOG_DEBUG("dim["<<axis<<"]="<<dim);
// too big bodies
if (!allowBiggerThanPeriod) {
assert(maxima[3 * id1 + axis] - minima[3 * id1 + axis] < .99 * dim);
assert(maxima[3 * id2 + axis] - minima[3 * id2 + axis] < .99 * dim);
}
// define normalized positions relative to id1->max, and with +1 shift for id1->min so that body1's bounds cover an interval [shiftedMin; 1] at the end of a b1-centric period
Real lmin = (minima[3 * id2 + axis] - maxima[3 * id1 + axis]) * invSizes[axis];
Real lmax = (maxima[3 * id2 + axis] - maxima[3 * id1 + axis]) * invSizes[axis];
Real shiftedMin = (minima[3 * id1 + axis] - maxima[3 * id1 + axis]) * invSizes[axis] + 1.;
#ifdef YADE_MPI
bool subDoverlap = (Body::byId(id1, scene2)->getIsSubdomain() || Body::byId(id2, scene2)->getIsSubdomain());
bool fluidBodyOverLap = (Body::byId(id1, scene2)->getIsFluidDomainBbox() || Body::byId(id2, scene2)->getIsFluidDomainBbox());
if (((lmax - lmin) > 0.5 || shiftedMin < 0) && !(subDoverlap or fluidBodyOverLap)) {
#else
if ((lmax - lmin) > 0.5 || shiftedMin < 0) {
#endif
if (allowBiggerThanPeriod) {
periods[axis] = 0;
continue;
} else {
LOG_FATAL(
"Body #" << ((lmax - lmin) > 0.5 ? id2 : id1) << " spans over half of the cell size " << dim << " (axis=" << axis
<< ", see flags allowBiggerThanPeriod and Cell.flipFlippable)");
throw runtime_error(__FILE__ ": Body larger than half of the cell size encountered.");
}
}
int period1 = int(math::floor(lmax));
//overlap around zero, on the "+" side
if ((lmin - period1) <= overlapTolerance) {
periods[axis] = -period1;
continue;
}
//overlap around 1, on the "-" sides
if ((lmax - period1 + overlapTolerance) >= shiftedMin) {
periods[axis] = -period1 - 1;
continue;
}
// none of the above, exit
return false;
}
return true;
}
boost::python::tuple InsertionSortCollider::dumpBounds()
{
boost::python::list bl[3]; // 3 bound lists, inserted into the tuple at the end
for (int axis = 0; axis < 3; axis++) {
VecBounds& V = BB[axis];
if (periodic) {
for (size_t i = 0; i < V.size(); i++) {
long ii = V.norm(i); // start from the period boundary
bl[axis].append(boost::python::make_tuple(V[ii].coord, (V[ii].flags.isMin ? -1 : 1) * V[ii].id, V[ii].period));
}
} else {
for (size_t i = 0; i < V.size(); i++) {
bl[axis].append(boost::python::make_tuple(V[i].coord, (V[i].flags.isMin ? -1 : 1) * V[i].id));
}
}
}
return boost::python::make_tuple(bl[0], bl[1], bl[2]);
}
void InsertionSortCollider::VecBounds::updatePeriodicity(Scene* scene)
{
assert(scene->isPeriodic);
assert(axis >= 0 && axis <= 2);
cellDim = scene->cell->getSize()[axis];
}
} // namespace yade
|