File: SpecializedPlacedVolImplHelper.h

package info (click to toggle)
vecgeom 1.2.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 23,928 kB
  • sloc: cpp: 88,717; ansic: 6,894; python: 1,035; sh: 582; sql: 538; makefile: 29
file content (554 lines) | stat: -rw-r--r-- 27,143 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
#pragma once

#include "VecGeom/base/Cuda.h"
#include "VecGeom/base/Global.h"
#include "VecGeom/base/SOA3D.h"

#include <algorithm>

#ifdef VECGEOM_DISTANCE_DEBUG
#include "VecGeom/volumes/utilities/ResultComparator.h"
#endif

#ifndef __clang__
#pragma GCC diagnostic push
// We ignore warnings of this type in this file.
// The warning occurred due to potential overflow of memory address locations output[i]
// where i is an unsigned long long in a loop. It can be safely ignored since such
// memory locations do in fact not exist (~multiple petabyte in memory).
#pragma GCC diagnostic ignored "-Waggressive-loop-optimizations"
#endif // __clang__

namespace vecgeom {

// Hand-written forward declarations of the three helper class templates defined in this file.
// Each macro invocation names the template (one type parameter followed by two value parameters of
// type TranslationCode / RotationCode) and passes translation::kGeneric / rotation::kGeneric.
// NOTE(review): presumably the macro (expected in VecGeom/base/Cuda.h) also registers the
// host <-> device type mapping used by CudaType_t below, with the generic codes as the values
// substituted on the device side -- confirm against the macro definition.
// NOTE(review): the first invocation passes `typename` where the other two pass `class` for the
// type-parameter kind; this looks purely cosmetic -- confirm.
VECGEOM_DEVICE_DECLARE_CONV_TEMPLATE_1t_2v(class, CommonSpecializedVolImplHelper, typename, TranslationCode,
                                           translation::kGeneric, RotationCode, rotation::kGeneric);
VECGEOM_DEVICE_DECLARE_CONV_TEMPLATE_1t_2v(class, SIMDSpecializedVolImplHelper, class, TranslationCode,
                                           translation::kGeneric, RotationCode, rotation::kGeneric);
VECGEOM_DEVICE_DECLARE_CONV_TEMPLATE_1t_2v(class, LoopSpecializedVolImplHelper, class, TranslationCode,
                                           translation::kGeneric, RotationCode, rotation::kGeneric);

inline namespace VECGEOM_IMPL_NAMESPACE {

/**
 * Single-point (scalar) placed-volume interface shared by the SIMD and loop helpers.
 *
 * The class derives from Specialization::PlacedShape_t and implements the virtual single-point
 * queries in two steps: the global point (and direction) is moved into the local frame with the
 * Transformation3D member templates instantiated on the compile-time codes transC / rotC, and the
 * result is forwarded, together with the unplaced struct, to the matching static function of
 * Specialization.
 *
 * \tparam Specialization Implementation type. It must provide PlacedShape_t, UnplacedVolume_t, the
 *                        Print* helpers and the static Inside / Contains / DistanceToIn /
 *                        DistanceToOut / SafetyToIn functions called below.
 * \tparam transC Translation code passed to Transformation3D::Transform.
 * \tparam rotC   Rotation code passed to Transformation3D::Transform / TransformDirection.
 */
template <class Specialization, TranslationCode transC, RotationCode rotC>
class CommonSpecializedVolImplHelper : public Specialization::PlacedShape_t {

  using PlacedShape_t    = typename Specialization::PlacedShape_t;
  using UnplacedVolume_t = typename Specialization::UnplacedVolume_t;

public:
#ifndef VECCORE_CUDA
  /// Host constructor: forwards label, logical volume and transformation to PlacedShape_t.
  CommonSpecializedVolImplHelper(char const *const label, LogicalVolume const *const logical_volume,
                                 Transformation3D const *const transformation)
      : PlacedShape_t(label, logical_volume, transformation)
  {
  }

  /// Same as above for a non-const logical volume pointer.
  CommonSpecializedVolImplHelper(char const *const label, LogicalVolume *const logical_volume,
                                 Transformation3D const *const transformation)
      : PlacedShape_t(label, logical_volume, transformation)
  {
  }

  /// Unlabelled variant: delegates with an empty label.
  CommonSpecializedVolImplHelper(LogicalVolume const *const logical_volume,
                                 Transformation3D const *const transformation)
      : CommonSpecializedVolImplHelper("", logical_volume, transformation)
  {
  }

  // this constructor mimics the constructor from the Unplaced solid
  // it ensures that placed volumes can be constructed just like ordinary Geant4/ROOT solids
  // It allocates a new UnplacedVolume_t from `params`, wraps it in a new LogicalVolume and places it
  // with the identity transformation. Both objects are created with `new` and not released here.
  template <typename... ArgTypes>
  CommonSpecializedVolImplHelper(char const *const label, ArgTypes... params)
      : CommonSpecializedVolImplHelper(label, new LogicalVolume(new UnplacedVolume_t(params...)),
                                       &Transformation3D::kIdentity)
  {
  }

#else // Compiling for CUDA
  /// Device constructor: forwards everything to the device-side PlacedShape_t constructor.
  VECCORE_ATT_DEVICE CommonSpecializedVolImplHelper(LogicalVolume const *const logical_volume,
                                                    Transformation3D const *const transformation, const unsigned int id,
                                                    const int copy_no, const int child_id)
      : PlacedShape_t(logical_volume, transformation, id, copy_no, child_id)
  {
  }
#endif
  // Re-expose the base-class overload sets: the overrides declared below would otherwise hide the
  // remaining overloads with the same names.
  using PlacedShape_t::Contains;
  using PlacedShape_t::DistanceToIn;
  using PlacedShape_t::DistanceToOut;
  using PlacedShape_t::Inside;
  using PlacedShape_t::PlacedShape_t;
  using PlacedShape_t::SafetyToIn;
  using PlacedShape_t::SafetyToOut;
  using PlacedShape_t::UnplacedContains;

  /// Size in bytes of this helper type (the interface returns int, hence the explicit conversion).
  virtual int MemorySize() const override { return static_cast<int>(sizeof(*this)); }

  VECCORE_ATT_HOST_DEVICE
  virtual void PrintType() const override { Specialization::PrintType(); }

  virtual void PrintType(std::ostream &os) const override { Specialization::PrintType(os, transC, rotC); }
  virtual void PrintImplementationType(std::ostream &os) const override { Specialization::PrintImplementationType(os); }
  virtual void PrintUnplacedType(std::ostream &os) const override { Specialization::PrintUnplacedType(os); }

  /// Compile-time translation code this helper was instantiated with.
  int GetTransCode() const final { return transC; }
  /// Compile-time rotation code this helper was instantiated with.
  int GetRotCode() const final { return rotC; }

  /// Classifies a global point via Specialization::Inside after transforming it to the local frame.
  VECCORE_ATT_HOST_DEVICE
  virtual EnumInside Inside(Vector3D<Precision> const &point) const override
  {
    Inside_t output;
    Transformation3D const *tr = this->GetTransformation();
    Specialization::Inside(*this->GetUnplacedStruct(), tr->Transform<transC, rotC, Precision>(point), output);
    return static_cast<EnumInside>(output);
  }

  /// Returns the result of Specialization::Contains for the locally transformed point.
  VECCORE_ATT_HOST_DEVICE
  virtual bool Contains(Vector3D<Precision> const &point) const override
  {
    bool output(false);
    Transformation3D const *tr = this->GetTransformation();
    Vector3D<Precision> lp     = tr->Transform<transC, rotC, Precision>(point);
    Specialization::Contains(*this->GetUnplacedStruct(), lp, output);
    return output;
  }

  /// Same as Contains(point), additionally handing the transformed point back through localPoint.
  VECCORE_ATT_HOST_DEVICE
  virtual bool Contains(Vector3D<Precision> const &point, Vector3D<Precision> &localPoint) const override
  {
    bool output(false);
    Transformation3D const *tr = this->GetTransformation();
    localPoint                 = tr->Transform<transC, rotC, Precision>(point);
    Specialization::Contains(*this->GetUnplacedStruct(), localPoint, output);
#ifdef VECGEOM_DISTANCE_DEBUG
    DistanceComparator::CompareUnplacedContains(this, output, localPoint);
#endif
    return output;
  }

  /// Distance along `direction` from a global point to the volume, as computed by
  /// Specialization::DistanceToIn on the locally transformed point and direction.
  /// `direction` is asserted to be normalized in host builds.
  VECCORE_ATT_HOST_DEVICE
  virtual Precision DistanceToIn(Vector3D<Precision> const &point, Vector3D<Precision> const &direction,
                                 const Precision stepMax = kInfLength) const override
  {
#ifndef VECCORE_CUDA
    assert(direction.IsNormalized() && " direction not normalized in call to DistanceToIn ");
#endif
    Precision output(kInfLength);
    Transformation3D const *tr = this->GetTransformation();
    Specialization::DistanceToIn(*this->GetUnplacedStruct(), tr->Transform<transC, rotC>(point),
                                 tr->TransformDirection<rotC>(direction), stepMax, output);
#ifdef VECGEOM_DISTANCE_DEBUG
    DistanceComparator::CompareDistanceToIn(this, output, point, direction, stepMax);
#endif
    return output;
  }

  /// DistanceToOut for a point and direction given in the global frame: both are transformed to the
  /// local frame before Specialization::DistanceToOut is evaluated.
  /// `direction` is asserted to be normalized in host builds.
  VECCORE_ATT_HOST_DEVICE
  virtual Precision PlacedDistanceToOut(Vector3D<Precision> const &point, Vector3D<Precision> const &direction,
                                        const Precision stepMax = kInfLength) const override
  {
#ifndef VECCORE_CUDA
    assert(direction.IsNormalized() && " direction not normalized in call to PlacedDistanceToOut ");
#endif
    Transformation3D const *tr = this->GetTransformation();
    Precision output(-1.);
    // No `template` disambiguator here: the template arguments are deduced from the call, and the
    // keyword is only valid in front of a name that is followed by a template argument list.
    Specialization::DistanceToOut(*this->GetUnplacedStruct(), tr->Transform<transC, rotC>(point),
                                  tr->TransformDirection<rotC>(direction), stepMax, output);

#ifdef VECGEOM_DISTANCE_DEBUG
    // The comparator is fed the point/direction transformed with the non-specialized overloads.
    DistanceComparator::CompareDistanceToOut(this, output, tr->Transform(point), tr->TransformDirection(direction),
                                             stepMax);
#endif
    return output;
  }

  /// Safety from a global point, as computed by Specialization::SafetyToIn in the local frame.
  VECCORE_ATT_HOST_DEVICE
  virtual Precision SafetyToIn(Vector3D<Precision> const &point) const override
  {
    Precision output(kInfLength);
    Transformation3D const *tr = this->GetTransformation();
    Specialization::SafetyToIn(*this->GetUnplacedStruct(), tr->Transform<transC, rotC>(point), output);
    return output;
  }

  /// Explicit SIMD variant: transforms the positions and calls UnplacedVolume_t::SafetyToInVec
  /// (qualified call, so the UnplacedVolume_t implementation is selected statically).
  virtual Real_v SafetyToInVec(Vector3D<Real_v> const &position) const override
  {
    Transformation3D const *tr = this->GetTransformation();
    return this->GetUnplacedVolume()->UnplacedVolume_t::SafetyToInVec(tr->Transform<transC, rotC>(position));
  }

}; // End class CommonSpecializedVolImplHelper

// needs to be in the specializations
/// Runs Specialization::Contains over the points with indices [offset, size), advancing by the
/// lane count of Real_v, and writes one bool per point to output[index].
/// Points are transformed with trans.Transform<transC, rotC> before the kernel is called.
template <class Specialization, typename Real_v, int transC, int rotC>
VECGEOM_FORCE_INLINE
static void ContainsLoopKernel(typename Specialization::UnplacedStruct_t const &shapestruct,
                               Transformation3D const &trans, const size_t offset, const size_t size,
                               SOA3D<Precision> const &points, bool *const output)
{
  using Mask_t  = typename vecCore::Mask_v<Real_v>;
  using Count_t = decltype(points.size());

  const auto lanes = vecCore::VectorSize<Real_v>();

  for (Count_t first(offset); first < size; first += lanes) {
    // Load one Real_v worth of coordinates starting at index `first`.
    const Real_v px = vecCore::FromPtr<Real_v>(points.x() + first);
    const Real_v py = vecCore::FromPtr<Real_v>(points.y() + first);
    const Real_v pz = vecCore::FromPtr<Real_v>(points.z() + first);
    Vector3D<Real_v> globalPoint(px, py, pz);

    Mask_t inside(false);
    Specialization::template Contains<Real_v>(shapestruct, trans.Transform<transC, rotC>(globalPoint), inside);

    // The mask is unpacked lane by lane rather than with vecCore::StoreMask,
    // which has a problem -> see VECCORE-21
    for (size_t lane = 0; lane < lanes; ++lane) {
      output[first + lane] = vecCore::MaskLaneAt(inside, lane);
    }
  }
}

/// Runs Specialization::Inside over the points with indices [offset, size), advancing by the lane
/// count of Real_v, and writes one Inside_t value per point to output[index].
/// Points are transformed with trans.Transform<transC, rotC> before the kernel is called.
template <class Specialization, typename Real_v, int transC, int rotC>
VECGEOM_FORCE_INLINE
static void InsideLoopKernel(typename Specialization::UnplacedStruct_t const &shapestruct,
                             Transformation3D const &trans, const size_t offset, const size_t size,
                             SOA3D<Precision> const &points, Inside_t *const output)
{
  using Index_t = vecCore::Index_v<Real_v>;
  using Count_t = decltype(points.size());

  const auto stride     = vecCore::VectorSize<Real_v>();
  const auto indexLanes = vecCore::VectorSize<Index_t>();

  for (Count_t first(offset); first < size; first += stride) {
    // Load one Real_v worth of coordinates starting at index `first`.
    const Real_v px = vecCore::FromPtr<Real_v>(points.x() + first);
    const Real_v py = vecCore::FromPtr<Real_v>(points.y() + first);
    const Real_v pz = vecCore::FromPtr<Real_v>(points.z() + first);
    Vector3D<Real_v> globalPoint(px, py, pz);

    Index_t location;
    Specialization::template Inside<Real_v>(shapestruct, trans.Transform<transC, rotC>(globalPoint), location);

    // TODO: make a proper store here
    for (size_t lane = 0; lane < indexLanes; ++lane) {
      output[first + lane] = vecCore::LaneAt<Index_t>(location, lane);
    }
  }
}

/// Runs Specialization::SafetyToIn over the points with indices [offset, size), advancing by the
/// lane count of Real_v, and stores each Real_v result at output + index.
/// Points are transformed with trans.Transform<transC, rotC> before the kernel is called.
template <class Specialization, typename Real_v, int transC, int rotC>
VECGEOM_FORCE_INLINE
static void SafetyToInLoopKernel(typename Specialization::UnplacedStruct_t const &shapestruct,
                                 Transformation3D const &trans, const size_t offset, const size_t size,
                                 SOA3D<Precision> const &points, Precision *const output)
{
  using Count_t = decltype(points.size());

  const auto lanes = vecCore::VectorSize<Real_v>();

  for (Count_t first(offset); first < size; first += lanes) {
    // Load one Real_v worth of coordinates starting at index `first`.
    const Real_v px = vecCore::FromPtr<Real_v>(points.x() + first);
    const Real_v py = vecCore::FromPtr<Real_v>(points.y() + first);
    const Real_v pz = vecCore::FromPtr<Real_v>(points.z() + first);
    Vector3D<Real_v> globalPoint(px, py, pz);

    Real_v safety(kInfLength);
    Specialization::template SafetyToIn<Real_v>(shapestruct, trans.Transform<transC, rotC>(globalPoint), safety);
    vecCore::Store(safety, output + first);
  }
}

/// Runs Specialization::DistanceToIn over the tracks with indices [offset, size), advancing by the
/// lane count of Real_v, and stores each Real_v result at output + index.
/// Points are transformed with trans.Transform<transC, rotC> and directions with
/// trans.TransformDirection<rotC>; the step limits are read from stepMax + index.
template <class Specialization, typename Real_v, int transC, int rotC>
VECGEOM_FORCE_INLINE
static void DistanceToInLoopKernel(typename Specialization::UnplacedStruct_t const &shapestruct,
                                   Transformation3D const &trans, const size_t offset, const size_t size,
                                   SOA3D<Precision> const &points, SOA3D<Precision> const &directions,
                                   Precision const *const stepMax, Precision *const output)
{
  using Count_t = decltype(points.size());

  const auto lanes = vecCore::VectorSize<Real_v>();

  for (Count_t first(offset); first < size; first += lanes) {
    // Load one Real_v worth of positions, directions and step limits starting at index `first`.
    const Real_v px = vecCore::FromPtr<Real_v>(points.x() + first);
    const Real_v py = vecCore::FromPtr<Real_v>(points.y() + first);
    const Real_v pz = vecCore::FromPtr<Real_v>(points.z() + first);
    Vector3D<Real_v> globalPoint(px, py, pz);

    const Real_v dx = vecCore::FromPtr<Real_v>(directions.x() + first);
    const Real_v dy = vecCore::FromPtr<Real_v>(directions.y() + first);
    const Real_v dz = vecCore::FromPtr<Real_v>(directions.z() + first);
    Vector3D<Real_v> globalDir(dx, dy, dz);

    Real_v stepLimit(vecCore::FromPtr<Real_v>(stepMax + first));
    Real_v distance(kInfLength);
    Specialization::template DistanceToIn<Real_v>(shapestruct, trans.Transform<transC, rotC>(globalPoint),
                                                  trans.TransformDirection<rotC>(globalDir), stepLimit, distance);
    vecCore::Store(distance, output + first);
  }
}

/**
 * Helper whose bulk (SOA3D) queries run the loop kernels with the vector backend.
 *
 * Each bulk method splits the input at `offset = size - size % lanes`, where `lanes` is the lane
 * count of VectorBackend::Real_v: indices [0, offset) are processed with VectorBackend::Real_v,
 * the remaining tail [offset, size) with ScalarBackend::Real_v.
 *
 * \tparam Specialization Implementation type providing the static kernels.
 * \tparam transC Translation code forwarded to the kernels and to Transformation3D::Transform.
 * \tparam rotC   Rotation code forwarded to the kernels and to Transformation3D.
 */
template <class Specialization, int transC, int rotC>
class SIMDSpecializedVolImplHelper : public CommonSpecializedVolImplHelper<Specialization, transC, rotC> {
  using CommonHelper_t = CommonSpecializedVolImplHelper<Specialization, transC, rotC>;

public:
  // Inherit constructors and re-expose the overload sets hidden by the overrides below.
  using CommonHelper_t::CommonHelper_t;
  using CommonHelper_t::Contains;
  using CommonHelper_t::DistanceToIn;
  using CommonHelper_t::DistanceToOut;
  using CommonHelper_t::Inside;
  using CommonHelper_t::SafetyToIn;
  using CommonHelper_t::SafetyToOut;
  using CommonHelper_t::UnplacedContains;

  /// Builds a helper from the name, logical volume and transformation of another placed volume.
  SIMDSpecializedVolImplHelper(VPlacedVolume const *other)
      : CommonHelper_t(other->GetName(), other->GetLogicalVolume(), other->GetTransformation())
  {
  }

  VECCORE_ATT_HOST_DEVICE
  virtual ~SIMDSpecializedVolImplHelper() {}

  /// Bulk SafetyToIn: writes one value per point to `output`.
  virtual void SafetyToIn(SOA3D<Precision> const &points, Precision *const output) const override
  {
    const auto kS = vecCore::VectorSize<VectorBackend::Real_v>();
    auto offset   = points.size() - points.size() % kS;
    auto shape    = this->GetUnplacedStruct();
    auto transf   = this->GetTransformation();

    // vector loop treatment
    SafetyToInLoopKernel<Specialization, VectorBackend::Real_v, transC, rotC>(*shape, *transf, 0, offset, points,
                                                                              output);
    // tail treatment
    SafetyToInLoopKernel<Specialization, ScalarBackend::Real_v, transC, rotC>(*shape, *transf, offset, points.size(),
                                                                              points, output);
  }

  /// Bulk DistanceToIn: writes one value per (point, direction, stepMax) triple to `output`.
  virtual void DistanceToIn(SOA3D<Precision> const &points, SOA3D<Precision> const &directions,
                            Precision const *const stepMax, Precision *const output) const override
  {
    auto offset = points.size() - points.size() % vecCore::VectorSize<VectorBackend::Real_v>();
    auto shape  = this->GetUnplacedStruct();
    auto transf = this->GetTransformation();
    // vector loop treatment
    DistanceToInLoopKernel<Specialization, VectorBackend::Real_v, transC, rotC>(*shape, *transf, 0, offset, points,
                                                                                directions, stepMax, output);
    // tail treatment
    DistanceToInLoopKernel<Specialization, ScalarBackend::Real_v, transC, rotC>(*shape, *transf, offset, points.size(),
                                                                                points, directions, stepMax, output);
  }

  using UnplacedVolume_t = typename Specialization::UnplacedVolume_t;

  // the explicit SIMD interface
  /// Evaluates Specialization::DistanceToIn<Real_v> on the locally transformed point and direction.
  virtual Real_v DistanceToInVec(Vector3D<Real_v> const &p, Vector3D<Real_v> const &d,
                                 Real_v const step_max) const override
  {
    Real_v output(kInfLength);
    Transformation3D const *tr = this->GetTransformation();
    auto unplacedstruct        = this->GetUnplacedStruct();
    Specialization::template DistanceToIn<Real_v>(*unplacedstruct, tr->Transform<transC, rotC>(p),
                                                  tr->TransformDirection<rotC>(d), step_max, output);
    return output;
  }

  /// Bulk Contains: writes one bool per point to `output`.
  virtual void Contains(SOA3D<Precision> const &points, bool *const output) const override
  {
    auto offset = points.size() - points.size() % vecCore::VectorSize<VectorBackend::Real_v>();
    auto shape  = this->GetUnplacedStruct();
    auto transf = this->GetTransformation();
    // vector loop treatment
    ContainsLoopKernel<Specialization, VectorBackend::Real_v, transC, rotC>(*shape, *transf, 0, offset, points, output);
    // tail treatment
    ContainsLoopKernel<Specialization, ScalarBackend::Real_v, transC, rotC>(*shape, *transf, offset, points.size(),
                                                                            points, output);
  }

  /// Bulk Inside: writes one Inside_t value per point to `output`.
  virtual void Inside(SOA3D<Precision> const &points, Inside_t *const output) const override
  {
    // I would be in favor of getting rid of this interface (unless someone asks for it)
    // Inside is only provided for Geant4 which currently does not have a basket interface
    // InsideTemplate(points, output);
    auto offset = points.size() - points.size() % vecCore::VectorSize<VectorBackend::Real_v>();
    auto shape  = this->GetUnplacedStruct();
    auto transf = this->GetTransformation();
    // vector loop treatment
    InsideLoopKernel<Specialization, VectorBackend::Real_v, transC, rotC>(*shape, *transf, 0, offset, points, output);
    // tail treatment
    InsideLoopKernel<Specialization, ScalarBackend::Real_v, transC, rotC>(*shape, *transf, offset, points.size(),
                                                                          points, output);
  }

#ifdef VECGEOM_CUDA_INTERFACE
  using ThisClass_t = SIMDSpecializedVolImplHelper<Specialization, transC, rotC>;
  /// Size reported by DevicePtr for the CUDA counterpart of this class.
  virtual size_t DeviceSizeOf() const override { return DevicePtr<CudaType_t<ThisClass_t>>::SizeOf(); }

  /// Constructs the device counterpart of this volume at the address `in_gpu_ptr`.
  DevicePtr<cuda::VPlacedVolume> CopyToGpu(DevicePtr<cuda::LogicalVolume> const logical_volume,
                                           DevicePtr<cuda::Transformation3D> const transform,
                                           DevicePtr<cuda::VPlacedVolume> const in_gpu_ptr) const override
  {
    DevicePtr<CudaType_t<ThisClass_t>> gpu_ptr(in_gpu_ptr);
    gpu_ptr.Construct(logical_volume, transform, this->id(), this->GetCopyNo(), this->GetChildId());
    CudaAssertError();
    // Need to go via the void* because the regular c++ compilation
    // does not actually see the declaration for the cuda version
    // (and thus can not determine the inheritance).
    return DevicePtr<cuda::VPlacedVolume>((void *)gpu_ptr);
  }

  /// Allocates device storage first, then constructs the device counterpart in it.
  DevicePtr<cuda::VPlacedVolume> CopyToGpu(DevicePtr<cuda::LogicalVolume> const logical_volume,
                                           DevicePtr<cuda::Transformation3D> const transform) const override
  {
    DevicePtr<CudaType_t<ThisClass_t>> gpu_ptr;
    gpu_ptr.Allocate();
    return CopyToGpu(logical_volume, transform, DevicePtr<cuda::VPlacedVolume>((void *)gpu_ptr));
  }

  /**
   * Copy many instances of this class to the GPU.
   * \param host_volumes Host volumes to be copied. These should all be of the same type as the class that this function is called with.
   * \param logical_volumes GPU addresses of the logical volumes corresponding to the placed volumes.
   * \param transforms GPU addresses of the transformations corresponding to the placed volumes.
   * \param in_gpu_ptrs GPU addresses where the GPU instances of the host volumes should be placed.
   * \note This requires an explicit template instantiation of ConstructManyOnGpu<ThisClass_t>().
   * \note The four input vectors must have the same length (checked with assert only).
   * \see VECGEOM_DEVICE_INST_PLACED_VOLUME_IMPL and its multi-argument versions.
   */
  void CopyManyToGpu(std::vector<VPlacedVolume const *> const & host_volumes,
                     std::vector<DevicePtr<cuda::LogicalVolume>> const & logical_volumes,
                     std::vector<DevicePtr<cuda::Transformation3D>> const & transforms,
                     std::vector<DevicePtr<cuda::VPlacedVolume>> const & in_gpu_ptrs) const override
  {
    assert(host_volumes.size() == logical_volumes.size());
    assert(host_volumes.size() == transforms.size());
    assert(host_volumes.size() == in_gpu_ptrs.size());

    std::vector<decltype(std::declval<ThisClass_t>().id())> ids;
    std::vector<decltype(std::declval<ThisClass_t>().GetCopyNo())> copyNos;
    std::vector<decltype(std::declval<ThisClass_t>().GetChildId())> childIds;
    // The final sizes are known up front: allocate once instead of growing on every push_back.
    ids.reserve(host_volumes.size());
    copyNos.reserve(host_volumes.size());
    childIds.reserve(host_volumes.size());
    for (auto placedVol : host_volumes) {
      ids.push_back(placedVol->id());
      copyNos.push_back(placedVol->GetCopyNo());
      childIds.push_back(placedVol->GetChildId());
    }

    ConstructManyOnGpu<CudaType_t<ThisClass_t>>(in_gpu_ptrs.size(), in_gpu_ptrs.data(), logical_volumes.data(),
                                                transforms.data(), ids.data(), copyNos.data(), childIds.data());
  }

#endif // VECGEOM_CUDA_INTERFACE

}; // end SIMD Helper

/**
 * Helper whose bulk (SOA3D) queries run the loop kernels with the scalar backend only.
 *
 * Every bulk method calls the corresponding loop kernel instantiated with ScalarBackend::Real_v
 * over the full index range [0, points.size()). The explicit SIMD entry point DistanceToInVec is
 * served by evaluating the scalar kernel once per lane.
 *
 * \tparam Specialization Implementation type providing the static kernels.
 * \tparam transC Translation code forwarded to the kernels and to Transformation3D::Transform.
 * \tparam rotC   Rotation code forwarded to the kernels and to Transformation3D.
 */
template <class Specialization, int transC, int rotC>
class LoopSpecializedVolImplHelper : public CommonSpecializedVolImplHelper<Specialization, transC, rotC> {
  using CommonHelper_t   = CommonSpecializedVolImplHelper<Specialization, transC, rotC>;
  using UnplacedVolume_t = typename Specialization::UnplacedVolume_t;

public:
  // Inherit constructors and re-expose the overload sets hidden by the overrides below.
  using CommonHelper_t::CommonHelper_t;
  using CommonHelper_t::Contains;
  using CommonHelper_t::DistanceToIn;
  using CommonHelper_t::DistanceToOut;
  using CommonHelper_t::Inside;
  using CommonHelper_t::SafetyToIn;
  using CommonHelper_t::SafetyToOut;
  using CommonHelper_t::UnplacedContains;

  /// Builds a helper from the name, logical volume and transformation of another placed volume.
  LoopSpecializedVolImplHelper(VPlacedVolume const *other)
      : CommonHelper_t(other->GetName(), other->GetLogicalVolume(), other->GetTransformation())
  {
  }

  /// Bulk SafetyToIn: writes one value per point to `output`.
  virtual void SafetyToIn(SOA3D<Precision> const &points, Precision *const output) const override
  {
    auto shape  = this->GetUnplacedStruct();
    auto transf = this->GetTransformation();
    SafetyToInLoopKernel<Specialization, vecgeom::ScalarBackend::Real_v, transC, rotC>(*shape, *transf, 0,
                                                                                       points.size(), points, output);
  }

  /// Bulk Contains: writes one bool per point to `output`.
  virtual void Contains(SOA3D<Precision> const &points, bool *const output) const override
  {
    auto unplacedv = this->GetUnplacedStruct();
    auto transf    = this->GetTransformation();
    // scalar loop over all points
    ContainsLoopKernel<Specialization, vecgeom::ScalarBackend::Real_v, transC, rotC>(*unplacedv, *transf, 0,
                                                                                     points.size(), points, output);
  }

  /// Bulk Inside: writes one Inside_t value per point to `output`.
  virtual void Inside(SOA3D<Precision> const &points, Inside_t *const output) const override
  {
    // I would be in favor of getting rid of this interface (unless someone asks for it)
    // Inside is only provided for Geant4 which currently does not have a basket interface
    // InsideTemplate(points, output);
    auto shape  = this->GetUnplacedStruct();
    auto transf = this->GetTransformation();
    InsideLoopKernel<Specialization, vecgeom::ScalarBackend::Real_v, transC, rotC>(*shape, *transf, 0, points.size(),
                                                                                   points, output);
  }

  /// Bulk DistanceToIn: writes one value per (point, direction, stepMax) triple to `output`.
  virtual void DistanceToIn(SOA3D<Precision> const &points, SOA3D<Precision> const &directions,
                            Precision const *const stepMax, Precision *const output) const override
  {
    auto shape  = this->GetUnplacedStruct();
    auto transf = this->GetTransformation();
    DistanceToInLoopKernel<Specialization, vecgeom::ScalarBackend::Real_v, transC, rotC>(
        *shape, *transf, 0, points.size(), points, directions, stepMax, output);
  }

  // the explicit SIMD interface
  /// Evaluates the scalar DistanceToIn kernel once per lane of the inputs and assembles the
  /// per-lane results into the returned Real_v.
  virtual Real_v DistanceToInVec(Vector3D<Real_v> const &p, Vector3D<Real_v> const &d,
                                 Real_v const step_max) const override
  {
    Real_v output(kInfLength);
    using vecCore::LaneAt;
    using Real_s = Precision;
    // Neither the transformation nor the unplaced struct depends on the lane: fetch them once.
    Transformation3D const *tr = this->GetTransformation();
    const auto unplacedstruct  = this->GetUnplacedStruct();
    for (size_t i = 0; i < vecCore::VectorSize<Real_v>(); ++i) {
      const Vector3D<Real_s> ps(LaneAt(p.x(), i), LaneAt(p.y(), i), LaneAt(p.z(), i)); // scalar vector
      const Vector3D<Real_s> ds(LaneAt(d.x(), i), LaneAt(d.y(), i), LaneAt(d.z(), i)); // scalar direction;
      Real_s tmp(-1.);
      Specialization::template DistanceToIn<Real_s>(*unplacedstruct, tr->Transform<transC, rotC>(ps),
                                                    tr->TransformDirection<rotC>(ds), LaneAt(step_max, i), tmp);
      vecCore::AssignLane(output, i, tmp);
    }
    return output;
  }

#ifdef VECGEOM_CUDA_INTERFACE
  // QUESTION: CAN WE COMBINE THIS CODE WITH THE ONE FROM SIMDHelper and put it into CommonHelper?
  using ThisClass_t = LoopSpecializedVolImplHelper<Specialization, transC, rotC>;

  /// Size reported by DevicePtr for the CUDA counterpart of this class.
  virtual size_t DeviceSizeOf() const override { return DevicePtr<CudaType_t<ThisClass_t>>::SizeOf(); }

  /// Constructs the device counterpart of this volume at the address `in_gpu_ptr`.
  DevicePtr<cuda::VPlacedVolume> CopyToGpu(DevicePtr<cuda::LogicalVolume> const logical_volume,
                                           DevicePtr<cuda::Transformation3D> const transform,
                                           DevicePtr<cuda::VPlacedVolume> const in_gpu_ptr) const override
  {
    DevicePtr<CudaType_t<ThisClass_t>> gpu_ptr(in_gpu_ptr);
    gpu_ptr.Construct(logical_volume, transform, this->id(), this->GetCopyNo(), this->GetChildId());
    CudaAssertError();
    // Need to go via the void* because the regular c++ compilation
    // does not actually see the declaration for the cuda version
    // (and thus can not determine the inheritance).
    return DevicePtr<cuda::VPlacedVolume>((void *)gpu_ptr);
  }

  /// Allocates device storage first, then constructs the device counterpart in it.
  DevicePtr<cuda::VPlacedVolume> CopyToGpu(DevicePtr<cuda::LogicalVolume> const logical_volume,
                                           DevicePtr<cuda::Transformation3D> const transform) const override
  {
    DevicePtr<CudaType_t<ThisClass_t>> gpu_ptr;
    gpu_ptr.Allocate();
    return CopyToGpu(logical_volume, transform, DevicePtr<cuda::VPlacedVolume>((void *)gpu_ptr));
  }

  /**
   * Copy many instances of this class to the GPU.
   * \param host_volumes Host volumes to be copied. These should all be of the same type as the class that this function is called with.
   * \param logical_volumes GPU addresses of the logical volumes corresponding to the placed volumes.
   * \param transforms GPU addresses of the transformations corresponding to the placed volumes.
   * \param in_gpu_ptrs GPU addresses where the GPU instances of the host volumes should be placed.
   * \note This requires an explicit template instantiation of ConstructManyOnGpu<ThisClass_t>().
   * \note The four input vectors must have the same length (checked with assert only).
   * \see VECGEOM_DEVICE_INST_PLACED_VOLUME_IMPL
   */
  void CopyManyToGpu(std::vector<VPlacedVolume const *> const & host_volumes,
                     std::vector<DevicePtr<cuda::LogicalVolume>> const & logical_volumes,
                     std::vector<DevicePtr<cuda::Transformation3D>> const & transforms,
                     std::vector<DevicePtr<cuda::VPlacedVolume>> const & in_gpu_ptrs) const override
  {
    assert(host_volumes.size() == logical_volumes.size());
    assert(host_volumes.size() == transforms.size());
    assert(host_volumes.size() == in_gpu_ptrs.size());

    std::vector<decltype(std::declval<ThisClass_t>().id())> ids;
    std::vector<decltype(std::declval<ThisClass_t>().GetCopyNo())> copyNos;
    std::vector<decltype(std::declval<ThisClass_t>().GetChildId())> childIds;
    // The final sizes are known up front: allocate once instead of growing on every push_back.
    ids.reserve(host_volumes.size());
    copyNos.reserve(host_volumes.size());
    childIds.reserve(host_volumes.size());
    for (auto placedVol : host_volumes) {
      ids.push_back(placedVol->id());
      copyNos.push_back(placedVol->GetCopyNo());
      childIds.push_back(placedVol->GetChildId());
    }

    ConstructManyOnGpu<CudaType_t<ThisClass_t>>(in_gpu_ptrs.size(), in_gpu_ptrs.data(), logical_volumes.data(),
                                                transforms.data(), ids.data(), copyNos.data(), childIds.data());
  }
#endif // VECGEOM_CUDA_INTERFACE

}; // end Loop Helper
} // namespace VECGEOM_IMPL_NAMESPACE
} // namespace vecgeom

#ifndef __clang__
#pragma GCC diagnostic pop
#endif