10#if !defined(ampcor_cuda_correlators_Sequential_icc)
11#error This header is an implementation detail of ampcor::cuda::correlators::Sequential
// Copies an incoming reference tile into pair {pid}'s slot of the host arena.
// NOTE(review): the doc extraction elided the signature/body framing lines of
// this member; only the visible statements are documented.
17template <
typename raster_t>
// start of this pair's slot: each pair owns (_refCells + _tgtCells) cells
23 cell_type * support = _arena + pid*(_refCells + _tgtCells);
// wrap the slot in a tile with the reference layout
25 tile_type tile(_refLayout, support);
// copy the reference raster cells into the arena slot
27 std::copy(ref.begin(), ref.end(), tile.view().begin());
// Copies an incoming target tile into pair {pid}'s slot of the host arena,
// immediately after the pair's reference tile.
// NOTE(review): signature/body framing lines elided by the doc extraction.
33template <
typename raster_t>
// target cells live _refCells past the start of the pair's slot
39 cell_type * support = _arena + pid*(_refCells + _tgtCells) + _refCells;
// wrap the slot in a tile with the target layout
41 tile_type tile(_tgtLayout, support);
// copy the target raster cells into the arena slot
43 std::copy(tgt.begin(), tgt.end(), tile.view().begin());
// Drives the full coarse-then-refined correlation pipeline over all tile
// pairs and produces the offset field.
// NOTE(review): many framing lines (braces, the final return, possibly some
// statements) were elided by the doc extraction; comments describe only the
// visible statements.
49template <
typename raster_t>
52adjust() ->
const value_type *
55 pyre::journal::debug_t channel(
"ampcor.cuda");
// edge sizes of the coarse reference/target/correlation grids
58 auto refDim = _refLayout.shape()[0];
60 auto tgtDim = _tgtLayout.shape()[0];
62 auto corDim = _corLayout.shape()[0];
// move the host arena of tile pairs to device (managed) memory
66 auto coarseArena = _push();
// coarse pass: amplitudes, reference statistics, sum-area tables,
// target statistics, correlation surface
68 auto amplitudes = _detect(coarseArena, refDim, tgtDim);
70 auto refStatistics = _refStats(amplitudes, refDim, tgtDim);
72 auto sat = _sat(amplitudes, refDim, tgtDim);
74 auto tgtStatistics = _tgtStats(sat, refDim, tgtDim, corDim);
76 auto gamma = _correlate(amplitudes, refStatistics, tgtStatistics, refDim, tgtDim, corDim);
// locations of the coarse correlation maxima
78 auto maxcor = _maxcor(gamma, corDim);
// release the coarse statistics
82 cudaFree(tgtStatistics);
84 cudaFree(refStatistics);
// edge sizes of the refined and zoomed grids
90 auto refRefinedDim = _refRefinedLayout.shape()[0];
92 auto tgtRefinedDim = _tgtRefinedLayout.shape()[0];
94 auto corRefinedDim = _corRefinedLayout.shape()[0];
96 auto corZoomedDim = _corZoomedLayout.shape()[0];
// clamp maxima so refined target windows stay within their tiles
99 _nudge(maxcor, refDim, tgtDim);
// allocate the refined arena; refine reference tiles, migrate and refine
// target windows
101 auto refinedArena = _refinedArena();
103 _refRefine(coarseArena, refinedArena);
105 _tgtMigrate(coarseArena, maxcor, refinedArena);
107 _tgtRefine(refinedArena);
110 cudaFree(coarseArena);
113 _deramp(refinedArena);
// refined pass over the refined arena
115 amplitudes = _detect(refinedArena, refRefinedDim, tgtRefinedDim);
117 refStatistics = _refStats(amplitudes, refRefinedDim, tgtRefinedDim);
119 sat = _sat(amplitudes, refRefinedDim, tgtRefinedDim);
// NOTE(review): tgtStatistics was cudaFree'd above and is not visibly
// recomputed before this use (and is freed again below) -- either a
// "tgtStatistics = _tgtStats(sat, ...)" line was elided by the extraction,
// or this is a use-after-free followed by a double free; verify against the
// full source.
121 gamma = _correlate(amplitudes, refStatistics, tgtStatistics,
122 refRefinedDim, tgtRefinedDim, corRefinedDim);
// zoom the refined correlation surface and locate its maxima
124 auto zoomed = _zoomcor(gamma);
126 auto maxcorZoomed = _maxcor(zoomed, corZoomedDim);
// assemble the offset field from the coarse and zoomed maxima
129 auto offsets = _offsetField(maxcor, maxcorZoomed);
// cleanup; NOTE(review): sat, gamma, zoomed and maxcor are not freed in the
// visible lines -- confirm whether those cudaFree calls were elided or this
// leaks device memory
132 cudaFree(maxcorZoomed);
135 cudaFree(tgtStatistics);
137 cudaFree(refStatistics);
138 cudaFree(amplitudes);
139 cudaFree(refinedArena);
// Read-only accessor for the host-side arena of tile pairs.
// NOTE(review): the return statement was elided by the doc extraction.
147template <
typename raster_t>
150arena() const -> const cell_type *
// Accessor for the number of tile pairs handled by this worker.
// NOTE(review): the return statement was elided by the doc extraction.
156template <
typename raster_t>
159pairs() const -> size_type
// NOTE(review): template header for a member whose entire declaration and
// body were elided by the doc extraction (given its position between the
// accessors and the constructor, plausibly the destructor -- verify against
// the full source).
166template <
typename raster_t>
// Constructor: records the correlation plan geometry, derives the refined
// and zoomed layouts, and allocates the host-side arena and offset storage.
// NOTE(review): the opening of the signature (class qualifier and first
// parameters, e.g. the pair count that initializes _pairs) was elided by the
// doc extraction.
175template <
typename raster_t>
178 const layout_type & refLayout,
const layout_type & tgtLayout,
179 size_type refineFactor, size_type refineMargin, size_type zoomFactor) :
181 _refineFactor{ refineFactor },
182 _refineMargin{ refineMargin },
183 _zoomFactor{ zoomFactor },
184 _refLayout{ refLayout },
185 _tgtLayout{ tgtLayout },
// correlation surface: one cell per valid placement of ref within tgt
186 _corLayout{ tgtLayout.shape() - refLayout.shape() + index_type::fill(1) },
// refined reference tile: coarse shape scaled by the refinement factor
187 _refRefinedLayout{ refineFactor * _refLayout.shape() },
// refined target window: reference shape padded by the margin, then scaled
188 _tgtRefinedLayout{ refineFactor * (_refLayout.shape() + index_type::fill(2*refineMargin)) },
189 _corRefinedLayout{ index_type::fill(2*refineFactor*refineMargin+1) },
190 _corZoomedLayout { zoomFactor * _corRefinedLayout.shape() },
// cached cell counts and byte footprints for the layouts above
191 _refCells{ _refLayout.size() },
192 _tgtCells{ _tgtLayout.size() },
193 _corCells{ _corLayout.size() },
194 _refRefinedCells{ _refRefinedLayout.size() },
195 _tgtRefinedCells{ _tgtRefinedLayout.size() },
196 _refFootprint{ _refCells * sizeof(cell_type) },
197 _tgtFootprint{ _tgtCells * sizeof(cell_type) },
198 _corFootprint{ _corCells * sizeof(value_type) },
199 _refRefinedFootprint{ _refRefinedCells * sizeof(cell_type) },
200 _tgtRefinedFootprint{ _tgtRefinedCells * sizeof(cell_type) },
// host-side storage; NOTE(review): raw new[] -- ownership/delete[] must be
// handled by the destructor (not visible here)
201 _arena{ new cell_type[ _pairs * (_refCells+_tgtCells) ] },
// two values (row, col offset) per pair
202 _offsets{ new value_type[ 2 * _pairs ] }
// diagnostic report of the plan geometry
205 auto footprint = _pairs*(_refFootprint + _tgtFootprint);
206 auto refinedFootprint = _pairs*(_refRefinedFootprint + _tgtRefinedFootprint);
208 pyre::journal::debug_t channel(
"ampcor.cuda");
211 << pyre::journal::at(__HERE__)
212 <<
"new Sequential worker:"
213 << pyre::journal::newline
214 <<
" pairs: " << _pairs
215 << pyre::journal::newline
216 <<
" ref shape: " << _refLayout <<
", " << _refCells <<
" cells"
217 << pyre::journal::newline
218 <<
" tgt shape: " << _tgtLayout <<
", " << _tgtCells <<
" cells"
219 << pyre::journal::newline
220 <<
" footprint: " << (_refCells+_tgtCells) <<
" cells in "
221 << (footprint/1024/1024) <<
" Mb"
222 << pyre::journal::newline
223 <<
" refine factor: " << refineFactor
224 << pyre::journal::newline
225 <<
" refine margin: " << refineMargin
226 << pyre::journal::newline
227 <<
" refined ref shape: " << _refRefinedLayout <<
", " << _refRefinedCells <<
" cells"
228 << pyre::journal::newline
229 <<
" refined tgt shape: " << _tgtRefinedLayout <<
", " << _tgtRefinedCells <<
" cells"
230 << pyre::journal::newline
231 <<
" footprint: " << (_refRefinedCells+_tgtRefinedCells) <<
" cells in "
232 << (refinedFootprint/1024/1024) <<
" Mb"
233 << pyre::journal::newline
234 <<
" arena: " << _arena
235 << pyre::journal::endl;
// Debug helper: prints every pair's reference and target tile from the host
// arena to the "ampcor.cuda" debug channel.
// NOTE(review): signature and closing braces elided by the doc extraction.
240template <
typename raster_t>
246 pyre::journal::debug_t channel(
"ampcor.cuda");
249 channel << pyre::journal::at(__HERE__);
// walk the pairs; NOTE(review): `auto pid = 0` is int while _pairs is
// size_type -- a signed/unsigned comparison, harmless but noisy
251 for (
auto pid = 0; pid < _pairs; ++pid) {
253 channel <<
"pid: " << pid << pyre::journal::newline;
// this pair's reference tile within the arena
255 cell_type * refLoc = _arena + pid*(_refCells + _tgtCells);
257 tile_type ref(_refLayout, refLoc);
259 channel <<
"reference: " << pyre::journal::newline;
260 for (
auto idx = 0; idx < _refLayout.shape()[0]; ++idx) {
261 for (
auto jdx = 0; jdx < _refLayout.shape()[1]; ++jdx) {
262 channel << ref[{idx, jdx}] <<
" ";
264 channel << pyre::journal::newline;
// the target tile follows the reference tile in the same slot
268 cell_type * tgtLoc = refLoc + _refCells;
270 tile_type tgt(_tgtLayout, tgtLoc);
273 channel <<
"target: " << pyre::journal::newline;
274 for (
auto idx = 0; idx < _tgtLayout.shape()[0]; ++idx) {
275 for (
auto jdx = 0; jdx < _tgtLayout.shape()[1]; ++jdx) {
276 channel << tgt[{idx, jdx}] <<
" ";
278 channel << pyre::journal::newline;
283 channel << pyre::journal::endl;
// Allocates a device-visible (managed) copy of the coarse arena and copies
// the host tile pairs into it; returns the device pointer.
// NOTE(review): the trailing return statement and closing braces were elided
// by the doc extraction; caller owns the allocation (freed in adjust()).
293template <
typename raster_t>
296_push() const -> cell_type *
299 cell_type * cArena =
nullptr;
// one (ref + tgt) footprint per pair
301 auto footprint = _pairs * (_refFootprint + _tgtFootprint);
303 auto status = cudaMallocManaged(&cArena, footprint);
305 if (status != cudaSuccess) {
307 pyre::journal::error_t error(
"ampcor.cuda");
310 << pyre::journal::at(__HERE__)
311 <<
"while allocating " << 1.0*footprint/1024/1024
312 <<
"Mb of device memory for the input hyper-grid: "
313 << cudaGetErrorName(status) <<
" (" << status <<
")"
314 << pyre::journal::endl;
316 throw std::bad_alloc();
// blocking copy of the host arena into managed memory
320 status = cudaMemcpy(cArena, _arena, footprint, cudaMemcpyHostToDevice);
322 if (status != cudaSuccess) {
324 std::string description = cudaGetErrorName(status);
326 pyre::journal::error_t error(
"ampcor.cuda");
329 << pyre::journal::at(__HERE__)
330 <<
"while transferring tiles to the device: "
331 << description <<
" (" << status <<
")"
332 << pyre::journal::endl;
336 throw std::logic_error(description);
// Computes the amplitude (detected) version of every cell in the complex
// arena into a freshly allocated managed buffer; returns that buffer.
// NOTE(review): return statement elided by the doc extraction; caller frees.
344template <
typename raster_t>
347_detect(
const cell_type * cArena, size_type refDim, size_type tgtDim)
const -> value_type *
350 auto refCells = refDim * refDim;
352 auto tgtCells = tgtDim * tgtDim;
355 value_type * rArena =
nullptr;
// total cell count across all pairs
357 auto cells = _pairs * (refCells + tgtCells);
359 auto footprint = cells *
sizeof(value_type);
361 auto status = cudaMallocManaged(&rArena, footprint);
363 if (status != cudaSuccess) {
365 pyre::journal::error_t error(
"ampcor.cuda");
368 << pyre::journal::at(__HERE__)
369 <<
"while allocating " << 1.0*footprint/1024/1024
370 <<
"Mb of device memory for the tile amplitudes: "
371 << cudaGetErrorName(status) <<
" (" << status <<
")"
372 << pyre::journal::endl;
374 throw std::bad_alloc();
// device kernel: amplitude of each complex cell
378 kernels::detect(cArena, cells, rArena);
// Computes per-pair statistics (per the kernel, variances) of the reference
// amplitude tiles into a managed buffer of one value per pair; the kernel
// also appears to normalize rArena in place (it takes it non-const).
// NOTE(review): return statement elided by the doc extraction; caller frees.
385template <
typename raster_t>
388_refStats(value_type * rArena, size_type refDim, size_type tgtDim)
const -> value_type *
391 auto refCells = refDim * refDim;
393 auto tgtCells = tgtDim * tgtDim;
396 value_type * stats =
nullptr;
// one statistic per pair
399 auto footprint = _pairs *
sizeof(value_type);
401 auto status = cudaMallocManaged(&stats, footprint);
403 if (status != cudaSuccess) {
405 pyre::journal::error_t error(
"ampcor.cuda");
408 << pyre::journal::at(__HERE__)
409 <<
"while allocating " << 1.0*footprint/1024/1024
410 <<
"Mb of device memory for the variances of the reference tiles: "
411 << cudaGetErrorName(status) <<
" (" << status <<
")"
412 << pyre::journal::endl;
414 throw std::bad_alloc();
// stride between consecutive pairs is (refCells + tgtCells)
418 kernels::refStats(rArena, _pairs, refDim, refCells + tgtCells, stats);
// Builds a sum-area table for each target amplitude tile into a managed
// buffer (one tgt-sized table per pair); returns that buffer.
// NOTE(review): return statement and some channel-injection framing lines
// elided by the doc extraction; caller frees.
425template <
typename raster_t>
428_sat(
const value_type * rArena, size_type refDim, size_type tgtDim)
const -> value_type *
431 pyre::journal::debug_t channel(
"ampcor.cuda");
434 auto refCells = refDim * refDim;
436 auto tgtCells = tgtDim * tgtDim;
438 auto tgtFootprint = tgtCells *
sizeof(value_type);
441 value_type * sat =
nullptr;
// one table per pair
443 auto footprint = _pairs * tgtFootprint;
445 auto status = cudaMallocManaged(&sat, footprint);
447 if (status != cudaSuccess) {
449 pyre::journal::error_t error(
"ampcor.cuda");
452 << pyre::journal::at(__HERE__)
453 <<
"while allocating memory for the sum area tables: "
454 << cudaGetErrorName(status) <<
" (" << status <<
")"
455 << pyre::journal::endl;
457 throw std::bad_alloc();
// diagnostic (the leading "channel" token was elided by the extraction)
461 << pyre::journal::at(__HERE__)
462 <<
"allocated an arena of " << footprint <<
" bytes for the sum area tables at "
464 << pyre::journal::endl;
467 kernels::sat(rArena, _pairs, refCells, tgtCells, tgtDim, sat);
// Computes, from the sum-area tables, the average target amplitude over each
// placement of the reference window -- one corDim x corDim table per pair.
// NOTE(review): the first parameter (the SAT pointer, used below as dSAT),
// the return statement, and some framing lines were elided by the doc
// extraction; caller frees the result.
476template <
typename raster_t>
480 size_type refDim, size_type tgtDim, size_type corDim)
const -> value_type *
483 pyre::journal::debug_t channel(
"ampcor.cuda");
486 value_type * stats =
nullptr;
// one corDim x corDim table per pair
489 auto footprint = _pairs * corDim*corDim *
sizeof(value_type);
491 auto status = cudaMallocManaged(&stats, footprint);
493 if (status != cudaSuccess) {
495 std::string description = cudaGetErrorName(status);
497 pyre::journal::error_t error(
"ampcor.cuda");
500 << pyre::journal::at(__HERE__)
501 <<
"while allocating device memory for the table of target amplitude averages: "
502 << description <<
" (" << status <<
")"
503 << pyre::journal::endl;
505 throw std::bad_alloc();
// diagnostic (leading "channel" token elided by the extraction)
509 << pyre::journal::at(__HERE__)
510 <<
"allocated an arena of " << footprint <<
" bytes for the target amplitude averages at "
512 << pyre::journal::endl;
515 kernels::tgtStats(dSAT, _pairs, refDim, tgtDim, corDim, stats);
// Computes the normalized correlation surface (one corDim x corDim grid per
// pair) from the amplitudes and the precomputed reference/target statistics.
// NOTE(review): the first parameter (the amplitude arena, used below as
// rArena), the kernel's final argument, the return statement, and closing
// braces were elided by the doc extraction; caller frees the result.
523template <
typename raster_t>
527 const value_type * refStats,
const value_type * tgtStats,
528 size_type refDim, size_type tgtDim, size_type corDim)
const -> value_type *
531 pyre::journal::debug_t channel(
"ampcor.cuda");
534 auto refCells = refDim * refDim;
536 auto tgtCells = tgtDim * tgtDim;
538 auto corCells = corDim * corDim;
541 value_type * dCorrelation =
nullptr;
// one correlation surface per pair
543 auto size = _pairs * corCells;
545 auto footprint = size *
sizeof(value_type);
547 auto status = cudaMallocManaged(&dCorrelation, footprint);
549 if (status != cudaSuccess) {
551 std::string description = cudaGetErrorName(status);
553 pyre::journal::error_t error(
"ampcor.cuda");
556 << pyre::journal::at(__HERE__)
557 <<
"while allocating device memory for the correlation matrix: "
558 << description <<
" (" << status <<
")"
559 << pyre::journal::endl;
561 throw std::bad_alloc();
// diagnostic (leading "channel" token elided by the extraction)
565 << pyre::journal::at(__HERE__)
566 <<
"allocated " << footprint <<
" bytes for the correlation matrix at "
568 << pyre::journal::endl;
571 kernels::correlate(rArena, refStats, tgtStats,
573 refCells, tgtCells, corCells, refDim, tgtDim, corDim,
// Locates the maximum of each pair's correlation surface; returns a managed
// buffer of (row, col) int pairs -- two ints per pair.
// NOTE(review): the declaration of `loc`, the return statement, and some
// framing lines were elided by the doc extraction; caller frees the result.
582template <
typename raster_t>
585_maxcor(
const value_type * gamma, size_type corDim)
const ->
int *
588 pyre::journal::debug_t channel(
"ampcor.cuda");
591 auto corCells = corDim * corDim;
// two coordinates per pair
596 auto footprint = 2 * _pairs *
sizeof(int);
598 auto status = cudaMallocManaged(&loc, footprint);
600 if (status != cudaSuccess) {
602 std::string description = cudaGetErrorName(status);
604 pyre::journal::error_t error(
"ampcor.cuda");
607 << pyre::journal::at(__HERE__)
608 <<
"while allocating device memory for the location of the correlation maxima: "
609 << description <<
" (" << status <<
")"
610 << pyre::journal::endl;
612 throw std::bad_alloc();
// diagnostic (leading "channel" token elided by the extraction)
616 << pyre::journal::at(__HERE__)
617 <<
"allocated " << footprint <<
" bytes for the locations of the correlation maxima at "
619 << pyre::journal::endl;
622 kernels::maxcor(gamma, _pairs, corCells, corDim, loc);
// Adjusts the maxima locations in place so that the refined target window
// (reference shape plus 2*_refineMargin padding) fits inside each tile.
631template <
typename raster_t>
634_nudge(
int * locations, size_type refDim, size_type tgtDim)
const
638 kernels::nudge(_pairs, refDim, tgtDim, _refineMargin, locations);
// Allocates and zero-fills the managed arena that holds the refined
// reference and target tiles for all pairs; returns it.
// NOTE(review): the signature line and return statement were elided by the
// doc extraction; caller frees the result.
646template <
typename raster_t>
652 pyre::journal::debug_t channel(
"ampcor.cuda");
655 cell_type * arena =
nullptr;
// one (refined ref + refined tgt) footprint per pair
657 auto footprint = _pairs * (_refRefinedFootprint + _tgtRefinedFootprint);
659 auto status = cudaMallocManaged(&arena, footprint);
661 if (status != cudaSuccess) {
663 pyre::journal::error_t error(
"ampcor.cuda");
666 << pyre::journal::at(__HERE__)
667 <<
"while allocating " << 1.0*footprint/1024/1024
668 <<
"Mb of device memory for the refined tile hyper-grid: "
669 << cudaGetErrorName(status) <<
" (" << status <<
")"
670 << pyre::journal::endl;
672 throw std::bad_alloc();
// diagnostic (leading "channel" token elided by the extraction)
677 << pyre::journal::at(__HERE__)
678 <<
"allocated a refined arena: " << footprint <<
" bytes at " << arena
679 << pyre::journal::endl;
// zero the arena so zero-padded FFT refinement starts clean
682 status = cudaMemset(arena, 0, footprint);
684 if (status != cudaSuccess) {
686 std::string description = cudaGetErrorName(status);
688 pyre::journal::error_t error(
"ampcor.cuda");
691 << pyre::journal::at(__HERE__)
692 <<
"while initializing " << footprint
693 <<
" bytes of device memory for the refined tile hyper-grid: "
694 << description <<
" (" << status <<
")"
695 << pyre::journal::endl;
697 throw std::runtime_error(description);
// Refines the reference tiles by FFT zero-padding: batched forward FFT of
// each coarse reference tile from the coarse arena into the (larger,
// pre-zeroed) refined arena slots, followed by a batched inverse FFT in
// place over the refined shape.
// NOTE(review): several declarations (fwdPlan, revPlan, dim, the stride
// variables, plan type/batch arguments, exec direction flags) and the
// error-stream injection lines were elided by the doc extraction; comments
// describe only the visible statements.
706template <
typename raster_t>
709_refRefine(cell_type * coarseArena, cell_type * refinedArena)
const
712 pyre::journal::debug_t channel(
"ampcor.cuda");
// coarse and refined reference shapes
715 auto rshape = _refLayout.shape();
717 auto tshape = _refRefinedLayout.shape();
// forward plan: transform over the coarse shape ...
722 int fwdRanks[] = {
static_cast<int>(rshape[0]),
static_cast<int>(rshape[1]) };
725 int fwdIEmbed[] = {
static_cast<int>(rshape[0]),
static_cast<int>(rshape[1]) };
// ... reading tiles strided by a coarse pair slot ...
729 int fwdIDist = _refCells + _tgtCells;
// ... writing embedded into the refined shape, strided by a refined slot
732 int fwdOEmbed[] = {
static_cast<int>(tshape[0]),
static_cast<int>(tshape[1]) };
736 int fwdODist = _refRefinedCells + _tgtRefinedCells;
741 auto status = cufftPlanMany(&fwdPlan, dim, fwdRanks,
742 fwdIEmbed, fwdIStride, fwdIDist,
743 fwdOEmbed, fwdOStride, fwdODist,
748 if (status != CUFFT_SUCCESS) {
750 pyre::journal::error_t error(
"ampcor.cuda");
753 << pyre::journal::at(__HERE__)
754 <<
"while refining the reference tiles: forward FFT plan: error " << status
755 << pyre::journal::endl;
757 throw std::runtime_error(
"while refining the reference tiles: forward FFT plan error");
// forward transform: coarse arena -> refined arena
761 status = cufftExecC2C(fwdPlan,
762 reinterpret_cast<cufftComplex *
>(coarseArena),
763 reinterpret_cast<cufftComplex *
>(refinedArena),
766 if (status != CUFFT_SUCCESS) {
768 pyre::journal::error_t error(
"ampcor.cuda");
771 << pyre::journal::at(__HERE__)
772 <<
"while refining the reference tiles: executing the forward FFT plan: error "
774 << pyre::journal::endl;
776 throw std::runtime_error(
"while executing the forward FFT plan for the reference tiles");
// wait for the batch and surface async execution errors
779 auto code = cudaDeviceSynchronize();
781 if (code != cudaSuccess) {
783 std::string description = cudaGetErrorName(code);
785 pyre::journal::error_t channel(
"ampcor.cuda");
788 << pyre::journal::at(__HERE__)
789 <<
"while refining reference tiles: STEP 1: " << description <<
" (" << code <<
")"
790 << pyre::journal::endl;
792 throw std::runtime_error(description);
795 cufftDestroy(fwdPlan);
// inverse plan: in place over the refined shape
799 int revRanks[] = {
static_cast<int>(tshape[0]),
static_cast<int>(tshape[1]) };
801 int revIEmbed[] = {
static_cast<int>(tshape[0]),
static_cast<int>(tshape[1]) };
805 int revIDist = _refRefinedCells + _tgtRefinedCells;
807 int revOEmbed[] = {
static_cast<int>(tshape[0]),
static_cast<int>(tshape[1]) };
811 int revODist = _refRefinedCells + _tgtRefinedCells;
816 status = cufftPlanMany(&revPlan, dim, revRanks,
817 revIEmbed, revIStride, revIDist,
818 revOEmbed, revOStride, revODist,
823 if (status != CUFFT_SUCCESS) {
825 pyre::journal::error_t error(
"ampcor.cuda");
828 << pyre::journal::at(__HERE__)
829 <<
"while refining the reference tiles: inverse FFT plan: error " << status
830 << pyre::journal::endl;
832 throw std::runtime_error(
"while refining the reference tiles: inverse FFT plan error");
// inverse transform: refined arena in place
836 status = cufftExecC2C(revPlan,
837 reinterpret_cast<cufftComplex *
>(refinedArena),
838 reinterpret_cast<cufftComplex *
>(refinedArena),
841 if (status != CUFFT_SUCCESS) {
843 pyre::journal::error_t error(
"ampcor.cuda");
846 << pyre::journal::at(__HERE__)
847 <<
"while refining the reference tiles: executing the inverse FFT plan: error "
849 << pyre::journal::endl;
851 throw std::runtime_error(
"while executing the inverse FFT plan for the reference tiles");
854 code = cudaDeviceSynchronize();
856 if (code != cudaSuccess) {
858 std::string description = cudaGetErrorName(code);
860 pyre::journal::error_t channel(
"ampcor.cuda");
863 << pyre::journal::at(__HERE__)
864 <<
"while refining reference tiles: STEP 2: " << description <<
" (" << code <<
")"
865 << pyre::journal::endl;
867 throw std::runtime_error(description);
871 cufftDestroy(revPlan);
// Moves, for each pair, the best-placed expanded target window (reference
// shape plus margin, centered per the nudged maxima locations) from the
// coarse arena into its slot in the refined arena.
// NOTE(review): the tail of the kernels::migrate call (including the
// locations and refinedArena arguments) and closing braces were elided by
// the doc extraction.
879template <
typename raster_t>
882_tgtMigrate(cell_type * coarseArena,
int * locations, cell_type * refinedArena)
const
885 pyre::journal::debug_t channel(
"ampcor.cuda");
888 auto refShape = _refLayout.shape();
890 auto tgtShape = _tgtLayout.shape();
892 auto refRefinedShape = _refRefinedLayout.shape();
894 auto tgtRefinedShape = _tgtRefinedLayout.shape();
// the tiles are square: a single edge length characterizes each grid
897 auto refDim = refShape[0];
898 auto tgtDim = tgtShape[0];
899 auto refRefinedDim = refRefinedShape[0];
900 auto tgtRefinedDim = tgtRefinedShape[0];
// the expanded (margin-padded) window edge
903 auto expDim = refDim + 2 * _refineMargin;
906 kernels::migrate(coarseArena, _pairs,
907 refDim, tgtDim, expDim,
908 refRefinedDim, tgtRefinedDim,
// Refines the migrated target windows by FFT zero-padding, in place in the
// refined arena: batched forward FFT over the expanded window shape, then a
// batched transform over the full refined shape.
// NOTE(review): the signature line and several declarations (fwdPlan, dim,
// stride variables, plan tail arguments) were elided by the doc extraction.
918template <
typename raster_t>
925 pyre::journal::debug_t channel(
"ampcor.cuda");
928 auto tgtShape = _tgtLayout.shape();
930 auto tgtRefShape = _tgtRefinedLayout.shape();
// the expanded window: reference shape padded by the refine margin
932 auto expShape = _refLayout.shape() + index_type::fill(2*_refineMargin);
// forward plan: transform the expanded window, embedded in the refined
// target shape, one batch entry per pair slot
941 int fwdRanks[] = {
static_cast<int>(expShape[0]),
static_cast<int>(expShape[1]) };
943 int fwdIEmbed[] = {
static_cast<int>(tgtRefShape[0]),
static_cast<int>(tgtRefShape[1]) };
947 int fwdIDist = _refRefinedCells + _tgtRefinedCells;
949 int fwdOEmbed[] = {
static_cast<int>(tgtRefShape[0]),
static_cast<int>(tgtRefShape[1]) };
953 int fwdODist = _refRefinedCells + _tgtRefinedCells;
958 auto status = cufftPlanMany(&fwdPlan, dim, fwdRanks,
959 fwdIEmbed, fwdIStride, fwdIDist,
960 fwdOEmbed, fwdOStride, fwdODist,
965 if (status != CUFFT_SUCCESS) {
967 pyre::journal::error_t error(
"ampcor.cuda");
970 << pyre::journal::at(__HERE__)
971 <<
"while refining the target tiles: forward FFT plan: error " << status
972 << pyre::journal::endl;
974 throw std::runtime_error(
"while refining the target tiles: forward FFT plan error");
// the first target slot in the refined arena (targets follow the refined
// reference cells)
978 auto firstTile =
reinterpret_cast<cufftComplex *
>(refinedArena + _refRefinedCells);
980 status = cufftExecC2C(fwdPlan, firstTile, firstTile, CUFFT_FORWARD);
982 if (status != CUFFT_SUCCESS) {
984 pyre::journal::error_t error(
"ampcor.cuda");
987 << pyre::journal::at(__HERE__)
988 <<
"while refining the target tiles: executing the forward FFT plan: error "
990 << pyre::journal::endl;
// NOTE(review): this message says "reference tiles" but this is the
// target-tile path -- copy-paste error in the exception text
992 throw std::runtime_error(
"while executing the forward FFT plan for the reference tiles");
995 auto code = cudaDeviceSynchronize();
997 if (code != cudaSuccess) {
999 std::string description = cudaGetErrorName(code);
1001 pyre::journal::error_t channel(
"ampcor.cuda");
1004 << pyre::journal::at(__HERE__)
1005 <<
"while refining target tiles: STEP 1: " << description <<
" (" << code <<
")"
1006 << pyre::journal::endl;
1008 throw std::runtime_error(description);
1011 cufftDestroy(fwdPlan);
// inverse plan: in place over the full refined target shape
1015 int revRanks[] = {
static_cast<int>(tgtRefShape[0]),
static_cast<int>(tgtRefShape[1]) };
1018 int revIEmbed[] = {
static_cast<int>(tgtRefShape[0]),
static_cast<int>(tgtRefShape[1]) };
1022 int revIDist = _refRefinedCells + _tgtRefinedCells;
1024 int revOEmbed[] = {
static_cast<int>(tgtRefShape[0]),
static_cast<int>(tgtRefShape[1]) };
1028 int revODist = _refRefinedCells + _tgtRefinedCells;
1031 cufftHandle revPlan;
1033 status = cufftPlanMany(&revPlan, dim, revRanks,
1034 revIEmbed, revIStride, revIDist,
1035 revOEmbed, revOStride, revODist,
1040 if (status != CUFFT_SUCCESS) {
1042 pyre::journal::error_t error(
"ampcor.cuda");
1045 << pyre::journal::at(__HERE__)
1046 <<
"while refining the target tiles: inverse FFT plan: error " << status
1047 << pyre::journal::endl;
1049 throw std::runtime_error(
"while refining the target tiles: inverse FFT plan error");
// BUG(review): this is the INVERSE step (revPlan; the surrounding messages
// all say "inverse FFT plan") but the transform direction passed is
// CUFFT_FORWARD -- it should be CUFFT_INVERSE, as in the analogous
// _refRefine/_zoomcor inverse steps. Fix:
//   status = cufftExecC2C(revPlan, firstTile, firstTile, CUFFT_INVERSE);
1053 status = cufftExecC2C(revPlan, firstTile, firstTile, CUFFT_FORWARD);
1055 if (status != CUFFT_SUCCESS) {
1057 pyre::journal::error_t error(
"ampcor.cuda");
1060 << pyre::journal::at(__HERE__)
1061 <<
"while refining the target tiles: executing the inverse FFT plan: error "
1063 << pyre::journal::endl;
1065 throw std::runtime_error(
"while executing the inverse FFT plan for the target tiles");
1068 code = cudaDeviceSynchronize();
1070 if (code != cudaSuccess) {
1072 std::string description = cudaGetErrorName(code);
1074 pyre::journal::error_t channel(
"ampcor.cuda");
1077 << pyre::journal::at(__HERE__)
1078 <<
"while refining target tiles: STEP 2: " << description <<
" (" << code <<
")"
1079 << pyre::journal::endl;
1081 throw std::runtime_error(description);
1085 cufftDestroy(revPlan);
// Removes the phase ramp from the refined tiles prior to amplitude
// detection.
// NOTE(review): the entire body of this member was elided by the doc
// extraction; only the signature fragment is visible.
1093template <
typename raster_t>
1096_deramp(cell_type * arena)
const
// Zooms the refined correlation surfaces by FFT interpolation: expands each
// real corDim x corDim surface into a zero-padded complex zmdDim x zmdDim
// scratch grid (kernels::r2c), forward-transforms over the original corDim
// extent, inverse-transforms over the zoomed extent, and converts back to
// real amplitudes (kernels::c2r).
// NOTE(review): declarations of dim and the stride variables, plan tail
// arguments, the return statement, and the cudaFree of `scratch` are elided
// by the doc extraction; caller frees the returned buffer.
1104template <
typename raster_t>
1107_zoomcor(value_type * gamma)
const -> value_type *
1110 const auto Mb = 1.0 / 1024 / 1024;
1112 pyre::journal::debug_t channel(
"ampcor.cuda");
1115 auto corShape = _corRefinedLayout.shape();
1117 int corDim = corShape[0];
1119 auto zmdShape = _corZoomedLayout.shape();
1121 auto zmdCells = _corZoomedLayout.size();
1123 int zmdDim = zmdShape[0];
// real-to-complex expansion into zero-padded zoomed-size scratch grids
1126 auto scratch = kernels::r2c(gamma, _pairs, corDim, zmdDim);
// forward plan: corDim x corDim transforms embedded in zmdDim x zmdDim grids
1131 int fwdRanks[] = { corDim, corDim };
1133 int fwdIEmbed[] = { zmdDim, zmdDim };
1137 int fwdIDist = zmdCells;
1139 int fwdOEmbed[] = { zmdDim, zmdDim };
1143 int fwdODist = zmdCells;
1146 cufftHandle fwdPlan;
1148 auto statusFFT = cufftPlanMany(&fwdPlan, dim, fwdRanks,
1149 fwdIEmbed, fwdIStride, fwdIDist,
1150 fwdOEmbed, fwdOStride, fwdODist,
1154 if (statusFFT != CUFFT_SUCCESS) {
1156 pyre::journal::error_t error(
"ampcor.cuda");
1159 << pyre::journal::at(__HERE__)
1160 <<
"while zooming the correlation surface: forward FFT plan: error " << statusFFT
1161 << pyre::journal::endl;
1163 throw std::runtime_error(
"while zooming the correlation hyper-matrix: forward plan error");
// diagnostic (leading "channel" token elided by the extraction)
1167 << pyre::journal::at(__HERE__)
1168 <<
"zooming the correlation matrix: forward FFT"
1169 << pyre::journal::endl;
1171 statusFFT = cufftExecC2C(fwdPlan, scratch, scratch, CUFFT_FORWARD);
1173 if (statusFFT != CUFFT_SUCCESS) {
1175 pyre::journal::error_t error(
"ampcor.cuda");
1178 << pyre::journal::at(__HERE__)
1179 <<
"while zooming the correlation hyper-matrix: executing the forward FFT plan: error "
1181 << pyre::journal::endl;
1183 throw std::runtime_error(
"while executing the forward FFT plan for the reference tiles");
1186 auto status = cudaDeviceSynchronize();
1188 if (status != cudaSuccess) {
1190 std::string description = cudaGetErrorName(status);
1192 pyre::journal::error_t channel(
"ampcor.cuda");
1195 << pyre::journal::at(__HERE__)
1196 <<
"while zooming the correlation matrix: STEP 1: " << description
1197 <<
" (" << status <<
")"
1198 << pyre::journal::endl;
1200 throw std::runtime_error(description);
1203 cufftDestroy(fwdPlan);
// inverse plan: full zmdDim x zmdDim transforms, in place
1206 int revRanks[] = { zmdDim, zmdDim };
1208 int revIEmbed[] = { zmdDim, zmdDim };
1212 int revIDist = zmdCells;
1214 int revOEmbed[] = { zmdDim, zmdDim };
1218 int revODist = zmdCells;
1221 cufftHandle revPlan;
1223 statusFFT = cufftPlanMany(&revPlan, dim, revRanks,
1224 revIEmbed, revIStride, revIDist,
1225 revOEmbed, revOStride, revODist,
1229 if (statusFFT != CUFFT_SUCCESS) {
1231 pyre::journal::error_t error(
"ampcor.cuda");
1234 << pyre::journal::at(__HERE__)
1235 <<
// NOTE(review): message says "forward" in the inverse-plan section --
// copy-paste error in the diagnostic/exception text
"while zooming the correlation surface: forward FFT plan: error " << statusFFT
1236 << pyre::journal::endl;
1238 throw std::runtime_error(
"while zooming the correlation hyper-matrix: forward plan error");
// diagnostic (leading "channel" token elided by the extraction)
1242 << pyre::journal::at(__HERE__)
1243 <<
"zooming the correlation matrix: inverse FFT"
1244 << pyre::journal::endl;
1246 statusFFT = cufftExecC2C(revPlan, scratch, scratch, CUFFT_INVERSE);
1248 if (statusFFT != CUFFT_SUCCESS) {
1250 pyre::journal::error_t error(
"ampcor.cuda");
1253 << pyre::journal::at(__HERE__)
1254 <<
"while zooming the correlation hyper-matrix: executing the inverse FFT plan: error "
1256 << pyre::journal::endl;
1258 throw std::runtime_error(
"while executing the inverse FFT plan for the reference tiles");
1261 status = cudaDeviceSynchronize();
1263 if (status != cudaSuccess) {
1265 std::string description = cudaGetErrorName(status);
1267 pyre::journal::error_t channel(
"ampcor.cuda");
1270 << pyre::journal::at(__HERE__)
1271 <<
"while zooming the correlation matrix: STEP 2: " << description
1272 <<
" (" << status <<
")"
1273 << pyre::journal::endl;
1275 throw std::runtime_error(description);
1278 cufftDestroy(revPlan);
// collapse the complex zoomed surfaces back to real values
1281 auto zoomed = kernels::c2r(scratch, _pairs, zmdDim);
// Combines the coarse maxima and the zoomed (sub-pixel) maxima into the
// final offset field, computed on the device and copied into the host-side
// _offsets buffer.
// NOTE(review): the return statement, the cudaFree of the device `offsets`
// buffer, and closing braces were elided by the doc extraction.
1292template <
typename raster_t>
1295_offsetField(
const int * coarse,
const int * fine) ->
const value_type *
1298 const auto Mb = 1.0 / (1024 * 1024);
// halo of valid placements around the reference tile within the target
1300 auto margin = (_tgtLayout.shape()[0] - _refLayout.shape()[0]) / 2;
// total magnification of the sub-pixel grid
1302 auto zoom = _refineFactor * _zoomFactor;
// two values (row, col) per pair
1305 auto cells = 2 * _pairs;
1307 auto footprint = cells *
sizeof(value_type);
1309 value_type * offsets =
nullptr;
1311 auto status = cudaMallocManaged(&offsets, footprint);
1313 if (status != cudaSuccess) {
1315 pyre::journal::error_t error(
"ampcor.cuda");
1318 << pyre::journal::at(__HERE__)
1319 <<
"while allocating " << footprint * Mb
1320 <<
"Mb of device memory for the offset field: "
1321 << cudaGetErrorName(status) <<
" (" << status <<
")"
1322 << pyre::journal::endl;
1324 throw std::bad_alloc();
// BUG(review): cudaMemset's signature is (ptr, value, count) -- the value
// and count arguments are swapped here, so this sets `footprint & 0xff`
// into ZERO bytes instead of zeroing the buffer (compare the correct call
// in _refinedArena). Fix:
//   status = cudaMemset(offsets, 0, footprint);
1327 status = cudaMemset(offsets, footprint, 0);
1329 if (status != cudaSuccess) {
1331 std::string description = cudaGetErrorName(status);
1333 pyre::journal::error_t error(
"ampcor.cuda");
1336 << pyre::journal::at(__HERE__)
1337 <<
"while initializing " << footprint * Mb
1338 <<
" Mb of device memory for the offset field: "
1339 << description <<
" (" << status <<
")"
1340 << pyre::journal::endl;
1342 throw std::runtime_error(description);
// compute the per-pair offsets on the device
1346 kernels::offsetField(coarse, fine, _pairs, margin, _refineMargin, zoom, offsets);
// harvest the result into the host-side member buffer
1349 status = cudaMemcpy(_offsets, offsets, footprint, cudaMemcpyDeviceToHost);
1351 if (status != cudaSuccess) {
1353 std::string description = cudaGetErrorName(status);
1355 pyre::journal::error_t error(
"ampcor.cuda");
1358 << pyre::journal::at(__HERE__)
1359 <<
"while harvesting the offset field from the device: "
1360 << description <<
" (" << status <<
")"
1361 << pyre::journal::endl;
1363 throw std::logic_error(description);
// Alternate zoom of the correlation surfaces using real-to-complex FFTs:
// R2C forward over the corDim extent into Hermitian-packed zmdDim storage,
// then C2R inverse back to real values in place.
// NOTE(review): the signature (including the `gamma` parameter), several
// declarations (dim, strides, plan tails), the return statement, and any
// scratch cleanup were elided by the doc extraction.
1375template <
typename raster_t>
1381 const auto Mb = 1.0 / 1024 / 1024;
1384 pyre::journal::debug_t channel(
"ampcor.cuda");
1387 value_type * zoomed =
nullptr;
1390 auto cshape = _corRefinedLayout.shape();
1392 auto zshape = _corZoomedLayout.shape();
1394 int corDim = cshape[0];
1396 int zmdDim = zshape[0];
// Hermitian packing: an R2C transform of a zmdDim x zmdDim grid stores only
// zmdDim * (zmdDim/2 + 1) complex cells
1399 auto cells = zmdDim * (zmdDim/2 + 1);
// 2 value_type per complex cell
1401 auto footprint = 2 * cells * _pairs *
sizeof(value_type);
// diagnostic (leading "channel" token elided by the extraction)
1404 << pyre::journal::at(__HERE__)
1406 << _pairs <<
" hermitian " << zmdDim <<
"x" << zmdDim <<
" matrices with "
1407 << cells <<
" independent cells," << pyre::journal::newline
1408 <<
"for a total of " << footprint * Mb <<
" Mb for the zoomed correlation matrix"
1409 << pyre::journal::endl;
1411 auto status = cudaMallocManaged(&zoomed, footprint);
1413 if (status != cudaSuccess) {
1415 pyre::journal::error_t error(
"ampcor.cuda");
1418 << pyre::journal::at(__HERE__)
1419 <<
"while allocating " << footprint * Mb
1420 <<
" Mb of device memory for the zoomed correlation matrix"
1421 << cudaGetErrorName(status) <<
" (" << status <<
")"
1422 << pyre::journal::endl;
1424 throw std::bad_alloc();
// zero-fill so the pad region contributes nothing to the interpolation
1427 status = cudaMemset(zoomed, 0, footprint);
1429 if (status != cudaSuccess) {
1431 std::string description = cudaGetErrorName(status);
1433 pyre::journal::error_t error(
"ampcor.cuda");
1436 << pyre::journal::at(__HERE__)
1437 <<
"while initializing " << footprint * Mb
1438 <<
" Mb of device memory for the zoomed correlation matrix: "
1439 << description <<
" (" << status <<
")"
1440 << pyre::journal::endl;
1442 throw std::runtime_error(description);
// forward plan: real corDim x corDim input, Hermitian zmdDim-embedded output
1448 int fwdRanks[] = { corDim, corDim };
1450 int fwdIEmbed[] = { corDim, corDim };
1454 int fwdIDist = _corRefinedLayout.size();
1456 int fwdOEmbed[] = { zmdDim, zmdDim };
1460 int fwdODist = cells;
1463 cufftHandle fwdPlan;
1465 auto statusFFT = cufftPlanMany(&fwdPlan, dim, fwdRanks,
1466 fwdIEmbed, fwdIStride, fwdIDist,
1467 fwdOEmbed, fwdOStride, fwdODist,
1471 if (statusFFT != CUFFT_SUCCESS) {
1473 pyre::journal::error_t error(
"ampcor.cuda");
1476 << pyre::journal::at(__HERE__)
1477 <<
"while zooming the correlation surface: forward FFT plan: error " << statusFFT
1478 << pyre::journal::endl;
1480 throw std::runtime_error(
"while zooming the correlation hyper-matrix: forward plan error");
// diagnostic (leading "channel" token elided by the extraction)
1484 << pyre::journal::at(__HERE__)
1485 <<
"zooming the correlation matrix: forward FFT"
1486 << pyre::journal::endl;
1488 statusFFT = cufftExecR2C(fwdPlan, gamma,
reinterpret_cast<cufftComplex *
>(zoomed));
1490 if (statusFFT != CUFFT_SUCCESS) {
1492 pyre::journal::error_t error(
"ampcor.cuda");
1495 << pyre::journal::at(__HERE__)
1496 <<
"while zooming the correlation hyper-matrix: executing the forward FFT plan: error "
1498 << pyre::journal::endl;
1500 throw std::runtime_error(
"while executing the forward FFT plan for the reference tiles");
1503 status = cudaDeviceSynchronize();
1505 if (status != cudaSuccess) {
1507 std::string description = cudaGetErrorName(status);
1509 pyre::journal::error_t channel(
"ampcor.cuda");
1512 << pyre::journal::at(__HERE__)
1513 <<
"while zooming the correlation matrix: STEP 1: " << description
1514 <<
" (" << status <<
")"
1515 << pyre::journal::endl;
1517 throw std::runtime_error(description);
1520 cufftDestroy(fwdPlan);
// inverse plan: complex-to-real back over the full zoomed extent, in place
1523 int revRanks[] = { zmdDim, zmdDim };
1525 int revIEmbed[] = { zmdDim, zmdDim };
1529 int revIDist = cells;
1531 int revOEmbed[] = { zmdDim, zmdDim };
1535 int revODist = cells;
1538 cufftHandle revPlan;
1540 statusFFT = cufftPlanMany(&revPlan, dim, revRanks,
1541 revIEmbed, revIStride, revIDist,
1542 revOEmbed, revOStride, revODist,
1546 if (statusFFT != CUFFT_SUCCESS) {
1548 pyre::journal::error_t error(
"ampcor.cuda");
1551 << pyre::journal::at(__HERE__)
1552 <<
// NOTE(review): message says "forward" in the inverse-plan section --
// copy-paste error in the diagnostic/exception text
"while zooming the correlation surface: forward FFT plan: error " << statusFFT
1553 << pyre::journal::endl;
1555 throw std::runtime_error(
"while zooming the correlation hyper-matrix: forward plan error");
// diagnostic (leading "channel" token elided by the extraction)
1559 << pyre::journal::at(__HERE__)
1560 <<
"zooming the correlation matrix: inverse FFT"
1561 << pyre::journal::endl;
1563 statusFFT = cufftExecC2R(revPlan,
reinterpret_cast<cufftComplex *
>(zoomed), zoomed);
1565 if (statusFFT != CUFFT_SUCCESS) {
1567 pyre::journal::error_t error(
"ampcor.cuda");
1570 << pyre::journal::at(__HERE__)
1571 <<
"while zooming the correlation hyper-matrix: executing the inverse FFT plan: error "
1573 << pyre::journal::endl;
1575 throw std::runtime_error(
"while executing the inverse FFT plan for the reference tiles");
1578 status = cudaDeviceSynchronize();
1580 if (status != cudaSuccess) {
1582 std::string description = cudaGetErrorName(status);
1584 pyre::journal::error_t channel(
"ampcor.cuda");
1587 << pyre::journal::at(__HERE__)
1588 <<
"while zooming the correlation matrix: STEP 2: " << description
1589 <<
" (" << status <<
")"
1590 << pyre::journal::endl;
1592 throw std::runtime_error(description);
1595 cufftDestroy(revPlan);
Definition Sequential.h:12