isce3 0.25.0
Loading...
Searching...
No Matches
Sequential.icc
// -*- C++ -*-
// -*- coding: utf-8 -*-
//
// michael a.g. aïvázis <michael.aivazis@para-sim.com>
// parasim
// (c) 1998-2019 all rights reserved
//

// code guard
#if !defined(ampcor_cuda_correlators_Sequential_icc)
#error This header is an implementation detail of ampcor::cuda::correlators::Sequential
#endif

#include <cufft.h>
16// interface
17template <typename raster_t>
18void
20addReferenceTile(size_type pid, const constview_type & ref)
21{
22 // figure out the starting address of this tile in the arena
23 cell_type * support = _arena + pid*(_refCells + _tgtCells);
24 // adapt it into a grid
25 tile_type tile(_refLayout, support);
26 // move the data
27 std::copy(ref.begin(), ref.end(), tile.view().begin());
28 // all done
29 return;
30}
31
32
33template <typename raster_t>
34void
36addTargetTile(size_type pid, const constview_type & tgt)
37{
38 // figure out the starting address of this tile in the arena
39 cell_type * support = _arena + pid*(_refCells + _tgtCells) + _refCells;
40 // adapt it into a grid
41 tile_type tile(_tgtLayout, support);
42 // move the data
43 std::copy(tgt.begin(), tgt.end(), tile.view().begin());
44 // all done
45 return;
46}
47
48
49template <typename raster_t>
50auto
52adjust() -> const value_type *
53{
54 // make a channel
55 pyre::journal::debug_t channel("ampcor.cuda");
56
57 // compute the dimension of the reference tiles; assume they are square
58 auto refDim = _refLayout.shape()[0];
59 // compute the dimension of the target tiles; assume they are square
60 auto tgtDim = _tgtLayout.shape()[0];
61 // compute the dimension of the correlation matrix
62 auto corDim = _corLayout.shape()[0];
63
64 // prelude: coarse adjustments
65 // move the tiles to the device
66 auto coarseArena = _push();
67 // compute their amplitudes
68 auto amplitudes = _detect(coarseArena, refDim, tgtDim);
69 // adjust the reference tiles to zero mean and compute the variances
70 auto refStatistics = _refStats(amplitudes, refDim, tgtDim);
71 // compute the sum area tables for all possible search window placements within the target tile
72 auto sat = _sat(amplitudes, refDim, tgtDim);
73 // use the SATs to compute the mean amplitude of all possible window placements
74 auto tgtStatistics = _tgtStats(sat, refDim, tgtDim, corDim);
75 // compute the correlation hyper-surface
76 auto gamma = _correlate(amplitudes, refStatistics, tgtStatistics, refDim, tgtDim, corDim);
77 // find its maxima
78 auto maxcor = _maxcor(gamma, corDim);
79
80 // interlude: housekeeping
81 cudaFree(gamma);
82 cudaFree(tgtStatistics);
83 cudaFree(sat);
84 cudaFree(refStatistics);
85 cudaFree(amplitudes);
86
87 // refinement: refine the tiles by a factor and repeat the process with a narrower search
88 // window around the location of maximum correlation
89 // compute the dimension of the reference tiles; assume they are square
90 auto refRefinedDim = _refRefinedLayout.shape()[0];
91 // compute the dimension of the target tiles; assume they are square
92 auto tgtRefinedDim = _tgtRefinedLayout.shape()[0];
93 // compute the dimension of the correlation matrix
94 auto corRefinedDim = _corRefinedLayout.shape()[0];
95 // compute the dimension of the zoomed correlation matrix
96 auto corZoomedDim = _corZoomedLayout.shape()[0];
97 // ensure that the expanded target tiles where the correlation is maximum fit within the
98 // search window
99 _nudge(maxcor, refDim, tgtDim);
100 // allocate room for the refinement area
101 auto refinedArena = _refinedArena();
102 // refine the reference tiles
103 _refRefine(coarseArena, refinedArena);
104 // collect the expanded maxcor tiles and migrate them to our new arena
105 _tgtMigrate(coarseArena, maxcor, refinedArena);
106 // refine the expanded target tiles in place
107 _tgtRefine(refinedArena);
108
109 // interlude: housekeeping
110 cudaFree(coarseArena);
111
112 // deramp
113 _deramp(refinedArena);
114 // compute amplitudes
115 amplitudes = _detect(refinedArena, refRefinedDim, tgtRefinedDim);
116 // adjust the reference tiles to zero mean and compute the variances
117 refStatistics = _refStats(amplitudes, refRefinedDim, tgtRefinedDim);
118 // compute the sum area tables for all possible search window placements with the target tile
119 sat = _sat(amplitudes, refRefinedDim, tgtRefinedDim);
120 // compute the correlation hyper-surface
121 gamma = _correlate(amplitudes, refStatistics, tgtStatistics,
122 refRefinedDim, tgtRefinedDim, corRefinedDim);
123 // zoom in
124 auto zoomed = _zoomcor(gamma);
125 // find its maxima
126 auto maxcorZoomed = _maxcor(zoomed, corZoomedDim);
127
128 // compute the shifts and return them
129 auto offsets = _offsetField(maxcor, maxcorZoomed);
130
131 // clean up
132 cudaFree(maxcorZoomed);
133 cudaFree(maxcor);
134 cudaFree(gamma);
135 cudaFree(tgtStatistics);
136 cudaFree(sat);
137 cudaFree(refStatistics);
138 cudaFree(amplitudes);
139 cudaFree(refinedArena);
140
141 // all done
142 return offsets;
143}
144
145
146// accessors
147template <typename raster_t>
148auto
150arena() const -> const cell_type *
151{
152 return _arena;
153}
154
155
156template <typename raster_t>
157auto
159pairs() const -> size_type
160{
161 return _pairs;
162}
163
164
165// meta-methods
166template <typename raster_t>
168~Sequential() {
169 // release the host memory
170 delete [] _arena;
171 delete [] _offsets;
172}
173
174
175template <typename raster_t>
177Sequential(size_type pairs,
178 const layout_type & refLayout, const layout_type & tgtLayout,
179 size_type refineFactor, size_type refineMargin, size_type zoomFactor) :
180 _pairs{ pairs },
181 _refineFactor{ refineFactor },
182 _refineMargin{ refineMargin },
183 _zoomFactor{ zoomFactor },
184 _refLayout{ refLayout },
185 _tgtLayout{ tgtLayout },
186 _corLayout{ tgtLayout.shape() - refLayout.shape() + index_type::fill(1) },
187 _refRefinedLayout{ refineFactor * _refLayout.shape() },
188 _tgtRefinedLayout{ refineFactor * (_refLayout.shape() + index_type::fill(2*refineMargin)) },
189 _corRefinedLayout{ index_type::fill(2*refineFactor*refineMargin+1) },
190 _corZoomedLayout { zoomFactor * _corRefinedLayout.shape() },
191 _refCells{ _refLayout.size() },
192 _tgtCells{ _tgtLayout.size() },
193 _corCells{ _corLayout.size() },
194 _refRefinedCells{ _refRefinedLayout.size() },
195 _tgtRefinedCells{ _tgtRefinedLayout.size() },
196 _refFootprint{ _refCells * sizeof(cell_type) },
197 _tgtFootprint{ _tgtCells * sizeof(cell_type) },
198 _corFootprint{ _corCells * sizeof(value_type) }, // the correlation matrix is real...
199 _refRefinedFootprint{ _refRefinedCells * sizeof(cell_type) },
200 _tgtRefinedFootprint{ _tgtRefinedCells * sizeof(cell_type) },
201 _arena{ new cell_type[ _pairs * (_refCells+_tgtCells) ] },
202 _offsets{ new value_type[ 2 * _pairs ] }
203{
204 // compute the footprints
205 auto footprint = _pairs*(_refFootprint + _tgtFootprint);
206 auto refinedFootprint = _pairs*(_refRefinedFootprint + _tgtRefinedFootprint);
207 // make a channel
208 pyre::journal::debug_t channel("ampcor.cuda");
209 // show me
210 channel
211 << pyre::journal::at(__HERE__)
212 << "new Sequential worker:"
213 << pyre::journal::newline
214 << " pairs: " << _pairs
215 << pyre::journal::newline
216 << " ref shape: " << _refLayout << ", " << _refCells << " cells"
217 << pyre::journal::newline
218 << " tgt shape: " << _tgtLayout << ", " << _tgtCells << " cells"
219 << pyre::journal::newline
220 << " footprint: " << (_refCells+_tgtCells) << " cells in "
221 << (footprint/1024/1024) << " Mb"
222 << pyre::journal::newline
223 << " refine factor: " << refineFactor
224 << pyre::journal::newline
225 << " refine margin: " << refineMargin
226 << pyre::journal::newline
227 << " refined ref shape: " << _refRefinedLayout << ", " << _refRefinedCells << " cells"
228 << pyre::journal::newline
229 << " refined tgt shape: " << _tgtRefinedLayout << ", " << _tgtRefinedCells << " cells"
230 << pyre::journal::newline
231 << " footprint: " << (_refRefinedCells+_tgtRefinedCells) << " cells in "
232 << (refinedFootprint/1024/1024) << " Mb"
233 << pyre::journal::newline
234 << " arena: " << _arena
235 << pyre::journal::endl;
236}
237
238
239// debugging support
240template <typename raster_t>
241void
243dump() const
244{
245 // dump the arena as a sequence of reference and target tiles
246 pyre::journal::debug_t channel("ampcor.cuda");
247
248 // sign in
249 channel << pyre::journal::at(__HERE__);
250 // go through all the pairs
251 for (auto pid = 0; pid < _pairs; ++pid) {
252 // inject the pid
253 channel << "pid: " << pid << pyre::journal::newline;
254 // compute the address of the reference tile in the arena
255 cell_type * refLoc = _arena + pid*(_refCells + _tgtCells);
256 // adapt it into a grid
257 tile_type ref(_refLayout, refLoc);
258 // inject it
259 channel << "reference: " << pyre::journal::newline;
260 for (auto idx = 0; idx < _refLayout.shape()[0]; ++idx) {
261 for (auto jdx = 0; jdx < _refLayout.shape()[1]; ++jdx) {
262 channel << ref[{idx, jdx}] << " ";
263 }
264 channel << pyre::journal::newline;
265 }
266
267 // compute the address of the target tile in the arena
268 cell_type * tgtLoc = refLoc + _refCells;
269 // adapt it into a grid
270 tile_type tgt(_tgtLayout, tgtLoc);
271
272 // inject it
273 channel << "target: " << pyre::journal::newline;
274 for (auto idx = 0; idx < _tgtLayout.shape()[0]; ++idx) {
275 for (auto jdx = 0; jdx < _tgtLayout.shape()[1]; ++jdx) {
276 channel << tgt[{idx, jdx}] << " ";
277 }
278 channel << pyre::journal::newline;
279 }
280
281 }
282 // sing off
283 channel << pyre::journal::endl;
284
285 // all done
286 return;
287}
288
289
290// implementation details: methods
291
292// push the tiles to the device
293template <typename raster_t>
294auto
296_push() const -> cell_type *
297{
298 // grab a spot
299 cell_type * cArena = nullptr;
300 // compute the required size
301 auto footprint = _pairs * (_refFootprint + _tgtFootprint);
302 // allocate room for it
303 auto status = cudaMallocManaged(&cArena, footprint);
304 // if something went wrong
305 if (status != cudaSuccess) {
306 // make a channel
307 pyre::journal::error_t error("ampcor.cuda");
308 // complain
309 error
310 << pyre::journal::at(__HERE__)
311 << "while allocating " << 1.0*footprint/1024/1024
312 << "Mb of device memory for the input hyper-grid: "
313 << cudaGetErrorName(status) << " (" << status << ")"
314 << pyre::journal::endl;
315 // and bail
316 throw std::bad_alloc();
317 }
318
319 // move the data
320 status = cudaMemcpy(cArena, _arena, footprint, cudaMemcpyHostToDevice);
321 // if something went wrong
322 if (status != cudaSuccess) {
323 // build the error description
324 std::string description = cudaGetErrorName(status);
325 // make a channel
326 pyre::journal::error_t error("ampcor.cuda");
327 // complain
328 error
329 << pyre::journal::at(__HERE__)
330 << "while transferring tiles to the device: "
331 << description << " (" << status << ")"
332 << pyre::journal::endl;
333 // release the memory
334 cudaFree(cArena);
335 // and bail
336 throw std::logic_error(description);
337 }
338
339 // all done
340 return cArena;
341}
342
343// compute the amplitude of the signal
344template <typename raster_t>
345auto
347_detect(const cell_type * cArena, size_type refDim, size_type tgtDim) const -> value_type *
348{
349 // compute the size of a reference tile
350 auto refCells = refDim * refDim;
351 // compute the size of a target tile
352 auto tgtCells = tgtDim * tgtDim;
353
354 // find a spot on the device
355 value_type * rArena = nullptr;
356 // compute the number of cells whose amplitude we have to compute
357 auto cells = _pairs * (refCells + tgtCells);
358 // compute the required size
359 auto footprint = cells * sizeof(value_type);
360 // allocate room for it
361 auto status = cudaMallocManaged(&rArena, footprint);
362 // if something went wrong
363 if (status != cudaSuccess) {
364 // make a channel
365 pyre::journal::error_t error("ampcor.cuda");
366 // complain
367 error
368 << pyre::journal::at(__HERE__)
369 << "while allocating " << 1.0*footprint/1024/1024
370 << "Mb of device memory for the tile amplitudes: "
371 << cudaGetErrorName(status) << " (" << status << ")"
372 << pyre::journal::endl;
373 // and bail
374 throw std::bad_alloc();
375 }
376
377 // engage...
378 kernels::detect(cArena, cells, rArena);
379
380 // all done
381 return rArena;
382}
383
384// subtract the tile mean from each reference pixel
385template <typename raster_t>
386auto
388_refStats(value_type * rArena, size_type refDim, size_type tgtDim) const -> value_type *
389{
390 // compute the size of a reference tile
391 auto refCells = refDim * refDim;
392 // compute the size of a target tile
393 auto tgtCells = tgtDim * tgtDim;
394
395 // grab a spot
396 value_type * stats = nullptr;
397 // compute the amount of memory needed to store the variances of the reference tiles: one
398 // number per reference tile
399 auto footprint = _pairs * sizeof(value_type);
400 // allocate room
401 auto status = cudaMallocManaged(&stats, footprint);
402 // if something went wrong
403 if (status != cudaSuccess) {
404 // make a channel
405 pyre::journal::error_t error("ampcor.cuda");
406 // complain
407 error
408 << pyre::journal::at(__HERE__)
409 << "while allocating " << 1.0*footprint/1024/1024
410 << "Mb of device memory for the variances of the reference tiles: "
411 << cudaGetErrorName(status) << " (" << status << ")"
412 << pyre::journal::endl;
413 // and bail
414 throw std::bad_alloc();
415 }
416
417 // engage
418 kernels::refStats(rArena, _pairs, refDim, refCells + tgtCells, stats);
419
420 // all done
421 return stats;
422}
423
424// build the sum area tables for the target tiles
425template <typename raster_t>
426auto
428_sat(const value_type * rArena, size_type refDim, size_type tgtDim) const -> value_type *
429{
430 // make a channel
431 pyre::journal::debug_t channel("ampcor.cuda");
432
433 // compute the size of a reference tile
434 auto refCells = refDim * refDim;
435 // compute the size of a target tile
436 auto tgtCells = tgtDim * tgtDim;
437 // compute the memory footprint of a target tile
438 auto tgtFootprint = tgtCells * sizeof(value_type);
439
440 // grab a spot for the sat tables
441 value_type * sat = nullptr;
442 // compute the amount of memory needed to store them
443 auto footprint = _pairs * tgtFootprint;
444 // allocate memory
445 auto status = cudaMallocManaged(&sat, footprint);
446 // if something went wrong
447 if (status != cudaSuccess) {
448 // make a channel
449 pyre::journal::error_t error("ampcor.cuda");
450 // complain
451 error
452 << pyre::journal::at(__HERE__)
453 << "while allocating memory for the sum area tables: "
454 << cudaGetErrorName(status) << " (" << status << ")"
455 << pyre::journal::endl;
456 // and bail
457 throw std::bad_alloc();
458 }
459 // otherwise, show me
460 channel
461 << pyre::journal::at(__HERE__)
462 << "allocated an arena of " << footprint << " bytes for the sum area tables at "
463 << sat
464 << pyre::journal::endl;
465
466 // engage
467 kernels::sat(rArena, _pairs, refCells, tgtCells, tgtDim, sat);
468
469 // all done
470 return sat;
471}
472
473
474// compute the average values for all possible placements of the reference shape within the
475// target tile
476template <typename raster_t>
477auto
479_tgtStats(const value_type * dSAT,
480 size_type refDim, size_type tgtDim, size_type corDim) const -> value_type *
481{
482 // make a channel
483 pyre::journal::debug_t channel("ampcor.cuda");
484
485 // pick a spot for the table of amplitude averages
486 value_type * stats = nullptr;
487 // compute the amount of memory needed to store the target tile statistics: we store the
488 // mean per placement per tile
489 auto footprint = _pairs * corDim*corDim * sizeof(value_type);
490 // allocate memory on the device
491 auto status = cudaMallocManaged(&stats, footprint);
492 // if something went wrong
493 if (status != cudaSuccess) {
494 // get the error description
495 std::string description = cudaGetErrorName(status);
496 // make a channel
497 pyre::journal::error_t error("ampcor.cuda");
498 // complain
499 error
500 << pyre::journal::at(__HERE__)
501 << "while allocating device memory for the table of target amplitude averages: "
502 << description << " (" << status << ")"
503 << pyre::journal::endl;
504 // and bail
505 throw std::bad_alloc();
506 }
507 // show me
508 channel
509 << pyre::journal::at(__HERE__)
510 << "allocated an arena of " << footprint << " bytes for the target amplitude averages at "
511 << stats
512 << pyre::journal::endl;
513
514 // engage
515 kernels::tgtStats(dSAT, _pairs, refDim, tgtDim, corDim, stats);
516
517 // all done
518 return stats;
519}
520
521
522// compute the correlation surface
523template <typename raster_t>
524auto
526_correlate(const value_type * rArena,
527 const value_type * refStats, const value_type * tgtStats,
528 size_type refDim, size_type tgtDim, size_type corDim) const -> value_type *
529{
530 // make a channel
531 pyre::journal::debug_t channel("ampcor.cuda");
532
533 // compute the size of the reference tile
534 auto refCells = refDim * refDim;
535 // compute the size of the target tile
536 auto tgtCells = tgtDim * tgtDim;
537 // compute the size of the correlation matrix
538 auto corCells = corDim * corDim;
539
540 // pick a spot for the correlation matrix
541 value_type * dCorrelation = nullptr;
542 // compute the total number of cells in the amplitude hyper-grid
543 auto size = _pairs * corCells;
544 // and the amount of memory needed to store them
545 auto footprint = size * sizeof(value_type);
546 // allocate memory on the device
547 auto status = cudaMallocManaged(&dCorrelation, footprint);
548 // if something went wrong
549 if (status != cudaSuccess) {
550 // get the error description
551 std::string description = cudaGetErrorName(status);
552 // make a channel
553 pyre::journal::error_t error("ampcor.cuda");
554 // complain
555 error
556 << pyre::journal::at(__HERE__)
557 << "while allocating device memory for the correlation matrix: "
558 << description << " (" << status << ")"
559 << pyre::journal::endl;
560 // and bail
561 throw std::bad_alloc();
562 }
563 // show me
564 channel
565 << pyre::journal::at(__HERE__)
566 << "allocated " << footprint << " bytes for the correlation matrix at "
567 << dCorrelation
568 << pyre::journal::endl;
569
570 // engage
571 kernels::correlate(rArena, refStats, tgtStats,
572 _pairs,
573 refCells, tgtCells, corCells, refDim, tgtDim, corDim,
574 dCorrelation);
575
576 // all done
577 return dCorrelation;
578}
579
580
581// find the locations of the correlation maxima
582template <typename raster_t>
583auto
585_maxcor(const value_type * gamma, size_type corDim) const -> int *
586{
587 // make a channel
588 pyre::journal::debug_t channel("ampcor.cuda");
589
590 // compute the size of the correlation matrix
591 auto corCells = corDim * corDim;
592
593 // find a spot
594 int * loc = nullptr;
595 // compute the amount of memory we need: a (row, coal) locator for each tile pair
596 auto footprint = 2 * _pairs * sizeof(int);
597 // allocate memory on the device
598 auto status = cudaMallocManaged(&loc, footprint);
599 // if something went wrong
600 if (status != cudaSuccess) {
601 // get the error description
602 std::string description = cudaGetErrorName(status);
603 // make a channel
604 pyre::journal::error_t error("ampcor.cuda");
605 // complain
606 error
607 << pyre::journal::at(__HERE__)
608 << "while allocating device memory for the location of the correlation maxima: "
609 << description << " (" << status << ")"
610 << pyre::journal::endl;
611 // and bail
612 throw std::bad_alloc();
613 }
614 // show me
615 channel
616 << pyre::journal::at(__HERE__)
617 << "allocated " << footprint << " bytes for the locations of the correlation maxima at "
618 << loc
619 << pyre::journal::endl;
620
621 // engage
622 kernels::maxcor(gamma, _pairs, corCells, corDim, loc);
623
624 // all done
625 return loc;
626}
627
628
629// adjust the locations of the correlation maxima so that the new target tiles fit within the
630// search window
631template <typename raster_t>
632void
634_nudge(int * locations, size_type refDim, size_type tgtDim) const
635{
636 // make sure that all locations are adjusted so that they allow enough room for the
637 // {refineMargin} by moving the ULHC of the tiles so they fit
638 kernels::nudge(_pairs, refDim, tgtDim, _refineMargin, locations);
639
640 // all done
641 return;
642}
643
644
645// allocate room for the refined tiles
646template <typename raster_t>
647auto
649_refinedArena() const -> cell_type *
650{
651 // make a channel
652 pyre::journal::debug_t channel("ampcor.cuda");
653
654 // grab a spot
655 cell_type * arena = nullptr;
656 // compute the required size
657 auto footprint = _pairs * (_refRefinedFootprint + _tgtRefinedFootprint);
658 // allocate room for it
659 auto status = cudaMallocManaged(&arena, footprint);
660 // if something went wrong
661 if (status != cudaSuccess) {
662 // make a channel
663 pyre::journal::error_t error("ampcor.cuda");
664 // complain
665 error
666 << pyre::journal::at(__HERE__)
667 << "while allocating " << 1.0*footprint/1024/1024
668 << "Mb of device memory for the refined tile hyper-grid: "
669 << cudaGetErrorName(status) << " (" << status << ")"
670 << pyre::journal::endl;
671 // and bail
672 throw std::bad_alloc();
673 }
674
675 // show me
676 channel
677 << pyre::journal::at(__HERE__)
678 << "allocated a refined arena: " << footprint << " bytes at " << arena
679 << pyre::journal::endl;
680
681 // initialize the memory
682 status = cudaMemset(arena, 0, footprint);
683 // if something went wrong
684 if (status != cudaSuccess) {
685 // get the error description
686 std::string description = cudaGetErrorName(status);
687 // make a channel
688 pyre::journal::error_t error("ampcor.cuda");
689 // complain
690 error
691 << pyre::journal::at(__HERE__)
692 << "while initializing " << footprint
693 << " bytes of device memory for the refined tile hyper-grid: "
694 << description << " (" << status << ")"
695 << pyre::journal::endl;
696 // and bail
697 throw std::runtime_error(description);
698 }
699
700 // all done
701 return arena;
702}
703
704
705// refine the reference tiles
706template <typename raster_t>
707void
709_refRefine(cell_type * coarseArena, cell_type * refinedArena) const
710{
711 // make a channel
712 pyre::journal::debug_t channel("ampcor.cuda");
713
714 // get the shape the reference tile
715 auto rshape = _refLayout.shape();
716 // and the shape of the refined tile
717 auto tshape = _refRefinedLayout.shape();
718
719 // step 1: forward FFT from {coarseArena} to {refinedArena}
720 // the plan characteristics
721 int dim = 2;
722 int fwdRanks[] = { static_cast<int>(rshape[0]), static_cast<int>(rshape[1]) };
723 // the data layout of the coarse tiles
724 // the tile shape stays the same
725 int fwdIEmbed[] = { static_cast<int>(rshape[0]), static_cast<int>(rshape[1]) };
726 // the data is densely packed
727 int fwdIStride = 1;
728 // the distance between reference tiles
729 int fwdIDist = _refCells + _tgtCells;
730 // the data layout of the refined tiles
731 // the tile shape stays the same
732 int fwdOEmbed[] = { static_cast<int>(tshape[0]), static_cast<int>(tshape[1]) };
733 // the data is densely packed
734 int fwdOStride = 1;
735 // the distance between reference tiles in the refined arena
736 int fwdODist = _refRefinedCells + _tgtRefinedCells;
737
738 // grab a spot for a plan
739 cufftHandle fwdPlan;
740 // instantiate
741 auto status = cufftPlanMany(&fwdPlan, dim, fwdRanks,
742 fwdIEmbed, fwdIStride, fwdIDist,
743 fwdOEmbed, fwdOStride, fwdODist,
744 CUFFT_C2C,
745 _pairs
746 );
747 // if something went wrong
748 if (status != CUFFT_SUCCESS) {
749 // make a channel
750 pyre::journal::error_t error("ampcor.cuda");
751 // complain
752 error
753 << pyre::journal::at(__HERE__)
754 << "while refining the reference tiles: forward FFT plan: error " << status
755 << pyre::journal::endl;
756 // and bail
757 throw std::runtime_error("while refining the reference tiles: forward FFT plan error");
758 }
759
760 // execute the forward plan
761 status = cufftExecC2C(fwdPlan,
762 reinterpret_cast<cufftComplex *>(coarseArena),
763 reinterpret_cast<cufftComplex *>(refinedArena),
764 CUFFT_FORWARD);
765 // if something went wrong
766 if (status != CUFFT_SUCCESS) {
767 // make a channel
768 pyre::journal::error_t error("ampcor.cuda");
769 // complain
770 error
771 << pyre::journal::at(__HERE__)
772 << "while refining the reference tiles: executing the forward FFT plan: error "
773 << status
774 << pyre::journal::endl;
775 // and bail
776 throw std::runtime_error("while executing the forward FFT plan for the reference tiles");
777 }
778 // wait for the device to finish
779 auto code = cudaDeviceSynchronize();
780 // if something went wrong
781 if (code != cudaSuccess) {
782 // form the error description
783 std::string description = cudaGetErrorName(code);
784 // make a channel
785 pyre::journal::error_t channel("ampcor.cuda");
786 // complain
787 channel
788 << pyre::journal::at(__HERE__)
789 << "while refining reference tiles: STEP 1: " << description << " (" << code << ")"
790 << pyre::journal::endl;
791 // bail
792 throw std::runtime_error(description);
793 }
794 // clean up
795 cufftDestroy(fwdPlan);
796
797 // step 2: inverse FFT of the refined tiles back into the refined arena
798 // the plan characteristics
799 int revRanks[] = { static_cast<int>(tshape[0]), static_cast<int>(tshape[1]) };
800 // the data layout of the transformed reference tiles
801 int revIEmbed[] = { static_cast<int>(tshape[0]), static_cast<int>(tshape[1]) };
802 // the data is densely packed
803 int revIStride = 1;
804 // the distance between reference tiles
805 int revIDist = _refRefinedCells + _tgtRefinedCells;
806 // the inverse FFT tiles have identical layout
807 int revOEmbed[] = { static_cast<int>(tshape[0]), static_cast<int>(tshape[1]) };
808 // the data is densely packed
809 int revOStride = 1;
810 // the distance between reference tiles
811 int revODist = _refRefinedCells + _tgtRefinedCells;
812
813 // grab a spot
814 cufftHandle revPlan;
815 // instantiate the inverse plan
816 status = cufftPlanMany(&revPlan, dim, revRanks,
817 revIEmbed, revIStride, revIDist,
818 revOEmbed, revOStride, revODist,
819 CUFFT_C2C,
820 _pairs
821 );
822 // if something went wrong
823 if (status != CUFFT_SUCCESS) {
824 // make a channel
825 pyre::journal::error_t error("ampcor.cuda");
826 // complain
827 error
828 << pyre::journal::at(__HERE__)
829 << "while refining the reference tiles: inverse FFT plan: error " << status
830 << pyre::journal::endl;
831 // and bail
832 throw std::runtime_error("while refining the reference tiles: inverse FFT plan error");
833 }
834
835 // execute the inverse plan
836 status = cufftExecC2C(revPlan,
837 reinterpret_cast<cufftComplex *>(refinedArena),
838 reinterpret_cast<cufftComplex *>(refinedArena),
839 CUFFT_INVERSE);
840 // if something went wrong
841 if (status != CUFFT_SUCCESS) {
842 // make a channel
843 pyre::journal::error_t error("ampcor.cuda");
844 // complain
845 error
846 << pyre::journal::at(__HERE__)
847 << "while refining the reference tiles: executing the inverse FFT plan: error "
848 << status
849 << pyre::journal::endl;
850 // and bail
851 throw std::runtime_error("while executing the inverse FFT plan for the reference tiles");
852 }
853 // wait for the device to finish
854 code = cudaDeviceSynchronize();
855 // if something went wrong
856 if (code != cudaSuccess) {
857 // form the error description
858 std::string description = cudaGetErrorName(code);
859 // make a channel
860 pyre::journal::error_t channel("ampcor.cuda");
861 // complain
862 channel
863 << pyre::journal::at(__HERE__)
864 << "while refining reference tiles: STEP 2: " << description << " (" << code << ")"
865 << pyre::journal::endl;
866 // bail
867 throw std::runtime_error(description);
868 }
869
870 // clean up
871 cufftDestroy(revPlan);
872
873 // all done
874 return;
875}
876
877
878// migrate the expanded unrefined target tiles into the {refinedArena}
879template <typename raster_t>
880void
882_tgtMigrate(cell_type * coarseArena, int * locations, cell_type * refinedArena) const
883{
884 // make a channel
885 pyre::journal::debug_t channel("ampcor.cuda");
886
887 // the reference tile shape
888 auto refShape = _refLayout.shape();
889 // the target tile shape
890 auto tgtShape = _tgtLayout.shape();
891 // the refined reference tile shape
892 auto refRefinedShape = _refRefinedLayout.shape();
893 // the refined target tile shape
894 auto tgtRefinedShape = _tgtRefinedLayout.shape();
895
896 // unpack the dimensions
897 auto refDim = refShape[0];
898 auto tgtDim = tgtShape[0];
899 auto refRefinedDim = refRefinedShape[0];
900 auto tgtRefinedDim = tgtRefinedShape[0];
901
902 // compute the dimension of the expanded maxcor tile
903 auto expDim = refDim + 2 * _refineMargin;
904
905 // engage...
906 kernels::migrate(coarseArena, _pairs,
907 refDim, tgtDim, expDim,
908 refRefinedDim, tgtRefinedDim,
909 locations,
910 refinedArena);
911
912 // all done
913 return;
914}
915
916
917// refine the target tiles around the locations of the correlation maxima
918template <typename raster_t>
919void
921_tgtRefine(cell_type * refinedArena) const
922{
923 // N.B.: assumes {locations} are already nudged and on the CPU...
924 // make a channel
925 pyre::journal::debug_t channel("ampcor.cuda");
926
927 // the shape of the target tile
928 auto tgtShape = _tgtLayout.shape();
929 // the shape the refined target tile
930 auto tgtRefShape = _tgtRefinedLayout.shape();
931 // the shape of the expanded target tile
932 auto expShape = _refLayout.shape() + index_type::fill(2*_refineMargin);
933
934 // N.B.: the expanded maxcor target tiles are expected to have already been moved to the
935 // refined arena after the maxcor locations were determined
936
937 // step 1: forward FFT in place in {refinedArena}
938 // the plan characteristics
939 int dim = 2;
940 // use the shape of the expanded target tile
941 int fwdRanks[] = { static_cast<int>(expShape[0]), static_cast<int>(expShape[1]) };
942 // this tile is already occupying its destination location in {refinedArena}
943 int fwdIEmbed[] = { static_cast<int>(tgtRefShape[0]), static_cast<int>(tgtRefShape[1]) };
944 // the data is dense
945 int fwdIStride = 1;
946 // the distance between tiles
947 int fwdIDist = _refRefinedCells + _tgtRefinedCells;
948 // the destination of the forward FFT has identical layout
949 int fwdOEmbed[] = { static_cast<int>(tgtRefShape[0]), static_cast<int>(tgtRefShape[1]) };
950 // the data is dense
951 int fwdOStride = 1;
952 // the distance between tiles
953 int fwdODist = _refRefinedCells + _tgtRefinedCells;
954
955 // grab a spot for a plan
956 cufftHandle fwdPlan;
957 // instantiate
958 auto status = cufftPlanMany(&fwdPlan, dim, fwdRanks,
959 fwdIEmbed, fwdIStride, fwdIDist,
960 fwdOEmbed, fwdOStride, fwdODist,
961 CUFFT_C2C,
962 _pairs
963 );
964 // if something went wrong
965 if (status != CUFFT_SUCCESS) {
966 // make a channel
967 pyre::journal::error_t error("ampcor.cuda");
968 // complain
969 error
970 << pyre::journal::at(__HERE__)
971 << "while refining the target tiles: forward FFT plan: error " << status
972 << pyre::journal::endl;
973 // and bail
974 throw std::runtime_error("while refining the target tiles: forward FFT plan error");
975 }
976
977 // the address of the first expanded maxcor tile
978 auto firstTile = reinterpret_cast<cufftComplex *>(refinedArena + _refRefinedCells);
979 // execute the forward plan
980 status = cufftExecC2C(fwdPlan, firstTile, firstTile, CUFFT_FORWARD);
981 // if something went wrong
982 if (status != CUFFT_SUCCESS) {
983 // make a channel
984 pyre::journal::error_t error("ampcor.cuda");
985 // complain
986 error
987 << pyre::journal::at(__HERE__)
988 << "while refining the target tiles: executing the forward FFT plan: error "
989 << status
990 << pyre::journal::endl;
991 // and bail
992 throw std::runtime_error("while executing the forward FFT plan for the reference tiles");
993 }
994 // wait for the device to finish
995 auto code = cudaDeviceSynchronize();
996 // if something went wrong
997 if (code != cudaSuccess) {
998 // form the error description
999 std::string description = cudaGetErrorName(code);
1000 // make a channel
1001 pyre::journal::error_t channel("ampcor.cuda");
1002 // complain
1003 channel
1004 << pyre::journal::at(__HERE__)
1005 << "while refining target tiles: STEP 1: " << description << " (" << code << ")"
1006 << pyre::journal::endl;
1007 // bail
1008 throw std::runtime_error(description);
1009 }
1010 // clean up the forward plan
1011 cufftDestroy(fwdPlan);
1012
1013 // step 2: inverse FFT of the refined tiles back into the refined arena
1014 // the plan characteristics
1015 int revRanks[] = { static_cast<int>(tgtRefShape[0]), static_cast<int>(tgtRefShape[1]) };
1016
1017 // the data layout of the transformed reference tiles
1018 int revIEmbed[] = { static_cast<int>(tgtRefShape[0]), static_cast<int>(tgtRefShape[1]) };
1019 // the data is densely packed
1020 int revIStride = 1;
1021 // the distance between reference tiles
1022 int revIDist = _refRefinedCells + _tgtRefinedCells;
1023 // the inverse FFT tiles have identical layout
1024 int revOEmbed[] = { static_cast<int>(tgtRefShape[0]), static_cast<int>(tgtRefShape[1]) };
1025 // the data is densely packed
1026 int revOStride = 1;
1027 // the distance between reference tiles
1028 int revODist = _refRefinedCells + _tgtRefinedCells;
1029
1030 // grab a spot
1031 cufftHandle revPlan;
1032 // instantiate the inverse plan
1033 status = cufftPlanMany(&revPlan, dim, revRanks,
1034 revIEmbed, revIStride, revIDist,
1035 revOEmbed, revOStride, revODist,
1036 CUFFT_C2C,
1037 _pairs
1038 );
1039 // if something went wrong
1040 if (status != CUFFT_SUCCESS) {
1041 // make a channel
1042 pyre::journal::error_t error("ampcor.cuda");
1043 // complain
1044 error
1045 << pyre::journal::at(__HERE__)
1046 << "while refining the target tiles: inverse FFT plan: error " << status
1047 << pyre::journal::endl;
1048 // and bail
1049 throw std::runtime_error("while refining the target tiles: inverse FFT plan error");
1050 }
1051
1052 // execute the inverse plan
1053 status = cufftExecC2C(revPlan, firstTile, firstTile, CUFFT_FORWARD);
1054 // if something went wrong
1055 if (status != CUFFT_SUCCESS) {
1056 // make a channel
1057 pyre::journal::error_t error("ampcor.cuda");
1058 // complain
1059 error
1060 << pyre::journal::at(__HERE__)
1061 << "while refining the target tiles: executing the inverse FFT plan: error "
1062 << status
1063 << pyre::journal::endl;
1064 // and bail
1065 throw std::runtime_error("while executing the inverse FFT plan for the target tiles");
1066 }
1067 // wait for the device to finish
1068 code = cudaDeviceSynchronize();
1069 // if something went wrong
1070 if (code != cudaSuccess) {
1071 // form the error description
1072 std::string description = cudaGetErrorName(code);
1073 // make a channel
1074 pyre::journal::error_t channel("ampcor.cuda");
1075 // complain
1076 channel
1077 << pyre::journal::at(__HERE__)
1078 << "while refining target tiles: STEP 2: " << description << " (" << code << ")"
1079 << pyre::journal::endl;
1080 // bail
1081 throw std::runtime_error(description);
1082 }
1083
1084 // clean up
1085 cufftDestroy(revPlan);
1086
1087 // all done
1088 return;
1089}
1090
1091
// deramp the refined target tiles in {arena}
// NOTE(review): this is currently a no-op placeholder -- no deramping is performed and
// the arena is left untouched; confirm whether this step is implemented elsewhere or
// still pending
template <typename raster_t>
void
_deramp(cell_type * arena) const
{
    // nothing to do yet: this stage is intentionally empty
    // all done
    return;
}
1101
1102
1103// zoom the correlation matrix
1104template <typename raster_t>
1105auto
1107_zoomcor(value_type * gamma) const -> value_type *
1108{
1109 // constants
1110 const auto Mb = 1.0 / 1024 / 1024;
1111 // make a channel
1112 pyre::journal::debug_t channel("ampcor.cuda");
1113
1114 // get the shape of the incoming correlation matrix
1115 auto corShape = _corRefinedLayout.shape();
1116 // extract the dimension
1117 int corDim = corShape[0];
1118 // get the shape of the zoomed correlation matrix
1119 auto zmdShape = _corZoomedLayout.shape();
1120 // compute the number if cells in each zoomed correlation matrix
1121 auto zmdCells = _corZoomedLayout.size();
1122 // extract the dimension
1123 int zmdDim = zmdShape[0];
1124
1125 // step 1: up-cast and embed
1126 auto scratch = kernels::r2c(gamma, _pairs, corDim, zmdDim);
1127
1128 // step 2: forward FFT from the incoming gamma to the zoomed gamma
1129 // the plan characteristics
1130 int dim = 2;
1131 int fwdRanks[] = { corDim, corDim };
1132 // the input layout
1133 int fwdIEmbed[] = { zmdDim, zmdDim };
1134 // the data is densely packed
1135 int fwdIStride = 1;
1136 // the distance between correlation matrices
1137 int fwdIDist = zmdCells;
1138 // the output layout
1139 int fwdOEmbed[] = { zmdDim, zmdDim };
1140 // the data is densely packed
1141 int fwdOStride = 1;
1142 // the distance between successive correlation matrices
1143 int fwdODist = zmdCells;
1144
1145 // grab a spot for the forward plan
1146 cufftHandle fwdPlan;
1147 // instantiate it
1148 auto statusFFT = cufftPlanMany(&fwdPlan, dim, fwdRanks,
1149 fwdIEmbed, fwdIStride, fwdIDist,
1150 fwdOEmbed, fwdOStride, fwdODist,
1151 CUFFT_C2C,
1152 _pairs);
1153 // if something went wrong
1154 if (statusFFT != CUFFT_SUCCESS) {
1155 // make a channel
1156 pyre::journal::error_t error("ampcor.cuda");
1157 // complain
1158 error
1159 << pyre::journal::at(__HERE__)
1160 << "while zooming the correlation surface: forward FFT plan: error " << statusFFT
1161 << pyre::journal::endl;
1162 // and bail
1163 throw std::runtime_error("while zooming the correlation hyper-matrix: forward plan error");
1164 }
1165 // show me
1166 channel
1167 << pyre::journal::at(__HERE__)
1168 << "zooming the correlation matrix: forward FFT"
1169 << pyre::journal::endl;
1170 // execute the forward plan
1171 statusFFT = cufftExecC2C(fwdPlan, scratch, scratch, CUFFT_FORWARD);
1172 // if something went wrong
1173 if (statusFFT != CUFFT_SUCCESS) {
1174 // make a channel
1175 pyre::journal::error_t error("ampcor.cuda");
1176 // complain
1177 error
1178 << pyre::journal::at(__HERE__)
1179 << "while zooming the correlation hyper-matrix: executing the forward FFT plan: error "
1180 << statusFFT
1181 << pyre::journal::endl;
1182 // and bail
1183 throw std::runtime_error("while executing the forward FFT plan for the reference tiles");
1184 }
1185 // wait for the device to finish
1186 auto status = cudaDeviceSynchronize();
1187 // if something went wrong
1188 if (status != cudaSuccess) {
1189 // form the error description
1190 std::string description = cudaGetErrorName(status);
1191 // make a channel
1192 pyre::journal::error_t channel("ampcor.cuda");
1193 // complain
1194 channel
1195 << pyre::journal::at(__HERE__)
1196 << "while zooming the correlation matrix: STEP 1: " << description
1197 << " (" << status << ")"
1198 << pyre::journal::endl;
1199 // bail
1200 throw std::runtime_error(description);
1201 }
1202 // clean up
1203 cufftDestroy(fwdPlan);
1204
1205 // step 3: inverse FFT of the zero-extended zoomed gamma
1206 int revRanks[] = { zmdDim, zmdDim };
1207 // the input layout
1208 int revIEmbed[] = { zmdDim, zmdDim };
1209 // the data is densely packed
1210 int revIStride = 1;
1211 // the distance between correlation matrices
1212 int revIDist = zmdCells;
1213 // the output layout
1214 int revOEmbed[] = { zmdDim, zmdDim };
1215 // the data is densely packed
1216 int revOStride = 1;
1217 // the distance between successive correlation matrices
1218 int revODist = zmdCells;
1219
1220 // grab a spot for the forward plan
1221 cufftHandle revPlan;
1222 // instantiate it
1223 statusFFT = cufftPlanMany(&revPlan, dim, revRanks,
1224 revIEmbed, revIStride, revIDist,
1225 revOEmbed, revOStride, revODist,
1226 CUFFT_C2C,
1227 _pairs);
1228 // if something went wrong
1229 if (statusFFT != CUFFT_SUCCESS) {
1230 // make a channel
1231 pyre::journal::error_t error("ampcor.cuda");
1232 // complain
1233 error
1234 << pyre::journal::at(__HERE__)
1235 << "while zooming the correlation surface: forward FFT plan: error " << statusFFT
1236 << pyre::journal::endl;
1237 // and bail
1238 throw std::runtime_error("while zooming the correlation hyper-matrix: forward plan error");
1239 }
1240 // show me
1241 channel
1242 << pyre::journal::at(__HERE__)
1243 << "zooming the correlation matrix: inverse FFT"
1244 << pyre::journal::endl;
1245 // execute the inverse plan
1246 statusFFT = cufftExecC2C(revPlan, scratch, scratch, CUFFT_INVERSE);
1247 // if something went wrong
1248 if (statusFFT != CUFFT_SUCCESS) {
1249 // make a channel
1250 pyre::journal::error_t error("ampcor.cuda");
1251 // complain
1252 error
1253 << pyre::journal::at(__HERE__)
1254 << "while zooming the correlation hyper-matrix: executing the inverse FFT plan: error "
1255 << statusFFT
1256 << pyre::journal::endl;
1257 // and bail
1258 throw std::runtime_error("while executing the inverse FFT plan for the reference tiles");
1259 }
1260 // wait for the device to finish
1261 status = cudaDeviceSynchronize();
1262 // if something went wrong
1263 if (status != cudaSuccess) {
1264 // form the error description
1265 std::string description = cudaGetErrorName(status);
1266 // make a channel
1267 pyre::journal::error_t channel("ampcor.cuda");
1268 // complain
1269 channel
1270 << pyre::journal::at(__HERE__)
1271 << "while zooming the correlation matrix: STEP 2: " << description
1272 << " (" << status << ")"
1273 << pyre::journal::endl;
1274 // bail
1275 throw std::runtime_error(description);
1276 }
1277 // clean up
1278 cufftDestroy(revPlan);
1279
1280 // down-cast
1281 auto zoomed = kernels::c2r(scratch, _pairs, zmdDim);
1282
1283 // clean up
1284 cudaFree(scratch);
1285
1286 // all done
1287 return zoomed;
1288}
1289
1290
1291// assemble the offsets
1292template <typename raster_t>
1293auto
1295_offsetField(const int * coarse, const int * fine) -> const value_type *
1296{
1297 // constants
1298 const auto Mb = 1.0 / (1024 * 1024);
1299 // compute the search margin
1300 auto margin = (_tgtLayout.shape()[0] - _refLayout.shape()[0]) / 2;
1301 // compute the overall zoom factor
1302 auto zoom = _refineFactor * _zoomFactor;
1303
1304 // compute the number of cells in the offset field
1305 auto cells = 2 * _pairs;
1306 // and its memory footprint
1307 auto footprint = cells * sizeof(value_type);
1308 // grab a spot
1309 value_type * offsets = nullptr;
1310 // allocate memory on the device for the answers
1311 auto status = cudaMallocManaged(&offsets, footprint);
1312 // if something went wrong
1313 if (status != cudaSuccess) {
1314 // make a channel
1315 pyre::journal::error_t error("ampcor.cuda");
1316 // complain
1317 error
1318 << pyre::journal::at(__HERE__)
1319 << "while allocating " << footprint * Mb
1320 << "Mb of device memory for the offset field: "
1321 << cudaGetErrorName(status) << " (" << status << ")"
1322 << pyre::journal::endl;
1323 // and bail
1324 throw std::bad_alloc();
1325 }
1326 // zero it out
1327 status = cudaMemset(offsets, footprint, 0);
1328 // if something went wrong
1329 if (status != cudaSuccess) {
1330 // get the error description
1331 std::string description = cudaGetErrorName(status);
1332 // make a channel
1333 pyre::journal::error_t error("ampcor.cuda");
1334 // complain
1335 error
1336 << pyre::journal::at(__HERE__)
1337 << "while initializing " << footprint * Mb
1338 << " Mb of device memory for the offset field: "
1339 << description << " (" << status << ")"
1340 << pyre::journal::endl;
1341 // and bail
1342 throw std::runtime_error(description);
1343 }
1344
1345 // launch the kernel that does the work
1346 kernels::offsetField(coarse, fine, _pairs, margin, _refineMargin, zoom, offsets);
1347
1348 // harvest the data
1349 status = cudaMemcpy(_offsets, offsets, footprint, cudaMemcpyDeviceToHost);
1350 // if something went wrong
1351 if (status != cudaSuccess) {
1352 // build the error description
1353 std::string description = cudaGetErrorName(status);
1354 // make a channel
1355 pyre::journal::error_t error("ampcor.cuda");
1356 // complain
1357 error
1358 << pyre::journal::at(__HERE__)
1359 << "while harvesting the offset field from the device: "
1360 << description << " (" << status << ")"
1361 << pyre::journal::endl;
1362 // and bail
1363 throw std::logic_error(description);
1364 }
1365
1366 // clean up
1367 cudaFree(offsets);
1368
1369 // all done
1370 return _offsets;
1371}
1372
1373
1374// zoom the correlation matrix
1375template <typename raster_t>
1376auto
1378_zoomcor_r2r(value_type * gamma) const -> value_type *
1379{
1380 // constants
1381 const auto Mb = 1.0 / 1024 / 1024;
1382
1383 // make a channel
1384 pyre::journal::debug_t channel("ampcor.cuda");
1385
1386 // grab a spot for the zoomed correlation matrix
1387 value_type * zoomed = nullptr;
1388
1389 // get the shape for the incoming correlation matrix
1390 auto cshape = _corRefinedLayout.shape();
1391 // and the zoomed one
1392 auto zshape = _corZoomedLayout.shape();
1393 // get the dimension of the refined correlation matrix
1394 int corDim = cshape[0];
1395 // get the dimension of the zoomed correlation matrix
1396 int zmdDim = zshape[0];
1397
1398 // the non-redundant number of cells in the hermitian transform
1399 auto cells = zmdDim * (zmdDim/2 + 1);
1400 // compute the amount of memory needed, including the required padding for C2R
1401 auto footprint = 2 * cells * _pairs * sizeof(value_type);
1402 // show me
1403 channel
1404 << pyre::journal::at(__HERE__)
1405 << "allocating "
1406 << _pairs << " hermitian " << zmdDim << "x" << zmdDim << " matrices with "
1407 << cells << " independent cells," << pyre::journal::newline
1408 << "for a total of " << footprint * Mb << " Mb for the zoomed correlation matrix"
1409 << pyre::journal::endl;
1410 // allocate memory for it
1411 auto status = cudaMallocManaged(&zoomed, footprint);
1412 // if something went wrong
1413 if (status != cudaSuccess) {
1414 // make a channel
1415 pyre::journal::error_t error("ampcor.cuda");
1416 // complain
1417 error
1418 << pyre::journal::at(__HERE__)
1419 << "while allocating " << footprint * Mb
1420 << " Mb of device memory for the zoomed correlation matrix"
1421 << cudaGetErrorName(status) << " (" << status << ")"
1422 << pyre::journal::endl;
1423 // and bail
1424 throw std::bad_alloc();
1425 }
1426 // initialize the memory
1427 status = cudaMemset(zoomed, 0, footprint);
1428 // if something went wrong
1429 if (status != cudaSuccess) {
1430 // get the error description
1431 std::string description = cudaGetErrorName(status);
1432 // make a channel
1433 pyre::journal::error_t error("ampcor.cuda");
1434 // complain
1435 error
1436 << pyre::journal::at(__HERE__)
1437 << "while initializing " << footprint * Mb
1438 << " Mb of device memory for the zoomed correlation matrix: "
1439 << description << " (" << status << ")"
1440 << pyre::journal::endl;
1441 // and bail
1442 throw std::runtime_error(description);
1443 }
1444
1445 // step 1: forward FFT from the incoming gamma to the zoomed gamma
1446 // the plan characteristics
1447 int dim = 2;
1448 int fwdRanks[] = { corDim, corDim };
1449 // the input layout
1450 int fwdIEmbed[] = { corDim, corDim };
1451 // the data is densely packed
1452 int fwdIStride = 1;
1453 // the distance between correlation matrices
1454 int fwdIDist = _corRefinedLayout.size();
1455 // the output layout
1456 int fwdOEmbed[] = { zmdDim, zmdDim };
1457 // the data is densely packed
1458 int fwdOStride = 1;
1459 // the distance between successive correlation matrices
1460 int fwdODist = cells;
1461
1462 // grab a spot for the forward plan
1463 cufftHandle fwdPlan;
1464 // instantiate it
1465 auto statusFFT = cufftPlanMany(&fwdPlan, dim, fwdRanks,
1466 fwdIEmbed, fwdIStride, fwdIDist,
1467 fwdOEmbed, fwdOStride, fwdODist,
1468 CUFFT_R2C,
1469 _pairs);
1470 // if something went wrong
1471 if (statusFFT != CUFFT_SUCCESS) {
1472 // make a channel
1473 pyre::journal::error_t error("ampcor.cuda");
1474 // complain
1475 error
1476 << pyre::journal::at(__HERE__)
1477 << "while zooming the correlation surface: forward FFT plan: error " << statusFFT
1478 << pyre::journal::endl;
1479 // and bail
1480 throw std::runtime_error("while zooming the correlation hyper-matrix: forward plan error");
1481 }
1482 // show me
1483 channel
1484 << pyre::journal::at(__HERE__)
1485 << "zooming the correlation matrix: forward FFT"
1486 << pyre::journal::endl;
1487 // execute the forward plan
1488 statusFFT = cufftExecR2C(fwdPlan, gamma, reinterpret_cast<cufftComplex *>(zoomed));
1489 // if something went wrong
1490 if (statusFFT != CUFFT_SUCCESS) {
1491 // make a channel
1492 pyre::journal::error_t error("ampcor.cuda");
1493 // complain
1494 error
1495 << pyre::journal::at(__HERE__)
1496 << "while zooming the correlation hyper-matrix: executing the forward FFT plan: error "
1497 << statusFFT
1498 << pyre::journal::endl;
1499 // and bail
1500 throw std::runtime_error("while executing the forward FFT plan for the reference tiles");
1501 }
1502 // wait for the device to finish
1503 status = cudaDeviceSynchronize();
1504 // if something went wrong
1505 if (status != cudaSuccess) {
1506 // form the error description
1507 std::string description = cudaGetErrorName(status);
1508 // make a channel
1509 pyre::journal::error_t channel("ampcor.cuda");
1510 // complain
1511 channel
1512 << pyre::journal::at(__HERE__)
1513 << "while zooming the correlation matrix: STEP 1: " << description
1514 << " (" << status << ")"
1515 << pyre::journal::endl;
1516 // bail
1517 throw std::runtime_error(description);
1518 }
1519 // clean up
1520 cufftDestroy(fwdPlan);
1521
1522 // step 2: inverse FFT of the zero-extended zoomed gamma
1523 int revRanks[] = { zmdDim, zmdDim };
1524 // the input layout
1525 int revIEmbed[] = { zmdDim, zmdDim };
1526 // the data is densely packed
1527 int revIStride = 1;
1528 // the distance between correlation matrices
1529 int revIDist = cells;
1530 // the output layout
1531 int revOEmbed[] = { zmdDim, zmdDim };
1532 // the data is densely packed
1533 int revOStride = 1;
1534 // the distance between successive correlation matrices
1535 int revODist = cells;
1536
1537 // grab a spot for the forward plan
1538 cufftHandle revPlan;
1539 // instantiate it
1540 statusFFT = cufftPlanMany(&revPlan, dim, revRanks,
1541 revIEmbed, revIStride, revIDist,
1542 revOEmbed, revOStride, revODist,
1543 CUFFT_C2R,
1544 _pairs);
1545 // if something went wrong
1546 if (statusFFT != CUFFT_SUCCESS) {
1547 // make a channel
1548 pyre::journal::error_t error("ampcor.cuda");
1549 // complain
1550 error
1551 << pyre::journal::at(__HERE__)
1552 << "while zooming the correlation surface: forward FFT plan: error " << statusFFT
1553 << pyre::journal::endl;
1554 // and bail
1555 throw std::runtime_error("while zooming the correlation hyper-matrix: forward plan error");
1556 }
1557 // show me
1558 channel
1559 << pyre::journal::at(__HERE__)
1560 << "zooming the correlation matrix: inverse FFT"
1561 << pyre::journal::endl;
1562 // execute the inverse plan
1563 statusFFT = cufftExecC2R(revPlan, reinterpret_cast<cufftComplex *>(zoomed), zoomed);
1564 // if something went wrong
1565 if (statusFFT != CUFFT_SUCCESS) {
1566 // make a channel
1567 pyre::journal::error_t error("ampcor.cuda");
1568 // complain
1569 error
1570 << pyre::journal::at(__HERE__)
1571 << "while zooming the correlation hyper-matrix: executing the inverse FFT plan: error "
1572 << statusFFT
1573 << pyre::journal::endl;
1574 // and bail
1575 throw std::runtime_error("while executing the inverse FFT plan for the reference tiles");
1576 }
1577 // wait for the device to finish
1578 status = cudaDeviceSynchronize();
1579 // if something went wrong
1580 if (status != cudaSuccess) {
1581 // form the error description
1582 std::string description = cudaGetErrorName(status);
1583 // make a channel
1584 pyre::journal::error_t channel("ampcor.cuda");
1585 // complain
1586 channel
1587 << pyre::journal::at(__HERE__)
1588 << "while zooming the correlation matrix: STEP 2: " << description
1589 << " (" << status << ")"
1590 << pyre::journal::endl;
1591 // bail
1592 throw std::runtime_error(description);
1593 }
1594 // clean up
1595 cufftDestroy(revPlan);
1596
1597 // all done
1598 return zoomed;
1599}
1600
1601
1602// end of file
Definition Sequential.h:12

Generated for ISCE3.0 by doxygen 1.13.2.