isce3 0.25.0
Loading...
Searching...
No Matches
cudaUtil.h
Go to the documentation of this file.
1
9
10#ifndef __CUDAUTIL_H
11#define __CUDAUTIL_H
12
// Rank constant for 2D FFT
#define NRANK 2

// Typical choices for the number of threads in a block
// when processing 1D and 2D arrays
#define NTHREADS 512 // block size for 1D arrays
#define NTHREADS2D 16 // block size for 2D arrays (presumably per dimension - confirm at launch sites)

// Number of threads in a warp
#define WARPSIZE 32
// Upper bound on threads per block.
// Original remark: "2048 for newer GPUs".
// NOTE(review): 2048 looks like a per-multiprocessor figure rather than a
// per-block limit - confirm against the CUDA compute-capability tables.
#define MAXTHREADS 1024
23
// Maximum number of blocks per grid dimension.
// MAXBLOCKS is the limit for the x dimension, MAXBLOCKS2 for y and z.
#ifdef __FERMI__ //2.0: M2090
#define MAXBLOCKS 65535 //x
#define MAXBLOCKS2 65535 //y,z
#else //3.0 and above : K40, ...
// The x-dimension limit is 2^31-1; a launch with more blocks is rejected
// as an invalid configuration. The value also fits in a signed 32-bit int.
#define MAXBLOCKS 2147483647 //x
#define MAXBLOCKS2 65535 //y,z
#endif
31
// Flattened index of element (i,j) in a 2D array.
// IDX2R: row-major order, NJ is the number of columns.
// IDX2C: column-major order, NI is the number of rows.
#define IDX2R(i,j,NJ) (((i)*(NJ))+(j)) //row-major order
#define IDX2C(i,j,NI) (((j)*(NI))+(i)) //col-major order

// Integer division of i by j, rounded up (for positive operands).
// Arguments are parenthesized so that expressions such as IDIVUP(n+1, a+b)
// expand correctly; note that j is evaluated twice.
#define IDIVUP(i,j) (((i)+(j)-1)/(j))
36
// Integer multiply that forwards to the CUDA __mul24 intrinsic.
// NOTE(review): __mul24 is documented as multiplying only the low 24 bits of
// each operand - confirm that callers keep both operands within that range.
#define IMUL(a, b) __mul24(a, b)
38
// Larger of a and b. Defined only if MAX is not already provided.
// Arguments are parenthesized so that operand expressions keep their own
// precedence; each argument may still be evaluated twice, so avoid
// arguments with side effects.
#ifndef MAX
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif

// Smaller of a and b. Defined only if MIN is not already provided.
// Same caveat as MAX regarding double evaluation.
#ifndef MIN
#define MIN(a,b) (((a) > (b)) ? (b) : (a))
#endif
46
// NOTE(review): the namespace is spelled `isc3` here; confirm whether `isce3`
// was intended. The name is left untouched because callers depend on it.
namespace isc3::matchtemplate::pycuampcor {

// Compute the smallest power of 2 that is greater than or equal to value.
//
// value: requested size; any value <= 1 yields 1.
// returns: a power of 2 in the range [1, 2^30].
//
// For value > 2^30 the result saturates at 2^30. No larger power of 2 fits
// in a 32-bit signed int, and doubling past it would overflow (undefined
// behavior, in practice an endless loop).
inline int nextpower2(int value)
{
    // largest power of 2 representable in a 32-bit signed int
    constexpr int maxPower2 = 1 << 30;

    int r = 1;
    // stop doubling once r can no longer be shifted without overflow
    while (r < value && r < maxPower2) {
        r <<= 1;
    }
    return r;
}

} // namespace
58
59#endif //__CUDAUTIL_H
60//end of file

Generated for ISCE3.0 by doxygen 1.13.2.