__global__
void stencil_kernel(int N, const float * x, float * y)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
if (i < N && j < N)
{
int index = N * i + j;
float result = 4.0f * x[index];
if (i > 0 ) result -= x[index - N];
if (i < N - 1) result -= x[index + N];
if (j > 0 ) result -= x[index - 1];
if (j < N - 1) result -= x[index + 1];
y[N * i + j] = result;
}
}
{
public:
int N;
stencil(int N)
: super(N*N,N*N), N(N) {}
template <typename VectorType1,
typename VectorType2>
void operator()(const VectorType1& x, VectorType2& y) const
{
const float * x_ptr = thrust::raw_pointer_cast(&x[0]);
float * y_ptr = thrust::raw_pointer_cast(&y[0]);
dim3 dimBlock(16,16);
dim3 dimGrid((N + 15) / 16, (N + 15) / 16);
stencil_kernel<<<dimGrid,dimBlock>>>(N, x_ptr, y_ptr);
}
};
int main(void)
{
const int N = 10;
stencil A(N);
return 0;
}