Open main menu

CDOT Wiki β

Changes

GPU621/NoName

163 bytes added, 11:38, 24 November 2016
OpenMP code
template <typename T, typename R, typename C, typename S>
int scan(
const T* in, // source data
T* out, // output data
int size, // size of source, output data sets
R reduce, // reduction expression
C combine, // combine expression
S scan_fn, // scan function (exclusive or inclusive)
T initial // initial value
)
{
/*
int tile size = (n - 1)/ntiles + 1;
reduced[tid] = reduce(in + tid * tilesize,itile == last_tile ? last_tile_size : tile_size, combine, T(0));
#pragma omp barrier
#pragma omp single
*/
int nthreads = 1;
if (size > 0) {
// requested number of tiles
int max_threads = omp_get_max_threads();
T* reduced = new T[max_threads];
T* scanRes = new T[max_threads];
#pragma omp parallel
{
int ntiles = omp_get_num_threads(); // Number of tiles
int itile = omp_get_thread_num();
int tile_size = (size - 1) / ntiles + 1;
int last_tile = ntiles - 1;
int last_tile_size = size - last_tile * tile_size;
23
edits