Open main menu

CDOT Wiki β

Changes

GPU621/NoName

203 bytes added, 18:59, 26 November 2016
OpenMP code
T* reduced = new T[max_threads];
T* scanRes = new T[max_threads];
  #pragma omp parallel { int ntiles = omp_get_num_threads(); // Number of tiles int itile = omp_get_thread_num(); int tile_size = (size - 1) / ntiles + 1; int last_tile = ntiles - 1; int last_tile_size = size - last_tile * tile_size; if (itile == 0) nthreads = ntiles; // step 1 - reduce each tile separately for (int itile = 0; itile < ntiles; itile++) reduced[itile] = reduce(in + itile * tile_size, itile == last_tile ? last_tile_size : tile_size, combine, T(0));  // step 2 - perform exclusive scan on all tiles using reduction outputs // store results in scanRes[] excl_scan(reduced, scanRes, ntiles, combine, T(0));  // step 3 - scan each tile separately using scanRes[] for (int itile = 0; itile < ntiles; itile++) scan_fn(in + itile * tile_size, out + itile * tile_size, itile == last_tile ? last_tile_size : tile_size, combine, scanRes[itile]); } delete[] reduced; delete[] scanRes; } return nthreads;
}
23
edits