Changes

Kernal Blas

852 bytes removed, 12:41, 29 March 2018

→‎Assignment 2

// Kernel: each thread adds the terms 4 / (1 + x*x), with x = (i + 0.5) * step,
// for its own subset of indices i into its own slot sum[idx].
//   sum      - per-thread accumulators indexed by the global thread index;
//              updated with +=, so presumably zeroed by the caller before the
//              launch (not visible here - TODO confirm)
//   nbin     - upper bound on the term index; the loop runs while i < nbin
//   step     - factor applied to (i + 0.5) to obtain x
//   nthreads, nblocks - their product is the loop increment; assumed to equal
//              the total number of launched threads - TODO confirm against
//              the launch configuration
__global__ void ~~cal_pi~~calculate(float *sum, int nbin, float step, int nthreads, int nblocks) {

int i;

float x;

int idx = blockIdx.x*blockDim.x + threadIdx.x; // Sequential thread index across the blocks

// Start at this thread's own index and advance by nthreads*nblocks each pass.
for (i = idx; i< nbin; i += nthreads*nblocks) {

// x is offset by 0.5 from the integer index before scaling by step.
x = (i + 0.5)*step;

// NOTE(review): 4.0, 1.0 and 0.5 are double literals, so these expressions
// are evaluated in double and narrowed when stored into the float operands.
sum[idx] += 4.0 / (1.0 + x*x);

} // end of the for loop

</syntaxhighlight>

<br>

'''Main function'''

~~// Using CUDA device to calculate pi~~

~~#include <stdio.h>~~

~~#include <cuda.h>~~

~~#include <iostream>~~

~~#include <ctime>~~

~~#include <chrono>~~

~~#include <cstdlib>~~

~~using namespace std::chrono;~~

~~//#define NUM_BLOCK 30 // Number of thread blocks~~

~~const int ntpb = 8; // Number of threads per block~~

~~int tid;~~

~~float pi = 0;~~

~~// Kernel that executes on the CUDA device~~

~~__global__ void calculate(float *sum, int nbin, float step, int nthreads, int nblocks) {~~

~~int i;~~

~~float x;~~

~~int idx = blockIdx.x * blockDim.x + threadIdx.x; // Sequential thread index across the blocks~~

~~for (i = idx; i< nbin; i += nthreads*nblocks) {~~

~~x = (i + 0.5)*step;~~

~~sum[idx] += 4.0 / (1.0 + x*x);~~

}

~~void reportTime(const char* msg, steady_clock::duration span) {~~

~~auto ms = duration_cast<milliseconds>(span);~~

~~std::cout << msg << " - took - " <<~~

~~ms.count() << " millisecs" << std::endl;~~

}

// Main routine that executes on the host

int main(int argc, char** argv) {

Jpham14

96

edits

CDOT Wiki β

Changes

Kernal Blas

CDOT Wiki ^β