GPU610/DPS915 Student Resources
GPU610/DPS915 | Student List | Group and Project Index | Student Resources | Glossary
The purpose of this page is to share useful information that can help groups with their CUDA projects.
Contents
CUDA Enabled Cards
Workshop Notes
BLAS Documentation
See the BLAS Documentation Page
Getting Started on Mac
http://developer.download.nvidia.com/compute/DevZone/docs/html/C/doc/CUDA_Getting_Started_Mac.pdf
http://developer.nvidia.com/cuda/cuda-downloads
Makefile Documentation
See the Makefile Documentation Page
Troubleshooting
Problem with CUDA driver version 5.0.24 on MacBook Pro 2012 Fix
Ubuntu 12.04 LTS and CUDA 5 Toolkit Installation Guide
See the guide here; work in progress
Fortran to C
Sample code from the TOMO project - converted by James Boelen, Raymong Hung, and Stanley Tsang
Original Fortran Subroutine
SUBROUTINE longtrack_self(direction,nrep,yp,xp,turnnow) !------------------------------------------------------------------------- ! h: principal harmonic number ! eta0: phase slip factor ! E0: energy of synchronous particle m ! beta0: relativistic beta of synchronous particle ! phi0: synchronous phase ! q: charge state of particles ! dphi: phase difference between considered particle and synchronous one ! denergy: energy difference between considered particle and synchronous one ! nrep: pass cavity nrep times before returning data ! direction: to inverse the time advance (rotation in the bucket), 1 or -1 ! xp and yp: time and energy in pixels ! dtbin and dEbin: GLOBAL time and energy pixel size in s and MeV ! omegarev0: revolution frequency ! VRF1,VRF2,VRF1dot,VRF2dot: GLOBAL RF voltages and derivatives of volts ! turnnow: present turn !--------------------------------------------------------------------------- IMPLICIT NONE REAL(SP), DIMENSION(:), INTENT(INOUT) :: xp,yp REAL(SP), DIMENSION(SIZE(xp)) :: dphi,denergy,selfvolt !HPF$ distribute dphi(block) !HPF$ align with dphi :: denergy,selfvolt,xp INTEGER :: mm INTEGER :: i,p,nrep,direction,turnnow dphi=(xp+xorigin)*h*omegarev0(turnnow)*dtbin-phi0(turnnow) denergy=(yp-yat0)*dEbin IF (direction.GT.0) THEN p=turnnow/dturns+1 DO i=1,nrep forall(mm=1:size(xp)) dphi(mm)=dphi(mm)-c1(turnnow)*denergy(mm) turnnow=turnnow+1 forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-& xorigin*h*omegarev0(turnnow)*dtbin forall(mm=1:size(xp)) xp(mm)=(xp(mm)-& phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin) forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1) forall(mm=1:size(xp)) denergy(mm)=denergy(mm)+q*((& (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+& (VRF2+VRF2dot*tatturn(turnnow))*& SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))-c2(turnnow) END DO ELSE p=turnnow/dturns DO i=1,nrep forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1) forall(mm=1:size(xp)) denergy(mm)=denergy(mm)-q*((& (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+& (VRF2+VRF2dot*tatturn(turnnow))*& SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))+c2(turnnow) turnnow=turnnow-1 forall(mm=1:size(xp)) dphi(mm)=dphi(mm)+c1(turnnow)*denergy(mm) forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-& xorigin*h*omegarev0(turnnow)*dtbin forall(mm=1:size(xp)) xp(mm)=(xp(mm)-& phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin) END DO END IF yp=denergy/dEbin+yat0 END SUBROUTINE longtrack_self
Modified Fortran Subroutine
SUBROUTINE longtrack_self(direction,nrep,yp,xp,turnnow) !------------------------------------------------------------------------- ! h: principal harmonic number ! eta0: phase slip factor ! E0: energy of synchronous particle ! beta0: relativistic beta of synchronous particle ! phi0: synchronous phase ! q: charge state of particles ! dphi: phase difference between considered particle and synchronous one ! denergy: energy difference between considered particle and synchronous one ! nrep: pass cavity nrep times before returning data ! direction: to inverse the time advance (rotation in the bucket), 1 or -1 ! xp and yp: time and energy in pixels ! dtbin and dEbin: GLOBAL time and energy pixel size in s and MeV ! omegarev0: revolution frequency ! VRF1,VRF2,VRF1dot,VRF2dot: GLOBAL RF voltages and derivatives of volts ! turnnow: present turn !--------------------------------------------------------------------------- IMPLICIT NONE REAL(SP), DIMENSION(:), INTENT(INOUT) :: xp,yp REAL(SP), DIMENSION(SIZE(xp)) :: dphi,denergy,selfvolt !HPF$ distribute dphi(block) !HPF$ align with dphi :: denergy,selfvolt,xp INTEGER :: mm INTEGER :: i,p,nrep,direction,turnnow CALL gputrack_self(direction,nrep,yp,xp,turnnow, & SIZE(xp),dphi,denergy, & c1, & c2, & dEbin, & dtbin, & h, & hratio, & omegarev0, & phi0, & phi12, & q, & tatturn, & VRF1, & VRF1dot, & VRF2, & VRF2dot, & xorigin, & yat0, & p, & dturns, & phiwrap, & selfvolt, & profilecount-1, & wraplength, & vself ) END SUBROUTINE longtrack_self
New C Function
#include <stdio.h> #include <math.h> void gputrack_self_ ( \ int *direction, \ int *nrep, \ float *yp, \ float *xp, \ int *turnnow, \ int *sizeofarrays, \ float *dphi, \ float *denergy, \ float *c1, \ float *c2, \ float *dEbin, \ float *dtbin, \ float *h, \ float *hratio, \ float *omegarev0, \ float *phi0, \ float *phi12, \ float *q, \ float *tatturn, \ float *VRF1, \ float *VRF1dot, \ float *VRF2, \ float *VRF2dot, \ float *xorigin, \ float *yat0, \ int *p, \ int *dturns, \ float *phiwrap, \ float *selfvolt, \ int *vselfDimRow, \ int *vselfDimCol, \ float *vself \ ) { /* Local Variables */ int l,i,mm,t; l = *sizeofarrays; t = *turnnow; // longtrack_self specific local variables int cp; cp = *p; /* dphi=(xp+xorigin)*h*omegarev0(turnnow)*dtbin-phi0(turnnow) */ for(mm = 0; mm < l; mm++) { dphi[mm] = (xp[mm] + *xorigin) * *h * omegarev0[t] * *dtbin - phi0[t]; } /* denergy=(yp-yat0)*dEbin */ for(mm = 0; mm < l; mm++) { denergy[mm] = (yp[mm] - *yat0) * *dEbin; } /* IF (direction.GT.0) THEN */ if (*direction > 0) { /* p=turnnow/dturns+1 */ cp = t / *dturns + 1; /* DO i=1,nrep */ for(i = 1; i <= *nrep; i++) { /* forall(mm=1:size(xp)) dphi(mm)=dphi(mm)-c1(turnnow)*denergy(mm) */ for(mm=0;mm<l;mm++) { dphi[mm] = dphi[mm] - c1[t] *denergy[mm]; } /* turnnow=turnnow+1 */ t=t+1; /* forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-& xorigin*h*omegarev0(turnnow)*dtbin */ for(mm=0;mm<l;mm++) { xp[mm] = dphi[mm] + phi0[t] - \ *xorigin * *h * omegarev0[t] * *dtbin; } /* forall(mm=1:size(xp)) xp(mm)=(xp(mm)-& phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin) */ for(mm = 0; mm < l; mm++) { xp[mm] = (xp[mm] - \ *phiwrap * floor(xp[mm] / *phiwrap)) / (*h * omegarev0[t] * *dtbin); } /* forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1) */ for(mm = 0; mm < l; mm++) { int itemp = floor(xp[mm]); selfvolt[mm] = vself[(*vselfDimRow * (itemp)) + (cp-1)]; } /* forall(mm=1:size(xp)) denergy(mm)=denergy(mm)+q*((& (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+& (VRF2+VRF2dot*tatturn(turnnow))*& SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))-c2(turnnow) */ for(mm = 0; mm < l; mm++) { denergy[mm] = denergy[mm] + *q *(( \ (*VRF1 + *VRF1dot * tatturn[t]) * sin(dphi[mm] + phi0[t]) + \ (*VRF2 + *VRF2dot * tatturn[t]) * \ sin(*hratio * (dphi[mm] + phi0[t] - *phi12))) + selfvolt[mm]) -c2[t]; } /* END DO */ } } else { // p=turnnow/dturns cp = t / *dturns; // DO i=1,nrep for (i=1;i<=*nrep;i++) { // forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1) for(mm = 0; mm < l; mm++) { int itemp = (int)floor(xp[mm]); selfvolt[mm] = vself[(*vselfDimRow*(itemp)) + (cp-1)]; } /* forall(mm=1:size(xp)) denergy(mm)=denergy(mm)-q*((& (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+& (VRF2+VRF2dot*tatturn(turnnow))*& SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))+c2(turnnow) */ for(mm = 0; mm < l; mm++) { denergy[mm]=denergy[mm] - *q *(( \ (*VRF1 + *VRF1dot * tatturn[t]) *sin(dphi[mm] + phi0[t]) + \ (*VRF2 + *VRF2dot * tatturn[t]) * \ sin(*hratio * (dphi[mm] + phi0[t] - *phi12))) + selfvolt[mm]) + c2[t]; } // turnnow=turnnow-1 t--; /* forall(mm=1:size(xp)) dphi(mm)=dphi(mm)-c1(turnnow)*denergy(mm) */ for(mm = 0; mm < l; mm++) { dphi[mm]=dphi[mm] + c1[t] * denergy[mm]; } /* forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-& xorigin*h*omegarev0(turnnow)*dtbin */ for(mm = 0; mm < l; mm++) { xp[mm] = dphi[mm] + phi0[t] - \ *xorigin * *h * omegarev0[t] * *dtbin; } /* forall(mm=1:size(xp)) xp(mm)=(xp(mm)-& phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin) */ for(mm = 0; mm < l; mm++) { xp[mm] = (xp[mm] - \ *phiwrap * floor(xp[mm] / *phiwrap)) / (*h * omegarev0[t] * *dtbin); } } } // yp=denergy/dEbin+yat0 for(mm=0; mm<l; mm++) { yp[mm] = denergy[mm] / *dEbin + *yat0; } *turnnow = t; return; }