GPU610/DPS915 | Student List | Group and Project Index | Student Resources | Glossary

The purpose of this page is to share useful information that can help groups with their CUDA projects.

CUDA Enabled Cards

List @ CUDA Wiki

Workshop Notes

BLAS Documentation

See the BLAS Documentation Page

Getting Started on Mac

http://developer.download.nvidia.com/compute/DevZone/docs/html/C/doc/CUDA_Getting_Started_Mac.pdf

http://developer.nvidia.com/cuda/cuda-downloads

Makefile Documentation

See the Makefile Documentation Page

Troubleshooting

Problem with CUDA driver version 5.0.24 on MacBook Pro 2012 Fix

Ubuntu 12.04 LTS and CUDA 5 Toolkit Installation Guide

See the guide here; work in progress

SVGALIBS - Graphics Library

This library is a Linux graphics library and thus will not work on windows (I have tried very briefly on finding a way but could not for the reason that Windows does not have X11/xorgs/linux tty devices). The program needs to be run on a Linux machine because it is using svgalibs which is an archaic way to display stuff on the linux screen (from quick google search on the svga library).

svgalibs link

Converting Fortran Code to C Code

Sample code from the TOMO project - converted by James Boelen, Raymong Hung, and Stanley Tsang

Original Fortran Subroutine

SUBROUTINE longtrack_self(direction,nrep,yp,xp,turnnow)
!-------------------------------------------------------------------------
! h: principal harmonic number
! eta0: phase slip factor
! E0: energy of synchronous particle m
! beta0: relativistic beta of synchronous particle
! phi0: synchronous phase
! q: charge state of particles
! dphi: phase difference between considered particle and synchronous one
! denergy: energy difference between considered particle and synchronous one
! nrep: pass cavity nrep times before returning data
! direction: to inverse the time advance (rotation in the bucket), 1 or -1
! xp and yp: time and energy in pixels
! dtbin and dEbin: GLOBAL time and energy pixel size in s and MeV
! omegarev0: revolution frequency
! VRF1,VRF2,VRF1dot,VRF2dot: GLOBAL RF voltages and derivatives of volts
! turnnow: present turn
!---------------------------------------------------------------------------
  IMPLICIT NONE
  REAL(SP), DIMENSION(:), INTENT(INOUT) :: xp,yp
  REAL(SP), DIMENSION(SIZE(xp)) :: dphi,denergy,selfvolt
!HPF$ distribute dphi(block)
!HPF$ align with dphi :: denergy,selfvolt,xp
  INTEGER :: mm
  INTEGER :: i,p,nrep,direction,turnnow
  dphi=(xp+xorigin)*h*omegarev0(turnnow)*dtbin-phi0(turnnow)
  denergy=(yp-yat0)*dEbin
  IF (direction.GT.0) THEN
    p=turnnow/dturns+1
    DO i=1,nrep
      forall(mm=1:size(xp)) dphi(mm)=dphi(mm)-c1(turnnow)*denergy(mm)
      turnnow=turnnow+1
      forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-&
                                   xorigin*h*omegarev0(turnnow)*dtbin
      forall(mm=1:size(xp)) xp(mm)=(xp(mm)-&
        phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin)
      forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1)
      forall(mm=1:size(xp)) denergy(mm)=denergy(mm)+q*((&
        (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+&
        (VRF2+VRF2dot*tatturn(turnnow))*&
        SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))-c2(turnnow)
    END DO
  ELSE
    p=turnnow/dturns
    DO i=1,nrep
      forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1)
      forall(mm=1:size(xp)) denergy(mm)=denergy(mm)-q*((&
        (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+&
        (VRF2+VRF2dot*tatturn(turnnow))*&
        SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))+c2(turnnow)
      turnnow=turnnow-1
      forall(mm=1:size(xp)) dphi(mm)=dphi(mm)+c1(turnnow)*denergy(mm)
      forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-&
                                   xorigin*h*omegarev0(turnnow)*dtbin
      forall(mm=1:size(xp)) xp(mm)=(xp(mm)-&
        phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin)
    END DO
  END IF
  yp=denergy/dEbin+yat0
END SUBROUTINE longtrack_self

Modified Fortran Subroutine

SUBROUTINE longtrack_self(direction,nrep,yp,xp,turnnow)
!-------------------------------------------------------------------------
! h: principal harmonic number
! eta0: phase slip factor
! E0: energy of synchronous particle
! beta0: relativistic beta of synchronous particle
! phi0: synchronous phase
! q: charge state of particles
! dphi: phase difference between considered particle and synchronous one
! denergy: energy difference between considered particle and synchronous one
! nrep: pass cavity nrep times before returning data
! direction: to inverse the time advance (rotation in the bucket), 1 or -1
! xp and yp: time and energy in pixels
! dtbin and dEbin: GLOBAL time and energy pixel size in s and MeV
! omegarev0: revolution frequency
! VRF1,VRF2,VRF1dot,VRF2dot: GLOBAL RF voltages and derivatives of volts
! turnnow: present turn
!---------------------------------------------------------------------------
  IMPLICIT NONE
  REAL(SP), DIMENSION(:), INTENT(INOUT) :: xp,yp
  REAL(SP), DIMENSION(SIZE(xp)) :: dphi,denergy,selfvolt
!HPF$ distribute dphi(block)
!HPF$ align with dphi :: denergy,selfvolt,xp
  INTEGER :: mm
  INTEGER :: i,p,nrep,direction,turnnow
  CALL gputrack_self(direction,nrep,yp,xp,turnnow, &
  SIZE(xp),dphi,denergy, &
     c1, &
     c2, &
     dEbin, &
     dtbin, &
     h, &
     hratio, &
     omegarev0, &
     phi0, &
     phi12, &
     q, &
     tatturn, &
     VRF1, &
     VRF1dot, &
     VRF2, &
     VRF2dot, &
     xorigin, &
     yat0, &
     p, &
     dturns, &
     phiwrap, &
     selfvolt, &
     profilecount-1, &
     wraplength, &
     vself )
END SUBROUTINE longtrack_self

New C Function

#include <stdio.h>
#include <math.h>

void gputrack_self_ ( \
    int  *direction, \
    int  *nrep, \
    float *yp, \
    float *xp, \
    int  *turnnow, \
    int  *sizeofarrays, \
    float *dphi, \
    float *denergy, \
    float *c1, \
    float *c2, \
    float *dEbin, \
    float *dtbin, \
    float *h, \
    float *hratio, \
    float *omegarev0, \
    float *phi0, \
    float *phi12, \
    float *q, \
    float *tatturn, \
    float *VRF1, \
    float *VRF1dot, \
    float *VRF2, \
    float *VRF2dot, \
    float *xorigin, \
    float *yat0, \
    int *p, \
    int *dturns, \
    float *phiwrap, \
    float *selfvolt, \
    int *vselfDimRow, \
    int *vselfDimCol, \
    float *vself \
)
{
    /* Local Variables */
    int l,i,mm,t;
    l = *sizeofarrays;
    t = *turnnow;
   
   
    // longtrack_self specific local variables
    int cp;
    cp = *p;
   
    /*  dphi=(xp+xorigin)*h*omegarev0(turnnow)*dtbin-phi0(turnnow) */
    for(mm = 0; mm < l; mm++) {
        dphi[mm] = (xp[mm] + *xorigin) * *h * omegarev0[t] * *dtbin - phi0[t];
    }
   
    /*  denergy=(yp-yat0)*dEbin */
    for(mm = 0; mm < l; mm++) {
        denergy[mm] = (yp[mm] - *yat0) * *dEbin;
    }

    /*   IF (direction.GT.0) THEN */
    if (*direction > 0) {
        /* p=turnnow/dturns+1 */
        cp = t / *dturns + 1;
        /* DO i=1,nrep */
        for(i = 1; i <= *nrep; i++) {
            /* forall(mm=1:size(xp)) dphi(mm)=dphi(mm)-c1(turnnow)*denergy(mm) */
            for(mm=0;mm<l;mm++) {
                dphi[mm] = dphi[mm] - c1[t] *denergy[mm];
            }
            /* turnnow=turnnow+1 */
            t=t+1;
            /* forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-&
                xorigin*h*omegarev0(turnnow)*dtbin */
            for(mm=0;mm<l;mm++) {
                xp[mm] = dphi[mm] + phi0[t] - \
                *xorigin * *h * omegarev0[t] * *dtbin;
            }
            /* forall(mm=1:size(xp)) xp(mm)=(xp(mm)-&
                phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin) */
            for(mm = 0; mm < l; mm++) {
                xp[mm] = (xp[mm] - \
                *phiwrap * floor(xp[mm] / *phiwrap)) / (*h * omegarev0[t] * *dtbin);
            }
            /* forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1) */
            for(mm = 0; mm < l; mm++) {
                int itemp = floor(xp[mm]);
                selfvolt[mm] = vself[(*vselfDimRow * (itemp)) + (cp-1)];
            }
            /* forall(mm=1:size(xp)) denergy(mm)=denergy(mm)+q*((&
                (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+&
                (VRF2+VRF2dot*tatturn(turnnow))*&
                SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))-c2(turnnow) */
            for(mm = 0; mm < l; mm++) {
                denergy[mm] = denergy[mm] + *q *(( \
                (*VRF1 + *VRF1dot * tatturn[t]) * sin(dphi[mm] + phi0[t]) + \
                (*VRF2 + *VRF2dot * tatturn[t]) * \
                sin(*hratio * (dphi[mm] + phi0[t] - *phi12))) + selfvolt[mm]) -c2[t];
            }
        /*     END DO */
        }
    }
      else {
        // p=turnnow/dturns
        cp = t / *dturns;
        // DO i=1,nrep
        for (i=1;i<=*nrep;i++) {
            // forall(mm=1:size(xp)) selfvolt(mm)=vself(p,FLOOR(xp(mm))+1)
            for(mm = 0; mm < l; mm++) {
                int itemp = (int)floor(xp[mm]);
                selfvolt[mm] = vself[(*vselfDimRow*(itemp)) + (cp-1)];
            }
            /* forall(mm=1:size(xp)) denergy(mm)=denergy(mm)-q*((&
                (VRF1+VRF1dot*tatturn(turnnow))*SIN(dphi(mm)+phi0(turnnow))+&
                (VRF2+VRF2dot*tatturn(turnnow))*&
                SIN(hratio*(dphi(mm)+phi0(turnnow)-phi12)))+selfvolt(mm))+c2(turnnow) */
            for(mm = 0; mm < l; mm++) {
                denergy[mm]=denergy[mm] - *q *(( \
                (*VRF1 + *VRF1dot * tatturn[t]) *sin(dphi[mm] + phi0[t]) + \
                (*VRF2 + *VRF2dot * tatturn[t]) * \
                sin(*hratio * (dphi[mm] + phi0[t] - *phi12))) + selfvolt[mm]) + c2[t];
            }
            // turnnow=turnnow-1
            t--;
            /* forall(mm=1:size(xp)) dphi(mm)=dphi(mm)-c1(turnnow)*denergy(mm) */
            for(mm = 0; mm < l; mm++) {
                dphi[mm]=dphi[mm] + c1[t] * denergy[mm];
            }
            /* forall(mm=1:size(xp)) xp(mm)=dphi(mm)+phi0(turnnow)-&
                xorigin*h*omegarev0(turnnow)*dtbin */
            for(mm = 0; mm < l; mm++) {
                xp[mm] = dphi[mm] + phi0[t] - \
                *xorigin * *h * omegarev0[t] * *dtbin;
            }
            /* forall(mm=1:size(xp)) xp(mm)=(xp(mm)-&
                phiwrap*FLOOR(xp(mm)/phiwrap))/(h*omegarev0(turnnow)*dtbin) */
            for(mm = 0; mm < l; mm++) {
                xp[mm] = (xp[mm] - \
                *phiwrap * floor(xp[mm] / *phiwrap)) / (*h * omegarev0[t] * *dtbin);
            }
        }
    }
   
    // yp=denergy/dEbin+yat0
    for(mm=0; mm<l; mm++) {
        yp[mm] = denergy[mm] / *dEbin + *yat0;
    }   

    *turnnow = t;
   
  return;
}

CDOT Wiki ^β

GPU610/DPS915 Student Resources

CUDA Enabled Cards

Workshop Notes

BLAS Documentation

Getting Started on Mac

Makefile Documentation

Troubleshooting

Ubuntu 12.04 LTS and CUDA 5 Toolkit Installation Guide

SVGALIBS - Graphics Library

Converting Fortran Code to C Code

Original Fortran Subroutine

Modified Fortran Subroutine

New C Function

CDOT Wiki β

GPU610/DPS915 Student Resources

CUDA Enabled Cards

Workshop Notes

BLAS Documentation

Getting Started on Mac

Makefile Documentation

Troubleshooting

Ubuntu 12.04 LTS and CUDA 5 Toolkit Installation Guide

SVGALIBS - Graphics Library

Converting Fortran Code to C Code

Original Fortran Subroutine

Modified Fortran Subroutine

New C Function

CDOT Wiki ^β