112
edits
Changes
TriForce
,→Kernel Optimization Attempts
[[File:Julia.jpg]]
|}
This problem would be fairly simple to parallelize. In the image created by Julia sets each pixel is independent of the others. This problem involves Complex numbers, but that can be simply represented by using two arrays, or pairs of floats.
==== Assignment 1: Selection for parallelizing ====
#define N (BOX_W * BOX_W)
__global__ void solve(int* d_a) {
// Used to remember which row | col | box ( section ) have which values
__shared__ bool rowHas[N][N];
__shared__ bool colHas[N][N];
__shared__ bool boxHas[N][N];
// Used to ensure that the table has changed
__shared__ bool changed;
// Number of spaces which can place the number in each section
__shared__ int rowCount[N][N];
__shared__ int colCount[N][N];
__shared__ int boxCount[N][N];
// Where the square is located in the Sudoku
int row = threadIdx.x;
int col = threadIdx.y;
int box = row / BOX_W + (col / BOX_W) * BOX_W;
// Unique identifier for each square in row, col, box
// Corresponds to the generic Sudoku Solve
// Using a Sudoku to solve a Sudoku !!!
int offset = col + (row % BOX_W) * BOX_W + (box % BOX_W);
// Square's location in the Sudoku
int gridIdx = col * N + row;
int at = d_a[gridIdx];
bool notSeen[N];
for (int i = 0; i < N; ++i)
notSeen[i] = true;
rowHas[col][row] = false;
colHas[col][row] = false;
boxHas[col][row] = false;
__syncthreads();
if (at != UNASSIGNED) {
rowHas[row][at - 1] = true;
colHas[col][at - 1] = true;
boxHas[box][at - 1] = true;
}
// Previous loop has not changed any values
do {
// RESET counters
rowCount[col][row] = 0;
colCount[col][row] = 0;
boxCount[col][row] = 0;
__syncthreads();
if (gridIdx == 0) // forget previous change
changed = false;
int count = 0; // number of values which can fit in this square
int guess = 0; // last value found which can fit in this square
for (int idx = 0; idx < N; ++idx) {
// Ensures that every square in each section is working on a different number in the section
int num = (idx + offset) % N;
if (at == UNASSIGNED && notSeen[num]) {
if (rowHas[row][num] || boxHas[box][num] || colHas[col][num])
notSeen[num] = false;
else {
++count;
guess = num;
rowCount[row][num]++;
colCount[col][num]++;
boxCount[box][num]++;
}
}
__syncthreads();
}
// Find values which can go in only one spot in the section
for (int idx = 0; idx < N && count > 1; ++idx) {
if (notSeen[idx] &&
(rowCount[row][idx] == 1 || boxCount[box][idx] == 1 || colCount[col][idx] == 1)) {
// In this section this value can only appear in this square
guess = idx;
count = 1;
}
}
if (count == 1) {
at = guess + 1;
rowHas[row][guess] = true;
colHas[col][guess] = true;
boxHas[box][guess] = true;
changed = true;
}
__syncthreads();
} while (changed);
//SOLVED CHECK
if (!(rowHas[row][col] || colHas[row][col] || boxHas[row][col]))
changed = true;
__syncthreads();
if (changed && gridIdx == 0)
at = 0;
d_a[gridIdx] = at;
}
void print(int result[N][N]) {
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++)
printf("%3d", result[row][col]);
printf("\n");
}
}
// Driver program to test main program functions
int main() {
int h_a[N][N] = {
{ 1, 0, 4, 0, 25, 0, 19, 0, 0, 10, 21, 8, 0, 14, 0, 6, 12, 9, 0, 0, 0, 0, 0, 0, 5},
{ 5, 0, 19, 23, 24, 0, 22, 12, 0, 0, 16, 6, 0, 20, 0, 18, 0, 25, 14, 13, 10, 11, 0, 1, 15},
{ 0, 0, 0, 0, 0, 0, 21, 5, 0, 20, 11, 10, 0, 1, 0, 4, 8, 24, 23, 15, 18, 0, 16, 22, 19},
{ 0, 7, 21, 8, 18, 0, 0, 0, 11, 0, 5, 0, 0, 24, 0, 0, 0, 17, 22, 1, 9, 6, 25, 0, 0},
{ 0, 13, 15, 0, 22, 14, 0, 18, 0, 16, 0, 0, 0, 4, 0, 0, 0, 19, 0, 0, 0, 24, 20, 21, 17},
{ 12, 0, 11, 0, 6, 0, 0, 0, 0, 15, 0, 0, 0, 0, 21, 25, 19, 0, 4, 0, 22, 14, 0, 20, 0},
{ 8, 0, 0, 21, 0, 16, 0, 0, 0, 2, 0, 3, 0, 0, 0, 0, 17, 23, 18, 22, 0, 0, 0, 24, 6},
{ 4, 0, 14, 18, 7, 9, 0, 22, 21, 19, 0, 0, 0, 2, 0, 5, 0, 0, 0, 6, 16, 15, 0, 11, 12},
{ 22, 0, 24, 0, 23, 0, 0, 11, 0, 7, 0, 0, 4, 0, 14, 0, 2, 12, 0, 8, 5, 19, 0, 25, 9},
{ 20, 0, 0, 0, 5, 0, 0, 0, 0, 17, 9, 0, 12, 18, 0, 1, 0, 0, 7, 24, 0, 0, 0, 13, 4},
{ 13, 0, 0, 5, 0, 2, 23, 14, 4, 18, 22, 0, 17, 0, 0, 20, 0, 1, 9, 21, 12, 0, 0, 8, 11},
{ 14, 23, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 20, 25, 0, 3, 4, 13, 0, 11, 21, 9, 5, 18, 22},
{ 7, 0, 0, 11, 17, 20, 24, 0, 0, 0, 3, 4, 1, 12, 0, 0, 6, 14, 0, 5, 25, 13, 0, 0, 0},
{ 0, 0, 16, 9, 0, 17, 11, 7, 10, 25, 0, 0, 0, 13, 6, 0, 0, 18, 0, 0, 19, 4, 0, 0, 20},
{ 6, 15, 0, 19, 4, 13, 0, 0, 5, 0, 18, 11, 0, 0, 9, 8, 22, 16, 25, 10, 7, 0, 0, 0, 0},
{ 0, 0, 0, 2, 0, 0, 10, 19, 3, 0, 1, 0, 22, 9, 4, 11, 15, 0, 20, 0, 0, 8, 23, 0, 25},
{ 0, 24, 8, 13, 1, 0, 0, 4, 20, 0, 17, 14, 0, 0, 18, 0, 16, 22, 5, 0, 11, 0, 10, 0, 0},
{ 23, 10, 0, 0, 0, 0, 0, 0, 18, 0, 6, 0, 16, 0, 0, 17, 1, 0, 13, 0, 0, 3, 19, 12, 0},
{ 25, 5, 0, 14, 11, 0, 17, 0, 8, 24, 13, 0, 19, 23, 15, 9, 0, 0, 12, 0, 20, 0, 22, 0, 7},
{ 0, 0, 17, 4, 0, 22, 15, 0, 23, 11, 12, 25, 0, 0, 0, 0, 18, 8, 0, 7, 0, 0, 14, 0, 13},
{ 19, 6, 23, 22, 8, 0, 0, 1, 25, 4, 14, 2, 0, 3, 7, 13, 10, 11, 16, 0, 0, 0, 0, 0, 0},
{ 0, 4, 0, 17, 0, 3, 0, 24, 0, 8, 20, 23, 11, 10, 25, 22, 0, 0, 0, 12, 13, 2, 18, 6, 0},
{ 0, 0, 7, 16, 0, 0, 6, 17, 2, 21, 0, 18, 0, 0, 0, 19, 0, 0, 8, 0, 0, 0, 0, 4, 0},
{ 18, 9, 25, 1, 2, 11, 0, 0, 13, 22, 4, 0, 21, 0, 5, 0, 23, 7, 0, 0, 15, 0, 3, 0, 8},
{ 0, 21, 10, 0, 0, 12, 0, 20, 16, 0, 19, 0, 0, 0, 0, 15, 14, 4, 2, 18, 23, 25, 11, 7, 0}
};
__syncthreads();
}
__syncthreads();
// Previous loop has not changed any values
do {
// RESET counters
rowCount[col][row] = 0;
colCount[col][row] = 0;
boxCount[col][row] = 0;
__syncthreads();
if (gridIdx == 0) // forget previous change
changed = false;
int count = 0; // number of values which can fit in this square
int guess = 0; // last value found which can fit in this square
int b_shuttle = 1;
for (int idx = 0; idx < N; ++idx) {
// Ensures that every square in each section is working on a different number in the section
int num = (idx + offset) % N;
if (b_shuttle & notSeen) {
if (rowHas[row][num] || boxHas[box][num] || colHas[col][num])
notSeen ^= b_shuttle;
else {
++count;
guess = num;
rowCount[row][num]++;
colCount[col][num]++;
boxCount[box][num]++;
}
}
b_shuttle <<= 1;
__syncthreads();
}
// Find values which can go in only one spot in the section
b_shuttle = 1;
for (int idx = 0; idx < N && count > 1; ++idx) {
int num = (idx + offset) % N;
if ((b_shuttle & notSeen) &&
(rowCount[row][num] == 1 || boxCount[box][num] == 1 || colCount[col][num] == 1)) {
// In this section this value can only appear in this square
guess = num;
count = 1;
}
b_shuttle <<= 1;
}
__syncthreads();
}
|}
}
|} Change : Quickly finds one section that requires a single value in one spot, by checking all sections at once and remembering a single section Theory : Similar to the previous Kernel, trying to remove the second loop Result : Surprisingly slow, gains little benefit from the section logic and shared memory, yet is still required to count all values{| class="wikitable mw-collapsible mw-collapsed"! Notify - Determines a single section that has a limited value (removes section loop)|-| __global__ void printsolve(int result* d_a) { // Used to remember which row | col | box ( section ) have which values __shared__ bool rowHas[N][N]; __shared__ bool colHas[N][N]; __shared__ bool boxHas[N][N]; // Used to ensure that the table has changed __shared__ bool changed; // Number of spaces which can place the number in each section __shared__ int rowCount[N][N]; __shared__ int colCount[N][N]; __shared__ int boxCount[N][N]; // Where the square is located in the Sudoku int row = threadIdx.x; int col = threadIdx.y; int box = row / BOX_W + (col / BOX_W) {* BOX_W; // Unique identifier for each square in row, col, box // Corresponds to the generic Sudoku Solve // Using a Sudoku to solve a Sudoku !!! int offset = col + (row % BOX_W) * BOX_W + (box % BOX_W); // Square's location in the Sudoku int gridIdx = col * N + row ; int at = d_a[gridIdx]; bool notSeen[N]; for (int i = 0; row i < N; row++i) notSeen[i] = true; rowHas[col][row] = false; colHas[col][row] = false; boxHas[col][row] = false; __syncthreads(); __shared__ int notify; if (at != UNASSIGNED) { rowHas[row][at - 1] = true; colHas[col][at - 1] = true; boxHas[box][at - 1] = true; } // Previous loop has not changed any values do { // RESET counters rowCount[col][row] = 0; colCount[col][row] = 0; boxCount[col][row] = 0; __syncthreads(); if (gridIdx == 0) { // forget previous change changed = false; notify = -1; } int count = 0; // number of values which can fit in this square int guess = 0; // last value found which can fit in this square for (int col idx = 0; col idx < N; ++idx) { // Ensures that every square in each section is working on a different number in the section int num = (idx + offset) % N; if (at == UNASSIGNED && notSeen[num]) { if (rowHas[row][num] || boxHas[box][num] || colHas[col][num]) notSeen[num] = false; else { ++count; guess = num; rowCount[row][num]++; colCount[col][num]++; boxCount[box][num]++; } } __syncthreads(); } if (rowCount[row][col] == 1 || colCount[row][col] == 1 || boxCount[row][col] == 1) notify = col; __syncthreads(); printf // Find values which can go in only one spot in the section if ("%3d", resultnotify > 0 && at == UNASSIGNED && notSeen[notify] && (rowCount[row][notify] == 1 || boxCount[box][notify] == 1 || colCount[col][notify] == 1)){ // In this section this value can only appear in this square guess = notify; printf count = 1; } if (count == 1) { at = guess + 1; rowHas[row][guess] = true; colHas[col][guess] = true; boxHas[box][guess] = true; changed = true; } __syncthreads("\n"); }while (changed); //SOLVED CHECK if (!(rowHas[row][col] || colHas[row][col] || boxHas[row][col])) changed = true; __syncthreads(); if (changed && gridIdx == 0) at = 0; d_a[gridIdx] = at;
}
}
|}
{| class="wikitable mw-collapsible mw-collapsed"
! Countdown Boolean Array (EDITED - now 4.37ms)
|-
|
__global__ void solve(int* d_a) {
// Used to remember which row | col | box ( section ) have which values
__shared__ bool rowHas[N][N];
__shared__ bool colHas[N][N];
__shared__ bool boxHas[N][N];
// Used to ensure that the table has changed
__shared__ bool changed;
// Number of spaces which can place the number in each section
__shared__ int rowCount[N][N];
__shared__ int colCount[N][N];
__shared__ int boxCount[N][N];
// Where the square is located in the Sudoku
int row = threadIdx.x;
int col = threadIdx.y;
int box = row / BOX_W + (col / BOX_W) * BOX_W;
int gridIdx = col * N + row;
int at = d_a[gridIdx];
// Unique identifier for each square in row, col, box
// Corresponds to the generic Sudoku Solve
// Using a Sudoku to solve a Sudoku !!!
int offset = col + (row % BOX_W) * BOX_W + (box % BOX_W);
// Square's location in the Sudoku
int count = 0; //Number of values which can fit in this square
bool notSeen[N]; //Boolean Array as an Integer
for(int idx = 0; idx < N; ++idx)
notSeen[idx] = false;
if (gridIdx == 0)
changed = true;
rowHas[col][row] = false;
colHas[col][row] = false;
boxHas[col][row] = false;
rowCount[col][row] = 0;
colCount[col][row] = 0;
boxCount[col][row] = 0;
__syncthreads();
if (at != UNASSIGNED) {
rowHas[row][at - 1] = true;
colHas[col][at - 1] = true;
boxHas[box][at - 1] = true;
}
__syncthreads();
int guess;
for (int idx = 0; idx < N; ++idx) {
int num = (idx + offset) % N;
if (at == UNASSIGNED && !(rowHas[row][num] || boxHas[box][num] || colHas[col][num])) {
notSeen[num] = true; //this value can go here
++count; //how many values this square can have
guess = num;
//how many values this section can have
rowCount[row][num]++;
colCount[col][num]++;
boxCount[box][num]++;
}
__syncthreads();
}
if (at == UNASSIGNED && count == 0) //NOT POSSIBLE SUDOKU
changed = false;
__syncthreads();
if (count == 1) {
at = guess + 1;
count = 0;
notSeen[guess] = false;
rowHas[row][guess] = true;
colHas[col][guess] = true;
boxHas[box][guess] = true;
}
// Previous loop has not changed any values
while (changed) {
__syncthreads();
if (gridIdx == 0) // forget previous change
changed = false;
for (int idx = 0; idx < N; ++idx) {
// Ensures that every square in each section is working on a different number in the section
int num = (idx + offset) % N;
if (notSeen[num] &&
(at != UNASSIGNED || rowHas[row][num] || boxHas[box][num] || colHas[col][num])) {
rowCount[row][num]--;
colCount[col][num]--;
boxCount[box][num]--;
notSeen[num] = false;
--count;
}
__syncthreads();
if ( notSeen[num] &&
(count == 1 || rowCount[row][num] == 1 || boxCount[box][num] == 1 || colCount[col][num] == 1)) {
rowHas[row][num] = true;
colHas[col][num] = true;
boxHas[box][num] = true;
changed = true;
notSeen[num] = false;
at = num + 1;
count = 0;
}
}
__syncthreads();
};
if (!(rowHas[row][col] && colHas[row][col] && boxHas[box][col]))
changed = true; //HAVE NOT SOLVED the sudoku
__syncthreads();
if (changed && gridIdx == 0)
at = 0;
d_a[gridIdx] = at;
}
|}
[[File:Kernel_Compare.png]]
==== Occupancy Calculations ====
{| class="wikitable mw-collapsible mw-collapsed"