112
edits
Changes
TriForce
,→Progress
=== Assignment 2 ===
{| class="wikitable mw-collapsible mw-collapsed"
! Code for Solving a Sudoku using backtracking
|-
|
#include <stdio.h>
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <iomanip>
// CUDA header file
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// A Backtracking program in C++ to solve Sudoku problem
#include <stdio.h>
// UNASSIGNED is used for empty cells in sudoku grid
#define UNASSIGNED 0
// N is used for the size of Sudoku grid. Size will be NxN
#define BOXWIDTH 3
#define N 9
// This function finds an entry in grid that is still unassigned
bool FindUnassignedLocation(int grid[N][N], int &row, int &col);
// Checks whether it will be legal to assign num to the given row, col
__global__ void makeGuess(int* d_a, int guess, int row, int col) {
__shared__ bool found;
bool control = !(threadIdx.x || threadIdx.y);
int tidx = 0;
if (threadIdx.y == 2) {
tidx = (((row / BOXWIDTH) * BOXWIDTH) + threadIdx.x / BOXWIDTH) * N + threadIdx.x % BOXWIDTH +
(col / BOXWIDTH) * BOXWIDTH;
}
else if (threadIdx.y == 1) {
tidx = row * N + threadIdx.x;
}
else {
tidx = threadIdx.x * N + col;
}
if (control)
found = true;
__syncthreads();
while (found) {
__syncthreads();
if (control)
found = false;
__syncthreads();
if (d_a[tidx] == ++guess)
found = true;
__syncthreads();
}
/**/
if (control)
d_a[row * N + col] = guess;
/**/
}
/* Takes a partially filled-in grid and attempts to assign values to
all unassigned locations in such a way to meet the requirements
for Sudoku solution (non-duplication across rows, columns, and boxes) */
bool SolveSudoku(int grid[N][N], int* d_a, int row, int col)
{
// If there is no unassigned location, we are done
if (!FindUnassignedLocation(grid, row, col))
return true; // success!
// consider digits 1 to 9
dim3 block(N, 3);
int guess = 0;
while (guess <= N) {
makeGuess << <1, block >> > (d_a, guess, row, col);
cudaMemcpy(&guess, d_a + row * N + col, sizeof(int), cudaMemcpyDeviceToHost);
if (guess <= N && SolveSudoku(grid, d_a, row, col + 1))
return true;
}
//Erase the guess on the host
int zero = UNASSIGNED;
cudaMemcpy(d_a + row * N + col, &zero, sizeof(int), cudaMemcpyHostToDevice);
return false; // this triggers backtracking
}
/* Searches the grid to find an entry that is still unassigned. If
found, the reference parameters row, col will be set the location
that is unassigned, and true is returned. If no unassigned entries
remain, false is returned. */
bool FindUnassignedLocation(int grid[N][N], int &row, int &col)
{
for (; row < N; ++row) {
for (; col < N; ++col)
if (grid[row][col] == UNASSIGNED)
return true;
col = 0;
}
return false;
}
/* A utility function to print grid */
void printGrid(int grid[N][N])
{
for (int row = 0; row < N; row++)
{
for (int col = 0; col < N; col++)
printf("%2d", grid[row][col]);
printf("\n");
}
}
/* Driver Program to test above functions */
int main()
{
/* 0 means unassigned cells */
int grid[N][N] = { {3, 0, 6, 5, 0, 8, 4, 0, 0},
{5, 2, 0, 0, 0, 0, 0, 0, 0},
{0, 8, 7, 0, 0, 0, 0, 3, 1},
{0, 0, 3, 0, 1, 0, 0, 8, 0},
{9, 0, 0, 8, 6, 3, 0, 0, 5},
{0, 5, 0, 0, 9, 0, 6, 0, 0},
{1, 3, 0, 0, 0, 0, 2, 5, 0},
{0, 0, 0, 0, 0, 0, 0, 7, 4},
{0, 0, 5, 2, 0, 6, 3, 0, 0} };
/**
int grid[N][N] = { {0, 8, 0, 0, 0, 0, 0, 3, 0, 0, 0, 10, 9, 7, 11, 0},
{0, 9, 15, 13, 0, 10, 0, 0, 2, 6, 8, 16, 0, 0, 0, 0},
{0, 0, 16, 0, 15, 0, 8, 0, 9, 0, 0, 0, 6, 0, 2, 0},
{1, 0, 2, 0, 9, 11, 4, 6, 15, 3, 5, 7, 0, 0, 12, 0},
{16, 6, 4, 0, 5, 2, 0, 0, 1, 0, 0, 0, 11, 0, 0, 12},
{5, 11, 0, 0, 0, 3, 0, 15, 0, 16, 0, 13, 0, 1, 0, 8},
{0, 0, 3, 0, 0, 6, 11, 14, 0, 5, 7, 0, 0, 9, 0, 0},
{0, 0, 0, 14, 8, 0, 10, 0, 0, 11, 12, 0, 0, 0, 0, 0},
{0, 7, 13, 0, 0, 0, 0, 12, 0, 8, 9, 0, 0, 0, 3, 0},
{0, 0, 11, 9, 0, 7, 0, 0, 0, 0, 0, 12, 0, 8, 16, 5},
{0, 0, 10, 0, 11, 13, 0, 0, 0, 0, 0, 3, 12, 0, 6, 0},
{0, 5, 0, 0, 10, 15, 0, 1, 7, 2, 0, 0, 14, 11, 0, 0},
{0, 0, 5, 0, 0, 12, 14, 0, 0, 10, 0, 0, 15, 0, 0, 4},
{9, 0, 14, 6, 0, 0, 1, 0, 16, 0, 2, 0, 3, 0, 13, 0},
{8, 13, 0, 4, 0, 0, 0, 0, 12, 7, 3, 0, 0, 6, 0, 0},
{0, 16, 12, 0, 0, 5, 0, 9, 0, 13, 14, 4, 1, 0, 0, 0} };
/**/
int* d_a;
cudaMalloc((void**)&d_a, N*N * sizeof(int));
cudaMemcpy(d_a, grid, N*N * sizeof(int), cudaMemcpyHostToDevice);
SolveSudoku(grid, d_a, 0, 0);
cudaMemcpy(grid, d_a, N*N * sizeof(int), cudaMemcpyDeviceToHost);
printGrid(grid);
cudaFree(d_a);
/**
if (SolveSudoku(grid, 0, 0))
printGrid(grid);
else
printf("No solution exists");
/**/
return 0;
}
|}
This code is capable of solving the 9x9 matrix supplied
HOWEVER with the backtracking algorithm substituting values and the communications delay between the GPU and CPU,
This code is unable to solve the 16x16 in any reasonable amount of time (I stopped it at 10+ minutes).
If you consider the 130+ empty spaces in the grid I estimate over 130^2 calls to cudaMemcpy either way...
=== Assignment 3 ===