Open main menu

CDOT Wiki β

Changes

GPU621/DPS921 T-eaSyPeasy

183 bytes removed, 13:29, 14 April 2016
Description
</pre>
The strange thing is that after these parallelisations The algorithm thealgorithm performed a lot worse. I think this has to do with the granularity of the parallelism. Our serial version can solve a 500 city problem around 10 times faster than the non-serial version can.Another approach would have been to parallelise the whole nodes instead of the operations in them, because we aren't solving 1000 by 1000 matrices but this is a little bit difficult because for each node to exist, the parent needs to have been evaluated already. There would have to be some very large amount of 20 by 20 matricescomplex Node management logic to determine which nodes to spawn and which not to spawn, the parallelism isn't splitting into all when to halt evaluation on certain nodes and that much and some sort of that may be faster off serialthing.
Another approach would have been Learn from me and think long and hard about what you want to parallelise in regards of granularity, because in this case parallelising the individual operations in the whole nodes instead Node made my algorithm significantly worse. It took a lot of programmer effort and gave me no reward in terms of speed. I did learn a lot about C++ writing this algorithm, so all was not lost. I left the operations cilk/cilk_stubs.h in themthe solution below, but this is a little bit difficult because for each node the serial solution was the best working solution I found. If you want to existtry the parallel version, just remove the parent needs to have been evaluated alreadystubs.
==Source==
 
The program takes it's input from a text file I generated in the same folder as the source code, the file contains the cities for the problem. The text file has 2 columns of integers.
The first column is read as the x value of the city and the second column is the y value of the city. You can adjust the number of cities to be taken from the file in the source code at the top of the main section.
<pre>
#include <cmath>
#include <time.h>
#include <cilk/cilk.h>#include <cilk/reducer_min.h>#include <cilk/cilk_stub.h>
struct Selection {
bool invalidSolution = false;
uint16_t* originalMatrix;
uint16_t* workingCopy;
Node(int amountOfCities, uint16_t* matrix, std::vector<std::vector<Selection>> solution, uint16_t lb);
Node(int amountOfCities, uint16_t* matrix, std::vector<std::vector<Selection>> solution);
void evaluate(){
std::vector<Selectionuint16_t> rowMinimarowMins(matrixSize); std::vector<Selectionuint16_t> colMinimacolMins(matrixSize);
std::vector<Zero> zeroes;
std::vector<bool> zeroInColumn(matrixSize, false);
lowerBound = sumCurrentSolution();
createWorkingCopy();
findRowMinimafor (rowMinima, zeroInColumnint row = 0; row < matrixSize; row++) { int16_t minimum = findRowMinimum(row); rowMins[row] = minimum; if(minimum != UINT16_MAX) lowerBound += sumMinimaminimum; } for (int row = 0; row < matrixSize; row++) { if (rowMinimarowMins[row] != UINT16_MAX && rowMins[row] != 0);{ subtractMinima subtractRowMinimum(rowMinimarowMins[row], truerow); } findColMinima}  for (int col = 0; col < matrixSize; col++) { int16_t minimum = findColMinimum(colMinima, zeroInColumncol); colMins[col] = minimum; if (minimum != UINT16_MAX) lowerBound += sumMinimaminimum; } for (int col = 0; col < matrixSize; col++) { if (colMinimacolMins[col] != UINT16_MAX && colMins[col] != 0);{ subtractMinima subtractRowMinimum(colMinimacolMins[col], falsecol); } } gatherZeroes(rowMinima, colMinima, zeroes);
if (zeroes.empty()) {
invalidSolution = true;
findPenalties(zeroes);
Zero assignment = assignLargestPenalty(zeroes);
 
createLeftNode(solution, assignment);
int subtourIndex = addToResult(solution, assignment);
createRightNode(solution, assignment, subtourIndex);
checkSolutionFound();
}
free(workingCopyoriginalMatrix); }  int16_t findColMinimum(int col) { cilk::reducer< cilk::op_min<uint16_t> > best; cilk_for(int row = 0; row< matrixSize; row++) { best->calc_min(originalMatrix[row*matrixSize + col]); } return best.get_value(); }  int16_t findRowMinimum(int row) { cilk::reducer< cilk::op_min<uint16_t> > best; cilk_for (int col = 0; col < matrixSize; col++) { best->calc_min(originalMatrix[row*matrixSize + col]); } return best.get_value(); }  void subtractRowMinimum(uint16_t val, int row) { if (val != 0) { cilk_for(int col = 0; col < matrixSize; col++) { if (originalMatrix[row*matrixSize + col] != UINT16_MAX) originalMatrix[row*matrixSize + col] -= val; } } }  void subtractColMinimum(uint16_t val, int col) { if (val != 0) { cilk_for(int row = 0; row < matrixSize; row++) { if (originalMatrix[row*matrixSize + col] != UINT16_MAX) originalMatrix[row*matrixSize + col] -= val; } }
}
}
//void createWorkingCopy() { workingCopy // originalMatrix = (uint16_t*)malloc(sizeof(uint16_t) * matrixSize * matrixSize); // memcpy(workingCopyoriginalMatrix, originalMatrix, (sizeof(uint16_t) * matrixSize * matrixSize)); }  void findRowMinima(std::vector<Selection>& rowMinima, std::vector<bool>& zeroInColumn) { for (int row = 0; row < matrixSize; row++) { int rowOffset = row * matrixSize; uint16_t currentMinimum = UINT16_MAX; Selection currentSelection; for (int col = 0; col < matrixSize; col++) { if (workingCopy[rowOffset + col] < currentMinimum) { currentMinimum = workingCopy[rowOffset + col]; currentSelection = Selection(row, col, currentMinimum); zeroInColumn[col] = true; } } if (currentMinimum != UINT16_MAX) { rowMinima.push_back(currentSelection); } } //std::for_each(rowMinima.begin(), rowMinima.end(), [&](Selection s) { // std::cout << " ( " << s.row << ", " << s.col << ") val = " << s.val << " \n"; //});  }  int sumMinima(const std::vector<Selection>& minima) { int sum_of_elems = 0; std::for_each(minima.begin(), minima.end(), [&](Selection s) { sum_of_elems += s.val; }); return sum_of_elems; }  void subtractMinima(std::vector<Selection> minima, const bool row) { if (row) { std::for_each(minima.begin(), minima.end(), [&](Selection s) { if (s.val != 0) { for (int j = 0; j < matrixSize; j++) { if (workingCopy[s.row*matrixSize + j] != UINT16_MAX) { workingCopy[s.row*matrixSize + j] -= s.val; } } } }); } else { std::for_each(minima.begin(), minima.end(), [&](Selection s) { if (s.val != 0) { for (int row = 0; row < matrixSize; row++) { if (workingCopy[row*matrixSize + s.col] != UINT16_MAX) { workingCopy[row*matrixSize + s.col] -= s.val; } } } }); } }  void findColMinima(std::vector<Selection>& colMinima, const std::vector<bool>& zeroInColumn) { for (int col = 0; col < matrixSize; col++) { if (!zeroInColumn[col]) { uint16_t currentMinimum = UINT16_MAX; Selection currentSelection; for (int row = 0; row < matrixSize; row++) { if (workingCopy[row*matrixSize + col] < currentMinimum) { currentMinimum = workingCopy[row*matrixSize + col]; currentSelection = Selection(row, col, currentMinimum); } } if (currentMinimum != UINT16_MAX) { colMinima.push_back(currentSelection); } } } //std::for_each(colMinima.begin(), colMinima.end(), [&](Selection s) { // std::cout << " ( " << s.row << ", " << s.col << ") val = " << s.val << " \n"; //}); }
void printMatrix() {
std::cout << "\n";
for (int j = 0; j < matrixSize; j++) {
std::cout << workingCopyoriginalMatrix[i*matrixSize + j] << " ";
}
}
std::cout << "\n";
}
void gatherZeroes(std::vector<Selection>& rowMinima, std::vector<Selection>& colMinima, std::vector<Zero>& zeroes) {
for (int i = 0; i < matrixSize; i++) {
for (int j = 0; j < matrixSize; j++) {
if (workingCopyoriginalMatrix[i*matrixSize + j] == 0) {
zeroes.push_back(Zero(i, j));
}
uint16_t horizontalMin = UINT16_MAX;
for (int col = 0; col < matrixSize; col++) {
if (col != z.col && workingCopyoriginalMatrix[z.row*matrixSize + col] < horizontalMin) { horizontalMin = workingCopyoriginalMatrix[z.row*matrixSize + col];
}
}
uint16_t verticalMin = UINT16_MAX;
for (int row = 0; row < matrixSize; row++) {
if (row != z.row && workingCopyoriginalMatrix[row * matrixSize + z.col] < verticalMin) { verticalMin = workingCopyoriginalMatrix[row * matrixSize + z.col];
}
}
lowerBound = lb;
}
 
 
class Tree {
}
}
 
void pruneNodeList(std::vector<Node>& nodeList, int upperBound) {
int main() {
int amountOfCities = 15500;
uint16_t* originalMatrix = (uint16_t*)malloc(sizeof(uint16_t)*amountOfCities*amountOfCities);
getMatrixFromFile("cities.txt", amountOfCities, originalMatrix);
}
</pre>
 
==useful links==
#https://www.youtube.com/watch?v=-cLsEHP0qt0 - Branch and Bound explanation from minute 34:30.
#https://www.cilkplus.org/tutorial-reducer-min - using the min reducer in cilk_for