Open main menu

CDOT Wiki β

Changes

DPS921/Intel Advisor

1,734 bytes added, 20:08, 4 December 2020
Intel Memory Access Pattern Analysis
= Intel Memory Access Pattern Analysis =
We can use the Memory Access Patterns (MAP) analysis tool to check for memory access issues, such as non-contiguous memory accesses and non-unit strides.
<source>#include <iostream>
using namespace std;
 
// Number of elements in structArray.
const long int SIZE = 3500000;
 
// Element type of structArray. It has two members, so when a loop touches
// only one of them across the array, successive accesses are
// sizeof(tricky) bytes apart rather than sizeof(int).
// (Plain `struct`: the C-style `typedef struct ... name;` is redundant in C++.)
struct tricky
{
    int member1;
    float member2;
};
 
// Global array of structs written by main(); static storage, so it is
// zero-initialised before main() runs.
tricky structArray[SIZE];
 
// Writes member1 of every element of structArray, printing a marker before
// and after the loop. member2 is never touched, so consecutive writes land
// sizeof(tricky) bytes apart (presumably the non-contiguous access this
// sample is meant to show in the MAP analysis).
int main()
{
    std::cout << "Starting.\n";
    for (long int i = 0; i < SIZE; ++i)
    {
        // The expression is evaluated in long and stored into an int member;
        // the cast makes that narrowing explicit. With SIZE = 3500000 the
        // largest value stays far below the int limit on 32-bit-int targets.
        structArray[i].member1 = static_cast<int>((i / 25) + i - 78);
    }
    std::cout << "Done.\n";
    // Plain 0 instead of EXIT_SUCCESS: that macro lives in <cstdlib>, which
    // this sample never includes. Returning 0 from main means the same thing.
    return 0;
}
 
#include <iostream>
#include <time.h>
using namespace std;
 
// Outer-loop repetitions of each timed test in main().
const int LOOPS = 1500000;
// Element count of both arrays below.
const int SIZE = 14992;
// Inner-loop iterations per repetition: half of SIZE, so the stride-2 loop
// (j < STEPS * 2, j += 2) stays inside the arrays.
const int STEPS = SIZE / 2;
 
// Buffers written by the timed loops; explicitly zero-initialised.
float floatArray[SIZE] = {};
double doubleArray[SIZE] = {};
 
// Timestamps taken immediately before and after each timed loop.
time_t start = 0;
time_t finish = 0;
 
// Times three fill loops over the global arrays and prints the elapsed
// whole seconds for each. Every test runs LOOPS outer iterations of STEPS
// inner iterations; only the element type and the index stride differ.
// The three loops are intentionally kept as separate source loops (not
// folded into a helper) so each one remains its own loop site.
int main()
{
    // Contiguous data access, same number of iterations as the noncontiguous.
    start = time(NULL);
#pragma nounroll
    for (float i = 0; i < LOOPS; i++)
    {
#pragma nounroll
        for (int j = 0; j < STEPS; j += 1)
        {
            floatArray[j] = i;
        }
    }
    finish = time(NULL);
    // difftime() is the portable way to subtract two time_t values; the cast
    // keeps the output an integer count of seconds, as before.
    cout << "Contiguous Float: " << static_cast<long>(difftime(finish, start)) << "\n";

    // Contiguous data access on doubles, so that it should require roughly
    // the same number of cache line loads as the 2-stride float loop.
    start = time(NULL);
#pragma nounroll
    for (double i = 0; i < LOOPS; i++)
    {
#pragma nounroll
        for (int j = 0; j < STEPS; j += 1)
        {
            doubleArray[j] = i;
        }
    }
    finish = time(NULL);
    cout << "Contiguous Double: " << static_cast<long>(difftime(finish, start)) << "\n";

    // Stride-2 float. Same number of iterations as the contiguous version,
    // same number of cache line loads as the double loop. Slower than both
    // (per the original author's note).
    start = time(NULL);
#pragma nounroll
    for (float i = 0; i < LOOPS; i++)
    {
#pragma nounroll
        // j visits every second element: 0, 2, ..., STEPS * 2 - 2.
        for (int j = 0; j < STEPS * 2; j += 2)
        {
            floatArray[j] = i;
        }
    }
    finish = time(NULL);
    cout << "Noncontiguous Float: " << static_cast<long>(difftime(finish, start)) << "\n";

    // Plain 0 instead of EXIT_SUCCESS: that macro lives in <cstdlib>, which
    // this sample never includes. Returning 0 from main means the same thing.
    return 0;
}</source>
= Intel Dependencies Analysis =
62
edits