Timing study

This commit is contained in:
Logan Hunt 2021-12-08 01:50:12 -07:00
parent 253b267f1c
commit c846568cf2
152 changed files with 755 additions and 46 deletions

Binary file not shown.

View File

@ -1,11 +1,12 @@
#include "game.cuh" #include "game.cuh"
// Count the number of live neighbors a cell has
__device__ int neighbors(struct GAME game, int x, int y) { __device__ int neighbors(struct GAME game, int x, int y) {
int n = 0; int n = 0;
for (int dy = -1; dy <= 1; dy++) { for (int dy = -1; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++) { for (int dx = -1; dx <= 1; dx++) {
if (!(dx == 0 && dy == 0) && (x+dx) >= 0 && (y+dy) >= 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) { if (!(dx == 0 && dy == 0) && (x+dx) > 0 && (y+dy) > 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) {
if (game.grid[(y+dy) * (game.width+game.padding*2) + (x+dx)]) { if (game.grid[(y+dy) * (game.width+game.padding*2) + (x+dx)]) {
n++; n++;
} }
@ -15,11 +16,14 @@ __device__ int neighbors(struct GAME game, int x, int y) {
return n; return n;
} }
// Compute the next iteration of a board
// We have to give it the newGrid as a parameter otherwise
// each block will be computing its own version of the next grid
__global__ void next(struct GAME game, unsigned char* newGrid) { __global__ void next(struct GAME game, unsigned char* newGrid) {
int idy = blockDim.y * blockIdx.y + threadIdx.y; int idy = blockDim.y * blockIdx.y + threadIdx.y;
int idx = blockDim.x * blockIdx.x + threadIdx.x; int idx = blockDim.x * blockIdx.x + threadIdx.x;
if (idy <= game.height+game.padding*2 && idx <= game.width+game.padding*2) { if (idy < game.height+game.padding*2 && idx < game.width+game.padding*2) {
int my_neighbors = neighbors(game, idx, idy); int my_neighbors = neighbors(game, idx, idy);
int my_coord = idy * (game.width+game.padding*2) + idx; int my_coord = idy * (game.width+game.padding*2) + idx;
newGrid[my_coord] = 0; // It's possible that there are artifacts from the last iteration newGrid[my_coord] = 0; // It's possible that there are artifacts from the last iteration
@ -37,6 +41,7 @@ __global__ void next(struct GAME game, unsigned char* newGrid) {
} }
} }
// Randomly assign life value to each cell
void randomize(struct GAME* game) { void randomize(struct GAME* game) {
for (int y = game->padding; y < game->height+game->padding; y++) { for (int y = game->padding; y < game->height+game->padding; y++) {
for (int x = game->padding; x < game->width+game->padding; x++) { for (int x = game->padding; x < game->width+game->padding; x++) {

View File

@ -18,7 +18,7 @@
*/ */
#define BLOCK 32 #define BLOCK 32
#define PADDING 10 #define PADDING 10
#define VERBOSE 1 //#define VERBOSE 1
#define SEED 100 #define SEED 100
// gpuErrchk source: https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api // gpuErrchk source: https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
@ -33,14 +33,16 @@ true) {
} }
} }
// Do the simulation
void simulate(int argc, char** argv) { void simulate(int argc, char** argv) {
srand(SEED); srand(SEED);
clock_t totalStart = clock(); clock_t global_start = clock();
char* filename; char* filename;
struct GAME game; struct GAME game;
game.padding = PADDING; game.padding = PADDING;
int iterations, log_each_step; int iterations, log_each_step;
if (argc == 7) { if (argc == 7) {
// Parse the arguments
filename = argv[2]; filename = argv[2];
game.width = atoi(argv[3]); game.width = atoi(argv[3]);
game.height = atoi(argv[4]); game.height = atoi(argv[4]);
@ -60,6 +62,7 @@ void simulate(int argc, char** argv) {
game.grid = (unsigned char*)malloc(size); game.grid = (unsigned char*)malloc(size);
memset(game.grid, 0, size); memset(game.grid, 0, size);
// Choose where to read initial position
if (strcmp(filename, "random") == 0) { if (strcmp(filename, "random") == 0) {
randomize(&game); randomize(&game);
} else { } else {
@ -68,43 +71,53 @@ void simulate(int argc, char** argv) {
char iteration_file[1024]; char iteration_file[1024];
// Allocate device memory
unsigned char* grid_d; unsigned char* grid_d;
unsigned char* newGrid; unsigned char* newGrid;
gpuErrchk(cudaMalloc(&grid_d, size)); gpuErrchk(cudaMalloc(&grid_d, size));
gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice));
gpuErrchk(cudaMalloc(&newGrid, size)); gpuErrchk(cudaMalloc(&newGrid, size));
gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice)); // Copy the initial grid to the device
free(game.grid);
game.grid = grid_d; // Use the device copy
// Host-side grid that device results are copied into
unsigned char* grid_h = (unsigned char*)malloc(size); unsigned char* grid_h = (unsigned char*)malloc(size);
unsigned char* temp; unsigned char* temp;
game.grid = grid_d; // Calculate grid width for kernel
int grid_width = (int)ceil((game.width+(2*game.padding))/(float)BLOCK);
int grid_num = (int)ceil((game.width+(2*game.padding))/(float)BLOCK); int grid_height = (int)ceil((game.height+(2*game.padding))/(float)BLOCK);
dim3 dim_grid(grid_num, grid_num, 1); dim3 dim_grid(grid_width, grid_height, 1);
dim3 dim_block(BLOCK, BLOCK, 1); dim3 dim_block(BLOCK, BLOCK, 1);
cudaEvent_t startLife, stopLife; // Timing
cudaEventCreate(&startLife); cudaEvent_t start, end;
cudaEventCreate(&stopLife); cudaEventCreate(&start);
double timeComputingLife = 0; cudaEventCreate(&end);
float localTime = 0; double time_computing_life = 0;
float local_time = 0;
for (int i = 0; i <= iterations; i++) { for (int i = 0; i <= iterations; i++) {
// Iteration 0 will just be the initial grid
if (i > 0) { if (i > 0) {
cudaEventRecord(startLife); cudaEventRecord(start);
// Compute the next grid
next<<<dim_grid, dim_block>>>(game, newGrid); next<<<dim_grid, dim_block>>>(game, newGrid);
cudaEventRecord(stopLife); cudaEventRecord(end);
cudaEventSynchronize(stopLife); cudaEventSynchronize(end);
cudaEventElapsedTime(&localTime, startLife, stopLife); cudaEventElapsedTime(&local_time, start, end);
timeComputingLife += localTime/1000; time_computing_life += local_time/1000;
// Swap game.grid and newGrid
temp = game.grid; temp = game.grid;
game.grid = newGrid; game.grid = newGrid;
newGrid = temp; newGrid = temp;
} }
if (log_each_step) { if (log_each_step) {
// If we are logging each step, perform IO operations
gpuErrchk(cudaMemcpy(grid_h, game.grid, size, cudaMemcpyDeviceToHost)); gpuErrchk(cudaMemcpy(grid_h, game.grid, size, cudaMemcpyDeviceToHost));
#ifdef VERBOSE #ifdef VERBOSE
// Print the board without the padding elements
printf("\n===Iteration %i===\n", i); printf("\n===Iteration %i===\n", i);
for (int y = game.padding; y < game.height+game.padding; y++) { for (int y = game.padding; y < game.height+game.padding; y++) {
for (int x = game.padding; x < game.width+game.padding; x++) { for (int x = game.padding; x < game.width+game.padding; x++) {
@ -114,6 +127,7 @@ void simulate(int argc, char** argv) {
} }
printf("===End iteration %i===\n", i); printf("===End iteration %i===\n", i);
#endif #endif
// Save to a file
sprintf(iteration_file, "output/iteration-%07d.bin", i); sprintf(iteration_file, "output/iteration-%07d.bin", i);
temp = game.grid; temp = game.grid;
game.grid = grid_h; game.grid = grid_h;
@ -122,13 +136,7 @@ void simulate(int argc, char** argv) {
} }
} }
clock_t totalEnd = clock(); printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", time_computing_life, ((double)clock() - (double)global_start)/CLOCKS_PER_SEC);
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", timeComputingLife, ((double)totalEnd - (double)totalStart)/CLOCKS_PER_SEC);
cudaFree(&newGrid);
cudaFree(&grid_d);
cudaFree(&game.grid);
free(grid_h);
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.169687
Clock time: 1.560000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.254989
Clock time: 2.240000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.354361
Clock time: 3.050000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.480174
Clock time: 4.070000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.619636
Clock time: 5.220000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.029867
Clock time: 0.330000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.059907
Clock time: 0.540000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.110954
Clock time: 1.000000

View File

@ -0,0 +1,18 @@
#!/bin/bash
#SBATCH --time=0:30:00 # walltime, abbreviated by -t
#SBATCH --nodes=1 # number of cluster nodes, abbreviated by -N
#SBATCH -o slurm-%j.out-%N # name of the stdout, using the job number (%j) and the first node (%N)
#SBATCH -e slurm-%j.err-%N # name of the stderr, using job and first node values
#SBATCH --ntasks=1 # number of MPI tasks, abbreviated by -n
# additional information for allocated clusters
#SBATCH --account=notchpeak-shared-short # account - abbreviated by -A
#SBATCH --partition=notchpeak-shared-short # partition, abbreviated by -p
#SBATCH --gres=gpu:k80:1

# Timing study for the CUDA (global memory) Game of Life build.
# Runs the simulation on a random square board for each size below and stores
# the program's stdout (the "===Timing===" report) in
# timing-study/output-<cores>-<iterations>-<size>.txt.

# Abort if the project directory is missing instead of running from the wrong place.
cd "$HOME/gol/cuda-global" || exit 1

# The redirect below fails if the directory does not exist; -p makes re-runs harmless.
mkdir -p timing-study

iterations=1000
# This job uses a single task (--ntasks=1). Previously $cores was never set, so
# the file names came out as "output--<iterations>-<size>.txt".
cores=1

for size in 250 500 750 1000 1250 1500 1750 2000
do
    # Arguments: simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0>
    srun ./gol simulate random "$size" "$size" "$iterations" 1 > "timing-study/output-$cores-$iterations-$size.txt"
done

BIN
mpi/gol

Binary file not shown.

View File

@ -15,10 +15,12 @@
Any live cell with more than three live neighbors dies (overpopulation). Any live cell with more than three live neighbors dies (overpopulation).
Any dead cell with exactly three live neighbors becomes a live cell (reproduction). Any dead cell with exactly three live neighbors becomes a live cell (reproduction).
*/ */
#define PADDING 16 #define PADDING 10
//#define VERBOSE 1 //#define VERBOSE 1
#define SEED 100 #define SEED 100
// A structure to keep the global arguments because each process
// will use its own GAME structure
struct Args { struct Args {
int process_count; int process_count;
int iterations; int iterations;
@ -30,6 +32,7 @@ struct Args {
int data_per_proc; int data_per_proc;
}; };
// Make a datatype out of an Args struct
void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) { void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) {
int blocks[8] = {1,1,1,1,1,1,1,1}; int blocks[8] = {1,1,1,1,1,1,1,1};
MPI_Aint displacements[8]; MPI_Aint displacements[8];
@ -50,6 +53,7 @@ void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) {
MPI_Bcast(args, 1, arg_t, 0, comm); MPI_Bcast(args, 1, arg_t, 0, comm);
} }
// Scatter the grid among nodes
void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, int rank, int* data_counts, int* displacements, char* filename) { void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, int rank, int* data_counts, int* displacements, char* filename) {
unsigned char* data; unsigned char* data;
@ -63,12 +67,14 @@ void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, i
data = malloc(size); data = malloc(size);
memset(data, 0, size); memset(data, 0, size);
game.grid = data; game.grid = data;
// Choose where to read initial position
if (strcmp(filename, "random") == 0) { if (strcmp(filename, "random") == 0) {
randomize(&game); randomize(&game);
} else { } else {
read_in(filename, &game); read_in(filename, &game);
} }
} }
// Do the scatter (some nodes may work on more rows)
MPI_Scatterv(data, data_counts, displacements, MPI_UNSIGNED_CHAR, local_data, data_counts[rank], MPI_UNSIGNED_CHAR, 0, comm); MPI_Scatterv(data, data_counts, displacements, MPI_UNSIGNED_CHAR, local_data, data_counts[rank], MPI_UNSIGNED_CHAR, 0, comm);
if (rank == 0) { if (rank == 0) {
@ -77,12 +83,13 @@ void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, i
} }
// Do the simulation
void simulate(int argc, char** argv) { void simulate(int argc, char** argv) {
srand(SEED); srand(SEED);
double totalStart = MPI_Wtime();
struct Args args; struct Args args;
args.padding = PADDING; args.padding = PADDING;
// Initialize MPI stuff
int rank, process_count; int rank, process_count;
MPI_Comm comm; MPI_Comm comm;
MPI_Init(&argc, &argv); MPI_Init(&argc, &argv);
@ -91,7 +98,9 @@ void simulate(int argc, char** argv) {
MPI_Comm_size(comm, &args.process_count); MPI_Comm_size(comm, &args.process_count);
char* filename; char* filename;
double global_start;
if (rank == 0) { if (rank == 0) {
// Parse the arguments
if (argc == 7) { if (argc == 7) {
filename = argv[2]; filename = argv[2];
args.width = atoi(argv[3]); args.width = atoi(argv[3]);
@ -99,7 +108,7 @@ void simulate(int argc, char** argv) {
args.iterations = atoi(argv[5]); args.iterations = atoi(argv[5]);
args.log_each_step = atoi(argv[6]); args.log_each_step = atoi(argv[6]);
} else { } else {
printf("Usage: ./gol simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0> <block-size>\n"); printf("Usage: ./gol simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0>\n");
filename = "random"; filename = "random";
args.height = 5; args.height = 5;
args.width = 5; args.width = 5;
@ -107,12 +116,17 @@ void simulate(int argc, char** argv) {
args.log_each_step = 0; args.log_each_step = 0;
} }
global_start = MPI_Wtime();
// Figure out how much work the average node will be doing
args.rows_per_proc = (args.height + args.padding*2)/args.process_count; args.rows_per_proc = (args.height + args.padding*2)/args.process_count;
args.data_per_proc = args.rows_per_proc * (args.width + args.padding*2); args.data_per_proc = args.rows_per_proc * (args.width + args.padding*2);
} }
broadcast_and_receive_input(comm, &args); broadcast_and_receive_input(comm, &args);
// Calculate the exact work each thread will do and arguments for
// the Scatterv to scatter the grid
int grid_size = ((args.width + args.padding*2)*(args.height + args.padding*2)); int grid_size = ((args.width + args.padding*2)*(args.height + args.padding*2));
int* data_counts = malloc(sizeof(int) * args.process_count); int* data_counts = malloc(sizeof(int) * args.process_count);
int* displacements = malloc(sizeof(int) * args.process_count); int* displacements = malloc(sizeof(int) * args.process_count);
@ -123,19 +137,20 @@ void simulate(int argc, char** argv) {
data_counts[args.process_count-1] += grid_size % (args.data_per_proc * args.process_count); data_counts[args.process_count-1] += grid_size % (args.data_per_proc * args.process_count);
unsigned char* local_data = malloc(data_counts[rank]*sizeof(unsigned char)); unsigned char* local_data = malloc(data_counts[rank]*sizeof(unsigned char));
memset(local_data, 0, sizeof(unsigned char) * data_counts[rank]); memset(local_data, 0, sizeof(unsigned char) * data_counts[rank]);
// Scatter the data among nodes
scatter_data(comm, &args, local_data, rank, data_counts, displacements, filename); scatter_data(comm, &args, local_data, rank, data_counts, displacements, filename);
// Allocate space for current grid (1 byte per tile)
char iteration_file[1024]; char iteration_file[1024];
double timeComputingLife = 0; // Local_game is our current job
float localTime = 0;
struct GAME local_game; struct GAME local_game;
local_game.grid = local_data; local_game.grid = local_data;
local_game.width = args.width; local_game.width = args.width;
local_game.height = data_counts[rank] / (args.width + args.padding*2); local_game.height = data_counts[rank] / (args.width + args.padding*2);
local_game.padding = args.padding; local_game.padding = args.padding;
// Halo buffers that receive the boundary rows from the nodes above and below
unsigned char* halo_above = NULL; unsigned char* halo_above = NULL;
unsigned char* halo_below = NULL; unsigned char* halo_below = NULL;
if (rank > 0) { if (rank > 0) {
@ -148,32 +163,46 @@ void simulate(int argc, char** argv) {
} }
unsigned char* global_data; unsigned char* global_data;
if (rank == 0) {
global_data = malloc(sizeof(unsigned char) * grid_size);
memset(global_data, 0, sizeof(unsigned char) * grid_size);
}
// Timing code
double time_computing_life = 0;
double start,end;
for (int i = 0; i <= args.iterations; i++) { for (int i = 0; i <= args.iterations; i++) {
// Iteration 0 will just be the initial grid
if (i > 0) { if (i > 0) {
int total_width = args.width + args.padding*2; int total_width = args.width + args.padding*2;
MPI_Status status;
if (rank < args.process_count - 1) { if (rank < args.process_count - 1) {
MPI_Send(&local_game.grid[(local_game.height-1) * total_width], total_width, MPI_UNSIGNED_CHAR, rank+1, 1, comm); MPI_Send(&local_game.grid[(local_game.height-1) * total_width], total_width, MPI_UNSIGNED_CHAR, rank+1, 1, comm);
} }
if (rank > 0) { if (rank > 0) {
MPI_Recv(halo_above, total_width, MPI_UNSIGNED_CHAR, rank-1, 1, comm, NULL); MPI_Recv(halo_above, total_width, MPI_UNSIGNED_CHAR, rank-1, 1, comm, &status);
MPI_Send(&local_game.grid[0], total_width, MPI_UNSIGNED_CHAR, rank-1, 0, comm); MPI_Send(&local_game.grid[0], total_width, MPI_UNSIGNED_CHAR, rank-1, 0, comm);
} }
if (rank < args.process_count - 1) { if (rank < args.process_count - 1) {
MPI_Recv(halo_below, total_width, MPI_UNSIGNED_CHAR, rank+1, 0, comm, NULL); MPI_Recv(halo_below, total_width, MPI_UNSIGNED_CHAR, rank+1, 0, comm, &status);
} }
MPI_Barrier(comm); MPI_Barrier(comm);
start = MPI_Wtime();
// Compute the next grid
next(&local_game, halo_above, halo_below); next(&local_game, halo_above, halo_below);
end = MPI_Wtime();
time_computing_life += end-start;
} }
if (args.log_each_step) { if (args.log_each_step) {
if (rank == 0) { // If we are logging each step, perform IO operations
global_data = malloc(sizeof(unsigned char) * grid_size); // Gather all of the local grids into global_data
memset(global_data, 0, sizeof(unsigned char) * grid_size);
}
MPI_Gatherv(local_game.grid, data_counts[rank], MPI_UNSIGNED_CHAR, global_data, data_counts, displacements, MPI_UNSIGNED_CHAR, 0, comm); MPI_Gatherv(local_game.grid, data_counts[rank], MPI_UNSIGNED_CHAR, global_data, data_counts, displacements, MPI_UNSIGNED_CHAR, 0, comm);
if (rank == 0) { if (rank == 0) {
#ifdef VERBOSE #if VERBOSE == 1
printf("\n===Iteration %i===\n", i); printf("\n===Iteration %i===\n", i);
// Print the board without the padding elements
for (int y = args.padding; y < args.height+args.padding; y++) { for (int y = args.padding; y < args.height+args.padding; y++) {
for (int x = args.padding; x < args.width+args.padding; x++) { for (int x = args.padding; x < args.width+args.padding; x++) {
printf("%s ", global_data[y*(args.width+2*args.padding) + x] ? "X" : " "); printf("%s ", global_data[y*(args.width+2*args.padding) + x] ? "X" : " ");
@ -183,6 +212,7 @@ void simulate(int argc, char** argv) {
printf("===End iteration %i===\n", i); printf("===End iteration %i===\n", i);
#endif #endif
// Save to a file
struct GAME global_game; struct GAME global_game;
global_game.grid = global_data; global_game.grid = global_data;
global_game.width = args.width; global_game.width = args.width;
@ -194,12 +224,15 @@ void simulate(int argc, char** argv) {
} }
} }
double totalEnd = MPI_Wtime(); double total_end = MPI_Wtime();
MPI_Finalize();
if (rank == 0) { if (rank == 0) {
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", timeComputingLife, (totalEnd - totalStart)); printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", time_computing_life, (total_end - global_start));
free(local_game.grid);
free(data_counts);
free(halo_above);
free(halo_below);
} }
MPI_Finalize();
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 33.832562
Clock time: 37.939663

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 21716 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 75.141736
Clock time: 83.149478

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 21837 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 132.636661
Clock time: 145.001708

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.383001
Clock time: 4.113476

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.793952
Clock time: 9.832794

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 19.270078
Clock time: 21.813069

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.833550
Clock time: 6.323680

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.347700
Clock time: 9.178630

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 23209 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.483342
Clock time: 17.330302

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 23290 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.198089
Clock time: 2.217166

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.735509
Clock time: 2.513034

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.617002
Clock time: 4.091923

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.106571
Clock time: 7.500836

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 3.445883
Clock time: 11.167682

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.741983
Clock time: 16.777514

View File

@ -0,0 +1,8 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 34784 RUNNING AT kp160
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.301682
Clock time: 28.791425

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.145483
Clock time: 2.572587

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.570992
Clock time: 3.899400

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.215016
Clock time: 5.047125

View File

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.414322
Clock time: 9.439315

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.171989
Clock time: 13.927639

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 3.133675
Clock time: 19.271850

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.398371
Clock time: 25.650748

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 5.639865
Clock time: 33.529967

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.100765
Clock time: 2.412458

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.465147
Clock time: 3.942927

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.815429
Clock time: 5.642879

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.467197
Clock time: 11.707533

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 22126 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 18.823087
Clock time: 26.449810

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 22197 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 33.274214
Clock time: 45.841294

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.599813
Clock time: 2.807879

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.212790
Clock time: 4.133439

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.830949
Clock time: 6.854574

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.226861
Clock time: 7.517444

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 22852 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 9.416485
Clock time: 16.706325

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 12.741221
Clock time: 22.281683

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 16.578412
Clock time: 26.921717

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.296146
Clock time: 2.211905

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.111486
Clock time: 2.710176

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.419305
Clock time: 4.675962

View File

@ -0,0 +1,11 @@
mkdir: cannot create directory timing-study: File exists
[proxy:0:0@kp013] HYD_pmcd_pmip_control_cmd_cb (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmip_cb.c:887): assert (!closed) failed
[proxy:0:0@kp013] HYDT_dmxu_poll_wait_for_event (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/demux/demux_poll.c:76): callback returned error status
[proxy:0:0@kp013] main (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmip.c:202): demux engine error waiting for event
srun: error: kp013: task 0: Exited with exit code 7
[mpiexec@kp013] HYDT_bscu_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/bootstrap/utils/bscu_wait.c:76): one of the processes terminated badly; aborting
[mpiexec@kp013] HYDT_bsci_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/bootstrap/src/bsci_wait.c:23): launcher returned error waiting for completion
[mpiexec@kp013] HYD_pmci_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c:218): launcher returned error waiting for completion
[mpiexec@kp013] main (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/ui/mpich/mpiexec.c:340): process manager error waiting for completion
srun: error: Unable to create step for job 10870703: Job/step already completing or completed
slurmstepd: error: *** JOB 10870703 ON kp013 CANCELLED AT 2021-12-08T01:29:02 DUE TO TIME LIMIT ***

View File

@ -0,0 +1,24 @@
#!/bin/bash
#SBATCH --time=0:10:00 # walltime, abbreviated by -t
#SBATCH --nodes=2 # number of cluster nodes, abbreviated by -N
#SBATCH -o slurm-%j.out-%N # name of the stdout, using the job number (%j) and the first node (%N)
#SBATCH -e slurm-%j.err-%N # name of the stderr, using job and first node values
#SBATCH --ntasks=24 # number of MPI tasks, abbreviated by -n
# additional information for allocated clusters
#SBATCH --account=usucs5030 # account - abbreviated by -A
#SBATCH --partition=kingspeak # partition, abbreviated by -p

# Timing study for the MPI Game of Life build.
# For every (process count, board size) pair, runs the simulation on a random
# square board and stores the program's stdout (the "===Timing===" report) in
# timing-study/output-<cores>-<iterations>-<size>.txt.
# NOTE(review): a captured log from an earlier run shows the job being cancelled
# for exceeding its time limit; the walltime above may need to be raised to
# finish the whole sweep.

# Abort if the project directory is missing instead of running from the wrong place.
cd "$HOME/gol/mpi" || exit 1

# -p: do not fail with "File exists" when the study is re-run.
mkdir -p timing-study

module load intel mpich

iterations=1000
for cores in 1 4 8 12 16 20 #24
do
    for size in 250 500 750 1000 1250 1500 1750 2000
    do
        # Arguments: simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0>
        # A failing run does not stop the sweep; the remaining configurations still execute.
        mpirun -np "$cores" ./gol simulate random "$size" "$size" "$iterations" 1 > "timing-study/output-$cores-$iterations-$size.txt"
    done
done
done

View File

@ -1,5 +1,6 @@
#include "create_grid.h" #include "create_grid.h"
// Print entirety of a grid to verify input
void print_grid(struct GAME* game) { void print_grid(struct GAME* game) {
printf("\n===GRID===\n"); printf("\n===GRID===\n");
for (int y = 0; y < game->height; y++) { for (int y = 0; y < game->height; y++) {
@ -10,6 +11,7 @@ void print_grid(struct GAME* game) {
} }
} }
// Go through user input
void create_grid(int argc, char** argv) { void create_grid(int argc, char** argv) {
char* filename; char* filename;
struct GAME game; struct GAME game;

View File

@ -1,5 +1,6 @@
#include "file.h" #include "file.h"
// Read a grid from a binary file into the space without padding
void read_in(char* filename, struct GAME* game) { void read_in(char* filename, struct GAME* game) {
FILE* file = fopen(filename, "rb"); FILE* file = fopen(filename, "rb");
for (int i = game->padding; i < game->height+game->padding; i++) { for (int i = game->padding; i < game->height+game->padding; i++) {
@ -8,6 +9,7 @@ void read_in(char* filename, struct GAME* game) {
fclose(file); fclose(file);
} }
// Write a grid to a binary file into the space without padding
void write_out(char* filename, struct GAME* game) { void write_out(char* filename, struct GAME* game) {
FILE* file = fopen(filename, "w+"); FILE* file = fopen(filename, "w+");
for (int i = game->padding; i < game->height+game->padding; i++) { for (int i = game->padding; i < game->height+game->padding; i++) {

View File

@ -1,5 +1,6 @@
#include "game.h" #include "game.h"
// Calculate the number of live neighbors a cell has
int neighbors(struct GAME* game, int x, int y) { int neighbors(struct GAME* game, int x, int y) {
int n = 0; int n = 0;
for (int dy = -1; dy <= 1; dy++) { for (int dy = -1; dy <= 1; dy++) {
@ -15,6 +16,7 @@ int neighbors(struct GAME* game, int x, int y) {
return n; return n;
} }
// Compute the next iteration of a board
void next(struct GAME* game, int threads) { void next(struct GAME* game, int threads) {
unsigned char** newGrid = malloc(sizeof(unsigned char*) * (game->height+(game->padding*2))); unsigned char** newGrid = malloc(sizeof(unsigned char*) * (game->height+(game->padding*2)));
int y,x,i,size; int y,x,i,size;
@ -30,10 +32,12 @@ void next(struct GAME* game, int threads) {
#pragma omp parallel num_threads(threads) shared(per_thread, threads, total_width, total_height, newGrid, game) private(y,x,i) #pragma omp parallel num_threads(threads) shared(per_thread, threads, total_width, total_height, newGrid, game) private(y,x,i)
{ {
// Each thread gets a number of cells to compute
int me = omp_get_thread_num(); int me = omp_get_thread_num();
int thread_start = per_thread * me; int thread_start = per_thread * me;
int thread_end = thread_start + per_thread + (me == threads-1 ? (total_width*total_height) % per_thread : 0); int thread_end = thread_start + per_thread + (me == threads-1 ? (total_width*total_height) % per_thread : 0);
for (i = thread_start; i < thread_end; i++) { for (i = thread_start; i < thread_end; i++) {
// Iterate through each cell assigned for this thread
y = i / total_width; y = i / total_width;
x = i % total_width; x = i % total_width;
int my_neighbors = neighbors(game, x, y); int my_neighbors = neighbors(game, x, y);
@ -55,6 +59,7 @@ void next(struct GAME* game, int threads) {
game->grid = newGrid; game->grid = newGrid;
} }
// Randomly assign a life value to each cell
void randomize(struct GAME* game) { void randomize(struct GAME* game) {
for (int y = game->padding; y < game->height+game->padding; y++) { for (int y = game->padding; y < game->height+game->padding; y++) {
for (int x = game->padding; x < game->width+game->padding; x++) { for (int x = game->padding; x < game->width+game->padding; x++) {

View File

@ -19,6 +19,7 @@
//#define VERBOSE 1 //#define VERBOSE 1
#define SEED 100 #define SEED 100
// Do the simulation
void simulate(int argc, char** argv) { void simulate(int argc, char** argv) {
srand(SEED); srand(SEED);
char* filename; char* filename;
@ -26,6 +27,7 @@ void simulate(int argc, char** argv) {
game.padding = PADDING; game.padding = PADDING;
int iterations, log_each_step, threads; int iterations, log_each_step, threads;
if (argc == 8) { if (argc == 8) {
// Parse the arguments
filename = argv[2]; filename = argv[2];
game.width = atoi(argv[3]); game.width = atoi(argv[3]);
game.height = atoi(argv[4]); game.height = atoi(argv[4]);
@ -51,6 +53,7 @@ void simulate(int argc, char** argv) {
memset(game.grid[i], 0, game.width+(2*game.padding)); memset(game.grid[i], 0, game.width+(2*game.padding));
} }
// Choose where to read initial position
if (strcmp(filename, "random") == 0) { if (strcmp(filename, "random") == 0) {
randomize(&game); randomize(&game);
} else { } else {
@ -62,16 +65,19 @@ void simulate(int argc, char** argv) {
double start, end; double start, end;
for (int i = 0; i <= iterations; i++) { for (int i = 0; i <= iterations; i++) {
// Iteration 0 will just be the initial grid
if (i > 0) { if (i > 0) {
// Iteration 0 is just the input board
start = omp_get_wtime(); start = omp_get_wtime();
// Compute the next grid with threads
next(&game, threads); next(&game, threads);
end = omp_get_wtime(); end = omp_get_wtime();
time_computing_life += ((double) (end - start)); time_computing_life += ((double) (end - start));
} }
if (log_each_step) { if (log_each_step) {
// If we are logging each step, perform IO operations
#if VERBOSE == 1 #if VERBOSE == 1
printf("\n===Iteration %i===\n", i); printf("\n===Iteration %i===\n", i);
// Print the board without the padding elements
for (int y = game.padding; y < game.height+game.padding; y++) { for (int y = game.padding; y < game.height+game.padding; y++) {
for (int x = game.padding; x < game.width+game.padding; x++) { for (int x = game.padding; x < game.width+game.padding; x++) {
printf("%s ", game.grid[y][x] ? "X" : " "); printf("%s ", game.grid[y][x] ? "X" : " ");
@ -80,6 +86,7 @@ void simulate(int argc, char** argv) {
} }
printf("===End iteration %i===\n", i); printf("===End iteration %i===\n", i);
#endif #endif
// Save to a file
sprintf(iteration_file, "output/iteration-%07d.bin", i); sprintf(iteration_file, "output/iteration-%07d.bin", i);
write_out(iteration_file, &game); write_out(iteration_file, &game);
} }

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 73.312715
Clock time: 77.450210

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 113.646203
Clock time: 118.646829

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 163.034248
Clock time: 171.017339

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 220.656360
Clock time: 231.050593

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 287.698970
Clock time: 300.176503

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 5.088550
Clock time: 5.963468

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 18.956314
Clock time: 20.440567

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 41.660313
Clock time: 44.581177

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 7.026118
Clock time: 10.251363

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 10.801676
Clock time: 15.900482

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 25.918769
Clock time: 34.562182

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 25.862278
Clock time: 34.828966

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 30.705054
Clock time: 43.042410

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.860293
Clock time: 3.331446

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.097878
Clock time: 3.643646

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.321400
Clock time: 6.663178

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 9.131005
Clock time: 12.449032

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 13.434282
Clock time: 18.116181

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 16.706991
Clock time: 24.712374

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 23.733395
Clock time: 33.306681

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 30.429469
Clock time: 42.369926

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.738930
Clock time: 3.383995

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.352756
Clock time: 3.601057

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 5.147589
Clock time: 7.427564

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 7.390921
Clock time: 11.239737

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 11.254777
Clock time: 16.136264

Some files were not shown because too many files have changed in this diff Show More