Timing study

This commit is contained in:
Logan Hunt 2021-12-08 01:50:12 -07:00
parent 253b267f1c
commit c846568cf2
152 changed files with 755 additions and 46 deletions

Binary file not shown.

View File

@ -1,11 +1,12 @@
#include "game.cuh"
// Count the number of live neighbors a cell has
__device__ int neighbors(struct GAME game, int x, int y) {
int n = 0;
for (int dy = -1; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++) {
if (!(dx == 0 && dy == 0) && (x+dx) >= 0 && (y+dy) >= 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) {
if (!(dx == 0 && dy == 0) && (x+dx) > 0 && (y+dy) > 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) {
if (game.grid[(y+dy) * (game.width+game.padding*2) + (x+dx)]) {
n++;
}
@ -15,11 +16,14 @@ __device__ int neighbors(struct GAME game, int x, int y) {
return n;
}
// Compute the next iteration of a board
// We have to give it the newGrid as a parameter otherwise
// each block will be computing its own version of the next grid
__global__ void next(struct GAME game, unsigned char* newGrid) {
int idy = blockDim.y * blockIdx.y + threadIdx.y;
int idx = blockDim.x * blockIdx.x + threadIdx.x;
if (idy <= game.height+game.padding*2 && idx <= game.width+game.padding*2) {
if (idy < game.height+game.padding*2 && idx < game.width+game.padding*2) {
int my_neighbors = neighbors(game, idx, idy);
int my_coord = idy * (game.width+game.padding*2) + idx;
newGrid[my_coord] = 0; // It's possible that there are artifacts from the last iteration
@ -37,6 +41,7 @@ __global__ void next(struct GAME game, unsigned char* newGrid) {
}
}
// Randomly assign life value to each cell
void randomize(struct GAME* game) {
for (int y = game->padding; y < game->height+game->padding; y++) {
for (int x = game->padding; x < game->width+game->padding; x++) {

View File

@ -18,7 +18,7 @@
*/
#define BLOCK 32
#define PADDING 10
#define VERBOSE 1
//#define VERBOSE 1
#define SEED 100
// gpuErrchk source: https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
@ -33,14 +33,16 @@ true) {
}
}
// Do the simulation
void simulate(int argc, char** argv) {
srand(SEED);
clock_t totalStart = clock();
clock_t global_start = clock();
char* filename;
struct GAME game;
game.padding = PADDING;
int iterations, log_each_step;
if (argc == 7) {
// Parse the arguments
filename = argv[2];
game.width = atoi(argv[3]);
game.height = atoi(argv[4]);
@ -60,6 +62,7 @@ void simulate(int argc, char** argv) {
game.grid = (unsigned char*)malloc(size);
memset(game.grid, 0, size);
// Choose where to read initial position
if (strcmp(filename, "random") == 0) {
randomize(&game);
} else {
@ -68,43 +71,53 @@ void simulate(int argc, char** argv) {
char iteration_file[1024];
// Allocate device memory
unsigned char* grid_d;
unsigned char* newGrid;
gpuErrchk(cudaMalloc(&grid_d, size));
gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice));
gpuErrchk(cudaMalloc(&newGrid, size));
gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice)); // Copy the initial grid to the device
free(game.grid);
game.grid = grid_d; // Use the device copy
// The grid that we will copy results
unsigned char* grid_h = (unsigned char*)malloc(size);
unsigned char* temp;
game.grid = grid_d;
int grid_num = (int)ceil((game.width+(2*game.padding))/(float)BLOCK);
dim3 dim_grid(grid_num, grid_num, 1);
// Calculate grid width for kernel
int grid_width = (int)ceil((game.width+(2*game.padding))/(float)BLOCK);
int grid_height = (int)ceil((game.height+(2*game.padding))/(float)BLOCK);
dim3 dim_grid(grid_width, grid_height, 1);
dim3 dim_block(BLOCK, BLOCK, 1);
cudaEvent_t startLife, stopLife;
cudaEventCreate(&startLife);
cudaEventCreate(&stopLife);
double timeComputingLife = 0;
float localTime = 0;
// Timing
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);
double time_computing_life = 0;
float local_time = 0;
for (int i = 0; i <= iterations; i++) {
// Iteration 0 will just be the initial grid
if (i > 0) {
cudaEventRecord(startLife);
cudaEventRecord(start);
// Compute the next grid
next<<<dim_grid, dim_block>>>(game, newGrid);
cudaEventRecord(stopLife);
cudaEventSynchronize(stopLife);
cudaEventElapsedTime(&localTime, startLife, stopLife);
timeComputingLife += localTime/1000;
cudaEventRecord(end);
cudaEventSynchronize(end);
cudaEventElapsedTime(&local_time, start, end);
time_computing_life += local_time/1000;
// Swap game.grid and newGrid
temp = game.grid;
game.grid = newGrid;
newGrid = temp;
}
if (log_each_step) {
// If we are logging each step, perform IO operations
gpuErrchk(cudaMemcpy(grid_h, game.grid, size, cudaMemcpyDeviceToHost));
#ifdef VERBOSE
// Print the board without the padding elements
printf("\n===Iteration %i===\n", i);
for (int y = game.padding; y < game.height+game.padding; y++) {
for (int x = game.padding; x < game.width+game.padding; x++) {
@ -114,6 +127,7 @@ void simulate(int argc, char** argv) {
}
printf("===End iteration %i===\n", i);
#endif
// Save to a file
sprintf(iteration_file, "output/iteration-%07d.bin", i);
temp = game.grid;
game.grid = grid_h;
@ -122,13 +136,7 @@ void simulate(int argc, char** argv) {
}
}
clock_t totalEnd = clock();
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", timeComputingLife, ((double)totalEnd - (double)totalStart)/CLOCKS_PER_SEC);
cudaFree(&newGrid);
cudaFree(&grid_d);
cudaFree(&game.grid);
free(grid_h);
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", time_computing_life, ((double)clock() - (double)global_start)/CLOCKS_PER_SEC);
}
int main(int argc, char** argv) {

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.169687
Clock time: 1.560000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.254989
Clock time: 2.240000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.354361
Clock time: 3.050000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.480174
Clock time: 4.070000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.619636
Clock time: 5.220000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.029867
Clock time: 0.330000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.059907
Clock time: 0.540000

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.110954
Clock time: 1.000000

View File

@ -0,0 +1,18 @@
#!/bin/bash
# Slurm batch job: timing sweep for the gol binary in $HOME/gol/cuda-global.
# Runs the simulation once per square board size and stores each run's
# stdout in timing-study/output-<cores>-<iterations>-<size>.txt.
#SBATCH --time=0:30:00 # walltime, abbreviated by -t
#SBATCH --nodes=1 # number of cluster nodes, abbreviated by -N
#SBATCH -o slurm-%j.out-%N # name of the stdout, using the job number (%j) and the first node (%N)
#SBATCH -e slurm-%j.err-%N # name of the stderr, using job and first node values
#SBATCH --ntasks=1 # number of MPI tasks, abbreviated by -n
# additional information for allocated clusters
#SBATCH --account=notchpeak-shared-short # account - abbreviated by -A
#SBATCH --partition=notchpeak-shared-short # partition, abbreviated by -p
#SBATCH --gres=gpu:k80:1

# Stop instead of running the sweep from the wrong directory.
cd "$HOME/gol/cuda-global" || exit 1

# The redirect in the loop fails if the output directory does not exist.
mkdir -p timing-study

iterations=1000

# This job has no per-core sweep. Default the label to 1 (matching --ntasks=1)
# so the file name keeps the output-<cores>-<iterations>-<size> pattern rather
# than expanding an unset variable to "output--...". It can be overridden
# from the submitting environment.
cores="${cores:-1}"

for size in 250 500 750 1000 1250 1500 1750 2000
do
    # Arguments: simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0>
    srun ./gol simulate random "$size" "$size" "$iterations" 1 > "timing-study/output-$cores-$iterations-$size.txt"
done

BIN
mpi/gol

Binary file not shown.

View File

@ -15,10 +15,12 @@
Any live cell with more than three live neighbors dies (overpopulation).
Any dead cell with exactly three live neighbors becomes a live cell (reproduction).
*/
#define PADDING 16
#define PADDING 10
//#define VERBOSE 1
#define SEED 100
// A structure to keep the global arguments because each process
// will use its own GAME structure
struct Args {
int process_count;
int iterations;
@ -30,6 +32,7 @@ struct Args {
int data_per_proc;
};
// Make a datatype out of an Args struct
void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) {
int blocks[8] = {1,1,1,1,1,1,1,1};
MPI_Aint displacements[8];
@ -50,6 +53,7 @@ void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) {
MPI_Bcast(args, 1, arg_t, 0, comm);
}
// Scatter the grid among nodes
void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, int rank, int* data_counts, int* displacements, char* filename) {
unsigned char* data;
@ -63,12 +67,14 @@ void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, i
data = malloc(size);
memset(data, 0, size);
game.grid = data;
// Choose where to read initial position
if (strcmp(filename, "random") == 0) {
randomize(&game);
} else {
read_in(filename, &game);
}
}
// Do the scatter (some nodes may work on more rows)
MPI_Scatterv(data, data_counts, displacements, MPI_UNSIGNED_CHAR, local_data, data_counts[rank], MPI_UNSIGNED_CHAR, 0, comm);
if (rank == 0) {
@ -77,12 +83,13 @@ void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, i
}
// Do the simulation
void simulate(int argc, char** argv) {
srand(SEED);
double totalStart = MPI_Wtime();
struct Args args;
args.padding = PADDING;
// Initialize MPI stuff
int rank, process_count;
MPI_Comm comm;
MPI_Init(&argc, &argv);
@ -91,7 +98,9 @@ void simulate(int argc, char** argv) {
MPI_Comm_size(comm, &args.process_count);
char* filename;
double global_start;
if (rank == 0) {
// Parse the arguments
if (argc == 7) {
filename = argv[2];
args.width = atoi(argv[3]);
@ -99,7 +108,7 @@ void simulate(int argc, char** argv) {
args.iterations = atoi(argv[5]);
args.log_each_step = atoi(argv[6]);
} else {
printf("Usage: ./gol simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0> <block-size>\n");
printf("Usage: ./gol simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0>\n");
filename = "random";
args.height = 5;
args.width = 5;
@ -107,12 +116,17 @@ void simulate(int argc, char** argv) {
args.log_each_step = 0;
}
global_start = MPI_Wtime();
// Figure out how much work the average node will be doing
args.rows_per_proc = (args.height + args.padding*2)/args.process_count;
args.data_per_proc = args.rows_per_proc * (args.width + args.padding*2);
}
broadcast_and_receive_input(comm, &args);
// Calculate the exact work each thread will do and arguments for
// the Scatterv to scatter the grid
int grid_size = ((args.width + args.padding*2)*(args.height + args.padding*2));
int* data_counts = malloc(sizeof(int) * args.process_count);
int* displacements = malloc(sizeof(int) * args.process_count);
@ -123,19 +137,20 @@ void simulate(int argc, char** argv) {
data_counts[args.process_count-1] += grid_size % (args.data_per_proc * args.process_count);
unsigned char* local_data = malloc(data_counts[rank]*sizeof(unsigned char));
memset(local_data, 0, sizeof(unsigned char) * data_counts[rank]);
// Scatter the data among nodes
scatter_data(comm, &args, local_data, rank, data_counts, displacements, filename);
// Allocate space for current grid (1 byte per tile)
char iteration_file[1024];
double timeComputingLife = 0;
float localTime = 0;
// Local_game is our current job
struct GAME local_game;
local_game.grid = local_data;
local_game.width = args.width;
local_game.height = data_counts[rank] / (args.width + args.padding*2);
local_game.padding = args.padding;
// Assign halo elements to send to be received from above and below nodes
unsigned char* halo_above = NULL;
unsigned char* halo_below = NULL;
if (rank > 0) {
@ -148,32 +163,46 @@ void simulate(int argc, char** argv) {
}
unsigned char* global_data;
if (rank == 0) {
global_data = malloc(sizeof(unsigned char) * grid_size);
memset(global_data, 0, sizeof(unsigned char) * grid_size);
}
// Timing code
double time_computing_life = 0;
double start,end;
for (int i = 0; i <= args.iterations; i++) {
// Iteration 0 will just be the initial grid
if (i > 0) {
int total_width = args.width + args.padding*2;
MPI_Status status;
if (rank < args.process_count - 1) {
MPI_Send(&local_game.grid[(local_game.height-1) * total_width], total_width, MPI_UNSIGNED_CHAR, rank+1, 1, comm);
}
if (rank > 0) {
MPI_Recv(halo_above, total_width, MPI_UNSIGNED_CHAR, rank-1, 1, comm, NULL);
MPI_Recv(halo_above, total_width, MPI_UNSIGNED_CHAR, rank-1, 1, comm, &status);
MPI_Send(&local_game.grid[0], total_width, MPI_UNSIGNED_CHAR, rank-1, 0, comm);
}
if (rank < args.process_count - 1) {
MPI_Recv(halo_below, total_width, MPI_UNSIGNED_CHAR, rank+1, 0, comm, NULL);
MPI_Recv(halo_below, total_width, MPI_UNSIGNED_CHAR, rank+1, 0, comm, &status);
}
MPI_Barrier(comm);
start = MPI_Wtime();
// Compute the next grid
next(&local_game, halo_above, halo_below);
end = MPI_Wtime();
time_computing_life += end-start;
}
if (args.log_each_step) {
if (rank == 0) {
global_data = malloc(sizeof(unsigned char) * grid_size);
memset(global_data, 0, sizeof(unsigned char) * grid_size);
}
// If we are logging each step, perform IO operations
// Gather all of the local grids into global_data
MPI_Gatherv(local_game.grid, data_counts[rank], MPI_UNSIGNED_CHAR, global_data, data_counts, displacements, MPI_UNSIGNED_CHAR, 0, comm);
if (rank == 0) {
#ifdef VERBOSE
#if VERBOSE == 1
printf("\n===Iteration %i===\n", i);
// Print the board without the padding elements
for (int y = args.padding; y < args.height+args.padding; y++) {
for (int x = args.padding; x < args.width+args.padding; x++) {
printf("%s ", global_data[y*(args.width+2*args.padding) + x] ? "X" : " ");
@ -183,6 +212,7 @@ void simulate(int argc, char** argv) {
printf("===End iteration %i===\n", i);
#endif
// Save to a file
struct GAME global_game;
global_game.grid = global_data;
global_game.width = args.width;
@ -194,12 +224,15 @@ void simulate(int argc, char** argv) {
}
}
double totalEnd = MPI_Wtime();
MPI_Finalize();
double total_end = MPI_Wtime();
if (rank == 0) {
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", timeComputingLife, (totalEnd - totalStart));
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", time_computing_life, (total_end - global_start));
free(local_game.grid);
free(data_counts);
free(halo_above);
free(halo_below);
}
MPI_Finalize();
}
int main(int argc, char** argv) {

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 33.832562
Clock time: 37.939663

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 21716 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 75.141736
Clock time: 83.149478

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 21837 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 132.636661
Clock time: 145.001708

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.383001
Clock time: 4.113476

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.793952
Clock time: 9.832794

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 19.270078
Clock time: 21.813069

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.833550
Clock time: 6.323680

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.347700
Clock time: 9.178630

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 23209 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.483342
Clock time: 17.330302

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 23290 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.198089
Clock time: 2.217166

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.735509
Clock time: 2.513034

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.617002
Clock time: 4.091923

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.106571
Clock time: 7.500836

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 3.445883
Clock time: 11.167682

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.741983
Clock time: 16.777514

View File

@ -0,0 +1,8 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 34784 RUNNING AT kp160
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.301682
Clock time: 28.791425

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.145483
Clock time: 2.572587

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.570992
Clock time: 3.899400

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.215016
Clock time: 5.047125

View File

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.414322
Clock time: 9.439315

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.171989
Clock time: 13.927639

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 3.133675
Clock time: 19.271850

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.398371
Clock time: 25.650748

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 5.639865
Clock time: 33.529967

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.100765
Clock time: 2.412458

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.465147
Clock time: 3.942927

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.815429
Clock time: 5.642879

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 8.467197
Clock time: 11.707533

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 22126 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 18.823087
Clock time: 26.449810

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 22197 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 33.274214
Clock time: 45.841294

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.599813
Clock time: 2.807879

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.212790
Clock time: 4.133439

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.830949
Clock time: 6.854574

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.226861
Clock time: 7.517444

View File

@ -0,0 +1,11 @@
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 22852 RUNNING AT kp013
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 9.416485
Clock time: 16.706325

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 12.741221
Clock time: 22.281683

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 16.578412
Clock time: 26.921717

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.296146
Clock time: 2.211905

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 1.111486
Clock time: 2.710176

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.419305
Clock time: 4.675962

View File

@ -0,0 +1,11 @@
mkdir: cannot create directory timing-study: File exists
[proxy:0:0@kp013] HYD_pmcd_pmip_control_cmd_cb (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmip_cb.c:887): assert (!closed) failed
[proxy:0:0@kp013] HYDT_dmxu_poll_wait_for_event (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/demux/demux_poll.c:76): callback returned error status
[proxy:0:0@kp013] main (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmip.c:202): demux engine error waiting for event
srun: error: kp013: task 0: Exited with exit code 7
[mpiexec@kp013] HYDT_bscu_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/bootstrap/utils/bscu_wait.c:76): one of the processes terminated badly; aborting
[mpiexec@kp013] HYDT_bsci_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/bootstrap/src/bsci_wait.c:23): launcher returned error waiting for completion
[mpiexec@kp013] HYD_pmci_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c:218): launcher returned error waiting for completion
[mpiexec@kp013] main (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/ui/mpich/mpiexec.c:340): process manager error waiting for completion
srun: error: Unable to create step for job 10870703: Job/step already completing or completed
slurmstepd: error: *** JOB 10870703 ON kp013 CANCELLED AT 2021-12-08T01:29:02 DUE TO TIME LIMIT ***

View File

@ -0,0 +1,24 @@
#!/bin/bash
# Slurm batch job: timing sweep for the gol binary in $HOME/gol/mpi.
# For every process count and every square board size, runs the simulation
# under mpirun and stores its stdout in
# timing-study/output-<cores>-<iterations>-<size>.txt.
# NOTE(review): confirm the walltime below covers the whole sweep; Slurm
# cancels the job when the limit is reached.
#SBATCH --time=0:10:00 # walltime, abbreviated by -t
#SBATCH --nodes=2 # number of cluster nodes, abbreviated by -N
#SBATCH -o slurm-%j.out-%N # name of the stdout, using the job number (%j) and the first node (%N)
#SBATCH -e slurm-%j.err-%N # name of the stderr, using job and first node values
#SBATCH --ntasks=24 # number of MPI tasks, abbreviated by -n
# additional information for allocated clusters
#SBATCH --account=usucs5030 # account - abbreviated by -A
#SBATCH --partition=kingspeak # partition, abbreviated by -p

# Stop instead of running the sweep from the wrong directory.
cd "$HOME/gol/mpi" || exit 1

# -p keeps a re-run from failing with "File exists" when the directory
# is left over from an earlier job.
mkdir -p timing-study

module load intel mpich

iterations=1000

for cores in 1 4 8 12 16 20 #24
do
    for size in 250 500 750 1000 1250 1500 1750 2000
    do
        # Arguments: simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0>
        mpirun -np "$cores" ./gol simulate random "$size" "$size" "$iterations" 1 > "timing-study/output-$cores-$iterations-$size.txt"
    done
done

View File

@ -1,5 +1,6 @@
#include "create_grid.h"
// Print entirety of a grid to verify input
void print_grid(struct GAME* game) {
printf("\n===GRID===\n");
for (int y = 0; y < game->height; y++) {
@ -10,6 +11,7 @@ void print_grid(struct GAME* game) {
}
}
// Go through user input
void create_grid(int argc, char** argv) {
char* filename;
struct GAME game;

View File

@ -1,5 +1,6 @@
#include "file.h"
// Read a grid from a binary file into the space without padding
void read_in(char* filename, struct GAME* game) {
FILE* file = fopen(filename, "rb");
for (int i = game->padding; i < game->height+game->padding; i++) {
@ -8,6 +9,7 @@ void read_in(char* filename, struct GAME* game) {
fclose(file);
}
// Write a grid to a binary file into the space without padding
void write_out(char* filename, struct GAME* game) {
FILE* file = fopen(filename, "w+");
for (int i = game->padding; i < game->height+game->padding; i++) {

View File

@ -1,5 +1,6 @@
#include "game.h"
// Calculate the number of live neighbors a cell has
int neighbors(struct GAME* game, int x, int y) {
int n = 0;
for (int dy = -1; dy <= 1; dy++) {
@ -15,6 +16,7 @@ int neighbors(struct GAME* game, int x, int y) {
return n;
}
// Compute the next iteration of a board
void next(struct GAME* game, int threads) {
unsigned char** newGrid = malloc(sizeof(unsigned char*) * (game->height+(game->padding*2)));
int y,x,i,size;
@ -30,10 +32,12 @@ void next(struct GAME* game, int threads) {
#pragma omp parallel num_threads(threads) shared(per_thread, threads, total_width, total_height, newGrid, game) private(y,x,i)
{
// Each thread gets a number of cells to compute
int me = omp_get_thread_num();
int thread_start = per_thread * me;
int thread_end = thread_start + per_thread + (me == threads-1 ? (total_width*total_height) % per_thread : 0);
for (i = thread_start; i < thread_end; i++) {
// Iterate through each cell assigned for this thread
y = i / total_width;
x = i % total_width;
int my_neighbors = neighbors(game, x, y);
@ -55,6 +59,7 @@ void next(struct GAME* game, int threads) {
game->grid = newGrid;
}
// Randomly assign life value to each cell
void randomize(struct GAME* game) {
for (int y = game->padding; y < game->height+game->padding; y++) {
for (int x = game->padding; x < game->width+game->padding; x++) {

View File

@ -19,6 +19,7 @@
//#define VERBOSE 1
#define SEED 100
// Do the simulation
void simulate(int argc, char** argv) {
srand(SEED);
char* filename;
@ -26,6 +27,7 @@ void simulate(int argc, char** argv) {
game.padding = PADDING;
int iterations, log_each_step, threads;
if (argc == 8) {
// Parse the arguments
filename = argv[2];
game.width = atoi(argv[3]);
game.height = atoi(argv[4]);
@ -51,6 +53,7 @@ void simulate(int argc, char** argv) {
memset(game.grid[i], 0, game.width+(2*game.padding));
}
// Choose where to read initial position
if (strcmp(filename, "random") == 0) {
randomize(&game);
} else {
@ -62,16 +65,19 @@ void simulate(int argc, char** argv) {
double start, end;
for (int i = 0; i <= iterations; i++) {
// Iteration 0 will just be the initial grid
if (i > 0) {
// Iteration 0 is just the input board
start = omp_get_wtime();
// Compute the next grid with threads
next(&game, threads);
end = omp_get_wtime();
time_computing_life += ((double) (end - start));
}
if (log_each_step) {
// If we are logging each step, perform IO operations
#if VERBOSE == 1
printf("\n===Iteration %i===\n", i);
// Print the board without the padding elements
for (int y = game.padding; y < game.height+game.padding; y++) {
for (int x = game.padding; x < game.width+game.padding; x++) {
printf("%s ", game.grid[y][x] ? "X" : " ");
@ -80,6 +86,7 @@ void simulate(int argc, char** argv) {
}
printf("===End iteration %i===\n", i);
#endif
// Save to a file
sprintf(iteration_file, "output/iteration-%07d.bin", i);
write_out(iteration_file, &game);
}

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 73.312715
Clock time: 77.450210

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 113.646203
Clock time: 118.646829

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 163.034248
Clock time: 171.017339

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 220.656360
Clock time: 231.050593

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 287.698970
Clock time: 300.176503

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 5.088550
Clock time: 5.963468

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 18.956314
Clock time: 20.440567

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 41.660313
Clock time: 44.581177

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 7.026118
Clock time: 10.251363

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 10.801676
Clock time: 15.900482

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 25.918769
Clock time: 34.562182

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 25.862278
Clock time: 34.828966

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 30.705054
Clock time: 43.042410

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.860293
Clock time: 3.331446

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.097878
Clock time: 3.643646

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 4.321400
Clock time: 6.663178

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 9.131005
Clock time: 12.449032

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 13.434282
Clock time: 18.116181

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 16.706991
Clock time: 24.712374

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 23.733395
Clock time: 33.306681

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 30.429469
Clock time: 42.369926

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 0.738930
Clock time: 3.383995

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 2.352756
Clock time: 3.601057

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 5.147589
Clock time: 7.427564

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 7.390921
Clock time: 11.239737

View File

@ -0,0 +1,4 @@
===Timing===
Time computing life: 11.254777
Clock time: 16.136264

Some files were not shown because too many files have changed in this diff Show More