Timing study
This commit is contained in:
parent
253b267f1c
commit
c846568cf2
BIN
cuda-global/gol
BIN
cuda-global/gol
Binary file not shown.
@ -1,11 +1,12 @@
|
|||||||
#include "game.cuh"
|
#include "game.cuh"
|
||||||
|
|
||||||
|
// Count the number of live neighbors a cell has
|
||||||
__device__ int neighbors(struct GAME game, int x, int y) {
|
__device__ int neighbors(struct GAME game, int x, int y) {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
|
|
||||||
for (int dy = -1; dy <= 1; dy++) {
|
for (int dy = -1; dy <= 1; dy++) {
|
||||||
for (int dx = -1; dx <= 1; dx++) {
|
for (int dx = -1; dx <= 1; dx++) {
|
||||||
if (!(dx == 0 && dy == 0) && (x+dx) >= 0 && (y+dy) >= 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) {
|
if (!(dx == 0 && dy == 0) && (x+dx) > 0 && (y+dy) > 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) {
|
||||||
if (game.grid[(y+dy) * (game.width+game.padding*2) + (x+dx)]) {
|
if (game.grid[(y+dy) * (game.width+game.padding*2) + (x+dx)]) {
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
@ -15,11 +16,14 @@ __device__ int neighbors(struct GAME game, int x, int y) {
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute the next iteration of a board
|
||||||
|
// We have to give it the newGrid as a parameter otherwise
|
||||||
|
// each block will be computing its own version of the next grid
|
||||||
__global__ void next(struct GAME game, unsigned char* newGrid) {
|
__global__ void next(struct GAME game, unsigned char* newGrid) {
|
||||||
int idy = blockDim.y * blockIdx.y + threadIdx.y;
|
int idy = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
int idx = blockDim.x * blockIdx.x + threadIdx.x;
|
int idx = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
if (idy <= game.height+game.padding*2 && idx <= game.width+game.padding*2) {
|
if (idy < game.height+game.padding*2 && idx < game.width+game.padding*2) {
|
||||||
int my_neighbors = neighbors(game, idx, idy);
|
int my_neighbors = neighbors(game, idx, idy);
|
||||||
int my_coord = idy * (game.width+game.padding*2) + idx;
|
int my_coord = idy * (game.width+game.padding*2) + idx;
|
||||||
newGrid[my_coord] = 0; // It's possible that there are artifacts from the last iteration
|
newGrid[my_coord] = 0; // It's possible that there are artifacts from the last iteration
|
||||||
@ -37,6 +41,7 @@ __global__ void next(struct GAME game, unsigned char* newGrid) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Randomly assign life value to each cell
|
||||||
void randomize(struct GAME* game) {
|
void randomize(struct GAME* game) {
|
||||||
for (int y = game->padding; y < game->height+game->padding; y++) {
|
for (int y = game->padding; y < game->height+game->padding; y++) {
|
||||||
for (int x = game->padding; x < game->width+game->padding; x++) {
|
for (int x = game->padding; x < game->width+game->padding; x++) {
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
*/
|
*/
|
||||||
#define BLOCK 32
|
#define BLOCK 32
|
||||||
#define PADDING 10
|
#define PADDING 10
|
||||||
#define VERBOSE 1
|
//#define VERBOSE 1
|
||||||
#define SEED 100
|
#define SEED 100
|
||||||
|
|
||||||
// gpuErrchk source: https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
|
// gpuErrchk source: https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
|
||||||
@ -33,14 +33,16 @@ true) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Do the simulation
|
||||||
void simulate(int argc, char** argv) {
|
void simulate(int argc, char** argv) {
|
||||||
srand(SEED);
|
srand(SEED);
|
||||||
clock_t totalStart = clock();
|
clock_t global_start = clock();
|
||||||
char* filename;
|
char* filename;
|
||||||
struct GAME game;
|
struct GAME game;
|
||||||
game.padding = PADDING;
|
game.padding = PADDING;
|
||||||
int iterations, log_each_step;
|
int iterations, log_each_step;
|
||||||
if (argc == 7) {
|
if (argc == 7) {
|
||||||
|
// Parse the arguments
|
||||||
filename = argv[2];
|
filename = argv[2];
|
||||||
game.width = atoi(argv[3]);
|
game.width = atoi(argv[3]);
|
||||||
game.height = atoi(argv[4]);
|
game.height = atoi(argv[4]);
|
||||||
@ -60,6 +62,7 @@ void simulate(int argc, char** argv) {
|
|||||||
game.grid = (unsigned char*)malloc(size);
|
game.grid = (unsigned char*)malloc(size);
|
||||||
memset(game.grid, 0, size);
|
memset(game.grid, 0, size);
|
||||||
|
|
||||||
|
// Choose where to read initial position
|
||||||
if (strcmp(filename, "random") == 0) {
|
if (strcmp(filename, "random") == 0) {
|
||||||
randomize(&game);
|
randomize(&game);
|
||||||
} else {
|
} else {
|
||||||
@ -68,43 +71,53 @@ void simulate(int argc, char** argv) {
|
|||||||
|
|
||||||
char iteration_file[1024];
|
char iteration_file[1024];
|
||||||
|
|
||||||
|
// Allocate device memory
|
||||||
unsigned char* grid_d;
|
unsigned char* grid_d;
|
||||||
unsigned char* newGrid;
|
unsigned char* newGrid;
|
||||||
gpuErrchk(cudaMalloc(&grid_d, size));
|
gpuErrchk(cudaMalloc(&grid_d, size));
|
||||||
gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice));
|
|
||||||
gpuErrchk(cudaMalloc(&newGrid, size));
|
gpuErrchk(cudaMalloc(&newGrid, size));
|
||||||
|
gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice)); // Copy the initial grid to the device
|
||||||
|
free(game.grid);
|
||||||
|
game.grid = grid_d; // Use the device copy
|
||||||
|
|
||||||
|
// Host buffer that results are copied into from the device
|
||||||
unsigned char* grid_h = (unsigned char*)malloc(size);
|
unsigned char* grid_h = (unsigned char*)malloc(size);
|
||||||
unsigned char* temp;
|
unsigned char* temp;
|
||||||
|
|
||||||
game.grid = grid_d;
|
// Calculate the kernel launch grid dimensions (in blocks) for both width and height
|
||||||
|
int grid_width = (int)ceil((game.width+(2*game.padding))/(float)BLOCK);
|
||||||
int grid_num = (int)ceil((game.width+(2*game.padding))/(float)BLOCK);
|
int grid_height = (int)ceil((game.height+(2*game.padding))/(float)BLOCK);
|
||||||
dim3 dim_grid(grid_num, grid_num, 1);
|
dim3 dim_grid(grid_width, grid_height, 1);
|
||||||
dim3 dim_block(BLOCK, BLOCK, 1);
|
dim3 dim_block(BLOCK, BLOCK, 1);
|
||||||
|
|
||||||
cudaEvent_t startLife, stopLife;
|
// Timing
|
||||||
cudaEventCreate(&startLife);
|
cudaEvent_t start, end;
|
||||||
cudaEventCreate(&stopLife);
|
cudaEventCreate(&start);
|
||||||
double timeComputingLife = 0;
|
cudaEventCreate(&end);
|
||||||
float localTime = 0;
|
double time_computing_life = 0;
|
||||||
|
float local_time = 0;
|
||||||
|
|
||||||
for (int i = 0; i <= iterations; i++) {
|
for (int i = 0; i <= iterations; i++) {
|
||||||
|
// Iteration 0 will just be the initial grid
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
cudaEventRecord(startLife);
|
cudaEventRecord(start);
|
||||||
|
// Compute the next grid
|
||||||
next<<<dim_grid, dim_block>>>(game, newGrid);
|
next<<<dim_grid, dim_block>>>(game, newGrid);
|
||||||
cudaEventRecord(stopLife);
|
cudaEventRecord(end);
|
||||||
cudaEventSynchronize(stopLife);
|
cudaEventSynchronize(end);
|
||||||
cudaEventElapsedTime(&localTime, startLife, stopLife);
|
cudaEventElapsedTime(&local_time, start, end);
|
||||||
timeComputingLife += localTime/1000;
|
time_computing_life += local_time/1000;
|
||||||
|
|
||||||
|
// Swap game.grid and newGrid
|
||||||
temp = game.grid;
|
temp = game.grid;
|
||||||
game.grid = newGrid;
|
game.grid = newGrid;
|
||||||
newGrid = temp;
|
newGrid = temp;
|
||||||
}
|
}
|
||||||
if (log_each_step) {
|
if (log_each_step) {
|
||||||
|
// If we are logging each step, perform IO operations
|
||||||
gpuErrchk(cudaMemcpy(grid_h, game.grid, size, cudaMemcpyDeviceToHost));
|
gpuErrchk(cudaMemcpy(grid_h, game.grid, size, cudaMemcpyDeviceToHost));
|
||||||
#ifdef VERBOSE
|
#ifdef VERBOSE
|
||||||
|
// Print the board without the padding elements
|
||||||
printf("\n===Iteration %i===\n", i);
|
printf("\n===Iteration %i===\n", i);
|
||||||
for (int y = game.padding; y < game.height+game.padding; y++) {
|
for (int y = game.padding; y < game.height+game.padding; y++) {
|
||||||
for (int x = game.padding; x < game.width+game.padding; x++) {
|
for (int x = game.padding; x < game.width+game.padding; x++) {
|
||||||
@ -114,6 +127,7 @@ void simulate(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
printf("===End iteration %i===\n", i);
|
printf("===End iteration %i===\n", i);
|
||||||
#endif
|
#endif
|
||||||
|
// Save to a file
|
||||||
sprintf(iteration_file, "output/iteration-%07d.bin", i);
|
sprintf(iteration_file, "output/iteration-%07d.bin", i);
|
||||||
temp = game.grid;
|
temp = game.grid;
|
||||||
game.grid = grid_h;
|
game.grid = grid_h;
|
||||||
@ -122,13 +136,7 @@ void simulate(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
clock_t totalEnd = clock();
|
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", time_computing_life, ((double)clock() - (double)global_start)/CLOCKS_PER_SEC);
|
||||||
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", timeComputingLife, ((double)totalEnd - (double)totalStart)/CLOCKS_PER_SEC);
|
|
||||||
|
|
||||||
cudaFree(&newGrid);
|
|
||||||
cudaFree(&grid_d);
|
|
||||||
cudaFree(&game.grid);
|
|
||||||
free(grid_h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
4
cuda-global/timing-study/output--1000-1000.txt
Normal file
4
cuda-global/timing-study/output--1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.169687
|
||||||
|
Clock time: 1.560000
|
4
cuda-global/timing-study/output--1000-1250.txt
Normal file
4
cuda-global/timing-study/output--1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.254989
|
||||||
|
Clock time: 2.240000
|
4
cuda-global/timing-study/output--1000-1500.txt
Normal file
4
cuda-global/timing-study/output--1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.354361
|
||||||
|
Clock time: 3.050000
|
4
cuda-global/timing-study/output--1000-1750.txt
Normal file
4
cuda-global/timing-study/output--1000-1750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.480174
|
||||||
|
Clock time: 4.070000
|
4
cuda-global/timing-study/output--1000-2000.txt
Normal file
4
cuda-global/timing-study/output--1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.619636
|
||||||
|
Clock time: 5.220000
|
4
cuda-global/timing-study/output--1000-250.txt
Normal file
4
cuda-global/timing-study/output--1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.029867
|
||||||
|
Clock time: 0.330000
|
4
cuda-global/timing-study/output--1000-500.txt
Normal file
4
cuda-global/timing-study/output--1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.059907
|
||||||
|
Clock time: 0.540000
|
4
cuda-global/timing-study/output--1000-750.txt
Normal file
4
cuda-global/timing-study/output--1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.110954
|
||||||
|
Clock time: 1.000000
|
0
cuda-global/timing-study/slurm-3610476.err-notch081
Normal file
0
cuda-global/timing-study/slurm-3610476.err-notch081
Normal file
0
cuda-global/timing-study/slurm-3610476.out-notch081
Normal file
0
cuda-global/timing-study/slurm-3610476.out-notch081
Normal file
0
cuda-global/timing-study/slurm-3611549.err-notch081
Normal file
0
cuda-global/timing-study/slurm-3611549.err-notch081
Normal file
0
cuda-global/timing-study/slurm-3611549.out-notch081
Normal file
0
cuda-global/timing-study/slurm-3611549.out-notch081
Normal file
18
cuda-global/timing-study/timing_study.sh
Executable file
18
cuda-global/timing-study/timing_study.sh
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#SBATCH --time=0:30:00 # walltime, abbreviated by -t
|
||||||
|
#SBATCH --nodes=1 # number of cluster nodes, abbreviated by -N
|
||||||
|
#SBATCH -o slurm-%j.out-%N # name of the stdout, using the job number (%j) and the first node (%N)
|
||||||
|
#SBATCH -e slurm-%j.err-%N # name of the stderr, using job and first node values
|
||||||
|
#SBATCH --ntasks=1 # number of MPI tasks, abbreviated by -n
|
||||||
|
# additional information for allocated clusters
|
||||||
|
#SBATCH --account=notchpeak-shared-short # account - abbreviated by -A
|
||||||
|
#SBATCH --partition=notchpeak-shared-short # partition, abbreviated by -p
|
||||||
|
#SBATCH --gres=gpu:k80:1
|
||||||
|
|
||||||
|
cd $HOME/gol/cuda-global
|
||||||
|
|
||||||
|
iterations=1000
|
||||||
|
for size in 250 500 750 1000 1250 1500 1750 2000
|
||||||
|
do
|
||||||
|
srun ./gol simulate random $size $size $iterations 1 > timing-study/output-$cores-$iterations-$size.txt
|
||||||
|
done
|
@ -15,10 +15,12 @@
|
|||||||
Any live cell with more than three live neighbors dies (overpopulation).
|
Any live cell with more than three live neighbors dies (overpopulation).
|
||||||
Any dead cell with exactly three live neighbors becomes a live cell (reproduction).
|
Any dead cell with exactly three live neighbors becomes a live cell (reproduction).
|
||||||
*/
|
*/
|
||||||
#define PADDING 16
|
#define PADDING 10
|
||||||
//#define VERBOSE 1
|
//#define VERBOSE 1
|
||||||
#define SEED 100
|
#define SEED 100
|
||||||
|
|
||||||
|
// A structure to keep the global arguments because each process
|
||||||
|
// will use its own GAME structure
|
||||||
struct Args {
|
struct Args {
|
||||||
int process_count;
|
int process_count;
|
||||||
int iterations;
|
int iterations;
|
||||||
@ -30,6 +32,7 @@ struct Args {
|
|||||||
int data_per_proc;
|
int data_per_proc;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Make an MPI datatype out of an Args struct and broadcast the args from rank 0
|
||||||
void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) {
|
void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) {
|
||||||
int blocks[8] = {1,1,1,1,1,1,1,1};
|
int blocks[8] = {1,1,1,1,1,1,1,1};
|
||||||
MPI_Aint displacements[8];
|
MPI_Aint displacements[8];
|
||||||
@ -50,6 +53,7 @@ void broadcast_and_receive_input(MPI_Comm comm, struct Args* args) {
|
|||||||
MPI_Bcast(args, 1, arg_t, 0, comm);
|
MPI_Bcast(args, 1, arg_t, 0, comm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scatter the grid among nodes
|
||||||
void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, int rank, int* data_counts, int* displacements, char* filename) {
|
void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, int rank, int* data_counts, int* displacements, char* filename) {
|
||||||
unsigned char* data;
|
unsigned char* data;
|
||||||
|
|
||||||
@ -63,12 +67,14 @@ void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, i
|
|||||||
data = malloc(size);
|
data = malloc(size);
|
||||||
memset(data, 0, size);
|
memset(data, 0, size);
|
||||||
game.grid = data;
|
game.grid = data;
|
||||||
|
// Choose where to read initial position
|
||||||
if (strcmp(filename, "random") == 0) {
|
if (strcmp(filename, "random") == 0) {
|
||||||
randomize(&game);
|
randomize(&game);
|
||||||
} else {
|
} else {
|
||||||
read_in(filename, &game);
|
read_in(filename, &game);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Do the scatter (some nodes may work on more rows)
|
||||||
MPI_Scatterv(data, data_counts, displacements, MPI_UNSIGNED_CHAR, local_data, data_counts[rank], MPI_UNSIGNED_CHAR, 0, comm);
|
MPI_Scatterv(data, data_counts, displacements, MPI_UNSIGNED_CHAR, local_data, data_counts[rank], MPI_UNSIGNED_CHAR, 0, comm);
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
@ -77,12 +83,13 @@ void scatter_data(MPI_Comm comm, struct Args* args, unsigned char* local_data, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Do the simulation
|
||||||
void simulate(int argc, char** argv) {
|
void simulate(int argc, char** argv) {
|
||||||
srand(SEED);
|
srand(SEED);
|
||||||
double totalStart = MPI_Wtime();
|
|
||||||
struct Args args;
|
struct Args args;
|
||||||
args.padding = PADDING;
|
args.padding = PADDING;
|
||||||
|
|
||||||
|
// Initialize MPI stuff
|
||||||
int rank, process_count;
|
int rank, process_count;
|
||||||
MPI_Comm comm;
|
MPI_Comm comm;
|
||||||
MPI_Init(&argc, &argv);
|
MPI_Init(&argc, &argv);
|
||||||
@ -91,7 +98,9 @@ void simulate(int argc, char** argv) {
|
|||||||
MPI_Comm_size(comm, &args.process_count);
|
MPI_Comm_size(comm, &args.process_count);
|
||||||
|
|
||||||
char* filename;
|
char* filename;
|
||||||
|
double global_start;
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
|
// Parse the arguments
|
||||||
if (argc == 7) {
|
if (argc == 7) {
|
||||||
filename = argv[2];
|
filename = argv[2];
|
||||||
args.width = atoi(argv[3]);
|
args.width = atoi(argv[3]);
|
||||||
@ -99,7 +108,7 @@ void simulate(int argc, char** argv) {
|
|||||||
args.iterations = atoi(argv[5]);
|
args.iterations = atoi(argv[5]);
|
||||||
args.log_each_step = atoi(argv[6]);
|
args.log_each_step = atoi(argv[6]);
|
||||||
} else {
|
} else {
|
||||||
printf("Usage: ./gol simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0> <block-size>\n");
|
printf("Usage: ./gol simulate <filename | random> <width> <height> <iterations> <log-each-step?1:0>\n");
|
||||||
filename = "random";
|
filename = "random";
|
||||||
args.height = 5;
|
args.height = 5;
|
||||||
args.width = 5;
|
args.width = 5;
|
||||||
@ -107,12 +116,17 @@ void simulate(int argc, char** argv) {
|
|||||||
args.log_each_step = 0;
|
args.log_each_step = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
global_start = MPI_Wtime();
|
||||||
|
|
||||||
|
// Figure out how much work the average node will be doing
|
||||||
args.rows_per_proc = (args.height + args.padding*2)/args.process_count;
|
args.rows_per_proc = (args.height + args.padding*2)/args.process_count;
|
||||||
args.data_per_proc = args.rows_per_proc * (args.width + args.padding*2);
|
args.data_per_proc = args.rows_per_proc * (args.width + args.padding*2);
|
||||||
}
|
}
|
||||||
|
|
||||||
broadcast_and_receive_input(comm, &args);
|
broadcast_and_receive_input(comm, &args);
|
||||||
|
|
||||||
|
// Calculate the exact work each process will do and arguments for
|
||||||
|
// the Scatterv to scatter the grid
|
||||||
int grid_size = ((args.width + args.padding*2)*(args.height + args.padding*2));
|
int grid_size = ((args.width + args.padding*2)*(args.height + args.padding*2));
|
||||||
int* data_counts = malloc(sizeof(int) * args.process_count);
|
int* data_counts = malloc(sizeof(int) * args.process_count);
|
||||||
int* displacements = malloc(sizeof(int) * args.process_count);
|
int* displacements = malloc(sizeof(int) * args.process_count);
|
||||||
@ -123,19 +137,20 @@ void simulate(int argc, char** argv) {
|
|||||||
data_counts[args.process_count-1] += grid_size % (args.data_per_proc * args.process_count);
|
data_counts[args.process_count-1] += grid_size % (args.data_per_proc * args.process_count);
|
||||||
unsigned char* local_data = malloc(data_counts[rank]*sizeof(unsigned char));
|
unsigned char* local_data = malloc(data_counts[rank]*sizeof(unsigned char));
|
||||||
memset(local_data, 0, sizeof(unsigned char) * data_counts[rank]);
|
memset(local_data, 0, sizeof(unsigned char) * data_counts[rank]);
|
||||||
|
|
||||||
|
// Scatter the data among nodes
|
||||||
scatter_data(comm, &args, local_data, rank, data_counts, displacements, filename);
|
scatter_data(comm, &args, local_data, rank, data_counts, displacements, filename);
|
||||||
|
|
||||||
// Allocate space for current grid (1 byte per tile)
|
|
||||||
char iteration_file[1024];
|
char iteration_file[1024];
|
||||||
|
|
||||||
double timeComputingLife = 0;
|
// Local_game is our current job
|
||||||
float localTime = 0;
|
|
||||||
|
|
||||||
struct GAME local_game;
|
struct GAME local_game;
|
||||||
local_game.grid = local_data;
|
local_game.grid = local_data;
|
||||||
local_game.width = args.width;
|
local_game.width = args.width;
|
||||||
local_game.height = data_counts[rank] / (args.width + args.padding*2);
|
local_game.height = data_counts[rank] / (args.width + args.padding*2);
|
||||||
local_game.padding = args.padding;
|
local_game.padding = args.padding;
|
||||||
|
|
||||||
|
// Halo rows that will be received from the nodes above and below
|
||||||
unsigned char* halo_above = NULL;
|
unsigned char* halo_above = NULL;
|
||||||
unsigned char* halo_below = NULL;
|
unsigned char* halo_below = NULL;
|
||||||
if (rank > 0) {
|
if (rank > 0) {
|
||||||
@ -148,32 +163,46 @@ void simulate(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned char* global_data;
|
unsigned char* global_data;
|
||||||
|
if (rank == 0) {
|
||||||
|
global_data = malloc(sizeof(unsigned char) * grid_size);
|
||||||
|
memset(global_data, 0, sizeof(unsigned char) * grid_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Timing code
|
||||||
|
double time_computing_life = 0;
|
||||||
|
double start,end;
|
||||||
|
|
||||||
for (int i = 0; i <= args.iterations; i++) {
|
for (int i = 0; i <= args.iterations; i++) {
|
||||||
|
// Iteration 0 will just be the initial grid
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
int total_width = args.width + args.padding*2;
|
int total_width = args.width + args.padding*2;
|
||||||
|
|
||||||
|
MPI_Status status;
|
||||||
if (rank < args.process_count - 1) {
|
if (rank < args.process_count - 1) {
|
||||||
MPI_Send(&local_game.grid[(local_game.height-1) * total_width], total_width, MPI_UNSIGNED_CHAR, rank+1, 1, comm);
|
MPI_Send(&local_game.grid[(local_game.height-1) * total_width], total_width, MPI_UNSIGNED_CHAR, rank+1, 1, comm);
|
||||||
}
|
}
|
||||||
if (rank > 0) {
|
if (rank > 0) {
|
||||||
MPI_Recv(halo_above, total_width, MPI_UNSIGNED_CHAR, rank-1, 1, comm, NULL);
|
MPI_Recv(halo_above, total_width, MPI_UNSIGNED_CHAR, rank-1, 1, comm, &status);
|
||||||
MPI_Send(&local_game.grid[0], total_width, MPI_UNSIGNED_CHAR, rank-1, 0, comm);
|
MPI_Send(&local_game.grid[0], total_width, MPI_UNSIGNED_CHAR, rank-1, 0, comm);
|
||||||
}
|
}
|
||||||
if (rank < args.process_count - 1) {
|
if (rank < args.process_count - 1) {
|
||||||
MPI_Recv(halo_below, total_width, MPI_UNSIGNED_CHAR, rank+1, 0, comm, NULL);
|
MPI_Recv(halo_below, total_width, MPI_UNSIGNED_CHAR, rank+1, 0, comm, &status);
|
||||||
}
|
}
|
||||||
MPI_Barrier(comm);
|
MPI_Barrier(comm);
|
||||||
|
start = MPI_Wtime();
|
||||||
|
// Compute the next grid
|
||||||
next(&local_game, halo_above, halo_below);
|
next(&local_game, halo_above, halo_below);
|
||||||
|
end = MPI_Wtime();
|
||||||
|
time_computing_life += end-start;
|
||||||
}
|
}
|
||||||
if (args.log_each_step) {
|
if (args.log_each_step) {
|
||||||
if (rank == 0) {
|
// If we are logging each step, perform IO operations
|
||||||
global_data = malloc(sizeof(unsigned char) * grid_size);
|
// Gather all of the local grids into global_data
|
||||||
memset(global_data, 0, sizeof(unsigned char) * grid_size);
|
|
||||||
}
|
|
||||||
MPI_Gatherv(local_game.grid, data_counts[rank], MPI_UNSIGNED_CHAR, global_data, data_counts, displacements, MPI_UNSIGNED_CHAR, 0, comm);
|
MPI_Gatherv(local_game.grid, data_counts[rank], MPI_UNSIGNED_CHAR, global_data, data_counts, displacements, MPI_UNSIGNED_CHAR, 0, comm);
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
#ifdef VERBOSE
|
#if VERBOSE == 1
|
||||||
printf("\n===Iteration %i===\n", i);
|
printf("\n===Iteration %i===\n", i);
|
||||||
|
// Print the board without the padding elements
|
||||||
for (int y = args.padding; y < args.height+args.padding; y++) {
|
for (int y = args.padding; y < args.height+args.padding; y++) {
|
||||||
for (int x = args.padding; x < args.width+args.padding; x++) {
|
for (int x = args.padding; x < args.width+args.padding; x++) {
|
||||||
printf("%s ", global_data[y*(args.width+2*args.padding) + x] ? "X" : " ");
|
printf("%s ", global_data[y*(args.width+2*args.padding) + x] ? "X" : " ");
|
||||||
@ -183,6 +212,7 @@ void simulate(int argc, char** argv) {
|
|||||||
printf("===End iteration %i===\n", i);
|
printf("===End iteration %i===\n", i);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Save to a file
|
||||||
struct GAME global_game;
|
struct GAME global_game;
|
||||||
global_game.grid = global_data;
|
global_game.grid = global_data;
|
||||||
global_game.width = args.width;
|
global_game.width = args.width;
|
||||||
@ -194,12 +224,15 @@ void simulate(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double totalEnd = MPI_Wtime();
|
double total_end = MPI_Wtime();
|
||||||
MPI_Finalize();
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", timeComputingLife, (totalEnd - totalStart));
|
printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", time_computing_life, (total_end - global_start));
|
||||||
|
free(local_game.grid);
|
||||||
|
free(data_counts);
|
||||||
|
free(halo_above);
|
||||||
|
free(halo_below);
|
||||||
}
|
}
|
||||||
|
MPI_Finalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
4
mpi/timing-study/output-1-1000-1000.txt
Normal file
4
mpi/timing-study/output-1-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 33.832562
|
||||||
|
Clock time: 37.939663
|
11
mpi/timing-study/output-1-1000-1250.txt
Normal file
11
mpi/timing-study/output-1-1000-1250.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 21716 RUNNING AT kp013
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
||||||
|
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
|
||||||
|
This typically refers to a problem with your application.
|
||||||
|
Please see the FAQ page for debugging suggestions
|
4
mpi/timing-study/output-1-1000-1500.txt
Normal file
4
mpi/timing-study/output-1-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 75.141736
|
||||||
|
Clock time: 83.149478
|
11
mpi/timing-study/output-1-1000-1750.txt
Normal file
11
mpi/timing-study/output-1-1000-1750.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 21837 RUNNING AT kp013
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
||||||
|
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
|
||||||
|
This typically refers to a problem with your application.
|
||||||
|
Please see the FAQ page for debugging suggestions
|
4
mpi/timing-study/output-1-1000-2000.txt
Normal file
4
mpi/timing-study/output-1-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 132.636661
|
||||||
|
Clock time: 145.001708
|
4
mpi/timing-study/output-1-1000-250.txt
Normal file
4
mpi/timing-study/output-1-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.383001
|
||||||
|
Clock time: 4.113476
|
4
mpi/timing-study/output-1-1000-500.txt
Normal file
4
mpi/timing-study/output-1-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 8.793952
|
||||||
|
Clock time: 9.832794
|
4
mpi/timing-study/output-1-1000-750.txt
Normal file
4
mpi/timing-study/output-1-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 19.270078
|
||||||
|
Clock time: 21.813069
|
4
mpi/timing-study/output-12-1000-1000.txt
Normal file
4
mpi/timing-study/output-12-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.833550
|
||||||
|
Clock time: 6.323680
|
4
mpi/timing-study/output-12-1000-1250.txt
Normal file
4
mpi/timing-study/output-12-1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 4.347700
|
||||||
|
Clock time: 9.178630
|
11
mpi/timing-study/output-12-1000-1500.txt
Normal file
11
mpi/timing-study/output-12-1000-1500.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 23209 RUNNING AT kp013
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
||||||
|
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
|
||||||
|
This typically refers to a problem with your application.
|
||||||
|
Please see the FAQ page for debugging suggestions
|
4
mpi/timing-study/output-12-1000-1750.txt
Normal file
4
mpi/timing-study/output-12-1000-1750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 8.483342
|
||||||
|
Clock time: 17.330302
|
11
mpi/timing-study/output-12-1000-2000.txt
Normal file
11
mpi/timing-study/output-12-1000-2000.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 23290 RUNNING AT kp013
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
||||||
|
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
|
||||||
|
This typically refers to a problem with your application.
|
||||||
|
Please see the FAQ page for debugging suggestions
|
4
mpi/timing-study/output-12-1000-250.txt
Normal file
4
mpi/timing-study/output-12-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.198089
|
||||||
|
Clock time: 2.217166
|
4
mpi/timing-study/output-12-1000-500.txt
Normal file
4
mpi/timing-study/output-12-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.735509
|
||||||
|
Clock time: 2.513034
|
4
mpi/timing-study/output-12-1000-750.txt
Normal file
4
mpi/timing-study/output-12-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 1.617002
|
||||||
|
Clock time: 4.091923
|
4
mpi/timing-study/output-16-1000-1000.txt
Normal file
4
mpi/timing-study/output-16-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.106571
|
||||||
|
Clock time: 7.500836
|
4
mpi/timing-study/output-16-1000-1250.txt
Normal file
4
mpi/timing-study/output-16-1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 3.445883
|
||||||
|
Clock time: 11.167682
|
4
mpi/timing-study/output-16-1000-1500.txt
Normal file
4
mpi/timing-study/output-16-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 4.741983
|
||||||
|
Clock time: 16.777514
|
8
mpi/timing-study/output-16-1000-1750.txt
Normal file
8
mpi/timing-study/output-16-1000-1750.txt
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 34784 RUNNING AT kp160
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
4
mpi/timing-study/output-16-1000-2000.txt
Normal file
4
mpi/timing-study/output-16-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 8.301682
|
||||||
|
Clock time: 28.791425
|
4
mpi/timing-study/output-16-1000-250.txt
Normal file
4
mpi/timing-study/output-16-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.145483
|
||||||
|
Clock time: 2.572587
|
4
mpi/timing-study/output-16-1000-500.txt
Normal file
4
mpi/timing-study/output-16-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.570992
|
||||||
|
Clock time: 3.899400
|
4
mpi/timing-study/output-16-1000-750.txt
Normal file
4
mpi/timing-study/output-16-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 1.215016
|
||||||
|
Clock time: 5.047125
|
0
mpi/timing-study/output-20-1000-250.txt
Normal file
0
mpi/timing-study/output-20-1000-250.txt
Normal file
4
mpi/timing-study/output-24-1000-1000.txt
Normal file
4
mpi/timing-study/output-24-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 1.414322
|
||||||
|
Clock time: 9.439315
|
4
mpi/timing-study/output-24-1000-1250.txt
Normal file
4
mpi/timing-study/output-24-1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.171989
|
||||||
|
Clock time: 13.927639
|
4
mpi/timing-study/output-24-1000-1500.txt
Normal file
4
mpi/timing-study/output-24-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 3.133675
|
||||||
|
Clock time: 19.271850
|
4
mpi/timing-study/output-24-1000-1750.txt
Normal file
4
mpi/timing-study/output-24-1000-1750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 4.398371
|
||||||
|
Clock time: 25.650748
|
4
mpi/timing-study/output-24-1000-2000.txt
Normal file
4
mpi/timing-study/output-24-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 5.639865
|
||||||
|
Clock time: 33.529967
|
4
mpi/timing-study/output-24-1000-250.txt
Normal file
4
mpi/timing-study/output-24-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.100765
|
||||||
|
Clock time: 2.412458
|
4
mpi/timing-study/output-24-1000-500.txt
Normal file
4
mpi/timing-study/output-24-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.465147
|
||||||
|
Clock time: 3.942927
|
4
mpi/timing-study/output-24-1000-750.txt
Normal file
4
mpi/timing-study/output-24-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.815429
|
||||||
|
Clock time: 5.642879
|
4
mpi/timing-study/output-4-1000-1000.txt
Normal file
4
mpi/timing-study/output-4-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 8.467197
|
||||||
|
Clock time: 11.707533
|
11
mpi/timing-study/output-4-1000-1250.txt
Normal file
11
mpi/timing-study/output-4-1000-1250.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 22126 RUNNING AT kp013
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
||||||
|
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
|
||||||
|
This typically refers to a problem with your application.
|
||||||
|
Please see the FAQ page for debugging suggestions
|
4
mpi/timing-study/output-4-1000-1500.txt
Normal file
4
mpi/timing-study/output-4-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 18.823087
|
||||||
|
Clock time: 26.449810
|
11
mpi/timing-study/output-4-1000-1750.txt
Normal file
11
mpi/timing-study/output-4-1000-1750.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 22197 RUNNING AT kp013
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
||||||
|
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
|
||||||
|
This typically refers to a problem with your application.
|
||||||
|
Please see the FAQ page for debugging suggestions
|
4
mpi/timing-study/output-4-1000-2000.txt
Normal file
4
mpi/timing-study/output-4-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 33.274214
|
||||||
|
Clock time: 45.841294
|
4
mpi/timing-study/output-4-1000-250.txt
Normal file
4
mpi/timing-study/output-4-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.599813
|
||||||
|
Clock time: 2.807879
|
4
mpi/timing-study/output-4-1000-500.txt
Normal file
4
mpi/timing-study/output-4-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.212790
|
||||||
|
Clock time: 4.133439
|
4
mpi/timing-study/output-4-1000-750.txt
Normal file
4
mpi/timing-study/output-4-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 4.830949
|
||||||
|
Clock time: 6.854574
|
4
mpi/timing-study/output-8-1000-1000.txt
Normal file
4
mpi/timing-study/output-8-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 4.226861
|
||||||
|
Clock time: 7.517444
|
11
mpi/timing-study/output-8-1000-1250.txt
Normal file
11
mpi/timing-study/output-8-1000-1250.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
===================================================================================
|
||||||
|
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
|
||||||
|
= PID 22852 RUNNING AT kp013
|
||||||
|
= EXIT CODE: 11
|
||||||
|
= CLEANING UP REMAINING PROCESSES
|
||||||
|
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
|
||||||
|
===================================================================================
|
||||||
|
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
|
||||||
|
This typically refers to a problem with your application.
|
||||||
|
Please see the FAQ page for debugging suggestions
|
4
mpi/timing-study/output-8-1000-1500.txt
Normal file
4
mpi/timing-study/output-8-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 9.416485
|
||||||
|
Clock time: 16.706325
|
4
mpi/timing-study/output-8-1000-1750.txt
Normal file
4
mpi/timing-study/output-8-1000-1750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 12.741221
|
||||||
|
Clock time: 22.281683
|
4
mpi/timing-study/output-8-1000-2000.txt
Normal file
4
mpi/timing-study/output-8-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 16.578412
|
||||||
|
Clock time: 26.921717
|
4
mpi/timing-study/output-8-1000-250.txt
Normal file
4
mpi/timing-study/output-8-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.296146
|
||||||
|
Clock time: 2.211905
|
4
mpi/timing-study/output-8-1000-500.txt
Normal file
4
mpi/timing-study/output-8-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 1.111486
|
||||||
|
Clock time: 2.710176
|
4
mpi/timing-study/output-8-1000-750.txt
Normal file
4
mpi/timing-study/output-8-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.419305
|
||||||
|
Clock time: 4.675962
|
11
mpi/timing-study/slurm-10870703.err-kp013
Normal file
11
mpi/timing-study/slurm-10870703.err-kp013
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
mkdir: cannot create directory ‘timing-study’: File exists
|
||||||
|
[proxy:0:0@kp013] HYD_pmcd_pmip_control_cmd_cb (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmip_cb.c:887): assert (!closed) failed
|
||||||
|
[proxy:0:0@kp013] HYDT_dmxu_poll_wait_for_event (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/demux/demux_poll.c:76): callback returned error status
|
||||||
|
[proxy:0:0@kp013] main (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmip.c:202): demux engine error waiting for event
|
||||||
|
srun: error: kp013: task 0: Exited with exit code 7
|
||||||
|
[mpiexec@kp013] HYDT_bscu_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/bootstrap/utils/bscu_wait.c:76): one of the processes terminated badly; aborting
|
||||||
|
[mpiexec@kp013] HYDT_bsci_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/tools/bootstrap/src/bsci_wait.c:23): launcher returned error waiting for completion
|
||||||
|
[mpiexec@kp013] HYD_pmci_wait_for_completion (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/pm/pmiserv/pmiserv_pmci.c:218): launcher returned error waiting for completion
|
||||||
|
[mpiexec@kp013] main (../../../../../../srcdir/mpich/3.2.1/src/pm/hydra/ui/mpich/mpiexec.c:340): process manager error waiting for completion
|
||||||
|
srun: error: Unable to create step for job 10870703: Job/step already completing or completed
|
||||||
|
slurmstepd: error: *** JOB 10870703 ON kp013 CANCELLED AT 2021-12-08T01:29:02 DUE TO TIME LIMIT ***
|
0
mpi/timing-study/slurm-10870703.out-kp013
Normal file
0
mpi/timing-study/slurm-10870703.out-kp013
Normal file
24
mpi/timing-study/timing_study.sh
Normal file
24
mpi/timing-study/timing_study.sh
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#SBATCH --time=0:10:00 # walltime, abbreviated by -t
|
||||||
|
#SBATCH --nodes=2 # number of cluster nodes, abbreviated by -N
|
||||||
|
#SBATCH -o slurm-%j.out-%N # name of the stdout, using the job number (%j) and the first node (%N)
|
||||||
|
#SBATCH -e slurm-%j.err-%N # name of the stderr, using job and first node values
|
||||||
|
#SBATCH --ntasks=24 # number of MPI tasks, abbreviated by -n
|
||||||
|
# additional information for allocated clusters
|
||||||
|
#SBATCH --account=usucs5030 # account - abbreviated by -A
|
||||||
|
#SBATCH --partition=kingspeak # partition, abbreviated by -p
|
||||||
|
|
||||||
|
cd $HOME/gol/mpi
|
||||||
|
# Create the output directory; -p keeps re-runs from failing when it already exists
mkdir -p timing-study
|
||||||
|
|
||||||
|
module load intel mpich
|
||||||
|
|
||||||
|
iterations=1000
|
||||||
|
|
||||||
|
for cores in 1 4 8 12 16 20 #24
|
||||||
|
do
|
||||||
|
for size in 250 500 750 1000 1250 1500 1750 2000
|
||||||
|
do
|
||||||
|
mpirun -np $cores ./gol simulate random $size $size $iterations 1 > timing-study/output-$cores-$iterations-$size.txt
|
||||||
|
done
|
||||||
|
done
|
@ -1,5 +1,6 @@
|
|||||||
#include "create_grid.h"
|
#include "create_grid.h"
|
||||||
|
|
||||||
|
// Print entirety of a grid to verify input
|
||||||
void print_grid(struct GAME* game) {
|
void print_grid(struct GAME* game) {
|
||||||
printf("\n===GRID===\n");
|
printf("\n===GRID===\n");
|
||||||
for (int y = 0; y < game->height; y++) {
|
for (int y = 0; y < game->height; y++) {
|
||||||
@ -10,6 +11,7 @@ void print_grid(struct GAME* game) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Go through user input
|
||||||
void create_grid(int argc, char** argv) {
|
void create_grid(int argc, char** argv) {
|
||||||
char* filename;
|
char* filename;
|
||||||
struct GAME game;
|
struct GAME game;
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include "file.h"
|
#include "file.h"
|
||||||
|
|
||||||
|
// Read a grid from a binary file into the space without padding
|
||||||
void read_in(char* filename, struct GAME* game) {
|
void read_in(char* filename, struct GAME* game) {
|
||||||
FILE* file = fopen(filename, "rb");
|
FILE* file = fopen(filename, "rb");
|
||||||
for (int i = game->padding; i < game->height+game->padding; i++) {
|
for (int i = game->padding; i < game->height+game->padding; i++) {
|
||||||
@ -8,6 +9,7 @@ void read_in(char* filename, struct GAME* game) {
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write a grid to a binary file into the space without padding
|
||||||
void write_out(char* filename, struct GAME* game) {
|
void write_out(char* filename, struct GAME* game) {
|
||||||
FILE* file = fopen(filename, "w+");
|
FILE* file = fopen(filename, "w+");
|
||||||
for (int i = game->padding; i < game->height+game->padding; i++) {
|
for (int i = game->padding; i < game->height+game->padding; i++) {
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include "game.h"
|
#include "game.h"
|
||||||
|
|
||||||
|
// Calculate the number of live neighbors a cell has
|
||||||
int neighbors(struct GAME* game, int x, int y) {
|
int neighbors(struct GAME* game, int x, int y) {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (int dy = -1; dy <= 1; dy++) {
|
for (int dy = -1; dy <= 1; dy++) {
|
||||||
@ -15,6 +16,7 @@ int neighbors(struct GAME* game, int x, int y) {
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute the next iteration of a board
|
||||||
void next(struct GAME* game, int threads) {
|
void next(struct GAME* game, int threads) {
|
||||||
unsigned char** newGrid = malloc(sizeof(unsigned char*) * (game->height+(game->padding*2)));
|
unsigned char** newGrid = malloc(sizeof(unsigned char*) * (game->height+(game->padding*2)));
|
||||||
int y,x,i,size;
|
int y,x,i,size;
|
||||||
@ -30,10 +32,12 @@ void next(struct GAME* game, int threads) {
|
|||||||
|
|
||||||
#pragma omp parallel num_threads(threads) shared(per_thread, threads, total_width, total_height, newGrid, game) private(y,x,i)
|
#pragma omp parallel num_threads(threads) shared(per_thread, threads, total_width, total_height, newGrid, game) private(y,x,i)
|
||||||
{
|
{
|
||||||
|
// Each thread gets a number of cells to compute
|
||||||
int me = omp_get_thread_num();
|
int me = omp_get_thread_num();
|
||||||
int thread_start = per_thread * me;
|
int thread_start = per_thread * me;
|
||||||
int thread_end = thread_start + per_thread + (me == threads-1 ? (total_width*total_height) % per_thread : 0);
|
int thread_end = thread_start + per_thread + (me == threads-1 ? (total_width*total_height) % per_thread : 0);
|
||||||
for (i = thread_start; i < thread_end; i++) {
|
for (i = thread_start; i < thread_end; i++) {
|
||||||
|
// Iterate through each cell assigned for this thread
|
||||||
y = i / total_width;
|
y = i / total_width;
|
||||||
x = i % total_width;
|
x = i % total_width;
|
||||||
int my_neighbors = neighbors(game, x, y);
|
int my_neighbors = neighbors(game, x, y);
|
||||||
@ -55,6 +59,7 @@ void next(struct GAME* game, int threads) {
|
|||||||
game->grid = newGrid;
|
game->grid = newGrid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Randomly assign a life value to each cell
|
||||||
void randomize(struct GAME* game) {
|
void randomize(struct GAME* game) {
|
||||||
for (int y = game->padding; y < game->height+game->padding; y++) {
|
for (int y = game->padding; y < game->height+game->padding; y++) {
|
||||||
for (int x = game->padding; x < game->width+game->padding; x++) {
|
for (int x = game->padding; x < game->width+game->padding; x++) {
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
//#define VERBOSE 1
|
//#define VERBOSE 1
|
||||||
#define SEED 100
|
#define SEED 100
|
||||||
|
|
||||||
|
// Do the simulation
|
||||||
void simulate(int argc, char** argv) {
|
void simulate(int argc, char** argv) {
|
||||||
srand(SEED);
|
srand(SEED);
|
||||||
char* filename;
|
char* filename;
|
||||||
@ -26,6 +27,7 @@ void simulate(int argc, char** argv) {
|
|||||||
game.padding = PADDING;
|
game.padding = PADDING;
|
||||||
int iterations, log_each_step, threads;
|
int iterations, log_each_step, threads;
|
||||||
if (argc == 8) {
|
if (argc == 8) {
|
||||||
|
// Parse the arguments
|
||||||
filename = argv[2];
|
filename = argv[2];
|
||||||
game.width = atoi(argv[3]);
|
game.width = atoi(argv[3]);
|
||||||
game.height = atoi(argv[4]);
|
game.height = atoi(argv[4]);
|
||||||
@ -51,6 +53,7 @@ void simulate(int argc, char** argv) {
|
|||||||
memset(game.grid[i], 0, game.width+(2*game.padding));
|
memset(game.grid[i], 0, game.width+(2*game.padding));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Choose where to read initial position
|
||||||
if (strcmp(filename, "random") == 0) {
|
if (strcmp(filename, "random") == 0) {
|
||||||
randomize(&game);
|
randomize(&game);
|
||||||
} else {
|
} else {
|
||||||
@ -62,16 +65,19 @@ void simulate(int argc, char** argv) {
|
|||||||
double start, end;
|
double start, end;
|
||||||
|
|
||||||
for (int i = 0; i <= iterations; i++) {
|
for (int i = 0; i <= iterations; i++) {
|
||||||
|
// Iteration 0 will just be the initial grid
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
// Iteration 0 is just the input board
|
|
||||||
start = omp_get_wtime();
|
start = omp_get_wtime();
|
||||||
|
// Compute the next grid with threads
|
||||||
next(&game, threads);
|
next(&game, threads);
|
||||||
end = omp_get_wtime();
|
end = omp_get_wtime();
|
||||||
time_computing_life += ((double) (end - start));
|
time_computing_life += ((double) (end - start));
|
||||||
}
|
}
|
||||||
if (log_each_step) {
|
if (log_each_step) {
|
||||||
|
// If we are logging each step, perform IO operations
|
||||||
#if VERBOSE == 1
|
#if VERBOSE == 1
|
||||||
printf("\n===Iteration %i===\n", i);
|
printf("\n===Iteration %i===\n", i);
|
||||||
|
// Print the board without the padding elements
|
||||||
for (int y = game.padding; y < game.height+game.padding; y++) {
|
for (int y = game.padding; y < game.height+game.padding; y++) {
|
||||||
for (int x = game.padding; x < game.width+game.padding; x++) {
|
for (int x = game.padding; x < game.width+game.padding; x++) {
|
||||||
printf("%s ", game.grid[y][x] ? "X" : " ");
|
printf("%s ", game.grid[y][x] ? "X" : " ");
|
||||||
@ -80,6 +86,7 @@ void simulate(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
printf("===End iteration %i===\n", i);
|
printf("===End iteration %i===\n", i);
|
||||||
#endif
|
#endif
|
||||||
|
// Save to a file
|
||||||
sprintf(iteration_file, "output/iteration-%07d.bin", i);
|
sprintf(iteration_file, "output/iteration-%07d.bin", i);
|
||||||
write_out(iteration_file, &game);
|
write_out(iteration_file, &game);
|
||||||
}
|
}
|
||||||
|
4
openmp/timing-study/output-1-1000-1000.txt
Normal file
4
openmp/timing-study/output-1-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 73.312715
|
||||||
|
Clock time: 77.450210
|
4
openmp/timing-study/output-1-1000-1250.txt
Normal file
4
openmp/timing-study/output-1-1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 113.646203
|
||||||
|
Clock time: 118.646829
|
4
openmp/timing-study/output-1-1000-1500.txt
Normal file
4
openmp/timing-study/output-1-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 163.034248
|
||||||
|
Clock time: 171.017339
|
4
openmp/timing-study/output-1-1000-1750.txt
Normal file
4
openmp/timing-study/output-1-1000-1750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 220.656360
|
||||||
|
Clock time: 231.050593
|
4
openmp/timing-study/output-1-1000-2000.txt
Normal file
4
openmp/timing-study/output-1-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 287.698970
|
||||||
|
Clock time: 300.176503
|
4
openmp/timing-study/output-1-1000-250.txt
Normal file
4
openmp/timing-study/output-1-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 5.088550
|
||||||
|
Clock time: 5.963468
|
4
openmp/timing-study/output-1-1000-500.txt
Normal file
4
openmp/timing-study/output-1-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 18.956314
|
||||||
|
Clock time: 20.440567
|
4
openmp/timing-study/output-1-1000-750.txt
Normal file
4
openmp/timing-study/output-1-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 41.660313
|
||||||
|
Clock time: 44.581177
|
4
openmp/timing-study/output-12-1000-1000.txt
Normal file
4
openmp/timing-study/output-12-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 7.026118
|
||||||
|
Clock time: 10.251363
|
4
openmp/timing-study/output-12-1000-1250.txt
Normal file
4
openmp/timing-study/output-12-1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 10.801676
|
||||||
|
Clock time: 15.900482
|
4
openmp/timing-study/output-12-1000-1500.txt
Normal file
4
openmp/timing-study/output-12-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 25.918769
|
||||||
|
Clock time: 34.562182
|
4
openmp/timing-study/output-12-1000-1750.txt
Normal file
4
openmp/timing-study/output-12-1000-1750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 25.862278
|
||||||
|
Clock time: 34.828966
|
4
openmp/timing-study/output-12-1000-2000.txt
Normal file
4
openmp/timing-study/output-12-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 30.705054
|
||||||
|
Clock time: 43.042410
|
4
openmp/timing-study/output-12-1000-250.txt
Normal file
4
openmp/timing-study/output-12-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.860293
|
||||||
|
Clock time: 3.331446
|
4
openmp/timing-study/output-12-1000-500.txt
Normal file
4
openmp/timing-study/output-12-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.097878
|
||||||
|
Clock time: 3.643646
|
4
openmp/timing-study/output-12-1000-750.txt
Normal file
4
openmp/timing-study/output-12-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 4.321400
|
||||||
|
Clock time: 6.663178
|
4
openmp/timing-study/output-16-1000-1000.txt
Normal file
4
openmp/timing-study/output-16-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 9.131005
|
||||||
|
Clock time: 12.449032
|
4
openmp/timing-study/output-16-1000-1250.txt
Normal file
4
openmp/timing-study/output-16-1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 13.434282
|
||||||
|
Clock time: 18.116181
|
4
openmp/timing-study/output-16-1000-1500.txt
Normal file
4
openmp/timing-study/output-16-1000-1500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 16.706991
|
||||||
|
Clock time: 24.712374
|
4
openmp/timing-study/output-16-1000-1750.txt
Normal file
4
openmp/timing-study/output-16-1000-1750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 23.733395
|
||||||
|
Clock time: 33.306681
|
4
openmp/timing-study/output-16-1000-2000.txt
Normal file
4
openmp/timing-study/output-16-1000-2000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 30.429469
|
||||||
|
Clock time: 42.369926
|
4
openmp/timing-study/output-16-1000-250.txt
Normal file
4
openmp/timing-study/output-16-1000-250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 0.738930
|
||||||
|
Clock time: 3.383995
|
4
openmp/timing-study/output-16-1000-500.txt
Normal file
4
openmp/timing-study/output-16-1000-500.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 2.352756
|
||||||
|
Clock time: 3.601057
|
4
openmp/timing-study/output-16-1000-750.txt
Normal file
4
openmp/timing-study/output-16-1000-750.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 5.147589
|
||||||
|
Clock time: 7.427564
|
4
openmp/timing-study/output-20-1000-1000.txt
Normal file
4
openmp/timing-study/output-20-1000-1000.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 7.390921
|
||||||
|
Clock time: 11.239737
|
4
openmp/timing-study/output-20-1000-1250.txt
Normal file
4
openmp/timing-study/output-20-1000-1250.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
===Timing===
|
||||||
|
Time computing life: 11.254777
|
||||||
|
Clock time: 16.136264
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user