diff --git a/w1/README.pdf b/w1/README.pdf new file mode 100644 index 0000000..4af56e4 Binary files /dev/null and b/w1/README.pdf differ diff --git a/w1/README.rmd b/w1/README.rmd new file mode 100644 index 0000000..4800420 --- /dev/null +++ b/w1/README.rmd @@ -0,0 +1,89 @@ +# Comparison between C-implemented and R-implemented dual-loop matrix summing function performance + +## Running + +To run this project, run the following commands: + +```bash +# matrix test +Rscript mat_tests.R +``` + +### Building and running + +If you edit the C code, to recompile run: + +```bash +bash make_c.sh +``` + +### View Evaluation + +To install packages necessary for this .rmd document, run: + +```bash +Rscript install_libs.R +``` + +## Evaluation +The experiment shows the performance comparison between the R-implemented and C-implemented matrix summing functions +for different matrix sizes. As the matrix size increases, the C-implemented function demonstrates significantly +better performance compared to the R-implemented function. 
+Surprisingly, the speedup remains fairly constant in relative terms, stabilizing at about 4x for matrices of 10000x10000 and larger. + + + + +> Note: The evaluation script was run on an AMD Ryzen 9 7950X3D CPU with enough RAM for all matrix sizes. + + +| Matrix size | sum1 (R) run duration (secs) | sum2 (C) run duration (secs) | +|-------------|---------------------------|---------------------------| +| 5x5 | 7.152557e-06 | 7.867813e-06 | +| 10x10 | 9.775162e-06 | 5.00679e-06 | +| 50x50 | 9.346008e-05 | 8.106232e-06 | +| 100x100 | 0.0003376007 | 1.955032e-05 | +| 500x500 | 0.007472992 | 0.001415014 | +| 1000x1000 | 0.03007007 | 0.005748034 | +| 5000x5000 | 0.6559205 | 0.1854615 | +| 10000x10000 | 2.692389 | 0.6747584 | +| 20000x20000 | 10.67763 | 2.615553 | +| 30000x30000 | 24.33534 | 5.987761 | + +```{r diagram} +library(ggplot2) +library(dplyr) +library(tidyr) + +# prepare data +data <- tribble( + ~Matrix.size, ~R.sum, ~C.sum, + "5", 7.152557e-06, 7.867813e-06, + "10", 9.775162e-06, 5.00679e-06, + "50", 9.346008e-05, 8.106232e-06, + "100", 0.0003376007, 1.955032e-05, + "500", 0.007472992, 0.001415014, + "1000", 0.03007007, 0.005748034, + "5000", 0.6559205, 0.1854615, + "10000", 2.692389, 0.6747584, + "20000", 10.67763, 2.615553, + "30000", 24.33534, 5.987761 +) + +# Convert Matrix.size to factor with desired order +data$Matrix.size <- factor(data$Matrix.size, levels = data$Matrix.size) + +# rearrange data +data_long <- data %>% + pivot_longer(cols = c(R.sum, C.sum), + names_to = "Method", + values_to = "Duration") + +# Create the plot +ggplot(data_long, aes(x = Matrix.size, y = Duration, color = Method)) + + geom_point() + + scale_y_log10() + + labs(x = "Matrix Size", y = "Duration (seconds)", color = "Method") + + ggtitle("Calculation Time per (square) matrix size") + + theme_minimal() +``` \ No newline at end of file diff --git a/w1/install_libs.R b/w1/install_libs.R new file mode 100644 index 0000000..e029319 --- /dev/null +++ b/w1/install_libs.R @@ -0,0 +1,4 @@ +install.packages("rmarkdown") 
+install.packages("ggplot2")
+install.packages("dplyr")
+install.packages("tidyr")
\ No newline at end of file
diff --git a/w1/make_c.sh b/w1/make_c.sh
new file mode 100644
index 0000000..2f85f92
--- /dev/null
+++ b/w1/make_c.sh
@@ -0,0 +1 @@
+R CMD SHLIB mat.c
diff --git a/w1/mat.R b/w1/mat.R
new file mode 100644
index 0000000..a217bfe
--- /dev/null
+++ b/w1/mat.R
@@ -0,0 +1,31 @@
+# Load the shared library produced by `R CMD SHLIB mat.c`. The file extension
+# is platform-specific (e.g. ".so" or ".dll"), so take it from .Platform
+# instead of hard-coding "mat.so".
+dyn.load(paste0("mat", .Platform$dynlib.ext))
+
+# Sum all elements of `matrix` with a plain R double loop (baseline).
+# Returns 0 when the matrix has no rows or no columns.
+sum1 <- function(matrix) {
+  result <- 0
+  for (i in seq_len(nrow(matrix))) {
+    for (j in seq_len(ncol(matrix))) {
+      result <- result + matrix[i, j]
+    }
+  }
+  result
+}
+
+# Sum all elements of `matrix` by calling the compiled `c_sum_matrix` routine.
+# Returns the `result` element of the list that .C() hands back.
+sum2 <- function(matrix) {
+  # Distinct local names so the base functions nrow()/ncol() are not masked.
+  n_rows <- nrow(matrix)
+  n_cols <- ncol(matrix)
+  .C(
+    "c_sum_matrix",
+    as.double(matrix),
+    as.integer(n_rows),
+    as.integer(n_cols),
+    result = double(1)
+  )$result
+}
diff --git a/w1/mat.c b/w1/mat.c
new file mode 100644
index 0000000..2033113
--- /dev/null
+++ b/w1/mat.c
@@ -0,0 +1,29 @@
+/* NOTE(review): the header names on the two #include lines below are not
+ * visible in this patch text - confirm they are intact in the real file. */
+#include
+#include
+
+/*
+ * Sum all elements of a matrix passed from R via .C().
+ *
+ * matrix: the matrix data as a flat array of (*nrow) * (*ncol) doubles
+ * nrow:   pointer to the number of rows
+ * ncol:   pointer to the number of columns
+ * result: output, receives the sum (0.0 if either dimension is 0)
+ *
+ * R stores matrices column-major, so i * ncol + j is in general not the
+ * position of element (i, j). That is harmless here: the two loops still
+ * visit every index from 0 to nrow * ncol - 1 exactly once, in increasing
+ * order.
+ */
+void c_sum_matrix(double *matrix, int *nrow, int *ncol, double *result) {
+    int i, j;
+    /* Accumulate locally and store once instead of writing through the
+     * pointer on every iteration. */
+    double total = 0.0;
+    for (i = 0; i < *nrow; i++) {
+        for (j = 0; j < *ncol; j++) {
+            total += matrix[i * (*ncol) + j];
+        }
+    }
+    *result = total;
+}
\ No newline at end of file
diff --git a/w1/mat.o b/w1/mat.o
new file mode 100644
index 0000000..c75cb0e
Binary files /dev/null and b/w1/mat.o differ
diff --git a/w1/mat.so b/w1/mat.so
new file mode 100755
index 0000000..ec89a41
Binary files /dev/null and b/w1/mat.so differ
diff --git a/w1/mat_tests.R b/w1/mat_tests.R
new file mode 100644
index 0000000..c736b8d
--- /dev/null
+++ b/w1/mat_tests.R
@@ -0,0 +1,35 @@
+source("mat.R")
+
+# Call `f` once, print how long the call took, and return its value.
+time <- function(f) {
+  start_time <- Sys.time()
+  val <- f()
+  end_time <- Sys.time()
+  # Pin the unit to seconds: a bare `end_time - start_time` lets difftime
+  # choose secs/mins/hours by magnitude, so slow runs would print minutes.
+  print(difftime(end_time, start_time, units = "secs"))
+  val
+}
+
+sizes <- c(5, 10, 50, 100, 500, 1000, 5000, 10000, 20000, 30000)
+# sizes <- c(5, 10, 50, 100, 500, 1000, 5000, 10000)
+
+# Warm up both implementations on small matrices and check that they agree
+# (all.equal tolerates the rounding difference from the summation order).
+for (size in 30:50) {
+  m <- matrix(runif(size * size), nrow = size, ncol = size)
+  stopifnot(isTRUE(all.equal(sum1(m), sum2(m))))
+}
+
+# Time both implementations on one random square matrix per size.
+for (size in sizes) {
+  m <- matrix(runif(size * size), nrow = size, ncol = size)
+
+  cat(sprintf("Matrix size: %dx%d\n", size, size))
+  cat("sum1 run duration: ")
+  time(function() sum1(m))
+  cat("sum2 run duration: ")
+  time(function() sum2(m))
+  cat("\n")
+}
+
diff --git a/w1/w1_yannik-bretschneider_evoalgs-practise.zip b/w1/w1_yannik-bretschneider_evoalgs-practise.zip
new file mode 100644
index 0000000..4d09c97
Binary files /dev/null and b/w1/w1_yannik-bretschneider_evoalgs-practise.zip differ