Compare commits

...

2 Commits

Author SHA1 Message Date
a5ccd8759c feat: W1 2024-05-06 08:54:48 +02:00
10bb038cdf refactor: moved files to intro 2024-05-06 08:54:44 +02:00
28 changed files with 161 additions and 4 deletions

8
.gitignore vendored
View File

@ -1,4 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
**/.Rproj.user
**/.Rhistory
**/.RData
**/.Ruserdata

BIN
w1/README.pdf Normal file

Binary file not shown.

89
w1/README.rmd Normal file
View File

@ -0,0 +1,89 @@
# Comparison between C-implemented and R-implemented dual-loop matrix summing function performance
## Running
To run this project, run the following commands:
```bash
# matrix test
Rscript mat_tests.R
```
### Building and running
If you edit the C code, recompile it by running:
```bash
bash make_c.sh
```
### View Evaluation
To install packages necessary for this .rmd document, run:
```bash
Rscript install_libs.R
```
## Evaluation
The experiment shows the performance comparison between the R-implemented and C-implemented matrix summing functions
for different matrix sizes. As the matrix size increases, the C-implemented function demonstrates significantly
better performance compared to the R-implemented function.
Surprisingly, the speedup remains fairly constant in relative terms for the largest matrices, stabilizing at about 4x.
> Note: The evaluation script was run on an AMD Ryzen 9 7950X3D CPU with enough RAM for all matrix sizes.
| Matrix size | sum1 run duration (secs) | sum2 run duration (secs) |
|-------------|---------------------------|---------------------------|
| 5x5 | 7.152557e-06 | 7.867813e-06 |
| 10x10 | 9.775162e-06 | 5.00679e-06 |
| 50x50 | 9.346008e-05 | 8.106232e-06 |
| 100x100 | 0.0003376007 | 1.955032e-05 |
| 500x500 | 0.007472992 | 0.001415014 |
| 1000x1000 | 0.03007007 | 0.005748034 |
| 5000x5000 | 0.6559205 | 0.1854615 |
| 10000x10000 | 2.692389 | 0.6747584 |
| 20000x20000 | 10.67763 | 2.615553 |
| 30000x30000 | 24.33534 | 5.987761 |
```{r diagram}
library(ggplot2)
library(dplyr)
library(tidyr)
# Measured run durations in seconds, one row per square-matrix side length.
# Matrix.size is turned into a factor whose levels follow the row order, so
# the x axis keeps the sizes in ascending order instead of sorting the labels
# alphabetically.
timings <- tribble(
  ~Matrix.size, ~R.sum,       ~C.sum,
  "5",          7.152557e-06, 7.867813e-06,
  "10",         9.775162e-06, 5.00679e-06,
  "50",         9.346008e-05, 8.106232e-06,
  "100",        0.0003376007, 1.955032e-05,
  "500",        0.007472992,  0.001415014,
  "1000",       0.03007007,   0.005748034,
  "5000",       0.6559205,    0.1854615,
  "10000",      2.692389,     0.6747584,
  "20000",      10.67763,     2.615553,
  "30000",      24.33534,     5.987761
) %>%
  mutate(Matrix.size = factor(Matrix.size, levels = Matrix.size))

# One row per (size, method) pair so the method can be mapped to colour.
timings_long <- pivot_longer(
  timings,
  cols = c(R.sum, C.sum),
  names_to = "Method",
  values_to = "Duration"
)

# Scatter plot of duration per size; the y axis is logarithmic because the
# durations span several orders of magnitude.
ggplot(timings_long, aes(x = Matrix.size, y = Duration, color = Method)) +
  geom_point() +
  scale_y_log10() +
  labs(
    title = "Calculation Time per (square) matrix size",
    x = "Matrix Size",
    y = "Duration (seconds)",
    color = "Method"
  ) +
  theme_minimal()
```

4
w1/install_libs.R Normal file
View File

@ -0,0 +1,4 @@
# Install the packages needed to render README.rmd and draw its diagram.
required_pkgs <- c("rmarkdown", "ggplot2", "dplyr", "tidyr")

# This script is meant to be run with Rscript, i.e. non-interactively, where
# install.packages() cannot prompt for a CRAN mirror and stops with an error
# if none is configured. Keep a configured mirror, otherwise fall back to the
# CRAN cloud redirector.
repos <- getOption("repos")
if (!"CRAN" %in% names(repos) || identical(repos[["CRAN"]], "@CRAN@")) {
  repos["CRAN"] <- "https://cloud.r-project.org"
}

install.packages(required_pkgs, repos = repos)

1
w1/make_c.sh Normal file
View File

@ -0,0 +1 @@
# Compile mat.c into the shared library that mat.R loads via dyn.load("mat.so").
# mat.c is given as a relative path, so run this from the directory that
# contains it; re-run after every change to the C source.
R CMD SHLIB mat.c

19
w1/mat.R Normal file
View File

@ -0,0 +1,19 @@
dyn.load("mat.so")
#' Sum all entries of a matrix with an explicit double loop in R.
#'
#' Entries are visited row by row (outer loop over rows, inner loop over
#' columns) and added one at a time to an accumulator that starts at 0.
#'
#' @param matrix A matrix whose entries can be added with `+`.
#' @return The accumulated sum; `0` if the matrix has no rows or no columns.
sum1 <- function(matrix) {
  total <- 0
  for (row_idx in seq_len(nrow(matrix))) {
    for (col_idx in seq_len(ncol(matrix))) {
      total <- total + matrix[row_idx, col_idx]
    }
  }
  total
}
#' Sum all entries of a matrix using the compiled `c_sum_matrix` routine.
#'
#' The matrix is flattened with `as.double()` and handed to the C routine via
#' `.C()` together with its row and column counts; the routine writes the sum
#' into a length-one double vector that is returned here.
#'
#' @param matrix A numeric matrix.
#' @return A length-one double holding the sum of all entries.
sum2 <- function(matrix) {
  # Without dimensions, nrow()/ncol() return NULL, which would reach the C
  # routine as zero-length integer vectors that it dereferences unchecked.
  if (!is.matrix(matrix)) {
    stop("`matrix` must be a matrix", call. = FALSE)
  }
  n_rows <- nrow(matrix)
  n_cols <- ncol(matrix)
  out <- .C(
    "c_sum_matrix",
    as.double(matrix),
    as.integer(n_rows),
    as.integer(n_cols),
    result = double(1)
  )
  out$result
}

12
w1/mat.c Normal file
View File

@ -0,0 +1,12 @@
#include <R.h>
#include <Rinternals.h>
/*
 * Sum the elements of a matrix passed as a flat array of doubles.
 *
 * matrix: pointer to the elements; offsets 0 .. (*nrow * *ncol - 1) are read.
 * nrow:   pointer to the number of rows.
 * ncol:   pointer to the number of columns.
 * result: pointer to the output; reset to 0.0 and then accumulated in place.
 *
 * The offset row * ncol + col walks the buffer front to back, so every
 * element is added exactly once in ascending offset order.
 */
void c_sum_matrix(double *matrix, int *nrow, int *ncol, double *result) {
    int row = 0;

    *result = 0.0;
    while (row < *nrow) {
        int col;
        for (col = 0; col < *ncol; ++col) {
            *result += matrix[row * (*ncol) + col];
        }
        ++row;
    }
}

BIN
w1/mat.o Normal file

Binary file not shown.

BIN
w1/mat.so Executable file

Binary file not shown.

32
w1/mat_tests.R Normal file
View File

@ -0,0 +1,32 @@
source("mat.R")
#' Run a zero-argument function and print how long it took.
#'
#' The duration is the difference between two `Sys.time()` readings taken
#' immediately before and after the call, printed as a time difference.
#'
#' @param f A function that can be called without arguments.
#' @return Whatever `f()` returned.
time <- function(f) {
  started_at <- Sys.time()
  value <- f()
  elapsed <- Sys.time() - started_at
  print(elapsed)
  value
}
# Side lengths of the square matrices to benchmark, in ascending order.
sizes <- c(5, 10, 50, 100, 500, 1000, 5000, 10000, 20000, 30000)
# sizes <- c(5, 10, 50, 100, 500, 1000, 5000, 10000)

# warm up: call both implementations on small random matrices (30x30 up to
# 50x50) before any timing is printed
for (side in 30:50) {
  m <- matrix(runif(side * side), nrow = side, ncol = side)
  sum1(m)
  sum2(m)
}

# For each size, draw one random matrix and time both implementations on it.
for (side in sizes) {
  m <- matrix(runif(side * side), nrow = side, ncol = side)

  cat(sprintf("Matrix size: %dx%d\n", side, side))

  cat("sum1 run duration: ")
  time(function() sum1(m))

  cat("sum2 run duration: ")
  time(function() sum2(m))

  cat("\n")
}

Binary file not shown.