feat: w7

feat: w3 done
2024-06-24 12:13:13 +02:00 · 2024-05-20 16:14:55 +02:00
16 changed files with 952 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,4 @@
 **/.Rhistory
 **/.RData
 **/.Ruserdata
 .Rproj.user
--- a/w3/README.rmd
+++ b/w3/README.rmd
@ -0,0 +1,432 @@
 # Übungsblatt 3 - Evolutionary Algorithms (Paul Helstab & Yannik Bretschneider)
 ## Loading Data
 ```{r setwd}
 # Get the current working directory
 current_directory <- getwd()
 # Define the folder name to check
 folder_name <- "w3"
 # Construct the full path to the folder
 folder_path <- file.path(current_directory, folder_name)
 # Check if the folder exists (means most likely in the repo root folder)
 if (dir.exists(folder_path)) {
    setwd(folder_path)
    cat("Changed working directory to:", getwd(), "\n")
 } else {
    cat("Folder 'w3' does not exist in the current working directory.
 Staying where we are\n")
 }
 ```
 ```{r read_data}
 file_path <- "./data1.txt"
 print(paste0("read data from '", file_path, "'"))
 data <- read.table(
    file_path,
    header = FALSE,
    sep = " ",
    # colClasses = c("numeric", "numeric", "integer", "integer")
 )
 colnames(data) <- c("x1", "x2", "x_bias", "label")
 evaluation_set_idx <- c(90:109)
 ```
 ```r
 # possible, but unnecessary here:
 # grab first 10 as evaluation set
 evaluation_set <- data[evaluation_set_idx, ]
 # grab the rest as training set
 training_set <- data[-evaluation_set_idx, ]
 ```
 ## Aufgabe 1: Perzeptron
 ```{r peceptron-impl}
 # randomized order
 random_order <- sample(nrow(data))
 vecs <- data[random_order, 1:3]
 classes <- data[random_order, 4]
 weights <- c(0, 0, 0)
 # matrix for logging weights
 wlog <- matrix(weights, ncol = 3)
 infer <- function(x, wghts) {
    return(sign(x %*% wghts))
 }
 perceptron_infer <- function(x) {
    return(infer(x, weights))
 }
 while (TRUE) {
    mispredictions <- 0
    for (i in seq_len(nrow(vecs))) {
        vec <- as.numeric(vecs[i, ])
        class <- classes[i]
        # print(vec %*% weights)
        error <- as.numeric(class - perceptron_infer(vec))
        if (error != 0) {
            mispredictions <- mispredictions + 1
            print("adjusting")
            weights <- weights + error * vec
            wlog <- rbind(wlog, weights)
        }
    }
    if (mispredictions == 0) {
        print("No more mispredictions")
        break
    } else {
        print(paste0("Misclassifications: ", mispredictions))
    }
 }
 print("NOTE: for more interesting plots, run this chunk multiple times until multiple \"adjusting\" can be seen in the output")
 ```
 ```{r apply-to-data}
 perceptron_reinfer <- t(sapply(seq_len(nrow(data)), function(i) {
    return(c(as.numeric(data[i, 1:3]), perceptron_infer((as.numeric(data[i, 1:3])))))
 }))
 # perceptron_reinfer
 ```
 ### Plots
 The plots show the datapoints on a 2D plot, with the colors showing the classes (`green := 1` and `red := -1`)  
 The separation plane defined by the normal of the weight vector is shown in increasing opacities, until the final 
 weights are reached.
 ```{r plot-weights}
 perceptron_plot <- function() {
    plot(data$x1, data$x2,
        col = ifelse(data$label == 1, "green", "red"),
        pch = 16, xlab = "x1", ylab = "x2", xlim = c(-5, 5), ylim = c(-5, 5)
    )
    legend("topright",
        legend = c("Class 1", "Class -1"),
        col = c("green", "red"), pch = 16
    )
    # Plot the separation planes
    num_steps <- nrow(wlog)
    alpha_step <- 1 / num_steps
    for (i in 1:num_steps) {
        a <- wlog[i, 1]
        b <- wlog[i, 2]
        c <- wlog[i, 3]
        # Calculate the line coordinates
        x <- seq(-5, 5, length.out = 100)
        y <- -(a * x + c) / b
        # Determine the line color based on the iteration
        alpha <- i * alpha_step
        line_color <- rgb(0, 0, 1, alpha)
        # Plot the line
        lines(x, y, col = line_color)
    }
    # Plot the final separation plane
    a <- weights[1]
    b <- weights[2]
    c <- weights[3]
    x <- seq(-5, 5, length.out = 100)
    y <- -(a * x + c) / b
    lines(x, y, col = "blue", lwd = 2)
 }
 perceptron_plot()
 ```
 ```{r plot-weights-pdf}
 pdf("perceptron-plot.pdf", width = 8, height = 6)
 perceptron_plot()
 dev.off()
 ```
 ## Aufgabe 2: Perzeptron Evolutionary Algorithm (EA)
 ```{r peceptron-ea-impl}
 # randomized order
 ea_random_order <- sample(nrow(data))
 ea_vecs <- data[ea_random_order, 1:3]
 ea_classes <- data[ea_random_order, 4]
 NUM_INDIVIDUALS <- 3
 MUTATE_ALL_GENES <- FALSE
 MUTATE_STEP <- function() {
    return(0.5)
 }
 # MUTATE_STEP <- function() {
 #     return(runif(1, min = -1, max = 1)[1])
 # }
 weights <- matrix(0, ncol = 3, nrow = NUM_INDIVIDUALS)
 # matrix for logging weights
 ea_wlog <- matrix(0, ncol = 3)
 fittest <- c(0, 0, 0)
 mutate <- function(genes, step_size_fn, mutate_all) {
    if (mutate_all == TRUE) {
        for (i in seq_along(genes)) {
            step <- step_size_fn() * sample(c(1, -1), size = 1)[1] # mutation in step_size, -step_size or not at all
            genes[i] <- genes[i] + step
        }
    } else {
        idx_to_mutate <- sample(seq_along(genes), size = 1)[1]
        genes[idx_to_mutate] <- genes[idx_to_mutate] + step_size_fn() * sample(c(1, -1), size = 1)[1] # mutation in step_size or not AT all
    }
    return(genes)
 }
 infer <- function(x, wghts) {
    return(sign(x %*% wghts))
 }
 fitness <- function(wghts) {
    mispredictions <- 0
    for (i in seq_len(nrow(ea_vecs))) {
        vec <- as.numeric(ea_vecs[i, ])
        class <- ea_classes[i]
        # print(vec %*% weights)
        error <- as.numeric(class - infer(vec, wghts))
        if (error != 0) {
            mispredictions <- mispredictions + 1
        }
    }
    return(mispredictions)
 }
 while (TRUE) {
    i_fittest <- weights[1, ]
    i_fit <- fitness(weights[1, ])
    for (individual in 2:NUM_INDIVIDUALS) {
        this_fitness <- fitness(weights[individual, ])
        if (this_fitness < i_fit) {
            i_fittest <- weights[individual, ]
            i_fit <- this_fitness
        }
    }
    # add fittest to weights
    ea_wlog <- rbind(ea_wlog, i_fittest)
    if (i_fit == 0) {
        print("No more mispredictions")
        fittest <- i_fittest
        break
    } else {
        print(paste0("Misclassifications: ", i_fit))
        for (individual in 1:NUM_INDIVIDUALS) {
            weights[individual, ] <- mutate(i_fittest, step_size_fn = MUTATE_STEP, mutate_all = MUTATE_ALL_GENES)
        }
    }
 }
 ```
 ```{r ea-apply-to-data}
 ea_reinfer <- t(sapply(seq_len(nrow(data)), function(i) {
    return(c(as.numeric(data[i, 1:3]), infer((as.numeric(data[i, 1:3])), fittest)))
 }))
 # ea_reinfer
 ```
 ### Plots
 The plots show the datapoints on a 2D plot, with the colors showing the classes (`green := 1` and `red := -1`)  
 The separation plane defined by the normal of the weight vector is shown in increasing opacities, until the final 
 weights are reached.
 ```{r ea-plot-weights}
 ea_plot <- function() {
    plot(data$x1, data$x2,
        col = ifelse(data$label == 1, "green", "red"),
        pch = 16, xlab = "x1", ylab = "x2", xlim = c(-5, 5), ylim = c(-5, 5)
    )
    legend("topright",
        legend = c("Class 1", "Class -1"),
        col = c("green", "red"), pch = 16
    )
    # Plot the separation planes
    num_steps <- nrow(ea_wlog)
    alpha_step <- 1 / num_steps
    for (i in 1:num_steps) {
        a <- ea_wlog[i, 1]
        b <- ea_wlog[i, 2]
        c <- ea_wlog[i, 3]
        # Calculate the line coordinates
        x <- seq(-5, 5, length.out = 100)
        y <- -(a * x + c) / b
        # Determine the line color based on the iteration
        alpha <- i * alpha_step
        line_color <- rgb(0, 0, 1, alpha)
        # Plot the line
        lines(x, y, col = line_color)
    }
    # Plot the final separation plane
    a <- fittest[1]
    b <- fittest[2]
    c <- fittest[3]
    x <- seq(-5, 5, length.out = 100)
    y <- -(a * x + c) / b
    lines(x, y, col = "blue", lwd = 2)
 }
 ea_plot()
 ```
 ```{r plot-weights-pdf}
 pdf("ea-plot_static-delta_one.pdf", width = 8, height = 6)
 ea_plot()
 dev.off()
 ```
 ### Evaluation
 Gaussian (Mutate All) with a high number of individuals generally acheives a correct result first-try. In our very constrained individuum pool tests with 3 individuals, this also seems to be the best choice for parameters for the EA. Its efficacy seems to increase even more with a larger gene pool. 
 ## Aufgabe 3: Exploration vs Exploitation
 ```{r setup}
 # generate a random binary vector of length 10
 generate_individual <- function() {
    sample(c(0, 1), 10, replace = TRUE)
 }
 # 1-point crossover
 one_point_crossover <- function(parent1, parent2) {
    point <- sample(1:9, 1)
    child1 <- c(parent1[1:point], parent2[(point + 1):10])
    child2 <- c(parent2[1:point], parent1[(point + 1):10])
    return(list(child1, child2))
 }
 # 2-point crossover
 two_point_crossover <- function(parent1, parent2) {
    points <- sort(sample(1:9, 2))
    child1 <- c(parent1[1:points[1]], parent2[(points[1] + 1):points[2]], parent1[(points[2] + 1):10])
    child2 <- c(parent2[1:points[1]], parent1[(points[1] + 1):points[2]], parent2[(points[2] + 1):10])
    return(list(child1, child2))
 }
 # uniform crossover
 uniform_crossover <- function(parent1, parent2) {
    mask <- sample(c(0, 1), 10, replace = TRUE)
    child1 <- ifelse(mask == 1, parent1, parent2)
    child2 <- ifelse(mask == 1, parent2, parent1)
    return(list(child1, child2))
 }
 # Function to convert binary vector to decimal
 binary_to_decimal <- function(binary_vector) {
    sum(binary_vector * 2^(rev(seq_along(binary_vector) - 1)))
 }
 ```
 ```{r initialize_parents}
 # init parents
 parent1 <- generate_individual()
 parent2 <- generate_individual()
 print(parent1)
 print(parent2)
 # Set num gens
 num_generations <- 10000
 # for tracking unique solutions
 unique_solutions <- list()
 unique_solutions[[binary_to_decimal(parent1)]] <- TRUE
 unique_solutions[[binary_to_decimal(parent2)]] <- TRUE
 ```
 ```{r run_genetic_algorithm}
 for (generation in 1:num_generations) {
    # Choose crossover method
    crossover_method <- sample(c("one_point", "two_point", "uniform"), 1)
    if (crossover_method == "one_point") {
        children <- one_point_crossover(parent1, parent2)
    } else if (crossover_method == "two_point") {
        children <- two_point_crossover(parent1, parent2)
    } else {
        children <- uniform_crossover(parent1, parent2)
    }
    # Replace
    parent1 <- children[[1]]
    parent2 <- children[[2]]
    # Track sols
    unique_solutions[[binary_to_decimal(parent1)]] <- TRUE
    unique_solutions[[binary_to_decimal(parent2)]] <- TRUE
 }
 # Calculate number of unique solutions observed
 num_unique_solutions <- length(unique_solutions)
 # Calculate number of possible solutions covered
 num_possible_solutions <- 2^10
 num_solutions_covered <- length(unique_solutions)
 # Print results
 cat("Number of unique solutions observed:", num_unique_solutions, "\n")
 cat("Number of possible solutions covered:", num_solutions_covered, "\n")
 cat("Percentage of 10-bit numbers covered:", round(num_solutions_covered / num_possible_solutions * 100, 2), "%\n")
 ```
--- a/w3/data1.txt
+++ b/w3/data1.txt
@ -0,0 +1,200 @@
 -1.32897014671217 -2.36785102664075 -1 1
 -2.15970367170725 -1.41686449781219 -1 1
 -1.38963684686937 -0.939314504497346 -1 1
 -2.39486126072160 0.156941452755162 -1 1
 -2.06264712461684 -0.804412097745957 -1 1
 -1.59995934139993 -2.07087560267702 -1 1
 -0.301759430691359 -0.780246221467475 -1 1
 -1.30880048251936 -0.655267889174166 -1 1
 -2.13821265199027 -3.26960613438387 -1 1
 -0.725161331033519 -0.677840073724945 -1 1
 -2.73393355319214 -0.76718258286048 -1 1
 -1.54347592440629 -0.712018637866198 -1 1
 -2.56545772113774 -2.86936228227077 -1 1
 -1.43201933507797 -2.13302840936145 -1 1
 -2.07330667024688 -3.72123784804042 -1 1
 -2.84033507635568 -0.243184822799128 -1 1
 -1.47403941210529 -0.549328960464295 -1 1
 -1.83738457125925 -2.66717286904658 -1 1
 -2.27871321216998 -2.98058889038791 -1 1
 -1.95615460430846 -1.67004701470848 -1 1
 -2.60073487762731 -2.20452775815597 -1 1
 -0.26734293595561 -1.71331299152070 -1 1
 -3.47839627261639 -2.83115791470682 -1 1
 -1.19695437101501 -0.945609834844714 -1 1
 -2.78998139170657 0.257675391308978 -1 1
 -2.12464531272405 0.777501391183755 -1 1
 -2.31932391039648 -1.86277899921771 -1 1
 -5.54482177865753 -2.70213117761077 -1 1
 -2.78021553510943 -3.03301792798881 -1 1
 -1.60768417956298 -1.74172015222199 -1 1
 -3.61446849343074 -1.80335879930914 -1 1
 -3.23422019981727 -2.70249791326027 -1 1
 -3.02682979962044 -1.74278869754663 -1 1
 -2.41276478154580 -0.776567065003342 -1 1
 -1.75552935518460 -1.91038140781957 -1 1
 -0.139451116184016 -1.18482537954623 -1 1
 -2.95650175314386 -1.29101568834187 -1 1
 -2.9489793179245 -1.61904473503749 -1 1
 -2.60315833182049 -3.18621980659633 -1 1
 -1.46831479870983 -3.80905675128548 -1 1
 -0.905005395061582 -2.03556983376913 -1 1
 -0.901078142728159 -0.825017845358332 -1 1
 -0.665652556519657 -1.32550835429996 -1 1
 -1.46112641743637 -1.97574413713737 -1 1
 -1.06457534745575 -2.785521609747 -1 1
 -1.34420271056785 -1.06666579289930 -1 1
 -2.07631856650808 -1.93542983825108 -1 1
 -2.35690175063823 -3.26600979191016 -1 1
 -1.7056897160519 -3.04297084229814 -1 1
 -2.80704309897473 -1.53106686645482 -1 1
 -2.44711856977661 -2.5849302819368 -1 1
 -1.51065176293650 -0.86509385564769 -1 1
 -1.94090434314101 -0.634401545904208 -1 1
 -2.04270534190753 -0.361097103415342 -1 1
 -1.06607471507957 -3.29065512487734 -1 1
 -3.24060617216703 -2.26876252288359 -1 1
 -0.0486425864884459 -1.13673076708325 -1 1
 -0.266739979422662 -0.878999257393973 -1 1
 -0.420637663314567 -0.482675855002418 -1 1
 -1.06406981568800 -4.17713862737802 -1 1
 -0.625197602089241 -2.91030552103365 -1 1
 -3.27562750609849 -2.30760870403296 -1 1
 -0.499947053765473 -3.29272300439147 -1 1
 -2.91674268064002 -1.68191356080486 -1 1
 -2.17888247623848 -0.662447607788639 -1 1
 -2.09290120372439 -0.593566611901163 -1 1
 -1.25264885418333 -3.199747175357 -1 1
 -2.00339086767013 -0.708453667782978 -1 1
 -1.36640247016023 -0.609207709185745 -1 1
 -0.911401862275114 -2.55455598535213 -1 1
 -1.64339071909895 -2.70101342303568 -1 1
 -1.71423981174520 -2.33624402853916 -1 1
 0.199702636780847 -1.83962146056716 -1 1
 -1.18400256670017 -2.30370589782562 -1 1
 -2.80977943163309 -0.908178300835772 -1 1
 -2.13569173985164 -0.91407304459949 -1 1
 -0.700888732479868 -1.74846788054676 -1 1
 -3.14027577143708 -0.902886138153785 -1 1
 -0.681225799170511 -0.525783857770332 -1 1
 -2.08732062127811 -1.89701037917116 -1 1
 -0.0586057242746669 -4.04053972551628 -1 1
 -1.88077968783775 0.421666644295287 -1 1
 -1.37795480435589 -1.77199347049285 -1 1
 -1.41637459799429 -2.44011696127162 -1 1
 -2.77060878833344 -0.929345155427266 -1 1
 -2.19214219615774 -1.98905330321631 -1 1
 -2.52816178990139 -2.2585972967922 -1 1
 -2.16234859219211 -1.60438927644781 -1 1
 -1.62177037851373 -0.99010930185921 -1 1
 -3.23318182763954 -1.13658991149595 -1 1
 -0.372235271791896 -2.34620587435379 -1 1
 -1.5001555577351 -1.95434020917647 -1 1
 -3.52894478305670 -2.96387692953025 -1 1
 -3.6191496287414 -0.799179012881697 -1 1
 -1.95049705463410 -1.76753962185022 -1 1
 -2.99928621186237 -1.39904916882386 -1 1
 -2.00475587442074 -2.36844162821365 -1 1
 -0.830570944902423 -1.45228655753697 -1 1
 -1.49328010078687 -2.08341021079596 -1 1
 -1.99969145454271 -1.06885729770145 -1 1
 2.49460626439645 3.86875369750027 -1 -1
 0.7446494499116 1.68808804411077 -1 -1
 1.71014916961483 1.73205312306135 -1 -1
 2.53359800973500 1.6784603245221 -1 -1
 4.36539754555225 2.68376377601958 -1 -1
 1.30100189213241 0.393578903079953 -1 -1
 3.39025005470382 3.81407573113482 -1 -1
 2.99425229125324 1.55248460007409 -1 -1
 -0.697405479258973 2.66052071663056 -1 -1
 1.05485413438518 1.43026328316951 -1 -1
 2.15499286281072 2.80396478089847 -1 -1
 1.13383276710016 1.16636578938430 -1 -1
 2.40583658837318 1.71309249518999 -1 -1
 3.9529836684296 -0.0306622708202284 -1 -1
 1.44673688777537 1.69536150393561 -1 -1
 2.40653572484163 2.53986102442878 -1 -1
 0.927093361660263 2.05434503667819 -1 -1
 2.13211058846022 0.780770245015111 -1 -1
 1.52942807748731 -0.384411036254697 -1 -1
 0.0374065135699719 3.71412296025181 -1 -1
 1.32860906911755 2.4624846210011 -1 -1
 3.11790204244201 4.23424681600907 -1 -1
 1.47550988790566 1.1318875058157 -1 -1
 1.76354867452616 0.186487030093577 -1 -1
 2.72517351969596 0.867467761927136 -1 -1
 1.64871907711219 1.14947956824857 -1 -1
 3.35728840379298 3.39193733666552 -1 -1
 1.97314223473543 0.71936438292462 -1 -1
 2.46627878877807 1.85228201801042 -1 -1
 1.82782122930369 0.662112012577584 -1 -1
 0.469136899706291 2.69385034831914 -1 -1
 2.95906527151821 1.34793880046152 -1 -1
 3.1845846978969 1.90347273893573 -1 -1
 1.62972843753454 1.88240236488746 -1 -1
 1.84880143044407 1.08902891549876 -1 -1
 2.74003682637714 3.28858597224097 -1 -1
 1.54579953010360 2.58652641559206 -1 -1
 2.41806254601694 1.41997311235272 -1 -1
 0.679066628438395 2.5492871531371 -1 -1
 3.53627392920231 2.20395858016939 -1 -1
 1.91575617514485 2.04417898782122 -1 -1
 1.99849496533958 1.34313999800502 -1 -1
 3.73338748817565 2.7673871731482 -1 -1
 2.35097932569732 3.09463656920031 -1 -1
 2.44118802836801 2.77962382569914 -1 -1
 2.72291823080543 4.11301272621089 -1 -1
 -0.333578934240685 1.93864004097092 -1 -1
 1.45281983530088 1.77630338471216 -1 -1
 4.06662985060252 2.79810284586335 -1 -1
 1.18796770270894 1.94600292544487 -1 -1
 0.690665017500061 3.21726119606832 -1 -1
 2.78069373531768 1.98025053530047 -1 -1
 2.07146143234442 2.77246826065373 -1 -1
 2.36549947930286 2.31182063191906 -1 -1
 1.4782493446176 1.98015787175957 -1 -1
 0.585216947594101 1.22131489755131 -1 -1
 1.85336420511426 0.707882262279918 -1 -1
 2.41381294874122 2.24766717770949 -1 -1
 2.39771241502864 1.77055374509899 -1 -1
 0.604267049505909 3.16280325091495 -1 -1
 2.81450596467485 1.14183730585994 -1 -1
 2.93364913348434 2.64917329100802 -1 -1
 3.18414663995059 3.16608797066945 -1 -1
 3.25191092293097 3.42335827192670 -1 -1
 2.2453247048103 1.59789384315062 -1 -1
 0.875192156027067 1.54233584846762 -1 -1
 2.89266085813154 1.29408855207970 -1 -1
 0.171910769280119 3.71229856331406 -1 -1
 0.519019904060457 1.22505957089452 -1 -1
 1.81434871964992 1.27449969743080 -1 -1
 2.57908019548281 2.02392243783863 -1 -1
 1.70639994544607 0.581553546693116 -1 -1
 2.35788618976732 0.611170547764614 -1 -1
 2.25599283045020 1.32790185796492 -1 -1
 1.35040643794059 2.07276219536503 -1 -1
 1.39329211427557 2.06473061135590 -1 -1
 1.33088217468705 2.39043191559914 -1 -1
 1.73118239651758 0.655853745458392 -1 -1
 2.70919473877065 1.94105221553915 -1 -1
 2.1448373798777 2.19455127482444 -1 -1
 0.974221061163073 0.780324226340574 -1 -1
 1.60717332452036 2.90271704500523 -1 -1
 1.78505625028714 0.29413215901734 -1 -1
 1.26508124491513 2.92214327142116 -1 -1
 0.653893256808685 1.48139537994700 -1 -1
 1.94673646103508 1.51013334653874 -1 -1
 1.37651122485238 3.31676717137167 -1 -1
 0.760451278123538 0.979923184771868 -1 -1
 1.28292094213002 2.66135701016817 -1 -1
 0.536342053807597 1.85100087363587 -1 -1
 1.18225844069278 0.39263971309868 -1 -1
 2.5717715638596 2.84462847265310 -1 -1
 2.42383258110959 0.666237598132384 -1 -1
 0.747348132067155 1.47264064251741 -1 -1
 0.838865215496192 1.76783636185541 -1 -1
 4.52610068296141 1.46232793550644 -1 -1
 1.06513040906981 2.40015863837706 -1 -1
 2.1234030850167 1.67672406529316 -1 -1
 3.93362700547878 1.77953431787149 -1 -1
 5.240508093196 3.19583522613743 -1 -1
--- a/w3/ea-plot_static-delta.pdf
+++ b/w3/ea-plot_static-delta.pdf
--- a/w3/ea-plot_static-delta_all.pdf
+++ b/w3/ea-plot_static-delta_all.pdf
--- a/w3/ea-plot_static-delta_one.pdf
+++ b/w3/ea-plot_static-delta_one.pdf
--- a/w3/ea-plot_static-gaussian_all.pdf
+++ b/w3/ea-plot_static-gaussian_all.pdf
--- a/w3/ea-plot_static-gaussian_one.pdf
+++ b/w3/ea-plot_static-gaussian_one.pdf
--- a/w3/evoalgs-w3.zip
+++ b/w3/evoalgs-w3.zip
--- a/w3/perceptron-plot.pdf
+++ b/w3/perceptron-plot.pdf
--- a/w3/perceptron-plot_3-adjustments.pdf
+++ b/w3/perceptron-plot_3-adjustments.pdf
--- a/w3/w3.Rproj
+++ b/w3/w3.Rproj
@ -0,0 +1,13 @@
 Version: 1.0
 RestoreWorkspace: Default
 SaveWorkspace: Default
 AlwaysSaveHistory: Default
 EnableCodeIndexing: Yes
 UseSpacesForTab: Yes
 NumSpacesForTab: 2
 Encoding: UTF-8
 RnwWeave: Sweave
 LaTeX: pdfLaTeX
--- a/w7/neg.example.RData
+++ b/w7/neg.example.RData
--- a/w7/pos.example.RData
+++ b/w7/pos.example.RData
--- a/w7/w7.Rmd
+++ b/w7/w7.Rmd
@ -0,0 +1,293 @@
 ```{R fsm}
 # Define FSM class
 FSM <- setRefClass("FSM",
  fields = list(
    states = "list",
    transitions = "list",
    start_state = "character",
    accept_states = "character",
    input_alphabet = "character",
    output_alphabet = "character"
  ),
  methods = list(
    initialize = function(states, transitions, start_state, accept_states) {
      states <<- states
      transitions <<- transitions
      start_state <<- start_state
      accept_states <<- accept_states
      input_alphabet <<- letters
      output_alphabet <<- c("0", "1")
    },
    change_output = function(state, new_output) {
      states[[state]]$output <<- new_output
    },
    modify_transition = function(from_state, input, to_state) {
      transitions[[from_state]][[input]] <<- to_state
    },
    add_state = function(state_name, output) {
      if (length(states) < 10) {
        states[[state_name]] <<- list(output = output)
        transitions[[state_name]] <<- list()
      }
    },
    remove_state = function(state_name) {
      if (length(states) <= 2 || state_name == start_state) {
        return(FALSE) # Don't remove if it's the start state or if we have 2 or fewer states
      }
      states[[state_name]] <<- NULL
      transitions[[state_name]] <<- NULL
      for (state in names(transitions)) {
        transitions[[state]] <<- transitions[[state]][names(transitions[[state]]) != state_name]
      }
      # If we removed an accept state, choose a new one if necessary
      if (state_name %in% accept_states) {
        accept_states <<- setdiff(accept_states, state_name)
        if (length(accept_states) == 0) {
          accept_states <<- sample(names(states), 1)
        }
      }
      # Redirect any transitions that went to the removed state
      for (state in names(transitions)) {
        for (input in names(transitions[[state]])) {
          if (transitions[[state]][[input]] == state_name) {
            transitions[[state]][[input]] <<- sample(setdiff(names(states), state), 1)
          }
        }
      }
      return(TRUE)
    },
    change_start_state = function(new_start_state) {
      start_state <<- new_start_state
    },
    run = function(input_string) {
      if (nchar(input_string) == 0) {
        return(states[[start_state]]$output)
      }
      current_state <- start_state
      for (char in strsplit(input_string, "")[[1]]) {
        if (!is.null(transitions[[current_state]][[char]])) {
          current_state <- transitions[[current_state]][[char]]
        }
        # Stay in current state if no transition is defined
      }
      if (is.null(states[[current_state]]) || is.null(states[[current_state]]$output)) {
        warning("Invalid final state or missing output")
        warning(sprintf("Current state: %s", current_state))
        return("0") # Default output
      }
      return(states[[current_state]]$output)
    },
    copy = function() {
      FSM$new(
        states = lapply(states, function(x) list(output = x$output)),
        transitions = lapply(transitions, function(x) as.list(x)),
        start_state = start_state,
        accept_states = accept_states
      )
    },
    check = function() {
      # Check if start state exists
      if (!(start_state %in% names(states))) {
        cat("Error: Start state does not exist\n")
        return(FALSE)
      }
      # Check if all accept states exist
      for (accept_state in accept_states) {
        if (!(accept_state %in% names(states))) {
          cat("Error: Accept state", accept_state, "does not exist\n")
          return(FALSE)
        }
      }
      # Check if all transitions are valid
      for (from_state in names(transitions)) {
        if (!(from_state %in% names(states))) {
          cat("Error: Transition from non-existent state", from_state, "\n")
          return(FALSE)
        }
        for (input in names(transitions[[from_state]])) {
          to_state <- transitions[[from_state]][[input]]
          if (!(to_state %in% names(states))) {
            cat("Error: Transition to non-existent state", to_state, "\n")
            return(FALSE)
          }
        }
      }
      # Check if all states have an output
      for (state in names(states)) {
        if (is.null(states[[state]]$output)) {
          cat("Error: State", state, "has no output\n")
          return(FALSE)
        }
      }
      # cat("FSM is well-formed\n")
      return(TRUE)
    }
  ),
 )
 # Create a new FSM
 create_fsm <- function() {
  num_states <- sample(2:10, 1)
  state_names <- paste0("S", 1:num_states)
  states <- lapply(state_names, function(s) {
    list(output = sample(c("0", "1"), 1))
  })
  names(states) <- state_names
  transitions <- lapply(state_names, function(s) {
    num_transitions <- max(1, sample(1:10, 1)) # Ensure at least one transition
    inputs <- sample(letters, num_transitions, replace = TRUE)
    to_states <- sample(state_names, num_transitions, replace = TRUE)
    trans <- setNames(to_states, inputs)
    return(as.list(trans))
  })
  names(transitions) <- state_names
  start_state <- sample(state_names, 1)
  accept_states <- sample(state_names, 1)
  FSM$new(states, transitions, start_state, accept_states)
 }
 # Mutation operation
 mutate_fsm <- function(fsm) {
  mutation_type <- sample(1:5, 1)
  new_fsm <- fsm$copy()
  if (mutation_type == 2 && length(new_fsm$states) <= 2) {
    if (mutation_type == 3 && length(new_fsm$states) >= 10) {
      mutation_type <- sample(c(1, 4, 5), 1)
    } else {
      mutation_type <- sample(c(1, 3, 4, 5), 1)
    }
  } else if (mutation_type == 3 && length(new_fsm$states) >= 10) {
    sample(c(1, 2, 4, 5), 1)
  }
  cat(mutation_type)
  if (mutation_type == 1) {
    # Change output signal
    state <- sample(names(new_fsm$states), 1)
    new_output <- sample(new_fsm$output_alphabet, 1)
    new_fsm$change_output(state, new_output)
  } else if (mutation_type == 2) {
    # Modify transition
    from_state <- sample(names(new_fsm$states), 1)
    input <- sample(new_fsm$input_alphabet, 1)
    to_state <- sample(names(new_fsm$states), 1)
    new_fsm$modify_transition(from_state, input, to_state)
  } else if (mutation_type == 3 && length(new_fsm$states) < 10) {
    # Add state
    new_state <- paste0("S", length(new_fsm$states) + 1)
    new_output <- sample(new_fsm$output_alphabet, 1)
    new_fsm$add_state(new_state, new_output)
    # Add at least one transition to/from the new state
    from_state <- sample(c(names(new_fsm$states), new_state), 1)
    to_state <- sample(c(names(new_fsm$states), new_state), 1)
    input <- sample(new_fsm$input_alphabet, 1)
    new_fsm$modify_transition(from_state, input, to_state)
  } else if (mutation_type == 4 && length(new_fsm$states) > 2) {
    # Remove state
    state_to_remove <- sample(names(new_fsm$states), 1)
    new_fsm$remove_state(state_to_remove)
  } else if (mutation_type == 5) {
    # Change start state
    new_start_state <- sample(names(new_fsm$states), 1)
    new_fsm$change_start_state(new_start_state)
  }
  new_fsm$check()
  return(new_fsm)
 }
 # Evaluate FSM fitness
 evaluate_fitness <- function(fsm, pos_examples, neg_examples) {
  correct <- 0
  total <- length(pos_examples) + length(neg_examples)
  for (example in pos_examples) {
    if (fsm$run(example) == "1") correct <- correct + 1
  }
  for (example in neg_examples) {
    if (fsm$run(example) == "0") correct <- correct + 1
  }
  return(correct / total)
 }
 # Evolutionary Algorithm
 evolve_fsm <- function(pos_examples, neg_examples, generations = 100) {
  population <- replicate(10, create_fsm())
  wlog <- list()
  for (gen in 1:generations) {
    # Create children through mutation
    children <- lapply(population, mutate_fsm)
    # Evaluate fitness of all FSMs
    cat("eval...\n")
    all_fsms <- c(population, children)
    fitness_scores <- sapply(all_fsms, evaluate_fitness, pos_examples, neg_examples)
    cat("max fitness: ")
    cat(max(fitness_scores))
    cat("\n")
    # Select the best 10 FSMs for the next generation
    population <- all_fsms[order(fitness_scores, decreasing = TRUE)[1:10]]
    # Calculate statistics for the current generation
    state_nums <- sapply(population, function(fsm) length(fsm$states))
    transition_nums <- sapply(population, function(fsm) sum(sapply(fsm$transitions, function(trans) length(trans))))
    wlog[[gen]] <- list(
      generation = gen,
      best_fitness = max(fitness_scores),
      worst_fitness = min(fitness_scores),
      average_fitness = mean(fitness_scores),
      median_fitness = median(fitness_scores),
      max_state_num = max(state_nums),
      min_state_num = min(state_nums),
      avg_state_num = mean(state_nums),
      median_state_num = median(state_nums),
      max_transition_num = max(transition_nums),
      min_transition_num = min(transition_nums),
      avg_transition_num = mean(transition_nums),
      median_transition_num = median(transition_nums)
    )
    cat(sprintf("Generation %d: Best Fitness = %.4f, Avg Fitness = %.4f\n", gen, max(fitness_scores), mean(fitness_scores)))
    cat(sprintf("               Min Fitness = %.4f, Std Dev = %.4f\n", min(fitness_scores), sd(fitness_scores)))
    cat(sprintf("               Median Fitness = %.4f\n", median(fitness_scores)))
    cat(sprintf("               Max States = %d, Min States = %d\n", max(state_nums), min(state_nums)))
    cat(sprintf("               Avg States = %.2f, Median States = %.1f\n", mean(state_nums), median(state_nums)))
    cat(sprintf("               Max Transitions = %d, Min Transitions = %d\n", max(transition_nums), min(transition_nums)))
    cat(sprintf("               Avg Transitions = %.2f, Median Transitions = %.1f\n", mean(transition_nums), median(transition_nums)))
  }
  # Return the best FSM
  best_fsm <- population[[which.max(sapply(population, evaluate_fitness, pos_examples, neg_examples))]]
  return(list(res = best_fsm, wlog = wlog))
 }
 # Load example data
 load("pos.example.RData")
 load("neg.example.RData")
 # Run the evolutionary algorithm
 best_fsm <- evolve_fsm(pos.examples, neg.examples)
 ```
--- a/w7/w7.Rproj
+++ b/w7/w7.Rproj
@ -0,0 +1,13 @@
 Version: 1.0
 RestoreWorkspace: Default
 SaveWorkspace: Default
 AlwaysSaveHistory: Default
 EnableCodeIndexing: Yes
 UseSpacesForTab: Yes
 NumSpacesForTab: 2
 Encoding: UTF-8
 RnwWeave: Sweave
 LaTeX: pdfLaTeX
Author	SHA1	Message	Date
Yandrik	10dc453116	feat: w7	2024-06-24 12:13:13 +02:00
Yandrik	5bdab9b8ab	feat: w3 done	2024-05-20 16:14:55 +02:00