Compare commits

...

2 Commits

Author SHA1 Message Date
10dc453116 feat: w7 2024-06-24 12:13:13 +02:00
5bdab9b8ab feat: w3 done 2024-05-20 16:14:55 +02:00
16 changed files with 952 additions and 0 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
**/.Rhistory **/.Rhistory
**/.RData **/.RData
**/.Ruserdata **/.Ruserdata
.Rproj.user

432
w3/README.rmd Normal file
View File

@ -0,0 +1,432 @@
# Übungsblatt 3 - Evolutionary Algorithms (Paul Helstab & Yannik Bretschneider)
## Loading Data
```{r setwd}
# Get the current working directory
current_directory <- getwd()
# Define the folder name to check
folder_name <- "w3"
# Construct the full path to the folder
folder_path <- file.path(current_directory, folder_name)
# Check if the folder exists (means most likely in the repo root folder)
if (dir.exists(folder_path)) {
setwd(folder_path)
cat("Changed working directory to:", getwd(), "\n")
} else {
cat("Folder 'w3' does not exist in the current working directory.
Staying where we are\n")
}
```
```{r read_data}
file_path <- "./data1.txt"
print(paste0("read data from '", file_path, "'"))
data <- read.table(
file_path,
header = FALSE,
sep = " ",
# colClasses = c("numeric", "numeric", "integer", "integer")
)
colnames(data) <- c("x1", "x2", "x_bias", "label")
evaluation_set_idx <- c(90:109)
```
```r
# possible, but unnecessary here:
# grab first 10 as evaluation set
evaluation_set <- data[evaluation_set_idx, ]
# grab the rest as training set
training_set <- data[-evaluation_set_idx, ]
```
## Aufgabe 1: Perzeptron
```{r peceptron-impl}
# randomized order
random_order <- sample(nrow(data))
vecs <- data[random_order, 1:3]
classes <- data[random_order, 4]
weights <- c(0, 0, 0)
# matrix for logging weights
wlog <- matrix(weights, ncol = 3)
infer <- function(x, wghts) {
return(sign(x %*% wghts))
}
perceptron_infer <- function(x) {
return(infer(x, weights))
}
while (TRUE) {
mispredictions <- 0
for (i in seq_len(nrow(vecs))) {
vec <- as.numeric(vecs[i, ])
class <- classes[i]
# print(vec %*% weights)
error <- as.numeric(class - perceptron_infer(vec))
if (error != 0) {
mispredictions <- mispredictions + 1
print("adjusting")
weights <- weights + error * vec
wlog <- rbind(wlog, weights)
}
}
if (mispredictions == 0) {
print("No more mispredictions")
break
} else {
print(paste0("Misclassifications: ", mispredictions))
}
}
print("NOTE: for more interesting plots, run this chunk multiple times until multiple \"adjusting\" can be seen in the output")
```
```{r apply-to-data}
perceptron_reinfer <- t(sapply(seq_len(nrow(data)), function(i) {
return(c(as.numeric(data[i, 1:3]), perceptron_infer((as.numeric(data[i, 1:3])))))
}))
# perceptron_reinfer
```
### Plots
The plots show the datapoints on a 2D plot, with the colors showing the classes (`green := 1` and `red := -1`)
The separation plane defined by the normal of the weight vector is shown in increasing opacities, until the final
weights are reached.
```{r plot-weights}
perceptron_plot <- function() {
plot(data$x1, data$x2,
col = ifelse(data$label == 1, "green", "red"),
pch = 16, xlab = "x1", ylab = "x2", xlim = c(-5, 5), ylim = c(-5, 5)
)
legend("topright",
legend = c("Class 1", "Class -1"),
col = c("green", "red"), pch = 16
)
# Plot the separation planes
num_steps <- nrow(wlog)
alpha_step <- 1 / num_steps
for (i in 1:num_steps) {
a <- wlog[i, 1]
b <- wlog[i, 2]
c <- wlog[i, 3]
# Calculate the line coordinates
x <- seq(-5, 5, length.out = 100)
y <- -(a * x + c) / b
# Determine the line color based on the iteration
alpha <- i * alpha_step
line_color <- rgb(0, 0, 1, alpha)
# Plot the line
lines(x, y, col = line_color)
}
# Plot the final separation plane
a <- weights[1]
b <- weights[2]
c <- weights[3]
x <- seq(-5, 5, length.out = 100)
y <- -(a * x + c) / b
lines(x, y, col = "blue", lwd = 2)
}
perceptron_plot()
```
```{r plot-weights-pdf}
pdf("perceptron-plot.pdf", width = 8, height = 6)
perceptron_plot()
dev.off()
```
## Aufgabe 2: Perzeptron Evolutionary Algorithm (EA)
```{r peceptron-ea-impl}
# randomized order
ea_random_order <- sample(nrow(data))
ea_vecs <- data[ea_random_order, 1:3]
ea_classes <- data[ea_random_order, 4]
NUM_INDIVIDUALS <- 3
MUTATE_ALL_GENES <- FALSE
MUTATE_STEP <- function() {
return(0.5)
}
# MUTATE_STEP <- function() {
# return(runif(1, min = -1, max = 1)[1])
# }
weights <- matrix(0, ncol = 3, nrow = NUM_INDIVIDUALS)
# matrix for logging weights
ea_wlog <- matrix(0, ncol = 3)
fittest <- c(0, 0, 0)
mutate <- function(genes, step_size_fn, mutate_all) {
if (mutate_all == TRUE) {
for (i in seq_along(genes)) {
step <- step_size_fn() * sample(c(1, -1), size = 1)[1] # mutation in step_size, -step_size or not at all
genes[i] <- genes[i] + step
}
} else {
idx_to_mutate <- sample(seq_along(genes), size = 1)[1]
genes[idx_to_mutate] <- genes[idx_to_mutate] + step_size_fn() * sample(c(1, -1), size = 1)[1] # mutation in step_size or not AT all
}
return(genes)
}
infer <- function(x, wghts) {
return(sign(x %*% wghts))
}
fitness <- function(wghts) {
mispredictions <- 0
for (i in seq_len(nrow(ea_vecs))) {
vec <- as.numeric(ea_vecs[i, ])
class <- ea_classes[i]
# print(vec %*% weights)
error <- as.numeric(class - infer(vec, wghts))
if (error != 0) {
mispredictions <- mispredictions + 1
}
}
return(mispredictions)
}
while (TRUE) {
i_fittest <- weights[1, ]
i_fit <- fitness(weights[1, ])
for (individual in 2:NUM_INDIVIDUALS) {
this_fitness <- fitness(weights[individual, ])
if (this_fitness < i_fit) {
i_fittest <- weights[individual, ]
i_fit <- this_fitness
}
}
# add fittest to weights
ea_wlog <- rbind(ea_wlog, i_fittest)
if (i_fit == 0) {
print("No more mispredictions")
fittest <- i_fittest
break
} else {
print(paste0("Misclassifications: ", i_fit))
for (individual in 1:NUM_INDIVIDUALS) {
weights[individual, ] <- mutate(i_fittest, step_size_fn = MUTATE_STEP, mutate_all = MUTATE_ALL_GENES)
}
}
}
```
```{r ea-apply-to-data}
ea_reinfer <- t(sapply(seq_len(nrow(data)), function(i) {
return(c(as.numeric(data[i, 1:3]), infer((as.numeric(data[i, 1:3])), fittest)))
}))
# ea_reinfer
```
### Plots
The plots show the datapoints on a 2D plot, with the colors showing the classes (`green := 1` and `red := -1`)
The separation plane defined by the normal of the weight vector is shown in increasing opacities, until the final
weights are reached.
```{r ea-plot-weights}
ea_plot <- function() {
plot(data$x1, data$x2,
col = ifelse(data$label == 1, "green", "red"),
pch = 16, xlab = "x1", ylab = "x2", xlim = c(-5, 5), ylim = c(-5, 5)
)
legend("topright",
legend = c("Class 1", "Class -1"),
col = c("green", "red"), pch = 16
)
# Plot the separation planes
num_steps <- nrow(ea_wlog)
alpha_step <- 1 / num_steps
for (i in 1:num_steps) {
a <- ea_wlog[i, 1]
b <- ea_wlog[i, 2]
c <- ea_wlog[i, 3]
# Calculate the line coordinates
x <- seq(-5, 5, length.out = 100)
y <- -(a * x + c) / b
# Determine the line color based on the iteration
alpha <- i * alpha_step
line_color <- rgb(0, 0, 1, alpha)
# Plot the line
lines(x, y, col = line_color)
}
# Plot the final separation plane
a <- fittest[1]
b <- fittest[2]
c <- fittest[3]
x <- seq(-5, 5, length.out = 100)
y <- -(a * x + c) / b
lines(x, y, col = "blue", lwd = 2)
}
ea_plot()
```
```{r plot-weights-pdf}
pdf("ea-plot_static-delta_one.pdf", width = 8, height = 6)
ea_plot()
dev.off()
```
### Evaluation
Gaussian (Mutate All) with a high number of individuals generally acheives a correct result first-try. In our very constrained individuum pool tests with 3 individuals, this also seems to be the best choice for parameters for the EA. Its efficacy seems to increase even more with a larger gene pool.
## Aufgabe 3: Exploration vs Exploitation
```{r setup}
# generate a random binary vector of length 10
generate_individual <- function() {
sample(c(0, 1), 10, replace = TRUE)
}
# 1-point crossover
one_point_crossover <- function(parent1, parent2) {
point <- sample(1:9, 1)
child1 <- c(parent1[1:point], parent2[(point + 1):10])
child2 <- c(parent2[1:point], parent1[(point + 1):10])
return(list(child1, child2))
}
# 2-point crossover
two_point_crossover <- function(parent1, parent2) {
points <- sort(sample(1:9, 2))
child1 <- c(parent1[1:points[1]], parent2[(points[1] + 1):points[2]], parent1[(points[2] + 1):10])
child2 <- c(parent2[1:points[1]], parent1[(points[1] + 1):points[2]], parent2[(points[2] + 1):10])
return(list(child1, child2))
}
# uniform crossover
uniform_crossover <- function(parent1, parent2) {
mask <- sample(c(0, 1), 10, replace = TRUE)
child1 <- ifelse(mask == 1, parent1, parent2)
child2 <- ifelse(mask == 1, parent2, parent1)
return(list(child1, child2))
}
# Function to convert binary vector to decimal
binary_to_decimal <- function(binary_vector) {
sum(binary_vector * 2^(rev(seq_along(binary_vector) - 1)))
}
```
```{r initialize_parents}
# init parents
parent1 <- generate_individual()
parent2 <- generate_individual()
print(parent1)
print(parent2)
# Set num gens
num_generations <- 10000
# for tracking unique solutions
unique_solutions <- list()
unique_solutions[[binary_to_decimal(parent1)]] <- TRUE
unique_solutions[[binary_to_decimal(parent2)]] <- TRUE
```
```{r run_genetic_algorithm}
for (generation in 1:num_generations) {
# Choose crossover method
crossover_method <- sample(c("one_point", "two_point", "uniform"), 1)
if (crossover_method == "one_point") {
children <- one_point_crossover(parent1, parent2)
} else if (crossover_method == "two_point") {
children <- two_point_crossover(parent1, parent2)
} else {
children <- uniform_crossover(parent1, parent2)
}
# Replace
parent1 <- children[[1]]
parent2 <- children[[2]]
# Track sols
unique_solutions[[binary_to_decimal(parent1)]] <- TRUE
unique_solutions[[binary_to_decimal(parent2)]] <- TRUE
}
# Calculate number of unique solutions observed
num_unique_solutions <- length(unique_solutions)
# Calculate number of possible solutions covered
num_possible_solutions <- 2^10
num_solutions_covered <- length(unique_solutions)
# Print results
cat("Number of unique solutions observed:", num_unique_solutions, "\n")
cat("Number of possible solutions covered:", num_solutions_covered, "\n")
cat("Percentage of 10-bit numbers covered:", round(num_solutions_covered / num_possible_solutions * 100, 2), "%\n")
```

200
w3/data1.txt Normal file
View File

@ -0,0 +1,200 @@
-1.32897014671217 -2.36785102664075 -1 1
-2.15970367170725 -1.41686449781219 -1 1
-1.38963684686937 -0.939314504497346 -1 1
-2.39486126072160 0.156941452755162 -1 1
-2.06264712461684 -0.804412097745957 -1 1
-1.59995934139993 -2.07087560267702 -1 1
-0.301759430691359 -0.780246221467475 -1 1
-1.30880048251936 -0.655267889174166 -1 1
-2.13821265199027 -3.26960613438387 -1 1
-0.725161331033519 -0.677840073724945 -1 1
-2.73393355319214 -0.76718258286048 -1 1
-1.54347592440629 -0.712018637866198 -1 1
-2.56545772113774 -2.86936228227077 -1 1
-1.43201933507797 -2.13302840936145 -1 1
-2.07330667024688 -3.72123784804042 -1 1
-2.84033507635568 -0.243184822799128 -1 1
-1.47403941210529 -0.549328960464295 -1 1
-1.83738457125925 -2.66717286904658 -1 1
-2.27871321216998 -2.98058889038791 -1 1
-1.95615460430846 -1.67004701470848 -1 1
-2.60073487762731 -2.20452775815597 -1 1
-0.26734293595561 -1.71331299152070 -1 1
-3.47839627261639 -2.83115791470682 -1 1
-1.19695437101501 -0.945609834844714 -1 1
-2.78998139170657 0.257675391308978 -1 1
-2.12464531272405 0.777501391183755 -1 1
-2.31932391039648 -1.86277899921771 -1 1
-5.54482177865753 -2.70213117761077 -1 1
-2.78021553510943 -3.03301792798881 -1 1
-1.60768417956298 -1.74172015222199 -1 1
-3.61446849343074 -1.80335879930914 -1 1
-3.23422019981727 -2.70249791326027 -1 1
-3.02682979962044 -1.74278869754663 -1 1
-2.41276478154580 -0.776567065003342 -1 1
-1.75552935518460 -1.91038140781957 -1 1
-0.139451116184016 -1.18482537954623 -1 1
-2.95650175314386 -1.29101568834187 -1 1
-2.9489793179245 -1.61904473503749 -1 1
-2.60315833182049 -3.18621980659633 -1 1
-1.46831479870983 -3.80905675128548 -1 1
-0.905005395061582 -2.03556983376913 -1 1
-0.901078142728159 -0.825017845358332 -1 1
-0.665652556519657 -1.32550835429996 -1 1
-1.46112641743637 -1.97574413713737 -1 1
-1.06457534745575 -2.785521609747 -1 1
-1.34420271056785 -1.06666579289930 -1 1
-2.07631856650808 -1.93542983825108 -1 1
-2.35690175063823 -3.26600979191016 -1 1
-1.7056897160519 -3.04297084229814 -1 1
-2.80704309897473 -1.53106686645482 -1 1
-2.44711856977661 -2.5849302819368 -1 1
-1.51065176293650 -0.86509385564769 -1 1
-1.94090434314101 -0.634401545904208 -1 1
-2.04270534190753 -0.361097103415342 -1 1
-1.06607471507957 -3.29065512487734 -1 1
-3.24060617216703 -2.26876252288359 -1 1
-0.0486425864884459 -1.13673076708325 -1 1
-0.266739979422662 -0.878999257393973 -1 1
-0.420637663314567 -0.482675855002418 -1 1
-1.06406981568800 -4.17713862737802 -1 1
-0.625197602089241 -2.91030552103365 -1 1
-3.27562750609849 -2.30760870403296 -1 1
-0.499947053765473 -3.29272300439147 -1 1
-2.91674268064002 -1.68191356080486 -1 1
-2.17888247623848 -0.662447607788639 -1 1
-2.09290120372439 -0.593566611901163 -1 1
-1.25264885418333 -3.199747175357 -1 1
-2.00339086767013 -0.708453667782978 -1 1
-1.36640247016023 -0.609207709185745 -1 1
-0.911401862275114 -2.55455598535213 -1 1
-1.64339071909895 -2.70101342303568 -1 1
-1.71423981174520 -2.33624402853916 -1 1
0.199702636780847 -1.83962146056716 -1 1
-1.18400256670017 -2.30370589782562 -1 1
-2.80977943163309 -0.908178300835772 -1 1
-2.13569173985164 -0.91407304459949 -1 1
-0.700888732479868 -1.74846788054676 -1 1
-3.14027577143708 -0.902886138153785 -1 1
-0.681225799170511 -0.525783857770332 -1 1
-2.08732062127811 -1.89701037917116 -1 1
-0.0586057242746669 -4.04053972551628 -1 1
-1.88077968783775 0.421666644295287 -1 1
-1.37795480435589 -1.77199347049285 -1 1
-1.41637459799429 -2.44011696127162 -1 1
-2.77060878833344 -0.929345155427266 -1 1
-2.19214219615774 -1.98905330321631 -1 1
-2.52816178990139 -2.2585972967922 -1 1
-2.16234859219211 -1.60438927644781 -1 1
-1.62177037851373 -0.99010930185921 -1 1
-3.23318182763954 -1.13658991149595 -1 1
-0.372235271791896 -2.34620587435379 -1 1
-1.5001555577351 -1.95434020917647 -1 1
-3.52894478305670 -2.96387692953025 -1 1
-3.6191496287414 -0.799179012881697 -1 1
-1.95049705463410 -1.76753962185022 -1 1
-2.99928621186237 -1.39904916882386 -1 1
-2.00475587442074 -2.36844162821365 -1 1
-0.830570944902423 -1.45228655753697 -1 1
-1.49328010078687 -2.08341021079596 -1 1
-1.99969145454271 -1.06885729770145 -1 1
2.49460626439645 3.86875369750027 -1 -1
0.7446494499116 1.68808804411077 -1 -1
1.71014916961483 1.73205312306135 -1 -1
2.53359800973500 1.6784603245221 -1 -1
4.36539754555225 2.68376377601958 -1 -1
1.30100189213241 0.393578903079953 -1 -1
3.39025005470382 3.81407573113482 -1 -1
2.99425229125324 1.55248460007409 -1 -1
-0.697405479258973 2.66052071663056 -1 -1
1.05485413438518 1.43026328316951 -1 -1
2.15499286281072 2.80396478089847 -1 -1
1.13383276710016 1.16636578938430 -1 -1
2.40583658837318 1.71309249518999 -1 -1
3.9529836684296 -0.0306622708202284 -1 -1
1.44673688777537 1.69536150393561 -1 -1
2.40653572484163 2.53986102442878 -1 -1
0.927093361660263 2.05434503667819 -1 -1
2.13211058846022 0.780770245015111 -1 -1
1.52942807748731 -0.384411036254697 -1 -1
0.0374065135699719 3.71412296025181 -1 -1
1.32860906911755 2.4624846210011 -1 -1
3.11790204244201 4.23424681600907 -1 -1
1.47550988790566 1.1318875058157 -1 -1
1.76354867452616 0.186487030093577 -1 -1
2.72517351969596 0.867467761927136 -1 -1
1.64871907711219 1.14947956824857 -1 -1
3.35728840379298 3.39193733666552 -1 -1
1.97314223473543 0.71936438292462 -1 -1
2.46627878877807 1.85228201801042 -1 -1
1.82782122930369 0.662112012577584 -1 -1
0.469136899706291 2.69385034831914 -1 -1
2.95906527151821 1.34793880046152 -1 -1
3.1845846978969 1.90347273893573 -1 -1
1.62972843753454 1.88240236488746 -1 -1
1.84880143044407 1.08902891549876 -1 -1
2.74003682637714 3.28858597224097 -1 -1
1.54579953010360 2.58652641559206 -1 -1
2.41806254601694 1.41997311235272 -1 -1
0.679066628438395 2.5492871531371 -1 -1
3.53627392920231 2.20395858016939 -1 -1
1.91575617514485 2.04417898782122 -1 -1
1.99849496533958 1.34313999800502 -1 -1
3.73338748817565 2.7673871731482 -1 -1
2.35097932569732 3.09463656920031 -1 -1
2.44118802836801 2.77962382569914 -1 -1
2.72291823080543 4.11301272621089 -1 -1
-0.333578934240685 1.93864004097092 -1 -1
1.45281983530088 1.77630338471216 -1 -1
4.06662985060252 2.79810284586335 -1 -1
1.18796770270894 1.94600292544487 -1 -1
0.690665017500061 3.21726119606832 -1 -1
2.78069373531768 1.98025053530047 -1 -1
2.07146143234442 2.77246826065373 -1 -1
2.36549947930286 2.31182063191906 -1 -1
1.4782493446176 1.98015787175957 -1 -1
0.585216947594101 1.22131489755131 -1 -1
1.85336420511426 0.707882262279918 -1 -1
2.41381294874122 2.24766717770949 -1 -1
2.39771241502864 1.77055374509899 -1 -1
0.604267049505909 3.16280325091495 -1 -1
2.81450596467485 1.14183730585994 -1 -1
2.93364913348434 2.64917329100802 -1 -1
3.18414663995059 3.16608797066945 -1 -1
3.25191092293097 3.42335827192670 -1 -1
2.2453247048103 1.59789384315062 -1 -1
0.875192156027067 1.54233584846762 -1 -1
2.89266085813154 1.29408855207970 -1 -1
0.171910769280119 3.71229856331406 -1 -1
0.519019904060457 1.22505957089452 -1 -1
1.81434871964992 1.27449969743080 -1 -1
2.57908019548281 2.02392243783863 -1 -1
1.70639994544607 0.581553546693116 -1 -1
2.35788618976732 0.611170547764614 -1 -1
2.25599283045020 1.32790185796492 -1 -1
1.35040643794059 2.07276219536503 -1 -1
1.39329211427557 2.06473061135590 -1 -1
1.33088217468705 2.39043191559914 -1 -1
1.73118239651758 0.655853745458392 -1 -1
2.70919473877065 1.94105221553915 -1 -1
2.1448373798777 2.19455127482444 -1 -1
0.974221061163073 0.780324226340574 -1 -1
1.60717332452036 2.90271704500523 -1 -1
1.78505625028714 0.29413215901734 -1 -1
1.26508124491513 2.92214327142116 -1 -1
0.653893256808685 1.48139537994700 -1 -1
1.94673646103508 1.51013334653874 -1 -1
1.37651122485238 3.31676717137167 -1 -1
0.760451278123538 0.979923184771868 -1 -1
1.28292094213002 2.66135701016817 -1 -1
0.536342053807597 1.85100087363587 -1 -1
1.18225844069278 0.39263971309868 -1 -1
2.5717715638596 2.84462847265310 -1 -1
2.42383258110959 0.666237598132384 -1 -1
0.747348132067155 1.47264064251741 -1 -1
0.838865215496192 1.76783636185541 -1 -1
4.52610068296141 1.46232793550644 -1 -1
1.06513040906981 2.40015863837706 -1 -1
2.1234030850167 1.67672406529316 -1 -1
3.93362700547878 1.77953431787149 -1 -1
5.240508093196 3.19583522613743 -1 -1

BIN
w3/ea-plot_static-delta.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
w3/evoalgs-w3.zip Normal file

Binary file not shown.

BIN
w3/perceptron-plot.pdf Normal file

Binary file not shown.

Binary file not shown.

13
w3/w3.Rproj Normal file
View File

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX

BIN
w7/neg.example.RData Normal file

Binary file not shown.

BIN
w7/pos.example.RData Normal file

Binary file not shown.

293
w7/w7.Rmd Normal file
View File

@ -0,0 +1,293 @@
```{R fsm}
# Define FSM class
FSM <- setRefClass("FSM",
fields = list(
states = "list",
transitions = "list",
start_state = "character",
accept_states = "character",
input_alphabet = "character",
output_alphabet = "character"
),
methods = list(
initialize = function(states, transitions, start_state, accept_states) {
states <<- states
transitions <<- transitions
start_state <<- start_state
accept_states <<- accept_states
input_alphabet <<- letters
output_alphabet <<- c("0", "1")
},
change_output = function(state, new_output) {
states[[state]]$output <<- new_output
},
modify_transition = function(from_state, input, to_state) {
transitions[[from_state]][[input]] <<- to_state
},
add_state = function(state_name, output) {
if (length(states) < 10) {
states[[state_name]] <<- list(output = output)
transitions[[state_name]] <<- list()
}
},
remove_state = function(state_name) {
if (length(states) <= 2 || state_name == start_state) {
return(FALSE) # Don't remove if it's the start state or if we have 2 or fewer states
}
states[[state_name]] <<- NULL
transitions[[state_name]] <<- NULL
for (state in names(transitions)) {
transitions[[state]] <<- transitions[[state]][names(transitions[[state]]) != state_name]
}
# If we removed an accept state, choose a new one if necessary
if (state_name %in% accept_states) {
accept_states <<- setdiff(accept_states, state_name)
if (length(accept_states) == 0) {
accept_states <<- sample(names(states), 1)
}
}
# Redirect any transitions that went to the removed state
for (state in names(transitions)) {
for (input in names(transitions[[state]])) {
if (transitions[[state]][[input]] == state_name) {
transitions[[state]][[input]] <<- sample(setdiff(names(states), state), 1)
}
}
}
return(TRUE)
},
change_start_state = function(new_start_state) {
start_state <<- new_start_state
},
run = function(input_string) {
if (nchar(input_string) == 0) {
return(states[[start_state]]$output)
}
current_state <- start_state
for (char in strsplit(input_string, "")[[1]]) {
if (!is.null(transitions[[current_state]][[char]])) {
current_state <- transitions[[current_state]][[char]]
}
# Stay in current state if no transition is defined
}
if (is.null(states[[current_state]]) || is.null(states[[current_state]]$output)) {
warning("Invalid final state or missing output")
warning(sprintf("Current state: %s", current_state))
return("0") # Default output
}
return(states[[current_state]]$output)
},
copy = function() {
FSM$new(
states = lapply(states, function(x) list(output = x$output)),
transitions = lapply(transitions, function(x) as.list(x)),
start_state = start_state,
accept_states = accept_states
)
},
check = function() {
# Check if start state exists
if (!(start_state %in% names(states))) {
cat("Error: Start state does not exist\n")
return(FALSE)
}
# Check if all accept states exist
for (accept_state in accept_states) {
if (!(accept_state %in% names(states))) {
cat("Error: Accept state", accept_state, "does not exist\n")
return(FALSE)
}
}
# Check if all transitions are valid
for (from_state in names(transitions)) {
if (!(from_state %in% names(states))) {
cat("Error: Transition from non-existent state", from_state, "\n")
return(FALSE)
}
for (input in names(transitions[[from_state]])) {
to_state <- transitions[[from_state]][[input]]
if (!(to_state %in% names(states))) {
cat("Error: Transition to non-existent state", to_state, "\n")
return(FALSE)
}
}
}
# Check if all states have an output
for (state in names(states)) {
if (is.null(states[[state]]$output)) {
cat("Error: State", state, "has no output\n")
return(FALSE)
}
}
# cat("FSM is well-formed\n")
return(TRUE)
}
),
)
# Create a new FSM
create_fsm <- function() {
num_states <- sample(2:10, 1)
state_names <- paste0("S", 1:num_states)
states <- lapply(state_names, function(s) {
list(output = sample(c("0", "1"), 1))
})
names(states) <- state_names
transitions <- lapply(state_names, function(s) {
num_transitions <- max(1, sample(1:10, 1)) # Ensure at least one transition
inputs <- sample(letters, num_transitions, replace = TRUE)
to_states <- sample(state_names, num_transitions, replace = TRUE)
trans <- setNames(to_states, inputs)
return(as.list(trans))
})
names(transitions) <- state_names
start_state <- sample(state_names, 1)
accept_states <- sample(state_names, 1)
FSM$new(states, transitions, start_state, accept_states)
}
# Mutation operation
mutate_fsm <- function(fsm) {
mutation_type <- sample(1:5, 1)
new_fsm <- fsm$copy()
if (mutation_type == 2 && length(new_fsm$states) <= 2) {
if (mutation_type == 3 && length(new_fsm$states) >= 10) {
mutation_type <- sample(c(1, 4, 5), 1)
} else {
mutation_type <- sample(c(1, 3, 4, 5), 1)
}
} else if (mutation_type == 3 && length(new_fsm$states) >= 10) {
sample(c(1, 2, 4, 5), 1)
}
cat(mutation_type)
if (mutation_type == 1) {
# Change output signal
state <- sample(names(new_fsm$states), 1)
new_output <- sample(new_fsm$output_alphabet, 1)
new_fsm$change_output(state, new_output)
} else if (mutation_type == 2) {
# Modify transition
from_state <- sample(names(new_fsm$states), 1)
input <- sample(new_fsm$input_alphabet, 1)
to_state <- sample(names(new_fsm$states), 1)
new_fsm$modify_transition(from_state, input, to_state)
} else if (mutation_type == 3 && length(new_fsm$states) < 10) {
# Add state
new_state <- paste0("S", length(new_fsm$states) + 1)
new_output <- sample(new_fsm$output_alphabet, 1)
new_fsm$add_state(new_state, new_output)
# Add at least one transition to/from the new state
from_state <- sample(c(names(new_fsm$states), new_state), 1)
to_state <- sample(c(names(new_fsm$states), new_state), 1)
input <- sample(new_fsm$input_alphabet, 1)
new_fsm$modify_transition(from_state, input, to_state)
} else if (mutation_type == 4 && length(new_fsm$states) > 2) {
# Remove state
state_to_remove <- sample(names(new_fsm$states), 1)
new_fsm$remove_state(state_to_remove)
} else if (mutation_type == 5) {
# Change start state
new_start_state <- sample(names(new_fsm$states), 1)
new_fsm$change_start_state(new_start_state)
}
new_fsm$check()
return(new_fsm)
}
# Evaluate FSM fitness
evaluate_fitness <- function(fsm, pos_examples, neg_examples) {
correct <- 0
total <- length(pos_examples) + length(neg_examples)
for (example in pos_examples) {
if (fsm$run(example) == "1") correct <- correct + 1
}
for (example in neg_examples) {
if (fsm$run(example) == "0") correct <- correct + 1
}
return(correct / total)
}
# Evolutionary Algorithm
evolve_fsm <- function(pos_examples, neg_examples, generations = 100) {
population <- replicate(10, create_fsm())
wlog <- list()
for (gen in 1:generations) {
# Create children through mutation
children <- lapply(population, mutate_fsm)
# Evaluate fitness of all FSMs
cat("eval...\n")
all_fsms <- c(population, children)
fitness_scores <- sapply(all_fsms, evaluate_fitness, pos_examples, neg_examples)
cat("max fitness: ")
cat(max(fitness_scores))
cat("\n")
# Select the best 10 FSMs for the next generation
population <- all_fsms[order(fitness_scores, decreasing = TRUE)[1:10]]
# Calculate statistics for the current generation
state_nums <- sapply(population, function(fsm) length(fsm$states))
transition_nums <- sapply(population, function(fsm) sum(sapply(fsm$transitions, function(trans) length(trans))))
wlog[[gen]] <- list(
generation = gen,
best_fitness = max(fitness_scores),
worst_fitness = min(fitness_scores),
average_fitness = mean(fitness_scores),
median_fitness = median(fitness_scores),
max_state_num = max(state_nums),
min_state_num = min(state_nums),
avg_state_num = mean(state_nums),
median_state_num = median(state_nums),
max_transition_num = max(transition_nums),
min_transition_num = min(transition_nums),
avg_transition_num = mean(transition_nums),
median_transition_num = median(transition_nums)
)
cat(sprintf("Generation %d: Best Fitness = %.4f, Avg Fitness = %.4f\n", gen, max(fitness_scores), mean(fitness_scores)))
cat(sprintf(" Min Fitness = %.4f, Std Dev = %.4f\n", min(fitness_scores), sd(fitness_scores)))
cat(sprintf(" Median Fitness = %.4f\n", median(fitness_scores)))
cat(sprintf(" Max States = %d, Min States = %d\n", max(state_nums), min(state_nums)))
cat(sprintf(" Avg States = %.2f, Median States = %.1f\n", mean(state_nums), median(state_nums)))
cat(sprintf(" Max Transitions = %d, Min Transitions = %d\n", max(transition_nums), min(transition_nums)))
cat(sprintf(" Avg Transitions = %.2f, Median Transitions = %.1f\n", mean(transition_nums), median(transition_nums)))
}
# Return the best FSM
best_fsm <- population[[which.max(sapply(population, evaluate_fitness, pos_examples, neg_examples))]]
return(list(res = best_fsm, wlog = wlog))
}
# Load example data
load("pos.example.RData")
load("neg.example.RData")
# Run the evolutionary algorithm
best_fsm <- evolve_fsm(pos.examples, neg.examples)
```

13
w7/w7.Rproj Normal file
View File

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX