diff --git a/w7/neg.example.RData b/w7/neg.example.RData new file mode 100644 index 0000000..d38ea87 Binary files /dev/null and b/w7/neg.example.RData differ diff --git a/w7/pos.example.RData b/w7/pos.example.RData new file mode 100644 index 0000000..b77c7ef Binary files /dev/null and b/w7/pos.example.RData differ diff --git a/w7/w7.Rmd b/w7/w7.Rmd new file mode 100644 index 0000000..129ac09 --- /dev/null +++ b/w7/w7.Rmd @@ -0,0 +1,293 @@ +```{R fsm} +# Define FSM class +FSM <- setRefClass("FSM", + fields = list( + states = "list", + transitions = "list", + start_state = "character", + accept_states = "character", + input_alphabet = "character", + output_alphabet = "character" + ), + methods = list( + initialize = function(states, transitions, start_state, accept_states) { + states <<- states + transitions <<- transitions + start_state <<- start_state + accept_states <<- accept_states + input_alphabet <<- letters + output_alphabet <<- c("0", "1") + }, + change_output = function(state, new_output) { + states[[state]]$output <<- new_output + }, + modify_transition = function(from_state, input, to_state) { + transitions[[from_state]][[input]] <<- to_state + }, + add_state = function(state_name, output) { + if (length(states) < 10) { + states[[state_name]] <<- list(output = output) + transitions[[state_name]] <<- list() + } + }, + remove_state = function(state_name) { + if (length(states) <= 2 || state_name == start_state) { + return(FALSE) # Don't remove if it's the start state or if we have 2 or fewer states + } + + states[[state_name]] <<- NULL + transitions[[state_name]] <<- NULL + for (state in names(transitions)) { + transitions[[state]] <<- transitions[[state]][names(transitions[[state]]) != state_name] + } + + # If we removed an accept state, choose a new one if necessary + if (state_name %in% accept_states) { + accept_states <<- setdiff(accept_states, state_name) + if (length(accept_states) == 0) { + accept_states <<- sample(names(states), 1) + } + } + + # Redirect any transitions that went to the removed state + for (state in names(transitions)) { + for (input in names(transitions[[state]])) { + if (transitions[[state]][[input]] == state_name) { + transitions[[state]][[input]] <<- sample(setdiff(names(states), state), 1) + } + } + } + + return(TRUE) + }, + change_start_state = function(new_start_state) { + start_state <<- new_start_state + }, + run = function(input_string) { + if (nchar(input_string) == 0) { + return(states[[start_state]]$output) + } + + current_state <- start_state + for (char in strsplit(input_string, "")[[1]]) { + if (!is.null(transitions[[current_state]][[char]])) { + current_state <- transitions[[current_state]][[char]] + } + # Stay in current state if no transition is defined + } + + if (is.null(states[[current_state]]) || is.null(states[[current_state]]$output)) { + warning("Invalid final state or missing output") + warning(sprintf("Current state: %s", current_state)) + return("0") # Default output + } + + return(states[[current_state]]$output) + }, + copy = function() { + FSM$new( + states = lapply(states, function(x) list(output = x$output)), + transitions = lapply(transitions, function(x) as.list(x)), + start_state = start_state, + accept_states = accept_states + ) + }, + check = function() { + # Check if start state exists + if (!(start_state %in% names(states))) { + cat("Error: Start state does not exist\n") + return(FALSE) + } + + # Check if all accept states exist + for (accept_state in accept_states) { + if (!(accept_state %in% names(states))) { + cat("Error: Accept state", accept_state, "does not exist\n") + return(FALSE) + } + } + + # Check if all transitions are valid + for (from_state in names(transitions)) { + if (!(from_state %in% names(states))) { + cat("Error: Transition from non-existent state", from_state, "\n") + return(FALSE) + } + for (input in names(transitions[[from_state]])) { + to_state <- transitions[[from_state]][[input]] + if (!(to_state %in% names(states))) { + cat("Error: Transition to non-existent state", to_state, "\n") + return(FALSE) + } + } + } + + # Check if all states have an output + for (state in names(states)) { + if (is.null(states[[state]]$output)) { + cat("Error: State", state, "has no output\n") + return(FALSE) + } + } + + # cat("FSM is well-formed\n") + return(TRUE) + } + ), +) + +# Create a new FSM +create_fsm <- function() { + num_states <- sample(2:10, 1) + state_names <- paste0("S", 1:num_states) + + states <- lapply(state_names, function(s) { + list(output = sample(c("0", "1"), 1)) + }) + names(states) <- state_names + + transitions <- lapply(state_names, function(s) { + num_transitions <- max(1, sample(1:10, 1)) # Ensure at least one transition + inputs <- sample(letters, num_transitions, replace = TRUE) + to_states <- sample(state_names, num_transitions, replace = TRUE) + trans <- setNames(to_states, inputs) + return(as.list(trans)) + }) + names(transitions) <- state_names + + start_state <- sample(state_names, 1) + accept_states <- sample(state_names, 1) + + FSM$new(states, transitions, start_state, accept_states) +} + +# Mutation operation +mutate_fsm <- function(fsm) { + mutation_type <- sample(1:5, 1) + new_fsm <- fsm$copy() + + if (mutation_type == 2 && length(new_fsm$states) <= 2) { + if (mutation_type == 3 && length(new_fsm$states) >= 10) { + mutation_type <- sample(c(1, 4, 5), 1) + } else { + mutation_type <- sample(c(1, 3, 4, 5), 1) + } + } else if (mutation_type == 3 && length(new_fsm$states) >= 10) { + sample(c(1, 2, 4, 5), 1) + } + + cat(mutation_type) + + if (mutation_type == 1) { + # Change output signal + state <- sample(names(new_fsm$states), 1) + new_output <- sample(new_fsm$output_alphabet, 1) + new_fsm$change_output(state, new_output) + } else if (mutation_type == 2) { + # Modify transition + from_state <- sample(names(new_fsm$states), 1) + input <- sample(new_fsm$input_alphabet, 1) + to_state <- sample(names(new_fsm$states), 1) + new_fsm$modify_transition(from_state, input, to_state) + } else if (mutation_type == 3 && length(new_fsm$states) < 10) { + # Add state + new_state <- paste0("S", length(new_fsm$states) + 1) + new_output <- sample(new_fsm$output_alphabet, 1) + new_fsm$add_state(new_state, new_output) + # Add at least one transition to/from the new state + from_state <- sample(c(names(new_fsm$states), new_state), 1) + to_state <- sample(c(names(new_fsm$states), new_state), 1) + input <- sample(new_fsm$input_alphabet, 1) + new_fsm$modify_transition(from_state, input, to_state) + } else if (mutation_type == 4 && length(new_fsm$states) > 2) { + # Remove state + state_to_remove <- sample(names(new_fsm$states), 1) + new_fsm$remove_state(state_to_remove) + } else if (mutation_type == 5) { + # Change start state + new_start_state <- sample(names(new_fsm$states), 1) + new_fsm$change_start_state(new_start_state) + } + + new_fsm$check() + + return(new_fsm) +} + +# Evaluate FSM fitness +evaluate_fitness <- function(fsm, pos_examples, neg_examples) { + correct <- 0 + total <- length(pos_examples) + length(neg_examples) + + for (example in pos_examples) { + if (fsm$run(example) == "1") correct <- correct + 1 + } + for (example in neg_examples) { + if (fsm$run(example) == "0") correct <- correct + 1 + } + + return(correct / total) +} + +# Evolutionary Algorithm +evolve_fsm <- function(pos_examples, neg_examples, generations = 100) { + population <- replicate(10, create_fsm()) + + wlog <- list() + + for (gen in 1:generations) { + # Create children through mutation + children <- lapply(population, mutate_fsm) + + # Evaluate fitness of all FSMs + cat("eval...\n") + all_fsms <- c(population, children) + fitness_scores <- sapply(all_fsms, evaluate_fitness, pos_examples, neg_examples) + cat("max fitness: ") + cat(max(fitness_scores)) + cat("\n") + + # Select the best 10 FSMs for the next generation + population <- all_fsms[order(fitness_scores, decreasing = TRUE)[1:10]] + + # Calculate statistics for the current generation + state_nums <- sapply(population, function(fsm) length(fsm$states)) + transition_nums <- sapply(population, function(fsm) sum(sapply(fsm$transitions, function(trans) length(trans)))) + + wlog[[gen]] <- list( + generation = gen, + best_fitness = max(fitness_scores), + worst_fitness = min(fitness_scores), + average_fitness = mean(fitness_scores), + median_fitness = median(fitness_scores), + max_state_num = max(state_nums), + min_state_num = min(state_nums), + avg_state_num = mean(state_nums), + median_state_num = median(state_nums), + max_transition_num = max(transition_nums), + min_transition_num = min(transition_nums), + avg_transition_num = mean(transition_nums), + median_transition_num = median(transition_nums) + ) + + cat(sprintf("Generation %d: Best Fitness = %.4f, Avg Fitness = %.4f\n", gen, max(fitness_scores), mean(fitness_scores))) + cat(sprintf(" Min Fitness = %.4f, Std Dev = %.4f\n", min(fitness_scores), sd(fitness_scores))) + cat(sprintf(" Median Fitness = %.4f\n", median(fitness_scores))) + cat(sprintf(" Max States = %d, Min States = %d\n", max(state_nums), min(state_nums))) + cat(sprintf(" Avg States = %.2f, Median States = %.1f\n", mean(state_nums), median(state_nums))) + cat(sprintf(" Max Transitions = %d, Min Transitions = %d\n", max(transition_nums), min(transition_nums))) + cat(sprintf(" Avg Transitions = %.2f, Median Transitions = %.1f\n", mean(transition_nums), median(transition_nums))) + } + + # Return the best FSM + best_fsm <- population[[which.max(sapply(population, evaluate_fitness, pos_examples, neg_examples))]] + return(list(res = best_fsm, wlog = wlog)) +} + +# Load example data +load("pos.example.RData") +load("neg.example.RData") + +# Run the evolutionary algorithm +best_fsm <- evolve_fsm(pos.examples, neg.examples) +``` \ No newline at end of file diff --git a/w7/w7.Rproj b/w7/w7.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/w7/w7.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX