# Source: evoalgs-r-practise/intro/dataframes.R
# (58 lines, 1.5 KiB, R)

# ---- Build the example data frame ----
# One row per employee. stringsAsFactors = FALSE keeps the text columns as
# plain character vectors instead of factors.
employee_data <- data.frame(
  EmployeeID = c(1, 2, 3, 4),
  Name = c("Alice", "Bob", "Charlie", "David"),
  Age = c(28, 34, 29, 40),
  Department = c("HR", "IT", "Marketing", "Finance"),
  stringsAsFactors = FALSE
)
print("Original Data Frame:")
print(employee_data)

# ---- Column access ----
# `[[` with a column name extracts that column as a plain vector.
print("Names Column:")
print(employee_data[["Name"]])

# ---- Positional access ----
# Indexing is [row, column]: row 2 is Bob, column 3 is Age.
print("Second Row, Third Column:")
print(employee_data[2, 3])

# ---- Add a column ----
# The new vector supplies one value per existing row, in row order.
employee_data[["Salary"]] <- c(50000, 55000, 49000, 53000)
print("Data Frame with Salary Column:")
print(employee_data)

# ---- Drop a column ----
# Assigning NULL to a column removes it from the data frame.
employee_data[["Age"]] <- NULL
print("Data Frame after Removing Age Column:")
print(employee_data)

# ---- Filter rows ----
# which() turns the logical match into row positions; the empty column slot
# keeps every column, and the original row names are preserved.
it_department <- employee_data[which(employee_data[["Department"]] == "IT"), ]
print("Employees in IT Department:")
print(it_department)

# ---- Summarise ----
# Arithmetic mean of the Salary column.
average_salary <- mean(employee_data[["Salary"]])
print(paste("Average Salary:", average_salary))
# ---- More advanced data frame manipulation with dplyr ----
# If dplyr is not installed, uncomment the following line first:
# install.packages("dplyr")
library(dplyr)

# Keep only the Name and Salary columns.
selected_columns <- employee_data %>%
  select(Name, Salary)
print("Selected Columns:")
print(selected_columns)

# Keep only rows whose Salary is strictly greater than 50000.
high_earners <- employee_data %>%
  filter(Salary > 50000)
print("High Earners:")
print(high_earners)

# Order rows from highest to lowest Salary.
sorted_employees <- employee_data %>%
  arrange(desc(Salary))
print("Employees Sorted by Salary:")
print(sorted_employees)