# Predict pediatric OR duration (minutes) with a small feed-forward
# neural network, using simulated data shaped like the real dataset.

# 1. Load required libraries ----
# Install these first using install.packages() if you haven't already.
library(dplyr)
library(neuralnet)
library(caret)
library(fastDummies)

# 2. Simulate the dataset ----
# In reality, you would load this via:
#   data <- read.csv("pediatric_or_data.csv")
set.seed(42)
n_samples <- 500

data <- data.frame(
  # Patient demographics (continuous)
  age_months = runif(n_samples, 1, 144),
  weight_kg = runif(n_samples, 3, 40),
  # Comorbidities & history (Yes/No -> binary categorical)
  prev_cardiac_surgery = sample(c("Yes", "No"), n_samples, replace = TRUE),
  down_syndrome = sample(c("Yes", "No"), n_samples, replace = TRUE),
  # Surgery specifics
  procedure_type = sample(
    c("ASD_Closure", "Glenn", "Fontan", "TGA_Repair"),
    n_samples,
    replace = TRUE
  ),
  open_heart = sample(c("Yes", "No"), n_samples, replace = TRUE),
  # Team experience (continuous)
  lead_surgeon_years = runif(n_samples, 5, 30),
  # Target variable: total OR duration in minutes
  or_duration_mins = runif(n_samples, 120, 480)
)

# 3. Pre-processing: one-hot encoding ----
# Neural networks cannot read text; turn the categorical columns into
# 0/1 dummy columns.
data_encoded <- dummy_cols(
  data,
  select_columns = c(
    "prev_cardiac_surgery", "down_syndrome",
    "procedure_type", "open_heart"
  ),
  remove_first_dummy = TRUE,      # prevents perfect collinearity
  remove_selected_columns = TRUE
)

# 4. Pre-processing: min-max scaling to [0, 1] ----
# Neural networks train poorly when feature scales are mismatched
# (e.g. age in months vs. weight in kg).
#
# Rescale x to [0, 1]. A constant column (max == min) is mapped to all
# zeros instead of NaN from division by zero.
normalize <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  if (span == 0) {
    return(rep(0, length(x)))
  }
  (x - rng[1]) / span
}

# NOTE(review): scaling is fitted on the FULL dataset before the split,
# which leaks test-set information into training. For a rigorous
# evaluation, fit min/max on the training rows only and apply the same
# transform to the test rows.
data_normalized <- as.data.frame(lapply(data_encoded, normalize))

# 5. Train / test split (80% training, 20% testing) ----
train_index <- createDataPartition(
  data_normalized$or_duration_mins,
  p = 0.8,
  list = FALSE
)
train_set <- data_normalized[train_index, ]
test_set <- data_normalized[-train_index, ]

# 6. Build the neural network ----
# Dynamically grab all column names to build the formula:
#   or_duration_mins ~ var1 + var2 + ...
n <- names(train_set)
f <- as.formula(paste(
  "or_duration_mins ~",
  paste(n[!n %in% "or_duration_mins"], collapse = " + ")
))

# Train the model (two hidden layers: 5 nodes, then 3 nodes).
# linear.output = TRUE because we are predicting a continuous number
# (minutes), not classifying.
nn_model <- neuralnet(
  f,
  data = train_set,
  hidden = c(5, 3),
  linear.output = TRUE,
  stepmax = 1e6
)

# Plot the neural network to visualize nodes and weights (interactive):
# plot(nn_model)

# 7. Predict & evaluate on test data ----
# predict() is the supported S3 interface for neuralnet models; the
# older compute() helper is deprecated in current package versions.
feature_cols <- names(test_set) != "or_duration_mins"
predicted_scaled <- predict(nn_model, test_set[, feature_cols])

# De-normalize predictions to get actual minutes back. The min/max come
# from the raw target column, which exactly inverts normalize() because
# scaling was fitted on the full dataset above.
min_duration <- min(data$or_duration_mins)
max_duration <- max(data$or_duration_mins)
predicted_mins <- predicted_scaled * (max_duration - min_duration) + min_duration
actual_mins <- test_set$or_duration_mins * (max_duration - min_duration) + min_duration

# Calculate error (Mean Absolute Error) in original units.
mae <- mean(abs(predicted_mins - actual_mins))
print(paste("Mean Absolute Error in Minutes:", round(mae, 2)))