library(tidyverse)


# Set seed for reproducibility
set.seed(510)

# Simulate predictor variables; we will cover this design in the second half of the class
n <- 100  # Number of observations
X1 <- rnorm(n)                      # Continuous predictor
X2 <- rbinom(n, 1, 0.5)             # Binary predictor
X3 <- runif(n, min = 0, max = 10)   # Continuous predictor

# Generate the probability of success using a logistic model
beta_0 <- -1     # Intercept
beta_1 <- 1.5    # Coefficient for X1
beta_2 <- -0.8   # Coefficient for X2
beta_3 <- 0.4    # Coefficient for X3

logit_p <- beta_0 + beta_1 * X1 + beta_2 * X2 + beta_3 * X3
p <- exp(logit_p) / (1 + exp(logit_p))  # Convert to probability

# Simulate binary outcome Y
Y <- rbinom(n, 1, p)

# Create tibble
simulated_data <- tibble(Y = Y, X1 = X1, X2 = X2, X3 = X3)

# Fit logistic regression model
logit_model <- glm(Y ~ X1 + X2 + X3, data = simulated_data, family = binomial)

# Predict probabilities
predicted_probs <- predict(logit_model, type = "response")

# Append predicted probabilities
d = simulated_data |> bind_cols(predicted_probs)
names(d)[5] = "Predicted Probability"

# Look at the data
d |> view()

# Create two small functions that take logical vectors of prediction and truth
sensitivity = function(prediction, truth) {
  true_positive = sum(prediction == 1 & truth == 1)
  false_negative = sum(prediction == 0 & truth == 1)
  sensitivity = true_positive / (true_positive + false_negative)
  return(sensitivity)
}

specificity = function(prediction, truth) {
  true_negative = sum(prediction == 0 & truth == 0)
  false_positive = sum(prediction == 1 & truth == 0)
  specificity = true_negative / (true_negative + false_positive)
  return(specificity)
}

# Now create an ROC curve. Start with threshold 0, and go to threshold 1. Use 1000 steps, say
n_thresh = 50
# thresholds = seq(min(d$`Predicted Probability`),max(d$`Predicted Probability`),length.out = n_thresh)
# or try:
thresholds = d$`Predicted Probability` |> unique()

# For each value of "thresh", calculate sensitivity and specificity

# Start with an empty tibble
results = tibble(threshold = numeric(), sensitivity = numeric(), specificity = numeric())

for (threshold in thresholds){
  threshold
  prediction = d["Predicted Probability"]>threshold
  sens = sensitivity(prediction,d$Y)
  spec = specificity(prediction,d$Y)
  results = bind_rows(results,
                      tibble(threshold = threshold, sensitivity = sens, specificity = spec))
}

# Look a the ROC results
results |> view()

# Plot the ROC curve
results |> 
  arrange(desc(sensitivity)) |> 
  ggplot(aes(1-specificity,sensitivity,color = threshold)) + geom_point() + theme_minimal()


# Debugging
# Only works in code!


i = sample.int(12,1)

add_one_to_square = function(x){
  sq = x^2
  out = sq+1
  return(out)
}

add_one_to_square(sample.int(12,1))