# Reviewer comment 2: Compare GP with ANN (Fixed grid)
source("supplementary_analysis.R")
# Path of the RDS file that receives the comparison results at the end of
# this script (written with saveRDS()).
OUTPUT_FILE <- "output/GP_vs_NN.RDS"
# Seed passed to set.seed() before the data simulation and again before the
# training inputs are drawn.
SEED <- 1234




# Create data from DGP (Bayesian linear regression)
# The seed is fixed first, so the rnorm()/runif() draws below are reproducible
# only as long as their order is left unchanged.
set.seed(SEED)
# p is also the number of columns of the training/testing input matrices
# built later in this script.
# NOTE(review): presumably p = number of coefficients and n = number of
# observations -- confirm against sim_bayesian_linear_regression().
p <- 5
n <- 50
# b0: p standard-normal draws.
b0 <- rnorm(p)
# B0: p x p diagonal matrix whose diagonal is drawn uniformly from [0.5, 1.5].
B0 <- diag(runif(p, 0.5, 1.5))
# NOTE(review): a0 / d0 look like prior hyperparameters -- confirm against
# the simulator and gibbs_sampler().
a0 <- 30
d0 <- 3
# sim_bayesian_linear_regression() is not defined in this file (presumably it
# comes from supplementary_analysis.R). Its result is read through $X and $y.
data <- sim_bayesian_linear_regression(n, p, b0, B0, a0, d0)
X <- data$X
y <- data$y
print(data)




# - Generate training data varying b0
library(gptools2)
# Training inputs: n_eval rows of p standard-normal draws. Each row is passed
# to the Gibbs sampler below in the position of the global b0.
set.seed(SEED)
n_eval <- 100
training_X <- matrix(rnorm(n_eval * p), nrow = n_eval, ncol = p)

# Training outputs: for every input row, run the Gibbs sampler, summarise each
# column of the beta samples with mcmc_sliced_mean_with_sd(), and join the
# per-column summaries into one record. The records are then stacked row-wise.
# Both joins go through join_data_with_attr(..., "sigma"); the "sigma"
# attribute of the result is read later when the GPs are fitted.
training_y <- reduce(
    map_row(training_X, f = \(b0) {
        posterior_draws <- gibbs_sampler(b0, B0, a0, d0, X, y)
        reduce(
            map_col(posterior_draws$beta, mcmc_sliced_mean_with_sd),
            join_data_with_attr(c, c, "sigma")
        )
    }),
    join_data_with_attr(rbind, rbind, "sigma")
)


# - Fit training data with GP
# One gp() fit per callback invocation of iter_col() (presumably one per
# column of training_y, with `y` the column and `j` its index -- confirm
# against iter_col()). The sigma passed to gp() is the mean of column j of
# the "sigma" attribute attached to training_y.
model_gps <- iter_col(training_y, function(y, j) {
    sigma_j <- mean(attr(training_y, "sigma")[, j])
    gp(training_X, y, sigma = sigma_j)
})


# - Fit training data with NN (100 random initialisations for each hidden
#   layer size from 2 to 10; the nested lists are flattened into one list).
# The targets are real-valued regression outputs, so linear output units are
# required (`linout = TRUE`). nnet() defaults to logistic output units, which
# confine every prediction to (0, 1) and therefore cannot represent targets
# outside that range.
model_nns <- 1:100 |>
    map(\(i) map(2:10, \(size) {
        nnet::nnet(training_X, training_y, size = size, linout = TRUE)
    })) |>
    reduce(c)


# - Generate testing data
# The inputs are drawn from the RNG stream as it stands at this point; there
# is no re-seeding in this section.
test_n_eval <- 100

testing_X <- matrix(rnorm(test_n_eval * p), nrow = test_n_eval, ncol = p)

# Reference outputs: transposed result of posterior_mean_analytical() for
# each testing row, stacked row-wise.
testing_y_truth <- reduce(
    map_row(testing_X, \(b0) t(posterior_mean_analytical(b0, B0, X, y))),
    rbind
)

# GP predictions: the $mean of every fitted GP, bound column-wise.
testing_y_gp <- reduce(
    map(model_gps, \(model) predict_gp(model, testing_X)$mean),
    cbind
)

# NN predictions: one predict() result per fitted network, kept as a list.
testing_y_nns <- map(model_nns, \(model) predict(model, testing_X))

# Pick the best NN using the testing set (intentionally) for strong competition
# vapply() guarantees one numeric MSE per network (sapply() would silently
# change its return type on empty or ragged input).
nn_mse <- vapply(
    testing_y_nns,
    \(nn_pred_y) mse(nn_pred_y, testing_y_truth),
    numeric(1)
)
best_nn <- which.min(nn_mse)




# - Compare the MSE of the fit on the testing data
# Each MSE is computed once and reused for both the console report and the
# saved results.
mse_gp <- mse(testing_y_gp, testing_y_truth)
mse_nn <- min(nn_mse)
message("Gaussian Process MSE: ", mse_gp)
# Report the best network's MSE (previously its prediction object was
# printed here instead of the error).
message("Neural Network MSE: ", mse_nn)

# NOTE(review): `testing_y_nn` is stored as a length-one list (single-bracket
# subsetting), unlike `testing_y_gp`. Kept as is so existing readers of the
# RDS file keep working -- confirm whether `[[` was intended.
results <- list(
    testing_y_gp = testing_y_gp,
    testing_y_nn = testing_y_nns[best_nn],
    testing_y_truth = testing_y_truth,
    mse_gp = mse_gp,
    mse_nn = mse_nn
)

# Make sure the output directory exists, and only announce the file once it
# has actually been written.
dir.create(dirname(OUTPUT_FILE), showWarnings = FALSE, recursive = TRUE)
saveRDS(results, OUTPUT_FILE)
message("Written to: ", OUTPUT_FILE)
message("Complete")




# - Remarks
# NN is not stable. The result depends heavily on the random initial
# parameters, and it often fails to learn anything, i.e., it predicts 0 for
# everything.
# In general, NN is more suited to a data-rich regime, while GP is more suited
# to a data-poor regime.
