#' Multivariate Gaussian Process
#'
#' Fits the kernel parameters of a multivariate Gaussian process by
#' minimising, with \code{optim}, the objective
#' \code{d * log_det(K) + n * log_det(Sigma) + tr(K^-1 Y Sigma^-1 t(Y))},
#' where \code{K = kcov(X, X, kern_fun)}, \code{n = nrow(Y)} and
#' \code{d = ncol(Y)}.
#'
#' @param X A numeric matrix; the training inputs.
#' @param Y A numeric matrix; the training outputs. Its number of rows must
#' match the dimension of the kernel covariance of `X`.
#' @param kernel A kernel object, which is a named list of a list of
#' named parameters and an uninitialised kernel function.
#' @param Sigma A numeric matrix; the covariance of the noise of the data.
#' The dimension of `Sigma` should be equal to the number of columns of `Y`.
#' @param ... Optional arguments passed on to \code{optim}.
#'
#' @return A named list with the training data (`train_X`, `train_Y`), the
#' `kernel` and `Sigma` that were supplied, the fitted kernel `parameters`
#' (in the same structure as `kernel$param`) and `optim_log`, the complete
#' result returned by \code{optim}.
#'
#' @examples 
#' \dontrun{
#' X <- matrix(10 * runif(300), ncol = 3)
#' f <- function(X) {
#'   Y <- matrix(0, nrow = nrow(X), ncol = 2)
#'   Y[,1] <- sin(X[,1] + X[,2]) + 2 * X[,3]
#'   Y[,2] <- cos(X[,1] - X[,3]) + X[,2]
#'   Y
#' }
#' Y <- f(X)
#' 
#' model <- mgp(X, Y, Sigma = diag(2))
#' fitted_Y <- predict_mgp(model, X)$mean
#' head(cbind(Y, fitted_Y))
#' compare(as.numeric(Y), as.numeric(fitted_Y))
#' 
#' new_X <- matrix(10 * runif(90), ncol = 3)
#' new_Y <- f(new_X)
#' fitted_new_Y <- predict_mgp(model, new_X)$mean
#' head(cbind(new_Y, fitted_new_Y))
#' compare(as.numeric(new_Y), as.numeric(fitted_new_Y))
#' }
#'
#' @export
mgp <- function(X, Y, kernel = squared_exponential(), Sigma, ...) {
    # Fail early with a readable message instead of a conformability error
    # raised from inside the optimiser.
    if (length(dim(Y)) != 2L) {
        stop("`Y` must be a matrix.", call. = FALSE)
    }
    n <- nrow(Y)
    d <- ncol(Y)
    if (NROW(Sigma) != d || NCOL(Sigma) != d) {
        stop(
            "`Sigma` must be a square matrix with `ncol(Y)` rows and columns.",
            call. = FALSE
        )
    }

    # Quantities that do not depend on the kernel parameters are computed
    # once here rather than on every evaluation of the objective.
    log_det_Sigma <- log_det(Sigma)
    Y_Sigma_inv <- t(solve(Sigma, t(Y)))  # n x d

    NLL <- function(param) {
        param_named <- relist(param, kernel$param)
        kern_fun <- do.call(kernel$kern_fun, param_named)
        K <- kcov(X, X, kern_fun)

        # tr(A %*% B) == sum(A * t(B)): the trace term is obtained without
        # forming the n x n product solve(K, Y) %*% solve(Sigma, t(Y)).
        d * log_det(K) + n * log_det_Sigma +
            sum(solve(K, Y) * Y_Sigma_inv)
    }

    parameters <- optim(par = unlist(kernel$param), fn = NLL, ...)
    list(
        train_X = X, train_Y = Y,
        kernel = kernel, Sigma = Sigma,
        parameters = relist(parameters$par, kernel$param),
        optim_log = parameters
    )
}


#' Predict using the Multivariate Gaussian Process
#'
#' Rebuilds the kernel function from the fitted parameters stored in
#' `model` and evaluates the predictive mean and covariance at `new_X`.
#'
#' @param model Output from \code{mgp}; the model object.
#' @param new_X A numeric matrix; the points to predict.
#'
#' @return A named list with the inputs (`model`, `new_X`), the predictive
#' `mean` (a matrix with one column per column of the training `Y`) and the
#' predictive `covariance` matrix.
#' @export
predict_mgp <- function(model, new_X) {
    train_X <- model$train_X
    fitted_kernel <- do.call(model$kernel$kern_fun, model$parameters)

    # Kernel covariances: train/train, train/new (plus its transpose) and
    # new/new.
    cov_train <- kcov(train_X, train_X, fitted_kernel)
    cov_cross <- kcov(train_X, new_X, fitted_kernel)
    cov_cross_t <- t(cov_cross)
    cov_new <- kcov(new_X, new_X, fitted_kernel)

    pred_mean <- cov_cross_t %*% solve(cov_train, model$train_Y)
    pred_cov <- cov_new - cov_cross_t %*% solve(cov_train, cov_cross)

    list(
        model = model,
        new_X = new_X,
        mean = pred_mean,
        covariance = pred_cov
    )
}
