## ----echo = FALSE, message = FALSE-------------------------------------------------------------------
library(tidyverse)
library(scales)
library(ISLR)
library(knitr)
library(dplyr)
library(tidyr)

theme_set(theme_bw())


## ---- echo = FALSE-----------------------------------------------------------------------------------
## two normal class-conditional densities; the Bayes boundary is at 0 (dashed)
x <- seq(-6, 6, length.out = 1000)

ggplot() +
  geom_line(aes(x, dnorm(x, 1.25, 1)), colour = "red") +
  geom_line(aes(x, dnorm(x, -1.25, 1)), colour = "blue") +
  geom_vline(aes(xintercept = 0), lty = 2) +
  xlab("") + ylab("")


## ---- echo = FALSE-----------------------------------------------------------------------------------
## simulate a small training sample from each class
df <- data.frame(x = rnorm(20, 1.25, 1), y = "1") %>%
  bind_rows(data.frame(x = rnorm(20, -1.25, 1), y = "2"))

## estimate the class means and prior proportions
df %>%
  group_by(y) %>%
  summarise(mu = mean(x), pi = n()/nrow(df)) %>%
  data.frame() -> ests

## pooled variance estimate
ests$sigma2 <- df %>%
  left_join(ests) %>%
  mutate(summand = (x - mu)^2) %>%
  group_by(y) %>%
  summarise(summand = sum(summand)) %>%
  summarise(sigma2 = sum(summand)/(nrow(df) - nrow(ests))) %>%
  .$sigma2

## histogram of the sample: true boundary (dashed) vs. estimated boundary (solid)
ggplot() +
  geom_histogram(aes(x, group = y, fill = y), data = df, bins = 30, position = "dodge") +
  geom_vline(aes(xintercept = 0), lty = 2) +
  geom_vline(aes(xintercept = sum(ests$mu)/2))


## ---- echo = FALSE-----------------------------------------------------------------------------------
## test data
test_df <- data.frame(x = rnorm(20000, 1.25, 1), y = "1") %>%
  bind_rows(data.frame(x = rnorm(20000, -1.25, 1), y = "2"))

## confusion matrix for the estimated boundary
test_df %>%
  mutate(pred = as.numeric(x <= sum(ests$mu)/2) + 1) %>%
  select(y, pred) %>%
  table() -> confusion

## confusion matrix for the Bayes classifier (true boundary at 0)
test_df %>%
  mutate(pred = as.numeric(x <= 0) + 1) %>%
  select(y, pred) %>%
  table() -> confusion_bayes

confusion


## ---- echo = FALSE-----------------------------------------------------------------------------------
library(mvtnorm)


## ---- echo = FALSE, cache = TRUE---------------------------------------------------------------------
## contours of a bivariate normal density: independent vs. correlated predictors
expand.grid(x1 = seq(-10, 10, length.out = 1000),
            x2 = seq(-10, 10, length.out = 1000)) %>%
  data.frame() %>%
  mutate(indep = dmvnorm(.),
         corr = dmvnorm(., mean = c(0, 0),
                        sigma = matrix(c(1, .5, .5, 1), ncol = 2))) %>%
  gather(density, f, indep, corr) %>%
  ggplot() +
  geom_contour(aes(x1, x2, z = f)) +
  facet_wrap(.~density)


## ---- message = FALSE--------------------------------------------------------------------------------
library(MASS) # package containing lda function

lda_fit <- lda(default ~ student + balance, data = Default)
lda_fit

# training data confusion matrix
table(predict(lda_fit)$class, Default$default)


## ----------------------------------------------------------------------------------------------------
## lower the threshold for predicting default from 0.5 to 0.2
table(predict(lda_fit)$posterior[, "Yes"] > 0.2, Default$default)


## ---- echo = FALSE, cache = TRUE, fig.height=2-------------------------------------------------------
## error rates (false positive, false negative, overall) as the threshold varies
get_errors <- function(model, y, tau) {
  conf <- table(predict(model)$posterior[, 2] > tau, y)
  data.frame(error_1 = conf[2, 1]/sum(conf[, 1]),
             error_2 = conf[1, 2]/sum(conf[, 2]),
             error_tot = (conf[2, 1] + conf[1, 2])/sum(conf))
}

res <- data.frame()
for(tau in seq(0.0001, 0.5, by = .0001)) {
  res <- rbind(res, data.frame(threshold = tau, get_errors(lda_fit, Default$default, tau)))
}

res %>%
  gather(error, value, -threshold) %>%
  ggplot() +
  geom_line(aes(threshold, value, colour = error))
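## ---- echo = FALSE-----------------------------------------------------------------------------------
## A hedged sketch, not from the original: pull out the threshold that minimizes
## the overall error rate in the sweep above, to make the tradeoff concrete.
## Assumes the `res` data frame from the previous chunk.
res[which.min(res$error_tot), ]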
## ---- echo = FALSE, cache=TRUE, fig.height = 2-------------------------------------------------------
## make fake data
mu_1 <- c(-1, -1)
mu_2 <- c(1, 1)
sigma_1 <- matrix(c(1, .5, .5, 1), nrow = 2)
sigma_2 <- matrix(c(1, -.5, -.5, 1), nrow = 2)

# lda will be better because this data is generated from the same sigma
lda_better_train <- data.frame(class = "1", rmvnorm(100, mu_1, sigma_1)) %>%
  bind_rows(data.frame(class = "2", rmvnorm(100, mu_2, sigma_1)))

# qda will be better because this data is generated from different sigmas
qda_better_train <- data.frame(class = "1", rmvnorm(100, mu_1, sigma_1)) %>%
  bind_rows(data.frame(class = "2", rmvnorm(100, mu_2, sigma_2)))

names(lda_better_train) <- names(qda_better_train) <- c("class", "x1", "x2")

## models -- lda and qda fit to each training set
lda_fit_lda_better <- lda(class ~ x1 + x2, data = lda_better_train)
qda_fit_lda_better <- qda(class ~ x1 + x2, data = lda_better_train)
lda_fit_qda_better <- lda(class ~ x1 + x2, data = qda_better_train)
qda_fit_qda_better <- qda(class ~ x1 + x2, data = qda_better_train)

## QDA discriminant function delta_k(x), used to calculate the Bayes
## classifier boundary (the penalty term is -0.5 * log|Sigma|)
delta <- function(x, mu, sigma, pi) {
  -0.5 * t(x) %*% solve(sigma) %*% x +
    t(x) %*% solve(sigma) %*% mu -
    0.5 * t(mu) %*% solve(sigma) %*% mu -
    0.5 * log(det(sigma)) +
    log(pi)
}

## create plot data to partition the space
expand.grid(x1 = seq(-4, 4, length.out = 100),
            x2 = seq(-4, 4, length.out = 100)) %>%
  data.frame() -> plot_dat

## lda better situation data
lda_plot_dat <- data.frame(plot_dat,
                           delta_1 = apply(plot_dat, 1, delta, mu = mu_1, sigma = sigma_1, pi = 0.5),
                           delta_2 = apply(plot_dat, 1, delta, mu = mu_2, sigma = sigma_1, pi = 0.5)) %>%
  mutate(bayes_classifier = ifelse(delta_1 >= delta_2, "1", "2")) %>%
  mutate(lda_classifier = as.character(predict(lda_fit_lda_better, plot_dat)$class)) %>%
  mutate(qda_classifier = as.character(predict(qda_fit_lda_better, plot_dat)$class))

## qda better situation data
qda_plot_dat <- data.frame(plot_dat,
                           delta_1 = apply(plot_dat, 1, delta, mu = mu_1, sigma = sigma_1, pi = 0.5),
                           delta_2 = apply(plot_dat, 1, delta, mu = mu_2, sigma = sigma_2, pi = 0.5)) %>%
  mutate(bayes_classifier = ifelse(delta_1 >= delta_2, "1", "2")) %>%
  mutate(lda_classifier = as.character(predict(lda_fit_qda_better, plot_dat)$class)) %>%
  mutate(qda_classifier = as.character(predict(qda_fit_qda_better, plot_dat)$class))

## make plots
lda_plot_dat %>%
  dplyr::select(-delta_1, -delta_2) %>%
  gather(classifier, class, -x1, -x2) %>%
  ggplot() +
  geom_tile(aes(x1, x2, fill = class), alpha = 0.5) +
  facet_wrap(.~classifier) +
  geom_point(aes(x1, x2, colour = class), data = lda_better_train)

qda_plot_dat %>%
  dplyr::select(-delta_1, -delta_2) %>%
  gather(classifier, class, -x1, -x2) %>%
  ggplot() +
  geom_tile(aes(x1, x2, fill = class), alpha = 0.5) +
  facet_wrap(.~classifier) +
  geom_point(aes(x1, x2, colour = class), data = qda_better_train)
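## ---- echo = FALSE, cache = TRUE---------------------------------------------------------------------
## A hedged sketch, not from the original: quantify the comparison above by
## drawing fresh test data from the same two scenarios and computing the test
## error of each fit. The test-set size (10000 per class) and the helper
## `test_error` are illustrative choices.
lda_better_test <- data.frame(class = "1", rmvnorm(10000, mu_1, sigma_1)) %>%
  bind_rows(data.frame(class = "2", rmvnorm(10000, mu_2, sigma_1)))
qda_better_test <- data.frame(class = "1", rmvnorm(10000, mu_1, sigma_1)) %>%
  bind_rows(data.frame(class = "2", rmvnorm(10000, mu_2, sigma_2)))
names(lda_better_test) <- names(qda_better_test) <- c("class", "x1", "x2")

## misclassification rate on a test set
test_error <- function(fit, test) {
  mean(predict(fit, test)$class != test$class)
}

c(lda_on_shared_sigma = test_error(lda_fit_lda_better, lda_better_test),
  qda_on_shared_sigma = test_error(qda_fit_lda_better, lda_better_test),
  lda_on_differing_sigma = test_error(lda_fit_qda_better, qda_better_test),
  qda_on_differing_sigma = test_error(qda_fit_qda_better, qda_better_test))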
## ---- echo = FALSE-----------------------------------------------------------------------------------
library(class)


## ---- echo = FALSE, cache = TRUE, fig.show='hold', out.width="50%", fig.height=5---------------------
## make fake data
mu_1 <- c(-1, 1)
mu_2 <- c(2, -1)
mu_3 <- c(-3, .1)
mu_4 <- c(1, -1)
sigma_1 <- matrix(c(1, .5, .5, 1), nrow = 2)
sigma_2 <- matrix(c(1, -.5, -.5, 1), nrow = 2)

## sample from a two-component normal mixture
sample_mixture2 <- function(n, mu_1, mu_2, sigma, p) {
  z <- rbinom(n, 1, p)
  z*rmvnorm(n, mu_1, sigma) + (1 - z)*rmvnorm(n, mu_2, sigma)
}

## density of a two-component normal mixture
d_mixture2 <- function(x, mu_1, mu_2, sigma, p) {
  p*dmvnorm(x, mu_1, sigma) + (1 - p)*dmvnorm(x, mu_2, sigma)
}

# training data from the mixture
train <- data.frame(class = "1", sample_mixture2(100, mu_1, mu_2, sigma_1, 0.2)) %>%
  bind_rows(data.frame(class = "2", sample_mixture2(100, mu_3, mu_4, sigma_1, 0.7)))
names(train) <- c("class", "x1", "x2")

## function to calculate the Bayes classifier boundary; class 2's density
## mixes mu_3 and mu_4, matching how the training data were generated
bayes_classifier <- function(x) {
  as.character(as.numeric(d_mixture2(x, mu_1, mu_2, sigma_1, 0.2)*0.5 <
                            d_mixture2(x, mu_3, mu_4, sigma_1, 0.7)*0.5) + 1)
}

## create plot data to partition the space
expand.grid(x1 = seq(-6, 6, length.out = 500),
            x2 = seq(-6, 6, length.out = 500)) %>%
  data.frame() -> plot_dat

## knn plots for each k
for(k in c(1, 10, 100)) {
  ## fit knn
  knn_fit <- knn(train[, -1], plot_dat, train$class, k = k)
  knn_plot_dat <- plot_dat %>% mutate(class = as.character(knn_fit))

  ## make plots
  knn_plot_dat %>%
    ggplot() +
    geom_tile(aes(x1, x2, fill = class), alpha = 0.5) +
    geom_point(aes(x1, x2, colour = class), data = train) +
    ggtitle(paste("KNN, K =", k)) +
    theme(text = element_text(size = 20), legend.position = "none") -> p

  print(p)
}

plot_dat %>%
  mutate(class = apply(plot_dat, 1, bayes_classifier)) %>%
  ggplot() +
  geom_tile(aes(x1, x2, fill = class), alpha = 0.5) +
  geom_point(aes(x1, x2, colour = class), data = train) +
  ggtitle("Bayes Classifier") +
  theme(text = element_text(size = 20))
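## ---- echo = FALSE, cache = TRUE---------------------------------------------------------------------
## A hedged sketch, not from the original: estimate the test error of each KNN
## fit and of the Bayes classifier on a fresh draw from the same mixtures.
## The test-set size (5000 per class) and the K grid are illustrative choices.
test <- data.frame(class = "1", sample_mixture2(5000, mu_1, mu_2, sigma_1, 0.2)) %>%
  bind_rows(data.frame(class = "2", sample_mixture2(5000, mu_3, mu_4, sigma_1, 0.7)))
names(test) <- c("class", "x1", "x2")

## knn test error for each k
sapply(c(1, 10, 100), function(k) {
  mean(knn(train[, -1], test[, -1], train$class, k = k) != test$class)
})

## Bayes classifier test error on the same draw
mean(apply(test[, -1], 1, bayes_classifier) != test$class)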