--- title: "Low-Rank Canonical Correlation Analysis (LR-CCA)" author: "Eric Bridgeford" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{lrcca} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r} require(lolR) require(ggplot2) require(MASS) n=400 d=30 r=3 ``` Data for this notebook will be `n=400` examples of `d=30` dimensions. # LR-CCA ## Stacked Cigar Simulation We first visualize the first `2` dimensions: ```{r, fig.width=5} testdat <- lol.sims.cigar(n, d) X <- testdat$X Y <- testdat$Y data <- data.frame(x1=X[,1], x2=X[,2], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point() + xlab("x1") + ylab("x2") + ggtitle("Simulated Data") ``` Projecting with LR-CCA to `3` dimensions and visualizing the first `2`: ```{r, fig.width=5} result <- lol.project.lrcca(X, Y, r) data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point() + xlab("x1") + ylab("x2") + ggtitle("Projected Data using LR-CCA") ``` Projecting with LDA to `K-1=1` dimensions: ```{r, fig.width=5} liney <- MASS::lda(result$Xr, Y) result <- predict(liney, result$Xr) lhat <- 1 - sum(result$class == Y)/length(Y) data <- data.frame(x1=result$x[,1], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, fill=y)) + geom_density(adjust=1.5, alpha=0.6) + xlab("x1") + ylab("Density") + ggtitle(sprintf("LR-CCA - LDA, L = %.2f", lhat)) ``` ## Trunk Simulation We visualize the first `2` dimensions: ```{r, fig.width=5} testdat <- lol.sims.rtrunk(n, d) X <- testdat$X Y <- testdat$Y data <- data.frame(x1=X[,1], x2=X[,2], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point() + xlab("x1") + ylab("x2") + ggtitle("Simulated Data") ``` Projecting with LR-CCA to `3` dimensions and visualizing the first `2`: ```{r, fig.width=5} result <- lol.project.lrcca(X, Y, r) data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point() + xlab("x1") + ylab("x2") + ggtitle("Projected Data using LR-CCA") ``` Projecting with LDA to `K-1=1` dimensions: ```{r, fig.width=5} liney <- MASS::lda(result$Xr, Y) result <- predict(liney, result$Xr) lhat <- 1 - sum(result$class == Y)/length(Y) data <- data.frame(x1=result$x[,1], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, fill=y)) + geom_density(adjust=1.5, alpha=0.6) + xlab("x1") + ylab("Density") + ggtitle(sprintf("LRCCA-LDA, L = %.2f", lhat)) ``` ## Rotated Trunk Simulation We visualize the first `2` dimensions: ```{r, fig.width=5} testdat <- lol.sims.rtrunk(n, d, rotate=TRUE) X <- testdat$X Y <- testdat$Y data <- data.frame(x1=X[,1], x2=X[,2], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point() + xlab("x1") + ylab("x2") + ggtitle("Simulated Data") ``` Projecting with LRCCA to `3` dimensions and visualizing the first `2`: ```{r, fig.width=5} result <- lol.project.lrcca(X, Y, r) data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point() + xlab("x1") + ylab("x2") + ggtitle("Projected Data using LR-CCA") ``` Projecting with LDA to `K-1=1` dimensions: ```{r, fig.width=5} liney <- MASS::lda(result$Xr, Y) result <- predict(liney, result$Xr) lhat <- 1 - sum(result$class == Y)/length(Y) data <- data.frame(x1=result$x[,1], y=Y) data$y <- factor(data$y) ggplot(data, aes(x=x1, fill=y)) + geom_density(adjust=1.5, alpha=0.6) + xlab("x1") + ylab("Density") + ggtitle(sprintf("LRCCA-LDA, L = %.2f", lhat)) ```