a) Change your working directory. Try the commands help(c) and help(matrix).

#setwd("...")
#help(c)
#help(matrix)

b) Calculate the affine transformation \(y = xA^{-1} + b\), where

\[\mathbf{A} = \left(\begin{array} {rrr} 2 & 1 & 5 \\ -2 & 7 & 0 \\ 5 & -8 & -1 \end{array}\right), \mathbf{x^T} = \left(\begin{array} {rrr} 8 \\ -4 \\ 2 \end{array}\right), \mathbf{b^T} = \left(\begin{array} {rrr} 3 \\ 10 \\ -19 \end{array}\right). \] ##

A <- matrix(c(2,1,5,-2,7,0,5,-8,-1),ncol=3,byrow=TRUE)
A
##      [,1] [,2] [,3]
## [1,]    2    1    5
## [2,]   -2    7    0
## [3,]    5   -8   -1
x1 <- c(8,-4,2)
x1
## [1]  8 -4  2
x2 <- matrix(c(8,-4,2),nrow=1)
x2
##      [,1] [,2] [,3]
## [1,]    8   -4    2
b <- c(3,10,-19)
b
## [1]   3  10 -19
x1%*% solve(A) + b
##          [,1]     [,2]      [,3]
## [1,] 3.774775 11.45946 -17.12613
x2%*% solve(A) + b
##          [,1]     [,2]      [,3]
## [1,] 3.774775 11.45946 -17.12613
solve(A)%*%x1  + b # Note that R does not give an error here
##            [,1]
## [1,]   2.729730
## [2,]   9.351351
## [3,] -17.162162
#y4 <- solve(A)%*%x2  + b # Here, R gives an error

# Note that when multiplying vectors and matrices, the safe way
# is to always use variables of the class matrix

c) Install the package mvtnorm and load the corresponding functions to your workspace. Set the seed to 123 using the command set.seed(123). Generate 100 observations from a two dimensional normal distribution with expected value \(\mu\) and covariance matrix \(\Sigma\). Visualize the observations.

\[\mathbf{\mu} = \left(\begin{array} {rrr} 3 \\ 1 \end{array}\right), \mathbf{\Sigma} = \left(\begin{array} {rrr} 4 & 1 \\ 1 & 2 \end{array}\right) \]

set.seed(123)

#install.packages("mvtnorm") # Run this only once

library(mvtnorm) # Run this after restarting RStudio

n <- 100
mu <- c(3,1)
Sigma <- matrix(c(4,1,1,2),byrow=T,ncol=2)

X <- rmvnorm(n,mu,Sigma)

plot(X)


d) Use the data from (c) and calculate the sample mean \(\bar{x}\) and the sample covariance matrix \(S_x\). Calculate the eigenvalues and eigenvectors from the matrix \(S_x\).

mx <- apply(X,2,mean)
colMeans(X)
## [1] 3.004655 0.970002
Sx <- cov(X)

sum(diag(Sx)) - sum(eigen(Sx)$values)
## [1] 0
prod(eigen(Sx)$values) -det(Sx)
## [1] 1.776357e-15

e) Calculate the affine transformation \(y = A x + b\).

#e
b <- c(3,1)
A <- matrix(c(1,2,3,1),byrow=T,ncol=2)
Y <- sweep(X%*%t(A),2,b,"+")

#another way
ones = rep(1,n)
Y2 = X%*%t(A) + ones%*%t(b)
# When comparing if multivariate expressions are the same,
# use e.g. the Frobenius norm
norm(colMeans(Y) - A %*% colMeans(X) - b, type="F")
## [1] 8.881784e-16
norm(cov(Y) - A %*% cov(X) %*% t(A), type="F")
## [1] 3.552714e-15

f) Upload the data from the file Data1.txt into your workspace. Create a function, that centers your data (removes the mean) and pairwise scatterplots the variables. Calculate the sample covariance and correlation matrices and thecorresponding eigenvalues- and vectors.

# Here, we make a conversion to type matrix,
# note that many of the basic matrix operations are not 
# available for variables of type data.frame
setwd("C:/Users/Omistaja/Documents/R")
D1 <- as.matrix(read.table("Data1.txt",sep="\t",header=FALSE))
pairs(D1)

#The function apply allows to use vector commands either by rows or by columns. 
#The first argument of the command apply is the matrix or data frame to which the function will be applied. 
#The second argument gives the dimensions over which the function is to be applied.
#In the case of a matrix, 1 indicates rows, 2 indicates columns. 
#The third argument gives the name of the function to be applied.
#The function sweep(A,integer,a) substract the vector a, by columns if integer=1 and by rows if integer=2, from the matrix A.


center <- function(X){
  pairs(X)
  ave <- apply(X,2,mean)
  cent <- sweep(X,2,ave,"-")
  
  return(cent)  
}

C <- center(D1)

apply(C,2,mean)
##            V1            V2            V3            V4 
## -1.888219e-17  1.220785e-17 -1.680513e-18 -2.111592e-17
cov(D1)
##              V1          V2           V3           V4
## V1  0.085600783 0.007107367 -0.008906472 -0.004708558
## V2  0.007107367 0.069159452  0.015104514  0.013638556
## V3 -0.008906472 0.015104514  0.104474247 -0.003473715
## V4 -0.004708558 0.013638556 -0.003473715  0.080600878
cov(C)
##              V1          V2           V3           V4
## V1  0.085600783 0.007107367 -0.008906472 -0.004708558
## V2  0.007107367 0.069159452  0.015104514  0.013638556
## V3 -0.008906472 0.015104514  0.104474247 -0.003473715
## V4 -0.004708558 0.013638556 -0.003473715  0.080600878
cor(D1)
##             V1        V2          V3          V4
## V1  1.00000000 0.0923728 -0.09418076 -0.05668644
## V2  0.09237280 1.0000000  0.17769547  0.18267232
## V3 -0.09418076 0.1776955  1.00000000 -0.03785468
## V4 -0.05668644 0.1826723 -0.03785468  1.00000000
cor(C)
##             V1        V2          V3          V4
## V1  1.00000000 0.0923728 -0.09418076 -0.05668644
## V2  0.09237280 1.0000000  0.17769547  0.18267232
## V3 -0.09418076 0.1776955  1.00000000 -0.03785468
## V4 -0.05668644 0.1826723 -0.03785468  1.00000000
eigen(cov(C))$values
## [1] 0.11163469 0.08903608 0.08697829 0.05218629
eigen(cor(C))$values
## [1] 1.2367328 1.0762947 1.0211537 0.6658188
eigen(cov(C))$vectors
##             [,1]        [,2]        [,3]       [,4]
## [1,]  0.24232756  0.51237112 -0.76290091  0.3110231
## [2,] -0.30797566  0.55314104 -0.04145492 -0.7729602
## [3,] -0.91739205 -0.09949153 -0.23313028  0.3068282
## [4,] -0.06942745  0.64931676  0.60159285  0.4600583
eigen(cor(C))$vectors
##             [,1]       [,2]       [,3]       [,4]
## [1,] -0.02338207  0.7717167 -0.5016233  0.3902315
## [2,]  0.72821948  0.2244426 -0.1739016 -0.6237629
## [3,]  0.47737240 -0.5465891 -0.4787554  0.4941145
## [4,]  0.49118760  0.2352002  0.6992321  0.4631307