# ===== Section A =====
# Step 1: define the population parameters of the trivariate normal
library(MASS)   # for mvrnorm(); must be loaded before sampling
set.seed(123)   # reproducibility
mu <- c(4, 3, 2)  # mean vector
Sigma <- matrix(c(4, 1, 0,
                  1, 2, 1,
                  0, 1, 3),
                nrow = 3, byrow = TRUE)  # covariance matrix
# Step 2: generate 10000 samples from N3(mu, Sigma)
data <- mvrnorm(n = 10000, mu = mu, Sigma = Sigma)  # 10000 x 3 matrix
data <- as.data.frame(data)  # matrix -> data frame for data manipulation
colnames(data) <- c("X1", "X2", "X3")
head(data)
# (a) Visualize each marginal with a boxplot and a histogram
boxplot(data, main = "Boxplots of X1, X2, X3")
# Three histograms in one row; the layout is restored afterwards
par(mfrow = c(1, 3))
hist(data$X1, breaks = 30, col = "skyblue", main = "Histogram of X1")
hist(data$X2, breaks = 30, col = "skyblue", main = "Histogram of X2")
hist(data$X3, breaks = 30, col = "skyblue", main = "Histogram of X3")
par(mfrow = c(1, 1))
# (b) Maximum-likelihood estimates of mu and Sigma.
# The MLE of the covariance divides by n, so rescale the unbiased
# sample covariance (which divides by n - 1) by (n - 1)/n.
# For n = 500
set.seed(1)
data500 <- mvrnorm(n = 500, mu = mu, Sigma = Sigma)
mean500 <- colMeans(data500)         # MLE of the mean vector
cov500 <- cov(data500) * (499/500)   # convert sample covariance to MLE
# For n = 5000
set.seed(2)
data5000 <- mvrnorm(n = 5000, mu = mu, Sigma = Sigma)
mean5000 <- colMeans(data5000)
cov5000 <- cov(data5000) * (4999/5000)
mean500; cov500
mean5000; cov5000
# Manual computation of the partial correlation r12.3
# -------------------------------
# Generate data (same as before)
# -------------------------------
set.seed(123)
mu <- c(4, 3, 2)
Sigma <- matrix(c(4, 1, 0,
                  1, 2, 1,
                  0, 1, 3), nrow = 3, byrow = TRUE)
library(MASS)
data <- mvrnorm(n = 10000, mu = mu, Sigma = Sigma)
df <- as.data.frame(data)
colnames(df) <- c("X1", "X2", "X3")
# -------------------------------
# Step 1: Correlation matrix
# -------------------------------
R <- cor(df)
print(R)
# Extract the pairwise correlations needed for the partial correlation
r12 <- R["X1", "X2"]
r13 <- R["X1", "X3"]
r23 <- R["X2", "X3"]
# -------------------------------
# Step 2: Manual formula method
# -------------------------------
# r12.3 = (r12 - r13 r23) / sqrt((1 - r13^2)(1 - r23^2))
num <- r12 - r13 * r23
den <- sqrt((1 - r13^2) * (1 - r23^2))
r12_3 <- num / den
cat("Partial correlation r12.3 (formula) =", r12_3, "\n")
# -------------------------------
# Step 3: Inverse correlation matrix method
# -------------------------------
# The partial correlation can be read off the precision matrix P = R^{-1}:
#   pcor_ij = -P[i, j] / sqrt(P[i, i] * P[j, j])
R_inv <- solve(R)
pcor_12_3 <- -R_inv[1, 2] / sqrt(R_inv[1, 1] * R_inv[2, 2])
cat("Partial correlation r12.3 (matrix inversion) =", pcor_12_3, "\n")
# (d) Regress X1 on X2 and X3; the multiple correlation coefficient
# is the square root of the regression R^2.
fit <- lm(X1 ~ X2 + X3, data = data)
summary(fit)
# Multiple correlation coefficient R
R <- sqrt(summary(fit)$r.squared)
R
# ===== Question 2 =====
# One-sample Hotelling's T^2 statistic for H0: mu = mu0.
# Data: 3 observations on 2 variables
X <- matrix(c(6, 10,
              8, 9,
              6, 3), ncol = 2, byrow = TRUE)
# Hypothesized mean vector
mu0 <- c(9, 5)
# Sample mean
xbar <- colMeans(X)
# Sample covariance matrix (unbiased, divides by n - 1)
S <- cov(X)
# Hotelling's T^2 = n (xbar - mu0)' S^{-1} (xbar - mu0)
n <- nrow(X)
d <- ncol(X)
diff <- xbar - mu0
T2 <- n * t(diff) %*% solve(S) %*% diff
T2
# Step 2: convert Hotelling's T^2 into an F statistic.
# Under H0, ((n - d) / (d (n - 1))) T^2 ~ F(d, n - d).
F_stat <- ((n - d) / (d * (n - 1))) * T2
p_value <- 1 - pf(F_stat, d, n - d)
F_stat
p_value
# Step 3: Bonferroni simultaneous confidence intervals.
# Each of the d intervals uses level alpha / d, i.e. the t quantile
# at 1 - alpha / (2d) with n - 1 degrees of freedom.
alpha <- 0.05
t_crit <- qt(1 - alpha / (2 * d), df = n - 1)
# Per-variable standard errors of the mean (from the diagonal of S)
se <- sqrt(diag(S) / n)
CIs <- cbind(lower = xbar - t_crit * se,
             upper = xbar + t_crit * se)
CIs
# ===== Question 3 =====
# Load library
library(MASS)  # for mvrnorm
set.seed(123)
# Parameters
mu <- c(1, 3, 2)  # mean vector
Sigma <- matrix(c(5, 1, 0,
                  1, 2, 1,
                  0, 1, 3), nrow = 3, byrow = TRUE)
# (a) Simulate 1000 samples from the multivariate normal N3(mu, Sigma)
n <- 1000
samples <- mvrnorm(n = n, mu = mu, Sigma = Sigma)
# Sample covariance matrix; (n - 1) * S is Wishart distributed
S <- cov(samples)
S
# (b) Linear transformation Y = A X
A <- matrix(c(1, 2, 0,
              0, 1, 1,
              1, 0, 1), nrow = 3, byrow = TRUE)
# Transform the data: each row x_i becomes (A x_i)'
trans_samples <- t(A %*% t(samples))
# Covariance of the transformed data (should approximate A Sigma A')
S_trans <- cov(trans_samples)
S_trans
# Theoretical covariance of A X: must use matrix multiplication %*%,
# not elementwise * (the original A*Sigma*t(A) was a bug).
A %*% Sigma %*% t(A)
# (c) Generalized variance = determinant of the sample covariance
gen_var <- det(S)
gen_var
# Degeneracy check: a determinant near zero means S is numerically singular
if (abs(gen_var) < 1e-8) {
  cat("The Wishart matrix is degenerate (determinant ~ 0)\n")
} else {
  cat("The Wishart matrix is non-degenerate (determinant =", gen_var, ")\n")
}
# ===== Section B =====
# ===== Question 1 =====
# (a) MLE of the mean and covariance for n = 500 and n = 700
library(MASS)
mu <- c(2, 3, 4)
Sigma <- matrix(c(4, 1, 0,
                  1, 2, 1,
                  0, 1, 3), nrow = 3, byrow = TRUE)
set.seed(123)
X <- mvrnorm(1000, mu, Sigma)
# For n = 500 (first 500 rows)
X1 <- X[1:500, ]
mean_500 <- colMeans(X1)
cov_500 <- cov(X1) * (499/500)  # MLE uses 1/n instead of 1/(n-1)
# For n = 700 (first 700 rows)
X2 <- X[1:700, ]
mean_700 <- colMeans(X2)
cov_700 <- cov(X2) * (699/700)
mean_500; cov_500
mean_700; cov_700
# (b) Marginal probability for the first variable.
# Marginally, X1 ~ N(mean = 2, variance = 4), i.e. standard deviation 2.
mu1 <- 2
sd1 <- sqrt(4)
# Example: P(1 < X1 < 3) via the normal CDF
p <- pnorm(3, mean = mu1, sd = sd1) - pnorm(1, mean = mu1, sd = sd1)
p
# (c) Regression model: X1 ~ X2 + X3
data <- data.frame(X1 = X[, 1], X2 = X[, 2], X3 = X[, 3])
model <- lm(X1 ~ X2 + X3, data = data)
summary(model)
# Multiple correlation coefficient = sqrt(R^2) of the regression
R <- sqrt(summary(model)$r.squared)
R
# ===== Question 2 =====
# ============================================================
# Question: Multivariate Normal and Chi-Square Relationship
# ============================================================
# Given:
k <- 6     # number of components (degrees of freedom)
n <- 1000  # number of observations
# ------------------------------------------------------------
# (a) Generate sample from standard normal N(0, 1)
# ------------------------------------------------------------
set.seed(123)  # for reproducibility
X <- matrix(rnorm(n * k, mean = 0, sd = 1), nrow = n, ncol = k)
# Interpretation:
# Each row of X represents one observation of the multivariate normal vector
# X ~ N(0, I_k). Each column is one component (independent N(0,1)).
# ------------------------------------------------------------
# (b) Compute Chi-square statistic for each observation
# ------------------------------------------------------------
chi_sq_values <- rowSums(X^2)
# Interpretation:
# Since each observation has k = 6 independent N(0,1) components,
# their sum of squares follows the Chi-square(k = 6) distribution.
# ------------------------------------------------------------
# (c) i. Calculate PDF of Chi-square(6) at x = 8
# ------------------------------------------------------------
x_value <- 8
pdf_val <- dchisq(x_value, df = k)
pdf_val
# Interpretation:
# The PDF gives the height of the Chi-square(6) density curve at x = 8.
# It represents how likely values near 8 are, relative to other points.
# ------------------------------------------------------------
# (c) ii. Calculate CDF at x = 8 for Chi-square(6)
# ------------------------------------------------------------
cdf_val <- pchisq(x_value, df = k)
cdf_val
# Interpretation:
# The CDF gives P(Chi-square(6) <= 8), i.e. the probability that a
# Chi-square(6) random variable takes a value <= 8.
# ------------------------------------------------------------
# (d) Plot histogram and overlay theoretical PDF
# ------------------------------------------------------------
hist(chi_sq_values, breaks = 30, probability = TRUE,
     main = "Histogram of Chi-square Values with Theoretical PDF Overlay",
     xlab = expression(chi^2 ~ "values"), col = "lightblue", border = "gray")
# Overlay the theoretical Chi-square(6) PDF curve
curve(dchisq(x, df = k), col = "red", lwd = 2, add = TRUE)
# Interpretation:
# The histogram represents the simulated distribution of chi-square statistics
# from our generated multivariate normal data.
# The red line shows the theoretical Chi-square(6) probability density function.
# A close match between the histogram and the red curve verifies that
# the sum of squares of 6 standard normal variables indeed follows Chi-square(6).
# ===== Question 3 =====
# One-sample Hotelling's T^2 test on a 15 x 4 data set.
X <- matrix(c(
  580, 516, 613, 750,
  473, 319, 514, 963,
  664, 369, 782, 107,
  739, 193, 293, 530,
  143, 853, 927, 121,
  127, 632, 512, 837,
  703, 551, 936, 118,
  108, 578, 856, 113,
  185,  74, 244, 663,
  111, 544, 618, 816,
  815, 365, 500, 930,
  770, 522, 542, 570,
  759, 205, 443, 789,
  928, 360, 402, 611,
  849, 137, 396, 700
), ncol = 4, byrow = TRUE)
colnames(X) <- c("X1", "X2", "X3", "X4")
# Hypothesized mean vector under H0
mu0 <- c(208, 400, 500, 500)
# Sample statistics
n <- nrow(X)
p <- ncol(X)
xbar <- colMeans(X)
S <- cov(X)
# Hotelling's T^2 = n (xbar - mu0)' S^{-1} (xbar - mu0)
# ("dev" rather than "diff" to avoid masking base::diff)
dev <- xbar - mu0
T2 <- n * t(dev) %*% solve(S) %*% dev
# Equivalent F statistic: ((n - p) / (p (n - 1))) T^2 ~ F(p, n - p) under H0
Fstat <- (n - p) / (p * (n - 1)) * T2
pval <- 1 - pf(Fstat, df1 = p, df2 = n - p)
list(
  SampleMean = xbar,
  T2 = T2,
  Fstat = Fstat,
  pvalue = pval
)
# ===== Question 4 =====
# -------------------------
# Simulation parameters
# -------------------------
set.seed(123)     # reproducible
n <- 800          # number of samples
p <- 3            # dimension
mu <- c(1, 4, 5)  # mean vector
Sigma <- matrix(c(5, 1, 0,
                  1, 2, 1,
                  0, 1, 3), nrow = 3, byrow = TRUE)
# ---------- (simulate) ----------
# Use MASS::mvrnorm to draw n samples from N_p(mu, Sigma)
if (!requireNamespace("MASS", quietly = TRUE)) install.packages("MASS")
library(MASS)
X <- MASS::mvrnorm(n = n, mu = mu, Sigma = Sigma)
# ---------- (a) sample covariance matrix ----------
# sample mean
xbar <- colMeans(X)
# scatter matrix A = sum_{i=1}^n (x_i - xbar)(x_i - xbar)'
centered <- X - matrix(xbar, nrow = n, ncol = p, byrow = TRUE)
A <- t(centered) %*% centered
# sample covariance (unbiased, divides by n - 1)
S_sample <- A / (n - 1)
cat("Sample covariance matrix (S):\n")
print(S_sample)
# Theoretical note:
# If X_i ~ N_p(mu, Sigma) and A = sum_{i=1}^n (X_i - xbar)(X_i - xbar)',
# then A ~ Wishart_p(n - 1, Sigma). Equivalently,
# (n - 1) * S_sample ~ Wishart_p(n - 1, Sigma).
# ---------- (b) Wishart -> Chi-square relationship ----------
# Property: if A ~ Wishart_p(m, Sigma) then
#   T = trace(Sigma^{-1} A) = sum_{i=1}^m Z_i' Z_i, where Z_i ~ N_p(0, I_p),
# hence T ~ Chi-square with df = p * m.
m <- n - 1       # Wishart df (data centered by the sample mean)
df_chi <- p * m  # degrees of freedom for the trace statistic
invSigma <- solve(Sigma)
T_stat <- sum(diag(invSigma %*% A))  # trace(Sigma^{-1} %*% A)
# p-value for the observed T_stat under Chi-square(df_chi)
p_value <- 1 - pchisq(T_stat, df = df_chi)
cat("\nTrace statistic T = trace(Sigma^{-1} A):", T_stat, "\n")
cat("Chi-square df (p * m):", df_chi, "\n")
cat("p-value (1 - F_chi2(T)):", p_value, "\n")
# ---------- (c) Generalized variance and degeneracy ----------
# Generalized variance = determinant of the covariance matrix (det(S_sample))
det_S <- det(S_sample)
logdet_S <- determinant(S_sample, logarithm = TRUE)$modulus  # numerically stable
eig_S <- eigen(S_sample, symmetric = TRUE)$values
rank_S <- qr(S_sample)$rank
# Condition number (ratio of largest to smallest singular value)
cond_S <- kappa(S_sample)
cat("\nGeneralized variance (det(S)):", det_S, "\n")
cat("Log-determinant:", logdet_S, "\n")
cat("Eigenvalues of S:", eig_S, "\n")
cat("Rank of S:", rank_S, "\n")
cat("Condition number kappa(S):", cond_S, "\n")
# Degeneracy rule for the Wishart distribution:
# - if m < p, the Wishart matrix is singular with probability 1 (degenerate);
# - if m >= p and Sigma is positive definite, it is non-singular w.p. 1.
degeneracy_msg <- ifelse(
  m < p,
  " < p -> Wishart singular (degenerate) with prob 1",
  ">= p -> Wishart is non-singular w.p.1 (for pd Sigma)"
)
cat("\nDegeneracy check: m (", m, ") ", degeneracy_msg, "\n")