0% found this document useful (0 votes)
13 views8 pages

Multivariate Normal Data Analysis Guide

The document contains R code for generating multivariate normal data, performing statistical analyses including maximum likelihood estimation (MLE) of mean and covariance, calculating partial correlations, fitting linear regression models, and conducting Hotelling's T-squared tests. It also explores the relationship between multivariate normal distributions and Chi-square statistics, along with simulations to analyze sample covariance matrices and their properties. Various statistical visualizations such as histograms and boxplots are included to illustrate the results.

Uploaded by

Abdullah-Al Emon
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
13 views8 pages

Multivariate Normal Data Analysis Guide

The document contains R code for generating multivariate normal data, performing statistical analyses including maximum likelihood estimation (MLE) of mean and covariance, calculating partial correlations, fitting linear regression models, and conducting Hotelling's T-squared tests. It also explores the relationship between multivariate normal distributions and Chi-square statistics, along with simulations to analyze sample covariance matrices and their properties. Various statistical visualizations such as histograms and boxplots are included to illustrate the results.

Uploaded by

Abdullah-Al Emon
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd

# (stray "lol" from the original scrape removed; kept as a comment)

##### Part A

# Step 1: define the population parameters ---------------------------

library(MASS)    # provides mvrnorm() for multivariate normal sampling

set.seed(123)    # reproducibility (scrape had "[Link](123)")

mu <- c(4, 3, 2) # mean vector

# Population covariance matrix (symmetric, positive definite)
Sigma <- matrix(c(4, 1, 0,
                  1, 2, 1,
                  0, 1, 3),
                nrow = 3, byrow = TRUE)

# Step 2: generate 10000 samples -------------------------------------

data <- mvrnorm(n = 10000, mu = mu, Sigma = Sigma) # 10000 x 3 matrix
data <- as.data.frame(data)  # matrix -> data frame for easier manipulation
colnames(data) <- c("X1", "X2", "X3")

head(data)

# (a) Boxplot and histograms of the three simulated variables.
# NOTE(review): the default x-axis labels come from the deparsed
# expressions (e.g. "data$X1"), so the calls are kept verbatim.

# Side-by-side boxplots of all three columns on one plot
boxplot(data, main="Boxplots of X1, X2, X3")

# Histograms drawn in a 1 x 3 layout
par(mfrow=c(1,3))
hist(data$X1, main="Histogram of X1", col="skyblue", breaks=30)
hist(data$X2, main="Histogram of X2", col="skyblue", breaks=30)
hist(data$X3, main="Histogram of X3", col="skyblue", breaks=30)
par(mfrow=c(1,1))  # restore the default single-plot layout

# (b) MLE of the mean vector and covariance matrix --------------------
# colMeans() is the MLE of mu. cov() divides by (n - 1), while the MLE
# of Sigma divides by n, so the sample covariance is rescaled by
# (n - 1) / n.

# For n = 500
set.seed(1)   # scrape had "[Link](1)"
data500 <- mvrnorm(n=500, mu=mu, Sigma=Sigma)
mean500 <- colMeans(data500)
cov500 <- cov(data500) * (499/500) # convert sample covariance to MLE

# For n = 5000
set.seed(2)   # scrape had "[Link](2)"
data5000 <- mvrnorm(n=5000, mu=mu, Sigma=Sigma)
mean5000 <- colMeans(data5000)
cov5000 <- cov(data5000) * (4999/5000)

mean500; cov500
mean5000; cov5000

# Manual computation of the partial correlation r12.3 -----------------

# -------------------------------
# Generate data (same as before)
# -------------------------------
set.seed(123)   # scrape had "[Link](123)"

mu <- c(4, 3, 2)
Sigma <- matrix(c(4, 1, 0,
                  1, 2, 1,
                  0, 1, 3), nrow=3, byrow=TRUE)

library(MASS)
data <- mvrnorm(n=10000, mu=mu, Sigma=Sigma)
df <- as.data.frame(data)   # scrape had "[Link](data)"
colnames(df) <- c("X1", "X2", "X3")

# -------------------------------
# Step 1: Correlation matrix
# -------------------------------
R <- cor(df)
print(R)

# Extract the pairwise correlations
r12 <- R["X1", "X2"]
r13 <- R["X1", "X3"]
r23 <- R["X2", "X3"]

# -------------------------------
# Step 2: Manual formula method
# -------------------------------
# r12.3 = (r12 - r13 r23) / sqrt((1 - r13^2)(1 - r23^2))
r12_3 <- (r12 - r13*r23) / sqrt((1-r13^2) * (1-r23^2))
cat("Partial correlation r12.3 (formula) =", r12_3, "\n")

# -------------------------------
# Step 3: Inverse correlation matrix method
# -------------------------------
# The same quantity from the precision matrix P = R^{-1}:
# r_ij.rest = -P[i,j] / sqrt(P[i,i] P[j,j])
R_inv <- solve(R)
pcor_12_3 <- -R_inv[1,2] / sqrt(R_inv[1,1] * R_inv[2,2])
cat("Partial correlation r12.3 (matrix inversion) =", pcor_12_3, "\n")

# (d) Regression of X1 on X2 and X3 -----------------------------------

# Fit the linear regression. `data` is coerced to a data frame here
# because an earlier section reassigns it to the raw mvrnorm() matrix,
# which lm(data=) does not accept.
fit <- lm(X1 ~ X2 + X3, data = as.data.frame(data))
summary(fit)

# Multiple correlation coefficient R = sqrt(R^2) of the fit
# (scrape had "summary(fit)$[Link]")
R <- sqrt(summary(fit)$r.squared)
R

# Question 2: Hotelling's T^2 test ------------------------------------

# Data: n = 3 observations on d = 2 variables
X <- matrix(c(6,10, 8,9, 6,3), ncol=2, byrow=TRUE)

# Hypothesized mean vector under H0
mu0 <- c(9, 5)

# Sample mean
xbar <- colMeans(X)

# Sample covariance matrix (unbiased, divisor n - 1)
S <- cov(X)

# Step 1: Hotelling's T^2 = n (xbar - mu0)' S^{-1} (xbar - mu0)
n <- nrow(X)
d <- ncol(X)
diff <- xbar - mu0
T2 <- n * t(diff) %*% solve(S) %*% diff
T2

# Step 2: exact F transformation
# F = (n - d) / (d (n - 1)) * T^2 ~ F(d, n - d) under H0
F_stat <- ((n - d) / (d * (n - 1))) * T2
p_value <- 1 - pf(F_stat, d, n - d)

F_stat
p_value

# Step 3: Bonferroni simultaneous confidence intervals ----------------

alpha <- 0.05

# Critical t value with the Bonferroni correction over d intervals
t_crit <- qt(1 - alpha/(2*d), df=n-1)

# Diagonal variances of S
s_diag <- diag(S)

# Standard errors of the component means
se <- sqrt(s_diag / n)

# Bonferroni CIs for each component mean
CIs <- cbind(
  lower = xbar - t_crit * se,
  upper = xbar + t_crit * se
)
CIs

# Question 3: Wishart matrices and the generalized variance ------------

# Load library
library(MASS) # for mvrnorm

set.seed(123)   # scrape had "[Link](123)"

# Parameters
mu <- c(1, 3, 2) # mean vector
Sigma <- matrix(c(5, 1, 0,
                  1, 2, 1,
                  0, 1, 3), nrow = 3, byrow = TRUE)

# (a) Simulate 1000 samples from the multivariate normal
n <- 1000
samples <- mvrnorm(n = n, mu = mu, Sigma = Sigma)

# Sample covariance matrix ((n-1) * S follows a Wishart distribution)
S <- cov(samples)
S

# (b) Linear transformation Y = A X ------------------------------------

# Transformation matrix A
A <- matrix(c(1,2,0,
              0,1,1,
              1,0,1), nrow=3, byrow=TRUE)

# Transform the data: each row x_i becomes (A x_i)'
trans_samples <- t(A %*% t(samples))

# Covariance of the transformed data; should approximate A Sigma A'
S_trans <- cov(trans_samples)
S_trans

# Theoretical covariance of the transformed vector.
# BUG FIX: the original used the elementwise product A*Sigma*t(A);
# the covariance of AX requires the matrix product A %*% Sigma %*% t(A).
A %*% Sigma %*% t(A)

# (c) Generalized variance ---------------------------------------------

# Generalized variance = determinant of the sample covariance matrix
gen_var <- det(S)
gen_var

# Check degeneracy: a (near-)zero determinant means S is singular
if (abs(gen_var) < 1e-8) {
  cat("The Wishart matrix is degenerate (determinant ~ 0)\n")
} else {
  cat("The Wishart matrix is non-degenerate (determinant =", gen_var, ")\n")
}

## Part B

# Question 1 -----------------------------------------------------------
# (a) MLE of mean and covariance for n = 500 and n = 700

library(MASS)

mu <- c(2, 3, 4)
Sigma <- matrix(c(4,1,0,
                  1,2,1,
                  0,1,3), nrow=3, byrow=TRUE)

set.seed(123)   # scrape had "[Link](123)"
X <- mvrnorm(1000, mu, Sigma)

# For n = 500 (first 500 rows)
X1 <- X[1:500,]
mean_500 <- colMeans(X1)
cov_500 <- cov(X1) * (499/500) # MLE uses 1/n instead of 1/(n-1)

# For n = 700 (first 700 rows)
X2 <- X[1:700,]
mean_700 <- colMeans(X2)
cov_700 <- cov(X2) * (699/700)

mean_500; cov_500
mean_700; cov_700

# (b) Marginal probability for the first variable ----------------------
# Marginally, X1 ~ N(2, 4): mean 2, variance 4, standard deviation 2.

mu1 <- 2
sd1 <- sqrt(4)

# Example: P(1 < X1 < 3) = F(3) - F(1) using the marginal normal CDF
upper_cdf <- pnorm(3, mean = mu1, sd = sd1)
lower_cdf <- pnorm(1, mean = mu1, sd = sd1)
p <- upper_cdf - lower_cdf
p

# (c) Regression model: X1 ~ X2 + X3 -----------------------------------

# Build a data frame from the columns of the simulated matrix X
# (scrape had "[Link](X1 = ...)" for data.frame(...))
data <- data.frame(X1 = X[,1], X2 = X[,2], X3 = X[,3])

model <- lm(X1 ~ X2 + X3, data = data)
summary(model)

# Multiple correlation coefficient R = sqrt(R^2)
# (scrape had "summary(model)$[Link]")
R <- sqrt(summary(model)$r.squared)
R

# ============================================================
# Question 2: Multivariate Normal and Chi-Square Relationship
# ============================================================

# Given:
k <- 6 # number of components (degrees of freedom)
n <- 1000 # number of observations

# ------------------------------------------------------------
# (a) Generate a sample from the standard normal N(0,1)
# ------------------------------------------------------------
set.seed(123) # for reproducibility (scrape had "[Link](123)")
X <- matrix(rnorm(n * k, mean = 0, sd = 1), nrow = n, ncol = k)

# Interpretation:
# Each row of X represents one observation of the multivariate normal
# vector X ~ N(0, I_k). Each column is one component (independent N(0,1)).

# ------------------------------------------------------------
# (b) Compute the Chi-square statistic for each observation
# ------------------------------------------------------------
chi_sq_values <- rowSums(X^2)

# Interpretation:
# Since each observation has k = 6 independent N(0,1) components,
# their sum of squares follows a Chi-square(k=6) distribution.

# ------------------------------------------------------------
# (c) i. PDF of Chi-square(6) at x = 8
# ------------------------------------------------------------
x_value <- 8
pdf_val <- dchisq(x_value, df = k)
pdf_val

# Interpretation:
# The PDF gives the height of the Chi-square(6) density curve at x = 8,
# i.e. how likely values near 8 are relative to other points.

# ------------------------------------------------------------
# (c) ii. CDF at x = 8 for Chi-square(6)
# ------------------------------------------------------------
cdf_val <- pchisq(x_value, df = k)
cdf_val

# Interpretation:
# The CDF gives P(Chi-square(6) <= 8), the probability that a
# Chi-square(6) random variable takes a value <= 8.

# ------------------------------------------------------------
# (d) Plot histogram and overlay theoretical PDF
# ------------------------------------------------------------
# The title is a plain string; the scraped original had a line break
# embedded inside the string literal.
hist(chi_sq_values, breaks = 30, probability = TRUE,
     main = "Histogram of Chi-square Values with Theoretical PDF Overlay",
     xlab = expression(chi^2 ~ "values"), col = "lightblue", border = "gray")

# Overlay the theoretical Chi-square(6) PDF curve
curve(dchisq(x, df = k), col = "red", lwd = 2, add = TRUE)

# Interpretation:
# The histogram is the simulated distribution of the chi-square statistics;
# the red line is the theoretical Chi-square(6) density. A close match
# verifies that the sum of squares of 6 standard normal variables
# indeed follows Chi-square(6).

# Question 3: Hotelling's T^2 test on the given data -------------------

# 15 observations on 4 variables
X <- matrix(c(
  580,516,613,750,
  473,319,514,963,
  664,369,782,107,
  739,193,293,530,
  143,853,927,121,
  127,632,512,837,
  703,551,936,118,
  108,578,856,113,
  185,74,244,663,
  111,544,618,816,
  815,365,500,930,
  770,522,542,570,
  759,205,443,789,
  928,360,402,611,
  849,137,396,700
), ncol=4, byrow=TRUE)

colnames(X) <- c("X1","X2","X3","X4")

# Hypothesized mean vector under H0
mu0 <- c(208, 400, 500, 500)

# Sample statistics
n <- nrow(X)
p <- ncol(X)
xbar <- colMeans(X)
S <- cov(X)

# Hotelling's T^2 = n (xbar - mu0)' S^{-1} (xbar - mu0),
# written with crossprod() and a linear solve instead of an
# explicit matrix inverse.
dev <- xbar - mu0
T2 <- n * crossprod(dev, solve(S, dev))

# Exact F transformation: F = (n - p) / (p (n - 1)) T^2 ~ F(p, n - p)
Fstat <- (n - p) / (p * (n - 1)) * T2
pval <- 1 - pf(Fstat, df1=p, df2=n-p)

# Collect the results
list(
  SampleMean = xbar,
  T2 = T2,
  Fstat = Fstat,
  pvalue = pval
)

# Question 4: Wishart distribution simulation --------------------------

# -------------------------
# Simulation parameters
# -------------------------
set.seed(123) # reproducible (scrape had "[Link](123)")
n <- 800 # number of samples
p <- 3 # dimension
mu <- c(1, 4, 5) # mean vector
Sigma <- matrix(c(5,1,0,
                  1,2,1,
                  0,1,3), nrow=3, byrow=TRUE)

# ---------- (simulate) ----------

# Use MASS::mvrnorm to draw n samples from N_p(mu, Sigma)
if (!requireNamespace("MASS", quietly = TRUE)) install.packages("MASS")
library(MASS)
X <- MASS::mvrnorm(n = n, mu = mu, Sigma = Sigma)

# ---------- (a) sample covariance matrix ----------

# sample mean
xbar <- colMeans(X)
# scatter matrix A = sum_i (x_i - xbar)(x_i - xbar)'
centered <- X - matrix(xbar, nrow = n, ncol = p, byrow = TRUE)
A <- t(centered) %*% centered
# sample covariance (unbiased)
S_sample <- A / (n - 1)

cat("Sample covariance matrix (S):\n")
print(S_sample)

# Theoretical note:
# If X_i ~ N_p(mu, Sigma) and A = sum_{i=1}^n (X_i - xbar)(X_i - xbar)',
# then A ~ Wishart_p(n-1, Sigma); equivalently
# (n-1) * S_sample ~ Wishart_p(n-1, Sigma).

# ---------- (b) Wishart -> Chi-square relationship ----------

# Property: if A ~ Wishart_p(m, Sigma) then
#   T = trace(Sigma^{-1} A) = sum_{i=1}^m Z_i' Z_i,  Z_i ~ N_p(0, I_p),
# hence T ~ Chi-square with df = p * m.
m <- n - 1      # Wishart df (centering by the sample mean costs one)
df_chi <- p * m # degrees of freedom for the trace statistic

invSigma <- solve(Sigma)

T_stat <- sum(diag(invSigma %*% A)) # trace(invSigma %*% A)

# p-value for the observed T_stat under Chi-square(df_chi)
p_value <- 1 - pchisq(T_stat, df = df_chi)

cat("\nTrace statistic T = trace(Sigma^{-1} A):", T_stat, "\n")
cat("Chi-square df (p * m):", df_chi, "\n")
cat("p-value (1 - F_chi2(T)):", p_value, "\n")

# ---------- (c) Generalized variance and degeneracy ----------

# Generalized variance = determinant of the covariance matrix
det_S <- det(S_sample)
logdet_S <- determinant(S_sample, logarithm = TRUE)$modulus # numerically stable
eig_S <- eigen(S_sample, symmetric = TRUE)$values
rank_S <- qr(S_sample)$rank
cond_S <- kappa(S_sample) # condition number (largest / smallest singular value)

cat("\nGeneralized variance (det(S)):", det_S, "\n")
cat("Log-determinant:", logdet_S, "\n")
cat("Eigenvalues of S:", eig_S, "\n")
cat("Rank of S:", rank_S, "\n")
cat("Condition number kappa(S):", cond_S, "\n")

# Degeneracy rule for the Wishart distribution:
# if m < p, the matrix is singular with probability 1 (degenerate);
# if m >= p and Sigma is positive definite, it is non-singular w.p. 1.
cat("\nDegeneracy check: m (", m, ") ",
    ifelse(m < p,
           " < p -> Wishart singular (degenerate) with prob 1",
           ">= p -> Wishart is non-singular w.p.1 (for pd Sigma)"),
    "\n")

You might also like