Suppose you are working with a medical dataset containing information on patients who regularly visit a clinic.
For every patient, we have multiple observations on several health parameters, such as blood glucose level and blood pressure.
We are interested in the relationship between these health parameters and forced vital capacity (FVC).
FVC is the maximum amount of air a person can forcibly exhale from their lungs after fully inhaling.
A diminished FVC value is a sign of several conditions, including chronic obstructive pulmonary disease (COPD), chronic bronchitis, emphysema, and bronchiectasis.
It is therefore important to understand what relationship the other health parameters, such as blood pressure, share with FVC.
When we think about modelling the relationship between variables, the first thing that comes to mind is a regression setup.
If we build the simplest possible regression model, the regression coefficients of the different health parameters are the same for all patients. This model will not give us a complete picture of the health of individual patients. Such a model is called a pooled model.
On the other hand, if we build a separate model for each patient, we get information about every patient separately. Here we treat every patient as a distinct entity, but that may not be realistic: patients may share something in common, and assuming that every patient is completely different can lead to an inaccurate picture. We may also run into practical problems, such as not having sufficient data on some patients to fit a regression model we are confident in. This model is called an unpooled model.
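In code, the two extremes look like the following minimal sketch (assuming a data frame df with columns y, X1, X2 and a patient identifier id; the names are illustrative):

# Pooled model: one set of coefficients shared by all patients
pooled = lm(y ~ X1 + X2, data = df)
# Unpooled model: a separate fit for every patient
unpooled = lapply(split(df, df$id), function(d) lm(y ~ X1 + X2, data = d))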
Here we will use partial pooling. Specifically, we will assume that the regression coefficients differ across patients, as in the unpooled case, but that they share a common part. We can model this by assuming that each coefficient comes from a common group-level distribution.
* Bayesian hierarchical linear regression is a statistical model that allows us to analyse data with a hierarchical or nested structure.
* In this model, the data are assumed to come from multiple levels, with each level having its own set of predictor values and outcomes. For example, in our medical study, the patients may be nested within hospitals, and each patient has characteristics (e.g., age, sex, blood pressure) that influence their FVC.
* The hierarchical structure allows for the estimation of both individual-level and group-level effects, which can improve the accuracy of the estimates and account for the variability within and between groups. The Bayesian approach allows prior knowledge and uncertainty to be incorporated into the model, which can improve the interpretability of the results and provide more robust inferences.
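Concretely, with j indexing the level (patient) and i the observation within it, the partially pooled model has the form

y_ij = beta0_j + beta1_j * X1_ij + beta2_j * X2_ij + eps_ij,   eps_ij ~ N(0, sigma^2),

where each coefficient is drawn from a common group-level distribution (e.g., beta0_j ~ N(mubeta0, sigmabeta0^2)), and the group-level parameters themselves receive priors. This is essentially the structure that the simulation and the ABC fit below follow, although the data generation uses heavier-tailed draws for some of the coefficients.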
For illustration, we will work with simulated data.
We assume 5 levels, or classes; in the analogy with the FVC case, the levels correspond to patients.
We generate 100 observations in total across the 5 levels.
The observations are distributed equally across the levels, so we have 20 observations per level.
set.seed(1779)
n = 100
level = 5                       # number of classes in the data
k = n / level                   # number of observations in each class
mu1 = rcauchy(level)            # class-specific means of X1
mu2 = rnorm(level)              # class-specific means of X2
sigma1 = 5
sigma2 = 1
X1 = c()
X2 = c()
for (i in 1:level)
{
  idx = ((i - 1) * k + 1):(i * k)   # observations belonging to class i
  X1[idx] = rnorm(k, mu1[i], sigma1)
  X2[idx] = rnorm(k, mu2[i], sigma2)
}
eps = rnorm(n)                  # observation-level noise
mubeta0 = rnorm(1, 8, 1)        # group-level means for the coefficients
mubeta1 = rnorm(1, 5, 3)
mubeta2 = rnorm(1, 3, 2)
print('Beta0 values are:')
beta0 = abs(rnorm(level, mubeta0)); beta0
print('Beta1 values are:')
beta1 = rcauchy(level, mubeta1); beta1
print('Beta2 values are:')
beta2 = rnorm(level, mubeta2, 3); beta2
X1beta = c()
X2beta = c()
for (i in 1:level)
{
  idx = ((i - 1) * k + 1):(i * k)
  X1beta[idx] = beta0[i] + beta1[i] * X1[idx]   # intercept plus X1 effect for class i
  X2beta[idx] = beta2[i] * X2[idx]              # X2 effect for class i
}
y = X1beta + X2beta + eps
data = cbind(y, X1, X2)
[1] "Beta0 Values are:"
[1] "Beta1 Values are:"
[1] "Beta2 Values are:"
# we plot the data
par(mfrow=c(1,3))
plot(X1,y)
plot(X2,y)
plot(X1,X2)
# fit a separate regression for every level (the unpooled model)
mod = list()
for (i in 1:level)
{
  idx = ((i - 1) * k + 1):(i * k)
  mod[[i]] = lm(y[idx] ~ X1[idx] + X2[idx])
}
# collecting all the coefficients (for different classes)
beta1_hat=c()
beta0_hat=c()
beta2_hat=c()
for(i in 1:level)
{
beta0_hat[i]= mod[[i]]$coefficients[1]
beta1_hat[i]= mod[[i]]$coefficients[2]
beta2_hat[i]= mod[[i]]$coefficients[3]
}
# Now we use the above coefficients to construct priors for the
# Bayesian hierarchical model
m_beta0 = mean(beta0_hat)
m_beta1 = mean(beta1_hat)
m_beta2 = mean(beta2_hat)
sd_beta0 = sd(beta0_hat)
sd_beta1 = sd(beta1_hat)
sd_beta2 = sd(beta2_hat)
Approximate Bayesian Computation (ABC) is a statistical method that allows for the inference of complex models when likelihood functions are difficult or impossible to calculate. ABC works by simulating data from a model using a set of parameters sampled from a prior distribution. These simulated data are then compared to the actual observed data, and the parameters that produce simulations that are most similar to the observed data are retained. This process is repeated many times to generate a posterior distribution of the parameters that are consistent with the observed data.
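As a toy illustration of this rejection scheme, the following sketch infers the mean of a normal sample (the data, prior, and threshold are all illustrative choices):

# Toy ABC rejection sampler for the mean of a normal sample
obs = rnorm(50, mean = 2)                     # "observed" data
accepted = c()
for (s in 1:10^4)
{
  theta = rnorm(1, 0, 10)                     # draw a candidate from the prior
  sim_data = rnorm(50, theta)                 # simulate data under the candidate
  if (abs(mean(sim_data) - mean(obs)) < 0.1)  # compare summary statistics
    accepted = c(accepted, theta)             # retain candidates that match closely
}
hist(accepted)                                # approximate posterior for the mean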
# Now we place priors on our betas.
# Assuming we do not have much of an idea about the parameters,
# we select some vague priors.
sim = 10^7
# matrices (level x sim) to store the generated coefficients
beta2_gen = beta1_gen = beta0_gen = matrix(0, nrow = level, ncol = sim)
thresh = 1                      # acceptance threshold on the distance
# preallocate the distances between generated and fitted coefficients
diff_b2 = diff_b1 = diff_b0 = numeric(sim)
for (i in 1:sim)
{
  # draw group-level means and standard deviations from vague hyperpriors
  mubeta0 = rnorm(1, m_beta0, 30)
  sigmabeta0 = abs(rnorm(1, sd_beta0, 10))
  mubeta1 = rnorm(1, m_beta1, 10)
  sigmabeta1 = abs(rnorm(1, sd_beta1, 10))
  mubeta2 = rnorm(1, m_beta2, 30)
  sigmabeta2 = abs(rnorm(1, sd_beta2, 30))
  # draw level-specific coefficients from the group distributions
  for (j in 1:level)
  {
    beta0_gen[j, i] = rnorm(1, mubeta0, sigmabeta0)
    beta1_gen[j, i] = rnorm(1, mubeta1, sigmabeta1)
    beta2_gen[j, i] = rnorm(1, mubeta2, sigmabeta2)
  }
  # Euclidean distance between generated and fitted coefficients
  diff_b0[i] = sqrt(sum((beta0_gen[, i] - beta0_hat)^2))
  diff_b1[i] = sqrt(sum((beta1_gen[, i] - beta1_hat)^2))
  diff_b2[i] = sqrt(sum((beta2_gen[, i] - beta2_hat)^2))
}
index_b0 = which(diff_b0 < thresh)   # accepted simulations for beta0
index_b1 = which(diff_b1 < thresh)   # accepted simulations for beta1
index_b2 = which(diff_b2 < thresh)   # accepted simulations for beta2
m_beta0 = apply(beta0_gen[, index_b0], 1, mean)
m_beta1 = apply(beta1_gen[, index_b1], 1, mean)
m_beta2 = apply(beta2_gen[, index_b2], 1, mean)
print("Number of posterior samples obtained for Beta0 are: ");length(index_b0)
print("Number of posterior samples obtained for Beta1 are: ");length(index_b1)
print("Number of posterior samples obtained for Beta2 are: ");length(index_b2)
[1] "Number of posterior samples obtained for Beta0 are: "
[1] "Number of posterior samples obtained for Beta1 are: "
[1] "Number of posterior samples obtained for Beta2 are: "
m_beta0 # posterior means of Beta0 for each level
beta0   # original Beta0 coefficients
m_beta1 # posterior means of Beta1 for each level
beta1   # original Beta1 coefficients
m_beta2 # posterior means of Beta2 for each level
beta2   # original Beta2 coefficients
# The means of the posterior samples based on the threshold are close to the
# original values of the betas, except for beta2.
# Convert the posterior samples for the 5 levels (matrices) into vector form
gen_beta0 = as.vector(beta0_gen[, index_b0])
gen_beta1 = as.vector(beta1_gen[, index_b1])
gen_beta2 = as.vector(beta2_gen[, index_b2])
beta0.samples = t(beta0_gen[, index_b0])
beta1.samples = t(beta1_gen[, index_b1])
beta2.samples = t(beta2_gen[, index_b2])
library(ggplot2)
# Density plots of the posterior samples
ggplot(data.frame(beta = gen_beta0), aes(x = beta)) +
  geom_density(fill = "lightblue", alpha = 0.5) +
  labs(title = "Density plot of beta0")
ggplot(data.frame(beta = gen_beta1), aes(x = beta)) +
  geom_density(fill = "lightblue", alpha = 0.5) +
  labs(title = "Density plot of beta1")
ggplot(data.frame(beta = gen_beta2), aes(x = beta)) +
  geom_density(fill = "lightblue", alpha = 0.5) +
  labs(title = "Density plot of beta2")
# Box plots of the posterior samples by level.
# The vectors stack the levels within each accepted simulation,
# so the class labels cycle 1:level down the vector.
ggplot(data.frame(beta = gen_beta0,
                  class = factor(rep(1:level, times = length(gen_beta0) / level))),
       aes(y = beta, x = class)) +
  geom_boxplot(fill = "lightblue", alpha = 0.5) +
  facet_wrap(~ class) +
  labs(title = "Box plot of beta0 by level")
ggplot(data.frame(beta = gen_beta1,
                  class = factor(rep(1:level, times = length(gen_beta1) / level))),
       aes(y = beta, x = class)) +
  geom_boxplot(fill = "lightblue", alpha = 0.5) +
  facet_wrap(~ class) +
  labs(title = "Box plot of beta1 by level")
ggplot(data.frame(beta = gen_beta2,
                  class = factor(rep(1:level, times = length(gen_beta2) / level))),
       aes(y = beta, x = class)) +
  geom_boxplot(fill = "lightblue", alpha = 0.5) +
  facet_wrap(~ class) +
  labs(title = "Box plot of beta2 by level")
# Trace plots of the posterior samples
ggplot(data.frame(iter = seq_along(gen_beta0), beta = gen_beta0),
       aes(x = iter, y = beta)) +
  geom_line() +
  labs(title = "Trace plot of beta0")
ggplot(data.frame(iter = seq_along(gen_beta1), beta = gen_beta1),
       aes(x = iter, y = beta)) +
  geom_line() +
  labs(title = "Trace plot of beta1")
ggplot(data.frame(iter = seq_along(gen_beta2), beta = gen_beta2),
       aes(x = iter, y = beta)) +
  geom_line() +
  labs(title = "Trace plot of beta2")
# Generate predicted values over the range of the predictor.
# beta0 and beta1 have different sets of accepted simulations, so their
# components cannot be combined; only the beta1 * X1 part is used here.
y.predict = matrix(0, nrow = n, ncol = nrow(beta1.samples))
for (i in 1:nrow(beta1.samples))
{
  for (j in 1:level)
  {
    idx = ((j - 1) * k + 1):(j * k)
    y.predict[idx, i] = beta1.samples[i, j] * X1[idx]
  }
}
# Compute the mean and standard deviation of the predicted values for each x value
y.mean = apply(y.predict, 1, mean)
y.sd = apply(y.predict, 1, sd)
# Compute the 95% credible interval for each x value
ci.lower = y.mean - 1.96*y.sd
ci.upper = y.mean + 1.96*y.sd
# Plot the data with the posterior-mean line and a shaded 95% credible band
ord = order(X1)   # sort by X1 so the line and band draw cleanly
ggplot(data.frame(x = X1[ord], y = y[ord],
                  lo = ci.lower[ord], hi = ci.upper[ord], fit = y.mean[ord]),
       aes(x = x, y = y)) +
  geom_point() +
  geom_ribbon(aes(ymin = lo, ymax = hi), alpha = 0.3) +
  geom_line(aes(y = fit), color = "red", linewidth = 1) +
  labs(title = "95% credible band for the regression using X1 as predictor",
       x = "X1", y = "y")
# Repeat for X2: generate predicted values using only the beta2 * X2 component
y.predict = matrix(0, nrow = n, ncol = nrow(beta2.samples))
for (i in 1:nrow(beta2.samples))
{
  for (j in 1:level)
  {
    idx = ((j - 1) * k + 1):(j * k)
    y.predict[idx, i] = beta2.samples[i, j] * X2[idx]
  }
}
# Compute the mean and standard deviation of the predicted values for each x value
y.mean = apply(y.predict, 1, mean)
y.sd = apply(y.predict, 1, sd)
# Compute the 95% credible interval for each x value
ci.lower = y.mean - 1.96*y.sd
ci.upper = y.mean + 1.96*y.sd
# Plot the data with the posterior-mean line and a shaded 95% credible band
ord = order(X2)
ggplot(data.frame(x = X2[ord], y = y[ord],
                  lo = ci.lower[ord], hi = ci.upper[ord], fit = y.mean[ord]),
       aes(x = x, y = y)) +
  geom_point() +
  geom_ribbon(aes(ymin = lo, ymax = hi), alpha = 0.3) +
  geom_line(aes(y = fit), color = "red", linewidth = 1) +
  labs(title = "95% credible band for the regression using X2 as predictor",
       x = "X2", y = "y")
* From the 95% credible plots above, we can see that the regression line generated by the Bayesian hierarchical model for variable X1 captures the pattern in the data reasonably well.
* We had already seen visually that no clear relationship exists between X2 and y, so the credible plot for X2 comes with a very wide 95% credible interval.
* If you observed carefully, the numbers of posterior samples accepted differ across the betas, which makes it difficult to draw joint inferences across the variables. This is one of the problems with using ABC.
* Very few posterior samples for the betas were accepted, even after generating 1e+07 simulations.
Both problems can be approached by using different thresholds for the different betas and by tuning the prior parameters of the beta coefficients; one option is sketched below.
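For instance, instead of a hard cutoff we can accept a fixed fraction of the closest simulations for each coefficient separately (a sketch; the 0.1% fraction is an illustrative choice). This also guarantees the same number of accepted draws for every beta.

# Accept the closest 0.1% of simulations for each coefficient separately
index_b0 = which(diff_b0 <= quantile(diff_b0, 0.001))
index_b1 = which(diff_b1 <= quantile(diff_b1, 0.001))
index_b2 = which(diff_b2 <= quantile(diff_b2, 0.001))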
* Of course, the code can be made simpler.
* What about Non-linear Models?