# Bowling Green State University
# (419) 378 - 9131
# ebaltay@bgsu.edu
library(dplyr)
library(dummies)
# Author: Endale B Altaye
# Dec 2014
# BGSU, For Regression Class
# Backward elimination for a multiple linear regression.
#
# Fits the model that uses every column of `data` except the response as a
# predictor, then repeatedly drops the predictor owning the coefficient with
# the largest p-value and refits, until no remaining p-value exceeds
# `alphaToRemove`.
#
# Args:
#   data:             object coercible with data.frame(); holds the response
#                     and all candidate predictors.
#   responceVariable: name (character scalar) of the response column.
#   alphaToRemove:    threshold; a predictor is dropped while its p-value is
#                     greater than this value.
#
# Returns:
#   The summary.lm of the finally selected model, or the string
#   "No Significant variable Remains" when every predictor was eliminated.
#
# Notes:
#   * Each step is reported with print(); progress headers use message().
#   * A predictor that expands to several coefficients (e.g. a factor) is
#     dropped as a whole when one of its coefficients has the largest p-value.
#   * Aliased predictors (NA coefficient) have no p-value, so they are never
#     selected for removal.
backwardElimination <- function(data, responceVariable, alphaToRemove) {
  dataname <- data.frame(data)
  response <- responceVariable
  varname <- names(dataname)

  if (!is.character(response) || length(response) != 1L ||
      !(response %in% varname)) {
    stop("responceVariable must name exactly one column of data",
         call. = FALSE)
  }
  if (length(alphaToRemove) != 1L || is.na(alphaToRemove)) {
    stop("alphaToRemove must be a single non-missing value", call. = FALSE)
  }

  predVarname <- varname[varname != response]
  if (length(predVarname) == 0L) {
    stop("data must contain at least one predictor column", call. = FALSE)
  }

  # Fit response ~ p1 + p2 + ... for the given predictor names.
  fitModel <- function(predictors) {
    rhs <- paste(predictors, collapse = "+")
    lm(as.formula(paste(response, rhs, sep = "~")), data = dataname)
  }

  # p-values of the non-intercept coefficients, named by the model term that
  # owns each coefficient. summary() leaves aliased coefficients out of its
  # table, so rows are mapped back to terms by coefficient name rather than
  # by position.
  termPvalues <- function(model) {
    coefTable <- summary(model)$coefficients
    owner <- model$assign[match(rownames(coefTable), names(coef(model)))]
    labels <- attr(terms(model), "term.labels")
    isPredictor <- owner > 0
    setNames(coefTable[isPredictor, 4], labels[owner[isPredictor]])
  }

  # Model containing all candidate predictors.
  mymodel <- fitModel(predVarname)
  pvalue <- termPvalues(mymodel)

  i <- 1
  message(sprintf(
    "Variable removed at each step with alpha to stay value = %s:",
    alphaToRemove
  ))
  newpredvarname <- predVarname

  repeat {
    # which.max() skips NA/NaN and returns a single index even on ties, so
    # exactly one term is considered per step.
    worst <- which.max(pvalue)
    if (length(worst) == 0L || pvalue[[worst]] <= alphaToRemove) {
      # Nothing left that exceeds the threshold: keep the current model.
      Finalresult <- summary(mymodel)
      break
    }

    removedvar <- names(pvalue)[worst]
    if (!(removedvar %in% newpredvarname)) {
      # Guards against looping forever if a term label cannot be matched.
      stop("cannot match model term '", removedvar, "' to a predictor column",
           call. = FALSE)
    }
    print(sprintf("step %s removed variable=%s, the p-value was %s",
                  i, removedvar, pvalue[[worst]]))
    newpredvarname <- newpredvarname[newpredvarname != removedvar]

    if (length(newpredvarname) == 0L) {
      # Every predictor has been dropped.
      Finalresult <- print("No Significant variable Remains")
      break
    }

    i <- i + 1
    mymodel <- fitModel(newpredvarname)
    pvalue <- termPvalues(mymodel)
  }

  if (length(predVarname) == length(newpredvarname)) {
    message("No Variable Removed")
  }
  message("Summary of Finally Selected Model is:")
  Finalresult
}
# Read the delimited housing data file and show its structure.
housingprice <- read.delim(file = ".../housingprice.txt")
str(housingprice)

# Build the dummy-variable version of the data (dummies package) and inspect it.
housingpriceWithDummy <- dummy.data.frame(housingprice, verbose = TRUE)
str(housingpriceWithDummy)

# Drop the listed dummy columns before modelling
# (presumably one reference level per categorical variable -- TODO confirm).
housingpriceWithDummy <- select(
  housingpriceWithDummy,
  -c(BEDS3, BATHS2, HEAT0, STYLE0, GARAGE1, BASEMENT0, FIRE0, SCHOOL0)
)

# Backward elimination on PRICE: variables stay in the model only while
# their p-value is below 20%.
backwardElimination(housingpriceWithDummy, "PRICE", 0.2)