# +--------------------------------------------------------------------------+
# | Einfuehrung in die Oekonometrie                                          |
# | Lineare Mehrfachregression                                               |
# | Beispiel "Mietspiegel Muenchen 1999" (doc_mietspiegel99.pdf)             |
# | R. Schuhr 19.03.2012                                                     |
# +--------------------------------------------------------------------------+


# Read the data from "miete99.csv" and define the data frame "msm"
# ----------------------------------------------------------------
# Note: miete99.csv contains a subset of the data in mietspiegel99.csv,
# namely flats with normal furnishing that were built in or after 1970.

# read.csv2() defaults to ";" as field separator and "," as decimal mark.
msm <- read.csv2(file = "miete99.csv", header = TRUE)
dim(msm)            # number of rows and columns
msm[seq_len(20), ]  # the first 20 observations
# Put the columns of msm on the search path so the statements below can refer
# to them by bare name; this is undone by detach(msm) at the end of the script.
attach(msm)


# Scatter plots
# -------------
# Rent against floor area on the original scale ...
plot(
  x = flaeche, y = miete,
  main = "Mietepreise in Muenchen 1999",
  xlab = "Wohnflaeche in qm", ylab = "Miete in DM"
)
# ... and with log-transformed rent.
plot(
  x = flaeche, y = log(miete),
  main = "Mietepreise in Muenchen 1999",
  xlab = "Wohnflaeche in qm", ylab = "log(Miete)"
)
# The log transformation stabilises inhomogeneous (= heteroskedastic) error
# variances.


# Multiple linear regression - compact
# ------------------------------------
# Regress log-rent on floor area and the two columns lage. and lage..
# NOTE(review): lage. / lage.. look like location dummies - confirm against
# the data description.
msm.modell.1 <- lm(log(miete) ~ flaeche + lage. + lage.., data = msm)
summary(msm.modell.1)

# Interpretation of the regression coefficients
confint(msm.modell.1, level = 0.99)  # 99% confidence intervals
coef(msm.modell.1)                   # estimates on the log scale
exp(coef(msm.modell.1))              # exponentiated estimates

# F-test of the hypothesis H0: b2 = b3
# The restricted model replaces the two regressors lage. and lage.. by their
# sum, so both share a single coefficient.
# The new column lives only in msm: the copy placed on the search path by
# attach(msm) was made earlier and does not pick it up.
msm$lage <- msm$lage. + msm$lage..
msm.modell.2 <- lm(log(miete) ~ flaeche + lage, data = msm)
# Alternative without the helper column:
#   msm.modell.2 <- lm(log(miete) ~ flaeche + I(lage. + lage..), data = msm)
summary(msm.modell.2)
# Compare the restricted model (2) with the unrestricted model (1).
anova(msm.modell.2, msm.modell.1)

# Interpretation of the regression coefficients of msm.modell.2
confint(msm.modell.2, level = 0.99)  # 99% confidence intervals
coef(msm.modell.2)                   # estimates on the log scale
exp(coef(msm.modell.2))              # exponentiated estimates

# R functions used so far (opens the corresponding help pages)
help("log")
help("lm")
help("summary")
help("confint")
help("I")
help("anova")

# Scatter plot: fitted values versus log(miete), with the identity line in blue
plot(
  x = fitted(msm.modell.2), y = log(miete),
  main = "Mietepreise in Muenchen 1999",
  xlab = "Fitted Values", ylab = "log(Miete)"
)
lines(
  x = fitted(msm.modell.2), y = fitted(msm.modell.2),
  type = "l", col = "blue"
)

# Scatter plot: exp(fitted values) versus miete, i.e. the same comparison on
# the original scale
plot(
  x = exp(fitted(msm.modell.2)), y = miete,
  main = "Mietepreise in Muenchen 1999",
  xlab = "exp(Fitted Values)", ylab = "Miete in DM"
)
lines(
  x = exp(fitted(msm.modell.2)), y = exp(fitted(msm.modell.2)),
  type = "l", col = "blue"
)

# Residual plot: residuals versus fitted values, with a reference line at zero
plot(
  x = fitted(msm.modell.2), y = residuals(msm.modell.2),
  type = "p", main = "Residuenplot",
  xlab = "Fitted Values", ylab = "Residuen"
)
abline(h = 0)

# R graphics functions used above (opens the corresponding help pages)
help("plot")
help("lines")
help("abline")


# Linear regression analysis: confidence and prediction intervals
# ---------------------------------------------------------------
# Confidence interval
# With interval = "confidence", predict() returns a matrix with the columns
# fit, lwr and upr (one row per row of newdata).
clim <- predict(msm.modell.2, newdata = msm, interval = "confidence",
                level = 0.95)
# lage was added to msm after attach(msm), so it is not available as a bare
# name on the search path; take both regressors from msm directly and name the
# columns explicitly.
cbind(flaeche = msm$flaeche, lage = msm$lage, clim)[1:15, ]
plot(fitted(msm.modell.2), log(miete),
     xlab = "Fitted Values", ylab = "log(Miete)")
# Identity line (blue) and the confidence bounds (red) against the fit
lines(fitted(msm.modell.2), fitted(msm.modell.2), type = "l", col = "blue")
lines(clim[, "fit"], clim[, "lwr"], col = "red")
lines(clim[, "fit"], clim[, "upr"], col = "red")

# Prediction interval
# With interval = "prediction", predict() returns a matrix with the columns
# fit, lwr and upr (one row per row of newdata).
plim <- predict(msm.modell.2, newdata = msm, interval = "prediction",
                level = 0.95)
# lage was added to msm after attach(msm), so it is not available as a bare
# name on the search path; take both regressors from msm directly and name the
# columns explicitly.
cbind(flaeche = msm$flaeche, lage = msm$lage, plim)[1:15, ]
plot(fitted(msm.modell.2), log(miete),
     xlab = "Fitted Values", ylab = "log(Miete)")
# Identity line (blue) and the prediction bounds (green) against the fit
lines(fitted(msm.modell.2), fitted(msm.modell.2), type = "l", col = "blue")
lines(plim[, "fit"], plim[, "lwr"], col = "green")
lines(plim[, "fit"], plim[, "upr"], col = "green")

# Prediction interval on the original scale
# Back-transform fit and both bounds of the log-scale interval with exp();
# eplim is reused further below for the comparison with msm.modell.3.
eplim <- exp(plim)
plot(
  x = exp(fitted(msm.modell.2)), y = miete,
  xlab = "exp(Fitted Values)", ylab = "Miete"
)
lines(eplim[, "fit"], eplim[, "fit"], col = "blue")   # identity line
lines(eplim[, "fit"], eplim[, "lwr"], col = "green")  # lower bound
lines(eplim[, "fit"], eplim[, "upr"], col = "green")  # upper bound


# Consequence of heteroskedasticity
# Same regressors as msm.modell.2, but with the untransformed rent as response.
# data = msm is required here: lage was added to msm after attach(msm) and is
# therefore not visible as a bare name on the search path.
msm.modell.3 <- lm(miete ~ flaeche + lage, data = msm)
summary(msm.modell.3)
# Prediction interval
# NOTE(review): level is 0.9 here, whereas the msm.modell.2 interval (eplim)
# overlaid below was computed with level = 0.95 - confirm this is intended.
plim <- predict(msm.modell.3, newdata = msm, interval = "prediction",
                level = 0.9)
# The response is on the original scale, so the y axis shows the rent itself.
plot(fitted(msm.modell.3), miete,
     xlab = "Fitted Values", ylab = "Miete in DM")
lines(plim[, "fit"], plim[, "fit"], col = "blue")       # identity line
lines(plim[, "fit"], plim[, "lwr"], col = "darkgreen")  # prediction interval msm.modell.3
lines(plim[, "fit"], plim[, "upr"], col = "darkgreen")  # prediction interval msm.modell.3
lines(eplim[, "fit"], eplim[, "lwr"], col = "green")    # prediction interval msm.modell.2
lines(eplim[, "fit"], eplim[, "upr"], col = "green")    # prediction interval msm.modell.2


# End of script: remove msm from the search path again.
detach("msm")
