Note that the directories used to store data are likely different on your computer, and such references will need to be changed before using any such code.
library(tidyverse)
# Read the session data; character columns stay as plain strings.
df <- read.csv(
  file = "../../Data/Session_2-1.csv",
  stringsAsFactors = FALSE
)
# Second name for the same data, taken before any filtering below.
df_full <- df
# Keep only the rows belonging to this one ISIN.
uol <- df %>% filter(isin == "SG1S83002349")
#clean_df <- subset(df,fyear==2017 & !is.na(revt) & !is.na(ni) & revt > 1)
# revt: Revenue, at: Assets
summary(uol[, c("revt", "at")])
revt at
Min. : 94.78 Min. : 1218
1st Qu.: 193.41 1st Qu.: 3044
Median : 427.44 Median : 3478
Mean : 666.38 Mean : 5534
3rd Qu.:1058.61 3rd Qu.: 7939
Max. :2103.15 Max. :19623
# Simple OLS on the uol subset: revenue (revt) explained by assets (at)
mod1 <- lm(revt ~ at, data = uol)
summary(mod1)
Call:
lm(formula = revt ~ at, data = uol)
Residuals:
Min 1Q Median 3Q Max
-295.01 -101.29 -41.09 47.17 926.29
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -13.831399 67.491305 -0.205 0.839
at 0.122914 0.009678 12.701 6.7e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 221.2 on 27 degrees of freedom
Multiple R-squared: 0.8566, Adjusted R-squared: 0.8513
F-statistic: 161.3 on 1 and 27 DF, p-value: 6.699e-13
# Graph showing squared error
# Every observation gets a square whose side equals its absolute residual:
# one corner sits on the data point and the opposite horizontal edge sits at
# the fitted value (revt - resid).
uolg <- uol[, c("at", "revt")]
uolg$resid <- mod1$residuals
# Horizontal extent: the square extends to the right of the point when the
# residual is negative and to the left otherwise.
uolg$xleft <- ifelse(uolg$resid < 0, uolg$at, uolg$at - uolg$resid)
uolg$xright <- ifelse(uolg$resid < 0, uolg$at - uolg$resid, uolg$at)
# Vertical extent: between the observed value and the fitted value.
uolg$ytop <- ifelse(uolg$resid < 0, uolg$revt - uolg$resid, uolg$revt)
uolg$ybottom <- ifelse(uolg$resid < 0, uolg$revt, uolg$revt - uolg$resid)
uolg$point <- TRUE
# Helper copy of every row, moved to the opposite corner of its square, so the
# error bars drawn from both copies together trace all four sides.
uolg2 <- uolg
uolg2$point <- FALSE
uolg2$at <- ifelse(uolg$resid < 0, uolg2$xright, uolg2$xleft)
uolg2$revt <- ifelse(uolg$resid < 0, uolg2$ytop, uolg2$ybottom)
uolg <- rbind(uolg, uolg2)
uolg %>% ggplot(aes(y = revt, x = at)) +
  # Shape NA hides the helper rows (point == FALSE); real rows use shape 18.
  geom_point(aes(shape = point, group = point)) +
  scale_shape_manual(values = c(NA, 18)) +
  # Fit the line on the real observations only. The helper corner rows are not
  # data, and including them would draw a line that differs from mod1, so the
  # squares would no longer touch it.
  geom_smooth(data = function(d) d[d$point, ], method = "lm", se = FALSE) +
  geom_errorbarh(aes(xmax = xright, xmin = xleft)) +
  geom_errorbar(aes(ymax = ytop, ymin = ybottom)) +
  theme(legend.position = "none") + xlim(0, 20000) + ylim(0, 2500)
`geom_smooth()` using formula 'y ~ x'
```r
# tidyverse: growth of revt relative to the previous row, using lag()
uol <- mutate(uol, revt_growth1 = revt / lag(revt) - 1)
# R way: shift revt down one row by hand, padding the first row with NA
uol$revt_growth2 <- uol$revt / c(NA, head(uol$revt, -1)) - 1
# Check that both approaches give exactly the same vector
identical(uol$revt_growth1, uol$revt_growth2)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiWzFdIFRSVUVcbiJ9 -->
[1] TRUE
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuIyBNYWtlIHRoZSBvdGhlciBuZWVkZWQgY2hhbmdlXG51b2wgPC0gdW9sICU+JVxuICBtdXRhdGUoYXRfZ3Jvd3RoID0gYXQgLyBsYWcoYXQpIC0gMSkgJT4lICAjIENhbGN1bGF0ZSBhc3NldCBncm93dGhcbiAgcmVuYW1lKHJldnRfZ3Jvd3RoID0gcmV2dF9ncm93dGgxKSAgICAgICAgIyBSZW5hbWUgZm9yIHJlYWRhYmlsaXR5XG4jIFJ1biB0aGUgT0xTIG1vZGVsXG5tb2QyIDwtIGxtKHJldnRfZ3Jvd3RoIH4gYXRfZ3Jvd3RoLCBkYXRhID0gdW9sKVxuc3VtbWFyeShtb2QyKVxuYGBgXG5gYGAifQ== -->
```r
```r
# Make the other needed change
uol <- uol %>%
  rename(revt_growth = revt_growth1) %>%   # Rename for readability
  mutate(at_growth = at / lag(at) - 1)     # Calculate asset growth
# Run the OLS model: revenue growth explained by asset growth
mod2 <- lm(formula = revt_growth ~ at_growth, data = uol)
summary(mod2)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiXG5DYWxsOlxubG0oZm9ybXVsYSA9IHJldnRfZ3Jvd3RoIH4gYXRfZ3Jvd3RoLCBkYXRhID0gdW9sKVxuXG5SZXNpZHVhbHM6XG4gICAgIE1pbiAgICAgICAxUSAgIE1lZGlhbiAgICAgICAzUSAgICAgIE1heCBcbi0wLjU3NzM2IC0wLjEwNTM0IC0wLjAwOTUzICAwLjE1MTMyICAwLjQyMjg0IFxuXG5Db2VmZmljaWVudHM6XG4gICAgICAgICAgICBFc3RpbWF0ZSBTdGQuIEVycm9yIHQgdmFsdWUgUHIoPnx0fCkgIFxuKEludGVyY2VwdCkgIDAuMDkwMjQgICAgMC4wNTYyMCAgIDEuNjA2ICAgMC4xMjA0ICBcbmF0X2dyb3d0aCAgICAwLjUzODIxICAgIDAuMjc3MTcgICAxLjk0MiAgIDAuMDYzMSAuXG4tLS1cblNpZ25pZi4gY29kZXM6ICAwIMOi4oKsy5wqKirDouKCrOKEoiAwLjAwMSDDouKCrMucKirDouKCrOKEoiAwLjAxIMOi4oKsy5wqw6LigqzihKIgMC4wNSDDouKCrMucLsOi4oKs4oSiIDAuMSDDouKCrMucIMOi4oKs4oSiIDFcblxuUmVzaWR1YWwgc3RhbmRhcmQgZXJyb3I6IDAuMjQ0NCBvbiAyNiBkZWdyZWVzIG9mIGZyZWVkb21cbiAgKDEgb2JzZXJ2YXRpb24gZGVsZXRlZCBkdWUgdG8gbWlzc2luZ25lc3MpXG5NdWx0aXBsZSBSLXNxdWFyZWQ6ICAwLjEyNjcsXHRBZGp1c3RlZCBSLXNxdWFyZWQ6ICAwLjA5MzA3IFxuRi1zdGF0aXN0aWM6IDMuNzcxIG9uIDEgYW5kIDI2IERGLCAgcC12YWx1ZTogMC4wNjMwN1xuIn0= -->
Call: lm(formula = revt_growth ~ at_growth, data = uol)
Residuals: Min 1Q Median 3Q Max -0.57736 -0.10534 -0.00953 0.15132 0.42284
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.09024 0.05620 1.606 0.1204
at_growth 0.53821 0.27717 1.942 0.0631 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2444 on 26 degrees of freedom (1 observation deleted due to missingness) Multiple R-squared: 0.1267, Adjusted R-squared: 0.09307 F-statistic: 3.771 on 1 and 26 DF, p-value: 0.06307
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuIyBsY3Q6IHNob3J0IHRlcm0gbGlhYmlsaXRpZXMsIGNoZTogY2FzaCBhbmQgZXF1aXZhbGVudHMsIGViaXQ6IEVCSVRcbnVvbCA8LSB1b2wgJT4lXG4gIG11dGF0ZV9hdCh2YXJzKGxjdCwgY2hlLCBlYml0KSwgbGlzdChncm93dGggPSB+LiAvIGxhZyguKSAtIDEpKSAgIyBDYWxjdWxhdGUgMyBncm93dGhzXG5tb2QzIDwtIGxtKHJldnRfZ3Jvd3RoIH4gbGN0X2dyb3d0aCArIGNoZV9ncm93dGggKyBlYml0X2dyb3d0aCwgZGF0YT11b2wpXG5zdW1tYXJ5KG1vZDMpXG5gYGBcbmBgYCJ9 -->
```r
```r
# lct: short term liabilities, che: cash and equivalents, ebit: EBIT
# Calculate 3 growths: adds lct_growth, che_growth and ebit_growth, each being
# the value divided by the previous row's value, minus 1.
# across() replaces the superseded mutate_at(); .names keeps the same
# <column>_growth naming.
uol <- uol %>%
  mutate(across(c(lct, che, ebit), ~ . / lag(.) - 1, .names = "{.col}_growth"))
mod3 <- lm(revt_growth ~ lct_growth + che_growth + ebit_growth, data = uol)
summary(mod3)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiXG5DYWxsOlxubG0oZm9ybXVsYSA9IHJldnRfZ3Jvd3RoIH4gbGN0X2dyb3d0aCArIGNoZV9ncm93dGggKyBlYml0X2dyb3d0aCwgXG4gICAgZGF0YSA9IHVvbClcblxuUmVzaWR1YWxzOlxuICAgICBNaW4gICAgICAgMVEgICBNZWRpYW4gICAgICAgM1EgICAgICBNYXggXG4tMC40NjUzMSAtMC4xNTA5NyAgMC4wMDIwNSAgMC4xNzYwMSAgMC4zMTk5NyBcblxuQ29lZmZpY2llbnRzOlxuICAgICAgICAgICAgRXN0aW1hdGUgU3RkLiBFcnJvciB0IHZhbHVlIFByKD58dHwpICAgXG4oSW50ZXJjZXB0KSAgMC4wNzQ5OCAgICAwLjA0OTE1ICAgMS41MjYgIDAuMTQwMTggICBcbmxjdF9ncm93dGggICAwLjIzNDgyICAgIDAuMDczMTkgICAzLjIwOSAgMC4wMDM3NiAqKlxuY2hlX2dyb3d0aCAgLTAuMTE1NjEgICAgMC4wOTIyNyAgLTEuMjUzICAwLjIyMjMwICAgXG5lYml0X2dyb3d0aCAgMC4wMzgwOCAgICAwLjAyMjA4ICAgMS43MjQgIDAuMDk3NTEgLiBcbi0tLVxuU2lnbmlmLiBjb2RlczogIDAgw6LigqzLnCoqKsOi4oKs4oSiIDAuMDAxIMOi4oKsy5wqKsOi4oKs4oSiIDAuMDEgw6LigqzLnCrDouKCrOKEoiAwLjA1IMOi4oKsy5wuw6LigqzihKIgMC4xIMOi4oKsy5wgw6LigqzihKIgMVxuXG5SZXNpZHVhbCBzdGFuZGFyZCBlcnJvcjogMC4yMjI4IG9uIDI0IGRlZ3JlZXMgb2YgZnJlZWRvbVxuICAoMSBvYnNlcnZhdGlvbiBkZWxldGVkIGR1ZSB0byBtaXNzaW5nbmVzcylcbk11bHRpcGxlIFItc3F1YXJlZDogICAwLjMzLFx0QWRqdXN0ZWQgUi1zcXVhcmVkOiAgMC4yNDYyIFxuRi1zdGF0aXN0aWM6ICAzLjk0IG9uIDMgYW5kIDI0IERGLCAgcC12YWx1ZTogMC4wMjAzM1xuIn0= -->
Call: lm(formula = revt_growth ~ lct_growth + che_growth + ebit_growth, data = uol)
Residuals: Min 1Q Median 3Q Max -0.46531 -0.15097 0.00205 0.17601 0.31997
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.07498 0.04915 1.526 0.14018
lct_growth 0.23482 0.07319 3.209 0.00376 **
che_growth -0.11561 0.09227 -1.253 0.22230
ebit_growth 0.03808 0.02208 1.724 0.09751 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2228 on 24 degrees of freedom (1 observation deleted due to missingness) Multiple R-squared: 0.33, Adjusted R-squared: 0.2462 F-statistic: 3.94 on 3 and 24 DF, p-value: 0.02033
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuZGV0ZWN0b3IgPC0gZnVuY3Rpb24oKSB7XG4gIGRpY2UgPC0gc2FtcGxlKDE6Niwgc2l6ZT0yLCByZXBsYWNlPVRSVUUpXG4gIGlmIChzdW0oZGljZSkgPT0gMTIpIHtcbiAgICBcXGV4cGxvZGVkXFxcbiAgfSBlbHNlIHtcbiAgICBcXHN0aWxsIHRoZXJlXFxcbiAgfVxufVxuXG5leHBlcmltZW50IDwtIHJlcGxpY2F0ZSgxMDAwLGRldGVjdG9yKCkpXG4jIHAgdmFsdWVcbnAgPC0gc3VtKGV4cGVyaW1lbnQgPT0gXFxzdGlsbCB0aGVyZVxcKSAvIDEwMDBcbmlmIChwIDwgMC4wNSkge1xuICBwYXN0ZShcXHAtdmFsdWU6IFxcLCBwLCBcXC0tIEZhaWwgdG8gcmVqZWN0IEhfQVxuYGBgIn0= -->
```r
```r
# Simulated detector: rolls two six-sided dice and returns "exploded" only
# when both show six (sum of 12); otherwise returns "still there".
detector <- function() {
  dice <- sample(1:6, size = 2, replace = TRUE)
  if (sum(dice) == 12) {
    "exploded"
  } else {
    "still there"
  }
}
# Run the detector 1000 times; the result is a character vector of outcomes
experiment <- replicate(1000, detector())
# p value: share of runs in which the detector answered "still there"
p <- sum(experiment == "still there") / 1000
if (p < 0.05) {
paste(\p-value: \, p, \-- Fail to reject H_A
[1] \p-value: 0.974 -- Reject H_A that sun exploded\
```r
library(tidyverse)
# Scatter plot of revtq against atq (axes labelled Revenue and Assets) with a
# fitted linear trend line
read_csv("../../Data/Session_3-1.csv") %>%
  ggplot(aes(y = revtq, x = atq)) +
  geom_point() +
  geom_smooth(method = "lm") +
  xlab("Assets") +
  ylab("Revenue")
<!-- rnb-source-end -->
<!-- rnb-plot-begin -->
<img src="" />
<!-- rnb-plot-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxucGxvdF9ub3JtIDwtIGZ1bmN0aW9uKGJvdW5kLCBvdXRlcikge1xuICB4IDwtIHNlcSgtb3V0ZXIsb3V0ZXIsbGVuZ3RoPTEwMClcbiAgaHggPC0gZG5vcm0oeClcbiAgcGxvdCh4LCBoeCwgdHlwZT1cXG5cXCwgeGxhYj1cXHogdmFsdWVzXFwsIHlsYWI9XFxOb3JtYWwgUERGXFwsIG1haW49XFxOb3JtYWwgRGlzdHJpYnV0aW9uXFwsIGF4ZXM9RkFMU0UpXG4gIGxpbmVzKHgsIGh4KVxuICBpIDwtIHggPCAtYm91bmRcbiAgcG9seWdvbihjKC1vdXRlcix4W2ldLC1ib3VuZCwtYm91bmQpLCBjKDAsaHhbaV0sbWF4KGh4W2ldKSwwKSwgY29sPVxccmVkXFwpXG4gIGkgPC0geCA+IGJvdW5kXG4gIHBvbHlnb24oYyhib3VuZCwgYm91bmQsIHhbaV0sb3V0ZXIpLCBjKDAsbWF4KGh4W2ldKSwgaHhbaV0sMCksIGNvbD1cXHJlZFxcKVxuICBheGlzKDEsIGF0PS1vdXRlcjpvdXRlciwgcG9zPTApXG59XG5wbG90X25vcm0oMS45NiwgNClcbmBgYFxuYGBgIn0= -->
```r
```r
# Draw the standard normal density over [-outer, outer] and shade both tails
# beyond +/- bound in red.
#   bound: cutoff; the regions x < -bound and x > bound are shaded
#   outer: half-width of the plotted x range
# Called for its plotting side effect on the active graphics device.
plot_norm <- function(bound, outer) {
  x <- seq(-outer, outer, length.out = 100)
  hx <- dnorm(x)
  # type = "n" sets up the plot region without drawing; the curve is added next
  plot(x, hx, type = "n", xlab = "z values", ylab = "Normal PDF",
       main = "Normal Distribution", axes = FALSE)
  lines(x, hx)
  # Left tail: polygon closed along y = 0 and the vertical line at -bound
  i <- x < -bound
  polygon(c(-outer, x[i], -bound, -bound), c(0, hx[i], max(hx[i]), 0),
          col = "red")
  # Right tail: mirror image, closed at +bound
  i <- x > bound
  polygon(c(bound, bound, x[i], outer), c(0, max(hx[i]), hx[i], 0),
          col = "red")
  # Only the x axis is drawn, placed at y = 0
  axis(1, at = -outer:outer, pos = 0)
}
plot_norm(1.96, 4)
<!-- rnb-source-end -->
<!-- rnb-plot-begin -->
<img src="" />
<!-- rnb-plot-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuYW5vdmEobW9kMiwgbW9kMywgdGVzdD1cXENoaXNxXFwpXG5gYGBcbmBgYCJ9 -->
```r
```r
# Compare the two growth models with a Chi-squared test
# NOTE(review): mod3 does not include at_growth, so mod2 is not nested in
# mod3 -- interpret this comparison with care.
anova(mod2, mod3, test = "Chisq")
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiQW5hbHlzaXMgb2YgVmFyaWFuY2UgVGFibGVcblxuTW9kZWwgMTogcmV2dF9ncm93dGggfiBhdF9ncm93dGhcbk1vZGVsIDI6IHJldnRfZ3Jvd3RoIH4gbGN0X2dyb3d0aCArIGNoZV9ncm93dGggKyBlYml0X2dyb3d0aFxuICBSZXMuRGYgICAgUlNTIERmIFN1bSBvZiBTcSBQcig+Q2hpKSAgXG4xICAgICAyNiAxLjU1MzQgICAgICAgICAgICAgICAgICAgICAgICBcbjIgICAgIDI0IDEuMTkxOCAgMiAgIDAuMzYxNjggICAwLjAyNjIgKlxuLS0tXG5TaWduaWYuIGNvZGVzOiAgMCDDouKCrMucKioqw6LigqzihKIgMC4wMDEgw6LigqzLnCoqw6LigqzihKIgMC4wMSDDouKCrMucKsOi4oKs4oSiIDAuMDUgw6LigqzLnC7DouKCrOKEoiAwLjEgw6LigqzLnCDDouKCrOKEoiAxXG4ifQ== -->
Analysis of Variance Table
Model 1: revt_growth ~ at_growth Model 2: revt_growth ~ lct_growth + che_growth + ebit_growth Res.Df RSS Df Sum of Sq Pr(>Chi)
1 26 1.5534
2 24 1.1918 2 0.36168 0.0262 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuYGBgclxuIyBFbnN1cmUgZmlybXMgaGF2ZSBhdCBsZWFzdCAkMU0gKGxvY2FsIGN1cnJlbmN5KSwgYW5kIGhhdmUgcmV2ZW51ZVxuIyBkZiBjb250YWlucyBhbGwgcmVhbCBlc3RhdGUgY29tcGFuaWVzIGV4Y2x1ZGluZyBOb3J0aCBBbWVyaWNhXG5kZl9jbGVhbiA8LSBmaWx0ZXIoZGYsIGRmJGF0PjEsIGRmJHJldnQ+MClcbiMgV2UgY2xlYW5lZCBvdXQgNTc4IG9ic2VydmF0aW9ucyFcbnByaW50KGMobnJvdyhkZiksIG5yb3coZGZfY2xlYW4pKSlcbmBgYFxuYGBgXG5gYGAifQ== -->
```r
```r
```r
# Ensure firms have more than $1M in assets (local currency), and have revenue
# df contains all real estate companies excluding North America
# Bare column names are used inside filter(); df$col is unnecessary there and
# breaks if the call is later applied to grouped or piped data.
df_clean <- filter(df, at > 1, revt > 0)
# We cleaned out 578 observations!
print(c(nrow(df), nrow(df_clean)))
```r
```r
# Six-predictor OLS for revt_lead, estimated on all rows of df_clean
# NOTE(review): needs a revt_lead column in df_clean -- confirm it has been created before this runs
forecast4 <-
lm(revt_lead ~ revt + act + che + lct + dp + ebit , data=df_clean)
tidy(forecast4)
```r
```r
# Same six predictors plus one dummy per firm via factor(isin), estimated on
# the rows of df_clean where fic is "SGP"
forecast3.1 <-
  lm(revt_lead ~ revt + act + che + lct + dp + ebit + factor(isin),
     data = df_clean[df_clean$fic == "SGP", ])
# n=15 to prevent outputting every fixed effect
print(tidy(forecast3.1), n = 15)
# Model-level statistics for forecast2, then a Chi-squared comparison of forecast1 and forecast2
# NOTE(review): forecast1 and forecast2 are not defined in the visible part of this file -- confirm they exist upstream
glance(forecast2)
anova(forecast1, forecast2, test="Chisq")
Analysis of Variance Table
Model 1: revt_lead ~ lct + che + ebit
Model 2: revt_lead ~ revt + act + che + lct + dp + ebit
Res.Df RSS Df Sum of Sq Pr(>Chi)
1 24 3059182
2 21 863005 3 2196177 1.477e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Note the group_by -- without it, lead() will pull from the subsequent firm!
# ungroup() tells R that we finished grouping
# NOTE(review): lead() takes the next row within each isin, so this presumably
# expects rows to already be in chronological order -- confirm
df_clean <- df_clean %>%
group_by(isin) %>%
mutate(revt_lead = lead(revt)) %>%
ungroup()
# forecast3: six-predictor OLS for revt_lead on rows where fic is "SGP"
forecast3 <-
lm(revt_lead ~ revt + act + che + lct + dp + ebit,
data=df_clean[df_clean$fic=="SGP",])
tidy(forecast3)
glance(forecast3)
# forecast4: same specification on all rows of df_clean
forecast4 <-
lm(revt_lead ~ revt + act + che + lct + dp + ebit , data=df_clean)
tidy(forecast4)
glance(forecast4)
# forecast3.1: forecast3 plus one dummy per firm via factor(isin)
forecast3.1 <-
lm(revt_lead ~ revt + act + che + lct + dp + ebit + factor(isin),
data=df_clean[df_clean$fic=="SGP",])
# n=15 to prevent outputting every fixed effect
print(tidy(forecast3.1), n=15)
glance(forecast3.1)
# Test whether adding the firm dummies improves on forecast3
anova(forecast3, forecast3.1, test="Chisq")
Analysis of Variance Table
Model 1: revt_lead ~ revt + act + che + lct + dp + ebit
Model 2: revt_lead ~ revt + act + che + lct + dp + ebit + factor(isin)
Res.Df RSS Df Sum of Sq Pr(>Chi)
1 324 14331633
2 304 13215145 20 1116488 0.1765
library(fixest)
# Same predictors estimated with fixest::feols; isin after the `|` is treated as a fixed effect
forecast3.2 <-
feols(revt_lead ~ revt + act + che + lct + dp + ebit | isin,
data=df_clean[df_clean$fic=="SGP",])
NOTE: 29 observations removed because of NA values (LHS: 21, RHS: 8).
summary(forecast3.2)
OLS estimation, Dep. Var.: revt_lead
Observations: 331
Fixed-effects: isin: 21
Standard-errors: Clustered (isin)
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 199.8 Adj. R2: 0.843506
Within R2: 0.780586
# Distribution of the per-firm mean of revt_lead for rows where fic is "SGP".
# The mean is attached to every row of a firm, then slice(1) keeps one row per
# isin so each firm contributes a single value to the plot.
df_clean %>%
  filter(fic == "SGP") %>%
  group_by(isin) %>%
  mutate(mean_revt_lead = mean(revt_lead, na.rm = TRUE)) %>%
  slice(1) %>%
  ungroup() %>%
  ggplot(aes(x = mean_revt_lead)) +
  # after_stat(density) replaces the deprecated ..density.. notation and puts
  # the histogram on the same scale as the density curve
  geom_histogram(aes(y = after_stat(density))) +
  geom_density(alpha = .4, fill = "#FF6666")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Exports for the following week
# NOTE(review): forecast2 is not defined in the visible part of this file -- confirm it exists before saving
save(df_clean, forecast2, uol, forecast4, file = "../../Data/Session_2_export.RData")