Main question
Can we use bankruptcy models to predict supplier bankruptcies?
But first:
Does the Altman Z-score [still] pick up bankruptcy?
Based on this article, why do we care about bankruptcy risk for other firms?
\[ Z = 1.2 x_1 + 1.4 x_2 + 3.3 x_3 + 0.6 x_4 + 0.999 x_5 \]
This looks like a linear regression without a constant
More about this, from Altman himself in 2000: rmc.link/420class5-2
Can we use bankruptcy models to predict supplier bankruptcies?
But first:
Does the Altman Z-score [still] pick up bankruptcy?
Is this a forecasting or forensics question?
If dlrsn == 2, then the firm went bankrupt.
dldte gives the date the firm was delisted.
# Initial cleaning: drop firm-years with total assets or revenue below 1
# (unstable ratios at tiny scale).
# gvkey 100338 is an outlier in the bonds distribution, so drop it too.
df <- df %>%
  filter(at >= 1,
         revt >= 1,
         gvkey != 100338)
## Merge in stock value
# Convert datadate to Date on both frames, compute market value of
# equity (shares outstanding x fiscal-year-end price), and join it in
# by firm (gvkey) and date.
df$date <- as.Date(df$datadate)
df_mve <- df_mve %>%
  rename(gvkey = GVKEY) %>%
  mutate(date = as.Date(datadate),
         mve = csho * prcc_f)
df <- left_join(df, df_mve[, c("gvkey", "date", "mve")])
## Joining, by = c("gvkey", "date")
# Flag each firm's final observation as bankrupt (1) when its deletion
# reason dlrsn equals 2; every other row gets 0. The NA check comes
# first so missing dlrsn values never propagate into the flag.
df <- df %>%
  group_by(gvkey) %>%
  mutate(bankrupt = ifelse(!is.na(dlrsn) & dlrsn == 2 &
                             row_number() == n(), 1, 0)) %>%
  ungroup()
row_number() gives the current row within the group, with the first row as 1, the next as 2, etc.
n() gives the number of rows in the group.
# Calculate the measures needed
# Construct the five Altman Z-score inputs, each a ratio of a
# financial-statement item to total assets (or total liabilities for x4)
df$wcap_at <- df$wcap / df$at   # x1: working capital / total assets
df$re_at   <- df$re   / df$at   # x2: retained earnings / total assets
df$ebit_at <- df$ebit / df$at   # x3: EBIT / total assets
df$mve_lt  <- df$mve  / df$lt   # x4: market value of equity / total liabilities
df$revt_at <- df$revt / df$at   # x5: revenue / total assets
# Cleanup: the ratios above can produce Inf, -Inf, or NaN (e.g. division
# by a zero denominator); replace any non-finite value with NA in every
# numeric column. across(where(...)) is the current dplyr idiom; the
# original mutate_if() is superseded (dplyr >= 1.0).
df <- df %>%
  mutate(across(where(is.numeric), ~replace(.x, !is.finite(.x), NA)))
# Calculate the Z-score with the original Altman (1968) coefficients
df$Z <- with(df, 1.2 * wcap_at + 1.4 * re_at + 3.3 * ebit_at +
               0.6 * mve_lt + 0.999 * revt_at)
# Calculate date info for merging: calendar year and month of datadate
df <- df %>%
  mutate(date = as.Date(datadate),
         year = year(date),
         month = month(date))
We’ll check our Z-score against credit rating as a simple validation
# df_ratings has ratings data in it (splticrm — presumably the S&P
# long-term issuer rating; confirm against the data source)
# Ratings, in order from worst to best
# NOTE(review): "AAA-" and "AAA+" are not real S&P grades; they appear
# to be placeholders so every observed string maps to a level.
ratings <- c("D", "C", "CC", "CCC-", "CCC","CCC+", "B-", "B", "B+", "BB-",
             "BB", "BB+", "BBB-", "BBB", "BBB+", "A-", "A", "A+", "AA-", "AA",
             "AA+", "AAA-", "AAA", "AAA+")
# Convert string ratings (splticrm) to an ordered factor so comparisons
# like rating < lag(rating) respect credit quality.
# Spell out TRUE rather than the reassignable shorthand T.
df_ratings$rating <- factor(df_ratings$splticrm, levels = ratings,
                            ordered = TRUE)
df_ratings$date <- as.Date(df_ratings$datadate)
df_ratings$year <- year(df_ratings$date)
df_ratings$month <- month(df_ratings$date)
# Merge together data: attach the ordered rating by firm-year-month
df <- left_join(df, select(df_ratings, gvkey, year, month, rating))
## Joining, by = c("gvkey", "year", "month")
# Mean Z by bankruptcy status, full sample. summarise() collapses each
# group to one row directly, replacing the original
# mutate() + slice(1) + select() workaround with the idiomatic verb;
# the resulting table is identical.
df %>%
  filter(!is.na(Z),
         !is.na(bankrupt)) %>%
  group_by(bankrupt) %>%
  summarise(mean_Z = mean(Z, na.rm = TRUE)) %>%
  html_df()
bankrupt | mean_Z |
---|---|
0 | 3.939223 |
1 | 0.927843 |
# Mean Z by bankruptcy status, restricted to fiscal years 2000 and
# later. summarise() replaces the mutate() + slice(1) + select()
# workaround; the resulting table is identical.
df %>%
  filter(!is.na(Z),
         !is.na(bankrupt),
         year >= 2000) %>%
  group_by(bankrupt) %>%
  summarise(mean_Z = mean(Z, na.rm = TRUE)) %>%
  html_df()
bankrupt | mean_Z |
---|---|
0 | 3.822281 |
1 | 1.417683 |
##
## Call:
## glm(formula = bankrupt ~ Z, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8297 -0.0676 -0.0654 -0.0624 3.7794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.94354 0.11829 -50.245 < 2e-16 ***
## Z -0.06383 0.01239 -5.151 2.59e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1085.2 on 35296 degrees of freedom
## Residual deviance: 1066.5 on 35295 degrees of freedom
## (15577 observations deleted due to missingness)
## AIC: 1070.5
##
## Number of Fisher Scoring iterations: 9
Examples:
Correct 92.0% of the time using Z < 1 as a cutoff
Correct 99.7% of the time if we say firms never go bankrupt…
This type of chart (filled in) is called a Confusion matrix
We say that the company will go bankrupt, but they don’t
We say that the company will not go bankrupt, yet they do
library(ROCR)
# Keep only rows where both the score and the outcome are observed,
# then score them with the logit fit_Z (estimated earlier, outside this
# chunk) to get predicted bankruptcy probabilities.
dfZ <- df %>% filter(!is.na(Z), !is.na(bankrupt))
pred_Z <- predict(fit_Z, dfZ, type="response")
# ROCR pairs predictions with actual 0/1 outcomes...
ROCpred_Z <- prediction(as.numeric(pred_Z), as.numeric(dfZ$bankrupt))
# ...and computes the true/false positive rate at every cutoff (the ROC curve)
ROCperf_Z <- performance(ROCpred_Z, 'tpr','fpr')
'tpr' is the true positive rate; 'fpr' is the false positive rate.
## [1] 0.8280943
Use @ to pull values out of ROCR's S4 objects, not $.
AUC is the probability that our model assigns a higher estimated probability to a randomly selected 1 than to a randomly selected 0.
\[ DD = \frac{\log(V_A / D) + \left(r-\frac{1}{2}\sigma_A^2\right)(T-t)}{\sigma_A \sqrt{T-t}} \]
# df_stock is an already prepped csv from CRSP data; attach daily
# return (ret) and its standard deviation (ret.sd) by firm and date
df_stock <- df_stock %>% mutate(date = as.Date(date))
df <- left_join(df, select(df_stock, gvkey, date, ret, ret.sd))
## Joining, by = c("gvkey", "date")
# Attach the risk-free rate (rf) by calendar year and month
df_rf <- df_rf %>%
  mutate(date = as.Date(dateff),
         year = year(date),
         month = month(date))
df <- left_join(df, select(df_rf, year, month, rf))
## Joining, by = c("year", "month")
# Distance to default (Merton model) with T - t = 1 year:
#   DD = (log(V_A / D) + r - sigma_A^2 / 2) / sigma_A
# mve proxies asset value V_A, lt the default point D, and
# ret.sd * sqrt(253) annualizes the daily return sd (253 trading days).
df <- df %>%
  mutate(DD = (log(mve / lt) + (rf - (ret.sd * sqrt(253))^2 / 2)) /
              (ret.sd * sqrt(253)))
# Clean the measure: replace Inf/-Inf/NaN with NA in numeric columns.
# across(where(...)) is the current dplyr idiom; the original
# mutate_if() is superseded (dplyr >= 1.0).
df <- df %>%
  mutate(across(where(is.numeric), ~replace(.x, !is.finite(.x), NA)))
ret.sd is the daily standard deviation of returns; multiplying by sqrt(253) annualizes it.
# Mean DD by bankruptcy status (full sample), plus the implied
# probability of default pnorm(-DD) — lower DD means higher risk.
# summarise() replaces the mutate() + slice(1) + select() workaround;
# the resulting table is identical.
df %>%
  filter(!is.na(DD),
         !is.na(bankrupt)) %>%
  group_by(bankrupt) %>%
  summarise(mean_DD = mean(DD, na.rm = TRUE),
            prob_default = pnorm(-mean_DD)) %>%
  html_df()
bankrupt | mean_DD | prob_default |
---|---|---|
0 | 0.6096854 | 0.2710351 |
1 | -2.4445081 | 0.9927475 |
# Mean DD and implied default probability by bankruptcy status,
# restricted to fiscal years 2000 and later. summarise() replaces the
# mutate() + slice(1) + select() workaround; the table is identical.
df %>%
  filter(!is.na(DD),
         !is.na(bankrupt),
         year >= 2000) %>%
  group_by(bankrupt) %>%
  summarise(mean_DD = mean(DD, na.rm = TRUE),
            prob_default = pnorm(-mean_DD)) %>%
  html_df()
bankrupt | mean_DD | prob_default |
---|---|---|
0 | 0.8379932 | 0.2010172 |
1 | -4.3001844 | 0.9999915 |
##
## Call:
## glm(formula = bankrupt ~ DD, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9929 -0.0750 -0.0634 -0.0506 3.6503
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.16394 0.15322 -40.230 < 2e-16 ***
## DD -0.24459 0.03781 -6.469 9.89e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 718.67 on 21563 degrees of freedom
## Residual deviance: 677.27 on 21562 degrees of freedom
## (33618 observations deleted due to missingness)
## AIC: 681.27
##
## Number of Fisher Scoring iterations: 9
# Score the DD logit (fit_DD, estimated earlier) on observations where
# both DD and the bankruptcy outcome are known, and build its ROC data.
dfDD <- df %>% filter(!is.na(DD), !is.na(bankrupt))
pred_DD <- predict(fit_DD, dfDD, type="response")
ROCpred_DD <- prediction(as.numeric(pred_DD), as.numeric(dfDD$bankrupt))
ROCperf_DD <- performance(ROCpred_DD, 'tpr','fpr')
# ROCR stores results in S4 slots (hence @, not $); [[1]] unwraps the list
df_ROC_DD <- data.frame(FalsePositive=c(ROCperf_DD@x.values[[1]]),
TruePositive=c(ROCperf_DD@y.values[[1]]))
# Plot both ROC curves — DD from df_ROC_DD above, Z from df_ROC_Z
# (built in an earlier chunk) — against the 45-degree no-skill line.
ggplot() +
geom_line(data=df_ROC_DD, aes(x=FalsePositive, y=TruePositive, color="DD")) +
geom_line(data=df_ROC_Z, aes(x=FP, y=TP, color="Z")) +
geom_abline(slope=1)
# AUC: area under each ROC curve, collected into one named vector
# (auc_Z was computed in an earlier chunk)
auc_DD <- performance(ROCpred_DD, measure = "auc")
AUCs <- c(Z = auc_Z@y.values[[1]], DD = auc_DD@y.values[[1]])
AUCs
## Z DD
## 0.8280943 0.8098304
Both measures perform similarly, but Altman Z performs slightly better.
Why?
# calculate downgrade
# Within each firm (rows in date order), flag periods where the ordered
# rating factor fell versus the prior observation, and take one-period
# changes in Z and DD. Each firm's first row yields NA (lag() has no
# prior value); those rows are dropped when building train/test below.
# NOTE(review): arrange(date) sorts the whole frame, but mutate() still
# operates within groups, so the per-firm lag() is computed correctly.
df <- df %>%
group_by(gvkey) %>%
arrange(date) %>%
mutate(downgrade = ifelse(rating < lag(rating),1,0),
diff_Z = Z - lag(Z),
diff_DD = DD - lag(DD)) %>%
ungroup()
# Train on fiscal years 1986-2013, test on 2014 onward; rows missing a
# score change or the downgrade flag are unusable in either sample.
train <- df %>%
  filter(year > 1985, year < 2014,
         !is.na(diff_Z), !is.na(diff_DD), !is.na(downgrade))
test <- df %>%
  filter(year >= 2014,
         !is.na(diff_Z), !is.na(diff_DD), !is.na(downgrade))
# Logistic regressions of downgrades on the change in each score
fit_Z2  <- glm(downgrade ~ diff_Z,  family = binomial, data = train)
fit_DD2 <- glm(downgrade ~ diff_DD, family = binomial, data = train)
##
## Call:
## glm(formula = downgrade ~ diff_Z, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4115 -0.4428 -0.4428 -0.3928 2.7437
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.27310 0.06139 -37.029 <2e-16 ***
## diff_Z -0.77150 0.09245 -8.345 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2145.3 on 3277 degrees of freedom
## Residual deviance: 2065.8 on 3276 degrees of freedom
## AIC: 2069.8
##
## Number of Fisher Scoring iterations: 5
##
## Call:
## glm(formula = downgrade ~ diff_DD, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5726 -0.4565 -0.4558 -0.4095 2.6804
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.21199 0.05926 -37.325 < 2e-16 ***
## diff_DD -0.21378 0.03723 -5.742 9.37e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2145.3 on 3277 degrees of freedom
## Residual deviance: 2113.2 on 3276 degrees of freedom
## AIC: 2117.2
##
## Number of Fisher Scoring iterations: 5
## Z DD
## 0.6465042 0.5847885
## Z DD
## 0.8134671 0.7420213
What other data could we use to predict corporate bankruptcy as it relates to a company’s supply chain?
##
## Call:
## glm(formula = downgrade ~ diff_Z + diff_DD, family = binomial,
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3263 -0.4431 -0.4430 -0.3892 2.7504
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.27217 0.06144 -36.980 < 2e-16 ***
## diff_Z -0.71374 0.10709 -6.665 2.65e-11 ***
## diff_DD -0.04884 0.04638 -1.053 0.292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2145.3 on 3277 degrees of freedom
## Residual deviance: 2064.7 on 3275 degrees of freedom
## AIC: 2070.7
##
## Number of Fisher Scoring iterations: 5
## factor AME SE z p lower upper
## diff_DD -0.0043 0.0041 -1.0525 0.2926 -0.0122 0.0037
## diff_Z -0.0625 0.0094 -6.6473 0.0000 -0.0809 -0.0441
The margins:: syntax allows us to call a function without loading the whole library. There is a conflict between the predict() functions of ROCR and margins, so this is safer.
## Combined Z DD
## 0.8151596 0.8134671 0.7420213