Main question
Can we use bankruptcy models to predict supplier bankruptcies?
But first:
Does the Altman Z-score [still] pick up bankruptcy?
Dr. Richard M. Crowley
\[ Z = 1.2 x_1 + 1.4 x_2 + 3.3 x_3 + 0.6 x_4 + 0.999 x_5 \]
This looks like a linear regression without a constant
More about this, from Altman himself in 2000: rmc.link/420class5
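For reference, the five inputs, matching the Compustat ratios computed below:

\[ x_1 = \frac{\text{working capital}}{\text{total assets}}, \quad x_2 = \frac{\text{retained earnings}}{\text{total assets}}, \quad x_3 = \frac{\text{EBIT}}{\text{total assets}}, \quad x_4 = \frac{\text{market value of equity}}{\text{total liabilities}}, \quad x_5 = \frac{\text{sales}}{\text{total assets}} \]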
Is this a forecasting or forensics question?
If `dlrsn == 2`, then the firm went bankrupt (`dlrsn` is the Compustat deletion reason and `dldte` the deletion date)
# initial cleaning; assumes the tidyverse and lubridate are loaded earlier in the deck
df <- df %>% filter(at >= 1, revt >= 1, gvkey != 100338)
## Merge in stock value
df$date <- as.Date(df$datadate)
df_mve <- df_mve %>%
mutate(date = as.Date(datadate),
mve = csho * prcc_f) %>%
rename(gvkey=GVKEY)
df <- left_join(df, df_mve[,c("gvkey","date","mve")])
## Joining, by = c("gvkey", "date")
df <- df %>%
group_by(gvkey) %>%
mutate(bankrupt = ifelse(row_number() == n() & dlrsn == 2 &
!is.na(dlrsn), 1, 0)) %>%
ungroup()
`row_number()` gives the current row within the group, with the first row as 1, the next as 2, etc. `n()` gives the number of rows in the group.
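A minimal sketch of these two helpers on toy data (the data frame here is made up for illustration):

```r
library(dplyr)
demo <- data.frame(gvkey = c(1, 1, 1, 2, 2))
demo %>%
  group_by(gvkey) %>%
  mutate(row = row_number(),  # 1, 2, 3 within gvkey 1; 1, 2 within gvkey 2
         n_rows = n())        # 3 for every gvkey 1 row; 2 for gvkey 2
```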
# Calculate the measures needed
df <- df %>%
mutate(wcap_at = wcap / at, # x1
re_at = re / at, # x2
ebit_at = ebit / at, # x3
mve_lt = mve / lt, # x4
revt_at = revt / at) # x5
# cleanup
df <- df %>%
mutate_if(is.numeric, list(~replace(., !is.finite(.), NA)))
# Calculate the score
df <- df %>%
mutate(Z = 1.2 * wcap_at + 1.4 * re_at + 3.3 * ebit_at + 0.6 * mve_lt +
0.999 * revt_at)
# Calculate date info for merging
df$date <- as.Date(df$datadate)
df$year <- year(df$date)
df$month <- month(df$date)
We’ll check our Z-score against credit rating as a simple validation
# df_ratings has ratings data in it
# Ratings, in order from worst to best
ratings <- c("D", "C", "CC", "CCC-", "CCC","CCC+", "B-", "B", "B+", "BB-",
"BB", "BB+", "BBB-", "BBB", "BBB+", "A-", "A", "A+", "AA-", "AA",
"AA+", "AAA-", "AAA", "AAA+")
# Convert string ratings (splticrm) to numeric ratings
df_ratings$rating <- factor(df_ratings$splticrm, levels=ratings, ordered=T)
df_ratings$date <- as.Date(df_ratings$datadate)
df_ratings$year <- year(df_ratings$date)
df_ratings$month <- month(df_ratings$date)
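Because the factor is ordered, comparison operators respect the worst-to-best level order, which the downgrade calculation later relies on. A tiny sketch with made-up values:

```r
x <- factor(c("BB", "A-"), levels=ratings, ordered=TRUE)
x[1] < x[2]  # TRUE: BB is ranked below (worse than) A-
```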
# Merge together data
df <- left_join(df, df_ratings[,c("gvkey", "year", "month", "rating")])
## Joining, by = c("gvkey", "year", "month")
df %>%
filter(!is.na(Z),
!is.na(bankrupt)) %>%
group_by(bankrupt) %>%
mutate(mean_Z=mean(Z,na.rm=T)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt, mean_Z) %>%
html_df()
bankrupt | mean_Z |
---|---|
0 | 3.939223 |
1 | 0.927843 |
df %>%
filter(!is.na(Z),
!is.na(bankrupt),
year >= 2000) %>%
group_by(bankrupt) %>%
mutate(mean_Z=mean(Z,na.rm=T)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt, mean_Z) %>%
html_df()
bankrupt | mean_Z |
---|---|
0 | 3.822281 |
1 | 1.417683 |
fit_Z <- glm(bankrupt ~ Z, data=df, family=binomial)
summary(fit_Z)
##
## Call:
## glm(formula = bankrupt ~ Z, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8297 -0.0676 -0.0654 -0.0624 3.7794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.94354 0.11829 -50.245 < 2e-16 ***
## Z -0.06383 0.01239 -5.151 2.59e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1085.2 on 35296 degrees of freedom
## Residual deviance: 1066.5 on 35295 degrees of freedom
## (15577 observations deleted due to missingness)
## AIC: 1070.5
##
## Number of Fisher Scoring iterations: 9
Based on this article, why do we care about bankruptcy risk for other firms?
Examples:
Correct 92.0% of the time using Z < 1 as a cutoff
Correct 99.7% of the time if we say firms never go bankrupt…
This type of chart (filled in) is called a confusion matrix
Type I error (false positive): we say that the company will go bankrupt, but they don't
Type II error (false negative): we say that the company will not go bankrupt, yet they do
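A minimal sketch of tabulating this in R with Z < 1 as the cutoff (exact counts depend on the sample; html_df() is the deck's own table helper):

```r
df %>%
  filter(!is.na(Z), !is.na(bankrupt)) %>%
  mutate(pred_bankrupt = ifelse(Z < 1, 1, 0)) %>%  # predicted bankruptcy flag
  count(bankrupt, pred_bankrupt) %>%               # the four confusion matrix cells
  html_df()
```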
library(ROCR)
pred_Z <- predict(fit_Z, df, type="response")
ROCpred_Z <- prediction(as.numeric(pred_Z), as.numeric(df$bankrupt))
ROCperf_Z <- performance(ROCpred_Z, 'tpr','fpr')
`'tpr'` is the true positive rate; `'fpr'` is the false positive rate.
df_ROC_Z <- data.frame(
FP=c(ROCperf_Z@x.values[[1]]),
TP=c(ROCperf_Z@y.values[[1]]))
ggplot(data=df_ROC_Z,
aes(x=FP, y=TP)) + geom_line() +
geom_abline(slope=1)
plot(ROCperf_Z)  # ROCR's built-in plot method draws the same ROC curve
auc_Z <- performance(ROCpred_Z, measure = "auc")
auc_Z@y.values[[1]]
## [1] 0.8280943
Note the use of `@` to pull out values, not `$`, because ROCR returns S4 objects.
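A quick way to confirm this (isS4() and slotNames() come with base R's methods package):

```r
isS4(auc_Z)       # TRUE: ROCR performance objects are S4
slotNames(auc_Z)  # lists the slots accessible via @, e.g. y.values
```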
\[ DD = \frac{\log(V_A / D) + (r-\frac{1}{2}\sigma_A^2)(T-t)}{\sigma_A \sqrt{T-t}} \]
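The DD calculation below uses a simplified version of this: it sets \(T-t=1\), proxies \(V_A/D\) with mve/lt, uses the risk-free rate for \(r\), and approximates \(\sigma_A\) with annualized daily return volatility (these mappings describe our code, not Merton's full model):

\[ DD \approx \frac{\log(mve/lt) + r_f - \frac{1}{2}\left(ret.sd \cdot \sqrt{253}\right)^2}{ret.sd \cdot \sqrt{253}} \]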
# df_stock is an already prepped csv from CRSP data
df_stock$date <- as.Date(df_stock$date)
df <- left_join(df, df_stock[,c("gvkey", "date", "ret", "ret.sd")])
## Joining, by = c("gvkey", "date")
df_rf$date <- as.Date(df_rf$dateff)
df_rf$year <- year(df_rf$date)
df_rf$month <- month(df_rf$date)
df <- left_join(df, df_rf[,c("year", "month", "rf")])
## Joining, by = c("year", "month")
df <- df %>%
mutate(DD = (log(mve / lt) + (rf - (ret.sd*sqrt(253))^2 / 2)) /
(ret.sd*sqrt(253)))
# Clean the measure
df <- df %>%
mutate_if(is.numeric, list(~replace(., !is.finite(.), NA)))
`ret.sd` is the daily return standard deviation; multiplying it by sqrt(253) annualizes it (roughly 253 trading days per year)
df %>%
filter(!is.na(DD),
!is.na(bankrupt)) %>%
group_by(bankrupt) %>%
mutate(mean_DD=mean(DD, na.rm=T),
prob_default =
pnorm(-1 * mean_DD)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt, mean_DD,
prob_default) %>%
html_df()
bankrupt | mean_DD | prob_default |
---|---|---|
0 | 0.6096854 | 0.2710351 |
1 | -2.4445081 | 0.9927475 |
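The pnorm() step converts the distance to default into an implied probability of default: under the model's normality assumption,

\[ P(\text{default}) = \Phi(-DD) \]

which is exactly what pnorm(-1 * mean_DD) computes.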
df %>%
filter(!is.na(DD),
!is.na(bankrupt),
year >= 2000) %>%
group_by(bankrupt) %>%
mutate(mean_DD=mean(DD, na.rm=T),
prob_default =
pnorm(-1 * mean_DD)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt, mean_DD,
prob_default) %>%
html_df()
bankrupt | mean_DD | prob_default |
---|---|---|
0 | 0.8379932 | 0.2010172 |
1 | -4.3001844 | 0.9999915 |
fit_DD <- glm(bankrupt ~ DD, data=df, family=binomial)
summary(fit_DD)
##
## Call:
## glm(formula = bankrupt ~ DD, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9929 -0.0750 -0.0634 -0.0506 3.6503
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.16394 0.15322 -40.230 < 2e-16 ***
## DD -0.24459 0.03781 -6.469 9.89e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 718.67 on 21563 degrees of freedom
## Residual deviance: 677.27 on 21562 degrees of freedom
## (33618 observations deleted due to missingness)
## AIC: 681.27
##
## Number of Fisher Scoring iterations: 9
pred_DD <- predict(fit_DD, df, type="response")
ROCpred_DD <- prediction(as.numeric(pred_DD), as.numeric(df$bankrupt))
ROCperf_DD <- performance(ROCpred_DD, 'tpr','fpr')
df_ROC_DD <- data.frame(FalsePositive=c(ROCperf_DD@x.values[[1]]),
TruePositive=c(ROCperf_DD@y.values[[1]]))
ggplot() +
geom_line(data=df_ROC_DD, aes(x=FalsePositive, y=TruePositive, color="DD")) +
geom_line(data=df_ROC_Z, aes(x=FP, y=TP, color="Z")) +
geom_abline(slope=1)
#AUC
auc_DD <- performance(ROCpred_DD, measure = "auc")
AUCs <- c(auc_Z@y.values[[1]], auc_DD@y.values[[1]])
names(AUCs) <- c("Z", "DD")
AUCs
## Z DD
## 0.8280943 0.8098304
Both measures perform similarly, but Altman Z performs slightly better.
Why?
# calculate downgrade: rating is an ordered factor, so < compares rating levels
df <- df %>%
  arrange(gvkey, date) %>%
  group_by(gvkey) %>%
  mutate(downgrade = ifelse(rating < lag(rating), 1, 0))
# training and testing samples
train <- df %>% filter(year < 2015)
test <- df %>% filter(year >= 2015)
# glms
fit_Z2 <- glm(downgrade ~ Z, data=train, family=binomial)
fit_DD2 <- glm(downgrade ~ DD, data=train, family=binomial)
summary(fit_Z2)
##
## Call:
## glm(formula = downgrade ~ Z, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1223 -0.5156 -0.4418 -0.3277 6.4638
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.10377 0.09288 -11.88 <2e-16 ***
## Z -0.43729 0.03839 -11.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3874.5 on 5795 degrees of freedom
## Residual deviance: 3720.4 on 5794 degrees of freedom
## (47058 observations deleted due to missingness)
## AIC: 3724.4
##
## Number of Fisher Scoring iterations: 6
summary(fit_DD2)
##
## Call:
## glm(formula = downgrade ~ DD, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7319 -0.5004 -0.4279 -0.3343 3.0756
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.36395 0.05609 -42.15 <2e-16 ***
## DD -0.22269 0.02040 -10.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3115.3 on 4732 degrees of freedom
## Residual deviance: 2982.9 on 4731 degrees of freedom
## (48121 observations deleted due to missingness)
## AIC: 2986.9
##
## Number of Fisher Scoring iterations: 5
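The AUC computations themselves are not shown on the slides; below is a sketch consistent with the earlier ROCR pattern (auc_ROCR is a hypothetical helper, not the original code):

```r
auc_ROCR <- function(fit, data) {
  data <- data %>% filter(!is.na(downgrade))  # need a label for each row
  pred <- predict(fit, data, type="response")
  keep <- !is.na(pred)                        # drop rows the model can't score
  performance(prediction(as.numeric(pred[keep]),
                         as.numeric(data$downgrade[keep])),
              measure="auc")@y.values[[1]]
}
# In sample (train) and out of sample (test)
c(Z=auc_ROCR(fit_Z2, train), DD=auc_ROCR(fit_DD2, train))
c(Z=auc_ROCR(fit_Z2, test),  DD=auc_ROCR(fit_DD2, test))
```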
In sample:

## Z DD
## 0.6839086 0.6811714

Out of sample:

## Z DD
## 0.7270046 0.7185990
What other data could we use to predict corporate bankruptcy as it relates to a company’s supply chain?
fit_comb <- glm(downgrade ~ Z + DD, data=train, family=binomial)
summary(fit_comb)
##
## Call:
## glm(formula = downgrade ~ Z + DD, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0215 -0.5198 -0.4132 -0.2867 4.9825
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.85624 0.15822 -11.732 < 2e-16 ***
## Z -0.19292 0.05828 -3.310 0.000932 ***
## DD -0.23893 0.03226 -7.406 1.3e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2888.9 on 4308 degrees of freedom
## Residual deviance: 2691.0 on 4306 degrees of freedom
## (48545 observations deleted due to missingness)
## AIC: 2697
##
## Number of Fisher Scoring iterations: 6
fit_comb %>%
margins::margins() %>%
summary()
## factor AME SE z p lower upper
## DD -0.0213 0.0029 -7.3680 0.0000 -0.0270 -0.0156
## Z -0.0172 0.0052 -3.2998 0.0010 -0.0274 -0.0070
The `margins::` syntax allows us to call a function without loading the whole library. There is a conflict between the `predict` functions of ROCR and margins, so this is safer.
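The out-of-sample comparison including the combined model can be reproduced the same way (again a reconstruction, reusing the hypothetical auc_ROCR helper sketched earlier):

```r
c(Combined=auc_ROCR(fit_comb, test),
  Z=auc_ROCR(fit_Z2, test),
  DD=auc_ROCR(fit_DD2, test))
```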
## Combined Z DD
## 0.7456504 0.7270046 0.7185990
# Calculating a 253 day rolling standard deviation in R
library(zoo)  # provides rollapply()
crsp <- crsp %>%
  group_by(gvkey) %>%
  mutate(ret.sd = rollapply(data=ret, width=253, FUN=sd, align="right",
                            fill=NA, na.rm=T)) %>%
  ungroup()