Main question
Can we use bankruptcy models to predict supplier bankruptcies?
But first:
Does the Altman Z-score [still] pick up bankruptcy?
Based on this article, why do we care about bankruptcy risk for other firms?
\[ Z = 1.2 x_1 + 1.4 x_2 + 3.3 x_3 + 0.6 x_4 + 0.999 x_5 \]
where \( x_1 \) = working capital / total assets, \( x_2 \) = retained earnings / total assets, \( x_3 \) = EBIT / total assets, \( x_4 \) = market value of equity / total liabilities, and \( x_5 \) = sales / total assets
This looks like a linear regression without a constant
More about this, from Altman himself in 2000: rmc.link/420class5-2
How well would these assumptions hold up today?
Is this a forecasting or forensics question?
If dlrsn == 2, then the firm went bankrupt; dldte gives the date the firm was delisted
# initial cleaning
# 100338 is an outlier in the bonds distribution
df <- df %>% filter(at >= 1, revt >= 1, gvkey != 100338)
## Merge in stock value
df$date <- as.Date(df$datadate)
df_mve <- df_mve %>%
mutate(date = as.Date(datadate),
mve = csho * prcc_f) %>%
rename(gvkey=GVKEY)
df <- left_join(df, df_mve[,c("gvkey","date","mve")])
## Joining, by = c("gvkey", "date")
df <- df %>%
group_by(gvkey) %>%
arrange(datadate) %>%
mutate(bankrupt = ifelse(row_number() == n() & dlrsn == 2 &
!is.na(dlrsn), 1, 0),
bankrupt_lead = lead(bankrupt)) %>%
ungroup() %>%
filter(!is.na(bankrupt_lead)) %>%
mutate(bankrupt_lead = factor(bankrupt_lead, levels=c(0,1)))
n() gives the number of rows in the group
# Calculate the measures needed
df <- df %>%
mutate(wcap_at = wcap / at, # x1
re_at = re / at, # x2
ebit_at = ebit / at, # x3
mve_lt = mve / lt, # x4
revt_at = revt / at) # x5
# cleanup
df <- df %>%
mutate_if(is.numeric, list(~replace(., !is.finite(.), NA)))
# Calculate the score
df <- df %>%
mutate(Z = 1.2 * wcap_at + 1.4 * re_at + 3.3 * ebit_at + 0.6 * mve_lt +
0.999 * revt_at)
# Calculate date info for merging
df$date <- as.Date(df$datadate)
df$year <- year(df$date)
df$month <- month(df$date)
We’ll check our Z-score against credit rating as a simple validation
# df_ratings has ratings data in it
# Ratings, in order from worst to best
ratings <- c("D", "C", "CC", "CCC-", "CCC","CCC+", "B-", "B", "B+", "BB-",
"BB", "BB+", "BBB-", "BBB", "BBB+", "A-", "A", "A+", "AA-", "AA",
"AA+", "AAA-", "AAA", "AAA+")
# Convert string ratings (splticrm) to numeric ratings
df_ratings$rating <- factor(df_ratings$splticrm, levels=ratings, ordered=T)
df_ratings$date <- as.Date(df_ratings$datadate)
df_ratings$year <- year(df_ratings$date)
df_ratings$month <- month(df_ratings$date)
# Merge together data
df <- left_join(df, df_ratings[,c("gvkey", "year", "month", "rating")])
## Joining, by = c("gvkey", "year", "month")
df %>%
filter(!is.na(Z),
!is.na(bankrupt_lead)) %>%
group_by(bankrupt_lead) %>%
mutate(mean_Z=mean(Z,na.rm=T)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt_lead, mean_Z) %>%
html_df()
bankrupt_lead | mean_Z |
---|---|
0 | 3.993796 |
1 | 1.739039 |
df %>%
filter(!is.na(Z),
!is.na(bankrupt_lead),
year >= 2000) %>%
group_by(bankrupt_lead) %>%
mutate(mean_Z=mean(Z,na.rm=T)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt_lead, mean_Z) %>%
html_df()
bankrupt_lead | mean_Z |
---|---|
0 | 3.897392 |
1 | 1.670656 |
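The summary below comes from a logistic regression of next year's bankruptcy on the Z-score. A minimal sketch of the fit implied by the output (fit_Z is an assumed name, consistent with auc_Z used later):
# Logit of next-year bankruptcy on the Z-score
fit_Z <- glm(bankrupt_lead ~ Z, data=df, family=binomial)
summary(fit_Z)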
##
## Call:
## glm(formula = bankrupt_lead ~ Z, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3959 -0.0705 -0.0685 -0.0658 3.7421
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.87769 0.11741 -50.060 < 2e-16 ***
## Z -0.05494 0.01235 -4.449 8.61e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1101.0 on 33372 degrees of freedom
## Residual deviance: 1088.8 on 33371 degrees of freedom
## (14245 observations deleted due to missingness)
## AIC: 1092.8
##
## Number of Fisher Scoring iterations: 9
Examples:
Correct 92.6% of the time using Z < 1 as a cutoff
Correct 99.8% of the time if we say firms never go bankrupt…
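The counts behind these accuracy figures can be tabulated directly from the cutoff; a minimal sketch, assuming df_Z is the subsample with non-missing Z and bankrupt_lead:
# Cross-tabulate actual outcomes against the Z < 1 cutoff
df_Z <- df %>% filter(!is.na(Z), !is.na(bankrupt_lead))
table(ifelse(df_Z$bankrupt_lead == 1, "Bankruptcy", "No bankruptcy"),
      ifelse(df_Z$Z < 1, "Z < 1", "Z >= 1"))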
## Z < 1 Z >= 1
## No bankruptcy 2654 30641
## Bankruptcy 29 49
This type of chart (filled in) is called a confusion matrix
False positive (Type I error): we say the company will go bankrupt, but it doesn't
False negative (Type II error): we say the company will not go bankrupt, yet it does
Calculating any of these requires the following:
Predicted probabilities for each observation, from predict(., type="response")
A cutoff probability: anything below it is classified as 0, anything above it as 1
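The AUC reported below can be computed with yardstick; a sketch reusing fit_Z and df_Z from above (auc_Z matches the name used later when comparing models):
# Predicted probabilities and ROC AUC for the Z-score model
df_Z$pred <- predict(fit_Z, df_Z, type="response")
auc_Z <- df_Z %>% roc_auc(truth=bankrupt_lead, estimate=pred, event_level='second')
auc_Z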
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 roc_auc binary 0.750
AUC is the probability that our model assigns a higher estimated probability to a randomly selected 1 than to a randomly selected 0.
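That interpretation can be verified empirically; a quick sketch using the predictions from above, counting ties as one half:
# Probability that a random bankrupt firm outscores a random non-bankrupt firm
p1 <- df_Z$pred[df_Z$bankrupt_lead == 1]
p0 <- df_Z$pred[df_Z$bankrupt_lead == 0]
mean(outer(p1, p0, ">")) + 0.5 * mean(outer(p1, p0, "==")) # equals the AUC above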
\[ DD = \frac{\log(V_A / D) + (r-\frac{1}{2}\sigma_A^2)(T-t)}{\sigma_A \sqrt{T-t}} \]
# df_stock is an already prepped csv from CRSP data
df_stock$date <- as.Date(df_stock$date)
df <- left_join(df, df_stock[,c("gvkey", "date", "ret", "ret.sd")])
## Joining, by = c("gvkey", "date")
df_rf$date <- as.Date(df_rf$dateff)
df_rf$year <- year(df_rf$date)
df_rf$month <- month(df_rf$date)
df <- left_join(df, df_rf[,c("year", "month", "rf")])
## Joining, by = c("year", "month")
df <- df %>%
mutate(DD = (log(mve / lt) + (rf - (ret.sd*sqrt(253))^2 / 2)) /
(ret.sd*sqrt(253)))
# Clean the measure
df <- df %>%
mutate_if(is.numeric, list(~replace(., !is.finite(.), NA)))
ret.sd is the daily return standard deviation; multiplying by sqrt(253) annualizes it (about 253 trading days per year)
df %>%
filter(!is.na(DD),
!is.na(bankrupt_lead)) %>%
group_by(bankrupt_lead) %>%
mutate(mean_DD=mean(DD, na.rm=T),
prob_default =
pnorm(-1 * mean_DD)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt_lead, mean_DD,
prob_default) %>%
html_df()
bankrupt_lead | mean_DD | prob_default |
---|---|---|
0 | 0.6427281 | 0.2602003 |
1 | -3.1423863 | 0.9991621 |
df %>%
filter(!is.na(DD),
!is.na(bankrupt_lead),
year >= 2000) %>%
group_by(bankrupt_lead) %>%
mutate(mean_DD=mean(DD, na.rm=T),
prob_default =
pnorm(-1 * mean_DD)) %>%
slice(1) %>%
ungroup() %>%
select(bankrupt_lead, mean_DD,
prob_default) %>%
html_df()
bankrupt_lead | mean_DD | prob_default |
---|---|---|
0 | 0.8878013 | 0.1873238 |
1 | -4.4289487 | 0.9999953 |
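As with the Z-score, the summary below implies a simple logit of next-year bankruptcy on DD; a sketch (fit_DD matches the name used in the ROC code that follows):
# Logit of next-year bankruptcy on distance to default
fit_DD <- glm(bankrupt_lead ~ DD, data=df, family=binomial)
summary(fit_DD)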
##
## Call:
## glm(formula = bankrupt_lead ~ DD, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.6531 -0.0730 -0.0596 -0.0451 3.7497
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.27408 0.16653 -37.676 < 2e-16 ***
## DD -0.29783 0.03877 -7.682 1.57e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 665.03 on 20455 degrees of freedom
## Residual deviance: 608.65 on 20454 degrees of freedom
## (31380 observations deleted due to missingness)
## AIC: 612.65
##
## Number of Fisher Scoring iterations: 9
df_DD <- df %>% filter(!is.na(DD), !is.na(bankrupt_lead))
df_DD$pred <- predict(fit_DD, df_DD, type="response")
df_DD %>% roc_curve(truth=bankrupt_lead, estimate=pred, event_level='second') %>%
autoplot()
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 roc_auc binary 0.843
# AUC
auc_DD <- df_DD %>% roc_auc(truth=bankrupt_lead, estimate=pred, event_level='second')
AUCs <- c(auc_Z$.estimate, auc_DD$.estimate)
names(AUCs) <- c("Z", "DD")
AUCs
## Z DD
## 0.7498970 0.8434707
Distance to Default performs quite a bit better here!
Why?
# calculate downgrade
df <- df %>%
group_by(gvkey) %>%
arrange(date) %>%
mutate(downgrade = factor(ifelse(lead(rating) < rating,1,0), levels=c(0,1)),
diff_Z = Z - lag(Z),
diff_DD = DD - lag(DD)) %>%
ungroup()
# training sample
train <- df %>% filter(year < 2014, !is.na(diff_Z), !is.na(diff_DD), !is.na(downgrade),
year > 1985)
test <- df %>% filter(year >= 2014, !is.na(diff_Z), !is.na(diff_DD), !is.na(downgrade))
# glms
fit_Z2 <- glm(downgrade ~ diff_Z, data=train, family=binomial)
fit_DD2 <- glm(downgrade ~ diff_DD, data=train, family=binomial)
##
## Call:
## glm(formula = downgrade ~ diff_Z, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9418 -0.4313 -0.4311 -0.4254 2.6569
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.32925 0.06246 -37.294 <2e-16 ***
## diff_Z -0.09426 0.04860 -1.939 0.0525 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1913.6 on 3177 degrees of freedom
## Residual deviance: 1908.7 on 3176 degrees of freedom
## AIC: 1912.7
##
## Number of Fisher Scoring iterations: 5
##
## Call:
## glm(formula = downgrade ~ diff_DD, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5614 -0.4240 -0.4230 -0.3754 2.7957
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.36904 0.06452 -36.719 < 2e-16 ***
## diff_DD -0.25536 0.03883 -6.576 4.82e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1913.6 on 3177 degrees of freedom
## Residual deviance: 1871.4 on 3176 degrees of freedom
## AIC: 1875.4
##
## Number of Fisher Scoring iterations: 5
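The two sets of AUCs below compare the models in and out of sample; a sketch of how they could be computed, reusing roc_auc as before (prediction column names are assumed):
# In-sample and out-of-sample AUCs for the downgrade models
train$pred_Z <- predict(fit_Z2, train, type="response")
train$pred_DD <- predict(fit_DD2, train, type="response")
test$pred_Z <- predict(fit_Z2, test, type="response")
test$pred_DD <- predict(fit_DD2, test, type="response")
c(Z = roc_auc(train, downgrade, pred_Z, event_level='second')$.estimate,
  DD = roc_auc(train, downgrade, pred_DD, event_level='second')$.estimate) # in sample
c(Z = roc_auc(test, downgrade, pred_Z, event_level='second')$.estimate,
  DD = roc_auc(test, downgrade, pred_DD, event_level='second')$.estimate) # out of sample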
In sample:
## Z DD
## 0.6672852 0.6440596
Out of sample:
## Z DD
## 0.6464 0.5904
What other data could we use to predict corporate bankruptcy as it relates to a company’s supply chain?
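The output below comes from a model including both signals at once; a sketch of the fit (fit_comb is an assumed name):
# Combined model: both changes as predictors of a downgrade
fit_comb <- glm(downgrade ~ diff_Z + diff_DD, data=train, family=binomial)
summary(fit_comb)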
##
## Call:
## glm(formula = downgrade ~ diff_Z + diff_DD, family = binomial,
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1511 -0.4244 -0.4230 -0.3739 2.8181
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.36899 0.06457 -36.689 < 2e-16 ***
## diff_Z 0.02886 0.04289 0.673 0.501
## diff_DD -0.26787 0.04306 -6.220 4.97e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1913.6 on 3177 degrees of freedom
## Residual deviance: 1871.0 on 3175 degrees of freedom
## AIC: 1877
##
## Number of Fisher Scoring iterations: 5
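The table below matches the output of summary() on a margins object from the margins package; a sketch, assuming fit_comb from above:
# Average marginal effects on the probability of a downgrade
library(margins)
summary(margins(fit_comb))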
## factor AME SE z p lower upper
## diff_DD -0.0214 0.0035 -6.1316 0.0000 -0.0283 -0.0146
## diff_Z 0.0023 0.0034 0.6726 0.5012 -0.0044 0.0090
Out-of-sample AUCs:
## Z DD Combined
## 0.6464000 0.5904000 0.5959385