Note that the directories used to store data are likely different on your computer, and such references will need to be changed before using any such code.
library(knitr)library(kableExtra)#' Pretty formatted HTML tables#' #' @param df The dataframe to format as a table#' @param names Names to use for columns if not using the df column names. Vector#' @param highlight_cols Columns to highlight. Default is c(1) for highlighting the first row. Use c() to remove.#' @param highlight_rows Rows to highlight. Default is c() to not highlight any rows.#' @param color_rows Rows to highlight in color. Default is c() to not highlight any rows.#' @param highlight_end_rows Rows to highlight. Uses a reverse indexing where 1 is the last row, 2 is second to last, etc.#' @param full Use to span the full width of a slide. FALSE by default.#' @param fixed_header Whether the first row should be treated as a header row. TRUE by default.html_df<-function(df, names=NULL, highlight_cols=c(1), highlight_rows=c(), color_rows=c(), highlight_end_rows=c(), full=F, fixed_header=T){if(!length(names)){names=colnames(df)}kbl(df,"html", col.names =names, align =c("l",rep('c',length(cols)-1)))%>%kable_paper(c("striped","hover"), full_width=full, fixed_thead=F, html_font ="\"Source Sans Pro\", Helvetica, sans-serif")%>%{if(length(highlight_cols))column_spec(., highlight_cols,bold=T, background ="#ffffff99")else.}%>%{if(length(highlight_rows))row_spec(., highlight_rows,bold=T, background ="#ffffff99")else.}%>%{if(length(color_rows))row_spec(., color_rows,bold=T, background ="#aaaaff99")else.}%>%{if(length(highlight_end_rows))row_spec(., nrow(df)+1-highlight_end_rows,bold=T, background ="#ffffff99")else.}}library(tidyverse)library(broom)
# Import Walmart data from Session_Projectlibrary(lubridate)preprocess_data<-function(df){# Merge the data together (Pulled from outside of function -- "scoping")df<-df%>%inner_join(weekly.stores)%>%inner_join(weekly.features[,1:11])# Compress the weird markdown information to 1 variabledf<-df%>%mutate(markdown =0, markdown =ifelse(!is.na(MarkDown1),MarkDown1,markdown), markdown =ifelse(!is.na(MarkDown1),MarkDown2,markdown), markdown =ifelse(!is.na(MarkDown1),MarkDown3,markdown), markdown =ifelse(!is.na(MarkDown1),MarkDown4,markdown), markdown =ifelse(!is.na(MarkDown1),MarkDown5,markdown))# Fix dates and add useful time variablesdf<-df%>%mutate(date =as.Date(Date), week =week(date), year =year(date), month =month(date))df}weekly<-read_csv("../../Data/WMT_train.csv")weekly.features<-read_csv("../../Data/WMT_features.csv")weekly.stores<-read_csv("../../Data/WMT_stores.csv")df<-preprocess_data(weekly)df<-df%>%group_by(Store, Dept)%>%mutate(store_avg=mean(Weekly_Sales, rm.na=T))%>%ungroup()
# Create the binary variable from Walmart sales datadf<-df%>%mutate(double =ifelse(Weekly_Sales>store_avg*2,1,0))fit<-glm(double~IsHoliday, data=df, family=binomial)tidy(fit)
percent<-function(x){paste0(round(x*100,2),"%")}library(rlang)# Another use for a quosure :)# You could always just use quoted arguments here thoughextract_margin<-function(m, x){x<-enquo(x)percent(filter(summary(m), factor==quo_text(x))$AME)}
Rows: 71460 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): left_type, arrive_type, status
dbl (11): imo, run, lat, lon, last_update, left_time, left_port, arrive_time...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 2512 Columns: 16
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): flag, shipname, url, photo, photos link, recognized_next_port, rep...
dbl (4): imo, lat_of_latest_position, lon_of_latest_position, year_of_build
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 1117 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (3): port, lat, lon
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 33419 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (11): date, time, constrnt, wkng, rpd, scene, est_rmw, mthd, sat, commen...
dbl (13): intensity_ci, intensity_mslp, intensity_vmax, tno_fnl, tno_adj, tn...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# rename lat and lon to avoid conflictsdf_ports<-df_ports%>%rename(port_lat=lat, port_lon=lon)# Join in ships and portsdf<-df%>%left_join(df_ships)%>%filter(!imo==9528029|reported_destination=="AU ADL")%>%left_join(df_ports, by=c("left_port"="port"))%>%rename(left_lat=port_lat, left_lon=port_lon)%>%left_join(df_ports, by=c("arrive_port"="port"))%>%rename(arrive_lat=port_lat, arrive_lon=port_lon)
Joining with `by = join_by(imo)`
Warning in left_join(., df_ships): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 847 of `x` matches multiple rows in `y`.
ℹ Row 84 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
# fix datesdf<-df%>%mutate(last_update =as.POSIXct(last_update, tz="UTC",origin="1970-01-01"), left_time =as.POSIXct(left_time, tz="UTC",origin="1970-01-01"), arrive_time =as.POSIXct(arrive_time, tz="UTC",origin="1970-01-01"))# Fix typhoon datestyphoon$date<-as.POSIXct(paste(typhoon$date,typhoon$time), format="%Y%b%d %H%M%S", tz="UTC", origin="1970-01-01")# Fix typhoon longitude -- they have lon * -1 in the datatyphoon$lon<-typhoon$lon*-1typhoon_all<-typhoon# filter to dates in sampletyphoon<-typhoon%>%filter(date>as.POSIXct("2018-08-30", tz="UTC"))df<-df%>%rename(frame=run)df_all<-df[df$frame!=32,]df<-df%>%filter(last_update>as.POSIXct("2018-08-30", tz="UTC"))library(plotly)# needed for the toRGB() function
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(plotly)# for plottinglibrary(RColorBrewer)# for colors# plot with boats, ports, and typhoons# Note: geo is defined in the appendix -- it controls layoutpalette=brewer.pal(8, "Dark2")[c(1,8,3,2)]p<-plot_geo(colors=palette)%>%add_markers(data=df_ports, x =~port_lon, y =~port_lat, color ="Port")%>%add_markers(data=df_Aug31, x =~lon, y =~lat, color =~ship_type, text=~paste('Ship name',shipname))%>%add_markers(data=typhoon_Aug31, x =~lon, y =~lat, color="TYPHOON", text=~paste("Name", typhoon_name))%>%layout(showlegend =TRUE, geo =geo, title ='Singaporean owned container and tanker ships, August 31, 2018')p
# plot with boats and typhoonspalette=brewer.pal(8, "Dark2")[c(1,3,2)]p<-plot_geo(colors=palette)%>%add_markers(data=df_all[df_all$frame==14,], x =~lon, y =~lat, color =~ship_type, text=~paste('Ship name',shipname))%>%add_markers(data=typhoon_Jebi, x =~lon, y =~lat, color="Typhoon Jebi", text=~paste("Name", typhoon_name, "</br>Time: ", date))%>%layout( showlegend =TRUE, geo =geo, title ='Singaporean container/tanker ships, September 4, 2018, evening')p
library(leaflet)library(leaflet.extras)# typhoon iconsty_icons<-pulseIcons(color='red', # supports hex colors like "#C0C0C0" heartbeat =1.2, #heartbeat is frequency of pulsing iconSize=3)# ship iconsshipicons<-iconList( ship =makeIcon("../Figures/ship.png", NULL, 18, 18))leaflet()%>%addTiles()%>%setView(lng =136, lat =20, zoom=3)%>%addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax<=150/1.852,], lng=~lon, lat=~lat, # Not super typhoon stroke =TRUE, radius=2, color="orange", label=~date)%>%addPulseMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax>150/1.852,], lng=~lon, lat=~lat, # Super typhoon icon=ty_icons, label=~date)%>%addMarkers(data=df_all[df_all$frame==14,], lng=~lon, lat=~lat, label=~shipname, icon=shipicons)
# Load datadf3<-read.csv("../../Data/Shipping/combined.csv.gz")# Determine the main DV: delayeddf3<-df3%>%arrange(imo, last_update)%>%group_by(imo)%>%mutate(delayed=ifelse(difftime(arrive_time, lead(arrive_time), units="days")>1/8&arrive_time>last_update&left_port==lead(left_port),1,0))%>%ungroup()
Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
# sf can't handle NA values, so we need to filter them outdf3<-df3%>%mutate(n =row_number())df_ty<-df3%>%filter(!is.na(ty_lat), !is.na(ty_lon))# Create lists of points on Earthx<-st_as_sf(df_ty%>%select("lon", "lat"), coords =c("lon", "lat"), crs =4326, agr ="constant")y<-st_as_sf(df_ty%>%select("ty_lon", "ty_lat"), coords =c("ty_lon", "ty_lat"), crs =4326, agr ="constant")# Find the distance between the pointsdf_ty$dist_typhoon<-st_distance(x, y, by_element=T)/1000# distance in km# Add the distance back to our main data# Replaced NAs with a placeholder longer than any possible distancedf3<-df3%>%left_join(df_ty%>%select(n, dist_typhoon))%>%mutate(dist_typhoon =ifelse(is.na(dist_typhoon), 20036, dist_typhoon))%>%select(-n)
factor AME SE z p lower upper
typhoon_1000 0.0053 0.0032 1.6435 0.1003 -0.0010 0.0115
typhoon_2000 0.0040 0.0018 2.2006 0.0278 0.0004 0.0075
typhoon_500 0.0037 0.0042 0.8811 0.3782 -0.0045 0.0118
# Cut makes a categorical variable out of a numerical variable using specified binsdf3$Super<-ifelse(df3$intensity_vmax*1.852>185, 1, 0)df3$Moderate<-ifelse(df3$intensity_vmax*1.852>=88&df3$intensity_vmax*1.852<185, 1, 0)df3$Weak<-ifelse(df3$intensity_vmax*1.852>=41&df3$intensity_vmax*1.852<88, 1, 0)df3$HK_intensity<-cut(df3$intensity_vmax*1.852 ,c(-1,41, 62, 87, 117, 149, 999))table(df3$HK_intensity)
margins(fit2, at =list(Weak =c(1)), variables =c("typhoon_500", "typhoon_1000", "typhoon_2000"))%>%summary()%>%html_df()
factor
Weak
AME
SE
z
p
lower
upper
typhoon_1000
1
0.0073448
0.0053623
1.3697146
0.1707760
-0.0031651
0.0178547
typhoon_2000
1
0.0063950
0.0031242
2.0469359
0.0406644
0.0002717
0.0125182
typhoon_500
1
-0.0457062
0.7044878
-0.0648786
0.9482707
-1.4264768
1.3350645
margins(fit2, at =list(Moderate =c(1)), variables =c("typhoon_500", "typhoon_1000", "typhoon_2000"))%>%summary()%>%html_df()
factor
Moderate
AME
SE
z
p
lower
upper
typhoon_1000
1
0.0059123
0.0078242
0.7556384
0.4498660
-0.0094229
0.0212474
typhoon_2000
1
0.0043822
0.0039442
1.1110625
0.2665414
-0.0033482
0.0121126
typhoon_500
1
-0.0311833
0.6856382
-0.0454807
0.9637241
-1.3750095
1.3126429
margins(fit2, at =list(Super =c(1)), variables =c("typhoon_500", "typhoon_1000", "typhoon_2000"))%>%summary()%>%html_df()
factor
Super
AME
SE
z
p
lower
upper
typhoon_1000
1
0.0032538
0.0111428
0.2920086
0.7702801
-0.0185858
0.0250934
typhoon_2000
1
0.0102396
0.0041602
2.4613510
0.0138415
0.0020858
0.0183934
typhoon_500
1
-0.2242811
3.1633421
-0.0709000
0.9434773
-6.4243176
5.9757554
Source Code
---title: "Code for Session 4"author: "Dr. Richard M. Crowley"format: html: code-tools: true code-link: true embed-resources: true---Note that the directories used to store data are likely different on your computer, and such references will need to be changed before using any such code.```{r helpers, warning=FALSE, message=F}library(knitr)library(kableExtra)#' Pretty formatted HTML tables#' #' @param df The dataframe to format as a table#' @param names Names to use for columns if not using the df column names. Vector#' @param highlight_cols Columns to highlight. Default is c(1) for highlighting the first row. Use c() to remove.#' @param highlight_rows Rows to highlight. Default is c() to not highlight any rows.#' @param color_rows Rows to highlight in color. Default is c() to not highlight any rows.#' @param highlight_end_rows Rows to highlight. Uses a reverse indexing where 1 is the last row, 2 is second to last, etc.#' @param full Use to span the full width of a slide. FALSE by default.#' @param fixed_header Whether the first row should be treated as a header row. TRUE by default.html_df <-function(df, names=NULL, highlight_cols=c(1), highlight_rows=c(), color_rows=c(), highlight_end_rows=c(), full=F, fixed_header=T) {if(!length(names)) { names=colnames(df) }kbl(df,"html", col.names = names, align =c("l",rep('c',length(cols)-1))) %>%kable_paper(c("striped","hover"), full_width=full, fixed_thead=F, html_font ="\"Source Sans Pro\", Helvetica, sans-serif") %>% {if(length(highlight_cols)) column_spec(., highlight_cols,bold=T, background ="#ffffff99") else .} %>% {if(length(highlight_rows)) row_spec(., highlight_rows,bold=T, background ="#ffffff99") else .} %>% {if(length(color_rows)) row_spec(., color_rows,bold=T, background ="#aaaaff99") else .} %>% {if(length(highlight_end_rows)) row_spec(., nrow(df) +1- highlight_end_rows,bold=T, background ="#ffffff99") else .}}library(tidyverse)library(broom)``````{r}#| include: falseoptions(downlit.attached =c("knitr", "kableExtra", "tidyverse"))``````{r, message=F, warning=F}# Import Walmart data from Session_Projectlibrary(lubridate)preprocess_data <-function(df) {# Merge the data together (Pulled from outside of function -- "scoping") df <- df %>%inner_join(weekly.stores) %>%inner_join(weekly.features[,1:11])# Compress the weird markdown information to 1 variable df <- df %>%mutate(markdown =0,markdown =ifelse(!is.na(MarkDown1),MarkDown1,markdown),markdown =ifelse(!is.na(MarkDown1),MarkDown2,markdown),markdown =ifelse(!is.na(MarkDown1),MarkDown3,markdown),markdown =ifelse(!is.na(MarkDown1),MarkDown4,markdown),markdown =ifelse(!is.na(MarkDown1),MarkDown5,markdown))# Fix dates and add useful time variables df <- df %>%mutate(date =as.Date(Date),week =week(date),year =year(date),month =month(date)) df}weekly <-read_csv("../../Data/WMT_train.csv")weekly.features <-read_csv("../../Data/WMT_features.csv")weekly.stores <-read_csv("../../Data/WMT_stores.csv")df <-preprocess_data(weekly)df <- df %>%group_by(Store, Dept) %>%mutate(store_avg=mean(Weekly_Sales, rm.na=T)) %>%ungroup()``````{r}# Create the binary variable from Walmart sales datadf <- df %>%mutate(double =ifelse(Weekly_Sales > store_avg*2,1,0))fit <-glm(double ~ IsHoliday, data=df, family=binomial)tidy(fit)``````{r}# Automating the above:exp(coef(fit))``````{r}test_data <-data.frame(IsHoliday =c(F,T))predict(fit, test_data) # log odds``````{r}# probabilitiespredict(fit, test_data, type="response")``````{r, echo=T}a <-exp(-3.446804)b <-exp(-2.908164)c(a/(1+a), b/(1+b))``````{r}fit2 <-glm(double ~ IsHoliday + Temperature + Fuel_Price, data=df, family=binomial)summary(fit2)``````{r}# Oddsexp(coef(fit2))``````{r}# Typical September dayshday_sep <-mean(predict(fit2, filter(df, IsHoliday, month==9), type="response"))no_hday_sep <-mean(predict(fit2, filter(df, !IsHoliday, month==9), type="response"))# Typical December dayshday_dec <-mean(predict(fit2, filter(df, IsHoliday, month==12), type="response"))no_hday_dec <-mean(predict(fit2, filter(df, !IsHoliday, month==12), type="response"))html_df(data.frame(Month=c(9,9,12,12), IsHoliday=c(FALSE,TRUE,FALSE,TRUE),Probability=c(no_hday_sep, hday_sep, no_hday_dec, hday_dec)))``````{r, message=F}percent <-function(x) {paste0(round(x*100,2),"%")}library(rlang)# Another use for a quosure :)# You could always just use quoted arguments here thoughextract_margin <-function(m, x) { x <-enquo(x)percent(filter(summary(m), factor==quo_text(x))$AME)}``````{r, message=F}# Calculate AME marginal effectslibrary(margins)m <-margins(fit2)m``````{r}summary(m) %>%html_df()``````{r, fig.height=4, fig.width=6}plot(m, which=summary(m)$factor)``````{r}margins(fit2, at =list(IsHoliday =c(TRUE, FALSE)),variables =c("Temperature", "Fuel_Price")) %>%summary() %>%html_df()``````{r}margins(fit2, at =list(Temperature =c(0, 25, 50, 75, 100)),variables =c("IsHoliday")) %>%summary() %>%html_df()``````{r}# load datadf <-read_csv("../../Data/Shipping/shipping_dataset.csv", na="NA")df_ships <-read_csv("../../Data/Shipping/index.csv", na="NA")df_ports <-read_csv("../../Data/Shipping/port_dataset.csv", na="NA")typhoon <-read_csv("../../Data/Shipping/typhoon.csv")# rename lat and lon to avoid conflictsdf_ports <- df_ports %>%rename(port_lat=lat, port_lon=lon)# Join in ships and portsdf <- df %>%left_join(df_ships) %>%filter(!imo ==9528029| reported_destination =="AU ADL") %>%left_join(df_ports, by=c("left_port"="port")) %>%rename(left_lat=port_lat, left_lon=port_lon) %>%left_join(df_ports, by=c("arrive_port"="port")) %>%rename(arrive_lat=port_lat, arrive_lon=port_lon)# fix datesdf <- df %>%mutate(last_update =as.POSIXct(last_update, tz="UTC",origin="1970-01-01"),left_time =as.POSIXct(left_time, tz="UTC",origin="1970-01-01"),arrive_time =as.POSIXct(arrive_time, tz="UTC",origin="1970-01-01"))# Fix typhoon datestyphoon$date <-as.POSIXct(paste(typhoon$date,typhoon$time), format="%Y%b%d %H%M%S", tz="UTC", origin="1970-01-01")# Fix typhoon longitude -- they have lon * -1 in the datatyphoon$lon <- typhoon$lon *-1typhoon_all <- typhoon# filter to dates in sampletyphoon <- typhoon %>%filter(date >as.POSIXct("2018-08-30", tz="UTC"))df <- df %>%rename(frame=run)df_all <- df[df$frame !=32,]df <- df %>%filter(last_update >as.POSIXct("2018-08-30", tz="UTC"))library(plotly) # needed for the toRGB() functiongeo <-list(showland =TRUE,showlakes =TRUE,showcountries =TRUE,showocean =TRUE,countrywidth =0.5,landcolor =toRGB("grey90"),lakecolor =toRGB("aliceblue"),oceancolor =toRGB("aliceblue"),projection =list(type ='orthographic', # detailed at https://plot.ly/r/reference/#layout-geo-projectionrotation =list(lon =100,lat =1,roll =0 ) ),lonaxis =list(showgrid =TRUE,gridcolor =toRGB("gray40"),gridwidth =0.5 ),lataxis =list(showgrid =TRUE,gridcolor =toRGB("gray40"),gridwidth =0.5 ))typhoon_Aug31 <- typhoon[typhoon$date >as.POSIXct("2018-08-31 00:00:00", tz="UTC") & typhoon$date <as.POSIXct("2018-09-01 00:00:00", tz="UTC"),]typhoon_Jebi <- typhoon_all[typhoon_all$typhoon_name =="JEBI_Y",]df_Aug31 <- df[df$frame ==1,]``````{r, warning=F, message=F, fig.height=6, fig.width=12}library(plotly) # for plottinglibrary(RColorBrewer) # for colors# plot with boats, ports, and typhoons# Note: geo is defined in the appendix -- it controls layoutpalette =brewer.pal(8, "Dark2")[c(1,8,3,2)]p <-plot_geo(colors=palette) %>%add_markers(data=df_ports, x =~port_lon, y =~port_lat, color ="Port") %>%add_markers(data=df_Aug31, x =~lon, y =~lat, color =~ship_type,text=~paste('Ship name',shipname)) %>%add_markers(data=typhoon_Aug31, x =~lon, y =~lat, color="TYPHOON",text=~paste("Name", typhoon_name)) %>%layout(showlegend =TRUE, geo = geo,title ='Singaporean owned container and tanker ships, August 31, 2018')p``````{r, warning=F, message=F, fig.height=6, fig.width=10}# plot with boats and typhoonspalette =brewer.pal(8, "Dark2")[c(1,3,2)]p <-plot_geo(colors=palette) %>%add_markers(data=df_all[df_all$frame ==14,], x =~lon, y =~lat, color =~ship_type,text=~paste('Ship name',shipname)) %>%add_markers(data=typhoon_Jebi, x =~lon, y =~lat, color="Typhoon Jebi", text=~paste("Name", typhoon_name, "</br>Time: ", date)) %>%layout(showlegend =TRUE, geo = geo,title ='Singaporean container/tanker ships, September 4, 2018, evening')p``````{r, warning=F, message=F, fig.height=5, fig.width=9}library(leaflet)library(leaflet.extras)# typhoon iconsty_icons <-pulseIcons(color='red', # supports hex colors like "#C0C0C0"heartbeat =1.2, #heartbeat is frequency of pulsingiconSize=3)# ship iconsshipicons <-iconList(ship =makeIcon("../Figures/ship.png", NULL, 18, 18))leaflet() %>%addTiles() %>%setView(lng =136, lat =20, zoom=3) %>%addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax <=150/1.852,], lng=~lon, lat=~lat, # Not super typhoonstroke =TRUE, radius=2, color="orange", label=~date) %>%addPulseMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax >150/1.852,], lng=~lon, lat=~lat, # Super typhoonicon=ty_icons, label=~date) %>%addMarkers(data=df_all[df_all$frame ==14,], lng=~lon, lat=~lat,label=~shipname, icon=shipicons)``````{r}# Load datadf3 <-read.csv("../../Data/Shipping/combined.csv.gz")# Determine the main DV: delayeddf3 <- df3 %>%arrange(imo, last_update) %>%group_by(imo) %>%mutate(delayed=ifelse(difftime(arrive_time, lead(arrive_time), units="days") >1/8& arrive_time > last_update & left_port ==lead(left_port),1,0)) %>%ungroup()``````{r}library(sf)# sf can't handle NA values, so we need to filter them outdf3 <- df3 %>%mutate(n =row_number())df_ty <- df3 %>%filter(!is.na(ty_lat), !is.na(ty_lon))# Create lists of points on Earthx <-st_as_sf(df_ty %>%select("lon", "lat"), coords =c("lon", "lat"), crs =4326, agr ="constant")y <-st_as_sf(df_ty %>%select("ty_lon", "ty_lat"), coords =c("ty_lon", "ty_lat"), crs =4326, agr ="constant")# Find the distance between the pointsdf_ty$dist_typhoon <-st_distance(x, y, by_element=T) /1000# distance in km# Add the distance back to our main data# Replaced NAs with a placeholder longer than any possible distancedf3 <- df3 %>%left_join(df_ty %>%select(n, dist_typhoon)) %>%mutate(dist_typhoon =ifelse(is.na(dist_typhoon), 20036, dist_typhoon)) %>%select(-n)``````{r}df3$typhoon_500 =ifelse(df3$dist_typhoon <500& df3$dist_typhoon >=0, 1, 0)df3$typhoon_1000 =ifelse(df3$dist_typhoon <1000& df3$dist_typhoon >=500, 1, 0)df3$typhoon_2000 =ifelse(df3$dist_typhoon <2000& df3$dist_typhoon >=1000, 1, 0)``````{r}fit1 <-glm(delayed ~ typhoon_500 + typhoon_1000 + typhoon_2000, data=df3,family=binomial)summary(fit1)``````{r}odds1 <-exp(coef(fit1))odds1``````{r}m1 <-margins(fit1)summary(m1)``````{r}# Cut makes a categorical variable out of a numerical variable using specified binsdf3$Super <-ifelse(df3$intensity_vmax *1.852>185, 1, 0)df3$Moderate <-ifelse(df3$intensity_vmax *1.852>=88& df3$intensity_vmax *1.852<185, 1, 0)df3$Weak <-ifelse(df3$intensity_vmax *1.852>=41& df3$intensity_vmax *1.852<88, 1, 0)df3$HK_intensity <-cut(df3$intensity_vmax *1.852 ,c(-1,41, 62, 87, 117, 149, 999))table(df3$HK_intensity)``````{r}fit2 <-glm(delayed ~ (typhoon_500 + typhoon_1000 + typhoon_2000) : (Weak + Moderate + Super), data=df3,family=binomial)tidy(fit2)``````{r}m2 <-margins(fit2)summary(m2) %>%html_df()``````{r}margins(fit2, at =list(Weak =c(1)),variables =c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%summary() %>%html_df()``````{r}margins(fit2, at =list(Moderate =c(1)),variables =c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%summary() %>%html_df()``````{r}margins(fit2, at =list(Super =c(1)),variables =c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%summary() %>%html_df()```