Note that the directories used to store data are likely different on your computer, and such references will need to be changed before using any such code.

library(knitr)
library(kableExtra)
library(tidyverse)
library(broom)
html_df <- function(text, cols=NULL, col1=FALSE, full=F) {
  if(!length(cols)) {
    cols=colnames(text)
  }
  if(!col1) {
    kable(text,"html", col.names = cols, align = c("l",rep('c',length(cols)-1))) %>%
      kable_styling(bootstrap_options = c("striped","hover"), full_width=full)
  } else {
    kable(text,"html", col.names = cols, align = c("l",rep('c',length(cols)-1))) %>%
      kable_styling(bootstrap_options = c("striped","hover"), full_width=full) %>%
      column_spec(1,bold=T)
  }
}
# Import Walmart data from Session_Project
library(lubridate)
preprocess_data <- function(df) {
  # Merge the data together (Pulled from outside of function -- "scoping")
  df <- inner_join(df, weekly.stores)
  df <- inner_join(df, weekly.features[,1:11])
  
  # Compress the weird markdown information to 1 variable
  df$markdown <- 0
  df[!is.na(df$MarkDown1),]$markdown <- df[!is.na(df$MarkDown1),]$MarkDown1
  df[!is.na(df$MarkDown2),]$markdown <- df[!is.na(df$MarkDown2),]$MarkDown2
  df[!is.na(df$MarkDown3),]$markdown <- df[!is.na(df$MarkDown3),]$MarkDown3
  df[!is.na(df$MarkDown4),]$markdown <- df[!is.na(df$MarkDown4),]$MarkDown4
  df[!is.na(df$MarkDown5),]$markdown <- df[!is.na(df$MarkDown5),]$MarkDown5
  
  # Fix dates and add useful time variables
  df$date <- as.Date(df$Date)
  df$week <- week(df$date)
  df$year <- year(df$date)
  df$month <- month(df$date)
  
  df
}

weekly <- read.csv("../../Data/WMT_train.csv", stringsAsFactors=FALSE)
weekly.features <- read.csv("../../Data/WMT_features.csv", stringsAsFactors=FALSE)
weekly.stores <- read.csv("../../Data/WMT_stores.csv", stringsAsFactors=FALSE)
df <- preprocess_data(weekly)
#df$id <- df$Store *10000 + df$week * 100 + df$Dept
df <- df %>%
  group_by(Store, Dept) %>% 
  mutate(store_avg=mean(Weekly_Sales, rm.na=T)) %>%
  ungroup()
# Create the binary variable from Walmart sales data
df$double <- ifelse(df$Weekly_Sales > df$store_avg*2,1,0)
fit <- glm(double ~ IsHoliday, data=df, family=binomial)
tidy(fit)
# Automating the above:
exp(coef(fit))
  (Intercept) IsHolidayTRUE 
   0.03184725    1.71367497 
c(-3.44, -2.9)
[1] -3.44 -2.90
a <- exp(-3.44)
b <- exp(-2.9)
c(a/(1+a), b/(1+b))
[1] 0.03106848 0.05215356
model2 <- glm(double ~ IsHoliday + Temperature + Fuel_Price, data=df, family=binomial)
summary(model2)

Call:
glm(formula = double ~ IsHoliday + Temperature + Fuel_Price, 
    family = binomial, data = df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.4113  -0.2738  -0.2464  -0.2213   2.8562  

Coefficients:
                Estimate Std. Error z value Pr(>|z|)    
(Intercept)   -1.7764917  0.0673246  -26.39   <2e-16 ***
IsHolidayTRUE  0.3704298  0.0284395   13.03   <2e-16 ***
Temperature   -0.0108268  0.0004698  -23.04   <2e-16 ***
Fuel_Price    -0.3091950  0.0196234  -15.76   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 120370  on 421569  degrees of freedom
Residual deviance: 119146  on 421566  degrees of freedom
AIC: 119154

Number of Fisher Scoring iterations: 6
# Odds
exp(coef(model2))
  (Intercept) IsHolidayTRUE   Temperature    Fuel_Price 
    0.1692308     1.4483570     0.9892316     0.7340376 
# Typical September days
hday_sep    <- mean(predict(model2, filter(df, IsHoliday, month==9), type="response"))
no_hday_sep <-  mean(predict(model2, filter(df, !IsHoliday, month==9), type="response"))
# Typical December days
hday_dec    <- mean(predict(model2, filter(df, IsHoliday, month==12), type="response"))
no_hday_dec <-  mean(predict(model2, filter(df, !IsHoliday, month==12), type="response"))

html_df(data.frame(Month=c(9,9,12,12),
                   IsHoliday=c(FALSE,TRUE,FALSE,TRUE),
                   Probability=c(no_hday_sep, hday_sep, no_hday_dec, hday_dec)))
Month IsHoliday Probability
9 FALSE 0.0266789
9 TRUE 0.0374761
12 FALSE 0.0398377
12 TRUE 0.0586483
percent <- function(x) {
  paste0(round(x*100,2),"%")
}
library(rlang)
# Another use for a quosure :)
# You could always just use quoted arguements here though
extract_margin <- function(m, x) {
  x <- enquo(x)
  percent(filter(summary(m), factor==quo_text(x))$AME)
}
# Calculate AME marginal effects
library(margins)
m <- margins(model2)
m
summary(m) %>%
  html_df()
factor AME SE z p lower upper
Fuel_Price -0.0096438 0.0006163 -15.64800 0 -0.0108517 -0.0084359
IsHoliday 0.0133450 0.0011754 11.35372 0 0.0110413 0.0156487
Temperature -0.0003377 0.0000149 -22.71255 0 -0.0003668 -0.0003085
plot(m, which=summary(m)$factor)

margins(model2, at = list(IsHoliday = c(TRUE, FALSE)),
        variables = c("Temperature", "Fuel_Price")) %>%
  summary() %>%
  html_df()
factor IsHoliday AME SE z p lower upper
Fuel_Price FALSE -0.0093401 0.0005989 -15.59617 0 -0.0105139 -0.0081664
Fuel_Price TRUE -0.0131335 0.0008717 -15.06650 0 -0.0148420 -0.0114250
Temperature FALSE -0.0003271 0.0000146 -22.46024 0 -0.0003556 -0.0002985
Temperature TRUE -0.0004599 0.0000210 -21.92927 0 -0.0005010 -0.0004188
margins(model2, at = list(Temperature = c(0, 20, 40, 60, 80, 100)),
        variables = c("IsHoliday")) %>%
  summary() %>%
  html_df()
factor Temperature AME SE z p lower upper
IsHoliday 0 0.0234484 0.0020168 11.62643 0 0.0194955 0.0274012
IsHoliday 20 0.0194072 0.0016710 11.61387 0 0.0161320 0.0226824
IsHoliday 40 0.0159819 0.0013885 11.51001 0 0.0132604 0.0187033
IsHoliday 60 0.0131066 0.0011592 11.30623 0 0.0108345 0.0153786
IsHoliday 80 0.0107120 0.0009732 11.00749 0 0.0088046 0.0126193
IsHoliday 100 0.0087305 0.0008213 10.62977 0 0.0071207 0.0103402

# load data
df <- read.csv("../../Data/Shipping/shipping_dataset.csv", stringsAsFactors = F, na="NA")
df_ships <- read.csv("../../Data/Shipping/index.csv", stringsAsFactors = F, na="NA")
df_ports <- read.csv("../../Data/Shipping/port_dataset.csv", stringsAsFactors = F, na="NA")
typhoon <- read.csv("../../Data/Shipping/typhoon.csv", stringsAsFactors = F)

# rename lat and lon to avoid conflicts
df_ports <- df_ports %>% rename(port_lat=lat, port_lon=lon)

# Join in ships and ports
df <- left_join(df, df_ships)
Joining, by = "imo"
df <- df %>% filter(!imo == 9528029 | reported_destination == "AU ADL")
df <- left_join(df,df_ports, by=c("left_port"="port"))
df <- df %>% rename(left_lat=port_lat, left_lon=port_lon)
df <- left_join(df,df_ports, by=c("arrive_port"="port"))
df <- df %>% rename(arrive_lat=port_lat, arrive_lon=port_lon)

# fix dates
df$last_update = as.POSIXct(df$last_update, tz="UTC",origin="1970-01-01")
df$left_time = as.POSIXct(df$left_time, tz="UTC",origin="1970-01-01")
df$arrive_time = as.POSIXct(df$arrive_time, tz="UTC",origin="1970-01-01")

# Fix typhoon dates
typhoon$date <- as.POSIXct(paste(typhoon$date,typhoon$time), format="%Y%b%d %H%M%S", tz="UTC", origin="1970-01-01")
# Fix typhoon longitude -- they have lon * -1 in the data
typhoon$lon <- typhoon$lon * -1

typhoon_all <- typhoon
# filter to dates in sample
typhoon <- typhoon %>% filter(date > as.POSIXct("2018-08-30", tz="UTC"))

df <- df %>% rename(frame=run)

df_all <- df[df$frame != 32,]
df <- df %>% filter(last_update > as.POSIXct("2018-08-30", tz="UTC"))

library(plotly)  # needed for the toRGB() function
geo <- list(
  showland = TRUE,
  showlakes = TRUE,
  showcountries = TRUE,
  showocean = TRUE,
  countrywidth = 0.5,
  landcolor = toRGB("grey90"),
  lakecolor = toRGB("aliceblue"),
  oceancolor = toRGB("aliceblue"),
  projection = list(
    type = 'orthographic',  # detailed at https://plot.ly/r/reference/#layout-geo-projection
    rotation = list(
      lon = 100,
      lat = 1,
      roll = 0
    )
  ),
  lonaxis = list(
    showgrid = TRUE,
    gridcolor = toRGB("gray40"),
    gridwidth = 0.5
  ),
  lataxis = list(
    showgrid = TRUE,
    gridcolor = toRGB("gray40"),
    gridwidth = 0.5
  )
)

typhoon_Aug31 <- typhoon[typhoon$date > as.POSIXct("2018-08-31 00:00:00", tz="UTC") & typhoon$date < as.POSIXct("2018-09-01 00:00:00", tz="UTC"),]
typhoon_Jebi <- typhoon_all[typhoon_all$typhoon_name == "JEBI_Y",]
df_Aug31 <- df[df$frame == 1,]
library(plotly)  # for plotting
library(RColorBrewer)  # for colors
# plot with boats, ports, and typhoons
# Note: geo is defined in the appendix -- it controls layout
palette = brewer.pal(8, "Dark2")[c(1,8,3,2)]
p <- plot_geo(colors=palette) %>%
  add_markers(data=df_ports, x = ~port_lon, y = ~port_lat, color = "Port") %>%
  add_markers(data=df_Aug31, x = ~lon, y = ~lat, color = ~ship_type,
              text=~paste('Ship name',shipname)) %>%
  add_markers(data=typhoon_Aug31, x = ~lon, y = ~lat, color="TYPHOON",
              text=~paste("Name", typhoon_name)) %>%
   layout(showlegend = TRUE, geo = geo,
    title = 'Singaporean owned container and tanker ships, August 31, 2018')
p
# plot with boats and typhoons
palette = brewer.pal(8, "Dark2")[c(1,3,2)]
p <- plot_geo(colors=palette) %>%
  add_markers(data=df_all[df_all$frame == 14,], x = ~lon, y = ~lat, color = ~ship_type,
              text=~paste('Ship name',shipname)) %>%
  add_markers(data=typhoon_Jebi, x = ~lon, y = ~lat, color="Typhoon Jebi", text=~paste("Name", typhoon_name, "</br>Time: ", date)) %>%
   layout(
    showlegend = TRUE, geo = geo,
    title = 'Singaporean container/tanker ships, September 4, 2018, evening')
p
library(leaflet)
library(leaflet.extras)

# typhoon icons
icons <- pulseIcons(color='red',
  heartbeat = ifelse(typhoon_Jebi$intensity_vmax > 150/1.852, 0.8,
    ifelse(typhoon$intensity_vmax < 118/1.852, 1.6, 1.2)),
  iconSize=ifelse(typhoon_Jebi$intensity_vmax > 150/1.852, 12,
    ifelse(typhoon_Jebi$intensity_vmax < 118/1.852, 3, 8)))

# ship icons
shipicons <- iconList(
  ship = makeIcon("../Figures/ship.png", NULL, 18, 18)
)
leaflet() %>%
  addTiles() %>% 
  setView(lng = 136, lat = 34, zoom=4) %>%
  addPulseMarkers(data=typhoon_Jebi[seq(1,nrow(typhoon_Jebi),5),], lng=~lon,
                  lat=~lat, label=~date, icon=icons) %>%
  addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax > 150/1.852,],
    lng=~lon, lat=~lat,stroke = TRUE, radius=12, color="red", label=~date) %>%
  addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax <= 150/1.852 &
    typhoon_Jebi$intensity_vmax > 118/1.852,], lng=~lon, lat=~lat,
    stroke = TRUE, radius=12, color="red", label=~date) %>%
  addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax <=118/1.852,],
    lng=~lon, lat=~lat, stroke = TRUE, radius=12, color="red", label=~date) %>%
  addMarkers(data=df_all[df_all$frame == 14,], lng=~lon, lat=~lat,
             label=~shipname, icon=shipicons)
# Load datas
df3 <- read.csv("../../Data/Shipping/combined.csv", stringsAsFactors = F)

library(geosphere)

# Calculate distance to and from ports
df3$dist_toport <- distVincentyEllipsoid(as.matrix(df3[,c("lon","lat")]), as.matrix(df3[,c("arrive_lon","arrive_lat")]))
df3$dist_fromport <- distVincentyEllipsoid(as.matrix(df3[,c("lon","lat")]), as.matrix(df3[,c("left_lon","left_lat")]))
df3 <- df3 %>%
  arrange(imo, last_update) %>%
  group_by(imo) %>%
  mutate(delayed=ifelse(difftime(arrive_time, lead(arrive_time), units="days") > 1/8 & arrive_time > last_update & left_port == lead(left_port),1,0)) %>%
  ungroup()
library(geosphere)
x <- as.matrix(df3[,c("lon","lat")])  # ship location
y <- as.matrix(df3[,c("ty_lon","ty_lat")]) # typhoon location

df3$dist_typhoon <- distVincentyEllipsoid(x, y) / 1000
df3$typhoon_500 = ifelse(df3$dist_typhoon < 500 & 
                         df3$dist_typhoon >= 0, 1, 0)
df3$typhoon_1000 = ifelse(df3$dist_typhoon < 1000 &
                          df3$dist_typhoon >= 500, 1, 0)
df3$typhoon_2000 = ifelse(df3$dist_typhoon < 2000 &
                          df3$dist_typhoon >= 1000, 1, 0)
fit1 <- glm(delayed ~ typhoon_500 + typhoon_1000 + typhoon_2000, data=df3,
            family=binomial)
summary(fit1)

Call:
glm(formula = delayed ~ typhoon_500 + typhoon_1000 + typhoon_2000, 
    family = binomial, data = df3)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.2502  -0.2261  -0.2261  -0.2261   2.7127  

Coefficients:
             Estimate Std. Error  z value Pr(>|z|)    
(Intercept)  -3.65377    0.02934 -124.547   <2e-16 ***
typhoon_500   0.14073    0.16311    0.863   0.3883    
typhoon_1000  0.20539    0.12575    1.633   0.1024    
typhoon_2000  0.16059    0.07106    2.260   0.0238 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 14329  on 59184  degrees of freedom
Residual deviance: 14322  on 59181  degrees of freedom
  (3866 observations deleted due to missingness)
AIC: 14330

Number of Fisher Scoring iterations: 6
odds1 <- exp(coef(fit1))
odds1
 (Intercept)  typhoon_500 typhoon_1000 typhoon_2000 
  0.02589334   1.15111673   1.22800815   1.17420736 
m1 <- margins(fit1)
summary(m1)
# Cut makes a categorical variable out of a numerical variable using specified bins
df3$Super <- ifelse(df3$intensity_vmax * 1.852 > 185, 1, 0)
df3$Moderate <- ifelse(df3$intensity_vmax * 1.852 >= 88 &
                         df3$intensity_vmax * 1.852 < 185, 1, 0)
df3$Weak <- ifelse(df3$intensity_vmax * 1.852 >= 41 & 
                   df3$intensity_vmax * 1.852 < 88, 1, 0)
df3$HK_intensity <- cut(df3$intensity_vmax * 1.852 ,c(-1,41, 62, 87, 117, 149, 999))
table(df3$HK_intensity)

  (-1,41]   (41,62]   (62,87]  (87,117] (117,149] (149,999] 
     3398     12039     12615     11527      2255     21141 
fit2 <- glm(delayed ~ (typhoon_500 + typhoon_1000 + typhoon_2000) : 
              (Weak + Moderate + Super), data=df3,
            family=binomial)
tidy(fit2)
m2 <- margins(fit2)
summary(m2) %>%
  html_df()
factor AME SE z p lower upper
Moderate 0.0007378 0.0006713 1.0990530 0.2717449 -0.0005779 0.0020535
Super -0.0050241 0.0860163 -0.0584087 0.9534231 -0.1736129 0.1635647
typhoon_1000 0.0035473 0.0036186 0.9802921 0.3269420 -0.0035450 0.0106396
typhoon_2000 0.0039224 0.0017841 2.1985908 0.0279070 0.0004257 0.0074191
typhoon_500 -0.0440484 0.6803640 -0.0647424 0.9483791 -1.3775373 1.2894405
Weak 0.0009975 0.0005154 1.9353011 0.0529534 -0.0000127 0.0020077
margins(fit2, at = list(Weak = c(1)),
        variables = c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%
  summary() %>%
  html_df()
factor Weak AME SE z p lower upper
typhoon_1000 1 0.0073057 0.0053682 1.360938 0.1735332 -0.0032157 0.0178271
typhoon_2000 1 0.0067051 0.0031225 2.147328 0.0317671 0.0005850 0.0128251
typhoon_500 1 -0.0458116 0.7052501 -0.064958 0.9482075 -1.4280764 1.3364531
margins(fit2, at = list(Moderate = c(1)),
        variables = c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%
  summary() %>%
  html_df()
factor Moderate AME SE z p lower upper
typhoon_1000 1 0.0059332 0.0078245 0.7582856 0.4482800 -0.0094025 0.0212688
typhoon_2000 1 0.0044871 0.0039453 1.1373050 0.2554108 -0.0032457 0.0122198
typhoon_500 1 -0.0311946 0.6847130 -0.0455586 0.9636620 -1.3732074 1.3108182
margins(fit2, at = list(Super = c(1)),
        variables = c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%
  summary() %>%
  html_df()
factor Super AME SE z p lower upper
typhoon_1000 1 0.0030638 0.0111295 0.2752891 0.7830941 -0.0187495 0.0248772
typhoon_2000 1 0.0102513 0.0041568 2.4661549 0.0136572 0.0021041 0.0183985
typhoon_500 1 -0.2241250 3.1608062 -0.0709076 0.9434713 -6.4191913 5.9709413
---
title: "Code for Session 4"
author: "Dr. Richard M. Crowley"
date: ""
output:
  html_notebook
---

Note that the directories used to store data are likely different on your computer, and such references will need to be changed before using any such code.

```{r helpers, warning=FALSE, message=F}
library(knitr)
library(kableExtra)
library(tidyverse)
library(broom)
html_df <- function(text, cols=NULL, col1=FALSE, full=F) {
  if(!length(cols)) {
    cols=colnames(text)
  }
  if(!col1) {
    kable(text,"html", col.names = cols, align = c("l",rep('c',length(cols)-1))) %>%
      kable_styling(bootstrap_options = c("striped","hover"), full_width=full)
  } else {
    kable(text,"html", col.names = cols, align = c("l",rep('c',length(cols)-1))) %>%
      kable_styling(bootstrap_options = c("striped","hover"), full_width=full) %>%
      column_spec(1,bold=T)
  }
}
```

```{r, message=F, warning=F}
# Import Walmart data from Session_Project
library(lubridate)
preprocess_data <- function(df) {
  # Merge the data together (Pulled from outside of function -- "scoping")
  df <- inner_join(df, weekly.stores)
  df <- inner_join(df, weekly.features[,1:11])
  
  # Compress the weird markdown information to 1 variable
  df$markdown <- 0
  df[!is.na(df$MarkDown1),]$markdown <- df[!is.na(df$MarkDown1),]$MarkDown1
  df[!is.na(df$MarkDown2),]$markdown <- df[!is.na(df$MarkDown2),]$MarkDown2
  df[!is.na(df$MarkDown3),]$markdown <- df[!is.na(df$MarkDown3),]$MarkDown3
  df[!is.na(df$MarkDown4),]$markdown <- df[!is.na(df$MarkDown4),]$MarkDown4
  df[!is.na(df$MarkDown5),]$markdown <- df[!is.na(df$MarkDown5),]$MarkDown5
  
  # Fix dates and add useful time variables
  df$date <- as.Date(df$Date)
  df$week <- week(df$date)
  df$year <- year(df$date)
  df$month <- month(df$date)
  
  df
}

weekly <- read.csv("../../Data/WMT_train.csv", stringsAsFactors=FALSE)
weekly.features <- read.csv("../../Data/WMT_features.csv", stringsAsFactors=FALSE)
weekly.stores <- read.csv("../../Data/WMT_stores.csv", stringsAsFactors=FALSE)
df <- preprocess_data(weekly)
#df$id <- df$Store *10000 + df$week * 100 + df$Dept
df <- df %>%
  group_by(Store, Dept) %>% 
  mutate(store_avg=mean(Weekly_Sales, rm.na=T)) %>%
  ungroup()
```

```{r}
# Create the binary variable from Walmart sales data
df$double <- ifelse(df$Weekly_Sales > df$store_avg*2,1,0)
fit <- glm(double ~ IsHoliday, data=df, family=binomial)
tidy(fit)
```

```{r}
# Automating the above:
exp(coef(fit))
```

```{r}
c(-3.44, -2.9)
```

```{r}
a <- exp(-3.44)
b <- exp(-2.9)
c(a/(1+a), b/(1+b))
```

```{r}
model2 <- glm(double ~ IsHoliday + Temperature + Fuel_Price, data=df, family=binomial)
summary(model2)
```

```{r}
# Odds
exp(coef(model2))
```

```{r}
# Typical September days
hday_sep    <- mean(predict(model2, filter(df, IsHoliday, month==9), type="response"))
no_hday_sep <-  mean(predict(model2, filter(df, !IsHoliday, month==9), type="response"))
# Typical December days
hday_dec    <- mean(predict(model2, filter(df, IsHoliday, month==12), type="response"))
no_hday_dec <-  mean(predict(model2, filter(df, !IsHoliday, month==12), type="response"))

html_df(data.frame(Month=c(9,9,12,12),
                   IsHoliday=c(FALSE,TRUE,FALSE,TRUE),
                   Probability=c(no_hday_sep, hday_sep, no_hday_dec, hday_dec)))
```

```{r, message=F}
percent <- function(x) {
  paste0(round(x*100,2),"%")
}
library(rlang)
# Another use for a quosure :)
# You could always just use quoted arguements here though
extract_margin <- function(m, x) {
  x <- enquo(x)
  percent(filter(summary(m), factor==quo_text(x))$AME)
}
```

```{r, message=F}
# Calculate AME marginal effects
library(margins)
m <- margins(model2)
m
```

```{r}
summary(m) %>%
  html_df()
```

```{r, fig.height=4, fig.width=6}
plot(m, which=summary(m)$factor)
```

```{r}
margins(model2, at = list(IsHoliday = c(TRUE, FALSE)),
        variables = c("Temperature", "Fuel_Price")) %>%
  summary() %>%
  html_df()
```

```{r}
margins(model2, at = list(Temperature = c(0, 20, 40, 60, 80, 100)),
        variables = c("IsHoliday")) %>%
  summary() %>%
  html_df()
```

```{r}

# load data
df <- read.csv("../../Data/Shipping/shipping_dataset.csv", stringsAsFactors = F, na="NA")
df_ships <- read.csv("../../Data/Shipping/index.csv", stringsAsFactors = F, na="NA")
df_ports <- read.csv("../../Data/Shipping/port_dataset.csv", stringsAsFactors = F, na="NA")
typhoon <- read.csv("../../Data/Shipping/typhoon.csv", stringsAsFactors = F)

# rename lat and lon to avoid conflicts
df_ports <- df_ports %>% rename(port_lat=lat, port_lon=lon)

# Join in ships and ports
df <- left_join(df, df_ships)
df <- df %>% filter(!imo == 9528029 | reported_destination == "AU ADL")
df <- left_join(df,df_ports, by=c("left_port"="port"))
df <- df %>% rename(left_lat=port_lat, left_lon=port_lon)
df <- left_join(df,df_ports, by=c("arrive_port"="port"))
df <- df %>% rename(arrive_lat=port_lat, arrive_lon=port_lon)

# fix dates
df$last_update = as.POSIXct(df$last_update, tz="UTC",origin="1970-01-01")
df$left_time = as.POSIXct(df$left_time, tz="UTC",origin="1970-01-01")
df$arrive_time = as.POSIXct(df$arrive_time, tz="UTC",origin="1970-01-01")

# Fix typhoon dates
typhoon$date <- as.POSIXct(paste(typhoon$date,typhoon$time), format="%Y%b%d %H%M%S", tz="UTC", origin="1970-01-01")
# Fix typhoon longitude -- they have lon * -1 in the data
typhoon$lon <- typhoon$lon * -1

typhoon_all <- typhoon
# filter to dates in sample
typhoon <- typhoon %>% filter(date > as.POSIXct("2018-08-30", tz="UTC"))

df <- df %>% rename(frame=run)

df_all <- df[df$frame != 32,]
df <- df %>% filter(last_update > as.POSIXct("2018-08-30", tz="UTC"))

library(plotly)  # needed for the toRGB() function
geo <- list(
  showland = TRUE,
  showlakes = TRUE,
  showcountries = TRUE,
  showocean = TRUE,
  countrywidth = 0.5,
  landcolor = toRGB("grey90"),
  lakecolor = toRGB("aliceblue"),
  oceancolor = toRGB("aliceblue"),
  projection = list(
    type = 'orthographic',  # detailed at https://plot.ly/r/reference/#layout-geo-projection
    rotation = list(
      lon = 100,
      lat = 1,
      roll = 0
    )
  ),
  lonaxis = list(
    showgrid = TRUE,
    gridcolor = toRGB("gray40"),
    gridwidth = 0.5
  ),
  lataxis = list(
    showgrid = TRUE,
    gridcolor = toRGB("gray40"),
    gridwidth = 0.5
  )
)

typhoon_Aug31 <- typhoon[typhoon$date > as.POSIXct("2018-08-31 00:00:00", tz="UTC") & typhoon$date < as.POSIXct("2018-09-01 00:00:00", tz="UTC"),]
typhoon_Jebi <- typhoon_all[typhoon_all$typhoon_name == "JEBI_Y",]
df_Aug31 <- df[df$frame == 1,]
```

```{r, warning=F, message=F, fig.height=5, fig.width=8}
library(plotly)  # for plotting
library(RColorBrewer)  # for colors
# plot with boats, ports, and typhoons
# Note: geo is defined in the appendix -- it controls layout
palette = brewer.pal(8, "Dark2")[c(1,8,3,2)]
p <- plot_geo(colors=palette) %>%
  add_markers(data=df_ports, x = ~port_lon, y = ~port_lat, color = "Port") %>%
  add_markers(data=df_Aug31, x = ~lon, y = ~lat, color = ~ship_type,
              text=~paste('Ship name',shipname)) %>%
  add_markers(data=typhoon_Aug31, x = ~lon, y = ~lat, color="TYPHOON",
              text=~paste("Name", typhoon_name)) %>%
   layout(showlegend = TRUE, geo = geo,
    title = 'Singaporean owned container and tanker ships, August 31, 2018')
p
```

```{r, warning=F, message=F, fig.height=5, fig.width=8}
# plot with boats and typhoons
palette = brewer.pal(8, "Dark2")[c(1,3,2)]
p <- plot_geo(colors=palette) %>%
  add_markers(data=df_all[df_all$frame == 14,], x = ~lon, y = ~lat, color = ~ship_type,
              text=~paste('Ship name',shipname)) %>%
  add_markers(data=typhoon_Jebi, x = ~lon, y = ~lat, color="Typhoon Jebi", text=~paste("Name", typhoon_name, "</br>Time: ", date)) %>%
   layout(
    showlegend = TRUE, geo = geo,
    title = 'Singaporean container/tanker ships, September 4, 2018, evening')
p
```

```{r, warning=F, message=F, fig.height=5, fig.width=9}
library(leaflet)
library(leaflet.extras)

# typhoon icons
icons <- pulseIcons(color='red',
  heartbeat = ifelse(typhoon_Jebi$intensity_vmax > 150/1.852, 0.8,
    ifelse(typhoon$intensity_vmax < 118/1.852, 1.6, 1.2)),
  iconSize=ifelse(typhoon_Jebi$intensity_vmax > 150/1.852, 12,
    ifelse(typhoon_Jebi$intensity_vmax < 118/1.852, 3, 8)))

# ship icons
shipicons <- iconList(
  ship = makeIcon("../Figures/ship.png", NULL, 18, 18)
)
leaflet() %>%
  addTiles() %>% 
  setView(lng = 136, lat = 34, zoom=4) %>%
  addPulseMarkers(data=typhoon_Jebi[seq(1,nrow(typhoon_Jebi),5),], lng=~lon,
                  lat=~lat, label=~date, icon=icons) %>%
  addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax > 150/1.852,],
    lng=~lon, lat=~lat,stroke = TRUE, radius=12, color="red", label=~date) %>%
  addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax <= 150/1.852 &
    typhoon_Jebi$intensity_vmax > 118/1.852,], lng=~lon, lat=~lat,
    stroke = TRUE, radius=12, color="red", label=~date) %>%
  addCircleMarkers(data=typhoon_Jebi[typhoon_Jebi$intensity_vmax <=118/1.852,],
    lng=~lon, lat=~lat, stroke = TRUE, radius=12, color="red", label=~date) %>%
  addMarkers(data=df_all[df_all$frame == 14,], lng=~lon, lat=~lat,
             label=~shipname, icon=shipicons)
```

```{r}
# Load datas
df3 <- read.csv("../../Data/Shipping/combined.csv", stringsAsFactors = F)

library(geosphere)

# Calculate distance to and from ports
df3$dist_toport <- distVincentyEllipsoid(as.matrix(df3[,c("lon","lat")]), as.matrix(df3[,c("arrive_lon","arrive_lat")]))
df3$dist_fromport <- distVincentyEllipsoid(as.matrix(df3[,c("lon","lat")]), as.matrix(df3[,c("left_lon","left_lat")]))
df3 <- df3 %>%
  arrange(imo, last_update) %>%
  group_by(imo) %>%
  mutate(delayed=ifelse(difftime(arrive_time, lead(arrive_time), units="days") > 1/8 & arrive_time > last_update & left_port == lead(left_port),1,0)) %>%
  ungroup()
```

```{r}
library(geosphere)
x <- as.matrix(df3[,c("lon","lat")])  # ship location
y <- as.matrix(df3[,c("ty_lon","ty_lat")]) # typhoon location

df3$dist_typhoon <- distVincentyEllipsoid(x, y) / 1000
```

```{r}
df3$typhoon_500 = ifelse(df3$dist_typhoon < 500 & 
                         df3$dist_typhoon >= 0, 1, 0)
df3$typhoon_1000 = ifelse(df3$dist_typhoon < 1000 &
                          df3$dist_typhoon >= 500, 1, 0)
df3$typhoon_2000 = ifelse(df3$dist_typhoon < 2000 &
                          df3$dist_typhoon >= 1000, 1, 0)
```

```{r}
fit1 <- glm(delayed ~ typhoon_500 + typhoon_1000 + typhoon_2000, data=df3,
            family=binomial)
summary(fit1)
```

```{r}
odds1 <- exp(coef(fit1))
odds1
```

```{r}
m1 <- margins(fit1)
summary(m1)
```

```{r}
# Cut makes a categorical variable out of a numerical variable using specified bins
df3$Super <- ifelse(df3$intensity_vmax * 1.852 > 185, 1, 0)
df3$Moderate <- ifelse(df3$intensity_vmax * 1.852 >= 88 &
                         df3$intensity_vmax * 1.852 < 185, 1, 0)
df3$Weak <- ifelse(df3$intensity_vmax * 1.852 >= 41 & 
                   df3$intensity_vmax * 1.852 < 88, 1, 0)
df3$HK_intensity <- cut(df3$intensity_vmax * 1.852 ,c(-1,41, 62, 87, 117, 149, 999))
table(df3$HK_intensity)
```

```{r}
fit2 <- glm(delayed ~ (typhoon_500 + typhoon_1000 + typhoon_2000) : 
              (Weak + Moderate + Super), data=df3,
            family=binomial)
tidy(fit2)
```

```{r}
m2 <- margins(fit2)
summary(m2) %>%
  html_df()
```

```{r}
margins(fit2, at = list(Weak = c(1)),
        variables = c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%
  summary() %>%
  html_df()
```

```{r}
margins(fit2, at = list(Moderate = c(1)),
        variables = c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%
  summary() %>%
  html_df()
```

```{r}
margins(fit2, at = list(Super = c(1)),
        variables = c("typhoon_500", "typhoon_1000", "typhoon_2000")) %>%
  summary() %>%
  html_df()
```

