# Packages used by this script: stringr (str_extract), readr (read_fwf,
# fwf_widths) and dplyr (bind_rows).
library(stringr)
library(readr)
library(dplyr)

# Download the 2018 ADT archive index page as a character vector of lines.
noaa <- readLines("http://www.ssd.noaa.gov/PS/TROP/2018/adt/archive.html")

# Regex for a link to a per-storm text listing: starts with "http", contains
# "text", ends in ".txt", and never crosses a double quote.
pattern <- "http[^\"]+text[^\"]+\\.txt"

# str_extract() returns the first match per input line (NA when there is
# none); keep only the lines that actually yielded a URL.
urls <- Filter(Negate(is.na), str_extract(noaa, pattern))

# Layout 1: column names and fixed widths for the standard listing. The final
# NA width is handed to fwf_widths() as-is (readr documents it as the way to
# read a ragged last field).
header1 <- c(
  "date", "time", "intensity_ci", "intensity_mslp", "intensity_vmax",
  "tno_fnl", "tno_adj", "tno_ini", "constrnt", "wkng", "rpd",
  "temp_ctr", "temp_mean", "scene", "est_rmw", "mw", "lat", "lon",
  "mthd", "sat", "vza", "comments"
)
widths1 <- c(
  9, 7, 5, 7, 6, 5, 4, 4, 10, 5, 5,
  8, 7, 8, 7, 6, 8, 8, 6, 9, 5, NA
)

# Layout 2: identical to layout 1 except for one extra 8-character column,
# "intensity_mslpplat", inserted right after "intensity_mslp" (position 4).
header2 <- append(header1, "intensity_mslpplat", after = 4)
header <- header2
widths2 <- append(widths1, 8, after = 4)

# Loop state: `first` flags the first parsed file, `q` is a progress counter.
first <- TRUE
q <- 1
# Download every per-storm listing in `urls`, parse it as a fixed-width table
# and stack all tables into `typhoon` (one row per listing line, plus a
# `typhoon_name` column identifying the source file).
# Works as of September 8, 2019 -- may be broken by file updates
#
# Reads the globals `urls`, `q`, `header1`/`widths1` and `header2`/`widths2`.
# Parsed tables are collected in a preallocated list and bound once at the
# end, instead of re-copying the growing result on every iteration.
storm_tables <- vector("list", length(urls))
for (k in seq_along(urls)) {
  url <- urls[[k]]
  print(q)
  q <- q + 1
  lines <- readLines(url)

  # Storm identifier = file name up to the first "-", e.g. "17P" from
  # ".../text/17P-list.txt". basename() removes the dependence on the exact
  # character length of the URL prefix.
  name <- strsplit(basename(url), "-", fixed = TRUE)[[1]][1]

  # Files that mention "BiasAdj" anywhere carry an extra column, so they need
  # the wider layout.
  if (any(grepl("BiasAdj", lines, fixed = TRUE))) {
    widths <- widths2
    h <- header2
  } else {
    widths <- widths1
    h <- header1
  }

  if (url == "http://www.ssd.noaa.gov/PS/TROP/DATA/2018/adt/text/17P-list.txt") {
    # Special rule for this file due to incorrect negative MSLP values
    # offsetting columns: drop character 23 from file lines 5-55. The range is
    # clamped to the file length so a shorter file is not padded with bogus
    # entries.
    fix_idx <- seq_along(lines)
    fix_idx <- fix_idx[fix_idx >= 5 & fix_idx <= 55]
    lines[fix_idx] <- paste0(
      substr(lines[fix_idx], 1, 22),
      substring(lines[fix_idx], 24)
    )
  }

  # Only lines 5 .. (n - 3) are parsed, i.e. the first four and last three
  # lines are discarded (presumably header and trailer text -- confirm against
  # a current file). With fewer than 8 lines that range is empty or invalid.
  n_lines <- length(lines)
  if (n_lines < 8) {
    warning(
      "Skipping ", url, ": only ", n_lines, " line(s), no data rows",
      call. = FALSE
    )
    next
  }
  lines <- lines[5:(n_lines - 3)]

  # readr treats a string as literal data only if it contains a newline; the
  # trailing "\n" guarantees that even when a single data row remains.
  data <- read_fwf(
    paste0(paste(lines, collapse = "\n"), "\n"),
    fwf_widths(widths, col_names = h),
    na = "N/A"
  )
  # Force a common type for est_rmw; presumably readr guesses different types
  # for it in different files, which would make bind_rows() fail.
  data$est_rmw <- as.character(data$est_rmw)
  data$typhoon_name <- name
  storm_tables[[k]] <- data
}
# Skipped files leave NULL slots, which bind_rows() ignores.
typhoon <- bind_rows(storm_tables)
# Save the combined table as "typhoon.csv" in the working directory, without
# a row-name column.
write.csv(x = typhoon, file = "typhoon.csv", row.names = FALSE)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7ciB0eXBob29ucywgZXZhbD1GfQ0Kbm9hYSA8LSByZWFkTGluZXMoJ2h0dHA6Ly93d3cuc3NkLm5vYWEuZ292L1BTL1RST1AvMjAxOC9hZHQvYXJjaGl2ZS5odG1sJykNCmxpYnJhcnkoc3RyaW5ncikNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KGRwbHlyKQ0KcGF0dGVybiA9ICdodHRwW15cIl0rdGV4dFteXCJdK1xcLnR4dCcNCnVybHMgPC0gc3RyX2V4dHJhY3Qobm9hYSxwYXR0ZXJuKQ0KdXJscyA8LSB1cmxzWyFpcy5uYSh1cmxzKV0NCmhlYWRlcjEgPC0gYygiZGF0ZSIsICJ0aW1lIiwgImludGVuc2l0eV9jaSIsICJpbnRlbnNpdHlfbXNscCIsICJpbnRlbnNpdHlfdm1heCIsInRub19mbmwiLCJ0bm9fYWRqIiwidG5vX2luaSIsImNvbnN0cm50Iiwid2tuZyIsInJwZCIsInRlbXBfY3RyIiwidGVtcF9tZWFuIiwic2NlbmUiLCJlc3Rfcm13IiwibXciLCJsYXQiLCJsb24iLCJtdGhkIiwic2F0IiwidnphIiwiY29tbWVudHMiKQ0Kd2lkdGhzMSA8LSBjKDksNyw1LDcsNiw1LDQsNCwxMCw1LDUsOCw3LDgsNyw2LDgsOCw2LDksNSxOQSkNCmhlYWRlcjIgPSBoZWFkZXIgPC0gYygiZGF0ZSIsICJ0aW1lIiwgImludGVuc2l0eV9jaSIsICJpbnRlbnNpdHlfbXNscCIsICJpbnRlbnNpdHlfbXNscHBsYXQiLCAiaW50ZW5zaXR5X3ZtYXgiLCJ0bm9fZm5sIiwidG5vX2FkaiIsInRub19pbmkiLCJjb25zdHJudCIsIndrbmciLCJycGQiLCJ0ZW1wX2N0ciIsInRlbXBfbWVhbiIsInNjZW5lIiwiZXN0X3JtdyIsIm13IiwibGF0IiwibG9uIiwibXRoZCIsInNhdCIsInZ6YSIsImNvbW1lbnRzIikNCndpZHRoczIgPC0gYyg5LDcsNSw3LDgsNiw1LDQsNCwxMCw1LDUsOCw3LDgsNyw2LDgsOCw2LDksNSxOQSkNCmZpcnN0IDwtIFQNCnEgPC0gMQ0KIyBXb3JrcyBhcyBvZiBTZXB0ZW1iZXIgOCwgMjAxOSAtLSBtYXkgYmUgYnJva2VuIGJ5IGZpbGUgdXBkYXRlcw0KZm9yKHVybCBpbiB1cmxzKSB7DQogIHByaW50KHEpDQogIHEgPC0gcSArIDENCiAgbGluZXMgPC0gcmVhZExpbmVzKHVybCkNCiAgbmFtZSA8LSBzdHJzcGxpdChzdWJzdHIodXJsLDUyLDEwMDAwKSwgIi0iKVtbMV1dWzFdDQogICMgY2hlY2sgaWYgIkJpYXNBZGoiIGluIGxpbmUgNCAtLSBpZiBzbywgbmVlZCBhbiBleHRyYSBjb2x1bW4hDQogIGlmKGxlbmd0aChncmVwKCJCaWFzQWRqIixsaW5lcykpKSB7DQogICAgd2lkdGhzIDwtIHdpZHRoczINCiAgICBoIDwtIGhlYWRlcjINCiAgfSBlbHNlIHsNCiAgICB3aWR0aHMgPC0gd2lkdGhzMQ0KICAgIGggPC0gaGVhZGVyMQ0KICB9DQogIA0KICBpZih1cmw9PSJodHRwOi8vd3d3LnNzZC5ub2FhLmdvdi9QUy9UUk9QL0RBVEEvMjAxOC9hZHQvdGV4dC8xN1AtbGlzdC50eHQiKSB7DQogICAgIyBTcGVjaWFsIHJ1bGUgZm9yIHRoaXMgZmlsZSBkdWUgdG8gaW5jb3JyZWN0IG5lZ2F0aXZlIE1TTFAgdmFsdWVzIG9mZnNldHRpbmcgY29s
dW1ucw0KICAgIGZvcihpIGluIDU6NTUpIHsNCiAgICAgIGxpbmVzW2ldID0gcGFzdGUwKHN1YnN0cihsaW5lc1tpXSwxLDIyKSwgc3Vic3RyKGxpbmVzW2ldLDI0LG5jaGFyKGxpbmVzW2ldKSksIHNlcD0nJykNCiAgICB9DQogIH0NCiAgbGluZXMgPC0gbGluZXNbNToobGVuZ3RoKGxpbmVzKS0zKV0NCiAgDQogIGRhdGEgPC0gcmVhZF9md2YocGFzdGUwKGxpbmVzLGNvbGxhcHNlPSJcbiIpLCBmd2Zfd2lkdGhzKHdpZHRocywgY29sX25hbWVzPWgpLCBuYT0iTi9BIikNCiAgZGF0YSRlc3Rfcm13IDwtIGFzLmNoYXJhY3RlcihkYXRhJGVzdF9ybXcpDQogIGRhdGEkdHlwaG9vbl9uYW1lIDwtIG5hbWUNCiAgaWYoZmlyc3QpIHsNCiAgICBmaXJzdCA9IEYNCiAgICB0eXBob29uIDwtIGRhdGENCiAgfSBlbHNlIHsNCiAgICB0eXBob29uIDwtIGJpbmRfcm93cyh0eXBob29uLCBkYXRhKQ0KICB9DQp9DQp3cml0ZS5jc3YodHlwaG9vbiwidHlwaG9vbi5jc3YiLCByb3cubmFtZXM9RikNCmBgYA0K