tpemartin / 110-2-R

The class repo for 110-2 Programming for Data Science
0 stars 3 forks source link

Complete programming block of whichDateRow #18

Open tpemartin opened 2 years ago

tpemartin commented 2 years ago

https://github.com/tpemartin/110-2-R/blob/main/may%2025.R

Use the weather data from the 竹子湖 weather station to collect the weather conditions for every traffic accident.

Chious commented 2 years ago
# Attach weather observations to every traffic accident, using the weather
# station closest to the accident location.
#
# Result: `climateDataList`, one element per row of `traffic`, each a list
# with `date`, `stationPressure` and `temperature`.
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/support/weather.R", encoding="UTF-8") # R 4.2+
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/traffic_accident.R", encoding="UTF-8")

weather <- Weather()
weather$dowload_data()

climateDataList <- vector("list", nrow(traffic))

traffic$經度 <- as.double(traffic$經度)
traffic$緯度 <- as.double(traffic$緯度)

# Shorthand for the deeply nested node that holds all per-station data.
stationData <- weather[["rawData"]][["cwbdata"]][["resources"]][["resource"]][["data"]][["surfaceObs"]][["location"]]
stationObsList <- stationData[["stationObsTimes"]][["stationObsTime"]]
dailyTemperatureList <- stationData[["stationObsStatistics"]][["temperature"]][["daily"]]

# --- Station lookup table (loop-invariant, so built once) -----------------
# merge() sorts its result by the key columns and rows without coordinates
# are dropped afterwards, so a row position in `StationLocation` is NOT the
# station's position in the raw data. Remember the original position in
# `stationIndex` and use that for every per-station lookup below.
# NOTE(review): assumes row i of the station table corresponds to element i
# of the per-station lists -- confirm against the Weather() data layout.
stationTable <- stationData[["station"]]
stationTable$stationIndex <- seq_len(nrow(stationTable))
stationCoords <- weather[["weatherStation"]][["data"]][[1]][, c(2, 4, 5)]
colnames(stationCoords) <- c('stationName', 'longtitude', 'latitude')
StationLocation <- merge(stationTable, stationCoords, all.x = TRUE)
StationLocation <- StationLocation[!is.na(StationLocation$longtitude), ]
rownames(StationLocation) <- NULL
stationLonLat <- cbind(StationLocation$longtitude, StationLocation$latitude)

# Parsed observation timestamps, filled lazily per station so each station's
# timestamps are parsed at most once instead of once per accident.
parsedTimesCache <- vector("list", length(stationObsList))

for (.x in seq_len(nrow(traffic))) {
  trafficX <- traffic[.x, ]

  # trafficX$經度 trafficX$緯度 ==> whichStation
  # distHaversine() is vectorised over the station matrix; which.min() keeps
  # the first station on ties, like a strict `<` scan would.
  distances <- geosphere::distHaversine(
    c(trafficX$經度, trafficX$緯度),
    stationLonLat
  )
  nearestRow <- which.min(distances)
  if (length(nearestRow) == 0L) {
    # No usable distance (e.g. missing coordinates): leave this entry NULL.
    warning("No nearest station found for accident row ", .x, call. = FALSE)
    next
  }
  whichStation <- StationLocation$stationIndex[nearestRow]

  # trafficX ==> whichDateTimeRow: observation closest in time to the accident
  stationObs <- stationObsList[[whichStation]]
  if (is.null(parsedTimesCache[[whichStation]])) {
    parsedTimesCache[[whichStation]] <- lubridate::ymd_hms(stationObs$dataTime)
  }
  availableTimes <- parsedTimesCache[[whichStation]]
  whichDateTimeRow <- which.min(abs(availableTimes - trafficX$發生時間))

  # trafficX ==> daily temperature rows recorded on the accident's date
  accidentDate <- lubridate::ymd(format(trafficX$發生時間, "%Y-%m-%d"))
  temp <- dailyTemperatureList[[whichStation]]
  dailyTemperature <- temp[temp$dataDate == accidentDate, 2:4]

  climatedata1 <- list()
  climatedata1$date <- stationObs[whichDateTimeRow, 1]
  climatedata1$stationPressure <- stationObs[whichDateTimeRow, 2]
  climatedata1$temperature <- dailyTemperature

  climateDataList[[.x]] <- climatedata1
}
Christychenn commented 2 years ago
# Attach weather observations from one fixed weather station to every
# traffic accident.
#
# Result: `climateDataList`, one element per row of `traffic`, each a list
# with `date`, `stationPressure` and `temperature`.
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/support/weather.R", encoding="UTF-8") # R 4.2+
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/traffic_accident.R", encoding="UTF-8")
weather <- Weather()
weather$dowload_data()
# The 竹子湖 station was found to be the fifth record; the other per-station
# weather tables appear to follow the same ordering.
whichStation <- 5
# whichDateTimeRow = 6098
# whichDateRow = 255

# Shorthand for the deeply nested node that holds all per-station data.
stationData <- weather[["rawData"]][["cwbdata"]][["resources"]][["resource"]][["data"]][["surfaceObs"]][["location"]]
stationObs <- stationData[["stationObsTimes"]][["stationObsTime"]][[whichStation]]
dailyTemperature <- stationData[["stationObsStatistics"]][["temperature"]][["daily"]][[whichStation]]

# The station is fixed, so parse its timestamps and dates once instead of
# once per accident.
# NOTE(review): ymd_hms() parses as UTC by default -- confirm this matches
# the time zone of traffic$發生時間.
availableTimes <- lubridate::ymd_hms(stationObs$dataTime)
availableDates <- lubridate::ymd(dailyTemperature[["dataDate"]])

climateDataList <- vector("list", nrow(traffic))
for (.x in seq_len(nrow(traffic))) {
  trafficX <- traffic[.x, ]

  # trafficX ==> whichDateTimeRow: observation closest in time to the accident
  whichDateTimeRow <- which.min(abs(availableTimes - trafficX$發生時間))

  # trafficX ==> whichDateRow: daily record closest to the accident's date
  accidentDate <- lubridate::as_date(trafficX$發生時間)
  whichDateRow <- which.min(abs(availableDates - accidentDate))

  climatedata1 <- list()
  climatedata1$date <- stationObs[whichDateTimeRow, 1]
  climatedata1$stationPressure <- stationObs[whichDateTimeRow, 2]
  climatedata1$temperature <- dailyTemperature[whichDateRow, 2:4]

  climateDataList[[.x]] <- climatedata1
}

# For each accident -------------------------------------------------------
# Build one table with, for every hour slot, the accident count of each
# county plus hour-level totals, then open it in the data viewer.
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/R/traffice-accidents2.R", encoding="UTF-8")

  # Treat hour and county as factors so that table() reports every county
  # level, including those with zero accidents in a given hour.
  traffic <- within(
    traffic,
    {
      hour <- factor(hour)
      縣市 <- factor(縣市)
    }
  )

  split_traffic <- split(traffic, traffic$hour)

  # Zero-row prototype: fixes the result's columns when there are no hour
  # groups at all. rbind() ignores zero-row arguments otherwise.
  emptyFrame <- data.frame(
    時段=character(0),
    時段車禍次數=integer(0),
    時段車禍縣市數目=integer(0),
    #####
    車禍次數=integer(0),
    縣市=character(0),
    時段名次=character(0)
  )

  # One data frame per hour slot. They are collected in an unnamed list and
  # bound in a single rbind() call instead of growing a data frame inside a
  # loop, which would copy the accumulated rows on every iteration.
  framesByHour <- lapply(seq_along(split_traffic), function(.x) {
    accidents_hourX <- sort(table(split_traffic[[.x]]$縣市), decreasing = TRUE)

    data.frame(
      時段=names(split_traffic[.x]),
      時段車禍次數=sum(accidents_hourX),
      時段車禍縣市數目=length(accidents_hourX[accidents_hourX !=0]),
      車禍次數=as.integer(accidents_hourX),
      縣市=names(accidents_hourX),
      # NOTE(review): with ties.method = "first", counties tied on count get
      # ranks in the reverse of their displayed order -- confirm intended.
      時段名次=length(accidents_hourX)+1-rank(accidents_hourX, ties.method = "first")
    )
  })

  dataFrame_byHour_byCounty <- do.call(rbind, c(list(emptyFrame), framesByHour))
  dataFrame_byHour_byCounty |> View()