Complete programming block of whichDateRow

tpemartin / 110-2-R

The class repo for 110-2 Programming for Data Science

0 stars 3 forks source link

# Build climateDataList: for every traffic accident, find the nearest weather
# station, then record that station's observation closest in time to the
# accident together with the daily temperature statistics for the accident date.
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/support/weather.R", encoding="UTF-8") # R 4.2+
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/traffic_accident.R", encoding="UTF-8")

weather <- Weather()
weather$dowload_data() # method name spelled exactly as it is called in this file

# Common root of every surface-observation table used below.
surfaceObs <- weather[["rawData"]][["cwbdata"]][["resources"]][["resource"]][["data"]][["surfaceObs"]][["location"]]

# --- Station coordinates (independent of the accident, so built only once) ---
stationInfo <- surfaceObs[["station"]]
# Remember each station's original row. merge() sorts its result by the key
# columns and stations without coordinates are dropped below, so a row position
# in StationLocation is NOT a valid index into the per-station lists.
# NOTE(review): assumes the rows of `station` are in the same order as the
# entries of `stationObsTime` and `daily` - confirm against the data.
stationInfo[["rawStationIndex"]] <- seq_len(nrow(stationInfo))

stationCoords <- weather[["weatherStation"]][["data"]][[1]][, c(2, 4, 5)]
colnames(stationCoords) <- c("stationName", "longtitude", "latitude")

StationLocation <- merge(stationInfo, stationCoords, all.x = TRUE)
StationLocation <- StationLocation[!is.na(StationLocation$longtitude), ]
rownames(StationLocation) <- NULL
if (nrow(StationLocation) == 0L) {
  stop("No weather station has known coordinates.", call. = FALSE)
}
# Two-column (longitude, latitude) matrix, the layout distHaversine() expects.
stationXY <- cbind(StationLocation$longtitude, StationLocation$latitude)

# --- Accidents ---
traffic$經度 <- as.double(traffic$經度)
traffic$緯度 <- as.double(traffic$緯度)
climateDataList <- vector("list", nrow(traffic))

for (.x in seq_len(nrow(traffic))) {
  trafficX <- traffic[.x, ]

  # trafficX$經度 trafficX$緯度 ==> whichStation
  # Distance from the accident to every station at once; which.min() keeps the
  # first station on ties, as the original strict `<` comparison did.
  distances <- geosphere::distHaversine(
    c(trafficX$經度, trafficX$緯度),
    stationXY
  )
  whichStation <- StationLocation$rawStationIndex[which.min(distances)]

  stationObs <- surfaceObs[["stationObsTimes"]][["stationObsTime"]][[whichStation]]
  temp <- surfaceObs[["stationObsStatistics"]][["temperature"]][["daily"]][[whichStation]]

  # trafficX ==> whichDateTimeRow
  trafficTime <- trafficX$發生時間
  availableTimes <- lubridate::ymd_hms(stationObs$dataTime)
  whichDateTimeRow <- which.min(abs(availableTimes - trafficTime))

  # trafficX ==> whichDateRow
  # Keep only the calendar date of the accident and match it against the
  # station's daily statistics.
  accidentDate <- lubridate::ymd(format(trafficTime, "%Y-%m-%d"))
  dailyTemperature <- temp[temp$dataDate == accidentDate, 2:4]

  climatedata1 <- list()
  climatedata1$date <- stationObs[whichDateTimeRow, 1]
  climatedata1$stationPressure <- stationObs[whichDateTimeRow, 2]
  climatedata1$temperature <- dailyTemperature

  climateDataList[[.x]] <- climatedata1
}

source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/support/weather.R", encoding="UTF-8") # R 4.2+
source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/traffic_accident.R", encoding="UTF-8")

# Build climateDataList for one fixed weather station: for every traffic
# accident, record the station observation closest in time and the daily
# temperature statistics of the closest date.
weather <- Weather()
weather$dowload_data() # method name spelled exactly as it is called in this file

# Original author's note (translated): Zhuzihu turned out to be the 5th record,
# and the other weather tables appear to follow the same station ordering.
whichStation <- 5
# whichDateTimeRow = 6098
# whichDateRow = 255

# Common root of every surface-observation table used below.
surfaceObs <- weather[["rawData"]][["cwbdata"]][["resources"]][["resource"]][["data"]][["surfaceObs"]][["location"]]

# The station is fixed, so its tables are extracted and parsed once instead of
# once per accident.
stationObs <- surfaceObs[["stationObsTimes"]][["stationObsTime"]][[whichStation]]
dailyTemperature <- surfaceObs[["stationObsStatistics"]][["temperature"]][["daily"]][[whichStation]]
# NOTE(review): ymd_hms()/ymd() use lubridate's default time zone handling -
# confirm it agrees with the time zone of traffic$發生時間.
availableTimes <- lubridate::ymd_hms(stationObs$dataTime)
availableDates <- lubridate::ymd(dailyTemperature[["dataDate"]])

climateDataList <- vector("list", nrow(traffic))
# seq_len() yields an empty sequence for an empty `traffic`; 1:nrow() would
# iterate over c(1, 0).
for (.x in seq_len(nrow(traffic))) {
  trafficX <- traffic[.x, ]
  trafficTime <- trafficX$發生時間

  # trafficX ==> whichDateTimeRow
  # Row of the observation whose timestamp is closest to the accident.
  whichDateTimeRow <- which.min(abs(availableTimes - trafficTime))

  # trafficX ==> whichDateRow
  # Row of the daily statistics whose date is closest to the accident date.
  whichDateRow <- which.min(
    abs(availableDates - lubridate::as_date(trafficTime))
  )

  climatedata1 <- list()
  climatedata1$date <- stationObs[whichDateTimeRow, 1]
  climatedata1$stationPressure <- stationObs[whichDateTimeRow, 2]
  climatedata1$temperature <- dailyTemperature[whichDateRow, 2:4]

  climateDataList[[.x]] <- climatedata1
}

# For each accident -------------------------------------------------------

source("https://raw.githubusercontent.com/tpemartin/110-2-R/main/R/traffice-accidents2.R", encoding="UTF-8")

  # Summarise accidents per hour and per county into dataFrame_byHour_byCounty.
  # Each row is one (hour, county) pair with the hour's total accident count,
  # the number of counties with at least one accident in that hour, the
  # county's own count, and the county's rank within the hour.

  # within(x, y)
  # Recode hour and county as factors so split() groups by hour and table()
  # reports every county level, including those with zero accidents.
  traffic <- within(
    traffic,
    {
      hour <- factor(hour)
      縣市 <- factor(縣市)
    }
  )

  split_traffic <- split(traffic, traffic$hour)

  # Zero-row template: it is the result when there is no hour to summarise and
  # is dropped by rbind() as soon as any hour contributes rows.
  dataFrame_byHour_byCounty <- data.frame(
    時段 = character(0),
    時段車禍次數 = integer(0),
    時段車禍縣市數目 = integer(0),
    #####
    車禍次數 = integer(0),
    縣市 = character(0),
    時段名次 = character(0)
  )

  # One data frame per hour, collected in a list and bound once at the end
  # rather than growing the result with rbind() on every iteration.
  hourlyFrames <- lapply(
    seq_along(split_traffic),
    function(.x) {
      # Accident counts per county for this hour, largest first.
      accidents_hourX <- sort(
        table(split_traffic[[.x]]$縣市),
        decreasing = TRUE
      )

      data.frame(
        時段 = names(split_traffic[.x]),
        時段車禍次數 = sum(accidents_hourX),
        時段車禍縣市數目 = length(accidents_hourX[accidents_hourX != 0]),
        車禍次數 = as.integer(accidents_hourX),
        縣市 = names(accidents_hourX),
        # rank() is ascending, so flip it to make the largest count rank 1.
        時段名次 = length(accidents_hourX) + 1 -
          rank(accidents_hourX, ties.method = "first")
      )
    }
  )

  dataFrame_byHour_byCounty <- do.call(
    rbind,
    c(list(dataFrame_byHour_byCounty), hourlyFrames)
  )
  dataFrame_byHour_byCounty |> View()

tpemartin / 110-2-R

Complete programming block of whichDateRow #18