Closed Robinlovelace closed 5 years ago
Here's a beefy real-world reproducible example. The last line should work. Heads-up @layik when it does, I think this will be near-CRAN-able. Thoughts?
# install required package:
devtools::install_github("ATFutures/calendar")
# Manual input ----
# First thought was to capture the data hosted at:
# http://timetable.leeds.ac.uk/teaching/201819/reporting/individual;?objectclass=module&idtype=name&identifier=TRAN5340M01&&template=SWSCUST+module+Individual&days=1-7&weeks=1-52&periods=1-21
# found from: http://timetable.leeds.ac.uk/teaching/201819/module.htm
library(rvest)
# vignette("selectorgadget") # check how it works...
u = "http://timetable.leeds.ac.uk/teaching/201819/reporting/TextSpreadsheet?objectclass=module&idtype=name&identifier=TRAN5340M01&template=SWSCUST+module+individual&days=1-7&periods=1-21&weeks=1-52"
day_of_week = 2
session_ids = c(
"intro",
"software",
"structure",
"cleaning",
"accessing",
"processing",
"viz",
"ml",
"project",
"prof"
)
session_descriptions = c(
"Introduction to transport data science",
"Software for practical data science",
"The structure of transport data",
"Data cleaning and subsetting",
"Accessing data from web sources",
"Routing",
"Data visualization",
"Machine learning",
"Project work",
"Professional issues"
)
html = read_html(u)
tt_html = html_nodes(html, ".spreadsheet") %>% html_text()
extract_column = function(x, n) {
nodes = html_nodes(x, paste0(".spreadsheet td:nth-child(", n, ")"))
html_text(nodes)[-1]
}
extract_column_name = function(x, n) {
nodes = html_nodes(x, paste0(".spreadsheet td:nth-child(", n, ")"))
html_text(nodes)[1]
}
cols = c(1:3, 7:11)
colum_names = purrr::map_chr(cols, ~ extract_column_name(html, .x))
tt_list = purrr::map(cols, ~ extract_column(html, .x))
names(tt_list) = colum_names
tt_df = tibble::as_tibble(tt_list)
tt_df$Weekday = day_of_week
# get dates -----
w14_start = as.Date("2019-01-28")
week_num = c(14:22, paste0("E", 1:4), 23:30)
n_weeks = length(week_num)
week_commencing = seq(from = w14_start, by = "week", length.out = n_weeks)
weeks = tibble::data_frame(week_num, week_commencing)
x = tt_df$Weeks[1]
extract_weeks = function(x) {
r_expression = paste0(
"c(",
gsub(pattern = "-", replacement = ":", x = x),
")"
)
eval(parse(text = r_expression))
}
extract_week_commencing = function(x) {
weeks_n = tibble::data_frame(week_num = as.character(extract_weeks(x)))
dplyr::inner_join(weeks_n, weeks)
}
extract_week_commencing(x)
extract_attributes = function(tt_df) {
tt_i = extract_week_commencing(tt_df$Weeks[1])
tt_i$code = tt_df$`Module code (or codes if jointly taught)`[1]
tt_i$type = tt_df$`Type of activity`[1]
tt_i$LOCATION = tt_df$Location[1]
tt_i$day_of_week = tt_df$Weekday[1]
tt_i$DTSTART = paste(tt_i$week_commencing, tt_df$Start[1])
tt_i$DTEND = paste(tt_i$week_commencing, tt_df$End[1])
tt_i$size = tt_df$Size[1]
tt_i$staff = tt_df$`Teaching staff`[1]
tt_i
}
# iterate over each type of event
i = 1
for(i in seq(nrow(tt_df))) {
tti = extract_attributes(tt_df[i, ])
if(i == 1) {
tt = tti
} else (
tt = rbind(tt, tti)
)
}
tt$SUMMARY = paste0(rep(session_ids, 2), ", ", gsub(pattern = " 1| Based Learning 1", replacement = "", tt$type))
tt$id = session_ids
tt$DESCRIPTION = paste0(rep(session_descriptions, 2), ", ", gsub(pattern = " 1| Based Learning 1", replacement = "", tt$type))
tt = dplyr::arrange(tt, desc(DTSTART))
tt_min = dplyr::select(tt, SUMMARY, DESCRIPTION, DTSTART, DTEND, LOCATION)
ic = ical(tt_min)
Looking into it...
Great work Robin, apologies for lack of contribution here.
So far the
ical()
function has an example of going in one direction:Created on 2019-01-14 by the reprex package (v0.2.1)
But not the other direction, e.g.:
Created on 2019-01-14 by the reprex package (v0.2.1)