Closed superstealthysheep closed 1 year ago
First step: reading the classes in a given week. What I've got:
// First step: fetch one week of the class schedule and log one CSV line per class meeting.
// `effdt` selects the week; this value is the current week at the time of writing.
// NOTE: may or may not work for accounts other than the author's — still needs testing.
const url = "https://www.myu.umn.edu/psp/psprd/EMPLOYEE/CAMP/s/WEBLIB_IS_DS.ISCRIPT1.FieldFormula.IScript_DrawSection?group=UM_SSS&section=UM_SSS_ACAD_SCHEDULE&pslnk=1&cmd=smartnav&effdt=2023-02-05";

// Create a (queryable!) detached DOM element from the fetched markup.
const el = document.createElement("html");
// The `await`s are required: without them the query below runs before the response arrives.
const response = await fetch(url);
if (!response.ok) {
  throw new Error(`Schedule request failed: ${response.status} ${response.statusText}`);
}
el.innerHTML = await response.text();

const classMeetings = el.querySelectorAll(".myu_calendar-class"); // all the class meetings this week
for (const meeting of classMeetings) {
  // The details markup is "<type><br><time range><br><room>" with stray newlines mixed in,
  // so drop the newlines before splitting. Missing segments default to "" instead of throwing.
  const [meetingType = "", timeRange = "", rawRoom = ""] = meeting
    .querySelector(".myu_calendar-class-details")
    .innerHTML.replace(/\n/g, "")
    .split("<br>");

  // Term code is in format `xyyx`, where `yy` is the year and `xx` = `13` is spring, `19` is fall.
  const semester = meeting.getAttribute("data-strm");
  const date = meeting.getAttribute("data-fulldate");
  const courseName = meeting.querySelector(".myu_calendar-class-name-color-referencer").innerText;
  const room = rawRoom.trim(); // the room segment arrives padded with spaces

  console.log([semester, date, timeRange, courseName, meetingType, room].join(","));
}
Sample console output:
1233,20230206,9:05 - 9:55 AM,CSCI 2041,Lecture,Tate Hall B20
1233,20230206,10:10 - 11:00 AM,MATH 5286H,Lecture,Vincent Hall 1
1233,20230206,1:25 - 2:15 PM,CHEN 3101,Lecture,Amundson Hall B75
1233,20230206,3:35 - 4:25 PM,CSCI 2021,Lecture,Bruininks Hall 330
1233,20230207,9:05 - 9:55 AM,CHEN 3101,Discussion,Akerman Hall 317
1233,20230207,11:15 - 12:05 PM,CSCI 2041,Laboratory,Walter Library B28
1233,20230208,9:05 - 9:55 AM,CSCI 2041,Lecture,Tate Hall B20
1233,20230208,10:10 - 11:00 AM,MATH 5286H,Lecture,Vincent Hall 1
1233,20230208,1:25 - 2:15 PM,CHEN 3101,Lecture,Amundson Hall B75
1233,20230208,2:30 - 3:20 PM,CSCI 2021,Laboratory,Keller Hall 1-250
1233,20230208,3:35 - 4:25 PM,CSCI 2021,Lecture,Bruininks Hall 330
1233,20230209,9:05 - 9:55 AM,CHEN 3101,Discussion,Akerman Hall 317
1233,20230210,9:05 - 9:55 AM,CSCI 2041,Lecture,Tate Hall B20
1233,20230210,10:10 - 11:00 AM,MATH 5286H,Lecture,Vincent Hall 1
1233,20230210,1:25 - 2:15 PM,CHEN 3101,Lecture,Amundson Hall B75
1233,20230210,3:35 - 4:25 PM,CSCI 2021,Lecture,Bruininks Hall 330
Could someone else try this out to see if it works for people other than me?
Okay, now it returns an array of plain JavaScript objects (one per class meeting) instead of logging CSV lines. I'm also figuring out how to wrap this into a function.
/**
 * Fetches the schedule page for one week and returns one plain object per class meeting.
 *
 * Must run in a browser context: it relies on the global `fetch` and `document`.
 *
 * @param {string} date - A day during the week in question (let's say the Sunday),
 *   in format "yyyy-mm-dd", WITH DASHES.
 * @returns {Promise<Array<Object>>} One object per meeting with the string fields
 *   `term`, `courseNum`, `date`, `meetingType`, `timeRange`, `room`, and `courseName`.
 * @throws {Error} If the schedule request completes with a non-OK HTTP status.
 */
const weekToJson = async (date) => {
  // Build the request URL; searchParams handles the escaping of the date value.
  const baseURL = "https://www.myu.umn.edu/psp/psprd/EMPLOYEE/CAMP/s/WEBLIB_IS_DS.ISCRIPT1.FieldFormula.IScript_DrawSection?group=UM_SSS&section=UM_SSS_ACAD_SCHEDULE&pslnk=1&cmd=smartnav";
  const url = new URL(baseURL);
  url.searchParams.set("effdt", date);

  const response = await fetch(url.href);
  if (!response.ok) {
    // Fail loudly rather than parsing an error page into an empty schedule.
    throw new Error(`Schedule request failed: ${response.status} ${response.statusText}`);
  }

  // Create a (queryable!) detached DOM element from the fetched markup.
  const el = document.createElement("html");
  el.innerHTML = await response.text();

  // Every `.myu_calendar-class` element is one class meeting in the requested week.
  const meetingObjects = Array.from(el.querySelectorAll(".myu_calendar-class"), (meetingEl) => {
    // The details markup is "<type><br><time range><br><room>" with stray newlines mixed in,
    // so drop the newlines before splitting. Missing segments default to "" instead of throwing.
    const [meetingType = "", timeRange = "", room = ""] = meetingEl
      .querySelector(".myu_calendar-class-details")
      .innerHTML.replace(/\n/g, "")
      .split("<br>");

    return {
      term: meetingEl.getAttribute("data-strm"), // in format `xyyx`, where `yy` is the year and `xx` = `13` is spring, `19` is fall
      courseNum: meetingEl.getAttribute("data-class-nbr"),
      date: meetingEl.getAttribute("data-fulldate"),
      meetingType,
      timeRange,
      room: room.trim(), // the room segment arrives padded with spaces
      courseName: meetingEl.querySelector(".myu_calendar-class-name-color-referencer").innerText,
    };
  });

  console.log(meetingObjects);
  return meetingObjects;
};
I've now written 2 functions: one that reads the classes in a given week (as written above), and one that gives more detailed info about a class (e.g. general meeting times, start and end dates) when supplied with a term, course number, and campus. Right now they're living in the branch at https://github.com/samyok/gophergrades/tree/superstealthysheep/scraping.
First, read what the list of courses is. Then, for each course, read