Closed jimbuck closed 6 years ago
Feedly seems to handle playlist links just fine, but I can't seem to figure out how they do it. Maybe just scraping the page and caching the result? Or using the actual API, which I'd like to avoid.
Simple scraping will work for playlists with fewer than 100 videos. If there are more, then it should be very similar, just more "clicking"...
var scrapeIt = require("scrape-it")
// Sample playlists used to exercise the scraper. The sm/md/lg prefixes
// presumably stand for small/medium/large playlists; confirm before relying on it.
const smPlaylist = 'https://www.youtube.com/playlist?list=PLNMUSSKcxKjeZ6H1jFDy_BISOENWM-aP3';
const mdPlaylist = 'https://www.youtube.com/playlist?list=PLNMUSSKcxKje3jq1srkt8pVVmPt_ijgyW';
const lgPlaylist = 'https://www.youtube.com/playlist?list=PLlrxD0HtieHg7uB3_amVXvaRgxIcXLtYD';

/**
 * Scrapes the `lgPlaylist` sample and prints the scraped data followed by
 * the number of videos that were collected.
 *
 * Wrapped in an async function because `await` is not allowed at the top
 * level of a CommonJS script (this file loads its dependency via `require`).
 *
 * @returns {Promise<void>}
 */
async function main() {
  const playlist = await scanPlaylist(lgPlaylist);
  console.log(playlist);
  console.log(playlist.videos.length);
}

// Surface scraping failures instead of leaving an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Scrapes a YouTube playlist page and returns its metadata and videos.
 *
 * Only the entries matched by the selectors in the fetched page are
 * collected; compare `expectedVideos` with `videos.length` to detect a
 * partially scraped playlist.
 *
 * @param {string} playlistUrl - URL of the playlist page to scrape.
 * @returns {Promise<Object>} Resolves to the scraped data:
 *   `playlist`, `channel`, `channelUrl`, `expectedVideos`, `playlistUrl`
 *   and `videos` (each with `id`, `video`, `channel`, `channelUrl`,
 *   `videoUrl`). Rejects if `scrapeIt` rejects.
 */
async function scanPlaylist(playlistUrl) {
  const baseUrl = 'http://youtube.com';

  // Scraped hrefs are site-relative, so prefix them with the host.
  const toAbsoluteUrl = path => `${baseUrl}${path}`;

  // The count is rendered as text such as "1,234 videos". parseInt stops at
  // the first non-digit, so digit-group separators must be removed first or
  // "1,234" would be read as 1.
  const parseCount = text =>
    parseInt(String(text).replace(/(\d)[,.\s](?=\d)/g, '$1'), 10);

  const { data } = await scrapeIt(playlistUrl, {
    playlist: '#pl-header h1.pl-header-title',
    channel: '#pl-header .pl-header-details a',
    channelUrl: {
      selector: '#pl-header .pl-header-details a',
      attr: 'href'
    },
    expectedVideos: {
      selector: '#pl-header .pl-header-details li:nth-child(2)',
      convert: parseCount
    },
    videos: {
      listItem: '#pl-video-table [data-video-id]',
      data: {
        id: { attr: 'data-video-id' },
        video: 'a.pl-video-title-link',
        channel: '.pl-video-owner a',
        channelUrl: {
          selector: '.pl-video-owner a',
          attr: 'href',
          convert: toAbsoluteUrl
        }
      }
    }
  });

  data.playlistUrl = playlistUrl;
  data.channelUrl = toAbsoluteUrl(data.channelUrl);
  data.videos = data.videos.map(v => {
    v.videoUrl = `${baseUrl}/watch?v=${v.id}`;
    return v;
  });
  return data;
}
I have logic working for scraping playlists, just trying to optimize it for speed and memory (re-using the hidden browser window, etc.).
Closing since this is implemented as of abae7b62213c701af14fdb882dba2f5897813f67.
It looks like the playlists feed only returns the first 15 videos of the playlist, not the 15 most recent. Anybody know how to reverse it or change the "page"?
Example Playlist (more than 15 videos): https://www.youtube.com/playlist?list=PLtZHIFR5osfDjTfABmtcO_DuCgpJBRDk4
Example Feed: https://www.youtube.com/feeds/videos.xml?playlist_id=PLtZHIFR5osfDjTfABmtcO_DuCgpJBRDk4