kgilds / kevinsblogIII

0 stars 0 forks source link

Make an RSS Feed #7

Open kgilds opened 1 week ago

kgilds commented 1 week ago

https://www.infoworld.com/article/2337738/how-to-create-your-own-rss-reader-with-r.html

kgilds commented 1 week ago
title: "Sharon's RSS Feed"
format: 
  html
embed-resources: true
editor: source
execute: 
  echo: false
  warning: false
  message: false

Load needed packages

kgilds commented 1 week ago
library(tidyRSS)
library(dplyr)
library(DT)
library(purrr)
library(stringr)
library(lubridate)
kgilds commented 1 week ago
# Lookup table of the feeds to follow: one row per feed, holding a display
# title and the feed URL, sorted by title.
myfeeds <- local({
  titles <- c(
    "All InfoWorld",
    "All Computerworld",
    "Mastodon rstats",
    "Mastodon QuartoPub",
    "R Bloggers",
    "R Weekly"
  )
  urls <- c(
    "https://www.infoworld.com/index.rss",
    "https://www.computerworld.com/index.rss",
    "http://fosstodon.org/tags/rstats.rss",
    "http://fosstodon.org/tags/QuartoPub.rss",
    "https://feeds.feedburner.com/Rbloggers",
    "https://rweekly.org/atom.xml"
  )
  arrange(data.frame(feed_title = titles, feed_url = urls), feed_title)
})
kgilds commented 1 week ago
# Quick check: parse every registered feed URL, keeping one result per feed.
feed_test <- myfeeds$feed_url |>
  map(tidyfeed)
kgilds commented 1 week ago
#' Fetch a single feed (first draft)
#'
#' @param the_feed_url URL of the feed to download and parse.
#' @param the_feed_dataframe Lookup table of feeds; not used yet in this
#'   draft.
#' @return Whatever `tidyRSS::tidyfeed()` returns for `the_feed_url`.
wrangle_feed <- function(the_feed_url, the_feed_dataframe = myfeeds) {
  tidyRSS::tidyfeed(the_feed_url)
}
kgilds commented 1 week ago

# Tag the parsed items with the display title that the lookup table lists for
# this feed URL; `[1]` keeps only the first match.
my_feed_data$feed_title <- the_feed_dataframe$feed_title[the_feed_dataframe$feed_url == the_feed_url][1]
kgilds commented 1 week ago

# Feeds parsed with entry_* columns (presumably Atom feeds -- confirm against
# tidyRSS) are renamed to the item_* names the rest of the code expects.
if ("entry_url" %in% names(my_feed_data)) {
  my_feed_data <- my_feed_data |>
    rename(
      item_title = entry_title,
      item_pub_date = entry_last_updated,
      item_link = entry_url,
      item_description = entry_content
    )
}
kgilds commented 1 week ago
# For feeds whose title contains "Mastodon", build the item title from the
# "@handle" path segment of the post URL.
if (str_detect(my_feed_data$feed_title[1], "Mastodon")) {
  my_feed_data <- my_feed_data |>
    mutate(
      # "\\1" inserts the captured handle; a bare "1" would replace every
      # matching link with the literal digit 1.
      item_author = str_replace_all(item_link, "^.*?/(@.*?)/.*?$", "\\1"),
      # If the pattern did not match, item_author is still the full link and
      # contains no "@", so fall back to a generic title.
      item_title = if_else(
        str_detect(item_author, "@"),
        paste0("Mastodon Post by ", item_author),
        "Mastodon Post"
      )
    )
}
kgilds commented 1 week ago

# Keep only the display columns and turn each headline into a clickable link
# that opens in a new tab.
my_feed_data <- my_feed_data |>
  select(
    Headline = item_title, Date = item_pub_date, URL = item_link,
    Description = item_description, Feed = feed_title
  ) |>
  mutate(
    # Only single quotes inside the literal: an inner double quote would end
    # the R string. URL is used as-is because feed links are already absolute.
    Headline = str_glue("<a target='_blank' title='{Headline}' href='{URL}' rel='noopener'>{Headline}</a>")
  )
kgilds commented 1 week ago
# Keep only the display columns and append a ">>" link to the original item
# after each description.
my_feed_data <- my_feed_data |>
  select(
    Headline = item_title, Date = item_pub_date, URL = item_link,
    Description = item_description, Feed = feed_title
  ) |>
  mutate(
    # Only single quotes inside the literal: an inner double quote would end
    # the R string. URL is used as-is because feed links are already absolute.
    Description = str_glue("{Description},  <a target='_blank' href='{URL}' rel='noopener'> >></a>")
  )
kgilds commented 1 week ago

#' Shorten one description to at most `max_chars` characters
#'
#' Operates on a single value; map it over a column (e.g. with
#' `purrr::map_chr()`) to process many descriptions.
#'
#' @param item_description A single string, or `NA`.
#' @param max_chars Maximum number of characters kept before truncation.
#' @return `""` when `item_description` is `NA`; the input unchanged when it
#'   has at most `max_chars` characters; otherwise the first `max_chars`
#'   characters with the trailing partial word replaced by `". . . "`.
trim_if_too_long <- function(item_description, max_chars = 600) {
  if (is.na(item_description)) {
    return("")
  }
  if (nchar(item_description) > max_chars) {
    item_description <- substr(item_description, 1, max_chars)
    # Drop the last whitespace plus the (possibly cut-off) word after it, so
    # the text never ends in the middle of a word.
    item_description <- sub(
      "\\s\\S+$", ". . . ", item_description, perl = TRUE
    )
  }
  item_description
}
kgilds commented 1 week ago

# Keep only the display columns, shorten long descriptions, then append a ">>"
# link to the original item.
my_feed_data <- my_feed_data |>
  select(
    Headline = item_title, Date = item_pub_date, URL = item_link,
    Description = item_description, Feed = feed_title
  ) |>
  mutate(
    # trim_if_too_long() handles one value at a time, hence map_chr().
    Description = purrr::map_chr(Description, trim_if_too_long),
    # Only single quotes inside the literal: an inner double quote would end
    # the R string. URL is used as-is because feed links are already absolute.
    Description = str_glue("{Description},  <a target='_blank' href='{URL}' rel='noopener'> >></a>")
  )
kgilds commented 1 week ago
# Keep only the display columns, strip the boilerplate "read in full" teaser,
# shorten long descriptions, then append a ">>" link to the original item.
my_feed_data <- my_feed_data |>
  select(
    Headline = item_title, Date = item_pub_date, URL = item_link,
    Description = item_description, Feed = feed_title
  ) |>
  mutate(
    Description = str_remove_all(Description, "To read this article in full, please click here"),
    # trim_if_too_long() handles one value at a time, hence map_chr().
    Description = purrr::map_chr(Description, trim_if_too_long),
    # Only single quotes inside the literal: an inner double quote would end
    # the R string. URL is used as-is because feed links are already absolute.
    Description = str_glue("{Description},  <a target='_blank' href='{URL}' rel='noopener'> >></a>")
  )
kgilds commented 1 week ago
#' Download one feed and reshape it into the common display format
#'
#' @param the_feed_url URL of the feed to download and parse.
#' @param the_feed_dataframe Data frame with `feed_title` and `feed_url`
#'   columns, used to look up the display title for `the_feed_url`.
#' @return A data frame with columns `Headline`, `Date`, `URL`, `Description`
#'   and `Feed`. `Description` is cleaned, shortened and followed by an HTML
#'   ">>" link; `Date` is text formatted as "YYYY-MM-DD HH:MM".
wrangle_feed <- function(the_feed_url, the_feed_dataframe = myfeeds) {
  my_feed_data <- tidyfeed(the_feed_url)

  # Tag every item with the display title registered for this feed URL.
  my_feed_data$feed_title <-
    the_feed_dataframe$feed_title[the_feed_dataframe$feed_url == the_feed_url][1]

  # Feeds parsed with entry_* columns (presumably Atom feeds -- confirm
  # against tidyRSS) are renamed to the item_* names used below.
  if ("entry_url" %in% names(my_feed_data)) {
    my_feed_data <- my_feed_data |>
      rename(
        item_title = entry_title,
        item_pub_date = entry_last_updated,
        item_link = entry_url,
        item_description = entry_content
      )
  }

  # For Mastodon feeds, build the title from the "@handle" segment of the
  # post URL.
  if (str_detect(my_feed_data$feed_title[1], "Mastodon")) {
    my_feed_data <- my_feed_data |>
      mutate(
        # "\\1" inserts the captured handle; a bare "1" would replace every
        # matching link with the literal digit 1.
        item_author = str_replace_all(item_link, "^.*?/(@.*?)/.*?$", "\\1"),
        item_title = if_else(
          str_detect(item_author, "@"),
          paste0("Mastodon Post by ", item_author),
          "Mastodon Post"
        )
      )
  }

  my_feed_data |>
    select(
      Headline = item_title, Date = item_pub_date, URL = item_link,
      Description = item_description, Feed = feed_title
    ) |>
    mutate(
      Description = str_remove_all(Description, "To read this article in full, please click here"),
      # trim_if_too_long() handles one value at a time, hence map_chr().
      Description = purrr::map_chr(Description, trim_if_too_long),
      # Only single quotes inside the literal: an inner double quote would end
      # the R string. URL is used as-is because feed links are already
      # absolute.
      Description = str_glue("{Description},  <a target='_blank' href='{URL}' rel='noopener'> >></a>"),
      Date = format_ISO8601(Date, precision = "ymdhm"),
      # ISO 8601 separates date and time with "T"; show a space instead.
      Date = str_replace_all(Date, "T", " ")
    )
}
kgilds commented 1 week ago
# Variant of wrangle_feed() that returns NULL instead of raising an error, so
# one failing feed does not abort the whole run.
wrangle_feed_safely <- purrr::possibly(.f = wrangle_feed, otherwise = NULL)
kgilds commented 1 week ago
# Wrangle every registered feed, stack the results into one data frame, and
# sort by Date in descending order.
mydata <- myfeeds$feed_url |>
  map_df(wrangle_feed_safely) |>
  arrange(desc(Date))
kgilds commented 1 week ago
# Wrangle every registered feed, stack the results into one data frame, and
# sort by Date in descending order.
mydata <- myfeeds$feed_url |>
  map_df(wrangle_feed_safely) |>
  arrange(desc(Date))