Web scraping | sapply function | Error in readBin(5L, "raw", 65536L) : Failure when receiving data from the peer

Hello together,

I am struggling with web scraping. Chunk A works fine but Chunk B doesn't, somehow. If you could possibly run code A and B on your computer and give me a hint, what's wrong with B?

Lots of kisses to everyone who is already puzzling!

Ciocclata

___CHUNK A (works)____

library(rvest)
library(dplyr)
library(data.table)
library(lubridate)

statements = data.table(NULL)

#2005,2004,2002: (YES)

for (pages_years in c(2005,2004,2002)){
    link = paste0("https://www.federalreserve.gov/monetarypolicy/fomchistorical", pages_years , ".htm")
    page = read_html(link)

    statement_links = page %>% html_nodes(".col-md-6+ .col-md-6 p:nth-child(2) a") %>% html_attr("href") %>% paste("https://www.federalreserve.gov", ., sep="")

    get_statement = function(statement_link){
    statement_page = read_html(statement_link)
    statement = statement_page %>% html_nodes("td p") %>% html_text() %>% paste(collapse = ",")
    return(statement)
  }
    statement = sapply(statement_links, FUN = get_statement, USE.NAMES = F)

    get_date = function(date_link){
    date_page = read_html(date_link)
    date = date_page %>% html_nodes("i") %>% html_text()
    return(date)
  }
    date = sapply(statement_links, FUN = get_date, USE.NAMES = F)
    date = format(mdy(date), "%Y-%m-%d")

    print(paste("Page:", pages_years))

    statements = rbind(statements, data.table(date, statement, stringsAsFactors = F))
}

DT = statements
DT = DT[date<=2021]
DT = DT[order(date)]

View(DT)

___CHUNK B (doesn't work)__

library(rvest)
library(dplyr)
library(data.table)
library(lubridate)

statements = data.table(NULL)

#2010,2008,2007: (NO)

for (pages_years in c(2010,2008,2007)){
    link = paste0("https://www.federalreserve.gov/monetarypolicy/fomchistorical", pages_years , ".htm")
    page = read_html(link)

    statement_links = page %>% html_nodes(".col-md-12 p:nth-child(1) a , .col-md-6+ .col-md-6 p:nth-child(2) a") %>% html_attr("href") %>% paste("https://www.federalreserve.gov", ., sep="")

    get_statement = function(statement_link){
    statement_page = read_html(statement_link)
    statement = statement_page %>% html_nodes(".hidden-sm+ .col-md-8") %>% html_text()
    return(statement)
  }
    statement = sapply(statement_links, FUN = get_statement, USE.NAMES = F)
    get_date = function(date_link){
    date_page = read_html(date_link)
    date = date_page %>% html_nodes(".article__time") %>% html_text()
    return(date)
  }
    date = sapply(statement_links, FUN = get_date, USE.NAMES = F)
    date = format(mdy(date), "%Y-%m-%d")
    print(paste("Page:", pages_years))
    statements = rbind(statements, data.frame(date, statement, stringsAsFactors = F))
}

DT = statements
DT = DT[date<=2021]
DT = DT[order(date)]

View(DT)

ropensci / EML

Web scraping | sapply function | Error in readBin(5L, "raw", 65536L) : Failure when receiving data from the peer #337