Open sertal70 opened 3 years ago
Hi Sertal,
not sure if this is still an issue for you, but it might be an issue for others. As far as I know, this error occurs due to the discontinuation of an older version of the Facebook Graph API (< 3.2) that Rfacebook was written for.
The following code loads the Rfacebook package, some helper functions as defined in Rfacebook, and defines the function "getPage33" that works (almost) like the getPage function of Rfacebook. Note that this is a rather quick and dirty fix, as I did not manage to fix the function to return reactions as well.
library(Rfacebook)
# some helper functions, necessary to be loaded in the environment so that the new getPage33 function works
unlistWithNA <- function(lst, field){
if (length(field)==1){
notnulls <- unlist(lapply(lst, function(x) !is.null(x[[field]])))
vect <- rep(NA, length(lst))
vect[notnulls] <- unlist(lapply(lst, function(x) x[[field]]))
}
if (length(field)==2){
notnulls <- unlist(lapply(lst, function(x) !is.null(x[[field[1]]][[field[2]]])))
vect <- rep(NA, length(lst))
vect[notnulls] <- unlist(lapply(lst, function(x) x[[field[1]]][[field[2]]]))
}
if (field[1]=="shares"){
notnulls <- unlist(lapply(lst, function(x) !is.null(x[[field[1]]][[field[2]]])))
vect <- rep(0, length(lst))
vect[notnulls] <- unlist(lapply(lst, function(x) x[[field[1]]][[field[2]]]))
}
if (length(field)==3){
notnulls <- unlist(lapply(lst, function(x)
tryCatch(!is.null(x[[field[1]]][[field[2]]][[field[3]]]),
error=function(e) FALSE)))
vect <- rep(NA, length(lst))
vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) x[[field[1]]][[field[2]]][[field[3]]]))
}
if (length(field)==4 & field[1]=="to"){
notnulls <- unlist(lapply(lst, function(x)
tryCatch(!is.null(x[[field[1]]][[field[2]]][[as.numeric(field[3])]][[field[4]]]),
error=function(e) FALSE)))
vect <- rep(NA, length(lst))
vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) x[[field[1]]][[field[2]]][[as.numeric(field[3])]][[field[4]]]))
}
if (field[1] %in% c("comments", "likes") & !is.na(field[2])){
notnulls <- unlist(lapply(lst, function(x) !is.null(x[[field[1]]][[field[2]]][[field[3]]])))
vect <- rep(0, length(lst))
vect[notnulls] <- unlist(lapply(lst, function(x) x[[field[1]]][[field[2]]][[field[3]]]))
}
return(vect)
}
formatFbDate <- function(datestring, format="datetime") {
if (format=="datetime"){
date <- as.POSIXct(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
}
if (format=="date"){
date <- as.Date(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
}
return(date)
}
pageDataToDF <- function(json){
df <- data.frame(
post_id = unlistWithNA(json, 'id'),
from_id = unlistWithNA(json, c('from', 'id')),
from_name = unlistWithNA(json, c('from', 'name')),
message = unlistWithNA(json, 'message'),
created_time = unlistWithNA(json, 'created_time'),
# story = unlistWithNA(json, 'story'),
likes_count = unlistWithNA(json, c('likes', 'summary', 'total_count')),
comments_count = unlistWithNA(json, c('comments', 'summary', 'total_count')),
shares_count = unlistWithNA(json, c('shares', 'count')),
stringsAsFactors=F)
return(df)
}
# New getPage function
getPage33 <- function(page, token, n=25, since=NULL, until=NULL, feed=FALSE, reactions=FALSE,
verbose=TRUE, api=NULL){
url <- paste0('https://graph.facebook.com/', page,
'/posts?fields=from,message,created_time,attachments,story,comments.summary(true)',
',likes.summary(true),shares')
if (feed){
url <- paste0('https://graph.facebook.com/', page,
'/feed?fields=from,message,created_time,attachments,story,comments.summary(true)',
',likes.summary(true),shares')
}
if (!is.null(until)){
url <- paste0(url, '&until=', until)
}
if (!is.null(since)){
url <- paste0(url, '&since=', since)
}
if (n<=25){
url <- paste0(url, "&limit=", n)
}
if (n>25){
url <- paste0(url, "&limit=25")
}
# making query
content <- callAPI(url=url, token=token, api=api)
l <- length(content$data); if (verbose) cat(l, "posts ")
## retrying 3 times if error was found
error <- 0
while (length(content$error_code)>0){
cat("Error!\n")
Sys.sleep(0.5)
error <- error + 1
content <- callAPI(url=url, token=token)
if (error==3){ stop(content$error_msg) }
}
if (length(content$data)==0){
message("No public posts were found : ", page)
return(data.frame())
}
df <- pageDataToDF(content$data)
# sometimes posts older than 'until' are returned, so here
# I'm making sure the function stops when that happens
if (!is.null(since)){
dates <- formatFbDate(df$created_time, 'date')
mindate <- min(dates)
sincedate <- as.Date(since)
}
if (is.null(since)){
sincedate <- as.Date('1970/01/01')
mindate <- as.Date(Sys.time())
}
## paging if n>25
if (n>25){
df.list <- list(df)
while (l<n & length(content$data)>0 &
!is.null(content$paging$`next`) & sincedate <= mindate){
# waiting one second before making next API call...
Sys.sleep(0.5)
url <- content$paging$`next`
content <- callAPI(url=url, token=token, api=api)
l <- l + length(content$data)
if (length(content$data)>0){ if (verbose) cat(l, "posts ") }
## retrying 3 times if error was found
error <- 0
while (length(content$error_code)>0){
cat("Error!\n")
Sys.sleep(0.5)
error <- error + 1
content <- callAPI(url=url, token=token, api=api)
if (error==3){ stop(content$error_msg) }
}
new.df <- pageDataToDF(content$data)
df.list <- c(df.list, list(new.df))
#
if (!is.null(since) & nrow(new.df)>0){
dates <- formatFbDate(new.df$created_time, 'date')
mindate <- min(dates)
}
}
df <- do.call(rbind, df.list)
}
# returning only those requested
if (nrow(df)>n){
df <- df[1:n,]
}
# # deleting posts after specified date
if (!is.null(since)){
dates <- formatFbDate(df$created_time, 'date')
df <- df[dates>=sincedate,]
}
# add scrape time
df$scrape_time <- Sys.time()
# adding reactions data #### Somehow does not work
# if (reactions==TRUE){
# re = getReactions(df$id, token=token, verbose=FALSE, api=api)
# df <- merge(df, re, all.x=TRUE)
# # sorting
# df <- df[order(df$created_time),]
# }
#
return(df)
}
Here's an example, using the Facebook page of "Die ZEIT":
fb_oauth <- "xxxxxxxxxxxxxxxxxxxxxxxxx" ## your valid oauth key
# Calling the api
zeit_posts_df <- getPage33(page = "114803848589834",
token = fb_oauth,
since = as.POSIXct("2022-03-01", format="%Y-%m-%d"),
until = as.POSIXct("2022-03-09", format="%Y-%m-%d"),
n = 50,
feed = F,
reactions = F,
verbose = T,
api = NULL)
zeit_posts_df # a data frame
Hi all, we are trying to retrieve posts from our public page using
getPage
function, but it returns the following error: If we try forcing an older API version, the function still returns an error but with a different message:
Has anyone run into the same issue? How could we sort it out?
Thanks!