pablobarbera / Rfacebook

Dev version of Rfacebook package: Access to Facebook API via R
http://cran.r-project.org/web/packages/Rfacebook
350 stars 250 forks source link

Error "type field is deprecated for versions v3.3 and higher" #192

Open sertal70 opened 3 years ago

sertal70 commented 3 years ago

Hi all, we are trying to retrieve posts from our public page using getPage function, but it returns the following error:

fb_page <- getPage(page="1234567890", token=fb_oauth, n=1)
Error in callAPI(url = url, token = token, api = api) :
(#12) type field is deprecated for versions v3.3 and higher

If we try forcing an older API version, the function still returns an error, but with a different message:

fb_page <- getPage(page="290577099409346", token=fb_oauth, n=1, api = 12)
Error in callAPI(url = url, token = token, api = api) :
Unknown path components: /290577099409346/posts

Has anyone run into the same issue? How could we sort it out?

Thanks!

thieled commented 2 years ago

Hi Sertal,

not sure if this is still an issue for you, but it might be an issue for others. As far as I know, this error occurs due to the discontinuation of an older version of the Facebook Graph API (< 3.2) that Rfacebook was written for.

The following code loads the Rfacebook package, some helper functions as defined in Rfacebook, and defines the function "getPage33" that works (almost) like the getPage function of Rfacebook. Note that this is a rather quick and dirty fix, as I did not manage to fix the function to return reactions as well.

library(Rfacebook) 

# some helper functions, necessary to be loaded in the environment so that the new getPage33 function works
# Extract one (possibly nested) field from every record in a list of parsed
# JSON records and return the values as a vector with one entry per record.
#
# Arguments:
#   lst   - list of records; each record is itself a (nested) list.
#   field - character vector giving the path to the value: 1 to 3 keys, or a
#           4-element path starting with "to" whose third element is a
#           numeric position (given as a string) rather than a name.
#
# Value:
#   A vector of length(lst). Records in which the path is absent get NA, or 0
#   when the path starts with "shares", or starts with "comments"/"likes" and
#   has at least two keys (these are counters).
#
# Stops with an explicit message when `field` has an unsupported shape.
unlistWithNA <- function(lst, field){
  depth <- length(field)
  positional <- depth == 4 && isTRUE(field[1] == "to")
  if (depth < 1 || depth > 4 || (depth == 4 && !positional)){
    stop("Unsupported 'field' specification: ", paste(field, collapse = ", "),
         call. = FALSE)
  }

  # Counters default to 0 when absent; everything else defaults to NA.
  zero_fill <- isTRUE(field[1] == "shares") ||
    (field[1] %in% c("comments", "likes") && !is.na(field[2]))
  fill <- if (zero_fill) 0 else NA

  # Walk down the path one key at a time. A record whose structure does not
  # match the path (missing key, atomic value where a list is expected,
  # position out of range) is treated as "value absent" instead of aborting
  # the whole extraction.
  pluck <- function(x){
    tryCatch({
      for (i in seq_len(depth)){
        key <- if (positional && i == 3) as.numeric(field[3]) else field[[i]]
        x <- x[[key]]
      }
      x
    }, error = function(e) NULL)
  }

  values <- lapply(lst, pluck)
  present <- !vapply(values, is.null, logical(1))

  vect <- rep(fill, length(lst))
  if (any(present)){
    vect[present] <- unlist(values[present])
  }
  return(vect)
}

# Parse timestamp strings of the form "YYYY-MM-DDTHH:MM:SS+0000".
#
# Arguments:
#   datestring - character vector of timestamps in the format above.
#   format     - "datetime" to get POSIXct values (time zone GMT), or "date"
#                to get Date values.
#
# Value:
#   A POSIXct or Date vector, depending on `format`.
#
# Stops when `format` is neither "datetime" nor "date". Without this check the
# local result was never assigned and the base function `date` was returned.
formatFbDate <- function(datestring, format="datetime") {
  fb_pattern <- "%Y-%m-%dT%H:%M:%S+0000"
  switch(format,
    datetime = as.POSIXct(datestring, format = fb_pattern, tz = "GMT"),
    date = as.Date(datestring, format = fb_pattern, tz = "GMT"),
    stop("'format' must be \"datetime\" or \"date\", not \"", format, "\"",
         call. = FALSE)
  )
}

# Flatten a list of post records into a data frame with one row per post.
#
# Arguments:
#   json - list of post records (nested lists), e.g. the `data` element of an
#          API response.
#
# Value:
#   A data frame with columns post_id, from_id, from_name, message,
#   created_time, likes_count, comments_count and shares_count. Each column is
#   produced by unlistWithNA(), which supplies the value used when a field is
#   absent from a record.
pageDataToDF <- function(json){
  df <- data.frame(
    post_id = unlistWithNA(json, 'id'),
    from_id = unlistWithNA(json, c('from', 'id')),
    from_name = unlistWithNA(json, c('from', 'name')),
    message = unlistWithNA(json, 'message'),
    created_time = unlistWithNA(json, 'created_time'),
    # story = unlistWithNA(json, 'story'),
    likes_count = unlistWithNA(json, c('likes', 'summary', 'total_count')),
    comments_count = unlistWithNA(json, c('comments', 'summary', 'total_count')),
    shares_count = unlistWithNA(json, c('shares', 'count')),
    # spelled out: `F` is an ordinary variable that can be reassigned
    stringsAsFactors = FALSE)
  return(df)
}

# New getPage function

# Retrieve posts from a Facebook page without requesting the `type` field.
#
# Arguments:
#   page      - page identifier, pasted into the request URL.
#   token     - access token, passed on to callAPI().
#   n         - maximum number of posts to return; requests are made in pages
#               of at most 25 posts.
#   since     - optional lower date bound; appended to the URL as `since=` and
#               converted with as.Date() for the local filtering below.
#   until     - optional upper date bound; appended to the URL as `until=`.
#   feed      - if TRUE query the `/feed` edge, otherwise `/posts`.
#   reactions - accepted for compatibility with getPage(); reactions are not
#               retrieved and a warning is raised when TRUE.
#   verbose   - if TRUE print the running number of posts retrieved.
#   api       - passed on to callAPI().
#
# Value:
#   The data frame built by pageDataToDF() with an added `scrape_time`
#   column, or an empty data.frame() when the first request returns no posts.
getPage33 <- function(page, token, n=25, since=NULL, until=NULL, feed=FALSE, reactions=FALSE, 
                      verbose=TRUE, api=NULL){

  if (isTRUE(reactions)){
    warning("getPage33() does not retrieve reactions; 'reactions' is ignored.",
            call. = FALSE)
  }

  # One request plus up to 3 retries while the response carries `error_code`.
  # Every attempt passes `api`, and a retry that succeeds is returned instead
  # of being discarded.
  fetch <- function(request_url){
    content <- callAPI(url=request_url, token=token, api=api)
    retries <- 0
    while (length(content$error_code)>0){
      if (retries==3){
        msg <- content$error_msg
        if (is.null(msg)) msg <- "API request still failing after 3 retries"
        stop(msg, call. = FALSE)
      }
      cat("Error!\n")
      Sys.sleep(0.5)
      retries <- retries + 1
      content <- callAPI(url=request_url, token=token, api=api)
    }
    content
  }

  endpoint <- if (feed) '/feed' else '/posts'
  url <- paste0('https://graph.facebook.com/', page, endpoint,
                '?fields=from,message,created_time,attachments,story,comments.summary(true)',
                ',likes.summary(true),shares')
  if (!is.null(until)){
    url <- paste0(url, '&until=', until)
  }
  if (!is.null(since)){
    url <- paste0(url, '&since=', since)
  }
  # never ask for more than 25 posts per request
  url <- paste0(url, "&limit=", min(n, 25))

  # making query; posts are counted only after any retries have finished
  content <- fetch(url)
  l <- length(content$data); if (verbose) cat(l, "posts ")

  if (length(content$data)==0){ 
    message("No public posts were found : ", page)
    return(data.frame())
  }

  df <- pageDataToDF(content$data)

  # sometimes posts older than 'since' are returned, so here
  # I'm making sure the function stops when that happens
  if (!is.null(since)){
    sincedate <- as.Date(since)
    mindate <- min(formatFbDate(df$created_time, 'date'))
  } else {
    # no lower bound: make the date condition of the paging loop always hold
    sincedate <- as.Date('1970/01/01')
    mindate <- as.Date(Sys.time())
  }

  ## paging if n>25
  if (n>25){
    df.list <- list(df)
    while (l<n && length(content$data)>0 && 
           !is.null(content$paging$`next`) && sincedate <= mindate){
      # waiting half a second before making next API call...
      Sys.sleep(0.5)
      content <- fetch(content$paging$`next`)
      l <- l + length(content$data)
      if (verbose && length(content$data)>0) cat(l, "posts ")

      new.df <- pageDataToDF(content$data)
      df.list <- c(df.list, list(new.df))

      # track the oldest date seen on this page for the loop condition
      if (!is.null(since) && nrow(new.df)>0){
        mindate <- min(formatFbDate(new.df$created_time, 'date'))
      }
    }
    df <- do.call(rbind, df.list)
  }

  # returning only those requested
  if (nrow(df)>n){
    df <- df[seq_len(n),]
  }

  # deleting posts created before the 'since' date
  if (!is.null(since)){
    dates <- formatFbDate(df$created_time, 'date')
    df <- df[dates>=sincedate,]
  }

  # add scrape time; rep() keeps this valid when no rows are left
  df$scrape_time <- rep(Sys.time(), nrow(df))

  # adding reactions data #### Somehow does not work 
  # NOTE(review): pageDataToDF() names the id column `post_id`, so `df$id`
  # below is NULL -- possibly why this failed; confirm before re-enabling.
  #  if (reactions==TRUE){
  #      re = getReactions(df$id, token=token, verbose=FALSE, api=api)
  #      df <- merge(df, re, all.x=TRUE)
  # # sorting
  #      df <- df[order(df$created_time),]
  #   }
  #  
  return(df)
}

Here's an example, using the Facebook page of "Die ZEIT":

fb_oauth <- "xxxxxxxxxxxxxxxxxxxxxxxxx" ## your valid oauth key

# Calling the api.
# `since` and `until` are given as plain "YYYY-MM-DD" strings: getPage33()
# pastes them into the request URL and converts `since` with as.Date().
zeit_posts_df <- getPage33(page = "114803848589834",
                        token = fb_oauth,
                        since = "2022-03-01",
                        until = "2022-03-09",
                        n = 50,
                        feed = FALSE,
                        reactions = FALSE,
                        verbose = TRUE,
                        api = NULL) 

zeit_posts_df # a data frame