kevinzg / facebook-scraper

Scrape Facebook public pages without an API key
MIT License
2.42k stars 631 forks source link

Reactors info isn't scraping #1121

Open muhammad-faizan087 opened 1 week ago

muhammad-faizan087 commented 1 week ago

Below is the code that produces an output file containing all the data about a particular post, but I'm getting a null value for reactors. I tried updating the module and adding waits, but nothing changes.

import argparse
import json
import logging
import sys
import time

import requests

import facebook_scraper as fs

Set up logging for debugging

logging.basicConfig( level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s", handlers=[logging.StreamHandler()], )

logging.debug("Starting the Facebook scraper script...")

Load custom headers from mbasicHeaders.json

headers = {} try: with open( "/home/buyfans/domains/buyfans.pl/public_html/scraper2024-2/new_venv/bin/mbasicHeaders.json", "r", ) as file: headers = json.load(file) logging.debug("Headers loaded successfully.") except FileNotFoundError: logging.error("mbasicHeaders.json file not found. Proceeding without headers.") except json.JSONDecodeError as e: logging.error(f"Error decoding JSON in headers: {e}")

Argument parsing

# Parse the required command-line arguments: post ID/URL, output file path,
# and the cookies file used to authenticate the scraping session.
parser = argparse.ArgumentParser()
parser.add_argument("-pid", "--post-id", help="Post ID (URL)", required=True)
parser.add_argument("-f", "--output-file", help="Output file", required=True)
parser.add_argument("-c", "--cookies", help="Cookies file", required=True)
args = parser.parse_args()

logging.debug(f"Post ID (URL): {args.post_id}")
logging.debug(f"Output file: {args.output_file}")
logging.debug(f"Cookies file: {args.cookies}")

Function to handle retries in case of connection issues or failures

def fetch_post_with_retries(post_url, options, cookies, headers, retries=3, delay=5):
    """Scrape a single Facebook post, retrying on transient network errors.

    Args:
        post_url: Post ID or URL accepted by facebook_scraper.
        options: Options dict forwarded to fs.get_posts (reactors, comments, ...).
        cookies: Cookies file path forwarded to fs.get_posts.
        headers: Custom request headers. NOTE(review): currently unused — they
            are never passed to fs.get_posts; confirm whether they should be
            applied to the underlying session or removed from the signature.
        retries: Number of attempts before giving up.
        delay: Seconds to sleep between failed attempts.

    Returns:
        The scraped post data, or None when no data could be fetched.
    """
    for attempt in range(retries):
        try:
            # Start scraping the Facebook post.
            logging.debug(f"Starting scraping for post URL: {post_url}")

            # Scrape the post using facebook_scraper.
            gen = fs.get_posts(post_urls=[post_url], options=options, cookies=cookies)
            post_data = next(gen)
            logging.debug(f"Successfully scraped data: {post_data}")
            return post_data
        except StopIteration:
            # Generator produced nothing: the post is unavailable, so
            # retrying will not help — bail out immediately.
            logging.error(f"No data found for the post URL: {post_url}")
            return None
        except requests.exceptions.RequestException as e:
            logging.error(
                f"Error fetching post: {e}, retrying ({attempt + 1}/{retries})..."
            )
            time.sleep(delay)  # Wait before retrying
    logging.error(f"Failed to fetch the post after {retries} retries.")
    return None

Options to ensure we retrieve complete data

options = { "reactors": True, # Fetch reactors (people who reacted) "reactions": True, # Fetch reactions data (like, love, etc.) "comments": True, # Fetch comments "comments_full": True, # Fetch the full comment thread "allow_extra_requests": True, # Enable additional requests for more data (shares, etc.) }

# Top-level driver: fetch the post and persist it as JSON.
try:
    # Fetch the post data with retries.
    post_data = fetch_post_with_retries(args.post_id, options, args.cookies, headers)

    if post_data:
        # Open output file and write the data in JSON format.
        # default=str stringifies non-JSON types (e.g. datetimes) in the post.
        with open(args.output_file, "w") as json_file:
            logging.debug(f"Writing data to {args.output_file}")
            json.dump(post_data, json_file, default=str, indent=2)
        logging.info(f"Post data saved to {args.output_file}")
    else:
        logging.error(f"Failed to scrape the post: {args.post_id}")
except Exception as e:
    # Script boundary: log anything unexpected instead of crashing with a traceback.
    logging.error(f"An unexpected error occurred: {e}")

kbalicki commented 1 week ago

Same problem here. On every attempt, reactors/reactions always come back NULL.

Has anybody succeeded here?