shaikhsajid1111 / facebook_page_scraper

Scrapes the front end of Facebook pages with no limitations and provides a feature to turn the data into structured JSON or CSV
https://pypi.org/project/facebook-page-scraper/
MIT License
209 stars 62 forks source link

'tuple' object has no attribute 'tb_frame' #94

Open lorellav opened 4 months ago

lorellav commented 4 months ago

Hello, I ran into the following issue:

'tuple' object has no attribute 'tb_frame'

while running this code

from facebook_page_scraper import Facebook_scraper

page_list = ['KimKardashian','arnold','joebiden','eminem','smosh','SmoshGames','ibis','Metallica','cnn']

proxy_port = 10001

posts_count = 5000
browser = "firefox"

timeout = 600 #600 seconds
headless = False
directory = "C:\\facebook_scrape_results"

for page in page_list:

proxy = f'myusername,mypassword@us.smartproxy.com:{proxy_port}'

scraper = Facebook_scraper(page, posts_count, browser, proxy=proxy, timeout=timeout, headless=headless) 
filename = page
scraper.scrap_to_csv(filename, directory)

proxy_port += 1

I'm running this code in a Jupyter notebook. This is my environment:

IPython : 8.12.0 ipykernel : 6.19.2 ipywidgets : 8.0.6 jupyter_client : 8.1.0 jupyter_core : 5.3.0 jupyter_server : 1.23.4 jupyterlab : not installed nbclient : 0.5.13 nbconvert : 6.5.4 nbformat : 5.7.0 notebook : 6.5.3 qtconsole : 5.4.3 traitlets : 5.7.1

And this is the full error message:

TimeoutException                          Traceback (most recent call last)
File ~\AppData\Local\miniconda3\lib\site-packages\facebook_page_scraper\driver_utilities.py:131, in 
Utilities.__wait_for_element_to_appear(driver, layout)
130         body.send_keys(Keys.PAGE_DOWN)
--> 131     WebDriverWait(driver, 30).until(EC.presence_of_element_located(
132         (By.CSS_SELECTOR, '.userContentWrapper')))
133 elif layout == "new":

File ~\AppData\Local\miniconda3\lib\site-packages\selenium\webdriver\support\wait.py:89, in WebDriverWait.until(self, method, message)
     88         break
---> 89 raise TimeoutException(message, screen, stacktrace)

TimeoutException: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:191:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:509:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

During handling of the above exception, another exception occurred:

SystemExit                                Traceback (most recent call last)
    [... skipping hidden 1 frame]

Cell In[16], line 28
     27 filename = page
---> 28 scraper.scrap_to_csv(filename, directory)
     30 # Rotating our proxy to the next port so we could get a new IP and avoid blocks

File ~\AppData\Local\miniconda3\lib\site-packages\facebook_page_scraper\scraper.py:154, in Facebook_scraper.scrap_to_csv(self, filename, directory)
    153 try:
--> 154     data = self.scrap_to_json()  # get the data in JSON format from the same class method
    155     # convert it and write to CSV

File ~\AppData\Local\miniconda3\lib\site-packages\facebook_page_scraper\scraper.py:93, in Facebook_scraper.scrap_to_json(self)
     92 # wait for post to load
---> 93 Utilities._Utilities__wait_for_element_to_appear(
     94     self.__driver, self.__layout)
     95 # scroll down to bottom most

File ~\AppData\Local\miniconda3\lib\site-packages\facebook_page_scraper\driver_utilities.py:142, in Utilities.__wait_for_element_to_appear(driver, layout)
    141     # exit the program, because if posts does not exists,we cannot go further
--> 142     sys.exit(1)
    143 except Exception as ex:

SystemExit: 1

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
    [... skipping hidden 1 frame]

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\interactiveshell.py:2092, in InteractiveShell.showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
   2089 if exception_only:
   2090     stb = ['An exception has occurred, use %tb to see '
   2091            'the full traceback.\n']
-> 2092     stb.extend(self.InteractiveTB.get_exception_only(etype,
   2093                                                      value))
   2094 else:
   2095     try:
   2096         # Exception classes can customise their traceback - we
   2097         # use this in IPython.parallel for exceptions occurring
   2098         # in the engines. This should return a list of strings.

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\ultratb.py:644, in ListTB.get_exception_only(self, etype, value)
    636 def get_exception_only(self, etype, value):
    637     """Only print the exception type and message, without a traceback.
    638 
    639     Parameters
   (...)
    642     value : exception value
    643     """
--> 644     return ListTB.structured_traceback(self, etype, value)

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\ultratb.py:511, in ListTB.structured_traceback(self, etype, evalue, etb, tb_offset, context)
    508     chained_exc_ids.add(id(exception[1]))
    509     chained_exceptions_tb_offset = 0
    510     out_list = (
--> 511         self.structured_traceback(
    512             etype, evalue, (etb, chained_exc_ids),
    513             chained_exceptions_tb_offset, context)
    514         + chained_exception_message
    515         + out_list)
    517 return out_list

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\ultratb.py:1310, in AutoFormattedTB.structured_traceback(self, etype, value, tb, tb_offset, number_of_lines_of_context)
   1308 else:
   1309     self.tb = tb
-> 1310 return FormattedTB.structured_traceback(
   1311     self, etype, value, tb, tb_offset, number_of_lines_of_context)

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\ultratb.py:1199, in FormattedTB.structured_traceback(self, etype, value, tb, tb_offset, number_of_lines_of_context)
   1196 mode = self.mode
   1197 if mode in self.verbose_modes:
   1198     # Verbose modes need a full traceback
-> 1199     return VerboseTB.structured_traceback(
   1200         self, etype, value, tb, tb_offset, number_of_lines_of_context
   1201     )
   1202 elif mode == 'Minimal':
   1203     return ListTB.get_exception_only(self, etype, value)

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\ultratb.py:1052, in VerboseTB.structured_traceback(self, etype, evalue, etb, tb_offset, number_of_lines_of_context)
   1043 def structured_traceback(
   1044     self,
   1045     etype: type,
   (...)
   1049     number_of_lines_of_context: int = 5,
   1050 ):
   1051     """Return a nice text document describing the traceback."""
-> 1052     formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,
   1053                                                            tb_offset)
   1055     colors = self.Colors  # just a shorthand + quicker name lookup
   1056     colorsnormal = colors.Normal  # used a lot

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\ultratb.py:953, in VerboseTB.format_exception_as_a_whole(self, etype, evalue, etb, number_of_lines_of_context, tb_offset)
    950 assert isinstance(tb_offset, int)
    951 head = self.prepare_header(etype, self.long_header)
    952 records = (
--> 953     self.get_records(etb, number_of_lines_of_context, tb_offset) if etb else []
    954 )
    956 frames = []
    957 skipped = 0

File ~\AppData\Local\miniconda3\lib\site-packages\IPython\core\ultratb.py:1021, in VerboseTB.get_records(self, etb, number_of_lines_of_context, tb_offset)
   1019 while cf is not None:
   1020     try:
-> 1021         source_file = inspect.getsourcefile(etb.tb_frame)
   1022         lines, first = inspect.getsourcelines(etb.tb_frame)
   1023     except OSError:

AttributeError: 'tuple' object has no attribute 'tb_frame'

Thank you

Lorella.

shaikhsajid1111 commented 4 months ago

It seems to be an issue with the wait for the page elements. Try watching the browser after setting headless to False.

lorellav commented 4 months ago

[Screenshot 2024-02-12 182540]

Thanks! So in the browser I get the message 'this content is not available at the moment'.

shaikhsajid1111 commented 4 months ago

Okay, so the crawler has no handling for this page, which is why you're getting this error.

Neelesh2512 commented 3 months ago

I am getting the same error with headless=True, but it works fine with headless=False.