MartinKBeck / TwitterScraper

Repository containing all files relevant to my basic and advanced tweet scraping articles.
199 stars 117 forks

scraper exception #14

Closed · kiloppertry closed this issue 1 year ago

kiloppertry commented 1 year ago
```python
import os
os.environ["http_proxy"] = "http://127.0.0.1:56916"
os.environ["https_proxy"] = "http://127.0.0.1:56916"

import snscrape.modules.twitter as sntwitter
from transformers import pipeline  # imported but unused in this snippet
import pandas as pd
from tqdm import tqdm  # imported but unused in this snippet

# Scrape data from a specific user

# Creating list to append tweet data
tweets_list1 = []

# Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:QCompounding').get_items()):  # CharlieMunger00 Mayhem4Markets QCompounding
    if i > 20:  # number of tweets you want to scrape
        break
    tweets_list1.append([tweet.date, tweet.content, tweet.user.username, tweet.likeCount,
                         tweet.user.displayname, tweet.lang, tweet.hashtags, tweet.mentionedUsers,
                         tweet.inReplyToUser, tweet.quotedTweet, tweet.retweetedTweet, tweet.media])

# Creating a dataframe from the tweets list above
tweets_df1 = pd.DataFrame(tweets_list1, columns=['Datetime', 'Text', 'Username', 'Like Count',
                                                 'Display Name', 'Language', 'hashtags', 'mentionedUsers',
                                                 'inReplyToUser', 'quotedTweet', 'retweetedTweet', 'media'])

# Keep only tweets that are not replies (note: tf is overwritten just below)
tf = tweets_df1[tweets_df1['inReplyToUser'].isnull()]

# Download the first media attachment of every tweet that has media
from urllib.request import urlretrieve
tf = tweets_df1[tweets_df1['media'].notnull()]
for i in range(tf.shape[0]):
    try:
        kk = str(i) + 'i'
        urlretrieve(tf.iloc[i, -1][0].fullUrl, "d:/data/photo2/{}.jpg".format(kk))
    except Exception:
        continue
```

`File "e:\temp\ipykernel_16024\2936908550.py", line 14, in <cell line: 14> for i,tweet in enumerate(sntwitter.TwitterSearchScraper('from:QCompounding').get_items()): # CharlieMunger00 Mayhem4Markets QCompounding

File "D:\anaconda3\envs\tensorflow\lib\site-packages\snscrape\modules\twitter.py", line 680, in get_items for obj in self._iter_api_data('https://api.twitter.com/2/search/adaptive.json', params, paginationParams, cursor = self._cursor):

File "D:\anaconda3\envs\tensorflow\lib\site-packages\snscrape\modules\twitter.py", line 369, in _iter_api_data obj = self._get_api_data(endpoint, reqParams)

File "D:\anaconda3\envs\tensorflow\lib\site-packages\snscrape\modules\twitter.py", line 338, in _get_api_data self._ensure_guest_token()

File "D:\anaconda3\envs\tensorflow\lib\site-packages\snscrape\modules\twitter.py", line 301, in _ensure_guest_token r = self._get(self._baseUrl if url is None else url, headers = {'User-Agent': self._userAgent}, responseOkCallback = self._check_guest_token_response)

File "D:\anaconda3\envs\tensorflow\lib\site-packages\snscrape\base.py", line 216, in _get return self._request('GET', *args, **kwargs)

File "D:\anaconda3\envs\tensorflow\lib\site-packages\snscrape\base.py", line 212, in _request raise ScraperException(msg)

ScraperException: 4 requests to https://twitter.com/search?f=live&lang=en&q=from%3AQCompounding&src=spelling_expansion_revert_click failed, giving up.`
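One thing worth ruling out first: the snippet routes every request through a local proxy at 127.0.0.1:56916, and a dead or misconfigured proxy fails with the same "requests ... failed, giving up" message. A minimal diagnostic sketch, assuming the requests library is installed (it honors the http_proxy/https_proxy environment variables by default):

```python
# Hedged diagnostic sketch: verify the local proxy actually forwards traffic.
# requests reads the http_proxy/https_proxy environment variables set above.
import requests

try:
    r = requests.get("https://twitter.com", timeout=10)
    print("Proxy reachable, HTTP status:", r.status_code)
except requests.RequestException as e:
    print("Request through proxy failed:", e)
```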

kiloppertry commented 1 year ago

Can you help me find out the problem, please?

MartinKBeck commented 1 year ago

Snscrape has an ongoing issue. This is beyond me. It's tracked on their GitHub, and unfortunately it doesn't appear a fix is possible: https://github.com/JustAnotherArchivist/snscrape/issues/996
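Until that upstream issue is resolved, a minimal sketch for failing gracefully instead of crashing mid-notebook; it only catches snscrape's ScraperException (the class raised in the traceback above) and does not work around the blocked guest-token request:

```python
# Minimal sketch: catch snscrape's ScraperException so a failed scrape
# degrades gracefully. This does NOT fix the underlying problem tracked
# in https://github.com/JustAnotherArchivist/snscrape/issues/996.
import snscrape.base
import snscrape.modules.twitter as sntwitter

tweets = []
try:
    for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:QCompounding').get_items()):
        if i > 20:
            break
        tweets.append((tweet.date, tweet.content))
except snscrape.base.ScraperException as e:
    # Raised after snscrape exhausts its retries ("4 requests ... failed, giving up")
    print(f"Scraping failed, likely the guest-token issue: {e}")
```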