Cell In[9], line 20, in ETL_Datapipeline()
17 attributes_container = []
19 # Using TwitterSearchScraper to scrape data and append tweets to list
---> 20 for i,tweet in enumerate(sntwitter.TwitterSearchScraper('covid since:2021-07-05 until:2022-07-06').get_items()):
21 if i>150:
22 break
File ~\anaconda3\envs\sentiment\lib\site-packages\snscrape\modules\twitter.py:915, in _TwitterAPIScraper._iter_api_data(self, endpoint, apiType, params, paginationParams, cursor, direction, instructionsPath)
913 while True:
914 _logger.info(f'Retrieving scroll page {cursor}')
--> 915 obj = self._get_api_data(endpoint, apiType, reqParams, instructionsPath = instructionsPath)
916 yield obj
918 # No data format test, just a hard and loud crash if anything's wrong :-)
File ~\anaconda3\envs\sentiment\lib\site-packages\snscrape\modules\twitter.py:886, in _TwitterAPIScraper._get_api_data(self, endpoint, apiType, params, instructionsPath)
884 if apiType is _TwitterAPIType.GRAPHQL:
885 params = urllib.parse.urlencode({k: json.dumps(v, separators = (',', ':')) for k, v in params.items()}, quote_via = urllib.parse.quote)
--> 886 r = self._get(endpoint, params = params, headers = self._apiHeaders, responseOkCallback = functools.partial(self._check_api_response, apiType = apiType, instructionsPath = instructionsPath))
887 return r._snscrapeObj
# Import the snscrape library for accessing Twitter data
import snscrape.modules.twitter as sntwitter
# Import the pandas library for data manipulation and analysis
import pandas as pd
# Import the json library for working with JSON data
import json
# Import the datetime library for working with dates and times
from datetime import datetime
def ETL_Datapipeline():
    """Scrape tweets for a fixed 'covid' search query and save them as CSV.

    Iterates over the items yielded by ``sntwitter.TwitterSearchScraper`` for
    the hard-coded query, collects the username, date, like count, source
    label and content of each tweet, loads the rows into a pandas DataFrame
    and writes that DataFrame to the relative path ``covid_data.csv``.

    Returns:
        None. The only result is the CSV file written to disk.
    """
    # Creating list to append tweet data to
    attributes_container = []

    # Using TwitterSearchScraper to scrape data and append tweets to list
    scraper = sntwitter.TwitterSearchScraper('covid since:2021-07-05 until:2022-07-06')
    for i, tweet in enumerate(scraper.get_items()):
        # Indices 0..150 are kept, so at most 151 tweets are collected.
        if i > 150:
            break
        attributes_container.append(
            [tweet.user.username, tweet.date, tweet.likeCount, tweet.sourceLabel, tweet.content]
        )

    # Creating a dataframe to load the list
    tweets_df = pd.DataFrame(
        attributes_container,
        columns=["User", "Date Created", "Number of Likes", "Source of Tweet", "Tweet"],
    )

    # Export the contents of tweets_df to a CSV file. to_csv returns None when
    # it is given a path, so the result is deliberately not bound to a name.
    tweets_df.to_csv('covid_data.csv')
Expected behaviour
A csv file with columns shown above
Screenshots and recordings
No response
Operating system
Windows 10
Python version: output of python3 --version
python 3.9.7
snscrape version: output of snscrape --version
snscrape 0.7.0.20230622
Scraper
TwitterSearchScraper
How are you using snscrape?
Module (import snscrape.modules.something in Python code)
Describe the bug
Hello, I'm trying to scrape data from Twitter. The script worked fine 2 months ago, but now I'm getting the error below:
Error retrieving https://twitter.com/i/api/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline?variables=%7B%22rawQuery%22%3A%22covid%20since%3A2021-07-05%20until%3A2022-07-06%22%2C%22count%22%3A20%2C%22product%22%3A%22Latest%22%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%7D&features=%7B%22rweb_lists_timeline_redesign_enabled%22%3Afalse%2C%22blue_business_profile_image_shape_enabled%22%3Afalse%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22vibe_api_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Afalse%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22interactive_text_enabled%22%3Atrue%2C%22responsive_web_text_conversations_enabled%22%3Afalse%2C%22longform_notetweets_rich_text_read_enabled%22%3Afalse%2C%22longform_notetweets_inline_media_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%2C%22responsive_web_twitter_blue_verified_badge_is_enabled%22%3Atrue%7D: blocked (404)
4 requests to
https://twitter.com/i/api/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline?variables=%7B%22rawQuery%22%3A%22covid%20since%3A2021-07-05%20until%3A2022-07-06%22%2C%22count%22%3A20%2C%22product%22%3A%22Latest%22%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%7D&features=%7B%22rweb_lists_timeline_redesign_enabled%22%3Afalse%2C%22blue_business_profile_image_shape_enabled%22%3Afalse%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22vibe_api_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Afalse%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22interactive_text_enabled%22%3Atrue%2C%22responsive_web_text_conversations_enabled%22%3Afalse%2C%22longform_notetweets_rich_text_read_enabled%22%3Afalse%2C%22longform_notetweets_inline_media_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%2C%22responsive_web_twitter_blue_verified_badge_is_enabled%22%3Atrue%7D failed, giving up. Errors: blocked (403), blocked (404), blocked (404), blocked (404)
ScraperException Traceback (most recent call last) Cell In[10], line 1 ----> 1 ETL_Datapipeline()
Cell In[9], line 20, in ETL_Datapipeline() 17 attributes_container = [] 19 # Using TwitterSearchScraper to scrape data and append tweets to list ---> 20 for i,tweet in enumerate(sntwitter.TwitterSearchScraper('covid since:2021-07-05 until:2022-07-06').get_items()): 21 if i>150: 22 break
File ~\anaconda3\envs\sentiment\lib\site-packages\snscrape\modules\twitter.py:1763, in TwitterSearchScraper.get_items(self) 1760 params = {'variables': variables, 'features': features} 1761 paginationParams = {'variables': paginationVariables, 'features': features} -> 1763 for obj in self._iter_api_data('https://twitter.com/i/api/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline', _TwitterAPIType.GRAPHQL, params, paginationParams, cursor = self._cursor, instructionsPath = ['data', 'search_by_raw_query', 'search_timeline', 'timeline', 'instructions']): 1764 yield from self._graphql_timeline_instructions_to_tweets(obj['data']['search_by_raw_query']['search_timeline']['timeline']['instructions'])
File ~\anaconda3\envs\sentiment\lib\site-packages\snscrape\modules\twitter.py:915, in _TwitterAPIScraper._iter_api_data(self, endpoint, apiType, params, paginationParams, cursor, direction, instructionsPath) 913 while True: 914 _logger.info(f'Retrieving scroll page {cursor}') --> 915 obj = self._get_api_data(endpoint, apiType, reqParams, instructionsPath = instructionsPath) 916 yield obj 918 # No data format test, just a hard and loud crash if anything's wrong :-)
File ~\anaconda3\envs\sentiment\lib\site-packages\snscrape\modules\twitter.py:886, in _TwitterAPIScraper._get_api_data(self, endpoint, apiType, params, instructionsPath) 884 if apiType is _TwitterAPIType.GRAPHQL: 885 params = urllib.parse.urlencode({k: json.dumps(v, separators = (',', ':')) for k, v in params.items()}, quote_via = urllib.parse.quote) --> 886 r = self._get(endpoint, params = params, headers = self._apiHeaders, responseOkCallback = functools.partial(self._check_api_response, apiType = apiType, instructionsPath = instructionsPath)) 887 return r._snscrapeObj
File ~\anaconda3\envs\sentiment\lib\site-packages\snscrape\base.py:275, in Scraper._get(self, *args, **kwargs) 274 def _get(self, *args, **kwargs): --> 275 return self._request('GET', *args, **kwargs)
File ~\anaconda3\envs\sentiment\lib\site-packages\snscrape\base.py:271, in Scraper._request(self, method, url, params, data, headers, timeout, responseOkCallback, allowRedirects, proxies) 269 _logger.fatal(msg) 270 _logger.fatal(f'Errors: {", ".join(errors)}') --> 271 raise ScraperException(msg) 272 raise RuntimeError('Reached unreachable code')
ScraperException: 4 requests to https://twitter.com/i/api/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline?variables=%7B%22rawQuery%22%3A%22covid%20since%3A2021-07-05%20until%3A2022-07-06%22%2C%22count%22%3A20%2C%22product%22%3A%22Latest%22%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%7D&features=%7B%22rweb_lists_timeline_redesign_enabled%22%3Afalse%2C%22blue_business_profile_image_shape_enabled%22%3Afalse%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22vibe_api_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Afalse%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22interactive_text_enabled%22%3Atrue%2C%22responsive_web_text_conversations_enabled%22%3Afalse%2C%22longform_notetweets_rich_text_read_enabled%22%3Afalse%2C%22longform_notetweets_inline_media_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%2C%22responsive_web_twitter_blue_verified_badge_is_enabled%22%3Atrue%7D failed, giving up.
How to reproduce
# Import the snscrape library for accessing Twitter data
import snscrape.modules.twitter as sntwitter
# Import the pandas library for data manipulation and analysis
import pandas as pd
# Import the json library for working with JSON data
import json
# Import the datetime library for working with dates and times
from datetime import datetime
def ETL_Datapipeline():
Creating list to append tweet data to
Expected behaviour
A csv file with columns shown above
Screenshots and recordings
No response
Operating system
Windows 10
Python version: output of
python3 --version
python 3.9.7
snscrape version: output of
snscrape --version
snscrape 0.7.0.20230622
Scraper
TwitterSearchScraper
How are you using snscrape?
Module (
import snscrape.modules.something
in Python code)
Backtrace
No response
Log output
No response
Dump of locals
No response
Additional context
No response