Closed shdm2012 closed 4 years ago
Please share the command you run so I can try to reproduce.
It's a bit more complicated than a single command, so I'll try to explain.
def __init__(self, user="", full_name="", location="", blog="", date_joined="", id="", tweets=0,
             following=0, followers=0, likes=0, lists=0, is_verified=0, date_birth=""):
    """Store the given profile fields on the instance.

    Every argument is optional and is copied unchanged onto the attribute
    of the same name; no validation or type conversion happens here.

    NOTE(review): the parameter ``id`` shadows the builtin of the same
    name. It is kept as-is because callers may pass it by keyword.
    """
    # Text fields (all default to the empty string).
    self.user = user
    self.full_name = full_name
    self.location = location
    self.blog = blog
    self.date_joined = date_joined
    self.id = id

    # Counters (all default to 0).
    self.tweets = tweets
    self.following = following
    self.followers = followers
    self.likes = likes
    self.lists = lists

    # Defaults to 0 rather than False; left untouched for compatibility.
    self.is_verified = is_verified
    self.date_birth = date_birth
# Look the birthdate tag up first: find() yields None when the profile header
# has no such <div>, and reading .text off None would raise AttributeError
# before the "Unknown" fallback below could ever run.
birthdate_tag = tag_prof_header.find("div", {"class": "ProfileHeaderCard-birthdate"})
date_birth = None if birthdate_tag is None else birthdate_tag.text
if date_birth is None:
    self.date_birth = "Unknown"
else:
    # Drop the whitespace surrounding the tag's text.
    self.date_birth = date_birth.strip()
from twitterscraper.query import query_user_info
import pandas as pd
from multiprocessing import Pool
import time
from IPython.display import display
import numpy as np
# Module-level accumulator for the per-user result dicts.
# No `global` statement is needed (or meaningful) at module scope.
twitter_user_info = []
def get_user_info(twitter_user):
    """
    An example of using the query_user_info method

    :param twitter_user: the twitter user to capture user data
    :return: twitter_user_data: returns a dictionary of twitter user data.
        When query_user_info() returns None, every field except "user" is
        None so the caller still gets exactly one row per requested user.
    """
    # Output key -> attribute name on the object returned by query_user_info().
    field_map = {
        "user": "user",
        "fullname": "full_name",
        "location": "location",
        "blog": "blog",
        "date_joined": "date_joined",
        "id": "id",
        "num_tweets": "tweets",
        "following": "following",
        "followers": "followers",
        "likes": "likes",
        "lists": "lists",
        "date_birth": "date_birth",
    }

    user_info = query_user_info(user=twitter_user)

    # query_user_info() can return None (reported for deleted profiles);
    # accessing attributes on it would raise AttributeError, so return a
    # placeholder row that only carries the requested screen name.
    if user_info is None:
        twitter_user_data = dict.fromkeys(field_map)
        twitter_user_data["user"] = twitter_user
        return twitter_user_data

    return {key: getattr(user_info, attr) for key, attr in field_map.items()}
def main():
    """Collect profile data for every screen name in tweets3.json and display it.

    Reads the ``screen_name`` column of ``tweets3.json``, fetches each user's
    data with ``get_user_info`` on a pool of 8 worker processes, appends the
    results to the module-level ``twitter_user_info`` list, and displays them
    as a DataFrame sorted by follower count (descending, missing values last).
    """
    start = time.time()

    df_nicknames = pd.read_json('tweets3.json', encoding='utf-8')
    users = df_nicknames['screen_name'].tolist()
    #users = ['Carlos_F_Enguix', 'mmtung', 'dremio', 'MongoDB', 'JenWike', 'timberners_lee','ataspinar2', 'realDonaldTrump',
    #'BarackObama', 'elonmusk', 'BillGates', 'BillClinton','katyperry','KimKardashian']
    print(users)

    # The context manager shuts the worker processes down once map() has
    # returned, instead of leaving the pool open for the rest of the run.
    with Pool(8) as pool:
        twitter_user_info.extend(pool.map(get_user_info, users))

    cols = ['id', 'fullname', 'date_joined', 'location', 'blog', 'num_tweets',
            'following', 'followers', 'likes', 'lists', 'date_birth']
    # NOTE(review): index=users requires exactly one entry in
    # twitter_user_info per user, i.e. the list must be empty before this call.
    data_frame = pd.DataFrame(twitter_user_info, index=users, columns=cols)
    data_frame.index.name = "Users"
    data_frame.sort_values(by="followers", ascending=False, inplace=True, kind='quicksort', na_position='last')

    elapsed = time.time() - start
    print(f"Elapsed time: {elapsed}")
    display(data_frame)
# Run main() only when executed as a script. The guard also keeps
# multiprocessing workers that re-import this module from re-running main().
if __name__ == '__main__':
    main()
I figured out what's wrong: query_user_info() returns None for Twitter profiles that have been deleted.
I'm facing this error while trying to scrape some users' data. Please help.