guanquann / Stocksera

Finance application that provides more than 60 different types of alternative data to retail investors
MIT License

twitter Too Many Requests #24

Closed. christrt9 closed this issue 2 years ago

christrt9 commented 2 years ago

I am trying to run tasks_to_run.py, but it never finishes. I get this error:

scheduled_tasks_1  | Traceback (most recent call last):
scheduled_tasks_1  |   File "tasks_to_run.py", line 151, in <module>
scheduled_tasks_1  |     scrape_twitter_posts.main()
scheduled_tasks_1  |   File "/code/scheduled_tasks/twitter/scrape_trending_posts.py", line 17, in main
scheduled_tasks_1  |     json_response = connect_to_endpoint(url)
scheduled_tasks_1  |   File "/code/scheduled_tasks/twitter/twitter_connection.py", line 25, in connect_to_endpoint
scheduled_tasks_1  |     response.status_code, response.text
scheduled_tasks_1  | Exception: Request returned an errors: 429 {"title":"Too Many Requests","detail":"Too Many Requests","type":"about:blank","status":429}
stocksera_scheduled_tasks_1 exited with code 1

Any ideas on how to make the script run to completion? Thanks.

CodeInFilth commented 2 years ago

> I am trying to run tasks_to_run.py, but it never finishes. I get this error: ... 429 Too Many Requests ... Any ideas on how to make the script run to completion? Thanks.

You can import time and add a delay between requests so Twitter doesn't rate-limit the scraping. This is what I do so I don't run into any connection issues.

First, edit the Twitter followers file:

scheduled_tasks/twitter/get_twitter_followers.py

import os
import sys
import time 
import sqlite3
from datetime import datetime

sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
from scheduled_tasks.twitter.twitter_connection import *

conn = sqlite3.connect(r"database/database.db", check_same_thread=False)
db = conn.cursor()

# the dict key is the ticker symbol, the value is the username of the Twitter account
interested_accounts = {
    "MARA": "MarathonDigitalHoldings",
    "TSM": "TaiwanSemiconductor",
    "LODE": "ComstockMining",
    "USAS": "AmericasGoldandSilverCorporation",
    "CLSK": "CleanSpark",
    "CYDY": "CytoDyn",
    "MRNA": "moderna_tx",
    "PFE": "pfizer_news",
    "AMC": "AMCTheatres",
    "CLOV": "CloverHealth",
    "BB": "BlackBerry",
    "AMD": "AMD",
    "UWMC": "UWMlending",
    "NIO": "NIO",
    "TSLA": "Tesla",
    "AAPL": "Apple",
    "NOK": "Nokia",
    "NVDA": "Nvidia",
    "MSFT": "Microsoft",
    "RBLX": "Roblox",
    "F": "Ford",
    "PLTR": "PalantirTech",
    "COIN": "CoinBase",
    "RKT": "RocketCompanies",
    "MVIS": "MicroVision",
    "FUBO": "fuboTV",
    "VIAC": "ViacomCBS",
    "SNDL": "sundialcannabis",
    "SPCE": "virgingalactic",
    "SNAP": "Snapchat",
    "OCGN": "Ocugen",
    "ROKU": "Roku",
    "BABA": "AlibabaGroup",
    "SE": "SeaGroup",
    "EXPR": "express",
    "SOFI": "SoFi",
    "WKHS": "Workhorse_Group",
    "TLRY": "tilray",
    "WISH": "WishShopping",
    "CLF": "CliffsNR",
    "GOEV": "canoo",
    "DKNG": "DraftKings",
    "AMZN": "amazon",
    "TWTR": "Twitter",
    "FB": "Facebook",
    "PYPL": "PayPal",
    "SQ": "Square",
    "XPEV": "XPengMotors",
    "NKLA": "nikolamotor",
    "BNGO": "bionanogenomics",
    "SKLZ": "SKLZ",
    "CRSR": "CORSAIR",
    "CRSP": "CRISPRTX",
    "XELA": "ExelaTech",
    "MMAT": "Metamaterialtec",
    "HOOD": "RobinhoodApp",
    "LCID": "LucidMotors",
    "NVAX": "Novavax",
    "MRNA": "moderna_tx",
    "NFLX": "Netflix",
    "BA": "Boeing",
    "GOOG": "Google",
    "GOOGL": "Google",
    "BAC": "BankofAmerica",
    "BNTX": "BioNTech_Group",
    "DIS": "Disney",
    "SBUX": "Starbucks",
    "INTC": "intel",
    "AAL": "AmericanAir",
    "COKE": "CocaCola",
    "MCD": "McDonalds",
    "C": "Citi",
    "T": "ATT",
    "V": "Visa",
    "PEP": "pepsi",
    "NKE": "Nike",
    "JPM": "jpmorgan",
    "ADBE": "Adobe",
    "WMT": "Walmart",
    "IBM": "IBM",
    "GS": "GoldmanSachs",
    "SHOP": "Shopify",
    "TWLO": "Twilio",
    "Z": "zillow",
    "CRWD": "CrowdStrike",
    "SNOW": "SnowflakeDB",
    "NET": "Cloudflare",
    "WEN": "Wendys",
    "DPZ": "dominos",
    "PINS": "Pinterest",
    "ORCL": "Oracle",
    "UA": "UnderArmour",
    "LUMN": "lumentechco",
    "JD": "JD_Corporate",
    "CSCO": "Cisco",
    "JNJ": "JNJNews",
    "ZM": "Zoom",
    "SPOT": "Spotify",
    "MSTR": "MicroStrategy",
    "UBER": "UBER",
    "CRM": "salesforce",
    "AXP": "AmericanExpress",
    "GM": "GM",
    "GE": "generalelectric",
    "HD": "HomeDepot",
    "IPB": "MerrillLynch",
    "WFC": "wellsfargo",
    "ABT": "abbottglobal",
    "EXC": "exelon",
    "GPS": "gap",
    "ODP": "OfficeDepot",
    "STX": "SEAGATE",
    "XLNX": "XilinxInc",
    "S": "SentinelOne",
    "RIDE": "LordstownMotors",
    "RACE": "ScuderiaFerrari",
    "TM": "Toyota",
    "MU": "MicronTech",
    "QCOM": "Qualcomm",
    "STM": "ST_World",
    "AMCX": "AMC_TV",
    "MANU": "ManUtd",
    "CIDM": "Cinedigm",
    "BBY": "BestBuy",
    "BBBY": "BedBathBeyond",
    "BLNK": "BlinkCharging",
    "BODY": "Beachbody",
    "TTM": "TataMotors",
    "TTD": "TheTradeDesk",
    "MCFE": "McAfee",
    "CHWY": "Chewy",
    "UPST": "Upstart",
    "DB": "DeutscheBank",
    "MDB": "MongoDB",
    "NEGG": "Newegg",
    "PTRA": "Proterra_Inc",
    "PTON": "onepeloton",
    "FSLY": "fastly",
    "SENS": "senseonics",
    "WOOF": "Petco",
    "AI": "C3_AI",
    "PSFE": "PlugIntoPaysafe",
    "RIOT": "RiotBlockchain",
    "FUTU": "moomooApp",
    "LAZR": "luminartech",
    "PDD": "PinduoduoInc",
    "BARK": "barkbox",
    "EBAY": "eBay",
    "LYFT": "lyft",
}
date_updated = str(datetime.now()).split()[0]

def main():
    for symbol, account in interested_accounts.items():
        url = "https://api.twitter.com/1.1/users/show.json?screen_name={}".format(account)
        json_response = connect_to_endpoint(url)
        print("Twitter account of: ", symbol, json_response["followers_count"])
        db.execute("INSERT OR IGNORE INTO twitter_followers VALUES (?, ?, ?)",
                   (symbol, json_response["followers_count"], date_updated))
        conn.commit()
        time.sleep(1)  # wait a second between requests so Twitter does not return a 429

if __name__ == "__main__":
    main()
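
If you want to check this file on its own before re-running the full task list, you can run it straight from the project root (the sqlite path at the top is relative), something like:

python scheduled_tasks/twitter/get_twitter_followers.py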

Second, edit the Twitter trending posts file:

scheduled_tasks/twitter/scrape_trending_posts.py

import os
import sys
import time

sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
from scheduled_tasks.get_popular_tickers import *
from scheduled_tasks.twitter.twitter_connection import *
from scheduled_tasks.reddit.reddit_utils import *

def main():
    all_symbols = list(get_mapping_coins().keys())
    all_symbols.extend(full_ticker_list())

    for symbol in all_symbols:
        if len(symbol) > 1:
            url = f"https://api.twitter.com/2/tweets/counts/recent?query={symbol}&granularity=day"
            json_response = connect_to_endpoint(url)
            print(symbol)
            for i in json_response["data"]:
                start_date = i["start"]
                end_date = i["end"]
                if end_date.endswith("00:00:00.000Z"):
                    tweet_count = i["tweet_count"]
                    db.execute("INSERT OR IGNORE INTO twitter_trending VALUES (?, ?, ?)",
                               (symbol, tweet_count, start_date.split("T")[0]))
                    conn.commit()
            time.sleep(1)  # wait between requests so Twitter does not return a 429

if __name__ == "__main__":
    main()
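
If you still hit the odd 429 even with the sleeps (the trending script makes one request per ticker, so there are a lot of calls), another option is to handle the 429 in one place and back off before retrying. This is only a rough sketch of the idea, not the project's actual connect_to_endpoint; the headers argument stands in for wherever twitter_connection.py builds the bearer-token header:

import time
import requests

def get_with_backoff(url, headers, max_retries=3):
    # hypothetical helper: retry the GET a few times, waiting longer after each 429
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers)
        if response.status_code == 429:
            # use Twitter's Retry-After header if present, otherwise back off 15s, 30s, 60s...
            wait = int(response.headers.get("Retry-After", 15 * (2 ** attempt)))
            print(f"Rate limited, sleeping {wait}s before retrying")
            time.sleep(wait)
            continue
        if response.status_code != 200:
            raise Exception(f"Request returned an error: {response.status_code} {response.text}")
        return response.json()
    raise Exception(f"Still rate limited after {max_retries} retries")

Both files above could then call get_with_backoff(url, headers) where they currently call connect_to_endpoint(url), and keep the time.sleep(1) as the normal spacing between requests.
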
christrt9 commented 2 years ago

thanks :)