Closed christrt9 closed 2 years ago
I am trying to run tasks_to_run.py but it never finishes. I get this error
scheduled_tasks_1 | Traceback (most recent call last): scheduled_tasks_1 | File "tasks_to_run.py", line 151, in <module> scheduled_tasks_1 | scrape_twitter_posts.main() scheduled_tasks_1 | File "/code/scheduled_tasks/twitter/scrape_trending_posts.py", line 17, in main scheduled_tasks_1 | json_response = connect_to_endpoint(url) scheduled_tasks_1 | File "/code/scheduled_tasks/twitter/twitter_connection.py", line 25, in connect_to_endpoint scheduled_tasks_1 | response.status_code, response.text scheduled_tasks_1 | Exception: Request returned an errors: 429 {"title":"Too Many Requests","detail":"Too Many Requests","type":"about:blank","status":429} stocksera_scheduled_tasks_1 exited with code 1
Any ideas how to make the script run to completion? Thanks.
You can import time and add a delay between requests so Twitter doesn't rate-limit you while scraping. This is what I do so I do not run into any connection issues.
scheduled_tasks/twitter/get_twitter_followers.py
import os
import sys
import time
import sqlite3
from datetime import datetime
sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
from scheduled_tasks.twitter.twitter_connection import *
# Single shared SQLite connection and cursor used by main() below.
# check_same_thread=False lets this connection be used from a thread other
# than the one that created it — NOTE(review): sqlite3 connections are not
# inherently thread-safe; confirm the scheduler serializes access.
conn = sqlite3.connect(r"database/database.db", check_same_thread=False)
db = conn.cursor()
# key of the dict is the symbol of the ticker, while the value is the username of the Twitter account
# NOTE(review): handles are hard-coded and can go stale if a company renames
# its account — verify periodically.
# Fix: the original literal listed "MRNA" twice (with the same value); the
# duplicate key has been removed so each ticker appears exactly once.
interested_accounts = {
    "MARA": "MarathonDigitalHoldings",
    "TSM": "TaiwanSemiconductor",
    "LODE": "ComstockMining",
    "USAS": "AmericasGoldandSilverCorporation",
    "CLSK": "CleanSpark",
    "CYDY": "CytoDyn",
    "MRNA": "moderna_tx",
    "PFE": "pfizer_news",
    "AMC": "AMCTheatres",
    "CLOV": "CloverHealth",
    "BB": "BlackBerry",
    "AMD": "AMD",
    "UWMC": "UWMlending",
    "NIO": "NIO",
    "TSLA": "Tesla",
    "AAPL": "Apple",
    "NOK": "Nokia",
    "NVDA": "Nvidia",
    "MSFT": "Microsoft",
    "RBLX": "Roblox",
    "F": "Ford",
    "PLTR": "PalantirTech",
    "COIN": "CoinBase",
    "RKT": "RocketCompanies",
    "MVIS": "MicroVision",
    "FUBO": "fuboTV",
    "VIAC": "ViacomCBS",
    "SNDL": "sundialcannabis",
    "SPCE": "virgingalactic",
    "SNAP": "Snapchat",
    "OCGN": "Ocugen",
    "ROKU": "Roku",
    "BABA": "AlibabaGroup",
    "SE": "SeaGroup",
    "EXPR": "express",
    "SOFI": "SoFi",
    "WKHS": "Workhorse_Group",
    "TLRY": "tilray",
    "WISH": "WishShopping",
    "CLF": "CliffsNR",
    "GOEV": "canoo",
    "DKNG": "DraftKings",
    "AMZN": "amazon",
    "TWTR": "Twitter",
    "FB": "Facebook",
    "PYPL": "PayPal",
    "SQ": "Square",
    "XPEV": "XPengMotors",
    "NKLA": "nikolamotor",
    "BNGO": "bionanogenomics",
    "SKLZ": "SKLZ",
    "CRSR": "CORSAIR",
    "CRSP": "CRISPRTX",
    "XELA": "ExelaTech",
    "MMAT": "Metamaterialtec",
    "HOOD": "RobinhoodApp",
    "LCID": "LucidMotors",
    "NVAX": "Novavax",
    "NFLX": "Netflix",
    "BA": "Boeing",
    "GOOG": "Google",
    "GOOGL": "Google",
    "BAC": "BankofAmerica",
    "BNTX": "BioNTech_Group",
    "DIS": "Disney",
    "SBUX": "Starbucks",
    "INTC": "intel",
    "AAL": "AmericanAir",
    "COKE": "CocaCola",
    "MCD": "McDonalds",
    "C": "Citi",
    "T": "ATT",
    "V": "Visa",
    "PEP": "pepsi",
    "NKE": "Nike",
    "JPM": "jpmorgan",
    "ADBE": "Adobe",
    "WMT": "Walmart",
    "IBM": "IBM",
    "GS": "GoldmanSachs",
    "SHOP": "Shopify",
    "TWLO": "Twilio",
    "Z": "zillow",
    "CRWD": "CrowdStrike",
    "SNOW": "SnowflakeDB",
    "NET": "Cloudflare",
    "WEN": "Wendys",
    "DPZ": "dominos",
    "PINS": "Pinterest",
    "ORCL": "Oracle",
    "UA": "UnderArmour",
    "LUMN": "lumentechco",
    "JD": "JD_Corporate",
    "CSCO": "Cisco",
    "JNJ": "JNJNews",
    "ZM": "Zoom",
    "SPOT": "Spotify",
    "MSTR": "MicroStrategy",
    "UBER": "UBER",
    "CRM": "salesforce",
    "AXP": "AmericanExpress",
    "GM": "GM",
    "GE": "generalelectric",
    "HD": "HomeDepot",
    "IPB": "MerrillLynch",
    "WFC": "wellsfargo",
    "ABT": "abbottglobal",
    "EXC": "exelon",
    "GPS": "gap",
    "ODP": "OfficeDepot",
    "STX": "SEAGATE",
    "XLNX": "XilinxInc",
    "S": "SentinelOne",
    "RIDE": "LordstownMotors",
    "RACE": "ScuderiaFerrari",
    "TM": "Toyota",
    "MU": "MicronTech",
    "QCOM": "Qualcomm",
    "STM": "ST_World",
    "AMCX": "AMC_TV",
    "MANU": "ManUtd",
    "CIDM": "Cinedigm",
    "BBY": "BestBuy",
    "BBBY": "BedBathBeyond",
    "BLNK": "BlinkCharging",
    "BODY": "Beachbody",
    "TTM": "TataMotors",
    "TTD": "TheTradeDesk",
    "MCFE": "McAfee",
    "CHWY": "Chewy",
    "UPST": "Upstart",
    "DB": "DeutscheBank",
    "MDB": "MongoDB",
    "NEGG": "Newegg",
    "PTRA": "Proterra_Inc",
    "PTON": "onepeloton",
    "FSLY": "fastly",
    "SENS": "senseonics",
    "WOOF": "Petco",
    "AI": "C3_AI",
    "PSFE": "PlugIntoPaysafe",
    "RIOT": "RiotBlockchain",
    "FUTU": "moomooApp",
    "LAZR": "luminartech",
    "PDD": "PinduoduoInc",
    "BARK": "barkbox",
    "EBAY": "eBay",
    "LYFT": "lyft",
}
# Date portion (YYYY-MM-DD) of today's timestamp, stored with every row so
# follower snapshots can be de-duplicated per day.
date_updated = datetime.now().date().isoformat()


def main():
    """Fetch the follower count for each tracked account and persist it.

    Inserts one (symbol, followers_count, date) row per ticker into the
    twitter_followers table, committing after every insert, and sleeps
    one second between requests to stay under Twitter's rate limit.
    """
    for ticker, handle in interested_accounts.items():
        endpoint = "https://api.twitter.com/1.1/users/show.json?screen_name={}".format(handle)
        payload = connect_to_endpoint(endpoint)
        print("Twitter account of: ", ticker, payload["followers_count"])
        db.execute("INSERT OR IGNORE INTO twitter_followers VALUES (?, ?, ?)",
                   (ticker, payload["followers_count"], date_updated))
        conn.commit()
        # Throttle: one request per second avoids HTTP 429 responses.
        time.sleep(1)


if __name__ == "__main__":
    main()
scheduled_tasks/twitter/scrape_trending_posts.py
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
from scheduled_tasks.get_popular_tickers import *
from scheduled_tasks.twitter.twitter_connection import *
from scheduled_tasks.reddit.reddit_utils import *
def main():
    """Record the recent daily tweet count for every tracked symbol.

    Builds the symbol universe from the coin mapping plus the full stock
    ticker list, queries Twitter's v2 recent tweet-counts endpoint for each
    multi-character symbol, and inserts one (symbol, tweet_count, date) row
    per fully-elapsed day into the twitter_trending table.

    NOTE(review): `db`, `conn` and `connect_to_endpoint` are assumed to be
    provided by the wildcard imports at the top of this file — confirm that
    reddit_utils / twitter_connection actually export them.
    """
    # Fix: `time` is not imported at the top of this file (only os/sys are),
    # so the rate-limit sleep below would raise NameError unless a wildcard
    # import happened to re-export it. Import it locally to be safe.
    import time

    all_symbols = list(get_mapping_coins().keys())
    all_symbols.extend(full_ticker_list())
    for symbol in all_symbols:
        # Single-character symbols are skipped: as free-text queries they
        # match far too many unrelated tweets.
        if len(symbol) > 1:
            url = f"https://api.twitter.com/2/tweets/counts/recent?query={symbol}&granularity=day"
            json_response = connect_to_endpoint(url)
            print(symbol)
            for i in json_response["data"]:
                start_date = i["start"]
                end_date = i["end"]
                # Keep only buckets ending at midnight, i.e. fully-elapsed
                # days; the current partial day is still accumulating.
                if end_date.endswith("00:00:00.000Z"):
                    tweet_count = i["tweet_count"]
                    db.execute("INSERT OR IGNORE INTO twitter_trending VALUES (?, ?, ?)",
                               (symbol, tweet_count, start_date.split("T")[0]))
                    conn.commit()
            # Throttle to ~1 request/second to avoid HTTP 429 Too Many Requests
            # (the exact error reported in this issue).
            time.sleep(1)


if __name__ == "__main__":
    main()
thanks :)
I am trying to run tasks_to_run.py but it never finishes. I get this error
Any ideas how to make the script run to completion? Thanks.