Open rosecongou opened 4 years ago
# Scrape Reddit comments per subreddit / year / month into Excel workbooks.
#
# For every subreddit a folder is created under a timestamped output folder,
# and for every year one "<subreddit> <year> comments.xlsx" workbook is
# written with one sheet per month.  Failed pulls are appended to "fail.txt".
#
# Relies on names defined elsewhere in this file: subreddits, start, end,
# month_start, month_end, make_directory, timestamp and pull_comments.
# NOTE(review): pull_comments presumably returns a list of comment dicts on
# success and an error string on failure (the original code distinguishes the
# two with isinstance checks) -- confirm against its definition.
version = "-V3"
datestring = datetime.now().strftime("%y%m%d-%H%M%S")
output_folder = "Reddit-Scrapes-" + datestring + version

for subreddit in subreddits:
    subreddit_folder = output_folder + "/" + subreddit
    make_directory(subreddit_folder)

    for year in range(start, end):
        workbook_prefix = subreddit_folder + "/" + subreddit + " " + str(year)

        # NOTE(review): this writer is created but never written to or closed
        # in the visible part of the script; kept unchanged in case code
        # outside this section uses it.
        submissionWriter = pandas.ExcelWriter(
            workbook_prefix + " submissions.xlsx", engine="xlsxwriter"
        )

        # Context managers guarantee that the failure log and the workbook
        # are closed (and the workbook saved exactly once) even if a pull
        # raises part-way through the year.
        with open("fail.txt", "a") as fail:
            with pandas.ExcelWriter(
                workbook_prefix + " comments.xlsx", engine="xlsxwriter"
            ) as commentWriter:
                for month in range(month_start, month_end):
                    # Window is [first of this month, first of next month);
                    # December rolls over into January of the next year.
                    since = timestamp(year, month, 1)
                    before = timestamp(year + month // 12, month % 12 + 1, 1)
                    # Month as a readable sheet name, e.g. "January".
                    month_name = date(1900, month, 1).strftime('%B')

                    page = pull_comments(subreddit, since, before)
                    comments = page
                    failure = page if isinstance(page, str) else None

                    # A page of exactly 100 items is treated as "there may be
                    # more": pull again, ending at the last item's timestamp.
                    # NOTE(review): assumes pages are ordered newest-first so
                    # the last item is the oldest -- confirm.
                    while failure is None and len(page) == 100:
                        print(len(page))
                        before = page[-1]["created_utc"]
                        page = pull_comments(subreddit, since, before)
                        if isinstance(page, str):
                            # Check the page just fetched, not the
                            # accumulator: extending a list with a str would
                            # append its individual characters.
                            failure = page
                        else:
                            comments += page

                    if failure is not None:
                        # One record per line, fields comma-separated.
                        fail.write(
                            "subreddit: " + subreddit
                            + ", year: " + str(year)
                            + ", month: " + str(month_name)
                            + ", type: comments, "
                            + "check: " + failure + "\n"
                        )

                    if isinstance(comments, list):
                        # On a mid-pagination failure this still writes the
                        # comments collected before the failing page.
                        pandas.DataFrame(comments).to_excel(
                            commentWriter, sheet_name=month_name
                        )
                        if failure is None:
                            print("Succeeded pulling comments for ", year, month_name, subreddit)
                    else:
                        # First pull failed: keep an empty sheet so every
                        # month is still present in the workbook.
                        pandas.DataFrame([]).to_excel(
                            commentWriter, sheet_name=month_name
                        )