Closed redux-project closed 3 years ago
Can you provide your config please? I suspect you're hitting github's ip-based ratelimit, which isn't really documented from what I can tell, but might happen in environments where you're behind a shared IP like a VPN or proxy.
Not hitting it through VPN or anything like that. I also downgraded to v1.1 and no error
{
"inputs": {
"pastebin":{
"enabled": true,
"module": "pastehunter.inputs.pastebin",
"api_scrape": "https://scrape.pastebin.com/api_scraping.php",
"api_raw": "https://scrape.pastebin.com/api_scrape_item.php?i=",
"paste_limit": 100,
"store_all": false
},
"dumpz": {
"enabled": false,
"comment": "This api endpoint has been removed.",
"module": "pastehunter.inputs.dumpz",
"api_scrape": "https://dumpz.org/api/recent",
"api_raw": "https://dumpz.org/api/dump",
"paste_limit": 200,
"store_all": false
},
"gists": {
"enabled": true,
"module": "pastehunter.inputs.gists",
"api_token": "removed",
"api_limit": 100,
"store_all": false,
"user_blacklist": [],
"file_blacklist": ["grahamcofborg-eval-package-list", "Changed Paths"]
},
"github": {
"enabled": false,
"module": "pastehunter.inputs.github",
"api_token": "removed",
"api_limit": 300,
"store_all": false,
"ignore_bots": false,
"user_blacklist": [],
"file_blacklist": ["node_modules/*", "__pycache__/*", "*/grahamcofborg-eval-package-list", "*.lock", "*.3ds", "*.3g2", "*.3gp", "*.7z", "*.DS_Store", "*.a", "*.aac", "*.adp", "*.ai", "*.aif", "*.aiff", "*.alz", "*.ape", "*.apk", "*.ar", "*.arj", "*.asf", "*.au", "*.avi", "*.bak", "*.bh", "*.bin", "*.bk", "*.bmp", "*.btif", "*.bz2", "*.bzip2", "*.cab", "*.caf", "*.cgm", "*.class", "*.cmx", "*.cpio", "*.cr2", "*.cur", "*.dat", "*.deb", "*.dex", "*.djvu", "*.dll", "*.dmg", "*.dng", "*.doc", "*.docm", "*.docx", "*.dot", "*.dotm", "*.dra", "*.dsk", "*.dts", "*.dtshd", "*.dvb", "*.dwg", "*.dxf", "*.ecelp4800", "*.ecelp7470", "*.ecelp9600", "*.egg", "*.eol", "*.eot", "*.epub", "*.exe", "*.f4v", "*.fbs", "*.fh", "*.fla", "*.flac", "*.fli", "*.flv", "*.fpx", "*.fst", "*.fvt", "*.g3", "*.gif", "*.graffle", "*.gz", "*.gzip", "*.h261", "*.h263", "*.h264", "*.ico", "*.ief", "*.img", "*.ipa", "*.iso", "*.jar", "*.jpeg", "*.jpg", "*.jpgv", "*.jpm", "*.jxr","*.ktx", "*.lha", "*.lvp", "*.lz", "*.lzh", "*.lzma", "*.lzo", "*.m3u", "*.m4a", "*.m4v", "*.mar", "*.mdi", "*.mht", "*.mid", "*.midi", "*.mj2", "*.mka", "*.mkv", "*.mmr", "*.mng", "*.mobi", "*.mov", "*.movie", "*.mp3", "*.mp4", "*.mp4a", "*.mpeg", "*.mpg", "*.mpga", "*.mxu", "*.nef", "*.npx", "*.numbers", "*.o", "*.oga", "*.ogg", "*.ogv", "*.otf", "*.pages", "*.pbm", "*.pcx", "*.pdf", "*.pea", "*.pgm", "*.pic", "*.png", "*.pnm", "*.pot", "*.potm", "*.potx", "*.ppa", "*.ppam", "*.ppm", "*.pps", "*.ppsm", "*.ppsx", "*.ppt", "*.pptm", "*.pptx", "*.psd", "*.pya", "*.pyc", "*.pyo", "*.pyv", "*.qt", "*.rar", "*.ras", "*.raw", "*.rgb", "*.rip", "*.rlc", "*.rmf", "*.rmvb", "*.rtf", "*.rz", "*.s3m", "*.s7z", "*.scpt", "*.sgi", "*.shar", "*.sil", "*.sketch", "*.slk", "*.smv", "*.so", "*.sub", "*.swf", "*.tar", "*.tbz", "*.tbz2", "*.tga", "*.tgz", "*.thmx", "*.tif", "*.tiff", "*.tlz", "*.ttc", "*.ttf", "*.txz", "*.udf", "*.uvh", "*.uvi", "*.uvm", "*.uvp", "*.uvs", "*.uvu", "*.viv", "*.vob", "*.war", "*.wav", "*.wax", "*.wbmp", "*.wdp", 
"*.weba", "*.webm", "*.webp", "*.whl", "*.wim", "*.wm", "*.wma", "*.wmv", "*.wmx", "*.woff", "*.woff2", "*.wvx", "*.xbm", "*.xif", "*.xla", "*.xlam", "*.xls", "*.xlsb", "*.xlsm", "*.xlsx", "*.xlt", "*.xltm", "*.xltx", "*.xm", "*.xmind", "*.xpi", "*.xpm", "*.xwd", "*.xz", "*.z", "*.zip", "*.zipx", "*.css", "*.scss", "*.uasset"]
},
"slexy":{
"enabled": false,
"module": "pastehunter.inputs.slexy",
"store_all": false,
"api_scrape": "http://slexy.org/recent",
"api_raw": "http://slexy.org/raw",
"api_view": "http://slexy.org/view"
},
"stackexchange":{
"enabled": false,
"module": "pastehunter.inputs.stackexchange",
"site_list": ["stackoverflow","serverfault", "superuser", "webapps", "webmasters", "dba"],
"api_key": "",
"store_filter": "!)r_ttsG0v3bE1vo3*8Ki",
"pagesize": 100,
"store_all": true,
"api_scrape": "https://api.stackexchange.com/2.2/questions"
}
},
"outputs": {
"elastic_output": {
"enabled": true,
"module": "pastehunter.outputs.elastic_output",
"classname": "ElasticOutput",
"elastic_index": "paste-test",
"elastic_host": "localhost",
"elastic_port": 9200,
"elastic_user": "elastic",
"elastic_pass": "changeme",
"elastic_ssl": false,
"weekly_index": true
},
"splunk_output": {
"enabled": false,
"module": "pastehunter.outputs.splunk_output",
"classname": "SplunkOutput",
"splunk_host": "host",
"splunk_port": 8089,
"splunk_user": "user",
"splunk_pass": "pass",
"splunk_index": "paste-test",
"splunk_sourcetype": "pastehunter",
"store_raw": true
},
"json_output": {
"enabled": false,
"module": "pastehunter.outputs.json_output",
"classname": "JsonOutput",
"output_path": "logs/json/",
"store_raw": true,
"encode_raw": true
},
"csv_output": {
"enabled": false,
"module": "pastehunter.outputs.csv_output",
"classname": "CSVOutput",
"output_path": "logs/csv/"
},
"syslog_output": {
"enabled": false,
"module": "pastehunter.outputs.syslog_output",
"classname": "SyslogOutput",
"host": "192.168.1.1",
"port": 514
},
"smtp_output": {
"enabled": false,
"module": "pastehunter.outputs.smtp_output",
"classname": "SMTPOutput",
"smtp_host": "smtp.server.com",
"smtp_port": 25,
"smtp_security": "starttls",
"smtp_user": "smtpusername",
"smtp_pass": "smtppassword",
"recipients" : {
"recipient_1": {
"address": "emailaddress that gets the alerts",
"rule_list": ["custom_keywords"],
"mandatory_rule_list": []
},
"recipient_2": {
"address": "emailaddress that gets the alerts",
"rule_list": [],
"mandatory_rule_list": ["keyword1", "keyword2"]
}
}
},
"slack_output": {
"enabled": false,
"module": "pastehunter.outputs.slack_output",
"classname": "SlackOutput",
"webhook_url": "",
"rule_list": ["custom_keywords"]
},
"http_output": {
"enabled": false,
"module": "outputs.http_output",
"classname": "HttpOutput",
"endpoint_url": "",
"headers": {},
"http_auth": false,
"http_user": "",
"http_password": "",
"ignore_fields": [],
"timezone": "Z"
},
"twilio_output": {
"enabled": false,
"module": "pastehunter.outputs.twilio_output",
"classname": "TwilioOutput",
"account_sid": "",
"auth_token": "",
"twilio_sender": "",
"recipient_list": [],
"rule_list": ["custom_keywords"]
}
},
"yara": {
"default_rules": true,
"custom_rules": "none",
"exclude_rules": [],
"blacklist": true,
"test_rules": false
},
"log": {
"log_to_file": true,
"log_file": "pastehunter",
"logging_level": 20,
"log_path": "logs",
"format": "%(asctime)s [%(threadName)-12.12s] %(levelname)s:%(message)s"
},
"general": {
"run_frequency": 300,
"process_timeout": 5
},
"sandboxes": {
"cuckoo": {
"enabled": false,
"module": "pastehunter.sandboxes.cuckoo",
"api_host": "127.0.0.1",
"api_port": 8080
},
"viper": {
"enabled": false,
"module": "pastehunter.sandboxes.viper",
"api_host": "127.0.0.1",
"api_port": 8080
}
},
"post_process": {
"post_email": {
"enabled": true,
"module": "pastehunter.postprocess.post_email",
"rule_list": ["email_filter"]
},
"post_b64": {
"enabled": true,
"module": "pastehunter.postprocess.post_b64",
"rule_list": ["b64_exe", "b64_rar", "b64_zip", "b64_gzip"]
},
"post_entropy": {
"enabled": false,
"module": "pastehunter.postprocess.post_entropy",
"rule_list": ["ALL"]
},
"post_compress": {
"enabled": false,
"module": "pastehunter.postprocess.post_compress",
"rule_list": ["ALL"]
}
}
}
Can you please try accessing https://scrape.pastebin.com/api_scraping.php
from the host being used and running this command?
curl \
-H "User-Agent: PasteHunter" \
-H "Accept: application/vnd.github.v3+json" \
-H "Authorization: token <YOUR_API_TOKEN_HERE>" \
https://api.github.com/gists/public
If this is something that's happening only after a little while, please only run these commands after you've hit the error. I want to try and determine which service is hitting the limit and why.
Odd. I fired up the newest version. Right away I got the error:
INFO:pastehunter-cli:Populating Queue
INFO:pastehunter-cli:Fetching paste list from pastehunter.inputs.pastebin
INFO:pastehunter-cli:Fetching paste list from pastehunter.inputs.gists
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
INFO:gists.py:Remaining Limit: 4999. Resets at 2020-07-30T05:01:32
INFO:pastehunter-cli:Added 321 Items to the queue
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
ERROR:pastehunter-cli:Request returned unexpected response code 429: <html>
<head><title>429 Too Many Requests</title></head>
<body>
<center><h1>429 Too Many Requests</h1></center>
<hr><center>nginx</center>
</body>
</html>
Then I ran that command you sent — instant results.
[{"url":"https://api.github.com/gists/55b350b3a88ada78e1085a4ca6db3650","forks_url":"https://api.github.com/gists/55b350.github.com/gists/55b350b3a88ada78e1085a4ca6db3650/commits","id":"55b350b3a88ada78e1085a4ca6db3650","node_id":"MDQ6R2lzd"https://gist.github.com/55b350b3a88ada78e1085a4ca6db3650.git","git_push_url":"https://gist.github.com/55b350b3a88ada78e350b3a88ada78e1085a4ca6db3650","files":{"for_array_to_object_data_validation.js":{"filename":"for_array_to_object_data_vvaScript","raw_url":"https://gist.githubusercontent.com/ppazos/55b350b3a88ada78e1085a4ca6db3650/raw/ec50673bf0bd80223dfe,"size":1026}},"public":true,"created_at":"2020-07-30T04:00:34Z","updated_at":"2020-07-30T04:00:35Z","description":"","cm/gists/55b350b3a88ada78e1085a4ca6db3650/comments","owner":{"login":"ppazos","id":1278556,"node_id":"MDQ6VXNlcjEyNzg1NTY78556?v=4","gravatar_id":"","url":"https://api.github.com/users/ppazos","html_url":"https://github.com/ppazos","followerowing_url":"https://api.github.com/users/ppazos/following{/other_user}","gists_url":"https://api.github.com/users/ppazoss/ppazos/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/ppazos/subscriptions","organizations_"https://api.github.com/users/ppazos/repos","events_url":"https://api.github.com/users/ppazos/events{/
I switched back to V1.1 and no issues since.
Did you get any response when you queried pastebin?
Apologies. Yes
[ { "scrape_url": "https://scrape.pastebin.com/api_scrape_item.php?i=nteYCeVx", "full_url": "https://pastebin.com/nteYCeVx", "date": "1596159066", "key": "nteYCeVx", "size": "348", "expire": "0", "title": "Download Trove folder content, this time with the comments unfucked!", "syntax": "javascript", "user": "", "hits": "10" }, { "scrape_url": "https://scrape.pastebin.com/api_scrape_item.php?i=LuzpptKH", "full_url": "https://pastebin.com/LuzpptKH", "date": "1596158929", "key": "LuzpptKH", "size": "1204", "expire": "1598750929", "title": "Festa no Polo Norte", "syntax": "cpp", "user": "danielvitor23", "hits": "15" }, { "scrape_url": "https://scrape.pastebin.com/api_scrape_item.php?i=bJkG14Ur", "full_url": "https://pastebin.com/bJkG14Ur", "date": "1596158927", "key": "bJkG14Ur", "size": "16983", "expire": "0", "title": "", "syntax": "javascript", "user": "", "hits": "42" }, { "scrape_url": "https://scrape.pastebin.com/api_scrape_item.php?i=dhcLwPpP", "full_url": "https://pastebin.com/dhcLwPpP", "date": "1596158874", "key": "dhcLwPpP", "size": "348", "expire": "0", "title": "download Trove folder content", "syntax": "javascript", "user": "", "hits": "14" }, { "scrape_url":
So I'm honestly not sure what to do here, am not able to reproduce it even though you seem to be hitting it consistently 😕
Hi, I've got the same issue. I fixed it with this ugly code:
err_count = 0
while err_count < 3:
req = requests.get(raw_paste_uri, headers=headers)
if req.status_code == 200:
raw_paste_data = req.text
break
else:
logger.error("Request number {} for {} returned unexpected response code {}: {}".format(err_count, raw_paste_uri, req.status_code, req.text))
err_count += 1
sleep(10)
I also had to change the general timeout to 10.
Btw, thanks for this tool, i've been using it for one year right now and I think it's great. If I could ask you a question, have you thought about using celery?
Hey sorry for the delayed response. Interesting you're hitting this now, will take a look at better exception handling for Pastebin, shouldn't be too hard to port over some of the other stuff. RE celery, it's not the craziest idea, but I don't think it's worth adding a requirement on Redis or RabbitMQ. It'd also require a pretty significant redesign of the existing code, and while it'd provide some benefit I'm not sure it'd be sufficient to justify the additional complexity, but regardless yeah the redis/rabbitmq requirement makes it a non-starter imo.
I've reached out to Pastebin to see if they're willing to work with us on some of this. I have the logic for retrying pastes, but even after adding logic similar to yours, I still encounter 429s pretty regularly and need to continue in an error state, but it should be functional now. If this really becomes/continues to be a problem we may need to create a system for retrying. I do like the idea of eventually transforming this into a more formalized queue, but right now I'm focused on maintenance and a bit of code restructuring for input classes that will eventually enable that stuff down the road. Anyways, going to push up changes, may do it as a dev branch just due to the other changes involved.
That sounds good. As for avoiding unnecessary dependencies on Redis or RabbitMQ, this could maybe be a solution, although I haven't tested it yet, and in my experience strange things happen when you're not using RabbitMQ as the broker. Looking forward to checking out the dev branch — maybe I could help implement Celery support.
@DrMeosch I appreciate the thought, definitely open to PRs if that's something you want to do. Making (or using) a basic system for queuing locally, then building it out so queue types can be specified (aka you can enable rabbitmq/redis backends) would probably be the best way to approach something like that, although it'd take a fair amount of work to do properly. FWIW if you do decide to implement that, Rabbit is probably a more robust solution than RQ or other similar alternatives for Redis. Anyways, I've slapped together something for pastebin, as well as added quite a few new things in #117 (on branch https://github.com/kevthehermit/PasteHunter/tree/feature/refactor-inputs)
Keep getting this error, unsure why. I've tried lowering paste limit and even let it sit a few days before trying again in case it was me hitting the PB api too much.
I'm still getting results from PB and gists. Is this causing any issues?