Open Benjamin-Loison opened 2 months ago
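For the next page, the continuation token decodes to the following nested protobuf messages, each shown as its decoded value followed by its type definition: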
{
"80226972": {
"2": "FEnotifications_inbox",
"3": "eitxdHZ3OGdvYUNoWkRTVVJqT0hKeGNEWkpZMFJIWjAxSmExRmpKVE5FRUFF"
}
}
{
"80226972": {
"type": "message",
"message_typedef": {
"2": {
"type": "string"
},
"3": {
"type": "string"
}
},
"field_order": [
"2",
"3"
]
}
}
{
"15": "qtvw8goaChZDSURjOHJxcDZJY0RHZ01Ja1FjJTNEEAE"
}
{
"15": {
"type": "string"
}
}
{
"365659573": {
"1": "CIDc8rqp6IcDGgMIkQc%3D",
"2": 1
}
}
{
"365659573": {
"type": "message",
"message_typedef": {
"1": {
"type": "string"
},
"2": {
"type": "int"
}
},
"field_order": [
"1",
"2"
]
}
}
{
"1": 1723220728000000,
"3": {
"1": 913
}
}
{
"1": {
"type": "int"
},
"3": {
"type": "message",
"message_typedef": {
"1": {
"type": "int"
}
},
"field_order": [
"1"
]
}
}
date -d @1723478091
Mon Aug 12 05:54:51 PM CEST 2024
So the pagination is quite clearly based on the timestamp.
date -d @1723220728
Fri Aug 9 06:25:28 PM CEST 2024
import requests
import blackboxprotobuf
import base64
import hashlib
import time
def getBase64Protobuf(message, typedef):
    """Serialize ``message`` as protobuf and return it as Base64 text.

    ``message`` and ``typedef`` are handed unchanged to
    ``blackboxprotobuf.encode_message``; the bytes it returns are encoded
    with standard Base64 and decoded to an ASCII ``str``.
    """
    rawBytes = blackboxprotobuf.encode_message(message, typedef)
    base64Bytes = base64.b64encode(rawBytes)
    return base64Bytes.decode('ascii')
# Authentication material shared by the requests below. The cookie values
# are censored in this snippet.
currentTime = int(time.time())
__Secure_1PSIDTS = 'sidts-CENSORED'
__Secure_1PSID = 'CENSORED'
__Secure_1PAPISID = 'CENSORED'

# SAPISIDHASH has the form "<timestamp>_<sha1 hex digest>", where the SHA-1
# input is "<timestamp> <__Secure-1PAPISID cookie> <origin>".
SAPISIDHASH = f'{currentTime}_' + hashlib.sha1(
    f'{currentTime} {__Secure_1PAPISID} https://www.youtube.com'.encode('ascii')
).hexdigest()

cookies = {
    '__Secure-1PSIDTS': __Secure_1PSIDTS,
    '__Secure-1PSID': __Secure_1PSID,
    '__Secure-1PAPISID': __Secure_1PAPISID,
}

headers = {
    'X-Goog-AuthUser': '1',
    'Origin': 'https://www.youtube.com',
    'Authorization': f'SAPISIDHASH {SAPISIDHASH}',
}
def getNotifications(timestamp):
    """Request the notifications inbox page identified by ``timestamp``.

    The continuation token is built from four nested protobuf messages.
    Each one is Base64-encoded by ``getBase64Protobuf`` and embedded as a
    string field of the next: ``timestamp`` (field 1) goes into field
    365659573, which goes into field 15, which goes into field 80226972
    next to the browse identifier ``FEnotifications_inbox``.

    The token is POSTed to the ``youtubei/v1/browse`` endpoint together
    with the module-level ``cookies`` and ``headers``.

    Returns the ``requests`` response object without inspecting it.
    """
    # Innermost message: only field 1 is set; field 3 is declared in the
    # type definition but left out of the message.
    timestampTypedef = {
        '1': {'type': 'int'},
        '3': {
            'type': 'message',
            'message_typedef': {'1': {'type': 'int'}},
            'field_order': ['1'],
        },
    }
    timestampToken = getBase64Protobuf({'1': timestamp}, timestampTypedef)

    # Second layer: field 2 is declared in the type definition but not set.
    wrapperTypedef = {
        '365659573': {
            'type': 'message',
            'message_typedef': {
                '1': {'type': 'string'},
                '2': {'type': 'int'},
            },
            'field_order': ['1', '2'],
        },
    }
    wrapperToken = getBase64Protobuf(
        {'365659573': {'1': timestampToken}},
        wrapperTypedef,
    )

    # Third layer: a single string field.
    fieldFifteenToken = getBase64Protobuf(
        {'15': wrapperToken},
        {'15': {'type': 'string'}},
    )

    # Outermost layer: browse identifier plus the nested token.
    browseTypedef = {
        '80226972': {
            'type': 'message',
            'message_typedef': {
                '2': {'type': 'string'},
                '3': {'type': 'string'},
            },
            'field_order': ['2', '3'],
        },
    }
    continuation = getBase64Protobuf(
        {'80226972': {'2': 'FEnotifications_inbox', '3': fieldFifteenToken}},
        browseTypedef,
    )

    payload = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240814.00.00',
            },
        },
        'continuation': continuation,
    }
    return requests.post(
        'https://www.youtube.com/youtubei/v1/browse',
        cookies=cookies,
        headers=headers,
        json=payload,
    )
# Ask for the page at the current time, expressed in microseconds.
# 1723478091000000
#1723484032000000)
response = getNotifications(int(time.time()) * 1_000_000)
#print('The Noid' in response.text)
#print('Chilla' in response.text)
#items = response.json()['onResponseReceivedEndpoints'][0]['openPopupAction']['popup']['multiPageMenuRenderer']['sections'][0]['multiPageMenuNotificationSectionRenderer']['items']
items = response.json()['onResponseReceivedEndpoints'][0]['appendContinuationItemsAction']['continuationItems']

# Print the short message and identifier of every notification entry; other
# entries are skipped.
for item in items:
    #print(json.dumps(item, indent = 4))
    if 'notificationRenderer' not in item:
        continue
    notificationRenderer = item['notificationRenderer']
    print(notificationRenderer['shortMessage']['simpleText'])
    print(notificationRenderer['notificationId'])
    print()
    #break
import requests
import time
import hashlib
import json
# Authentication material shared by the requests below. The cookie values
# are censored in this snippet.
currentTime = int(time.time())
__Secure_1PSIDTS = 'sidts-CENSORED'
__Secure_1PSID = 'CENSORED'
__Secure_1PAPISID = 'CENSORED'

# SAPISIDHASH has the form "<timestamp>_<sha1 hex digest>", where the SHA-1
# input is "<timestamp> <__Secure-1PAPISID cookie> <origin>".
SAPISIDHASH = f'{currentTime}_' + hashlib.sha1(
    f'{currentTime} {__Secure_1PAPISID} https://www.youtube.com'.encode('ascii')
).hexdigest()

cookies = {
    '__Secure-1PSIDTS': __Secure_1PSIDTS,
    '__Secure-1PSID': __Secure_1PSID,
    '__Secure-1PAPISID': __Secure_1PAPISID,
}

headers = {
    'Origin': 'https://www.youtube.com',
    'Authorization': f'SAPISIDHASH {SAPISIDHASH}',
}
# Request body for the first page: the notifications inbox is addressed by
# its browse identifier. Later pages reuse this dict with a 'continuation'
# token added.
json_data = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': '2.20240814.00.00',
        },
    },
    'browseId': 'FEnotifications_inbox',
}

response = requests.post('https://www.youtube.com/youtubei/v1/browse', cookies=cookies, headers=headers, json=json_data)
# The first page is delivered as a popup menu; continuation pages use a
# different envelope (see the request at the bottom of the loop).
items = response.json()['onResponseReceivedEndpoints'][0]['openPopupAction']['popup']['multiPageMenuRenderer']['sections'][0]['multiPageMenuNotificationSectionRenderer']['items']

# Every entry of every page, including the trailing continuation entries.
allItems = []
while True:
    allItems += items
    for item in items:
        if 'notificationRenderer' in item:
            notificationRenderer = item['notificationRenderer']
            print(notificationRenderer['shortMessage']['simpleText'])
            print(notificationRenderer['notificationId'])
            print()

    # The next-page token is looked for on the last entry of the page. It is
    # read explicitly rather than through the loop variable left over from
    # the `for` above: with an empty first page that variable is undefined
    # (NameError), and with an empty continuation page it still points at
    # the previous page's entry, so the same token would be sent forever.
    lastItem = items[-1] if items else {}
    if 'continuationItemRenderer' not in lastItem:
        break
    json_data['continuation'] = lastItem['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']
    items = requests.post('https://www.youtube.com/youtubei/v1/browse', cookies=cookies, headers=headers, json=json_data).json()['onResponseReceivedEndpoints'][0]['appendContinuationItemsAction']['continuationItems']

print(len(allItems))
The script above, which follows the continuation token returned with each page, works fine.
Related to Webscrap_any_website/issues/29.
Would solve the Stack Overflow question 78873274.
Related to #260 and #9.