Open Benjamin-Loison opened 6 months ago
Current reduced continuation:
message = {
"119693434": {
"3": "CikqJwoYVUN0NVVTWXB6ek1DWWhraXJWUUdId0tREgs5MXEzbnNoRVV4cxoT6qjduQENCgs5MXEzbnNoRVV4cyACMAA%3D",
"20": 1715521654955377,
}
}
typedef = {
"119693434": {
"type": "message",
"message_typedef": {
"3": {
"type": "string"
},
"20": {
"type": "int"
},
},
"field_order": [
"3",
"20"
]
}
}
Seems to be the most minimized:
import requests
import json
import blackboxprotobuf
import base64
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat'
headers = {
'Content-Type': 'application/json',
}
def getBase64Protobuf(message, typedef):
    """Serialize `message` to protobuf and return it as URL-safe base64.

    `message` and `typedef` are passed unchanged to
    `blackboxprotobuf.encode_message`. The encoded result is returned as
    `bytes` using the `-`/`_` alphabet instead of `+`/`/`.
    """
    serialized = blackboxprotobuf.encode_message(message, typedef)
    # Same output as `base64.b64encode(serialized, altchars = b'-_')`.
    return base64.urlsafe_b64encode(serialized)
message = {
'1': {
'5': {
'1': 'UCt5USYpzzMCYhkirVQGHwKQ',
'2': {
'7': 6144432111596302641,
'15': 115
}
}
},
'4': 2,
}
typedef = {
'1': {
'type': 'message',
'message_typedef': {
'5': {
'type': 'message',
'message_typedef': {
'1': {
'type': 'string'
},
'2': {
'type': 'message',
'message_typedef': {
'7': {
'type': 'fixed64'
},
'15': {
'type': 'int'
}
},
'field_order': [
'7',
'15'
]
}
},
'field_order': [
'1',
'2'
]
}
},
'field_order': [
'5'
]
},
'4': {
'type': 'int'
},
}
#three = 'CikqJwoYVUN0NVVTWXB6ek1DWWhraXJWUUdId0tREgs5MXEzbnNoRVV4cxoT6qjduQENCgs5MXEzbnNoRVV4cyACMAA%3D'
three = getBase64Protobuf(message, typedef)
message = {
'119693434': {
'3': three,
'20': 1715522824544596,
}
}
typedef = {
'119693434': {
'type': 'message',
'message_typedef': {
'3': {
'type': 'string'
},
'20': {
'type': 'int'
},
},
'field_order': [
'3',
'20'
]
}
}
#continuation = '0ofMyAOUAhpeQ2lrcUp3b1lWVU4wTlZWVFdYQjZlazFEV1docmFYSldVVWRJZDB0UkVnczVNWEV6Ym5Ob1JWVjRjeG9UNnFqZHVRRU5DZ3M1TVhFemJuTm9SVlY0Y3lBQ01BQSUzRCjluNHtlIiGAzAAQAJKbggAGAAgAEoKCAEQABgAIAAwAFDJxOSKl4iGA1gDeACiAQCqAQwQABoAIgAqBAgAEACwAQDAAQDIAcnE5IqXiIYD4gEMCO39grIGEI7Y058D6AEA8AEA-AEAiAIAkAIAmgIMCLCUg7IGEP3Q2fcBULCX_u2UiIYDWOPLlOyUiIYDggEECAQYAYgBAJoBAggAoAGMw6vpoYiGA7oBAggK0AH_kIOyBg=='
continuation = getBase64Protobuf(message, typedef)
json_ = {
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20240509.00.00'
}
},
'continuation': continuation
}
data = requests.post(url, headers = headers, json = json_).json()
dataStr = json.dumps(data, indent = 4)
#print(dataStr)
print('invalidationContinuationData' in dataStr)
However, it now seems to return a very large response, although one that at least still contains invalidationContinuationData...
Well in fact it still contains what we look for:
"reactionBuckets": [
{
"totalReactions": 0,
"duration": {
"seconds": "1"
},
"intensityScore": 1
},
{
"totalReactions": 0,
"duration": {
"seconds": "1"
},
"intensityScore": 1
},
{
"totalReactions": 0,
"duration": {
"seconds": "1"
},
"intensityScore": 1
},
{
"totalReactions": 0,
"duration": {
"seconds": "1"
},
"intensityScore": 1
},
{
"totalReactions": 4,
"duration": {
"seconds": "1"
},
"intensityScore": 0.75,
"reactionsData": [
{
"unicodeEmojiId": "\u2764",
"reactionCount": 1
},
{
"unicodeEmojiId": "\ud83d\ude04",
"reactionCount": 1
},
{
"unicodeEmojiId": "\ud83c\udf89",
"reactionCount": 1
},
{
"unicodeEmojiId": "\ud83d\ude33",
"reactionCount": 1
}
]
}
],
Note that only field 20 has to be updated to keep it working.
To check that my solution is permanent:
import time
from datetime import datetime
initialDatetime = datetime.now()
print(initialDatetime)
# Re-send the very same request every 10 seconds and stop as soon as the
# response no longer mentions `invalidationContinuationData`.
isNeedleInData = True
while isNeedleInData:
    data = requests.post(url, headers = headers, json = json_).json()
    dataStr = json.dumps(data, indent = 4)
    isNeedleInData = 'invalidationContinuationData' in dataStr
    print(isNeedleInData)
    if isNeedleInData:
        time.sleep(10)
print(datetime.now() - initialDatetime)
2024-05-12 16:12:43.196524
True
...
True
False
0:04:34.450228
2024-05-12 16:19:14.196332
True
...
True
False
0:04:34.804377
So, allowing for the time it takes me to update the Python script, it seems pretty clear that the request expires about 5 minutes after it is generated.
data = base64.b64decode('0ofMyAOUAhpeQ2lrcUp3b1lWVU4wTlZWVFdYQjZlazFEV1docmFYSldVVWRJZDB0UkVnczVNWEV6Ym5Ob1JWVjRjeG9UNnFqZHVRRU5DZ3M1TVhFemJuTm9SVlY0Y3lBQ01BQSUzRCjluNHtlIiGAzAAQAJKbggAGAAgAEoKCAEQABgAIAAwAFDJxOSKl4iGA1gDeACiAQCqAQwQABoAIgAqBAgAEACwAQDAAQDIAcnE5IqXiIYD4gEMCO39grIGEI7Y058D6AEA8AEA-AEAiAIAkAIAmgIMCMuwg7IGEJmLrNsCULCX_u2UiIYDWOPLlOyUiIYDggEECAQYAYgBAJoBAggAoAHS24qor4iGA7oBAggK0AGBr4OyBg%3D%3D'.replace('%3D', '='), altchars = '-_')
message, typedef = blackboxprotobuf.decode_message(data)
encodedTimestamp = message['119693434']['20']
currentTimestamp = time.time() * 1_000_000
print(f'{currentTimestamp = } {encodedTimestamp = } {(encodedTimestamp - currentTimestamp) / 1_000_000 = }')
currentTimestamp = 1715525331895160.0 encodedTimestamp = 1715525717765586 (encodedTimestamp - currentTimestamp) / 1_000_000 = 385.870426
so let us use 400 seconds.
import requests
import json
import blackboxprotobuf
import base64
import time
from datetime import datetime
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat'
headers = {
'Content-Type': 'application/json',
}
def getBase64Protobuf(message, typedef):
    """Encode `message` with `typedef` and return URL-safe base64 bytes.

    The protobuf serialization is delegated to
    `blackboxprotobuf.encode_message`; its output is then base64-encoded
    with `-` and `_` in place of `+` and `/`.
    """
    encoded = blackboxprotobuf.encode_message(message, typedef)
    # Equivalent to `base64.b64encode(encoded, altchars = b'-_')`.
    return base64.urlsafe_b64encode(encoded)
message = {
'1': {
'5': {
'1': 'UCt5USYpzzMCYhkirVQGHwKQ',
'2': {
'7': 6144432111596302641,
'15': 115
}
}
},
'4': 2,
}
typedef = {
'1': {
'type': 'message',
'message_typedef': {
'5': {
'type': 'message',
'message_typedef': {
'1': {
'type': 'string'
},
'2': {
'type': 'message',
'message_typedef': {
'7': {
'type': 'fixed64'
},
'15': {
'type': 'int'
}
},
'field_order': [
'7',
'15'
]
}
},
'field_order': [
'1',
'2'
]
}
},
'field_order': [
'5'
]
},
'4': {
'type': 'int'
},
}
three = getBase64Protobuf(message, typedef)
message = {
'119693434': {
'3': three,
}
}
typedef = {
'119693434': {
'type': 'message',
'message_typedef': {
'3': {
'type': 'string'
},
'20': {
'type': 'int'
},
},
'field_order': [
'3',
'20'
]
}
}
json_ = {
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20240509.00.00'
}
},
}
initialDatetime = datetime.now()
print(initialDatetime)
# Poll every 10 seconds, rebuilding the continuation before each request, and
# stop once the response no longer mentions `invalidationContinuationData`.
isNeedleInData = True
while isNeedleInData:
    # Field 20 is refreshed to 400 seconds from now, in microseconds.
    message['119693434']['20'] = int((time.time() + 400) * 1_000_000)
    continuation = getBase64Protobuf(message, typedef)
    json_['continuation'] = continuation
    data = requests.post(url, headers = headers, json = json_).json()
    dataStr = json.dumps(data, indent = 4)
    isNeedleInData = 'invalidationContinuationData' in dataStr
    print(isNeedleInData, datetime.now() - initialDatetime)
    if isNeedleInData:
        time.sleep(10)
2024-05-12 16:53:58.399052
True 0:00:00.151504
True 0:00:10.303548
...
True 0:12:44.219750
True 0:12:54.459445
So my algorithm always works.
What about other livestreams? After that, I will have to consider extracting the given reactions.
print(json.dumps(data['frameworkUpdates']['entityBatchUpdate']['mutations'][0]['payload']['emojiFountainDataEntity']['reactionBuckets'], indent = 4))
[
{
"totalReactions": 0,
"duration": {
"seconds": "1"
},
"intensityScore": 1
},
{
"totalReactions": 0,
"duration": {
"seconds": "1"
},
"intensityScore": 1
},
{
"totalReactions": 1,
"duration": {
"seconds": "1"
},
"intensityScore": 0.75,
"reactionsData": [
{
"unicodeEmojiId": "\u2764",
"reactionCount": 1
}
]
},
{
"totalReactions": 3,
"duration": {
"seconds": "1"
},
"intensityScore": 0.75,
"reactionsData": [
{
"unicodeEmojiId": "\ud83c\udf89",
"reactionCount": 1
},
{
"unicodeEmojiId": "\ud83d\ude04",
"reactionCount": 1
},
{
"unicodeEmojiId": "\ud83d\ude33",
"reactionCount": 1
}
]
},
{
"totalReactions": 1,
"duration": {
"seconds": "1"
},
"intensityScore": 0.75,
"reactionsData": [
{
"unicodeEmojiId": "\ud83d\udcaf",
"reactionCount": 1
}
]
}
]
To test on another channel I have to wait 24 hours, the email I have in mind is:
-----BEGIN PGP MESSAGE-----
hF4DTQa9Wom5MBgSAQdAb2t1oINXCE1UIfQnLLKBHkKSRz8Zjs6b7Vq9HE3j/BYw
Xk84uwcYIeqUDZGy4HPwJ5P3Uffk+gDoVPXR1rKniIl6huUd7b6yofeVvwewGZMj
0lIBhyFmJeN0BwVs8Eud52pclueEine7IjmYMzNRHUONnyWAylGDwSjHRXiGVFrr
SDfcQ4MpVn7xZNycRXaSctYGOE9/0QJlhy0a+hKGx5geoH6V
=JTsI
-----END PGP MESSAGE-----
I set myself a reminder in 24 hours. Here I am back more than 24 hours later.
The channel I have been using according to above code snippets is UCt5USYpzzMCYhkirVQGHwKQ which is my personal channel using the email:
-----BEGIN PGP MESSAGE-----
hF4DTQa9Wom5MBgSAQdA4/4XJMm1ncdoVbGyg675TMGBaQd4eRdYsNPNOVdYzWEw
bmLCLaDkWjlygMoqAMyQ8r12omkrs1/ExFU1gOR3R9rxGJ1wdFQ3OnCM0aMfQgQM
0lgBoiBEPmo3R7N3QS5vRYZW8T8oAgm2Otb3IOiToDk84Qxi4riEuQkLUSMH0TCO
hr7SHBCXGEzULFil6yE+9/Hw/LL/3KxnIdeafjW6GP/K7CPYu2gbiTC9
=NCAb
-----END PGP MESSAGE-----
The above code snippet still seems to work, as it returns True.
I am just suspicious about supporting another livestream from the same channel without modifying the algorithm parameters: as far as I know, multiple livestreams can happen simultaneously, and without a further parameter the algorithm cannot select a precise one.
So previously I focused on the video id:
-----BEGIN PGP MESSAGE-----
hF4DTQa9Wom5MBgSAQdAgIxTq9Qvav8MZ2MLdcpCvwepeGPP7E+wahfhjsqZqCsw
2kAKspKMu5OYgoWpq7N0a/ANPzll/gUMV1/bso7zlegUPXB3ozWBxfBGK0Mv3EcJ
0kYBl9YGfL1pGN+VL9RD/3iiHHuvdHjKDQx/w1MC0n7YF7BXAy0Ch4IoON5Fjnas
Gf4G2Vmx07cZZGiDUdYHGrSWmC5emvEn
=8i1B
-----END PGP MESSAGE-----
let us generate and try another one.
Now testing the video id:
-----BEGIN PGP MESSAGE-----
hF4DTQa9Wom5MBgSAQdAh3sfOUkkVdkYBQKfLySgUpTkP421srGc9yEDnbl2f2Ew
u+qokOceuOuZ5ANrNi9FrAu3j4kKnLCvyNCFPhRv0YSZwwEnVFMeJw5JI1YUw2An
0kYBZj8VAkpcx56ViZXDjECKQVcM2qZQzVXyrpFpVAQNRu4tKncaimefStfBuESI
/JSz3tf19bbrbBpgq/xpn9B6oTc+N+r0
=Oxv+
-----END PGP MESSAGE-----
As with print(json.dumps(data['frameworkUpdates']['entityBatchUpdate']['mutations'][0]['payload']['emojiFountainDataEntity']['reactionBuckets'], indent = 4))
I added reactions to previous video id and noticed them correctly, so let us check what parameter we have to change to focus on the new video.
0ofMyAOSAhpeQ2lrcUp3b1lWVU4wTlZWVFdYQjZlazFEV1docmFYSldVVWRJZDB0UkVnczJaVkJHUzJscVZXVm5SUm9UNnFqZHVRRU5DZ3MyWlZCR1MybHFWV1ZuUlNBQ01BQSUzRCjHndHCr46GAzAAQAJKbAgAGAAgAEoIEAAYACAAMABQoL_CyLCOhgNYA3gAogEAqgEMEAAaACIAKgQIABAAsAEAwAEAyAGgv8LIsI6GA-IBDAj0_Y-yBhD0huy3AugBAPABAPgBAIgCAJACAJoCDAj0_Y-yBhD0huy3AlCT5djCr46GA1jm5NjCr46GA4IBBAgEGAGIAQCaAQIIAKABmfKmyrCOhgO6AQIICtAB8v2PsgY=
CikqJwoYVUN0NVVTWXB6ek1DWWhraXJWUUdId0tREgs2ZVBGS2lqVWVnRRoT6qjduQENCgs2ZVBGS2lqVWVnRSACMAA=
Needed this time:
message = {
'1': {
'5': {
'1': 'UCt5USYpzzMCYhkirVQGHwKQ',
'2': 'VIDEO_ID'
}
},
'4': 2,
}
typedef = {
'1': {
'type': 'message',
'message_typedef': {
'5': {
'type': 'message',
'message_typedef': {
'1': {
'type': 'string'
},
'2': {
'type': 'string'
}
},
'field_order': [
'1',
'2'
]
}
},
'field_order': [
'5'
]
},
'4': {
'type': 'int'
},
}
three = getBase64Protobuf(message, typedef)
I cannot test with the previous video, as I now get:
Le chat a été désactivé pour cette diffusion en direct.
Let us generate a new video id.
It seems to work as wanted.
I tested with another channel and it works fine too. As a result, I answered the Stack Overflow question.
As requested by someone in the Stack Overflow question 78419245.
https://www.youtube.com/live_chat?v=VIDEO_ID
When fetching a reaction:
When there are no reactions to fetch:
When continuation seems to have expired:
So invalidationContinuationData seems to be a good string to look for to check the request validity.
Related to #256, as the request expires after a few minutes.