Benjamin-Loison / YouTube-operational-API

YouTube operational API works when YouTube Data API v3 fails.
367 stars 42 forks source link

Add `commentCount` web-scraping alternative #285

Open Benjamin-Loison opened 1 month ago

Benjamin-Loison commented 1 month ago
# POST a continuation token to the youtubei/v1/next endpoint as a JSON body.
curl 'https://www.youtube.com/youtubei/v1/next' \
    --header 'Content-Type: application/json' \
    --data-raw '{"context": {"client": {"clientName": "WEB", "clientVersion": "2.20240702.09.00"}}, "continuation": "Eg0SC204Xzh0TXptcFRnGAYyJSIRIgttOF84dE16bXBUZzAAeAJCEGNvbW1lbnRzLXNlY3Rpb24%3D"}'
import requests
import json

# POST a hard-coded continuation token to the youtubei/v1/next endpoint and
# report whether the string '8,090' (presumably the video's comment count when
# this was written) appears anywhere in the JSON response.
url = 'https://www.youtube.com/youtubei/v1/next'

# Request body: the client context plus the continuation token to resolve.
payload = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': '2.20240325.01.00'
        }
    },
    'continuation': 'Eg0SC204Xzh0TXptcFRnGAYyJSIRIgttOF84dE16bXBUZzAAeAJCEGNvbW1lbnRzLXNlY3Rpb24='
}

# An explicit timeout is required: without one, requests can wait forever on a
# stalled connection.
response = requests.post(url, json = payload, timeout = 10)
# Surface HTTP errors explicitly instead of silently printing False for an
# error payload.
response.raise_for_status()
data = response.json()
# Uncomment to inspect the whole response:
#print(json.dumps(data, indent = 4))
print('8,090' in str(data))

Related to #91.

Benjamin-Loison commented 1 month ago

Most simplified version:

import requests
import json
import blackboxprotobuf
import base64

# Endpoint that the continuation request at the end of this script is POSTed to.
url = 'https://www.youtube.com/youtubei/v1/next'

def getBase64Protobuf(message, typedef):
    """Return `message` encoded as protobuf and then as Base64 text.

    `message` and `typedef` are passed unchanged to
    `blackboxprotobuf.encode_message`. The resulting bytes are encoded with
    the standard (not URL-safe) Base64 alphabet, padding included, and
    returned as an ASCII `str`.
    """
    serialized = blackboxprotobuf.encode_message(message, typedef)
    encoded = base64.b64encode(serialized)
    return str(encoded, 'ascii')

# Protobuf message, keyed by field number, that is serialized into the
# continuation token. The video ID appears twice: in field 2.2 and in
# field 6.4.4.
message = {
    '2': {
        '2': 'm8_8tMzmpTg'
    },
    # NOTE(review): the meaning of the value 6 is not documented here.
    '3': 6,
    '6': {
        '4': {
            '4': 'm8_8tMzmpTg',
        },
    }
}

# Type definition handed to blackboxprotobuf; it mirrors the shape of
# `message` above (nested messages holding strings, plus one int field).
typedef = {
    '2': {
        'type': 'message',
        'message_typedef': {
            '2': {
                'type': 'string'
            }
        },
        'field_order': [
            '2'
        ]
    },
    '3': {
        'type': 'int'
    },
    '6': {
        'type': 'message',
        'message_typedef': {
            '4': {
                'type': 'message',
                'message_typedef': {
                    '4': {
                        'type': 'string'
                    },
                },
                'field_order': [
                    '4',
                ]
            },
        },
        'field_order': [
            '4',
        ]
    }
}

continuation = getBase64Protobuf(message, typedef)

# Request body: the client context plus the generated continuation token.
payload = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': '2.20240325.01.00'
        }
    },
    'continuation': continuation
}

# An explicit timeout is required: without one, requests can wait forever on a
# stalled connection.
response = requests.post(url, json = payload, timeout = 10)
# Surface HTTP errors explicitly instead of silently printing False for an
# error payload.
response.raise_for_status()
data = response.json()
# Uncomment to inspect the whole response:
#print(json.dumps(data, indent = 4))
# True when the string '8,090' (presumably the comment count when this was
# written) appears anywhere in the response.
print('8,090' in str(data))
Benjamin-Loison commented 1 month ago

Related to #265.

Benjamin-Loison commented 1 month ago
Continuation token generated by the simplified script above: Eg0SC204Xzh0TXptcFRnGAYyDyINIgttOF84dE16bXBUZw==
Benjamin-Loison commented 1 month ago

In addition to #265:

diff --git a/common.php b/common.php
index 2647852..c829355 100644
--- a/common.php
+++ b/common.php
@@ -516,7 +516,7 @@
         return $result['contents']['twoColumnBrowseResultsRenderer']['tabs'];
     }

-    function getContinuationJson($continuationToken)
+    function getContinuationJson($continuationToken, $url = 'https://www.youtube.com/youtubei/v1/browse?key=' . UI_KEY)
     {
         $containsVisitorData = str_contains($continuationToken, ',');
         if($containsVisitorData)
@@ -549,7 +549,7 @@
             'http' => $http
         ];

-        $result = getJSON('https://www.youtube.com/youtubei/v1/browse?key=' . UI_KEY, $httpOptions);
+        $result = getJSON($url, $httpOptions);
         return $result;
     }

 ?>
diff --git a/videos.php b/videos.php
index 7047138..e161627 100644
--- a/videos.php
+++ b/videos.php
@@ -19,6 +19,6 @@

     include_once 'common.php';

+    includeOnceProtos(['TwoString', 'FourString', 'FourFourString', 'VideoComments']);
+
     $realOptions = [
         'id',
         'status',
@@ -391,11 +395,27 @@
         }

         if ($options['statistics']) {
+            $twoString = new \TwoString();
+            $twoString->setTwo($id);
+            $fourString = new \FourString();
+            $fourString->setFour($id);
+            $fourFourString = new \FourFourString();
+            $fourFourString->setFour($fourString);
+
+            $videoComments = new \VideoComments();
+            $videoComments->setTwo($twoString);
+            $videoComments->setThree(6);
+            $videoComments->setSix($fourFourString);
+
+            $continuation = base64_encode($videoComments->serializeToString());
+            $videoCommentsJson = getContinuationJson($continuation, 'https://www.youtube.com/youtubei/v1/next');
+
             $json = getJSONFromHTMLForcingLanguage("https://www.youtube.com/watch?v=$id");
             preg_match('/like this video along with ([0-9,]+) other people/', $json['contents']['twoColumnWatchNextResults']['results']['results']['contents'][0]['videoPrimaryInfoRenderer']['videoActions']['menuRenderer']['topLevelButtons'][0]['segmentedLikeDislikeButtonViewModel']['likeButtonViewModel']['likeButtonViewModel']['toggleButtonViewModel']['toggleButtonViewModel']['defaultButtonViewModel']['buttonViewModel']['accessibilityText'], $viewCount);
             $statistics = [
                 'viewCount' => getIntValue($json['playerOverlays']['playerOverlayRenderer']['videoDetails']['playerOverlayVideoDetailsRenderer']['subtitle']['runs'][2]['text'], 'view'),
                 'likeCount' => getIntValue($viewCount[1]),
+                'commentCount' => getIntValue($videoCommentsJson['onResponseReceivedEndpoints'][0]['reloadContinuationItemsCommand']['continuationItems'][0]['commentsHeaderRenderer']['countText']['runs'][0]['text'])
             ];
             $item['statistics'] = $statistics;
         }

Applying this diff together with the attached prototypes.zip solves this issue.