googleapis / google-api-python-client

🐍 The official Python client library for Google's discovery based APIs.
https://googleapis.github.io/google-api-python-client/docs/
Apache License 2.0
7.67k stars 2.4k forks source link

HttpError 404 when requesting https://indexing.googleapis.com/v3/urlNotifications/metadata?url= #2481

Open MumukoQAQ opened 1 week ago

MumukoQAQ commented 1 week ago

I submitted my URL to the urlNotifications endpoint to notify Google of its removal, but when I check the submission with urlNotifications().getMetadata(url=url), it still returns: HttpError 404, Details: "Requested entity was not found."

ohmayr commented 4 days ago

@MumukoQAQ Can you please provide more information related to the issue that you're encountering? Probably a code snippet to replicate the issue, hiding any confidential information, and the error that you're receiving with a stack trace.

MumukoQAQ commented 3 days ago
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime

class GooleIndexingApi:
    """Small helper around the Google Indexing and Search Console services.

    An instance holds a list of URLs and a request type, can publish an
    update/removal notification for every URL in a single batch request,
    look up the notification metadata of a URL, and list pages returned by
    a Search Console search-analytics query.

    Attributes:
        service: Client built for the ``indexing`` ``v3`` API.
        search_console: Client built for the ``searchconsole`` ``v1`` API.
        url_list: URLs to notify about.
        req_type: ``1`` selects ``URL_UPDATED``; any other value selects
            ``URL_DELETED``.
        sucess_count: Number of batch items whose callback reported success.
        fail_count: Number of batch items whose callback reported an error.
    """

    URL_UPDATE = "URL_UPDATED"
    URL_DELETED = "URL_DELETED"

    def __init__(self, json_key_file: str, url_list: list, req_type: int):
        """Build both API clients from a service-account key file.

        Args:
            json_key_file: Path to the service-account JSON key.
            url_list: URLs to send notifications for.
            req_type: ``1`` for an update notification, anything else for a
                removal notification.
        """
        scopes = [
            "https://www.googleapis.com/auth/indexing",
            "https://www.googleapis.com/auth/webmasters.readonly",
        ]
        credentials = ServiceAccountCredentials.from_json_keyfile_name(json_key_file, scopes=scopes)
        self.service = build('indexing', 'v3', credentials=credentials)
        self.search_console = build('searchconsole', 'v1', credentials=credentials)
        self.url_list = url_list
        self.req_type = req_type
        self.sucess_count = 0
        self.fail_count = 0

    def structure_urls(self) -> list:
        """Return one ``{"url": ..., "type": ...}`` request body per URL."""
        body_type = self.URL_UPDATE if self.req_type == 1 else self.URL_DELETED
        return [{"url": url, "type": body_type} for url in self.url_list]

    def batch_publish(self):
        """Publish a notification for every URL in one batch HTTP request.

        The outcome of each item is delivered to ``batch_callback``, which
        updates ``sucess_count`` / ``fail_count``.
        """
        batch = self.service.new_batch_http_request()
        for i, url_data in enumerate(self.structure_urls()):
            batch.add(
                self.service.urlNotifications().publish(body=url_data),
                request_id=str(i),
                callback=self.batch_callback,
            )
        batch.execute()

    def check_url(self, url):
        """Fetch, print and return the notification metadata for ``url``.

        Errors raised by ``execute()`` are not caught here and propagate to
        the caller.

        NOTE(review): the Indexing API reference appears to say that
        ``getMetadata`` only knows URLs that were part of a *successful*
        notification, so a 404 here may mean the earlier publish failed --
        check the failures reported by ``batch_callback``. Confirm against
        the API documentation.
        """
        response = self.service.urlNotifications().getMetadata(url=url).execute()
        print(response)
        return response

    def get_indexing_list(self, row_limit: int, expression: str,
                          site_url: str = 'my_domain', start_date: str = '2020-01-01'):
        """Print and return pages whose URL contains ``expression``.

        Runs a Search Console search-analytics query grouped by page, from
        ``start_date`` up to today.

        Args:
            row_limit: Value sent as ``rowLimit`` in the query.
            expression: Substring the page URL must contain.
            site_url: Search Console property to query.
            start_date: First day of the queried range (``YYYY-MM-DD``).

        Returns:
            The first key of every returned row; an empty list when the
            response carries no ``rows`` entry.
        """
        end_date = datetime.today().strftime('%Y-%m-%d')

        request = {
            'startDate': start_date,
            'endDate': end_date,
            'dimensions': ['page'],
            'dimensionFilterGroups': [{
                'filters': [{
                    'dimension': 'page',
                    'operator': 'contains',
                    'expression': expression
                }]
            }],
            'rowLimit': row_limit
        }
        response = self.search_console.searchanalytics().query(siteUrl=site_url, body=request).execute()
        # Fall back to an empty list so a response without 'rows' does not
        # raise KeyError.
        index_urls = [row['keys'][0] for row in response.get('rows', [])]
        print(index_urls)
        return index_urls

    def batch_callback(self, request_id, response, exception):
        """Record the outcome of a single item of the batch request.

        Args:
            request_id: Id given to the item when it was added to the batch.
            response: Response of the item; unused here.
            exception: Error of the item, or ``None`` when it succeeded.
        """
        if exception is not None:
            self.fail_count += 1
            # Surface the reason instead of silently counting: a failed
            # publish is otherwise invisible to the caller.
            print(f"Request {request_id} failed: {exception}")
            return
        self.sucess_count += 1
if __name__ == '__main__':
    # Placeholder inputs: replace with a real key file and real URLs.
    key_path = "my_json_key.json"
    targets = ["my_url"]
    # Any value other than 1 makes the client send URL_DELETED notifications.
    mode = 0

    client = GooleIndexingApi(key_path, targets, mode)
    client.batch_publish()

    # Look up the notification metadata of every URL just published.
    for target in targets:
        client.check_url(target)

This is my code. After I notified Google to remove my URL, I get the following error when I check it:

HttpError: <HttpError 404 when requesting https://indexing.googleapis.com/v3/urlNotifications/metadata?url=my_url&alt=json returned "Requested entity was not found.". Details: "Requested entity was not found.">

At first I thought there was just a delay, but when I checked the same URL again a few days later, the result was still the same.