pulumi / pulumi-cloud-requests

Welcome to the public issue tracker for Pulumi Cloud (app.pulumi.com)! Feature requests and bug reports welcome!
11 stars 4 forks source link

Paging broken on enumerating resources (and other endpoints) #373

Open dmorel opened 2 months ago

dmorel commented 2 months ago

I was scripting against the API and I think there's a bug in the pagination of the search/resources endpoint: the payload of the last full page of results contains no next URL, so the list comes back truncated (page n-1 should point to page n, but page n-1 has no next link). The resources endpoint shows the same behaviour, which makes sense; I haven't tested the others yet. Here's a script that demonstrates the behaviour, with a workaround:

#!/usr/bin/env python3

import requests
import urllib.parse
import coloredlogs  # type: ignore
import logging
import os

# Install the coloredlogs handler at INFO level for the whole script.
coloredlogs.install(level="INFO")

# API token and organization name come from the environment; each is None
# when its variable is unset.
token = os.environ.get("PULUMI_API_TOKEN")
organization = os.environ.get("PULUMI_ORGANIZATION")

# Headers sent with every API request made by this script.
headers = dict(
    Accept="application/json",
    Authorization=f"token {token}",
)

# Number of resources requested per page of search results.
page_size = 100

def get_stacks() -> list[dict[str, str]]:
    """Return the stacks visible to the token for the configured organization.

    Calls the ``/api/user/stacks`` endpoint using the module-level
    ``headers`` and ``organization``.

    Returns:
        The list stored under the ``"stacks"`` key of the JSON response, or
        an empty list when that key is absent.

    Raises:
        SystemExit: With exit code 1 when the API does not answer HTTP 200.
    """
    r = requests.get(
        "https://api.pulumi.com/api/user/stacks",
        # Let requests build the query string so the organization name is
        # URL-encoded (a None value is omitted from the query entirely).
        params={"organization": organization},
        headers=headers,
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=30,
    )
    if r.status_code != 200:
        logging.error(f"API call failed with status code {r.status_code}")
        # `exit` is a convenience injected by the `site` module and is not
        # guaranteed to exist; raising SystemExit has the same effect.
        raise SystemExit(1)
    return r.json().get("stacks", [])

def _increment_page_in_url(url: str) -> str:
    url_parts = urllib.parse.urlparse(url)
    query_params = dict(urllib.parse.parse_qsl(url_parts.query))
    if "page" in query_params:
        query_params["page"] = str(int(query_params["page"]) + 1)
    new_query_string = urllib.parse.urlencode(query_params)
    new_url_parts = url_parts._replace(query=new_query_string)
    new_url = urllib.parse.urlunparse(new_url_parts)
    return new_url

def get_next_records(
    url: str = "", query: str = ""
) -> tuple[list[dict[str, str]], str]:
    """Fetch one page of resource search results.

    Args:
        url: URL of the page to fetch. When empty, the URL of the first page
            of the organization's resource search is built from ``query``
            and the module-level ``page_size``.
        query: Search query inserted verbatim into the first-page URL; it is
            ignored when ``url`` is given.

    Returns:
        A ``(resources, next_url)`` tuple. ``next_url`` is the empty string
        when there is no further page to fetch.

    Raises:
        SystemExit: With exit code 1 when the API does not answer HTTP 200.
    """
    if not url:
        url = (
            f"https://api.pulumi.com/api/orgs/{organization}/search/resources?"
            f"size={page_size}&page=1&query={query}"
        )
    r = requests.get(
        url,
        headers=headers,
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=30,
    )
    # Same failure handling as get_stacks: an error response must not be
    # mistaken for an empty (and therefore final) page of results.
    if r.status_code != 200:
        logging.error(f"API call failed with status code {r.status_code}")
        raise SystemExit(1)
    payload = r.json()
    # Missing or null entries are normalised so the declared return types
    # hold and the length check below cannot fail on None.
    resources = payload.get("resources") or []
    next_url = (payload.get("pagination") or {}).get("next") or ""

    # HACK: work around faulty pagination. The API returns no next URL on the
    # second-to-last page, so when the page is full we force one more lookup
    # by incrementing the page number ourselves.
    if not next_url and len(resources) == page_size:
        logging.warning("will force last page lookup")
        next_url = _increment_page_in_url(url)

    return resources, next_url

# Walk every stack of the organization and count its resources, following
# the pagination links returned by get_next_records.
stacks = get_stacks()
logging.info(f"found {len(stacks)}")

for n, stack in enumerate(stacks):
    logging.info(
        f"--- stack {n + 1}: {organization}/{stack['projectName']}/{stack['stackName']}"
    )
    query = f"projectName:{stack['projectName']}+stackName:{stack['stackName']}"
    count = 0
    next_url = ""  # an empty URL makes get_next_records start at page 1
    while True:
        resources, next_url = get_next_records(next_url, query)
        for _resource in resources:
            # do something with the resource
            count += 1
        if not next_url:
            # no further page: this stack is done
            break
        logging.info(f"Fetching next page ({count} resources found so far)")

    logging.info(f"{count} resources found in total")