atlassian-api / atlassian-python-api

Atlassian Python REST API wrapper
https://atlassian-python-api.readthedocs.io
Apache License 2.0
1.37k stars 664 forks source link

Confluence get_page_as_pdf and export_page giving 404 #1328

Open karthikpanu opened 9 months ago

karthikpanu commented 9 months ago

I tried this code:

    confluence = Confluence(
        url=atlassian_url,
        username=username,
        password=password,
        api_version="cloud"
    )
    with open("output.pdf", "wb") as pdf_file:
        pdf_file.write(confluence.export_page("142186113"))

and also:

    with open("output.pdf", "wb") as pdf_file:
        pdf_file.write(confluence.get_page_as_pdf("142186113"))

Both calls fail with the following error:

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/atlassian/confluence.py", line 3240, in raise_for_status
    error_msg = j["message"]
                ~^^^^^^^^^^^
KeyError: 'message'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/xxxxx/iac/hackathon/confluence-export.py", line 24, in <module>
    pdf_file.write(confluence.export_page("142186113"))
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/atlassian/confluence.py", line 2568, in export_page
    return self.get_page_as_pdf(page_id)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/atlassian/confluence.py", line 2544, in get_page_as_pdf
    url = self.get_pdf_download_url_for_confluence_cloud(url)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/atlassian/confluence.py", line 2795, in get_pdf_download_url_for_confluence_cloud
    progress_response = self.get(poll_url)
                        ^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/atlassian/rest_client.py", line 288, in get
    response = self.request(
               ^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/atlassian/rest_client.py", line 260, in request
    self.raise_for_status(response)
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/atlassian/confluence.py", line 3243, in raise_for_status
    response.raise_for_status()
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 404 Client Error: Not Found for url: https://xxxx.atlassian.net/wiki/services/api/v1/task/327255653/progress
karthikpanu commented 9 months ago

I have seen other people report this issue, but I haven't found a resolution.

gonchik commented 9 months ago

Let me check that endpoint on Cloud.

maconfr commented 6 months ago

The endpoint has changed from /services/api/v1/task/<TASK_ID>/progress to /rest/api/longtask/<TASK_ID>. See API doc: Get long-running task

I solved this in my code by subclassing the Confluence class and overriding the two methods Confluence.get_page_as_pdf() and Confluence.get_pdf_download_url_for_confluence_cloud().

I just copied the original method bodies and adjusted the code.

In Confluence.get_page_as_pdf():

@@ -55,7 +54,7 @@ class CustomConfluence(Confluence):
         url = "spaces/flyingpdf/pdfpageexport.action?pageId={pageId}".format(
             pageId=page_id
         )
-        if self.api_version == "cloud":
+        if self.cloud:
             url = self.get_pdf_download_url_for_confluence_cloud(url)
             if not url:
                 log.error("Failed to get download PDF url.")
@@ -63,8 +62,6 @@ class CustomConfluence(Confluence):
                     "Failed to export page as PDF",
                     reason="Failed to get download PDF url.",
                 )
-            # To download the PDF file, the request should be with no headers of authentications.
-            return requests.get(url, timeout=75).content
         return self.get(url, headers=headers, not_json_response=True)

In Confluence.get_pdf_download_url_for_confluence_cloud():

     def get_pdf_download_url_for_confluence_cloud(self, url):
@@ -86,27 +83,38 @@ class CustomConfluence(Confluence):
             task_id = response_string.split('name="ajs-taskId" content="')[1].split(
                 '">'
             )[0]
-            poll_url = "/services/api/v1/task/{0}/progress".format(task_id)
+            poll_url = "/rest/api/longtask/{0}".format(task_id)
+            task_timeout = time.monotonic() + 180
             while running_task:
                 log.info("Check if export task has completed.")
                 progress_response = self.get(poll_url)
-                percentage_complete = int(progress_response.get("progress", 0))
-                task_state = progress_response.get("state")
-                if task_state == "FAILED":
-                    log.error("PDF conversion not successful.")
+                percentage_complete = progress_response["percentageComplete"]
+                if task_timeout < time.monotonic():
+                    raise Exception(
+                        f"Timeout exceeded while waiting for task '{task_id}'."
+                        f" Progress: {percentage_complete}%."
+                    )
+
+                task_successful = progress_response["successful"]
+                task_finished = progress_response["finished"]
+                task_messages = [
+                    msg["translation"] for msg in progress_response["messages"]
+                ]
+                if task_finished and not task_successful:
+                    log.error("PDF conversion not successful. %r", task_messages)
                     return None
                 elif percentage_complete == 100:
                     running_task = False
-                    log.info(
-                        "Task completed - {task_state}".format(task_state=task_state)
-                    )
+                    log.info("Task completed - successful")
                     log.debug("Extract task results to download PDF.")
-                    task_result_url = progress_response.get("result")
+                    download_url = (
+                        task_messages[0].split(' href="', 1)[1].split('"', 1)[0]
+                    )
+
                 else:
                     log.info(
-                        "{percentage_complete}% - {task_state}".format(
+                        "{percentage_complete}% complete".format(
                             percentage_complete=percentage_complete,
-                            task_state=task_state,
                         )
                     )
                     time.sleep(3)
@@ -114,8 +122,7 @@ class CustomConfluence(Confluence):
                 "Task successfully done, querying the task result for the download url"
             )
             # task result url starts with /wiki, remove it.
-            task_content = self.get(task_result_url[5:], not_json_response=True)
-            download_url = task_content.decode(encoding="utf-8", errors="strict")
+            download_url = download_url[5:]
             log.debug("Successfully got the download url")
             return download_url
         except IndexError as e: