airbytehq / airbyte

The leading data integration platform for ETL / ELT data pipelines from APIs, databases & files to data warehouses, data lakes & data lakehouses. Both self-hosted and Cloud-hosted.
https://airbyte.com
Other
14.74k stars 3.79k forks source link

[source-confluence] Exporting 2,000 records from Confluence v7.19.16 and Confluence v8.5.8 works, but exporting 80,000 records fails #39389

Open ScGPS opened 3 weeks ago

ScGPS commented 3 weeks ago

Connector Name

source-confluence

Connector Version

0.1.3

At what step did the error happen?

During the sync

Relevant information

  1. Modify the source-confluence code (Airbyte v0.50.19, which corresponds to airbyte/source-confluence:0.1.3). A. In airbyte/airbyte-integrations/connectors/source-confluence/source_confluence/source.py, change
    /wiki/rest/api

    to

    /rest/api

modify

auth_method="Basic"

to

auth_method="Bearer"

modify

    def __init__(self, email: str, token: str, auth_method: str = "Basic", **kwargs):
        """Build a base64 token from an email/token pair and pass it to the parent initializer.

        Args:
            email: Joined with ``token`` as ``email:token`` before encoding.
            token: Second half of the ``email:token`` credential string.
            auth_method: Forwarded unchanged to the parent; defaults to ``"Basic"``.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        # b64encode works on bytes, so the "email:token" string is UTF-8 encoded first.
        auth_string = f"{email}:{token}".encode("utf8")
        # Decode the base64 bytes back to str so the parent receives a text token.
        b64_encoded = b64encode(auth_string).decode("utf8")
        super().__init__(token=b64_encoded, auth_method=auth_method, **kwargs)

to

    def __init__(self, email: str, token: str, auth_method: str = "Bearer", **kwargs):
        """Pass the token straight through to the parent initializer, with no encoding.

        Args:
            email: Not used by this version of the initializer.
            token: Forwarded to the parent as-is.
            auth_method: Forwarded unchanged to the parent; defaults to ``"Bearer"``.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        # Despite its name, b64_encoded holds the raw token here; nothing is base64-encoded.
        b64_encoded= token
        super().__init__(token=b64_encoded, auth_method=auth_method, **kwargs)

B. In airbyte/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json, change

{
  "definitions": {
    "user": {
      "type": "object",
      "properties": {
        "type": {
          "type": ["string", "null"]
        },
        "accountId": {
          "type": ["string", "null"]
        },
        "email": {
          "type": ["string", "null"]
        },
        "publicName": {
          "type": ["string", "null"]
        }
      }
    },
    "content": {
      "type": "object",
      "properties": {
        "id": {
          "type": "string"
        },
        "title": {
          "type": "string"
        },
        "type": {
          "type": "string"
        },
        "status": {
          "type": "string"
        }
      }
    },
    "contentRestriction": {
      "type": "object",
      "properties": {
        "operations": {
          "type": ["string", "null"]
        },
        "restrictions": {
          "user": {
            "type": "object",
            "properties": {
              "results": {
                "type": "array",
                "items": {
                  "$ref": "#/definitions/user"
                }
              }
            }
          }
        }
      }
    },
    "usersUserKeys": {
      "type": "object",
      "properties": {
        "users": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/user"
          }
        },
        "userKeys": {
          "type": "array",
          "items": {
            "type": "string"
          }
        }
      }
    },
    "version": {
      "type": "object",
      "properties": {
        "by": {
          "$ref": "#/definitions/user"
        },
        "when": {
          "type": ["string", "null"],
          "format": "date-time"
        },
        "friendlyWhen": {
          "type": ["string", "null"]
        },
        "message": {
          "type": ["string", "null"]
        },
        "number": {
          "type": ["integer", "null"]
        },
        "minorEdit": {
          "type": ["boolean", "null"]
        },
        "collaborators": {
          "$ref": "#/definitions/usersUserKeys"
        }
      }
    }
  },
  "type": "object",
  "properties": {
    "id": {
      "type": "string"
    },
    "title": {
      "type": "string"
    },
    "type": {
      "type": "string"
    },
    "status": {
      "type": "string"
    },
    "history": {
      "type": "object",
      "properties": {
        "latest": {
          "type": "boolean"
        },
        "createdBy": {
          "$ref": "#/definitions/user"
        },
        "createdDate": {
          "type": "string",
          "format": "date-time"
        },
        "contributors": {
          "type": "object",
          "properties": {
            "publishers": {
              "$ref": "#/definitions/usersUserKeys"
            }
          }
        },
        "previousVersion": {
          "$ref": "#/definitions/version"
        }
      }
    },
    "version": {
      "$ref": "#/definitions/version"
    },
    "descendants": {
      "type": "object",
      "properties": {
        "comment": {
          "type": "object",
          "properties": {
            "results": {
              "type": "array",
              "items": {
                "$ref": "#/definitions/content"
              }
            }
          }
        }
      }
    },
    "body": {
      "type": "object",
      "properties": {
        "storage": {
          "type": "object",
          "properties": {
            "value": {
              "type": "string"
            },
            "representation": {
              "type": "string"
            },
            "embeddedContent": {
              "type": "array"
            },
            "_expandable": {
              "type": "object",
              "properties": {
                "content": {
                  "type": "string"
                }
              }
            }
          }
        },
        "view": {
          "type": "object",
          "properties": {
            "value": {
              "type": "string"
            },
            "representation": {
              "type": "string"
            },
            "_expandable": {
              "type": "object",
              "properties": {
                "webresource": {
                  "type": "string"
                }
              }
            },
            "embeddedContent": {
              "type": "string"
            },
            "mediaToken": {
              "type": "string"
            },
            "content": {
              "type": "string"
            }
          }
        }
      }
    },
    "restrictions": {
      "type": "object",
      "properties": {
        "read": {
          "$ref": "#/definitions/contentRestriction"
        }
      }
    },
    "_expandable": {
      "type": "object",
      "properties": {
        "container": {
          "type": "string"
        },
        "space": {
          "type": "string"
        }
      }
    },
    "_links": {
      "type": "object",
      "properties": {
        "self": {
          "type": "string"
        },
        "tinyui": {
          "type": "string"
        },
        "editui": {
          "type": "string"
        },
        "webui": {
          "type": "string"
        }
      }
    }
  }
}

to

{
    "definitions": {
    },
    "type": "object",
    "properties": {
        "id": {
            "type": "string"
        },
        "title": {
            "type": "string"
        },
        "type": {
            "type": "string"
        },
        "status": {
            "type": "string"
        },
        "history": {
            "type": "object",
            "properties": {
                "latest": {
                    "type": "boolean"
                }
            }
        },
        "body": {
            "type": "object",
            "properties": {
                "storage": {
                    "type": "object",
                    "properties": {
                        "value": {
                            "type": "string"
                        },
                        "representation": {
                            "type": "string"
                        }
                    }
                }
            }
        }
    }
}

C. Rebuild docker image

# Work from the connector's directory; the build below uses the current directory as its context.
cd airbyte/airbyte-integrations/connectors/source-confluence/
# Force-remove the existing local image that carries the 0.1.3 tag.
sudo docker rmi -f airbyte/source-confluence:0.1.3
# List local images whose listing matches "confluence" (presumably to confirm the removal).
sudo docker images | grep confluence
# Build from the current directory and tag the result with the same name and version.
sudo docker build -t airbyte/source-confluence:0.1.3 .

D. Configure source-connector and destination-connector

  1. Exporting 2,000 records from a test Confluence instance (v7.19.16) succeeds. image

  2. Exporting 80,000 records from another test Confluence instance (v7.19.16) fails. DevOps_Docker_Kubernetes_K8s_ETL_Airbyte_导出wiki失败的记录_001

Relevant log output

When exporting 80,000 records from the other test Confluence instance (v7.19.16), no detailed error log is available; the only message is 'Something went wrong in the connector'. The error log cannot be downloaded from the web page.

A. Is there a way to keep the data that was already exported when an export fails? We found that when an error occurs, Airbyte cleans up all exported data.

B. The number of retry attempts is 5. Is there a way to set it to 0?

Contribute

marcosmarxm commented 3 weeks ago

@ScGPS let me know if you have problems submitting the contribution.

ScGPS commented 2 weeks ago

@marcosmarxm, are there steps for submitting the contribution? Thanks!