go-gitea / gitea

Git with a cup of tea! Painless self-hosted all-in-one software development service, including Git hosting, code review, team collaboration, package registry and CI/CD
https://gitea.com
MIT License
45.03k stars 5.49k forks source link

Store images and videos embedded in migrated issues and PRs on Gitea server #21155

Open TobiGr opened 2 years ago

TobiGr commented 2 years ago

Feature Description

Currently, migrations from other services like GitHub or GitLab copy the complete issue contents into Gitea. If a user inserted an image or video into an issue or PR, the migrated issue on Gitea still contains the URL to the original media source. However, if the corresponding repository is deleted, the media will eventually be deleted automatically as well. The same applies when, e.g., a custom GitLab instance is shut down. To allow complete and persistent migrations, those manually uploaded files should be migrated, too. In some cases this might not be necessary, and migrating many media files also causes a lot of traffic. Therefore, I suggest adding an option to migrate the whole content when setting up the migration.

Screenshots

No response

TobiGr commented 1 year ago

GitLab also added support for migrating attachments. These are their default values now:

grafik

rleh commented 2 months ago

I had the same problem, and hacked together a Python script that adds missing images/attachments to issues and comments. The attachments are identified by their GitLab path/filename, so a copy of GitLab's internal gitlab-rails/uploads directory containing all attachments is needed. To access Gitea, a token with (at least) the following permissions is needed: read:organization, write:issue, read:repository.

$ ./script.py --help
usage: script.py [-h] --attachment-directory ATTACHMENT_DIRECTORY --gitea GITEA --token TOKEN [--repo REPO | --repo-search]

Migrate attachments from Gitlab Issues to Gitea

options:
  -h, --help            show this help message and exit
  --attachment-directory ATTACHMENT_DIRECTORY, -a ATTACHMENT_DIRECTORY
                        Attachment directory, e.g. a copy of '/var/opt/gitlab/gitlab-rails/uploads' from a Gitlab Omnibus installation
  --gitea GITEA, -g GITEA
                        URL of the Gitea Instance, e.g. https://demo.gitea.com/
  --token TOKEN, -t TOKEN
                        Gitea Auth token
  --repo REPO, -r REPO  Name of the repo where attachments will be migrated. Full name is required, e.g. 'some-org/some-repo'.
  --repo-search, -s     Query list of all repos from Gitea API and migrate all attachments.
Click here to show the Python script ```python #!/usr/bin/env python3 import argparse from pathlib import Path from pprint import pprint import requests import urllib.parse parser = argparse.ArgumentParser(description='Migrate attachments from Gitlab Issues to Gitea') parser.add_argument("--attachment-directory", "-a", type=str, required=True, help="Attachment directory, e.g. a copy of '/var/opt/gitlab/gitlab-rails/uploads' from a Gitlab Omnibus installation") parser.add_argument("--gitea", "-g", type=str, required=True, help="URL of the Gitea Instance, e.g. https://demo.gitea.com/") parser.add_argument("--token", "-t", type=str, required=True, help="Gitea Auth token") group_repo = parser.add_mutually_exclusive_group() group_repo.add_argument("--repo", "-r", type=str, help="Name of the repo where attachments will be migrated. Full name is required, e.g. 'some-org/some-repo'.") group_repo.add_argument("--repo-search", "-s", action="store_true", help="Query list of all repos from Gitea API and migrate all attachments.") args = parser.parse_args() attachments = dict() for a in Path(args.attachment_directory).rglob("*"): if a.is_file(): attachments[str(a.parent.name + "/" + a.name)] = a #pprint(attachments) print(f"Found {len(attachments.keys())} possible attachments.") api_uri = args.gitea + "api/v1" api_headers = { "Authorization": "token " + args.token, "accept": "application/json", } repos = list() if args.repo_search: response = requests.get(api_uri + "/orgs", headers=api_headers) if response.status_code != 200: print(f"Error listing all Gitea Orgs: {response.status_code} {response}") orgs = list() for o in response.json(): orgs.append(o["name"]) print(f"Found {len(orgs)} orgs: {", ".join(orgs)}.") for org in orgs: response = requests.get(api_uri + f"/orgs/{org}/repos", headers=api_headers) if response.status_code != 200: print(f"Error listing repos in orgs '{org}': {response.status_code} {response}") for r in response.json(): if r["has_issues"]: 
repos.append(r["full_name"]) else: print(f"Warning: Repo {r["full_name"]} has issues disabled. Ignoring.") else: repos.append(args.repo) print(f"Found {len(repos)} repos.") for repo in repos: response = requests.get(api_uri + f"/repos/{repo}/issues", headers=api_headers) if response.status_code != 200: print(f"Error listing issues in repo '{repo}': {response.status_code} {response.content}") print(f"Repo '{repo}': Found {len(response.json())} issues.") for issue in response.json(): #pprint(issue) index = issue["number"] body: str = issue["body"] modified = False for a, file in attachments.items(): if a in body: files = {"attachment": open(file, "rb")} response = requests.post(api_uri + f"/repos/{repo}/issues/{index}/assets?name={Path(a).name}", headers=api_headers, files=files) if response.status_code != 201: print(f"Error uploading asset '{a}': {response.status_code} {response.content}") a_new = urllib.parse.urlparse(response.json()["browser_download_url"]).path print(f"Attachment uploaded: '{a}' -> '{a_new }'") body = body.replace("/uploads/" + a, a_new) modified = True if modified: request_body = {"body": body} response = requests.patch(api_uri + f"/repos/{repo}/issues/{index}", headers=api_headers, json=request_body) if response.status_code != 201: print(f"Error patching body: {response.status_code} {response.content}") response = requests.get(api_uri + f"/repos/{repo}/issues/comments", headers=api_headers) if response.status_code != 200: print(f"Error listing comments in repo '{repo}': {response.status_code} {response.content}") print(f"Repo '{repo}': Found {len(response.json())} comments.") for comment in response.json(): #pprint(comment) index = comment["id"] body: str = comment["body"] modified = False for a, file in attachments.items(): if a in body: files = {"attachment": open(file, "rb")} response = requests.post(api_uri + f"/repos/{repo}/issues/comments/{index}/assets?name={Path(a).name}", headers=api_headers, files=files) if response.status_code != 
201: print(f"Error uploading asset '{a}': {response.status_code} {response.content}") a_new = urllib.parse.urlparse(response.json()["browser_download_url"]).path print(f"Attachment uploaded: '{a}' -> '{a_new }'") body = body.replace("/uploads/" + a, a_new) modified = True if modified: request_body = {"body": body} response = requests.patch(api_uri + f"/repos/{repo}/issues/comments/{index}", headers=api_headers, json=request_body) if response.status_code != 200: ### Why not 201, like with issue editing? WTF Gitea?!? print(f"Error patching body: {response.status_code} {response.content}") ```