vale46n1 / immich_duplicate_finder

A Comprehensive Solution for Identifying and Managing Duplicate Photos in Immich
Apache License 2.0
198 stars 13 forks source link

[QUESTION] Deleting all found duplicates at once? #41

Open elec2 opened 4 months ago

elec2 commented 4 months ago

Hey,

is there a possibility to delete all my 3352 duplicates at once? It is really annoying to click and load every single duplicate. Thank you for the app anyway.

asukahan commented 4 months ago

I have 6300+ dupes, it's a real pain

hermesespinola commented 3 months ago

Deleting assets from the tool is painfully slow. Maybe write a script to scrape all the ids from the web UI and create a list to paste in here: https://immich.app/docs/api/delete-assets

// Run in the browser console on the duplicate finder's page.
// Selects the second <li> of each duplicate entry's list; the text of its second
// child node is used as the asset id (NOTE: depends on the page's current DOM layout).
const duplicateAssetNodes = document.querySelectorAll('#root > div > div.withScreencast > div > div > div > section.main > div.block-container > div > div > div > div > div:nth-child(2) > div > div > div > div:nth-child(1) > div > div > ul > li:nth-child(2)');

// Array.from(nodeList, mapFn) is used instead of NodeList.values().map().toArray():
// the iterator-helper methods are missing in older browsers, while Array.from is
// universally available and yields the same result.
const assetsToDelete = Array.from(
    duplicateAssetNodes,
    li => `"${li.childNodes[1].textContent.trimStart()}"`
).join(',');

// Prints a comma-separated list of double-quoted ids, ready to paste into a JSON array.
console.log(assetsToDelete);
# Build the request payload; ASSETS_TO_DELETE must already hold the
# comma-separated, double-quoted asset ids (e.g. the console output above).
JSON_BODY="{\"force\": true, \"ids\": [$ASSETS_TO_DELETE]}"

# Send a single DELETE request carrying all ids, following redirects and
# writing the server's response to stdout.
curl --location --request DELETE --output - \
    --header 'Content-Type: application/json' \
    --header "x-api-key: $IMMICH_API_KEY" \
    --data "$JSON_BODY" \
    "$IMMICH_HOST/api/asset"
fsniper commented 3 months ago

I wrote a very basic CLI command to delete the first item from each pair.


import os
import requests, json

from db import is_db_populated, load_settings_from_db, load_duplicate_pairs
from api import deleteAsset, getAssetInfo

# Load the connection settings once, at import time. load_settings_from_db() must
# return exactly four values; images_folder is unpacked but not used by the code below.
immich_server_url, api_key, images_folder, timeout = load_settings_from_db()

def list_duplicate_photos_faiss(assets, min_threshold, max_threshold, immich_server_url, api_key):
    """Print every stored duplicate pair and delete the first asset of each pair.

    WARNING: despite its name, this function is destructive. For each pair
    returned by ``load_duplicate_pairs`` it calls ``deleteAsset`` on the first
    asset id and keeps the second one.

    Args:
        assets: Asset records passed to ``getAssetInfo`` to look up details
            for each id (only used for the printed report).
        min_threshold: Lower threshold passed to ``load_duplicate_pairs``.
        max_threshold: Upper threshold passed to ``load_duplicate_pairs``.
        immich_server_url: Server URL forwarded to ``deleteAsset``.
        api_key: API key forwarded to ``deleteAsset``.

    Returns:
        None. Progress and results are reported on stdout.
    """
    # Nothing to do until the duplicate database has been generated.
    if not is_db_populated():
        print("The database does not contain any duplicate entries. Please generate/update the database.")
        return

    duplicates = load_duplicate_pairs(min_threshold, max_threshold)
    if not duplicates:
        print("No duplicates found.")
        return

    print(f"Found {len(duplicates)} duplicate pairs with FAISS code within threshold {min_threshold} < x < {max_threshold}:")

    # Ids successfully deleted during this run. An id can appear in several
    # pairs; without this guard a pair such as (B, A) following (A, B) would
    # delete the only remaining copy, and already-deleted ids would be retried.
    deleted_ids = set()

    for asset_id_1, asset_id_2 in duplicates:
        asset1_info = getAssetInfo(asset_id_1, assets)
        asset2_info = getAssetInfo(asset_id_2, assets)

        print(f"Pair:\n\timg1: {asset_id_1} {asset1_info}\n\timg2: {asset_id_2} {asset2_info}")

        if asset_id_1 in deleted_ids or asset_id_2 in deleted_ids:
            print("\t\tSkipped: one photo of this pair was already deleted")
            continue

        if deleteAsset(immich_server_url, asset_id_1, api_key):
            deleted_ids.add(asset_id_1)
            print("\t\tDeleted photo")
        else:
            print("\t\tdelete failed")

def fetchAssets(immich_server_url, api_key, timeout, type):
    """Download the asset list from the server and keep only one asset type.

    Args:
        immich_server_url: Base URL of the server; a trailing slash is ignored.
        api_key: Value sent in the ``x-api-key`` request header.
        timeout: Timeout handed to ``requests.get``.
        type: Value an asset's ``"type"`` field must equal to be kept
            (the name shadows the builtin but is part of the call signature).

    Returns:
        A list of matching asset dicts; an empty list when the response body
        is empty or is not declared as JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    endpoint = f"{immich_server_url.rstrip('/')}/api/asset/"
    request_headers = {'Accept': 'application/json', 'x-api-key': api_key}

    # NOTE: verify=False turns off TLS certificate verification for this request.
    response = requests.get(endpoint, headers=request_headers, verify=False, timeout=timeout)
    response.raise_for_status()

    content_type = response.headers.get('Content-Type', '')
    if 'application/json' not in content_type:
        print(f'Unexpected Content-Type: {content_type}\nResponse content: {response.text}')
        return []

    # An empty body cannot be decoded as JSON, so treat it as "no assets".
    if not response.text:
        return []

    return [entry for entry in response.json() if entry.get("type") == type]

def main():
    """Fetch all image assets and delete the first asset of each duplicate pair.

    Uses the module-level settings (server URL, API key, timeout) and a fixed
    threshold window of 0.0 to 0.6 for selecting duplicate pairs.
    """
    min_threshold = 0.0
    max_threshold = 0.6
    assets = fetchAssets(immich_server_url, api_key, timeout, 'IMAGE')

    # The duplicate pairs are loaded inside list_duplicate_photos_faiss, after it
    # has checked that the database is populated, so they are not loaded here.
    list_duplicate_photos_faiss(
        assets,
        min_threshold,
        max_threshold,
        immich_server_url,
        api_key,
    )

# Run the deletion only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()