Correct entries for thumbnails in cmr.json files in PSScene3Band collection

WIP Update: Backups have been made for all PSScene cmr json files. The next operation will do the modifications by loading from the backups and saving over the original. If any errors occur, then I can run this operation again from the unchanged backups. Below is the Lambda code used for copying and backing up the CMR JSON files (including some raw notes about the process)

# lambda__ks-copy-psscene-cmr-json.py
#
# Lambda Name:   ks-copy-psscene-cmr-json   us-west-2

# Config
#
# Python 3.12
# Architecture: x86_64
# Permissions:  Existing Role: ks-copy-psscene-thumbs-test-role-xtln0gv1  # This is the same role used for copying Thumbs, that has all the permissions needed.
# Timeout 15 seconds
#
# Batch Job Config, Version 1

# Test Input # Test: "batch-input-1-20150601"
#
# {
#   "invocationId": "some_long_string",
#   "job": {
#     "id": "e7306709-ea94-4dc0-863b-5c0d1bd20ee3"
#   },
#   "tasks": [
#     {
#       "taskId": "AAAAAAAAAAk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPAamOcXpU2Dc2qL1/zBdQ",
#       "s3BucketArn": "arn:aws:s3:::csdap-cumulus-prod-protected",
#       "s3Key": "planet/PSScene3Band/20150601_090322_090c_cmr.json",
#       "s3VersionId": "None"
#     }
#   ],
#   "invocationSchemaVersion": "1.0"
# }

# (Batch Job) REQUIRES INPUT - CSV File which has rows that look like this.
# # Note, this file must be uploaded to an S3 Location to be used as input
#
# "csdap-cumulus-prod-protected","planet/PSScene3Band/20190917_203931_1039_cmr.json"
# "csdap-cumulus-prod-protected","planet/PSScene3Band/20190917_203932_1039_cmr.json"
# "csdap-cumulus-prod-protected","planet/PSScene3Band/20190917_203933_1039_cmr.json"
# "csdap-cumulus-prod-protected","planet/PSScene3Band/20190917_203934_1039_cmr.json"
# "csdap-cumulus-prod-protected","planet/PSScene3Band/20190917_203936_1039_cmr.json"

# Setting up Test Batch Job 
# Copying from "ff2f25e0-2c49-46a4-a137-7df1fa72b2fb"
# Manifest Object:          s3://csdap-cumulus-prod-internal/kstest/cmr_backups/inputs/psscene_cmr_json__small_list_sample.txt
# Invoke Lambda Function:   Lambda Function:    ks-copy-psscene-cmr-json
# Leaving the IAM Role the same "ks-copy-psscene-thumbs-test-role-xtln0gv1"
# Tags:     Key: "name", Value: "psscene3_cmr_backup_copy Sample 5 Files"
#
# Final Name for Test Job:  80b4018b-46eb-4c55-9553-2e148f5e99e0 
#
# Running this test with Debug Output Logs turned on.

# Test looked good - Now running the real thing.
#
# Cloning Batch job                 "80b4018b-46eb-4c55-9553-2e148f5e99e0"
# Changing the Manifest Object to:  s3://csdap-cumulus-prod-internal/kstest/cmr_backups/inputs/psscene_cmr_json_files_only_list__2024-08-15.txt
# Changing Tags to:                 Key: "name", Value: "psscene3_cmr_backup_copy Large 2.3m Files"
# Batch Job Final Name:             a7b57bab-c5cf-4413-823c-60dd7bf5f236
#
# Running this Batch Operation Now.   (2,360,981 files)  NOTE: This is different from the 2,361,441 Thumb files that were copied previously.
# Final Outcome: Total succeeded (rate) 2,360,981 (100%), Time in active state 00:19:48

import time
import random
import json
import boto3
import sys

s3 = boto3.client('s3')

# Setting this to True will create significant output to Cloudwatch Logs
SETTING__IS_OUTPUT_DEBUG_MODE = False # True

# When we need to print the output to the logs to see what is going on
def debug_print(str_to_print="", obj_out=None):
    """Print a labelled debug line when debug output is switched on.

    The line has the form ``<str_to_print>:    <obj_out>``.  Nothing is
    printed unless the module-level SETTING__IS_OUTPUT_DEBUG_MODE flag
    compares equal to True.
    """
    if not (SETTING__IS_OUTPUT_DEBUG_MODE == True):
        # Debug output is disabled; stay silent.
        return
    line = f'{str_to_print}:    {obj_out}'
    print(line)

# Without this, we hit the throttle limits when running the S3 batch operation.
def random_sleep():
    """Pause for a random duration drawn uniformly between 0.01 and 0.1 seconds.

    The chosen duration is reported through debug_print before sleeping.
    """
    pause_seconds = random.uniform(0.01, 0.1)
    note = "Sleeping for: " + str(pause_seconds) + " seconds."
    debug_print(str_to_print=note)
    time.sleep(pause_seconds)

def lambda_handler(event, context):
    """Back up one CMR JSON object for an S3 Batch Operations task.

    Only the first task in ``event['tasks']`` is processed.  The object it
    names is copied to the ``csdap-cumulus-prod-internal`` bucket under
    ``kstest/cmr_backups/planet/PSScene3Band/``, with ``.json`` in the file
    name replaced by ``.old``.

    Args:
        event: Invocation payload.  The handler reads ``invocationId``,
            ``invocationSchemaVersion`` and, from ``tasks[0]``, ``taskId``,
            ``s3BucketArn`` and ``s3Key``.
        context: Lambda context object (unused).

    Returns:
        dict: On success, ``statusCode`` 200 plus ``invocationSchemaVersion``,
        ``invocationId`` and a one-element ``results`` list with
        ``resultCode`` ``'Succeeded'``.  On any failure, ``statusCode`` 500
        and ``err_info`` (as before) plus the same batch fields with
        ``resultCode`` ``'PermanentFailure'``, so the failure is reported
        against the task instead of being returned as a malformed response.
    """
    try:
        debug_print(str_to_print="Starting a new run")

        # Looking at the Event Object:
        debug_print(str_to_print="Event Object", obj_out=event)

        # Extract bucket ARN and key from the first (only processed) task
        task = event['tasks'][0]
        s3BucketArn = task['s3BucketArn']
        s3Key       = task['s3Key']
        debug_print(str_to_print="s3BucketArn", obj_out=s3BucketArn)
        debug_print(str_to_print="s3Key", obj_out=s3Key)

        # The bucket name is the part of the ARN after ':::'
        # NOTE(review): S3 Batch Operations may deliver s3Key URL-encoded; keys containing
        # special characters might need urllib.parse.unquote_plus -- confirm against the AWS docs.
        src_bucket_name = s3BucketArn.split(':::')[1]
        src_key_path    = s3Key

        # Strip any surrounding whitespace (str.strip() does not remove quotes)
        src_bucket_name = src_bucket_name.strip()
        src_key_path = src_key_path.strip()
        debug_print(str_to_print="src_bucket_name", obj_out=src_bucket_name)
        debug_print(str_to_print="src_key_path", obj_out=src_key_path)

        # Extract the Source file name
        input_filename_only     = src_key_path.split("/")[-1]
        debug_print(str_to_print="input_filename_only", obj_out=input_filename_only)

        # Destination Keypaths / Filenames
        # Create a destination backup name .old
        dest_1__file_name_only = input_filename_only.replace(".json", ".old")
        #
        dest_1__root_dir_keypath = 'kstest/cmr_backups/planet/PSScene3Band/'
        #dest_1__root_dir_keypath = f'kstest/lambda_test/planet/PSScene3Band/'
        #dest_1__root_dir_keypath = f'planet/PSScene3Band/'  # f'planet/PSScene3Band/'
        #
        dest_1__bucket_name = 'csdap-cumulus-prod-internal'
        #dest_1__bucket_name = f'csdap-cumulus-prod-public'
        #
        dest_1__full_keypath = f'{dest_1__root_dir_keypath}{dest_1__file_name_only}'
        #
        debug_print(str_to_print="dest_1__bucket_name", obj_out=dest_1__bucket_name)
        debug_print(str_to_print="dest_1__root_dir_keypath", obj_out=dest_1__root_dir_keypath)
        debug_print(str_to_print="dest_1__file_name_only", obj_out=dest_1__file_name_only)
        debug_print(str_to_print="dest_1__full_keypath", obj_out=dest_1__full_keypath)

        # Sleep for a very short amount of time to prevent throttle limit -- BEFORE the copy operation
        random_sleep()

        # Copy the file to its destination
        copy_source = {'Bucket': src_bucket_name, 'Key': src_key_path}
        s3.copy_object(CopySource=copy_source, Bucket=dest_1__bucket_name, Key=dest_1__full_keypath)

        # Passing the invocation ID back in the success info.
        return {
            'statusCode': 200,
            'invocationSchemaVersion': event['invocationSchemaVersion'],
            'invocationId': event['invocationId'],
            'results': [
                {
                    'taskId': task['taskId'],
                    'resultCode': 'Succeeded',
                    'resultString': 'Copy Operations completed successfully'
                }
            ]
        }
    except Exception:
        err_info = str(sys.exc_info())

        # Recover whatever identifying fields are available; the event itself
        # may be what was malformed, so nothing here is allowed to raise.
        event_dict = event if isinstance(event, dict) else {}
        tasks = event_dict.get('tasks')
        first_task = tasks[0] if isinstance(tasks, list) and tasks else None
        task_id = first_task.get('taskId') if isinstance(first_task, dict) else None

        # Report the failure per task when the task id is known; otherwise
        # leave 'results' empty and let 'treatMissingKeysAs' mark it failed.
        results = []
        if task_id is not None:
            results.append({
                'taskId': task_id,
                'resultCode': 'PermanentFailure',
                'resultString': err_info
            })

        return {
            # 'statusCode' and 'err_info' are kept from the previous failure shape.
            'statusCode': 500,
            'err_info': f'{err_info}',
            'invocationSchemaVersion': event_dict.get('invocationSchemaVersion', '1.0'),
            'treatMissingKeysAs': 'PermanentFailure',
            'invocationId': event_dict.get('invocationId'),
            'results': results
        }

    # return {
    #     'err_info': f'{err_info}',
    #     'success_info': f'{success_info}'
    # }

# Manual Test Output (Single Item, run from the Lambda Interface)
#
# Test Event Name
# batch-input-1-20150601
#
# Response
# {
#   "statusCode": 200,
#   "invocationSchemaVersion": "1.0",
#   "invocationId": "some_long_string",
#   "results": [
#     {
#       "taskId": "AAAAAAAAAAExFKoqBbA5bbIDWZB9c7NGhU0gGZLhY6jh/Lp6RiPJDFpU9bJ3KtvjxmOl9BwUPDHR9+qXkcXkYS2PO0Rb9ja6QTGRqWG7NHM4/xuLk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPA8as/pqRAyKhor83bk0fVI/rvZWwQPPWQmf4Y1aqhSd0ao/kf2qhlY99oOHNYWsJ3OedeWy/2d52K3RyadUDRARTLHqhs6hYl0qcPDW9pEY+cn8v9h8mHOZY4dCslCDrUoowtGb4hvenUC+fsdzkqb+x5k4THjuf3iLFxNMBkGtPrx5EWH5AviYZn3vo95ZioT9O2zIkmBgOq/kxNglsUFfwZzw2aRx4jQtRSR3BAmnA6sWFsPfslJNInYe1fGm4142II9dNR41lTyKQlmw/1DUieXEyVREEy3YLkewSDNzW+EOYKJjKrXwpKc+1yISxJrVJTAwWC0+pG/MaZlLBR3oWjBP33zOZTb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ",
#       "resultCode": "Succeeded",
#       "resultString": "Copy Operations completed successfully"
#     }
#   ]
# }
#
# Function Logs
# START RequestId: b5ab1e15-e205-4c8b-a248-a458d57090fe Version: $LATEST
# Starting a new run:    None
# Event Object:    {'invocationId': 'some_long_string', 'job': {'id': 'e7306709-ea94-4dc0-863b-5c0d1bd20ee3'}, 'tasks': [{'taskId': 'AAAAAAAAAAExFKoqBbA5bbIDWZB9c7NGhU0gGZLhY6jh/Lp6RiPJDFpU9bJ3KtvjxmOl9BwUPDHR9+qXkcXkYS2PO0Rb9ja6QTGRqWG7NHM4/xuLk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPA8as/pqRAyKhor83bk0fVI/rvZWwQPPWQmf4Y1aqhSd0ao/kf2qhlY99oOHNYWsJ3OedeWy/2d52K3RyadUDRARTLHqhs6hYl0qcPDW9pEY+cn8v9h8mHOZY4dCslCDrUoowtGb4hvenUC+fsdzkqb+x5k4THjuf3iLFxNMBkGtPrx5EWH5AviYZn3vo95ZioT9O2zIkmBgOq/kxNglsUFfwZzw2aRx4jQtRSR3BAmnA6sWFsPfslJNInYe1fGm4142II9dNR41lTyKQlmw/1DUieXEyVREEy3YLkewSDNzW+EOYKJjKrXwpKc+1yISxJrVJTAwWC0+pG/MaZlLBR3oWjBP33zOZTb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ', 's3BucketArn': 'arn:aws:s3:::csdap-cumulus-prod-protected', 's3Key': 'planet/PSScene3Band/20150601_090322_090c_cmr.json', 's3VersionId': 'None'}], 'invocationSchemaVersion': '1.0'}
# s3BucketArn:    arn:aws:s3:::csdap-cumulus-prod-protected
# s3Key:    planet/PSScene3Band/20150601_090322_090c_cmr.json
# src_bucket_name:    csdap-cumulus-prod-protected
# src_key_path:    planet/PSScene3Band/20150601_090322_090c_cmr.json
# input_filename_only:    20150601_090322_090c_cmr.json
# dest_1__bucket_name:    csdap-cumulus-prod-internal
# dest_1__root_dir_keypath:    kstest/cmr_backups/planet/PSScene3Band/
# dest_1__file_name_only:    20150601_090322_090c_cmr.old
# dest_1__full_keypath:    kstest/cmr_backups/planet/PSScene3Band/20150601_090322_090c_cmr.old
# Sleeping for: 0.05234183068517902 seconds.:    None
# END RequestId: b5ab1e15-e205-4c8b-a248-a458d57090fe
# REPORT RequestId: b5ab1e15-e205-4c8b-a248-a458d57090fe    Duration: 723.27 ms Billed Duration: 724 ms Memory Size: 128 MB Max Memory Used: 80 MB  Init Duration: 472.75 ms
#
# Request ID
# b5ab1e15-e205-4c8b-a248-a458d57090fe

WIP Update: About to run the test Batch Job and then the FULL Batch Job for updating the PSScene CMR files now.

Note, this operation uses the BACKUPS as the input -- that way if something breaks, I can just modify the code a little bit and run it again -- no need to do full restores.

Here is the Lambda code (including an example of the inputs):

# // ks-update-psscene-cmr-json
# arn:aws:lambda:us-west-2:339197775982:function:ks-update-psscene-cmr-json

import time
import random
import json
import boto3
import hashlib
import sys

s3 = boto3.client('s3')

# Setting this to True will create significant output to Cloudwatch Logs
SETTING__IS_OUTPUT_DEBUG_MODE = False # True # False

# When we need to print the output to the logs to see what is going on
def debug_print(str_to_print="", obj_out=None):
    """Emit ``<str_to_print>:    <obj_out>`` to stdout in debug mode only.

    The module-level SETTING__IS_OUTPUT_DEBUG_MODE flag must compare equal
    to True for anything to be printed.
    """
    if not (SETTING__IS_OUTPUT_DEBUG_MODE == True):
        # Debug output is disabled; stay silent.
        return
    line = f'{str_to_print}:    {obj_out}'
    print(line)

# Without this, we hit the throttle limits when running the S3 batch operation.
def random_sleep():
    """Sleep for a uniformly random time between 0.01 and 0.1 seconds.

    The chosen duration is reported through debug_print before sleeping.
    """
    pause_seconds = random.uniform(0.01, 0.1)
    note = "Sleeping for: " + str(pause_seconds) + " seconds."
    debug_print(str_to_print=note)
    time.sleep(pause_seconds)

# Convert the input json file name to a thumb name (all granules follow this same pattern)
# Convert:      # 20150601_090322_090c_cmr.old
# TO:           # 20150601_090322_090c-thumb.png
def get_expected_thumb_file_name_from_json_file_name(json_file_name=""):
    """Derive the thumbnail file name from a backed-up CMR JSON file name.

    Every occurrence of ``_cmr.old`` in ``json_file_name`` is swapped for
    ``-thumb.png``, e.g. ``20150601_090322_090c_cmr.old`` becomes
    ``20150601_090322_090c-thumb.png``.
    """
    # Splitting on the old suffix and re-joining with the new one swaps every occurrence.
    name_pieces = json_file_name.split('_cmr.old')
    thumb_file_name = '-thumb.png'.join(name_pieces)

    debug_print(str_to_print="get_expected_thumb_file_name: (json_file_name): ", obj_out=json_file_name)
    debug_print(str_to_print="get_expected_thumb_file_name: (thumb_file_name): ", obj_out=thumb_file_name)
    return thumb_file_name

# Read the S3 file, calculate the Checksum, and then return the value.
def calculate_md5_checksum_from_s3(bucket_name="", file_key=""):
    """Download an S3 object and return its MD5 checksum and size.

    Args:
        bucket_name: Name of the bucket holding the object.
        file_key: Key of the object within the bucket.

    Returns:
        tuple: ``(md5_hex_digest, file_size_in_bytes)`` where the digest is
        the hexadecimal MD5 of the object's full content and the size is
        the number of bytes that were downloaded and hashed.
    """
    # Download the file content from s3 (the whole object is read into memory)
    obj = s3.get_object(Bucket=bucket_name, Key=file_key)
    file_content = obj['Body'].read()

    # Hash exactly the bytes that were downloaded
    md5_hex_digest = hashlib.md5(file_content).hexdigest()

    # The size is taken from the same bytes, so no second S3 request
    # (head_object) is needed and size and checksum always describe the
    # same content.
    file_size_in_bytes = len(file_content)

    return md5_hex_digest, file_size_in_bytes

# Examine each element of the input array and return the index of the first item whose "Name" is exactly "thumb" (or -1 if there is none)
def get_index_for__name__thumb(obj_array=[]):
    """Return the position of the first entry whose "Name" equals "thumb".

    Entries are examined in order and each must provide a "Name" key.
    Returns -1 when no entry matches.
    """
    matching_positions = (
        position
        for position, entry in enumerate(obj_array)
        if entry['Name'] == "thumb"
    )
    # The first match wins; -1 is the "not found" sentinel.
    return next(matching_positions, -1)

# Examine each element of the input array and return the index of the first item whose "MimeType" is exactly "image/png" (or -1 if there is none)
def get_index_for__obsolete_thumb_related_url(obj_array=None):
    """Return the index of the first entry whose "MimeType" is "image/png".

    Entries that carry no "MimeType" key at all are skipped rather than
    raising KeyError, so the match does not have to be the first element.

    Args:
        obj_array: List of dict entries to search; ``None`` is treated as
            an empty list.

    Returns:
        int: Index of the first matching entry, or -1 if none matches.
    """
    for current_index, obj in enumerate(obj_array or []):
        if obj.get('MimeType') == "image/png":
            return current_index
    return -1

# This is the main function which does the updating.
# There is a source file, a different destination file, an input checksum, and some other information to be updated (according to: https://github.com/NASA-IMPACT/csdap-cumulus/issues/306 )
def update_json(src_bucket_name="", src_key_path="", dest_bucket_name="", dest_full_keypath="", thumb_checksum_value="", thumb_file_size_in_bytes="", thumb_file_name_only=""):
    """Load a CMR JSON document from S3, fix its thumbnail entries, and save it.

    Two changes are made to the loaded document:

    1. The ``DataGranule.ArchiveAndDistributionInformation`` entry found by
       get_index_for__name__thumb is replaced with a full entry carrying
       the thumbnail's file name, size, MIME type and MD5 checksum.
    2. The ``RelatedUrls`` entry found by
       get_index_for__obsolete_thumb_related_url is removed.  If there is
       no such entry, nothing is removed.

    The result is written with put_object to the destination bucket/key,
    overwriting any object already there.

    Args:
        src_bucket_name: Bucket holding the CMR JSON to read.
        src_key_path: Key of the CMR JSON to read.
        dest_bucket_name: Bucket to write the updated JSON to.
        dest_full_keypath: Key to write the updated JSON to.
        thumb_checksum_value: MD5 hex digest recorded for the thumbnail.
        thumb_file_size_in_bytes: Thumbnail size; converted with ``int()``.
        thumb_file_name_only: Thumbnail file name recorded as ``Name``.

    Raises:
        ValueError: If no ``"Name": "thumb"`` entry is found.  Nothing is
            written in that case; previously the -1 "not found" index would
            have silently overwritten the last file entry.
    """
    # Double Checking the input
    debug_print(str_to_print="update_json: (src_bucket_name): ", obj_out=src_bucket_name)                       # csdap-cumulus-prod-internal
    debug_print(str_to_print="update_json: (src_key_path): ", obj_out=src_key_path)                             # kstest/cmr_backups/planet/PSScene3Band/20150601_090322_090c_cmr.old
    debug_print(str_to_print="update_json: (dest_bucket_name): ", obj_out=dest_bucket_name)                     # csdap-cumulus-prod-protected
    debug_print(str_to_print="update_json: (dest_full_keypath): ", obj_out=dest_full_keypath)                   # planet/PSScene3Band/20150601_090322_090c_cmr.json
    debug_print(str_to_print="update_json: (thumb_checksum_value): ", obj_out=thumb_checksum_value)             # 30590045634edcc9c3218d804a1c3220
    debug_print(str_to_print="update_json: (thumb_file_size_in_bytes): ", obj_out=thumb_file_size_in_bytes)     # 19948
    debug_print(str_to_print="update_json: (thumb_file_name_only): ", obj_out=thumb_file_name_only)             # 20150601_090322_090c-thumb.png

    # Open the Existing JSON file
    src_cmr_json__s3_obj        = s3.get_object(Bucket=src_bucket_name, Key=src_key_path)
    src_cmr_json__file_content  = src_cmr_json__s3_obj['Body'].read().decode('utf-8')
    #
    # Load the JSON content into a Python dictionary
    src_cmr_json__json_content = json.loads(src_cmr_json__file_content)

    # DO THE MODIFICATIONS
    #
    # The first change is to find the item that is ONLY {"Name": "thumb"} and replace it with a full file entry.
    archive_info_array = src_cmr_json__json_content["DataGranule"]["ArchiveAndDistributionInformation"]
    correct_array_index_to_change = get_index_for__name__thumb(obj_array=archive_info_array)
    debug_print(str_to_print="update_json: (correct_array_index_to_change): ", obj_out=correct_array_index_to_change)       # 0
    #
    # -1 means "not found".  Used as a list index it would address the LAST
    # entry, so stop here instead of overwriting an unrelated file entry.
    if correct_array_index_to_change < 0:
        raise ValueError(f'update_json: no "Name": "thumb" entry found in DataGranule.ArchiveAndDistributionInformation of {src_bucket_name}/{src_key_path}')
    #
    # Debugging, let's look to make sure we got the right one.
    debug_print(str_to_print="update_json: OLD (debug_obj__correct_array_obj): ", obj_out=archive_info_array[correct_array_index_to_change])     # {'Name': 'thumb'}
    #
    checksum_obj = {"Value": f'{thumb_checksum_value}', "Algorithm": "MD5"}
    new_dg_aadi_obj = {"Name": f'{thumb_file_name_only}', "SizeInBytes": int(thumb_file_size_in_bytes), "MimeType": "image/png", "Checksum": checksum_obj }
    debug_print(str_to_print="update_json: (new_dg_aadi_obj): ", obj_out=new_dg_aadi_obj)                                   # {'Name': '20150601_090322_090c-thumb.png', 'SizeInBytes': 19948, 'MimeType': 'image/png', 'Checksum': {'Value': '30590045634edcc9c3218d804a1c3220', 'Algorithm': 'MD5'}}
    #
    # Replace the object at the correct index with the new one.
    archive_info_array[correct_array_index_to_change] = new_dg_aadi_obj
    debug_print(str_to_print="update_json: (obj_after_changing_dg_key): ", obj_out=archive_info_array[correct_array_index_to_change])

    # Next, there is an entry in the RelatedUrls that needs to be just removed (Cumulus will recreate this one correctly during ingest)
    related_urls_array = src_cmr_json__json_content["RelatedUrls"]
    related_urls_index_to_remove = get_index_for__obsolete_thumb_related_url(obj_array=related_urls_array)
    debug_print(str_to_print="update_json: (related_urls_index_to_remove): ", obj_out=related_urls_index_to_remove)         # 0
    #
    if related_urls_index_to_remove < 0:
        # Nothing to remove.  Deleting at index -1 would drop the last,
        # unrelated RelatedUrls entry, so leave the array untouched.
        debug_print(str_to_print="update_json: No obsolete thumb entry found in RelatedUrls; nothing removed", obj_out=src_key_path)
    else:
        debug_print(str_to_print="update_json: (debug_obj__array_obj_to_remove): ", obj_out=related_urls_array[related_urls_index_to_remove])
        #
        # Now it's time to delete this object from the related_urls array.
        del related_urls_array[related_urls_index_to_remove]
        #
        # Debug only: show what now sits at that index (there is nothing there if the removed entry was the last one).
        if related_urls_index_to_remove < len(related_urls_array):
            debug_print(str_to_print="update_json: (rel_urls_obj_after_deleting_entry): ", obj_out=related_urls_array[related_urls_index_to_remove])
        else:
            debug_print(str_to_print=f'update_json: Error Getting the latest (different) object at index: {related_urls_index_to_remove}', obj_out="")

    # Convert the modified dictionary back to a JSON string (no indent when saving back to S3)
    updated_cmr_json__file_content = json.dumps(src_cmr_json__json_content)

    # Save the modified JSON to the destination S3 bucket.
    # WARNING: this OVERWRITES the destination CMR JSON object.
    s3.put_object(Bucket=dest_bucket_name, Key=dest_full_keypath, Body=updated_cmr_json__file_content)

    # Update the Logs
    debug_print(str_to_print=f'update_json: JSON should now be updated and saved to the new location', obj_out=f'{dest_bucket_name}/{dest_full_keypath}')  # csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_cmr.json

def lambda_handler(event, context):
    """Rebuild one production CMR JSON file from its backup for an S3 Batch Operations task.

    Only the first task in ``event['tasks']`` is processed.  It names a
    backup object (``..._cmr.old``).  The handler derives the destination
    key ``planet/PSScene3Band/..._cmr.json`` in
    ``csdap-cumulus-prod-protected``, computes the MD5 and size of the
    thumbnail expected next to it (``...-thumb.png``), and calls
    update_json to write the corrected CMR JSON over the destination.

    Args:
        event: Invocation payload.  The handler reads ``invocationId``,
            ``invocationSchemaVersion`` and, from ``tasks[0]``, ``taskId``,
            ``s3BucketArn`` and ``s3Key``.
        context: Lambda context object (unused).

    Returns:
        dict: On success, ``statusCode`` 200 plus ``invocationSchemaVersion``,
        ``invocationId`` and a one-element ``results`` list with
        ``resultCode`` ``'Succeeded'``.  On any failure, ``statusCode`` 500
        and ``err_info`` (as before) plus the same batch fields with
        ``resultCode`` ``'PermanentFailure'``, so the failure is reported
        against the task instead of being returned as a malformed response.
    """
    try:
        debug_print(str_to_print="Starting a new run")

        # Looking at the Event Object:
        debug_print(str_to_print="Event Object", obj_out=event)

        # Extract bucket ARN and key from the first (only processed) task
        task = event['tasks'][0]
        s3BucketArn = task['s3BucketArn']
        s3Key       = task['s3Key']
        debug_print(str_to_print="s3BucketArn", obj_out=s3BucketArn)                                        # arn:aws:s3:::csdap-cumulus-prod-internal
        debug_print(str_to_print="s3Key", obj_out=s3Key)                                                    # kstest/cmr_backups/planet/PSScene3Band/20150601_090322_090c_cmr.old

        # The bucket name is the part of the ARN after ':::'
        # NOTE(review): S3 Batch Operations may deliver s3Key URL-encoded; keys containing
        # special characters might need urllib.parse.unquote_plus -- confirm against the AWS docs.
        src_bucket_name = s3BucketArn.split(':::')[1]
        src_key_path    = s3Key

        # Strip any surrounding whitespace (str.strip() does not remove quotes)
        src_bucket_name = src_bucket_name.strip()
        src_key_path = src_key_path.strip()
        debug_print(str_to_print="src_bucket_name", obj_out=src_bucket_name)                                # csdap-cumulus-prod-internal
        debug_print(str_to_print="src_key_path", obj_out=src_key_path)                                      # kstest/cmr_backups/planet/PSScene3Band/20150601_090322_090c_cmr.old

        # Extract the Source file name
        input_filename_only     = src_key_path.split("/")[-1]
        debug_print(str_to_print="input_filename_only", obj_out=input_filename_only)                        # 20150601_090322_090c_cmr.old

        # Destination Keypaths / Filenames
        # Generate the existing file destination name from the .old file.
        dest_1__file_name_only = input_filename_only.replace(".old", ".json")
        #
        #dest_1__root_dir_keypath = f'kstest/cmr_backups/planet/PSScene3Band/'
        #dest_1__root_dir_keypath = f'kstest/lambda_test/planet/PSScene3Band/'
        dest_1__root_dir_keypath = 'planet/PSScene3Band/'
        #
        #dest_1__bucket_name = f'csdap-cumulus-prod-internal'
        #dest_1__bucket_name = f'csdap-cumulus-prod-public'
        dest_1__bucket_name = 'csdap-cumulus-prod-protected'
        #
        dest_1__full_keypath = f'{dest_1__root_dir_keypath}{dest_1__file_name_only}'
        #
        debug_print(str_to_print="dest_1__bucket_name", obj_out=dest_1__bucket_name)                        # csdap-cumulus-prod-protected
        debug_print(str_to_print="dest_1__root_dir_keypath", obj_out=dest_1__root_dir_keypath)              # planet/PSScene3Band/
        debug_print(str_to_print="dest_1__file_name_only", obj_out=dest_1__file_name_only)                  # 20150601_090322_090c_cmr.json
        debug_print(str_to_print="dest_1__full_keypath", obj_out=dest_1__full_keypath)                      # planet/PSScene3Band/20150601_090322_090c_cmr.json

        # Now get the details for the Thumb file
        # Get the Expected thumb filename
        expected_thumb__bucket_name     = dest_1__bucket_name
        expected_thumb__filename_only   = get_expected_thumb_file_name_from_json_file_name(json_file_name=input_filename_only)
        #
        # The thumb file should sit side by side with the original cmr.json file (so the same destination parts are used to build its key.)
        expected_thumb__full_keypath    = f'{dest_1__root_dir_keypath}{expected_thumb__filename_only}'
        #
        debug_print(str_to_print="expected_thumb__bucket_name", obj_out=expected_thumb__bucket_name)                # csdap-cumulus-prod-protected
        debug_print(str_to_print="expected_thumb__filename_only", obj_out=expected_thumb__filename_only)            # 20150601_090322_090c-thumb.png
        debug_print(str_to_print="expected_thumb__full_keypath", obj_out=expected_thumb__full_keypath)              # planet/PSScene3Band/20150601_090322_090c-thumb.png

        # Sleep for a very short amount of time to prevent throttle limit -- BEFORE ANY S3 Operations
        random_sleep()

        # Calculate the Checksum of the thumbfile
        expected_thumb__checksum_value, expected_thumb__file_size_in_bytes  = calculate_md5_checksum_from_s3(bucket_name=expected_thumb__bucket_name, file_key=expected_thumb__full_keypath)
        #
        debug_print(str_to_print="expected_thumb__checksum_value", obj_out=expected_thumb__checksum_value)          # 30590045634edcc9c3218d804a1c3220
        debug_print(str_to_print="expected_thumb__file_size_in_bytes", obj_out=expected_thumb__file_size_in_bytes)  # 19948

        # Update and Save the JSON according to the ticket specs
        update_json(src_bucket_name=src_bucket_name, src_key_path=src_key_path, dest_bucket_name=dest_1__bucket_name, dest_full_keypath=dest_1__full_keypath, thumb_checksum_value=expected_thumb__checksum_value, thumb_file_size_in_bytes=expected_thumb__file_size_in_bytes, thumb_file_name_only=expected_thumb__filename_only)

        # Passing the invocation ID back in the success info.
        # NOTE(review): the resultString still says "Copy Operations" although this handler
        # performs an update; left unchanged in case completion reports are matched on it.
        return {
            'statusCode': 200,
            'invocationSchemaVersion': event['invocationSchemaVersion'],
            'invocationId': event['invocationId'],
            'results': [
                {
                    'taskId': task['taskId'],
                    'resultCode': 'Succeeded',
                    'resultString': 'Copy Operations completed successfully'
                }
            ]
        }
    except Exception:
        err_info = str(sys.exc_info())

        # Recover whatever identifying fields are available; the event itself
        # may be what was malformed, so nothing here is allowed to raise.
        event_dict = event if isinstance(event, dict) else {}
        tasks = event_dict.get('tasks')
        first_task = tasks[0] if isinstance(tasks, list) and tasks else None
        task_id = first_task.get('taskId') if isinstance(first_task, dict) else None

        # Report the failure per task when the task id is known; otherwise
        # leave 'results' empty and let 'treatMissingKeysAs' mark it failed.
        results = []
        if task_id is not None:
            results.append({
                'taskId': task_id,
                'resultCode': 'PermanentFailure',
                'resultString': err_info
            })

        return {
            # 'statusCode' and 'err_info' are kept from the previous failure shape.
            'statusCode': 500,
            'err_info': f'{err_info}',
            'invocationSchemaVersion': event_dict.get('invocationSchemaVersion', '1.0'),
            'treatMissingKeysAs': 'PermanentFailure',
            'invocationId': event_dict.get('invocationId'),
            'results': results
        }

    # return {
    #     'err_info': f'{err_info}',
    #     'success_info': f'{success_info}'
    # }

# EXAMPLE of the Test Input
# {
#   "invocationId": "some_long_string",
#   "job": {
#     "id": "e7306709-ea94-4dc0-863b-5c0d1bd20ee3"
#   },
#   "tasks": [
#     {
#       "taskId": "AAAAAAAAAAExFKoqBbA5bbIDWZB9c7NGhU0gGZLhY6jh/Lp6RiPJDFpU9bJ3KtvjxmOl9BwUPDHR9+qXkcXkYS2PO0Rb9ja6QTGRqWG7NHM4/xuLk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPA8as/pqRAyKhor83bk0fVI/rvZWwQPPWQmf4Y1aqhSd0ao/kf2qhlY99oOHNYWsJ3OedeWy/2d52K3RyadUDRARTLHqhs6hYl0qcPDW9pEY+cn8v9h8mHOZY4dCslCDrUoowtGb4hvenUC+fsdzkqb+x5k4THjuf3iLFxNMBkGtPrx5EWH5AviYZn3vo95ZioT9O2zIkmBgOq/kxNglsUFfwZzw2aRx4jQtRSR3BAmnA6sWFsPfslJNInYe1fGm4142II9dNR41lTyKQlmw/1DUieXEyVREEy3YLkewSDNzW+EOYKJjKrXwpKc+1yISxJrVJTAwWC0+pG/MaZlLBR3oWjBP33zOZTb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ",
#       "s3BucketArn": "arn:aws:s3:::csdap-cumulus-prod-internal",
#       "s3Key": "kstest/cmr_backups/planet/PSScene3Band/20150601_090322_090c_cmr.old",
#       "s3VersionId": "None"
#     }
#   ],
#   "invocationSchemaVersion": "1.0"
# }

# EXAMPLE of OLD CMR JSON (that we need to change)

# // EXPANDED
# {
#     "GranuleUR": "PSScene3Band-20150601_090322_090c",
#     "ProviderDates": 
#     [
#       {
#           "Date": "2022-01-25T16:57:20.357249+00:00",
#           "Type": "Insert"
#       },
#       {
#           "Date": "2022-01-25T16:57:20.357249+00:00",
#           "Type": "Update"
#       }
#   ],
#     "CollectionReference":
#     {
#         "ShortName": "PSScene3Band",
#         "Version": "1"
#     },
#     "AccessConstraints":
#     {
#         "Description": "Access restricted to users approved by CSDA Program",
#         "Value": 1
#     },
#     "DataGranule":
#     {
#         "ArchiveAndDistributionInformation": 
#         [
#           {
#               "Name": "thumb"
#           },
#           {
#               "Name": "20150601_090322_090c_1B_Analytic_DN.tif",
#               "SizeInBytes": 57024611,
#               "MimeType": "image/tiff",
#               "Checksum":
#               {
#                   "Value": "a26b7f8d42147c7fd52f8f5f32ab11a8",
#                   "Algorithm": "MD5"
#               }
#           },
#           {
#               "Name": "20150601_090322_090c_1B_Analytic_DN_udm.tif",
#               "SizeInBytes": 307548,
#               "MimeType": "image/tiff",
#               "Checksum":
#               {
#                   "Value": "abb11e9795cb8136b61109199cb8c2f3",
#                   "Algorithm": "MD5"
#               }
#           },
#           {
#               "Name": "20150601_090322_090c_1B_Analytic_DN_metadata.xml",
#               "SizeInBytes": 8793,
#               "MimeType": "text/xml",
#               "Checksum":
#               {
#                   "Value": "bb084575c5ed2ab33a1f2e5e806afa5e",
#                   "Algorithm": "MD5"
#               }
#           },
#           {
#               "Name": "20150601_090322_090c_metadata.json",
#               "SizeInBytes": 825,
#               "MimeType": "application/json",
#               "Checksum":
#               {
#                   "Value": "de208a42f5ff7efe3f805ad87bd2f161",
#                   "Algorithm": "MD5"
#               }
#           },
#           {
#               "Name": "20150601_090322_090c_1B_Analytic_DN_RPC.TXT",
#               "SizeInBytes": 3403,
#               "MimeType": "text/plain",
#               "Checksum":
#               {
#                   "Value": "6ba357f3ac104b5f6ffa3a8aee74a949",
#                   "Algorithm": "MD5"
#               }
#           }
#       ],
#         "DayNightFlag": "Day",
#         "ProductionDateTime": "2015-06-01T09:03:22.174139+00:00"
#     },
#     "TemporalExtent":
#     {
#         "SingleDateTime": "2015-06-01T09:03:22.174139+00:00"
#     },
#     "SpatialExtent":
#     {
#         "HorizontalSpatialDomain":
#         {
#             "ZoneIdentifier": "32627",
#             "Geometry":
#             {
#                 "GPolygons": [
#                 {
#                     "Boundary":
#                     {
#                         "Points": [
#                         {
#                             "Longitude": -21.753728907243,
#                             "Latitude": 79.775286400988
#                         },
#                         {
#                             "Longitude": -22.220305656715,
#                             "Latitude": 79.838586366197
#                         },
#                         {
#                             "Longitude": -22.749111122059,
#                             "Latitude": 79.713817638297
#                         },
#                         {
#                             "Longitude": -22.284756508606,
#                             "Latitude": 79.651261099217
#                         },
#                         {
#                             "Longitude": -21.753728907243,
#                             "Latitude": 79.775286400988
#                         }]
#                     }
#                 }]
#             }
#         }
#     },
#     "Platforms": [
#     {
#         "ShortName": "PlanetScope",
#         "Instruments": [
#         {
#             "ShortName": "PS0"
#         }]
#     }],
#     "Projects": [
#     {
#         "ShortName": "CSDA"
#     }],
#     "RelatedUrls": 
#     [
#       {
#           "URL": "https://ss-ingest-dev-thumbnails115905a6-1qhb7abvyj8pc.s3.amazonaws.com/planet/PSScene3Band-20150601_090322_090c/thumb",
#           "Type": "GET RELATED VISUALIZATION",
#           "Description": "True color reflectance quick view",
#           "Format": "PNG",
#           "MimeType": "image/png"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN.tif",
#           "Description": "Download 20150601_090322_090c_1B_Analytic_DN.tif",
#           "Type": "GET DATA"
#       },
#       {
#           "URL": "s3://csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN.tif",
#           "Description": "This link provides direct download access via S3 to the granule",
#           "Type": "GET DATA VIA DIRECT ACCESS"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN_RPC.TXT",
#           "Description": "Download 20150601_090322_090c_1B_Analytic_DN_RPC.TXT",
#           "Type": "GET DATA"
#       },
#       {
#           "URL": "s3://csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN_RPC.TXT",
#           "Description": "This link provides direct download access via S3 to the granule",
#           "Type": "GET DATA VIA DIRECT ACCESS"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN_metadata.xml",
#           "Description": "Download 20150601_090322_090c_1B_Analytic_DN_metadata.xml",
#           "Type": "GET DATA"
#       },
#       {
#           "URL": "s3://csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN_metadata.xml",
#           "Description": "This link provides direct download access via S3 to the granule",
#           "Type": "GET DATA VIA DIRECT ACCESS"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN_udm.tif",
#           "Description": "Download 20150601_090322_090c_1B_Analytic_DN_udm.tif",
#           "Type": "GET DATA"
#       },
#       {
#           "URL": "s3://csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_1B_Analytic_DN_udm.tif",
#           "Description": "This link provides direct download access via S3 to the granule",
#           "Type": "GET DATA VIA DIRECT ACCESS"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_metadata.json",
#           "Description": "Download 20150601_090322_090c_metadata.json",
#           "Type": "GET DATA"
#       },
#       {
#           "URL": "s3://csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_metadata.json",
#           "Description": "This link provides direct download access via S3 to the granule",
#           "Type": "GET DATA VIA DIRECT ACCESS"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_thumb",
#           "Description": "Download 20150601_090322_090c_thumb",
#           "Type": "GET DATA"
#       },
#       {
#           "URL": "s3://csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_thumb",
#           "Description": "This link provides direct download access via S3 to the granule",
#           "Type": "GET DATA VIA DIRECT ACCESS"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_cmr.json",
#           "Description": "Download 20150601_090322_090c_cmr.json",
#           "Type": "EXTENDED METADATA"
#       },
#       {
#           "URL": "s3://csdap-cumulus-prod-protected/planet/PSScene3Band/20150601_090322_090c_cmr.json",
#           "Description": "This link provides direct download access via S3 to the granule",
#           "Type": "EXTENDED METADATA"
#       },
#       {
#           "URL": "https://data.csda.earthdata.nasa.gov/s3credentials",
#           "Description": "api endpoint to retrieve temporary credentials valid for same-region direct s3 access",
#           "Type": "VIEW RELATED INFORMATION"
#       }
#   ],
#     "MetadataSpecification":
#     {
#         "URL": "https://cdn.earthdata.nasa.gov/umm/granule/v1.6.3",
#         "Name": "UMM-G",
#         "Version": "1.6.3"
#     }
# }

# Batch Input (Small Sample)
# First 5 lines
# "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203931_1039_cmr.old"
# "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203932_1039_cmr.old"
# "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203933_1039_cmr.old"
# "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203934_1039_cmr.old"
# "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203936_1039_cmr.old"

# Also - instead of generating a whole new manifest for the backed-up files,
# here is a local Python script I made and used to convert the manifest paths to point to the backups instead of the originals.
# So, to be clear, the Lambda above uses the backed-up CMR files as its inputs.
#
# make_cmr_backup_list_from_original_manifest.py
#
# python make_cmr_backup_list_from_original_manifest.py
#
#
#
# SETTING__Source_File = 'cmrjson_only_list/psscene_cmr_json_files_only_list__2024-08-15.txt'
# SETTING__Out_file    = 'cmrjson_only_list/BACKEDUP_psscene_cmr_json_files_only_list__2024-08-15.txt'
#
# print(f'')
# print(f'Making a new list of backedup CMR files!')
# print(f'')
#
#
# # Main Function
# def run_process():
#   
#   # Read the source manifest file into a list of lines
#   input_lines = []
#   with open(SETTING__Source_File, 'r') as in_file:
#       input_lines = in_file.readlines()
#
#   # Modify each line.
#   output_lines = []
#   for in_line in input_lines:
#       # Replace the bucket name, prepend the backup prefix to the key path, and swap the '_cmr.json' suffix for '_cmr.old' so that the line points at the backed-up copy of the file.
#       # # Example:
#       # This:                 "csdap-cumulus-prod-protected","planet/PSScene3Band/20190917_203931_1039_cmr.json"
#       # Should become This:   "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203931_1039_cmr.old"
#       out_line = in_line
#       out_line = out_line.replace('csdap-cumulus-prod-protected',     'csdap-cumulus-prod-internal')
#       out_line = out_line.replace('planet/PSScene3Band/',             'kstest/cmr_backups/planet/PSScene3Band/')
#       out_line = out_line.replace('_cmr.json',                        '_cmr.old')
#       #
#       output_lines.append(out_line)
#
#   # Save the new lines to the output file. 
#   with open(SETTING__Out_file, 'w') as out_file:
#       out_file.writelines(output_lines)
#
#   # Output Message when done.
#   print(f'')
#   print(f'Done')
#   print(f'')
#
#
#
# # Run the main process.
# run_process()
#
#
#
# # "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203932_1039_cmr.old"
#
# # First 5 lines
# # "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203931_1039_cmr.old"
# # "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203932_1039_cmr.old"
# # "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203933_1039_cmr.old"
# # "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203934_1039_cmr.old"
# # "csdap-cumulus-prod-internal","kstest/cmr_backups/planet/PSScene3Band/20190917_203936_1039_cmr.old"

NASA-IMPACT / csdap-cumulus

Correct entries for thumbnails in cmr.json files in PSScene3Band collection #306