Research Task

Done for stop_delay_views; segment_speed_views will regenerate automatically as needed.

Code for reference in case of future jams:

# Shell command to log all filenames and generation codes

# gsutil ls -a gs://calitp-analytics-data/data-analyses/rt_delay/stop_delay_views >> sdv.log
# Remove any lines at the top of the log that don't contain an actual filename, as well as the blank line at the bottom

# Imports come first: the client below needs `storage`, and the restore/remove
# loop further down calls time.sleep().
import time

from google.cloud import storage

# sdv.log holds the output of the `gsutil ls -a` command shown above; keep each
# entry with trailing whitespace (including the newline) stripped.
with open('sdv.log') as file:
    lines = [line.rstrip() for line in file]

# Module-level client shared by copy_file_archived_generation.
storage_client = storage.Client(project='cal-itp-data-infra')


def copy_file_archived_generation(
        bucket_name, blob_name, destination_bucket_name, destination_blob_name, generation
):
    """Copy one specific generation of a blob to a destination blob.

    Uses the module-level ``storage_client`` and prints a one-line summary
    once the copy has been made.

    Args:
        bucket_name: Name of the bucket that holds the source blob.
        blob_name: Name of the source blob.
        destination_bucket_name: Name of the bucket to copy into.
        destination_blob_name: Name to give the copied blob.
        generation: Generation number of the source blob to copy.
    """

    source_bucket = storage_client.bucket(bucket_name)
    source_blob = source_bucket.blob(blob_name)
    destination_bucket = storage_client.bucket(destination_bucket_name)

    blob_copy = source_bucket.copy_blob(
        source_blob, destination_bucket, destination_blob_name, source_generation=generation
    )

    # Report the generation that was requested: `source_blob` is a bare handle
    # created without a generation and never reloaded, so its `.generation`
    # attribute would not reflect the version that was copied.
    print(
        "Generation {} of the blob {} in bucket {} copied to blob {} in bucket {}.".format(
            generation,
            source_blob.name,
            source_bucket.name,
            blob_copy.name,
            destination_bucket.name,
        )
    )

# Walk every logged object version: restore the ones that still use the old
# naming convention and delete the ones that were renamed but misformatted.
# NOTE(review): `fs` is not defined anywhere in this snippet — presumably a
# gcsfs-style filesystem object created elsewhere; confirm before running.
analytics_bucket = 'calitp-analytics-data'

for line in lines:
    # Entries look like gs://<bucket>/<path>#<generation>. Split on the LAST '#'
    # so the trailing generation is found even if the path itself contains '#'.
    url, separator, generation_text = line.rpartition('#')
    if not separator or not generation_text.isdigit():
        # Header rows and blank lines carry no generation; skip them instead of
        # crashing so the log does not have to be hand-edited first.
        if line:
            print(line + ' skipped (no generation)')
        continue

    # Drop the 'gs:', '' and bucket components to get the object name.
    blob = '/'.join(url.split('/')[3:])
    gen = int(generation_text)

    # First four characters of the second underscore-separated token of the
    # filename. A filename without an underscore raises IndexError here, so an
    # unexpected name stops the run rather than being copied or deleted blindly.
    name_marker = blob.split('/')[-1].split('_')[1][:4]

    if name_marker != '2022': # restore old naming convention files
        print(blob + ' restore')
        copy_file_archived_generation(analytics_bucket, blob, analytics_bucket, blob, gen)
        time.sleep(1)

    else: # remove already renamed but misformatted files
        print(blob + ' rm')
        fs.rm(f'gs://{analytics_bucket}/{blob}')
        time.sleep(.5)

cal-itp / data-analyses

Research Task - fix rt intermediate files #536