astronomer / docs

This repository contains all content and code for Astro and Astronomer Software documentation.

Improve CI/CD templates #3868

Open lzdanski opened 2 weeks ago

lzdanski commented 2 weeks ago

Issue Type

Inaccurate, misleading, or out-of-date information

Links to Affected Docs

https://www.astronomer.io/docs/astro/ci-cd-templates/aws-s3

The Issue or Context

A tester hit a few problems implementing the code example: it wasn't clear which values in the example needed to be replaced with details from their own AWS account or bucket.

Fix or Proposed Change

def lambda_handler(event, context) -> None:
    """Triggered by a change to a Cloud Storage bucket.
    :param event: Event payload.
    :param context: Metadata for the event.
    """
    base_dir = '/tmp/astro'
    download_to_local(BUCKET, 'mkdemo/dags/', f'{base_dir}/dags')
    download_to_local(BUCKET, 'mkdemo/cli_binary/', base_dir)

The snippet above is from our docs, but could we call out the placeholders that need replacing, as in the following:

def lambda_handler(event, context) -> None:
    """Triggered by a change to a Cloud Storage bucket.
    :param event: Event payload.
    :param context: Metadata for the event.
    """
    base_dir = '/tmp/astro'
    download_to_local(BUCKET, '{bucket_name}/dags/', f'{base_dir}/dags')
    download_to_local(BUCKET, '{bucket_name}/cli_binary/', base_dir)
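
Relatedly, since BUCKET is read from the Lambda's environment, we could show readers how to set it. A minimal sketch using boto3, assuming the function is named s3_to_astro (the name used in the log-group ARN below) and using made-up values for the bucket and Deployment ID:

import boto3

# Hypothetical values: replace "s3_to_astro", "my-astro-bucket", and the
# Deployment ID with your own.
lambda_client = boto3.client("lambda")
lambda_client.update_function_configuration(
    FunctionName="s3_to_astro",
    Environment={
        "Variables": {
            "BUCKET": "my-astro-bucket",
            "ASTRO_DEPLOYMENT_ID": "<your-deployment-id>",
        }
    },
)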

Additional Notes

https://astronomer.slack.com/archives/C015V2JFKT5/p1718658478428379

Required Reviewers

bearsandrhinos

bearsandrhinos commented 1 week ago

A few other updates.

  1. In section 4 about updating the trust policy, we should call out that they need to update their AWS account ID.

Current

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "lambdacreateloggroup",
            "Effect": "Allow",
            "Action": "logs:CreateLogGroup",
            "Resource": "arn:aws:logs:us-east-1:123456789012:*"
        },
        {
            "Sid": "lambdaputlogevents",
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": [
                "arn:aws:logs:us-east-1:123456789012:log-group:/aws/lambda/s3_to_astro:*"
            ]
        },
        {
            "Sid": "bucketpermission",
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": "arn:aws:s3:::my-demo-bucket"
        }
    ]
}

Proposed

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "lambdacreateloggroup",
            "Effect": "Allow",
            "Action": "logs:CreateLogGroup",
            "Resource": "arn:aws:logs:us-east-1:{account_id}:*"
        },
        {
            "Sid": "lambdaputlogevents",
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": [
                "arn:aws:logs:us-east-1:{account_id}:log-group:/aws/lambda/{lambda_function_name}:*"
            ]
        },
        {
            "Sid": "bucketpermission",
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3::{bucket_name}",
                "arn:aws:s3::{bucket_name}/*"
        }
    ]
}
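
To make it easier to fill in {account_id}, we could also mention how to look it up. A minimal sketch using boto3 and STS, assuming AWS credentials are already configured:

import boto3

# Returns the 12-digit account ID for the current credentials.
account_id = boto3.client("sts").get_caller_identity()["Account"]
print(account_id)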

  2. Update the actual Lambda function code. This has a slight modification to what is proposed above.

Here is the original

import boto3
import subprocess
import os
import tarfile

BUCKET = os.environ.get("BUCKET", "astronomer-field-engineering-demo")
s3 = boto3.resource('s3')
deploymentId = os.environ.get('ASTRO_DEPLOYMENT_ID')

def untar(filename: str, destination: str) -> None:
    with tarfile.open(filename) as file:
        file.extractall(destination)

def run_command(cmd: str) -> None:
    p = subprocess.Popen("set -x; " + cmd, shell=True)
    p.communicate()

def download_to_local(bucket_name: str, s3_folder: str, local_dir: str = None) -> None:
    """
    Download the contents of a folder directory
    Args:
        bucket_name: the name of the s3 bucket
        s3_folder: the folder path in the s3 bucket
        local_dir: a relative or absolute directory path in the local file system
    """
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.filter(Prefix=s3_folder):
        target = obj.key if local_dir is None \
            else os.path.join(local_dir, os.path.relpath(obj.key, s3_folder))
        if not os.path.exists(os.path.dirname(target)):
            os.makedirs(os.path.dirname(target))
        if obj.key[-1] == '/':
            continue
        bucket.download_file(obj.key, target)
    print("downloaded file")

def lambda_handler(event, context) -> None:
    """Triggered by a change to a Cloud Storage bucket.
    :param event: Event payload.
    :param context: Metadata for the event.
    """
    base_dir = '/tmp/astro'
    download_to_local(BUCKET, 'mkdemo/dags/', f'{base_dir}/dags')
    download_to_local(BUCKET, 'mkdemo/cli_binary/', base_dir)

    os.chdir(base_dir)
    untar('./astro_cli.tar.gz', '.')

    run_command('echo y | ./astro dev init')
    run_command(f"./astro deploy {deploymentId} --dags")

    return {"statusCode": 200}

And here is the proposed

import boto3
import subprocess
import os
import tarfile

BUCKET = os.environ.get("BUCKET", "{bucket_name}")
s3 = boto3.resource('s3')
deploymentId = os.environ.get('ASTRO_DEPLOYMENT_ID')

def untar(filename: str, destination: str) -> None:
    with tarfile.open(filename) as file:
        file.extractall(destination)

def run_command(cmd: str) -> None:
    p = subprocess.Popen("set -x; " + cmd, shell=True)
    p.communicate()

def download_to_local(bucket_name: str, s3_folder: str, local_dir: str = None) -> None:
    """
    Download the contents of a folder directory
    Args:
        bucket_name: the name of the s3 bucket
        s3_folder: the folder path in the s3 bucket
        local_dir: a relative or absolute directory path in the local file system
    """
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.filter(Prefix=s3_folder):
        target = obj.key if local_dir is None \
            else os.path.join(local_dir, os.path.relpath(obj.key, s3_folder))
        if not os.path.exists(os.path.dirname(target)):
            os.makedirs(os.path.dirname(target))
        if obj.key[-1] == '/':
            continue
        bucket.download_file(obj.key, target)
    print("downloaded file")

def lambda_handler(event, context) -> None:
    """Triggered by a change to a Cloud Storage bucket.
    :param event: Event payload.
    :param context: Metadata for the event.
    """
    base_dir = '/tmp/astro'
    if not os.path.isdir(base_dir):
        os.mkdir(base_dir)

    download_to_local(BUCKET, 'dags/', f'{base_dir}/dags')
    download_to_local(BUCKET, 'cli_binary/', base_dir)

    os.chdir(base_dir)
    untar('./astro_cli.tar.gz', '.')

    run_command('echo y | ./astro dev init')
    run_command(f"./astro deploy {deploymentId} --dags")

    return {"statusCode": 200}