aws-samples / aws-batch-runtime-monitoring

Serverless application to monitor an AWS Batch architecture through dashboards.
MIT No Attribution
58 stars 16 forks source link

(feat) Jobs placed dashboard: Use ECS task state change events for retrieving Batch jobs history #21

Closed: devendra-d-chavan closed this pull request 1 year ago

devendra-d-chavan commented 1 year ago

Issue #, if available: #20

Description of changes:

Docs

Testing

Batch jobs placement dashboard

batch-job-monitoring-screenshot-1

Execution logs from an ECSRunTask state machine execution (the run takes the "Job Not Placed, Skipping" path):

{
    "id": "1",
    "type": "ExecutionStarted",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "roleArn": "..."
    },
    "previous_event_id": "0",
    "event_timestamp": "1687310338632",
    "execution_arn": "..."
}
{
    "id": "2",
    "type": "ChoiceStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Is RunTask call on Batch?"
    },
    "previous_event_id": "0",
    "event_timestamp": "1687310338636",
    "execution_arn": "..."
}
{
    "id": "3",
    "type": "ChoiceStateExited",
    "details": {
        "name": "Is RunTask call on Batch?",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "2",
    "event_timestamp": "1687310338637",
    "execution_arn": "..."
}
{
    "id": "4",
    "type": "PassStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Select common fields"
    },
    "previous_event_id": "3",
    "event_timestamp": "1687310338637",
    "execution_arn": "..."
}
{
    "id": "5",
    "type": "PassStateExited",
    "details": {
        "name": "Select common fields",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "4",
    "event_timestamp": "1687310338639",
    "execution_arn": "..."
}
{
    "id": "6",
    "type": "TaskStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "SQS SendMessage RunTask Call"
    },
    "previous_event_id": "5",
    "event_timestamp": "1687310338639",
    "execution_arn": "..."
}
{
    "id": "7",
    "type": "TaskScheduled",
    "details": {
        "parameters": "{\"QueueUrl\":\"...\",\"MessageBody\":{\"Dimensions\":{\"ComputeEnvironment\":\"...\",\"JobQueue\":\"...\",\"ECSCluster\":\"...\"},\"Properties\":{\"ComputeEnvironment\":\"...\",\"JobQueue\":\"...\",\"ECSCluster\":\"...8\"},\"MetricName\":\"RunTask Call\",\"MetricTime\":\"2023-06-21T01:18:46Z\"}}",
        "region": "us-east-1",
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "6",
    "event_timestamp": "1687310338641",
    "execution_arn": "..."
}
{
    "id": "8",
    "type": "TaskStarted",
    "details": {
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "7",
    "event_timestamp": "1687310338662",
    "execution_arn": "..."
}
{
    "id": "9",
    "type": "TaskSucceeded",
    "details": {
        "output": "...",
        "outputDetails": {
            "truncated": false
        },
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "8",
    "event_timestamp": "1687310338709",
    "execution_arn": "..."
}
{
    "id": "10",
    "type": "TaskStateExited",
    "details": {
        "name": "SQS SendMessage RunTask Call",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "9",
    "event_timestamp": "1687310338710",
    "execution_arn": "..."
}
{
    "id": "11",
    "type": "PassStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Job Not Placed, Skipping"
    },
    "previous_event_id": "10",
    "event_timestamp": "1687310338710",
    "execution_arn": "..."
}
{
    "id": "12",
    "type": "PassStateExited",
    "details": {
        "name": "Job Not Placed, Skipping",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "11",
    "event_timestamp": "1687310338710",
    "execution_arn": "..."
}
{
    "id": "13",
    "type": "ExecutionSucceeded",
    "details": {
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "12",
    "event_timestamp": "1687310338710",
    "execution_arn": "..."
}

Execution logs from an ECSTaskEvents state machine execution (array job path, ending with the "Jobs Placed" SQS message):

{
    "id": "1",
    "type": "ExecutionStarted",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "roleArn": "..."
    },
    "previous_event_id": "0",
    "event_timestamp": "1687310363991",
    "execution_arn": "..."
}
{
    "id": "2",
    "type": "ChoiceStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Is RunTask call on Batch?"
    },
    "previous_event_id": "0",
    "event_timestamp": "1687310363994",
    "execution_arn": "..."
}
{
    "id": "3",
    "type": "ChoiceStateExited",
    "details": {
        "name": "Is RunTask call on Batch?",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "2",
    "event_timestamp": "1687310363995",
    "execution_arn": "..."
}
{
    "id": "4",
    "type": "PassStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Select common fields"
    },
    "previous_event_id": "3",
    "event_timestamp": "1687310363995",
    "execution_arn": "..."
}
{
    "id": "5",
    "type": "PassStateExited",
    "details": {
        "name": "Select common fields",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "4",
    "event_timestamp": "1687310363998",
    "execution_arn": "..."
}
{
    "id": "6",
    "type": "TaskStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "SQS SendMessage RunTask Call"
    },
    "previous_event_id": "5",
    "event_timestamp": "1687310363998",
    "execution_arn": "..."
}
{
    "id": "7",
    "type": "TaskScheduled",
    "details": {
        "parameters": "{\"QueueUrl\":\"...\",\"MessageBody\":{\"Dimensions\":{\"ComputeEnvironment\":\"...\",\"JobQueue\":\"...\",\"ECSCluster\":\"...\"},\"Properties\":{\"ComputeEnvironment\":\"...\",\"JobQueue\":\"...\",\"ECSCluster\":\"...\",\"JobId\":\"...:29\"},\"MetricName\":\"RunTask Call\",\"MetricTime\":\"2023-06-21T01:19:23Z\"}}",
        "region": "us-east-1",
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "6",
    "event_timestamp": "1687310364000",
    "execution_arn": "..."
}
{
    "id": "8",
    "type": "TaskStarted",
    "details": {
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "7",
    "event_timestamp": "1687310364026",
    "execution_arn": "..."
}
{
    "id": "9",
    "type": "TaskSucceeded",
    "details": {
        "output": "...",
        "outputDetails": {
            "truncated": false
        },
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "8",
    "event_timestamp": "1687310364072",
    "execution_arn": "..."
}
{
    "id": "10",
    "type": "TaskStateExited",
    "details": {
        "name": "SQS SendMessage RunTask Call",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "9",
    "event_timestamp": "1687310364072",
    "execution_arn": "..."
}
{
    "id": "11",
    "type": "ChoiceStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Is Array Job?"
    },
    "previous_event_id": "10",
    "event_timestamp": "1687310364072",
    "execution_arn": "..."
}
{
    "id": "12",
    "type": "ChoiceStateExited",
    "details": {
        "name": "Is Array Job?",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "11",
    "event_timestamp": "1687310364073",
    "execution_arn": "..."
}
{
    "id": "13",
    "type": "PassStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Yes, add job index"
    },
    "previous_event_id": "12",
    "event_timestamp": "1687310364073",
    "execution_arn": "..."
}
{
    "id": "14",
    "type": "PassStateExited",
    "details": {
        "name": "Yes, add job index",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "13",
    "event_timestamp": "1687310364074",
    "execution_arn": "..."
}
{
    "id": "15",
    "type": "PassStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Job is placed AvailabilityZone"
    },
    "previous_event_id": "14",
    "event_timestamp": "1687310364074",
    "execution_arn": "..."
}
{
    "id": "16",
    "type": "PassStateExited",
    "details": {
        "name": "Job is placed AvailabilityZone",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "15",
    "event_timestamp": "1687310364074",
    "execution_arn": "..."
}
{
    "id": "17",
    "type": "PassStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Job is placed ContainerInstanceId"
    },
    "previous_event_id": "16",
    "event_timestamp": "1687310364074",
    "execution_arn": "..."
}
{
    "id": "18",
    "type": "PassStateExited",
    "details": {
        "name": "Job is placed ContainerInstanceId",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "17",
    "event_timestamp": "1687310364075",
    "execution_arn": "..."
}
{
    "id": "19",
    "type": "TaskStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "DynamoDB GetItem Instance"
    },
    "previous_event_id": "18",
    "event_timestamp": "1687310364076",
    "execution_arn": "..."
}
{
    "id": "20",
    "type": "TaskScheduled",
    "details": {
        "parameters": "{\"TableName\":\"...\",\"Key\":{\"ContainerInstanceId\":{\"S\":\"...\"}},\"ProjectionExpression\":\"InstanceType,InstanceId\",\"ConsistentRead\":true}",
        "region": "us-east-1",
        "resource": "getItem",
        "resourceType": "dynamodb"
    },
    "previous_event_id": "19",
    "event_timestamp": "1687310364077",
    "execution_arn": "..."
}
{
    "id": "21",
    "type": "TaskStarted",
    "details": {
        "resource": "getItem",
        "resourceType": "dynamodb"
    },
    "previous_event_id": "20",
    "event_timestamp": "1687310364077",
    "execution_arn": "..."
}
{
    "id": "22",
    "type": "TaskSucceeded",
    "details": {
        "output": "...",
        "outputDetails": {
            "truncated": false
        },
        "resource": "getItem",
        "resourceType": "dynamodb"
    },
    "previous_event_id": "21",
    "event_timestamp": "1687310364138",
    "execution_arn": "..."
}
{
    "id": "23",
    "type": "TaskStateExited",
    "details": {
        "name": "DynamoDB GetItem Instance",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "22",
    "event_timestamp": "1687310364138",
    "execution_arn": "..."
}
{
    "id": "24",
    "type": "ChoiceStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "Did we get the instance?"
    },
    "previous_event_id": "23",
    "event_timestamp": "1687310364139",
    "execution_arn": "..."
}
{
    "id": "25",
    "type": "ChoiceStateExited",
    "details": {
        "name": "Did we get the instance?",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "24",
    "event_timestamp": "1687310364139",
    "execution_arn": "..."
}
{
    "id": "26",
    "type": "TaskStateEntered",
    "details": {
        "input": "...",
        "inputDetails": {
            "truncated": false
        },
        "name": "SQS SendMessage Placed"
    },
    "previous_event_id": "25",
    "event_timestamp": "1687310364139",
    "execution_arn": "..."
}
{
    "id": "27",
    "type": "TaskScheduled",
    "details": {
        "parameters": "{\"QueueUrl\":\"...\",\"MessageBody\":{\"Dimensions\":{\"ComputeEnvironment\":\"...\",\"JobQueue\":\"...\",\"AvailabilityZone\":\"us-east-1c\",\"ECSCluster\":\"...\",\"InstanceType\":\"c4.8xlarge\"},\"Properties\":{\"ComputeEnvironment\":\"...\",\"InstanceId\":\"i-...\",\"JobQueue\":\"...\",\"AvailabilityZone\":\"us-east-1c\",\"ECSCluster\":\"...\",\"InstanceType\":\"c4.8xlarge\",\"JobId\":[\"...:29\"]},\"MetricName\":\"Jobs Placed\",\"MetricTime\":\"2023-06-21T01:19:23Z\"}}",
        "region": "us-east-1",
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "26",
    "event_timestamp": "1687310364142",
    "execution_arn": "..."
}
{
    "id": "28",
    "type": "TaskStarted",
    "details": {
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "27",
    "event_timestamp": "1687310364142",
    "execution_arn": "..."
}
{
    "id": "29",
    "type": "TaskSucceeded",
    "details": {
        "output": "...",
        "outputDetails": {
            "truncated": false
        },
        "resource": "sendMessage",
        "resourceType": "sqs"
    },
    "previous_event_id": "28",
    "event_timestamp": "1687310364148",
    "execution_arn": "..."
}
{
    "id": "30",
    "type": "TaskStateExited",
    "details": {
        "name": "SQS SendMessage Placed",
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "29",
    "event_timestamp": "1687310364148",
    "execution_arn": "..."
}
{
    "id": "31",
    "type": "ExecutionSucceeded",
    "details": {
        "output": "...",
        "outputDetails": {
            "truncated": false
        }
    },
    "previous_event_id": "30",
    "event_timestamp": "1687310364148",
    "execution_arn": "..."
}

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.