terascope / teraslice

Scalable data processing pipelines in JavaScript
https://terascope.github.io/teraslice/
Apache License 2.0
50 stars 13 forks source link

Add and Set Prometheus Metrics #3602

Closed sotojn closed 2 months ago

sotojn commented 2 months ago

This PR makes the following changes:

Ref to issue #3600

sotojn commented 2 months ago

I ran two jobs on the cluster, then stopped one and paused the other.

This is the current result when you hit the /metrics endpoint (excluding node metrics):

# HELP teraslice_cluster_master_controller_workers_active Number of Teraslice workers actively processing slices.
# TYPE teraslice_cluster_master_controller_workers_active gauge
teraslice_cluster_master_controller_workers_active{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 1
teraslice_cluster_master_controller_workers_active{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 0

# HELP teraslice_cluster_master_controller_workers_available Number of Teraslice workers running and waiting for work.
# TYPE teraslice_cluster_master_controller_workers_available gauge
teraslice_cluster_master_controller_workers_available{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_controller_workers_available{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 3

# HELP teraslice_cluster_master_controller_workers_joined Total number of Teraslice workers that have joined the execution controller for this job.
# TYPE teraslice_cluster_master_controller_workers_joined gauge
teraslice_cluster_master_controller_workers_joined{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 1
teraslice_cluster_master_controller_workers_joined{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 3

# HELP teraslice_cluster_master_controller_workers_reconnected Total number of Teraslice workers that have reconnected to the execution controller for this job.
# TYPE teraslice_cluster_master_controller_workers_reconnected gauge
teraslice_cluster_master_controller_workers_reconnected{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_controller_workers_reconnected{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 0

# HELP teraslice_cluster_master_controller_workers_disconnected Total number of Teraslice workers that have disconnected from the execution controller for this job.
# TYPE teraslice_cluster_master_controller_workers_disconnected gauge
teraslice_cluster_master_controller_workers_disconnected{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_controller_workers_disconnected{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 0

# HELP teraslice_cluster_master_execution_info Information about Teraslice execution.
# TYPE teraslice_cluster_master_execution_info gauge

# HELP teraslice_cluster_master_controller_slicers_count Number of execution controllers (slicers) running for this execution.
# TYPE teraslice_cluster_master_controller_slicers_count gauge
teraslice_cluster_master_controller_slicers_count{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 1
teraslice_cluster_master_controller_slicers_count{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 1

# HELP teraslice_cluster_master_query_duration_seconds Total time to complete the named query, in seconds.
# TYPE teraslice_cluster_master_query_duration_seconds gauge

# HELP teraslice_cluster_master_execution_cpu_limit CPU core limit for a Teraslice worker container.
# TYPE teraslice_cluster_master_execution_cpu_limit gauge

# HELP teraslice_cluster_master_execution_cpu_request Requested number of CPU cores for a Teraslice worker container.
# TYPE teraslice_cluster_master_execution_cpu_request gauge

# HELP teraslice_cluster_master_execution_memory_limit Memory limit for a Teraslice worker container.
# TYPE teraslice_cluster_master_execution_memory_limit gauge

# HELP teraslice_cluster_master_execution_memory_request Requested amount of memory for a Teraslice worker container.
# TYPE teraslice_cluster_master_execution_memory_request gauge

# HELP teraslice_cluster_master_execution_status Current status of the Teraslice execution.
# TYPE teraslice_cluster_master_execution_status gauge
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="pending",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="scheduling",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="initializing",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="running",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="recovering",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="failing",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="paused",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="stopping",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="completed",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="stopped",name="ts-dev1",assignment="cluster_master"} 1
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="rejected",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="failed",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",status="terminated",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="pending",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="scheduling",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="initializing",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="running",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="recovering",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="failing",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="paused",name="ts-dev1",assignment="cluster_master"} 1
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="stopping",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="completed",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="stopped",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="rejected",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="failed",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_execution_status{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",status="terminated",name="ts-dev1",assignment="cluster_master"} 0

# HELP teraslice_cluster_master_controller_slices_processed Number of slices processed.
# TYPE teraslice_cluster_master_controller_slices_processed gauge
teraslice_cluster_master_controller_slices_processed{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 783
teraslice_cluster_master_controller_slices_processed{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 269

# HELP teraslice_cluster_master_controller_slices_failed Number of slices failed.
# TYPE teraslice_cluster_master_controller_slices_failed gauge
teraslice_cluster_master_controller_slices_failed{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 0
teraslice_cluster_master_controller_slices_failed{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 0

# HELP teraslice_cluster_master_controller_slices_queued Number of slices queued for processing.
# TYPE teraslice_cluster_master_controller_slices_queued gauge
teraslice_cluster_master_controller_slices_queued{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 3
teraslice_cluster_master_controller_slices_queued{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 9

# HELP teraslice_cluster_master_execution_created_timestamp_seconds Execution creation time.
# TYPE teraslice_cluster_master_execution_created_timestamp_seconds gauge
teraslice_cluster_master_execution_created_timestamp_seconds{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 1714603695.652
teraslice_cluster_master_execution_created_timestamp_seconds{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 1714604012.861

# HELP teraslice_cluster_master_execution_updated_timestamp_seconds Execution update time.
# TYPE teraslice_cluster_master_execution_updated_timestamp_seconds gauge
teraslice_cluster_master_execution_updated_timestamp_seconds{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 1714604037.386
teraslice_cluster_master_execution_updated_timestamp_seconds{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 1714604059.425

# HELP teraslice_cluster_master_execution_slicers Number of slicers defined on the execution.
# TYPE teraslice_cluster_master_execution_slicers gauge
teraslice_cluster_master_execution_slicers{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 1
teraslice_cluster_master_execution_slicers{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 1

# HELP teraslice_cluster_master_execution_workers Number of workers defined on the execution.  Note that the number of actual workers can differ from this value.
# TYPE teraslice_cluster_master_execution_workers gauge
teraslice_cluster_master_execution_workers{ex_id="a304b828-d7e9-4da9-81a4-806ed478a1ad",job_id="d5e8fa4c-49e3-4d3c-a082-5a27a3a11d87",job_name="test-job-1",name="ts-dev1",assignment="cluster_master"} 1
teraslice_cluster_master_execution_workers{ex_id="9c0ea368-5841-49d4-b886-6550dd0257be",job_id="f93d6350-94ed-422d-9a65-543923defa2f",job_name="test-job-2",name="ts-dev1",assignment="cluster_master"} 3

# HELP teraslice_cluster_master_info Information about Teraslice cluster master
# TYPE teraslice_cluster_master_info gauge
teraslice_cluster_master_info{arch="arm64",clustering_type="kubernetes",name="ts-dev1",node_version="v18.19.1",platform="linux",teraslice_version="1.3.1",assignment="cluster_master"} 1