Closed busma13 closed 2 months ago
Current Prom Metrics output for master:
# HELP process_cpu_user_seconds_total Total user CPU time spent in seconds.
# TYPE process_cpu_user_seconds_total counter
process_cpu_user_seconds_total{name="teracluster",assignment="cluster_master"} 0.590616
# HELP process_cpu_system_seconds_total Total system CPU time spent in seconds.
# TYPE process_cpu_system_seconds_total counter
process_cpu_system_seconds_total{name="teracluster",assignment="cluster_master"} 0.079571
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total{name="teracluster",assignment="cluster_master"} 0.670187
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds{name="teracluster",assignment="cluster_master"} 1714493919
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes{name="teracluster",assignment="cluster_master"} 162201600
# HELP nodejs_eventloop_lag_seconds Lag of event loop in seconds.
# TYPE nodejs_eventloop_lag_seconds gauge
# HELP nodejs_eventloop_lag_min_seconds The minimum recorded event loop delay.
# TYPE nodejs_eventloop_lag_min_seconds gauge
nodejs_eventloop_lag_min_seconds{name="teracluster",assignment="cluster_master"} 0.006434816
# HELP nodejs_eventloop_lag_max_seconds The maximum recorded event loop delay.
# TYPE nodejs_eventloop_lag_max_seconds gauge
nodejs_eventloop_lag_max_seconds{name="teracluster",assignment="cluster_master"} 0.015114239
# HELP nodejs_eventloop_lag_mean_seconds The mean of the recorded event loop delays.
# TYPE nodejs_eventloop_lag_mean_seconds gauge
nodejs_eventloop_lag_mean_seconds{name="teracluster",assignment="cluster_master"} 0.010874028281365692
# HELP nodejs_eventloop_lag_stddev_seconds The standard deviation of the recorded event loop delays.
# TYPE nodejs_eventloop_lag_stddev_seconds gauge
nodejs_eventloop_lag_stddev_seconds{name="teracluster",assignment="cluster_master"} 0.00042059594284707007
# HELP nodejs_eventloop_lag_p50_seconds The 50th percentile of the recorded event loop delays.
# TYPE nodejs_eventloop_lag_p50_seconds gauge
nodejs_eventloop_lag_p50_seconds{name="teracluster",assignment="cluster_master"} 0.011067391
# HELP nodejs_eventloop_lag_p90_seconds The 90th percentile of the recorded event loop delays.
# TYPE nodejs_eventloop_lag_p90_seconds gauge
nodejs_eventloop_lag_p90_seconds{name="teracluster",assignment="cluster_master"} 0.011116543
# HELP nodejs_eventloop_lag_p99_seconds The 99th percentile of the recorded event loop delays.
# TYPE nodejs_eventloop_lag_p99_seconds gauge
nodejs_eventloop_lag_p99_seconds{name="teracluster",assignment="cluster_master"} 0.011616255
# HELP nodejs_active_resources Number of active resources that are currently keeping the event loop alive, grouped by async resource type.
# TYPE nodejs_active_resources gauge
nodejs_active_resources{type="PipeWrap",name="teracluster",assignment="cluster_master"} 2
nodejs_active_resources{type="TTYWrap",name="teracluster",assignment="cluster_master"} 2
nodejs_active_resources{type="TCPSocketWrap",name="teracluster",assignment="cluster_master"} 2
nodejs_active_resources{type="Timeout",name="teracluster",assignment="cluster_master"} 11
nodejs_active_resources{type="Immediate",name="teracluster",assignment="cluster_master"} 1
# HELP nodejs_active_resources_total Total number of active resources.
# TYPE nodejs_active_resources_total gauge
nodejs_active_resources_total{name="teracluster",assignment="cluster_master"} 18
# HELP nodejs_active_handles Number of active libuv handles grouped by handle type. Every handle type is C++ class name.
# TYPE nodejs_active_handles gauge
nodejs_active_handles{type="Pipe",name="teracluster",assignment="cluster_master"} 1
nodejs_active_handles{type="WriteStream",name="teracluster",assignment="cluster_master"} 1
nodejs_active_handles{type="Socket",name="teracluster",assignment="cluster_master"} 3
nodejs_active_handles{type="ReadStream",name="teracluster",assignment="cluster_master"} 1
# HELP nodejs_active_handles_total Total number of active handles.
# TYPE nodejs_active_handles_total gauge
nodejs_active_handles_total{name="teracluster",assignment="cluster_master"} 6
# HELP nodejs_active_requests Number of active libuv requests grouped by request type. Every request type is C++ class name.
# TYPE nodejs_active_requests gauge
# HELP nodejs_active_requests_total Total number of active requests.
# TYPE nodejs_active_requests_total gauge
nodejs_active_requests_total{name="teracluster",assignment="cluster_master"} 0
# HELP nodejs_heap_size_total_bytes Process heap size from Node.js in bytes.
# TYPE nodejs_heap_size_total_bytes gauge
nodejs_heap_size_total_bytes{name="teracluster",assignment="cluster_master"} 71942144
# HELP nodejs_heap_size_used_bytes Process heap size used from Node.js in bytes.
# TYPE nodejs_heap_size_used_bytes gauge
nodejs_heap_size_used_bytes{name="teracluster",assignment="cluster_master"} 66628640
# HELP nodejs_external_memory_bytes Node.js external memory size in bytes.
# TYPE nodejs_external_memory_bytes gauge
nodejs_external_memory_bytes{name="teracluster",assignment="cluster_master"} 4174355
# HELP nodejs_heap_space_size_total_bytes Process heap space size total from Node.js in bytes.
# TYPE nodejs_heap_space_size_total_bytes gauge
nodejs_heap_space_size_total_bytes{space="read_only",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_total_bytes{space="old",name="teracluster",assignment="cluster_master"} 55459840
nodejs_heap_space_size_total_bytes{space="code",name="teracluster",assignment="cluster_master"} 3063808
nodejs_heap_space_size_total_bytes{space="map",name="teracluster",assignment="cluster_master"} 3162112
nodejs_heap_space_size_total_bytes{space="large_object",name="teracluster",assignment="cluster_master"} 9207808
nodejs_heap_space_size_total_bytes{space="code_large_object",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_total_bytes{space="new_large_object",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_total_bytes{space="new",name="teracluster",assignment="cluster_master"} 1048576
# HELP nodejs_heap_space_size_used_bytes Process heap space size used from Node.js in bytes.
# TYPE nodejs_heap_space_size_used_bytes gauge
nodejs_heap_space_size_used_bytes{space="read_only",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_used_bytes{space="old",name="teracluster",assignment="cluster_master"} 52294856
nodejs_heap_space_size_used_bytes{space="code",name="teracluster",assignment="cluster_master"} 2431200
nodejs_heap_space_size_used_bytes{space="map",name="teracluster",assignment="cluster_master"} 2053728
nodejs_heap_space_size_used_bytes{space="large_object",name="teracluster",assignment="cluster_master"} 8934600
nodejs_heap_space_size_used_bytes{space="code_large_object",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_used_bytes{space="new_large_object",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_used_bytes{space="new",name="teracluster",assignment="cluster_master"} 927952
# HELP nodejs_heap_space_size_available_bytes Process heap space size available from Node.js in bytes.
# TYPE nodejs_heap_space_size_available_bytes gauge
nodejs_heap_space_size_available_bytes{space="read_only",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_available_bytes{space="old",name="teracluster",assignment="cluster_master"} 2132528
nodejs_heap_space_size_available_bytes{space="code",name="teracluster",assignment="cluster_master"} 42784
nodejs_heap_space_size_available_bytes{space="map",name="teracluster",assignment="cluster_master"} 1051184
nodejs_heap_space_size_available_bytes{space="large_object",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_available_bytes{space="code_large_object",name="teracluster",assignment="cluster_master"} 0
nodejs_heap_space_size_available_bytes{space="new_large_object",name="teracluster",assignment="cluster_master"} 1030976
nodejs_heap_space_size_available_bytes{space="new",name="teracluster",assignment="cluster_master"} 103024
# HELP nodejs_version_info Node.js version info.
# TYPE nodejs_version_info gauge
nodejs_version_info{version="v18.19.1",major="18",minor="19",patch="1",name="teracluster",assignment="cluster_master"} 1
# HELP nodejs_gc_duration_seconds Garbage collection duration by kind, one of major, minor, incremental or weakcb.
# TYPE nodejs_gc_duration_seconds histogram
nodejs_gc_duration_seconds_bucket{le="0.001",kind="minor",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_bucket{le="0.01",kind="minor",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_bucket{le="0.1",kind="minor",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_bucket{le="1",kind="minor",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_bucket{le="2",kind="minor",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_bucket{le="5",kind="minor",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_bucket{le="+Inf",kind="minor",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_sum{kind="minor",name="teracluster",assignment="cluster_master"} 0.005336958020925522
nodejs_gc_duration_seconds_count{kind="minor",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_bucket{le="0.001",kind="incremental",name="teracluster",assignment="cluster_master"} 5
nodejs_gc_duration_seconds_bucket{le="0.01",kind="incremental",name="teracluster",assignment="cluster_master"} 6
nodejs_gc_duration_seconds_bucket{le="0.1",kind="incremental",name="teracluster",assignment="cluster_master"} 6
nodejs_gc_duration_seconds_bucket{le="1",kind="incremental",name="teracluster",assignment="cluster_master"} 6
nodejs_gc_duration_seconds_bucket{le="2",kind="incremental",name="teracluster",assignment="cluster_master"} 6
nodejs_gc_duration_seconds_bucket{le="5",kind="incremental",name="teracluster",assignment="cluster_master"} 6
nodejs_gc_duration_seconds_bucket{le="+Inf",kind="incremental",name="teracluster",assignment="cluster_master"} 6
nodejs_gc_duration_seconds_sum{kind="incremental",name="teracluster",assignment="cluster_master"} 0.0023947079926729203
nodejs_gc_duration_seconds_count{kind="incremental",name="teracluster",assignment="cluster_master"} 6
nodejs_gc_duration_seconds_bucket{le="0.001",kind="major",name="teracluster",assignment="cluster_master"} 0
nodejs_gc_duration_seconds_bucket{le="0.01",kind="major",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_bucket{le="0.1",kind="major",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_bucket{le="1",kind="major",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_bucket{le="2",kind="major",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_bucket{le="5",kind="major",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_bucket{le="+Inf",kind="major",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_sum{kind="major",name="teracluster",assignment="cluster_master"} 0.01382558299601078
nodejs_gc_duration_seconds_count{kind="major",name="teracluster",assignment="cluster_master"} 3
nodejs_gc_duration_seconds_bucket{le="0.001",kind="weakcb",name="teracluster",assignment="cluster_master"} 1
nodejs_gc_duration_seconds_bucket{le="0.01",kind="weakcb",name="teracluster",assignment="cluster_master"} 1
nodejs_gc_duration_seconds_bucket{le="0.1",kind="weakcb",name="teracluster",assignment="cluster_master"} 1
nodejs_gc_duration_seconds_bucket{le="1",kind="weakcb",name="teracluster",assignment="cluster_master"} 1
nodejs_gc_duration_seconds_bucket{le="2",kind="weakcb",name="teracluster",assignment="cluster_master"} 1
nodejs_gc_duration_seconds_bucket{le="5",kind="weakcb",name="teracluster",assignment="cluster_master"} 1
nodejs_gc_duration_seconds_bucket{le="+Inf",kind="weakcb",name="teracluster",assignment="cluster_master"} 1
nodejs_gc_duration_seconds_sum{kind="weakcb",name="teracluster",assignment="cluster_master"} 0.000018250003457069398
nodejs_gc_duration_seconds_count{kind="weakcb",name="teracluster",assignment="cluster_master"} 1
# HELP teraslice_cluster_master_info Information about Teraslice cluster master
# TYPE teraslice_cluster_master_info gauge
teraslice_cluster_master_info{arch="arm64",clustering_type="native",name="teracluster",node_version="v18.19.1",platform="darwin",teraslice_version="1.3.1",assignment="cluster_master"} 1
All of the comments about the asset storage stuff ... are important but unrelated to this PR.
This PR makes the following changes:
- Move the `PromMetrics` and `Exporter` classes from `standard-assets` into `terafoundation`.
- Add `promMetrics` as a field to `context.apis.foundation`, allowing the promMetrics class to be used anywhere that uses `terafoundation`. A `promMetricsProxy` is used to catch calls to `promMetrics` if metrics are disabled.
- Add the following settings to `terafoundation` `schema.ts` and `job-components` `job-schemas.ts`. Values in a `jobConfig` will override values in `terafoundation`.
  - `prom_metrics_enabled` - start a Prometheus exporter server
  - `prom_metrics_port` - port the server will listen on
  - `prom_metrics_add_defaults` - collect default metrics recommended by Prometheus as well as Node.js-specific metrics.
- Add `MockPromMetrics` in `job-components` `TestContext` to be used in unit tests.
- Update `terafoundation`, `types` and `job-components`.
- Call `promMetrics.init` and add an info metric to the `ClusterMaster`, `SlicerExecutionContext` and `WorkerExecutionContext` classes.

ref: #3360