Closed guocuimi closed 4 months ago
Newly added/updated metrics
$ curl localhost:8080/metrics
...
# HELP num_processing_tokens_total Total number of processing tokens
# TYPE num_processing_tokens_total counter
num_processing_tokens_total{type="generated"} 30
num_processing_tokens_total{type="prompt"} 22
# HELP prefix_cache_latency_seconds Latency of prefix cache in seconds
# TYPE prefix_cache_latency_seconds counter
prefix_cache_latency_seconds{op="evict"} 0
prefix_cache_latency_seconds{op="match"} 3.496e-06
prefix_cache_latency_seconds{op="insert"} 2.906e-06
# HELP prefix_cache_match_length_total Length of matched prefix in tokens
# TYPE prefix_cache_match_length_total counter
prefix_cache_match_length_total 0
# HELP allocate_blocks_latency_seconds Latency of blocks allocation in seconds
# TYPE allocate_blocks_latency_seconds counter
allocate_blocks_latency_seconds 1.059199999999999e-05
# HELP time_to_first_token_latency_seconds Histogram of time to first token latency in seconds
# TYPE time_to_first_token_latency_seconds histogram
time_to_first_token_latency_seconds_count 2
time_to_first_token_latency_seconds_sum 0.113438361
time_to_first_token_latency_seconds_bucket{le="0.001"} 0
time_to_first_token_latency_seconds_bucket{le="0.002"} 0
time_to_first_token_latency_seconds_bucket{le="0.005"} 0
time_to_first_token_latency_seconds_bucket{le="0.01"} 0
time_to_first_token_latency_seconds_bucket{le="0.02"} 0
time_to_first_token_latency_seconds_bucket{le="0.05"} 1
time_to_first_token_latency_seconds_bucket{le="0.1"} 2
time_to_first_token_latency_seconds_bucket{le="0.5"} 2
time_to_first_token_latency_seconds_bucket{le="1"} 2
time_to_first_token_latency_seconds_bucket{le="+Inf"} 2
# HELP inter_token_latency_seconds Histogram of inter token latency in seconds
# TYPE inter_token_latency_seconds histogram
inter_token_latency_seconds_count 30
inter_token_latency_seconds_sum 0.300972625
inter_token_latency_seconds_bucket{le="0.001"} 0
inter_token_latency_seconds_bucket{le="0.002"} 0
inter_token_latency_seconds_bucket{le="0.005"} 0
inter_token_latency_seconds_bucket{le="0.01"} 9
inter_token_latency_seconds_bucket{le="0.02"} 30
inter_token_latency_seconds_bucket{le="0.05"} 30
inter_token_latency_seconds_bucket{le="0.1"} 30
inter_token_latency_seconds_bucket{le="0.5"} 30
inter_token_latency_seconds_bucket{le="1"} 30
inter_token_latency_seconds_bucket{le="+Inf"} 30
Newly added/updated metrics