Open rahulbot opened 10 months ago
Quick googling: https://www.elastic.co/guide/en/kibana/current/elasticsearch-metrics.html shows plots (from an ELK stack?)
If there are ES API endpoints that deliver the data you're interested in, it should be trivial to poll them from https://github.com/mediacloud/story-indexer/blob/main/indexer/scripts/elastic-stats.py
Other ES stats: https://sematext.com/blog/top-10-elasticsearch-metrics-to-watch/
This looks to be promising data, BUT the documentation is pretty thin as to what it all means!
# output from Elasticsearch object indices.stats() call on ramos
# See https://www.elastic.co/guide/en/elasticsearch/reference/8.12/indices-stats.html
a = {
"_shards": {"total": 360, "successful": 360, "failed": 0},
"_all": {
"primaries": {
"docs": {"count": 37211973, "deleted": 0},
"shard_stats": {"total_count": 180},
"store": {
"size_in_bytes": 301351356704,
"total_data_set_size_in_bytes": 301351356704,
"reserved_in_bytes": 0,
},
"indexing": {
"index_total": 6199277,
"index_time_in_millis": 6426052,
"index_current": 0,
"index_failed": 42143,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": False,
"throttle_time_in_millis": 0,
"write_load": 3.1562112245189645e-05,
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0,
},
"search": {
"open_contexts": 0,
"query_total": 7966965,
"query_time_in_millis": 633988395,
"query_current": 0,
"fetch_total": 294201,
"fetch_time_in_millis": 378795,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0,
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 65035,
"total_time_in_millis": 17832455,
"total_docs": 24507775,
"total_size_in_bytes": 265865427435,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 4685094,
"total_auto_throttle_in_bytes": 3188723010,
},
"refresh": {
"total": 691686,
"total_time_in_millis": 6116472,
"external_total": 312247,
"external_total_time_in_millis": 5966907,
"listeners": 0,
},
"flush": {
"total": 377112,
"periodic": 377092,
"total_time_in_millis": 17146216,
},
"warmer": {"current": 0, "total": 312067, "total_time_in_millis": 10822},
"query_cache": {
"memory_size_in_bytes": 73767821,
"total_count": 384453791,
"hit_count": 3513518,
"miss_count": 380940273,
"cache_size": 38350,
"cache_count": 44574,
"evictions": 6224,
},
"fielddata": {
"memory_size_in_bytes": 8776609836,
"evictions": 0,
"global_ordinals": {"build_time_in_millis": 4405793},
},
"completion": {"size_in_bytes": 0},
"segments": {
"count": 3025,
"memory_in_bytes": 0,
"terms_memory_in_bytes": 0,
"stored_fields_memory_in_bytes": 0,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 0,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {},
},
"translog": {
"operations": 0,
"size_in_bytes": 9900,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 9900,
"earliest_last_modified_age": 55791054,
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0,
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 12140246,
},
"bulk": {
"total_operations": 6258294,
"total_time_in_millis": 6542769,
"total_size_in_bytes": 25744722280,
"avg_time_in_millis": 0,
"avg_size_in_bytes": 4685,
},
}, # primaries
"total": {
"docs": {"count": 74423946, "deleted": 0},
"shard_stats": {"total_count": 360},
"store": {
"size_in_bytes": 602727651892,
"total_data_set_size_in_bytes": 602727651892,
"reserved_in_bytes": 0,
},
"indexing": {
"index_total": 12304185,
"index_time_in_millis": 12124280,
"index_current": 0,
"index_failed": 42143,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": False,
"throttle_time_in_millis": 0,
"write_load": 2.9796330925959386e-05,
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0,
},
"search": {
"open_contexts": 0,
"query_total": 15522605,
"query_time_in_millis": 1504726365,
"query_current": 0,
"fetch_total": 584851,
"fetch_time_in_millis": 1100938,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0,
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 128942,
"total_time_in_millis": 36370566,
"total_docs": 48734821,
"total_size_in_bytes": 528102259853,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 9112115,
"total_auto_throttle_in_bytes": 6379758576,
},
"refresh": {
"total": 1373854,
"total_time_in_millis": 14315714,
"external_total": 617832,
"external_total_time_in_millis": 14026585,
"listeners": 0,
},
"flush": {
"total": 752413,
"periodic": 752305,
"total_time_in_millis": 34331553,
},
"warmer": {"current": 0, "total": 617472, "total_time_in_millis": 22683},
"query_cache": {
"memory_size_in_bytes": 150527800,
"total_count": 751545384,
"hit_count": 6706770,
"miss_count": 744838614,
"cache_size": 75698,
"cache_count": 87057,
"evictions": 11359,
},
"fielddata": {
"memory_size_in_bytes": 14866782292,
"evictions": 0,
"global_ordinals": {"build_time_in_millis": 9386383},
},
"completion": {"size_in_bytes": 0},
"segments": {
"count": 6056,
"memory_in_bytes": 0,
"terms_memory_in_bytes": 0,
"stored_fields_memory_in_bytes": 0,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 0,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {},
},
"translog": {
"operations": 0,
"size_in_bytes": 19800,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 19800,
"earliest_last_modified_age": 55791054,
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0,
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 25099989,
},
"bulk": {
"total_operations": 12421712,
"total_time_in_millis": 12279661,
"total_size_in_bytes": 51116579287,
"avg_time_in_millis": 0,
"avg_size_in_bytes": 4641,
},
}, # total
}, # _all
"indices": {
"mediacloud_search_text_other": {
"uuid": "5jCe2RVAR5qd1OWBzhGTNw",
"health": "green",
"status": "open",
"primaries": {
"docs": {"count": 3479189, "deleted": 0},
"shard_stats": {"total_count": 30},
"store": {
"size_in_bytes": 37886550792,
"total_data_set_size_in_bytes": 37886550792,
"reserved_in_bytes": 0,
},
"indexing": {
"index_total": 591867,
"index_time_in_millis": 749691,
"index_current": 0,
"index_failed": 2185,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": False,
"throttle_time_in_millis": 0,
"write_load": 2.209285748865003e-05,
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0,
},
"search": {
"open_contexts": 0,
"query_total": 1347200,
"query_time_in_millis": 40277081,
"query_current": 0,
"fetch_total": 0,
"fetch_time_in_millis": 0,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0,
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 11515,
"total_time_in_millis": 1864300,
"total_docs": 2623041,
"total_size_in_bytes": 35699032243,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 224696,
"total_auto_throttle_in_bytes": 520271585,
},
"refresh": {
"total": 125225,
"total_time_in_millis": 922849,
"external_total": 49989,
"external_total_time_in_millis": 896917,
"listeners": 0,
},
"flush": {
"total": 74689,
"periodic": 74685,
"total_time_in_millis": 2977710,
},
"warmer": {"current": 0, "total": 49959, "total_time_in_millis": 1692},
"query_cache": {
"memory_size_in_bytes": 6933617,
"total_count": 89524453,
"hit_count": 717354,
"miss_count": 88807099,
"cache_size": 11340,
"cache_count": 11733,
"evictions": 393,
},
"fielddata": {
"memory_size_in_bytes": 1212900968,
"evictions": 0,
"global_ordinals": {"build_time_in_millis": 489228},
},
"completion": {"size_in_bytes": 0},
"segments": {
"count": 641,
"memory_in_bytes": 0,
"terms_memory_in_bytes": 0,
"stored_fields_memory_in_bytes": 0,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 0,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {},
},
"translog": {
"operations": 0,
"size_in_bytes": 1650,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 1650,
"earliest_last_modified_age": 55886055,
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0,
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 2166274,
},
"bulk": {
"total_operations": 598883,
"total_time_in_millis": 765189,
"total_size_in_bytes": 3316725384,
"avg_time_in_millis": 1,
"avg_size_in_bytes": 7752,
},
},
"total": {
"docs": {"count": 6958378, "deleted": 0},
"shard_stats": {"total_count": 60},
"store": {
"size_in_bytes": 76062196013,
"total_data_set_size_in_bytes": 76062196013,
"reserved_in_bytes": 0,
},
"indexing": {
"index_total": 1183605,
"index_time_in_millis": 1468800,
"index_current": 0,
"index_failed": 2185,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": False,
"throttle_time_in_millis": 0,
"write_load": 2.1650794065567543e-05,
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0,
},
"search": {
"open_contexts": 0,
"query_total": 2586594,
"query_time_in_millis": 88119155,
"query_current": 0,
"fetch_total": 0,
"fetch_time_in_millis": 0,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0,
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 22843,
"total_time_in_millis": 3738054,
"total_docs": 4999118,
"total_size_in_bytes": 68516746003,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 392063,
"total_auto_throttle_in_bytes": 1041014420,
},
"refresh": {
"total": 248707,
"total_time_in_millis": 2216439,
"external_total": 98406,
"external_total_time_in_millis": 2146417,
"listeners": 0,
},
"flush": {
"total": 149337,
"periodic": 149321,
"total_time_in_millis": 6174386,
},
"warmer": {"current": 0, "total": 98346, "total_time_in_millis": 3617},
"query_cache": {
"memory_size_in_bytes": 13615867,
"total_count": 169071873,
"hit_count": 1369037,
"miss_count": 167702836,
"cache_size": 22152,
"cache_count": 22545,
"evictions": 393,
},
"fielddata": {
"memory_size_in_bytes": 2697642584,
"evictions": 0,
"global_ordinals": {"build_time_in_millis": 1060741},
},
"completion": {"size_in_bytes": 0},
"segments": {
"count": 1285,
"memory_in_bytes": 0,
"terms_memory_in_bytes": 0,
"stored_fields_memory_in_bytes": 0,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 0,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {},
},
"translog": {
"operations": 0,
"size_in_bytes": 3300,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 3300,
"earliest_last_modified_age": 55884867,
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0,
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 3554315,
},
"bulk": {
"total_operations": 1197643,
"total_time_in_millis": 1488207,
"total_size_in_bytes": 6634508746,
"avg_time_in_millis": 0,
"avg_size_in_bytes": 7747,
},
}, # total
} # mediacloud_search_text_other
}, # indices
}
Next step: potentially expose timings on sandbox for review to figure out which are useful
ILM updates to elastic-stats.py included using a different stats call, and sending the "_all.primaries" tree to statsd/graphite/grafana.
I've added two panes to "Phil's Sandbox" dashboard:
If you're logged in as admin you can click on "edit" at the title of the "ES stats...." pane, then on the last two elements of the "Series" to see what other ES stats are available (all seem to be totals over time (integrals), so I've applied the "nonNegativeDerivative" function to make them incremental counts).
I'm feeling like the Django stats are more of a service-level priority, so we can wait on this. Next task would be to review sandbox to pull out stats if we want this on main dashboard.
Going to start with this on the user side via Sentry and then return to this later. @philbudne please do add any charts to the main dashboard if you already understand them and have found them useful.
Now that we're actually using Elasticsearch, is there any way to monitor its performance? Average query time? Total query count?