apache / skywalking

APM, Application Performance Monitoring System
https://skywalking.apache.org/
Apache License 2.0
23.91k stars 6.53k forks source link

Add `doc_values` for fields that need to be sorted or aggregated in ElasticSearch, and disable all others. #12782

Open kezhenxu94 opened 3 days ago

kezhenxu94 commented 3 days ago
kezhenxu94 commented 3 days ago

Unlike what is suggested in https://github.com/apache/skywalking/issues/12741, I use `@ElasticSearch.EnableDocValues` as an opt-in annotation on the fields that need doc values, because most of the fields won't require this feature.

wu-sheng commented 1 day ago

Please note, we had better verify what the impact is on existing (last version) indices. Do Elasticsearch and our storage implementation support changing this config automatically on upgrade?

kezhenxu94 commented 4 hours ago

We actually support modifying index/template mappings. I just ran a test on the same ES server, starting from the previous commit (master branch) and then upgrading to this branch: the existing indices' mappings remain the same and the template mappings changed as expected, which will impact the new indices created in the future.

Diff of sw_metrics_all template mapping

```diff diff --git a/tmp/before-template.json b/tmp/after-template.json index 603432b7f9..113f050e94 100644 --- a/tmp/before-template.json +++ b/tmp/after-template.json @@ -27,90 +27,110 @@ }, "properties": { "dest_service_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "agent_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "last_ping": { - "type": "long" + "type": "long", + "doc_values": false }, "precision": { "index": false, - "type": "integer" + "type": "integer", + "doc_values": false }, "double_summation": { "index": false, - "type": "double" + "type": "double", + "doc_values": false }, "labels_json": { "index": false, - "type": "keyword" + "type": "keyword", + "doc_values": false }, "tag_key": { "type": "keyword" }, "type": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "uuid": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "summation": { "index": false, - "type": "long" + "type": "long", + "doc_values": false }, "instance_traffic_name": { "index": false, - "type": "keyword" + "type": "keyword", + "doc_values": false }, "percentage": { "type": "integer" }, "total_num": { "index": false, - "type": "long" + "type": "long", + "doc_values": false }, "time_bucket": { "type": "long" }, "service_layer": { - "type": "integer" + "type": "integer", + "doc_values": false }, "component_id": { "index": false, "type": "integer" }, "service_name": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "count": { "index": false, - "type": "long" + "type": "long", + "doc_values": false }, "entity_id": { "type": "keyword" }, "denominator": { - "type": "long" + "type": "long", + "doc_values": false }, "numerator": { - "type": "long" + "type": "long", + "doc_values": false }, "dest_process_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "start_time": { "type": "long" }, "related_service_layer": { - "type": "integer" + "type": "integer", + 
"doc_values": false }, "instance_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "tag_value": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "ranks": { "index": false, @@ -118,58 +138,71 @@ }, "t_num": { "index": false, - "type": "long" + "type": "long", + "doc_values": false }, "service_traffic_name_match": { "analyzer": "oap_analyzer", "type": "text" }, "related_service_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "name": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "service_traffic_name": { "copy_to": "service_traffic_name_match", - "type": "keyword" + "type": "keyword", + "doc_values": false }, "short_name": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "s_num": { "index": false, - "type": "long" + "type": "long", + "doc_values": false }, "parameters": { "index": false, - "type": "keyword" + "type": "keyword", + "doc_values": false }, "process_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "span_name": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "datatable_summation": { "index": false, "type": "text" }, "detect_type": { - "type": "integer" + "type": "integer", + "doc_values": false }, "tag_type": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "task_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "component_ids": { "index": false, "type": "keyword" }, "layer": { - "type": "integer" + "type": "integer", + "doc_values": false }, "int_value": { "type": "integer" @@ -178,122 +211,154 @@ "type": "keyword" }, "remote_service_name": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "endpoint": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "attr0": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "total": { "index": false, - "type": "long" + "type": "long", + "doc_values": false }, 
"ebpf_profiling_schedule_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "endpoint_traffic_name_match": { "analyzer": "oap_analyzer", "type": "text" }, "service_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "datatable_count": { "index": false, "type": "text" }, "source_service_instance_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "service_instance": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "profiling_support_status": { - "type": "integer" + "type": "integer", + "doc_values": false }, "value": { "type": "long" }, "source_service_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "address": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "datatable_value": { "index": false, "type": "text" }, "represent_service_instance_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "dest_endpoint": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "represent_service_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "end_time": { - "type": "long" + "type": "long", + "doc_values": false }, "match": { "index": false, - "type": "long" + "type": "long", + "doc_values": false }, "service_group": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "attr5": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "label": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "service_instance_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "related_instance_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "source_process_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "message": { "index": false, - "type": "keyword" + "type": "keyword", + "doc_values": false }, "attr2": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "attr1": { - "type": "keyword" 
+ "type": "keyword", + "doc_values": false }, "double_value": { "type": "double" }, "attr4": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "endpoint_traffic_name": { "copy_to": "endpoint_traffic_name_match", - "type": "keyword" + "type": "keyword", + "doc_values": false }, "attr3": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "dest_service_instance_id": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "last_update_time_bucket": { - "type": "long" + "type": "long", + "doc_values": false }, "source_endpoint": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "service": { - "type": "keyword" + "type": "keyword", + "doc_values": false }, "dataset": { "index": false, ```

wu-sheng commented 4 hours ago

OK, this seems good enough; it should not break anything once we have added all the necessary annotations.