Closed tetianakravchenko closed 5 years ago
Hi @tetianakravchenko,
I agree this is a useful metric. Unfortunately, I don't have a 7.x cluster at hand. Can you send me the output of `curl http://<a_cluster_node_addr>:9200/_cluster/settings\?include_defaults=true | jq .`
so I can compare it to the ES 5.x output (which I have at hand)? So far, the exporter doesn't really distinguish between ES versions. If the output differs significantly, we will have to see how to handle it.
Hi @zwopir. The ES version in use is 7.1.1;
here is the output:
curl http://localhost:9200/_cluster/settings\?include_defaults=true | jq .
{
"persistent": {},
"transient": {},
"defaults": {
"cluster": {
"max_voting_config_exclusions": "10",
"auto_shrink_voting_configuration": "true",
"election": {
"duration": "500ms",
"initial_timeout": "100ms",
"max_timeout": "10s",
"back_off_time": "100ms"
},
"no_master_block": "write",
"persistent_tasks": {
"allocation": {
"enable": "all",
"recheck_interval": "30s"
}
},
"blocks": {
"read_only_allow_delete": "false",
"read_only": "false"
},
"remote": {
"node": {
"attr": ""
},
"initial_connect_timeout": "30s",
"connect": "true",
"connections_per_cluster": "3"
},
"follower_lag": {
"timeout": "90000ms"
},
"routing": {
"use_adaptive_replica_selection": "true",
"rebalance": {
"enable": "all"
},
"allocation": {
"node_concurrent_incoming_recoveries": "2",
"node_initial_primaries_recoveries": "4",
"same_shard": {
"host": "false"
},
"total_shards_per_node": "-1",
"type": "balanced",
"disk": {
"threshold_enabled": "true",
"watermark": {
"low": "85%",
"flood_stage": "95%",
"high": "90%"
},
"include_relocations": "true",
"reroute_interval": "60s"
},
"awareness": {
"attributes": []
},
"balance": {
"index": "0.55",
"threshold": "1.0",
"shard": "0.45"
},
"enable": "all",
"node_concurrent_outgoing_recoveries": "2",
"allow_rebalance": "indices_all_active",
"cluster_concurrent_rebalance": "2",
"node_concurrent_recoveries": "2"
}
},
"indices": {
"tombstones": {
"size": "500"
},
"close": {
"enable": "true"
}
},
"nodes": {
"reconnect_interval": "10s"
},
"service": {
"slow_task_logging_threshold": "30s"
},
"publish": {
"timeout": "30000ms"
},
"name": "search",
"fault_detection": {
"leader_check": {
"interval": "1000ms",
"timeout": "10000ms",
"retry_count": "3"
},
"follower_check": {
"interval": "1000ms",
"timeout": "10000ms",
"retry_count": "3"
}
},
"join": {
"timeout": "60000ms"
},
"max_shards_per_node": "1000",
"initial_master_nodes": [],
"info": {
"update": {
"interval": "30s",
"timeout": "15s"
}
}
},
"no": {
"model": {
"state": {
"persist": "false"
}
}
},
"logger": {
"level": "INFO"
},
"bootstrap": {
"memory_lock": "false",
"system_call_filter": "true",
"ctrlhandler": "true"
},
"processors": "1",
"ingest": {
"geoip": {
"cache_size": "1000"
},
"grok": {
"watchdog": {
"max_execution_time": "1s",
"interval": "1s"
}
}
},
"network": {
"host": [
"0.0.0.0"
],
"tcp": {
"reuse_address": "true",
"keep_alive": "true",
"connect_timeout": "30s",
"receive_buffer_size": "-1b",
"no_delay": "true",
"send_buffer_size": "-1b"
},
"bind_host": [
"0.0.0.0"
],
"server": "true",
"breaker": {
"inflight_requests": {
"limit": "100%",
"overhead": "2.0"
}
},
"publish_host": [
"0.0.0.0"
]
},
"pidfile": "",
"path": {
"data": [],
"logs": "/usr/share/elasticsearch/logs",
"shared_data": "",
"home": "/usr/share/elasticsearch",
"repo": []
},
"search": {
"default_search_timeout": "-1",
"highlight": {
"term_vector_multi_value": "true"
},
"default_allow_partial_results": "true",
"max_open_scroll_context": "500",
"max_buckets": "10000",
"low_level_cancellation": "false",
"keep_alive_interval": "1m",
"remote": {
"node": {
"attr": ""
},
"initial_connect_timeout": "30s",
"connect": "true",
"connections_per_cluster": "3"
},
"default_keep_alive": "5m",
"max_keep_alive": "24h"
},
"security": {
"manager": {
"filter_bad_defaults": "true"
}
},
"ccr": {
"wait_for_metadata_timeout": "60s",
"indices": {
"recovery": {
"recovery_activity_timeout": "60s",
"chunk_size": "1mb",
"internal_action_timeout": "60s",
"max_bytes_per_sec": "40mb",
"max_concurrent_file_chunks": "5"
}
},
"auto_follow": {
"wait_for_metadata_timeout": "60s"
}
},
"repositories": {
"fs": {
"compress": "false",
"chunk_size": "9223372036854775807b",
"location": ""
},
"url": {
"supported_protocols": [
"http",
"https",
"ftp",
"file",
"jar"
],
"allowed_urls": [],
"url": "http:"
}
},
"action": {
"auto_create_index": "true",
"search": {
"shard_count": {
"limit": "9223372036854775807"
}
},
"destructive_requires_name": "false"
},
"client": {
"type": "node",
"transport": {
"ignore_cluster_name": "false",
"nodes_sampler_interval": "5s",
"sniff": "false",
"ping_timeout": "5s"
}
},
"xpack": {
"watcher": {
"execution": {
"scroll": {
"size": "0",
"timeout": ""
},
"default_throttle_period": "5s"
},
"internal": {
"ops": {
"bulk": {
"default_timeout": ""
},
"index": {
"default_timeout": ""
},
"search": {
"default_timeout": ""
}
}
},
"thread_pool": {
"queue_size": "1000",
"size": "1"
},
"index": {
"rest": {
"direct_access": ""
}
},
"history": {
"cleaner_service": {
"enabled": "true"
}
},
"trigger": {
"schedule": {
"ticker": {
"tick_interval": "500ms"
}
}
},
"enabled": "true",
"input": {
"search": {
"default_timeout": ""
}
},
"encrypt_sensitive_data": "false",
"transform": {
"search": {
"default_timeout": ""
}
},
"stop": {
"timeout": "30s"
},
"watch": {
"scroll": {
"size": "0"
}
},
"bulk": {
"concurrent_requests": "0",
"flush_interval": "1s",
"size": "1mb",
"actions": "1"
},
"actions": {
"bulk": {
"default_timeout": ""
},
"index": {
"default_timeout": ""
}
}
},
"ilm": {
"enabled": "true"
},
"monitoring": {
"collection": {
"cluster": {
"stats": {
"timeout": "10s"
}
},
"node": {
"stats": {
"timeout": "10s"
}
},
"indices": [],
"ccr": {
"stats": {
"timeout": "10s"
}
},
"index": {
"stats": {
"timeout": "10s"
},
"recovery": {
"active_only": "false",
"timeout": "10s"
}
},
"interval": "10s",
"enabled": "false",
"ml": {
"job": {
"stats": {
"timeout": "10s"
}
}
}
},
"history": {
"duration": "168h"
},
"elasticsearch": {
"collection": {
"enabled": "true"
}
},
"enabled": "true"
},
"graph": {
"enabled": "true"
},
"rollup": {
"enabled": "true",
"task_thread_pool": {
"queue_size": "4",
"size": "4"
}
},
"sql": {
"enabled": "true"
},
"license": {
"self_generated": {
"type": "basic"
}
},
"logstash": {
"enabled": "true"
},
"notification": {
"pagerduty": {
"default_account": ""
},
"email": {
"default_account": "",
"html": {
"sanitization": {
"allow": [
"body",
"head",
"_tables",
"_links",
"_blocks",
"_formatting",
"img:embedded"
],
"disallow": [],
"enabled": "true"
}
}
},
"reporting": {
"retries": "40",
"interval": "15s"
},
"jira": {
"default_account": ""
},
"slack": {
"default_account": ""
}
},
"security": {
"dls_fls": {
"enabled": "true"
},
"transport": {
"filter": {
"allow": [],
"deny": [],
"enabled": "true"
},
"ssl": {
"enabled": "false"
}
},
"enabled": "true",
"filter": {
"always_allow_bound_address": "true"
},
"encryption": {
"algorithm": "AES/CTR/NoPadding"
},
"audit": {
"enabled": "false",
"logfile": {
"emit_node_id": "true",
"emit_node_host_name": "false",
"emit_node_name": "false",
"events": {
"emit_request_body": "false",
"include": [
"ACCESS_DENIED",
"ACCESS_GRANTED",
"ANONYMOUS_ACCESS_DENIED",
"AUTHENTICATION_FAILED",
"CONNECTION_DENIED",
"TAMPERED_REQUEST",
"RUN_AS_DENIED",
"RUN_AS_GRANTED"
],
"exclude": []
},
"emit_node_host_address": "false"
}
},
"authc": {
"password_hashing": {
"algorithm": "bcrypt"
},
"success_cache": {
"size": "10000",
"enabled": "true",
"expire_after_access": "1h"
},
"api_key": {
"cache": {
"hash_algo": "ssha256",
"max_keys": "10000",
"ttl": "24h"
},
"delete": {
"interval": "24h",
"timeout": "-1"
},
"enabled": "false",
"hashing": {
"algorithm": "pbkdf2"
}
},
"anonymous": {
"authz_exception": "true",
"roles": [],
"username": "_anonymous"
},
"run_as": {
"enabled": "true"
},
"reserved_realm": {
"enabled": "true"
},
"token": {
"delete": {
"interval": "30m",
"timeout": "-1"
},
"enabled": "false",
"thread_pool": {
"queue_size": "1000",
"size": "1"
},
"timeout": "20m"
}
},
"fips_mode": {
"enabled": "false"
},
"encryption_key": {
"length": "128",
"algorithm": "AES"
},
"http": {
"filter": {
"allow": [],
"deny": [],
"enabled": "true"
},
"ssl": {
"enabled": "false"
}
},
"automata": {
"max_determinized_states": "100000",
"cache": {
"size": "10000",
"ttl": "48h",
"enabled": "true"
}
},
"user": null,
"authz": {
"store": {
"roles": {
"index": {
"cache": {
"ttl": "20m",
"max_size": "10000"
}
},
"cache": {
"max_size": "10000"
},
"negative_lookup_cache": {
"max_size": "10000"
},
"field_permissions": {
"cache": {
"max_size_in_bytes": "104857600"
}
}
}
}
}
},
"ccr": {
"enabled": "true",
"ccr_thread_pool": {
"queue_size": "100",
"size": "32"
}
},
"http": {
"default_connection_timeout": "10s",
"proxy": {
"host": "",
"scheme": "",
"port": "0"
},
"whitelist": [
"*"
],
"default_read_timeout": "10s",
"max_response_size": "10mb"
},
"ml": {
"utility_thread_pool": {
"queue_size": "500",
"size": "80"
},
"max_anomaly_records": "500",
"enable_config_migration": "true",
"max_open_jobs": "20",
"min_disk_space_off_heap": "5gb",
"node_concurrent_job_allocations": "2",
"max_model_memory_limit": "0b",
"enabled": "true",
"max_lazy_ml_nodes": "0",
"max_machine_memory_percent": "30",
"autodetect_process": "true",
"datafeed_thread_pool": {
"queue_size": "200",
"size": "20"
},
"autodetect_thread_pool": {
"queue_size": "80",
"size": "80"
}
}
},
"rest": {
"action": {
"multi": {
"allow_explicit_index": "true"
}
}
},
"cache": {
"recycler": {
"page": {
"limit": {
"heap": "10%"
},
"type": "CONCURRENT",
"weight": {
"longs": "1.0",
"ints": "1.0",
"bytes": "1.0",
"objects": "0.1"
}
}
}
},
"reindex": {
"remote": {
"whitelist": []
}
},
"resource": {
"reload": {
"enabled": "true",
"interval": {
"low": "60s",
"high": "5s",
"medium": "30s"
}
}
},
"thread_pool": {
"force_merge": {
"queue_size": "-1",
"size": "1"
},
"fetch_shard_started": {
"core": "1",
"max": "2",
"keep_alive": "5m"
},
"listener": {
"queue_size": "-1",
"size": "1"
},
"refresh": {
"core": "1",
"max": "1",
"keep_alive": "5m"
},
"generic": {
"core": "4",
"max": "128",
"keep_alive": "30s"
},
"warmer": {
"core": "1",
"max": "1",
"keep_alive": "5m"
},
"search": {
"max_queue_size": "1000",
"queue_size": "1000",
"size": "2",
"auto_queue_frame_size": "2000",
"target_response_time": "1s",
"min_queue_size": "1000"
},
"fetch_shard_store": {
"core": "1",
"max": "2",
"keep_alive": "5m"
},
"flush": {
"core": "1",
"max": "1",
"keep_alive": "5m"
},
"management": {
"core": "1",
"max": "5",
"keep_alive": "5m"
},
"analyze": {
"queue_size": "16",
"size": "1"
},
"get": {
"queue_size": "1000",
"size": "1"
},
"estimated_time_interval": "200ms",
"write": {
"queue_size": "200",
"size": "1"
},
"snapshot": {
"core": "1",
"max": "1",
"keep_alive": "5m"
},
"search_throttled": {
"max_queue_size": "100",
"queue_size": "100",
"size": "1",
"auto_queue_frame_size": "200",
"target_response_time": "1s",
"min_queue_size": "100"
}
},
"index": {
"codec": "default",
"store": {
"type": "",
"fs": {
"fs_lock": "native"
},
"preload": []
}
},
"monitor": {
"jvm": {
"gc": {
"enabled": "true",
"overhead": {
"warn": "50",
"debug": "10",
"info": "25"
},
"refresh_interval": "1s"
},
"refresh_interval": "1s"
},
"process": {
"refresh_interval": "1s"
},
"os": {
"refresh_interval": "1s"
},
"fs": {
"refresh_interval": "1s"
}
},
"transport": {
"tcp": {
"reuse_address": "true",
"connect_timeout": "30s",
"compress": "false",
"port": "9300-9400",
"no_delay": "true",
"keep_alive": "true",
"receive_buffer_size": "-1b",
"send_buffer_size": "-1b"
},
"bind_host": [],
"connect_timeout": "30s",
"compress": "false",
"ping_schedule": "-1",
"connections_per_node": {
"recovery": "2",
"state": "1",
"bulk": "3",
"reg": "6",
"ping": "1"
},
"tracer": {
"include": [],
"exclude": [
"internal:discovery/zen/fd*",
"internal:coordination/fault_detection/*",
"cluster:monitor/nodes/liveness"
]
},
"type": "security4",
"type.default": "netty4",
"features": {
"x-pack": "true"
},
"port": "9300-9400",
"host": [],
"publish_port": "-1",
"tcp_no_delay": "true",
"publish_host": [],
"netty": {
"receive_predictor_size": "64kb",
"receive_predictor_max": "64kb",
"worker_count": "2",
"receive_predictor_min": "64kb",
"boss_count": "1"
}
},
"script": {
"allowed_contexts": [],
"max_compilations_rate": "75/5m",
"cache": {
"max_size": "100",
"expire": "0ms"
},
"painless": {
"regex": {
"enabled": "false"
}
},
"max_size_in_bytes": "65535",
"allowed_types": []
},
"node": {
"data": "false",
"enable_lucene_segment_infos_trace": "false",
"local_storage": "true",
"max_local_storage_nodes": "1",
"name": "search-client-1",
"id": {
"seed": "0"
},
"store": {
"allow_mmap": "true"
},
"attr": {
"xpack": {
"installed": "true"
},
"ml": {
"machine_memory": "2147483648",
"max_open_jobs": "20"
}
},
"portsfile": "false",
"ingest": "false",
"master": "false",
"ml": "true"
},
"indices": {
"lifecycle": {
"poll_interval": "10m"
},
"cache": {
"cleanup_interval": "1m"
},
"mapping": {
"dynamic_timeout": "30s"
},
"memory": {
"interval": "5s",
"max_index_buffer_size": "-1",
"shard_inactive_time": "5m",
"index_buffer_size": "10%",
"min_index_buffer_size": "48mb"
},
"breaker": {
"request": {
"limit": "60%",
"type": "memory",
"overhead": "1.0"
},
"total": {
"limit": "95%",
"use_real_memory": "true"
},
"accounting": {
"limit": "100%",
"overhead": "1.0"
},
"fielddata": {
"limit": "40%",
"type": "memory",
"overhead": "1.03"
},
"type": "hierarchy"
},
"fielddata": {
"cache": {
"size": "-1b"
}
},
"query": {
"bool": {
"max_clause_count": "1024"
},
"query_string": {
"analyze_wildcard": "false",
"allowLeadingWildcard": "true"
}
},
"recovery": {
"recovery_activity_timeout": "1800000ms",
"retry_delay_network": "5s",
"internal_action_timeout": "15m",
"retry_delay_state_sync": "500ms",
"internal_action_long_timeout": "1800000ms",
"max_bytes_per_sec": "40mb",
"max_concurrent_file_chunks": "2"
},
"requests": {
"cache": {
"size": "1%",
"expire": "0ms"
}
},
"store": {
"delete": {
"shard": {
"timeout": "30s"
}
}
},
"analysis": {
"hunspell": {
"dictionary": {
"ignore_case": "false",
"lazy": "false"
}
}
},
"queries": {
"cache": {
"count": "10000",
"size": "10%",
"all_segments": "false"
}
}
},
"plugin": {
"mandatory": []
},
"discovery": {
"seed_hosts": [
"search-master-headless"
],
"unconfigured_bootstrap_timeout": "3s",
"request_peers_timeout": "3000ms",
"zen": {
"commit_timeout": "30s",
"no_master_block": "write",
"join_retry_delay": "100ms",
"join_retry_attempts": "3",
"ping": {
"unicast": {
"concurrent_connects": "10",
"hosts": [],
"hosts.resolve_timeout": "5s"
}
},
"master_election": {
"ignore_non_master_pings": "false",
"wait_for_joins_timeout": "30000ms"
},
"send_leave_request": "true",
"ping_timeout": "3s",
"bwc_ping_timeout": "3s",
"join_timeout": "60000ms",
"publish_diff": {
"enable": "true"
},
"publish": {
"max_pending_cluster_states": "25"
},
"minimum_master_nodes": "-1",
"unsafe_rolling_upgrades_enabled": "true",
"hosts_provider": [],
"publish_timeout": "30s",
"fd": {
"connect_on_network_disconnect": "false",
"ping_interval": "1s",
"ping_retries": "3",
"register_connection_listener": "true",
"ping_timeout": "30s"
},
"max_pings_from_another_master": "3"
},
"initial_state_timeout": "30s",
"cluster_formation_warning_timeout": "10000ms",
"seed_providers": [],
"type": "zen",
"seed_resolver": {
"max_concurrent_resolvers": "10",
"timeout": "5s"
},
"find_peers_interval": "1000ms"
},
"http": {
"tcp": {
"reuse_address": "true",
"keep_alive": "true",
"receive_buffer_size": "-1b",
"no_delay": "true",
"send_buffer_size": "-1b"
},
"bind_host": [],
"cors": {
"max-age": "1728000",
"allow-origin": "",
"allow-headers": "X-Requested-With,Content-Type,Content-Length",
"allow-credentials": "false",
"allow-methods": "OPTIONS,HEAD,GET,POST,PUT,DELETE",
"enabled": "false"
},
"max_chunk_size": "8kb",
"compression_level": "3",
"reset_cookies": "false",
"max_initial_line_length": "4kb",
"max_warning_header_count": "-1",
"type": "security4",
"pipelining": {
"max_events": "10000"
},
"max_warning_header_size": "-1b",
"type.default": "netty4",
"detailed_errors": {
"enabled": "true"
},
"content_type": {
"required": "true"
},
"port": "9200-9300",
"host": [],
"publish_port": "-1",
"max_header_size": "8kb",
"tcp_no_delay": "true",
"compression": "true",
"read_timeout": "0ms",
"publish_host": [],
"max_content_length": "100mb",
"netty": {
"receive_predictor_size": "64kb",
"max_composite_buffer_components": "69905",
"worker_count": "2"
}
},
"gateway": {
"recover_after_master_nodes": "0",
"expected_nodes": "-1",
"recover_after_data_nodes": "-1",
"expected_data_nodes": "-1",
"recover_after_time": "0ms",
"expected_master_nodes": "-1",
"recover_after_nodes": "-1"
}
}
}
@zwopir hi, is it possible to get the PR that fixes this issue released?
Elasticsearch 7.x introduced a soft limit on the number of shards in a cluster (https://www.elastic.co/guide/en/elasticsearch/reference/7.0/misc-cluster.html#cluster-shard-limit). As mentioned in the documentation, reaching this limit can lead to failures.
So it might be useful to have a dashboard/alert that indicates shard usage before the actual failures occur.