elastic / elasticsearch

Free and Open Source, Distributed, RESTful Search Engine
https://www.elastic.co/products/elasticsearch
Other
69.95k stars 24.74k forks source link

ES getting OOM and crashing every hour #34311

Closed simplyviks closed 6 years ago

simplyviks commented 6 years ago

Linux host-70-1-1-48 4.4.0-47-generic #68-Ubuntu SMP Wed Oct 26 19:39:52 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux ro

http://10.131.69.72:9200/_nodes/settings

{"_nodes":{"total":1,"successful":1,"failed":0},"cluster_name":"elasticsearch-default","nodes":{"PK847kdjSjC2uXGJ0FdFAQ":{"name":"es_node","transport_address":"172.17.0.2:9300","host":"172.17.0.2","ip":"172.17.0.2","version":"6.2.1","build_hash":"7299dc3","roles":["master","data","ingest"],"settings":{"cluster":{"name":"elasticsearch-default"},"node":{"max_local_storage_nodes":"1","name":"es_node","data":"true","ingest":"true","master":"true"},"path":{"data":["/data/data"],"logs":"/data/log","home":"/usr/share/elasticsearch"},"discovery":{"zen":{"minimum_master_nodes":"1","ping":{"unicast":{"hosts":"localhost"}}}},"client":{"type":"node"},"http":{"cors":{"allow-origin":"*","enabled":"true"},"compression":"true","type":{"default":"netty4"},"enabled":"true"},"transport":{"type":{"default":"netty4"}},"network":{"host":"site"}}}}}

http://10.131.69.72:9200/_cat/indices?v

health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
red open logstash-2018.10.04 xmGmft50QeyYfAws581vHQ 5 1
yellow open logstash-2018.10.03 RVJLXdWxQUS6dzD8rotw_g 5 1 233742 0 88.2mb 88.2mb
red open .kibana E0R8kheQTnOO-469HtLPMg 1 0
red open syslog-2018.10.02 EFn6M-XtSQ-krP8QW2OMjQ 5 1
red open syslog-2018.10.01 tvSkXe-LRc-KY_SHUnC-_Q 5 1
red open syslog-2018.09.30 mhL4QgkiRbqz-hQNLygUQw 5 1
red open syslog-2018.10.03 Of563IEWSPW6xUbHYKMv9A 5 1
red open syslog-2018.10.04 _FBeG8udRROBDRTMGK4GXg 5 1

=====================================================

ps -aef | grep elastic

/etc/alternatives/jre_openjdk//bin/java -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -XX:+DisableExplicitGC -XX:+AlwaysPreTouch -server -Xss1m -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Djna.nosys=true -Djdk.io.permissionsUseCanonicalPath=true -Dio.netty.noUnsafe=true -Dio.netty.noKeySetOptimization=true -Dlog4j.shutdownHookEnabled=false -Dlog4j2.disable.jmx=true -Dlog4j.skipJansi=true -XX:+HeapDumpOnOutOfMemoryError -Des.cgroups.hierarchy.override=/ -Xms26g -Xmx26g -Des.path.home=/usr/share/elasticsearch -Des.path.conf=/etc/elasticsearch -cp /usr/share/elasticsearch/lib/* org.elasticsearch.bootstrap.Elasticsearch

============================================================

{ "_nodes": { "total": 1, "successful": 1, "failed": 0 }, "cluster_name": "elasticsearch-default", "nodes": { "PK847kdjSjC2uXGJ0FdFAQ": { "timestamp": 1538669663511, "name": "es_node", "transport_address": "172.17.0.2:9300", "host": "172.17.0.2", "ip": "172.17.0.2:9300", "roles": [ "master", "data", "ingest" ], "indices": { "docs": { "count": 74076223, "deleted": 198 }, "store": { "size_in_bytes": 23681846937 }, "indexing": { "index_total": 531546, "index_time_in_millis": 202873, "index_current": 0, "index_failed": 0, "delete_total": 0, "delete_time_in_millis": 0, "delete_current": 0, "noop_update_total": 0, "is_throttled": false, "throttle_time_in_millis": 0 }, "get": { "total": 0, "time_in_millis": 0, "exists_total": 0, "exists_time_in_millis": 0, "missing_total": 0, "missing_time_in_millis": 0, "current": 0 }, "search": { "open_contexts": 0, "query_total": 300, "query_time_in_millis": 461, "query_current": 0, "fetch_total": 31, "fetch_time_in_millis": 168, "fetch_current": 0, "scroll_total": 180, "scroll_time_in_millis": 5918898, "scroll_current": 0, "suggest_total": 0, "suggest_time_in_millis": 0, "suggest_current": 0 }, "merges": { "current": 0, "current_docs": 0, "current_size_in_bytes": 0, "total": 135, "total_time_in_millis": 111968, "total_docs": 5664759, "total_size_in_bytes": 1771554646, "total_stopped_time_in_millis": 0, "total_throttled_time_in_millis": 58894, "total_auto_throttle_in_bytes": 751161716 }, "refresh": { "total": 1211, "total_time_in_millis": 14425, "listeners": 0 }, "flush": { "total": 5, "total_time_in_millis": 27112 }, "warmer": { "current": 0, "total": 1118, "total_time_in_millis": 41 }, "query_cache": { "memory_size_in_bytes": 14137, "total_count": 446, "hit_count": 32, "miss_count": 414, "cache_size": 65, "cache_count": 68, "evictions": 3 }, "fielddata": { "memory_size_in_bytes": 0, "evictions": 0 }, "completion": { "size_in_bytes": 0 }, "segments": { "count": 296, "memory_in_bytes": 46049394, "terms_memory_in_bytes": 32707217, 
"stored_fields_memory_in_bytes": 12057760, "term_vectors_memory_in_bytes": 0, "norms_memory_in_bytes": 178112, "points_memory_in_bytes": 937177, "doc_values_memory_in_bytes": 169128, "index_writer_memory_in_bytes": 0, "version_map_memory_in_bytes": 0, "fixed_bit_set_memory_in_bytes": 0, "max_unsafe_auto_id_timestamp": 1538669403231, "file_sizes": {

      }
    },
    "translog": {
      "operations": 3427837,
      "size_in_bytes": 3242513629,
      "uncommitted_operations": 556634,
      "uncommitted_size_in_bytes": 467343093
    },
    "request_cache": {
      "memory_size_in_bytes": 0,
      "evictions": 0,
      "hit_count": 0,
      "miss_count": 0
    },
    "recovery": {
      "current_as_source": 0,
      "current_as_target": 0,
      "throttle_time_in_millis": 0
    }
  }
}

} }

jasontedor commented 6 years ago

Thanks very much for your interest in Elasticsearch, and I am sorry that you're having trouble here; I can imagine it's frustrating to watch your nodes crash every hour.

There is not enough information here to debug the problem. Sometimes these issues are due to bugs on our side (e.g., memory leaks) and sometimes they are due to pushing your cluster beyond its capacity. We want to help you, but without more information, we can't.

We prefer to keep only verified bug reports and feature requests on GitHub, and we use the forum to troubleshoot user issues. If it turns out there is a bug here, we would definitely reopen this issue and work on a fix. Would you please start a topic on the forum so that we can gather more information?