logstash-plugins / logstash-output-s3


Plugin S3 output- (EMFILE) Too many open files #260

Open · nabil86 opened 1 year ago

nabil86 commented 1 year ago

In our case we read on the order of a billion events per day from Kafka and push them to S3 through Logstash. This is the pipeline config:

input {
    kafka {
        bootstrap_servers => "{{ msk_bootstrap_brokers }}"
        codec => "json"
        decorate_events => "extended"
        group_id => "{{ msk_consumer_archive_consumer_group }}"
        id => "{{ msk_consumer_archive_consumer_group }}"
        partition_assignment_strategy => "cooperative_sticky"
        security_protocol => "SSL"
        ssl_truststore_location => "{{ msk_client_truststore_path }}"

        # Use the Filebeat topic rather than the filebeat_archive topic,
        # so this instance does not duplicate messages into the topic
        topics => ["{{ msk_topic_name_filebeat_odigo }}"]
    }
}

output {
    if [input][type] == "container" or [input][type] == "docker" {
        s3 {
            bucket => "{{ bucket_logging.name }}"
            region => "{{ bucket_logging.region }}"
            role_arn => "arn:aws:iam::{{ bucket_logging.account_id }}:role/{{ logstash_aws_role_name }}"
            canned_acl => "bucket-owner-full-control"
            validate_credentials_on_root_bucket => false
            codec => "json_lines"
            prefix => "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
            temporary_directory => "/tmp/logstash/docker_uploads"
        }
    } else if [input][type] == "log" and [event][dataset] == "system.auth" {
        s3 {
            bucket => "{{ bucket_logging.name }}"
            region => "{{ bucket_logging.region }}"
            role_arn => "arn:aws:iam::{{ bucket_logging.account_id }}:role/{{ logstash_aws_role_name }}"
            canned_acl => "bucket-owner-full-control"
            validate_credentials_on_root_bucket => false
            codec => "json_lines"
            prefix => "xxxxxxxxxxxxxxxxxxxx"
            temporary_directory => "/tmp/logstash/authlog_uploads"
        }
    } else {
        s3 {
            bucket => "{{ bucket_logging.name }}"
            region => "{{ bucket_logging.region }}"
            role_arn => "arn:aws:iam::{{ bucket_logging.account_id }}:role/{{ logstash_aws_role_name }}"
            canned_acl => "bucket-owner-full-control"
            validate_credentials_on_root_bucket => false
            codec => "json_lines"
            prefix => "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
            temporary_directory => "/tmp/logstash/syslogs_uploads"
        }
    }
}
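
None of the s3 blocks above set any rotation options, so the plugin's defaults apply. For reference, here is a sketch of the rotation-related settings the plugin exposes, which bound how large and how old a temporary file (and therefore its open file handle) can get before it is uploaded and closed. The values shown are illustrative, not a tested fix for this issue:

    s3 {
        # ... same bucket/region/role/codec settings as above ...
        rotation_strategy => "size_and_time"  # rotate on size or age, whichever comes first
        size_file => 5242880                  # bytes; rotate once a temporary file reaches 5 MB
        time_file => 15                       # minutes; rotate files older than this even if small
        upload_workers_count => 4             # threads draining the upload queue
        upload_queue_size => 4                # pending uploads tolerated before backpressure
        temporary_directory => "/tmp/logstash/docker_uploads"
    }

Tighter size_file/time_file values make the plugin close and upload temporary files sooner, at the cost of more, smaller S3 objects.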

Every morning we hit this issue when there is a single consumer and around 2 million events to ingest:

[2023-09-20T00:02:21,944][ERROR][logstash.javapipeline ][consumer_archive_filebeat_odigo] Pipeline worker error, the pipeline will be stopped {:pipeline_id=>"consumer_archive_filebeat_odigo", :error=>"(EMFILE) Too many open files - Too many open files", :exception=>Java::OrgJrubyExceptions::SystemCallError, :backtrace=>[
  "org.jruby.RubyIO.sysopen(org/jruby/RubyIO.java:1234)",
  "org.jruby.RubyFile.initialize(org/jruby/RubyFile.java:365)",
  "org.jruby.RubyIO.open(org/jruby/RubyIO.java:1156)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.temporary_file_factory.new_file(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3/temporary_file_factory.rb:89)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.temporary_file_factory.rotate!(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3/temporary_file_factory.rb:43)",
  "org.jruby.ext.thread.Mutex.synchronize(org/jruby/ext/thread/Mutex.java:164)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.temporary_file_factory.rotate!(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3/temporary_file_factory.rb:42)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.temporary_file_factory.initialize(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3/temporary_file_factory.rb:38)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.file_repository.apply(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3/file_repository.rb:53)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.file_repository.get_factory(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3/file_repository.rb:83)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.file_repository.get_file(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3/file_repository.rb:87)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.multi_receive_encoded(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3.rb:246)",
  "org.jruby.RubyArray.each(org/jruby/RubyArray.java:1821)",
  "usr.share.logstash.vendor.bundle.jruby.$2_dot_5_dot_0.gems.logstash_minus_output_minus_s3_minus_4_dot_3_dot_5.lib.logstash.outputs.s3.multi_receive_encoded(/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-output-s3-4.3.5/lib/logstash/outputs/s3.rb:241)",
  "usr.share.logstash.logstash_minus_core.lib.logstash.outputs.base.multi_receive(/usr/share/logstash/logstash-core/lib/logstash/outputs/base.rb:103)",
  "org.logstash.config.ir.compiler.OutputStrategyExt$AbstractOutputStrategyExt.multi_receive(org/logstash/config/ir/compiler/OutputStrategyExt.java:143)",
  "org.logstash.config.ir.compiler.AbstractOutputDelegatorExt.multi_receive(org/logstash/config/ir/compiler/AbstractOutputDelegatorExt.java:121)",
  "usr.share.logstash.logstash_minus_core.lib.logstash.java_pipeline.start_workers(/usr/share/logstash/logstash-core/lib/logstash/java_pipeline.rb:299)"
], :thread=>"#<Thread:0x6cde7172@/usr/share/logstash/logstash-core/lib/logstash/java_pipeline.rb:129 sleep>"}

[2023-09-20T00:02:22,812][ERROR][logstash.javapipeline ][main] Pipeline worker error, the pipeline will be stopped {:pipeline_id=>"main", :error=>"(EMFILE) Too many open files - Too many open files", :exception=>Java::OrgJrubyExceptions::SystemCallError, :backtrace=>[... same backtrace as above ...], :thread=>"#<Thread:0x40bce2da@/usr/share/logstash/logstash-core/lib/logstash/java_pipeline.rb:129 sleep>"}

[2023-09-20T00:02:22,826][ERROR][logstash.javapipeline ][main] Pipeline worker error, the pipeline will be stopped {:pipeline_id=>"main", :error=>"(EMFILE) Too many open files - Too many open files", :exception=>Java::OrgJrubyExceptions::SystemCallError, :backtrace=>[... same backtrace as above ...], :thread=>"#<Thread:0x40bce2da@/usr/share/logstash/logstash-core/lib/logstash/java_pipeline.rb:129 sleep>"}

I think a lot of temporary files are being created in the temporary directory. The backtrace supports that: the EMFILE is raised from temporary_file_factory's new_file during rotate!, i.e. the process is out of file descriptors at the moment the plugin tries to open the next temporary file.
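
For context, this plugin keeps one open temporary file per distinct prefix per s3 output until that file is rotated and uploaded; the backtrace above goes through file_repository.get_factory/get_file, which is exactly that per-prefix bookkeeping. The prefix values are redacted here, but if they interpolate a high-cardinality event field, every distinct value pins its own file descriptor, and descriptor usage grows with the number of active prefixes across the three outputs. A hypothetical low-cardinality alternative (the real prefixes are not shown, so this is only a sketch):

    s3 {
        # ...
        # Hypothetical date-based prefix: one active temporary file per day
        # per output, instead of one per interpolated field value.
        prefix => "docker/%{+yyyy-MM-dd}/"
    }

As a stopgap, raising the Logstash process's open-file limit (for example LimitNOFILE in the systemd unit, or ulimit -n in the container entrypoint) buys headroom while the prefix cardinality and rotation settings are tuned.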

Thanks.