logstash-plugins / logstash-input-jdbc

Logstash Plugin for JDBC Inputs
Apache License 2.0

logstash-input-jdbc data loss (about 15% loss) #158

hackforfun commented 8 years ago

Hi, I'm using Logstash 2.3 and the logstash-input-jdbc plugin; below is my config file:

input {
  jdbc {
    jdbc_driver_library => "ojdbc7.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@127.0.0.1:1521:LOG"
    jdbc_user => "user"
    jdbc_password => "pass"
    clean_run => false
    use_column_value => true
    tracking_column => "ID"
    record_last_run => true
    last_run_metadata_path => "jdbc_last_run_cas_validation"
    schedule => "* * * * *"
    statement => "select * from CAS.CAS_VALIDATION_LOG WHERE ID > :sql_last_value"
    type => "cas_validation"
  }

  jdbc {
    jdbc_driver_library => "ojdbc7.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@127.0.0.1:1521:LOG"
    jdbc_user => "user"
    jdbc_password => "pass"
    clean_run => false
    use_column_value => true
    tracking_column => "ID"
    record_last_run => true
    last_run_metadata_path => "jdbc_last_run_editpass"
    schedule => "* * * * *"
    statement => "select * from CAS.FW_EDIT_PASS_LOG WHERE ID > :sql_last_value"
    type => "editpass"
  }

  jdbc {
    jdbc_driver_library => "ojdbc7.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@127.0.0.1:1521:LOG"
    jdbc_user => "user"
    jdbc_password => "pass"
    clean_run => false
    use_column_value => true
    tracking_column => "ID"
    record_last_run => true
    last_run_metadata_path => "jdbc_last_run_login_log"
    schedule => "* * * * *"
    statement => "select * from CAS.FW_LOGIN_LOG_201607 WHERE ID > :sql_last_value"
    type => "login_log"
  }

  jdbc {
    jdbc_driver_library => "ojdbc7.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@127.0.0.1:1521:LOG"
    jdbc_user => "user"
    jdbc_password => "pass"
    clean_run => false
    use_column_value => true
    tracking_column => "ID"
    record_last_run => true
    last_run_metadata_path => "jdbc_last_run_login_log"
    schedule => "* * * * *"
    statement => "select * from CAS.FW_LOGIN_LOG_201608 WHERE ID > :sql_last_value"
    type => "login_log"
  }
}

filter { mutate { rename => ["updation_date", "creation_date"] } }

output {
  elasticsearch {
    hosts => ["10.1.3.1"]
    user => "user"
    password => "pass2"
    workers => 5
    index => "caslog-%{+YYYY.MM.dd}"
    template_overwrite => true
    document_id => "%{log_id}"
  }
}
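For reference, the file named in last_run_metadata_path is a small YAML snapshot of sql_last_value that the plugin rewrites after every scheduled run; for a numeric tracking column it typically contains a single line such as:

    --- 12345

Two details in the config above are worth noting. The two login_log inputs point at the same file (jdbc_last_run_login_log), so each run of one input overwrites the value recorded by the other, and after a pipeline restart both inputs resume from whichever value was written last. Also, with the default lowercase_column_names => true the plugin downcases column names before tracking, so the tracking_column may need to be "id" rather than "ID".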

As you can see, I use the primary key ID of each table as the document_id and "tracking_column => ID" to avoid duplicate data. The problem is that I am now losing data, about 15%. I think it happens when Logstash's query times out or fails but sql_last_value is still updated. Is there any way to resolve this problem?
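One way to narrow down where the loss happens is to check whether the rows are missing from Elasticsearch or were never read from Oracle. A rough sketch, assuming the IDs come from a gap-free sequence (worth verifying, since Oracle sequence caching alone can create gaps that look like loss):

    -- expected vs. actual row count for one source table
    SELECT MIN(ID) AS first_id,
           MAX(ID) AS last_id,
           MAX(ID) - MIN(ID) + 1 AS expected_rows,
           COUNT(*) AS actual_rows
    FROM CAS.FW_LOGIN_LOG_201607;

Comparing actual_rows with the document count in the corresponding caslog-* indices then shows whether rows were skipped on the input side or dropped while indexing.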

vanga commented 8 years ago

@hackforfun Just curious: are these timeouts happening on the output side, while indexing to Elasticsearch?