Closed doron-td-agent closed 4 years ago
Sorry for the late reply.
on all our AWS instances we have it on (~140%) while restarting the td-agent with no traffic active on other Vmware instances CPU is much lower.
Does 'no traffic' here mean no incoming logs and no buffered files, or no incoming logs but buffered files still exist?
on other Vmware instances CPU is much lower.
What is the difference between the AWS and VMware instances? Do they have the same OS and the same resources?
This issue is stale, so I'm closing it. If you still have the problem, updating fluentd might fix it.
We have the following environment: php5-fpm + nginx -> td-agent 2.3.1 -> BigQuery (Google Cloud). 1) Since we rebooted the machines a few days ago (we only have problems on AWS instances), we get "connection time out" in PHP when we try to communicate with td-agent. While this is happening, even a simple nc command to td-agent hangs. 2) We tried recreating the instance and doubling the CPU power (Meltdown patch). It didn't solve the problem. 3) On all our AWS instances CPU usage is high (~140%), even when restarting td-agent with no traffic active; on the other VMware instances CPU is much lower.
We redirected the traffic via another server running td-agent, and it is working fine.
I would like to figure out what the problem is. Running strace with -p -f, I see many calls like these:
[pid 27123] futex(0x7fbada82d644, FUTEX_WAIT_PRIVATE, 50563879, NULL) = -1 EAGAIN (Resource temporarily unavailable)
pid 27113] <... futex resumed> ) = 1 [pid 27113] futex(0x7fba947228a4, FUTEX_WAIT_BITSET_PRIVATE, 47845, {7666, 716555325}, ffffffff <unfinished ...> [pid 27112] futex(0x7fbada82d610, FUTEX_WAKE_PRIVATE, 1 <unfinished ...> [pid 27125] <... futex resumed> ) = -1 ETIMEDOUT (Connection timed out) [pid 27125] futex(0x7fba94725920, FUTEX_WAKE_PRIVATE, 1) = 0 [pid 27125] futex(0x7fbada82d644, FUTEX_WAIT_PRIVATE, 50563900, NULL <unfinished ...> [pid 27112] <... futex resumed> ) = 0 [pid 27112] clock_gettime(CLOCK_MONOTONIC, {7666, 671083616}) = 0 [pid 27112] clock_gettime(CLOCK_REALTIME, {1515586707, 822298094}) = 0 [pid 27112] clock_gettime(CLOCK_REALTIME, {1515586707, 822483342}) = 0 [pid 27112] clock_gettime(CLOCK_REALTIME, {1515586707, 822664208}) = 0 [pid 27112] clock_gettime(CLOCK_REALTIME, {1515586707, 822843878}) = 0 [pid 27112] clock_gettime(CLOCK_MONOTONIC, {7666, 672087314}) = 0 [pid 27112] clock_gettime(CLOCK_MONOTONIC, {7666, 672294216}) = 0 [pid 27112] futex(0x7fbada82d644, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7fbada82d640, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1 [pid 27112] futex(0x7fba947224a4, FUTEX_WAIT_BITSET_PRIVATE, 47669, {7666, 721933216}, ffffffff <unfinished ...> [pid 27111] <... futex resumed> ) = 0 [pid 27111] futex(0x7fbada82d610, FUTEX_WAKE_PRIVATE, 1) = 0 [pid 27111] clock_gettime(CLOCK_MONOTONIC, {7666, 673027638}) = 0 [pid 27111] clock_gettime(CLOCK_REALTIME, {1515586707, 824151364}) = 0 [pid 27111] clock_gettime(CLOCK_REALTIME, {1515586707, 824350574}) = 0 [pid 27111] clock_gettime(CLOCK_REALTIME, {1515586707, 824550627}) = 0 [pid 27111] clock_gettime(CLOCK_REALTIME, {1515586707, 824750533}) = 0 [pid 27111] clock_gettime(CLOCK_MONOTONIC, {7666, 673976971}) = 0 [pid 27111] clock_gettime(CLOCK_MONOTONIC, {7666, 674188906}) = 0 [pid 27111] futex(0x7fbada82d644, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7fbada82d640, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1 [pid 27124] <... 
futex resumed> ) = -1 ETIMEDOUT (Connection timed out) [pid 27124] futex(0x7fba94725520, FUTEX_WAKE_PRIVATE, 1) = 0 [pid 27124] clock_gettime(CLOCK_MONOTONIC, {7666, 675002738}) = 0 [pid 27111] futex(0x7fba947220a4, FUTEX_WAIT_BITSET_PRIVATE, 47779, {7666, 723787906}, ffffffff <unfinished ...> [pid 27124] clock_gettime(CLOCK_REALTIME, {1515586707, 826274958}) = 0 [pid 27110] <... futex resumed> ) = 0 [pid 27124] clock_gettime(CLOCK_REALTIME, {1515586707, 826571554}) = 0 [pid 27124] clock_gettime(CLOCK_REALTIME, {1515586707, 826751495}) = 0 [pid 27124] clock_gettime(CLOCK_REALTIME, {1515586707, 826930402}) = 0 [pid 27124] clock_gettime(CLOCK_MONOTONIC, {7666, 676181264}) = 0 [pid 27124] clock_gettime(CLOCK_MONOTONIC, {7666, 676373697}) = 0 [pid 27124] futex(0x7fbada82d640, FUTEX_WAIT_PRIVATE, 2, NULL <unfinished ...> [pid 27110] futex(0x7fbada82d610, FUTEX_WAKE_PRIVATE, 1) = 0 [pid 27110] futex(0x7fbada82d640, FUTEX_WAKE_PRIVATE, 1 <unfinished ...> [pid 27124] <... futex resumed> ) = 0 [pid 27124] futex(0x7fbada82d644, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7fbada82d640, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1} <unfinished ...> [pid 27110] <... futex resumed> ) = 1 [pid 27124] <... futex resumed> ) = 1 [pid 27124] futex(0x7fba947254a4, FUTEX_WAIT_BITSET_PRIVATE, 48039, {7666, 726013697}, ffffffff <unfinished ...> [pid 27110] futex(0x7fbada82d644, FUTEX_WAIT_PRIVATE, 50563903, NULL) = -1 EAGAIN (Resource temporarily unavailable) [pid 27110] futex(0x7fbada82d610, FUTEX_WAKE_PRIVATE, 1) = 0 [pid 27110] clock_gettime(CLOCK_MONOTONIC, {7666, 678114669}) = 0 [pid 27109] <... 
futex resumed> ) = 0 [pid 27110] clock_gettime(CLOCK_REALTIME, {1515586707, 829385957}) = 0 [pid 27109] futex(0x7fbada82d644, FUTEX_WAIT_PRIVATE, 50563904, NULL <unfinished ...> [pid 27110] clock_gettime(CLOCK_REALTIME, {1515586707, 829709168}) = 0 [pid 27110] clock_gettime(CLOCK_REALTIME, {1515586707, 829890526}) = 0 [pid 27110] clock_gettime(CLOCK_REALTIME, {1515586707, 830104228}) = 0 [pid 27110] clock_gettime(CLOCK_MONOTONIC, {7666, 679344919}) = 0 [pid 27110] clock_gettime(CLOCK_MONOTONIC, {7666, 679521448}) = 0 [pid 27110] futex(0x7fbada82d644, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7fbada82d640, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
I see there are 1064 ruby LWPs (threads). The ps output is: td-agent 26734 1 0 11:10 ? 00:00:00 /opt/td-agent/embedded/bin/ruby /usr/sbin/td-agent --log /var/log/td-agent/td-agent.log --daemon /var/run/td-agent/td-agent.pid td-agent 26737 26734 99 11:10 ? 01:49:04 /opt/td-agent/embedded/bin/ruby /usr/sbin/td-agent --log /var/log/td-agent/td-agent.log --daemon /var/run/td-agent/td-agent.pid