sandheep-oml opened this issue 1 year ago
Thanks for opening this issue. A team member should give feedback soon. In the meantime, feel free to check out the contributing guidelines.
@sandheep-oml Can you share:
- your OS and version
- your CPU architecture (ARM/Intel)
- the CPU and memory available on the machine where you were installing SigNoz
@sandheep-oml this happens when the collector has issues connecting to ClickHouse. Make sure ClickHouse is healthy and reachable. Let us know if the issue persists afterwards.
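A quick way to verify both, as a minimal sketch: the commands below assume the default service names from the SigNoz docker-compose files (`clickhouse`, `otel-collector`), and that the collector image ships `wget`; adjust the names if yours differ.

```sh
# Is the ClickHouse container up and marked healthy?
docker ps --filter name=clickhouse

# Can ClickHouse answer a trivial query locally?
docker exec -it clickhouse clickhouse-client --query "SELECT 1"

# Is ClickHouse reachable over the Docker network from the collector's side?
# (/ping is ClickHouse's HTTP health endpoint; it returns "Ok." when healthy)
docker exec -it otel-collector wget -qO- http://clickhouse:8123/ping
```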
For reference, this is the same error I had in #1928
@pranay01 @srikanthccv
- OS: Ubuntu 22.04.1 LTS
- Architecture: x86_64
- CPU and memory: t2.medium AWS instance (https://aws.amazon.com/ec2/instance-types/t2/) with nothing else running; CPU usage < 1%
Clickhouse is healthy and reachable.
Do you need any other information?
Can you share the ClickHouse logs? Even while it reports healthy, it sometimes doesn't accept connections if there is an issue.
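One way to pull the relevant lines, assuming the container is named `clickhouse` (check `docker ps` for the actual name in your setup):

```sh
# Grab the last 200 log lines and surface errors/exceptions
docker logs --tail 200 clickhouse 2>&1 | grep -iE "error|exception"
```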
ClickHouse is throwing errors.
Your ClickHouse failed to connect to ZooKeeper, which is necessary for it to work. Can you share more logs from ZooKeeper and ClickHouse?
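For context: ClickHouse learns where ZooKeeper lives from a `<zookeeper>` section in its server configuration, so a wrong hostname or an unreachable container there produces exactly this kind of `Code: 999` error. An illustrative sketch (not the exact file shipped by SigNoz; the hostname is whatever your ZooKeeper service is called on the Docker network):

```xml
<!-- Sketch of the <zookeeper> section in ClickHouse's server config.
     Host and port are assumptions for a typical docker-compose setup. -->
<clickhouse>
    <zookeeper>
        <node>
            <host>zookeeper-1</host>
            <port>2181</port>
        </node>
        <session_timeout_ms>30000</session_timeout_ms>
    </zookeeper>
</clickhouse>
```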
ClickHouse logs:
{"log":"12. ThreadPoolImpl\u003cstd::__1::thread\u003e::worker(std::__1::__list_iterator\u003cstd::__1::thread, void*\u003e) @ 0xa4b11c8 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:46.480945817Z"} {"log":"13. ? @ 0xa4b43dd in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:46.480950896Z"} {"log":"14. ? @ 0x7f09e1fcd609 in ?\r\n","stream":"stdout","time":"2023-01-07T15:43:46.480955107Z"} {"log":"15. __clone @ 0x7f09e1ef2133 in ?\r\n","stream":"stdout","time":"2023-01-07T15:43:46.480959517Z"} {"log":" (version 22.8.8.3 (official build))\r\n","stream":"stdout","time":"2023-01-07T15:43:46.480963851Z"} {"log":"2023.01.07 15:43:54.484554 [ \u001b[38;2;140;145;5m256\u001b[0m ] {\u001b[38;2;38;190;41m\u001b[0m} \u003c\u001b[1;31mError\u001b[0m\u003e \u001b[38;2;104;146;94mvirtual bool DB::DDLWorker::initializeMainThread()\u001b[0m: Code: 999. Coordination::Exception: All connection tries failed while connecting to ZooKeeper. nodes: 172.20.0.4:2181\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484799622Z"} {"log":"Poco::Exception. Code: 1000, e.code() = 0, Timeout: connect timed out: 172.20.0.4:2181 (version 22.8.8.3 (official build)), 172.20.0.4:2181\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484830514Z"} {"log":"Poco::Exception. Code: 1000, e.code() = 0, Timeout: connect timed out: 172.20.0.4:2181 (version 22.8.8.3 (official build)), 172.20.0.4:2181\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484862185Z"} {"log":"Poco::Exception. Code: 1000, e.code() = 0, Timeout: connect timed out: 172.20.0.4:2181 (version 22.8.8.3 (official build)), 172.20.0.4:2181\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484868206Z"} {"log":" (Connection loss). (KEEPER_EXCEPTION), Stack trace (when copying this message, always include the lines below):\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484872844Z"} {"log":"\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484877358Z"} {"log":"0. DB::Exception::Exception(std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, int, bool) @ 0xa3ef75a in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484881577Z"} {"log":"1. Coordination::Exception::Exception(std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, Coordination::Error, int) @ 0x16168f96 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484886869Z"} {"log":"2. Coordination::Exception::Exception(std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, Coordination::Error) @ 0x1616921c in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484891744Z"} {"log":"3. Coordination::ZooKeeper::connect(std::__1::vector\u003cCoordination::ZooKeeper::Node, std::__1::allocator\u003cCoordination::ZooKeeper::Node\u003e \u003e const\u0026, Poco::Timespan) @ 0x161b2d1c in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484896496Z"} {"log":"4. 
Coordination::ZooKeeper::ZooKeeper(std::__1::vector\u003cCoordination::ZooKeeper::Node, std::__1::allocator\u003cCoordination::ZooKeeper::Node\u003e \u003e const\u0026, std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, Poco::Timespan, Poco::Timespan, Poco::Timespan, std::__1::shared_ptr\u003cDB::ZooKeeperLog\u003e) @ 0x161b183d in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484902503Z"} {"log":"5. zkutil::ZooKeeper::init(std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, std::__1::vector\u003cstd::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e, std::__1::allocator\u003cstd::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e \u003e \u003e const\u0026, std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, int, int, std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, DB::GetPriorityForLoadBalancing const\u0026) @ 0x1616c0e5 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484908788Z"} {"log":"6. zkutil::ZooKeeper::ZooKeeper(Poco::Util::AbstractConfiguration const\u0026, std::__1::basic_string\u003cchar, std::__1::char_traits\u003cchar\u003e, std::__1::allocator\u003cchar\u003e \u003e const\u0026, std::__1::shared_ptr\u003cDB::ZooKeeperLog\u003e) @ 0x1616f2db in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484915399Z"} {"log":"7. DB::Context::getZooKeeper() const @ 0x148bf159 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484920391Z"} {"log":"8. DB::DDLWorker::getAndSetZooKeeper() @ 0x1491f9fb in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484929252Z"} {"log":"9. DB::DDLWorker::initializeMainThread() @ 0x14931561 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484933698Z"} {"log":"10. DB::DDLWorker::runMainThread() @ 0x1491d952 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.48493799Z"} {"log":"11. void std::__1::__function::__policy_invoker\u003cvoid ()\u003e::__call_impl\u003cstd::__1::__function::__default_alloc_func\u003cThreadFromGlobalPool::ThreadFromGlobalPool\u003cvoid (DB::DDLWorker::*)(), DB::DDLWorker*\u003e(void (DB::DDLWorker::*\u0026\u0026)(), DB::DDLWorker*\u0026\u0026)::'lambda'(), void ()\u003e \u003e(std::__1::__function::__policy_storage const*) @ 0x14932587 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484942549Z"} {"log":"12. ThreadPoolImpl\u003cstd::__1::thread\u003e::worker(std::__1::__list_iterator\u003cstd::__1::thread, void*\u003e) @ 0xa4b11c8 in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484948165Z"} {"log":"13. ? @ 0xa4b43dd in /usr/bin/clickhouse\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484953083Z"} {"log":"14. ? @ 0x7f09e1fcd609 in ?\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484957547Z"} {"log":"15. 
__clone @ 0x7f09e1ef2133 in ?\r\n","stream":"stdout","time":"2023-01-07T15:43:54.484961923Z"} {"log":" (version 22.8.8.3 (official build))\r\n","stream":"stdout","time":"2023-01-07T15:43:54.48496613Z"}
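The error above is a plain TCP connect timeout to 172.20.0.4:2181 (the ZooKeeper container's address on the compose network), so the first thing to verify is raw reachability from inside the ClickHouse container. A sketch, assuming the container is named `clickhouse` and has `bash` and `nc` available (tool availability varies by image):

```sh
# Exit code 0 means the ZooKeeper port accepts TCP connections
docker exec -it clickhouse bash -c "nc -vz -w 3 172.20.0.4 2181"
```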
ZooKeeper logs:
{"log":"2023-01-07 14:13:40,480 [myid:1] - INFO [main:WatchManagerFactory@42] - Using org.apache.zookeeper.server.watch.WatchManager as watch manager\n","stream":"stdout","time":"2023-01-07T14:13:40.48091917Z"} {"log":"2023-01-07 14:13:40,480 [myid:1] - INFO [main:WatchManagerFactory@42] - Using org.apache.zookeeper.server.watch.WatchManager as watch manager\n","stream":"stdout","time":"2023-01-07T14:13:40.480991905Z"} {"log":"2023-01-07 14:13:40,482 [myid:1] - INFO [main:ZKDatabase@133] - zookeeper.snapshotSizeFactor = 0.33\n","stream":"stdout","time":"2023-01-07T14:13:40.482766912Z"} {"log":"2023-01-07 14:13:40,482 [myid:1] - INFO [main:ZKDatabase@153] - zookeeper.commitLogCount=500\n","stream":"stdout","time":"2023-01-07T14:13:40.482850005Z"} {"log":"2023-01-07 14:13:40,483 [myid:1] - INFO [main:FileSnap@85] - Reading snapshot /bitnami/zookeeper/data/version-2/snapshot.0\n","stream":"stdout","time":"2023-01-07T14:13:40.483181666Z"} {"log":"2023-01-07 14:13:40,486 [myid:1] - INFO [main:DataTree@1716] - The digest value is empty in snapshot\n","stream":"stdout","time":"2023-01-07T14:13:40.48623586Z"} {"log":"2023-01-07 14:13:40,488 [myid:1] - INFO [main:ZKDatabase@290] - Snapshot loaded in 6 ms, highest zxid is 0x0, digest is 1371985504\n","stream":"stdout","time":"2023-01-07T14:13:40.488914149Z"} {"log":"2023-01-07 14:13:40,489 [myid:1] - INFO [main:FileTxnSnapLog@479] - Snapshotting: 0x0 to /bitnami/zookeeper/data/version-2/snapshot.0\n","stream":"stdout","time":"2023-01-07T14:13:40.490149999Z"} {"log":"2023-01-07 14:13:40,491 [myid:1] - INFO [main:ZooKeeperServer@543] - Snapshot taken in 1 ms\n","stream":"stdout","time":"2023-01-07T14:13:40.491516642Z"} {"log":"2023-01-07 14:13:40,501 [myid:1] - INFO [ProcessThread(sid:0 cport:2181)::PrepRequestProcessor@137] - PrepRequestProcessor (sid:0) started, reconfigEnabled=false\n","stream":"stdout","time":"2023-01-07T14:13:40.50149997Z"} {"log":"2023-01-07 14:13:40,501 [myid:1] - INFO [main:RequestThrottler@75] - zookeeper.request_throttler.shutdownTimeout = 10000\n","stream":"stdout","time":"2023-01-07T14:13:40.50178624Z"} {"log":"2023-01-07 14:13:40,515 [myid:1] - INFO [main:ContainerManager@84] - Using checkIntervalMs=60000 maxPerMinute=10000 maxNeverUsedIntervalMs=0\n","stream":"stdout","time":"2023-01-07T14:13:40.515935942Z"} {"log":"2023-01-07 14:13:40,516 [myid:1] - INFO [main:ZKAuditProvider@42] - ZooKeeper audit is disabled.\n","stream":"stdout","time":"2023-01-07T14:13:40.516534978Z"} {"log":"2023-01-07 15:13:40,060 [myid:1] - INFO [PurgeTask:DatadirCleanupManager$PurgeTask@139] - Purge task started.\n","stream":"stdout","time":"2023-01-07T15:13:40.060488061Z"} {"log":"2023-01-07 15:13:40,060 [myid:1] - INFO [PurgeTask:FileTxnSnapLog@124] - zookeeper.snapshot.trust.empty : false\n","stream":"stdout","time":"2023-01-07T15:13:40.060690929Z"} {"log":"2023-01-07 15:13:40,061 [myid:1] - INFO [PurgeTask:DatadirCleanupManager$PurgeTask@145] - Purge task completed.\n","stream":"stdout","time":"2023-01-07T15:13:40.062091756Z"} "9676328d34cb8e3a56408d500f72056456bf9a47e1627b9dddf9c67b34aeb715/9676328d34cb8e3a56408d500f72056456bf9a47e1627b9dddf9c67b34aeb715-json.log" 123L, 25925B
@srikanthccv It isn't reproducible with the same images?
No, it's not reproducible. I have a t2.xlarge machine where I am trying to reproduce this, but it doesn't happen for me.
Welcome to Ubuntu 22.04.1 LTS (GNU/Linux 5.15.0-1026-aws x86_64)
I am trying to understand what is specific to your environment that causes the issue.
Did you make any changes to the SigNoz config files or any network configuration?
@sandheep-oml, if you could help me understand what steps you followed to reproduce the issue, that would be great.
@srikanthccv I didn't make any changes. For this install, I cloned the repo and ran the ./install.sh file.
In my case, ClickHouse is not running as a cluster and there is no ZooKeeper. I upgraded from 0.11.3, and the logs show 'CREATE DATABASE IF NOT EXISTS signoz_metrics ON CLUSTER cluster'.
Does version >= 0.12 not support single-node ClickHouse?
That's correct.
> Does version >= 0.12 not support single-node ClickHouse?
@YoKv A ClickHouse cluster with a single ClickHouse instance plus ZooKeeper is needed.
A standalone single-node ClickHouse (without ZooKeeper) is not supported.
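Concretely, "a cluster with one instance" means the server config defines a named cluster whose only shard/replica is the local server, which is why the collector issues DDL `ON CLUSTER cluster`. A hedged sketch (names and ports are common defaults, not verified against any particular deployment):

```xml
<!-- Illustrative only: a cluster named `cluster` with a single shard and
     replica, roughly what the SigNoz deploy files define. -->
<clickhouse>
    <remote_servers>
        <cluster>
            <shard>
                <replica>
                    <host>clickhouse</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster>
    </remote_servers>
</clickhouse>
```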
Bug description
The otel-collector container keeps restarting. I attached to the container while it was restarting and captured this output.
```
2023-01-04T07:59:40.628Z  info  service/telemetry.go:111  Setting up own telemetry...
2023-01-04T07:59:40.628Z  info  service/telemetry.go:141  Serving Prometheus metrics  {"address": "0.0.0.0:8888", "level": "Basic"}
2023-01-04T07:59:40.629Z  info  components/components.go:30  Stability level of component is undefined  {"kind": "exporter", "data_type": "metrics", "name": "clickhousemetricswrite", "stability": "Undefined"}
time="2023-01-04T07:59:40Z" level=info msg="Executing:\nCREATE DATABASE IF NOT EXISTS signoz_metrics ON CLUSTER cluster\n" component=clickhouse
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x28 pc=0x3864f79]

goroutine 1 [running]:
github.com/SigNoz/signoz-otel-collector/exporter/clickhousemetricsexporter.NewPrwExporter(0xc000e68a80, {{0xc000270e70, {0x52a3400, 0xc000547400}, {0x52ab340, 0x7d40118}, 0x0}, {{0x493758c, 0x15}, {0x4934ee9, ...}, ...}})
	/src/exporter/clickhousemetricsexporter/exporter.go:90 +0x2f9
github.com/SigNoz/signoz-otel-collector/exporter/clickhousemetricsexporter.createMetricsExporter({0x52dc448, 0xc00012a000}, {{0xc000270e70, {0x52a3400, 0xc000547400}, {0x52ab340, 0x7d40118}, 0x0}, {{0x493758c, 0x15}, ...}}, ...)
	/src/exporter/clickhousemetricsexporter/factory.go:50 +0x9f
go.opentelemetry.io/collector/component.CreateMetricsExporterFunc.CreateMetricsExporter(...)
	/go/pkg/mod/go.opentelemetry.io/collector/component@v0.66.0/exporter.go:149
go.opentelemetry.io/collector/service/internal/pipelines.createExporter({0x52dc448?, 0xc00012a000?}, {{0xc000270e70, {0x52a3400, 0xc000547400}, {0x52ab340, 0x7d40118}, 0x0}, {{0x493758c, 0x15}, ...}}, ...)
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/internal/pipelines/pipelines.go:374 +0x269
go.opentelemetry.io/collector/service/internal/pipelines.buildExporter({0x52dc448, 0xc00012a000}, {0xc000270380, {0x52a3400, 0xc000547400}, {0x52ab340, 0x7d40118}, 0x0}, {{0x493758c, 0x15}, ...}, ...)
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/internal/pipelines/pipelines.go:360 +0x2ec
go.opentelemetry.io/collector/service/internal/pipelines.Build({0x52dc448, 0xc00012a000}, {{0xc000270380, {0x52a3400, 0xc000547400}, {0x52ab340, 0x7d40118}, 0x0}, {{0x493758c, 0x15}, ...}, ...})
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/internal/pipelines/pipelines.go:243 +0x1a85
go.opentelemetry.io/collector/service.(*service).initExtensionsAndPipeline(0xc000547280, 0xc0009c7540)
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/service.go:162 +0x2bf
go.opentelemetry.io/collector/service.newService(0xc0009c7540)
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/service.go:75 +0x525
go.opentelemetry.io/collector/service.(*Collector).setupConfigurationComponents(0xc0009c7a88, {0x52dc448, 0xc00012a000})
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/collector.go:149 +0x1db
go.opentelemetry.io/collector/service.(*Collector).Run(0xc0009c7a88, {0x52dc448, 0xc00012a000})
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/collector.go:190 +0x46
go.opentelemetry.io/collector/service.NewCommand.func1(0xc00014c300, {0x48fdbae?, 0x1?, 0x1?})
	/go/pkg/mod/go.opentelemetry.io/collector@v0.66.0/service/command.go:53 +0x479
github.com/spf13/cobra.(*Command).execute(0xc00014c300, {0xc000126010, 0x1, 0x1})
	/go/pkg/mod/github.com/spf13/cobra@v1.6.1/command.go:916 +0x862
github.com/spf13/cobra.(*Command).ExecuteC(0xc00014c300)
	/go/pkg/mod/github.com/spf13/cobra@v1.6.1/command.go:1044 +0x3bc
github.com/spf13/cobra.(*Command).Execute(...)
	/go/pkg/mod/github.com/spf13/cobra@v1.6.1/command.go:968
main.runInteractive({{0xc000a7c1e0, 0xc000a7d380, 0xc000a7c600, 0xc000a73e00}, {{0x493758c, 0x15}, {0x4934ee9, 0x15}, {0x48fa0e3, 0x6}}, ...})
	/src/cmd/signozcollector/main.go:37 +0x5e
main.run(...)
	/src/cmd/signozcollector/main_others.go:8
main.main()
	/src/cmd/signozcollector/main.go:30 +0x1d8
```
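The panic fires in `NewPrwExporter` at exporter.go:90, immediately after the `CREATE DATABASE ... ON CLUSTER` statement is attempted. A hypothetical reduction of this failure mode in Go (not the actual SigNoz code): a constructor ignores the error from establishing the ClickHouse connection, gets back a nil handle, and the next method call dereferences it, producing exactly this kind of SIGSEGV.

```go
// Hypothetical sketch only; names (conn, connect, Exec) are illustrative.
package main

import "fmt"

type conn struct{ dsn string }

// Exec dereferences its receiver; with a nil *conn this panics with
// "invalid memory address or nil pointer dereference".
func (c *conn) Exec(query string) error {
	fmt.Println("exec on", c.dsn, ":", query) // nil deref happens here
	return nil
}

// connect simulates ClickHouse being unreachable (e.g. its
// ZooKeeper-dependent DDL path failing) by returning a nil handle.
func connect(dsn string) (*conn, error) {
	return nil, fmt.Errorf("connect timed out: %s", dsn)
}

func main() {
	c, _ := connect("tcp://clickhouse:9000") // error ignored: the bug
	_ = c.Exec("CREATE DATABASE IF NOT EXISTS signoz_metrics ON CLUSTER cluster")
}
```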
Expected behavior
How to reproduce
Version information
Additional context
Thank you for your bug report – we love squashing them!