metrico / qryn

⭐️ All-in-One Polyglot Observability with OLAP Storage for Logs, Metrics, Traces & Profiles. Drop-in Grafana Cloud replacement compatible with Loki, Prometheus, Tempo, Pyroscope, Opentelemetry, Datadog and beyond :rocket:
https://qryn.dev
GNU Affero General Public License v3.0

Can you provide replication and distributed SQL statements for ClickHouse clusters? #188

Open ktpktr0 opened 2 years ago

ktpktr0 commented 2 years ago

I set up a ClickHouse cluster locally with three shards and two replicas. I want to know how distributed tables and cluster replication work in cloki.

I tried the following operations. Are they correct?

# Create database

CREATE DATABASE IF NOT EXISTS cloki on cluster cloki;

CREATE TABLE cloki.time_series on cluster cloki (
    date Date,
    fingerprint UInt64,
    labels String,
    name String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cloki/time_series','{replica}')
    PARTITION BY date
    ORDER BY fingerprint;

CREATE TABLE cloki.samples on cluster cloki (
    fingerprint UInt64,
    timestamp_ms Int64,
    value Float64,
    string String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cloki/samples','{replica}')
    PARTITION BY toDate(timestamp_ms / 1000)
    ORDER BY (fingerprint, timestamp_ms);

# Distributed tables for cluster mode

CREATE TABLE cloki.distributed_time_series on cluster cloki (
    date Date,
    fingerprint UInt64,
    labels String,
    name String
) ENGINE = Distributed('cloki','cloki','time_series',fingerprint);

CREATE TABLE cloki.distributed_samples on cluster cloki (
    fingerprint UInt64,
    timestamp_ms Int64,
    value Float64,
    string String
) ENGINE = Distributed('cloki','cloki','samples',fingerprint);
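
A quick sanity check (a sketch, assuming the cluster is really named cloki as above) is to confirm that the local tables were created on every replica before starting cloki:

SELECT hostName() AS host, name
FROM clusterAllReplicas('cloki', 'system', 'tables')
WHERE database = 'cloki'
ORDER BY host, name;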

When I try to start cloki, I get the following error:

{"level":50,"time":1659327803048,"pid":60877,"hostname":"k8s-master1","name":"qryn","reqId":"req-5","err":"Clickhouse DB not ready\nError: Clickhouse DB not ready\n    at Object.handler (/usr/lib/node_modules/qryn/lib/handlers/ready.js:15:14)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)","msg":"Clickhouse DB not ready"}
{"level":30,"time":1659327803049,"pid":60877,"hostname":"k8s-master1","name":"qryn","reqId":"req-5","res":{"statusCode":500},"responseTime":4.258558988571167,"msg":"request completed"}
ktpktr0 commented 2 years ago

When I use the following statements to create the database and tables, I get the following error:

CREATE DATABASE IF NOT EXISTS cloki on cluster cloki ENGINE=Atomic;

// NOTE: Make sure you set SAMPLES_DAYS: 3650 & LABELS_DAYS: 3650 to avoid the `ALTER TABLE ...`

// NOTE: You also need to set "distributed_product_mode" to "global" in your profile.
// https://clickhouse.com/docs/en/operations/settings/settings-profiles/

CREATE TABLE cloki.samples_read (
    `fingerprint` UInt64, 
    `timestamp_ms` Int64, 
    `value` Float64, 
    `string` String
)
ENGINE = Merge('cloki', '^(samples|samples_v2)$');

////

CREATE VIEW cloki.samples_read_v2_1 (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64, 
    `value` Float64, 
    `string` String
) AS SELECT fingerprint, timestamp_ms * 1000000 AS timestamp_ns, value, string FROM cloki.samples_read;

////

CREATE TABLE cloki.samples_read_v2_2 (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64, 
    `value` Float64, 
    `string` String
)
ENGINE = Merge('cloki', '^(samples_read_v2_1|samples_v3)$');

////

CREATE TABLE cloki.samples_v3_ (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64 CODEC(DoubleDelta), 
    `value` Float64 CODEC(Gorilla), 
    `string` String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cloki/samples_v3_', '{replica}') 
PARTITION BY toStartOfDay(toDateTime(timestamp_ns / 1000000000)) 
ORDER BY timestamp_ns TTL toDateTime(timestamp_ns / 1000000000) + toIntervalDay(3650) 
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 3600;

CREATE TABLE cloki.samples_v3 (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64 CODEC(DoubleDelta), 
    `value` Float64 CODEC(Gorilla), 
    `string` String
)
ENGINE = Distributed('cloki', 'cloki', 'samples_v3_', fingerprint);

////

CREATE TABLE cloki.settings_ (
    `fingerprint` UInt64, 
    `type` String, 
    `name` String, 
    `value` String, 
    `inserted_at` DateTime64(9, 'UTC')
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/cloki/settings_', '{replica}', inserted_at) 
ORDER BY fingerprint 
SETTINGS index_granularity = 8192;

CREATE TABLE cloki.settings (
    `fingerprint` UInt64, 
    `type` String, 
    `name` String, 
    `value` String, 
    `inserted_at` DateTime64(9, 'UTC')
)
ENGINE = Distributed('cloki', 'cloki', 'settings_', fingerprint);

////

CREATE TABLE cloki.time_series_ (
    `date` Date, 
    `fingerprint` UInt64, 
    `labels` String, 
    `name` String
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/cloki/time_series_', '{replica}', date) 
PARTITION BY date 
ORDER BY fingerprint TTL date + toIntervalDay(3650) 
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 3600;

CREATE TABLE cloki.time_series (
    `date` Date, 
    `fingerprint` UInt64, 
    `labels` String, 
    `name` String
)
ENGINE = Distributed('cloki', 'cloki', 'time_series_', fingerprint);

////

CREATE TABLE cloki.time_series_gin_ (
    `date` Date, 
    `key` String, 
    `val` String, 
    `fingerprint` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/cloki/time_series_gin_', '{replica}') 
PARTITION BY date 
ORDER BY (key, val, fingerprint) TTL date + toIntervalDay(3650) 
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 3600;

CREATE TABLE cloki.time_series_gin (
    `date` Date, 
    `key` String, 
    `val` String, 
    `fingerprint` UInt64
)
ENGINE = Distributed('cloki', 'cloki', 'time_series_gin_', fingerprint);

////

CREATE MATERIALIZED VIEW cloki.time_series_gin_view TO cloki.time_series_gin (
    `date` Date, 
    `key` String, 
    `val` String, 
    `fingerprint` UInt64
) AS SELECT date, pairs.1 AS key, pairs.2 AS val, fingerprint FROM cloki.time_series ARRAY JOIN JSONExtractKeysAndValues(time_series.labels, 'String') AS pairs;

////

CREATE TABLE cloki.ver_ (
    `k` UInt64, 
    `ver` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/cloki/ver_', '{replica}', ver) 
ORDER BY k 
SETTINGS index_granularity = 8192;

CREATE TABLE cloki.ver (
    `k` UInt64, 
    `ver` UInt64
)
ENGINE = Distributed('cloki', 'cloki', 'ver_', k);

////

INSERT INTO cloki.settings (`fingerprint`, `type`, `name`, `value`, `inserted_at`) 
VALUES (990984054, 'rotate', 'v3_samples_days', '3650', '2022-07-31 05:53:52.000000000')
, (4103757074, 'rotate', 'v3_time_series_days', '3650', '2022-07-31 05:53:54.000000000')
, (14553193486094442270, 'update', 'v3_1', '1659246830', '2022-07-31 05:53:50.000000000');

////

INSERT INTO cloki.ver (`k`, `ver`) 
VALUES (1, 10);
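
As an optional sanity check (a sketch), the bootstrap rows above should be visible through the Distributed tables from any node:

SELECT * FROM cloki.ver;
SELECT type, name, value FROM cloki.settings ORDER BY inserted_at;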

qryn logs:

# cd $(dirname $(readlink -f `which cloki`))   && CLICKHOUSE_SERVER="192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123" CLICKHOUSE_CLUSTERED=cloki CLICKHOUSE_AUTH="default:123456" CLICKHOUSE_DB="cloki"   npm start qryn--name "cloki"

> qryn@2.1.10 start
> node qryn.js "qryn--name" "cloki"

{"level":30,"time":1659691646689,"pid":39047,"hostname":"k8s-master3","name":"qryn","msg":"Initializing DB... cloki"}
(node:39047) [FST_MODULE_DEP_FASTIFY-WEBSOCKET] FastifyWarning.fastify-websocket: fastify-websocket has been deprecated. Use @fastify/websocket@5.0.0 instead.
(Use `node --trace-warnings ...` to show where the warning was created)
(node:39047) [FST_MODULE_DEP_FASTIFY-CORS] FastifyWarning.fastify-cors: fastify-cors has been deprecated. Use @fastify/cors@7.0.0 instead.
(node:39047) [FST_MODULE_DEP_FASTIFY-STATIC] FastifyWarning.fastify-static: fastify-static has been deprecated. Use @fastify/static@5.0.0 instead.
{"level":30,"time":1659691647118,"pid":39047,"hostname":"k8s-master3","name":"qryn","msg":"Server listening at http://0.0.0.0:3100"}
{"level":30,"time":1659691647118,"pid":39047,"hostname":"k8s-master3","name":"qryn","msg":"Qryn API up"}
{"level":30,"time":1659691647118,"pid":39047,"hostname":"k8s-master3","name":"qryn","msg":"Qryn API listening on http://0.0.0.0:3100"}
{"level":30,"time":1659691647268,"pid":39047,"hostname":"k8s-master3","name":"qryn","msg":"xxh ready"}
{"level":50,"time":1659691647665,"pid":39047,"hostname":"k8s-master3","name":"qryn","err":"Error: Request failed with status code 500\nResponse: [500] Code: 279. DB::NetException: All connection tries failed. Log: \n\nThere is no table `cloki`.`ver_` on server: 192.168.10.152:9001\nThere is no table `cloki`.`ver_` on server: 192.168.10.151:9000\n\n: While executing Remote. (ALL_CONNECTION_TRIES_FAILED) (version 22.7.1.2484 (official build))\n\nError: Request failed with status code 500\n    at createError (/usr/lib/node_modules/qryn/node_modules/_axios@0.21.4@axios/lib/core/createError.js:16:15)\n    at settle (/usr/lib/node_modules/qryn/node_modules/_axios@0.21.4@axios/lib/core/settle.js:17:12)\n    at IncomingMessage.handleStreamEnd (/usr/lib/node_modules/qryn/node_modules/_axios@0.21.4@axios/lib/adapters/http.js:269:11)\n    at IncomingMessage.emit (node:events:539:35)\n    at endReadableNT (node:internal/streams/readable:1345:12)\n    at processTicksAndRejections (node:internal/process/task_queues:83:21)","msg":"Error starting qryn"}
lmangani commented 2 years ago

Hello @ktpktr0 - did you also set the following?

NOTE: You also need to set "distributed_product_mode" to "global" in your profile. https://clickhouse.com/docs/en/operations/settings/settings-profiles/
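
One way to confirm the setting is actually active for the user qryn connects as (a sketch; run it over the same HTTP connection and user that qryn uses):

SELECT name, value, changed
FROM system.settings
WHERE name = 'distributed_product_mode';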

ktpktr0 commented 2 years ago

@lmangani Yes, I have set it:

<?xml version="1.0"?>
<yandex>
    <!-- Profiles of settings. -->
    <profiles>
        <!-- Default settings. -->
        <default>
            <max_memory_usage>10000000000</max_memory_usage>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
            <distributed_product_mode>global</distributed_product_mode>
        </default>

        <rw_role>
            <distributed_product_mode>allow</distributed_product_mode>
            <constraints>
                <max_memory_usage>
                    <min>5000000000</min>
                    <max>20000000000</max>
                </max_memory_usage>
            </constraints>

            <readonly>0</readonly>
            <allow_ddl>1</allow_ddl>
        </rw_role>

        <ro_role>
            <distributed_product_mode>allow</distributed_product_mode>
            <constraints>
                <max_memory_usage>
                    <min>5000000000</min>
                    <max>10000000000</max>
                </max_memory_usage>
            </constraints>

            <readonly>2</readonly>
            <allow_ddl>1</allow_ddl>
        </ro_role>

    </profiles>

    <!-- Users and ACL. -->
    <users>
        <!-- If user name was not specified, 'default' user is used. -->
        <default>
            <!--echo -n 123456 | openssl dgst -sha256-->
            <password_sha256_hex>8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92</password_sha256_hex>
            <networks incl="networks" replace="replace">
                <ip>192.168.10.0/24</ip>
            </networks>

            <!-- Settings profile for user. -->
            <profile>default</profile>

            <!-- Quota for user. -->
            <quota>default</quota>
lmangani commented 2 years ago

@ktpktr0 the wiki has been updated, could you recheck against it? https://github.com/metrico/qryn/wiki/qryn-tables-replication-support

ktpktr0 commented 2 years ago

@lmangani I followed the example to create the tables, but it does not work.

lmangani commented 2 years ago

@ktpktr0 please attach the errors and make sure any relevant part of the troubleshooting section is also completed.

ktpktr0 commented 2 years ago

# cd $(dirname $(readlink -f `which cloki`))   && CLICKHOUSE_SERVER="192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123" CLICKHOUSE_CLUSTERED=cloki CLICKHOUSE_AUTH="default:123456" CLICKHOUSE_DB="cloki"   npm start qryn--name "cloki"

> qryn@2.1.10 start
> node qryn.js

{"level":30,"time":1659880148856,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"Initializing DB... cloki"}
(node:18930) [FST_MODULE_DEP_FASTIFY-WEBSOCKET] FastifyWarning.fastify-websocket: fastify-websocket has been deprecated. Use @fastify/websocket@5.0.0 instead.
(Use `node --trace-warnings ...` to show where the warning was created)
(node:18930) [FST_MODULE_DEP_FASTIFY-CORS] FastifyWarning.fastify-cors: fastify-cors has been deprecated. Use @fastify/cors@7.0.0 instead.
(node:18930) [FST_MODULE_DEP_FASTIFY-STATIC] FastifyWarning.fastify-static: fastify-static has been deprecated. Use @fastify/static@5.0.0 instead.
{"level":30,"time":1659880149082,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"Server listening at http://0.0.0.0:3100"}
{"level":30,"time":1659880149082,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"Qryn API up"}
{"level":30,"time":1659880149082,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"Qryn API listening on http://0.0.0.0:3100"}
{"level":30,"time":1659880149102,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"xxh ready"}
{"level":30,"time":1659880149117,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"Checking clickhouse capabilities"}
{"level":30,"time":1659880149120,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"LIVE VIEW: supported"}
{"level":30,"time":1659880149127,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"checking old samples support: samples_v2"}
{"level":30,"time":1659880149131,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"checking old samples support: samples"}
{"level":30,"time":1659880149237,"pid":18930,"hostname":"k8s-master2","name":"qryn","msg":"xxh ready"}
{"level":30,"time":1659880156481,"pid":18930,"hostname":"k8s-master2","name":"qryn","reqId":"req-1","req":{"method":"GET","url":"/ready","hostname":"cloki","remoteAddress":"192.168.10.160","remotePort":36414},"msg":"incoming request"}
{"level":50,"time":1659880156486,"pid":18930,"hostname":"k8s-master2","name":"qryn","reqId":"req-1","err":"Clickhouse DB not ready\nError: Clickhouse DB not ready\n    at Object.handler (/usr/lib/node_modules/qryn/lib/handlers/ready.js:15:14)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)","msg":"Clickhouse DB not ready"}
{"level":30,"time":1659880156489,"pid":18930,"hostname":"k8s-master2","name":"qryn","reqId":"req-1","res":{"statusCode":500},"responseTime":7.663643002510071,"msg":"request completed"}
{"level":30,"time":1659880156490,"pid":18930,"hostname":"k8s-master2","name":"qryn","reqId":"req-2","req":{"method":"GET","url":"/ready","hostname":"cloki","remoteAddress":"192.168.10.160","remotePort":36418},"msg":"incoming request"}
{"level":50,"time":1659880156493,"pid":18930,"hostname":"k8s-master2","name":"qryn","reqId":"req-2","err":"Clickhouse DB not ready\nError: Clickhouse DB not ready\n    at Object.handler (/usr/lib/node_modules/qryn/lib/handlers/ready.js:15:14)\n    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)","msg":"Clickhouse DB not ready"}
{"level":30,"time":1659880156494,"pid":18930,"hostname":"k8s-master2","name":"qryn","reqId":"req-2","res":{"statusCode":500},"responseTime":3.120368003845215,"msg":"request completed"}
# vector -c /etc/vector/vector_aggregate1.toml
2022-08-07T14:02:14.039256Z  INFO vector::app: Log level is enabled. level="vector=info,codec=info,vrl=info,file_source=info,tower_limit=trace,rdkafka=info,buffers=info,kube=info"
2022-08-07T14:02:14.039327Z  INFO vector::app: Loading configs. paths=["/etc/vector/vector_aggregate1.toml"]
2022-08-07T14:02:14.051613Z  INFO vector::topology::running: Running healthchecks.
2022-08-07T14:02:14.051843Z  INFO vector: Vector has started. debug="false" version="0.23.0" arch="x86_64" build_id="38c2435 2022-07-11"
2022-08-07T14:02:14.051856Z  INFO vector::app: API is disabled, enable by setting `api.enabled` to `true` and use commands like `vector top`.
2022-08-07T14:02:14.059999Z ERROR vector::topology::builder: msg="Healthcheck: Failed Reason." error=A non-successful status returned: 500 Internal Server Error component_kind="sink" component_type="loki" component_id=out component_name=out
lmangani commented 2 years ago

Clickhouse DB not ready

The issue is most likely still on the ClickHouse side, and those are the relevant logs to look at. There's no point in running qryn or vector until ClickHouse is up and running.
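
A quick way to verify that ClickHouse itself answers on the HTTP port qryn uses (a sketch, reusing the host and credentials from the commands earlier in this thread):

curl -u default:123456 'http://192.168.10.150:8123/?query=SELECT%201'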

ktpktr0 commented 2 years ago

I have tried the replicated tables and distributed tables directly, and they work normally.

lmangani commented 2 years ago

{"level":50,"time":1659880156493,"pid":18930,"hostname":"k8s-master2","name":"qryn","reqId":"req-2","err":"Clickhouse DB not ready\nError: Clickhouse DB not ready\n at Object.handler (/usr/lib/node_modules/qryn/lib/handlers/ready.js:15:14)\n at process.processTicksAndRejections (node:internal/process/task_queues:95:5)","msg":"Clickhouse DB not ready"}

The database is not ready; you need to look at what happened during table creation. Perhaps @akvlad has more suggestions.

ktpktr0 commented 2 years ago

I tried a two-node cluster (qryn-clickhouse-cluster/docker/docker-compose-clickhouse-cluster.yml), but that also failed to work.

akvlad commented 2 years ago

@ktpktr0 I pushed a new tag with more logs on ping. Please update the version and tell us the error you receive around the Clickhouse DB not ready one.
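
For a global npm install, which is what the /usr/lib/node_modules/qryn paths in the logs suggest, updating would look roughly like this (a sketch, assuming npm-based installation):

npm install -g qryn@latest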

ktpktr0 commented 2 years ago

@akvlad clickhouse log:

2022.08.13 09:58:44.050853 [ 54568 ] {} <Trace> HTTPHandler-factory: HTTP Request for HTTPHandler-factory. Method: GET, Address: 192.168.10.152:48920, User-Agent: axios/0.21.4, Content Type: , Transfer Encoding: identity, X-Forwarded-For: (none)
2022.08.13 09:58:44.050946 [ 54568 ] {} <Trace> DynamicQueryHandler: Request URI: ;192.168.10.151:8123;192.168.10.152:8123:8123/?query=SELECT%201
2022.08.13 09:58:44.050963 [ 54568 ] {} <Debug> HTTP-Session: 6f88b562-7f50-4ac5-b614-95e9ccfa1f18 Authenticating user 'default' from 192.168.10.152:48920
2022.08.13 09:58:44.050986 [ 54568 ] {} <Debug> HTTP-Session: 6f88b562-7f50-4ac5-b614-95e9ccfa1f18 Authenticated with global context as user 94309d50-4f52-5250-31bd-74fecac179db
2022.08.13 09:58:44.051167 [ 54568 ] {f8762910-f741-4b30-b700-7a5bea8ec770} <Debug> executeQuery: (from 192.168.10.152:48920) SELECT 1  (stage: Complete)
2022.08.13 09:58:44.051358 [ 54568 ] {f8762910-f741-4b30-b700-7a5bea8ec770} <Trace> ContextAccess (default): Access granted: SELECT(dummy) ON system.one
2022.08.13 09:58:44.051396 [ 54568 ] {f8762910-f741-4b30-b700-7a5bea8ec770} <Trace> InterpreterSelectQuery: FetchColumns -> Complete
2022.08.13 09:58:44.052770 [ 54568 ] {f8762910-f741-4b30-b700-7a5bea8ec770} <Information> executeQuery: Read 1 rows, 1.00 B in 0.001572661 sec., 635 rows/sec., 635.86 B/sec.
2022.08.13 09:58:44.052853 [ 54568 ] {f8762910-f741-4b30-b700-7a5bea8ec770} <Debug> DynamicQueryHandler: Done processing query
2022.08.13 09:58:44.052864 [ 54568 ] {f8762910-f741-4b30-b700-7a5bea8ec770} <Debug> MemoryTracker: Peak memory usage (for query): 4.00 MiB.
2022.08.13 09:58:44.052870 [ 54568 ] {} <Debug> HTTP-Session: 6f88b562-7f50-4ac5-b614-95e9ccfa1f18 Destroying unnamed session
2022.08.13 09:58:44.055715 [ 54568 ] {} <Trace> HTTPHandler-factory: HTTP Request for HTTPHandler-factory. Method: GET, Address: 192.168.10.152:48922, User-Agent: axios/0.21.4, Content Type: , Transfer Encoding: identity, X-Forwarded-For: (none)
2022.08.13 09:58:44.055779 [ 54568 ] {} <Trace> DynamicQueryHandler: Request URI: ;192.168.10.151:8123;192.168.10.152:8123:8123/?query=SELECT%201
2022.08.13 09:58:44.055796 [ 54568 ] {} <Debug> HTTP-Session: 75fc1b87-0755-4840-827d-250956e23458 Authenticating user 'default' from 192.168.10.152:48922
2022.08.13 09:58:44.055817 [ 54568 ] {} <Debug> HTTP-Session: 75fc1b87-0755-4840-827d-250956e23458 Authenticated with global context as user 94309d50-4f52-5250-31bd-74fecac179db
2022.08.13 09:58:44.055965 [ 54568 ] {2bf4ac2a-a34b-4daf-b923-6c372d1b97bb} <Debug> executeQuery: (from 192.168.10.152:48922) SELECT 1  (stage: Complete)
2022.08.13 09:58:44.056078 [ 54568 ] {2bf4ac2a-a34b-4daf-b923-6c372d1b97bb} <Trace> ContextAccess (default): Access granted: SELECT(dummy) ON system.one
2022.08.13 09:58:44.056106 [ 54568 ] {2bf4ac2a-a34b-4daf-b923-6c372d1b97bb} <Trace> InterpreterSelectQuery: FetchColumns -> Complete
2022.08.13 09:58:44.057023 [ 54568 ] {2bf4ac2a-a34b-4daf-b923-6c372d1b97bb} <Information> executeQuery: Read 1 rows, 1.00 B in 0.001028865 sec., 971 rows/sec., 971.94 B/sec.
2022.08.13 09:58:44.057096 [ 54568 ] {2bf4ac2a-a34b-4daf-b923-6c372d1b97bb} <Debug> DynamicQueryHandler: Done processing query
2022.08.13 09:58:44.057111 [ 54568 ] {2bf4ac2a-a34b-4daf-b923-6c372d1b97bb} <Debug> MemoryTracker: Peak memory usage (for query): 4.00 MiB.
2022.08.13 09:58:44.057117 [ 54568 ] {} <Debug> HTTP-Session: 75fc1b87-0755-4840-827d-250956e23458 Destroying unnamed session
2022.08.13 09:58:51.000258 [ 54710 ] {} <Trace> AsynchronousMetrics: Scanning /sys/block

qryn log:


# cd $(dirname $(readlink -f `which cloki`))   && CLICKHOUSE_SERVER="192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123" CLICKHOUSE_CLUSTERED=cloki CLICKHOUSE_AUTH="default:123456" CLICKHOUSE_DB="cloki"   npm start qryn--name "cloki" node --trace-warnings

> qryn@2.1.14 start
> node qryn.js "qryn--name" "cloki" "node"

{"level":30,"time":1661391774182,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"Initializing DB... cloki"}
(node:65399) [FST_MODULE_DEP_FASTIFY-WEBSOCKET] FastifyWarning.fastify-websocket: fastify-websocket has been deprecated. Use @fastify/websocket@5.0.0 instead.
(Use `node --trace-warnings ...` to show where the warning was created)
(node:65399) [FST_MODULE_DEP_FASTIFY-CORS] FastifyWarning.fastify-cors: fastify-cors has been deprecated. Use @fastify/cors@7.0.0 instead.
(node:65399) [FST_MODULE_DEP_FASTIFY-STATIC] FastifyWarning.fastify-static: fastify-static has been deprecated. Use @fastify/static@5.0.0 instead.
{"level":30,"time":1661391774472,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"Server listening at http://0.0.0.0:3100"}
{"level":30,"time":1661391774478,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"Qryn API up"}
{"level":30,"time":1661391774478,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"Qryn API listening on http://0.0.0.0:3100"}
{"level":30,"time":1661391774505,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"xxh ready"}
{"level":30,"time":1661391774518,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"Checking clickhouse capabilities"}
{"level":30,"time":1661391774521,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"LIVE VIEW: supported"}
{"level":30,"time":1661391774529,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"checking old samples support: samples_v2"}
{"level":30,"time":1661391774533,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"checking old samples support: samples"}
{"level":30,"time":1661391774586,"pid":65399,"hostname":"k8s-master3","name":"qryn","msg":"xxh ready"}
{"level":30,"time":1661391782051,"pid":65399,"hostname":"k8s-master3","name":"qryn","reqId":"req-1","req":{"method":"GET","url":"/ready","hostname":"cloki","remoteAddress":"192.168.10.160","remotePort":52638},"msg":"incoming request"}
{"level":50,"time":1661391782058,"pid":65399,"hostname":"k8s-master3","name":"qryn","err":"getaddrinfo ENOTFOUND 192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123\nError: getaddrinfo ENOTFOUND 192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123\n    at GetAddrInfoReqWrap.onlookup [as oncomplete] (node:dns:71:26)","msg":"getaddrinfo ENOTFOUND 192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123"}
{"level":50,"time":1661391782059,"pid":65399,"hostname":"k8s-master3","name":"qryn","reqId":"req-1","err":"Clickhouse DB not ready\nError: Clickhouse DB not ready\n    at Object.handler (/usr/lib/node_modules/qryn/lib/handlers/ready.js:15:14)\n    at processTicksAndRejections (node:internal/process/task_queues:96:5)","msg":"Clickhouse DB not ready"}
{"level":30,"time":1661391782061,"pid":65399,"hostname":"k8s-master3","name":"qryn","reqId":"req-1","res":{"statusCode":500},"responseTime":9.111939996480942,"msg":"request completed"}
{"level":30,"time":1661391782062,"pid":65399,"hostname":"k8s-master3","name":"qryn","reqId":"req-2","req":{"method":"GET","url":"/ready","hostname":"cloki","remoteAddress":"192.168.10.160","remotePort":52642},"msg":"incoming request"}
{"level":50,"time":1661391782064,"pid":65399,"hostname":"k8s-master3","name":"qryn","err":"getaddrinfo ENOTFOUND 192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123\nError: getaddrinfo ENOTFOUND 192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123\n    at GetAddrInfoReqWrap.onlookup [as oncomplete] (node:dns:71:26)","msg":"getaddrinfo ENOTFOUND 192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123"}
{"level":50,"time":1661391782065,"pid":65399,"hostname":"k8s-master3","name":"qryn","reqId":"req-2","err":"Clickhouse DB not ready\nError: Clickhouse DB not ready\n    at Object.handler (/usr/lib/node_modules/qryn/lib/handlers/ready.js:15:14)\n    at processTicksAndRejections (node:internal/process/task_queues:96:5)","msg":"Clickhouse DB not ready"}
{"level":30,"time":1661391782065,"pid":65399,"hostname":"k8s-master3","name":"qryn","reqId":"req-2","res":{"statusCode":500},"responseTime":2.506179004907608,"msg":"request completed"}
akvlad commented 2 years ago

@ktpktr0 Regarding getaddrinfo ENOTFOUND 192.168.10.150:8123;192.168.10.151:8123;192.168.10.152:8123: are you specifying multiple ClickHouse hostnames separated by ; in the CLICKHOUSE_SERVER variable? We don't support multiple ClickHouse servers in qrynJS; one qrynJS instance can request only one ClickHouse server.
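
So CLICKHOUSE_SERVER should contain a single host:port. A sketch of a corrected invocation, reusing the values from the commands above and pointing this qryn instance at just one node of the cluster:

CLICKHOUSE_SERVER="192.168.10.150:8123" CLICKHOUSE_CLUSTERED=cloki CLICKHOUSE_AUTH="default:123456" CLICKHOUSE_DB="cloki" npm start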

ktpktr0 commented 2 years ago

How does qryn connect when my Clickhouse is a cluster? Using chproxy?

akvlad commented 2 years ago
ktpktr0 commented 2 years ago

For example, I have a ClickHouse cluster with 3 shards.

Regarding option 2:

Nginx --> qryn1 + shard 1; qryn2 + shard 2; qryn3 + shard 3

Regarding option 3:

Based on the previous architecture, which tables need to be written through distributed tables? Can I use this example to set up the log schema: https://gist.github.com/coelho/c3b7bbb2c95caa61115d93692f9e4ae2

Regarding the first two architectures, when querying logs, if one shard is unavailable, will the query go to the other nodes?

akvlad commented 2 years ago

@ktpktr0 https://clickhouse.com/docs/en/engines/table-engines/special/distributed#distributed-reading-data This is the Distributed table engine used in the gist to provide cluster support. Every table with the Distributed engine that you see there is needed to provide it.

In fact you don't need to connect qryn-js to every node of the cluster; the Distributed tables will do all the work. Connecting to multiple nodes is only useful to spread the query load among them.
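
A minimal illustration, using the table names from the DDL earlier in this thread: queries against the Distributed table fan out to all shards, while the underscored local table only sees the local replica.

SELECT count() FROM cloki.samples_v3;   -- fans out across all shards via the Distributed engine
SELECT count() FROM cloki.samples_v3_;  -- reads only the local ReplicatedMergeTree replica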

ktpktr0 commented 2 years ago

Multiple qryn instances are connected to the ClickHouse cluster for high availability of qryn. When using distributed writes and reads, can multiple qryn instances connect to the same shard?

If all the tables need to be created as Distributed tables and used for both querying and writing, then the example I provided should work.

lmangani commented 2 years ago

@ktpktr0 if you're scaling qryn, the same configuration logic applies to all instances in the same "cluster" - since they are basically stateless, as long as they all talk to the same ClickHouse cluster (through a distributed table or otherwise), things should work and scale fine. You can also have instances dedicated to querying, without any ingestion, if you want to separate the two loads.
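
As a rough sketch (assuming two separate machines running qryn behind the Nginx mentioned above), identical instances can simply point at different nodes of the same ClickHouse cluster:

# on the first host
CLICKHOUSE_SERVER="192.168.10.150:8123" CLICKHOUSE_CLUSTERED=cloki CLICKHOUSE_AUTH="default:123456" CLICKHOUSE_DB="cloki" npm start
# on the second host
CLICKHOUSE_SERVER="192.168.10.151:8123" CLICKHOUSE_CLUSTERED=cloki CLICKHOUSE_AUTH="default:123456" CLICKHOUSE_DB="cloki" npm start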

ktpktr0 commented 2 years ago

Does load separation mean read/write separation? One or more qryn instances would be used to write logs, connecting to the first replica of each ClickHouse shard, and one or more qryn instances would be used to query logs, connecting to the second replica of each shard?

ktpktr0 commented 2 years ago

When I switch to a single ClickHouse node, it still reports an error:

# cd $(dirname $(readlink -f `which cloki`))   && CLICKHOUSE_SERVER="192.168.10.150:8123" CLICKHOUSE_CLUSTERED=cloki CLICKHOUSE_AUTH="default:123456" CLICKHOUSE_DB="cloki"   npm start qryn--name "cloki"

> qryn@2.1.14 start
> node qryn.js "qryn--name" "cloki"

{"level":30,"time":1661936164690,"pid":21107,"hostname":"k8s-master3","name":"qryn","msg":"Initializing DB... cloki"}
(node:21107) [FST_MODULE_DEP_FASTIFY-WEBSOCKET] FastifyWarning.fastify-websocket: fastify-websocket has been deprecated. Use @fastify/websocket@5.0.0 instead.
(Use `node --trace-warnings ...` to show where the warning was created)
(node:21107) [FST_MODULE_DEP_FASTIFY-CORS] FastifyWarning.fastify-cors: fastify-cors has been deprecated. Use @fastify/cors@7.0.0 instead.
(node:21107) [FST_MODULE_DEP_FASTIFY-STATIC] FastifyWarning.fastify-static: fastify-static has been deprecated. Use @fastify/static@5.0.0 instead.
{"level":30,"time":1661936164995,"pid":21107,"hostname":"k8s-master3","name":"qryn","msg":"Server listening at http://0.0.0.0:3100"}
{"level":30,"time":1661936164996,"pid":21107,"hostname":"k8s-master3","name":"qryn","msg":"Qryn API up"}
{"level":30,"time":1661936164996,"pid":21107,"hostname":"k8s-master3","name":"qryn","msg":"Qryn API listening on http://0.0.0.0:3100"}
{"level":30,"time":1661936165007,"pid":21107,"hostname":"k8s-master3","name":"qryn","msg":"xxh ready"}
{"level":50,"time":1661936165033,"pid":21107,"hostname":"k8s-master3","name":"qryn","err":"Error: Request failed with status code 500\nResponse: [500] Code: 36. DB::Exception: Cannot alter settings, because table engine doesn't support settings changes. (BAD_ARGUMENTS) (version 22.7.3.5 (official build))\n\nError: Request failed with status code 500\n    at createError (/usr/lib/node_modules/qryn/node_modules/_axios@0.21.4@axios/lib/core/createError.js:16:15)\n    at settle (/usr/lib/node_modules/qryn/node_modules/_axios@0.21.4@axios/lib/core/settle.js:17:12)\n    at IncomingMessage.handleStreamEnd (/usr/lib/node_modules/qryn/node_modules/_axios@0.21.4@axios/lib/adapters/http.js:269:11)\n    at IncomingMessage.emit (node:events:539:35)\n    at endReadableNT (node:internal/streams/readable:1345:12)\n    at processTicksAndRejections (node:internal/process/task_queues:83:21)","msg":"Error starting qryn"}
ktpktr0 commented 2 years ago

Could a configuration option (cluster) be added? When ClickHouse is a single machine it would be set to no; when ClickHouse is a cluster it would be set to yes, so there would be no need to manually execute the SQL statements.

lmangani commented 2 years ago

@ktpktr0 good point - we'll discuss this and come up with some possibilities such as allowing provisioning templates.

note: from the logs you seem to be using an outdated version of qryn

ktpktr0 commented 2 years ago

I look forward to these improvements. In addition, when logs from multiple services are stored in one table, will the performance of that single table degrade if the data volume is large or the retention period is long?

svc-design commented 2 years ago

{"level":50,"time":1663951939638,"pid":19,"hostname":"qryn-947657ddb-phphr","name":"qryn","err":"Error: Request failed with status code 404\nResponse: [404] Code: 60. DB::Exception: Table cloki.tempo_traces doesn't exist. (UNKNOWN_TABLE) (version 22.9.2.7 (official build))\n\nError: Request failed with status code 404\n at createError (/app/node_modules/axios/lib/core/createError.js:16:15)\n at settle (/app/node_modules/axios/lib/core/settle.js:17:12)\n at IncomingMessage.handleStreamEnd (/app/node_modules/axios/lib/adapters/http.js:269:11)\n at IncomingMessage.emit (node:events:525:35)\n at endReadableNT (node:internal/streams/readable:1358:12)\n at processTicksAndRejections (node:internal/process/task_queues:83:21)","msg":"Error starting qryn"}

lmangani commented 2 years ago

@panhaitao does a second restart create the tables?
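
A quick check after the restart (a sketch, assuming the cloki database used in this thread) is whether the missing table now exists:

EXISTS TABLE cloki.tempo_traces;
SHOW TABLES FROM cloki LIKE 'tempo%';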