Closed motyla closed 6 years ago
Please share your metrictank.ini and storage*.conf files. Do you get this straight after a restart? Does it happen consistently?
Not straight after. It takes a while until the panic occurs.
storage-aggregation.conf
[default]
pattern = .*
xFilesFactor = 0.1
aggregationMethod = avg,min,max
storage-schemas.conf
chunkspan = 2h
numchunks = 1
[kkk]
pattern = kkk
retentions = 1h:1y:24h:1,1d:3y:24h:1
[mmm]
pattern = ^mmm.
retentions = 5m:7d:12h:1,1h:1y:24h:1,1d:3y:24h:1
[vvv30m]
pattern = vvv_.*_30_min
retentions = 30m:30d:24h:1,1h:1y:24h:1,1d:3y:24h:1
[zzz]
pattern = ^zzz.
retentions = 1m:7d:2h:2,5m:30d:12h:1,1h:1y:24h:1,1d:3y:24h:1
reorderBuffer = 60
[my_queue]
pattern = ^application.production.from_db.technical_alerts.my_queue.per_code.
retentions = 15m:30d:24h:1,6h:1y:24h:1,1d:3y:24h:1
[application]
pattern = ^application.
retentions = 1m:1d:2h:1,5m:7d:12h:1,15m:30d:12h:1,6h:1y:24h:1,1d:3y:24h:1
[bbb]
pattern = ^aaa.production..*.ccc.
retentions = 1h:1y:24h:1,1d:3y:24h:1
[aaa]
pattern = ^aaa.
retentions = 1m:7d:2h:2,5m:30d:12h:1,1h:1y:24h:1,1d:3y:24h:1
reorderBuffer = 60
[metrictank]
pattern = ^metrictank.
retentions = 1s:35d:10min:2
[carbon]
pattern = ^service_is_carbon-relay-ng\.
retentions = 1s:35d:10min:2
[default]
pattern = .*
retentions = 60s:1d:2h:1,1h:3d:24h:1
metrictank.ini:
instance = mt
accounting-period = 5min
drop-first-chunk = false
chunk-max-stale = 1h
metric-max-stale = 6h
gc-interval = 15m
warm-up-period = 1h
cassandra-addrs = "cluster addresses"
cassandra-keyspace = metrictank
cassandra-consistency = one
cassandra-host-selection-policy = tokenaware,hostpool-epsilon-greedy
cassandra-timeout = 10000
cassandra-read-concurrency = 100
cassandra-write-concurrency = 10
cassandra-read-queue-size = 200000
cassandra-write-queue-size = 100000
cassandra-retries = 10
cql-protocol-version = 4
cassandra-create-keyspace = false
cassandra-ssl = false
cassandra-ca-path = /etc/metrictank/ca.pem
cassandra-host-verification = true
cassandra-auth = false
cassandra-username = cassandra
cassandra-password = cassandra
block-profile-rate = 0
mem-profile-rate = 524288
proftrigger-freq = 60s
proftrigger-path = /tmp
proftrigger-min-diff = 1h
proftrigger-heap-thresh = 25000000000
log-level = 3
tracing-enabled = false
timeout = 10s
[retention]
schemas-file = /etc/metrictank/storage-schemas.conf
aggregations-file = /etc/metrictank/storage-aggregation.conf
[stats]
enabled = true
prefix = metrictank.stats.default.$instance
addr = localhost:20003
interval = 1
buffer-size = 20000
[chunk-cache]
max-size = 4294967296
[http]
listen = :6060
gzip = true
ssl = false
cert-file = /etc/ssl/certs/ssl-cert-snakeoil.pem
key-file = /etc/ssl/private/ssl-cert-snakeoil.key
max-points-per-req-soft = 1000000
max-points-per-req-hard = 20000000
multi-tenant = true
fallback-graphite-addr = http://graphite_addr
log-min-dur = 5min
time-zone = local
get-targets-concurrency = 20
[carbon-in]
enabled = false
addr = :2003
partition = 1
[kafka-mdm-in]
enabled = true
brokers = "kafka servers addresses"
topics = "topic lists"
offset = last
partitions = "partition set list"
offset-commit-interval = 5s
data-dir = /var/lib/metrictank
channel-buffer-size = 100000
consumer-fetch-min = 1
consumer-fetch-default = 32768
consumer-max-wait-time = 1s
consumer-max-processing-time = 1s
net-max-open-requests = 100
[cluster]
name = metrictank
primary-node = true
max-priority = 10
peers = "list of other metrictank servers"
mode = multi
http-timeout = 60s
min-available-shards = 0
[swim]
use-config = default-lan
bind-addr = x.x.x.x:7946
tcp-timeout = 10s
indirect-checks = 3
retransmit-mult = 4
suspicion-multi = 4
suspicion-max-timeout-mult = 6
push-pull-interval = 30s
probe-interval = 1s
probe-timeout = 500ms
disable-tcp-pings = false
awareness-max-multiplier = 8
gossip-nodes = 3
gossip-interval = 200ms
gossip-to-the-dead-time = 30s
enable-compression = true
dns-config-path = /etc/resolv.conf
[kafka-cluster]
enabled = true
brokers = "list of kafka brokers addresses"
topic = metricpersist
partitions = "partition set list"
partition-scheme = bySeries
offset = last
offset-commit-interval = 5s
backlog-process-timeout = 60s
data-dir = /var/lib/metrictank
[nsq-cluster]
enabled = false
[cassandra-idx]
enabled = true
keyspace = metrictank
hosts = "list of cassandra addresses"
protocol-version = 4
consistency = one
timeout = 10s
num-conns = 10
write-queue-size = 100000
max-stale = 0
prune-interval = 1h
update-cassandra-index = true
update-interval = 2h
ssl = false
ca-path = /etc/metrictank/ca.pem
host-verification = true
auth = false
username = cassandra
password = cassandra
create-keyspace = false
[memory-idx]
enabled = false
tag-support = false
match-cache-size = 1000
This is a bug introduced in https://github.com/grafana/metrictank/commit/fc64bb55e3eb0335b8bc57666100fcd0ad8f9f49
The problem is in: https://github.com/grafana/metrictank/blob/fc64bb55e3eb0335b8bc57666100fcd0ad8f9f49/mdata/aggmetric.go#L562-L567
If len(a.Chunks) is 0 and the reorderBuffer has points, then the call to a.getChunk() will panic.
This needs to be changed to:
if len(a.Chunks) == 0 {
if (a.rob == nil || !a.rob.HasData()) {
return true
} else {
return false
}
}
Seems like it could be
if len(a.Chunks) == 0 {
return a.rob == nil || a.rob.IsEmpty()
}
Hi @motyla, the fix is now in master and a new build is being produced.
Thanks, I will test it soon.
On Tue, Dec 19, 2017, 20:30 Dieter Plaetinck notifications@github.com wrote:
hi @motyla https://github.com/motyla the fix is now in master and building a new build.
— You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub https://github.com/grafana/metrictank/issues/788#issuecomment-352845788, or mute the thread https://github.com/notifications/unsubscribe-auth/AAGugOAYaak_BqG1FdLdpw7cPXAzvU2fks5tCACzgaJpZM4RFiYU .
The latest build, 0.7.4-556, solves that issue.
just upgraded from 0.7.4_419_gef90f826-1 to 0.7.4_435_ge8334309-1 and got this: