rwynn / monstache

a go daemon that syncs MongoDB to Elasticsearch in realtime. you know, for search.
https://rwynn.github.io/monstache-site/
MIT License
1.28k stars 181 forks source link

Why can't we use cluster name ='myleguan-dy ' #393

Open linjin200 opened 4 years ago

linjin200 commented 4 years ago
mongo-url = "mongodb://linjin:root@192.168.30.251:27017,192.168.30.252:27017,192.168.1.212:27017/admin?replicaSet=mongors&slaveOk=true&readPreference=secondaryPreferred&connectTimeoutMS=300000"
# connect to the Elasticsearch REST API at the following node URLs
elasticsearch-urls = ["http://192.168.30.204:9200/", "http://192.168.30.103:9200/"]

# frequently required settings
# if you don't want to listen for changes to all collections in MongoDB but only a few
# e.g. only listen for inserts, updates, deletes, and drops from mydb.mycollection
# this setting does not initiate a copy, it is a filter on the oplog change listener only
#namespace-regex = '^myleguan\.dy_challenge$'      #aaa表示mongodb的数据库,bbb表示集合,表示要匹配的名字空间
#namespace-regex = '^myleguan\.dy_challenge$' 
# additionally, if you need to seed an index from a collection and not just listen for changes from the oplog
# you can copy entire collections or views from MongoDB to Elasticsearch
# direct-read-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#direct-read-namespaces = ["myleguan.dy_goods_info"]
direct-read-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# if you want to use MongoDB change streams instead of legacy oplog tailing add the following
# in this case you don't need regexes to filter collections.
# change streams require MongoDB version 3.6+
# change streams can only be combined with resume, replay, or cluster-name options on MongoDB 4+
# if you have MongoDB 4+ you can listen for changes to an entire database or entire deployment
# to listen to an entire db use only the database name.  For a deployment use an empty string.
# change-stream-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#change-stream-namespaces = ["myleguan.dy_goods_info"]
change-stream-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# additional settings
# compress requests to Elasticsearch
 gzip = true
# generate indexing statistics
 stats = true
# index statistics into Elasticsearch
 index-stats = true
# use the following PEM file for connections to MongoDB
# mongo-pem-file = "/path/to/mongoCert.pem"
# disable PEM validation
# mongo-validate-pem-file = false
# use the following user name for Elasticsearch basic auth
 elasticsearch-user = "elastic"
# use the following password for Elasticsearch basic auth
 elasticsearch-password = "root20182020"
# use the following number of go routines concurrently pushing documents to Elasticsearch
 elasticsearch-max-conns = 10
# direct-read-split-max = 9
# elasticsearch-max-bytes = 8000000
# elasticsearch-healthcheck-timeout = 30
# elasticsearch-client-timeout = 50

# use the following PEM file for connections to Elasticsearch
# elasticsearch-pem-file = "/path/to/elasticCert.pem"
# validate connections to Elasticsearch
# elastic-validate-pem-file = true
# propagate dropped collections in MongoDB as index deletes in Elasticsearch
dropped-collections = true
# propagate dropped databases in MongoDB as index deletes in Elasticsearch
dropped-databases = true
# do not start processing at the beginning of the MongoDB oplog
# if you set the replay to true you may see version conflict messages
# in the log if you had synced previously. This just means that you are replaying old docs which are already
# in Elasticsearch with a newer version. Elasticsearch is preventing the old docs from overwriting new ones.
#replay = false
# resume processing from a timestamp saved in a previous run
resume = true #从上次同步的时间开始同步
# do not validate that progress timestamps have been saved
#resume-write-unsafe = false
resume-write-unsafe = true
# override the name under which resume state is saved
#resume-name = "default"
# use a custom resume strategy (tokens) instead of the default strategy (timestamps)
# # tokens work with MongoDB API 3.6+ while timestamps work only with MongoDB API 4.0+
#resume-strategy = 1 
# exclude documents whose namespace matches the following pattern
# namespace-exclude-regex = '^myleguan\.(dy_goods_info|dy_video|dy_music)$'
# turn on indexing of GridFS file content
# connection settings
# connect to MongoDB using the following URL
#mongo-url = "mongodb://myleguan:46026949leguan@120.35.10.209:27001/myleguan"
#mongo-url = "mongodb://linjin:46026949@120.35.10.209:27001/admin"
#mongo-url = "mongodb://linjin:46026949@192.168.30.251/admin"
# connect to the Elasticsearch REST API at the following node URLs
elasticsearch-urls = ["http://192.168.30.204:9200/", "http://192.168.30.103:9200/"]

# frequently required settings
# if you don't want to listen for changes to all collections in MongoDB but only a few
# e.g. only listen for inserts, updates, deletes, and drops from mydb.mycollection
# this setting does not initiate a copy, it is a filter on the oplog change listener only
#namespace-regex = '^myleguan\.dy_challenge$'      #aaa表示mongodb的数据库,bbb表示集合,表示要匹配的名字空间
#namespace-regex = '^myleguan\.dy_challenge$' 
# additionally, if you need to seed an index from a collection and not just listen for changes from the oplog
# you can copy entire collections or views from MongoDB to Elasticsearch
# direct-read-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#direct-read-namespaces = ["myleguan.dy_goods_info"]
direct-read-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# if you want to use MongoDB change streams instead of legacy oplog tailing add the following
# in this case you don't need regexes to filter collections.
# change streams require MongoDB version 3.6+
# change streams can only be combined with resume, replay, or cluster-name options on MongoDB 4+
# if you have MongoDB 4+ you can listen for changes to an entire database or entire deployment
# to listen to an entire db use only the database name.  For a deployment use an empty string.
# change-stream-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#change-stream-namespaces = ["myleguan.dy_goods_info"]
change-stream-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# additional settings
# compress requests to Elasticsearch
 gzip = true
# generate indexing statistics
 stats = true
# index statistics into Elasticsearch
 index-stats = true
# use the following PEM file for connections to MongoDB
# mongo-pem-file = "/path/to/mongoCert.pem"
# disable PEM validation
# mongo-validate-pem-file = false
# use the following user name for Elasticsearch basic auth
 elasticsearch-user = "elastic"
# use the following password for Elasticsearch basic auth
 elasticsearch-password = "root20182020"
# use the following number of go routines concurrently pushing documents to Elasticsearch
 elasticsearch-max-conns = 10
# direct-read-split-max = 9
# elasticsearch-max-bytes = 8000000
# elasticsearch-healthcheck-timeout = 30
# elasticsearch-client-timeout = 50

# use the following PEM file for connections to Elasticsearch
# elasticsearch-pem-file = "/path/to/elasticCert.pem"
# validate connections to Elasticsearch
# elastic-validate-pem-file = true
# propagate dropped collections in MongoDB as index deletes in Elasticsearch
dropped-collections = true
# propagate dropped databases in MongoDB as index deletes in Elasticsearch
dropped-databases = true
# do not start processing at the beginning of the MongoDB oplog
# if you set the replay to true you may see version conflict messages
# in the log if you had synced previously. This just means that you are replaying old docs which are already
# in Elasticsearch with a newer version. Elasticsearch is preventing the old docs from overwriting new ones.
#replay = false
# resume processing from a timestamp saved in a previous run
resume = true #从上次同步的时间开始同步
# do not validate that progress timestamps have been saved
#resume-write-unsafe = false
resume-write-unsafe = true
# override the name under which resume state is saved
#resume-name = "default"
# use a custom resume strategy (tokens) instead of the default strategy (timestamps)
# # tokens work with MongoDB API 3.6+ while timestamps work only with MongoDB API 4.0+
#resume-strategy = 1 
# exclude documents whose namespace matches the following pattern
# namespace-exclude-regex = '^myleguan\.(dy_goods_info|dy_video|dy_music)$'
# turn on indexing of GridFS file content
# index-files = true
# turn on search result highlighting of GridFS content
# file-highlighting = true
# index GridFS files inserted into the following collections
# file-namespaces = ["users.fs.files"]
# print detailed information including request traces
# verbose = true
# enable clustering mode
# cluster-name = 'myleguan-dy'  #es集群名
cluster-name = ''
# do not exit after full-sync, rather continue tailing the oplog
 exit-after-direct-reads = false
[[mapping]]
namespace = "myleguan.dy_goods_info"
index = "dy_goods_info"

[[mapping]]
namespace = "myleguan.dy_video"
index = "dy_video"

[[mapping]]
namespace = "myleguan.dy_music"
index = "dy_music"

[[mapping]]
namespace = "myleguan.dy_user"
index = "dy_user"
[gtm-settings]
channel-size = 1024
buffer-size = 2048
#buffer-duration = 4s  

OK — this is the modified configuration. It runs without stopping. Can you help me optimize this configuration? And why can't we use cluster-name = 'myleguan-dy'?

elasticsearch-7.8.0 MongoDB version v4.0.12 monstache-6.6.0

rwynn commented 4 years ago

Unfortunately, there is not much debug logging around the cluster mode. I would suggest putting some logging statements into monstache at https://github.com/rwynn/monstache/blob/d37b60e4e92dfa16640e86fc31383d78f44eccbd/monstache.go#L4549 to see why it might be getting stuck.

When I've investigated this in the past I have not been able to replicate the process getting stuck in cluster mode on my end. Note that cluster mode requires that your MongoDB user is able to write to the cluster collection in the monstache database and create TTL indexes.