rwynn / monstache

a go daemon that syncs MongoDB to Elasticsearch in realtime. you know, for search.
https://rwynn.github.io/monstache-site/
MIT License
1.29k stars 181 forks source link

[BUG]INFO 2020/06/29 16:16:20 Resuming stream 'myleguan.dy_goods_info' from collection monstache.tokens using resume name 'myleguan-dy' stop here #391

Open linjin200 opened 4 years ago

linjin200 commented 4 years ago

Describe the bug A clear and concise description of what the bug is.

To Reproduce

Monstache configuration:


# describe config here
```
The operation was interrupted after a few minutes.

Steps to reproduce the behavior:

**Expected behavior**
A clear and concise description of what you expected to happen.

**Software information (please complete the following information):**
 - Operating System:
 - Monstache Version:
 - MongoDB Version:
 - Elasticsearch Version:
 - Docker Version: 

**Additional context**
Add any other context about the problem here.

**Screenshots**
If applicable, add screenshots to help explain your problem.
linjin200 commented 4 years ago

INFO 2020/06/29 15:52:40 Successfully connected to Elasticsearch version 7.8.0 INFO 2020/06/29 15:52:40 Joined cluster myleguan-dy INFO 2020/06/29 15:52:40 Starting work for cluster myleguan-dy INFO 2020/06/29 15:52:40 Listening for events INFO 2020/06/29 15:52:40 Watching changes on collection myleguan.dy_video INFO 2020/06/29 15:52:40 Watching changes on collection myleguan.dy_goods_info INFO 2020/06/29 15:52:40 Watching changes on collection myleguan.dy_music INFO 2020/06/29 15:52:40 Resuming stream 'myleguan.dy_video' from collection monstache.tokens using resume name 'myleguan-dy' INFO 2020/06/29 15:52:40 Resuming stream 'myleguan.dy_music' from collection monstache.tokens using resume name 'myleguan-dy' INFO 2020/06/29 15:52:40 Resuming stream 'myleguan.dy_goods_info' from collection monstache.tokens using resume name 'myleguan-dy' INFO 2020/06/29 16:16:10 Pausing work for cluster myleguan-dy INFO 2020/06/29 16:16:20 Resuming work for cluster myleguan-dy INFO 2020/06/29 16:16:20 Pausing work for cluster myleguan-dy INFO 2020/06/29 16:16:20 Resuming stream 'myleguan.dy_goods_info' from collection monstache.tokens using resume name 'myleguan-dy'

linjin200 commented 4 years ago

不知错在哪 — I don't know what's wrong.

linjin200 commented 4 years ago
elasticsearch-urls = ["http://192.168.30.204:9200/", "http://192.168.30.103:9200/"]

# frequently required settings
# if you don't want to listen for changes to all collections in MongoDB but only a few
# e.g. only listen for inserts, updates, deletes, and drops from mydb.mycollection
# this setting does not initiate a copy, it is a filter on the oplog change listener only
#namespace-regex = '^myleguan\.dy_challenge$'      #aaa表示mongodb的数据库,bbb表示集合,表示要匹配的名字空间
#namespace-regex = '^myleguan\.dy_challenge$' 
# additionally, if you need to seed an index from a collection and not just listen for changes from the oplog
# you can copy entire collections or views from MongoDB to Elasticsearch
# direct-read-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
direct-read-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music"]
# if you want to use MongoDB change streams instead of legacy oplog tailing add the following
# in this case you don't need regexes to filter collections.
# change streams require MongoDB version 3.6+
# change streams can only be combined with resume, replay, or cluster-name options on MongoDB 4+
# if you have MongoDB 4+ you can listen for changes to an entire database or entire deployment
# to listen to an entire db use only the database name.  For a deployment use an empty string.
# change-stream-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
change-stream-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music"]

# additional settings
# compress requests to Elasticsearch
 gzip = true
# generate indexing statistics
 stats = true
# index statistics into Elasticsearch
 index-stats = true
# use the following PEM file for connections to MongoDB
# mongo-pem-file = "/path/to/mongoCert.pem"
# disable PEM validation
# mongo-validate-pem-file = false
# use the following user name for Elasticsearch basic auth
 elasticsearch-user = "elastic"
# use the following password for Elasticsearch basic auth
 elasticsearch-password = "root20182020"
# use 4 go routines concurrently pushing documents to Elasticsearch
 elasticsearch-max-conns = 8
 direct-read-split-max = 9
 elasticsearch-max-bytes = 8000000
# elasticsearch-healthcheck-timeout = 30
# elasticsearch-client-timeout = 50

# use the following PEM file for connections to Elasticsearch
# elasticsearch-pem-file = "/path/to/elasticCert.pem"
# validate connections to Elasticsearch
# elastic-validate-pem-file = true
# propagate dropped collections in MongoDB as index deletes in Elasticsearch
dropped-collections = true
# propagate dropped databases in MongoDB as index deletes in Elasticsearch
dropped-databases = true
# do not start processing at the beginning of the MongoDB oplog
# if you set the replay to true you may see version conflict messages
# in the log if you had synced previously. This just means that you are replaying old docs which are already
# in Elasticsearch with a newer version. Elasticsearch is preventing the old docs from overwriting new ones.
replay = false
# resume processing from a timestamp saved in a previous run
resume = true #从上次同步的时间开始同步
# do not validate that progress timestamps have been saved
resume-write-unsafe = false
# override the name under which resume state is saved
resume-name = "default"
# use a custom resume strategy (tokens) instead of the default strategy (timestamps)
# # tokens work with MongoDB API 3.6+ while timestamps work only with MongoDB API 4.0+
resume-strategy = 1
# exclude documents whose namespace matches the following pattern
# namespace-exclude-regex = '^myleguan\.(dy_goods_info|dy_video|dy_music)$'
# turn on indexing of GridFS file content
# index-files = true
# turn on search result highlighting of GridFS content
# file-highlighting = true
# index GridFS files inserted into the following collections
# file-namespaces = ["users.fs.files"]
# print detailed information including request traces
# verbose = true
# enable clustering mode
 cluster-name = 'myleguan-dy'  #es集群名
# do not exit after full-sync, rather continue tailing the oplog
 exit-after-direct-reads = false
[[mapping]]
namespace = "myleguan.dy_goods_info"
index = "dy_goods_info"

[[mapping]]
namespace = "myleguan.dy_video"
index = "dy_video"

[[mapping]]
namespace = "myleguan.dy_music"
index = "dy_music"
[gtm-settings]
channel-size = 10240
rwynn commented 4 years ago

It's hard to tell what exactly the problem is but looks related to cluster mode. I would suggest the following:

cluster-name = ""
linjin200 commented 4 years ago
mongo-url = "mongodb://linjin:root@192.168.30.251:27017,192.168.30.252:27017,192.168.1.212:27017/admin?replicaSet=mongors&slaveOk=true&readPreference=secondaryPreferred&connectTimeoutMS=300000"
# connect to the Elasticsearch REST API at the following node URLs
elasticsearch-urls = ["http://192.168.30.204:9200/", "http://192.168.30.103:9200/"]

# frequently required settings
# if you don't want to listen for changes to all collections in MongoDB but only a few
# e.g. only listen for inserts, updates, deletes, and drops from mydb.mycollection
# this setting does not initiate a copy, it is a filter on the oplog change listener only
#namespace-regex = '^myleguan\.dy_challenge$'      #aaa表示mongodb的数据库,bbb表示集合,表示要匹配的名字空间
#namespace-regex = '^myleguan\.dy_challenge$' 
# additionally, if you need to seed an index from a collection and not just listen for changes from the oplog
# you can copy entire collections or views from MongoDB to Elasticsearch
# direct-read-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#direct-read-namespaces = ["myleguan.dy_goods_info"]
direct-read-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# if you want to use MongoDB change streams instead of legacy oplog tailing add the following
# in this case you don't need regexes to filter collections.
# change streams require MongoDB version 3.6+
# change streams can only be combined with resume, replay, or cluster-name options on MongoDB 4+
# if you have MongoDB 4+ you can listen for changes to an entire database or entire deployment
# to listen to an entire db use only the database name.  For a deployment use an empty string.
# change-stream-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#change-stream-namespaces = ["myleguan.dy_goods_info"]
change-stream-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# additional settings
# compress requests to Elasticsearch
 gzip = true
# generate indexing statistics
 stats = true
# index statistics into Elasticsearch
 index-stats = true
# use the following PEM file for connections to MongoDB
# mongo-pem-file = "/path/to/mongoCert.pem"
# disable PEM validation
# mongo-validate-pem-file = false
# use the following user name for Elasticsearch basic auth
 elasticsearch-user = "elastic"
# use the following password for Elasticsearch basic auth
 elasticsearch-password = "root20182020"
# use 4 go routines concurrently pushing documents to Elasticsearch
 elasticsearch-max-conns = 10
# direct-read-split-max = 9
# elasticsearch-max-bytes = 8000000
# elasticsearch-healthcheck-timeout = 30
# elasticsearch-client-timeout = 50

# use the following PEM file for connections to Elasticsearch
# elasticsearch-pem-file = "/path/to/elasticCert.pem"
# validate connections to Elasticsearch
# elastic-validate-pem-file = true
# propagate dropped collections in MongoDB as index deletes in Elasticsearch
dropped-collections = true
# propagate dropped databases in MongoDB as index deletes in Elasticsearch
dropped-databases = true
# do not start processing at the beginning of the MongoDB oplog
# if you set the replay to true you may see version conflict messages
# in the log if you had synced previously. This just means that you are replaying old docs which are already
# in Elasticsearch with a newer version. Elasticsearch is preventing the old docs from overwriting new ones.
#replay = false
# resume processing from a timestamp saved in a previous run
resume = true #从上次同步的时间开始同步
# do not validate that progress timestamps have been saved
#resume-write-unsafe = false
resume-write-unsafe = true
# override the name under which resume state is saved
#resume-name = "default"
# use a custom resume strategy (tokens) instead of the default strategy (timestamps)
# # tokens work with MongoDB API 3.6+ while timestamps work only with MongoDB API 4.0+
#resume-strategy = 1 
# exclude documents whose namespace matches the following pattern
# namespace-exclude-regex = '^myleguan\.(dy_goods_info|dy_video|dy_music)$'
# turn on indexing of GridFS file content
# connection settings
# connect to MongoDB using the following URL
#mongo-url = "mongodb://myleguan:46026949leguan@120.35.10.209:27001/myleguan"
#mongo-url = "mongodb://linjin:46026949@120.35.10.209:27001/admin"
#mongo-url = "mongodb://linjin:46026949@192.168.30.251/admin"
# connect to the Elasticsearch REST API at the following node URLs
elasticsearch-urls = ["http://192.168.30.204:9200/", "http://192.168.30.103:9200/"]

# frequently required settings
# if you don't want to listen for changes to all collections in MongoDB but only a few
# e.g. only listen for inserts, updates, deletes, and drops from mydb.mycollection
# this setting does not initiate a copy, it is a filter on the oplog change listener only
#namespace-regex = '^myleguan\.dy_challenge$'      #aaa表示mongodb的数据库,bbb表示集合,表示要匹配的名字空间
#namespace-regex = '^myleguan\.dy_challenge$' 
# additionally, if you need to seed an index from a collection and not just listen for changes from the oplog
# you can copy entire collections or views from MongoDB to Elasticsearch
# direct-read-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#direct-read-namespaces = ["myleguan.dy_goods_info"]
direct-read-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# if you want to use MongoDB change streams instead of legacy oplog tailing add the following
# in this case you don't need regexes to filter collections.
# change streams require MongoDB version 3.6+
# change streams can only be combined with resume, replay, or cluster-name options on MongoDB 4+
# if you have MongoDB 4+ you can listen for changes to an entire database or entire deployment
# to listen to an entire db use only the database name.  For a deployment use an empty string.
# change-stream-namespaces = ["mydb.mycollection", "db.collection", "test.test"]
#change-stream-namespaces = ["myleguan.dy_goods_info"]
change-stream-namespaces = ["myleguan.dy_goods_info", "myleguan.dy_video", "myleguan.dy_music", "myleguan.dy_user"]

# additional settings
# compress requests to Elasticsearch
 gzip = true
# generate indexing statistics
 stats = true
# index statistics into Elasticsearch
 index-stats = true
# use the following PEM file for connections to MongoDB
# mongo-pem-file = "/path/to/mongoCert.pem"
# disable PEM validation
# mongo-validate-pem-file = false
# use the following user name for Elasticsearch basic auth
 elasticsearch-user = "elastic"
# use the following password for Elasticsearch basic auth
 elasticsearch-password = "root20182020"
# use 4 go routines concurrently pushing documents to Elasticsearch
 elasticsearch-max-conns = 10
# direct-read-split-max = 9
# elasticsearch-max-bytes = 8000000
# elasticsearch-healthcheck-timeout = 30
# elasticsearch-client-timeout = 50

# use the following PEM file for connections to Elasticsearch
# elasticsearch-pem-file = "/path/to/elasticCert.pem"
# validate connections to Elasticsearch
# elastic-validate-pem-file = true
# propagate dropped collections in MongoDB as index deletes in Elasticsearch
dropped-collections = true
# propagate dropped databases in MongoDB as index deletes in Elasticsearch
dropped-databases = true
# do not start processing at the beginning of the MongoDB oplog
# if you set the replay to true you may see version conflict messages
# in the log if you had synced previously. This just means that you are replaying old docs which are already
# in Elasticsearch with a newer version. Elasticsearch is preventing the old docs from overwriting new ones.
#replay = false
# resume processing from a timestamp saved in a previous run
resume = true #从上次同步的时间开始同步
# do not validate that progress timestamps have been saved
#resume-write-unsafe = false
resume-write-unsafe = true
# override the name under which resume state is saved
#resume-name = "default"
# use a custom resume strategy (tokens) instead of the default strategy (timestamps)
# # tokens work with MongoDB API 3.6+ while timestamps work only with MongoDB API 4.0+
#resume-strategy = 1 
# exclude documents whose namespace matches the following pattern
# namespace-exclude-regex = '^myleguan\.(dy_goods_info|dy_video|dy_music)$'
# turn on indexing of GridFS file content
# index-files = true
# turn on search result highlighting of GridFS content
# file-highlighting = true
# index GridFS files inserted into the following collections
# file-namespaces = ["users.fs.files"]
# print detailed information including request traces
# verbose = true
# enable clustering mode
# cluster-name = 'myleguan-dy'  #es集群名
cluster-name = ''
# do not exit after full-sync, rather continue tailing the oplog
 exit-after-direct-reads = false
[[mapping]]
namespace = "myleguan.dy_goods_info"
index = "dy_goods_info"

[[mapping]]
namespace = "myleguan.dy_video"
index = "dy_video"

[[mapping]]
namespace = "myleguan.dy_music"
index = "dy_music"

[[mapping]]
namespace = "myleguan.dy_user"
index = "dy_user"
[gtm-settings]
channel-size = 1024
buffer-size = 2048
#buffer-duration = 4s  
linjin200 commented 4 years ago

ok
This is the modified configuration. It can now run without stopping. Could you help me optimize this configuration? Also, why can't we use cluster-name = 'myleguan-dy'?