mongoShake docker 部署（mongoshake:2.8.4）

mongoshake:2.8.4  docker

1. 下载镜像

sudo docker pull happysea/mongoshake:2.8.4

2. 同步 mongo 数据 (192.168.18.176:27018,192.168.18.176:27019,192.168.18.176:27020) 到 kafka (192.168.18.51:9092)，topic 为 seatest_topic（即 tunnel_address 中 @ 前面的部分）

sudo docker run -itd  --restart=always --name=mongoshake -p 9101:9101 -p 9100:9100 \
 -e tunnel_address=seatest_topic@192.168.18.51:9092 \
 -e log_dir=./logs/ \
 -e sync_mode=incr \
 -e mongo_urls=mongodb://root:root@192.168.18.176:27018,192.168.18.176:27019,192.168.18.176:27020   \
 -e tunnel_kafka_partition_number=1 \
 -e tunnel_message=json \
 -e incr_sync_mongo_fetch_method=oplog \
 -v /opt/docker/mongoshake/logs:/mongo-shake-v2.8.4/logs:rw \
happysea/mongoshake:2.8.4

eg:

sudo docker run -it -d --name=mongoshake-awb --restart=unless-stopped -m 500m  \
 --net=host \
 -e 'sync_mode=incr'  \
 -e mongo_urls=mongodb://root:root@192.168.186.54:27017   \
 -e 'filter_namespace_white=sea-booking.sea-air-info' \
 -e 'incr_sync_mongo_fetch_method=change_stream' \
 -e 'incr_sync_change_stream_watch_full_document=true' \
 -e 'tunnel=kafka'  \
 -e 'tunnel_address=sea.event@192.168.186.54:9092,192.168.186.176:9092,192.168.186.199:9092'  \
 -e 'checkpoint_storage_collection=ckpt_sea'  \
 -e 'filter_ddl_enable=true' \
 -v /var/lib/docker/mongoshake/logs/part:/mongo-shake-v2.8.4/logs \
 happysea/mongoshake:2.8.4

2.1 同步到其它地方，自己配置（略）

3. 变量参数

collector.conf 中的配置名，把其中的点 "." 替换为下划线 "_"，即为对应的环境变量名

eg: tunnel.address 变为 tunnel_address

然后指定 -e tunnel_address=topic@192.168.18.51:9092 即可

详见：

master_quorum = '${master_quorum:=false}'
full_sync.http_port = 9101
incr_sync.http_port = 9100
# profiling on net/http/profile
# profiling端口，用于查看内部go堆栈。
system_profile_port = 9200
# global log level: debug, info, warning, error. lower-level messages will be filtered out
log.level = '${log_level:=info}'
log.dir = '${log_dir}'
log.flush = '${log_flush:=false}'
sync_mode = '${sync_mode:=incr}'
mongo_urls = '${mongo_urls:=mongodb://root:root@192.168.18.176:27018,192.168.18.176:27019,192.168.18.176:27020}'
# please fill the source config server url if source mongodb is sharding.  #############
mongo_cs_url = '${mongo_cs_url}'
mongo_s_url = '${mongo_s_url}'
# enable source ssl
mongo_ssl_root_ca_file = '${mongo_ssl_root_ca_file}'
tunnel = '${tunnel:=kafka}'
############################################### tunnel.address mgtest1@192.168.18.51:9092 ###################################
tunnel.address = '${tunnel_address:=mongosheketest@192.168.18.51:9092}'
tunnel.message = '${tunnel_message:=json}'
tunnel.kafka.partition_number = '${tunnel_kafka_partition_number:=1}'
# canonical_extended_json
tunnel.json.format = '${tunnel_json_format}'
tunnel.mongo_ssl_root_ca_file = '${tunnel_mongo_ssl_root_ca_file}'
mongo_connect_mode = '${mongo_connect_mode:=secondaryPreferred}'
filter.namespace.black = '${filter_namespace_black}'
filter.namespace.white = '${filter_namespace_white}'
filter.pass.special.db = '${filter_pass_special_db}'
filter.ddl_enable = '${filter_ddl_enable:=false}'
filter.oplog.gids = '${filter_oplog_gids:=false}'
# 2.4版本以后不需要配置为源端cs的地址。
checkpoint.storage.url = '${checkpoint_storage_url}'
checkpoint.storage.db = '${checkpoint_storage_db:=mongoshake}'
# checkpoint collection's name.
# checkpoint存储的表的名字，如果启动多个mongoshake拉取同一个源可以修改这个表名以防止冲突。
checkpoint.storage.collection = '${checkpoint_storage_collection:=ckpt_default}'
# set if enable ssl
checkpoint.storage.url.mongo_ssl_root_ca_file = '${checkpoint_storage_url_mongo_ssl_root_ca_file}'
# 若checkpoint不存在，且该值不为1970-01-01T00:00:00Z，则会先检查源端oplog最老的时间是否
# 大于给定的时间，如果是则会直接报错退出。
checkpoint.start_position = '${checkpoint_start_position:=1970-01-01T00:00:00Z}'
transform.namespace = '${transform_namespace}'
full_sync.reader.collection_parallel = '${full_sync_reader_collection_parallel:=6}'
# the number of document writer thread in each collection.
# 同一个表内并发写的线程数，例如，8表示对于同一个表，将会有8个写线程进行并发写入。#######
full_sync.reader.write_document_parallel = '${full_sync_reader_write_document_parallel:=8}'
# number of documents in a batch insert in a document concurrence
# 目的端写入的batch大小，例如，128表示一个线程将会一次聚合128个文档然后再写入。#######
full_sync.reader.document_batch_size = '${full_sync_reader_document_batch_size:=128}'
# max number of fetching thread per table. default is 1
# 单个表最大拉取的线程数，默认是单线程拉取。需要具备splitVector权限。
# 注意：对单个表来说，仅支持索引对应的value是同种类型，如果有不同类型请勿启用该配置项！
full_sync.reader.parallel_thread = '${full_sync_reader_parallel_thread:=1}'
# the index used for parallel query if full_sync.reader.parallel_thread is set. the index
# should have only 1 field.
# 如果设置了full_sync.reader.parallel_thread，还需要设置该参数，并行拉取所扫描的index，value
# 必须是同种类型。对于副本集，建议设置_id；对于集群版，建议设置shard_key。key只能有1个field。
full_sync.reader.parallel_index = '${full_sync_reader_parallel_index:=_id}'
# drop the same name of collection in dest mongodb in full synchronization
# 同步时如果目的库存在，是否先删除目的库再进行同步，true表示先删除再同步，false表示不删除。
full_sync.collection_exist_drop = '${full_sync_collection_exist_drop:=true}'
# create index option.

# 全量期间数据同步完毕后，是否需要创建索引：none表示不创建，foreground表示创建前台索引，
# background表示创建后台索引。
full_sync.create_index = '${full_sync_create_index:=none}'
# convert insert to update when duplicate key found
# 如果_id存在在目的库，是否将insert语句修改为update语句。
full_sync.executor.insert_on_dup_update = '${full_sync_executor_insert_on_dup_update:=false}'
# filter orphan document for source type is sharding.
# 源端是sharding，是否需要过滤orphan文档
full_sync.executor.filter.orphan_document = '${full_sync_executor_filter_orphan_document:=false}'
# enable majority write in full sync.
# the performance will degrade if enable.
# 全量阶段写入端是否启用majority write
full_sync.executor.majority_enable = '${full_sync_executor_majority_enable:=false}'
# --------------------------- incremental sync configuration ---------------------------

#######################################   incr_sync.mongo_fetch_method ###########################################
incr_sync.mongo_fetch_method = '${incr_sync_mongo_fetch_method:=oplog}'

incr_sync.change_stream.watch_full_document = '${incr_sync_change_stream_watch_full_document:=false}'

incr_sync.oplog.gids = '${incr_sync_oplog_gids}'

incr_sync.shard_key = '${incr_sync_shard_key:=collection}' 

incr_sync.shard_by_object_id_whitelist = '${incr_sync_shard_by_object_id_whitelist}' 

incr_sync.worker = '${incr_sync_worker:=8}'

incr_sync.tunnel.write_thread = '${incr_sync_tunnel_write_thread:=8}'
# set the sync delay just like mongodb secondary slaveDelay parameter. unit second.
# 设置目的端的延迟，比如延迟源端20分钟，类似MongoDB本身主从同步slaveDelay参数，单位：秒
# 0表示不启用
incr_sync.target_delay = '${incr_sync_target_delay:=0}'
# memory queue configuration, plz visit FAQ document to see more details.
# do not modify these variables if the performance and resource usage can
# meet your needs.
# 内部队列的配置参数，如果目前性能足够不建议修改，详细信息参考FAQ。
incr_sync.worker.batch_queue_size = '${incr_sync_worker_batch_queue_size:=64}'
incr_sync.adaptive.batching_max_size = '${incr_sync_adaptive_batching_max_size:=1024}'
incr_sync.fetcher.buffer_capacity = '${incr_sync_fetcher_buffer_capacity:=256}'

incr_sync.executor.upsert = '${incr_sync_executor_upsert:=false}'
# oplog changes to Update while Insert found duplicated key (_id or unique-index)
# 如果_id存在在目的库，是否将insert语句修改为update语句。
incr_sync.executor.insert_on_dup_update = '${incr_sync_executor_insert_on_dup_update:=false}'

incr_sync.conflict_write_to = '${incr_sync_conflict_write_to:=none}'

incr_sync.executor.majority_enable = '${incr_sync_executor_majority_enable:=false}'

special.source.db.flag = '${special_source_db_flag}'

posted on 2021-11-04 14:07 lshan 阅读(774) 评论(0) 收藏举报

mongoShake docker 部署 （mongoshake:2.8.4）

mongoShake docker 部署（mongoshake:2.8.4）