docker版mongodb4.0同步数据到es 的简单配置(mongo无安全校验)

622 阅读3分钟

1.新增配置文件路径

mkdir -p /data/javaProject/mongo-config/ /data/javaProject/mongo-data/   /data/javaProject/monstache-conf/
 
  1. mongodb配置文件。我用的是4.0版本。首先需要配置开启副本集,新增配置文件
vi /data/javaProject/mongo-config/mongod.conf.orig:
storage:
  dbPath: /data/db
  journal:
    enabled: true
#  engine:
#  mmapv1:
#  wiredTiger:

# where to write logging data.
systemLog:
  destination: file
  logAppend: true
  path: /var/log/mongodb/mongod.log
# network interfaces
net:
  port: 27017
  # listen on all interfaces so the server is reachable remotely
  bindIp: 0.0.0.0

# how the process runs
processManagement:
  timeZoneInfo: /usr/share/zoneinfo
#security:

#operationProfiling:
# replica set configuration (required for oplog tailing / change streams)
replication:
  replSetName: es
  oplogSizeMB: 10240
  1. 启动mongo,并同时启动 mongo-express(mongo webUI)
version: '3.7'

services:
  # MongoDB 4.0 started with the custom config (replica set enabled)
  mongo:
    image: mongo:4.0
    restart: always
    container_name: mongodb
    environment:
      TZ: Asia/Shanghai
    volumes:
      - /data/javaProject/mongo-data:/data/db
      - /data/javaProject/mongo-config/mongod.conf.orig:/etc/mongod.conf.orig
    command: --config /etc/mongod.conf.orig
    ports:
      # port mappings quoted — Compose recommends strings to avoid YAML
      # misreading digit:digit values as numbers
      - "27017:27017"
  # web UI for browsing MongoDB
  mongo-express:
    image: mongo-express
    container_name: mongo-express
    restart: always
    depends_on:
      - mongo
    ports:
      - "8081:8081"
  1. 配置mongo,初始化副本集、初始化参考:blog.csdn.net/jack_brandy…
docker exec -it mongodb mongo
show dbs
use admin
rs.initiate()  或  rs.initiate({_id:"es",members:[{_id:0,host:'127.0.0.1:27017'}]})(测试时未生效)
rs.conf()
  1. 查看 mongo-express http://HostIp:8081/

  2. 通过 mongo-express 新增需要监控的数据库 mydb

  3. 创建 elasticsearch elasticsearch-head,并通过 http://HostIp:9100/ 验证elasticsearch是否正常

docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms512m -Xmx512m" -e "discovery.type=single-node" elasticsearch:7.4.2

docker run -d -p 9100:9100 mobz/elasticsearch-head:5
  1. 编写monstache配置文件, 设置mydb的同步策略
 /data/javaProject/monstache-conf/monstache.config.toml
# connection settings

# connect to MongoDB using the following URL
# NOTE: change this IP address to match your host
mongo-url = "mongodb://192.168.10.20:27017"
# connect to the Elasticsearch REST API at the following node URLs
# NOTE: change this IP address to match your host
elasticsearch-urls = ["http://192.168.10.20:9200"]

# frequently required settings

# if you need to seed an index from a collection and not just listen and sync changes events
# you can copy entire collections or views from MongoDB to Elasticsearch
# "mydb" is the database whose collections are synced to Elasticsearch
direct-read-namespaces = ["mydb.*"]

# if you want to use MongoDB change streams instead of legacy oplog tailing use change-stream-namespaces
# change streams require at least MongoDB API 3.6+
# if you have MongoDB 4+ you can listen for changes to an entire database or entire deployment
# in this case you usually don't need regexes in your config to filter collections unless you target the deployment.
# to listen to an entire db use only the database name.  For a deployment use an empty string.
#change-stream-namespaces = [""]

# additional settings

# if you don't want to listen for changes to all collections in MongoDB but only a few
# e.g. only listen for inserts, updates, deletes, and drops from mydb.mycollection
# this setting does not initiate a copy, it is only a filter on the change event listener
#namespace-regex = ''
# compress requests to Elasticsearch
#gzip = true
# generate indexing statistics
#stats = true
# index statistics into Elasticsearch
#index-stats = true
# use the following PEM file for connections to MongoDB
#mongo-pem-file = ""
# disable PEM validation
#mongo-validate-pem-file = true
# use the following user name for Elasticsearch basic auth
elasticsearch-user = "elastic"
# use the following password for Elasticsearch basic auth
elasticsearch-password = "pwd"
# use 4 go routines concurrently pushing documents to Elasticsearch
elasticsearch-max-conns = 4
# use the following PEM file to connections to Elasticsearch
#elasticsearch-pem-file = ""
# validate connections to Elasticsearch
#elastic-validate-pem-file = false
# propagate dropped collections in MongoDB as index deletes in Elasticsearch
dropped-collections = true
# propagate dropped databases in MongoDB as index deletes in Elasticsearch
dropped-databases = true
# do not start processing at the beginning of the MongoDB oplog
# if you set the replay to true you may see version conflict messages
# in the log if you had synced previously. This just means that you are replaying old docs which are already
# in Elasticsearch with a newer version. Elasticsearch is preventing the old docs from overwriting new ones.
#replay = false
# resume processing from a timestamp saved in a previous run
resume = true
# do not validate that progress timestamps have been saved
#resume-write-unsafe = false
# override the name under which resume state is saved
#resume-name = "default"
# use a custom resume strategy (tokens) instead of the default strategy (timestamps)
# tokens work with MongoDB API 3.6+ while timestamps work only with MongoDB API 4.0+
resume-strategy = 0
# exclude documents whose namespace matches the following pattern
#namespace-exclude-regex = '^mydb\.ignorecollection$'
# turn on indexing of GridFS file content
#index-files = true
# turn on search result highlighting of GridFS content
#file-highlighting = true
# index GridFS files inserted into the following collections
#file-namespaces = ["users.fs.files"]
# print detailed information including request traces
verbose = true
# enable clustering mode
cluster-name = 'docker-cluster'
# do not exit after full-sync, rather continue tailing the oplog
#exit-after-direct-reads = false
  1. 重新编写docker-compose.yml配置文件,执行docker-compose up -d 新增运行monstache实例 monstache官网 rwynn.github.io/monstache-s…
version: '3.7'

services:

  # MongoDB 4.0 started with the custom config (replica set enabled)
  mongo:
    image: mongo:4.0
    restart: always
    container_name: mongodb
    environment:
      TZ: Asia/Shanghai
    volumes:
      - /data/javaProject/mongo-data:/data/db
      - /data/javaProject/mongo-config/mongod.conf.orig:/etc/mongod.conf.orig
    command: --config /etc/mongod.conf.orig
    ports:
      # port mappings quoted — Compose recommends strings to avoid YAML
      # misreading digit:digit values as numbers
      - "27017:27017"
  # web UI for browsing MongoDB
  mongo-express:
    image: mongo-express
    container_name: mongo-express
    restart: always
    depends_on:
      - mongo
    ports:
      - "8081:8081"
  # monstache syncs MongoDB to Elasticsearch using the mounted TOML config
  monstache:
    image: rwynn/monstache:rel6
    restart: always
    container_name: monstache
    volumes:
      - /data/javaProject/monstache-conf/monstache.config.toml:/app/monstache.config.toml
    command: -f /app/monstache.config.toml
  1. 查询配置结果

docker logs  -f monstache

2021-11-23T11:54:21.070105639Z ERROR 2021/11/23 11:54:21 Unable to connect to MongoDB using URL mongodb://192.168.253.96:27017: server selection error: server selection timeout, current topology: { Type: Unknown, Servers: [{ Addr: 192.168.253.96:27017, Type: RSGhost, Average RTT: 858604 }, ] }
2021-11-23T11:54:25.800712322Z INFO 2021/11/23 11:54:25 Started monstache version 6.7.6
2021-11-23T11:54:25.800765452Z INFO 2021/11/23 11:54:25 Go version go1.15.5
2021-11-23T11:54:25.800780184Z INFO 2021/11/23 11:54:25 MongoDB go driver v1.7.2
2021-11-23T11:54:25.800791256Z INFO 2021/11/23 11:54:25 Elasticsearch go driver 7.0.28
2021-11-23T11:54:25.801520284Z INFO 2021/11/23 11:54:25 Successfully connected to MongoDB version 4.0.27
2021-11-23T11:54:25.817122083Z INFO 2021/11/23 11:54:25 Successfully connected to Elasticsearch version 7.4.2
2021-11-23T11:54:25.817180073Z INFO 2021/11/23 11:54:25 Sending systemd READY=1
2021-11-23T11:54:25.817199703Z WARN 2021/11/23 11:54:25 Systemd notification not supported (i.e. NOTIFY_SOCKET is unset)
2021-11-23T11:54:25.833554855Z INFO 2021/11/23 11:54:25 Joined cluster docker-cluster
2021-11-23T11:54:25.838284652Z INFO 2021/11/23 11:54:25 Starting work for cluster docker-cluster
2021-11-23T11:54:25.838536322Z INFO 2021/11/23 11:54:25 Listening for events
2021-11-23T11:54:25.839620728Z INFO 2021/11/23 11:54:25 Watching changes on the deployment
2021-11-23T11:54:25.844352294Z INFO 2021/11/23 11:54:25 Resuming from timestamp {T:1637668465 I:13}
2021-11-23T11:54:25.844397012Z INFO 2021/11/23 11:54:25 Direct reads completed

10、修改mydb数据、访问http://HostIp:9100/ 查询是否数据是否同步 或者 查询monstache日志看是否有日志产生

docker logs -f monstache
2021-11-23T12:29:25.817639090Z TRACE 2021/11/23 12:29:25 POST /_bulk HTTP/1.1
2021-11-23T12:29:25.817702396Z Host: 192.168.253.96:9200
2021-11-23T12:29:25.817714451Z User-Agent: elastic/7.0.28 (linux-amd64)
2021-11-23T12:29:25.817723248Z Content-Length: 270
2021-11-23T12:29:25.817731778Z Accept: application/json
2021-11-23T12:29:25.817740555Z Authorization: Basic ZWxhc3RpYzpwd2Q=
2021-11-23T12:29:25.817748880Z Content-Type: application/x-ndjson
2021-11-23T12:29:25.817757536Z Accept-Encoding: gzip
2021-11-23T12:29:25.817765510Z 
2021-11-23T12:29:25.817774479Z {"index":{"_index":"mydb.showtb","_id":"619cdde4d7e0000c7037c986","version":7033741501116973057,"version_type":"external"}}
2021-11-23T12:29:25.817786009Z {"_name":"619cdea1d7e0000c7037c993","_names":"619cdea1d7e0000c7037c991","_scott":"619cdea1d7e0000c7037c994","_scotts":"619cdea1d7e0000c7037c992"}

11、默认情况下monstache会将mongo的数据库表插入全量到es中。如果有需要将数据进行加工后再更新到es,需要通过Go语言编写代码

11.1 默认情况下monstache提供4个函数,对数据进行加工。参考

https://rwynn.github.io/monstache-site/advanced/#middleware
func Map(input *monstachemap.MapperPluginInput) (output *monstachemap.MapperPluginOutput, err error)
func Filter(input *monstachemap.MapperPluginInput) (keep bool, err error)
func Pipeline(ns string, changeStream bool) (stages []interface{}, err error)
func Process(input *monstachemap.ProcessPluginInput) error

11.2 示例 myplugin.go

package main
import (
    "github.com/rwynn/monstache/monstachemap"
    "strings"
)
// a plugin to convert document values to uppercase
func Map(input *monstachemap.MapperPluginInput) (output *monstachemap.MapperPluginOutput, err error) {
    doc := input.Document
    for k, v := range doc {
        switch v.(type) {
        case string:
            doc[k] = strings.ToUpper(v.(string))
        }
    }
    output = &monstachemap.MapperPluginOutput{Document: doc}
    return
}

11.3 编译

go build -buildmode=plugin -o myplugin.so myplugin.go

11.4 运行

linux版运行
$GOPATH/bin/monstache -mapper-plugin-path /path/to/myplugin.so

docker版运行
docker run --rm --net=host -v ~/plugin:/tmp/plugin rwynn/monstache:6.7.4 -mapper-plugin-path /tmp/plugin/plugin.so