Kafka Connect Plugin Configuration Notes, Part 12

Kafka Connect Plugin Configuration Notes, Part 1

Kafka Connect Plugin Configuration Notes, Part 2

Kafka Connect Plugin Configuration Notes, Part 3

Kafka Connect Plugin Configuration Notes, Part 4

Kafka Connect Plugin Configuration Notes, Part 5

Kafka Connect Plugin Configuration Notes, Part 6

Kafka Connect Plugin Configuration Notes, Part 7

Kafka Connect Plugin Configuration Notes, Part 8

Kafka Connect Plugin Configuration Notes, Part 9

Kafka Connect Plugin Configuration Notes, Part 10

Kafka Connect Plugin Configuration Notes, Part 11

IV. Problem Summary and Solutions

  1. NoSuchMethodError caused by the guava.jar bundled with Hive not matching the version shipped with Hadoop.

 SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/data/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/data/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
2020-05-17 15:15:01,030 INFO  [main] conf.HiveConf (HiveConf.java:findConfigFile(187)) - Found configuration file file:/data/hive/conf/hive-site.xml
Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V
        at org.apache.hadoop.conf.Configuration.set(Configuration.java:1357)
        at org.apache.hadoop.conf.Configuration.set(Configuration.java:1338)
        at org.apache.hadoop.mapred.JobConf.setJar(JobConf.java:518)
        at org.apache.hadoop.mapred.JobConf.setJarByClass(JobConf.java:536)
        at org.apache.hadoop.mapred.JobConf.<init>(JobConf.java:430)
        at org.apache.hadoop.hive.conf.HiveConf.initialize(HiveConf.java:5141)
        at org.apache.hadoop.hive.conf.HiveConf.<init>(HiveConf.java:5099)
        at org.apache.hadoop.hive.common.LogUtils.initHiveLog4jCommon(LogUtils.java:97)
        at org.apache.hadoop.hive.common.LogUtils.initHiveLog4j(LogUtils.java:81)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:699)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:318)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:232)

------------------------------------Solution------------------------------------------	
		The NoSuchMethodError on com.google.common.base.Preconditions.checkArgument occurs because the guava.jar that Hive ships with is a different version from the one in Hadoop (see the sketch below).
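
		A sketch of the usual fix, using the paths from the log above. The guava version numbers below are examples and will differ per installation; the idea is to keep Hadoop's newer guava and let Hive load that copy instead of its own older one.

		# See which guava each component ships
		ls /data/hive/lib/guava-*.jar
		ls /data/hadoop/share/hadoop/common/lib/guava-*.jar
		# Back up Hive's old guava and copy in Hadoop's (example version numbers)
		mv /data/hive/lib/guava-19.0.jar /data/hive/lib/guava-19.0.jar.bak
		cp /data/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /data/hive/lib/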
		
  2. HiveServer2 will not start listening on port 10000: troubleshooting process (a few basic checks are sketched below)
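
     A couple of generic first checks that usually narrow this down. The log path below assumes Hive's default of /tmp/<user>/hive.log for the user starting the service and may differ on your machine.

     # Is anything listening on the metastore (9083) and HiveServer2 (10000) ports?
     netstat -tlnp | grep -E '9083|10000'
     # Follow the HiveServer2 log during startup; metastore connectivity problems,
     # the guava conflict above, or a broken hive-site.xml all surface here.
     tail -f /tmp/root/hive.log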

  3. Hive cannot parse the table because of Decimal precision

    Reference for the solution approach

    ------------------------------SQL for the table auto-created by the HDFS plugin-----------------
    
    CREATE EXTERNAL TABLE `dataxhive1`(                 
     )                                                  
     PARTITIONED BY (                                   
       `partition` string COMMENT '')                   
     ROW FORMAT SERDE                                   
       'org.apache.hadoop.hive.serde2.avro.AvroSerDe'   
     STORED AS INPUTFORMAT                              
       'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'  
     OUTPUTFORMAT                                       
       'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' 
     LOCATION                                           
       'hdfs://master:9002/topics/dataxhive'        
     TBLPROPERTIES (                                    
       'avro.schema.literal'='{"type":"record","name":"dataxhive","fields":[{"name":"id","type":["null",{"type":"int","connect.name":"INT"}],"default":null},{"name":"a","type":["null",{"type":"double","connect.name":"DOUBLE"}],"default":null},{"name":"b","type":["null",{"type":"float","connect.name":"FLOAT"}],"default":null},{"name":"c","type":["null",{"type":"bytes","scale":2,"precision":64,"connect.version":1,"connect.parameters":{"scale":"2"},"connect.name":"org.apache.kafka.connect.data.Decimal","logicalType":"decimal"}],"default":null},{"name":"d","type":["null",{"type":"int","connect.version":1,"connect.name":"org.apache.kafka.connect.data.Date","logicalType":"date"}],"default":null},{"name":"e","type":["null",{"type":"string","connect.name":"STRING"}],"default":null},{"name":"f","type":["null",{"type":"int","connect.version":1,"connect.name":"org.apache.kafka.connect.data.Time","logicalType":"time-millis"}],"default":null},{"name":"g","type":["null",{"type":"long","connect.version":1,"connect.name":"org.apache.kafka.connect.data.Timestamp","logicalType":"timestamp-millis"}],"default":null}],"connect.version":1,"connect.name":"dataxhive"}',  
       'transient_lastDdlTime'='1589767539') 
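
    The field that breaks here is the Decimal column c: the avro.schema.literal above declares it with precision 64 (the source MySQL column is decimal(56,2)), while Hive's DECIMAL type supports a precision of at most 38, so Hive cannot map the Avro decimal to a table column. One possible workaround, assuming 38 digits are enough for the real data, is to shrink the source column so the schema generated downstream stays within Hive's limit:

    # Assumption: DECIMAL(38,2) is wide enough for the actual values in column c
    mysql -h master -uroot -p test -e "ALTER TABLE test MODIFY COLUMN c DECIMAL(38,2);"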
    

  • Commands used while debugging this document

 # List all connectors
 curl -X GET http://localhost:8083/connectors
 # Delete a specific connector
 curl -X DELETE http://localhost:8083/connectors/hdfs3-sink
 # Run the metastore in the background
 nohup hive --service metastore 1>/dev/null 2>&1 &
 # Run HiveServer2 in the background
 nohup hiveserver2 1>/dev/null 2>&1 &
 # Open a Hive SQL session
 beeline -u jdbc:hive2://localhost:10000 -n root
 # List the consumer groups on the Kafka cluster
 kafka-consumer-groups.sh --bootstrap-server localhost:9092 --list
 # Delete a specific consumer group
 kafka-consumer-groups.sh --bootstrap-server localhost:9092 --delete --group test
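
 Two related calls that also help when checking this pipeline: querying a connector's runtime status and listing topics (kafka-topics.sh accepts --bootstrap-server on Kafka 2.2 and later).

 # Show the runtime status of a specific connector and its tasks
 curl -X GET http://localhost:8083/connectors/hdfs3-sink/status
 # List the topics on the cluster
 kafka-topics.sh --bootstrap-server localhost:9092 --list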

Appendix:

MySQL table information

/*
Navicat MySQL Data Transfer

Source Server         : master
Source Server Version : 50721
Source Host           : master:3306
Source Database       : test

Target Server Type    : MYSQL
Target Server Version : 50721
File Encoding         : 65001

Date: 2020-05-18 17:01:06
*/

SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for test
-- ----------------------------
DROP TABLE IF EXISTS `test`;
CREATE TABLE `test` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `a` double(255,2) DEFAULT NULL,
  `b` float(255,3) DEFAULT NULL,
  `c` decimal(56,2) DEFAULT NULL,
  `d` date DEFAULT NULL,
  `e` varchar(255) DEFAULT NULL,
  `f` time(6) DEFAULT NULL,
  `g` datetime(6) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=24 DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of test
-- ----------------------------
INSERT INTO `test` VALUES ('1', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('2', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('3', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('4', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('5', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('6', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('7', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('8', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('9', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('10', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('11', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('12', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('13', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('14', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('15', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('16', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('17', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('18', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('19', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('20', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('21', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('22', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');
INSERT INTO `test` VALUES ('23', '1.20', '1.055', '1.25', '2020-05-16', 'dadadas', '16:04:42.000000', '2020-05-16 16:04:47.000000');

DataX job configuration

{
    "job":{
        "setting":{
            "speed":{
                "channel":1
            },
            "errorLimit":{
                "record":0,
                "percentage":0.02
            }
        },
        "content":[
            {
                "reader":{
                    "name":"mysqlreader",
                    "parameter":{
                        "username":"root",
                        "password":"",
                        "column":[
                            "id",
                            "a",
                            "b",
                            "c",
                            "d",
                            "e",
                            "f",
                            "g"
                        ],
                        "connection":[
                            {
                                "table":[
                                    "test"
                                ],
                                "jdbcUrl":[
                                    "jdbc:mysql://master:3306/test"
                                ]
                            }
                        ]
                    }
                },
                "writer":{
                    "name":"kafkaWriter",
                    "parameter":{
                        "kafkaConfig":{
                            "bootstrap.servers":"master:9092,slaver:9092"
                        },
                        "topics":"dataxhive",
                        "column":{
                            "type":"record",
                            "name":"dataxhive",
                            "fields":[
                                {
                                    "name":"id",
                                    "type":"int",
                                    "index":0
                                },
                                {
                                    "name":"a",
                                    "type":"double",
                                    "index":1
                                },
                                {
                                    "name":"b",
                                    "type":"float",
                                    "index":2
                                },
                                {
                                    "name":"c",
                                    "type":"decimal",
                                    "scale":2,
                                    "index":3
                                },
                                {
                                    "name":"d",
                                    "type":"date",
                                    "index":4
                                },
                                {
                                    "name":"e",
                                    "type":"string",
                                    "index":5
                                },
                                {
                                    "name":"f",
                                    "type":"time",
                                    "index":6
                                },
                                {
                                    "name":"g",
                                    "type":"datetime",
                                    "index":7
                                }
                            ]
                        },
                        "medusa":{
                            "hostName":"http://192.168.101.42:8083,http://192.168.101.43:8083",
                            "name":"dataxhive"
                        },
                        "schemaRegistry":{
                            "schema.registry.url":"http://192.168.101.43:8081",
                            "schemas.enable":true,
                            "value.converter":"io.confluent.connect.avro.AvroConverter"
                        }
                    }
                }
            }
        ]
    }
}
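
If the job JSON above is saved to a file, the DataX job is launched through the datax.py entry script. The paths below are placeholders and depend on where DataX is installed and where the job file is kept.

# Placeholder paths: point them at your DataX install directory and the file holding the JSON above
python /opt/datax/bin/datax.py /opt/datax/job/mysql2kafka_dataxhive.json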