Spark SQL写入到ClickHouse

2,745 阅读1分钟

加入依赖

        <!-- clickhouse -->
        <dependency>
            <groupId>com.github.housepower</groupId>
            <artifactId>clickhouse-native-jdbc-shaded</artifactId>
            <version>2.5.4</version>
        </dependency>

Demo

/**
 * Writes the result of a Spark SQL query into ClickHouse through the
 * housepower native JDBC driver (registered via the "jdbc" data source).
 *
 * @author XinXing
 * @since 2021/3/23 16:14
 */
object Demo extends SparkBase {
  def main(args: Array[String]): Unit = {
    // getContext is provided by SparkBase; "Demo" is the application name.
    val sparkSession: SparkSession = getContext("Demo")

    try {
      val sql =
        """
          |select
          |    *
          |from dw.dw_xxxxxxx_172
          |""".stripMargin
      val dataFrame = sparkSession.sql(sql)

      dataFrame
        .write
        .format("jdbc")
        .option("driver", "com.github.housepower.jdbc.ClickHouseDriver")
        // rewriteBatchedStatements turns on batched inserts in the driver; enable as needed.
        .option("url", "jdbc:clickhouse://127.0.0.1:9000?rewriteBatchedStatements=true")
        .option("batchsize", 10000)
        .option("user", "default")
        .option("password", "123456")
        .option("dbtable", "cli_test")
        // Required when the save mode is Overwrite: truncate the table instead of
        // dropping and recreating it. See the Spark SQL JDBC data source docs.
        .option("truncate", "true")
        // DDL appended when Spark has to create the target table.
        .option("createTableOptions", "ENGINE=Log()")
        // NOTE(review): presumably NONE because ClickHouse has no standard
        // transaction isolation — confirm against the driver documentation.
        .option("isolationLevel", "NONE")
        .mode(SaveMode.Overwrite)
        .save()
    } finally {
      // Always release the SparkSession, even when the query or the write fails.
      sparkSession.stop()
    }
  }
}