SparkSQL外部数据源读写Oracle

888 阅读1分钟

POM依赖

        <!-- https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8 -->
        <dependency>
            <groupId>com.oracle.database.jdbc</groupId>
            <artifactId>ojdbc8</artifactId>
            <version>21.1.0.0</version>
        </dependency>

使用jdbc连接oracle时url有三种格式

  • jdbc:oracle:thin:@host:port:SID
    • Example: jdbc:oracle:thin:@localhost:1521:orcl
  • jdbc:oracle:thin:@//host:port/service_name
    • Example:jdbc:oracle:thin:@//localhost:1521/orcl.city.com
  • jdbc:oracle:thin:@TNSName
    • Example: jdbc:oracle:thin:@TNS_ALIAS_NAME

Demo

SparkSQL参数参考:spark.apache.org/docs/2.4.4/…

#配置文件
#idmToHuisheng.url=jdbc:mysql://127.0.0.1:3306/dbName?characterEncoding=utf-8&useSSL=false&rewriteBatchedStatements=true
#idmToHuisheng.user=xxxx
#idmToHuisheng.passwd=xxxx
#idmToHuisheng.oracle.driver=oracle.jdbc.driver.OracleDriver
#idmToHuisheng.oracle.url=jdbc:oracle:thin:@127.0.0.1:1521/dsmtest
#idmToHuisheng.oracle.user=xxx
#idmToHuisheng.oracle.passwd=xxx

  def getContext(name: String): SparkSession =
    SparkSession
      .builder()
      .appName(name)
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
      
def executor(sparksession: SparkSession): Unit ={
    //Oracle连接配置文件
    val properties = new Properties()
    properties.setProperty("user", Props.get("idmToHuisheng.oracle.user"))
    properties.setProperty("password", Props.get("idmToHuisheng.oracle.passwd"))
    properties.setProperty("driver", Props.get("idmToHuisheng.oracle.driver"))
    
    sparksession
                .read
                .format("jdbc")
                .option("url", Props.get("idmToHuisheng.url"))
                .option("dbtable", "idm_data_jobs")
                .option("user", Props.get("idmToHuisheng.user"))
                .option("password", Props.get("idmToHuisheng.passwd"))
                .option("truncate", "true")
                .option("batchsize","10000")
                .load()
                  .withColumn("dt", lit(TimeFormatter.Y4M2D2.now))
                  .write
                  .mode(SaveMode.Overwrite)
                  .option("truncate", "true")
                  .jdbc(Props.get("idmToHuisheng.oracle.url"),"ODS.MTM_IDM_DATA_JOBS",properties)
}
      
def main(args: Array[String]): Unit = {
    val ss = getContext(this.getClass.getName)
    executor(ss)
    ss.stop()
  }

不想抽全表,希望使用query来抽取部分数据或Column

  • ( SQL语句 ) temp
sparkSession
.read
.format("jdbc")
.option("url", "jdbc:oracle:thin:@10.0.254.0:1521/服务名")
.option("driver", "oracle.jdbc.driver.OracleDriver")
.option("dbtable", "(select col1,col2 from lala) temp")
.option("user", "ETL_READ")
.option("password", "ETL123456")
.load()