POM依赖
<!-- https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8 -->
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>21.1.0.0</version>
</dependency>
使用jdbc连接oracle时url有三种格式
- jdbc:oracle:thin:@host:port:SID
- Example: jdbc:oracle:thin:@localhost:1521:orcl
- jdbc:oracle:thin:@//host:port/service_name
- Example:jdbc:oracle:thin:@//localhost:1521/orcl.city.com
- jdbc:oracle:thin:@TNSName
- Example: jdbc:oracle:thin:@TNS_ALIAS_NAME
Demo
SparkSQL参数参考:spark.apache.org/docs/2.4.4/…
#配置文件
#idmToHuisheng.url=jdbc:mysql://127.0.0.1:3306/dbName?characterEncoding=utf-8&useSSL=false&rewriteBatchedStatements=true
#idmToHuisheng.user=xxxx
#idmToHuisheng.passwd=xxxx
#idmToHuisheng.oracle.driver=oracle.jdbc.driver.OracleDriver
#idmToHuisheng.oracle.url=jdbc:oracle:thin:@127.0.0.1:1521/dsmtest
#idmToHuisheng.oracle.user=xxx
#idmToHuisheng.oracle.passwd=xxx
def getContext(name: String): SparkSession =
SparkSession
.builder()
.appName(name)
.master("local[*]")
.enableHiveSupport()
.getOrCreate()
def executor(sparksession: SparkSession): Unit ={
//Oracle连接配置文件
val properties = new Properties()
properties.setProperty("user", Props.get("idmToHuisheng.oracle.user"))
properties.setProperty("password", Props.get("idmToHuisheng.oracle.passwd"))
properties.setProperty("driver", Props.get("idmToHuisheng.oracle.driver"))
sparksession
.read
.format("jdbc")
.option("url", Props.get("idmToHuisheng.url"))
.option("dbtable", "idm_data_jobs")
.option("user", Props.get("idmToHuisheng.user"))
.option("password", Props.get("idmToHuisheng.passwd"))
.option("truncate", "true")
.option("batchsize","10000")
.load()
.withColumn("dt", lit(TimeFormatter.Y4M2D2.now))
.write
.mode(SaveMode.Overwrite)
.option("truncate", "true")
.jdbc(Props.get("idmToHuisheng.oracle.url"),"ODS.MTM_IDM_DATA_JOBS",properties)
}
def main(args: Array[String]): Unit = {
val ss = getContext(this.getClass.getName)
executor(ss)
ss.stop()
}
不想抽全表,希望使用query来抽取部分数据或Column
- (
SQL语句
) temp
sparkSession
.read
.format("jdbc")
.option("url", "jdbc:oracle:thin:@10.0.254.0:1521/服务名")
.option("driver", "oracle.jdbc.driver.OracleDriver")
.option("dbtable", "(select col1,col2 from lala) temp")
.option("user", "ETL_READ")
.option("password", "ETL123456")
.load()