Big Data Custom Functions (UDF): Leveling Up as a Programmer


1, Configure the Maven pom.xml

```xml
<properties>
    <java.version>1.8</java.version>
    <lombok.version>1.16.18</lombok.version>
    <fastjson.version>1.2.4</fastjson.version>
    <commons.version>3.5</commons.version>
    <slf4j.version>1.7.7</slf4j.version>
    <log4j.version>1.2.17</log4j.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <scala.version>2.11.7</scala.version>
    <hadoop.version>2.7.0</hadoop.version>
    <spark.version>2.2.0</spark.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>2.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-metastore</artifactId>
        <version>2.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>2.3.1</version>
    </dependency>
    <dependency>
        <groupId>dom4j</groupId>
        <artifactId>dom4j</artifactId>
        <version>1.5.2</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.3</version>
            <configuration>
                <source>${java.version}</source>
                <target>${java.version}</target>
                <compilerVersion>${java.version}</compilerVersion>
                <encoding>UTF-8</encoding>
                <showWarnings>false</showWarnings>
            </configuration>
        </plugin>
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>assembly</goal>
                    </goals>
                </execution>
            </executions>
            <configuration>
                <archive>
                    <manifest>
                        <mainClass>com.dazhen.udf.encryption.EncrHive</mainClass>
                    </manifest>
                </archive>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <outputDirectory>${session.executionRootDirectory}/target/</outputDirectory>
            </configuration>
        </plugin>
    </plugins>
</build>
```

2, Implement the UDF for Hive

```java
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import utils.AESUtil;

/**
 * @author water
 * @desc AES encryption
 */
@Description(name = "AESEncr", value = "AESEncr context")
public class AESEncr extends UDF {

    /**
     * AES-encrypts content with the given password and returns the result.
     *
     * @param password encryption password
     * @param content  plaintext to encrypt
     * @return the encrypted string, or null if content is blank
     * @throws Exception if encryption fails
     */
    public String evaluate(String password, String content) throws Exception {
        if (StringUtils.isBlank(content)) {
            return null;
        }
        try {
            return AESUtil.aesEncrypt(password, content);
        } catch (Exception e) {
            throw e;
        }
    }
}
```
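The UDF delegates to `utils.AESUtil.aesEncrypt`, which the post does not show. Below is a minimal sketch of what such a helper could look like, assuming a password-seeded AES-128 key, the JDK `AES/ECB/PKCS5Padding` cipher, and Base64-encoded output; these choices are assumptions, not the original implementation.

```java
package utils;

import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.util.Base64;
import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.spec.SecretKeySpec;

public class AESUtil {

    /**
     * Derives a 128-bit AES key from the password and encrypts the content,
     * returning the ciphertext as a Base64 string. (Hypothetical implementation.)
     */
    public static String aesEncrypt(String password, String content) throws Exception {
        // Derive a deterministic key from the password (assumption: SHA1PRNG seeded with the password bytes).
        KeyGenerator keyGen = KeyGenerator.getInstance("AES");
        SecureRandom random = SecureRandom.getInstance("SHA1PRNG");
        random.setSeed(password.getBytes(StandardCharsets.UTF_8));
        keyGen.init(128, random);
        SecretKeySpec keySpec = new SecretKeySpec(keyGen.generateKey().getEncoded(), "AES");

        // Encrypt and Base64-encode the result so it round-trips as a Hive STRING.
        Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5Padding");
        cipher.init(Cipher.ENCRYPT_MODE, keySpec);
        byte[] encrypted = cipher.doFinal(content.getBytes(StandardCharsets.UTF_8));
        return Base64.getEncoder().encodeToString(encrypted);
    }
}
```

Because the key is derived deterministically from the password, the same password always produces a decryptable ciphertext; a production version would normally prefer a salted KDF and an authenticated mode such as GCM.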

3, Package the jar

```bash
mvn clean assembly:assembly -DskipTests
# After packaging, xxx-1.0.0-RELEASE-jar-with-dependencies.jar appears in the target folder
```

4, Deploy the jar

```bash
# 1. Put the packaged jar onto a path on HDFS
# -f forces an overwrite; the first argument is the local jar, the second the HDFS path
hdfs dfs -put -f xxx-1.0.0-RELEASE-jar-with-dependencies.jar /user/xxx/
```

```sql
-- 2. Enter hive or sparkSQL and register the function.
-- A function can be registered either permanently or temporarily.

-- Permanent function: a database name must be specified.
-- Syntax: CREATE FUNCTION [db_name.]function_name AS class_name
--         [USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri'] ];
create function udf.aesEncrypt as 'com.xxx.xxx.xxx.AESEncr' using jar 'hdfs:///user/xxx/xxx-1.0.0-RELEASE-jar-with-dependencies.jar';

-- Temporary function: scoped to the current session.
create temporary function encr as 'EncrHive';
```
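Once registered, the function can be called from any Hive client. As a quick smoke test, the hive-jdbc dependency already declared in the pom can be used to invoke the permanent function from Java; the class name `UdfSmokeTest`, the HiveServer2 URL, the credentials, and the literal arguments below are placeholders, not part of the original post.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class UdfSmokeTest {
    public static void main(String[] args) throws Exception {
        // Placeholder HiveServer2 address and credentials -- adjust for your cluster.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        try (Connection conn = DriverManager.getConnection(
                     "jdbc:hive2://localhost:10000/default", "hive", "");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     // Call the permanent function registered above on a literal value.
                     "SELECT udf.aesEncrypt('myPassword', 'hello world')")) {
            while (rs.next()) {
                System.out.println("encrypted: " + rs.getString(1));
            }
        }
    }
}
```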

