hive使用UDF解析IP

1,282 阅读1分钟

IP 数据库采用 MaxMind 新版本的 GeoLite2-City.mmdb。

官方地址

package com.hoolai.bi;

import com.maxmind.geoip2.DatabaseReader;
import com.maxmind.geoip2.model.CityResponse;
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

import java.io.File;
import java.io.IOException;
import java.net.InetAddress;

/**
 * Hive generic UDF that looks up an IP address in a MaxMind GeoLite2-City
 * database file and returns its location as {@code "country,province,city"},
 * using the {@code zh-CN} names stored in the database.
 *
 * <p>Usage: {@code ip_address(ip, datafile)} where {@code ip} is a string
 * (or a bigint holding an IPv4 address as an unsigned 32-bit integer) and
 * {@code datafile} is the path of the {@code .mmdb} file.
 *
 * <p>The database reader is opened on first use and reused for subsequent
 * rows that name the same file; it is released in {@link #close()}.
 *
 * @author Ksssss (chenlin @ hoolai.com)
 * @since 2019-10-21
 */

@Description(name = "ip_address",
        value = "_FUNC_(ip,liberarys) - Find the address of the ip from the database")
public class IPToAddress extends GenericUDF {
    /** Locale key used to pick names out of the GeoIP2 name maps. */
    private static final String NAME_LOCALE = "zh-CN";

    /** Inspectors for the two arguments, filled in by {@link #initialize}. */
    PrimitiveObjectInspector[] argumentOIs;
    /** Database path of the row most recently passed to {@link #evaluate}. */
    String fileAddress = "";
    /** Textual IP of the row most recently passed to {@link #evaluate}. */
    String ipAddress = "";

    /** Reader kept open between rows; transient so it is never serialized with the UDF. */
    private transient DatabaseReader reader;
    /** Path that {@link #reader} was opened from. */
    private transient String readerPath;

    /**
     * Validates the argument types and declares the return type.
     *
     * @param objectInspectors inspectors for the call arguments; exactly two are
     *                         required: a string or bigint IP, then a string path
     * @return a Java {@code String} object inspector
     * @throws UDFArgumentException if the argument count or types are wrong
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
        if (objectInspectors.length != 2) {
            throw new UDFArgumentLengthException(
                    "The function GenericUDFGeoIP( 'input', 'datafile' ) "
                            + " accepts 2 arguments.");
        }

        boolean ipIsString = objectInspectors[0] instanceof StringObjectInspector;
        boolean ipIsLong = objectInspectors[0] instanceof LongObjectInspector;
        if (!ipIsString && !ipIsLong) {
            throw new UDFArgumentTypeException(0,
                    "The first parameter of GenericUDFGeoIP('input', 'datafile')"
                            + " should be string or bigint.");
        }
        if (!(objectInspectors[1] instanceof StringObjectInspector)) {
            throw new UDFArgumentTypeException(1,
                    "The second parameter of GenericUDFGeoIP('input', 'datafile')"
                            + " should be string.");
        }

        // Only store the inspectors once both have passed validation.
        argumentOIs = new PrimitiveObjectInspector[] {
                (PrimitiveObjectInspector) objectInspectors[0],
                (StringObjectInspector) objectInspectors[1]
        };
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.STRING);
    }

    /**
     * Reads the arguments of one row and performs the lookup.
     *
     * @param deferredObjects the IP argument followed by the database path
     * @return {@code "country,province,city"}, or {@code null} when an argument
     *         is NULL/empty, the database cannot be opened, or the lookup fails
     * @throws HiveException if an argument cannot be materialized
     */
    @Override
    public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
        Object ipArg = deferredObjects[0].get();
        Object fileArg = deferredObjects[1].get();

        // Reset per-row state so a NULL in this row never reuses the previous row's value.
        ipAddress = null;
        fileAddress = fileArg == null
                ? null
                : ((StringObjectInspector) argumentOIs[1]).getPrimitiveJavaObject(fileArg);
        if (ipArg == null || fileAddress == null || fileAddress.isEmpty()) {
            return null;
        }

        CityResponse response;
        try {
            InetAddress ip = toInetAddress(ipArg);
            if (ip == null) {
                return null;
            }
            response = readerFor(fileAddress).city(ip);
        } catch (Exception e) {
            // Best effort: an unreadable database or an address that is not in
            // it yields NULL for this row rather than failing the whole query.
            e.printStackTrace();
            return null;
        }

        String country = response.getCountry().getNames().get(NAME_LOCALE);
        String province = response.getMostSpecificSubdivision().getNames().get(NAME_LOCALE);
        String city = response.getCity().getNames().get(NAME_LOCALE);
        // A name missing for the locale is rendered as the text "null" by %s.
        return String.format("%s,%s,%s", country, province, city);
    }

    /**
     * Releases the cached database reader, then delegates to the superclass.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        try {
            closeReader();
        } finally {
            super.close();
        }
    }

    @Override
    public String getDisplayString(String[] strings) {
        assert (strings.length == 2);
        return "GenericUDFGeoIP ( " + strings[0] + ", " + strings[1] + ")";
    }

    /**
     * Converts the first argument to an address and records its text in
     * {@link #ipAddress}. A bigint is read as an unsigned 32-bit IPv4 value,
     * most significant octet first.
     *
     * @return the address, or {@code null} for an empty string or an
     *         out-of-range bigint
     */
    private InetAddress toInetAddress(Object ipArg) throws IOException {
        if (argumentOIs[0] instanceof LongObjectInspector) {
            long value = ((LongObjectInspector) argumentOIs[0]).get(ipArg);
            if (value < 0 || value > 0xFFFFFFFFL) {
                return null;
            }
            byte[] octets = {
                    (byte) (value >>> 24), (byte) (value >>> 16),
                    (byte) (value >>> 8), (byte) value
            };
            InetAddress address = InetAddress.getByAddress(octets);
            ipAddress = address.getHostAddress();
            return address;
        }

        ipAddress = ((StringObjectInspector) argumentOIs[0]).getPrimitiveJavaObject(ipArg);
        if (ipAddress == null || ipAddress.isEmpty()) {
            return null;
        }
        return InetAddress.getByName(ipAddress);
    }

    /** Returns the cached reader for {@code path}, opening it (and closing any other) if needed. */
    private DatabaseReader readerFor(String path) throws IOException {
        if (reader == null || !path.equals(readerPath)) {
            closeReader();
            reader = new DatabaseReader.Builder(new File(path)).build();
            readerPath = path;
        }
        return reader;
    }

    /** Closes and forgets the cached reader, if any. */
    private void closeReader() throws IOException {
        DatabaseReader previous = reader;
        reader = null;
        readerPath = null;
        if (previous != null) {
            previous.close();
        }
    }
}