HiveSQL 校验

4,275 阅读4分钟

1.问题分析

输入一个hiveSQL,校验语义、语法是否正确,并校验数据库中是否存在所查询的表及字段

  • 1.1 HiveSQL 语义校验

          如:select * from table1 where id = 1 and class = '英语'
          以上语法属于正确语法. 如果将 '英语' 去掉变为
          select * from table1 where id = 1 and class =
          语法将报错(error)
    
  • 1.2 HiveSQL 语法校验

      将人为定义的规则转化为语法校验规则,如需要取表中的字段score 和 name
      (1)select * from table 虽然语义无错误,但是没有人为规定的score 和name.则错误
    
  • 1.3 HiveSQL中的校验

      当数据库中的表中没有sex字段时候
      
      如select score ,name from table2 where sex = 2;
      (1)满足HiveSQL语法规则
      (2)满足人为语法规则 score 和name
      (3)但是数据库中没有sex 字段.
      满足(1)(2)不满足(3).校验失败
    

2.源码解析

(1)方案一:借用阿里连接池Druid中的SQLUtils 方法

  • 能够完成语法和语义校验.但是不能校验数据库中是否有查询的字段和表名
  • 源码略

(2)方案二:与hive进行通信进行SQL校验

  • 利用hive服务端进行SQL校验
  • 源码如下

在hive-jdbc-1.1.0中找到类 HiveStatement.class

 // Reference: hive-jdbc-1.1.0, HiveStatement#execute (decompiled).
 // Submits the SQL to HiveServer2 asynchronously, then polls the operation
 // status until it reaches a terminal state.
 public boolean execute(String sql) throws SQLException {
        this.checkConnection("execute");
        this.reInitState();

        // Phase 1: validate/submit the SQL. The statement request is sent to
        // the server, which compiles the SQL; a failed status surfaces as a
        // SQLException from verifySuccessWithInfo below.
        TExecuteStatementReq execReq = new TExecuteStatementReq(this.sessHandle, sql);
        execReq.setRunAsync(true);
        execReq.setConfOverlay(this.sessConf);
        execReq.setQueryTimeout((long)this.queryTimeout);

        try {
            TExecuteStatementResp execResp = this.client.ExecuteStatement(execReq);
            // Throws SQLException if the server status reports an error.
            Utils.verifySuccessWithInfo(execResp.getStatus());
            this.stmtHandle = execResp.getOperationHandle();
            this.isExecuteStatementFailed = false;
        } catch (SQLException var9) {
            this.isExecuteStatementFailed = true;
            this.isLogBeingGenerated = false;
            throw var9;
        } catch (Exception var10) {
            this.isExecuteStatementFailed = true;
            this.isLogBeingGenerated = false;
            // "08S01": SQLSTATE for a communication-link failure.
            throw new SQLException(var10.toString(), "08S01", var10);
        }

        // Phase 2: execute — poll the operation status until the query
        // finishes, is cancelled, times out, or errors.
        TGetOperationStatusReq statusReq = new TGetOperationStatusReq(this.stmtHandle);
        boolean operationComplete = false;

        while(!operationComplete) {
            try {
                TGetOperationStatusResp statusResp = this.client.GetOperationStatus(statusReq);
                Utils.verifySuccessWithInfo(statusResp.getStatus());
                if (statusResp.isSetOperationState()) {
                    switch(statusResp.getOperationState()) {
                    case CLOSED_STATE:
                    case FINISHED_STATE:
                        // Terminal success states — stop polling.
                        operationComplete = true;
                        break;
                    case CANCELED_STATE:
                        throw new SQLException("Query was cancelled", "01000");
                    case TIMEDOUT_STATE:
                        throw new SQLTimeoutException("Query timed out after " + this.queryTimeout + " seconds");
                    case ERROR_STATE:
                        throw new SQLException(statusResp.getErrorMessage(), statusResp.getSqlState(), statusResp.getErrorCode());
                    case UKNOWN_STATE: // NOTE: the "UKNOWN" typo is in the upstream Thrift enum itself
                        throw new SQLException("Unknown query", "HY000");
                    case INITIALIZED_STATE:
                    case PENDING_STATE:
                    case RUNNING_STATE:
                        // Still in progress — fall through and poll again.
                    }
                }
            } catch (SQLException var7) {
                this.isLogBeingGenerated = false;
                throw var7;
            } catch (Exception var8) {
                this.isLogBeingGenerated = false;
                throw new SQLException(var8.toString(), "08S01", var8);
            }
        }

        this.isLogBeingGenerated = false;
        // Per the JDBC contract, return true only when a result set is available.
        if (!this.stmtHandle.isHasResultSet()) {
            return false;
        } else {
            this.resultSet = (new Builder(this)).setClient(this.client).setSessionHandle(this.sessHandle).setStmtHandle(this.stmtHandle).setMaxRows(this.maxRows).setFetchSize(this.fetchSize).setScrollable(this.isScrollableResultset).build();
            return true;
        }
    }
  • 其中
    // sessHandle holds the HiveServer2 session state used for Thrift calls
    // on this connection (e.g. the sessionID).
    // sql is the incoming SQL statement text (String).
 TExecuteStatementReq execReq = new TExecuteStatementReq(this.sessHandle, sql);
        // Submit asynchronously (true).
        execReq.setRunAsync(true);
        execReq.setConfOverlay(this.sessConf);
        // Set the query timeout.
        execReq.setQueryTimeout((long)this.queryTimeout);

        try {
        // Execute the request through the Thrift client; the response object
        // carries the server's status information.
            TExecuteStatementResp execResp = this.client.ExecuteStatement(execReq);
        // Utils.verifySuccessWithInfo inspects the returned status and throws
        // an exception if the server reported an error.
            Utils.verifySuccessWithInfo(execResp.getStatus());//(validation ends here)

            this.stmtHandle = execResp.getOperationHandle();
            this.isExecuteStatementFailed = false;
        } catch (SQLException var9) {
            this.isExecuteStatementFailed = true;
            this.isLogBeingGenerated = false;
            throw var9;
        } catch (Exception var10) {
            this.isExecuteStatementFailed = true;
            this.isLogBeingGenerated = false;
            throw new SQLException(var10.toString(), "08S01", var10);
        }

3.实现(第三种方法)

package com.xes.abtest.hive.common.service.impl;

import com.alibaba.druid.pool.DruidDataSource;
import com.alibaba.druid.pool.DruidPooledStatement;
import com.alibaba.druid.proxy.jdbc.StatementProxyImpl;
import com.xes.abtest.hive.common.api.ApiResult;
import com.xes.abtest.hive.common.service.HiveSqlVerifyService;
import org.apache.hive.jdbc.HiveConnection;
import org.apache.hive.jdbc.HiveStatement;
import org.apache.hive.service.cli.thrift.*;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.jdbc.datasource.DataSourceUtils;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;

/**
 * @author:Satan
 * @date :14:23
 * @description :
 */
@Component
public class HiveSqlVerifyServiceImpl implements HiveSqlVerifyService {

//先要在@Configuration中配置DruidDataSource 连接池配置好
    @Resource
    @Qualifier("hiveDruidDataSource")
    DruidDataSource druidDataSource;


//    @Resource(name = "getDruidPooledConnection")
//    DruidPooledConnection druidPooledConnection;



    @Override
    public ApiResult sqlVerify(String sql) throws SQLException {
//throws SQLException, NoSuchFieldException, IllegalAccessException, TException
        HiveConnection connection = null;
        HiveStatement statement = null;
        TExecuteStatementResp execResp = null;
        TExecuteStatementReq execReq = null;
        //拿到连接
        Connection con = DataSourceUtils.getConnection(druidDataSource);

        //以下是利用反射,找到我需要的类和参数.
        try {

          ((DruidPooledStatement)druidPooledConnection.createStatement()).getStatement();
            Statement stat = con.createStatement();

            stat = ((DruidPooledStatement) stat).getStatement();
            Field prox = StatementProxyImpl.class.getDeclaredField("statement");
            prox.setAccessible(true);

             statement = (HiveStatement)prox.get(stat);
          

          (HiveConnection)druidDataSource.getConnection().getConnection();


            Field hiveSessHandle = HiveStatement.class.getDeclaredField("sessHandle");
            hiveSessHandle.setAccessible(true);
            TSessionHandle sessHandle = (TSessionHandle) hiveSessHandle.get(statement);
            Field hiveClient = HiveStatement.class.getDeclaredField("client");
            hiveClient.setAccessible(true);
            TCLIService.Iface client = (TCLIService.Iface) hiveClient.get(statement);
            Field sessConf = HiveStatement.class.getDeclaredField("sessConf");
            sessConf.setAccessible(true);


            Map<String, String> o = (Map<String, String>)sessConf.get(statement);

            execReq = new TExecuteStatementReq(sessHandle, sql);
            execReq.setRunAsync(true);
            execReq.setQueryTimeout(0L);
            execReq.setConfOverlay(o);

            execResp = client.ExecuteStatement(execReq);
        } catch (Exception e) {


            e.printStackTrace();
            throw new SQLException("sqlVerify异常");
        }finally {

        //利用工具释放释放连接
            DataSourceUtils.releaseConnection(con,druidDataSource);



        }
        boolean equals = execResp.getStatus().getStatusCode().equals(TStatusCode.SUCCESS_STATUS);

            if(equals){
                //返回统一的类型
                return ApiResult.ok();
            }else {
                return ApiResult.fail(execResp.getStatus().getErrorMessage());
            }


        }


}



  • Hive的Druid连接池配置.
package com.xes.abtest.hive.common.config;


import com.alibaba.druid.pool.DruidDataSource;
import com.alibaba.druid.pool.DruidPooledConnection;
import com.xes.abtest.hive.common.util.JdbcTemplateEdit;
import lombok.extern.slf4j.Slf4j;
import org.mybatis.spring.annotation.MapperScan;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.jdbc.core.JdbcTemplate;

import javax.sql.DataSource;
import java.sql.SQLException;


/**
 * Druid connection-pool configuration for the Hive data source.
 *
 * <p>Registers the pooled {@link DruidDataSource} beans plus the
 * JdbcTemplate helpers built on top of them.
 *
 * @author Satan
 */
@Slf4j
@Configuration
@EnableConfigurationProperties({DataSourceProperties.class, DataSourceCommonProperties.class})
@MapperScan(basePackages = "com.xes.abtest.hive.common.mapper")
public class HiveDruidConfig {

    // NOTE(review): caching a single pooled connection in a mutable public
    // static field means it is shared by every caller and never returned to
    // the pool; prefer borrowing from the DataSource per use — confirm before
    // removing, as other code may read this field.
    public static DruidPooledConnection druidPooledConnection = null;

    private static final Logger logger = LoggerFactory.getLogger(HiveDruidConfig.class);

    @Autowired
    private DataSourceProperties dataSourceProperties;

    @Autowired
    private DataSourceCommonProperties dataSourceCommonProperties;

    /**
     * Primary Hive Druid data source, configured from the externalized
     * "hive" connection properties plus the shared pool-tuning properties.
     */
    @Bean("hiveDruidDataSource")
    @Qualifier("hiveDruidDataSource")
    public DruidDataSource dataSource() {
        DruidDataSource datasource = new DruidDataSource();

        // Connection settings
        datasource.setUrl(dataSourceProperties.getHive().get("url"));
        datasource.setUsername(dataSourceProperties.getHive().get("username"));
        datasource.setPassword(dataSourceProperties.getHive().get("password"));
        datasource.setDriverClassName(dataSourceProperties.getHive().get("driver-class-name"));

        // Shared pool-tuning settings
        datasource.setInitialSize(dataSourceCommonProperties.getInitialSize());
        datasource.setMinIdle(dataSourceCommonProperties.getMinIdle());
        datasource.setMaxActive(dataSourceCommonProperties.getMaxActive());
        datasource.setMaxWait(dataSourceCommonProperties.getMaxWait());
        datasource.setTimeBetweenEvictionRunsMillis(dataSourceCommonProperties.getTimeBetweenEvictionRunsMillis());
        datasource.setMinEvictableIdleTimeMillis(dataSourceCommonProperties.getMinEvictableIdleTimeMillis());
        datasource.setValidationQuery(dataSourceCommonProperties.getValidationQuery());
        datasource.setTestWhileIdle(dataSourceCommonProperties.isTestWhileIdle());
        datasource.setTestOnBorrow(dataSourceCommonProperties.isTestOnBorrow());
        datasource.setTestOnReturn(dataSourceCommonProperties.isTestOnReturn());
        datasource.setPoolPreparedStatements(dataSourceCommonProperties.isPoolPreparedStatements());

        try {
            datasource.setFilters(dataSourceCommonProperties.getFilters());
        } catch (SQLException e) {
            // A bad filter list should not prevent startup; log and continue.
            logger.error("Druid configuration initialization filter error.", e);
        }
        return datasource;
    }

    /**
     * JdbcTemplate built on the primary Hive data source. Only needed by
     * callers that use JdbcTemplate directly.
     *
     * @param dataSource the bean named hiveDruidDataSource
     * @return a JdbcTemplate bound to that data source
     */
    @Bean(name = "primaryJdbcTemplate")
    public JdbcTemplate jdbcTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
        return new JdbcTemplate(dataSource);
    }

    /**
     * Secondary Hive data source with only the connection settings applied
     * (no pool tuning).
     */
    @Bean("hiveDruidDataSource1")
    @Qualifier("hiveDruidDataSource1")
    public DruidDataSource dataSource1() {
        DruidDataSource datasource = new DruidDataSource();
        datasource.setUrl(dataSourceProperties.getHive().get("url"));
        datasource.setUsername(dataSourceProperties.getHive().get("username"));
        datasource.setPassword(dataSourceProperties.getHive().get("password"));
        datasource.setDriverClassName(dataSourceProperties.getHive().get("driver-class-name"));
        return datasource;
    }

    /** Project-specific JdbcTemplate variant over the primary Hive data source. */
    @Bean("getJdbcTemplateEdit")
    @Qualifier("getJdbcTemplateEdit")
    public JdbcTemplateEdit getJdbcTemplateEdit(@Qualifier("hiveDruidDataSource") DruidDataSource dataSource) {
        return new JdbcTemplateEdit(dataSource);
    }

    /**
     * Borrows one pooled connection, caches it in
     * {@link #druidPooledConnection}, and exposes it as a singleton bean.
     *
     * <p>NOTE(review): a singleton pooled connection is never released back
     * to the pool and may go stale; on failure this bean would be
     * {@code null}. Consider injecting the DataSource instead.
     */
    @Bean("getDruidPooledConnection")
    @Qualifier("getDruidPooledConnection")
    public DruidPooledConnection getDruidPooledConnection(@Qualifier("hiveDruidDataSource") DruidDataSource dataSource) {
        DruidPooledConnection connection = null;
        try {
            connection = dataSource.getConnection();
            druidPooledConnection = connection;
        } catch (SQLException e) {
            // Log with the cause instead of printStackTrace().
            logger.error("Failed to obtain a pooled Hive connection.", e);
        }
        return connection;
    }
}