calcite使用的一点思考

687 阅读1分钟

最近SQL实时调度当中有个需求要解析嵌套子查询当中的表名 由于原来dfs+剪枝的双递归方式需要自己解析递归类型,对于上游自定义算子的编排有些复杂.所以调研了下calcite的visitor ,其实跟anltr4的解法也差不多 = = 目前生产环境已可用,先分享出来.之后再补注释

SQL切分地址: github.com/kaori-seaso…

添加依赖

<dependency>
  <groupId>org.apache.flink</groupId>
  <artifactId>flink-sql-parser</artifactId>
  <version>1.14.4</version>
</dependency>

<dependency>
  <groupId>org.apache.flink</groupId>
  <artifactId>flink-table-planner_2.12</artifactId>
  <version>1.14.4</version>
</dependency>

<dependency>
  <groupId>org.apache.flink</groupId>
  <artifactId>flink-table-api-java-bridge</artifactId>
  <version>1.15.1</version>
</dependency>
<dependency>
    <groupId>org.apache.calcite</groupId>
    <artifactId>calcite-core</artifactId>
    <version>1.26.0</version>
    <scope>compile</scope>
</dependency>

编写visitor

public class StatementVisitor extends SqlBasicVisitor<Void> {

    List<String> sourceList = new ArrayList<>();

    //设置解析嵌套子查询时是否遇到from
    boolean isInFrom = false;

    @Override
    public Void visit(SqlCall call) {
        switch (call.getKind()) {
            case SELECT:
                return visit((SqlSelect) call);
            case JOIN:
                return visit((SqlJoin) call);
            default:
                return super.visit(call);
        }
    }

    public Void visit(SqlSelect select) {
        boolean isInFromBackup = isInFrom;
        for (SqlNode child : select.getOperandList()) {
            if (child == null) continue;
            // 如果子查询解析到from关键词 ,可能会存在子查询中嵌套子查询的情况,返回visitor实例,恢复状态
            isInFrom = select.getFrom() == child;
            child.accept(this);
        }
        isInFrom = isInFromBackup;
        return null;
    }

    Void visit(SqlJoin join) {
        boolean isInFromBackup = isInFrom;
        for (SqlNode child : join.getOperandList()) {
            if (child == null) continue;
            //如果解析到join的连接条件, 不处理,继续向下寻找其他谓词节点
            if (child == join.getCondition()) {
                isInFrom = false;
            }
            // 返回vistor实例,继续向下递归
            child.accept(this);
            if (child == join.getCondition()) {
                isInFrom = isInFromBackup;
            }
        }
        return null;
    }

    @Override
    public Void visit(SqlIdentifier id) {
        //SqlIdentifier为当前 一些变量信息,比如字段名或者表名,由于拜访者模式处于一直在递归的状态。所以解析遇到from节点则开始收集
        if (isInFrom) {
            // it's table name
            sourceList.add(id.toString());
        } else {
            // it's field name
        }
        return super.visit(id);
    }

    public List<String> getSourceList() {
        return sourceList;
    }


}

单元测试

@Test
public void test1() {
    String statement = " insert into xxx ( select sss from ( select * from ggg ) left join yyyy1 on xxx1.id = yyyy1.id union all select sss from yyyy2 left join xxxx2 on xxxx2.id = yyyy2.id ) ";
    SqlParser.Config config = StatementParser.createSqlParserConfig(true);
    SqlParser sqlParser = SqlParser.create(statement, config);
    SqlNodeList sqlNodes = null;
    SqlNode node = null;
    try {

        sqlNodes = sqlParser.parseStmtList();
        if (sqlNodes.size() != 1) {
            throw new SqlParseException("Only single statement is supported now");
        }
        node = sqlNodes.get(0);
        StatementVisitor visitor = new StatementVisitor();
        node.accept(visitor);
        // ggg, yyyy1, yyyy2, xxxx2
        System.out.println(visitor.getSourceList());

    } catch (org.apache.calcite.sql.parser.SqlParseException e) {

    }
}

Continue...下班有时间再更新