最近SQL实时调度当中有个需求要解析嵌套子查询当中的表名 由于原来dfs+剪枝的双递归方式需要自己解析递归类型,对于上游自定义算子的编排有些复杂.所以调研了下calcite的visitor ,其实跟anltr4的解法也差不多 = = 目前生产环境已可用,先分享出来.之后再补注释
SQL切分地址: github.com/kaori-seaso…
添加依赖
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-sql-parser</artifactId>
<version>1.14.4</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>1.14.4</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>1.15.1</version>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<version>1.26.0</version>
<scope>compile</scope>
</dependency>
编写visitor
public class StatementVisitor extends SqlBasicVisitor<Void> {
List<String> sourceList = new ArrayList<>();
//设置解析嵌套子查询时是否遇到from
boolean isInFrom = false;
@Override
public Void visit(SqlCall call) {
switch (call.getKind()) {
case SELECT:
return visit((SqlSelect) call);
case JOIN:
return visit((SqlJoin) call);
default:
return super.visit(call);
}
}
public Void visit(SqlSelect select) {
boolean isInFromBackup = isInFrom;
for (SqlNode child : select.getOperandList()) {
if (child == null) continue;
// 如果子查询解析到from关键词 ,可能会存在子查询中嵌套子查询的情况,返回visitor实例,恢复状态
isInFrom = select.getFrom() == child;
child.accept(this);
}
isInFrom = isInFromBackup;
return null;
}
Void visit(SqlJoin join) {
boolean isInFromBackup = isInFrom;
for (SqlNode child : join.getOperandList()) {
if (child == null) continue;
//如果解析到join的连接条件, 不处理,继续向下寻找其他谓词节点
if (child == join.getCondition()) {
isInFrom = false;
}
// 返回vistor实例,继续向下递归
child.accept(this);
if (child == join.getCondition()) {
isInFrom = isInFromBackup;
}
}
return null;
}
@Override
public Void visit(SqlIdentifier id) {
//SqlIdentifier为当前 一些变量信息,比如字段名或者表名,由于拜访者模式处于一直在递归的状态。所以解析遇到from节点则开始收集
if (isInFrom) {
// it's table name
sourceList.add(id.toString());
} else {
// it's field name
}
return super.visit(id);
}
public List<String> getSourceList() {
return sourceList;
}
}
单元测试
@Test
public void test1() {
String statement = " insert into xxx ( select sss from ( select * from ggg ) left join yyyy1 on xxx1.id = yyyy1.id union all select sss from yyyy2 left join xxxx2 on xxxx2.id = yyyy2.id ) ";
SqlParser.Config config = StatementParser.createSqlParserConfig(true);
SqlParser sqlParser = SqlParser.create(statement, config);
SqlNodeList sqlNodes = null;
SqlNode node = null;
try {
sqlNodes = sqlParser.parseStmtList();
if (sqlNodes.size() != 1) {
throw new SqlParseException("Only single statement is supported now");
}
node = sqlNodes.get(0);
StatementVisitor visitor = new StatementVisitor();
node.accept(visitor);
// ggg, yyyy1, yyyy2, xxxx2
System.out.println(visitor.getSourceList());
} catch (org.apache.calcite.sql.parser.SqlParseException e) {
}
}
Continue...下班有时间再更新