Skip to content

Commit

Permalink
HIVE-28720: EXPLAIN CBO is empty when hive.cbo.returnpath.hiveop is true (Stamatis Zampetakis reviewed by Krisztian Kasa)
Browse files Browse the repository at this point in the history

Closes #5619
  • Loading branch information
zabetak authored Jan 29, 2025
1 parent 544c8b3 commit 4f22ecd
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 38 deletions.
74 changes: 36 additions & 38 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -590,15 +590,16 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
if (cboCtx.type == PreCboCtx.Type.VIEW && !materializedView) {
throw new SemanticException("Create view is not supported in cbo return path.");
}
newPlan =
PlanModifierForReturnPath.convertOpTree(newPlan, resultSchema, this.getQB().getTableDesc() != null);
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after return path modifier:\n" + RelOptUtil.toString(newPlan));
}
sinkOp = getOptimizedHiveOPDag(newPlan);
if (oldHints.size() > 0) {
LOG.debug("Propagating hints to QB: " + oldHints);
getQB().getParseInfo().setHintList(oldHints);
}
LOG.info("CBO Succeeded; optimized logical plan.");

this.ctx.setCboInfo(getOptimizedByCboInfo());
this.ctx.setCboSucceeded(true);
} else {
// 1. Convert Plan to AST
ASTNode newAST = getOptimizedAST(newPlan);
Expand Down Expand Up @@ -649,39 +650,40 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept

disableJoinMerge = defaultJoinMerge;
sinkOp = genPlan(getQB());
LOG.info("CBO Succeeded; optimized logical plan.");

this.ctx.setCboInfo(getOptimizedByCboInfo());
this.ctx.setCboSucceeded(true);
if (this.ctx.isExplainPlan()) {
// Enrich explain with information derived from CBO
ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
if (explainConfig.isCbo()) {
if (!explainConfig.isCboJoinCost()) {
// Include cost as provided by Calcite
newPlan.getCluster().invalidateMetadataQuery();
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
}
if (explainConfig.isFormatted()) {
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
} else if (explainConfig.isCboCost() || explainConfig.isCboJoinCost()) {
this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan, SqlExplainLevel.ALL_ATTRIBUTES));
} else {
// Do not include join cost
this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan));
}
} else if (explainConfig.isFormatted()) {
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
} else if (explainConfig.isExtended()) {
this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
}
}
if (LOG.isTraceEnabled()) {
LOG.trace(getOptimizedSql(newPlan));
LOG.trace(newAST.dump());
}
}
LOG.info("CBO Succeeded; optimized logical plan.");
this.ctx.setCboInfo(getOptimizedByCboInfo());
this.ctx.setCboSucceeded(true);
if (this.ctx.isExplainPlan()) {
// Enrich explain with information derived from CBO
ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
if (explainConfig.isCbo()) {
if (!explainConfig.isCboJoinCost()) {
// Include cost as provided by Calcite
newPlan.getCluster().invalidateMetadataQuery();
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
}
if (explainConfig.isFormatted()) {
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
} else if (explainConfig.isCboCost() || explainConfig.isCboJoinCost()) {
this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan, SqlExplainLevel.ALL_ATTRIBUTES));
} else {
// Do not include join cost
this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan));
}
} else if (explainConfig.isFormatted()) {
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
} else if (explainConfig.isExtended()) {
this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
}
}
if (LOG.isTraceEnabled()) {
LOG.trace(getOptimizedSql(newPlan));
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.GENERATE_OPERATOR_TREE);
} catch (Exception e) {
LOG.error("CBO failed, skipping CBO. ", e);
Expand Down Expand Up @@ -1415,12 +1417,8 @@ ASTNode getOptimizedAST(RelNode optimizedOptiqPlan) throws SemanticException {
* @throws SemanticException
*/
Operator getOptimizedHiveOPDag(RelNode optimizedOptiqPlan) throws SemanticException {
RelNode modifiedOptimizedOptiqPlan = PlanModifierForReturnPath.convertOpTree(
optimizedOptiqPlan, resultSchema, this.getQB().getTableDesc() != null);

LOG.debug("Translating the following plan:\n" + RelOptUtil.toString(modifiedOptimizedOptiqPlan));
Operator<?> hiveRoot = new HiveOpConverter(this, conf, unparseTranslator, topOps)
.convert(modifiedOptimizedOptiqPlan);
.convert(optimizedOptiqPlan);
RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB());
opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR));
String dest = getQB().getParseInfo().getClauseNames().iterator().next();
Expand Down
13 changes: 13 additions & 0 deletions ql/src/test/queries/clientpositive/cbo_rp_explain.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
set hive.cbo.returnpath.hiveop=true;

CREATE TABLE author (id INT, fname STRING, lname STRING, birth DATE);
CREATE TABLE book (id INT, title STRING, author INT);

EXPLAIN CBO
SELECT lname, MAX(birth) FROM author GROUP BY lname;

EXPLAIN CBO
SELECT author.lname, book.title
FROM author
INNER JOIN book ON author.id=book.author
WHERE author.fname = 'Victor';
61 changes: 61 additions & 0 deletions ql/src/test/results/clientpositive/llap/cbo_rp_explain.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
PREHOOK: query: CREATE TABLE author (id INT, fname STRING, lname STRING, birth DATE)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@author
POSTHOOK: query: CREATE TABLE author (id INT, fname STRING, lname STRING, birth DATE)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@author
PREHOOK: query: CREATE TABLE book (id INT, title STRING, author INT)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@book
POSTHOOK: query: CREATE TABLE book (id INT, title STRING, author INT)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@book
PREHOOK: query: EXPLAIN CBO
SELECT lname, MAX(birth) FROM author GROUP BY lname
PREHOOK: type: QUERY
PREHOOK: Input: default@author
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO
SELECT lname, MAX(birth) FROM author GROUP BY lname
POSTHOOK: type: QUERY
POSTHOOK: Input: default@author
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{0}], agg#0=[max($1)])
HiveProject(lname=[$2], birth=[$3])
HiveTableScan(table=[[default, author]], qbid:alias=[author])

PREHOOK: query: EXPLAIN CBO
SELECT author.lname, book.title
FROM author
INNER JOIN book ON author.id=book.author
WHERE author.fname = 'Victor'
PREHOOK: type: QUERY
PREHOOK: Input: default@author
PREHOOK: Input: default@book
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO
SELECT author.lname, book.title
FROM author
INNER JOIN book ON author.id=book.author
WHERE author.fname = 'Victor'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@author
POSTHOOK: Input: default@book
#### A masked pattern was here ####
CBO PLAN:
HiveProject(lname=[$2], title=[$3])
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
HiveSortExchange(distribution=[hash[0]], collation=[[0]])
HiveProject(id=[$0], fname=[CAST(_UTF-16LE'Victor':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], lname=[$2])
HiveFilter(condition=[AND(=($1, _UTF-16LE'Victor'), IS NOT NULL($0))])
HiveTableScan(table=[[default, author]], qbid:alias=[author])
HiveSortExchange(distribution=[hash[1]], collation=[[1]])
HiveProject(title=[$1], author=[$2])
HiveFilter(condition=[IS NOT NULL($2)])
HiveTableScan(table=[[default, book]], qbid:alias=[book])

Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,10 @@ POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
POSTHOOK: Output: default@dest1
OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4`
FROM `default`.`srcpart`
WHERE `ds` = '2008-04-08'
GROUP BY SUBSTR(`key`, 1, 1)
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Expand Down

0 comments on commit 4f22ecd

Please sign in to comment.