HIVE-21184: Add explain and explain formatted CBO plan with cost information (Jesus...
authorJesus Camacho Rodriguez <jcamacho@apache.org>
Fri, 1 Feb 2019 22:09:03 +0000 (14:09 -0800)
committerJesus Camacho Rodriguez <jcamacho@apache.org>
Mon, 4 Feb 2019 16:52:41 +0000 (08:52 -0800)
Close apache/hive#525

20 files changed:
itests/src/test/resources/testconfiguration.properties
itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
ql/src/java/org/apache/hadoop/hive/ql/Driver.java
ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java [new file with mode: 0644]
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
ql/src/test/queries/clientpositive/perf/cbo_ext_query1.q [new file with mode: 0644]
ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out [new file with mode: 0644]
ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out [new file with mode: 0644]

index 8ba0ddf..a237745 100644 (file)
@@ -1747,6 +1747,7 @@ tez.perf.disabled.query.files=mv_query44.q
 spark.perf.disabled.query.files=query14.q,\
   query64.q,\
   cbo_query1.q,\
+  cbo_ext_query1.q,\
   cbo_query10.q,\
   cbo_query11.q,\
   cbo_query12.q,\
index 254cc95..b87d904 100644 (file)
@@ -287,6 +287,10 @@ public class QOutProcessor {
     ppm.add(new PatternReplacementPair(Pattern.compile("task_[0-9_]+"), "task_#ID#"));
     ppm.add(new PatternReplacementPair(Pattern.compile("for Spark session.*?:"),
             "#SPARK_SESSION_ID#:"));
+
+    ppm.add(new PatternReplacementPair(Pattern.compile("rowcount = [0-9]+(\\.[0-9]+(E[0-9]+)?)?, cumulative cost = \\{.*\\}, id = [0-9]*"),
+        "rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###"));
+
     partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]);
   }
   /* This list may be modified by specific cli drivers to mask strings that change on every test */
index 0dd50de..d622ce0 100644 (file)
@@ -692,7 +692,8 @@ public class Driver implements IDriver {
       schema = getSchema(sem, conf);
       plan = new QueryPlan(queryStr, sem, queryDisplay.getQueryStartTime(), queryId,
           queryState.getHiveOperation(), schema);
-      // save the optimized sql for the explain
+      // save the optimized plan and sql for the explain
+      plan.setOptimizedCBOPlan(ctx.getCalcitePlan());
       plan.setOptimizedQueryString(ctx.getOptimizedSql());
 
       conf.set("mapreduce.workflow.id", "hive_" + queryId);
index ac03efe..7636019 100644 (file)
@@ -75,6 +75,7 @@ public class QueryPlan implements Serializable {
 
   private String cboInfo;
   private String queryString;
+  private String optimizedCBOPlan;
   private String optimizedQueryString;
 
   private ArrayList<Task<? extends Serializable>> rootTasks;
@@ -761,6 +762,14 @@ public class QueryPlan implements Serializable {
     this.optimizedQueryString = optimizedQueryString;
   }
 
+  public String getOptimizedCBOPlan() {
+    return this.optimizedCBOPlan;
+  }
+
+  public void setOptimizedCBOPlan(String optimizedCBOPlan) {
+    this.optimizedCBOPlan = optimizedCBOPlan;
+  }
+
   public org.apache.hadoop.hive.ql.plan.api.Query getQuery() {
     return query;
   }
index 7c4efab..b9d1e0b 100644 (file)
@@ -140,14 +140,19 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
     return outJSONObject;
   }
 
-  public String outputCboPlan(String cboPlan, PrintStream out, boolean jsonOutput)
-      throws JSONException {
-    if (out != null) {
-      out.println("CBO PLAN:");
-      out.println(cboPlan);
+  public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) throws Exception {
+    JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
+    boolean jsonOutput = work.isFormatted();
+    String cboPlan = work.getCboPlan();
+    if (cboPlan != null) {
+      if (jsonOutput) {
+        outJSONObject.put("CBOPlan", cboPlan);
+      } else {
+        out.println("CBO PLAN:");
+        out.println(cboPlan);
+      }
     }
-
-    return jsonOutput ? cboPlan : null;
+    return outJSONObject;
   }
 
   public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws Exception {
@@ -236,7 +241,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
   }
 
   public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetchTask,
-      boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo, String optimizedSQL) throws Exception {
+      boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo,
+      String optimizedSQL) throws Exception {
 
     // If the user asked for a formatted output, dump the json output
     // in the output stream
@@ -392,8 +398,9 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
       out = new PrintStream(outS);
 
       if (work.isCbo()) {
-        if (work.getCboPlan() != null) {
-          outputCboPlan(work.getCboPlan(), out, work.isFormatted());
+        JSONObject jsonCBOPlan = getJSONCBOPlan(out, work);
+        if (work.isFormatted()) {
+          out.print(jsonCBOPlan);
         }
       } else if (work.isLogical()) {
         JSONObject jsonLogicalPlan = getJSONLogicalPlan(out, work);
index 8b10823..9cb4d8c 100644 (file)
@@ -269,7 +269,7 @@ public class ATSHook implements ExecuteWithHookContext {
                   config, //explainConfig
                   null, // cboInfo
                   plan.getOptimizedQueryString(), // optimizedSQL
-                  null
+                  plan.getOptimizedCBOPlan()
               );
                 @SuppressWarnings("unchecked")
                 ExplainTask explain = (ExplainTask) TaskFactory.get(work);
index 0a09675..3d51c07 100644 (file)
@@ -512,7 +512,7 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext {
           config, // explainConfig
           plan.getCboInfo(), // cboInfo,
           plan.getOptimizedQueryString(),
-          null
+          plan.getOptimizedCBOPlan()
       );
       ExplainTask explain = (ExplainTask) TaskFactory.get(work, conf);
       explain.initialize(hookContext.getQueryState(), plan, null, null);
index e99e6d3..2c2f91b 100644 (file)
@@ -47,7 +47,6 @@ import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.TableScan;
-import org.apache.calcite.rel.metadata.RelColumnOrigin;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
@@ -66,7 +65,6 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.Pair;
-import org.apache.commons.lang3.tuple.Triple;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -1034,4 +1032,19 @@ public class HiveRelOptUtil extends RelOptUtil {
     }
     return null;
   }
+
+  /**
+   * Converts a relational expression to a string, showing information that will aid
+   * to parse the string back.
+   */
+  public static String toJsonString(final RelNode rel) {
+    if (rel == null) {
+      return null;
+    }
+
+    final HiveRelWriterImpl planWriter = new HiveRelWriterImpl();
+    rel.explain(planWriter);
+    return planWriter.asString();
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java
new file mode 100644 (file)
index 0000000..d4e6c25
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import java.lang.reflect.Field;
+import java.util.List;
+import java.util.Map;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.externalize.RelJsonWriter;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Writer implementation for rel nodes that produces an output in json that is easily
+ * parseable back into rel nodes.
+ */
+public class HiveRelWriterImpl extends RelJsonWriter {
+
+  protected static final Logger LOG = LoggerFactory.getLogger(HiveRelWriterImpl.class);
+
+  //~ Constructors -------------------------------------------------------------
+
+  public HiveRelWriterImpl() {
+    super();
+  }
+
+  //~ Methods ------------------------------------------------------------------
+
+  protected void explain_(RelNode rel, List<Pair<String, Object>> values) {
+    super.explain_(rel, values);
+    // TODO: The following is hackish since we do not have visibility over relList
+    // and we do not want to bring all the writer utilities from Calcite. It should
+    // be changed once we move to new Calcite version and relList is visible for
+    // subclasses.
+    try {
+      final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+      Field fs = RelJsonWriter.class.getDeclaredField("relList");
+      fs.setAccessible(true);
+      List<Object> relList = (List<Object>) fs.get(this);
+      Map<String, Object> map = (Map<String, Object>) relList.get(relList.size() - 1);
+      map.put("rowCount", mq.getRowCount(rel));
+      if (rel.getInputs().size() == 0) {
+        // This is a leaf, we will print the average row size and schema
+        map.put("avgRowSize", mq.getAverageRowSize(rel));
+        map.put("rowType", rel.getRowType().toString());
+      }
+    } catch (Exception e) {
+      LOG.warn("Failed to add additional fields in json writer", e);
+    }
+  }
+
+}
index c549d8d..16f1a5c 100644 (file)
@@ -220,9 +220,9 @@ public class HiveJoin extends Join implements HiveRelNode {
   public RelWriter explainTerms(RelWriter pw) {
     return super.explainTerms(pw)
         .item("algorithm", joinAlgorithm == null ?
-                "none" : joinAlgorithm)
+                "none" : joinAlgorithm.toString())
         .item("cost", joinCost == null ?
-                "not available" : joinCost);
+                "not available" : joinCost.toString());
   }
 
   //required for HiveRelDecorrelator
index 646ce09..47d6e7c 100644 (file)
@@ -150,6 +150,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvide
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -531,10 +532,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
             this.ctx.setCboInfo("Plan optimized by CBO.");
             this.ctx.setCboSucceeded(true);
             if (this.ctx.isExplainPlan()) {
+              // Enrich explain with information derived from CBO
               ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
               if (explainConfig.isCbo()) {
-                if (explainConfig.isCboExtended()) {
-                  // Include join cost
+                if (!explainConfig.isCboJoinCost()) {
+                  // Include cost as provided by Calcite
+                  newPlan.getCluster().invalidateMetadataQuery();
+                  RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+                }
+                if (explainConfig.isFormatted()) {
+                  this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
+                } else if (explainConfig.isCboCost() || explainConfig.isCboJoinCost()) {
                   this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan, SqlExplainLevel.ALL_ATTRIBUTES));
                 } else {
                   // Do not include join cost
index 28a7b43..994ef14 100644 (file)
@@ -42,7 +42,8 @@ public class ExplainConfiguration {
   private boolean formatted = false;
   private boolean dependency = false;
   private boolean cbo = false;
-  private boolean cboExtended = false;
+  private boolean cboCost = false;
+  private boolean cboJoinCost = false;
   private boolean logical = false;
   private boolean authorize = false;
   private boolean userLevelExplain = false;
@@ -94,12 +95,20 @@ public class ExplainConfiguration {
     this.cbo = cbo;
   }
 
-  public boolean isCboExtended() {
-    return cboExtended;
+  public boolean isCboCost() {
+    return cboCost;
   }
 
-  public void setCboExtended(boolean cboExtended) {
-    this.cboExtended = cboExtended;
+  public void setCboCost(boolean cboExtended) {
+    this.cboCost = cboExtended;
+  }
+
+  public boolean isCboJoinCost() {
+    return cboJoinCost;
+  }
+
+  public void setCboJoinCost(boolean cboJoinCost) {
+    this.cboJoinCost = cboJoinCost;
   }
 
   public boolean isLogical() {
index 6721a37..6d7af38 100644 (file)
@@ -77,12 +77,10 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer {
         config.setDependency(true);
       } else if (explainOptions == HiveParser.KW_CBO) {
         config.setCbo(true);
-        if (i + 1 < childCount) {
-          if (ast.getChild(i + 1).getType() == HiveParser.KW_EXTENDED) {
-            config.setCboExtended(true);
-            i++;
-          }
-        }
+      } else if (explainOptions == HiveParser.KW_COST) {
+        config.setCboCost(true);
+      } else if (explainOptions == HiveParser.KW_JOINCOST) {
+        config.setCboJoinCost(true);
       } else if (explainOptions == HiveParser.KW_LOGICAL) {
         config.setLogical(true);
       } else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
index 253633c..608befc 100644 (file)
@@ -381,6 +381,8 @@ KW_UNMANAGED: 'UNMANAGED';
 KW_APPLICATION: 'APPLICATION';
 KW_SYNC: 'SYNC';
 KW_AST: 'AST';
+KW_COST: 'COST';
+KW_JOINCOST: 'JOINCOST';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
index 1ffdec0..ce13ceb 100644 (file)
@@ -805,7 +805,7 @@ explainOption
     : KW_EXTENDED
     | KW_FORMATTED
     | KW_DEPENDENCY
-    | KW_CBO KW_EXTENDED?
+    | KW_CBO (KW_COST | KW_JOINCOST)?
     | KW_LOGICAL
     | KW_AUTHORIZATION
     | KW_ANALYZE
index 417955c..47be76c 100644 (file)
@@ -795,12 +795,12 @@ nonReserved
     :
     KW_ABORT | KW_ADD | KW_ADMIN | KW_AFTER | KW_ANALYZE | KW_ARCHIVE | KW_ASC | KW_BEFORE | KW_BUCKET | KW_BUCKETS
     | KW_CASCADE | KW_CBO | KW_CHANGE | KW_CHECK | KW_CLUSTER | KW_CLUSTERED | KW_CLUSTERSTATUS | KW_COLLECTION | KW_COLUMNS
-    | KW_COMMENT | KW_COMPACT | KW_COMPACTIONS | KW_COMPUTE | KW_CONCATENATE | KW_CONTINUE | KW_DATA | KW_DAY
+    | KW_COMMENT | KW_COMPACT | KW_COMPACTIONS | KW_COMPUTE | KW_CONCATENATE | KW_CONTINUE | KW_COST | KW_DATA | KW_DAY
     | KW_DATABASES | KW_DATETIME | KW_DBPROPERTIES | KW_DEFERRED | KW_DEFINED | KW_DELIMITED | KW_DEPENDENCY 
     | KW_DESC | KW_DIRECTORIES | KW_DIRECTORY | KW_DISABLE | KW_DISTRIBUTE | KW_DOW | KW_ELEM_TYPE 
     | KW_ENABLE | KW_ENFORCED | KW_ESCAPED | KW_EXCLUSIVE | KW_EXPLAIN | KW_EXPORT | KW_FIELDS | KW_FILE | KW_FILEFORMAT
     | KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE
-    | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR | KW_KILL
+    | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR | KW_JOINCOST | KW_KILL
     | KW_KEYS | KW_KEY_TYPE | KW_LAST | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
     | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_NULLS | KW_OFFLINE
     | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS
index f449c6b..9b6827e 100644 (file)
@@ -300,7 +300,7 @@ public class TestUpdateDeleteSemanticAnalyzer {
     ExplainConfiguration config = new ExplainConfiguration();
     config.setExtended(true);
     ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(),
-        sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), null);
+        sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), plan.getOptimizedCBOPlan());
     ExplainTask task = new ExplainTask();
     task.setWork(work);
     task.initialize(queryState, plan, null, null);
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_ext_query1.q b/ql/src/test/queries/clientpositive/perf/cbo_ext_query1.q
new file mode 100644 (file)
index 0000000..e591f87
--- /dev/null
@@ -0,0 +1,51 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query1.tpl and seed 2031708268
+explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100;
+
+explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100;
+
+-- end query 1 in stream 0 using template query1.tpl
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out
new file mode 100644 (file)
index 0000000..bcd1f8d
--- /dev/null
@@ -0,0 +1,182 @@
+PREHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+  HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+    HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveFilter(condition=[IS NOT NULL($0)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+
+PREHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+  HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+    HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[{415687.382770037 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[{8.00093932086143E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveFilter(condition=[IS NOT NULL($0)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[{460301.9976112889 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.3635511784936875E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out
new file mode 100644 (file)
index 0000000..255261b
--- /dev/null
@@ -0,0 +1,180 @@
+PREHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+  HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+    HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+
+PREHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+  HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+    HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[{415687.382770037 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[{8.00093932086143E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[{460301.9976112889 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+      HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+        HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+          HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+            HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+              HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.3635511784936875E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                  HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+                    HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+