VXQUERY-105: Add group-by functionality, add scalability to JSON parser
author    Christina Pavlopoulou <cpavl001@ucr.edu>
          Thu, 29 Dec 2016 19:56:20 +0000 (11:56 -0800)
committer Christina Pavlopoulou <cpavl001@ucr.edu>
          Fri, 19 May 2017 02:07:57 +0000 (19:07 -0700)
1) Adding the group-by feature according to the XQuery 3.0 specification (see the example below)
2) Creating group-by-specific rewrite rules
3) Adding rewrite rules to enable parallel access to JSON data
4) Changing the JSON parser to enable JSONiq scalability
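
A minimal illustration of the XQuery 3.0 group-by clause that this change adds (an illustrative sketch only, not one of the committed test queries under Queries/XQuery/Groups):

    for $x in (1, 2, 3, 4, 5, 6)
    let $key := $x mod 2
    group by $key
    return <group key="{$key}" sum="{fn:sum($x)}"/>

After the group by clause, the non-grouping variable $x is rebound to the sequence of all values in its group, so fn:sum($x) aggregates per group.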

62 files changed:
vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/NestGroupByRule.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/PushGroupByThroughProduct.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractPushExpressionIntoDatascanRule.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractRemoveRedundantTypeExpressionsRule.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignToAggregateRule.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/EliminateUnnestAggregateSubplanRule.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushAggregateIntoGroupbyRule.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushKeysOrMembersIntoDatascanRule.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushValueIntoDatascanRule.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/SetVariableIdContextRule.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/ExpressionToolbox.java
vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java
vxquery-core/src/main/java/org/apache/vxquery/exceptions/SystemException.java
vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-operators.xml
vxquery-core/src/main/java/org/apache/vxquery/jsonparser/JSONParser.java
vxquery-core/src/main/java/org/apache/vxquery/metadata/AbstractVXQueryDataSource.java
vxquery-core/src/main/java/org/apache/vxquery/metadata/IVXQueryDataSource.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionDataSource.java
vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingDataSource.java
vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingOperatorDescriptor.java
vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java
vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/base/AbstractTaggedValueArgumentUnnestingEvaluator.java
vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersScalarEvaluator.java
vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnesting.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnestingEvaluator.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnestingEvaluatorFactory.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/xmlquery/ast/ASTTag.java
vxquery-core/src/main/java/org/apache/vxquery/xmlquery/ast/GroupSpecNode.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/xmlquery/ast/GroupbyClauseNode.java [new file with mode: 0644]
vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryParser.java
vxquery-core/src/main/java/org/apache/vxquery/xmlquery/translator/XMLQueryTranslator.java
vxquery-core/src/main/javacc/xquery-grammar.jj
vxquery-core/src/main/xslt/generate-op-defns.xsl
vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group_json.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group_json_count.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-1/q15_parser.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-1/q16_parser.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-2/q14_parser.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-2/q16_parser.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-4/q14_parser.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-4/q15_parser.txt [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group_json.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group_json_count.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex6.xq
vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-1/q15_parser.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-1/q16_parser.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-2/q14_parser.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-2/q16_parser.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-4/q14_parser.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-4/q15_parser.xq [new file with mode: 0644]
vxquery-xtest/src/test/resources/VXQueryCatalog.xml
vxquery-xtest/src/test/resources/cat/GroupQueries.xml [new file with mode: 0644]
vxquery-xtest/src/test/resources/cat/JsonParserQueries.xml

index ec85207..d196229 100644
@@ -62,7 +62,7 @@ handlers= java.util.logging.ConsoleHandler
 
 # Limit the message that are printed on the console to FINE and above.
 
-java.util.logging.ConsoleHandler.level = INFO
+java.util.logging.ConsoleHandler.level = FINE
 java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
 
 
@@ -75,5 +75,5 @@ java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
 # messages:
 
 # edu.uci.ics.asterix.level = FINE
-# edu.uci.ics.hyracks.algebricks.level = FINE
-edu.uci.ics.hyracks.level = SEVERE
+org.apache.hyracks.algebricks.level = FINE
+org.apache.hyracks.level = SEVERE
index 1ee4833..d5fb32a 100644
@@ -35,7 +35,6 @@ import org.apache.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecor
 import org.apache.hyracks.algebricks.rewriter.rules.InferTypesRule;
 import org.apache.hyracks.algebricks.rewriter.rules.InlineAssignIntoAggregateRule;
 import org.apache.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
-//import org.apache.hyracks.algebricks.rewriter.rules.InsertOuterJoinRule;
 import org.apache.hyracks.algebricks.rewriter.rules.IntroJoinInsideSubplanRule;
 import org.apache.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
 import org.apache.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
@@ -59,8 +58,11 @@ import org.apache.hyracks.algebricks.rewriter.rules.subplan.EliminateSubplanWith
 import org.apache.hyracks.algebricks.rewriter.rules.subplan.NestedSubplanToJoinRule;
 import org.apache.hyracks.algebricks.rewriter.rules.subplan.PushSubplanIntoGroupByRule;
 import org.apache.hyracks.algebricks.rewriter.rules.subplan.SubplanOutOfGroupRule;
+import org.apache.vxquery.compiler.rewriter.algebricks_new_version.NestGroupByRule;
+import org.apache.vxquery.compiler.rewriter.algebricks_new_version.PushGroupByThroughProduct;
 import org.apache.vxquery.compiler.rewriter.rules.ConsolidateAssignAggregateRule;
 import org.apache.vxquery.compiler.rewriter.rules.ConsolidateDescandantChild;
+import org.apache.vxquery.compiler.rewriter.rules.ConvertAssignToAggregateRule;
 import org.apache.vxquery.compiler.rewriter.rules.ConvertAssignToUnnestRule;
 import org.apache.vxquery.compiler.rewriter.rules.ConvertFromAlgebricksExpressionsRule;
 import org.apache.vxquery.compiler.rewriter.rules.ConvertToAlgebricksExpressionsRule;
@@ -70,8 +72,11 @@ import org.apache.vxquery.compiler.rewriter.rules.EliminateUnnestAggregateSubpla
 import org.apache.vxquery.compiler.rewriter.rules.IntroduceCollectionRule;
 import org.apache.vxquery.compiler.rewriter.rules.IntroduceIndexingRule;
 import org.apache.vxquery.compiler.rewriter.rules.IntroduceTwoStepAggregateRule;
+import org.apache.vxquery.compiler.rewriter.rules.PushAggregateIntoGroupbyRule;
 import org.apache.vxquery.compiler.rewriter.rules.PushChildIntoDataScanRule;
 import org.apache.vxquery.compiler.rewriter.rules.PushFunctionsOntoEqJoinBranches;
+import org.apache.vxquery.compiler.rewriter.rules.PushKeysOrMembersIntoDatascanRule;
+import org.apache.vxquery.compiler.rewriter.rules.PushValueIntoDatascanRule;
 import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantBooleanExpressionsRule;
 import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantCastExpressionsRule;
 import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantDataExpressionsRule;
@@ -98,7 +103,7 @@ public class RewriteRuleset {
     public static final List<IAlgebraicRewriteRule> buildPathStepNormalizationRuleCollection() {
         List<IAlgebraicRewriteRule> normalization = new LinkedList<>();
         normalization.add(new SetVariableIdContextRule());
-
+        normalization.add(new InferTypesRule());
         // Remove unused functions.
         normalization.add(new RemoveUnusedSortDistinctNodesRule());
         normalization.add(new RemoveRedundantTreatExpressionsRule());
@@ -163,8 +168,6 @@ public class RewriteRuleset {
         normalization.add(new RemoveUnusedAssignAndAggregateRule());
 
         // Find assign for scalar aggregate function.
-        // normalization.add(new ConvertAssignToAggregateRule());
-
         // Use two step aggregate operators if possible.
         normalization.add(new IntroduceTwoStepAggregateRule());
 
@@ -190,8 +193,12 @@ public class RewriteRuleset {
         normalization.add(new ConvertToAlgebricksExpressionsRule());
         normalization.add(new RemoveRedundantBooleanExpressionsRule());
         // Clean up
+        normalization.add(new ConvertAssignToAggregateRule());
+        normalization.add(new IntroduceTwoStepAggregateRule());
         normalization.add(new RemoveRedundantVariablesRule());
         normalization.add(new RemoveUnusedAssignAndAggregateRule());
+        normalization.add(new PushValueIntoDatascanRule());
+        normalization.add(new PushKeysOrMembersIntoDatascanRule());
         return normalization;
     }
 
@@ -206,8 +213,11 @@ public class RewriteRuleset {
         xquery.add(new SimpleUnnestToProductRule());
         xquery.add(new PushMapOperatorDownThroughProductRule());
         xquery.add(new PushSubplanWithAggregateDownThroughProductRule());
+        xquery.add(new PushMapOperatorDownThroughProductRule());
+        xquery.add(new PushGroupByThroughProduct());
         xquery.add(new PushSelectDownRule());
         xquery.add(new PushSelectIntoJoinRule());
+
         // Clean up
         xquery.add(new RemoveRedundantVariablesRule());
         xquery.add(new RemoveUnusedAssignAndAggregateRule());
@@ -229,7 +239,6 @@ public class RewriteRuleset {
         List<IAlgebraicRewriteRule> xquery = new LinkedList<>();
 
         xquery.add(new PushSelectDownRule());
-        xquery.add(new SimpleUnnestToProductRule());
         xquery.add(new ComplexUnnestToProductRule());
         xquery.add(new ComplexJoinInferenceRule());
         xquery.add(new PushSelectIntoJoinRule());
@@ -240,23 +249,18 @@ public class RewriteRuleset {
         xquery.add(new SubplanOutOfGroupRule());
         //        xquery.add(new InsertOuterJoinRule());
         xquery.add(new ExtractFunctionsFromJoinConditionRule());
-
         xquery.add(new RemoveRedundantVariablesRule());
         xquery.add(new RemoveUnusedAssignAndAggregateRule());
-
         xquery.add(new FactorRedundantGroupAndDecorVarsRule());
-        xquery.add(new EliminateSubplanRule());
-        xquery.add(new EliminateGroupByEmptyKeyRule());
-        xquery.add(new PushSubplanIntoGroupByRule());
-        xquery.add(new NestedSubplanToJoinRule());
-        xquery.add(new EliminateSubplanWithInputCardinalityOneRule());
-
         return xquery;
     }
 
     public static final List<IAlgebraicRewriteRule> buildNormalizationRuleCollection() {
         List<IAlgebraicRewriteRule> normalization = new LinkedList<>();
         normalization.add(new EliminateSubplanRule());
+        normalization.add(new SimpleUnnestToProductRule());
+        normalization.add(new NestedSubplanToJoinRule());
+        normalization.add(new EliminateSubplanWithInputCardinalityOneRule());
         normalization.add(new BreakSelectIntoConjunctsRule());
         normalization.add(new PushSelectIntoJoinRule());
         normalization.add(new ExtractGbyExpressionsRule());
@@ -267,8 +271,17 @@ public class RewriteRuleset {
         List<IAlgebraicRewriteRule> condPushDown = new LinkedList<>();
         condPushDown.add(new PushSelectDownRule());
         condPushDown.add(new InlineVariablesRule());
+        condPushDown.add(new SubplanOutOfGroupRule());
+        condPushDown.add(new RemoveRedundantVariablesRule());
+        condPushDown.add(new RemoveUnusedAssignAndAggregateRule());
         condPushDown.add(new FactorRedundantGroupAndDecorVarsRule());
+        condPushDown.add(new PushAggregateIntoGroupbyRule());
         condPushDown.add(new EliminateSubplanRule());
+        condPushDown.add(new PushGroupByThroughProduct());
+        condPushDown.add(new NestGroupByRule());
+        condPushDown.add(new EliminateGroupByEmptyKeyRule());
+        condPushDown.add(new PushSubplanIntoGroupByRule());
+        condPushDown.add(new NestedSubplanToJoinRule());
         return condPushDown;
     }
 
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/NestGroupByRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/NestGroupByRule.java
new file mode 100644
index 0000000..ad3db93
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.algebricks_new_version;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil;
+import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import org.apache.vxquery.functions.BuiltinOperators;
+
+public class NestGroupByRule implements IAlgebraicRewriteRule {
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue();
+        if (op1.getOperatorTag() != LogicalOperatorTag.SUBPLAN) {
+            return false;
+        }
+        SubplanOperator subplan = (SubplanOperator) op1;
+        if (subplan.getNestedPlans().size() != 1) {
+            return false;
+        }
+        ILogicalPlan p = subplan.getNestedPlans().get(0);
+        if (p.getRoots().size() != 1) {
+            return false;
+        }
+
+        Set<LogicalVariable> free = new HashSet<LogicalVariable>();
+        OperatorPropertiesUtil.getFreeVariablesInSubplans(subplan, free);
+        if (free.size() != 1) {
+            return false;
+        }
+        LogicalVariable fVar = null;
+        for (LogicalVariable v : free) {
+            fVar = v;
+            break;
+        }
+
+        AbstractLogicalOperator op2 = (AbstractLogicalOperator) op1.getInputs().get(0).getValue();
+        if (op2.getOperatorTag() != LogicalOperatorTag.GROUP) {
+            return false;
+        }
+        GroupByOperator gby = (GroupByOperator) op2;
+        if (gby.getNestedPlans().size() != 1) {
+            return false;
+        }
+        ILogicalPlan p2 = gby.getNestedPlans().get(0);
+        if (p2.getRoots().size() != 1) {
+            return false;
+        }
+        Mutable<ILogicalOperator> r2 = p2.getRoots().get(0);
+        AbstractLogicalOperator opr2 = (AbstractLogicalOperator) r2.getValue();
+        if (opr2.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
+            return false;
+        }
+        AggregateOperator aggOuter = (AggregateOperator) opr2;
+        int posInAggList = aggOuter.getVariables().indexOf(fVar);
+        if (posInAggList < 0) {
+            return false;
+        }
+        AbstractLogicalOperator outerAggSon = (AbstractLogicalOperator) aggOuter.getInputs().get(0).getValue();
+        if (outerAggSon.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) {
+            return false;
+        }
+        ILogicalExpression eAgg = aggOuter.getExpressions().get(posInAggList).getValue();
+        if (eAgg.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return false;
+        }
+        AbstractFunctionCallExpression listifyCall = (AbstractFunctionCallExpression) eAgg;
+        if (listifyCall.getFunctionIdentifier() != BuiltinOperators.SEQUENCE.getFunctionIdentifier()) {
+            return false;
+        }
+        ILogicalExpression argListify = listifyCall.getArguments().get(0).getValue();
+        if (argListify.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+            return false;
+        }
+
+        Mutable<ILogicalOperator> r = p.getRoots().get(0);
+        AbstractLogicalOperator opInS = (AbstractLogicalOperator) r.getValue();
+        if (opInS.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
+            return false;
+        }
+        AggregateOperator aggInner = (AggregateOperator) opInS;
+        do {
+            opInS = (AbstractLogicalOperator) opInS.getInputs().get(0).getValue();
+        } while (opInS.getOperatorTag() == LogicalOperatorTag.ASSIGN);
+        if (opInS.getOperatorTag() != LogicalOperatorTag.GROUP) {
+            return false;
+        }
+        AbstractLogicalOperator unnestParent = opInS;
+        AbstractLogicalOperator opUnder = (AbstractLogicalOperator) opInS.getInputs().get(0).getValue();
+        // skip Assigns
+        while (opUnder.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+            unnestParent = opUnder;
+            opUnder = (AbstractLogicalOperator) opUnder.getInputs().get(0).getValue();
+        }
+        if (opUnder.getOperatorTag() != LogicalOperatorTag.UNNEST) {
+            return false;
+        }
+        UnnestOperator unnest = (UnnestOperator) opUnder;
+        AbstractLogicalOperator unnestSon = (AbstractLogicalOperator) unnest.getInputs().get(0).getValue();
+        if (unnestSon.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) {
+            return false;
+        }
+        NestedTupleSourceOperator innerNts = (NestedTupleSourceOperator) unnestSon;
+
+        ILogicalExpression eUnnest = unnest.getExpressionRef().getValue();
+        if (eUnnest.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return false;
+        }
+        AbstractFunctionCallExpression uf = (AbstractFunctionCallExpression) eUnnest;
+        if (uf.getFunctionIdentifier() != BuiltinOperators.ITERATE.getFunctionIdentifier()) {
+            return false;
+        }
+        ILogicalExpression scanArg = uf.getArguments().get(0).getValue();
+        if (scanArg.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+            return false;
+        }
+        if (((VariableReferenceExpression) scanArg).getVariableReference() != fVar) {
+            return false;
+        }
+        LogicalVariable uVar = unnest.getVariable();
+        GroupByOperator innerGby = (GroupByOperator) opInS;
+        Set<LogicalVariable> freeInInnerGby = new HashSet<LogicalVariable>();
+        OperatorPropertiesUtil.getFreeVariablesInSubplans(innerGby, freeInInnerGby);
+        for (LogicalVariable v : freeInInnerGby) {
+            if (v != uVar) {
+                return false;
+            }
+        }
+
+        unnestParent.getInputs().get(0).setValue(innerNts);
+        LogicalVariable listifiedVar = ((VariableReferenceExpression) argListify).getVariableReference();
+        substInSubplan(aggInner, uVar, listifiedVar, context);
+        gby.getNestedPlans().add(p);
+        innerNts.getDataSourceReference().setValue(gby);
+        opRef.setValue(gby);
+        OperatorPropertiesUtil.typePlan(p, context);
+        OperatorPropertiesUtil.typePlan(p2, context);
+        context.computeAndSetTypeEnvironmentForOperator(gby);
+        return true;
+
+    }
+
+    private void substInSubplan(AggregateOperator aggInner, LogicalVariable v1, LogicalVariable v2,
+            IOptimizationContext context) throws AlgebricksException {
+        ILogicalOperator op = aggInner;
+        while (op.getInputs().size() == 1) {
+            VariableUtilities.substituteVariables(op, v1, v2, context);
+            op = op.getInputs().get(0).getValue();
+        }
+    }
+}
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/PushGroupByThroughProduct.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/PushGroupByThroughProduct.java
new file mode 100644
index 0000000..4c770ff
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.algebricks_new_version;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
+import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil;
+import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+* The rule searches for a group-by operator immediately following a join
+* operator.
+*
+* <pre>
+* Before
+*
+*   GROUPBY( $v2 : $v1  ){
+*   ...
+*   }
+*   JOIN(TRUE)
+*
+*
+* After
+*
+*   JOIN(TRUE)
+*   GROUPBY( $v2 : $v1  ){
+*   ...
+*   }
+* </pre>
+*/
+
+public class PushGroupByThroughProduct implements IAlgebraicRewriteRule {
+
+    private enum PushTestResult {
+        FALSE,
+        TRUE,
+        REPEATED_DECORS
+    }
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue();
+        if (op1.getOperatorTag() != LogicalOperatorTag.GROUP) {
+            return false;
+        }
+        Mutable<ILogicalOperator> opRef2 = op1.getInputs().get(0);
+        AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue();
+        if (op2.getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
+            return false;
+        }
+        InnerJoinOperator join = (InnerJoinOperator) op2;
+        if (!OperatorPropertiesUtil.isAlwaysTrueCond(join.getCondition().getValue())) {
+            // not a product
+            return false;
+        }
+        GroupByOperator gby = (GroupByOperator) op1;
+
+        List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorToPush = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>();
+        List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorNotToPush = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>();
+
+        Mutable<ILogicalOperator> opLeftRef = join.getInputs().get(0);
+        ILogicalOperator opLeft = opLeftRef.getValue();
+        switch (canPushThrough(gby, opLeft, decorToPush, decorNotToPush)) {
+            case REPEATED_DECORS: {
+                return false;
+            }
+            case TRUE: {
+                push(opRef, opRef2, 0, decorToPush, decorNotToPush, context);
+                return true;
+            }
+            case FALSE: {
+                decorToPush.clear();
+                Mutable<ILogicalOperator> opRightRef = join.getInputs().get(1);
+                ILogicalOperator opRight = opRightRef.getValue();
+                if (canPushThrough(gby, opRight, decorToPush, decorNotToPush) == PushTestResult.TRUE) {
+                    push(opRef, opRef2, 1, decorToPush, decorNotToPush, context);
+                    return true;
+                } else {
+                    return false;
+                }
+            }
+            default: {
+                throw new IllegalStateException();
+            }
+        }
+    }
+
+    private void push(Mutable<ILogicalOperator> opRefGby, Mutable<ILogicalOperator> opRefJoin, int branch,
+            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorToPush,
+            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorNotToPush, IOptimizationContext context)
+                    throws AlgebricksException {
+        GroupByOperator gby = (GroupByOperator) opRefGby.getValue();
+        AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) opRefJoin.getValue();
+        gby.getDecorList().clear();
+        gby.getDecorList().addAll(decorToPush);
+        for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : decorNotToPush) {
+            LogicalVariable v1 = p.first;
+            if (v1 != null) {
+                VariableReferenceExpression varRef = (VariableReferenceExpression) p.second.getValue();
+                LogicalVariable v2 = varRef.getVariableReference();
+                OperatorManipulationUtil.substituteVarRec(join, v2, v1, true, context);
+            }
+        }
+        Mutable<ILogicalOperator> branchRef = join.getInputs().get(branch);
+        ILogicalOperator opBranch = branchRef.getValue();
+        opRefJoin.setValue(opBranch);
+        branchRef.setValue(gby);
+        opRefGby.setValue(join);
+    }
+
+    private PushTestResult canPushThrough(GroupByOperator gby, ILogicalOperator branch,
+            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> toPush,
+            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> notToPush) throws AlgebricksException {
+        Collection<LogicalVariable> fromBranch = new HashSet<LogicalVariable>();
+        VariableUtilities.getLiveVariables(branch, fromBranch);
+        Collection<LogicalVariable> usedInGbyExprList = new ArrayList<LogicalVariable>();
+        for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
+            p.second.getValue().getUsedVariables(usedInGbyExprList);
+        }
+
+        if (!fromBranch.containsAll(usedInGbyExprList)) {
+            return PushTestResult.FALSE;
+        }
+        Set<LogicalVariable> free = new HashSet<LogicalVariable>();
+        for (ILogicalPlan p : gby.getNestedPlans()) {
+            for (Mutable<ILogicalOperator> r : p.getRoots()) {
+                OperatorPropertiesUtil.getFreeVariablesInSelfOrDesc((AbstractLogicalOperator) r.getValue(), free);
+            }
+        }
+        if (!fromBranch.containsAll(free)) {
+            return PushTestResult.FALSE;
+        }
+
+        Set<LogicalVariable> decorVarRhs = new HashSet<LogicalVariable>();
+        decorVarRhs.clear();
+        for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
+            ILogicalExpression expr = p.second.getValue();
+            if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+                return PushTestResult.FALSE;
+            }
+            VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
+            LogicalVariable v = varRef.getVariableReference();
+            if (decorVarRhs.contains(v)) {
+                return PushTestResult.REPEATED_DECORS;
+            }
+            decorVarRhs.add(v);
+
+            if (fromBranch.contains(v)) {
+                toPush.add(p);
+            } else {
+                notToPush.add(p);
+            }
+        }
+        return PushTestResult.TRUE;
+    }
+}
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractPushExpressionIntoDatascanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractPushExpressionIntoDatascanRule.java
new file mode 100644
index 0000000..d35ffca
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import org.apache.vxquery.compiler.rewriter.VXQueryOptimizationContext;
+import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
+import org.apache.vxquery.context.StaticContext;
+import org.apache.vxquery.metadata.IVXQueryDataSource;
+import org.apache.vxquery.metadata.VXQueryMetadataProvider;
+
+public abstract class AbstractPushExpressionIntoDatascanRule extends AbstractUsedVariablesProcessingRule {
+    StaticContext dCtx = null;
+    final int ARG_DATA = 0;
+    final int ARG_TYPE = 1;
+
+    protected boolean processOperator(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        boolean unnestOp = false;
+        boolean assignOp = false;
+
+        UnnestOperator unnest = null;
+        AssignOperator assign = null;
+        AbstractLogicalOperator op2 = null;
+
+        if (dCtx == null) {
+            VXQueryOptimizationContext vxqueryCtx = (VXQueryOptimizationContext) context;
+            dCtx = ((VXQueryMetadataProvider) vxqueryCtx.getMetadataProvider()).getStaticContext();
+        }
+        AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue();
+        if (!(op1.getOperatorTag() == getOperator())) {
+            return false;
+        }
+        if (op1.getOperatorTag() == LogicalOperatorTag.UNNEST) {
+            unnest = (UnnestOperator) op1;
+            unnestOp = true;
+            op2 = (AbstractLogicalOperator) unnest.getInputs().get(0).getValue();
+        } else {
+            assign = (AssignOperator) op1;
+            assignOp = true;
+            op2 = (AbstractLogicalOperator) assign.getInputs().get(0).getValue();
+        }
+
+        if (op2.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
+            return false;
+        }
+        DataSourceScanOperator datascan = (DataSourceScanOperator) op2;
+
+        if (!usedVariables.contains(datascan.getVariables())) {
+
+            Mutable<ILogicalExpression> expressionRef = null;
+            if (unnestOp) {
+                expressionRef = unnest.getExpressionRef();
+            } else if (assignOp) {
+                expressionRef = assign.getExpressions().get(0);
+            }
+            if (!(updateDataSource((IVXQueryDataSource) datascan.getDataSource(), expressionRef))) {
+                return false;
+            }
+            if (unnestOp) {
+                Mutable<ILogicalExpression> varExp = ExpressionToolbox.findVariableExpression(expressionRef,
+                        datascan.getVariables().get(0));
+                AssignOperator noOp = new AssignOperator(unnest.getVariable(), varExp);
+                noOp.getInputs().addAll(unnest.getInputs());
+                opRef.setValue(noOp);
+            } else if (assignOp) {
+                Mutable<ILogicalExpression> varExp = ExpressionToolbox
+                        .findVariableExpression(assign.getExpressions().get(0), datascan.getVariables().get(0));
+                AssignOperator noOp = new AssignOperator(assign.getVariables().get(0), varExp);
+                noOp.getInputs().addAll(assign.getInputs());
+                opRef.setValue(noOp);
+            }
+
+            return true;
+        }
+        return false;
+
+    }
+
+    abstract boolean updateDataSource(IVXQueryDataSource datasource, Mutable<ILogicalExpression> expression);
+
+    abstract LogicalOperatorTag getOperator();
+
+}
index 2430b5d..afafdf1 100644
@@ -20,30 +20,32 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
-import org.apache.vxquery.compiler.rewriter.rules.util.OperatorToolbox;
-import org.apache.vxquery.context.RootStaticContextImpl;
-import org.apache.vxquery.context.StaticContextImpl;
-import org.apache.vxquery.types.SequenceType;
-
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans;
 import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
+import org.apache.vxquery.compiler.rewriter.rules.util.OperatorToolbox;
+import org.apache.vxquery.context.RootStaticContextImpl;
+import org.apache.vxquery.context.StaticContextImpl;
+import org.apache.vxquery.types.SequenceType;
 
 public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlgebraicRewriteRule {
     final StaticContextImpl dCtx = new StaticContextImpl(RootStaticContextImpl.INSTANCE);
     final int ARG_DATA = 0;
     final int ARG_TYPE = 1;
     final List<Mutable<ILogicalExpression>> functionList = new ArrayList<Mutable<ILogicalExpression>>();
-    
+
     protected abstract FunctionIdentifier getSearchFunction();
 
     @Override
-    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
         return false;
     }
 
@@ -54,6 +56,7 @@ public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlg
         List<Mutable<ILogicalExpression>> expressions = OperatorToolbox.getExpressions(opRef);
         for (Mutable<ILogicalExpression> expression : expressions) {
             if (processTypeExpression(opRef, expression)) {
+                context.computeAndSetTypeEnvironmentForOperator(opRef.getValue());
                 modified = true;
             }
         }
@@ -70,11 +73,14 @@ public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlg
             // Get input function
             AbstractFunctionCallExpression searchFunction = (AbstractFunctionCallExpression) searchM.getValue();
             Mutable<ILogicalExpression> argFirstM = searchFunction.getArguments().get(ARG_DATA);
-
             // Find the input return type.
             inputSequenceType = ExpressionToolbox.getOutputSequenceType(opRef, argFirstM, dCtx);
-
             // Find the argument type.
+            if (inputSequenceType == null && !isNestedPlanOperator(opRef).isEmpty()) {
+                for (Mutable<ILogicalOperator> agg : isNestedPlanOperator(opRef)) {
+                    inputSequenceType = ExpressionToolbox.getOutputSequenceType(agg, argFirstM, dCtx);
+                }
+            }
             sTypeArg = null;
             if (hasTypeArgument()) {
                 sTypeArg = ExpressionToolbox.getTypeExpressionTypeArgument(searchM, dCtx);
@@ -89,6 +95,18 @@ public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlg
         return modified;
     }
 
+    public List<Mutable<ILogicalOperator>> isNestedPlanOperator(Mutable<ILogicalOperator> opRef) {
+        List<Mutable<ILogicalOperator>> nestedPlans = new ArrayList<Mutable<ILogicalOperator>>();
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue().getInputs().get(0).getValue();
+        if (op.hasNestedPlans()) {
+            AbstractOperatorWithNestedPlans aownp = (AbstractOperatorWithNestedPlans) op;
+            for (Mutable<ILogicalOperator> input : aownp.getNestedPlans().get(0).getRoots()) {
+                nestedPlans.add(input);
+            }
+        }
+        return nestedPlans;
+    }
+
     public abstract boolean matchesAllInstancesOf(SequenceType sTypeArg, SequenceType sTypeOutput);
 
     public boolean hasTypeArgument() {
index 7ea0614..0fe6f09 100644
@@ -145,6 +145,7 @@ public class ConvertAssignToAggregateRule extends AbstractVXQueryAggregateRule {
         subplanOperator.getInputs().add(assign.getInputs().get(0));
         subplanOperator.setRootOp(nextOperatorRef);
 
+        assign.getInputs().clear();
         opRef.setValue(subplanOperator);
 
         return true;
@@ -164,7 +165,7 @@ public class ConvertAssignToAggregateRule extends AbstractVXQueryAggregateRule {
         aggregateSequenceArgs.add(aggregateArgs);
 
         List<Mutable<ILogicalExpression>> exprs = new ArrayList<Mutable<ILogicalExpression>>();
-        ILogicalExpression aggregateExp = new AggregateFunctionCallExpression(aggregateFunction, true,
+        ILogicalExpression aggregateExp = new AggregateFunctionCallExpression(aggregateFunction, false,
                 aggregateSequenceArgs);
         Mutable<ILogicalExpression> aggregateExpRef = new MutableObject<ILogicalExpression>(aggregateExp);
         exprs.add(aggregateExpRef);
index faf6e09..7b04857 100644
@@ -65,7 +65,8 @@ import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
  */
 public class EliminateUnnestAggregateSubplanRule implements IAlgebraicRewriteRule {
     @Override
-    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
         AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
         if (op.getOperatorTag() != LogicalOperatorTag.UNNEST) {
             return false;
@@ -121,7 +122,8 @@ public class EliminateUnnestAggregateSubplanRule implements IAlgebraicRewriteRul
         }
         functionCall.getArguments().get(0).setValue(new VariableReferenceExpression(assignVariable));
         unnest.getInputs().get(0).setValue(aOp);
-
+        context.computeAndSetTypeEnvironmentForOperator(aOp);
+        context.computeAndSetTypeEnvironmentForOperator(unnest);
         return true;
     }
 
index 4343522..806b532 100644
@@ -71,27 +71,30 @@ public class IntroduceTwoStepAggregateRule implements IAlgebraicRewriteRule {
     final Map<FunctionIdentifier, Pair<IFunctionInfo, IFunctionInfo>> AGGREGATE_MAP = new HashMap<FunctionIdentifier, Pair<IFunctionInfo, IFunctionInfo>>();
 
     public IntroduceTwoStepAggregateRule() {
-        AGGREGATE_MAP.put(BuiltinFunctions.FN_AVG_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>(
-                BuiltinOperators.AVG_LOCAL, BuiltinOperators.AVG_GLOBAL));
-        AGGREGATE_MAP.put(BuiltinFunctions.FN_COUNT_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>(
-                BuiltinFunctions.FN_COUNT_1, BuiltinFunctions.FN_SUM_1));
-        AGGREGATE_MAP.put(BuiltinFunctions.FN_MAX_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>(
-                BuiltinFunctions.FN_MAX_1, BuiltinFunctions.FN_MAX_1));
-        AGGREGATE_MAP.put(BuiltinFunctions.FN_MIN_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>(
-                BuiltinFunctions.FN_MIN_1, BuiltinFunctions.FN_MIN_1));
-        AGGREGATE_MAP.put(BuiltinFunctions.FN_SUM_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>(
-                BuiltinFunctions.FN_SUM_1, BuiltinFunctions.FN_SUM_1));
+        AGGREGATE_MAP.put(BuiltinFunctions.FN_AVG_1.getFunctionIdentifier(),
+                new Pair<IFunctionInfo, IFunctionInfo>(BuiltinOperators.AVG_LOCAL, BuiltinOperators.AVG_GLOBAL));
+        AGGREGATE_MAP.put(BuiltinFunctions.FN_COUNT_1.getFunctionIdentifier(),
+                new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_COUNT_1, BuiltinFunctions.FN_SUM_1));
+        AGGREGATE_MAP.put(BuiltinFunctions.FN_MAX_1.getFunctionIdentifier(),
+                new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_MAX_1, BuiltinFunctions.FN_MAX_1));
+        AGGREGATE_MAP.put(BuiltinFunctions.FN_MIN_1.getFunctionIdentifier(),
+                new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_MIN_1, BuiltinFunctions.FN_MIN_1));
+        AGGREGATE_MAP.put(BuiltinFunctions.FN_SUM_1.getFunctionIdentifier(),
+                new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_SUM_1, BuiltinFunctions.FN_SUM_1));
     }
 
     @Override
-    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
         // Check if aggregate function.
         AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
         if (op.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
             return false;
         }
         AggregateOperator aggregate = (AggregateOperator) op;
-
+        if (aggregate.getExpressions().size() == 0) {
+            return false;
+        }
         Mutable<ILogicalExpression> mutableLogicalExpression = aggregate.getExpressions().get(0);
         ILogicalExpression logicalExpression = mutableLogicalExpression.getValue();
         if (logicalExpression.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushAggregateIntoGroupbyRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushAggregateIntoGroupbyRule.java
new file mode 100644
index 0000000..5cb111c
--- /dev/null
@@ -0,0 +1,494 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
+import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil;
+import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import org.apache.vxquery.functions.BuiltinOperators;
+import org.apache.vxquery.functions.Function;
+
+/**
+ * This rule pushes a subplan on top of a group-by into the
+ * nested plan of the group-by.
+ * 
+ * <pre>
+ * Before
+ * 
+ *  SUBPLAN {
+ *      AGGREGATE ($v5 : $v4)
+ *      UNNEST ($v4 :$v3)
+ *  } 
+ *  GROUPBY ($v2 : $v1]) decor ([]) {
+ *      AGGREGATE ($v3 : $v0) 
+ *  } 
+ *
+ * After
+ * 
+ * GROUPBY ($v2 : $v1]) decor ([]) {
+ *      AGGREGATE ($v5 : $v0) 
+ *  } 
+ *  
+ *  </pre>
+ */
+
+public class PushAggregateIntoGroupbyRule implements IAlgebraicRewriteRule {
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        Map<LogicalVariable, Integer> gbyAggVars = new HashMap<LogicalVariable, Integer>();
+        Map<LogicalVariable, Integer> gbyAggVarToPlanIndex = new HashMap<LogicalVariable, Integer>();
+        Map<LogicalVariable, GroupByOperator> gbyWithAgg = new HashMap<LogicalVariable, GroupByOperator>();
+        Map<ILogicalExpression, ILogicalExpression> aggExprToVarExpr = new HashMap<ILogicalExpression, ILogicalExpression>();
+        // first collect vars. referring to listified sequences
+        boolean changed = collectVarsBottomUp(opRef, context, gbyAggVars, gbyWithAgg, gbyAggVarToPlanIndex,
+                aggExprToVarExpr);
+        if (changed) {
+            removeRedundantListifies(opRef, context, gbyAggVars, gbyWithAgg, gbyAggVarToPlanIndex);
+        }
+        return changed;
+    }
+
+    private void removeRedundantListifies(Mutable<ILogicalOperator> opRef, IOptimizationContext context,
+            Map<LogicalVariable, Integer> gbyAggVars, Map<LogicalVariable, GroupByOperator> gbyWithAgg,
+            Map<LogicalVariable, Integer> gbyAggVarToPlanIndex) throws AlgebricksException {
+        for (LogicalVariable aggVar : gbyAggVars.keySet()) {
+            int occurs = gbyAggVars.get(aggVar);
+            if (occurs == 0) {
+                GroupByOperator gbyOp = gbyWithAgg.get(aggVar);
+                AggregateOperator aggOp = (AggregateOperator) gbyOp.getNestedPlans()
+                        .get(gbyAggVarToPlanIndex.get(aggVar)).getRoots().get(0).getValue();
+                int pos = aggOp.getVariables().indexOf(aggVar);
+                if (pos >= 0) {
+                    aggOp.getVariables().remove(pos);
+                    aggOp.getExpressions().remove(pos);
+                    List<LogicalVariable> producedVarsAtAgg = new ArrayList<LogicalVariable>();
+                    VariableUtilities.getProducedVariablesInDescendantsAndSelf(aggOp, producedVarsAtAgg);
+                    if (producedVarsAtAgg.isEmpty()) {
+                        gbyOp.getNestedPlans().remove(gbyAggVarToPlanIndex.get(aggVar));
+                    }
+                }
+            }
+        }
+    }
+
+    private boolean collectVarsBottomUp(Mutable<ILogicalOperator> opRef, IOptimizationContext context,
+            Map<LogicalVariable, Integer> gbyListifyVarsCount, Map<LogicalVariable, GroupByOperator> gbyWithAgg,
+            Map<LogicalVariable, Integer> gbyAggVarToPlanIndex,
+            Map<ILogicalExpression, ILogicalExpression> aggregateExprToVarExpr) throws AlgebricksException {
+        AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue();
+        context.addToDontApplySet(this, op1);
+        boolean change = false;
+        for (Mutable<ILogicalOperator> child : op1.getInputs()) {
+            if (collectVarsBottomUp(child, context, gbyListifyVarsCount, gbyWithAgg, gbyAggVarToPlanIndex,
+                    aggregateExprToVarExpr)) {
+                change = true;
+            }
+        }
+        Set<LogicalVariable> used = new HashSet<>();
+        VariableUtilities.getUsedVariables(op1, used);
+        switch (op1.getOperatorTag()) {
+            case ASSIGN:
+            case SELECT: {
+                boolean found = false;
+                // Do some prefiltering: check if the Assign uses any gby vars.
+                for (LogicalVariable v : used) {
+                    if (gbyListifyVarsCount.get(v) != null) {
+                        found = true;
+                        break;
+                    }
+                }
+                if (found) {
+                    if (op1.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+                        AssignOperator assign = (AssignOperator) op1;
+                        for (Mutable<ILogicalExpression> exprRef : assign.getExpressions()) {
+                            Pair<Boolean, ILogicalExpression> p = extractAggFunctionsFromExpression(exprRef, gbyWithAgg,
+                                    aggregateExprToVarExpr, context);
+                            if (p.first) {
+                                change = true;
+                                exprRef.setValue(p.second);
+                            }
+                        }
+                    }
+                    if (op1.getOperatorTag() == LogicalOperatorTag.SELECT) {
+                        SelectOperator select = (SelectOperator) op1;
+                        Mutable<ILogicalExpression> exprRef = select.getCondition();
+                        Pair<Boolean, ILogicalExpression> p = extractAggFunctionsFromExpression(exprRef, gbyWithAgg,
+                                aggregateExprToVarExpr, context);
+                        if (p.first) {
+                            change = true;
+                            exprRef.setValue(p.second);
+                        }
+                    }
+                    used.clear();
+                    VariableUtilities.getUsedVariables(op1, used);
+                    // increment the count for the ones which are still used
+                    for (LogicalVariable v : used) {
+                        Integer m = gbyListifyVarsCount.get(v);
+                        if (m != null) {
+                            gbyListifyVarsCount.put(v, m + 1);
+                        }
+                    }
+                }
+                break;
+            }
+            case SUBPLAN: {
+                for (LogicalVariable v : used) {
+                    Integer m = gbyListifyVarsCount.get(v);
+                    if (m != null) {
+                        GroupByOperator gbyOp = gbyWithAgg.get(v);
+                        if (pushSubplanAsAggIntoGby(opRef, gbyOp, v, gbyListifyVarsCount, gbyWithAgg,
+                                gbyAggVarToPlanIndex, context)) {
+                            change = true;
+                        } else {
+                            gbyListifyVarsCount.put(v, m + 1);
+                        }
+                    }
+                }
+                break;
+            }
+            case GROUP: {
+                List<LogicalVariable> vars = collectOneVarPerAggFromGroupOp((GroupByOperator) op1);
+                if (vars != null) {
+                    for (int i = 0; i < vars.size(); i++) {
+                        LogicalVariable v = vars.get(i);
+                        if (v != null) {
+                            gbyListifyVarsCount.put(v, 0);
+                            gbyAggVarToPlanIndex.put(v, i);
+                            gbyWithAgg.put(v, (GroupByOperator) op1);
+                        }
+                    }
+                }
+                break;
+            }
+            default: {
+                for (LogicalVariable v : used) {
+                    Integer m = gbyListifyVarsCount.get(v);
+                    if (m != null) {
+                        gbyListifyVarsCount.put(v, m + 1);
+                    }
+                }
+            }
+        }
+        return change;
+    }
+
+    private List<LogicalVariable> collectOneVarPerAggFromGroupOp(GroupByOperator group) {
+        List<ILogicalPlan> nPlans = group.getNestedPlans();
+        if (nPlans == null || nPlans.size() < 1) {
+            return null;
+        }
+
+        List<LogicalVariable> aggVars = new ArrayList<LogicalVariable>();
+        // test that the group-by computes a "listify" aggregate
+        for (int i = 0; i < nPlans.size(); i++) {
+            AbstractLogicalOperator topOp = (AbstractLogicalOperator) nPlans.get(i).getRoots().get(0).getValue();
+            // Keep the returned list aligned with the nested-plan indices: a null entry
+            // marks a plan whose root is not a single SEQUENCE (listify) aggregate.
+            if (topOp.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
+                aggVars.add(null);
+                continue;
+            }
+            AggregateOperator agg = (AggregateOperator) topOp;
+            if (agg.getVariables().size() != 1) {
+                aggVars.add(null);
+                continue;
+            }
+            ILogicalExpression expr = agg.getExpressions().get(0).getValue();
+            if (((AbstractLogicalExpression) expr).getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+                aggVars.add(null);
+                continue;
+            }
+            AbstractFunctionCallExpression fceAgg = (AbstractFunctionCallExpression) expr;
+            if (fceAgg.getFunctionIdentifier() != BuiltinOperators.SEQUENCE.getFunctionIdentifier()) {
+                aggVars.add(null);
+                continue;
+            }
+            aggVars.add(agg.getVariables().get(0));
+        }
+        return aggVars;
+    }
+
+    /**
+     * Replaces aggregate-style function calls over a group-by's listified
+     * variable with references to new aggregate variables computed inside the
+     * group-by's nested plan.
+     *
+     * @param exprRef
+     *            reference to the expression to transform
+     * @param gbyWithAgg
+     *            maps each listified variable to the group-by operator that produces it
+     * @param aggregateExprToVarExpr
+     *            memo of aggregate expressions that have already been rewritten
+     * @param context
+     *            the optimization context
+     * @return a pair whose first member is true iff something was changed in
+     *         the expression tree rooted at exprRef; the second member is the
+     *         result of transforming exprRef.
+     * @throws AlgebricksException
+     */
+    private Pair<Boolean, ILogicalExpression> extractAggFunctionsFromExpression(Mutable<ILogicalExpression> exprRef,
+            Map<LogicalVariable, GroupByOperator> gbyWithAgg,
+            Map<ILogicalExpression, ILogicalExpression> aggregateExprToVarExpr, IOptimizationContext context)
+                    throws AlgebricksException {
+        ILogicalExpression expr = exprRef.getValue();
+        switch (expr.getExpressionTag()) {
+            case FUNCTION_CALL: {
+                AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) expr;
+                Function functionInfo = (Function) fce.getFunctionInfo();
+                FunctionIdentifier fi = null;
+                if (functionInfo.hasAggregateEvaluatorFactory()) {
+                    fi = functionInfo.getFunctionIdentifier();
+                }
+                if (fi != null) {
+                    ILogicalExpression a1 = fce.getArguments().get(0).getValue();
+                    if (a1.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+                        LogicalVariable argVar = ((VariableReferenceExpression) a1).getVariableReference();
+                        GroupByOperator gbyOp = gbyWithAgg.get(argVar);
+
+                        if (gbyOp != null) {
+                            if (!aggregateExprToVarExpr.containsKey(expr)) {
+                                LogicalVariable newVar = context.newVar();
+                                AggregateFunctionCallExpression aggFun = new AggregateFunctionCallExpression(
+                                        functionInfo, false, fce.getArguments());
+                                rewriteGroupByAggregate(argVar, gbyOp, aggFun, newVar, context);
+                                ILogicalExpression newVarExpr = new VariableReferenceExpression(newVar);
+                                aggregateExprToVarExpr.put(expr, newVarExpr);
+                                return new Pair<Boolean, ILogicalExpression>(Boolean.TRUE, newVarExpr);
+                            } else {
+                                ILogicalExpression varExpr = aggregateExprToVarExpr.get(expr);
+                                return new Pair<Boolean, ILogicalExpression>(Boolean.TRUE, varExpr);
+                            }
+                        }
+                    }
+                }
+
+                boolean change = false;
+                for (Mutable<ILogicalExpression> a : fce.getArguments()) {
+                    Pair<Boolean, ILogicalExpression> aggArg = extractAggFunctionsFromExpression(a, gbyWithAgg,
+                            aggregateExprToVarExpr, context);
+                    if (aggArg.first.booleanValue()) {
+                        a.setValue(aggArg.second);
+                        change = true;
+                    }
+                }
+                return new Pair<Boolean, ILogicalExpression>(change, fce);
+
+            }
+            case VARIABLE:
+            case CONSTANT: {
+                return new Pair<Boolean, ILogicalExpression>(Boolean.FALSE, expr);
+            }
+            default: {
+                throw new IllegalArgumentException("Unsupported expression tag: " + expr.getExpressionTag());
+            }
+        }
+    }
+
+    private void rewriteGroupByAggregate(LogicalVariable oldAggVar, GroupByOperator gbyOp,
+            AggregateFunctionCallExpression aggFun, LogicalVariable newAggVar, IOptimizationContext context)
+                    throws AlgebricksException {
+        for (int j = 0; j < gbyOp.getNestedPlans().size(); j++) {
+            AggregateOperator aggOp = (AggregateOperator) gbyOp.getNestedPlans().get(j).getRoots().get(0).getValue();
+            int n = aggOp.getVariables().size();
+            for (int i = 0; i < n; i++) {
+                LogicalVariable v = aggOp.getVariables().get(i);
+                if (v.equals(oldAggVar)) {
+                    AbstractFunctionCallExpression oldAggExpr = (AbstractFunctionCallExpression) aggOp.getExpressions()
+                            .get(i).getValue();
+                    AggregateFunctionCallExpression newAggFun = new AggregateFunctionCallExpression(
+                            aggFun.getFunctionInfo(), false, new ArrayList<Mutable<ILogicalExpression>>());
+                    for (Mutable<ILogicalExpression> arg : oldAggExpr.getArguments()) {
+                        ILogicalExpression cloned = ((AbstractLogicalExpression) arg.getValue()).cloneExpression();
+                        newAggFun.getArguments().add(new MutableObject<ILogicalExpression>(cloned));
+                    }
+                    aggOp.getVariables().add(newAggVar);
+                    aggOp.getExpressions().add(new MutableObject<ILogicalExpression>(newAggFun));
+                    context.computeAndSetTypeEnvironmentForOperator(aggOp);
+                    break;
+                }
+            }
+        }
+    }
+
+    private boolean pushSubplanAsAggIntoGby(Mutable<ILogicalOperator> subplanOpRef, GroupByOperator gbyOp,
+            LogicalVariable varFromGroupAgg, Map<LogicalVariable, Integer> gbyAggVars,
+            Map<LogicalVariable, GroupByOperator> gbyWithAgg, Map<LogicalVariable, Integer> gbyAggVarToPlanIndex,
+            IOptimizationContext context) throws AlgebricksException {
+        SubplanOperator subplan = (SubplanOperator) subplanOpRef.getValue();
+        // only free var can be varFromGroupAgg
+        HashSet<LogicalVariable> freeVars = new HashSet<LogicalVariable>();
+        OperatorPropertiesUtil.getFreeVariablesInSubplans(subplan, freeVars);
+        for (LogicalVariable vFree : freeVars) {
+            if (!vFree.equals(varFromGroupAgg)) {
+                return false;
+            }
+        }
+
+        List<ILogicalPlan> plans = subplan.getNestedPlans();
+        if (plans.size() > 1) {
+            return false;
+        }
+        ILogicalPlan p = plans.get(0);
+        if (p.getRoots().size() > 1) {
+            return false;
+        }
+        Mutable<ILogicalOperator> opRef = p.getRoots().get(0);
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        if (op.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
+            return false;
+        }
+        AggregateOperator aggInSubplanOp = (AggregateOperator) op;
+        LogicalVariable unnestVar = null;
+        boolean pushableNestedSubplan = false;
+        while (op.getInputs().size() == 1) {
+            opRef = op.getInputs().get(0);
+            op = (AbstractLogicalOperator) opRef.getValue();
+            switch (op.getOperatorTag()) {
+                case ASSIGN: {
+                    break;
+                }
+                case UNNEST: {
+                    UnnestOperator unnest = (UnnestOperator) op;
+                    if (unnest.getPositionalVariable() != null) {
+                        // TODO currently subplan with both accumulating and running aggregate is not supported.
+                        return false;
+                    }
+                    ILogicalExpression expr = unnest.getExpressionRef().getValue();
+                    if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+                        return false;
+                    }
+                    AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) expr;
+                    if (fun.getFunctionIdentifier() != BuiltinOperators.ITERATE.getFunctionIdentifier()) {
+                        return false;
+                    }
+                    ILogicalExpression arg0 = fun.getArguments().get(0).getValue();
+                    if (arg0.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+                        return false;
+                    }
+                    VariableReferenceExpression varExpr = (VariableReferenceExpression) arg0;
+                    if (!varExpr.getVariableReference().equals(varFromGroupAgg)) {
+                        return false;
+                    }
+                    opRef = op.getInputs().get(0);
+                    op = (AbstractLogicalOperator) opRef.getValue();
+                    if (op.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) {
+                        return false;
+                    }
+                    pushableNestedSubplan = true;
+                    unnestVar = unnest.getVariable();
+                    break;
+                }
+                default: {
+                    return false;
+                }
+            }
+        }
+        if (pushableNestedSubplan) {
+            for (int i = 0; i < gbyOp.getNestedPlans().size(); i++) {
+                Mutable<ILogicalOperator> gbyAggRef = gbyOp.getNestedPlans().get(i).getRoots().get(0);
+                AggregateOperator gbyAgg = (AggregateOperator) gbyAggRef.getValue();
+                Mutable<ILogicalOperator> gbyAggChildRef = gbyAgg.getInputs().get(0);
+                LogicalVariable listifyVar = findListifiedVariable(gbyAgg, varFromGroupAgg);
+                if (listifyVar == null) {
+                    continue;
+                }
+                OperatorManipulationUtil.substituteVarRec(aggInSubplanOp, unnestVar, listifyVar, true, context);
+                gbyAgg.getVariables().addAll(aggInSubplanOp.getVariables());
+                gbyAgg.getExpressions().addAll(aggInSubplanOp.getExpressions());
+                for (LogicalVariable v : aggInSubplanOp.getVariables()) {
+                    gbyWithAgg.put(v, gbyOp);
+                    gbyAggVars.put(v, 0);
+                    gbyAggVarToPlanIndex.put(v, i);
+                }
+
+                Mutable<ILogicalOperator> opRef1InSubplan = aggInSubplanOp.getInputs().get(0);
+                if (opRef1InSubplan.getValue().getInputs().size() > 0) {
+                    Mutable<ILogicalOperator> opRef2InSubplan = opRef1InSubplan.getValue().getInputs().get(0);
+                    AbstractLogicalOperator op2InSubplan = (AbstractLogicalOperator) opRef2InSubplan.getValue();
+                    if (op2InSubplan.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) {
+                        List<Mutable<ILogicalOperator>> gbyInpList = gbyAgg.getInputs();
+                        gbyInpList.clear();
+                        gbyInpList.add(opRef1InSubplan);
+                        while (true) {
+                            opRef2InSubplan = opRef1InSubplan.getValue().getInputs().get(0);
+                            op2InSubplan = (AbstractLogicalOperator) opRef2InSubplan.getValue();
+                            if (op2InSubplan.getOperatorTag() == LogicalOperatorTag.UNNEST) {
+                                List<Mutable<ILogicalOperator>> opInpList = opRef1InSubplan.getValue().getInputs();
+                                opInpList.clear();
+                                opInpList.add(gbyAggChildRef);
+                                break;
+                            }
+                            opRef1InSubplan = opRef2InSubplan;
+                            if (opRef1InSubplan.getValue().getInputs().size() == 0) {
+                                throw new IllegalStateException("PushAggregateIntoGroupbyRule: could not find UNNEST.");
+                            }
+                        }
+                    }
+                }
+                subplanOpRef.setValue(subplan.getInputs().get(0).getValue());
+                OperatorPropertiesUtil.typeOpRec(gbyAggRef, context);
+            }
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    private LogicalVariable findListifiedVariable(AggregateOperator gbyAgg, LogicalVariable varFromGroupAgg) {
+        int n = gbyAgg.getVariables().size();
+
+        for (int i = 0; i < n; i++) {
+            if (gbyAgg.getVariables().get(i).equals(varFromGroupAgg)) {
+                AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) gbyAgg.getExpressions().get(i)
+                        .getValue();
+                if (fce.getFunctionIdentifier().equals(BuiltinOperators.SEQUENCE.getFunctionIdentifier())) {
+                    ILogicalExpression argExpr = fce.getArguments().get(0).getValue();
+                    if (((AbstractLogicalExpression) argExpr).getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+                        return ((VariableReferenceExpression) argExpr).getVariableReference();
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
+}
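As a purely illustrative example of the query shape this rule targets (the collection path, element names, and key expression are placeholders, not taken from this patch), consider an XQuery 3.0 group-by whose per-group count() is applied to the grouped (listified) variable:

    for $x in collection("/tmp/orders")/order
    group by $k := $x/status/string()
    return count($x)

After the rewrite, the count over $x is evaluated as an additional aggregate inside the group-by's nested plan and the original call is replaced by a variable reference, so the full group sequence does not have to be re-materialized for each aggregate.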
index 6060c19..dbcce54 100644 (file)
@@ -20,22 +20,11 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
-import org.apache.vxquery.compiler.rewriter.VXQueryOptimizationContext;
 import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
-import org.apache.vxquery.context.StaticContext;
 import org.apache.vxquery.functions.BuiltinOperators;
-import org.apache.vxquery.metadata.VXQueryCollectionDataSource;
-import org.apache.vxquery.metadata.VXQueryIndexingDataSource;
-import org.apache.vxquery.metadata.VXQueryMetadataProvider;
+import org.apache.vxquery.metadata.IVXQueryDataSource;
 import org.apache.vxquery.types.ElementType;
 
 /**
@@ -55,82 +44,42 @@ import org.apache.vxquery.types.ElementType;
  * After
  *
  *   plan__parent
+ *   ASSIGN( $v2 : $v1 ) 
  *   DATASCAN( $source : $v1 )
  *   plan__child
  *
  *   $source is encoded with the child parameters.
  * </pre>
- *
- * @author prestonc
  */
-public class PushChildIntoDataScanRule extends AbstractUsedVariablesProcessingRule {
-    StaticContext dCtx = null;
-    final int ARG_DATA = 0;
-    final int ARG_TYPE = 1;
-
-    protected boolean processOperator(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
-            throws AlgebricksException {
-        if (dCtx == null) {
-            VXQueryOptimizationContext vxqueryCtx = (VXQueryOptimizationContext) context;
-            dCtx = ((VXQueryMetadataProvider) vxqueryCtx.getMetadataProvider()).getStaticContext();
-        }
-        AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue();
-        if (op1.getOperatorTag() != LogicalOperatorTag.UNNEST) {
-            return false;
-        }
-        UnnestOperator unnest = (UnnestOperator) op1;
+public class PushChildIntoDataScanRule extends AbstractPushExpressionIntoDatascanRule {
 
-        AbstractLogicalOperator op2 = (AbstractLogicalOperator) unnest.getInputs().get(0).getValue();
-        if (op2.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) {
+    @Override
+    boolean updateDataSource(IVXQueryDataSource datasource, Mutable<ILogicalExpression> expression) {
+        //TODO: indexing needs to be extended to support push child into datascan
+        if (datasource.usingIndex()) {
             return false;
         }
-        DataSourceScanOperator datascan = (DataSourceScanOperator) op2;
-
-        if (!usedVariables.contains(datascan.getVariables())) {
-            VXQueryCollectionDataSource ds = null;
-            VXQueryIndexingDataSource ids = null;
-
-            // Find all child functions.
-            try {
-                ids = (VXQueryIndexingDataSource) datascan.getDataSource();
-            } catch (ClassCastException e) {
-                ds = (VXQueryCollectionDataSource) datascan.getDataSource();
-            }
-
-            if (!updateDataSource(ds, unnest.getExpressionRef())) {
-                return false;
-            }
-
-            // Replace unnest with noop assign. Keeps variable chain.
-            Mutable<ILogicalExpression> varExp = ExpressionToolbox.findVariableExpression(unnest.getExpressionRef(),
-                    datascan.getVariables().get(0));
-            AssignOperator noOp = new AssignOperator(unnest.getVariable(), varExp);
-            noOp.getInputs().addAll(unnest.getInputs());
-            opRef.setValue(noOp);
-            return true;
-        }
-        return false;
-    }
-
-    /**
-     * In reverse add them to the data source.
-     *
-     * @param ds
-     * @param expression
-     */
-    private boolean updateDataSource(VXQueryCollectionDataSource ds, Mutable<ILogicalExpression> expression) {
         boolean added = false;
         List<Mutable<ILogicalExpression>> finds = new ArrayList<Mutable<ILogicalExpression>>();
         ExpressionToolbox.findAllFunctionExpressions(expression, BuiltinOperators.CHILD.getFunctionIdentifier(), finds);
         for (int i = finds.size(); i > 0; --i) {
             int typeId = ExpressionToolbox.getTypeExpressionTypeArgument(finds.get(i - 1));
             if (typeId > 0) {
-                if (dCtx.lookupSequenceType(typeId).getItemType().equals(ElementType.ANYELEMENT) && typeId > 0) {
-                    ds.addChildSeq(typeId);
+                ElementType it = (ElementType) dCtx.lookupSequenceType(typeId).getItemType();
+                ElementType et = ElementType.ANYELEMENT;
+
+                if (it.getContentType().equals(et.getContentType())) {
+                    datasource.addChildSeq(typeId);
                     added = true;
                 }
             }
         }
         return added;
     }
+
+    @Override
+    LogicalOperatorTag getOperator() {
+        return LogicalOperatorTag.UNNEST;
+    }
+
 }
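For reference, an illustrative query shape that exercises this rule (collection path and element names are placeholders, not taken from this patch):

    for $t in collection("/tmp/books")/book/title
    return $t

Each child step compiles to a BuiltinOperators.CHILD call wrapped around the collection scan variable; folding the step into the datascan lets each partition filter elements during the scan instead of materializing them first.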
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushKeysOrMembersIntoDatascanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushKeysOrMembersIntoDatascanRule.java
new file mode 100644 (file)
index 0000000..41b6401
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.data.std.primitive.BooleanPointable;
+import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
+import org.apache.vxquery.datamodel.values.XDMConstants;
+import org.apache.vxquery.functions.BuiltinOperators;
+import org.apache.vxquery.metadata.AbstractVXQueryDataSource;
+import org.apache.vxquery.metadata.IVXQueryDataSource;
+
+/**
+ * The rule searches for an unnest operator immediately following a data scan
+ * operator.
+ *
+ * <pre>
+ * Before
+ *
+ *   plan__parent
+ *   UNNEST( $v2 : keys-or-members( $v1 ) )
+ *   DATASCAN( $source : $v1 )
+ *   plan__child
+ *
+ *   Where $v1 is not used in plan__parent.
+ *
+ * After
+ *
+ *   plan__parent
+ *   ASSIGN( $v2 : $v1 ) 
+ *   DATASCAN( $source : $v1 )
+ *   plan__child
+ *
+ *   $source is encoded with the keys-or-members parameters.
+ * </pre>
+ */
+public class PushKeysOrMembersIntoDatascanRule extends AbstractPushExpressionIntoDatascanRule {
+
+    @Override
+    boolean updateDataSource(IVXQueryDataSource datasource, Mutable<ILogicalExpression> expression) {
+        AbstractVXQueryDataSource ds = (AbstractVXQueryDataSource) datasource;
+        boolean added = false;
+        BooleanPointable bp = (BooleanPointable) BooleanPointable.FACTORY.createPointable();
+        List<Mutable<ILogicalExpression>> findkeys = new ArrayList<Mutable<ILogicalExpression>>();
+        ExpressionToolbox.findAllFunctionExpressions(expression,
+                BuiltinOperators.KEYS_OR_MEMBERS.getFunctionIdentifier(), findkeys);
+        for (int i = findkeys.size(); i > 0; --i) {
+            XDMConstants.setTrue(bp);
+            ds.addValueSeq(ArrayUtils.toObject(bp.getByteArray()));
+            added = true;
+        }
+        return added;
+    }
+
+    @Override
+    LogicalOperatorTag getOperator() {
+        return LogicalOperatorTag.UNNEST;
+    }
+}
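For reference, an illustrative JSONiq-style query that this rule targets (it assumes collection() resolves to JSON documents and the standard jn:keys builtin; the path is a placeholder, not taken from this patch):

    for $k in jn:keys(collection("/tmp/sensors"))
    return $k

The keys-or-members call over the scanned variable is encoded into the datascan as a boolean marker (XDMConstants.setTrue above), which the streaming JSON parser interprets as "emit every key or member at this level".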
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushValueIntoDatascanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushValueIntoDatascanRule.java
new file mode 100644 (file)
index 0000000..1d8a55d
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.compiler.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox;
+import org.apache.vxquery.functions.BuiltinFunctions;
+import org.apache.vxquery.functions.BuiltinOperators;
+import org.apache.vxquery.metadata.IVXQueryDataSource;
+import org.apache.vxquery.metadata.VXQueryCollectionDataSource;
+
+/**
+ * The rule searches for an assign operator immediately following a data scan
+ * operator.
+ *
+ * <pre>
+ * Before
+ *
+ *   plan__parent
+ *   ASSIGN( $v2 : value( $v1, constant) )
+ *   DATASCAN( $source : $v1 )
+ *   plan__child
+ *
+ *   Where $v1 is not used in plan__parent.
+ *
+ * After
+ *
+ *   plan__parent
+ *   ASSIGN( $v2 : $v1 )
+ *   DATASCAN( $source : $v1 )
+ *   plan__child
+ *
+ *   $source is encoded with the value parameters.
+ * </pre>
+ */
+
+public class PushValueIntoDatascanRule extends AbstractPushExpressionIntoDatascanRule {
+
+    @Override
+    boolean updateDataSource(IVXQueryDataSource datasource, Mutable<ILogicalExpression> expression) {
+        VXQueryCollectionDataSource ds = (VXQueryCollectionDataSource) datasource;
+        boolean added = false;
+        List<Mutable<ILogicalExpression>> finds = new ArrayList<Mutable<ILogicalExpression>>();
+        ILogicalExpression le = expression.getValue();
+        if (le.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+            AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) le;
+            if (afce.getFunctionIdentifier().equals(BuiltinFunctions.FN_ZERO_OR_ONE_1.getFunctionIdentifier())) {
+                return false;
+            }
+        }
+        ExpressionToolbox.findAllFunctionExpressions(expression, BuiltinOperators.VALUE.getFunctionIdentifier(), finds);
+
+        for (int i = finds.size(); i > 0; --i) {
+            Byte[] value = null;
+            List<ILogicalExpression> values = ExpressionToolbox.getFullArguments(finds.get(i - 1));
+            if (values.size() > 1) {
+                value = ExpressionToolbox.getConstantArgument(finds.get(i - 1), 1);
+                // Only push the lookup into the datascan when the key/index argument is a constant.
+                if (value != null) {
+                    ds.addValueSeq(value);
+                    added = true;
+                }
+            }
+        }
+
+        return added;
+    }
+
+    @Override
+    LogicalOperatorTag getOperator() {
+        return LogicalOperatorTag.ASSIGN;
+    }
+
+}
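For reference, an illustrative JSONiq-style lookup this rule pushes down (the object-call accessor syntax, collection path, and field name are assumptions for illustration, not taken from this patch):

    for $o in collection("/tmp/sensors")
    return $o("temperature")

The constant "temperature" is the tagged constant captured by ExpressionToolbox.getConstantArgument and handed to addValueSeq, so the streaming JSON parser can skip subtrees whose keys do not match the requested path.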
index 69940ad..d86de98 100644 (file)
@@ -41,6 +41,7 @@ import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceE
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
@@ -94,12 +95,14 @@ import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
 public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule {
 
     @Override
-    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
         return false;
     }
 
     @Override
-    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
         boolean operatorChanged = false;
         // Do not process empty or nested tuple source.
         AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
@@ -151,7 +154,9 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 }
             }
         }
-
+        if (operatorChanged) {
+            context.computeAndSetTypeEnvironmentForOperator(op);
+        }
         // Now with the new operator, update the variable mappings.
         cardinalityVariable = CardinalityRuleToolbox.updateCardinalityVariable(op, cardinalityVariable, vxqueryContext);
         updateVariableMap(op, cardinalityVariable, documentOrderVariables, uniqueNodesVariables, vxqueryContext);
@@ -178,8 +183,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
             return 0;
         }
         AbstractFunctionCallExpression functionCall = (AbstractFunctionCallExpression) logicalExpression;
-        if (!functionCall.getFunctionIdentifier().equals(
-                BuiltinOperators.SORT_DISTINCT_NODES_ASC_OR_ATOMICS.getFunctionIdentifier())) {
+        if (!functionCall.getFunctionIdentifier()
+                .equals(BuiltinOperators.SORT_DISTINCT_NODES_ASC_OR_ATOMICS.getFunctionIdentifier())) {
             return 0;
         }
 
@@ -314,7 +319,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
      * @param uniqueNodesVariables
      * @param uniqueNodes
      */
-    private void resetUniqueNodesVariables(HashMap<Integer, UniqueNodes> uniqueNodesVariables, UniqueNodes uniqueNodes) {
+    private void resetUniqueNodesVariables(HashMap<Integer, UniqueNodes> uniqueNodesVariables,
+            UniqueNodes uniqueNodes) {
         for (Entry<Integer, UniqueNodes> entry : uniqueNodesVariables.entrySet()) {
             uniqueNodesVariables.put(entry.getKey(), uniqueNodes);
         }
@@ -349,8 +355,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
             case ASSIGN:
                 AssignOperator assign = (AssignOperator) op;
                 for (int index = 0; index < assign.getExpressions().size(); index++) {
-                    ILogicalExpression assignLogicalExpression = (ILogicalExpression) assign.getExpressions()
-                            .get(index).getValue();
+                    ILogicalExpression assignLogicalExpression = (ILogicalExpression) assign.getExpressions().get(index)
+                            .getValue();
                     variableId = assign.getVariables().get(index).getId();
                     documentOrder = propagateDocumentOrder(assignLogicalExpression, documentOrderVariablesForOperator);
                     uniqueNodes = propagateUniqueNodes(assignLogicalExpression, uniqueNodesVariablesForOperator);
@@ -384,8 +390,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 // Find the last operator to set a variable and call this function again.
                 SubplanOperator subplan = (SubplanOperator) op;
                 for (int index = 0; index < subplan.getNestedPlans().size(); index++) {
-                    AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans()
-                            .get(index).getRoots().get(0).getValue();
+                    AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans().get(index)
+                            .getRoots().get(0).getValue();
                     updateVariableMap(lastOperator, cardinalityVariable, documentOrderVariables, uniqueNodesVariables,
                             vxqueryContext);
                 }
@@ -395,8 +401,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 UnnestOperator unnest = (UnnestOperator) op;
                 ILogicalExpression unnestLogicalExpression = (ILogicalExpression) unnest.getExpressionRef().getValue();
                 variableId = unnest.getVariables().get(0).getId();
-                Cardinality inputCardinality = vxqueryContext.getCardinalityOperatorMap(op.getInputs().get(0)
-                        .getValue());
+                Cardinality inputCardinality = vxqueryContext
+                        .getCardinalityOperatorMap(op.getInputs().get(0).getValue());
                 documentOrder = propagateDocumentOrder(unnestLogicalExpression, documentOrderVariablesForOperator);
                 uniqueNodes = propagateUniqueNodes(unnestLogicalExpression, uniqueNodesVariablesForOperator);
 
@@ -425,10 +431,12 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
                 break;
 
             // The following operators do not change or add to the variable map.
+
             case DATASOURCESCAN:
             case DISTRIBUTE_RESULT:
             case EMPTYTUPLESOURCE:
             case EXCHANGE:
+            case GROUP:
             case NESTEDTUPLESOURCE:
             case PROJECT:
             case SELECT:
@@ -438,8 +446,8 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule
 
             // The following operators' analysis has not yet been implemented.
             default:
-                throw new RuntimeException("Operator (" + op.getOperatorTag()
-                        + ") has not been implemented in rewrite rule.");
+                throw new RuntimeException(
+                        "Operator (" + op.getOperatorTag() + ") has not been implemented in rewrite rule.");
         }
     }
 
index 82be94c..3fb2696 100644 (file)
@@ -28,11 +28,13 @@ import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
 
 /**
  * Set the default context for the variable id in the optimization context.
+ * 
  * @author prestonc
  */
 public class SetVariableIdContextRule implements IAlgebraicRewriteRule {
     @Override
-    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
         if (context.checkIfInDontApplySet(this, opRef.getValue())) {
             return false;
         }
@@ -44,7 +46,9 @@ public class SetVariableIdContextRule implements IAlgebraicRewriteRule {
             case ASSIGN:
             case AGGREGATE:
                 AbstractAssignOperator assign = (AbstractAssignOperator) op;
-                variableId = assign.getVariables().get(0).getId();
+                if (assign.getVariables().size() > 0) {
+                    variableId = assign.getVariables().get(0).getId();
+                }
                 break;
             case UNNEST:
                 UnnestOperator unnest = (UnnestOperator) op;
@@ -62,7 +66,8 @@ public class SetVariableIdContextRule implements IAlgebraicRewriteRule {
     }
 
     @Override
-    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
         return false;
     }
 }
index 0dd3b31..50bc07e 100644 (file)
  */
 package org.apache.vxquery.compiler.rewriter.rules.util;
 
+import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.vxquery.compiler.algebricks.VXQueryConstantValue;
 import org.apache.vxquery.context.StaticContext;
@@ -211,6 +213,27 @@ public class ExpressionToolbox {
         return pTypeCode.getInteger();
     }
 
+    public static Byte[] getConstantArgument(Mutable<ILogicalExpression> searchM, int arg) {
+        AbstractFunctionCallExpression searchFunction = (AbstractFunctionCallExpression) searchM.getValue();
+        ILogicalExpression argType = searchFunction.getArguments().get(arg).getValue();
+        if (argType.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
+            return null;
+        }
+        TaggedValuePointable tvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+        ExpressionToolbox.getConstantAsPointable((ConstantExpression) argType, tvp);
+        return ArrayUtils.toObject(tvp.getByteArray());
+    }
+
+    public static List<ILogicalExpression> getFullArguments(Mutable<ILogicalExpression> searchM) {
+        AbstractFunctionCallExpression searchFunction = (AbstractFunctionCallExpression) searchM.getValue();
+        ArrayList<ILogicalExpression> args = new ArrayList<ILogicalExpression>();
+        for (int i = 0; i < searchFunction.getArguments().size(); i++) {
+            args.add(searchFunction.getArguments().get(i).getValue());
+        }
+        return args;
+    }
+
     public static SequenceType getTypeExpressionTypeArgument(Mutable<ILogicalExpression> searchM, StaticContext dCtx) {
         int typeId = getTypeExpressionTypeArgument(searchM);
         if (typeId > 0) {
index 2c57c32..94040d2 100644 (file)
@@ -20,7 +20,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.lang3.mutable.Mutable;
-
+import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
index a3f7bf8..886229d 100644 (file)
@@ -51,6 +51,11 @@ public class SystemException extends HyracksDataException {
         super(message(code, loc));
         this.code = code;
     }
+    
+    public SystemException(ErrorCode code, SourceLocation loc, Throwable cause) {
+        super(message(code, loc), cause);
+        this.code = code;
+    }
 
     public ErrorCode getCode() {
         return code;
index 0b03c34..ec27864 100644 (file)
         <param name="expr" type="json-item()"/>
         <return type="xs:string*"/>
         <runtime type="scalar" class="org.apache.vxquery.runtime.functions.json.KeysOrMembersScalarEvaluatorFactory"/>
+        <runtime type="unnesting" class="org.apache.vxquery.runtime.functions.json.KeysOrMembersUnnestingEvaluatorFactory"/>
     </operator>
 
     <!-- opext:subtract($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) as xs:anyAtomicType? -->
index ad8db4e..edf8dbc 100644 (file)
@@ -1,43 +1,54 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 package org.apache.vxquery.jsonparser;
 
+import java.io.ByteArrayOutputStream;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.Reader;
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.htrace.fasterxml.jackson.core.JsonFactory;
+import org.apache.htrace.fasterxml.jackson.core.JsonParser;
+import org.apache.htrace.fasterxml.jackson.core.JsonToken;
+import org.apache.hyracks.api.comm.IFrameFieldAppender;
+import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.BooleanPointable;
 import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
 import org.apache.vxquery.datamodel.builders.atomic.StringValueBuilder;
 import org.apache.vxquery.datamodel.builders.jsonitem.ArrayBuilder;
 import org.apache.vxquery.datamodel.builders.jsonitem.ObjectBuilder;
 import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
 import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.datamodel.values.XDMConstants;
 import org.apache.vxquery.xmlparser.IParser;
 
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonToken;
-
 public class JSONParser implements IParser {
-
     final JsonFactory factory;
+    final List<Byte[]> valueSeq;
     protected final ArrayBackedValueStorage atomic;
+    private TaggedValuePointable tvp;
+    private BooleanPointable bp;
     protected final List<ArrayBuilder> abStack;
     protected final List<ObjectBuilder> obStack;
     protected final List<ArrayBackedValueStorage> abvsStack;
@@ -48,6 +59,16 @@ public class JSONParser implements IParser {
     protected final DataOutput out;
     protected itemType checkItem;
     protected int levelArray, levelObject;
+    protected final List<Byte[]> allKeys;
+    protected ByteArrayOutputStream outputStream, prefixStream, pathStream;
+    protected int objectMatchLevel;
+    protected int arrayMatchLevel;
+    protected boolean matched, literal;
+    protected ArrayBackedValueStorage tempABVS;
+    protected List<Integer> arrayCounters;
+    protected List<Boolean> keysOrMembers;
+    protected IFrameWriter writer;
+    protected IFrameFieldAppender appender;
 
     enum itemType {
         ARRAY,
@@ -57,8 +78,14 @@ public class JSONParser implements IParser {
     protected final List<itemType> itemStack;
 
     public JSONParser() {
+        this(null);
+    }
+
+    public JSONParser(List<Byte[]> valueSeq) {
         factory = new JsonFactory();
+        this.valueSeq = valueSeq;
         atomic = new ArrayBackedValueStorage();
+        tvp = new TaggedValuePointable();
         abStack = new ArrayList<ArrayBuilder>();
         obStack = new ArrayList<ObjectBuilder>();
         abvsStack = new ArrayList<ArrayBackedValueStorage>();
@@ -67,9 +94,59 @@ public class JSONParser implements IParser {
         itemStack = new ArrayList<itemType>();
         svb = new StringValueBuilder();
         sb = new SequenceBuilder();
+        bp = new BooleanPointable();
+        allKeys = new ArrayList<Byte[]>();
         abvsStack.add(atomic);
         out = abvsStack.get(abvsStack.size() - 1).getDataOutput();
+        tempABVS = new ArrayBackedValueStorage();
+        this.objectMatchLevel = 1;
+        this.arrayMatchLevel = 0;
+        matched = false;
+        literal = false;
+        arrayCounters = new ArrayList<Integer>();
+        outputStream = new ByteArrayOutputStream();
+        prefixStream = new ByteArrayOutputStream();
+        pathStream = new ByteArrayOutputStream();
+        this.keysOrMembers = new ArrayList<Boolean>();
+        outputStream.reset();
+        pathStream.reset();
+        if (valueSeq != null) {
+            for (int i = 0; i < this.valueSeq.size(); i++) {
+                tvp.set(ArrayUtils.toPrimitive(valueSeq.get(i)), 0, ArrayUtils.toPrimitive(valueSeq.get(i)).length);
+                //access an item of an array
+                if (tvp.getTag() == ValueTag.XS_INTEGER_TAG) {
+                    pathStream.write(tvp.getByteArray(), 0, tvp.getLength());
+                    this.arrayMatchLevel++;
+                    this.keysOrMembers.add(Boolean.valueOf(true));
+                    //access all the items of an array or
+                    //all the keys of an object
+                } else if (tvp.getTag() == ValueTag.XS_BOOLEAN_TAG) {
+                    pathStream.write(tvp.getByteArray(), 0, tvp.getLength());
+                    this.arrayMatchLevel++;
+                    this.keysOrMembers.add(Boolean.valueOf(false));
+                    //access an object 
+                } else {
+                    pathStream.write(tvp.getByteArray(), 1, tvp.getLength() - 1);
+                }
+            }
+        }
+    }
 
+    Byte[] toBytes(Integer v) {
+        Byte[] barr = ArrayUtils.toObject(ByteBuffer.allocate(9).putLong(1, v).array());
+        barr[0] = ValueTag.XS_INTEGER_TAG;
+        return barr;
+    }
+
+    public int parse(Reader input, ArrayBackedValueStorage result, IFrameWriter writer, IFrameFieldAppender appender)
+            throws HyracksDataException {
+        this.writer = writer;
+        this.appender = appender;
+        if (this.valueSeq != null) {
+            return parseElements(input, result);
+        } else {
+            return parse(input, result);
+        }
     }
 
     public int parse(Reader input, ArrayBackedValueStorage result) throws HyracksDataException {
@@ -79,7 +156,6 @@ public class JSONParser implements IParser {
             JsonParser parser = factory.createParser(input);
             JsonToken token = parser.nextToken();
             checkItem = null;
-
             levelArray = 0;
             levelObject = 0;
             sb.reset(result);
@@ -89,47 +165,22 @@ public class JSONParser implements IParser {
                 }
                 switch (token) {
                     case START_ARRAY:
-                        levelArray++;
-                        if (levelArray > abStack.size()) {
-                            abStack.add(new ArrayBuilder());
-                        }
-                        if (levelArray + levelObject > abvsStack.size() - 1) {
-                            abvsStack.add(new ArrayBackedValueStorage());
-                        }
-                        itemStack.add(itemType.ARRAY);
-                        abvsStack.get(levelArray + levelObject).reset();
-                        abStack.get(levelArray - 1).reset(abvsStack.get(levelArray + levelObject));
+                        startArray();
                         break;
                     case START_OBJECT:
-                        levelObject++;
-                        if (levelObject > obStack.size()) {
-                            obStack.add(new ObjectBuilder());
-                        }
-                        if (levelArray + levelObject > abvsStack.size() - 1) {
-                            abvsStack.add(new ArrayBackedValueStorage());
-                        }
-                        itemStack.add(itemType.OBJECT);
-                        abvsStack.get(levelArray + levelObject).reset();
-                        obStack.get(levelObject - 1).reset(abvsStack.get(levelArray + levelObject));
+                        startObject();
                         break;
                     case FIELD_NAME:
-                        if (levelObject > spStack.size()) {
-                            keyStack.add(new ArrayBackedValueStorage());
-                            spStack.add(new UTF8StringPointable());
-                        }
-                        keyStack.get(levelObject - 1).reset();
-                        DataOutput outk = keyStack.get(levelObject - 1).getDataOutput();
-                        svb.write(parser.getText(), outk);
-                        spStack.get(levelObject - 1).set(keyStack.get(levelObject - 1));
+                        startFieldName(parser);
                         break;
                     case VALUE_NUMBER_INT:
-                        atomicValues(ValueTag.XS_INTEGER_TAG, parser, out, svb, levelArray, levelObject);
+                        startAtomicValues(ValueTag.XS_INTEGER_TAG, parser);
                         break;
                     case VALUE_STRING:
-                        atomicValues(ValueTag.XS_STRING_TAG, parser, out, svb, levelArray, levelObject);
+                        startAtomicValues(ValueTag.XS_STRING_TAG, parser);
                         break;
                     case VALUE_NUMBER_FLOAT:
-                        atomicValues(ValueTag.XS_DOUBLE_TAG, parser, out, svb, levelArray, levelObject);
+                        startAtomicValues(ValueTag.XS_DOUBLE_TAG, parser);
                         break;
                     case END_ARRAY:
                         abStack.get(levelArray - 1).finish();
@@ -173,11 +224,160 @@ public class JSONParser implements IParser {
             sb.finish();
             outResult.write(result.getByteArray());
         } catch (Exception e) {
-            throw new HyracksDataException(e.toString());
+            throw new HyracksDataException("Accessing or writing in out of bounds space", e);
+        }
+        return items;
+    }
+
+    public int parseElements(Reader input, ArrayBackedValueStorage result) throws HyracksDataException {
+        int items = 0;
+        try {
+            JsonParser parser = factory.createParser(input);
+            JsonToken token = parser.nextToken();
+            checkItem = null;
+
+            this.objectMatchLevel = 0;
+            this.matched = false;
+
+            levelArray = 0;
+            levelObject = 0;
+            sb.reset(result);
+            while (token != null) {
+                if (itemStack.size() > 1) {
+                    checkItem = itemStack.get(itemStack.size() - 2);
+                }
+                switch (token) {
+                    case START_ARRAY:
+                        startArray();
+                        break;
+                    case START_OBJECT:
+                        startObject();
+                        break;
+                    case FIELD_NAME:
+                        startFieldName(parser);
+                        break;
+                    case VALUE_NUMBER_INT:
+                        startAtomicValues(ValueTag.XS_INTEGER_TAG, parser);
+                        break;
+                    case VALUE_STRING:
+                        startAtomicValues(ValueTag.XS_STRING_TAG, parser);
+                        break;
+                    case VALUE_NUMBER_FLOAT:
+                        startAtomicValues(ValueTag.XS_DOUBLE_TAG, parser);
+                        break;
+                    case END_ARRAY:
+                        //if the query doesn't ask for an atomic value
+                        if (!this.literal && this.pathMatch()) {
+                            //check if the path asked from the query includes the current path 
+                            abStack.get(levelArray - 1).finish();
+                            if (itemStack.size() > 1) {
+                                if (checkItem == itemType.ARRAY) {
+                                    if (levelArray > this.arrayMatchLevel + 1) {
+                                        abStack.get(levelArray - 2).addItem(abvsStack.get(levelArray + levelObject));
+                                    } else if (this.matched) {
+                                        this.matched = false;
+                                        items++;
+                                        writeElement(abvsStack.get(levelArray + levelObject));
+                                    }
+                                } else if (checkItem == itemType.OBJECT) {
+                                    if (levelArray > this.arrayMatchLevel && !this.matched) {
+                                        obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1),
+                                                abvsStack.get(levelArray + levelObject));
+                                    } else if (this.matched) {
+                                        writeElement(abvsStack.get(levelArray + levelObject));
+                                        this.matched = false;
+                                        items++;
+                                    }
+                                }
+                            }
+                        }
+                        if (!allKeys.isEmpty()) {
+                            allKeys.remove(allKeys.size() - 1);
+                        }
+                        this.arrayCounters.remove(levelArray - 1);
+                        itemStack.remove(itemStack.size() - 1);
+                        levelArray--;
+                        break;
+                    case END_OBJECT:
+                        // if the query does not ask for an atomic value and the path requested
+                        // by the query includes the current path
+                        if (!this.literal && this.pathMatch()) {
+                            obStack.get(levelObject - 1).finish();
+                            if (itemStack.size() > 1) {
+                                if (checkItem == itemType.OBJECT) {
+                                    if (levelObject > this.objectMatchLevel) {
+                                        obStack.get(levelObject - 2).addItem(spStack.get(levelObject - 2),
+                                                abvsStack.get(levelArray + levelObject));
+                                    } else if (this.matched) {
+                                        this.matched = false;
+                                        items++;
+                                        writeElement(abvsStack.get(levelArray + levelObject));
+                                    }
+                                } else if (checkItem == itemType.ARRAY) {
+                                    abStack.get(levelArray - 1).addItem(abvsStack.get(levelArray + levelObject));
+                                    if (this.matched) {
+                                        writeElement(abvsStack.get(levelArray + levelObject));
+                                        this.matched = false;
+                                    }
+                                }
+                            }
+                        }
+                        if (!allKeys.isEmpty()) {
+                            allKeys.remove(allKeys.size() - 1);
+                        }
+                        itemStack.remove(itemStack.size() - 1);
+                        levelObject--;
+                        break;
+                    default:
+                        break;
+                }
+                token = parser.nextToken();
+            }
+            sb.finish();
+        } catch (Exception e) {
+            throw new HyracksDataException("Accessing or writing in out of bounds space", e);
         }
         return items;
     }
 
+    private boolean pathMatch() {
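+        // compare the byte-encoded path of the current parse position (allKeys) with the path
+        // requested by the query (pathStream): return true while one is a prefix of the other,
+        // and set 'matched' when the two paths are identical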
+        outputStream.reset();
+        for (Byte[] bb : allKeys) {
+            outputStream.write(ArrayUtils.toPrimitive(bb), 0, ArrayUtils.toPrimitive(bb).length);
+        }
+        // outputStream now holds the byte-encoded path built while parsing the file
+        boolean contains = false;
+        this.matched = false;
+        prefixStream.reset();
+        if (pathStream.size() < outputStream.size()) {
+            prefixStream.write(outputStream.toByteArray(), 0, pathStream.size());
+            contains = Arrays.equals(prefixStream.toByteArray(), pathStream.toByteArray());
+        } else {
+            prefixStream.write(pathStream.toByteArray(), 0, outputStream.size());
+            contains = Arrays.equals(prefixStream.toByteArray(), outputStream.toByteArray());
+        }
+        if (pathStream.size() == outputStream.size() && contains) {
+            this.objectMatchLevel = this.levelObject;
+            this.matched = true;
+            this.literal = false;
+        }
+        return contains;
+    }
+
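+    // Records the position of the current item inside the enclosing array so that array
+    // indices take part in path matching; a boolean marker is added instead when individual
+    // positions are not tracked for this level.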
+    public void itemsInArray() {
+        if (itemStack.get(itemStack.size() - 1) == itemType.ARRAY && !this.arrayCounters.isEmpty()) {
+            boolean addCounter = levelArray - 1 < this.keysOrMembers.size() ? this.keysOrMembers.get(levelArray - 1)
+                    : true;
+            if (addCounter) {
+                this.arrayCounters.set(levelArray - 1, this.arrayCounters.get(levelArray - 1) + 1);
+                this.allKeys.add(this.toBytes(this.arrayCounters.get(levelArray - 1)));
+            } else {
+                Byte[] bool = { (byte) 0x2B, 0x01 };
+                this.allKeys.add(bool);
+            }
+        }
+    }
+
     public void atomicValues(int tag, JsonParser parser, DataOutput out, StringValueBuilder svb, int levelArray,
             int levelObject) throws IOException {
         abvsStack.get(0).reset();
@@ -189,12 +389,141 @@ public class JSONParser implements IParser {
         } else if (tag == ValueTag.XS_INTEGER_TAG) {
             out.writeLong(parser.getLongValue());
         }
-        if (itemStack.size() != 0) {
+        if (!itemStack.isEmpty()) {
             if (itemStack.get(itemStack.size() - 1) == itemType.ARRAY) {
                 abStack.get(levelArray - 1).addItem(abvsStack.get(0));
+                if (valueSeq != null && this.matched && levelArray == this.arrayMatchLevel) {
+                    this.literal = true;
+                    this.matched = false;
+                    writeElement(abvsStack.get(0));
+                }
             } else if (itemStack.get(itemStack.size() - 1) == itemType.OBJECT) {
                 obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1), abvsStack.get(0));
+                if (valueSeq != null && this.matched && levelObject == this.objectMatchLevel) {
+                    this.literal = true;
+                    this.matched = false;
+                    writeElement(abvsStack.get(0));
+                }
+            }
+        }
+    }
+
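+    // Copies the serialized item into a temporary buffer and appends it as a field to the
+    // output frame through the frame writer.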
+    public void writeElement(ArrayBackedValueStorage abvs) throws IOException {
+        tempABVS.reset();
+        DataOutput out = tempABVS.getDataOutput();
+        out.write(abvs.getByteArray(), abvs.getStartOffset(), abvs.getLength());
+        FrameUtils.appendFieldToWriter(writer, appender, tempABVS.getByteArray(), tempABVS.getStartOffset(),
+                tempABVS.getLength());
+    }
+
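+    // Bookkeeping when a nested array or object starts: updates the counter of the enclosing
+    // array (count compensates for levelArray having already been incremented for arrays)
+    // and, for a new array (count == 2), pushes a fresh counter.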
+    public void startArrayOrObjects(int count) {
+        if (valueSeq != null && !this.arrayCounters.isEmpty()) {
+            boolean addCounter = levelArray - count < this.keysOrMembers.size()
+                    ? this.keysOrMembers.get(levelArray - count) : true;
+            if (itemStack.get(itemStack.size() - 1) == itemType.ARRAY) {
+                if (addCounter) {
+                    this.arrayCounters.set(levelArray - count, this.arrayCounters.get(levelArray - count) + 1);
+                    this.allKeys.add(this.toBytes(this.arrayCounters.get(levelArray - count)));
+                } else {
+                    XDMConstants.setTrue(bp);
+                    this.allKeys.add(ArrayUtils.toObject(bp.getByteArray()));
+                }
+            }
+
+        }
+        if (count == 2 && valueSeq != null) {
+            this.arrayCounters.add(Integer.valueOf(0));
+        }
+    }
+
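+    // Opens a new array level: grows the builder and storage stacks if needed and, when the
+    // current path can still match the query path (or no path was requested), resets the
+    // builder for this level.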
+    public void startArray() throws HyracksDataException {
+        levelArray++;
+        if (levelArray > abStack.size()) {
+            abStack.add(new ArrayBuilder());
+        }
+        if (levelArray + levelObject > abvsStack.size() - 1) {
+            abvsStack.add(new ArrayBackedValueStorage());
+        }
+        startArrayOrObjects(2);
+        itemStack.add(itemType.ARRAY);
+        if (this.pathMatch() || this.valueSeq == null) {
+            abvsStack.get(levelArray + levelObject).reset();
+            try {
+                abStack.get(levelArray - 1).reset(abvsStack.get(levelArray + levelObject));
+            } catch (Exception e) {
+                throw new HyracksDataException("Accessing index out of bounds", e);
+            }
+        }
+    }
+
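+    // Opens a new object level, mirroring startArray() for objects.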
+    public void startObject() throws HyracksDataException {
+        levelObject++;
+        if (levelObject > obStack.size()) {
+            obStack.add(new ObjectBuilder());
+        }
+        if (levelArray + levelObject > abvsStack.size() - 1) {
+            abvsStack.add(new ArrayBackedValueStorage());
+        }
+        startArrayOrObjects(1);
+        itemStack.add(itemType.OBJECT);
+        if (this.pathMatch() || this.valueSeq == null) {
+            abvsStack.get(levelArray + levelObject).reset();
+            try {
+                obStack.get(levelObject - 1).reset(abvsStack.get(levelArray + levelObject));
+            } catch (Exception e) {
+                throw new HyracksDataException("Accessing index out of bounds", e);
             }
         }
     }
+
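+    // Reads an object key, stores it on the current key path and, when the query path marks
+    // this position with a boolean (asking for the keys themselves), emits the key as a
+    // string item.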
+    public void startFieldName(JsonParser parser) throws HyracksDataException {
+        if (levelObject > spStack.size()) {
+            keyStack.add(new ArrayBackedValueStorage());
+            spStack.add(new UTF8StringPointable());
+        }
+        keyStack.get(levelObject - 1).reset();
+        DataOutput outk = keyStack.get(levelObject - 1).getDataOutput();
+        try {
+            svb.write(parser.getText(), outk);
+            spStack.get(levelObject - 1).set(keyStack.get(levelObject - 1));
+            if (this.valueSeq != null) {
+                int length = 0;
+                byte[] barr = spStack.get(levelObject - 1).getByteArray();
+                outputStream.reset();
+                outputStream.write(barr, 0, spStack.get(levelObject - 1).getLength());
+                allKeys.add(ArrayUtils.toObject(outputStream.toByteArray()));
+                for (int i = 0; i < allKeys.size() - 1; i++) {
+                    tvp.set(ArrayUtils.toPrimitive(allKeys.get(i)), 0, ArrayUtils.toPrimitive(allKeys.get(i)).length);
+                    length += ArrayUtils.toPrimitive(allKeys.get(i)).length;
+                }
+                // if the next two bytes represent a boolean (a boolean value takes only two bytes),
+                // it means that the query asks for all the keys of the object
+                if (length <= pathStream.size() && (length + 2) <= pathStream.size()) {
+                    tvp.set(pathStream.toByteArray(), length, length + 2);
+                    if (tvp.getTag() == ValueTag.XS_BOOLEAN_TAG) {
+                        abvsStack.get(0).reset();
+                        out.write(ValueTag.XS_STRING_TAG);
+                        svb.write(parser.getText(), out);
+                        writeElement(abvsStack.get(0));
+                    }
+                }
+            }
+        } catch (Exception e) {
+            throw new HyracksDataException("Writing in out of bounds space", e);
+        }
+    }
+
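+    // Handles an atomic token: records its array position, serializes the value if the
+    // current path matches the query path (or no path was requested), and pops the last key.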
+    public void startAtomicValues(int tag, JsonParser parser) throws HyracksDataException {
+        itemsInArray();
+        if (this.pathMatch() || this.valueSeq == null) {
+            try {
+                atomicValues(tag, parser, out, svb, levelArray, levelObject);
+            } catch (Exception e) {
+                throw new HyracksDataException(e);
+            }
+        }
+        if (!allKeys.isEmpty()) {
+            allKeys.remove(allKeys.size() - 1);
+        }
+    }
 }
index df6fb4b..2459944 100644 (file)
@@ -18,37 +18,102 @@ package org.apache.vxquery.metadata;
 
 import java.util.List;
 
-import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
 import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourcePropertiesProvider;
+import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
 import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain;
 
-public abstract class AbstractVXQueryDataSource implements IDataSource<String> {
+public abstract class AbstractVXQueryDataSource implements IVXQueryDataSource {
     protected static final String DELIMITER = "\\|";
     protected int dataSourceId;
     protected String collectionName;
     protected String[] collectionPartitions;
-    protected String elementPath;
+
     protected List<Integer> childSeq;
+    protected List<Byte[]> valueSeq;
     protected int totalDataSources;
     protected String tag;
-    protected String function;
 
     protected Object[] types;
 
     protected IDataSourcePropertiesProvider propProvider;
-
-    public abstract String getFunctionCall();
+
+    @Override
+    public INodeDomain getDomain() {
+        return null;
+    }
 
     @Override
     public boolean isScanAccessPathALeaf() {
-        // TODO Auto-generated method stub
         return false;
     }
 
+    public int getTotalDataSources() {
+        return totalDataSources;
+    }
+
+    public void setTotalDataSources(int totalDataSources) {
+        this.totalDataSources = totalDataSources;
+    }
+
+    public int getDataSourceId() {
+        return dataSourceId;
+    }
+
+    public int getPartitionCount() {
+        return collectionPartitions.length;
+    }
+
+    public String getTag() {
+        return this.tag;
+    }
+
+    public void setTag(String tag) {
+        this.tag = tag;
+    }
+
     @Override
-    public INodeDomain getDomain() {
-        // TODO Auto-generated method stub
-        return null;
+    public String getId() {
+        return collectionName;
+    }
+
+    @Override
+    public Object[] getSchemaTypes() {
+        return types;
+    }
+
+    @Override
+    public IDataSourcePropertiesProvider getPropertiesProvider() {
+        return propProvider;
+    }
+
+    @Override
+    public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList) {
+    }
+
+    public void addChildSeq(int integer) {
+        childSeq.add(integer);
+    }
+
+    public List<Integer> getChildSeq() {
+        return childSeq;
+    }
+
+    public void addValueSeq(Byte[] value) {
+        valueSeq.add(value);
+    }
+
+    public List<Byte[]> getValueSeq() {
+        return valueSeq;
+    }
+
+    public String[] getPartitions() {
+        return collectionPartitions;
+    }
+
+    public void setPartitions(String[] collectionPartitions) {
+        this.collectionPartitions = collectionPartitions;
     }
 
+    public abstract boolean usingIndex();
 }
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/IVXQueryDataSource.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/IVXQueryDataSource.java
new file mode 100644 (file)
index 0000000..8e71339
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.metadata;
+
+import java.util.List;
+
+import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource;
+
+public interface IVXQueryDataSource extends IDataSource<String> {
+    boolean usingIndex();
+
+    void addChildSeq(int integer);
+
+    List<Integer> getChildSeq();
+}
+
index bee7c7b..c5761c5 100644 (file)
@@ -21,7 +21,6 @@ import java.util.List;
 
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
 import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourcePropertiesProvider;
-import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
 import org.apache.hyracks.algebricks.core.algebra.properties.ILocalStructuralProperty;
 import org.apache.hyracks.algebricks.core.algebra.properties.IPhysicalPropertiesVector;
 import org.apache.hyracks.algebricks.core.algebra.properties.RandomPartitioningProperty;
@@ -29,95 +28,38 @@ import org.apache.hyracks.algebricks.core.algebra.properties.StructuralPropertie
 import org.apache.vxquery.compiler.rewriter.rules.CollectionFileDomain;
 
 public class VXQueryCollectionDataSource extends AbstractVXQueryDataSource {
-
-    private VXQueryCollectionDataSource(int id, String file, Object[] types) {
+    private VXQueryCollectionDataSource(int id, String collection, Object[] types) {
         this.dataSourceId = id;
-        this.collectionName = file;
-        collectionPartitions = collectionName.split(DELIMITER);
+        this.collectionName = collection;
+        this.collectionPartitions = collectionName.split(DELIMITER);
         this.types = types;
+
         final IPhysicalPropertiesVector vec = new StructuralPropertiesVector(
                 new RandomPartitioningProperty(new CollectionFileDomain(collectionName)),
                 new ArrayList<ILocalStructuralProperty>());
-        propProvider = new IDataSourcePropertiesProvider() {
+        this.propProvider = new IDataSourcePropertiesProvider() {
             @Override
             public IPhysicalPropertiesVector computePropertiesVector(List<LogicalVariable> scanVariables) {
                 return vec;
             }
         };
-        this.childSeq = new ArrayList<>();
         this.tag = null;
+        this.childSeq = new ArrayList<>();
+        this.valueSeq = new ArrayList<>();
     }
 
-    public static VXQueryCollectionDataSource create(int id, String file, Object type) {
-        return new VXQueryCollectionDataSource(id, file, new Object[] { type });
-    }
-
-    public int getTotalDataSources() {
-        return totalDataSources;
-    }
-
-    public void setTotalDataSources(int totalDataSources) {
-        this.totalDataSources = totalDataSources;
-    }
-
-    public int getDataSourceId() {
-        return dataSourceId;
-    }
-
-    public String[] getPartitions() {
-        return collectionPartitions;
-    }
-
-    public void setPartitions(String[] collectionPartitions) {
-        this.collectionPartitions = collectionPartitions;
-    }
-
-    public int getPartitionCount() {
-        return collectionPartitions.length;
-    }
-
-    public String getTag() {
-        return this.tag;
-    }
-
-    public void setTag(String tag) {
-        this.tag = tag;
-    }
-
-    @Override
-    public String getId() {
-        return collectionName;
-    }
-
-    @Override
-    public Object[] getSchemaTypes() {
-        return types;
-    }
-
-    @Override
-    public IDataSourcePropertiesProvider getPropertiesProvider() {
-        return propProvider;
-    }
-
-    @Override
-    public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList) {
-    }
-
-    public void addChildSeq(int integer) {
-        childSeq.add(integer);
-    }
-
-    public List<Integer> getChildSeq() {
-        return childSeq;
+    public static VXQueryCollectionDataSource create(int id, String collection, Object type) {
+        return new VXQueryCollectionDataSource(id, collection, new Object[] { type });
     }
 
     @Override
     public String toString() {
-        return "VXQueryCollectionDataSource [collectionName=" + collectionName + ", childSeq=" + childSeq + "]";
+        return "VXQueryCollectionDataSource [collectionName=" + collectionName + ", childSeq=" + childSeq
+                + ", valueSeq=" + valueSeq + "]";
     }
 
-    @Override
-    public String getFunctionCall() {
-        return function;
+    public boolean usingIndex() {
+        return false;
     }
+
 }
index be95f93..623b48c 100644 (file)
@@ -77,6 +77,7 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
     private short totalDataSources;
     private String[] collectionPartitions;
     private List<Integer> childSeq;
+    private List<Byte[]> valueSeq;
     protected static final Logger LOGGER = Logger.getLogger(VXQueryCollectionOperatorDescriptor.class.getName());
     private HDFSFunctions hdfs;
     private String tag;
@@ -84,13 +85,14 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
     private final String hdfsConf;
     private final Map<String, NodeControllerInfo> nodeControllerInfos;
 
-    public VXQueryCollectionOperatorDescriptor(IOperatorDescriptorRegistry spec, VXQueryCollectionDataSource ds,
+    public VXQueryCollectionOperatorDescriptor(IOperatorDescriptorRegistry spec, AbstractVXQueryDataSource ds,
             RecordDescriptor rDesc, String hdfsConf, Map<String, NodeControllerInfo> nodeControllerInfos) {
         super(spec, 1, 1);
         collectionPartitions = ds.getPartitions();
         dataSourceId = (short) ds.getDataSourceId();
         totalDataSources = (short) ds.getTotalDataSources();
         childSeq = ds.getChildSeq();
+        valueSeq = ds.getValueSeq();
         recordDescriptors[0] = rDesc;
         this.tag = ds.getTag();
         this.hdfsConf = hdfsConf;
@@ -113,7 +115,7 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
         final String collectionName = collectionPartitions[partition % collectionPartitions.length];
         final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, appender, childSeq,
                 dCtx.getStaticContext());
-        final JSONParser jparser = new JSONParser();
+        final JSONParser jparser = new JSONParser(valueSeq);
 
         return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
             @Override
@@ -130,7 +132,7 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
                 Reader input;
                 if (!collectionModifiedName.contains("hdfs:/")) {
                     File collectionDirectory = new File(collectionModifiedName);
-                    //check if directory is in the local file system
+                    // check if directory is in the local file system
                     if (collectionDirectory.exists()) {
                         // Go through each tuple.
                         if (collectionDirectory.isDirectory()) {
@@ -152,9 +154,7 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
                                         try {
                                             jsonAbvs.reset();
                                             input = new InputStreamReader(new FileInputStream(file));
-                                            jparser.parse(input, jsonAbvs);
-                                            FrameUtils.appendFieldToWriter(writer, appender, jsonAbvs.getByteArray(),
-                                                    jsonAbvs.getStartOffset(), jsonAbvs.getLength());
+                                            jparser.parse(input, jsonAbvs, writer, appender);
                                         } catch (FileNotFoundException e) {
                                             throw new HyracksDataException(e.toString());
                                         }
@@ -197,14 +197,14 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
                                 RecordReader reader;
                                 TaskAttemptContext context;
                                 for (int i = 0; i < size; i++) {
-                                    //read split
+                                    // read split
                                     context = ctxFactory.createContext(job.getConfiguration(), i);
-
                                     reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                     reader.initialize(inputSplits.get(i), context);
                                     while (reader.nextKeyValue()) {
                                         value = reader.getCurrentValue().toString();
-                                        //Split value if it contains more than one item with the tag
+                                        // Split value if it contains more than
+                                        // one item with the tag
                                         if (StringUtils.countMatches(value, tag) > 1) {
                                             String[] items = value.split(tag);
                                             for (String item : items) {
@@ -218,7 +218,9 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
                                             }
                                         } else {
                                             value = START_TAG + value;
-                                            //create an input stream to the file currently reading and send it to parser
+                                            // create an input stream to the
+                                            // file currently reading and send
+                                            // it to parser
                                             stream = new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8));
                                             parser.parseHDFSElements(stream, writer, fta, i);
                                             stream.close();
@@ -232,10 +234,10 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
                             }
                         } else {
                             try {
-                                //check if the path exists and is a directory
+                                // check if the path exists and is a directory
                                 if (fs.exists(directory) && fs.isDirectory(directory)) {
                                     for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
-                                        //read every file in the directory
+                                        // read every file in the directory
                                         RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                         while (it.hasNext()) {
                                             xmlDocument = it.next().getPath();
@@ -244,7 +246,9 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
                                                     LOGGER.fine(
                                                             "Starting to read XML document: " + xmlDocument.getName());
                                                 }
-                                                //create an input stream to the file currently reading and send it to parser
+                                                // create an input stream to the
+                                                // file currently reading and
+                                                // send it to parser
                                                 InputStream in = fs.open(xmlDocument).getWrappedStream();
                                                 parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                                 in.close();
index ddbc984..ea69cfd 100644 (file)
@@ -31,9 +31,9 @@ import org.apache.vxquery.compiler.rewriter.rules.CollectionFileDomain;
  */
 public class VXQueryIndexingDataSource extends AbstractVXQueryDataSource {
 
-    protected Object[] types;
+    private String elementPath;
+    private String function;
 
-    protected IDataSourcePropertiesProvider propProvider;
     private VXQueryIndexingDataSource(int id, String collection, String elementPath, Object[] types,
             String functionCall) {
         this.dataSourceId = id;
@@ -41,12 +41,11 @@ public class VXQueryIndexingDataSource extends AbstractVXQueryDataSource {
         this.elementPath = elementPath;
         this.function = functionCall;
         this.collectionPartitions = collectionName.split(DELIMITER);
-
         this.types = types;
+
         final IPhysicalPropertiesVector vec = new StructuralPropertiesVector(
-                new RandomPartitioningProperty(new CollectionFileDomain(collectionName)),
-                new ArrayList<>());
-        propProvider = new IDataSourcePropertiesProvider() {
+                new RandomPartitioningProperty(new CollectionFileDomain(collectionName)), new ArrayList<>());
+        this.propProvider = new IDataSourcePropertiesProvider() {
             @Override
             public IPhysicalPropertiesVector computePropertiesVector(List<LogicalVariable> scanVariables) {
                 return vec;
@@ -54,83 +53,30 @@ public class VXQueryIndexingDataSource extends AbstractVXQueryDataSource {
         };
         this.tag = null;
         this.childSeq = new ArrayList<>();
+        this.valueSeq = new ArrayList<>();
     }
 
-    public static VXQueryIndexingDataSource create(int id, String collection, String index, Object type, String
-            function) {
+    public static VXQueryIndexingDataSource create(int id, String collection, String index, Object type,
+            String function) {
         return new VXQueryIndexingDataSource(id, collection, index, new Object[] { type }, function);
     }
 
-    public int getTotalDataSources() {
-        return totalDataSources;
-    }
-
-    public void setTotalDataSources(int totalDataSources) {
-        this.totalDataSources = totalDataSources;
-    }
-
-    public int getDataSourceId() {
-        return dataSourceId;
-    }
-
     public String getElementPath() {
         return elementPath;
     }
 
-    public String[] getCollectionPartitions() {
-        return collectionPartitions;
-    }
-
-    public void setCollectionPartitions(String[] collectionPartitions) {
-        this.collectionPartitions = collectionPartitions;
-    }
-
-    public int getPartitionCount() {
-        return collectionPartitions.length;
-    }
-
-    public String getTag() {
-        return this.tag;
-    }
-
-    public void setTag(String tag) {
-        this.tag = tag;
-    }
-
-    @Override
-    public String getId() {
-        return collectionName;
-    }
-
-    @Override
-    public Object[] getSchemaTypes() {
-        return types;
-    }
-
-    @Override
-    public IDataSourcePropertiesProvider getPropertiesProvider() {
-        return propProvider;
-    }
-
-    @Override
-    public void computeFDs(List scanVariables, List fdList) {
+    public String getFunctionCall() {
+        return function;
     }
 
     @Override
     public String toString() {
-        return "VXQueryIndexingDataSource [collectionName=" + collectionName + ", elementPath=" + elementPath + " "
-                + "function=" + function + "]";
+        return "VXQueryIndexingDataSource [collectionName=" + collectionName + ", elementPath=" + elementPath
+                + "function=" + function + "]";
     }
 
-    @Override
-    public String getFunctionCall() {
-        return function;
-    }
-
-    public List<Integer> getChildSeq() {
-        return childSeq;
+    public boolean usingIndex() {
+        return true;
     }
 
 }
-
-
index a24a629..ac92a0e 100644 (file)
@@ -68,7 +68,7 @@ public class VXQueryIndexingOperatorDescriptor extends AbstractSingleActivityOpe
             RecordDescriptor rDesc, String hdfsConf, Map<String, NodeControllerInfo> nodeControllerInfos) {
         super(spec, 1, 1);
         this.functionCall = ds.getFunctionCall();
-        collectionPartitions = ds.getCollectionPartitions();
+        collectionPartitions = ds.getPartitions();
         dataSourceId = (short) ds.getDataSourceId();
         totalDataSources = (short) ds.getTotalDataSources();
         recordDescriptors[0] = rDesc;
index e552f68..f6644d6 100644 (file)
@@ -88,43 +88,32 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
             List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
             List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema,
             IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig)
-            throws AlgebricksException {
-        VXQueryCollectionDataSource ds = null;
-        VXQueryIndexingDataSource ids = null;
-
-        try {
-            ids = (VXQueryIndexingDataSource) dataSource;
-        } catch (ClassCastException e) {
-            ds = (VXQueryCollectionDataSource) dataSource;
-        }
+                    throws AlgebricksException {
+        AbstractVXQueryDataSource ds = (AbstractVXQueryDataSource) dataSource;
         if (sourceFileMap != null) {
-            final int len = ds != null ? ds.getPartitions().length : ids.getCollectionPartitions().length;
+            final int len = ds.getPartitions().length;
             String[] collectionPartitions = new String[len];
             for (int i = 0; i < len; ++i) {
-                String partition = ds != null ? ds.getPartitions()[i] : ids.getCollectionPartitions()[i];
+                String partition = ds.getPartitions()[i];
                 File mapped = sourceFileMap.get(partition);
                 collectionPartitions[i] = mapped != null ? mapped.toString() : partition;
             }
-            if (ds != null) {
-                ds.setPartitions(collectionPartitions);
-            } else {
-                ids.setCollectionPartitions(collectionPartitions);
-            }
+            ds.setPartitions(collectionPartitions);
         }
         RecordDescriptor rDesc;
         IOperatorDescriptor scanner;
         AlgebricksPartitionConstraint constraint;
 
-        if (ds != null) {
+        if (!ds.usingIndex()) {
             rDesc = new RecordDescriptor(new ISerializerDeserializer[opSchema.getSize()]);
             scanner = new VXQueryCollectionOperatorDescriptor(jobSpec, ds, rDesc, this.hdfsConf,
                     this.nodeControllerInfos);
             constraint = getClusterLocations(nodeList, ds.getPartitionCount());
         } else {
             rDesc = new RecordDescriptor(new ISerializerDeserializer[opSchema.getSize()]);
-            scanner = new VXQueryIndexingOperatorDescriptor(jobSpec, ids, rDesc, this.hdfsConf,
-                    this.nodeControllerInfos);
-            constraint = getClusterLocations(nodeList, ids.getPartitionCount());
+            scanner = new VXQueryIndexingOperatorDescriptor(jobSpec, (VXQueryIndexingDataSource) ds, rDesc,
+                    this.hdfsConf, this.nodeControllerInfos);
+            constraint = getClusterLocations(nodeList, ds.getPartitionCount());
         }
 
         return new Pair<>(scanner, constraint);
@@ -245,7 +234,7 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
             JobGenContext context, JobSpecification spec, boolean bulkload) throws AlgebricksException {
         throw new UnsupportedOperationException();
     }
-
+    
     @Override
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(IDataSource<String> dataSource,
             IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys,
@@ -254,7 +243,7 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
             JobSpecification jobSpec, boolean bulkload) throws AlgebricksException {
         throw new UnsupportedOperationException();
     }
-
+    
     @Override
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getUpsertRuntime(IDataSource<String> dataSource,
             IOperatorSchema inputSchema, IVariableTypeEnvironment typeEnv, List<LogicalVariable> keys,
@@ -263,7 +252,7 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
             JobSpecification jobSpec) throws AlgebricksException {
         throw new UnsupportedOperationException();
     }
-
+    
     @Override
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexUpsertRuntime(
             IDataSourceIndex<String, String> dataSourceIndex, IOperatorSchema propagatedSchema,
@@ -274,10 +263,11 @@ public class VXQueryMetadataProvider implements IMetadataProvider<String, String
             JobSpecification spec) throws AlgebricksException {
         throw new UnsupportedOperationException();
     }
-
+    
     @Override
     public Map<String, String> getConfig() {
         return new HashMap<>();
     }
 
+
 }
index 4117774..a8be359 100644 (file)
  */
 package org.apache.vxquery.runtime.functions.base;
 
+import org.apache.vxquery.datamodel.accessors.ArrayBackedValueStoragePool;
+import org.apache.vxquery.datamodel.accessors.PointablePool;
+import org.apache.vxquery.datamodel.accessors.PointablePoolFactory;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.exceptions.SystemException;
+
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
 import org.apache.hyracks.algebricks.runtime.base.IUnnestingEvaluator;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-import org.apache.vxquery.datamodel.accessors.PointablePool;
-import org.apache.vxquery.datamodel.accessors.PointablePoolFactory;
-import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
 
 public abstract class AbstractTaggedValueArgumentUnnestingEvaluator implements IUnnestingEvaluator {
     private final IScalarEvaluator[] args;
@@ -30,6 +34,7 @@ public abstract class AbstractTaggedValueArgumentUnnestingEvaluator implements I
     protected final TaggedValuePointable[] tvps;
 
     protected final PointablePool ppool = PointablePoolFactory.INSTANCE.createPointablePool();
+    protected final ArrayBackedValueStoragePool abvsPool = new ArrayBackedValueStoragePool();
 
     public AbstractTaggedValueArgumentUnnestingEvaluator(IScalarEvaluator[] args) {
         this.args = args;
index d255345..b127d2a 100644 (file)
@@ -21,10 +21,7 @@ import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
-import org.apache.vxquery.datamodel.accessors.jsonitem.ArrayPointable;
-import org.apache.vxquery.datamodel.accessors.jsonitem.ObjectPointable;
 import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
-import org.apache.vxquery.datamodel.values.ValueTag;
 import org.apache.vxquery.exceptions.ErrorCode;
 import org.apache.vxquery.exceptions.SystemException;
 import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluator;
@@ -33,42 +30,32 @@ import java.io.IOException;
 
 public class KeysOrMembersScalarEvaluator extends AbstractTaggedValueArgumentScalarEvaluator {
     protected final IHyracksTaskContext ctx;
-    private final ObjectPointable op;
-    private final ArrayPointable ap;
+    private ArrayBackedValueStorage abvs;
     private final SequenceBuilder sb;
     private final TaggedValuePointable tempTvp;
+    private final KeysOrMembersUnnesting keysOrMembers;
 
     public KeysOrMembersScalarEvaluator(IHyracksTaskContext ctx, IScalarEvaluator[] args) {
         super(args);
         this.ctx = ctx;
-        op = (ObjectPointable) ObjectPointable.FACTORY.createPointable();
-        ap = (ArrayPointable) ArrayPointable.FACTORY.createPointable();
+        abvs = new ArrayBackedValueStorage();
         sb = new SequenceBuilder();
         tempTvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+        keysOrMembers = new KeysOrMembersUnnesting(ctx, ppool);
     }
 
     @Override
     protected void evaluate(TaggedValuePointable[] args, IPointable result) throws SystemException {
-        final TaggedValuePointable tvp = args[0];
-        ArrayBackedValueStorage abvs = abvsPool.takeOne();
+        abvs = abvsPool.takeOne();
+        keysOrMembers.init(args);
         try {
-            switch (tvp.getTag()) {
-                case ValueTag.OBJECT_TAG:
-                    tvp.getValue(op);
-                    op.getKeys(abvs);
-                    result.set(abvs);
-                    break;
-                case ValueTag.ARRAY_TAG:
-                    abvs.reset();
-                    sb.reset(abvs);
-                    tvp.getValue(ap);
-                    ap.appendItems(sb);
-                    sb.finish();
-                    result.set(abvs);
-                    break;
-                default:
-                    throw new SystemException(ErrorCode.FORG0006);
+            abvs.reset();
+            sb.reset(abvs);
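+            // step through all keys/members of the argument and rebuild them as one sequence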
+            while (keysOrMembers.step(tempTvp)) {
+                sb.addItem(tempTvp);
             }
+            sb.finish();
+            result.set(abvs);
         } catch (IOException e) {
             throw new SystemException(ErrorCode.SYSE0001, e);
         } finally {
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnesting.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnesting.java
new file mode 100644 (file)
index 0000000..0e2597c
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.json;
+
+import java.io.IOException;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.vxquery.datamodel.accessors.ArrayBackedValueStoragePool;
+import org.apache.vxquery.datamodel.accessors.PointablePool;
+import org.apache.vxquery.datamodel.accessors.SequencePointable;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.accessors.jsonitem.ArrayPointable;
+import org.apache.vxquery.datamodel.accessors.jsonitem.ObjectPointable;
+import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.exceptions.ErrorCode;
+import org.apache.vxquery.exceptions.SystemException;
+import org.apache.vxquery.runtime.functions.step.AbstractForwardAxisPathStep;
+
+public class KeysOrMembersUnnesting extends AbstractForwardAxisPathStep {
+    private final ArrayPointable ap = (ArrayPointable) ArrayPointable.FACTORY.createPointable();
+    private final SequencePointable sp = (SequencePointable) SequencePointable.FACTORY.createPointable();
+    private final ObjectPointable op = (ObjectPointable) ObjectPointable.FACTORY.createPointable();
+    private final TaggedValuePointable tvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+    private final TaggedValuePointable tempTvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+    private TaggedValuePointable arg = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+    private final ArrayBackedValueStoragePool abvsPool = new ArrayBackedValueStoragePool();
+    private ArrayBackedValueStorage abvs = new ArrayBackedValueStorage();
+    private int arOrObArgsLength;
+    private int indexArrayArgs;
+
+    public KeysOrMembersUnnesting(IHyracksTaskContext ctx, PointablePool pp) {
+        super(ctx, pp);
+    }
+
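+    // Determines whether the argument is an object or an array and records how many keys or
+    // members will be produced by step().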
+    protected void init(TaggedValuePointable[] args) throws SystemException {
+        abvs = abvsPool.takeOne();
+        indexArrayArgs = 0;
+        arg = args[0];
+        switch (arg.getTag()) {
+            case ValueTag.OBJECT_TAG:
+                arg.getValue(op);
+                arOrObArgsLength = op.getEntryCount();
+                break;
+            case ValueTag.ARRAY_TAG:
+                arg.getValue(ap);
+                arOrObArgsLength = ap.getEntryCount();
+                break;
+            default:
+                throw new SystemException(ErrorCode.FORG0006);
+        }
+    }
+
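+    // Produces the next key (for an object) or member (for an array); returns false once all
+    // entries have been returned.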
+    public boolean step(IPointable result) throws SystemException {
+        abvs = abvsPool.takeOne();
+        if (arOrObArgsLength > 0) {
+            while (indexArrayArgs < arOrObArgsLength) {
+                if (arg.getTag() == ValueTag.ARRAY_TAG) {
+                    ap.getEntry(indexArrayArgs, tvp);
+                } else {
+                    try {
+                        op.getKeys(abvs);
+                    } catch (IOException e) {
+                        throw new SystemException(ErrorCode.SYSE0001, e);
+                    }
+                    tempTvp.set(abvs);
+                    tempTvp.getValue(sp);
+                    sp.getEntry(indexArrayArgs, tvp);
+                }
+                result.set(tvp.getByteArray(), tvp.getStartOffset(), tvp.getLength());
+                indexArrayArgs++;
+                return true;
+            }
+        }
+        return false;
+    }
+}
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnestingEvaluator.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnestingEvaluator.java
new file mode 100644 (file)
index 0000000..a4e146c
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.json;
+
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentUnnestingEvaluator;
+
+public class KeysOrMembersUnnestingEvaluator extends AbstractTaggedValueArgumentUnnestingEvaluator {
+    private final KeysOrMembersUnnesting keysOrMembersStep;
+
+    public KeysOrMembersUnnestingEvaluator(IHyracksTaskContext ctx, IScalarEvaluator[] args) {
+        super(args);
+        keysOrMembersStep = new KeysOrMembersUnnesting(ctx, ppool);
+    }
+
+    @Override
+    public boolean step(IPointable result) throws HyracksDataException {
+        return keysOrMembersStep.step(result);
+    }
+
+    @Override
+    protected void init(TaggedValuePointable[] args) throws HyracksDataException {
+        keysOrMembersStep.init(args);
+
+    }
+}
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnestingEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/KeysOrMembersUnnestingEvaluatorFactory.java
new file mode 100644 (file)
index 0000000..3f94f7e
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.json;
+
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.algebricks.runtime.base.IUnnestingEvaluator;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentUnnestingEvaluatorFactory;
+
+public class KeysOrMembersUnnestingEvaluatorFactory extends AbstractTaggedValueArgumentUnnestingEvaluatorFactory {
+    private static final long serialVersionUID = 1L;
+
+    public KeysOrMembersUnnestingEvaluatorFactory(IScalarEvaluatorFactory[] args) {
+        super(args);
+    }
+
+    @Override
+    protected IUnnestingEvaluator createEvaluator(IHyracksTaskContext ctx, IScalarEvaluator[] args)
+            throws HyracksDataException {
+        return new KeysOrMembersUnnestingEvaluator(ctx, args);
+    }
+
+}
index eb71923..8627658 100644 (file)
@@ -111,5 +111,7 @@ public enum ASTTag {
     ARRAY_CONSTRUCTOR,
     OBJECT_CONSTRUCTOR,
     SIMPLE_OBJECT_UNION_CONSTRUCTOR,
-    PAIR_CONSTRUCTOR
+    PAIR_CONSTRUCTOR,
+    GROUPBY_CLAUSE,
+    GROUP_SPECIFICATION
 }
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/ast/GroupSpecNode.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/ast/GroupSpecNode.java
new file mode 100644 (file)
index 0000000..634bf66
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.xmlquery.ast;
+
+import org.apache.vxquery.util.SourceLocation;
+
+public class GroupSpecNode extends ASTNode {
+    private QNameNode varName;
+    private TypeDeclNode type;
+    private ASTNode expr;
+    private String collation;
+
+    public GroupSpecNode(SourceLocation loc) {
+        super(loc);
+    }
+
+    @Override
+    public ASTTag getTag() {
+        return ASTTag.GROUP_SPECIFICATION;
+    }
+
+    public QNameNode getVar() {
+        return varName;
+    }
+
+    public void setDefaultVar(QNameNode varName) {
+        this.varName = varName;
+    }
+
+    public TypeDeclNode getType() {
+        return type;
+    }
+
+    public void setType(TypeDeclNode type) {
+        this.type = type;
+    }
+
+    public ASTNode getExpr() {
+        return expr;
+    }
+
+    public void setExpression(ASTNode expr) {
+        this.expr = expr;
+    }
+
+    public String getCollation() {
+        return collation;
+    }
+
+    public void setCollation(String collation) {
+        this.collation = collation;
+    }
+}
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/ast/GroupbyClauseNode.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/ast/GroupbyClauseNode.java
new file mode 100644 (file)
index 0000000..ee58b7a
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.xmlquery.ast;
+
+import java.util.List;
+
+import org.apache.vxquery.util.SourceLocation;
+
+public class GroupbyClauseNode extends FLWORClauseNode {
+    private List<GroupSpecNode> groupSpec;
+
+    public GroupbyClauseNode(SourceLocation loc) {
+        super(loc);
+    }
+
+    @Override
+    public ASTTag getTag() {
+        return ASTTag.GROUPBY_CLAUSE;
+    }
+
+    public List<GroupSpecNode> getGroupSpec() {
+        return groupSpec;
+    }
+
+    public void setGroupSpec(List<GroupSpecNode> groupSpec) {
+        this.groupSpec = groupSpec;
+    }
+}
index b7839d0..0c252b4 100644 (file)
@@ -84,6 +84,8 @@ public class XMLQueryCompiler {
 
     private LogicalOperatorPrettyPrintVisitor pprinter;
 
+    private VXQueryLogicalExpressionPrettyPrintVisitor epprinter;
+
     private ModuleNode moduleNode;
 
     private Module module;
index 04690f5..9aa7d9a 100644 (file)
@@ -37,8 +37,8 @@ public class XMLQueryParser {
             if (!exceptions.isEmpty()) {
                 throw exceptions.get(0);
             }
-            throw new SystemException(ErrorCode.XPST0003, new SourceLocation(sourceName, pe.currentToken.beginLine,
-                    pe.currentToken.beginColumn));
+            SourceLocation loc = new SourceLocation(sourceName, pe.currentToken.beginLine, pe.currentToken.beginColumn);
+            throw new SystemException(ErrorCode.XPST0003, loc, pe);
         }
     }
 
index 3d95cb1..6d0b35a 100644 (file)
@@ -21,10 +21,14 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
+import java.util.ListIterator;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -33,12 +37,14 @@ import javax.xml.namespace.QName;
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
 import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
@@ -49,12 +55,14 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOpe
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
 import org.apache.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
+import org.apache.hyracks.algebricks.core.algebra.typing.ITypingContext;
 import org.apache.hyracks.data.std.primitive.DoublePointable;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
 import org.apache.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
@@ -134,6 +142,8 @@ import org.apache.vxquery.xmlquery.ast.ForClauseNode;
 import org.apache.vxquery.xmlquery.ast.ForVarDeclNode;
 import org.apache.vxquery.xmlquery.ast.FunctionDeclNode;
 import org.apache.vxquery.xmlquery.ast.FunctionExprNode;
+import org.apache.vxquery.xmlquery.ast.GroupSpecNode;
+import org.apache.vxquery.xmlquery.ast.GroupbyClauseNode;
 import org.apache.vxquery.xmlquery.ast.IfExprNode;
 import org.apache.vxquery.xmlquery.ast.InfixExprNode;
 import org.apache.vxquery.xmlquery.ast.InfixExprNode.InfixOperator;
@@ -768,7 +778,7 @@ public class XMLQueryTranslator {
                 return translateQuantifiedExprNode(tCtx, qeNode);
             }
 
-            /*
+                /*
                     case TYPESWITCH_EXPRESSION: {
                         TypeswitchExprNode teNode = (TypeswitchExprNode) value;
                         Expression sExpr = translateExpression(teNode.getSwitchExpr());
@@ -1143,6 +1153,69 @@ public class XMLQueryTranslator {
                     order.getInputs().add(mutable(tCtx.op));
                     tCtx.op = order;
                     break;
+
+                }
+                case GROUPBY_CLAUSE: {
+                    GroupbyClauseNode gcNode = (GroupbyClauseNode) cNode;
+                    GroupByOperator group = new GroupByOperator();
+                    ArrayList<XQueryVariable> al = new ArrayList<XQueryVariable>();
+                    ArrayList<ILogicalExpression> expr = new ArrayList<ILogicalExpression>();
+                    ArrayList<QName> qvar = new ArrayList<QName>();
+                    ArrayList<TypeDeclNode> stype = new ArrayList<TypeDeclNode>();
+                    // set the group-by expression list of the GroupByOperator
+                    for (GroupSpecNode gsNode : gcNode.getGroupSpec()) { // atomize each grouping expression and bind it to a fresh variable
+                        if (gsNode.getExpr() != null) {
+                            expr.add(vre(createAssignment(
+                                    zeroOrOne(data(vre(translateExpression(gsNode.getExpr(), tCtx)))), tCtx)));
+                        } else {
+                            XQueryVariable x = tCtx.varScope.lookupVariable(createQName(gsNode.getVar()));
+                            expr.add(vre(createAssignment(data(vre(x.getLogicalVariable())), tCtx)));
+                        }
+                        qvar.add(createQName(gsNode.getVar()));
+                        stype.add(gsNode.getType());
+                    }
+                    while (pushCount > 0) { // pop the enclosing variable scopes, remembering each bound variable
+                        al.add(tCtx.varScope.listVariables().next());
+                        tCtx.popVariableScope();
+                        --pushCount;
+                    }
+                    group.getInputs().add(mutable(tCtx.op));
+                    for (int i = 0; i < qvar.size(); i++) {
+                        LogicalVariable groupVar = newLogicalVariable();
+                        tCtx.pushVariableScope();
+                        SequenceType groupVarType = SequenceType.create(BuiltinTypeRegistry.XS_ANY_ATOMIC,
+                                Quantifier.QUANT_QUESTION);
+                        if (stype.get(i) != null) {
+                            groupVarType = createSequenceType(stype.get(i));
+                        }
+                        XQueryVariable z = new XQueryVariable(qvar.get(i), groupVarType, groupVar);
+                        tCtx.varScope.registerVariable(z);
+                        group.addGbyExpression(groupVar, expr.get(i));
+                        ++pushCount;
+                    }
+
+                    // set the nested plans of the GroupByOperator: one SEQUENCE aggregation per non-grouping variable
+                    for (int j = 0; j < al.size(); j++) {
+                        XQueryVariable xqv = al.get(j);
+                        ILogicalExpression sequenceInput = vre(xqv.getLogicalVariable());
+                        tCtx.pushVariableScope();
+                        AggregateFunctionCallExpression fsequence = (AggregateFunctionCallExpression) afce(
+                                BuiltinOperators.SEQUENCE, false, sequenceInput);
+                        LogicalVariable aggVar = newLogicalVariable();
+                        AggregateOperator agg = new AggregateOperator(mkSingletonArrayList(aggVar),
+                                mkSingletonArrayList(new MutableObject<>(fsequence)));
+                        agg.getInputs()
+                                .add(new MutableObject<>(new NestedTupleSourceOperator(new MutableObject<>(group))));
+                        ILogicalPlan plan = new ALogicalPlanImpl(new MutableObject<>(agg));
+                        group.getNestedPlans().add(plan);
+                        SequenceType aggVarType = SequenceType.create(xqv.getType().getItemType(),
+                                Quantifier.QUANT_STAR);
+                        XQueryVariable nagg = new XQueryVariable(xqv.getName(), aggVarType, aggVar);
+                        tCtx.varScope.registerVariable(nagg);
+                        pushCount++;
+                    }
+                    tCtx.op = group;
+                    break;
                 }
                 default:
                     throw new IllegalStateException("Unknown clause: " + cNode.getTag());
@@ -1164,6 +1237,12 @@ public class XMLQueryTranslator {
         return var;
     }
 
+    protected <T> List<T> mkSingletonArrayList(T item) {
+        ArrayList<T> array = new ArrayList<>(1);
+        array.add(item);
+        return array;
+    }
+
     private LogicalVariable translateVarRefNode(TranslationContext tCtx, VarRefNode vrNode) throws SystemException {
         QName vName = createQName(vrNode.getVariable());
         XQueryVariable var = tCtx.varScope.lookupVariable(vName);
@@ -1580,8 +1659,10 @@ public class XMLQueryTranslator {
                                 }
                                 if (arguments.size() == 0) {
                                     if (ctxExpr == null) {
+                                        ctxExpr = vre(createAssignment(expr, tCtx));
                                         ctxExpr = sfce(BuiltinOperators.KEYS_OR_MEMBERS, expr);
                                     } else {
+                                        ctxExpr = vre(createAssignment(ctxExpr, tCtx));
                                         ctxExpr = sfce(BuiltinOperators.KEYS_OR_MEMBERS, ctxExpr);
                                     }
                                 }
@@ -2107,6 +2188,11 @@ public class XMLQueryTranslator {
         return new ScalarFunctionCallExpression(BuiltinFunctions.FN_DATA_1, Collections.singletonList(mutable(expr)));
     }
 
+    private ILogicalExpression zeroOrOne(ILogicalExpression expr) throws SystemException {
+        return new ScalarFunctionCallExpression(BuiltinFunctions.FN_ZERO_OR_ONE_1,
+                Collections.singletonList(mutable(expr)));
+    }
+
     private ILogicalExpression string(ILogicalExpression expr) throws SystemException {
         return new ScalarFunctionCallExpression(BuiltinFunctions.FN_STRING_1, Collections.singletonList(mutable(expr)));
     }
@@ -2177,6 +2263,11 @@ public class XMLQueryTranslator {
         public void registerVariable(XQueryVariable var) {
             moduleCtx.registerVariable(var);
         }
+
+        @Override
+        public Iterator<XQueryVariable> listVariables() {
+            return moduleCtx.listVariables();
+        }
     }
 
     private class TranslationContext {
@@ -2222,6 +2313,8 @@ public class XMLQueryTranslator {
         public XQueryVariable lookupVariable(QName name);
 
         public void registerVariable(XQueryVariable var);
+
+        public Iterator<XQueryVariable> listVariables();
     }
 
     private static class ExpressionVariableScope implements IVariableScope {
@@ -2251,5 +2344,10 @@ public class XMLQueryTranslator {
         public void registerVariable(XQueryVariable var) {
             varMap.put(var.getName(), var);
         }
+
+        public Iterator<XQueryVariable> listVariables() {
+            return varMap.values().iterator();
+        }
+
     }
 }
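
A minimal sketch (illustration only, not part of the patch) of what the GROUPBY_CLAUSE case above builds. It mirrors the group_json_count.xq test added later in this change; the count($i) in the return is illustrative. Each grouping spec is atomized (fn:data, wrapped in fn:zero-or-one when an explicit := expression is given) and added as a group-by expression on the GroupByOperator, and every variable bound by the preceding for/let clauses ($r, $z and $i here) is re-bound through a nested plan whose AggregateOperator applies the builtin SEQUENCE aggregate over a NestedTupleSourceOperator rooted at the group; only the nested plan for $i is shown.

    (: sketch: grouped FLWOR in the style of group_json_count.xq :)
    let $x := "jsonCollection"
    for $r in collection($x)
    let $z := $r("results")()
    for $i in $z
    group by $date := $i("date")
    return count($i)

    (: roughly becomes (not an exact plan printout):
         GROUP BY ($date <- fn:zero-or-one(fn:data($i("date"))))
           nested plan: AGGREGATE ($i <- sequence($i))
                          <- NESTED TUPLE SOURCE (group)
       so that after the clause $i denotes the whole sequence of
       items in its group and count($i) is a per-group count. :)
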
index bfb5d26..38e96d8 100644 (file)
@@ -716,7 +716,7 @@ ASTNode FLWORExpr()  :
     ASTNode rExpr;
 }
 {
-    ((ForClause(clauses) | LetClause(clauses)))+  [WhereClause(clauses)] [OrderByClause(clauses)] "return" rExpr = ExprSingle()
+    (ForClause(clauses) | LetClause(clauses)) (ForClause(clauses) | LetClause(clauses) | WhereClause(clauses) | GroupByClause(clauses) | OrderByClause(clauses))* "return" rExpr = ExprSingle()
     {
         FLWORExprNode flwor = new FLWORExprNode(clauses.get(0).getSourceLocation());
         flwor.setClauses(clauses);
@@ -912,6 +912,66 @@ void OrderModifier(OrderSpecNode os)  :
     }
 }
 
+void GroupByClause(List<FLWORClauseNode> clauses)  :
+{
+    List<GroupSpecNode> gsList;
+    Token start;
+}
+{
+    start = "group" "by" gsList = GroupSpecList() {
+        GroupbyClauseNode gc = new GroupbyClauseNode(createSourceLocation(start));
+        gc.setGroupSpec(gsList);
+        clauses.add(gc);
+    }
+}
+
+List<GroupSpecNode> GroupSpecList()  :
+{
+    List<GroupSpecNode> gsList = new ArrayList<GroupSpecNode>();
+    GroupSpecNode gs;
+}
+{
+    gs = GroupSpec() {
+        gsList.add(gs);
+    } (
+        "," gs = GroupSpec() {
+            gsList.add(gs);
+        }
+    )* {
+        return gsList;
+    }
+}
+
+GroupSpecNode GroupSpec()  :
+{
+   
+    QNameNode varName;
+    ASTNode expr = null;
+    TypeDeclNode type = null;
+    GroupSpecNode gs;
+    String collation = null;
+}
+{
+    varName = GroupingVariable() [[type=TypeDeclaration()] ":=" expr=ExprSingle()] ["collation" collation = URILiteral()]{
+        gs = new GroupSpecNode(varName.getSourceLocation());
+        gs.setDefaultVar(varName);
+        gs.setType(type);
+        gs.setExpression(expr);
+        gs.setCollation(collation);
+        return gs;
+    } 
+}
+
+QNameNode GroupingVariable()  :
+{
+    QNameNode qname;
+}
+{
+    "$" qname = VarName() {
+        return qname;
+    }
+}
+
 QuantifiedExprNode QuantifiedExpr() :
 {
     Token some = null;
@@ -3281,6 +3341,9 @@ QNameNode FunctionQName()  :
     | t = "order"  {
         return createQName(t);
     }
+    | t = "group"  {
+        return createQName(t);
+    }
     | t = "empty"  {
         return createQName(t);
     }
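
For illustration only (not one of the tests added by this patch), the GroupByClause/GroupSpec productions above also accept the fuller XQuery 3.0 grouping-spec form: an optional "as" type declaration and an explicit ":=" grouping expression; an optional "collation" URILiteral is parsed as well, though the translation shown earlier does not yet consume it. A hedged sketch against the ghcnd test collection used by group.xq (the variable names and the fn:string/fn:count calls are illustrative):

    for $i in collection("ghcnd")/dataCollection/data
    group by $d as xs:string := fn:string($i/date)
    order by $d
    return {"date": $d, "count": fn:count($i)}
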
index f706370..64f1218 100644 (file)
                             );
                         </xsl:if>
                     </xsl:for-each>
-                    <xsl:for-each select="runtime">
-                        <xsl:if test="@type = 'scalar'">
-                        this.scalarEvaluatorFactory = true;
-                        </xsl:if>
-                        <xsl:if test="@type = 'aggregate'">
-                        this.aggregateEvaluatorFactory = true;
-                        </xsl:if>
-                        <xsl:if test="@type = 'unnesting'">
-                        this.unnestingEvaluatorFactory = true;
-                        </xsl:if>
-                    </xsl:for-each>
                     }
                 </xsl:if>
                 <xsl:for-each select="runtime">
                     <xsl:if test="@type = 'scalar'">
+                    {
+                        this.scalarEvaluatorFactory = true;
+                    }
                     public org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory createScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory[] args) {
                         return new <xsl:value-of select="@class"/>(args);
                     }
                     </xsl:if>
                     <xsl:if test="@type = 'aggregate'">
+                    {
+                        this.aggregateEvaluatorFactory = true;
+                    }
                     public org.apache.hyracks.algebricks.runtime.base.IAggregateEvaluatorFactory createAggregateEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory[] args) {
                         return new <xsl:value-of select="@class"/>(args);
                     }
                     </xsl:if>
                     <xsl:if test="@type = 'unnesting'">
+                    {
+                        this.unnestingEvaluatorFactory = true;
+                    }
                     public org.apache.hyracks.algebricks.runtime.base.IUnnestingEvaluatorFactory createUnnestingEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory[] args) {
                         return new <xsl:value-of select="@class"/>(args);
                     }
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group.txt
new file mode 100644 (file)
index 0000000..2057a08
--- /dev/null
@@ -0,0 +1,4 @@
+{"date":"2001-01-01T00:00:00.000","value":[<value>11.25</value>,<value>31</value>,<value>1000</value>]}
+{"date":"2002-02-02T00:00:00.000","value":[<value>12.5</value>,<value>32</value>,<value>20</value>]}
+{"date":"2003-03-03T00:00:00.000","value":[<value>13.75</value>,<value>33</value>]}
+{"date":"2004-04-04T00:00:00.000","value":[<value>40</value>,<value>4</value>]}
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group_json.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group_json.txt
new file mode 100644 (file)
index 0000000..e27fe3d
--- /dev/null
@@ -0,0 +1,4 @@
+2001-01-01T00:00:00.000
+2002-02-02T00:00:00.000
+2003-03-03T00:00:00.000
+2004-04-04T00:00:00.000
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group_json_count.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Groups/group_json_count.txt
new file mode 100644 (file)
index 0000000..601478f
--- /dev/null
@@ -0,0 +1,4 @@
+3
+3
+3
+1
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-1/q15_parser.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-1/q15_parser.txt
new file mode 100644 (file)
index 0000000..3ca741c
--- /dev/null
@@ -0,0 +1,3 @@
+{"date":"2001-01-01T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000001","attributes":",,","value":11.25}
+{"date":"2002-02-02T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000002","attributes":",,","value":12.5}
+{"date":"2003-03-03T00:00:00.000","datatype":"TMIN","station":"GHCND:AS000000003","attributes":",,","value":13.75}
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-1/q16_parser.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-1/q16_parser.txt
new file mode 100644 (file)
index 0000000..abbafea
--- /dev/null
@@ -0,0 +1,3 @@
+{"date":"2001-01-01T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000001","attributes":",,","value":11.25}
+{"date":"2001-01-01T00:00:00.000","datatype":"TMAX","station":"GHCND:US000000001","attributes":",,","value":31}
+{"date":"2001-01-01T00:00:00.000","datatype":"AWND","station":"GHCND:US000000001","attributes":",,","value":1000}
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-2/q14_parser.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-2/q14_parser.txt
new file mode 100644 (file)
index 0000000..abbafea
--- /dev/null
@@ -0,0 +1,3 @@
+{"date":"2001-01-01T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000001","attributes":",,","value":11.25}
+{"date":"2001-01-01T00:00:00.000","datatype":"TMAX","station":"GHCND:US000000001","attributes":",,","value":31}
+{"date":"2001-01-01T00:00:00.000","datatype":"AWND","station":"GHCND:US000000001","attributes":",,","value":1000}
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-2/q16_parser.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-2/q16_parser.txt
new file mode 100644 (file)
index 0000000..abbafea
--- /dev/null
@@ -0,0 +1,3 @@
+{"date":"2001-01-01T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000001","attributes":",,","value":11.25}
+{"date":"2001-01-01T00:00:00.000","datatype":"TMAX","station":"GHCND:US000000001","attributes":",,","value":31}
+{"date":"2001-01-01T00:00:00.000","datatype":"AWND","station":"GHCND:US000000001","attributes":",,","value":1000}
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-4/q14_parser.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-4/q14_parser.txt
new file mode 100644 (file)
index 0000000..abbafea
--- /dev/null
@@ -0,0 +1,3 @@
+{"date":"2001-01-01T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000001","attributes":",,","value":11.25}
+{"date":"2001-01-01T00:00:00.000","datatype":"TMAX","station":"GHCND:US000000001","attributes":",,","value":31}
+{"date":"2001-01-01T00:00:00.000","datatype":"AWND","station":"GHCND:US000000001","attributes":",,","value":1000}
diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-4/q15_parser.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Json/Parser/Partition-4/q15_parser.txt
new file mode 100644 (file)
index 0000000..3ca741c
--- /dev/null
@@ -0,0 +1,3 @@
+{"date":"2001-01-01T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000001","attributes":",,","value":11.25}
+{"date":"2002-02-02T00:00:00.000","datatype":"TMIN","station":"GHCND:US000000002","attributes":",,","value":12.5}
+{"date":"2003-03-03T00:00:00.000","datatype":"TMIN","station":"GHCND:AS000000003","attributes":",,","value":13.75}
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group.xq
new file mode 100644 (file)
index 0000000..e88c496
--- /dev/null
@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+   
+(: Return all the recorded values for each date :)
+let $sensor_collection := "ghcnd"
+for $i in collection($sensor_collection)/dataCollection/data
+group by $date := $i/date 
+order by data($date)
+return {"date":$date, "value":$i/value} 
+   
\ No newline at end of file
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group_json.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group_json.xq
new file mode 100644 (file)
index 0000000..17be0f6
--- /dev/null
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+   
+(: Return each distinct date, one per group :)
+let $x:="jsonCollection"
+for $r in collection($x)
+let $z:=$r("results")()
+for $i in $z
+group by $date:=$i("date")
+order by $date
+return $date
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group_json_count.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Groups/group_json_count.xq
new file mode 100644 (file)
index 0000000..9fea16c
--- /dev/null
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Return the number of records for each date :)  
+let $x:="jsonCollection"
+for $r in collection($x)
+let $z:=$r("results")()
+for $i in $z
+group by $date:= $i("date")
+order by $date
+let $count:=count($i)
+return $count
+
index 537e4f1..c81f271 100644 (file)
@@ -20,4 +20,5 @@
 (: 2002-2-2.                                                                  :)
 for $s in collection-from-index("src/test/resources/TestSources/ghcnd", "/stationCollection/station")/station
 where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1"))
+order by $s/id
 return $s
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-1/q15_parser.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-1/q15_parser.xq
new file mode 100644 (file)
index 0000000..89555dc
--- /dev/null
@@ -0,0 +1,26 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: JSON parser query :)
+(: Parse a JSON collection and return the records with datatype "TMIN", ordered by date :)
+let $x:="jsonCollection"
+for $r in collection($x)
+    let $z:=$r("results")()
+    for $i in $z
+where $i("datatype")="TMIN"
+order by $i("date")
+return $i
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-1/q16_parser.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-1/q16_parser.xq
new file mode 100644 (file)
index 0000000..db4eaf8
--- /dev/null
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: JSON parser query :)
+(: Parse a JSON collection and return the records for station "GHCND:US000000001" :)
+let $x:="jsonCollection"
+for $r in collection($x)
+    let $z:=$r("results")()
+    for $i in $z
+where $i("station")="GHCND:US000000001"
+return $i
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-2/q14_parser.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-2/q14_parser.xq
new file mode 100644 (file)
index 0000000..f8b9fde
--- /dev/null
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: JSON parser query :)
+(: Parse a JSON collection split across two partitions and return the records for date 2001-01-01 :)
+let $x:="json_half_1|json_half_2"
+for $r in collection($x)
+    let $z:=$r("results")()
+    for $i in $z
+where $i("date")="2001-01-01T00:00:00.000"
+return $i
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-2/q16_parser.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-2/q16_parser.xq
new file mode 100644 (file)
index 0000000..58ef8fe
--- /dev/null
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: JSON parser query :)
+(: Parse a JSON collection split across two partitions and return the records for station "GHCND:US000000001" :)
+let $x:="json_half_1|json_half_2"
+for $r in collection($x)
+    let $z:=$r("results")()
+    for $i in $z
+where $i("station")="GHCND:US000000001"
+return $i
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-4/q14_parser.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-4/q14_parser.xq
new file mode 100644 (file)
index 0000000..12d028d
--- /dev/null
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: JSON parser query :)
+(: Parse a JSON collection split across four partitions and return the records for date 2001-01-01 :)
+let $x:="json_quarter_1|json_quarter_2|json_quarter_3|json_quarter_4"
+for $r in collection($x)
+    let $z:=$r("results")()
+    for $i in $z
+where $i("date")="2001-01-01T00:00:00.000"
+return $i
diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-4/q15_parser.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Json/Parser/Partition-4/q15_parser.xq
new file mode 100644 (file)
index 0000000..6046303
--- /dev/null
@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: JSON parser query :)
+(: Parse a JSON collection split across four partitions and return the records with datatype "TMIN" :)
+let $x:="json_quarter_1|json_quarter_2|json_quarter_3|json_quarter_4"
+for $r in collection($x)
+    let $z:=$r("results")()
+    for $i in $z
+where $i("datatype")="TMIN"
+return $i
index 3fc9a72..5ecdb94 100644 (file)
@@ -52,6 +52,7 @@
 <!ENTITY JsonParserQueries SYSTEM "cat/JsonParserQueries.xml">
 <!ENTITY LibrariesInJSONiq SYSTEM "cat/LibrariesInJSONiq.xml">
 <!ENTITY XMLInJSONQueries SYSTEM "cat/XMLInJSONQueries.xml">
+<!ENTITY GroupQueries SYSTEM "cat/GroupQueries.xml">
 
 <!ENTITY TraceQuery SYSTEM "cat/TraceQuery.xml">
 
          &IndexingQueries;
         </test-group>
     </test-group>
+    <test-group name="GroupQueries" featureOwner="Christina Pavlopoulou">
+        <GroupInfo>
+            <title>Group Queries</title>
+            <description/>
+        </GroupInfo>
+        <test-group name="GroupTests" featureOwner="Christina Pavlopoulou">
+            <GroupInfo>
+                <title>Group-by Tests</title>
+                <description/>
+            </GroupInfo>
+         &GroupQueries;
+        </test-group>
+    </test-group>
     <test-group name="JsoniqQueries" featureOwner="Christina Pavlopoulou">
         <GroupInfo>
             <title>Jsoniq Queries</title>
diff --git a/vxquery-xtest/src/test/resources/cat/GroupQueries.xml b/vxquery-xtest/src/test/resources/cat/GroupQueries.xml
new file mode 100644 (file)
index 0000000..71cace0
--- /dev/null
@@ -0,0 +1,38 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<test-group xmlns="http://www.w3.org/2005/02/query-test-XQTSCatalog" name="GroupQueries" featureOwner="VXQuery">
+    <GroupInfo>
+        <title>Group by</title>
+        <description/>
+    </GroupInfo>
+    <test-case name="group" FilePath="Groups/" Creator="Christina Pavlopoulou">
+        <description>Group-by query.</description>
+        <query name="group" date="2017-04-20"/>
+        <output-file compare="Text">group.txt</output-file>
+    </test-case>
+    <test-case name="group_json" FilePath="Groups/" Creator="Christina Pavlopoulou">
+        <description>Group-by query on JSON data.</description>
+        <query name="group_json" date="2017-04-20"/>
+        <output-file compare="Text">group_json.txt</output-file>
+    </test-case>
+    <test-case name="group_json_count" FilePath="Groups/" Creator="Christina Pavlopoulou">
+        <description>Group-by query with an aggregation function.</description>
+        <query name="group_json_count" date="2017-04-20"/>
+        <output-file compare="Text">group_json_count.txt</output-file>
+    </test-case>
+</test-group>
\ No newline at end of file
index 795ab9b..1287225 100644 (file)
       <query name="q14_parser" date="2016-07-12"/>
      <output-file compare="Text">q14_parser.txt</output-file>
    </test-case>
-   <test-case name="json-parser-q15" FilePath="Json/Parser/Partition-2" Creator="Christina Pavlopoulou">
+   <test-case name="json-parser-q14-2" FilePath="Json/Parser/Partition-2" Creator="Christina Pavlopoulou">
+      <description>Parsing a collection of json files.</description>
+      <query name="q14_parser" date="2016-07-12"/>
+     <output-file compare="Text">q14_parser.txt</output-file>
+   </test-case>
+   <test-case name="json-parser-q14-4" FilePath="Json/Parser/Partition-4" Creator="Christina Pavlopoulou">
+      <description>Parsing a collection of json files.</description>
+      <query name="q14_parser" date="2016-07-12"/>
+     <output-file compare="Text">q14_parser.txt</output-file>
+   </test-case>
+   <test-case name="json-parser-q15" FilePath="Json/Parser/Partition-1" Creator="Christina Pavlopoulou">
+      <description>Parsing a collection of json files.</description>
+      <query name="q15_parser" date="2016-07-15"/>
+     <output-file compare="Text">q15_parser.txt</output-file>
+   </test-case>
+   <test-case name="json-parser-q15-2" FilePath="Json/Parser/Partition-2" Creator="Christina Pavlopoulou">
       <description>Parsing a collection of json files.</description>
       <query name="q15_parser" date="2016-07-15"/>
      <output-file compare="Text">q15_parser.txt</output-file>
    </test-case>
-   <test-case name="json-parser-q16" FilePath="Json/Parser/Partition-4" Creator="Christina Pavlopoulou">
+   <test-case name="json-parser-q15-4" FilePath="Json/Parser/Partition-4" Creator="Christina Pavlopoulou">
+      <description>Parsing a collection of json files.</description>
+      <query name="q15_parser" date="2016-07-15"/>
+     <output-file compare="Text">q15_parser.txt</output-file>
+   </test-case>
+   <test-case name="json-parser-q16" FilePath="Json/Parser/Partition-1" Creator="Christina Pavlopoulou">
+      <description>Parsing a collection of json files.</description>
+      <query name="q16_parser" date="2016-07-15"/>
+     <output-file compare="Text">q16_parser.txt</output-file>
+   </test-case>
+   <test-case name="json-parser-q16-2" FilePath="Json/Parser/Partition-2" Creator="Christina Pavlopoulou">
+      <description>Parsing a collection of json files.</description>
+      <query name="q16_parser" date="2016-07-15"/>
+     <output-file compare="Text">q16_parser.txt</output-file>
+   </test-case>
+   <test-case name="json-parser-q16-4" FilePath="Json/Parser/Partition-4" Creator="Christina Pavlopoulou">
       <description>Parsing a collection of json files.</description>
       <query name="q16_parser" date="2016-07-15"/>
      <output-file compare="Text">q16_parser.txt</output-file>