DRILL-7450: Improve performance for ANALYZE command
author Volodymyr Vysotskyi <vvovyk@gmail.com>
Fri, 22 Nov 2019 17:53:08 +0000 (19:53 +0200)
committer Volodymyr Vysotskyi <vvovyk@gmail.com>
Wed, 4 Dec 2019 10:41:59 +0000 (12:41 +0200)
commit 20293b63c0bb559ae35d57f7cb1ab7fa24e9ee6d
tree 2b33da93bf73aa437205af5f0916f45f13f0c456
parent de41559e748b7c44139cdd0a3eefef05085570a8
DRILL-7450: Improve performance for ANALYZE command

- Implement two-phase aggregation for the lowest metadata aggregate to optimize performance
- Allow using complex functions with hash aggregate
- Use hash aggregation for PHASE_1of2 for ANALYZE to reduce memory usage and avoid sorting non-aggregated data
- Add sort above hash aggregation to fix correctness of merge exchange and stream aggregate

closes #1907
56 files changed:
docs/dev/MetastoreAnalyze.md
exec/java-exec/src/main/java/org/apache/drill/exec/expr/IsPredicate.java
exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillAggFuncHolder.java
exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillComplexWriterAggFuncHolder.java
exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillFuncHolder.java
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/ColumnNamesOptions.java [new file with mode: 0644]
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeFileInfoProvider.java
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeInfoProvider.java
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeParquetInfoProvider.java
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataAggregateContext.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/base/AbstractGroupScanWithMetadata.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/config/HashToMergeExchange.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/config/MetadataHashAggPOP.java [new file with mode: 0644]
exec/java-exec/src/main/java/org/apache/drill/exec/physical/config/MetadataStreamAggPOP.java [moved from exec/java-exec/src/main/java/org/apache/drill/exec/physical/config/MetadataAggPOP.java with 72% similarity]
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/StreamingAggBatch.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/flatten/FlattenRecordBatch.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataAggregateHelper.java [moved from exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataAggBatch.java with 68% similarity]
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataHandlerBatch.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataHashAggBatch.java [new file with mode: 0644]
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataHashAggBatchCreator.java [moved from exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataAggBatchCreator.java with 80% similarity]
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataStreamAggBatch.java [new file with mode: 0644]
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataStreamAggBatchCreator.java [new file with mode: 0644]
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/project/ProjectRecordBatch.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/resultSet/model/single/BaseReaderBuilder.java
exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/RowSetFormatter.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/PlannerPhase.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java [new file with mode: 0644]
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/DrillDistributionTrait.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/HashAggPrule.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/HashPrelUtil.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/MetadataAggPrule.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/MetadataHandlerPrule.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/MetadataHashAggPrel.java [moved from exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/MetadataAggPrel.java with 78% similarity]
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/MetadataStreamAggPrel.java [new file with mode: 0644]
exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PrelUtil.java
exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MetastoreAnalyzeTableHandler.java
exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileTableMetadataProviderBuilder.java
exec/java-exec/src/main/java/org/apache/drill/exec/store/pojo/DynamicPojoRecordReader.java
exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunction.java
exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java
exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/agg/TestAggWithAnyValue.java
exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/agg/TestHashAggEmitOutcome.java
exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
exec/java-exec/src/test/resources/functions/test_covariance.json
exec/java-exec/src/test/resources/functions/test_logical_aggr.json
exec/vector/src/main/codegen/templates/ComplexWriters.java
logical/src/main/java/org/apache/drill/common/expression/FunctionHolderExpression.java
logical/src/main/java/org/apache/drill/common/logical/data/MetadataAggregate.java