HIVE-18910 : Migrate to Murmur hash for shuffle and bucketing (Deepak Jaiswal, review...
author Deepak Jaiswal <djaiswal@apache.org>
Mon, 30 Apr 2018 05:24:15 +0000 (22:24 -0700)
committer Deepak Jaiswal <djaiswal@apache.org>
Mon, 30 Apr 2018 05:24:15 +0000 (22:24 -0700)
656 files changed:
hbase-handler/src/test/results/positive/external_table_ppd.q.out
hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out
hbase-handler/src/test/results/positive/hbase_ddl.q.out
hbase-handler/src/test/results/positive/hbasestats.q.out
hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java
hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java
itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out
itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out
itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out
itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
itests/src/test/resources/testconfiguration.properties
ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keyseries/VectorKeySeriesSerializedImpl.java
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java
ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMurmurHash.java [new file with mode: 0644]
ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java
ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/TestPrivilegesV1.java
ql/src/test/queries/clientpositive/archive_excludeHadoop20.q
ql/src/test/queries/clientpositive/bucket_many.q
ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
ql/src/test/queries/clientpositive/bucket_num_reducers.q
ql/src/test/queries/clientpositive/bucket_num_reducers2.q
ql/src/test/queries/clientpositive/bucket_num_reducers_acid2.q
ql/src/test/queries/clientpositive/insert_update_delete.q
ql/src/test/queries/clientpositive/sample10.q
ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q
ql/src/test/queries/clientpositive/tez_smb_1.q
ql/src/test/results/clientnegative/alter_file_format.q.out
ql/src/test/results/clientnegative/alter_view_as_select_with_partition.q.out
ql/src/test/results/clientnegative/unset_table_property.q.out
ql/src/test/results/clientnegative/unset_view_property.q.out
ql/src/test/results/clientpositive/acid_mapjoin.q.out
ql/src/test/results/clientpositive/acid_nullscan.q.out
ql/src/test/results/clientpositive/acid_table_stats.q.out
ql/src/test/results/clientpositive/alterColumnStats.q.out
ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
ql/src/test/results/clientpositive/alter_file_format.q.out
ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out
ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out
ql/src/test/results/clientpositive/alter_partition_clusterby_sortby.q.out
ql/src/test/results/clientpositive/alter_partition_coltype.q.out
ql/src/test/results/clientpositive/alter_skewed_table.q.out
ql/src/test/results/clientpositive/alter_table_add_partition.q.out
ql/src/test/results/clientpositive/alter_table_column_stats.q.out
ql/src/test/results/clientpositive/alter_table_not_sorted.q.out
ql/src/test/results/clientpositive/alter_table_serde2.q.out
ql/src/test/results/clientpositive/alter_view_as_select.q.out
ql/src/test/results/clientpositive/alter_view_col_type.q.out
ql/src/test/results/clientpositive/analyze_table_null_partition.q.out
ql/src/test/results/clientpositive/analyze_tbl_date.q.out
ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out
ql/src/test/results/clientpositive/autoColumnStats_1.q.out
ql/src/test/results/clientpositive/autoColumnStats_10.q.out
ql/src/test/results/clientpositive/autoColumnStats_2.q.out
ql/src/test/results/clientpositive/autoColumnStats_3.q.out
ql/src/test/results/clientpositive/autoColumnStats_4.q.out
ql/src/test/results/clientpositive/autoColumnStats_5a.q.out
ql/src/test/results/clientpositive/autoColumnStats_8.q.out
ql/src/test/results/clientpositive/autoColumnStats_9.q.out
ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out
ql/src/test/results/clientpositive/basicstat_partval.q.out
ql/src/test/results/clientpositive/beeline/escape_comments.q.out
ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out
ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out
ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out
ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out
ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out
ql/src/test/results/clientpositive/binary_output_format.q.out
ql/src/test/results/clientpositive/bucket1.q.out
ql/src/test/results/clientpositive/bucket2.q.out
ql/src/test/results/clientpositive/bucket3.q.out
ql/src/test/results/clientpositive/bucket_map_join_1.q.out
ql/src/test/results/clientpositive/bucket_map_join_2.q.out
ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out
ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out
ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out
ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out
ql/src/test/results/clientpositive/bucket_num_reducers.q.out [deleted file]
ql/src/test/results/clientpositive/bucket_num_reducers2.q.out [deleted file]
ql/src/test/results/clientpositive/bucketcontext_1.q.out
ql/src/test/results/clientpositive/bucketcontext_2.q.out
ql/src/test/results/clientpositive/bucketcontext_3.q.out
ql/src/test/results/clientpositive/bucketcontext_4.q.out
ql/src/test/results/clientpositive/bucketcontext_5.q.out
ql/src/test/results/clientpositive/bucketcontext_6.q.out
ql/src/test/results/clientpositive/bucketcontext_7.q.out
ql/src/test/results/clientpositive/bucketcontext_8.q.out
ql/src/test/results/clientpositive/bucketmapjoin10.q.out
ql/src/test/results/clientpositive/bucketmapjoin11.q.out
ql/src/test/results/clientpositive/bucketmapjoin12.q.out
ql/src/test/results/clientpositive/bucketmapjoin13.q.out
ql/src/test/results/clientpositive/bucketmapjoin5.q.out
ql/src/test/results/clientpositive/bucketmapjoin8.q.out
ql/src/test/results/clientpositive/bucketmapjoin9.q.out
ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out
ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out
ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out
ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out
ql/src/test/results/clientpositive/column_names_with_leading_and_trailing_spaces.q.out
ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
ql/src/test/results/clientpositive/columnstats_infinity.q.out
ql/src/test/results/clientpositive/columnstats_partlvl.q.out
ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
ql/src/test/results/clientpositive/combine3.q.out
ql/src/test/results/clientpositive/comments.q.out
ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out
ql/src/test/results/clientpositive/create_like.q.out
ql/src/test/results/clientpositive/create_like_tbl_props.q.out
ql/src/test/results/clientpositive/create_like_view.q.out
ql/src/test/results/clientpositive/create_or_replace_view.q.out
ql/src/test/results/clientpositive/create_skewed_table1.q.out
ql/src/test/results/clientpositive/create_table_like_stats.q.out
ql/src/test/results/clientpositive/create_view.q.out
ql/src/test/results/clientpositive/create_view_defaultformats.q.out
ql/src/test/results/clientpositive/create_view_partitioned.q.out
ql/src/test/results/clientpositive/create_view_translate.q.out
ql/src/test/results/clientpositive/create_with_constraints.q.out
ql/src/test/results/clientpositive/create_with_constraints2.q.out
ql/src/test/results/clientpositive/ctas.q.out
ql/src/test/results/clientpositive/ctas_colname.q.out
ql/src/test/results/clientpositive/ctas_uses_database_location.q.out
ql/src/test/results/clientpositive/database_location.q.out
ql/src/test/results/clientpositive/decimal_serde.q.out
ql/src/test/results/clientpositive/default_file_format.q.out
ql/src/test/results/clientpositive/deleteAnalyze.q.out
ql/src/test/results/clientpositive/describe_comment_indent.q.out
ql/src/test/results/clientpositive/describe_comment_nonascii.q.out
ql/src/test/results/clientpositive/describe_syntax.q.out
ql/src/test/results/clientpositive/describe_table.q.out
ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out
ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
ql/src/test/results/clientpositive/druid_basic1.q.out
ql/src/test/results/clientpositive/druid_basic2.q.out
ql/src/test/results/clientpositive/druid_intervals.q.out
ql/src/test/results/clientpositive/druid_topn.q.out
ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out
ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out
ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out
ql/src/test/results/clientpositive/escape_comments.q.out
ql/src/test/results/clientpositive/exim_hidden_files.q.out
ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out
ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
ql/src/test/results/clientpositive/filter_aggr.q.out
ql/src/test/results/clientpositive/filter_join_breaktask.q.out
ql/src/test/results/clientpositive/filter_union.q.out
ql/src/test/results/clientpositive/fouter_join_ppr.q.out
ql/src/test/results/clientpositive/groupby_map_ppr.q.out
ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
ql/src/test/results/clientpositive/groupby_ppr.q.out
ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
ql/src/test/results/clientpositive/groupby_sort_6.q.out
ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out
ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out
ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out
ql/src/test/results/clientpositive/input23.q.out
ql/src/test/results/clientpositive/input30.q.out
ql/src/test/results/clientpositive/input31.q.out
ql/src/test/results/clientpositive/input39.q.out
ql/src/test/results/clientpositive/input42.q.out
ql/src/test/results/clientpositive/input_part1.q.out
ql/src/test/results/clientpositive/input_part2.q.out
ql/src/test/results/clientpositive/input_part7.q.out
ql/src/test/results/clientpositive/input_part9.q.out
ql/src/test/results/clientpositive/join17.q.out
ql/src/test/results/clientpositive/join26.q.out
ql/src/test/results/clientpositive/join32.q.out
ql/src/test/results/clientpositive/join33.q.out
ql/src/test/results/clientpositive/join34.q.out
ql/src/test/results/clientpositive/join35.q.out
ql/src/test/results/clientpositive/join9.q.out
ql/src/test/results/clientpositive/join_filters_overlap.q.out
ql/src/test/results/clientpositive/join_map_ppr.q.out
ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
ql/src/test/results/clientpositive/list_bucket_dml_14.q.out
ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
ql/src/test/results/clientpositive/llap/autoColumnStats_1.q.out
ql/src/test/results/clientpositive/llap/autoColumnStats_10.q.out
ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_5.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out
ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out
ql/src/test/results/clientpositive/llap/bucket2.q.out
ql/src/test/results/clientpositive/llap/bucket3.q.out
ql/src/test/results/clientpositive/llap/bucket4.q.out
ql/src/test/results/clientpositive/llap/bucket5.q.out
ql/src/test/results/clientpositive/llap/bucket6.q.out
ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
ql/src/test/results/clientpositive/llap/bucket_many.q.out
ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
ql/src/test/results/clientpositive/llap/bucket_num_reducers.q.out [new file with mode: 0644]
ql/src/test/results/clientpositive/llap/bucket_num_reducers2.q.out [new file with mode: 0644]
ql/src/test/results/clientpositive/llap/bucket_num_reducers_acid2.q.out [moved from ql/src/test/results/clientpositive/bucket_num_reducers_acid2.q.out with 53% similarity]
ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out
ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out
ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
ql/src/test/results/clientpositive/llap/cbo_rp_views.q.out
ql/src/test/results/clientpositive/llap/cbo_views.q.out
ql/src/test/results/clientpositive/llap/check_constraint.q.out
ql/src/test/results/clientpositive/llap/cluster.q.out
ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out
ql/src/test/results/clientpositive/llap/column_names_with_leading_and_trailing_spaces.q.out
ql/src/test/results/clientpositive/llap/column_table_stats.q.out
ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out
ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out
ql/src/test/results/clientpositive/llap/ctas.q.out
ql/src/test/results/clientpositive/llap/cte_1.q.out
ql/src/test/results/clientpositive/llap/default_constraint.q.out
ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out
ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
ql/src/test/results/clientpositive/llap/except_distinct.q.out
ql/src/test/results/clientpositive/llap/explainuser_4.q.out
ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out
ql/src/test/results/clientpositive/llap/filter_union.q.out
ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out
ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out
ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out
ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out
ql/src/test/results/clientpositive/llap/intersect_all.q.out
ql/src/test/results/clientpositive/llap/intersect_distinct.q.out
ql/src/test/results/clientpositive/llap/join32_lessSize.q.out
ql/src/test/results/clientpositive/llap/lateral_view.q.out
ql/src/test/results/clientpositive/llap/lineage2.q.out
ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out
ql/src/test/results/clientpositive/llap/llap_nullscan.q.out
ql/src/test/results/clientpositive/llap/llap_smb.q.out
ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_2.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out
ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out
ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
ql/src/test/results/clientpositive/llap/metadataonly1.q.out
ql/src/test/results/clientpositive/llap/mm_all.q.out
ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
ql/src/test/results/clientpositive/llap/multi_column_in.q.out
ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out
ql/src/test/results/clientpositive/llap/orc_analyze.q.out
ql/src/test/results/clientpositive/llap/orc_create.q.out
ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out
ql/src/test/results/clientpositive/llap/parquet_types.q.out
ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
ql/src/test/results/clientpositive/llap/partition_multilevels.q.out
ql/src/test/results/clientpositive/llap/partition_pruning.q.out
ql/src/test/results/clientpositive/llap/ppd_union_view.q.out
ql/src/test/results/clientpositive/llap/quotedid_smb.q.out
ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out
ql/src/test/results/clientpositive/llap/reduce_deduplicate_extended.q.out
ql/src/test/results/clientpositive/llap/results_cache_1.q.out
ql/src/test/results/clientpositive/llap/results_cache_capacity.q.out
ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
ql/src/test/results/clientpositive/llap/sample1.q.out
ql/src/test/results/clientpositive/llap/sample10.q.out
ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out
ql/src/test/results/clientpositive/llap/selectDistinctStar.q.out
ql/src/test/results/clientpositive/llap/skiphf_aggr.q.out
ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out
ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out
ql/src/test/results/clientpositive/llap/stats11.q.out
ql/src/test/results/clientpositive/llap/subquery_notin.q.out
ql/src/test/results/clientpositive/llap/sysdb.q.out
ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out
ql/src/test/results/clientpositive/llap/tez_smb_main.q.out
ql/src/test/results/clientpositive/llap/tez_union2.q.out
ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out
ql/src/test/results/clientpositive/llap/udaf_collect_set_2.q.out
ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
ql/src/test/results/clientpositive/llap/unionDistinct_3.q.out
ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
ql/src/test/results/clientpositive/llap/union_stats.q.out
ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out
ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out
ql/src/test/results/clientpositive/llap/vectorization_0.q.out
ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
ql/src/test/results/clientpositive/llap/vectorized_insert_into_bucketed_table.q.out
ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
ql/src/test/results/clientpositive/load_dyn_part8.q.out
ql/src/test/results/clientpositive/louter_join_ppr.q.out
ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
ql/src/test/results/clientpositive/masking_5.q.out
ql/src/test/results/clientpositive/merge3.q.out
ql/src/test/results/clientpositive/mm_all.q.out
ql/src/test/results/clientpositive/mm_buckets.q.out
ql/src/test/results/clientpositive/mm_default.q.out
ql/src/test/results/clientpositive/multi_insert_partitioned.q.out
ql/src/test/results/clientpositive/named_column_join.q.out
ql/src/test/results/clientpositive/nonmr_fetch.q.out
ql/src/test/results/clientpositive/nullformat.q.out
ql/src/test/results/clientpositive/nullformatCTAS.q.out
ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out
ql/src/test/results/clientpositive/outer_join_ppr.q.out
ql/src/test/results/clientpositive/outer_reference_windowed.q.out
ql/src/test/results/clientpositive/parallel_orderby.q.out
ql/src/test/results/clientpositive/parquet_analyze.q.out
ql/src/test/results/clientpositive/parquet_array_null_element.q.out
ql/src/test/results/clientpositive/parquet_create.q.out
ql/src/test/results/clientpositive/parquet_mixed_partition_formats.q.out
ql/src/test/results/clientpositive/parquet_partitioned.q.out
ql/src/test/results/clientpositive/parquet_serde.q.out
ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
ql/src/test/results/clientpositive/part_inherit_tbl_props_with_star.q.out
ql/src/test/results/clientpositive/pcr.q.out
ql/src/test/results/clientpositive/pcs.q.out
ql/src/test/results/clientpositive/pointlookup2.q.out
ql/src/test/results/clientpositive/pointlookup3.q.out
ql/src/test/results/clientpositive/pointlookup4.q.out
ql/src/test/results/clientpositive/ppd_join_filter.q.out
ql/src/test/results/clientpositive/ppd_vc.q.out
ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out
ql/src/test/results/clientpositive/push_or.q.out
ql/src/test/results/clientpositive/quotedid_stats.q.out
ql/src/test/results/clientpositive/rand_partitionpruner1.q.out
ql/src/test/results/clientpositive/rand_partitionpruner2.q.out
ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
ql/src/test/results/clientpositive/rcfile_default_format.q.out
ql/src/test/results/clientpositive/regexp_extract.q.out
ql/src/test/results/clientpositive/rename_external_partition_location.q.out
ql/src/test/results/clientpositive/repl_2_exim_basic.q.out
ql/src/test/results/clientpositive/repl_3_exim_metadata.q.out
ql/src/test/results/clientpositive/router_join_ppr.q.out
ql/src/test/results/clientpositive/row__id.q.out
ql/src/test/results/clientpositive/sample1.q.out
ql/src/test/results/clientpositive/sample2.q.out
ql/src/test/results/clientpositive/sample3.q.out
ql/src/test/results/clientpositive/sample4.q.out
ql/src/test/results/clientpositive/sample5.q.out
ql/src/test/results/clientpositive/sample6.q.out
ql/src/test/results/clientpositive/sample7.q.out
ql/src/test/results/clientpositive/sample8.q.out
ql/src/test/results/clientpositive/sample9.q.out
ql/src/test/results/clientpositive/sample_islocalmode_hook_use_metadata.q.out
ql/src/test/results/clientpositive/serde_user_properties.q.out
ql/src/test/results/clientpositive/show_create_table_alter.q.out
ql/src/test/results/clientpositive/show_create_table_db_table.q.out
ql/src/test/results/clientpositive/show_create_table_delimited.q.out
ql/src/test/results/clientpositive/show_create_table_partitioned.q.out
ql/src/test/results/clientpositive/show_create_table_serde.q.out
ql/src/test/results/clientpositive/show_create_table_temp_table.q.out
ql/src/test/results/clientpositive/show_functions.q.out
ql/src/test/results/clientpositive/show_tblproperties.q.out
ql/src/test/results/clientpositive/smb_mapjoin_1.q.out
ql/src/test/results/clientpositive/smb_mapjoin_11.q.out
ql/src/test/results/clientpositive/smb_mapjoin_12.q.out
ql/src/test/results/clientpositive/smb_mapjoin_13.q.out
ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
ql/src/test/results/clientpositive/smb_mapjoin_22.q.out
ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
ql/src/test/results/clientpositive/smb_mapjoin_47.q.out
ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out
ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out
ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out
ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out
ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
ql/src/test/results/clientpositive/spark/bucket2.q.out
ql/src/test/results/clientpositive/spark/bucket3.q.out
ql/src/test/results/clientpositive/spark/bucket4.q.out
ql/src/test/results/clientpositive/spark/bucket4.q.out_spark
ql/src/test/results/clientpositive/spark/bucket5.q.out
ql/src/test/results/clientpositive/spark/bucket6.q.out
ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out
ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out
ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out
ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out
ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out
ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out
ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out_spark
ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out
ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out
ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out
ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out
ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out
ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out
ql/src/test/results/clientpositive/spark/cbo_semijoin.q.out
ql/src/test/results/clientpositive/spark/ctas.q.out
ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out
ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out_spark
ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out
ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out
ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out
ql/src/test/results/clientpositive/spark/input_part2.q.out
ql/src/test/results/clientpositive/spark/join17.q.out
ql/src/test/results/clientpositive/spark/join26.q.out
ql/src/test/results/clientpositive/spark/join32.q.out
ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
ql/src/test/results/clientpositive/spark/join33.q.out
ql/src/test/results/clientpositive/spark/join34.q.out
ql/src/test/results/clientpositive/spark/join35.q.out
ql/src/test/results/clientpositive/spark/join9.q.out
ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
ql/src/test/results/clientpositive/spark/join_map_ppr.q.out
ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out
ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out
ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out
ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
ql/src/test/results/clientpositive/spark/mapreduce1.q.out
ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
ql/src/test/results/clientpositive/spark/parallel_orderby.q.out
ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
ql/src/test/results/clientpositive/spark/pcr.q.out
ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out
ql/src/test/results/clientpositive/spark/quotedid_smb.q.out
ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out
ql/src/test/results/clientpositive/spark/router_join_ppr.q.out
ql/src/test/results/clientpositive/spark/sample1.q.out
ql/src/test/results/clientpositive/spark/sample10.q.out
ql/src/test/results/clientpositive/spark/sample2.q.out
ql/src/test/results/clientpositive/spark/sample3.q.out
ql/src/test/results/clientpositive/spark/sample4.q.out
ql/src/test/results/clientpositive/spark/sample5.q.out
ql/src/test/results/clientpositive/spark/sample6.q.out
ql/src/test/results/clientpositive/spark/sample7.q.out
ql/src/test/results/clientpositive/spark/sample8.q.out
ql/src/test/results/clientpositive/spark/sample9.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out
ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out
ql/src/test/results/clientpositive/spark/spark_union_merge.q.out
ql/src/test/results/clientpositive/spark/stats0.q.out
ql/src/test/results/clientpositive/spark/stats1.q.out
ql/src/test/results/clientpositive/spark/stats10.q.out
ql/src/test/results/clientpositive/spark/stats16.q.out
ql/src/test/results/clientpositive/spark/stats3.q.out
ql/src/test/results/clientpositive/spark/stats5.q.out
ql/src/test/results/clientpositive/spark/stats_noscan_2.q.out
ql/src/test/results/clientpositive/spark/statsfs.q.out
ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out
ql/src/test/results/clientpositive/spark/subquery_notin.q.out
ql/src/test/results/clientpositive/spark/subquery_select.q.out
ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out
ql/src/test/results/clientpositive/spark/transform_ppr1.q.out
ql/src/test/results/clientpositive/spark/transform_ppr2.q.out
ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out
ql/src/test/results/clientpositive/spark/union22.q.out
ql/src/test/results/clientpositive/spark/union24.q.out
ql/src/test/results/clientpositive/spark/union_ppr.q.out
ql/src/test/results/clientpositive/spark/union_remove_1.q.out
ql/src/test/results/clientpositive/spark/union_remove_10.q.out
ql/src/test/results/clientpositive/spark/union_remove_11.q.out
ql/src/test/results/clientpositive/spark/union_remove_12.q.out
ql/src/test/results/clientpositive/spark/union_remove_13.q.out
ql/src/test/results/clientpositive/spark/union_remove_14.q.out
ql/src/test/results/clientpositive/spark/union_remove_15.q.out
ql/src/test/results/clientpositive/spark/union_remove_16.q.out
ql/src/test/results/clientpositive/spark/union_remove_17.q.out
ql/src/test/results/clientpositive/spark/union_remove_18.q.out
ql/src/test/results/clientpositive/spark/union_remove_19.q.out
ql/src/test/results/clientpositive/spark/union_remove_2.q.out
ql/src/test/results/clientpositive/spark/union_remove_20.q.out
ql/src/test/results/clientpositive/spark/union_remove_21.q.out
ql/src/test/results/clientpositive/spark/union_remove_22.q.out
ql/src/test/results/clientpositive/spark/union_remove_23.q.out
ql/src/test/results/clientpositive/spark/union_remove_24.q.out
ql/src/test/results/clientpositive/spark/union_remove_3.q.out
ql/src/test/results/clientpositive/spark/union_remove_4.q.out
ql/src/test/results/clientpositive/spark/union_remove_5.q.out
ql/src/test/results/clientpositive/spark/union_remove_7.q.out
ql/src/test/results/clientpositive/spark/union_remove_8.q.out
ql/src/test/results/clientpositive/spark/union_remove_9.q.out
ql/src/test/results/clientpositive/spark/vectorization_0.q.out
ql/src/test/results/clientpositive/stats0.q.out
ql/src/test/results/clientpositive/stats1.q.out
ql/src/test/results/clientpositive/stats10.q.out
ql/src/test/results/clientpositive/stats16.q.out
ql/src/test/results/clientpositive/stats3.q.out
ql/src/test/results/clientpositive/stats5.q.out
ql/src/test/results/clientpositive/stats_empty_partition2.q.out
ql/src/test/results/clientpositive/stats_invalidation.q.out
ql/src/test/results/clientpositive/stats_list_bucket.q.out
ql/src/test/results/clientpositive/stats_noscan_2.q.out
ql/src/test/results/clientpositive/statsfs.q.out
ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
ql/src/test/results/clientpositive/transform_ppr1.q.out
ql/src/test/results/clientpositive/transform_ppr2.q.out
ql/src/test/results/clientpositive/truncate_column.q.out
ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out
ql/src/test/results/clientpositive/udf_explode.q.out
ql/src/test/results/clientpositive/udtf_explode.q.out
ql/src/test/results/clientpositive/unicode_comments.q.out
ql/src/test/results/clientpositive/unicode_notation.q.out
ql/src/test/results/clientpositive/union22.q.out
ql/src/test/results/clientpositive/union24.q.out
ql/src/test/results/clientpositive/union_pos_alias.q.out
ql/src/test/results/clientpositive/union_ppr.q.out
ql/src/test/results/clientpositive/union_remove_1.q.out
ql/src/test/results/clientpositive/union_remove_10.q.out
ql/src/test/results/clientpositive/union_remove_11.q.out
ql/src/test/results/clientpositive/union_remove_12.q.out
ql/src/test/results/clientpositive/union_remove_13.q.out
ql/src/test/results/clientpositive/union_remove_14.q.out
ql/src/test/results/clientpositive/union_remove_15.q.out
ql/src/test/results/clientpositive/union_remove_16.q.out
ql/src/test/results/clientpositive/union_remove_17.q.out
ql/src/test/results/clientpositive/union_remove_18.q.out
ql/src/test/results/clientpositive/union_remove_19.q.out
ql/src/test/results/clientpositive/union_remove_2.q.out
ql/src/test/results/clientpositive/union_remove_20.q.out
ql/src/test/results/clientpositive/union_remove_21.q.out
ql/src/test/results/clientpositive/union_remove_22.q.out
ql/src/test/results/clientpositive/union_remove_23.q.out
ql/src/test/results/clientpositive/union_remove_24.q.out
ql/src/test/results/clientpositive/union_remove_3.q.out
ql/src/test/results/clientpositive/union_remove_4.q.out
ql/src/test/results/clientpositive/union_remove_5.q.out
ql/src/test/results/clientpositive/union_remove_7.q.out
ql/src/test/results/clientpositive/union_remove_8.q.out
ql/src/test/results/clientpositive/union_remove_9.q.out
ql/src/test/results/clientpositive/union_stats.q.out
ql/src/test/results/clientpositive/unset_table_view_property.q.out
ql/src/test/results/clientpositive/view_alias.q.out
ql/src/test/results/clientpositive/view_cbo.q.out
serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
storage-api/src/java/org/apache/hive/common/util/Murmur3.java
storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
streaming/src/test/org/apache/hive/streaming/TestStreaming.java

index 6acdb3f..aed43cf 100644 (file)
@@ -56,6 +56,7 @@ Retention:            0
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}}
+       bucketing_version       2                   
        hbase.table.default.storage.type        binary              
        hbase.table.name        t_hive              
        numFiles                0                   
index 153613e..172db75 100644 (file)
@@ -56,6 +56,7 @@ Retention:            0
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}}
+       bucketing_version       2                   
        hbase.table.default.storage.type        binary              
        hbase.table.name        t_hive              
        numFiles                0                   
@@ -236,6 +237,7 @@ Table Type:                 EXTERNAL_TABLE
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}}
        EXTERNAL                TRUE                
+       bucketing_version       2                   
        hbase.table.name        t_hive              
        numFiles                0                   
        numRows                 0                   
index db40f84..bf7da98 100644 (file)
@@ -117,6 +117,7 @@ Retention:                  0
 #### A masked pattern was here ####
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
+       bucketing_version       2                   
        hbase.mapred.output.outputtable kkk                 
        hbase.table.name        hbase_table_0       
 #### A masked pattern was here ####
@@ -163,6 +164,7 @@ Retention:                  0
 #### A masked pattern was here ####
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
+       bucketing_version       2                   
        hbase.table.name        hbase_table_0       
 #### A masked pattern was here ####
        numFiles                0                   
index f2285d4..92cf8cc 100644 (file)
@@ -38,6 +38,7 @@ Retention:            0
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}}
+       bucketing_version       2                   
        numFiles                0                   
        numRows                 0                   
        rawDataSize             0                   
@@ -130,6 +131,7 @@ Retention:                  0
 #### A masked pattern was here ####
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
+       bucketing_version       2                   
 #### A masked pattern was here ####
        numFiles                0                   
        numRows                 0                   
@@ -194,6 +196,7 @@ Retention:                  0
 #### A masked pattern was here ####
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
+       bucketing_version       2                   
 #### A masked pattern was here ####
        numFiles                0                   
        numRows                 0                   
@@ -250,6 +253,7 @@ Retention:                  0
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       bucketing_version       2                   
 #### A masked pattern was here ####
        numFiles                0                   
        numRows                 2                   
@@ -319,6 +323,7 @@ Retention:                  0
 #### A masked pattern was here ####
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
+       bucketing_version       2                   
 #### A masked pattern was here ####
        numFiles                0                   
        numRows                 2                   
index 924e233..2a68220 100644 (file)
@@ -19,6 +19,7 @@
 package org.apache.hive.hcatalog.streaming;
 
 
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -27,6 +28,7 @@ import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -185,7 +187,12 @@ public abstract class AbstractRecordWriter implements RecordWriter {
     }
     ObjectInspector[] inspectors = getBucketObjectInspectors();
     Object[] bucketFields = getBucketFields(row);
-    return ObjectInspectorUtils.getBucketNumber(bucketFields, inspectors, totalBuckets);
+    int bucketingVersion = Utilities.getBucketingVersion(
+        tbl.getParameters().get(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
+
+    return bucketingVersion == 2 ?
+        ObjectInspectorUtils.getBucketNumber(bucketFields, inspectors, totalBuckets) :
+        ObjectInspectorUtils.getBucketNumberOld(bucketFields, inspectors, totalBuckets);
   }
 
   @Override
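
The hunk above is the core behavioural change for streaming writers: the bucket a record lands in is now chosen per table from the bucketing_version table parameter, with version 2 using the new Murmur3-based ObjectInspectorUtils.getBucketNumber and everything else falling back to getBucketNumberOld. A minimal Java sketch of the same selection, using only the calls visible in this diff; chooseBucket is a hypothetical helper, and the value applied when the parameter is absent is whatever Utilities.getBucketingVersion decides:

// Sketch only (not code from this patch): version-aware bucket selection,
// mirroring the AbstractRecordWriter hunk above. chooseBucket is a
// hypothetical helper; the Utilities/ObjectInspectorUtils calls are the
// ones the diff itself uses.
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;

final class BucketSelectionSketch {
  static int chooseBucket(Table tbl, Object[] bucketFields,
      ObjectInspector[] inspectors, int totalBuckets) {
    int bucketingVersion = Utilities.getBucketingVersion(
        tbl.getParameters().get(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
    return bucketingVersion == 2
        ? ObjectInspectorUtils.getBucketNumber(bucketFields, inspectors, totalBuckets)     // Murmur3 hash
        : ObjectInspectorUtils.getBucketNumberOld(bucketFields, inspectors, totalBuckets); // legacy hash
  }
}

The same bucketing_version property also surfaces in the serialized table properties of query plans, which is why so many .q.out files on this page now print "bucketing_version 2".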
index fe2b1c1..90dbdac 100644 (file)
@@ -1491,9 +1491,9 @@ public class TestStreaming {
 
     // assert bucket listing is as expected
     Assert.assertEquals("number of buckets does not match expectation", actual1.values().size(), 3);
-    Assert.assertEquals("records in bucket does not match expectation", actual1.get(0).size(), 2);
+    Assert.assertTrue("bucket 0 shouldn't have been created", actual1.get(0) == null);
     Assert.assertEquals("records in bucket does not match expectation", actual1.get(1).size(), 1);
-    Assert.assertTrue("bucket 2 shouldn't have been created", actual1.get(2) == null);
+    Assert.assertEquals("records in bucket does not match expectation", actual1.get(2).size(), 2);
     Assert.assertEquals("records in bucket does not match expectation", actual1.get(3).size(), 1);
   }
   private void runCmdOnDriver(String cmd) throws QueryFailedException {
@@ -1624,7 +1624,7 @@ public class TestStreaming {
       } else if (file.contains("bucket_00001")) {
         corruptDataFile(file, conf, -1);
       } else if (file.contains("bucket_00002")) {
-        Assert.assertFalse("bucket 2 shouldn't have been created", true);
+        corruptDataFile(file, conf, 100);
       } else if (file.contains("bucket_00003")) {
         corruptDataFile(file, conf, 100);
       }
@@ -1654,9 +1654,9 @@ public class TestStreaming {
     System.setErr(origErr);
 
     errDump = new String(myErr.toByteArray());
-    Assert.assertEquals(true, errDump.contains("bucket_00000 recovered successfully!"));
-    Assert.assertEquals(true, errDump.contains("No readable footers found. Creating empty orc file."));
     Assert.assertEquals(true, errDump.contains("bucket_00001 recovered successfully!"));
+    Assert.assertEquals(true, errDump.contains("No readable footers found. Creating empty orc file."));
+    Assert.assertEquals(true, errDump.contains("bucket_00002 recovered successfully!"));
     Assert.assertEquals(true, errDump.contains("bucket_00003 recovered successfully!"));
     Assert.assertEquals(false, errDump.contains("Exception"));
     Assert.assertEquals(false, errDump.contains("is still open for writes."));
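
The reshuffled expectations in TestStreaming follow directly from the hash change: the same keys now map to different bucket files because the bucket index is computed from a Murmur3 hash instead of the legacy hash. A small self-contained sketch of the effect, assuming Murmur3.hash32(byte[]) from the org.apache.hive.common.util.Murmur3 class touched by this commit and the usual non-negative-modulo bucket mapping; the legacy side is only approximated with String.hashCode() for illustration, not the exact ObjectInspector-based hash Hive uses:

// Sketch only: the same key typically lands in a different bucket once the
// hash function changes. Murmur3.hash32(byte[]) is assumed to be available
// from storage-api's Murmur3 (listed in this commit); String.hashCode()
// merely stands in for the legacy hash for illustration.
import java.nio.charset.StandardCharsets;
import org.apache.hive.common.util.Murmur3;

final class BucketMappingSketch {
  static int toBucket(int hash, int numBuckets) {
    return (hash & Integer.MAX_VALUE) % numBuckets;  // non-negative modulo
  }

  public static void main(String[] args) {
    int numBuckets = 4;
    String key = "example-key";  // illustrative value, not taken from the test data
    int legacyBucket = toBucket(key.hashCode(), numBuckets);
    int murmurBucket = toBucket(
        Murmur3.hash32(key.getBytes(StandardCharsets.UTF_8)), numBuckets);
    System.out.println("legacy=" + legacyBucket + ", murmur=" + murmurBucket);
  }
}

This redistribution is why the assertions above now expect bucket_00000 to be absent while bucket_00002 gains rows.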
index 03c28a3..e890c52 100644 (file)
@@ -42,7 +42,7 @@ public class TestBucketIdResolverImpl {
   public void testAttachBucketIdToRecord() {
     MutableRecord record = new MutableRecord(1, "hello");
     capturingBucketIdResolver.attachBucketIdToRecord(record);
-    assertThat(record.rowId, is(new RecordIdentifier(-1L, 
+    assertThat(record.rowId, is(new RecordIdentifier(-1L,
       BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(1)),
       -1L)));
     assertThat(record.id, is(1));
index 9963291..ed2aef4 100644 (file)
@@ -176,6 +176,10 @@ public class HCatTable {
       newTable.putToParameters("comment", comment);
     }
 
+    if (newTable.getParameters().get("bucketing_version") == null) {
+      newTable.putToParameters("bucketing_version", "2");
+    }
+
     newTable.setSd(sd);
     if (partCols != null) {
       ArrayList<FieldSchema> hivePtnCols = new ArrayList<FieldSchema>();
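
HCatTable now stamps bucketing_version=2 onto tables it creates, so HCatalog-created tables advertise the same hashing scheme as tables created through Hive, matching the property that appears throughout the .q.out diffs on this page. A minimal sketch of the same defaulting applied to a raw metastore Table object, assuming the literal "bucketing_version" key spelled in the hunk:

// Sketch only: fill in bucketing_version=2 on a newly built metastore Table
// when the caller has not set it, mirroring the HCatTable change above.
import org.apache.hadoop.hive.metastore.api.Table;

final class BucketingVersionDefault {
  static void applyDefault(Table newTable) {
    if (newTable.getParameters() == null
        || newTable.getParameters().get("bucketing_version") == null) {
      // Never overwrite an explicitly chosen version; only supply the default.
      newTable.putToParameters("bucketing_version", "2");
    }
  }
}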
index f9ee9d9..da08d2f 100644 (file)
@@ -297,6 +297,8 @@ public class TestHCatClient {
     assertNotNull(inner);
     assertNotNull(outer);
     for ( Map.Entry<String,String> e : inner.entrySet()){
+      // If it is bucketing version, skip it
+      if (e.getKey().equals("bucketing_version")) continue;
       assertTrue(outer.containsKey(e.getKey()));
       assertEquals(outer.get(e.getKey()), e.getValue());
     }
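
Because the property is now injected on the metastore side, the HCatClient test compares table parameters while skipping bucketing_version, treating it as server-managed metadata rather than user-set content. A tiny sketch of that comparison pattern:

// Sketch only: compare user-visible table parameters while ignoring the
// server-managed bucketing_version entry, as the test above does.
import java.util.Map;

final class ParamComparisonSketch {
  static boolean sameUserParameters(Map<String, String> inner, Map<String, String> outer) {
    for (Map.Entry<String, String> e : inner.entrySet()) {
      if ("bucketing_version".equals(e.getKey())) {
        continue;  // injected by the metastore; not compared
      }
      if (!outer.containsKey(e.getKey()) || !outer.get(e.getKey()).equals(e.getValue())) {
        return false;
      }
    }
    return true;
  }
}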
index caa0029..a0ebed7 100644 (file)
@@ -122,6 +122,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns 
               columns.comments 
@@ -137,6 +138,7 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns 
                 columns.comments 
@@ -171,6 +173,7 @@ STAGE PLANS:
                 properties:
                   bucket_count 2
                   bucket_field_name id
+                  bucketing_version 2
                   column.name.delimiter ,
                   columns id
                   columns.comments 
@@ -203,6 +206,7 @@ STAGE PLANS:
               properties:
                 bucket_count 2
                 bucket_field_name id
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
index ab8ad77..f6b6fee 100644 (file)
@@ -87,6 +87,7 @@ STAGE PLANS:
                         properties:
                           COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                           bucket_count -1
+                          bucketing_version 2
                           column.name.delimiter ,
                           columns id
                           columns.comments 
@@ -133,6 +134,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns 
               columns.comments 
@@ -148,6 +150,7 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns 
                 columns.comments 
@@ -206,6 +209,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
@@ -250,6 +254,7 @@ STAGE PLANS:
                   properties:
                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                     bucket_count -1
+                    bucketing_version 2
                     column.name.delimiter ,
                     columns id
                     columns.comments 
@@ -281,6 +286,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns id
               columns.comments 
@@ -303,6 +309,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
@@ -335,6 +342,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
@@ -369,6 +377,7 @@ STAGE PLANS:
                   properties:
                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                     bucket_count -1
+                    bucketing_version 2
                     column.name.delimiter ,
                     columns id
                     columns.comments 
@@ -400,6 +409,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns id
               columns.comments 
@@ -422,6 +432,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
index 18fe4cd..8581a17 100644 (file)
@@ -114,6 +114,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns id,key
               columns.comments 
@@ -136,6 +137,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id,key
                 columns.comments 
@@ -366,6 +368,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns id,key
               columns.comments 
@@ -388,6 +391,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id,key
                 columns.comments 
index cdb67dd..6dba301 100644 (file)
@@ -140,6 +140,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns 
               columns.comments 
@@ -155,6 +156,7 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns 
                 columns.comments 
@@ -189,6 +191,7 @@ STAGE PLANS:
                 properties:
                   bucket_count 2
                   bucket_field_name id
+                  bucketing_version 2
                   column.name.delimiter ,
                   columns id
                   columns.comments 
@@ -221,6 +224,7 @@ STAGE PLANS:
               properties:
                 bucket_count 2
                 bucket_field_name id
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
index 2c23a7e..fdfe44c 100644 (file)
@@ -95,6 +95,7 @@ STAGE PLANS:
                         properties:
                           COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                           bucket_count -1
+                          bucketing_version 2
                           column.name.delimiter ,
                           columns id
                           columns.comments 
@@ -141,6 +142,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns 
               columns.comments 
@@ -156,6 +158,7 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns 
                 columns.comments 
@@ -214,6 +217,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
@@ -258,6 +262,7 @@ STAGE PLANS:
                   properties:
                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                     bucket_count -1
+                    bucketing_version 2
                     column.name.delimiter ,
                     columns id
                     columns.comments 
@@ -289,6 +294,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns id
               columns.comments 
@@ -311,6 +317,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
@@ -343,6 +350,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
@@ -377,6 +385,7 @@ STAGE PLANS:
                   properties:
                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                     bucket_count -1
+                    bucketing_version 2
                     column.name.delimiter ,
                     columns id
                     columns.comments 
@@ -408,6 +417,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns id
               columns.comments 
@@ -430,6 +440,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns id
                 columns.comments 
index 4016d25..73fe3f9 100644 (file)
@@ -71,6 +71,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns key
               columns.comments 
@@ -92,6 +93,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns key
                 columns.comments 
@@ -196,6 +198,7 @@ STAGE PLANS:
                 properties:
                   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
                   bucket_count -1
+                  bucketing_version 2
                   column.name.delimiter ,
                   columns key
                   columns.comments 
@@ -263,6 +266,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns key
                 columns.comments 
@@ -338,6 +342,7 @@ STAGE PLANS:
             properties:
               COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
               bucket_count -1
+              bucketing_version 2
               column.name.delimiter ,
               columns key
               columns.comments 
@@ -359,6 +364,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns key
                 columns.comments 
@@ -463,6 +469,7 @@ STAGE PLANS:
                 properties:
                   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
                   bucket_count -1
+                  bucketing_version 2
                   column.name.delimiter ,
                   columns key
                   columns.comments 
@@ -530,6 +537,7 @@ STAGE PLANS:
               properties:
                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true"}}
                 bucket_count -1
+                bucketing_version 2
                 column.name.delimiter ,
                 columns key
                 columns.comments 
index 4ebd096..8ee033d 100644 (file)
@@ -1107,7 +1107,7 @@ public class TestCompactor {
       }
       String name = stat[0].getPath().getName();
       Assert.assertEquals(name, "base_0000004");
-      checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L, 2);
+      checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 1, 1L, 4L, 2);
     } finally {
       connection.close();
     }
index 1a34659..2ca7b5f 100644 (file)
@@ -8,14 +8,11 @@ minimr.query.files=infer_bucket_sort_map_operators.q,\
   infer_bucket_sort_num_buckets.q,\
   root_dir_external_table.q,\
   parallel_orderby.q,\
-  bucket_num_reducers.q,\
   udf_using.q,\
   index_bitmap3.q,\
   index_bitmap_auto.q,\
   scriptfile1.q,\
-  bucket_num_reducers2.q,\
   bucket_num_reducers_acid.q,\
-  bucket_num_reducers_acid2.q,\
   scriptfile1_win.q
 
 # These tests are disabled for minimr
@@ -464,6 +461,9 @@ minillaplocal.query.files=\
   bucket_map_join_tez1.q,\
   bucket_map_join_tez2.q,\
   bucket_map_join_tez_empty.q,\
+  bucket_num_reducers.q,\
+  bucket_num_reducers2.q,\
+  bucket_num_reducers_acid2.q,\
   bucketizedhiveinputformat.q,\
   bucketmapjoin6.q,\
   bucketmapjoin7.q,\
index c084fa0..962fc5d 100644 (file)
@@ -66,10 +66,7 @@ import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeStats;
-import org.apache.hadoop.hive.serde2.Serializer;
+import org.apache.hadoop.hive.serde2.*;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
@@ -86,6 +83,7 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hive.common.util.HiveStringUtils;
+import org.apache.hive.common.util.Murmur3;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -99,6 +97,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
+import java.util.function.BiFunction;
 
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_TEMPORARY_TABLE_STORAGE;
 
@@ -144,6 +143,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
   private transient Path destTablePath;
   private transient boolean isInsertOverwrite;
   private transient String counterGroup;
+  private transient BiFunction<Object[], ObjectInspector[], Integer> hashFunc;
   /**
    * Counters.
    */
@@ -587,6 +587,11 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
       logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
 
       statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
+
+      // Setup hashcode
+      hashFunc = conf.getTableInfo().getBucketingVersion() == 2 ?
+          ObjectInspectorUtils::getBucketHashCode :
+          ObjectInspectorUtils::getBucketHashCodeOld;
     } catch (HiveException e) {
       throw e;
     } catch (Exception e) {
@@ -1050,7 +1055,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
       for(int i = 0; i < partitionEval.length; i++) {
         bucketFieldValues[i] = partitionEval[i].evaluate(row);
       }
-      int keyHashCode = ObjectInspectorUtils.getBucketHashCode(bucketFieldValues, partitionObjectInspectors);
+      int keyHashCode = hashFunc.apply(bucketFieldValues, partitionObjectInspectors);
       key.setHashCode(keyHashCode);
       int bucketNum = prtner.getBucket(key, null, totalFiles);
       return bucketMap.get(bucketNum);
@@ -1578,4 +1583,5 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
   private boolean isNativeTable() {
     return !conf.getTableInfo().isNonNative();
   }
+
 }
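
The FileSinkOperator change above resolves the hash function once at operator initialization, based on the table's bucketing version, and keeps it in a BiFunction so the per-row path never re-checks the version. The standalone sketch below illustrates only that dispatch pattern; the two lambdas are placeholders for ObjectInspectorUtils::getBucketHashCode (version 2, Murmur-based) and ObjectInspectorUtils::getBucketHashCodeOld (version 1), not the real implementations.

import java.util.Arrays;
import java.util.function.BiFunction;

// Minimal sketch of the version-based dispatch: pick the hash function once,
// then call it per row. The two lambdas are placeholders only.
public class HashDispatchSketch {
  public static void main(String[] args) {
    int bucketingVersion = 2;  // would come from the table's descriptor

    BiFunction<Object[], Object[], Integer> murmurLike =
        (values, inspectors) -> Arrays.hashCode(values) * 31;  // placeholder, not getBucketHashCode
    BiFunction<Object[], Object[], Integer> legacyLike =
        (values, inspectors) -> Arrays.hashCode(values);       // placeholder, not getBucketHashCodeOld

    // Chosen once at operator setup; the hot path only calls apply().
    BiFunction<Object[], Object[], Integer> hashFunc =
        bucketingVersion == 2 ? murmurLike : legacyLike;

    Object[] bucketFieldValues = {42, "abc"};
    int keyHashCode = hashFunc.apply(bucketFieldValues, new Object[bucketFieldValues.length]);
    System.out.println("key hash = " + keyHashCode);
  }
}
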
index d59bf1f..bb91eea 100644 (file)
@@ -478,6 +478,7 @@ public final class FunctionRegistry {
     system.registerGenericUDF("when", GenericUDFWhen.class);
     system.registerGenericUDF("nullif", GenericUDFNullif.class);
     system.registerGenericUDF("hash", GenericUDFHash.class);
+    system.registerGenericUDF("murmur_hash", GenericUDFMurmurHash.class);
     system.registerGenericUDF("coalesce", GenericUDFCoalesce.class);
     system.registerGenericUDF("index", GenericUDFIndex.class);
     system.registerGenericUDF("in_file", GenericUDFInFile.class);
index c28ef99..108bb57 100644 (file)
@@ -90,6 +90,7 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
   private transient Configuration hconf;
   protected final transient Collection<Future<?>> asyncInitOperations = new HashSet<>();
 
+  protected int bucketingVersion = -1;
   // It can be optimized later so that an operator operation (init/close) is performed
   // only after that operation has been performed on all the parents. This will require
   // initializing the whole tree in all the mappers (which might be required for mappers
@@ -1600,4 +1601,12 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
     }
     return true;
   }
+
+  public void setBucketingVersion(int bucketingVersion) {
+    this.bucketingVersion = bucketingVersion;
+  }
+
+  public int getBucketingVersion() {
+    return bucketingVersion;
+  }
 }
index 21ca04d..7bb6590 100644 (file)
@@ -265,6 +265,9 @@ public final class OperatorFactory {
     Operator<T> ret = get(oplist0.getCompilationOpContext(), (Class<T>) conf.getClass());
     ret.setConf(conf);
 
+    // Set the bucketing Version
+    ret.setBucketingVersion(oplist0.getBucketingVersion());
+
     // Add the new operator as child of each of the passed in operators
     List<Operator> children = oplist0.getChildOperators();
     children.add(ret);
index d4363fd..caaf543 100644 (file)
@@ -27,10 +27,12 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
+import java.util.function.BiFunction;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -39,20 +41,19 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde2.ByteStream;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.Serializer;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.*;
 import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.util.hash.MurmurHash;
+import org.apache.hive.common.util.Murmur3;
 
 /**
  * Reduce Sink Operator sends output to the reduce stage.
@@ -61,7 +62,6 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
     implements Serializable, TopNHash.BinaryCollector {
 
   private static final long serialVersionUID = 1L;
-  private static final MurmurHash hash = (MurmurHash) MurmurHash.getInstance();
 
   private transient ObjectInspector[] partitionObjectInspectors;
   private transient ObjectInspector[] bucketObjectInspectors;
@@ -114,11 +114,13 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
   protected transient List<List<Integer>> distinctColIndices;
   protected transient Random random;
 
+  protected transient BiFunction<Object[], ObjectInspector[], Integer> hashFunc;
+
   /**
    * This two dimensional array holds key data and a corresponding Union object
    * which contains the tag identifying the aggregate expression for distinct columns.
    *
-   * If there is no distict expression, cachedKeys is simply like this.
+   * If there is no distinct expression, cachedKeys is simply like this.
    * cachedKeys[0] = [col0][col1]
    *
   * with two distinct expressions, union(tag:key) is attached for each distinct expression
@@ -228,6 +230,14 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
       useUniformHash = conf.getReducerTraits().contains(UNIFORM);
 
       firstRow = true;
+      // The acidOp flag has to be checked so that the Java hash, which acts as an
+      // identity function for integers, is still used; this is necessary to read the
+      // RecordIdentifier in case of ACID updates/deletes.
+      boolean acidOp = conf.getWriteType() == AcidUtils.Operation.UPDATE ||
+          conf.getWriteType() == AcidUtils.Operation.DELETE;
+      hashFunc = bucketingVersion == 2 && !acidOp ?
+          ObjectInspectorUtils::getBucketHashCode :
+          ObjectInspectorUtils::getBucketHashCodeOld;
     } catch (Exception e) {
       String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
       LOG.error(msg, e);
@@ -308,7 +318,7 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
       }
 
       // Determine distKeyLength (w/o distincts), and then add the first if present.
-      populateCachedDistributionKeys(row, 0);
+      populateCachedDistributionKeys(row);
 
       // replace bucketing columns with hashcode % numBuckets
       int bucketNumber = -1;
@@ -335,7 +345,6 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
       } else {
         hashCode = computeHashCode(row, bucketNumber);
       }
-
       firstKey.setHashCode(hashCode);
 
       /*
@@ -379,20 +388,22 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
     }
   }
 
-  private int computeBucketNumber(Object row, int numBuckets) throws HiveException {
+  private int computeBucketNumber(Object row, int numBuckets)
+          throws HiveException, SerDeException {
     Object[] bucketFieldValues = new Object[bucketEval.length];
     for (int i = 0; i < bucketEval.length; i++) {
       bucketFieldValues[i] = bucketEval[i].evaluate(row);
     }
-    return ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketObjectInspectors, numBuckets);
+    return ObjectInspectorUtils.getBucketNumber(
+        hashFunc.apply(bucketFieldValues, bucketObjectInspectors), numBuckets);
   }
 
-  private void populateCachedDistributionKeys(Object row, int index) throws HiveException {
+  private void populateCachedDistributionKeys(Object row) throws HiveException {
     for (int i = 0; i < numDistributionKeys; i++) {
-      cachedKeys[index][i] = keyEval[i].evaluate(row);
+      cachedKeys[0][i] = keyEval[i].evaluate(row);
     }
     if (cachedKeys[0].length > numDistributionKeys) {
-      cachedKeys[index][numDistributionKeys] = null;
+      cachedKeys[0][numDistributionKeys] = null;
     }
   }
 
@@ -414,7 +425,7 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
   }
 
   protected final int computeMurmurHash(HiveKey firstKey) {
-    return hash.hash(firstKey.getBytes(), firstKey.getDistKeyLength(), 0);
+    return Murmur3.hash32(firstKey.getBytes(), firstKey.getDistKeyLength(), 0);
   }
 
   /**
@@ -439,7 +450,7 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
       for(int i = 0; i < partitionEval.length; i++) {
         bucketFieldValues[i] = partitionEval[i].evaluate(row);
       }
-      keyHashCode = ObjectInspectorUtils.getBucketHashCode(bucketFieldValues, partitionObjectInspectors);
+      keyHashCode = hashFunc.apply(bucketFieldValues, partitionObjectInspectors);
     }
     int hashCode = buckNum < 0 ? keyHashCode : keyHashCode * 31 + buckNum;
     if (LOG.isTraceEnabled()) {
@@ -586,4 +597,5 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
   public void setOutputCollector(OutputCollector _out) {
     this.out = _out;
   }
+
 }
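
In the ReduceSinkOperator change above, the Murmur-based hash is used only when the bucketing version is 2 and the write is not an ACID update/delete, since those operations rely on the identity-like legacy hash to route rows by RecordIdentifier. A condensed, standalone sketch of that selection rule follows; the Operation enum and the two lambdas are stand-ins for AcidUtils.Operation and the two ObjectInspectorUtils methods, not the real classes.

import java.util.Arrays;
import java.util.function.BiFunction;

// Condensed version of the selection rule in initializeOp(): use the Murmur-based
// hash only for bucketing version 2 AND non-ACID-update/delete writes.
public class ReduceSinkHashChoice {
  enum Operation { NOT_ACID, INSERT, UPDATE, DELETE }

  static BiFunction<Object[], Object[], Integer> choose(int bucketingVersion, Operation writeType) {
    boolean acidOp = writeType == Operation.UPDATE || writeType == Operation.DELETE;
    return bucketingVersion == 2 && !acidOp
        ? (values, ois) -> Arrays.hashCode(values) * 31  // placeholder for getBucketHashCode
        : (values, ois) -> Arrays.hashCode(values);      // placeholder for getBucketHashCodeOld
  }

  public static void main(String[] args) {
    Object[] key = {7};
    System.out.println(choose(2, Operation.INSERT).apply(key, null));  // new hash path
    System.out.println(choose(2, Operation.DELETE).apply(key, null));  // legacy hash kept for ACID delete
    System.out.println(choose(1, Operation.INSERT).apply(key, null));  // legacy table, legacy hash
  }
}
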
index 2503543..b5a7853 100644 (file)
@@ -4475,4 +4475,17 @@ public final class Utilities {
           + " on HDFS should be writable. Current permissions are: " + currentHDFSDirPermission);
     }
   }
+
+  // Get the bucketing version, which is stored as a string property
+  public static int getBucketingVersion(final String versionStr) {
+    int bucketingVersion = 1;
+    if (versionStr != null) {
+      try {
+        bucketingVersion = Integer.parseInt(versionStr);
+      } catch (NumberFormatException e) {
+        // Do nothing
+      }
+    }
+    return bucketingVersion;
+  }
 }
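
The helper above treats any missing or unparseable property value as bucketing version 1, so pre-existing tables keep the legacy hash. The standalone copy below makes that fallback easy to verify.

// Standalone copy of the fallback logic: anything other than a parseable
// integer means bucketing version 1 (the legacy hash).
public class BucketingVersionDemo {
  static int getBucketingVersion(final String versionStr) {
    int bucketingVersion = 1;
    if (versionStr != null) {
      try {
        bucketingVersion = Integer.parseInt(versionStr);
      } catch (NumberFormatException e) {
        // keep the legacy default
      }
    }
    return bucketingVersion;
  }

  public static void main(String[] args) {
    System.out.println(getBucketingVersion("2"));    // 2 -> Murmur-based bucketing
    System.out.println(getBucketingVersion(null));   // 1 -> property missing on old tables
    System.out.println(getBucketingVersion("oops")); // 1 -> unparseable, legacy fallback
  }
}
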
index a42c299..7a95716 100644 (file)
@@ -142,7 +142,8 @@ public class LoadPartitions {
       if (table == null) {
         //new table
 
-        table = new Table(tableDesc.getDatabaseName(), tableDesc.getTableName());
+        table = new Table(tableDesc.getDatabaseName(),
+            tableDesc.getTableName());
         if (isPartitioned(tableDesc)) {
           updateReplicationState(initialReplicationState());
           if (!forNewTable().hasReplicationState()) {
index 86f466f..77c9ecc 100644 (file)
@@ -21,9 +21,9 @@ package org.apache.hadoop.hive.ql.exec.vector.keyseries;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
-import org.apache.hive.common.util.HashCodeUtil;
 
 import com.google.common.base.Preconditions;
+import org.apache.hive.common.util.Murmur3;
 
 /**
  * Implementation of base serialization interface.
@@ -103,7 +103,7 @@ public abstract class VectorKeySeriesSerializedImpl<T extends SerializeWrite>
     byte[] bytes = output.getData();
     for (int i = 0; i < nonNullKeyCount; i++) {
       keyLength = serializedKeyLengths[i];
-      hashCodes[i] = HashCodeUtil.murmurHash(bytes, offset, keyLength);
+      hashCodes[i] = Murmur3.hash32(bytes, offset, keyLength, 0);
       offset += keyLength;
     }
   }
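
In the vectorized key series above, all serialized keys share one backing buffer, and each key is hashed with Murmur3 over its (offset, length) slice. The sketch below mirrors that walk with an invented buffer and key lengths, assuming hive-common's Murmur3 is on the classpath.

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hive.common.util.Murmur3;

// Sketch of hashing consecutive serialized keys that share one backing buffer.
// The buffer contents and key lengths are invented for the demo.
public class KeySeriesHashSketch {
  public static void main(String[] args) {
    byte[] bytes = "key1key22key333".getBytes(StandardCharsets.UTF_8);
    int[] serializedKeyLengths = {4, 5, 6};  // "key1", "key22", "key333"
    int[] hashCodes = new int[serializedKeyLengths.length];
    int offset = 0;
    for (int i = 0; i < serializedKeyLengths.length; i++) {
      int keyLength = serializedKeyLengths[i];
      // Same four-argument form (data, offset, length, seed) used above.
      hashCodes[i] = Murmur3.hash32(bytes, offset, keyLength, 0);
      offset += keyLength;
    }
    System.out.println(Arrays.toString(hashCodes));
  }
}
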
index 1bc3fda..42b7784 100644 (file)
@@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.ByteStream;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -40,6 +41,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 import com.google.common.base.Preconditions;
+import org.apache.hive.common.util.Murmur3;
 
 /**
  * This class is the object hash (not Uniform Hash) operator class for native vectorized reduce sink.
@@ -226,61 +228,110 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
       int[] selected = batch.selected;
 
       final int size = batch.size;
-      for (int logical = 0; logical < size; logical++) {
-        final int batchIndex = (selectedInUse ? selected[logical] : logical);
-
-        final int hashCode;
-        if (isEmptyBuckets) {
-          if (isEmptyPartitions) {
-            hashCode = nonPartitionRandom.nextInt();
-          } else {
+
+      // EmptyBuckets = true
+      if (isEmptyBuckets) {
+        if (isEmptyPartitions) {
+          for (int logical = 0; logical< size; logical++) {
+            final int batchIndex = (selectedInUse ? selected[logical] : logical);
+            final int hashCode = nonPartitionRandom.nextInt();
+            postProcess(batch, batchIndex, tag, hashCode);
+          }
+        } else { // isEmptyPartitions == false
+          for (int logical = 0; logical < size; logical++) {
+            final int batchIndex = (selectedInUse ? selected[logical] : logical);
             partitionVectorExtractRow.extractRow(batch, batchIndex, partitionFieldValues);
-            hashCode =
+            final int hashCode = bucketingVersion == 2 && !vectorDesc.getIsAcidChange() ?
                 ObjectInspectorUtils.getBucketHashCode(
+                    partitionFieldValues, partitionObjectInspectors) :
+                ObjectInspectorUtils.getBucketHashCodeOld(
                     partitionFieldValues, partitionObjectInspectors);
+            postProcess(batch, batchIndex, tag, hashCode);
           }
-        } else {
-          bucketVectorExtractRow.extractRow(batch, batchIndex, bucketFieldValues);
-          final int bucketNum =
-              ObjectInspectorUtils.getBucketNumber(
+        }
+      } else { // EmptyBuckets = false
+        if (isEmptyPartitions) {
+          for (int logical = 0; logical< size; logical++) {
+            final int batchIndex = (selectedInUse ? selected[logical] : logical);
+            bucketVectorExtractRow.extractRow(batch, batchIndex, bucketFieldValues);
+            final int bucketNum = bucketingVersion == 2 ?
+                ObjectInspectorUtils.getBucketNumber(bucketFieldValues,
+                  bucketObjectInspectors, numBuckets) :
+                ObjectInspectorUtils.getBucketNumberOld(
                   bucketFieldValues, bucketObjectInspectors, numBuckets);
-          if (isEmptyPartitions) {
-            hashCode = nonPartitionRandom.nextInt() * 31 + bucketNum;
-          } else {
+            final int hashCode = nonPartitionRandom.nextInt() * 31 + bucketNum;
+            postProcess(batch, batchIndex, tag, hashCode);
+          }
+        } else { // isEmptyPartitions == false
+          for (int logical = 0; logical < size; logical++) {
+            final int batchIndex = (selectedInUse ? selected[logical] : logical);
             partitionVectorExtractRow.extractRow(batch, batchIndex, partitionFieldValues);
-            hashCode =
-                ObjectInspectorUtils.getBucketHashCode(
-                    partitionFieldValues, partitionObjectInspectors) * 31 + bucketNum;
+            bucketVectorExtractRow.extractRow(batch, batchIndex, bucketFieldValues);
+            final int hashCode, bucketNum;
+            if (bucketingVersion == 2 && !vectorDesc.getIsAcidChange()) {
+              bucketNum =
+                  ObjectInspectorUtils.getBucketNumber(
+                      bucketFieldValues, bucketObjectInspectors, numBuckets);
+              hashCode = ObjectInspectorUtils.getBucketHashCode(
+                  partitionFieldValues, partitionObjectInspectors) * 31 + bucketNum;
+            } else { // old bucketing logic
+              bucketNum =
+                  ObjectInspectorUtils.getBucketNumberOld(
+                      bucketFieldValues, bucketObjectInspectors, numBuckets);
+              hashCode = ObjectInspectorUtils.getBucketHashCodeOld(
+                  partitionFieldValues, partitionObjectInspectors) * 31 + bucketNum;
+            }
+            postProcess(batch, batchIndex, tag, hashCode);
           }
         }
+      }
+    } catch (Exception e) {
+      throw new HiveException(e);
+    }
+  }
 
-        if (!isEmptyKey) {
-          keyBinarySortableSerializeWrite.reset();
-          keyVectorSerializeRow.serializeWrite(batch, batchIndex);
-
-          // One serialized key for 1 or more rows for the duplicate keys.
-          final int keyLength = keyOutput.getLength();
-          if (tag == -1 || reduceSkipTag) {
-            keyWritable.set(keyOutput.getData(), 0, keyLength);
-          } else {
-            keyWritable.setSize(keyLength + 1);
-            System.arraycopy(keyOutput.getData(), 0, keyWritable.get(), 0, keyLength);
-            keyWritable.get()[keyLength] = reduceTagByte;
-          }
-          keyWritable.setDistKeyLength(keyLength);
-        }
+  private void processKey(VectorizedRowBatch batch, int batchIndex, int tag)
+      throws HiveException {
+    if (isEmptyKey) return;
 
-        keyWritable.setHashCode(hashCode);
+    try {
+      keyBinarySortableSerializeWrite.reset();
+      keyVectorSerializeRow.serializeWrite(batch, batchIndex);
+
+      // One serialized key for 1 or more rows for the duplicate keys.
+      final int keyLength = keyOutput.getLength();
+      if (tag == -1 || reduceSkipTag) {
+        keyWritable.set(keyOutput.getData(), 0, keyLength);
+      } else {
+        keyWritable.setSize(keyLength + 1);
+        System.arraycopy(keyOutput.getData(), 0, keyWritable.get(), 0, keyLength);
+        keyWritable.get()[keyLength] = reduceTagByte;
+      }
+      keyWritable.setDistKeyLength(keyLength);
+    } catch (Exception e) {
+      throw new HiveException(e);
+    }
+  }
 
-        if (!isEmptyValue) {
-          valueLazyBinarySerializeWrite.reset();
-          valueVectorSerializeRow.serializeWrite(batch, batchIndex);
+  private void processValue(VectorizedRowBatch batch, int batchIndex) throws HiveException {
+    if (isEmptyValue) return;
 
-          valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
-        }
+    try {
+      valueLazyBinarySerializeWrite.reset();
+      valueVectorSerializeRow.serializeWrite(batch, batchIndex);
 
-        collect(keyWritable, valueBytesWritable);
-      }
+      valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
+    } catch (Exception e) {
+      throw new HiveException(e);
+    }
+  }
+
+  private void postProcess(VectorizedRowBatch batch, int batchIndex, int tag, int hashCode) throws HiveException {
+    try {
+      processKey(batch, batchIndex, tag);
+      keyWritable.setHashCode(hashCode);
+      processValue(batch, batchIndex);
+      collect(keyWritable, valueBytesWritable);
     } catch (Exception e) {
       throw new HiveException(e);
     }
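
The refactor above hoists the empty-bucket/empty-partition and bucketing-version decisions out of the per-row loop, so each of the four loops does one fixed thing and hands the row to postProcess. The toy sketch below shows only the hoisting idea; the IntUnaryOperator stand-ins are invented and only mimic the shape of the real branches.

import java.util.Random;
import java.util.function.IntUnaryOperator;

// Toy version of the hoisting pattern: decide the hashing strategy once per
// batch, then run a tight per-row loop.
public class HoistedBatchLoop {
  public static void main(String[] args) {
    boolean isEmptyPartitions = false;
    int bucketingVersion = 2;
    Random nonPartitionRandom = new Random(12345);

    IntUnaryOperator hashOfRow;
    if (isEmptyPartitions) {
      hashOfRow = i -> nonPartitionRandom.nextInt();  // random distribution
    } else if (bucketingVersion == 2) {
      hashOfRow = i -> i * 0x9E3779B9;                // stand-in for getBucketHashCode
    } else {
      hashOfRow = i -> Integer.hashCode(i);           // stand-in for getBucketHashCodeOld
    }

    int size = 4;
    for (int logical = 0; logical < size; logical++) {
      int hashCode = hashOfRow.applyAsInt(logical);
      System.out.println("row " + logical + " -> hash " + hashCode);
    }
  }
}
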
index 71498a1..9a21503 100644 (file)
@@ -91,6 +91,7 @@ public final class HiveFileFormatUtils {
 
     // immutable maps
     Map<Class<? extends InputFormat>, Class<? extends InputFormatChecker>> inputFormatCheckerMap;
+    Map<Class<? extends InputFormat>, Class<? extends InputFormatChecker>> textInputFormatCheckerMap;
     Map<Class<?>, Class<? extends OutputFormat>> outputFormatSubstituteMap;
 
     // mutable thread-safe map to store instances
@@ -114,6 +115,10 @@ public final class HiveFileFormatUtils {
           .put(RCFileInputFormat.class, RCFileInputFormat.class)
           .put(OrcInputFormat.class, OrcInputFormat.class)
           .build();
+      textInputFormatCheckerMap = ImmutableMap
+          .<Class<? extends InputFormat>, Class<? extends InputFormatChecker>>builder()
+          .put(SequenceFileInputFormat.class, SequenceFileInputFormatChecker.class)
+          .build();
       outputFormatSubstituteMap = ImmutableMap
           .<Class<?>, Class<? extends OutputFormat>>builder()
           .put(IgnoreKeyTextOutputFormat.class, HiveIgnoreKeyTextOutputFormat.class)
@@ -129,6 +134,10 @@ public final class HiveFileFormatUtils {
       return inputFormatCheckerMap.keySet();
     }
 
+    public Set<Class<? extends InputFormat>> registeredTextClasses() {
+      return textInputFormatCheckerMap.keySet();
+    }
+
     public Class<? extends OutputFormat> getOutputFormatSubstiture(Class<?> origin) {
       return outputFormatSubstituteMap.get(origin);
     }
@@ -214,7 +223,7 @@ public final class HiveFileFormatUtils {
       }
     }
     if (files2.isEmpty()) return true;
-    Set<Class<? extends InputFormat>> inputFormatter = FileChecker.getInstance().registeredClasses();
+    Set<Class<? extends InputFormat>> inputFormatter = FileChecker.getInstance().registeredTextClasses();
     for (Class<? extends InputFormat> reg : inputFormatter) {
       boolean result = checkInputFormat(fs, conf, reg, files2);
       if (result) {
index 019682f..2337a35 100644 (file)
@@ -555,9 +555,11 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       return false;
     }
     for (FileStatus file : files) {
-      // 0 length files cannot be ORC files
-      if (file.getLen() == 0) {
-        return false;
+      if (!HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_ENGINE).equals("mr")) {
+        // 0 length files cannot be ORC files; this check does not apply to the MR engine.
+        if (file.getLen() == 0) {
+          return false;
+        }
       }
       try {
         OrcFile.createReader(file.getPath(),
index a51fdd3..abd678b 100644 (file)
@@ -188,6 +188,9 @@ public class Table implements Serializable {
       // set create time
       t.setCreateTime((int) (System.currentTimeMillis() / 1000));
     }
+    // Explicitly set the bucketing version
+    t.getParameters().put(hive_metastoreConstants.TABLE_BUCKETING_VERSION,
+        "2");
     return t;
   }
 
@@ -399,6 +402,9 @@ public class Table implements Serializable {
     tTable.getParameters().put(name, value);
   }
 
+  // Please note: Be very careful when using this function. If used carelessly,
+  // you may end up overwriting all of the existing properties. If the use case is to
+  // add or update certain properties, use setProperty() instead.
   public void setParameters(Map<String, String> params) {
     tTable.setParameters(params);
   }
@@ -450,6 +456,11 @@ public class Table implements Serializable {
     }
   }
 
+  public int getBucketingVersion() {
+    return Utilities.getBucketingVersion(
+        getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
+  }
+
    @Override
   public String toString() {
     return tTable.getTableName();
index 7121bce..5d4774d 100644 (file)
@@ -26,6 +26,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.Stack;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -153,15 +154,16 @@ public class ConvertJoinMapJoin implements NodeProcessor {
       }
     }
 
-    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
-      // Check if we are in LLAP, if so it needs to be determined if we should use BMJ or DPHJ
-      if (llapInfo != null) {
-        if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversionPos, numBuckets)) {
+    if (numBuckets > 1) {
+      if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
+        // Check if we are in LLAP, if so it needs to be determined if we should use BMJ or DPHJ
+        if (llapInfo != null) {
+          if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversionPos, numBuckets)) {
+            return null;
+          }
+        } else if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
           return null;
         }
-      } else if (numBuckets > 1 &&
-              convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
-        return null;
       }
     }
 
@@ -180,7 +182,8 @@ public class ConvertJoinMapJoin implements NodeProcessor {
     MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true);
     // map join operator by default has no bucket cols and num of reduce sinks
     // reduced by 1
-    mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
+    mapJoinOp.setOpTraits(new OpTraits(null, -1, null,
+        joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion()));
     preserveOperatorInfos(mapJoinOp, joinOp, context);
     // propagate this change till the next RS
     for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
@@ -381,7 +384,8 @@ public class ConvertJoinMapJoin implements NodeProcessor {
     context.parseContext.getContext().getPlanMapper().link(joinOp, mergeJoinOp);
     int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks();
     OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets,
-      joinOp.getOpTraits().getSortCols(), numReduceSinks);
+      joinOp.getOpTraits().getSortCols(), numReduceSinks,
+      joinOp.getOpTraits().getBucketingVersion());
     mergeJoinOp.setOpTraits(opTraits);
     preserveOperatorInfos(mergeJoinOp, joinOp, context);
 
@@ -448,7 +452,8 @@ public class ConvertJoinMapJoin implements NodeProcessor {
       return;
     }
     currentOp.setOpTraits(new OpTraits(opTraits.getBucketColNames(),
-      opTraits.getNumBuckets(), opTraits.getSortCols(), opTraits.getNumReduceSinks()));
+      opTraits.getNumBuckets(), opTraits.getSortCols(), opTraits.getNumReduceSinks(),
+            opTraits.getBucketingVersion()));
     for (Operator<? extends OperatorDesc> childOp : currentOp.getChildOperators()) {
       if ((childOp instanceof ReduceSinkOperator) || (childOp instanceof GroupByOperator)) {
         break;
@@ -501,7 +506,8 @@ public class ConvertJoinMapJoin implements NodeProcessor {
 
     // we can set the traits for this join operator
     opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(),
-        tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
+        tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks(),
+        joinOp.getOpTraits().getBucketingVersion());
     mapJoinOp.setOpTraits(opTraits);
     preserveOperatorInfos(mapJoinOp, joinOp, context);
     setNumberOfBucketsOnChildren(mapJoinOp);
@@ -612,6 +618,38 @@ public class ConvertJoinMapJoin implements NodeProcessor {
       numBuckets = bigTableRS.getConf().getNumReducers();
     }
     tezBucketJoinProcCtx.setNumBuckets(numBuckets);
+
+    // Bucketing now uses two different versions: version 1 for existing
+    // tables and version 2 for new tables. All the inputs to the SMB join must
+    // use the same version. This only applies to tables read directly, not to
+    // intermediate outputs of joins/group-bys.
+    int bucketingVersion = -1;
+    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
+      // Check if the parent is coming from a table scan, if so, what is the version of it.
+      assert parentOp.getParentOperators() != null && parentOp.getParentOperators().size() == 1;
+      Operator<?> op = parentOp.getParentOperators().get(0);
+      while(op != null && !(op instanceof TableScanOperator
+              || op instanceof ReduceSinkOperator
+              || op instanceof CommonJoinOperator)) {
+        // If op has parents it is guaranteed to be 1.
+        List<Operator<?>> parents = op.getParentOperators();
+        Preconditions.checkState(parents.size() == 0 || parents.size() == 1);
+        op = parents.size() == 1 ? parents.get(0) : null;
+      }
+
+      if (op instanceof TableScanOperator) {
+        int localVersion = ((TableScanOperator)op).getConf().
+                getTableMetadata().getBucketingVersion();
+        if (bucketingVersion == -1) {
+          bucketingVersion = localVersion;
+        } else if (bucketingVersion != localVersion) {
+          // versions don't match, return false.
+          LOG.debug("SMB Join can't be performed due to bucketing version mismatch");
+          return false;
+        }
+      }
+    }
+
     LOG.info("We can convert the join to an SMB join.");
     return true;
   }
@@ -1189,7 +1227,8 @@ public class ConvertJoinMapJoin implements NodeProcessor {
             joinOp.getOpTraits().getBucketColNames(),
             numReducers,
             null,
-            joinOp.getOpTraits().getNumReduceSinks());
+            joinOp.getOpTraits().getNumReduceSinks(),
+            joinOp.getOpTraits().getBucketingVersion());
         mapJoinOp.setOpTraits(opTraits);
         preserveOperatorInfos(mapJoinOp, joinOp, context);
         // propagate this change till the next RS
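
The SMB check added above walks from each join input up through single-parent chains until it reaches a TableScan (the real code also stops at intermediate reduce sinks and joins) and rejects the conversion when the scanned tables disagree on bucketing version. Below is a toy, self-contained version of that walk-and-compare; Op and TableScan are invented stand-ins for Hive's operator classes.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Toy version of the SMB bucketing-version check: walk each branch up to its
// table scan and require all scanned tables to agree.
public class SmbVersionCheckSketch {
  static class Op {
    final List<Op> parents;
    Op(List<Op> parents) { this.parents = parents; }
  }
  static class TableScan extends Op {
    final int bucketingVersion;
    TableScan(int version) {
      super(Collections.<Op>emptyList());
      this.bucketingVersion = version;
    }
  }

  static boolean sameBucketingVersion(List<Op> joinInputs) {
    int seenVersion = -1;
    for (Op input : joinInputs) {
      Op op = input;
      // Walk up single-parent chains until a table scan is found or the chain ends.
      while (op != null && !(op instanceof TableScan)) {
        op = op.parents.size() == 1 ? op.parents.get(0) : null;
      }
      if (op instanceof TableScan) {
        int localVersion = ((TableScan) op).bucketingVersion;
        if (seenVersion == -1) {
          seenVersion = localVersion;
        } else if (seenVersion != localVersion) {
          return false;  // versions differ, SMB join must not be used
        }
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Op scanV1 = new TableScan(1);
    Op scanV2 = new TableScan(2);
    Op branchOverV1 = new Op(Collections.singletonList(scanV1));
    Op branchOverV2 = new Op(Collections.singletonList(scanV2));
    System.out.println(sameBucketingVersion(Arrays.asList(branchOverV1, branchOverV2))); // false
    System.out.println(sameBucketingVersion(Arrays.asList(branchOverV2, branchOverV2))); // true
  }
}
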
index 5f65f63..4f7d3c2 100644 (file)
@@ -28,7 +28,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
 import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree.Operator;
@@ -84,8 +83,7 @@ public class FixedBucketPruningOptimizer extends Transform {
 
     @Override
     protected void generatePredicate(NodeProcessorCtx procCtx,
-        FilterOperator fop, TableScanOperator top) throws SemanticException,
-        UDFArgumentException {
+        FilterOperator fop, TableScanOperator top) throws SemanticException {
       FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
       Table tbl = top.getConf().getTableMetadata();
       if (tbl.getNumBuckets() > 0) {
@@ -122,8 +120,7 @@ public class FixedBucketPruningOptimizer extends Transform {
 
     @Override
     protected void generatePredicate(NodeProcessorCtx procCtx,
-        FilterOperator fop, TableScanOperator top) throws SemanticException,
-        UDFArgumentException {
+        FilterOperator fop, TableScanOperator top) throws SemanticException {
       FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
       if (ctxt.getNumBuckets() <= 0 || ctxt.getBucketCols().size() != 1) {
         // bucketing isn't consistent or there are >1 bucket columns
@@ -225,6 +222,9 @@ public class FixedBucketPruningOptimizer extends Transform {
       bs.clear();
       PrimitiveObjectInspector bucketOI = (PrimitiveObjectInspector)bucketField.getFieldObjectInspector();
       PrimitiveObjectInspector constOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(bucketOI.getPrimitiveCategory());
+      // Fetch the bucketing version from table scan operator
+      int bucketingVersion = top.getConf().getTableMetadata().getBucketingVersion();
+
       for (Object literal: literals) {
         PrimitiveObjectInspector origOI = PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(literal.getClass());
         Converter conv = ObjectInspectorConverters.getConverter(origOI, constOI);
@@ -233,10 +233,12 @@ public class FixedBucketPruningOptimizer extends Transform {
           return;
         }
         Object convCols[] = new Object[] {conv.convert(literal)};
-        int n = ObjectInspectorUtils.getBucketNumber(convCols, new ObjectInspector[]{constOI}, ctxt.getNumBuckets());
+        int n = bucketingVersion == 2 ?
+            ObjectInspectorUtils.getBucketNumber(convCols, new ObjectInspector[]{constOI}, ctxt.getNumBuckets()) :
+            ObjectInspectorUtils.getBucketNumberOld(convCols, new ObjectInspector[]{constOI}, ctxt.getNumBuckets());
         bs.set(n);
-        if (ctxt.isCompat()) {
-          int h = ObjectInspectorUtils.getBucketHashCode(convCols, new ObjectInspector[]{constOI});
+        if (bucketingVersion == 1 && ctxt.isCompat()) {
+          int h = ObjectInspectorUtils.getBucketHashCodeOld(convCols, new ObjectInspector[]{constOI});
           // negative hashcodes were converted to positive in different ways in the past;
           // abs() is now obsolete and all inserts now use & Integer.MAX_VALUE
           // the compat mode assumes that old data could have been loaded using the other conversion
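
Bucket pruning above hashes the literal with whichever version the table was written with; the resulting hash is then mapped to a bucket by masking with Integer.MAX_VALUE and taking the modulo of the bucket count, as the compat comment notes. The sketch below shows just that mapping with arbitrary example hash codes, assuming getBucketNumber reduces the hash this way; it is not the ObjectInspectorUtils implementation itself.

// Sketch of the hash-to-bucket mapping: mask to a non-negative value with
// & Integer.MAX_VALUE, then take the modulo of the bucket count.
public class BucketNumberSketch {
  static int toBucket(int hashCode, int numBuckets) {
    return (hashCode & Integer.MAX_VALUE) % numBuckets;
  }

  public static void main(String[] args) {
    int numBuckets = 4;
    int[] exampleHashes = {Integer.MIN_VALUE, -7, 0, 7, 123456789};
    for (int h : exampleHashes) {
      System.out.println(h + " -> bucket " + toBucket(h, numBuckets));
    }
  }
}
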
index 2be3c9b..1626e26 100644 (file)
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.SerDeException;
 
 /**
  * Operator factory for pruning processing of operator graph We find
@@ -101,7 +102,7 @@ public abstract class PrunerOperatorFactory {
      * @throws UDFArgumentException
      */
     protected abstract void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
-        TableScanOperator top) throws SemanticException, UDFArgumentException;
+        TableScanOperator top) throws SemanticException;
     /**
      * Add pruning predicate.
      *
index 1c56562..51010aa 100644 (file)
@@ -245,7 +245,7 @@ public class SortedDynPartitionOptimizer extends Transform {
 
       // Create ReduceSink operator
       ReduceSinkOperator rsOp = getReduceSinkOp(partitionPositions, sortPositions, sortOrder, sortNullOrder,
-          allRSCols, bucketColumns, numBuckets, fsParent);
+          allRSCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType());
 
       List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(allRSCols.size());
       List<String> colNames = new ArrayList<String>();
@@ -442,7 +442,7 @@ public class SortedDynPartitionOptimizer extends Transform {
     public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions,
         List<Integer> sortPositions, List<Integer> sortOrder, List<Integer> sortNullOrder,
         ArrayList<ExprNodeDesc> allCols, ArrayList<ExprNodeDesc> bucketColumns, int numBuckets,
-        Operator<? extends OperatorDesc> parent) throws SemanticException {
+        Operator<? extends OperatorDesc> parent, AcidUtils.Operation writeType) throws SemanticException {
 
       // Order of KEY columns
       // 1) Partition columns
@@ -577,7 +577,7 @@ public class SortedDynPartitionOptimizer extends Transform {
       // Number of reducers is set to default (-1)
       ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols,
           keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable,
-          valueTable);
+          valueTable, writeType);
       rsConf.setBucketCols(bucketColumns);
       rsConf.setNumBuckets(numBuckets);
 
index 0e995d7..0ce359f 100644 (file)
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -209,7 +210,7 @@ public class SortedDynPartitionTimeGranularityOptimizer extends Transform {
         sortNullOrder = Lists.newArrayList(0); // nulls first
       }
       ReduceSinkOperator rsOp = getReduceSinkOp(keyPositions, sortOrder,
-          sortNullOrder, allRSCols, granularitySelOp);
+          sortNullOrder, allRSCols, granularitySelOp, fsOp.getConf().getWriteType());
 
       // Create backtrack SelectOp
       final List<ExprNodeDesc> descs = new ArrayList<>(allRSCols.size());
@@ -393,8 +394,8 @@ public class SortedDynPartitionTimeGranularityOptimizer extends Transform {
     }
 
     private ReduceSinkOperator getReduceSinkOp(List<Integer> keyPositions, List<Integer> sortOrder,
-        List<Integer> sortNullOrder, ArrayList<ExprNodeDesc> allCols, Operator<? extends OperatorDesc> parent
-    ) {
+        List<Integer> sortNullOrder, ArrayList<ExprNodeDesc> allCols, Operator<? extends OperatorDesc> parent,
+        AcidUtils.Operation writeType) {
       // we will clone here as RS will update bucket column key with its
       // corresponding with bucket number and hence their OIs
       final ArrayList<ExprNodeDesc> keyCols = keyPositions.stream()
@@ -452,7 +453,7 @@ public class SortedDynPartitionTimeGranularityOptimizer extends Transform {
       // Number of reducers is set to default (-1)
       final ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols,
           keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable,
-          valueTable);
+          valueTable, writeType);
 
       final ArrayList<ColumnInfo> signature =
           parent.getSchema().getSignature()
index 69d9f31..9e54465 100644 (file)
@@ -92,10 +92,12 @@ public class OpTraitsRulesProcFactory {
       List<List<String>> listBucketCols = new ArrayList<List<String>>();
       int numBuckets = -1;
       int numReduceSinks = 1;
+      int bucketingVersion = -1;
       OpTraits parentOpTraits = rs.getParentOperators().get(0).getOpTraits();
       if (parentOpTraits != null) {
         numBuckets = parentOpTraits.getNumBuckets();
         numReduceSinks += parentOpTraits.getNumReduceSinks();
+        bucketingVersion = parentOpTraits.getBucketingVersion();
       }
 
       List<String> bucketCols = new ArrayList<>();
@@ -134,8 +136,10 @@ public class OpTraitsRulesProcFactory {
       }
 
       listBucketCols.add(bucketCols);
-      OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listBucketCols, numReduceSinks);
+      OpTraits opTraits = new OpTraits(listBucketCols, numBuckets,
+              listBucketCols, numReduceSinks, bucketingVersion);
       rs.setOpTraits(opTraits);
+      rs.setBucketingVersion(bucketingVersion);
       return null;
     }
   }
@@ -213,7 +217,8 @@ public class OpTraitsRulesProcFactory {
         sortedColsList.add(sortCols);
       }
       // num reduce sinks hardcoded to 0 because TS has no parents
-      OpTraits opTraits = new OpTraits(bucketColsList, numBuckets, sortedColsList, 0);
+      OpTraits opTraits = new OpTraits(bucketColsList, numBuckets,
+              sortedColsList, 0, table.getBucketingVersion());
       ts.setOpTraits(opTraits);
       return null;
     }
@@ -239,12 +244,15 @@ public class OpTraitsRulesProcFactory {
 
       List<List<String>> listBucketCols = new ArrayList<List<String>>();
       int numReduceSinks = 0;
+      int bucketingVersion = -1;
       OpTraits parentOpTraits = gbyOp.getParentOperators().get(0).getOpTraits();
       if (parentOpTraits != null) {
         numReduceSinks = parentOpTraits.getNumReduceSinks();
+        bucketingVersion = parentOpTraits.getBucketingVersion();
       }
       listBucketCols.add(gbyKeys);
-      OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols, numReduceSinks);
+      OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols,
+              numReduceSinks, bucketingVersion);
       gbyOp.setOpTraits(opTraits);
       return null;
     }
@@ -298,12 +306,15 @@ public class OpTraitsRulesProcFactory {
 
       int numBuckets = -1;
       int numReduceSinks = 0;
+      int bucketingVersion = -1;
       OpTraits parentOpTraits = selOp.getParentOperators().get(0).getOpTraits();
       if (parentOpTraits != null) {
         numBuckets = parentOpTraits.getNumBuckets();
         numReduceSinks = parentOpTraits.getNumReduceSinks();
+        bucketingVersion = parentOpTraits.getBucketingVersion();
       }
-      OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols, numReduceSinks);
+      OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols,
+              numReduceSinks, bucketingVersion);
       selOp.setOpTraits(opTraits);
       return null;
     }
@@ -338,7 +349,10 @@ public class OpTraitsRulesProcFactory {
         pos++;
       }
 
-      joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList, numReduceSinks));
+      // The bucketingVersion is not relevant here as it is never used.
+      // For SMB joins we look at the parent tables' bucketing versions, and for
+      // bucket map joins the big table's bucketing version is considered.
+      joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList, numReduceSinks, 2));
       return null;
     }
 
@@ -392,6 +406,8 @@ public class OpTraitsRulesProcFactory {
       Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) nd;
 
       int numReduceSinks = 0;
+      int bucketingVersion = -1;
+      boolean bucketingVersionSeen = false;
       for (Operator<?> parentOp : operator.getParentOperators()) {
         if (parentOp.getOpTraits() == null) {
           continue;
@@ -399,8 +415,17 @@ public class OpTraitsRulesProcFactory {
         if (parentOp.getOpTraits().getNumReduceSinks() > numReduceSinks) {
           numReduceSinks = parentOp.getOpTraits().getNumReduceSinks();
         }
+        // If there is a mismatch in bucketingVersion, it is set to -1 so that
+        // SMB join is disabled.
+        if (bucketingVersion == -1 && !bucketingVersionSeen) {
+          bucketingVersion = parentOp.getOpTraits().getBucketingVersion();
+          bucketingVersionSeen = true;
+        } else if (bucketingVersion != parentOp.getOpTraits().getBucketingVersion()) {
+          bucketingVersion = -1;
+        }
       }
-      OpTraits opTraits = new OpTraits(null, -1, null, numReduceSinks);
+      OpTraits opTraits = new OpTraits(null, -1,
+              null, numReduceSinks, bucketingVersion);
       operator.setOpTraits(opTraits);
       return null;
     }
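
The default rule above folds the parents' bucketing versions into one value: the first version seen wins, and any disagreement collapses the result to -1, which later disables SMB. A condensed standalone version of that fold follows.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Condensed version of the fold above: the first bucketing version seen wins,
// any disagreement collapses to -1 (which disables SMB later on).
public class BucketingVersionMerge {
  static int merge(List<Integer> parentVersions) {
    int bucketingVersion = -1;
    boolean bucketingVersionSeen = false;
    for (int v : parentVersions) {
      if (!bucketingVersionSeen) {
        bucketingVersion = v;
        bucketingVersionSeen = true;
      } else if (bucketingVersion != v) {
        bucketingVersion = -1;  // mismatch between parents
      }
    }
    return bucketingVersion;
  }

  public static void main(String[] args) {
    System.out.println(merge(Arrays.asList(2, 2)));              // 2
    System.out.println(merge(Arrays.asList(1, 2)));              // -1, mismatch
    System.out.println(merge(Collections.<Integer>emptyList())); // -1, no parents with traits
  }
}
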
index 068f25e..394f826 100644 (file)
@@ -40,6 +40,8 @@ import java.util.regex.Pattern;
 
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.hadoop.hive.ql.exec.vector.reducesink.*;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -68,11 +70,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOpera
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator;
 import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator;
-import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator;
-import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator;
-import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator;
-import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator;
-import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
@@ -3808,6 +3805,9 @@ public class Vectorizer implements PhysicalPlanResolver {
 
     LOG.info("Vectorizer vectorizeOperator reduce sink class " + opClass.getSimpleName());
 
+    // Get the bucketing version
+    int bucketingVersion = ((ReduceSinkOperator)op).getBucketingVersion();
+
     Operator<? extends OperatorDesc> vectorOp = null;
     try {
       vectorOp = OperatorFactory.getVectorOperator(
@@ -3819,6 +3819,10 @@ public class Vectorizer implements PhysicalPlanResolver {
       throw new HiveException(e);
     }
 
+    // Set the bucketing version
+    Preconditions.checkArgument(vectorOp instanceof VectorReduceSinkCommonOperator);
+    vectorOp.setBucketingVersion(bucketingVersion);
+
     return vectorOp;
   }
 
@@ -4026,6 +4030,8 @@ public class Vectorizer implements PhysicalPlanResolver {
     vectorDesc.setHasDistinctColumns(hasDistinctColumns);
     vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
     vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
+    vectorDesc.setIsAcidChange(desc.getWriteType() == AcidUtils.Operation.DELETE ||
+                               desc.getWriteType() == AcidUtils.Operation.UPDATE);
 
     // This indicates we logged an inconsistency (from our point-of-view) and will not make this
     // operator native...
index 7b1fd5f..8e75db9 100644 (file)
@@ -121,7 +121,8 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
     }
 
     // we can set the traits for this join operator
-    OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks());
+    OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null,
+            joinOp.getOpTraits().getNumReduceSinks(), joinOp.getOpTraits().getBucketingVersion());
     mapJoinOp.setOpTraits(opTraits);
     mapJoinOp.setStatistics(joinOp.getStatistics());
     setNumberOfBucketsOnChildren(mapJoinOp);
index 1dccf96..0205650 100644 (file)
@@ -235,6 +235,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCardinalityViolation;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
@@ -8411,9 +8412,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         sortCols.add(exprNode);
       }
     }
+
+    Table dest_tab = qb.getMetaData().getDestTableForAlias(dest);
+    AcidUtils.Operation acidOp = Operation.NOT_ACID;
+    if (AcidUtils.isFullAcidTable(dest_tab)) {
+      acidOp = getAcidType(Utilities.getTableDesc(dest_tab).getOutputFileFormatClass(), dest);
+    }
     Operator result = genReduceSinkPlan(
         input, partCols, sortCols, order.toString(), nullOrder.toString(),
-        numReducers, Operation.NOT_ACID, true);
+        numReducers, acidOp, true);
     if (result.getParentOperators().size() == 1 &&
         result.getParentOperators().get(0) instanceof ReduceSinkOperator) {
       ((ReduceSinkOperator) result.getParentOperators().get(0))
@@ -10806,7 +10813,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
    */
   private ExprNodeDesc genSamplePredicate(TableSample ts,
                                           List<String> bucketCols, boolean useBucketCols, String alias,
-                                          RowResolver rwsch, QBMetaData qbm, ExprNodeDesc planExpr)
+                                          RowResolver rwsch, QBMetaData qbm, ExprNodeDesc planExpr,
+                                          int bucketingVersion)
       throws SemanticException {
 
     ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(
@@ -10836,22 +10844,19 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     ExprNodeDesc equalsExpr = null;
     {
       ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(
-          TypeInfoFactory.intTypeInfo, new GenericUDFHash(), args);
-      assert (hashfnExpr != null);
+          TypeInfoFactory.intTypeInfo,
+              bucketingVersion == 2 ? new GenericUDFMurmurHash() : new GenericUDFHash(), args);
       LOG.info("hashfnExpr = " + hashfnExpr);
       ExprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor
           .getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
-      assert (andExpr != null);
       LOG.info("andExpr = " + andExpr);
       ExprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor
           .getFuncExprNodeDesc("%", andExpr, denominatorExpr);
-      assert (modExpr != null);
       LOG.info("modExpr = " + modExpr);
       LOG.info("numeratorExpr = " + numeratorExpr);
       equalsExpr = TypeCheckProcFactory.DefaultExprProcessor
           .getFuncExprNodeDesc("==", modExpr, numeratorExpr);
       LOG.info("equalsExpr = " + equalsExpr);
-      assert (equalsExpr != null);
     }
     return equalsExpr;
   }
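
For TABLESAMPLE(BUCKET x OUT OF y) the expression tree assembled above boils down to integer arithmetic on the bucket-column hash; the only change here is which UDF produces that hash. A minimal sketch of the resulting predicate (the helper name and the 1-based bucket numbering are stated assumptions, not copied from the patch):

    // (hash & Integer.MAX_VALUE) % y == x - 1, where hash comes from
    // GenericUDFMurmurHash when the table's bucketing version is 2 and from
    // the legacy GenericUDFHash otherwise.
    static boolean rowSelected(int hashOfBucketCols, int x, int y) {
      return ((hashOfBucketCols & Integer.MAX_VALUE) % y) == x - 1;
    }
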
@@ -10952,6 +10957,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         topToTableProps.put(top, properties);
         tsDesc.setOpProps(properties);
       }
+
+      // Set the bucketing version from the table metadata.
+      top.setBucketingVersion(tsDesc.getTableMetadata().getBucketingVersion());
     } else {
       rwsch = opParseCtx.get(top).getRowResolver();
       top.setChildOperators(null);
@@ -11020,7 +11028,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         // later
         LOG.info("No need for sample filter");
         ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
-            colsEqual, alias, rwsch, qb.getMetaData(), null);
+            colsEqual, alias, rwsch, qb.getMetaData(), null,
+                tab.getBucketingVersion());
         FilterDesc filterDesc = new FilterDesc(
             samplePredicate, true, new SampleDesc(ts.getNumerator(),
             ts.getDenominator(), tabBucketCols, true));
@@ -11032,7 +11041,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         // create tableOp to be filterDesc and set as child to 'top'
         LOG.info("Need sample filter");
         ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
-            colsEqual, alias, rwsch, qb.getMetaData(), null);
+            colsEqual, alias, rwsch, qb.getMetaData(), null,
+                tab.getBucketingVersion());
         FilterDesc filterDesc = new FilterDesc(samplePredicate, true);
         filterDesc.setGenerated(true);
         op = OperatorFactory.getAndMakeChild(filterDesc,
@@ -11063,7 +11073,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
             tsSample.setInputPruning(true);
             qb.getParseInfo().setTabSample(alias, tsSample);
             ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab
-                .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
+                .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null,
+                    tab.getBucketingVersion());
             FilterDesc filterDesc = new FilterDesc(samplePred, true,
                 new SampleDesc(tsSample.getNumerator(), tsSample
                     .getDenominator(), tab.getBucketCols(), true));
@@ -11082,7 +11093,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
                 .getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer
                     .valueOf(460476415)));
             ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false,
-                alias, rwsch, qb.getMetaData(), randFunc);
+                alias, rwsch, qb.getMetaData(), randFunc, tab.getBucketingVersion());
             FilterDesc filterDesc = new FilterDesc(samplePred, true);
             filterDesc.setGenerated(true);
             op = OperatorFactory.getAndMakeChild(filterDesc,
index 9621c3b..d3b62ce 100644 (file)
@@ -22,17 +22,20 @@ import java.util.List;
 
 public class OpTraits {
 
-  List<List<String>> bucketColNames;
-  List<List<String>> sortColNames;
-  int numBuckets;
-  int numReduceSinks;
+  private List<List<String>> bucketColNames;
+  private List<List<String>> sortColNames;
+  private int numBuckets;
+  private int numReduceSinks;
+  private int bucketingVersion;
 
   public OpTraits(List<List<String>> bucketColNames, int numBuckets,
-      List<List<String>> sortColNames, int numReduceSinks) {
+      List<List<String>> sortColNames, int numReduceSinks,
+                  int bucketingVersion) {
     this.bucketColNames = bucketColNames;
     this.numBuckets = numBuckets;
     this.sortColNames = sortColNames;
     this.numReduceSinks = numReduceSinks;
+    this.bucketingVersion = bucketingVersion;
   }
 
   public List<List<String>> getBucketColNames() {
@@ -68,10 +71,17 @@ public class OpTraits {
     return this.numReduceSinks;
   }
 
-  
+  public void setBucketingVersion(int bucketingVersion) {
+    this.bucketingVersion = bucketingVersion;
+  }
+
+  public int getBucketingVersion() {
+    return bucketingVersion;
+  }
+
   @Override
   public String toString() {
     return "{ bucket column names: " + bucketColNames + "; sort column names: "
-        + sortColNames + "; bucket count: " + numBuckets + " }";
+        + sortColNames + "; bucket count: " + numBuckets + "; bucketing version: " + bucketingVersion + " }";
   }
 }
index 056dfa4..2c5b655 100644 (file)
@@ -790,7 +790,7 @@ public final class PlanUtils {
     return new ReduceSinkDesc(keyCols, numKeys, valueCols, outputKeyCols,
         distinctColIndices, outputValCols,
         tag, partitionCols, numReducers, keyTable,
-        valueTable);
+        valueTable, writeType);
   }
 
   /**
index aa3c72b..61216bc 100644 (file)
@@ -27,6 +27,7 @@ import java.util.Objects;
 import java.util.Set;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.optimizer.signature.Signature;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
 import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
@@ -127,6 +128,8 @@ public class ReduceSinkDesc extends AbstractOperatorDesc {
 
   private static transient Logger LOG = LoggerFactory.getLogger(ReduceSinkDesc.class);
 
+  private AcidUtils.Operation writeType;
+
   public ReduceSinkDesc() {
   }
 
@@ -137,7 +140,8 @@ public class ReduceSinkDesc extends AbstractOperatorDesc {
       List<List<Integer>> distinctColumnIndices,
       ArrayList<String> outputValueColumnNames, int tag,
       ArrayList<ExprNodeDesc> partitionCols, int numReducers,
-      final TableDesc keySerializeInfo, final TableDesc valueSerializeInfo) {
+      final TableDesc keySerializeInfo, final TableDesc valueSerializeInfo,
+      AcidUtils.Operation writeType) {
     this.keyCols = keyCols;
     this.numDistributionKeys = numDistributionKeys;
     this.valueCols = valueCols;
@@ -151,6 +155,7 @@ public class ReduceSinkDesc extends AbstractOperatorDesc {
     this.distinctColumnIndices = distinctColumnIndices;
     this.setNumBuckets(-1);
     this.setBucketCols(null);
+    this.writeType = writeType;
   }
 
   @Override
@@ -669,4 +674,7 @@ public class ReduceSinkDesc extends AbstractOperatorDesc {
     return false;
   }
 
+  public AcidUtils.Operation getWriteType() {
+    return writeType;
+  }
 }
index 25b9189..4068e56 100644 (file)
@@ -184,6 +184,11 @@ public class TableDesc implements Serializable, Cloneable {
     return (properties.getProperty(hive_metastoreConstants.META_TABLE_STORAGE) != null);
   }
 
+  public int getBucketingVersion() {
+    return Utilities.getBucketingVersion(
+        properties.getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
+  }
+
   @Override
   public Object clone() {
     TableDesc ret = new TableDesc();
index adea3b5..97e4284 100644 (file)
@@ -73,6 +73,7 @@ public class VectorReduceSinkDesc extends AbstractVectorDesc  {
   private boolean isKeyBinarySortable;
   private boolean isValueLazyBinary;
   private boolean isUnexpectedCondition;
+  private boolean isAcidChange;
 
   /*
    * The following conditions are for native Vector ReduceSink.
@@ -143,4 +144,12 @@ public class VectorReduceSinkDesc extends AbstractVectorDesc  {
   public boolean getIsUnexpectedCondition() {
     return isUnexpectedCondition;
   }
+
+  public void setIsAcidChange(boolean isAcidChange) {
+    this.isAcidChange = isAcidChange;
+  }
+
+  public boolean getIsAcidChange() {
+    return isAcidChange;
+  }
 }
index 7cd5718..1a75843 100644 (file)
@@ -29,6 +29,7 @@ import org.apache.hadoop.io.IntWritable;
 /**
  * GenericUDF Class for computing hash values.
  */
+@Deprecated
 @Description(name = "hash", value = "_FUNC_(a1, a2, ...) - Returns a hash value of the arguments")
 public class GenericUDFHash extends GenericUDF {
   private transient ObjectInspector[] argumentOIs;
@@ -48,7 +49,7 @@ public class GenericUDFHash extends GenericUDF {
     for(int i = 0; i < arguments.length; i++) {
       fieldValues[i] = arguments[i].get();
     }
-    int r = ObjectInspectorUtils.getBucketHashCode(fieldValues, argumentOIs);
+    int r = ObjectInspectorUtils.getBucketHashCodeOld(fieldValues, argumentOIs);
     result.set(r);
     return result;
   }
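
Keeping the legacy UDF on getBucketHashCodeOld while getBucketHashCode switches to Murmur3 means the same key can map to a different bucket depending on which version wrote the data, which is why so many bucket-file expectations shift in the tests below. A minimal comparison sketch (values are illustrative; the (hash & Integer.MAX_VALUE) % numBuckets formula mirrors the sampling predicate above and is an assumption about the bucket-number computation):

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    // Same single-int key hashed with both code paths.
    Object[] key = { 1 };
    ObjectInspector[] ois = { PrimitiveObjectInspectorFactory.javaIntObjectInspector };
    int newHash = ObjectInspectorUtils.getBucketHashCode(key, ois);     // Murmur3 after this patch
    int oldHash = ObjectInspectorUtils.getBucketHashCodeOld(key, ois);  // legacy hash, kept for this UDF
    int numBuckets = 2;
    int newBucket = (newHash & Integer.MAX_VALUE) % numBuckets;
    int oldBucket = (oldHash & Integer.MAX_VALUE) % numBuckets;
    // newBucket and oldBucket need not agree, so readers and the compactor must
    // know which bucketing_version a table was written with.
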
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMurmurHash.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMurmurHash.java
new file mode 100644 (file)
index 0000000..f55ab9d
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hive.common.util.Murmur3;
+
+/**
+ * GenericUDF Class for computing Murmur3 hash values.
+ */
+@Description(name = "hash", value = "_FUNC_(a1, a2, ...) - Returns a hash value of the arguments")
+public class GenericUDFMurmurHash extends GenericUDF {
+  private transient ObjectInspector[] argumentOIs;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException {
+
+    argumentOIs = arguments;
+    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
+  }
+
+  private final IntWritable result = new IntWritable();
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object[] fieldValues = new Object[arguments.length];
+    for(int i = 0; i < arguments.length; i++) {
+      fieldValues[i] = arguments[i].get();
+    }
+    int r = ObjectInspectorUtils.getBucketHashCode(fieldValues, argumentOIs);
+    result.set(r);
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString("hash", children, ",");
+  }
+
+}
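
A minimal usage sketch of the new UDF outside a query plan (a test-style fragment that would live in a method declaring throws Exception; the argument values are arbitrary). Since the UDF is a thin wrapper, its result should agree with ObjectInspectorUtils.getBucketHashCode on the same values:

    GenericUDFMurmurHash udf = new GenericUDFMurmurHash();
    ObjectInspector[] ois = {
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector };
    udf.initialize(ois);

    Object[] values = { 42, "hive" };
    IntWritable out = (IntWritable) udf.evaluate(new GenericUDF.DeferredObject[] {
        new GenericUDF.DeferredJavaObject(values[0]),
        new GenericUDF.DeferredJavaObject(values[1]) });

    // Expected to match the hash used for shuffle and bucketing:
    assert out.get() == ObjectInspectorUtils.getBucketHashCode(values, ois);
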
index 7f7bc11..589e3b7 100644 (file)
@@ -63,6 +63,7 @@ public class TestTxnAddPartition extends TxnCommandsBaseForTests {
 
   @Test
   public void addPartition() throws Exception {
+
     addPartition(false);
   }
 
@@ -222,9 +223,9 @@ public class TestTxnAddPartition extends TxnCommandsBaseForTests {
     List<String> rs = runStatementOnDriver(
         "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID");
     String[][] expected = new String[][]{
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2",
-            "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"},
         {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t0\t1\t4",
+            "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"},
+        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t0\t2",
             "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}};
     checkExpected(rs, expected, "add partition (p=0)");
   }
@@ -238,7 +239,7 @@ public class TestTxnAddPartition extends TxnCommandsBaseForTests {
    * renamed during add.
    */
   @Test
-  public void addPartitionReaname() throws Exception {
+  public void addPartitionRename() throws Exception {
     runStatementOnDriver("drop table if exists T");
     runStatementOnDriver("drop table if exists Tstage");
     runStatementOnDriver("create table T (a int, b int) partitioned by (p int) " +
@@ -261,9 +262,9 @@ public class TestTxnAddPartition extends TxnCommandsBaseForTests {
     List<String> rs = runStatementOnDriver(
         "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID");
     String[][] expected = new String[][]{
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2",
-            "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"},
         {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t0\t1\t4",
+            "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"},
+        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t0\t2",
             "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}};
     checkExpected(rs, expected, "add partition (p=0)");
   }
index 12d57c6..6a3be39 100644 (file)
@@ -765,13 +765,13 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
       BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(1)));
     Assert.assertEquals("", 4, rs.size());
     Assert.assertTrue(rs.get(0),
-            rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t12"));
-    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/000000_0_copy_1"));
+            rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
+    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/000001_0"));
     Assert.assertTrue(rs.get(1),
-            rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
-    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/000001_0"));
+            rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
+    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/000001_0_copy_1"));
     Assert.assertTrue(rs.get(2),
-            rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
+            rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12"));
     Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/000001_0_copy_1"));
     Assert.assertTrue(rs.get(3),
             rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
@@ -786,13 +786,13 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
     }
     Assert.assertEquals("", 4, rs.size());
     Assert.assertTrue(rs.get(0),
-            rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t12"));
-    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_0000001/bucket_00000"));
+            rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
+    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
     Assert.assertTrue(rs.get(1),
-            rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
+            rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
     Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
     Assert.assertTrue(rs.get(2),
-            rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
+            rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12"));
     Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
     Assert.assertTrue(rs.get(3),
             rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
@@ -820,7 +820,7 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
     int[][] expected = {{0, -1}, {1, -1}, {3, -1}};
     Assert.assertEquals(stringifyValues(expected), r);
   }
-  //@Ignore("see bucket_num_reducers_acid2.q")
+  @Ignore("Moved to Tez")
   @Test
   public void testMoreBucketsThanReducers2() throws Exception {
     //todo: try using set VerifyNumReducersHook.num.reducers=10;
index dc19752..e882e40 100644 (file)
@@ -368,14 +368,14 @@ public class TestTxnCommands2 {
      * Note: order of rows in a file ends up being the reverse of order in values clause (why?!)
      */
     String[][] expected = {
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t13",  "bucket_00000"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000"},
-        {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000"},
-        {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t0\t13",  "bucket_00001"},
+        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t15", "bucket_00001"},
+        {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"},
+        {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t0\t120", "bucket_00001"},
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2",   "bucket_00001"},
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":3}\t1\t4",   "bucket_00001"},
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5",   "bucket_00001"},
-        {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t1\t6",   "bucket_00001"},
+        {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":6}\t1\t6",   "bucket_00001"},
         {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"}
     };
     Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size());
@@ -469,7 +469,7 @@ public class TestTxnCommands2 {
         sawNewDelta = true;
         FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
         Assert.assertEquals(1, buckets.length); // only one bucket file
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
+        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00000"));
       } else {
         Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
       }
@@ -495,14 +495,14 @@ public class TestTxnCommands2 {
       if (status[i].getPath().getName().matches("base_.*")) {
         sawNewBase = true;
         FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
-        Assert.assertEquals(1, buckets.length);
+        Assert.assertEquals(2, buckets.length);
         Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
       }
     }
     Assert.assertTrue(sawNewBase);
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
+    resultData = new int[][] {{3, 4}, {1, 2}};
+    Assert.assertEquals(stringifyValuesNoSort(resultData), rs);
     rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     resultCount = 2;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
@@ -528,11 +528,11 @@ public class TestTxnCommands2 {
     Assert.assertEquals(1, status.length);
     Assert.assertTrue(status[0].getPath().getName().matches("base_.*"));
     FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
-    Assert.assertEquals(1, buckets.length);
+    Assert.assertEquals(2, buckets.length);
     Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
+    resultData = new int[][] {{3, 4}, {1, 2}};
+    Assert.assertEquals(stringifyValuesNoSort(resultData), rs);
     rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     resultCount = 2;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
@@ -767,7 +767,7 @@ public class TestTxnCommands2 {
         } else if (numDelta == 2) {
           Assert.assertEquals("delta_0000002_0000002_0000", status[i].getPath().getName());
           Assert.assertEquals(1, buckets.length);
-          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
+          Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
         }
       } else if (status[i].getPath().getName().matches("delete_delta_.*")) {
         numDeleteDelta++;
@@ -822,15 +822,15 @@ public class TestTxnCommands2 {
         } else if (numBase == 2) {
           // The new base dir now has two bucket files, since the delta dir has two bucket files
           Assert.assertEquals("base_0000002", status[i].getPath().getName());
-          Assert.assertEquals(1, buckets.length);
-          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
+          Assert.assertEquals(2, buckets.length);
+          Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
         }
       }
     }
     Assert.assertEquals(2, numBase);
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
+    resultData = new int[][] {{3, 4}, {1, 3}};
+    Assert.assertEquals(stringifyValuesNoSort(resultData), rs);
     rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     resultCount = 2;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
@@ -850,11 +850,11 @@ public class TestTxnCommands2 {
     Assert.assertEquals("base_0000002", status[0].getPath().getName());
     FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
     Arrays.sort(buckets);
-    Assert.assertEquals(1, buckets.length);
-    Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
+    Assert.assertEquals(2, buckets.length);
+    Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
+    resultData = new int[][] {{3, 4}, {1, 3}};
+    Assert.assertEquals(stringifyValuesNoSort(resultData), rs);
     rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     resultCount = 2;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
@@ -2176,6 +2176,27 @@ public class TestTxnCommands2 {
     }
     return rs;
   }
+
+  /**
+   * Takes raw data and turns it into a string as if from Driver.getResults().
+   * Unlike stringifyValues, does not sort the rows: input order is preserved.
+   */
+  static List<String> stringifyValuesNoSort(int[][] rowsIn) {
+    assert rowsIn.length > 0;
+    int[][] rows = rowsIn.clone();
+    List<String> rs = new ArrayList<String>();
+    for(int[] row : rows) {
+      assert row.length > 0;
+      StringBuilder sb = new StringBuilder();
+      for(int value : row) {
+        sb.append(value).append("\t");
+      }
+      sb.setLength(sb.length() - 1);
+      rs.add(sb.toString());
+    }
+    return rs;
+  }
+
   static class RowComp implements Comparator<int[]> {
     @Override
     public int compare(int[] row1, int[] row2) {
index 4b2f961..af43b14 100644 (file)
@@ -184,7 +184,7 @@ public class TestTxnNoBuckets extends TxnCommandsBaseForTests {
     List<String> rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas order by ROW__ID");
     String expected[][] = {
         {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas/delta_0000001_0000001_0000/bucket_00000"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas/delta_0000001_0000001_0000/bucket_00000"},
+        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t2", "warehouse/myctas/delta_0000001_0000001_0000/bucket_00001"},
     };
     checkExpected(rs, expected, "Unexpected row count after ctas from non acid table");
 
@@ -195,7 +195,7 @@ public class TestTxnNoBuckets extends TxnCommandsBaseForTests {
     rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas2 order by ROW__ID");
     String expected2[][] = {
         {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"}
+        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t2", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00001"}
     };
     checkExpected(rs, expected2, "Unexpected row count after ctas from acid table");
 
@@ -204,10 +204,10 @@ public class TestTxnNoBuckets extends TxnCommandsBaseForTests {
       " union all select a, b from " + Table.ACIDTBL);
     rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas3 order by ROW__ID");
     String expected3[][] = {
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000"},
+        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000"},
         {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00001"},
-        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00001"},
+        {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00002"},
+        {"{\"writeid\":1,\"bucketid\":537067520,\"rowid\":0}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00003"},
     };
     checkExpected(rs, expected3, "Unexpected row count after ctas from union all query");
 
@@ -269,9 +269,9 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
     String expected[][] = {
         {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t1\t2", "/delta_0000001_0000001_0001/bucket_00000"},
         {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t3\t4", "/delta_0000001_0000001_0001/bucket_00000"},
-        {"{\"writeid\":1,\"bucketid\":536870914,\"rowid\":0}\t7\t8", "/delta_0000001_0000001_0002/bucket_00000"},
-        {"{\"writeid\":1,\"bucketid\":536870914,\"rowid\":1}\t5\t6", "/delta_0000001_0000001_0002/bucket_00000"},
+        {"{\"writeid\":1,\"bucketid\":536870914,\"rowid\":0}\t5\t6", "/delta_0000001_0000001_0002/bucket_00000"},
         {"{\"writeid\":1,\"bucketid\":536870915,\"rowid\":0}\t9\t10", "/delta_0000001_0000001_0003/bucket_00000"},
+        {"{\"writeid\":1,\"bucketid\":536936450,\"rowid\":0}\t7\t8", "/delta_0000001_0000001_0002/bucket_00001"},
     };
     checkExpected(rs, expected, "Unexpected row count after ctas");
   }
index 4a33885..1fa11fc 100644 (file)
@@ -41,9 +41,9 @@ public class TestPrivilegesV1 extends PrivilegesTestBase{
   public void setup() throws Exception {
     queryState = new QueryState.Builder().build();
     db = Mockito.mock(Hive.class);
+    HiveConf hiveConf = queryState.getConf();
     table = new Table(DB, TABLE);
     partition = new Partition(table);
-    HiveConf hiveConf = queryState.getConf();
     hiveConf
     .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
         "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
index 75bd579..6d5e2ac 100644 (file)
@@ -47,7 +47,7 @@ CREATE TABLE harbucket(key INT)
 PARTITIONED by (ds STRING)
 CLUSTERED BY (key) INTO 10 BUCKETS;
 
-INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50;
+INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key > 50;
 
 SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
 ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12');
@@ -59,7 +59,7 @@ SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
 CREATE TABLE old_name(key INT)
 PARTITIONED by (ds STRING);
 
-INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50;
+INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key > 50;
 ALTER TABLE old_name ARCHIVE PARTITION (ds='1');
 SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2;
index 6cd3004..8abcdc7 100644 (file)
@@ -12,6 +12,6 @@ insert overwrite table bucket_many
 select * from src;
 
 explain
-select * from bucket_many tablesample (bucket 1 out of 256) s;
+select * from bucket_many tablesample (bucket 2 out of 256) s;
 
-select * from bucket_many tablesample (bucket 1 out of 256) s;
+select * from bucket_many tablesample (bucket 2 out of 256) s;
index 725dd4c..5622ce2 100644 (file)
@@ -227,7 +227,7 @@ from tab1 a join tab_part b on a.key = b.key;
 
 -- No map joins should be created.
 set hive.convert.join.bucket.mapjoin.tez = false;
-set hive.auto.convert.join.noconditionaltask.size=1500;
+set hive.auto.convert.join.noconditionaltask.size=15000;
 explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
 set hive.convert.join.bucket.mapjoin.tez = true;
 explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
index 48f7b75..6345166 100644 (file)
@@ -9,11 +9,10 @@ set mapred.reduce.tasks = 10;
 -- and uses a post-hook to confirm that 10 tasks were created
 
 CREATE TABLE bucket_nr(key int, value string) CLUSTERED BY (key) INTO 50 BUCKETS;
-set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.VerifyNumReducersHook;
-set VerifyNumReducersHook.num.reducers=10;
 
+explain extended insert overwrite table bucket_nr
+  select * from src;
 insert overwrite table bucket_nr
 select * from src;
 
-set hive.exec.post.hooks=;
 drop table bucket_nr;
index 6d5716d..40965da 100644 (file)
@@ -8,10 +8,10 @@ set hive.exec.reducers.max = 2;
 -- table with 3 buckets, and uses a post-hook to confirm that 1 reducer was used
 
 CREATE TABLE test_table(key int, value string) CLUSTERED BY (key) INTO 3 BUCKETS;
-set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.VerifyNumReducersHook;
-set VerifyNumReducersHook.num.reducers=1;
 
+explain extended insert overwrite table test_table
+  select * from src;
 insert overwrite table test_table
 select * from src;
 
-set hive.exec.post.hooks=;
+drop table test_table;
index 9776785..51b5885 100644 (file)
@@ -10,8 +10,6 @@ set mapred.reduce.tasks = 2;
 
 drop table if exists bucket_nr_acid2;
 create table bucket_nr_acid2 (a int, b int) clustered by (a) into 4 buckets stored as orc TBLPROPERTIES ('transactional'='true');
-set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.VerifyNumReducersHook;
-set VerifyNumReducersHook.num.reducers=2;
 
 -- txn X write to b0 + b1
 insert into bucket_nr_acid2 values(0,1),(1,1);
@@ -27,7 +25,6 @@ insert into bucket_nr_acid2 values(2,4),(3,4);
 
 
 update bucket_nr_acid2 set b = -1;
-set hive.exec.post.hooks=;
 select * from bucket_nr_acid2 order by a, b;
 
 drop table bucket_nr_acid2;
index bd9f777..06f4db6 100644 (file)
@@ -3,7 +3,6 @@ set hive.mapred.mode=nonstrict;
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 
-
 create table acid_iud(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
 
 insert into table acid_iud select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint < 0 order by cint limit 10;
index 7c2de2e..b0aab14 100644 (file)
@@ -17,9 +17,9 @@ create table srcpartbucket (key string, value string) partitioned by (ds string,
 insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10;
 
 explain extended
-select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC;
+select ds, count(1) from srcpartbucket tablesample (bucket 2 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC;
 
-select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC;
+select ds, count(1) from srcpartbucket tablesample (bucket 2 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC;
 
 select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC;
 
index d318f7d..0500a62 100644 (file)
@@ -16,8 +16,8 @@ limit 1;
 
 set hive.auto.convert.join=true;
 set hive.optimize.dynamic.partition.hashjoin=true;
-set hive.auto.convert.join.noconditionaltask.size=200000;
-set hive.exec.reducers.bytes.per.reducer=200000;
+set hive.auto.convert.join.noconditionaltask.size=20000;
+set hive.exec.reducers.bytes.per.reducer=2000;
 
 explain
 select a.*
index ecfb0dc..0f8f22f 100644 (file)
@@ -34,7 +34,7 @@ select key,value from srcbucket_mapjoin;
 set hive.convert.join.bucket.mapjoin.tez = true;
 set hive.auto.convert.sortmerge.join = true;
 
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.auto.convert.join.noconditionaltask.size=50;
 
 explain
 select count(*) from tab s1 join tab s3 on s1.key=s3.key;
index 96259e5..3033cbe 100644 (file)
@@ -24,6 +24,7 @@ Retention:            0
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+       bucketing_version       2                   
        numFiles                0                   
        numRows                 0                   
        rawDataSize             0                   
index 8e240ea..90c3d02 100644 (file)
@@ -51,6 +51,7 @@ Retention:            0
 Table Type:            VIRTUAL_VIEW             
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       bucketing_version       2                   
        numFiles                0                   
        numPartitions           2                   
        numRows                 0                   
index 2dd047a..eb308eb 100644 (file)
@@ -20,6 +20,7 @@ POSTHOOK: query: SHOW TBLPROPERTIES testTable
 POSTHOOK: type: SHOW_TBLPROPERTIES
 COLUMN_STATS_ACCURATE  {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true"}}
 a      1
+bucketing_version      2
 c      3
 #### A masked pattern was here ####
 numFiles       0
index ae7f4f6..b5038d9 100644 (file)
@@ -21,6 +21,7 @@ PREHOOK: query: SHOW TBLPROPERTIES testView
 PREHOOK: type: SHOW_TBLPROPERTIES
 POSTHOOK: query: SHOW TBLPROPERTIES testView
 POSTHOOK: type: SHOW_TBLPROPERTIES
+bucketing_version      2
 #### A masked pattern was here ####
 propA  100
 propB  200
index 5569a03..76a781e 100644 (file)
@@ -73,21 +73,21 @@ STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:acid2 
+        $hdt$_0:acid1 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_1:acid2 
+        $hdt$_0:acid1 
           TableScan
-            alias: acid2
-            Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: NONE
+            alias: acid1
+            Statistics: Num rows: 316 Data size: 1265 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 316 Data size: 1265 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 316 Data size: 1265 Basic stats: COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
                     0 _col0 (type: int)
@@ -97,15 +97,15 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: acid1
-            Statistics: Num rows: 316 Data size: 1265 Basic stats: COMPLETE Column stats: NONE
+            alias: acid2
+            Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 316 Data size: 1265 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 316 Data size: 1265 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Inner Join 0 to 1
index 5b2227f..9024719 100644 (file)
@@ -71,6 +71,7 @@ STAGE PLANS:
             properties:
               bucket_count 2
               bucket_field_name a
+              bucketing_version 2
               column.name.delimiter ,
               columns a,b
               columns.comments 
@@ -92,6 +93,7 @@ STAGE PLANS:
               properties:
                 bucket_count 2
                 bucket_field_name a
+                bucketing_version 2
                 column.name.delimiter ,
                 columns a,b
                 columns.comments 
index 1fc71db..42c2a79 100644 (file)
@@ -32,6 +32,7 @@ Retention:            0
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       bucketing_version       2                   
        numFiles                0                   
        numPartitions           0                   
        numRows                 0                   
@@ -603,6