[HUDI-4488] Improve S3EventsHoodieIncrSource efficiency (#6228)
author	vamshigv <107005799+vamshigv@users.noreply.github.com>
Thu, 11 Aug 2022 21:41:30 +0000 (14:41 -0700)
committer	GitHub <noreply@github.com>
Thu, 11 Aug 2022 21:41:30 +0000 (16:41 -0500)
834 files changed:
.github/PULL_REQUEST_TEMPLATE.md
.github/workflows/bot.yml
azure-pipelines.yml
docker/demo/config/hoodie-incr.properties
docker/demo/config/test-suite/deltastreamer-hive-sync-presto.yaml [new file with mode: 0644]
docker/demo/sparksql-incremental.commands
docker/hoodie/hadoop/base/pom.xml
docker/hoodie/hadoop/base_java11/pom.xml
docker/hoodie/hadoop/datanode/pom.xml
docker/hoodie/hadoop/historyserver/pom.xml
docker/hoodie/hadoop/hive_base/pom.xml
docker/hoodie/hadoop/namenode/pom.xml
docker/hoodie/hadoop/pom.xml
docker/hoodie/hadoop/prestobase/pom.xml
docker/hoodie/hadoop/spark_base/pom.xml
docker/hoodie/hadoop/sparkadhoc/pom.xml
docker/hoodie/hadoop/sparkmaster/pom.xml
docker/hoodie/hadoop/sparkworker/pom.xml
docker/hoodie/hadoop/trinobase/pom.xml
docker/hoodie/hadoop/trinocoordinator/pom.xml
docker/hoodie/hadoop/trinoworker/pom.xml
hudi-aws/pom.xml
hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java
hudi-cli/pom.xml
hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java
hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java
hudi-cli/src/main/java/org/apache/hudi/cli/commands/HDFSParquetImportCommand.java
hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieSyncValidateCommand.java [moved from hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieSyncCommand.java with 98% similarity]
hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java
hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCommitsCommand.java
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java
hudi-client/hudi-client-common/pom.xml
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/callback/client/http/HoodieWriteCommitHttpCallbackClient.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieInternalWriteStatus.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/TransactionManager.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java [new file with mode: 0644]
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieArchivalConfig.java [new file with mode: 0644]
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java [new file with mode: 0644]
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieHBaseIndexConfig.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodiePayloadConfig.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/ListBasedHoodieBloomIndexHelper.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/datadog/DatadogHttpClient.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/datadog/DatadogReporter.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactionTriggerStrategy.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/ScheduleCompactionActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/RunIndexActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/CopyOnWriteRestoreActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/MergeOnReadRestoreActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackPlanActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/CopyOnWriteRollbackActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MergeOnReadRollbackActionExecutor.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/MarkerBasedRollbackUtils.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FiveToFourDowngradeHandler.java [new file with mode: 0644]
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToFiveUpgradeHandler.java [new file with mode: 0644]
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngrade.java
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/callback/http/TestCallbackHttpClient.java
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/datadog/TestDatadogHttpClient.java
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/datadog/TestDatadogReporter.java
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
hudi-client/hudi-flink-client/pom.xml
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/common/HoodieFlinkEngineContext.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeAndReplaceHandle.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandle.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetConfig.java [deleted file]
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriter.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteHelper.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java
hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/index/bloom/TestFlinkHoodieBloomIndex.java
hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkClientTestHarness.java
hudi-client/hudi-java-client/pom.xml
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaTable.java
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteHelper.java
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java
hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java
hudi-client/hudi-spark-client/pom.xml
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/bootstrap/HoodieSparkBootstrapSchemaProvider.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/model/HoodieInternalRow.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkInternalSchemaConverter.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/data/HoodieJavaPairRDD.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/data/HoodieJavaRDD.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/RebalancedSparkHoodieHBaseIndex.java [new file with mode: 0644]
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriter.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandleWithoutMetaFields.java [deleted file]
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetConfig.java [deleted file]
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java [deleted file]
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SparkKeyGeneratorInterface.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BaseBootstrapMetadataHandler.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/util/DataTypeUtils.java
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/unsafe/UTF8StringBuilder.java [new file with mode: 0644]
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/util/JFunction.scala
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala [new file with mode: 0644]
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieUnsafeRowUtils.scala [new file with mode: 0644]
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestFileBasedLockProvider.java [new file with mode: 0644]
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBootstrap.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/model/TestHoodieInternalRow.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestCopyOnWriteRollbackActionExecutor.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkCopyOnWriteTableArchiveWithReplace.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestMarkerBasedRollbackStrategy.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestBase.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java
hudi-client/hudi-spark-client/src/test/scala/org/apache/hudi/keygen/TestRowGeneratorHelper.scala [deleted file]
hudi-client/hudi-spark-client/src/test/scala/org/apache/spark/sql/TestHoodieUnsafeRowUtils.scala [new file with mode: 0644]
hudi-client/pom.xml
hudi-common/pom.xml
hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilter.java
hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java
hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java
hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
hudi-common/src/main/java/org/apache/hudi/common/config/LockConfiguration.java
hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java
hudi-common/src/main/java/org/apache/hudi/common/data/HoodieBaseListData.java [new file with mode: 0644]
hudi-common/src/main/java/org/apache/hudi/common/data/HoodieData.java
hudi-common/src/main/java/org/apache/hudi/common/data/HoodieList.java [deleted file]
hudi-common/src/main/java/org/apache/hudi/common/data/HoodieListData.java [new file with mode: 0644]
hudi-common/src/main/java/org/apache/hudi/common/data/HoodieListPairData.java [new file with mode: 0644]
hudi-common/src/main/java/org/apache/hudi/common/data/HoodieMapPair.java [deleted file]
hudi-common/src/main/java/org/apache/hudi/common/data/HoodiePairData.java
hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieLocalEngineContext.java
hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java
hudi-common/src/main/java/org/apache/hudi/common/model/AWSDmsAvroPayload.java [new file with mode: 0644]
hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroup.java
hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java
hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteNonDefaultsWithLatestAvroPayload.java
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java
hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java
hudi-common/src/main/java/org/apache/hudi/common/table/log/InstantRange.java
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java
hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewManager.java
hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewStorageConfig.java
hudi-common/src/main/java/org/apache/hudi/common/table/view/RemoteHoodieTableFileSystemView.java
hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/CompactionUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/DefaultSizeEstimator.java
hudi-common/src/main/java/org/apache/hudi/common/util/Either.java [new file with mode: 0644]
hudi-common/src/main/java/org/apache/hudi/common/util/HoodieRecordSizeEstimator.java
hudi-common/src/main/java/org/apache/hudi/common/util/HoodieTimer.java
hudi-common/src/main/java/org/apache/hudi/common/util/MarkerUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/PartitionPathEncodeUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java
hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/TypeUtils.java
hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java
hudi-common/src/main/java/org/apache/hudi/common/util/collection/RocksDBBasedMap.java
hudi-common/src/main/java/org/apache/hudi/exception/HoodieCatalogException.java [new file with mode: 0644]
hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaMerger.java
hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java
hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/InternalSchemaUtils.java
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java [deleted file]
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetConfig.java [moved from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java with 82% similarity]
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
hudi-common/src/main/java/org/apache/hudi/keygen/BaseKeyGenerator.java
hudi-common/src/main/java/org/apache/hudi/keygen/KeyGenerator.java
hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataFileSystemView.java
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
hudi-common/src/main/resources/hbase-site.xml
hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieListData.java [moved from hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieList.java with 64% similarity]
hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieListDataPairData.java [moved from hudi-common/src/test/java/org/apache/hudi/common/data/TestHoodieMapPair.java with 75% similarity]
hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java
hudi-common/src/test/java/org/apache/hudi/common/model/TestAWSDmsAvroPayload.java [moved from hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java with 97% similarity]
hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieFileGroup.java
hudi-common/src/test/java/org/apache/hudi/common/model/TestOverwriteNonDefaultsWithLatestAvroPayload.java
hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java
hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java
hudi-common/src/test/java/org/apache/hudi/common/testutils/MockHoodieTimeline.java
hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/HdfsTestService.java
hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/MiniClusterUtil.java
hudi-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java
hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java
hudi-common/src/test/java/org/apache/hudi/internal/schema/utils/TestAvroSchemaEvolutionUtils.java
hudi-examples/hudi-examples-common/pom.xml
hudi-examples/hudi-examples-flink/pom.xml
hudi-examples/hudi-examples-flink/src/main/java/org/apache/hudi/examples/quickstart/source/ContinuousFileSource.java
hudi-examples/hudi-examples-flink/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieFlinkQuickstart.java
hudi-examples/hudi-examples-java/pom.xml
hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java
hudi-examples/hudi-examples-spark/pom.xml
hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/common/HoodieExampleSparkUtils.java
hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java
hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java
hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java
hudi-examples/pom.xml
hudi-flink-datasource/hudi-flink/pom.xml
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/HadoopConfigurations.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/CleanFunction.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunction.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/RowDataKeyGen.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperatorGen.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/common/AbstractStreamWriteFunction.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionCommitSink.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionPlanOperator.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionPlanSourceFunction.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/FlinkCompactionConfig.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/HoodieFlinkCompactor.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/strategy/CompactionPlanStrategies.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/strategy/CompactionPlanStrategy.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/NonThrownExecutor.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadMonitoringFunction.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/ColumnStatsIndices.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/ExpressionEvaluator.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/CatalogOptions.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HiveSchemaUtils.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalogFactory.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalogUtil.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TypeInfoLogicalTypeVisitor.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroSchemaConverter.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/AvroToRowDataConverters.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/DataTypeUtils.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ExpressionUtils.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/HoodiePipeline.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/RowDataProjection.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnRead.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bulk/TestRowDataKeyGen.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/TestCompactionPlanStrategy.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestHiveSyncContext.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/stats/TestColumnStatsIndices.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/stats/TestExpressionEvaluator.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/HoodieCatalogTestUtils.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalogFactory.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/format/TestInputFormat.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestAvroSchemaConverter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestCompactionUtil.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestViewStorageProperties.java
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java
hudi-flink-datasource/hudi-flink/src/test/resources/hive-site.xml [new file with mode: 0644]
hudi-flink-datasource/hudi-flink/src/test/resources/test-catalog-factory-conf/hive-site.xml [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.13.x/pom.xml
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java with 100% similarity]
hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java [moved from hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java with 98% similarity]
hudi-flink-datasource/hudi-flink1.14.x/pom.xml
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/pom.xml [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/Utils.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java [new file with mode: 0644]
hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java [new file with mode: 0644]
hudi-flink-datasource/pom.xml
hudi-gcp/pom.xml
hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java
hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncConfig.java
hudi-hadoop-mr/pom.xml
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieCopyOnWriteTableInputFormat.java [new file with mode: 0644]
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java [new file with mode: 0644]
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
hudi-integ-test/pom.xml
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteWriter.java
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DeltaConfig.java
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/DagUtils.java
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseQueryNode.java [new file with mode: 0644]
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/HiveQueryNode.java
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/PrestoQueryNode.java [new file with mode: 0644]
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/TrinoQueryNode.java [new file with mode: 0644]
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/helpers/HiveServiceProvider.java
hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/job/TestHoodieTestSuiteJob.java
hudi-integ-test/src/test/resources/unit-test-cow-dag.yaml
hudi-kafka-connect/pom.xml
hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java
hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectWriterProvider.java
hudi-spark-datasource/README.md
hudi-spark-datasource/hudi-spark-common/pom.xml
hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java [deleted file]
hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/SparkRowWriteHelper.java [deleted file]
hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/internal/BulkInsertDataInternalWriterHelper.java
hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/internal/DataSourceInternalWriterHelper.java
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieCLIUtils.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieCatalystUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDatasetUtils.scala [deleted file]
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieUnsafeUtils.scala [moved from hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieUnsafeRDDUtils.scala with 50% similarity]
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/SqlTypedRecord.scala
hudi-spark-datasource/hudi-spark/pom.xml
hudi-spark-datasource/hudi-spark/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlCommon.g4
hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/BootstrapExecutorUtils.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/SchemaProvider.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/payload/AWSDmsAvroPayload.java
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/DeDupeType.scala [moved from hudi-common/src/main/java/org/apache/hudi/TypeUtils.java with 66% similarity]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/DedupeSparkJob.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/SparkHelpers.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/BaseProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/CommitsCompareProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/CopyToTableProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/CreateMetadataTableProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/CreateSavepointProcedure.scala [moved from hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/CreateSavepointsProcedure.scala with 88% similarity]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/DeleteMarkerProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/DeleteMetadataTableProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/DeleteSavepointProcedure.scala [moved from hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/DeleteSavepointsProcedure.scala with 89% similarity]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HdfsParquetImportProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/InitMetadataTableProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairAddpartitionmetaProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairCorruptedCleanFilesProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairDeduplicateProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairMigratePartitionMetaProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RollbackToInstantTimeProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RollbackToSavepointProcedure.scala [moved from hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RollbackSavepointsProcedure.scala with 89% similarity]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunBootstrapProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunCleanProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunClusteringProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowArchivedCommitsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowBootstrapMappingProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowBootstrapPartitionsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCommitFilesProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCommitPartitionsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCommitWriteStatsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCommitsProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFileSystemViewProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFsPathDetailProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowMetadataTableFilesProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowMetadataTablePartitionsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowMetadataTableStatsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/StatsFileSizeProcedure.scala
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/UpgradeOrDowngradeProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ValidateHoodieSyncProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ValidateMetadataTableFilesProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/parser/HoodieCommonSqlParser.scala
hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaApp.java
hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaGenerateApp.java
hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteRecordGenerator.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java
hudi-spark-datasource/hudi-spark/src/test/resources/index/colstats/cow-updated2-column-stats-index-table.json [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/resources/index/colstats/mor-updated2-column-stats-index-table.json [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/resources/index/colstats/update-input-table-json/part-00000-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/resources/table-config.properties [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/ScalaAssertionSupport.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestConvertFilterToCatalystExpression.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestGenericRecordAndRowConsistency.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieRelations.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStructuredStreaming.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/avro/TestAvroSerDe.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/avro/TestSchemaConverters.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteFromTable.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieOptionConfig.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestNestedSchemaPruningOptimization.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSqlConf.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestTimeTravelTable.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestUpdateTable.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/HoodieSparkProcedureTestBase.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestBootstrapProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallCommandParser.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCallProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCleanProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestClusteringProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCommitsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCompactionProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestCopyToTableProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestExportInstantsProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestFsViewProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHoodieLogFileProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestMetadataProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestSavepointsProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestShowFsPathDetailProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestStatsProcedure.scala
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestUpgradeOrDowngradeProcedure.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark2-common/pom.xml
hudi-spark-datasource/hudi-spark2/pom.xml
hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java
hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/hudi/Spark2HoodieFileScanRDD.scala [moved from hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala with 71% similarity]
hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystPlanUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala
hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
hudi-spark-datasource/hudi-spark3-common/pom.xml
hudi-spark-datasource/hudi-spark3-common/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java
hudi-spark-datasource/hudi-spark3-common/src/main/java/org/apache/hudi/spark3/internal/ReflectUtil.java
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/HoodieSpark3CatalystPlanUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/HoodieSpark3SqlUtils.scala [deleted file]
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala
hudi-spark-datasource/hudi-spark3.1.x/pom.xml
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/hudi/Spark31HoodieFileScanRDD.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31CatalystExpressionUtils.scala [moved from hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark3_1CatalystExpressionUtils.scala with 98% similarity]
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31CatalystPlanUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_1Adapter.scala
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/Spark312ResolveHudiAlterTableCommand.scala [moved from hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommand312.scala with 99% similarity]
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_1ExtendedSqlParser.scala [moved from hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark312ExtendedSqlParser.scala with 95% similarity]
hudi-spark-datasource/hudi-spark3.2.x/pom.xml [moved from hudi-spark-datasource/hudi-spark3/pom.xml with 96% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/antlr4/imports/SqlBase.g4 [moved from hudi-spark-datasource/hudi-spark3/src/main/antlr4/imports/SqlBase.g4 with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlBase.g4 [moved from hudi-spark-datasource/hudi-spark3/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlBase.g4 with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieVectorizedParquetRecordReader.java [moved from hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieVectorizedParquetRecordReader.java with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister [moved from hudi-spark-datasource/hudi-spark3/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/hudi/Spark32HoodieFileScanRDD.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/hudi/Spark3DefaultSource.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/hudi/Spark3DefaultSource.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32CatalystExpressionUtils.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/HoodieSpark3_2CatalystExpressionUtils.scala with 98% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32CatalystPlanUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_2Adapter.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/adapter/Spark3_2Adapter.scala with 54% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala with 99% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala with 93% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroDeserializer.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroDeserializer.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroSerializer.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_2AvroSerializer.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TimeTravelRelation.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TimeTravelRelation.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/connector/catalog/HoodieIdentifier.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/connector/catalog/HoodieIdentifier.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark32NestedSchemaPruning.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32DataSourceUtils.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32DataSourceUtils.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/Spark32ResolveHudiAlterTableCommand.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/ResolveHudiAlterTableCommandSpark32.scala with 98% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala with 96% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/catalog/BasicStagedTable.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/BasicStagedTable.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieInternalV2Table.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieInternalV2Table.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/catalog/TableCreationMode.java [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/TableCreationMode.java with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_2ExtendedSqlAstBuilder.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_2ExtendedSqlAstBuilder.scala with 99% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_2ExtendedSqlParser.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_2ExtendedSqlParser.scala with 88% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java [moved from hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java [moved from hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java [moved from hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/test/resources/log4j-surefire-quiet.properties [moved from hudi-spark-datasource/hudi-spark3/src/test/resources/log4j-surefire-quiet.properties with 100% similarity]
hudi-spark-datasource/hudi-spark3.2.x/src/test/resources/log4j-surefire.properties [moved from hudi-spark-datasource/hudi-spark3/src/test/resources/log4j-surefire.properties with 100% similarity]
hudi-spark-datasource/hudi-spark3.3.x/pom.xml [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/antlr4/imports/SqlBase.g4 [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlBase.g4 [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark33HoodieVectorizedParquetRecordReader.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/hudi/Spark33HoodieFileScanRDD.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/hudi/Spark3DefaultSource.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystExpressionUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystPlanUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_3Adapter.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_3AvroDeserializer.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_3AvroSerializer.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TimeTravelRelation.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/connector/catalog/HoodieIdentifier.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark33NestedSchemaPruning.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33DataSourceUtils.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33HoodieParquetFileFormat.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/Spark33ResolveHudiAlterTableCommand.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/catalog/BasicStagedTable.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala [moved from hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala with 93% similarity]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieInternalV2Table.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/catalog/TableCreationMode.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlAstBuilder.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlParser.scala [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/test/resources/log4j-surefire-quiet.properties [new file with mode: 0644]
hudi-spark-datasource/hudi-spark3.3.x/src/test/resources/log4j-surefire.properties [new file with mode: 0644]
hudi-spark-datasource/pom.xml
hudi-sync/hudi-adb-sync/pom.xml
hudi-sync/hudi-adb-sync/src/main/java/org/apache/hudi/sync/adb/AbstractAdbSyncHoodieClient.java [deleted file]
hudi-sync/hudi-adb-sync/src/main/java/org/apache/hudi/sync/adb/AdbSyncConfig.java
hudi-sync/hudi-adb-sync/src/main/java/org/apache/hudi/sync/adb/AdbSyncTool.java
hudi-sync/hudi-adb-sync/src/main/java/org/apache/hudi/sync/adb/HoodieAdbJdbcClient.java
hudi-sync/hudi-adb-sync/src/test/java/org/apache/hudi/sync/adb/TestAdbSyncConfig.java
hudi-sync/hudi-datahub-sync/pom.xml
hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncClient.java
hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/DataHubSyncTool.java
hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java
hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java
hudi-sync/hudi-datahub-sync/src/test/java/org/apache/hudi/sync/datahub/config/TestDataHubSyncConfig.java
hudi-sync/hudi-hive-sync/pom.xml
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/AbstractHiveSyncHoodieClient.java [deleted file]
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java [new file with mode: 0644]
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java [moved from hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java with 64% similarity]
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/MultiPartKeysValueExtractor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/NonPartitionedExtractor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SlashEncodedDayPartitionValueExtractor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SlashEncodedHourPartitionValueExtractor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/GlobalHiveSyncConfig.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/GlobalHiveSyncTool.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java [moved from hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitConfig.java with 65% similarity]
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitTool.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/ReplicationStateSync.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/transaction/lock/HiveMetastoreBasedLockProvider.java [moved from hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveMetastoreBasedLockProvider.java with 95% similarity]
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HivePartitionUtil.java
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncGlobalCommitTool.java [deleted file]
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/functional/TestHiveMetastoreBasedLockProvider.java
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/replication/TestHiveSyncGlobalCommitTool.java [new file with mode: 0644]
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveSyncFunctionalTestHarness.java
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java [moved from hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/TestCluster.java with 86% similarity]
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java
hudi-sync/hudi-sync-common/pom.xml
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java [deleted file]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java [new file with mode: 0644]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java [new file with mode: 0644]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncTool.java [new file with mode: 0644]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/FieldSchema.java [new file with mode: 0644]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/PartitionEvent.java [new file with mode: 0644]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/PartitionValueExtractor.java [moved from hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/PartitionValueExtractor.java with 69% similarity]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ConfigUtils.java
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SparkDataSourceTableUtils.java [moved from hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncTool.java with 69% similarity]
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SyncUtilHelpers.java
hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java [new file with mode: 0644]
hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestConfigUtils.java [new file with mode: 0644]
hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestSyncUtilHelpers.java
hudi-sync/pom.xml
hudi-timeline-service/pom.xml
hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java
hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerDirState.java
hudi-timeline-service/src/test/java/org/apache/hudi/timeline/service/functional/TestRemoteHoodieTableFileSystemView.java
hudi-utilities/pom.xml
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java [new file with mode: 0644]
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java
hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java
hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java
hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/AWSDmsTransformer.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHiveIncrementalPuller.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieIndexer.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestAWSDatabaseMigrationServiceSource.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java
hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
hudi-utilities/src/test/resources/hive-site.xml [new file with mode: 0644]
packaging/hudi-aws-bundle/pom.xml
packaging/hudi-datahub-sync-bundle/pom.xml
packaging/hudi-flink-bundle/pom.xml
packaging/hudi-gcp-bundle/pom.xml
packaging/hudi-hadoop-mr-bundle/pom.xml
packaging/hudi-hive-sync-bundle/pom.xml
packaging/hudi-integ-test-bundle/pom.xml
packaging/hudi-kafka-connect-bundle/pom.xml
packaging/hudi-presto-bundle/pom.xml
packaging/hudi-spark-bundle/pom.xml
packaging/hudi-timeline-server-bundle/pom.xml
packaging/hudi-trino-bundle/pom.xml
packaging/hudi-utilities-bundle/pom.xml
packaging/hudi-utilities-slim-bundle/pom.xml
pom.xml
rfc/README.md
rfc/rfc-51/arch.jpg [new file with mode: 0644]
rfc/rfc-51/points.jpg [new file with mode: 0644]
rfc/rfc-51/query_cdc_on_mor.jpg [new file with mode: 0644]
rfc/rfc-51/read_cdc_log_file.jpg [new file with mode: 0644]
rfc/rfc-51/rfc-51.md [new file with mode: 0644]
rfc/rfc-51/scenario-definition.jpg [new file with mode: 0644]
rfc/rfc-55/hudi-sync-class-diagram.png [new file with mode: 0644]
rfc/rfc-55/hudi-sync-flows.png [new file with mode: 0644]
rfc/rfc-55/rfc-55.md [new file with mode: 0644]
scripts/release/deploy_staging_jars.sh
scripts/release/validate_staged_bundles.sh

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 29ec8cce2916de5401628ca89a1b18ac86ed7d5c..2ec8b61d05205311482bd139244b103f8e1b78d3 100644 (file)
@@ -1,44 +1,18 @@
-## *Tips*
-- *Thank you very much for contributing to Apache Hudi.*
-- *Please review https://hudi.apache.org/contribute/how-to-contribute before opening a pull request.*
+### Change Logs
 
-## What is the purpose of the pull request
+_Describe context and summary for this change. Highlight if any code was copied._
 
-*(For example: This pull request adds quick-start document.)*
+### Impact
 
-## Brief change log
+_Describe any public API or user-facing feature change or any performance impact._
 
-*(for example:)*
-  - *Modify AnnotationLocation checkstyle rule in checkstyle.xml*
+**Risk level: none | low | medium | high**
 
-## Verify this pull request
+_Choose one. If medium or high, explain what verification was done to mitigate the risks._
 
-*(Please pick either of the following options)*
+### Contributor's checklist
 
-This pull request is a trivial rework / code cleanup without any test coverage.
-
-*(or)*
-
-This pull request is already covered by existing tests, such as *(please describe tests)*.
-
-(or)
-
-This change added tests and can be verified as follows:
-
-*(example:)*
-
-  - *Added integration tests for end-to-end.*
-  - *Added HoodieClientWriteTest to verify the change.*
-  - *Manually verified the change by running a job locally.*
-
-## Committer checklist
-
- - [ ] Has a corresponding JIRA in PR title & commit
- - [ ] Commit message is descriptive of the change
- - [ ] CI is green
-
- - [ ] Necessary doc changes done or have another open PR
-       
- - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA.
+- [ ] Read through [contributor's guide](https://hudi.apache.org/contribute/how-to-contribute)
+- [ ] Change Logs and Impact were stated clearly
+- [ ] Adequate tests were added if applicable
+- [ ] CI passed
diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
index b76a465d7128c360531bcb5018b16435cef35309..3aa9bdbcc66a209f92cc1c7c29c46a5e84c1ea81 100644 (file)
@@ -36,6 +36,10 @@ jobs:
             sparkProfile: "spark3.2"
             flinkProfile: "flink1.14"
 
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.3"
+            flinkProfile: "flink1.14"
+
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK 8
@@ -56,7 +60,6 @@ jobs:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
           FLINK_PROFILE: ${{ matrix.flinkProfile }}
-        if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 before hadoop upgrade to 3.x
         run:
           mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
       - name: Spark SQL Test
@@ -66,4 +69,4 @@ jobs:
           FLINK_PROFILE: ${{ matrix.flinkProfile }}
         if: ${{ !endsWith(env.SPARK_PROFILE, '2.4') }} # skip test spark 2.4 as it's covered by Azure CI
         run:
-          mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" '-Dtest=org.apache.spark.sql.hudi.Test*' -pl hudi-spark-datasource/hudi-spark
+          mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" '-Dtest=Test*' -pl hudi-spark-datasource/hudi-spark
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index f10e243bd523a4fc4f6845345f30c49ca373b4f0..7cf83fed50e2209d4d431abacfb0145b58fa2f53 100644 (file)
@@ -74,7 +74,7 @@ parameters:
 variables:
   BUILD_PROFILES: '-Dscala-2.11 -Dspark2 -Dflink1.14'
   PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true'
-  MVN_OPTS_INSTALL: '-T 2.5C -DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS)'
+  MVN_OPTS_INSTALL: '-DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS)'
   MVN_OPTS_TEST: '-fae $(BUILD_PROFILES) $(PLUGIN_OPTS)'
   SPARK_VERSION: '2.4.4'
   HADOOP_VERSION: '2.7'
@@ -89,9 +89,9 @@ stages:
     jobs:
       - job: UT_FT_1
         displayName: UT FT common & flink & UT client/spark-client
-        timeoutInMinutes: '120'
+        timeoutInMinutes: '150'
         steps:
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: maven install
             inputs:
               mavenPomFile: 'pom.xml'
@@ -99,7 +99,7 @@ stages:
               options: $(MVN_OPTS_INSTALL)
               publishJUnitResults: false
               jdkVersionOption: '1.8'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: UT common flink client/spark-client
             inputs:
               mavenPomFile: 'pom.xml'
@@ -108,7 +108,7 @@ stages:
               publishJUnitResults: false
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: FT common flink
             inputs:
               mavenPomFile: 'pom.xml'
@@ -119,9 +119,9 @@ stages:
               mavenOptions: '-Xmx4g'
       - job: UT_FT_2
         displayName: FT client/spark-client
-        timeoutInMinutes: '120'
+        timeoutInMinutes: '150'
         steps:
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: maven install
             inputs:
               mavenPomFile: 'pom.xml'
@@ -129,7 +129,7 @@ stages:
               options: $(MVN_OPTS_INSTALL)
               publishJUnitResults: false
               jdkVersionOption: '1.8'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: FT client/spark-client
             inputs:
               mavenPomFile: 'pom.xml'
@@ -140,9 +140,9 @@ stages:
               mavenOptions: '-Xmx4g'
       - job: UT_FT_3
         displayName: UT FT clients & cli & utilities & sync
-        timeoutInMinutes: '120'
+        timeoutInMinutes: '150'
         steps:
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: maven install
             inputs:
               mavenPomFile: 'pom.xml'
@@ -150,7 +150,7 @@ stages:
               options: $(MVN_OPTS_INSTALL)
               publishJUnitResults: false
               jdkVersionOption: '1.8'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: UT clients & cli & utilities & sync
             inputs:
               mavenPomFile: 'pom.xml'
@@ -159,7 +159,7 @@ stages:
               publishJUnitResults: false
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: FT clients & cli & utilities & sync
             inputs:
               mavenPomFile: 'pom.xml'
@@ -170,9 +170,9 @@ stages:
               mavenOptions: '-Xmx4g'
       - job: UT_FT_4
         displayName: UT FT other modules
-        timeoutInMinutes: '120'
+        timeoutInMinutes: '150'
         steps:
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: maven install
             inputs:
               mavenPomFile: 'pom.xml'
@@ -180,7 +180,7 @@ stages:
               options: $(MVN_OPTS_INSTALL)
               publishJUnitResults: false
               jdkVersionOption: '1.8'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: UT other modules
             inputs:
               mavenPomFile: 'pom.xml'
@@ -189,7 +189,7 @@ stages:
               publishJUnitResults: false
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: FT other modules
             inputs:
               mavenPomFile: 'pom.xml'
@@ -200,9 +200,9 @@ stages:
               mavenOptions: '-Xmx4g'
       - job: IT
         displayName: IT modules
-        timeoutInMinutes: '120'
+        timeoutInMinutes: '150'
         steps:
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: maven install
             inputs:
               mavenPomFile: 'pom.xml'
@@ -210,7 +210,7 @@ stages:
               options: $(MVN_OPTS_INSTALL) -Pintegration-tests
               publishJUnitResults: false
               jdkVersionOption: '1.8'
-          - task: Maven@3
+          - task: Maven@3.205.1
             displayName: UT integ-test
             inputs:
               mavenPomFile: 'pom.xml'
diff --git a/docker/demo/config/hoodie-incr.properties b/docker/demo/config/hoodie-incr.properties
index 80f474b1e7716213d212897d06b25c67e53b9f50..c46ec48a40184484ee45f20cec30757fec08f43c 100644 (file)
@@ -28,5 +28,6 @@ hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test
 hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true
 # hive sync
 hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2
-hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000
-hoodie.datasource.hive_sync.partition_fields=partition
\ No newline at end of file
+hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
+hoodie.datasource.hive_sync.partition_fields=partition
+hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
diff --git a/docker/demo/config/test-suite/deltastreamer-hive-sync-presto.yaml b/docker/demo/config/test-suite/deltastreamer-hive-sync-presto.yaml
new file mode 100644 (file)
index 0000000..61ea13c
--- /dev/null
+++ b/docker/demo/config/test-suite/deltastreamer-hive-sync-presto.yaml
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dag_name: unit-test-cow-dag
+dag_rounds: 1
+dag_intermittent_delay_mins: 10
+dag_content:
+  first_insert:
+    config:
+      record_size: 70000
+      num_partitions_insert: 1
+      repeat_count: 2
+      num_records_insert: 100
+    type: InsertNode
+    deps: none
+  second_insert:
+    config:
+      record_size: 70000
+      num_partitions_insert: 1
+      repeat_count: 1
+      num_records_insert: 100
+    type: InsertNode
+    deps: first_insert
+  third_insert:
+    config:
+      record_size: 70000
+      num_partitions_insert: 1
+      repeat_count: 1
+      num_records_insert: 100
+    type: InsertNode
+    deps: second_insert
+  first_upsert:
+    config:
+      record_size: 70000
+      num_partitions_upsert: 1
+      repeat_count: 1
+      num_records_upsert: 100
+    type: UpsertNode
+    deps: third_insert
+  first_hive_sync:
+    config:
+      queue_name: "adhoc"
+      engine: "mr"
+    type: HiveSyncNode
+    deps: first_upsert
+  first_presto_query:
+    config:
+      presto_props:
+        prop1: "SET SESSION hive.parquet_use_column_names = true"
+      presto_queries:
+        query1: "select count(*) from testdb.table1"
+        result1: 400
+        query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
+        result2: 0
+    type: PrestoQueryNode
+    deps: first_hive_sync
+#  first_trino_query:
+#    config:
+#      trino_queries:
+#        query1: "select count(*) from testdb1.table1"
+#        result1: 300
+#        query2: "select count(*) from testdb1.table1 group by `_row_key` having count(*) > 1"
+#        result2: 0
+#    type: TrinoQueryNode
+#    deps: first_presto_query
\ No newline at end of file
diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands
index 5ea4729b932f2719434226e52325cdf4f8ec94d7..9ec586e49d8548a80cf45b4308d418795846448c 100644 (file)
@@ -21,7 +21,7 @@ import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.spark.sql.SaveMode;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.HoodieDataSourceHelpers;
-import org.apache.hudi.hive.HiveSyncConfig;
+import org.apache.hudi.hive.HiveSyncConfigHolder;
 import org.apache.hudi.sync.common.HoodieSyncConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
 import org.apache.hadoop.fs.FileSystem;
@@ -47,10 +47,10 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
     option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor").
     option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor").
     option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default").
-    option(HiveSyncConfig.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
-    option(HiveSyncConfig.HIVE_USER.key(), "hive").
-    option(HiveSyncConfig.HIVE_PASS.key(), "hive").
-    option(HiveSyncConfig.HIVE_SYNC_ENABLED.key(), "true").
+    option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
+    option(HiveSyncConfigHolder.HIVE_USER.key(), "hive").
+    option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive").
+    option(HiveSyncConfigHolder.HIVE_SYNC_ENABLED.key(), "true").
     option(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key(), "datestr").
     option(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), classOf[MultiPartKeysValueExtractor].getCanonicalName).
     option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key(), "true").
@@ -79,10 +79,10 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
     option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor_bs").
     option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor_bs").
     option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default").
-    option(HiveSyncConfig.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
-    option(HiveSyncConfig.HIVE_USER.key(), "hive").
-    option(HiveSyncConfig.HIVE_PASS.key(), "hive").
-    option(HiveSyncConfig.HIVE_SYNC_ENABLED.key(), "true").
+    option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
+    option(HiveSyncConfigHolder.HIVE_USER.key(), "hive").
+    option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive").
+    option(HiveSyncConfigHolder.HIVE_SYNC_ENABLED.key(), "true").
     option(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key(), "datestr").
     option(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), classOf[MultiPartKeysValueExtractor].getCanonicalName).
     option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key(), "true").
diff --git a/docker/hoodie/hadoop/base/pom.xml b/docker/hoodie/hadoop/base/pom.xml
index 4b1b024958121a9fcf2454db37f085f5cc399c0e..2e4e75ef5a20ba77df98f514d3b2473042d909e9 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/base_java11/pom.xml b/docker/hoodie/hadoop/base_java11/pom.xml
index a1181b53e07a02e65a381570510863c7f1670ffe..9871ccd264f85e32de4466ee86addb3a3a3ffb7b 100644 (file)
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/datanode/pom.xml b/docker/hoodie/hadoop/datanode/pom.xml
index 9fb5e222d3f2eae18bcfe7e311217d4e1e2ddd51..1a22fddc52db1f670ba992335abb9c1b38f3747d 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/historyserver/pom.xml b/docker/hoodie/hadoop/historyserver/pom.xml
index b439a88fbd2e6f3f81453a666841ea72f334b30f..dfdf3ab9a664fc89e10038ca46919d7414e675ba 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/hive_base/pom.xml b/docker/hoodie/hadoop/hive_base/pom.xml
index ff73965e02c7b529e6bb95b08039dcf8d9758d3d..033b15dd90b4ac1c75bc5483503d64bd644e9cb4 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/namenode/pom.xml b/docker/hoodie/hadoop/namenode/pom.xml
index 9ad31d9161d1ccb830409a70495d7a6e779dcb7a..4ea1ad42fd6197043188e9ad68b3d28085e27ca5 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml
index 3f4a0183d80f881ec4c0d47ac0c1449dd50e8d9c..80fac38d017990dad535de2ca56531a6141aebfe 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/docker/hoodie/hadoop/prestobase/pom.xml b/docker/hoodie/hadoop/prestobase/pom.xml
index 2ff23833599b2ed449c95195c268ebcfcbf158c5..b0bbeb663870d07e7c28ddc45d84a6469f290645 100644 (file)
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/spark_base/pom.xml b/docker/hoodie/hadoop/spark_base/pom.xml
index 8c7e8ac241c9dea93a01f98e136b91275d565a6d..6e79a3952bbf0fa584ad30dda519338e8daf7a9b 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/sparkadhoc/pom.xml b/docker/hoodie/hadoop/sparkadhoc/pom.xml
index d61eb170f67b0acd529ecd20abe6652a466bf8e2..65358506a11ae7b048c5795b80065f6cc8c9a838 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/sparkmaster/pom.xml b/docker/hoodie/hadoop/sparkmaster/pom.xml
index 40fb732cafe6a39f979e423a9aaafc22a0d9d654..cdaf644231974ed53d107de6014159d98e605b5f 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/sparkworker/pom.xml b/docker/hoodie/hadoop/sparkworker/pom.xml
index 3304a2e5a7e6e042c09914512a4f4f1b4ddf9146..68486ada6845c6b2501ac041353c353e5659c0f6 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/trinobase/pom.xml b/docker/hoodie/hadoop/trinobase/pom.xml
index 180a977915e91a3565c819191f2e3fb49cb13962..dea2efbab5ecfb15d17684bdbf19dc750be3a88a 100644 (file)
@@ -22,7 +22,7 @@
     <parent>
         <artifactId>hudi-hadoop-docker</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.12.0-SNAPSHOT</version>
+        <version>0.13.0-SNAPSHOT</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
     <packaging>pom</packaging>
diff --git a/docker/hoodie/hadoop/trinocoordinator/pom.xml b/docker/hoodie/hadoop/trinocoordinator/pom.xml
index aa199bad54db53a0d75c7ff67e413cb6684fa8d1..5b162e7f6d443d5842c8b8ae5aec29ad646de2ce 100644 (file)
@@ -22,7 +22,7 @@
     <parent>
         <artifactId>hudi-hadoop-docker</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.12.0-SNAPSHOT</version>
+        <version>0.13.0-SNAPSHOT</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
     <packaging>pom</packaging>
index 8825093846434e438108a3c90c84ad491dac7f56..f9a2197339eec62faa43c8c630e709a52b08b81b 100644 (file)
@@ -22,7 +22,7 @@
     <parent>
         <artifactId>hudi-hadoop-docker</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.12.0-SNAPSHOT</version>
+        <version>0.13.0-SNAPSHOT</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
     <packaging>pom</packaging>
index dc9653a62f916253c276c6a1606d61b7d2d6c613..0152583bad757d111682a09d1e0378aa37c35a11 100644 (file)
     <parent>
         <artifactId>hudi</artifactId>
         <groupId>org.apache.hudi</groupId>
-        <version>0.12.0-SNAPSHOT</version>
+        <version>0.13.0-SNAPSHOT</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 
     <artifactId>hudi-aws</artifactId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
 
     <name>hudi-aws</name>
     <packaging>jar</packaging>
 
         <!-- Logging -->
         <dependency>
-            <groupId>log4j</groupId>
-            <artifactId>log4j</artifactId>
+          <groupId>org.apache.logging.log4j</groupId>
+          <artifactId>log4j-api</artifactId>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.logging.log4j</groupId>
+          <artifactId>log4j-core</artifactId>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.logging.log4j</groupId>
+          <artifactId>log4j-1.2-api</artifactId>
         </dependency>
 
         <!-- Hadoop -->
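
Note: the Log4j 1 dependency is swapped for the Log4j 2 API, core, and 1.2 bridge across the modules in this commit. A minimal sketch of what the bridge buys, with a class name of our own choosing: legacy org.apache.log4j call sites keep compiling unchanged while events are routed to the Log4j 2 core.

    // Minimal sketch, assuming log4j-1.2-api is on the classpath.
    import org.apache.log4j.LogManager;  // 1.x API, now served by the bridge
    import org.apache.log4j.Logger;

    public class BridgeLoggingSketch {   // hypothetical class for illustration
      private static final Logger LOG = LogManager.getLogger(BridgeLoggingSketch.class);

      public static void main(String[] args) {
        LOG.info("Logged via the Log4j 1.x API, handled by the Log4j 2 core");
      }
    }
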
index e15a698f27e3a022cba516c4a56c5475de56dca0..79651154ed63b702c9532820fcd3b79b76542ae8 100644 (file)
@@ -21,8 +21,8 @@ package org.apache.hudi.aws.sync;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.Option;
-import org.apache.hudi.hive.AbstractHiveSyncHoodieClient;
 import org.apache.hudi.hive.HiveSyncConfig;
+import org.apache.hudi.sync.common.HoodieSyncClient;
 import org.apache.hudi.sync.common.model.Partition;
 
 import com.amazonaws.services.glue.AWSGlue;
@@ -50,10 +50,6 @@ import com.amazonaws.services.glue.model.StorageDescriptor;
 import com.amazonaws.services.glue.model.Table;
 import com.amazonaws.services.glue.model.TableInput;
 import com.amazonaws.services.glue.model.UpdateTableRequest;
-import org.apache.avro.Schema;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.parquet.schema.MessageType;
@@ -68,9 +64,13 @@ import java.util.Objects;
 import java.util.stream.Collectors;
 
 import static org.apache.hudi.aws.utils.S3Utils.s3aToS3;
-import static org.apache.hudi.common.util.MapUtils.nonEmpty;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE;
+import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty;
 import static org.apache.hudi.hive.util.HiveSchemaUtil.getPartitionKeyType;
 import static org.apache.hudi.hive.util.HiveSchemaUtil.parquetSchemaToMapSchema;
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_FIELDS;
 import static org.apache.hudi.sync.common.util.TableUtils.tableId;
 
 /**
@@ -79,7 +79,7 @@ import static org.apache.hudi.sync.common.util.TableUtils.tableId;
  *
  * @Experimental
  */
-public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
+public class AWSGlueCatalogSyncClient extends HoodieSyncClient {
 
   private static final Logger LOG = LogManager.getLogger(AWSGlueCatalogSyncClient.class);
   private static final int MAX_PARTITIONS_PER_REQUEST = 100;
@@ -87,10 +87,10 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
   private final AWSGlue awsGlue;
   private final String databaseName;
 
-  public AWSGlueCatalogSyncClient(HiveSyncConfig syncConfig, Configuration hadoopConf, FileSystem fs) {
-    super(syncConfig, hadoopConf, fs);
+  public AWSGlueCatalogSyncClient(HiveSyncConfig config) {
+    super(config);
     this.awsGlue = AWSGlueClientBuilder.standard().build();
-    this.databaseName = syncConfig.databaseName;
+    this.databaseName = config.getStringOrDefault(META_SYNC_DATABASE_NAME);
   }
 
   @Override
@@ -126,7 +126,7 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
       StorageDescriptor sd = table.getStorageDescriptor();
       List<PartitionInput> partitionInputs = partitionsToAdd.stream().map(partition -> {
         StorageDescriptor partitionSd = sd.clone();
-        String fullPartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition).toString();
+        String fullPartitionPath = FSUtils.getPartitionPath(getBasePath(), partition).toString();
         List<String> partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition);
         partitionSd.setLocation(fullPartitionPath);
         return new PartitionInput().withValues(partitionValues).withStorageDescriptor(partitionSd);
@@ -160,7 +160,7 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
       StorageDescriptor sd = table.getStorageDescriptor();
       List<BatchUpdatePartitionRequestEntry> updatePartitionEntries = changedPartitions.stream().map(partition -> {
         StorageDescriptor partitionSd = sd.clone();
-        String fullPartitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition).toString();
+        String fullPartitionPath = FSUtils.getPartitionPath(getBasePath(), partition).toString();
         List<String> partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition);
         sd.setLocation(fullPartitionPath);
         PartitionInput partitionInput = new PartitionInput().withValues(partitionValues).withStorageDescriptor(partitionSd);
@@ -193,27 +193,24 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
    */
   @Override
   public void updateTableProperties(String tableName, Map<String, String> tableProperties) {
-    if (nonEmpty(tableProperties)) {
+    if (isNullOrEmpty(tableProperties)) {
       return;
     }
     try {
-      updateTableParameters(awsGlue, databaseName, tableName, tableProperties, true);
+      updateTableParameters(awsGlue, databaseName, tableName, tableProperties, false);
     } catch (Exception e) {
       throw new HoodieGlueSyncException("Fail to update properties for table " + tableId(databaseName, tableName), e);
     }
   }
 
   @Override
-  public void updateTableDefinition(String tableName, MessageType newSchema) {
+  public void updateTableSchema(String tableName, MessageType newSchema) {
     // TODO: Cascade is set in Hive meta sync, but we need to investigate how to configure it for Glue meta sync
-    boolean cascade = syncConfig.partitionFields.size() > 0;
+    boolean cascade = config.getSplitStrings(META_SYNC_PARTITION_FIELDS).size() > 0;
     try {
       Table table = getTable(awsGlue, databaseName, tableName);
-      Map<String, String> newSchemaMap = parquetSchemaToMapSchema(newSchema, syncConfig.supportTimestamp, false);
-      List<Column> newColumns = newSchemaMap.keySet().stream().map(key -> {
-        String keyType = getPartitionKeyType(newSchemaMap, key);
-        return new Column().withName(key).withType(keyType.toLowerCase()).withComment("");
-      }).collect(Collectors.toList());
+      Map<String, String> newSchemaMap = parquetSchemaToMapSchema(newSchema, config.getBoolean(HIVE_SUPPORT_TIMESTAMP_TYPE), false);
+      List<Column> newColumns = getColumnsFromSchema(newSchemaMap);
       StorageDescriptor sd = table.getStorageDescriptor();
       sd.setColumns(newColumns);
 
@@ -237,21 +234,6 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
     }
   }
 
-  @Override
-  public List<FieldSchema> getTableCommentUsingMetastoreClient(String tableName) {
-    throw new UnsupportedOperationException("Not supported: `getTableCommentUsingMetastoreClient`");
-  }
-
-  @Override
-  public void updateTableComments(String tableName, List<FieldSchema> oldSchema, List<Schema.Field> newSchema) {
-    throw new UnsupportedOperationException("Not supported: `updateTableComments`");
-  }
-
-  @Override
-  public void updateTableComments(String tableName, List<FieldSchema> oldSchema, Map<String, String> newComments) {
-    throw new UnsupportedOperationException("Not supported: `updateTableComments`");
-  }
-
   @Override
   public void createTable(String tableName,
       MessageType storageSchema,
@@ -265,26 +247,18 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
     }
     CreateTableRequest request = new CreateTableRequest();
     Map<String, String> params = new HashMap<>();
-    if (!syncConfig.createManagedTable) {
+    if (!config.getBoolean(HIVE_CREATE_MANAGED_TABLE)) {
       params.put("EXTERNAL", "TRUE");
     }
     params.putAll(tableProperties);
 
     try {
-      Map<String, String> mapSchema = parquetSchemaToMapSchema(storageSchema, syncConfig.supportTimestamp, false);
+      Map<String, String> mapSchema = parquetSchemaToMapSchema(storageSchema, config.getBoolean(HIVE_SUPPORT_TIMESTAMP_TYPE), false);
 
-      List<Column> schemaWithoutPartitionKeys = new ArrayList<>();
-      for (String key : mapSchema.keySet()) {
-        String keyType = getPartitionKeyType(mapSchema, key);
-        Column column = new Column().withName(key).withType(keyType.toLowerCase()).withComment("");
-        // In Glue, the full schema should exclude the partition keys
-        if (!syncConfig.partitionFields.contains(key)) {
-          schemaWithoutPartitionKeys.add(column);
-        }
-      }
+      List<Column> schemaWithoutPartitionKeys = getColumnsFromSchema(mapSchema);
 
       // now create the schema partition
-      List<Column> schemaPartitionKeys = syncConfig.partitionFields.stream().map(partitionKey -> {
+      List<Column> schemaPartitionKeys = config.getSplitStrings(META_SYNC_PARTITION_FIELDS).stream().map(partitionKey -> {
         String keyType = getPartitionKeyType(mapSchema, partitionKey);
         return new Column().withName(partitionKey).withType(keyType.toLowerCase()).withComment("");
       }).collect(Collectors.toList());
@@ -293,7 +267,7 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
       serdeProperties.put("serialization.format", "1");
       storageDescriptor
           .withSerdeInfo(new SerDeInfo().withSerializationLibrary(serdeClass).withParameters(serdeProperties))
-          .withLocation(s3aToS3(syncConfig.basePath))
+          .withLocation(s3aToS3(getBasePath()))
           .withInputFormat(inputFormatClass)
           .withOutputFormat(outputFormatClass)
           .withColumns(schemaWithoutPartitionKeys);
@@ -320,7 +294,7 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
   }
 
   @Override
-  public Map<String, String> getTableSchema(String tableName) {
+  public Map<String, String> getMetastoreSchema(String tableName) {
     try {
       // GlueMetastoreClient returns partition keys separate from Columns, hence get both and merge to
       // get the Schema of the table.
@@ -340,11 +314,6 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
     }
   }
 
-  @Override
-  public boolean doesTableExist(String tableName) {
-    return tableExists(tableName);
-  }
-
   @Override
   public boolean tableExists(String tableName) {
     GetTableRequest request = new GetTableRequest()
@@ -412,11 +381,11 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
 
   @Override
   public void updateLastCommitTimeSynced(String tableName) {
-    if (!activeTimeline.lastInstant().isPresent()) {
+    if (!getActiveTimeline().lastInstant().isPresent()) {
       LOG.warn("No commit in active timeline.");
       return;
     }
-    final String lastCommitTimestamp = activeTimeline.lastInstant().get().getTimestamp();
+    final String lastCommitTimestamp = getActiveTimeline().lastInstant().get().getTimestamp();
     try {
       updateTableParameters(awsGlue, databaseName, tableName, Collections.singletonMap(HOODIE_LAST_COMMIT_TIME_SYNC, lastCommitTimestamp), false);
     } catch (Exception e) {
@@ -439,6 +408,19 @@ public class AWSGlueCatalogSyncClient extends AbstractHiveSyncHoodieClient {
     throw new UnsupportedOperationException("Not supported: `deleteLastReplicatedTimeStamp`");
   }
 
+  private List<Column> getColumnsFromSchema(Map<String, String> mapSchema) {
+    List<Column> cols = new ArrayList<>();
+    for (String key : mapSchema.keySet()) {
+      // In Glue, the full schema should exclude the partition keys
+      if (!config.getSplitStrings(META_SYNC_PARTITION_FIELDS).contains(key)) {
+        String keyType = getPartitionKeyType(mapSchema, key);
+        Column column = new Column().withName(key).withType(keyType.toLowerCase()).withComment("");
+        cols.add(column);
+      }
+    }
+    return cols;
+  }
+
   private enum TableType {
     MANAGED_TABLE,
     EXTERNAL_TABLE,
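
Note: the refactored client reads all settings through typed accessors on HiveSyncConfig instead of public fields. A hedged sketch of the pattern; the accessor calls mirror the hunks above, while the property values and the single-argument constructor shape are assumptions for illustration.

    import org.apache.hudi.hive.HiveSyncConfig;
    import java.util.List;
    import java.util.Properties;
    import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE;
    import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
    import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_FIELDS;

    public class TypedConfigSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty(META_SYNC_DATABASE_NAME.key(), "analytics");    // illustrative values
        props.setProperty(META_SYNC_PARTITION_FIELDS.key(), "year,month");
        HiveSyncConfig config = new HiveSyncConfig(props);                // constructor shape assumed
        String databaseName = config.getStringOrDefault(META_SYNC_DATABASE_NAME);
        List<String> partitionFields = config.getSplitStrings(META_SYNC_PARTITION_FIELDS);
        boolean supportTimestamp = config.getBoolean(HIVE_SUPPORT_TIMESTAMP_TYPE);
        System.out.println(databaseName + " " + partitionFields + " " + supportTimestamp);
      }
    }
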
index bb1be377c9caebe4282f997f9f9c8fcac377cf60..b8f0d565df7f773739302e7e9efecb8d0a90ab26 100644 (file)
 
 package org.apache.hudi.aws.sync;
 
-import org.apache.hudi.common.config.TypedProperties;
-import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.hive.HiveSyncConfig;
 import org.apache.hudi.hive.HiveSyncTool;
 
 import com.beust.jcommander.JCommander;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hive.conf.HiveConf;
+
+import java.util.Properties;
 
 /**
  * Currently Experimental. Utility class that implements syncing a Hudi Table with the
  * AWS Glue Data Catalog (https://docs.aws.amazon.com/glue/latest/dg/populate-data-catalog.html)
  * to enable querying via Glue ETLs, Athena etc.
- *
+ * <p>
  * Extends HiveSyncTool since most logic is similar to Hive syncing,
  * except using a different client {@link AWSGlueCatalogSyncClient} that implements
  * the necessary functionality using Glue APIs.
@@ -41,30 +39,23 @@ import org.apache.hadoop.hive.conf.HiveConf;
  */
 public class AwsGlueCatalogSyncTool extends HiveSyncTool {
 
-  public AwsGlueCatalogSyncTool(TypedProperties props, Configuration conf, FileSystem fs) {
-    super(props, new HiveConf(conf, HiveConf.class), fs);
-  }
-
-  public AwsGlueCatalogSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fs) {
-    super(hiveSyncConfig, hiveConf, fs);
+  public AwsGlueCatalogSyncTool(Properties props, Configuration hadoopConf) {
+    super(props, hadoopConf);
   }
 
   @Override
-  protected void initClient(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf) {
-    hoodieHiveClient = new AWSGlueCatalogSyncClient(hiveSyncConfig, hiveConf, fs);
+  protected void initSyncClient(HiveSyncConfig hiveSyncConfig) {
+    syncClient = new AWSGlueCatalogSyncClient(hiveSyncConfig);
   }
 
   public static void main(String[] args) {
-    // parse the params
-    final HiveSyncConfig cfg = new HiveSyncConfig();
-    JCommander cmd = new JCommander(cfg, null, args);
-    if (cfg.help || args.length == 0) {
+    final HiveSyncConfig.HiveSyncConfigParams params = new HiveSyncConfig.HiveSyncConfigParams();
+    JCommander cmd = JCommander.newBuilder().addObject(params).build();
+    cmd.parse(args);
+    if (params.isHelp()) {
       cmd.usage();
-      System.exit(1);
+      System.exit(0);
     }
-    FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration());
-    HiveConf hiveConf = new HiveConf();
-    hiveConf.addResource(fs.getConf());
-    new AwsGlueCatalogSyncTool(cfg, hiveConf, fs).syncHoodieTable();
+    new AwsGlueCatalogSyncTool(params.toProps(), new Configuration()).syncHoodieTable();
   }
 }
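
Note: the entry point now takes a plain Properties-and-Configuration pair instead of a JCommander-populated HiveSyncConfig plus a hand-built FileSystem and HiveConf. A hedged sketch of invoking it programmatically; the property key strings are assumptions for illustration (HoodieSyncConfig/HiveSyncConfigHolder hold the canonical constants).

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hudi.aws.sync.AwsGlueCatalogSyncTool;
    import java.util.Properties;

    public class GlueSyncSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        // Key strings assumed for illustration.
        props.setProperty("hoodie.datasource.hive_sync.database", "analytics");
        props.setProperty("hoodie.datasource.hive_sync.table", "trips");
        props.setProperty("hoodie.datasource.meta.sync.base.path", "s3://bucket/tables/trips");
        new AwsGlueCatalogSyncTool(props, new Configuration()).syncHoodieTable();
      }
    }
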
index e3111f3fb9a0c2029ea1207398b99cd78ae56ef5..f4b743f1e2562e7fdb960b9fc5df1ecad3be82e6 100644 (file)
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
 
     <!-- Logging -->
     <dependency>
-      <groupId>log4j</groupId>
-      <artifactId>log4j</artifactId>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-1.2-api</artifactId>
     </dependency>
 
     <dependency>
index 0827e5d0d6dd11f4b5e7699127573f51b5a48cde..b2b6940bef3cdc557e89af62842e8f7a55cc8066 100644 (file)
@@ -233,7 +233,7 @@ public class CommitsCommand implements CommandMarker {
       @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master,
       @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
          help = "Spark executor memory") final String sparkMemory,
-      @CliOption(key = "rollbackUsingMarkers", unspecifiedDefaultValue = "true",
+      @CliOption(key = "rollbackUsingMarkers", unspecifiedDefaultValue = "false",
          help = "Enabling marker based rollback") final String rollbackUsingMarkers)
       throws Exception {
     HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
index 097c68a542c470b291bfb9dcb2d82f7d59831dae..d3845137c8e234cbf2af7c667787ec2571106f0a 100644 (file)
@@ -558,7 +558,7 @@ public class CompactionCommand implements CommandMarker {
   @CliCommand(value = "compaction unscheduleFileId", help = "UnSchedule Compaction for a fileId")
   public String unscheduleCompactFile(
       @CliOption(key = "fileId", mandatory = true, help = "File Id") final String fileId,
-      @CliOption(key = "partitionPath", mandatory = true, help = "partition path") final String partitionPath,
+      @CliOption(key = "partitionPath", unspecifiedDefaultValue = "", help = "partition path") final String partitionPath,
       @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
       @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
       @CliOption(key = {"skipValidation"}, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV,
index a506c8030a55748d3ca13e3d99b2294c88255197..27598b5f5197f9a42a8e50689cfc36a7ac2cb782 100644 (file)
@@ -61,7 +61,7 @@ public class FileSystemViewCommand implements CommandMarker {
   @CliCommand(value = "show fsview all", help = "Show entire file-system view")
   public String showAllFileSlices(
       @CliOption(key = {"pathRegex"}, help = "regex to select files, eg: 2016/08/02",
-          unspecifiedDefaultValue = "*/*/*") String globRegex,
+          unspecifiedDefaultValue = "") String globRegex,
       @CliOption(key = {"baseFileOnly"}, help = "Only display base files view",
           unspecifiedDefaultValue = "false") boolean baseFileOnly,
       @CliOption(key = {"maxInstant"}, help = "File-Slices upto this instant are displayed",
@@ -79,6 +79,12 @@ public class FileSystemViewCommand implements CommandMarker {
           unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws IOException {
 
+    globRegex = globRegex == null ? "" : globRegex;
+    // TODO: There is a bug in Spring Shell: if we pass */*/* to pathRegex, the last '/' is lost and pathRegex becomes */**
+    if (globRegex.endsWith("**")) {
+      globRegex = globRegex.replace("**", "*/*");
+    }
+
     HoodieTableFileSystemView fsView = buildFileSystemView(globRegex, maxInstant, baseFileOnly, includeMaxInstant,
         includeInflight, excludeCompaction);
     List<Comparable[]> rows = new ArrayList<>();
@@ -119,7 +125,7 @@ public class FileSystemViewCommand implements CommandMarker {
 
   @CliCommand(value = "show fsview latest", help = "Show latest file-system view")
   public String showLatestFileSlices(
-      @CliOption(key = {"partitionPath"}, help = "A valid partition path", mandatory = true) String partition,
+      @CliOption(key = {"partitionPath"}, help = "A valid partition path", unspecifiedDefaultValue = "") String partition,
       @CliOption(key = {"baseFileOnly"}, help = "Only display base file view",
           unspecifiedDefaultValue = "false") boolean baseFileOnly,
       @CliOption(key = {"maxInstant"}, help = "File-Slices upto this instant are displayed",
index 93866cafcd321c1126b5417e897bc2d35fa13819..5c6407cea144382dc23002b8964ef522f0061087 100644 (file)
@@ -53,7 +53,7 @@ public class HDFSParquetImportCommand implements CommandMarker {
       @CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
       @CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
       @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
-      @CliOption(key = "partitionPathField", mandatory = true,
+      @CliOption(key = "partitionPathField", unspecifiedDefaultValue = "",
           help = "Partition path field name") final String partitionPathField,
       @CliOption(key = {"parallelism"}, mandatory = true,
           help = "Parallelism for hoodie insert") final String parallelism,
similarity index 98%
rename from hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieSyncCommand.java
rename to hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieSyncValidateCommand.java
index 084d757f85250d470a9a0075039d643367e82926..30b42552bb16cde1d84a21709043123739a69e68 100644 (file)
@@ -39,7 +39,7 @@ import java.util.stream.Collectors;
  * CLI command to display sync options.
  */
 @Component
-public class HoodieSyncCommand implements CommandMarker {
+public class HoodieSyncValidateCommand implements CommandMarker {
 
   @CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records")
   public String validateSync(
index 637f1393f51adf5327d066fc0b7fa3dc801e3a17..e3d25e06b886073c696392d78f0f16223d934fff 100644 (file)
@@ -27,6 +27,7 @@ import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.engine.HoodieLocalEngineContext;
 import org.apache.hudi.common.util.HoodieTimer;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.metadata.HoodieBackedTableMetadata;
@@ -225,7 +226,7 @@ public class MetadataCommand implements CommandMarker {
 
   @CliCommand(value = "metadata list-files", help = "Print a list of all files in a partition from the metadata")
   public String listFiles(
-      @CliOption(key = {"partition"}, help = "Name of the partition to list files", mandatory = true) final String partition) throws IOException {
+      @CliOption(key = {"partition"}, help = "Name of the partition to list files", unspecifiedDefaultValue = "") final String partition) throws IOException {
     HoodieCLI.getTableMetaClient();
     HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build();
     HoodieBackedTableMetadata metaReader = new HoodieBackedTableMetadata(
@@ -235,8 +236,13 @@ public class MetadataCommand implements CommandMarker {
       return "[ERROR] Metadata Table not enabled/initialized\n\n";
     }
 
+    Path partitionPath = new Path(HoodieCLI.basePath);
+    if (!StringUtils.isNullOrEmpty(partition)) {
+      partitionPath = new Path(HoodieCLI.basePath, partition);
+    }
+
     HoodieTimer timer = new HoodieTimer().startTimer();
-    FileStatus[] statuses = metaReader.getAllFilesInPartition(new Path(HoodieCLI.basePath, partition));
+    FileStatus[] statuses = metaReader.getAllFilesInPartition(partitionPath);
     LOG.debug("Took " + timer.endTimer() + " ms");
 
     final List<Comparable[]> rows = new ArrayList<>();
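
Note: with --partition now optional, the command falls back to listing the table base path, which is what a non-partitioned table needs. A small sketch of the fallback; the base path value is invented for illustration.

    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.common.util.StringUtils;

    public class PartitionFallbackSketch {
      public static void main(String[] args) {
        String basePath = "/tmp/hoodie/trips"; // stand-in for HoodieCLI.basePath
        String partition = "";                 // user omitted --partition
        Path partitionPath = StringUtils.isNullOrEmpty(partition)
            ? new Path(basePath)
            : new Path(basePath, partition);
        System.out.println(partitionPath);
      }
    }
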
index 41357453510da1a503850fc4addd500dad860843..e94c38bd16af8b9aa06ed2ae502039a20ab2c8e7 100644 (file)
@@ -35,7 +35,7 @@ import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.config.HoodieBootstrapConfig;
-import org.apache.hudi.config.HoodieCompactionConfig;
+import org.apache.hudi.config.HoodieCleanConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieSavepointException;
@@ -538,7 +538,7 @@ public class SparkMain {
   private static HoodieWriteConfig getWriteConfig(String basePath, Boolean rollbackUsingMarkers, boolean lazyCleanPolicy) {
     return HoodieWriteConfig.newBuilder().withPath(basePath)
         .withRollbackUsingMarkers(rollbackUsingMarkers)
-        .withCompactionConfig(HoodieCompactionConfig.newBuilder().withFailedWritesCleaningPolicy(lazyCleanPolicy ? HoodieFailedWritesCleaningPolicy.LAZY :
+        .withCleanConfig(HoodieCleanConfig.newBuilder().withFailedWritesCleaningPolicy(lazyCleanPolicy ? HoodieFailedWritesCleaningPolicy.LAZY :
             HoodieFailedWritesCleaningPolicy.EAGER).build())
         .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
   }
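
Note: cleaning and archival settings moved out of HoodieCompactionConfig into the new HoodieCleanConfig and HoodieArchivalConfig. The builder calls below are the same ones used throughout this commit; the path and retention values are invented for illustration.

    import org.apache.hudi.config.HoodieArchivalConfig;
    import org.apache.hudi.config.HoodieCleanConfig;
    import org.apache.hudi.config.HoodieWriteConfig;

    public class SplitConfigSketch {
      public static void main(String[] args) {
        HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
            .withPath("/tmp/hoodie/trips")
            .withCleanConfig(HoodieCleanConfig.newBuilder().retainCommits(1).build())
            .withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 3).build())
            .build();
        System.out.println(cfg.getBasePath());
      }
    }
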
index fbfc1d8ec902e1daf6f8375e6403fc4cd349b3d4..b9f8df5fc2337c14877f519aa8e09f1c91c8cc1a 100644 (file)
@@ -23,12 +23,11 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hudi.avro.HoodieAvroWriteSupport
 import org.apache.hudi.client.SparkTaskContextSupplier
-import org.apache.hudi.common.HoodieJsonPayload
 import org.apache.hudi.common.bloom.{BloomFilter, BloomFilterFactory}
 import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord}
 import org.apache.hudi.common.util.BaseFileUtils
 import org.apache.hudi.config.{HoodieIndexConfig, HoodieStorageConfig}
-import org.apache.hudi.io.storage.{HoodieAvroParquetConfig, HoodieAvroParquetWriter}
+import org.apache.hudi.io.storage.{HoodieAvroParquetWriter, HoodieParquetConfig}
 import org.apache.parquet.avro.AvroSchemaConverter
 import org.apache.parquet.hadoop.metadata.CompressionCodecName
 import org.apache.spark.sql.{DataFrame, SQLContext}
@@ -45,7 +44,7 @@ object SparkHelpers {
     val filter: BloomFilter = BloomFilterFactory.createBloomFilter(HoodieIndexConfig.BLOOM_FILTER_NUM_ENTRIES_VALUE.defaultValue.toInt, HoodieIndexConfig.BLOOM_FILTER_FPP_VALUE.defaultValue.toDouble,
       HoodieIndexConfig.BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.defaultValue.toInt, HoodieIndexConfig.BLOOM_FILTER_TYPE.defaultValue);
     val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(fs.getConf).convert(schema), schema, org.apache.hudi.common.util.Option.of(filter))
-    val parquetConfig: HoodieAvroParquetConfig = new HoodieAvroParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.PARQUET_BLOCK_SIZE.defaultValue.toInt, HoodieStorageConfig.PARQUET_PAGE_SIZE.defaultValue.toInt, HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.defaultValue.toInt, fs.getConf, HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION.defaultValue.toDouble)
+    val parquetConfig: HoodieParquetConfig[HoodieAvroWriteSupport] = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.PARQUET_BLOCK_SIZE.defaultValue.toInt, HoodieStorageConfig.PARQUET_PAGE_SIZE.defaultValue.toInt, HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.defaultValue.toInt, fs.getConf, HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION.defaultValue.toDouble)
 
     // Add the current class loader to the config; otherwise loading 'HoodieWrapperFileSystem' throws a ClassNotFoundException.
     parquetConfig.getHadoopConf().setClassLoader(Thread.currentThread.getContextClassLoader)
index d822ad658920bc6aec1b0d603ba1cd23bb99ed82..b936202bd0d804c6499516df08d1b5c8ba20adf3 100644 (file)
@@ -30,7 +30,8 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
 import org.apache.hudi.common.testutils.HoodieTestUtils;
-import org.apache.hudi.config.HoodieCompactionConfig;
+import org.apache.hudi.config.HoodieArchivalConfig;
+import org.apache.hudi.config.HoodieCleanConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.table.HoodieSparkTable;
 import org.apache.hudi.client.HoodieTimelineArchiver;
@@ -72,7 +73,8 @@ public class TestArchivedCommitsCommand extends CLIFunctionalTestHarness {
     // Generate archive
     HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
         .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
-        .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
+        .withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 3).build())
+        .withCleanConfig(HoodieCleanConfig.newBuilder().retainCommits(1).build())
         .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
             .withRemoteServerPort(timelineServicePort).build())
         .forTable("test-trip-table").build();
index b23c6fd150dc0d0c5b80fefc3648afd1574f59a6..e03699f66e2cb7333e5716530448f53b7f05f765 100644 (file)
@@ -37,7 +37,8 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
 import org.apache.hudi.common.testutils.HoodieTestUtils;
 import org.apache.hudi.common.util.NumericUtils;
 import org.apache.hudi.common.util.Option;
-import org.apache.hudi.config.HoodieCompactionConfig;
+import org.apache.hudi.config.HoodieArchivalConfig;
+import org.apache.hudi.config.HoodieCleanConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.table.HoodieSparkTable;
 import org.apache.hudi.client.HoodieTimelineArchiver;
@@ -212,7 +213,8 @@ public class TestCommitsCommand extends CLIFunctionalTestHarness {
     // Generate archive
     HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath1)
         .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
-        .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
+        .withCleanConfig(HoodieCleanConfig.newBuilder().retainCommits(1).build())
+        .withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 3).build())
         .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
             .withRemoteServerPort(timelineServicePort).build())
         .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
@@ -266,7 +268,8 @@ public class TestCommitsCommand extends CLIFunctionalTestHarness {
     // Generate archive
     HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath1)
         .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
-        .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
+        .withCleanConfig(HoodieCleanConfig.newBuilder().retainCommits(1).build())
+        .withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 3).build())
         .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
             .withRemoteServerPort(timelineServicePort).build())
         .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
index 17c1002f6b0dd00133f6c35cbe894d3458028080..bc5ba168e3ed88066596e379f44ce35410cefb43 100644 (file)
@@ -39,7 +39,8 @@ import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
 import org.apache.hudi.common.testutils.CompactionTestUtils;
 import org.apache.hudi.common.testutils.HoodieTestUtils;
 import org.apache.hudi.common.util.Option;
-import org.apache.hudi.config.HoodieCompactionConfig;
+import org.apache.hudi.config.HoodieArchivalConfig;
+import org.apache.hudi.config.HoodieCleanConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.table.HoodieSparkTable;
@@ -166,7 +167,8 @@ public class TestCompactionCommand extends CLIFunctionalTestHarness {
     // Generate archive
     HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
         .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
-        .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
+        .withCleanConfig(HoodieCleanConfig.newBuilder().retainCommits(1).build())
+        .withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 3).build())
         .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
             .withRemoteServerPort(timelineServicePort).build())
         .forTable("test-trip-table").build();
index d5c535ebfe00ca9174a9f44a855e23e70616b57c..639ef0fe8dba2e8b6bb47079dc169bd917f1e805 100644 (file)
@@ -59,24 +59,73 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
 @Tag("functional")
 public class TestFileSystemViewCommand extends CLIFunctionalTestHarness {
 
+  private String nonpartitionedTablePath;
+  private String partitionedTablePath;
   private String partitionPath;
-  private SyncableFileSystemView fsView;
+  private SyncableFileSystemView nonpartitionedFsView;
+  private SyncableFileSystemView partitionedFsView;
 
   @BeforeEach
   public void init() throws IOException {
+    createNonpartitionedTable();
+    createPartitionedTable();
+  }
+
+  private void createNonpartitionedTable() throws IOException {
     HoodieCLI.conf = hadoopConf();
 
     // Create table and connect
-    String tableName = tableName();
-    String tablePath = tablePath(tableName);
+    String nonpartitionedTableName = "nonpartitioned_" + tableName();
+    nonpartitionedTablePath = tablePath(nonpartitionedTableName);
     new TableCommand().createTable(
-        tablePath, tableName,
-        "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
+            nonpartitionedTablePath, nonpartitionedTableName,
+            "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
+
+    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
+
+    Files.createDirectories(Paths.get(nonpartitionedTablePath));
+
+    // Generate 2 commits
+    String commitTime1 = "3";
+    String commitTime2 = "4";
+
+    String fileId1 = UUID.randomUUID().toString();
+
+    // Write date files and log file
+    String testWriteToken = "2-0-2";
+    Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils
+        .makeBaseFileName(commitTime1, testWriteToken, fileId1)));
+    Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils
+        .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, testWriteToken)));
+    Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils
+        .makeBaseFileName(commitTime2, testWriteToken, fileId1)));
+    Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils
+        .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken)));
+
+    // Write commit files
+    Files.createFile(Paths.get(nonpartitionedTablePath, ".hoodie", commitTime1 + ".commit"));
+    Files.createFile(Paths.get(nonpartitionedTablePath, ".hoodie", commitTime2 + ".commit"));
+
+    // Reload meta client and create fsView
+    metaClient = HoodieTableMetaClient.reload(metaClient);
+
+    nonpartitionedFsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline(), true);
+  }
+
+  private void createPartitionedTable() throws IOException {
+    HoodieCLI.conf = hadoopConf();
+
+    // Create table and connect
+    String partitionedTableName = "partitioned_" + tableName();
+    partitionedTablePath = tablePath(partitionedTableName);
+    new TableCommand().createTable(
+            partitionedTablePath, partitionedTableName,
+            "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
 
     HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
 
     partitionPath = HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH;
-    String fullPartitionPath = Paths.get(tablePath, partitionPath).toString();
+    String fullPartitionPath = Paths.get(partitionedTablePath, partitionPath).toString();
     Files.createDirectories(Paths.get(fullPartitionPath));
 
     // Generate 2 commits
@@ -97,13 +146,13 @@ public class TestFileSystemViewCommand extends CLIFunctionalTestHarness {
         .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken)));
 
     // Write commit files
-    Files.createFile(Paths.get(tablePath, ".hoodie", commitTime1 + ".commit"));
-    Files.createFile(Paths.get(tablePath, ".hoodie", commitTime2 + ".commit"));
+    Files.createFile(Paths.get(partitionedTablePath, ".hoodie", commitTime1 + ".commit"));
+    Files.createFile(Paths.get(partitionedTablePath, ".hoodie", commitTime2 + ".commit"));
 
     // Reload meta client and create fsView
     metaClient = HoodieTableMetaClient.reload(metaClient);
 
-    fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline(), true);
+    partitionedFsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline(), true);
   }
 
   /**
@@ -112,11 +161,11 @@ public class TestFileSystemViewCommand extends CLIFunctionalTestHarness {
   @Test
   public void testShowCommits() {
     // Test default show fsview all
-    CommandResult cr = shell().executeCommand("show fsview all");
+    CommandResult cr = shell().executeCommand("show fsview all --pathRegex */*/*");
     assertTrue(cr.isSuccess());
 
     // Get all file groups
-    Stream<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath);
+    Stream<HoodieFileGroup> fileGroups = partitionedFsView.getAllFileGroups(partitionPath);
 
     List<Comparable[]> rows = new ArrayList<>();
     fileGroups.forEach(fg -> fg.getAllFileSlices().forEach(fs -> {
@@ -160,11 +209,11 @@ public class TestFileSystemViewCommand extends CLIFunctionalTestHarness {
   @Test
   public void testShowCommitsWithSpecifiedValues() {
     // Test command with options, baseFileOnly and maxInstant is 2
-    CommandResult cr = shell().executeCommand("show fsview all --baseFileOnly true --maxInstant 2");
+    CommandResult cr = shell().executeCommand("show fsview all --pathRegex */*/* --baseFileOnly true --maxInstant 2");
     assertTrue(cr.isSuccess());
 
     List<Comparable[]> rows = new ArrayList<>();
-    Stream<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath);
+    Stream<HoodieFileGroup> fileGroups = partitionedFsView.getAllFileGroups(partitionPath);
 
     // Only get instant 1, since maxInstant was specified 2
     fileGroups.forEach(fg -> fg.getAllFileSlices().filter(fs -> fs.getBaseInstantTime().equals("1")).forEach(fs -> {
@@ -197,17 +246,7 @@ public class TestFileSystemViewCommand extends CLIFunctionalTestHarness {
     assertEquals(expected, got);
   }
 
-  /**
-   * Test case for command 'show fsview latest'.
-   */
-  @Test
-  public void testShowLatestFileSlices() {
-    // Test show with partition path '2016/03/15'
-    CommandResult cr = shell().executeCommand("show fsview latest --partitionPath " + partitionPath);
-    assertTrue(cr.isSuccess());
-
-    Stream<FileSlice> fileSlice = fsView.getLatestFileSlices(partitionPath);
-
+  private List<Comparable[]> fileSlicesToCRList(Stream<FileSlice> fileSlice, String partitionPath) {
     List<Comparable[]> rows = new ArrayList<>();
     fileSlice.forEach(fs -> {
       int idx = 0;
@@ -245,7 +284,14 @@ public class TestFileSystemViewCommand extends CLIFunctionalTestHarness {
           .collect(Collectors.toList()).toString();
       rows.add(row);
     });
+    return rows;
+  }
 
+  /**
+   * Test case for command 'show fsview latest'.
+   */
+  @Test
+  public void testShowLatestFileSlices() throws IOException {
     Function<Object, String> converterFunction =
         entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString())));
     Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
@@ -267,9 +313,32 @@ public class TestFileSystemViewCommand extends CLIFunctionalTestHarness {
         .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_BASE_UNSCHEDULED)
         .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_SCHEDULED)
         .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_UNSCHEDULED);
-    String expected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, rows);
-    expected = removeNonWordAndStripSpace(expected);
-    String got = removeNonWordAndStripSpace(cr.getResult().toString());
-    assertEquals(expected, got);
+
+    // Test show with partition path '2016/03/15'
+    new TableCommand().connect(partitionedTablePath, null, false, 0, 0, 0);
+    CommandResult partitionedTableCR = shell().executeCommand("show fsview latest --partitionPath " + partitionPath);
+    assertTrue(partitionedTableCR.isSuccess());
+
+    Stream<FileSlice> partitionedFileSlice = partitionedFsView.getLatestFileSlices(partitionPath);
+
+    List<Comparable[]> partitionedRows = fileSlicesToCRList(partitionedFileSlice, partitionPath);
+    String partitionedExpected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, partitionedRows);
+    partitionedExpected = removeNonWordAndStripSpace(partitionedExpected);
+    String partitionedResults = removeNonWordAndStripSpace(partitionedTableCR.getResult().toString());
+    assertEquals(partitionedExpected, partitionedResults);
+
+    // Test show for non-partitioned table
+    new TableCommand().connect(nonpartitionedTablePath, null, false, 0, 0, 0);
+    CommandResult nonpartitionedTableCR = shell().executeCommand("show fsview latest");
+    assertTrue(nonpartitionedTableCR.isSuccess());
+
+    Stream<FileSlice> nonpartitionedFileSlice = nonpartitionedFsView.getLatestFileSlices("");
+
+    List<Comparable[]> nonpartitionedRows = fileSlicesToCRList(nonpartitionedFileSlice, "");
+
+    String nonpartitionedExpected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, nonpartitionedRows);
+    nonpartitionedExpected = removeNonWordAndStripSpace(nonpartitionedExpected);
+    String nonpartitionedResults = removeNonWordAndStripSpace(nonpartitionedTableCR.getResult().toString());
+    assertEquals(nonpartitionedExpected, nonpartitionedResults);
   }
 }
index ddfd4a2fd584dccd1dbb05ea7fd7757b779ef961..3a4f37fdab94d95672f41be17dea5fefc4cf71a7 100644 (file)
   <parent>
     <artifactId>hudi-client</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>0.12.0-SNAPSHOT</version>
+    <version>0.13.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-client-common</artifactId>
-  <version>0.12.0-SNAPSHOT</version>
+  <version>0.13.0-SNAPSHOT</version>
 
   <name>hudi-client-common</name>
   <packaging>jar</packaging>
 
     <!-- Logging -->
     <dependency>
-      <groupId>log4j</groupId>
-      <artifactId>log4j</artifactId>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-1.2-api</artifactId>
     </dependency>
 
     <!-- Parquet -->
index 6d1059cb98da879b1808b5e657ab8b45ba278f45..568204f65588bc17147b189e4517a6f38f44989c 100644 (file)
@@ -28,8 +28,8 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClientBuilder;
 import org.apache.hudi.config.HoodieWriteCommitCallbackConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 import java.io.Closeable;
 import java.io.IOException;
index 961965353b7ffe19d9541abe4872685b8f989bcc..48dd21d28ea6aee7832234fa12abc1af5af2d071 100644 (file)
@@ -39,6 +39,7 @@ import org.apache.hudi.client.heartbeat.HeartbeatUtils;
 import org.apache.hudi.client.transaction.TransactionManager;
 import org.apache.hudi.client.utils.TransactionUtils;
 import org.apache.hudi.common.HoodiePendingRollbackInfo;
+import org.apache.hudi.common.config.HoodieCommonConfig;
 import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
@@ -60,6 +61,7 @@ import org.apache.hudi.common.util.CommitUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.config.HoodieArchivalConfig;
 import org.apache.hudi.config.HoodieClusteringConfig;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -276,15 +278,21 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
     TableSchemaResolver schemaUtil = new TableSchemaResolver(table.getMetaClient());
     String historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata().orElse("");
     FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(table.getMetaClient());
-    if (!historySchemaStr.isEmpty()) {
-      InternalSchema internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime),
-          SerDeHelper.parseSchemas(historySchemaStr));
+    if (!historySchemaStr.isEmpty() || Boolean.parseBoolean(config.getString(HoodieCommonConfig.RECONCILE_SCHEMA.key()))) {
+      InternalSchema internalSchema;
       Schema avroSchema = HoodieAvroUtils.createHoodieWriteSchema(new Schema.Parser().parse(config.getSchema()));
-      InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.evolveSchemaFromNewAvroSchema(avroSchema, internalSchema);
+      if (historySchemaStr.isEmpty()) {
+        internalSchema = AvroInternalSchemaConverter.convert(avroSchema);
+        internalSchema.setSchemaId(Long.parseLong(instantTime));
+      } else {
+        internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime),
+            SerDeHelper.parseSchemas(historySchemaStr));
+      }
+      InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.reconcileSchema(avroSchema, internalSchema);
       if (evolvedSchema.equals(internalSchema)) {
         metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(evolvedSchema));
         //TODO save history schema by metaTable
-        schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr);
+        schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr.isEmpty() ? SerDeHelper.inheritSchemas(evolvedSchema, "") : historySchemaStr);
       } else {
         evolvedSchema.setSchemaId(Long.parseLong(instantTime));
         String newSchemaStr = SerDeHelper.toJson(evolvedSchema);
@@ -484,7 +492,7 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
    * @param writeOperationType
    * @param metaClient
    */
-  protected void preWrite(String instantTime, WriteOperationType writeOperationType,
+  public void preWrite(String instantTime, WriteOperationType writeOperationType,
       HoodieTableMetaClient metaClient) {
     setOperationType(writeOperationType);
     this.lastCompletedTxnAndMetadata = TransactionUtils.getLastCompletedTxnInstantAndMetadata(metaClient);
@@ -714,6 +722,8 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
 
     HoodieTable<T, I, K, O> table = initTable(WriteOperationType.UNKNOWN, Option.empty(), initialMetadataTableIfNecessary);
     SavepointHelpers.validateSavepointPresence(table, savepointTime);
+    ValidationUtils.checkArgument(!config.shouldArchiveBeyondSavepoint(), "Restore is not supported when " + HoodieArchivalConfig.ARCHIVE_BEYOND_SAVEPOINT.key()
+        + " is enabled");
     restoreToInstant(savepointTime, initialMetadataTableIfNecessary);
     SavepointHelpers.validateSavepointRestore(table, savepointTime);
   }
@@ -1543,13 +1553,16 @@ public abstract class BaseHoodieWriteClient<T extends HoodieRecordPayload, I, K,
         new UpgradeDowngrade(metaClient, config, context, upgradeDowngradeHelper);
 
     if (upgradeDowngrade.needsUpgradeOrDowngrade(HoodieTableVersion.current())) {
+      metaClient = HoodieTableMetaClient.reload(metaClient);
       // Ensure no inflight commits by setting EAGER policy and explicitly cleaning all failed commits
       List<String> instantsToRollback = getInstantsToRollback(metaClient, HoodieFailedWritesCleaningPolicy.EAGER, instantTime);
 
-      Map<String, Option<HoodiePendingRollbackInfo>> pendingRollbacks = getPendingRollbackInfos(metaClient);
-      instantsToRollback.forEach(entry -> pendingRollbacks.putIfAbsent(entry, Option.empty()));
+      if (!instantsToRollback.isEmpty()) {
+        Map<String, Option<HoodiePendingRollbackInfo>> pendingRollbacks = getPendingRollbackInfos(metaClient);
+        instantsToRollback.forEach(entry -> pendingRollbacks.putIfAbsent(entry, Option.empty()));
 
-      rollbackFailedWrites(pendingRollbacks, true);
+        rollbackFailedWrites(pendingRollbacks, true);
+      }
 
       new UpgradeDowngrade(metaClient, config, context, upgradeDowngradeHelper)
           .run(HoodieTableVersion.current(), instantTime.orElse(null));
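
Note: the schema-evolution branch in commitStats is now entered either when a schema history already exists or when reconcile-schema is enabled; with an empty history, the initial InternalSchema is derived from the write schema itself. A minimal sketch of opting in, using the config constant referenced above rather than a hard-coded key string.

    import java.util.Properties;
    import org.apache.hudi.common.config.HoodieCommonConfig;

    public class ReconcileSchemaSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        // Enables the evolution path even when no schema history exists yet.
        props.setProperty(HoodieCommonConfig.RECONCILE_SCHEMA.key(), "true");
        System.out.println(props);
      }
    }
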
index b1675dfd888aa17f8572c63bbda2395c80dd53d5..815ef4892e68f39863303a1f7fb3407843cd0154 100644 (file)
@@ -55,6 +55,10 @@ public class HoodieInternalWriteStatus implements Serializable {
     this.random = new Random(RANDOM_SEED);
   }
 
+  public boolean isTrackingSuccessfulWrites() {
+    return trackSuccessRecords;
+  }
+
   public void markSuccess(String recordKey) {
     if (trackSuccessRecords) {
       this.successRecordKeys.add(recordKey);
index c53554d8e04d2177993a92cd82256eadb54f24e3..2992f4abd4c9e5a11324040a40980807fb53454c 100644 (file)
@@ -64,6 +64,7 @@ import org.apache.avro.generic.IndexedRecord;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 
@@ -76,12 +77,14 @@ import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN;
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
+import static org.apache.hudi.common.table.timeline.HoodieTimeline.compareTimestamps;
 
 /**
  * Archiver to bound the growth of files under .hoodie meta path.
@@ -409,9 +412,11 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
             .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
             .filterInflights().firstInstant();
 
-    // We cannot have any holes in the commit timeline. We cannot archive any commits which are
-    // made after the first savepoint present.
+    // NOTE: We cannot have any holes in the commit timeline.
+    // We cannot archive any commits which are made after the first savepoint present,
+    // unless HoodieArchivalConfig#ARCHIVE_BEYOND_SAVEPOINT is enabled.
     Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
+    Set<String> savepointTimestamps = table.getSavepointTimestamps();
     if (!commitTimeline.empty() && commitTimeline.countInstants() > maxInstantsToKeep) {
       // For Merge-On-Read table, inline or async compaction is enabled
       // We need to make sure that there are enough delta commits in the active timeline
@@ -428,28 +433,33 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
       // Actually do the commits
       Stream<HoodieInstant> instantToArchiveStream = commitTimeline.getInstants()
           .filter(s -> {
-            // if no savepoint present, then don't filter
-            return !(firstSavepoint.isPresent() && HoodieTimeline.compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
+            if (config.shouldArchiveBeyondSavepoint()) {
+              // skip savepoint commits and proceed further
+              return !savepointTimestamps.contains(s.getTimestamp());
+            } else {
+              // if no savepoint present, then don't filter
+              // stop at first savepoint commit
+              return !(firstSavepoint.isPresent() && compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
+            }
           }).filter(s -> {
             // Ensure commits >= oldest pending compaction commit is retained
             return oldestPendingCompactionAndReplaceInstant
-                .map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
+                .map(instant -> compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
                 .orElse(true);
           }).filter(s -> {
             // We need this to ensure that when multiple writers are performing conflict resolution, eligible instants don't
             // get archived, i.e, instants after the oldestInflight are retained on the timeline
             if (config.getFailedWritesCleanPolicy() == HoodieFailedWritesCleaningPolicy.LAZY) {
               return oldestInflightCommitInstant.map(instant ->
-                      HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
+                      compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
                   .orElse(true);
             }
             return true;
           }).filter(s ->
               oldestInstantToRetainForCompaction.map(instantToRetain ->
-                      HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
+                      compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
                   .orElse(true)
           );
-
       return instantToArchiveStream.limit(commitTimeline.countInstants() - minInstantsToKeep);
     } else {
       return Stream.empty();
@@ -479,7 +489,7 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
           instants = Stream.empty();
         } else {
           LOG.info("Limiting archiving of instants to latest compaction on metadata table at " + latestCompactionTime.get());
-          instants = instants.filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN,
+          instants = instants.filter(instant -> compareTimestamps(instant.getTimestamp(), LESSER_THAN,
               latestCompactionTime.get()));
         }
       } catch (Exception e) {
@@ -487,18 +497,29 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
       }
     }
 
-    // If this is a metadata table, do not archive the commits that live in data set
-    // active timeline. This is required by metadata table,
-    // see HoodieTableMetadataUtil#processRollbackMetadata for details.
     if (HoodieTableMetadata.isMetadataTable(config.getBasePath())) {
       HoodieTableMetaClient dataMetaClient = HoodieTableMetaClient.builder()
           .setBasePath(HoodieTableMetadata.getDatasetBasePath(config.getBasePath()))
           .setConf(metaClient.getHadoopConf())
           .build();
-      Option<String> earliestActiveDatasetCommit = dataMetaClient.getActiveTimeline().firstInstant().map(HoodieInstant::getTimestamp);
-      if (earliestActiveDatasetCommit.isPresent()) {
-        instants = instants.filter(instant ->
-            HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, earliestActiveDatasetCommit.get()));
+      Option<HoodieInstant> earliestActiveDatasetCommit = dataMetaClient.getActiveTimeline().firstInstant();
+
+      if (config.shouldArchiveBeyondSavepoint()) {
+        // There could be holes in the timeline due to the interplay of archival and savepoints.
+        // So, the first non-savepoint commit in the data timeline is considered the beginning of the active timeline.
+        Option<HoodieInstant> firstNonSavepointCommit = dataMetaClient.getActiveTimeline().getFirstNonSavepointCommit();
+        if (firstNonSavepointCommit.isPresent()) {
+          String firstNonSavepointCommitTime = firstNonSavepointCommit.get().getTimestamp();
+          instants = instants.filter(instant ->
+              compareTimestamps(instant.getTimestamp(), LESSER_THAN, firstNonSavepointCommitTime));
+        }
+      } else {
+        // Do not archive the commits that live in the dataset's active timeline.
+        // This is required by metadata table, see HoodieTableMetadataUtil#processRollbackMetadata for details.
+        if (earliestActiveDatasetCommit.isPresent()) {
+          instants = instants.filter(instant ->
+              compareTimestamps(instant.getTimestamp(), LESSER_THAN, earliestActiveDatasetCommit.get().getTimestamp()));
+        }
       }
     }
 
@@ -589,7 +610,7 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
     }
 
     List<HoodieInstant> instantsToBeDeleted =
-        instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
+        instants.stream().filter(instant1 -> compareTimestamps(instant1.getTimestamp(),
             LESSER_THAN_OR_EQUALS, thresholdInstant.getTimestamp())).collect(Collectors.toList());
 
     for (HoodieInstant deleteInstant : instantsToBeDeleted) {
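
For illustration, a minimal sketch of turning on the savepoint-skipping branch above; the HoodieArchivalConfig builder methods are the ones added later in this commit, while the base path and the withArchivalConfig hook on the write-config builder are assumptions:

    // Illustrative sketch, not part of this diff: with archival beyond savepoints
    // enabled, the archiver skips savepointed instants instead of stopping at the first one.
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hoodie/trips")  // hypothetical base path
        .withArchivalConfig(HoodieArchivalConfig.newBuilder()
            .archiveCommitsWith(20, 30)         // retain between 20 and 30 instants
            .withArchiveBeyondSavepoint(true)   // skip savepoint commits during archival
            .build())
        .build();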
index e3c1f7f769c9dea04df36212c87fefdc762b5c2f..4d5375894d7e31fe48fda61f9df1c470041b8c69 100644 (file)
@@ -78,8 +78,7 @@ public class EmbeddedTimelineService {
         .serverPort(writeConfig.getEmbeddedTimelineServerPort())
         .numThreads(writeConfig.getEmbeddedTimelineServerThreads())
         .compress(writeConfig.getEmbeddedTimelineServerCompressOutput())
-        .async(writeConfig.getEmbeddedTimelineServerUseAsync())
-        .refreshTimelineBasedOnLatestCommit(writeConfig.isRefreshTimelineServerBasedOnLatestCommit());
+        .async(writeConfig.getEmbeddedTimelineServerUseAsync());
     // Only passing marker-related write configs to timeline server
     // if timeline-server-based markers are used.
     if (writeConfig.getMarkersType() == MarkerType.TIMELINE_SERVER_BASED) {
@@ -118,6 +117,11 @@ public class EmbeddedTimelineService {
         .withRemoteServerHost(hostAddr)
         .withRemoteServerPort(serverPort)
         .withRemoteTimelineClientTimeoutSecs(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientTimeoutSecs())
+        .withRemoteTimelineClientRetry(writeConfig.getClientSpecifiedViewStorageConfig().isRemoteTimelineClientRetryEnabled())
+        .withRemoteTimelineClientMaxRetryNumbers(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientMaxRetryNumbers())
+        .withRemoteTimelineInitialRetryIntervalMs(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineInitialRetryIntervalMs())
+        .withRemoteTimelineClientMaxRetryIntervalMs(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientMaxRetryIntervalMs())
+        .withRemoteTimelineClientRetryExceptions(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientRetryExceptions())
         .build();
   }
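
A hedged sketch of setting the retry knobs wired up above on the client side; the with* builder methods are the ones used in this diff, while the host, port, retry values, and exception list are illustrative:

    // Illustrative only: enable retries on the remote timeline client.
    FileSystemViewStorageConfig viewConfig = FileSystemViewStorageConfig.newBuilder()
        .withRemoteServerHost("localhost")  // hypothetical server
        .withRemoteServerPort(26754)        // hypothetical port
        .withRemoteTimelineClientRetry(true)
        .withRemoteTimelineClientMaxRetryNumbers(3)
        .withRemoteTimelineInitialRetryIntervalMs(100L)
        .withRemoteTimelineClientMaxRetryIntervalMs(2000L)
        .withRemoteTimelineClientRetryExceptions("java.net.ConnectException")  // assumed comma-separated class names
        .build();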
 
index aef1fee5e0794a618bf1b2cd36f190cfc4fe85a6..e9329fece69f3202fad54634e223d0d74d468b2b 100644 (file)
@@ -85,6 +85,10 @@ public class TransactionManager implements Serializable {
     }
   }
 
+  public LockManager getLockManager() {
+    return lockManager;
+  }
+
   public Option<HoodieInstant> getLastCompletedTransactionOwner() {
     return lastCompletedTxnOwnerInstant;
   }
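
With the lock manager now exposed, callers can share the transaction's lock rather than instantiating their own. A minimal sketch, assuming the lock()/unlock() pair that LockManager exposes to Hudi's transaction code and a hypothetical transactionManager instance:

    // Illustrative only: reuse the TransactionManager's lock manager.
    LockManager lockManager = transactionManager.getLockManager();
    lockManager.lock();
    try {
      // ... critical section guarded by the table lock ...
    } finally {
      lockManager.unlock();
    }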
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java
new file mode 100644 (file)
index 0000000..4135ef9
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.client.transaction.lock;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hudi.common.config.LockConfiguration;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.lock.LockProvider;
+import org.apache.hudi.common.lock.LockState;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.HoodieLockException;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hudi.common.config.LockConfiguration.FILESYSTEM_LOCK_EXPIRE_PROP_KEY;
+import static org.apache.hudi.common.config.LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY;
+
+/**
+ * A FileSystem based lock. This {@link LockProvider} implementation allows locking table operations
+ * using DFS. Users might need to manually clean up the lock file's path if the write client crashes and never runs again.
+ * NOTE: This only works for DFS with atomic create/delete operations.
+ */
+public class FileSystemBasedLockProvider implements LockProvider<String>, Serializable {
+
+  private static final Logger LOG = LogManager.getLogger(FileSystemBasedLockProvider.class);
+
+  private static final String LOCK_FILE_NAME = "lock";
+
+  private final int lockTimeoutMinutes;
+  private final transient FileSystem fs;
+  private final transient Path lockFile;
+  protected LockConfiguration lockConfiguration;
+
+  public FileSystemBasedLockProvider(final LockConfiguration lockConfiguration, final Configuration configuration) {
+    checkRequiredProps(lockConfiguration);
+    this.lockConfiguration = lockConfiguration;
+    String lockDirectory = lockConfiguration.getConfig().getString(FILESYSTEM_LOCK_PATH_PROP_KEY, null);
+    if (StringUtils.isNullOrEmpty(lockDirectory)) {
+      lockDirectory = lockConfiguration.getConfig().getString(HoodieWriteConfig.BASE_PATH.key())
+            + Path.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME;
+    }
+    this.lockTimeoutMinutes = lockConfiguration.getConfig().getInteger(FILESYSTEM_LOCK_EXPIRE_PROP_KEY);
+    this.lockFile = new Path(lockDirectory + Path.SEPARATOR + LOCK_FILE_NAME);
+    this.fs = FSUtils.getFs(this.lockFile.toString(), configuration);
+  }
+
+  @Override
+  public void close() {
+    synchronized (LOCK_FILE_NAME) {
+      try {
+        fs.delete(this.lockFile, true);
+      } catch (IOException e) {
+        throw new HoodieLockException(generateLogStatement(LockState.FAILED_TO_RELEASE), e);
+      }
+    }
+  }
+
+  @Override
+  public boolean tryLock(long time, TimeUnit unit) {
+    try {
+      synchronized (LOCK_FILE_NAME) {
+        // Check whether the lock has already expired; if so, try to delete the lock file
+        if (fs.exists(this.lockFile)) {
+          if (checkIfExpired()) {
+            fs.delete(this.lockFile, true);
+            LOG.warn("Delete expired lock file: " + this.lockFile);
+          } else {
+            return false;
+          }
+        }
+        acquireLock();
+        return fs.exists(this.lockFile);
+      }
+    } catch (IOException | HoodieIOException e) {
+      LOG.info(generateLogStatement(LockState.FAILED_TO_ACQUIRE), e);
+      return false;
+    }
+  }
+
+  @Override
+  public void unlock() {
+    synchronized (LOCK_FILE_NAME) {
+      try {
+        if (fs.exists(this.lockFile)) {
+          fs.delete(this.lockFile, true);
+        }
+      } catch (IOException io) {
+        throw new HoodieIOException(generateLogStatement(LockState.FAILED_TO_RELEASE), io);
+      }
+    }
+  }
+
+  @Override
+  public String getLock() {
+    return this.lockFile.toString();
+  }
+
+  private boolean checkIfExpired() {
+    if (lockTimeoutMinutes == 0) {
+      return false;
+    }
+    try {
+      long modificationTime = fs.getFileStatus(this.lockFile).getModificationTime();
+      if (System.currentTimeMillis() - modificationTime > lockTimeoutMinutes * 60 * 1000L) {
+        return true;
+      }
+    } catch (IOException | HoodieIOException e) {
+      LOG.error(generateLogStatement(LockState.ALREADY_RELEASED) + ": failed to get the lock file's modification time", e);
+    }
+    return false;
+  }
+
+  private void acquireLock() {
+    try {
+      fs.create(this.lockFile, false).close();
+    } catch (IOException e) {
+      throw new HoodieIOException(generateLogStatement(LockState.FAILED_TO_ACQUIRE), e);
+    }
+  }
+
+  protected String generateLogStatement(LockState state) {
+    return StringUtils.join(state.name(), " lock at: ", getLock());
+  }
+
+  private void checkRequiredProps(final LockConfiguration config) {
+    ValidationUtils.checkArgument(config.getConfig().getString(FILESYSTEM_LOCK_PATH_PROP_KEY, null) != null
+          || config.getConfig().getString(HoodieWriteConfig.BASE_PATH.key(), null) != null);
+    ValidationUtils.checkArgument(config.getConfig().getInteger(FILESYSTEM_LOCK_EXPIRE_PROP_KEY) >= 0);
+  }
+}
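
A minimal usage sketch for the new provider; the constructor and lock calls are as defined above, while the lock path, the TypedProperties wiring, and the expiry value are assumptions:

    // Illustrative only: acquire and release the DFS-backed lock directly.
    TypedProperties props = new TypedProperties();
    props.setProperty(LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY, "hdfs:///tmp/trips/.hoodie/.locks");  // hypothetical path
    props.setProperty(LockConfiguration.FILESYSTEM_LOCK_EXPIRE_PROP_KEY, "10");  // minutes; 0 disables expiry
    FileSystemBasedLockProvider lockProvider =
        new FileSystemBasedLockProvider(new LockConfiguration(props), new Configuration());
    if (lockProvider.tryLock(0, TimeUnit.SECONDS)) {
      try {
        // ... perform the table operation under the lock ...
      } finally {
        lockProvider.unlock();
      }
    }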
index ca15c4fdc2a137013fb10c0668a4e58439aa3330..6ebae44fd467c92759aab335d99ca699881e77d4 100644 (file)
@@ -74,6 +74,11 @@ public class LockManager implements Serializable, AutoCloseable {
           if (retryCount >= maxRetries) {
             throw new HoodieLockException("Unable to acquire lock, lock object ", e);
           }
+          try {
+            Thread.sleep(maxWaitTimeInMs);
+          } catch (InterruptedException ex) {
+            // ignore InterruptedException and proceed with the next retry attempt
+          }
         } finally {
           retryCount++;
         }
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieArchivalConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieArchivalConfig.java
new file mode 100644 (file)
index 0000000..3244b42
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.config;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import javax.annotation.concurrent.Immutable;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * Archival related config.
+ */
+@Immutable
+@ConfigClassProperty(name = "Archival Configs",
+    groupName = ConfigGroups.Names.WRITE_CLIENT,
+    description = "Configurations that control archival.")
+public class HoodieArchivalConfig extends HoodieConfig {
+
+  public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
+      .key("hoodie.archive.automatic")
+      .defaultValue("true")
+      .withDocumentation("When enabled, the archival table service is invoked immediately after each commit,"
+          + " to archive commits if we cross a maximum value of commits."
+          + " It's recommended to enable this, to ensure number of active commits is bounded.");
+
+  public static final ConfigProperty<String> ASYNC_ARCHIVE = ConfigProperty
+      .key("hoodie.archive.async")
+      .defaultValue("false")
+      .sinceVersion("0.11.0")
+      .withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. "
+          + "When turned on runs archiver async with writing, which can speed up overall write performance.");
+
+  public static final ConfigProperty<String> MAX_COMMITS_TO_KEEP = ConfigProperty
+      .key("hoodie.keep.max.commits")
+      .defaultValue("30")
+      .withDocumentation("Archiving service moves older entries from timeline into an archived log after each write, to "
+          + " keep the metadata overhead constant, even as the table size grows."
+          + "This config controls the maximum number of instants to retain in the active timeline. ");
+
+  public static final ConfigProperty<Integer> DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE = ConfigProperty
+      .key("hoodie.archive.delete.parallelism")
+      .defaultValue(100)
+      .withDocumentation("Parallelism for deleting archived hoodie commits.");
+
+  public static final ConfigProperty<String> MIN_COMMITS_TO_KEEP = ConfigProperty
+      .key("hoodie.keep.min.commits")
+      .defaultValue("20")
+      .withDocumentation("Similar to " + MAX_COMMITS_TO_KEEP.key() + ", but controls the minimum number of"
+          + "instants to retain in the active timeline.");
+
+  public static final ConfigProperty<String> COMMITS_ARCHIVAL_BATCH_SIZE = ConfigProperty
+      .key("hoodie.commits.archival.batch")
+      .defaultValue(String.valueOf(10))
+      .withDocumentation("Archiving of instants is batched in best-effort manner, to pack more instants into a single"
+          + " archive log. This config controls such archival batch size.");
+
+  public static final ConfigProperty<Integer> ARCHIVE_MERGE_FILES_BATCH_SIZE = ConfigProperty
+      .key("hoodie.archive.merge.files.batch.size")
+      .defaultValue(10)
+      .withDocumentation("The number of small archive files to be merged at once.");
+
+  public static final ConfigProperty<Long> ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES = ConfigProperty
+      .key("hoodie.archive.merge.small.file.limit.bytes")
+      .defaultValue(20L * 1024 * 1024)
+      .withDocumentation("This config sets the archive file size limit below which an archive file becomes a candidate to be selected as such a small file.");
+
+  public static final ConfigProperty<Boolean> ARCHIVE_MERGE_ENABLE = ConfigProperty
+      .key("hoodie.archive.merge.enable")
+      .defaultValue(false)
+      .withDocumentation("When enable, hoodie will auto merge several small archive files into larger one. It's"
+          + " useful when storage scheme doesn't support append operation.");
+
+  public static final ConfigProperty<Boolean> ARCHIVE_BEYOND_SAVEPOINT = ConfigProperty
+      .key("hoodie.archive.beyond.savepoint")
+      .defaultValue(false)
+      .sinceVersion("0.12.0")
+      .withDocumentation("If enabled, archival will proceed beyond savepoint, skipping savepoint commits. "
+          + "If disabled, archival will stop at the earliest savepoint commit.");
+
+  /**
+   * @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
+   */
+  @Deprecated
+  public static final String MAX_COMMITS_TO_KEEP_PROP = MAX_COMMITS_TO_KEEP.key();
+  /**
+   * @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
+   */
+  @Deprecated
+  public static final String MIN_COMMITS_TO_KEEP_PROP = MIN_COMMITS_TO_KEEP.key();
+  /**
+   * @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
+   */
+  @Deprecated
+  public static final String COMMITS_ARCHIVAL_BATCH_SIZE_PROP = COMMITS_ARCHIVAL_BATCH_SIZE.key();
+  /**
+   * @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
+   */
+  @Deprecated
+  private static final String DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
+  /**
+   * @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
+   */
+  @Deprecated
+  private static final String DEFAULT_MIN_COMMITS_TO_KEEP = MIN_COMMITS_TO_KEEP.defaultValue();
+  /**
+   * @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
+   */
+  @Deprecated
+  private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = COMMITS_ARCHIVAL_BATCH_SIZE.defaultValue();
+
+  private HoodieArchivalConfig() {
+    super();
+  }
+
+  public static HoodieArchivalConfig.Builder newBuilder() {
+    return new HoodieArchivalConfig.Builder();
+  }
+
+  public static class Builder {
+
+    private final HoodieArchivalConfig archivalConfig = new HoodieArchivalConfig();
+
+    public HoodieArchivalConfig.Builder fromFile(File propertiesFile) throws IOException {
+      try (FileReader reader = new FileReader(propertiesFile)) {
+        this.archivalConfig.getProps().load(reader);
+        return this;
+      }
+    }
+
+    public HoodieArchivalConfig.Builder fromProperties(Properties props) {
+      this.archivalConfig.getProps().putAll(props);
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder withAutoArchive(Boolean autoArchive) {
+      archivalConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder withAsyncArchive(Boolean asyncArchive) {
+      archivalConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive));
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
+      archivalConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
+      archivalConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder withArchiveMergeFilesBatchSize(int number) {
+      archivalConfig.setValue(ARCHIVE_MERGE_FILES_BATCH_SIZE, String.valueOf(number));
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder withArchiveMergeSmallFileLimit(long size) {
+      archivalConfig.setValue(ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES, String.valueOf(size));
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder withArchiveMergeEnable(boolean enable) {
+      archivalConfig.setValue(ARCHIVE_MERGE_ENABLE, String.valueOf(enable));
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder withArchiveDeleteParallelism(int archiveDeleteParallelism) {
+      archivalConfig.setValue(DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE, String.valueOf(archiveDeleteParallelism));
+      return this;
+    }
+
+    public HoodieArchivalConfig.Builder withCommitsArchivalBatchSize(int batchSize) {
+      archivalConfig.setValue(COMMITS_ARCHIVAL_BATCH_SIZE, String.valueOf(batchSize));
+      return this;
+    }
+
+    public Builder withArchiveBeyondSavepoint(boolean archiveBeyondSavepoint) {
+      archivalConfig.setValue(ARCHIVE_BEYOND_SAVEPOINT, String.valueOf(archiveBeyondSavepoint));
+      return this;
+    }
+
+    public HoodieArchivalConfig build() {
+      archivalConfig.setDefaults(HoodieArchivalConfig.class.getName());
+      return archivalConfig;
+    }
+  }
+}
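
As a quick illustration of composing the archive-merge knobs defined above (values are arbitrary, not defaults):

    // Illustrative only: merge small archive files, e.g. on object stores without append support.
    HoodieArchivalConfig archivalConfig = HoodieArchivalConfig.newBuilder()
        .withArchiveMergeEnable(true)
        .withArchiveMergeSmallFileLimit(16 * 1024 * 1024L)  // treat archives under 16 MB as small
        .withArchiveMergeFilesBatchSize(8)                  // merge up to 8 small files at once
        .build();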
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java
new file mode 100644 (file)
index 0000000..7b665f9
--- /dev/null
+++ b/