[GOBBLIN-1533] Add completeness watermark to iceberg tables (#3385)
author vbohra <vbohra@linkedin.com>
Fri, 10 Sep 2021 19:02:42 +0000 (12:02 -0700)
committer GitHub <noreply@github.com>
Fri, 10 Sep 2021 19:02:42 +0000 (12:02 -0700)
commit 47707df00a6884ada5974a5f5203408ce1efb890
tree 5c9b7879fec1a3b31e34bf5e33663466e0d720c8
parent 15ded96c99ab6219682ca3cf7e867ef6290a1b2b
[GOBBLIN-1533] Add completeness watermark to iceberg tables (#3385)

* [GOBBLIN-1533] Add completeness watermark to iceberg tables

* updated hive metadata writer test

* Add apache header

* Added correct default partition type

* Fixed kafka audit url and logic to get topic name for iceberg table

* Changes based on review

* Make audit check granularity configurable

* Added additional optimization to check for current hour during completion watermark calculation

* optimization to skip audit check if it's up to date by checking the seconds from epoch between current watermark and now

* fixed test case

* Replace hours from epoch with duration

* Moved logging

* Update partition spec with late field even when schema has been updated
23 files changed:
gobblin-compaction/src/main/java/org/apache/gobblin/compaction/audit/AuditCountClient.java
gobblin-compaction/src/main/java/org/apache/gobblin/compaction/audit/AuditCountClientFactory.java
gobblin-compaction/src/main/java/org/apache/gobblin/compaction/audit/KafkaAuditCountHttpClient.java
gobblin-compaction/src/main/java/org/apache/gobblin/compaction/audit/KafkaAuditCountHttpClientFactory.java
gobblin-compaction/src/main/java/org/apache/gobblin/compaction/verify/CompactionAuditCountVerifier.java
gobblin-completeness/build.gradle [new file with mode: 0644]
gobblin-completeness/src/main/java/org/apache/gobblin/completeness/audit/AuditCountClient.java [new file with mode: 0644]
gobblin-completeness/src/main/java/org/apache/gobblin/completeness/audit/AuditCountClientFactory.java [new file with mode: 0644]
gobblin-completeness/src/main/java/org/apache/gobblin/completeness/audit/AuditCountHttpClient.java [new file with mode: 0644]
gobblin-completeness/src/main/java/org/apache/gobblin/completeness/audit/AuditCountHttpClientFactory.java [new file with mode: 0644]
gobblin-completeness/src/main/java/org/apache/gobblin/completeness/verifier/KafkaAuditCountVerifier.java [new file with mode: 0644]
gobblin-completeness/src/test/java/org/apache/gobblin/completeness/verifier/KafkaAuditCountVerifierTest.java [new file with mode: 0644]
gobblin-completeness/src/testFixtures/java/org/apache/gobblin/completeness/audit/TestAuditClient.java [new file with mode: 0644]
gobblin-completeness/src/testFixtures/java/org/apache/gobblin/completeness/audit/TestAuditClientFactory.java [new file with mode: 0644]
gobblin-iceberg/build.gradle
gobblin-iceberg/src/main/java/org/apache/gobblin/iceberg/writer/IcebergMetadataWriter.java
gobblin-iceberg/src/main/java/org/apache/gobblin/iceberg/writer/IcebergMetadataWriterConfigKeys.java [new file with mode: 0644]
gobblin-iceberg/src/test/java/org/apache/gobblin/iceberg/writer/HiveMetadataWriterTest.java
gobblin-iceberg/src/test/java/org/apache/gobblin/iceberg/writer/IcebergMetadataWriterTest.java
gobblin-utility/src/main/java/org/apache/gobblin/time/TimeIterator.java [moved from gobblin-data-management/src/main/java/org/apache/gobblin/time/TimeIterator.java with 69% similarity]
gobblin-utility/src/test/java/org/apache/gobblin/time/TimeIteratorTest.java [moved from gobblin-data-management/src/test/java/org/apache/gobblin/time/TimeIteratorTest.java with 100% similarity]
gradle/scripts/defaultBuildProperties.gradle
settings.gradle