GroupBy: Cap dictionary-building selector memory usage. (#12309)
author: Gian Merlino <gian@imply.io>
Tue, 8 Mar 2022 21:13:11 +0000 (13:13 -0800)
committer: GitHub <noreply@github.com>
Tue, 8 Mar 2022 21:13:11 +0000 (13:13 -0800)
commit: 875e0696e01c4348fa31c77ec6fa333a324a53d8
tree: 378c777b981dcfa7c4a5d7339152d127f07b8557
parent: baea3ec61406ce86d187330e264a819d7e9e0bb2
GroupBy: Cap dictionary-building selector memory usage. (#12309)

* GroupBy: Cap dictionary-building selector memory usage.

New context parameter "maxSelectorDictionarySize" controls when the
per-segment processing code should return early and trigger a trip
to the merge buffer.

Includes:

- Vectorized and nonvectorized implementations.
- Adjustments to GroupByQueryRunnerTest to exercise this code in
  the v2SmallDictionary suite. (Both the selector dictionary and
  the merging dictionary will be small in that suite.)
- Tests for the new config parameter.

* Fix issues from tests.

* Add "pre-existing" to dictionary.

* Simplify GroupByColumnSelectorStrategy interface by removing one of the writeToKeyBuffer methods.

* Adjustments from review comments.
34 files changed:
docs/configuration/index.md
docs/querying/groupbyquery.md
processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/DictionaryBuilding.java [new file with mode: 0644]
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayNumericGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingStringGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DoubleGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/FloatGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/GroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/LongGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/NullableNumericGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategy.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DoubleGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/FloatGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/LongGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/NilGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/NullableDoubleGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/NullableFloatGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/NullableLongGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/SingleValueStringGroupByVectorColumnSelector.java
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryConfigTest.java
processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayDoubleGroupByColumnSelectorStrategyTest.java
processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayLongGroupByColumnSelectorStrategyTest.java
processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/column/ArrayStringGroupByColumnSelectorStrategyTest.java
website/.spelling