From 55dd006a86ad77354370dda690519b9d4657c239 Mon Sep 17 00:00:00 2001
From: Indhumathi27 <indhumathim27@gmail.com>
Date: Fri, 3 Oct 2025 18:58:27 +0530
Subject: [PATCH] HIVE-29197: Disable vectorization for multi-column
 COUNT(DISTINCT)

---
 .../ql/optimizer/physical/Vectorizer.java     |   6 +
 .../vector_count_distinct_multiarg.q          |  35 ++
 .../clientpositive/llap/vector_count.q.out    |   2 +-
 .../llap/vector_count_distinct_multiarg.q.out | 585 ++++++++++++++++++
 4 files changed, 627 insertions(+), 1 deletion(-)
 create mode 100644 ql/src/test/queries/clientpositive/vector_count_distinct_multiarg.q
 create mode 100644 ql/src/test/results/clientpositive/llap/vector_count_distinct_multiarg.q.out

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index b8acb2661fa6..5dc2fa47af24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -4501,6 +4501,12 @@ public static ImmutablePair<VectorAggregationDesc,String> getVectorAggregationDe
       vecAggrClasses = new Class[] {
           VectorUDAFComputeDsKllSketchDouble.class, VectorUDAFComputeDsKllSketchFinal.class
       };
+    } else if (VECTORIZABLE_UDAF.COUNT.toString().equalsIgnoreCase(aggregationName) && parameterList.size() > 1) {
+      // Handle unsupported multi-column COUNT DISTINCT
+      String issue = "Unsupported COUNT DISTINCT with multiple columns: "
+              + aggregationName + "(" + parameterList + "). "
+              + "Hive only supports COUNT(DISTINCT col) in vectorized execution. ";
+      return new ImmutablePair<>(null, issue);
     } else {
       VectorizedUDAFs annotation =
           AnnotationUtils.getAnnotation(evaluator.getClass(), VectorizedUDAFs.class);
diff --git a/ql/src/test/queries/clientpositive/vector_count_distinct_multiarg.q b/ql/src/test/queries/clientpositive/vector_count_distinct_multiarg.q
new file mode 100644
index 000000000000..e701b5a60504
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_count_distinct_multiarg.q
@@ -0,0 +1,35 @@
+drop table if exists test_vector;
+create external table test_vector(id string, pid bigint) PARTITIONED BY (full_date int);
+insert into test_vector (pid, full_date, id) values (1, '20240305', '6150');
+
+--------------------------------------------------------------------------------
+-- 1. Basic COUNT cases (valid in vectorization)
+--------------------------------------------------------------------------------
+SELECT COUNT(pid) AS cnt_col, COUNT(*) AS cnt_star, COUNT(20240305) AS cnt_const, COUNT(DISTINCT pid) as cnt_distinct, COUNT(1) AS CNT
+FROM test_vector WHERE full_date=20240305;
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(pid) AS cnt_col, COUNT(*) AS cnt_star, COUNT(20240305) AS cnt_const,COUNT(DISTINCT pid) as cnt_distinct, COUNT(1) AS CNT
+FROM test_vector WHERE full_date=20240305;
+
+--------------------------------------------------------------------------------
+-- 2. COUNT with DISTINCT column + literal constant (constant is dropped from the plan; still vectorized)
+--------------------------------------------------------------------------------
+SELECT COUNT(DISTINCT pid, 20240305) AS CNT FROM test_vector WHERE full_date=20240305;
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, 20240305) AS CNT FROM test_vector WHERE full_date=20240305;
+
+--------------------------------------------------------------------------------
+-- 3. COUNT(DISTINCT pid, full_date) (multi-col distinct → query succeeds, map side NOT vectorized)
+--------------------------------------------------------------------------------
+SELECT COUNT(DISTINCT pid, full_date) AS CNT FROM test_vector WHERE full_date=20240305;
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, full_date) AS CNT FROM test_vector WHERE full_date=20240305;
+
+--------------------------------------------------------------------------------
+-- 4. COUNT(DISTINCT pid, full_date, id) (multi-col distinct → query succeeds, map side NOT vectorized)
+--------------------------------------------------------------------------------
+SELECT COUNT(DISTINCT pid, full_date, id) AS CNT FROM test_vector WHERE full_date=20240305;
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, full_date, id) AS CNT FROM test_vector WHERE full_date=20240305;
+
+DROP TABLE test_vector;
diff --git a/ql/src/test/results/clientpositive/llap/vector_count.q.out b/ql/src/test/results/clientpositive/llap/vector_count.q.out
index bcb5b7ca792c..c9d5ec5145be 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count.q.out
@@ -212,7 +212,7 @@ STAGE PLANS:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported unless all the extra parameters are constants count([Column[a], Column[b]])
+                notVectorizedReason: GROUPBY operator: Unsupported COUNT DISTINCT with multiple columns: count([Column[a], Column[b]]). Hive only supports COUNT(DISTINCT col) in vectorized execution. 
                 vectorized: false
         Reducer 2 
             Execution mode: llap
diff --git a/ql/src/test/results/clientpositive/llap/vector_count_distinct_multiarg.q.out b/ql/src/test/results/clientpositive/llap/vector_count_distinct_multiarg.q.out
new file mode 100644
index 000000000000..36978c9791da
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct_multiarg.q.out
@@ -0,0 +1,585 @@
+PREHOOK: query: drop table if exists test_vector
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists test_vector
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create external table test_vector(id string, pid bigint) PARTITIONED BY (full_date int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_vector
+POSTHOOK: query: create external table test_vector(id string, pid bigint) PARTITIONED BY (full_date int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_vector
+PREHOOK: query: insert into test_vector (pid, full_date, id) values (1, '20240305', '6150')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_vector
+POSTHOOK: query: insert into test_vector (pid, full_date, id) values (1, '20240305', '6150')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_vector
+POSTHOOK: Output: default@test_vector@full_date=20240305
+POSTHOOK: Lineage: test_vector PARTITION(full_date=20240305).id SCRIPT []
+POSTHOOK: Lineage: test_vector PARTITION(full_date=20240305).pid SCRIPT []
+PREHOOK: query: SELECT COUNT(pid) AS cnt_col, COUNT(*) AS cnt_star, COUNT(20240305) AS cnt_const, COUNT(DISTINCT pid) as cnt_distinct, COUNT(1) AS CNT
+FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(pid) AS cnt_col, COUNT(*) AS cnt_star, COUNT(20240305) AS cnt_const, COUNT(DISTINCT pid) as cnt_distinct, COUNT(1) AS CNT
+FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+1	1	1	1	1
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(pid) AS cnt_col, COUNT(*) AS cnt_star, COUNT(20240305) AS cnt_const,COUNT(DISTINCT pid) as cnt_distinct, COUNT(1) AS CNT
+FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(pid) AS cnt_col, COUNT(*) AS cnt_star, COUNT(20240305) AS cnt_const,COUNT(DISTINCT pid) as cnt_distinct, COUNT(1) AS CNT
+FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test_vector
+                  filterExpr: (full_date = 20240305) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: pid (type: bigint)
+                    outputColumnNames: pid
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [1]
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count(pid), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(col 1:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 1:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0, 1]
+                      keys: pid (type: bigint)
+                      minReductionHashAggr: 0.4
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint), _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: PARTIAL2
+                    keyExpressions: col 0:bigint
+                    native: false
+                    vectorProcessingMode: STREAMING
+                    projectedOutputColumnNums: [0, 1]
+                keys: KEY._col0 (type: bigint)
+                mode: partial2
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count(_col1), count(_col2), count(_col0)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCount(col 0:bigint) -> bigint
+                      className: VectorGroupByOperator
+                      groupByMode: PARTIAL2
+                      native: false
+                      vectorProcessingMode: STREAMING
+                      projectedOutputColumnNums: [0, 1, 2]
+                  mode: partial2
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    null sort order: 
+                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkEmptyKeyOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 1, 2, 1]
+                  Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT COUNT(DISTINCT pid, 20240305) AS CNT FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(DISTINCT pid, 20240305) AS CNT FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, 20240305) AS CNT FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, 20240305) AS CNT FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test_vector
+                  filterExpr: (full_date = 20240305) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: pid (type: bigint)
+                    outputColumnNames: pid
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [1]
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 1:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: pid (type: bigint)
+                      minReductionHashAggr: 0.4
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:bigint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count(_col0)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 0:bigint) -> bigint
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumnNums: [0]
+                  minReductionHashAggr: 0.4
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    null sort order: 
+                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkEmptyKeyOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT COUNT(DISTINCT pid, full_date) AS CNT FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(DISTINCT pid, full_date) AS CNT FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, full_date) AS CNT FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, full_date) AS CNT FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test_vector
+                  filterExpr: (full_date = 20240305) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: pid (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col0, 20240305)
+                      keys: _col0 (type: bigint), 20240305 (type: int)
+                      minReductionHashAggr: 0.4
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint), _col1 (type: int)
+                        null sort order: zz
+                        sort order: ++
+                        Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                notVectorizedReason: GROUPBY operator: Unsupported COUNT DISTINCT with multiple columns: count([Column[_col0], Const int 20240305]). Hive only supports COUNT(DISTINCT col) in vectorized execution. 
+                vectorized: false
+        Reducer 2 
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col0:0._col0, KEY._col0:0._col1)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT COUNT(DISTINCT pid, full_date, id) AS CNT FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(DISTINCT pid, full_date, id) AS CNT FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, full_date, id) AS CNT FROM test_vector WHERE full_date=20240305
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_vector
+PREHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT COUNT(DISTINCT pid, full_date, id) AS CNT FROM test_vector WHERE full_date=20240305
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Input: default@test_vector@full_date=20240305
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test_vector
+                  filterExpr: (full_date = 20240305) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: pid (type: bigint), id (type: string)
+                    outputColumnNames: _col0, _col2
+                    Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col0, 20240305, _col2)
+                      keys: _col0 (type: bigint), 20240305 (type: int), _col2 (type: string)
+                      minReductionHashAggr: 0.4
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: string)
+                        null sort order: zzz
+                        sort order: +++
+                        Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                notVectorizedReason: GROUPBY operator: Unsupported COUNT DISTINCT with multiple columns: count([Column[_col0], Const int 20240305, Column[_col2]]). Hive only supports COUNT(DISTINCT col) in vectorized execution. 
+                vectorized: false
+        Reducer 2 
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col0:0._col0, KEY._col0:0._col1, KEY._col0:0._col2)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: DROP TABLE test_vector
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_vector
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_vector
+POSTHOOK: query: DROP TABLE test_vector
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_vector
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_vector