diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 47a6e91421..a41db5f280 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -313,7 +313,7 @@ object CometArrayRepeat extends CometExpressionSerde[ArrayRepeat] { object CometArrayCompact extends CometExpressionSerde[Expression] { - override def getSupportLevel(expr: Expression): SupportLevel = Incompatible(None) + override def getSupportLevel(expr: Expression): SupportLevel = Compatible() override def convert( expr: Expression, diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 2c5cebd166..bf8406c428 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -113,6 +113,12 @@ trait CometExprShim extends CommonStringExprs { // val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) // optExprWithInfo(optExpr, wb, wb.children: _*) + // KnownNotContainsNull is a TaggingExpression added in Spark 4.0 that only + // changes schema metadata (containsNull = false). It has no runtime effect, + // so we pass through to the child expression. + case k: KnownNotContainsNull => + exprToProtoInternal(k.child, inputs, binding) + case _ => None } } diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql b/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql index 9b834a4dbd..83cd730978 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql @@ -15,7 +15,6 @@ -- specific language governing permissions and limitations -- under the License. 
--- ConfigMatrix: parquet.enable.dictionary=false,true statement CREATE TABLE test_array_compact(arr array<int>) USING parquet @@ -23,9 +22,28 @@ CREATE TABLE test_array_compact(arr array<int>) USING parquet statement INSERT INTO test_array_compact VALUES (array(1, NULL, 2, NULL, 3)), (array()), (NULL), (array(NULL, NULL)), (array(1, 2, 3)) -query spark_answer_only +-- column argument +query SELECT array_compact(arr) FROM test_array_compact -- literal arguments -query spark_answer_only +query SELECT array_compact(array(1, NULL, 2, NULL, 3)) + +-- string element type +statement +CREATE TABLE test_array_compact_str(arr array<string>) USING parquet + +statement +INSERT INTO test_array_compact_str VALUES (array('a', NULL, 'b', NULL, 'c')), (array()), (NULL), (array(NULL, NULL)), (array('', NULL, '', NULL)) + +query +SELECT array_compact(arr) FROM test_array_compact_str + +-- double element type +query +SELECT array_compact(array(1.0, NULL, 2.0, NULL, 3.0)) + +-- nested array type (removes null arrays from outer, preserves null elements in inner) +query +SELECT array_compact(array(array(1, NULL, 3), NULL, array(NULL, 2, 3)))