Skip to content

Commit 3f9b89e

Browse files
committed
assert CometFilterExec present in non-identity transform residual tests
1 parent 560887c commit 3f9b89e

1 file changed

Lines changed: 24 additions & 7 deletions

File tree

spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import java.io.File
2323
import java.nio.file.Files
2424

2525
import org.apache.spark.sql.CometTestBase
26-
import org.apache.spark.sql.comet.CometIcebergNativeScanExec
26+
import org.apache.spark.sql.comet.{CometFilterExec, CometIcebergNativeScanExec}
2727
import org.apache.spark.sql.execution.SparkPlan
2828
import org.apache.spark.sql.types.{StringType, TimestampType}
2929

@@ -66,6 +66,23 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
6666
s"Expected exactly 1 CometIcebergNativeScanExec but found ${icebergScans.length}. Plan:\n$cometPlan")
6767
}
6868

69+
/**
70+
* Verifies query correctness, exactly one CometIcebergNativeScanExec, and at least one
71+
* CometFilterExec in the plan. Used for non-identity transform residual tests where
72+
* iceberg-rust skips row-group filtering and CometFilter applies the predicate post-scan.
*
* The plan that is inspected is the second element of the pair returned by
* `checkSparkAnswer(query)`; the first element is ignored. Both assertions include the
* full plan text in their failure message so a missing or duplicated operator can be
* diagnosed directly from the test output.
*
* NOTE(review): `checkSparkAnswer`, `collectIcebergNativeScans` and `collect` are defined
* outside this view; their exact semantics should be confirmed there.
*
* @param query
*   SQL text passed unchanged to `checkSparkAnswer`.
73+
*/
74+
private def checkIcebergNativeScanWithFilter(query: String): Unit = {
75+
// Run the query once and keep only the plan for the structural checks below.
val (_, cometPlan) = checkSparkAnswer(query)
76+
val icebergScans = collectIcebergNativeScans(cometPlan)
77+
// Exactly one native Iceberg scan: zero or several both fail the assertion.
assert(
78+
icebergScans.length == 1,
79+
s"Expected exactly 1 CometIcebergNativeScanExec but found ${icebergScans.length}. Plan:\n$cometPlan")
80+
// Gather every CometFilterExec node matched in the plan; one or more is sufficient.
val filters = collect(cometPlan) { case f: CometFilterExec => f }
81+
assert(
82+
filters.nonEmpty,
83+
s"Expected CometFilterExec for post-scan filtering but found none. Plan:\n$cometPlan")
84+
}
85+
6986
test("create and query simple Iceberg table with Hadoop catalog") {
7087
assume(icebergAvailable, "Iceberg not available in classpath")
7188

@@ -2319,7 +2336,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
23192336
// This filter creates a residual with truncate transform
23202337
// The partition can narrow down to 'alpha' prefix, but exact match
23212338
// requires post-scan filtering
2322-
checkIcebergNativeScan(
2339+
checkIcebergNativeScanWithFilter(
23232340
"SELECT * FROM test_cat.db.truncate_residual_test WHERE name = 'alpha_2' ORDER BY id")
23242341

23252342
// Verify correct results
@@ -2366,7 +2383,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
23662383
// This filter creates a residual with bucket transform
23672384
// The partition pruning uses bucket hash, but exact id match
23682385
// requires post-scan filtering
2369-
checkIcebergNativeScan(
2386+
checkIcebergNativeScanWithFilter(
23702387
"SELECT * FROM test_cat.db.bucket_residual_test WHERE id = 42 ORDER BY id")
23712388

23722389
// Verify correct results
@@ -2416,7 +2433,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
24162433
// This filter creates a residual with year transform
24172434
// Partition pruning narrows to 2023, but exact date match
24182435
// requires post-scan filtering
2419-
checkIcebergNativeScan(
2436+
checkIcebergNativeScanWithFilter(
24202437
"SELECT * FROM test_cat.db.year_residual_test WHERE event_date = DATE '2023-06-20'")
24212438

24222439
// Verify correct results
@@ -2471,7 +2488,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
24712488
// This filter creates a residual with month transform
24722489
// Partition pruning narrows to June 2023, but exact date match
24732490
// requires post-scan filtering
2474-
checkIcebergNativeScan(
2491+
checkIcebergNativeScanWithFilter(
24752492
"SELECT * FROM test_cat.db.month_residual_test WHERE event_date = DATE '2023-06-15'")
24762493

24772494
// Verify correct results
@@ -2526,7 +2543,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
25262543
// This filter creates a residual with day transform
25272544
// Partition pruning narrows to June 15, but exact timestamp match
25282545
// requires post-scan filtering
2529-
checkIcebergNativeScan(
2546+
checkIcebergNativeScanWithFilter(
25302547
"SELECT * FROM test_cat.db.day_residual_test WHERE event_time = TIMESTAMP '2023-06-15 14:30:00'")
25312548

25322549
// Verify correct results
@@ -2580,7 +2597,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper {
25802597
// This filter creates a residual with hour transform
25812598
// Partition pruning narrows to hour 14 (2pm), but exact timestamp
25822599
// with seconds requires post-scan filtering
2583-
checkIcebergNativeScan(
2600+
checkIcebergNativeScanWithFilter(
25842601
"SELECT * FROM test_cat.db.hour_residual_test WHERE event_time = TIMESTAMP '2023-06-15 14:30:45'")
25852602

25862603
// Verify correct results

0 commit comments

Comments
 (0)