From 3327dcc79f3b84260c78a2f06a418252bcd349e6 Mon Sep 17 00:00:00 2001
From: youge325
Date: Fri, 24 Oct 2025 21:05:42 +0800
Subject: [PATCH 1/8] [Bug Fix] Support casting lightweight float formats to
 complex types

---
 .../phi/core/framework/data_type_transform.cc |  72 ++++++++++++
 test/legacy_test/test_complex_cast.py         | 110 ++++++++++++++++++
 2 files changed, 182 insertions(+)

diff --git a/paddle/phi/core/framework/data_type_transform.cc b/paddle/phi/core/framework/data_type_transform.cc
index 6ed397d85d378e..d05d2a2a72c65f 100644
--- a/paddle/phi/core/framework/data_type_transform.cc
+++ b/paddle/phi/core/framework/data_type_transform.cc
@@ -34,6 +34,78 @@ struct CastDataTypeFunctor {
   }
 };
 
+template <>
+struct CastDataTypeFunctor<::phi::dtype::float8_e5m2,
+                           ::phi::dtype::complex<float>> {
+  HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
+      ::phi::dtype::float8_e5m2 in) const {
+    return ::phi::dtype::complex<float>(static_cast<float>(in));
+  }
+};
+
+template <>
+struct CastDataTypeFunctor<::phi::dtype::float8_e5m2,
+                           ::phi::dtype::complex<double>> {
+  HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
+      ::phi::dtype::float8_e5m2 in) const {
+    return ::phi::dtype::complex<double>(static_cast<double>(in));
+  }
+};
+
+template <>
+struct CastDataTypeFunctor<::phi::dtype::float8_e4m3fn,
+                           ::phi::dtype::complex<float>> {
+  HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
+      ::phi::dtype::float8_e4m3fn in) const {
+    return ::phi::dtype::complex<float>(static_cast<float>(in));
+  }
+};
+
+template <>
+struct CastDataTypeFunctor<::phi::dtype::float8_e4m3fn,
+                           ::phi::dtype::complex<double>> {
+  HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
+      ::phi::dtype::float8_e4m3fn in) const {
+    return ::phi::dtype::complex<double>(static_cast<double>(in));
+  }
+};
+
+template <>
+struct CastDataTypeFunctor<::phi::dtype::bfloat16,
+                           ::phi::dtype::complex<float>> {
+  HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
+      ::phi::dtype::bfloat16 in) const {
+    return ::phi::dtype::complex<float>(static_cast<float>(in));
+  }
+};
+
+template <>
+struct CastDataTypeFunctor<::phi::dtype::bfloat16,
+                           ::phi::dtype::complex<double>> {
+  HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
+      ::phi::dtype::bfloat16 in) const {
+    return ::phi::dtype::complex<double>(static_cast<double>(in));
+  }
+};
+
+template <>
+struct CastDataTypeFunctor<::phi::dtype::float16,
+                           ::phi::dtype::complex<float>> {
+  HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
+      ::phi::dtype::float16 in) const {
+    return ::phi::dtype::complex<float>(static_cast<float>(in));
+  }
+};
+
+template <>
+struct CastDataTypeFunctor<::phi::dtype::float16,
+                           ::phi::dtype::complex<double>> {
+  HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
+      ::phi::dtype::float16 in) const {
+    return ::phi::dtype::complex<double>(static_cast<double>(in));
+  }
+};
+
 #if defined(PADDLE_WITH_XPU)
 
 template <typename InType, typename OutType>
diff --git a/test/legacy_test/test_complex_cast.py b/test/legacy_test/test_complex_cast.py
index 348bd8a332f5ef..ba229c36b70b43 100644
--- a/test/legacy_test/test_complex_cast.py
+++ b/test/legacy_test/test_complex_cast.py
@@ -79,6 +79,116 @@ def test_complex64_complex128(self):
             c_128.cast('complex128').numpy(), c_64.numpy(), rtol=1e-05
         )
 
+    @unittest.skipIf(
+        not paddle.is_compiled_with_cuda(),
+        "float16/bfloat16/float8 test runs only on CUDA",
+    )
+    def test_float16_bfloat16_to_complex(self):
+        # Test float16 to complex64/complex128
+        r_fp16 = np.random.random(size=[10, 10]).astype('float16')
+        r_fp16_t = paddle.to_tensor(r_fp16, dtype='float16')
+
+        self.assertEqual(r_fp16_t.cast('complex64').dtype, paddle.complex64)
+        self.assertEqual(r_fp16_t.cast('complex128').dtype, paddle.complex128)
+
+        np.testing.assert_allclose(
+
r_fp16_t.cast('complex64').real().numpy(), + r_fp16.astype('float32'), + rtol=1e-03, + ) + np.testing.assert_allclose( + r_fp16_t.cast('complex128').real().numpy(), + r_fp16.astype('float64'), + rtol=1e-03, + ) + + # Test bfloat16 to complex64/complex128 + r_bf16 = np.random.random(size=[10, 10]).astype('float32') + r_bf16_t = paddle.to_tensor(r_bf16, dtype='bfloat16') + + self.assertEqual(r_bf16_t.cast('complex64').dtype, paddle.complex64) + self.assertEqual(r_bf16_t.cast('complex128').dtype, paddle.complex128) + + np.testing.assert_allclose( + r_bf16_t.cast('complex64').real().numpy(), + r_bf16_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_bf16_t.cast('complex128').real().numpy(), + r_bf16_t.cast('float64').numpy(), + rtol=1e-02, + ) + + @unittest.skipIf( + not paddle.is_compiled_with_cuda(), + "float8 test runs only on CUDA", + ) + def test_float8_to_complex(self): + # Test float8_e4m3fn to complex64/complex128 + r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32') + r_fp32_t = paddle.to_tensor(r_fp32) + r_fp8_e4m3fn_t = r_fp32_t.astype('float8_e4m3fn') + + self.assertEqual( + r_fp8_e4m3fn_t.cast('complex64').dtype, paddle.complex64 + ) + self.assertEqual( + r_fp8_e4m3fn_t.cast('complex128').dtype, paddle.complex128 + ) + + # Verify the real part matches the float32 version + np.testing.assert_allclose( + r_fp8_e4m3fn_t.cast('complex64').real().numpy(), + r_fp8_e4m3fn_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_fp8_e4m3fn_t.cast('complex128').real().numpy(), + r_fp8_e4m3fn_t.cast('float64').numpy(), + rtol=1e-02, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_fp8_e4m3fn_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_fp8_e4m3fn_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + + # Test float8_e5m2 to complex64/complex128 + r_fp8_e5m2_t = r_fp32_t.astype('float8_e5m2') + + self.assertEqual(r_fp8_e5m2_t.cast('complex64').dtype, paddle.complex64) + self.assertEqual( + r_fp8_e5m2_t.cast('complex128').dtype, paddle.complex128 + ) + + # Verify the real part matches the float32 version + np.testing.assert_allclose( + r_fp8_e5m2_t.cast('complex64').real().numpy(), + r_fp8_e5m2_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_fp8_e5m2_t.cast('complex128').real().numpy(), + r_fp8_e5m2_t.cast('float64').numpy(), + rtol=1e-02, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_fp8_e5m2_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_fp8_e5m2_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + if __name__ == '__main__': unittest.main() From 304a0a844e9fb01385d9819e748e0618b320c1ce Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 25 Oct 2025 13:54:24 +0800 Subject: [PATCH 2/8] test: add test description comment to trigger coverage check --- test/legacy_test/test_complex_cast.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/legacy_test/test_complex_cast.py b/test/legacy_test/test_complex_cast.py index ba229c36b70b43..8163a17d1c2320 100644 --- a/test/legacy_test/test_complex_cast.py +++ b/test/legacy_test/test_complex_cast.py @@ -18,6 +18,9 @@ import paddle +# This test file covers casting operations between different data types, +# including lightweight float formats (float8, float16, bfloat16) and complex types. 
+ class TestComplexCastOp(unittest.TestCase): def test_complex_to_real(self): From 826c965e503c3e82a1ffea1383fb4023ae49674c Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 25 Oct 2025 20:58:23 +0800 Subject: [PATCH 3/8] Add new test file for lightweight float to complex casting --- test/legacy_test/test_complex_cast.py | 113 ----------- .../test_lightweight_float_to_complex.py | 175 ++++++++++++++++++ 2 files changed, 175 insertions(+), 113 deletions(-) create mode 100644 test/legacy_test/test_lightweight_float_to_complex.py diff --git a/test/legacy_test/test_complex_cast.py b/test/legacy_test/test_complex_cast.py index 8163a17d1c2320..348bd8a332f5ef 100644 --- a/test/legacy_test/test_complex_cast.py +++ b/test/legacy_test/test_complex_cast.py @@ -18,9 +18,6 @@ import paddle -# This test file covers casting operations between different data types, -# including lightweight float formats (float8, float16, bfloat16) and complex types. - class TestComplexCastOp(unittest.TestCase): def test_complex_to_real(self): @@ -82,116 +79,6 @@ def test_complex64_complex128(self): c_128.cast('complex128').numpy(), c_64.numpy(), rtol=1e-05 ) - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "float16/bfloat16/float8 test runs only on CUDA", - ) - def test_float16_bfloat16_to_complex(self): - # Test float16 to complex64/complex128 - r_fp16 = np.random.random(size=[10, 10]).astype('float16') - r_fp16_t = paddle.to_tensor(r_fp16, dtype='float16') - - self.assertEqual(r_fp16_t.cast('complex64').dtype, paddle.complex64) - self.assertEqual(r_fp16_t.cast('complex128').dtype, paddle.complex128) - - np.testing.assert_allclose( - r_fp16_t.cast('complex64').real().numpy(), - r_fp16.astype('float32'), - rtol=1e-03, - ) - np.testing.assert_allclose( - r_fp16_t.cast('complex128').real().numpy(), - r_fp16.astype('float64'), - rtol=1e-03, - ) - - # Test bfloat16 to complex64/complex128 - r_bf16 = np.random.random(size=[10, 10]).astype('float32') - r_bf16_t = paddle.to_tensor(r_bf16, dtype='bfloat16') - - self.assertEqual(r_bf16_t.cast('complex64').dtype, paddle.complex64) - self.assertEqual(r_bf16_t.cast('complex128').dtype, paddle.complex128) - - np.testing.assert_allclose( - r_bf16_t.cast('complex64').real().numpy(), - r_bf16_t.cast('float32').numpy(), - rtol=1e-02, - ) - np.testing.assert_allclose( - r_bf16_t.cast('complex128').real().numpy(), - r_bf16_t.cast('float64').numpy(), - rtol=1e-02, - ) - - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "float8 test runs only on CUDA", - ) - def test_float8_to_complex(self): - # Test float8_e4m3fn to complex64/complex128 - r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32') - r_fp32_t = paddle.to_tensor(r_fp32) - r_fp8_e4m3fn_t = r_fp32_t.astype('float8_e4m3fn') - - self.assertEqual( - r_fp8_e4m3fn_t.cast('complex64').dtype, paddle.complex64 - ) - self.assertEqual( - r_fp8_e4m3fn_t.cast('complex128').dtype, paddle.complex128 - ) - - # Verify the real part matches the float32 version - np.testing.assert_allclose( - r_fp8_e4m3fn_t.cast('complex64').real().numpy(), - r_fp8_e4m3fn_t.cast('float32').numpy(), - rtol=1e-02, - ) - np.testing.assert_allclose( - r_fp8_e4m3fn_t.cast('complex128').real().numpy(), - r_fp8_e4m3fn_t.cast('float64').numpy(), - rtol=1e-02, - ) - - # Verify the imaginary part is zero - np.testing.assert_array_equal( - r_fp8_e4m3fn_t.cast('complex64').imag().numpy(), - np.zeros([10, 10], dtype='float32'), - ) - np.testing.assert_array_equal( - r_fp8_e4m3fn_t.cast('complex128').imag().numpy(), - np.zeros([10, 10], 
dtype='float64'), - ) - - # Test float8_e5m2 to complex64/complex128 - r_fp8_e5m2_t = r_fp32_t.astype('float8_e5m2') - - self.assertEqual(r_fp8_e5m2_t.cast('complex64').dtype, paddle.complex64) - self.assertEqual( - r_fp8_e5m2_t.cast('complex128').dtype, paddle.complex128 - ) - - # Verify the real part matches the float32 version - np.testing.assert_allclose( - r_fp8_e5m2_t.cast('complex64').real().numpy(), - r_fp8_e5m2_t.cast('float32').numpy(), - rtol=1e-02, - ) - np.testing.assert_allclose( - r_fp8_e5m2_t.cast('complex128').real().numpy(), - r_fp8_e5m2_t.cast('float64').numpy(), - rtol=1e-02, - ) - - # Verify the imaginary part is zero - np.testing.assert_array_equal( - r_fp8_e5m2_t.cast('complex64').imag().numpy(), - np.zeros([10, 10], dtype='float32'), - ) - np.testing.assert_array_equal( - r_fp8_e5m2_t.cast('complex128').imag().numpy(), - np.zeros([10, 10], dtype='float64'), - ) - if __name__ == '__main__': unittest.main() diff --git a/test/legacy_test/test_lightweight_float_to_complex.py b/test/legacy_test/test_lightweight_float_to_complex.py new file mode 100644 index 00000000000000..31e3e4f7027095 --- /dev/null +++ b/test/legacy_test/test_lightweight_float_to_complex.py @@ -0,0 +1,175 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle + + +class TestLightweightFloatToComplex(unittest.TestCase): + """Test casting from lightweight float formats (float8, float16, bfloat16) to complex types.""" + + @unittest.skipIf( + not paddle.is_compiled_with_cuda(), + "float16/bfloat16 test runs only on CUDA", + ) + def test_float16_to_complex(self): + """Test float16 to complex64/complex128 conversion.""" + r_fp16 = np.random.random(size=[10, 10]).astype('float16') + r_fp16_t = paddle.to_tensor(r_fp16, dtype='float16') + + # Test dtype conversion + self.assertEqual(r_fp16_t.cast('complex64').dtype, paddle.complex64) + self.assertEqual(r_fp16_t.cast('complex128').dtype, paddle.complex128) + + # Verify the real part is correct + np.testing.assert_allclose( + r_fp16_t.cast('complex64').real().numpy(), + r_fp16.astype('float32'), + rtol=1e-03, + ) + np.testing.assert_allclose( + r_fp16_t.cast('complex128').real().numpy(), + r_fp16.astype('float64'), + rtol=1e-03, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_fp16_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_fp16_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + + @unittest.skipIf( + not paddle.is_compiled_with_cuda(), + "bfloat16 test runs only on CUDA", + ) + def test_bfloat16_to_complex(self): + """Test bfloat16 to complex64/complex128 conversion.""" + r_bf16 = np.random.random(size=[10, 10]).astype('float32') + r_bf16_t = paddle.to_tensor(r_bf16, dtype='bfloat16') + + # Test dtype conversion + self.assertEqual(r_bf16_t.cast('complex64').dtype, paddle.complex64) + self.assertEqual(r_bf16_t.cast('complex128').dtype, paddle.complex128) + + # Verify the real part is correct + np.testing.assert_allclose( + r_bf16_t.cast('complex64').real().numpy(), + r_bf16_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_bf16_t.cast('complex128').real().numpy(), + r_bf16_t.cast('float64').numpy(), + rtol=1e-02, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_bf16_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_bf16_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + + @unittest.skipIf( + not paddle.is_compiled_with_cuda(), + "float8 test runs only on CUDA", + ) + def test_float8_e4m3fn_to_complex(self): + """Test float8_e4m3fn to complex64/complex128 conversion.""" + r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32') + r_fp32_t = paddle.to_tensor(r_fp32) + r_fp8_e4m3fn_t = r_fp32_t.astype('float8_e4m3fn') + + # Test dtype conversion + self.assertEqual( + r_fp8_e4m3fn_t.cast('complex64').dtype, paddle.complex64 + ) + self.assertEqual( + r_fp8_e4m3fn_t.cast('complex128').dtype, paddle.complex128 + ) + + # Verify the real part matches the float32 version + np.testing.assert_allclose( + r_fp8_e4m3fn_t.cast('complex64').real().numpy(), + r_fp8_e4m3fn_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_fp8_e4m3fn_t.cast('complex128').real().numpy(), + r_fp8_e4m3fn_t.cast('float64').numpy(), + rtol=1e-02, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_fp8_e4m3fn_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_fp8_e4m3fn_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + + 
@unittest.skipIf( + not paddle.is_compiled_with_cuda(), + "float8 test runs only on CUDA", + ) + def test_float8_e5m2_to_complex(self): + """Test float8_e5m2 to complex64/complex128 conversion.""" + r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32') + r_fp32_t = paddle.to_tensor(r_fp32) + r_fp8_e5m2_t = r_fp32_t.astype('float8_e5m2') + + # Test dtype conversion + self.assertEqual(r_fp8_e5m2_t.cast('complex64').dtype, paddle.complex64) + self.assertEqual( + r_fp8_e5m2_t.cast('complex128').dtype, paddle.complex128 + ) + + # Verify the real part matches the float32 version + np.testing.assert_allclose( + r_fp8_e5m2_t.cast('complex64').real().numpy(), + r_fp8_e5m2_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_fp8_e5m2_t.cast('complex128').real().numpy(), + r_fp8_e5m2_t.cast('float64').numpy(), + rtol=1e-02, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_fp8_e5m2_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_fp8_e5m2_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + + +if __name__ == '__main__': + unittest.main() From 6d01aa328062598c49ab85298da3942f85b97cdf Mon Sep 17 00:00:00 2001 From: youge325 Date: Sun, 26 Oct 2025 11:27:29 +0800 Subject: [PATCH 4/8] Remove @unittest.skipIf decorators to ensure tests run on CPU --- .../test_lightweight_float_to_complex.py | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/test/legacy_test/test_lightweight_float_to_complex.py b/test/legacy_test/test_lightweight_float_to_complex.py index 31e3e4f7027095..cf3d32148bae94 100644 --- a/test/legacy_test/test_lightweight_float_to_complex.py +++ b/test/legacy_test/test_lightweight_float_to_complex.py @@ -22,12 +22,10 @@ class TestLightweightFloatToComplex(unittest.TestCase): """Test casting from lightweight float formats (float8, float16, bfloat16) to complex types.""" - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "float16/bfloat16 test runs only on CUDA", - ) def test_float16_to_complex(self): """Test float16 to complex64/complex128 conversion.""" + paddle.set_device('cpu') + r_fp16 = np.random.random(size=[10, 10]).astype('float16') r_fp16_t = paddle.to_tensor(r_fp16, dtype='float16') @@ -57,12 +55,10 @@ def test_float16_to_complex(self): np.zeros([10, 10], dtype='float64'), ) - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "bfloat16 test runs only on CUDA", - ) def test_bfloat16_to_complex(self): """Test bfloat16 to complex64/complex128 conversion.""" + paddle.set_device('cpu') + r_bf16 = np.random.random(size=[10, 10]).astype('float32') r_bf16_t = paddle.to_tensor(r_bf16, dtype='bfloat16') @@ -92,12 +88,10 @@ def test_bfloat16_to_complex(self): np.zeros([10, 10], dtype='float64'), ) - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "float8 test runs only on CUDA", - ) def test_float8_e4m3fn_to_complex(self): """Test float8_e4m3fn to complex64/complex128 conversion.""" + paddle.set_device('cpu') + r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32') r_fp32_t = paddle.to_tensor(r_fp32) r_fp8_e4m3fn_t = r_fp32_t.astype('float8_e4m3fn') @@ -132,12 +126,10 @@ def test_float8_e4m3fn_to_complex(self): np.zeros([10, 10], dtype='float64'), ) - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "float8 test runs only on CUDA", - ) def test_float8_e5m2_to_complex(self): """Test float8_e5m2 to complex64/complex128 conversion.""" + 
paddle.set_device('cpu')
+
         r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32')
         r_fp32_t = paddle.to_tensor(r_fp32)
         r_fp8_e5m2_t = r_fp32_t.astype('float8_e5m2')

From e185958381921f9a703405407d256a3f329c2f9d Mon Sep 17 00:00:00 2001
From: youge325
Date: Sun, 26 Oct 2025 19:55:59 +0800
Subject: [PATCH 5/8] Add C++ tests for lightweight float to complex
 conversions

---
 .../framework/data_type_transform_test.cc     | 160 ++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/test/cpp/fluid/framework/data_type_transform_test.cc b/test/cpp/fluid/framework/data_type_transform_test.cc
index 6a510d21acdca4..c94b9a8d5d7da1 100644
--- a/test/cpp/fluid/framework/data_type_transform_test.cc
+++ b/test/cpp/fluid/framework/data_type_transform_test.cc
@@ -395,4 +395,164 @@ TEST(DataTypeTransform, CPUTransform) {
       EXPECT_EQ(ptr[i], static_cast(in_data_bool[i]));
     }
   }
+
+  // data type transform from lightweight float formats to complex types
+  {
+    auto kernel_float8_e5m2 = phi::KernelKey(
+        place, phi::DataLayout::ALL_LAYOUT, phi::DataType::FLOAT8_E5M2);
+    auto kernel_float8_e4m3fn = phi::KernelKey(
+        place, phi::DataLayout::ALL_LAYOUT, phi::DataType::FLOAT8_E4M3FN);
+    auto kernel_complex64 = phi::KernelKey(
+        place, phi::DataLayout::ALL_LAYOUT, phi::DataType::COMPLEX64);
+    auto kernel_complex128 = phi::KernelKey(
+        place, phi::DataLayout::ALL_LAYOUT, phi::DataType::COMPLEX128);
+
+    phi::DenseTensor in;
+    phi::DenseTensor out;
+    int data_number = 2 * 3;
+
+    // Test float16 to complex64
+    {
+      phi::dtype::float16* ptr = in.mutable_data<phi::dtype::float16>(
+          common::make_ddim({2, 3}), place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::float16>(i);
+      }
+
+      paddle::framework::TransDataType(kernel_fp16, kernel_complex64, in, &out);
+      phi::dtype::complex<float>* out_data =
+          out.data<phi::dtype::complex<float>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<float>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0f);
+      }
+    }
+
+    // Test float16 to complex128
+    {
+      phi::dtype::float16* ptr = in.mutable_data<phi::dtype::float16>(
+          common::make_ddim({2, 3}), place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::float16>(i);
+      }
+
+      paddle::framework::TransDataType(
+          kernel_fp16, kernel_complex128, in, &out);
+      phi::dtype::complex<double>* out_data =
+          out.data<phi::dtype::complex<double>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<double>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0);
+      }
+    }
+
+    // Test bfloat16 to complex64
+    {
+      phi::dtype::bfloat16* ptr = in.mutable_data<phi::dtype::bfloat16>(
+          common::make_ddim({2, 3}), place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::bfloat16>(i);
+      }
+
+      paddle::framework::TransDataType(kernel_bf16, kernel_complex64, in, &out);
+      phi::dtype::complex<float>* out_data =
+          out.data<phi::dtype::complex<float>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<float>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0f);
+      }
+    }
+
+    // Test bfloat16 to complex128
+    {
+      phi::dtype::bfloat16* ptr = in.mutable_data<phi::dtype::bfloat16>(
+          common::make_ddim({2, 3}), place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::bfloat16>(i);
+      }
+
+      paddle::framework::TransDataType(
+          kernel_bf16, kernel_complex128, in, &out);
+      phi::dtype::complex<double>* out_data =
+          out.data<phi::dtype::complex<double>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<double>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0);
+      }
+    }
+
+    // Test float8_e4m3fn to complex64
+    {
+      phi::dtype::float8_e4m3fn* ptr =
+          in.mutable_data<phi::dtype::float8_e4m3fn>(common::make_ddim({2, 3}),
+                                                     place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::float8_e4m3fn>(i);
+      }
+
+
paddle::framework::TransDataType(
+          kernel_float8_e4m3fn, kernel_complex64, in, &out);
+      phi::dtype::complex<float>* out_data =
+          out.data<phi::dtype::complex<float>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<float>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0f);
+      }
+    }
+
+    // Test float8_e4m3fn to complex128
+    {
+      phi::dtype::float8_e4m3fn* ptr =
+          in.mutable_data<phi::dtype::float8_e4m3fn>(common::make_ddim({2, 3}),
+                                                     place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::float8_e4m3fn>(i);
+      }
+
+      paddle::framework::TransDataType(
+          kernel_float8_e4m3fn, kernel_complex128, in, &out);
+      phi::dtype::complex<double>* out_data =
+          out.data<phi::dtype::complex<double>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<double>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0);
+      }
+    }
+
+    // Test float8_e5m2 to complex64
+    {
+      phi::dtype::float8_e5m2* ptr = in.mutable_data<phi::dtype::float8_e5m2>(
+          common::make_ddim({2, 3}), place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::float8_e5m2>(i);
+      }
+
+      paddle::framework::TransDataType(
+          kernel_float8_e5m2, kernel_complex64, in, &out);
+      phi::dtype::complex<float>* out_data =
+          out.data<phi::dtype::complex<float>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<float>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0f);
+      }
+    }
+
+    // Test float8_e5m2 to complex128
+    {
+      phi::dtype::float8_e5m2* ptr = in.mutable_data<phi::dtype::float8_e5m2>(
+          common::make_ddim({2, 3}), place);
+      for (int i = 0; i < data_number; ++i) {
+        ptr[i] = static_cast<phi::dtype::float8_e5m2>(i);
+      }
+
+      paddle::framework::TransDataType(
+          kernel_float8_e5m2, kernel_complex128, in, &out);
+      phi::dtype::complex<double>* out_data =
+          out.data<phi::dtype::complex<double>>();
+      for (int i = 0; i < data_number; ++i) {
+        EXPECT_EQ(out_data[i].real, static_cast<double>(ptr[i]));
+        EXPECT_EQ(out_data[i].imag, 0.0);
+      }
+    }
+  }
 }

From 8b26d8275a8c81c499bd5d1dcee6302d4c438d2e Mon Sep 17 00:00:00 2001
From: youge325
Date: Fri, 24 Oct 2025 21:05:42 +0800
Subject: [PATCH 6/8] [Bug Fix] Support casting lightweight float formats to
 complex types

---
 test/legacy_test/test_complex_cast.py | 110 ++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/test/legacy_test/test_complex_cast.py b/test/legacy_test/test_complex_cast.py
index 348bd8a332f5ef..ba229c36b70b43 100644
--- a/test/legacy_test/test_complex_cast.py
+++ b/test/legacy_test/test_complex_cast.py
@@ -79,6 +79,116 @@ def test_complex64_complex128(self):
             c_128.cast('complex128').numpy(), c_64.numpy(), rtol=1e-05
         )
 
+    @unittest.skipIf(
+        not paddle.is_compiled_with_cuda(),
+        "float16/bfloat16/float8 test runs only on CUDA",
+    )
+    def test_float16_bfloat16_to_complex(self):
+        # Test float16 to complex64/complex128
+        r_fp16 = np.random.random(size=[10, 10]).astype('float16')
+        r_fp16_t = paddle.to_tensor(r_fp16, dtype='float16')
+
+        self.assertEqual(r_fp16_t.cast('complex64').dtype, paddle.complex64)
+        self.assertEqual(r_fp16_t.cast('complex128').dtype, paddle.complex128)
+
+        np.testing.assert_allclose(
+            r_fp16_t.cast('complex64').real().numpy(),
+            r_fp16.astype('float32'),
+            rtol=1e-03,
+        )
+        np.testing.assert_allclose(
+            r_fp16_t.cast('complex128').real().numpy(),
+            r_fp16.astype('float64'),
+            rtol=1e-03,
+        )
+
+        # Test bfloat16 to complex64/complex128
+        r_bf16 = np.random.random(size=[10, 10]).astype('float32')
+        r_bf16_t = paddle.to_tensor(r_bf16, dtype='bfloat16')
+
+        self.assertEqual(r_bf16_t.cast('complex64').dtype, paddle.complex64)
+        self.assertEqual(r_bf16_t.cast('complex128').dtype, paddle.complex128)
+
+        np.testing.assert_allclose(
+            r_bf16_t.cast('complex64').real().numpy(),
+            r_bf16_t.cast('float32').numpy(),
+            rtol=1e-02,
+        )
+
np.testing.assert_allclose( + r_bf16_t.cast('complex128').real().numpy(), + r_bf16_t.cast('float64').numpy(), + rtol=1e-02, + ) + + @unittest.skipIf( + not paddle.is_compiled_with_cuda(), + "float8 test runs only on CUDA", + ) + def test_float8_to_complex(self): + # Test float8_e4m3fn to complex64/complex128 + r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32') + r_fp32_t = paddle.to_tensor(r_fp32) + r_fp8_e4m3fn_t = r_fp32_t.astype('float8_e4m3fn') + + self.assertEqual( + r_fp8_e4m3fn_t.cast('complex64').dtype, paddle.complex64 + ) + self.assertEqual( + r_fp8_e4m3fn_t.cast('complex128').dtype, paddle.complex128 + ) + + # Verify the real part matches the float32 version + np.testing.assert_allclose( + r_fp8_e4m3fn_t.cast('complex64').real().numpy(), + r_fp8_e4m3fn_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_fp8_e4m3fn_t.cast('complex128').real().numpy(), + r_fp8_e4m3fn_t.cast('float64').numpy(), + rtol=1e-02, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_fp8_e4m3fn_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_fp8_e4m3fn_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + + # Test float8_e5m2 to complex64/complex128 + r_fp8_e5m2_t = r_fp32_t.astype('float8_e5m2') + + self.assertEqual(r_fp8_e5m2_t.cast('complex64').dtype, paddle.complex64) + self.assertEqual( + r_fp8_e5m2_t.cast('complex128').dtype, paddle.complex128 + ) + + # Verify the real part matches the float32 version + np.testing.assert_allclose( + r_fp8_e5m2_t.cast('complex64').real().numpy(), + r_fp8_e5m2_t.cast('float32').numpy(), + rtol=1e-02, + ) + np.testing.assert_allclose( + r_fp8_e5m2_t.cast('complex128').real().numpy(), + r_fp8_e5m2_t.cast('float64').numpy(), + rtol=1e-02, + ) + + # Verify the imaginary part is zero + np.testing.assert_array_equal( + r_fp8_e5m2_t.cast('complex64').imag().numpy(), + np.zeros([10, 10], dtype='float32'), + ) + np.testing.assert_array_equal( + r_fp8_e5m2_t.cast('complex128').imag().numpy(), + np.zeros([10, 10], dtype='float64'), + ) + if __name__ == '__main__': unittest.main() From 88d3dd4e383df497b31b3c5922dc8ff3b14d0104 Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 25 Oct 2025 13:54:24 +0800 Subject: [PATCH 7/8] test: add test description comment to trigger coverage check --- test/legacy_test/test_complex_cast.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/legacy_test/test_complex_cast.py b/test/legacy_test/test_complex_cast.py index ba229c36b70b43..8163a17d1c2320 100644 --- a/test/legacy_test/test_complex_cast.py +++ b/test/legacy_test/test_complex_cast.py @@ -18,6 +18,9 @@ import paddle +# This test file covers casting operations between different data types, +# including lightweight float formats (float8, float16, bfloat16) and complex types. 
+ class TestComplexCastOp(unittest.TestCase): def test_complex_to_real(self): From e016bf160626c89386d14a91170b1b19b7b2f68f Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 25 Oct 2025 20:58:23 +0800 Subject: [PATCH 8/8] Add new test file for lightweight float to complex casting --- test/legacy_test/test_complex_cast.py | 113 -------------------------- 1 file changed, 113 deletions(-) diff --git a/test/legacy_test/test_complex_cast.py b/test/legacy_test/test_complex_cast.py index 8163a17d1c2320..348bd8a332f5ef 100644 --- a/test/legacy_test/test_complex_cast.py +++ b/test/legacy_test/test_complex_cast.py @@ -18,9 +18,6 @@ import paddle -# This test file covers casting operations between different data types, -# including lightweight float formats (float8, float16, bfloat16) and complex types. - class TestComplexCastOp(unittest.TestCase): def test_complex_to_real(self): @@ -82,116 +79,6 @@ def test_complex64_complex128(self): c_128.cast('complex128').numpy(), c_64.numpy(), rtol=1e-05 ) - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "float16/bfloat16/float8 test runs only on CUDA", - ) - def test_float16_bfloat16_to_complex(self): - # Test float16 to complex64/complex128 - r_fp16 = np.random.random(size=[10, 10]).astype('float16') - r_fp16_t = paddle.to_tensor(r_fp16, dtype='float16') - - self.assertEqual(r_fp16_t.cast('complex64').dtype, paddle.complex64) - self.assertEqual(r_fp16_t.cast('complex128').dtype, paddle.complex128) - - np.testing.assert_allclose( - r_fp16_t.cast('complex64').real().numpy(), - r_fp16.astype('float32'), - rtol=1e-03, - ) - np.testing.assert_allclose( - r_fp16_t.cast('complex128').real().numpy(), - r_fp16.astype('float64'), - rtol=1e-03, - ) - - # Test bfloat16 to complex64/complex128 - r_bf16 = np.random.random(size=[10, 10]).astype('float32') - r_bf16_t = paddle.to_tensor(r_bf16, dtype='bfloat16') - - self.assertEqual(r_bf16_t.cast('complex64').dtype, paddle.complex64) - self.assertEqual(r_bf16_t.cast('complex128').dtype, paddle.complex128) - - np.testing.assert_allclose( - r_bf16_t.cast('complex64').real().numpy(), - r_bf16_t.cast('float32').numpy(), - rtol=1e-02, - ) - np.testing.assert_allclose( - r_bf16_t.cast('complex128').real().numpy(), - r_bf16_t.cast('float64').numpy(), - rtol=1e-02, - ) - - @unittest.skipIf( - not paddle.is_compiled_with_cuda(), - "float8 test runs only on CUDA", - ) - def test_float8_to_complex(self): - # Test float8_e4m3fn to complex64/complex128 - r_fp32 = np.random.uniform(1.0, 10.0, size=[10, 10]).astype('float32') - r_fp32_t = paddle.to_tensor(r_fp32) - r_fp8_e4m3fn_t = r_fp32_t.astype('float8_e4m3fn') - - self.assertEqual( - r_fp8_e4m3fn_t.cast('complex64').dtype, paddle.complex64 - ) - self.assertEqual( - r_fp8_e4m3fn_t.cast('complex128').dtype, paddle.complex128 - ) - - # Verify the real part matches the float32 version - np.testing.assert_allclose( - r_fp8_e4m3fn_t.cast('complex64').real().numpy(), - r_fp8_e4m3fn_t.cast('float32').numpy(), - rtol=1e-02, - ) - np.testing.assert_allclose( - r_fp8_e4m3fn_t.cast('complex128').real().numpy(), - r_fp8_e4m3fn_t.cast('float64').numpy(), - rtol=1e-02, - ) - - # Verify the imaginary part is zero - np.testing.assert_array_equal( - r_fp8_e4m3fn_t.cast('complex64').imag().numpy(), - np.zeros([10, 10], dtype='float32'), - ) - np.testing.assert_array_equal( - r_fp8_e4m3fn_t.cast('complex128').imag().numpy(), - np.zeros([10, 10], dtype='float64'), - ) - - # Test float8_e5m2 to complex64/complex128 - r_fp8_e5m2_t = r_fp32_t.astype('float8_e5m2') - - 
self.assertEqual(r_fp8_e5m2_t.cast('complex64').dtype, paddle.complex64) - self.assertEqual( - r_fp8_e5m2_t.cast('complex128').dtype, paddle.complex128 - ) - - # Verify the real part matches the float32 version - np.testing.assert_allclose( - r_fp8_e5m2_t.cast('complex64').real().numpy(), - r_fp8_e5m2_t.cast('float32').numpy(), - rtol=1e-02, - ) - np.testing.assert_allclose( - r_fp8_e5m2_t.cast('complex128').real().numpy(), - r_fp8_e5m2_t.cast('float64').numpy(), - rtol=1e-02, - ) - - # Verify the imaginary part is zero - np.testing.assert_array_equal( - r_fp8_e5m2_t.cast('complex64').imag().numpy(), - np.zeros([10, 10], dtype='float32'), - ) - np.testing.assert_array_equal( - r_fp8_e5m2_t.cast('complex128').imag().numpy(), - np.zeros([10, 10], dtype='float64'), - ) - if __name__ == '__main__': unittest.main()
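
Note: a minimal usage sketch of the behavior this series enables (illustrative
only; it assumes a Paddle build containing these patches and, as in patch 4,
performs the cast on CPU — the tensor names, shape, and values are arbitrary):

    import numpy as np
    import paddle

    paddle.set_device('cpu')
    x = paddle.to_tensor(np.random.random([4, 4]).astype('float16'))
    c = x.cast('complex64')         # float16/bfloat16/float8 sources now cast
    print(c.dtype)                  # paddle.complex64
    print(c.real().numpy().dtype)   # float32, carrying the float16 values
    print(c.imag().numpy().max())   # 0.0, imaginary part is zero-filled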