Skip to content

Commit a59b126

Browse files
zhangbowen-coder, Nikhil-Narayanan, zorexsalvo, star1327p, Aniketsy
authored
TST: Every function ensure_clean in the file /pandas/tests/io/test_common.py and /pandas/tests/io/pytables/test_store.py has been replaced. (#62895)
Co-authored-by: Nikhil <72097440+Nikhil-Narayanan@users.noreply.github.com> Co-authored-by: Zorex Salvo <zorexsalvo@gmail.com> Co-authored-by: Christine P. Chai <star1327p@gmail.com> Co-authored-by: Aniket <148300120+Aniketsy@users.noreply.github.com> Co-authored-by: Wang Haoxiang <2795352227@qq,com> Co-authored-by: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>
1 parent 54c26ec commit a59b126

File tree

2 files changed

+94
-108
lines changed

2 files changed

+94
-108
lines changed

pandas/tests/io/pytables/test_store.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,13 @@
4141
tables = pytest.importorskip("tables")
4242

4343

44-
def test_context(tmp_path):
45-
path1 = tmp_path / "test1.h5"
44+
def test_context(setup_path, tmp_path):
4645
try:
47-
with HDFStore(path1) as tbl:
46+
with HDFStore(tmp_path / setup_path) as tbl:
4847
raise ValueError("blah")
4948
except ValueError:
5049
pass
51-
52-
path2 = tmp_path / "test2.h5"
53-
with HDFStore(path2) as tbl:
50+
with HDFStore(tmp_path / setup_path) as tbl:
5451
tbl["a"] = DataFrame(
5552
1.1 * np.arange(120).reshape((30, 4)),
5653
columns=Index(list("ABCD"), dtype=object),
@@ -979,11 +976,10 @@ def test_copy(propindexes, temp_file):
979976
index=Index([f"i-{i}" for i in range(30)]),
980977
)
981978

982-
path = temp_file
983-
with HDFStore(path) as st:
979+
with HDFStore(temp_file) as st:
984980
st.append("df", df, data_columns=["A"])
985981
with tempfile.NamedTemporaryFile() as new_f:
986-
with HDFStore(path) as store:
982+
with HDFStore(temp_file) as store:
987983
with contextlib.closing(
988984
store.copy(new_f.name, keys=None, propindexes=propindexes)
989985
) as tstore:

pandas/tests/io/test_common.py

Lines changed: 89 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,11 @@ def test_stringify_path_fspath(self):
8686
result = icom.stringify_path(p)
8787
assert result == "foo/bar.csv"
8888

89-
def test_stringify_file_and_path_like(self):
89+
def test_stringify_file_and_path_like(self, temp_file):
9090
# GH 38125: do not stringify file objects that are also path-like
9191
fsspec = pytest.importorskip("fsspec")
92-
with tm.ensure_clean() as path:
93-
with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
94-
assert fsspec_obj == icom.stringify_path(fsspec_obj)
92+
with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj:
93+
assert fsspec_obj == icom.stringify_path(fsspec_obj)
9594

9695
@pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
9796
def test_infer_compression_from_path(self, compression_format, path_type):
@@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath):
338337
("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
339338
],
340339
)
341-
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
340+
def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
342341
if writer_name in ["to_latex"]: # uses Styler implementation
343342
pytest.importorskip("jinja2")
344-
p1 = tm.ensure_clean("string")
345-
p2 = tm.ensure_clean("fspath")
343+
string = str(tmp_path / "string")
344+
fspath = str(tmp_path / "fspath")
346345
df = pd.DataFrame({"A": [1, 2]})
347346

348-
with p1 as string, p2 as fspath:
349-
pytest.importorskip(module)
350-
mypath = CustomFSPath(fspath)
351-
writer = getattr(df, writer_name)
352-
353-
writer(string, **writer_kwargs)
354-
writer(mypath, **writer_kwargs)
355-
with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
356-
if writer_name == "to_excel":
357-
# binary representation of excel contains time creation
358-
# data that causes flaky CI failures
359-
result = pd.read_excel(f_str, **writer_kwargs)
360-
expected = pd.read_excel(f_path, **writer_kwargs)
361-
tm.assert_frame_equal(result, expected)
362-
else:
363-
result = f_str.read()
364-
expected = f_path.read()
365-
assert result == expected
366-
367-
def test_write_fspath_hdf5(self):
347+
pytest.importorskip(module)
348+
mypath = CustomFSPath(fspath)
349+
writer = getattr(df, writer_name)
350+
351+
writer(string, **writer_kwargs)
352+
writer(mypath, **writer_kwargs)
353+
with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
354+
if writer_name == "to_excel":
355+
# binary representation of excel contains time creation
356+
# data that causes flaky CI failures
357+
result = pd.read_excel(f_str, **writer_kwargs)
358+
expected = pd.read_excel(f_path, **writer_kwargs)
359+
tm.assert_frame_equal(result, expected)
360+
else:
361+
result = f_str.read()
362+
expected = f_path.read()
363+
assert result == expected
364+
365+
def test_write_fspath_hdf5(self, tmp_path):
368366
# Same test as write_fspath_all, except HDF5 files aren't
369367
# necessarily byte-for-byte identical for a given dataframe, so we'll
370368
# have to read and compare equality
371369
pytest.importorskip("tables")
372370

373371
df = pd.DataFrame({"A": [1, 2]})
374-
p1 = tm.ensure_clean("string")
375-
p2 = tm.ensure_clean("fspath")
372+
string = str(tmp_path / "string")
373+
fspath = str(tmp_path / "fspath")
376374

377-
with p1 as string, p2 as fspath:
378-
mypath = CustomFSPath(fspath)
379-
df.to_hdf(mypath, key="bar")
380-
df.to_hdf(string, key="bar")
375+
mypath = CustomFSPath(fspath)
376+
df.to_hdf(mypath, key="bar")
377+
df.to_hdf(string, key="bar")
381378

382-
result = pd.read_hdf(fspath, key="bar")
383-
expected = pd.read_hdf(string, key="bar")
379+
result = pd.read_hdf(fspath, key="bar")
380+
expected = pd.read_hdf(string, key="bar")
384381

385382
tm.assert_frame_equal(result, expected)
386383

@@ -432,35 +429,33 @@ def test_next(self, mmap_file):
432429
with pytest.raises(StopIteration, match=r"^$"):
433430
next(wrapper)
434431

435-
def test_unknown_engine(self):
436-
with tm.ensure_clean() as path:
437-
df = pd.DataFrame(
438-
1.1 * np.arange(120).reshape((30, 4)),
439-
columns=pd.Index(list("ABCD")),
440-
index=pd.Index([f"i-{i}" for i in range(30)]),
441-
)
442-
df.to_csv(path)
443-
with pytest.raises(ValueError, match="Unknown engine"):
444-
pd.read_csv(path, engine="pyt")
445-
446-
def test_binary_mode(self):
432+
def test_unknown_engine(self, temp_file):
433+
df = pd.DataFrame(
434+
1.1 * np.arange(120).reshape((30, 4)),
435+
columns=pd.Index(list("ABCD")),
436+
index=pd.Index([f"i-{i}" for i in range(30)]),
437+
)
438+
df.to_csv(temp_file)
439+
with pytest.raises(ValueError, match="Unknown engine"):
440+
pd.read_csv(temp_file, engine="pyt")
441+
442+
def test_binary_mode(self, temp_file):
447443
"""
448444
'encoding' shouldn't be passed to 'open' in binary mode.
449445
450446
GH 35058
451447
"""
452-
with tm.ensure_clean() as path:
453-
df = pd.DataFrame(
454-
1.1 * np.arange(120).reshape((30, 4)),
455-
columns=pd.Index(list("ABCD")),
456-
index=pd.Index([f"i-{i}" for i in range(30)]),
457-
)
458-
df.to_csv(path, mode="w+b")
459-
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
448+
df = pd.DataFrame(
449+
1.1 * np.arange(120).reshape((30, 4)),
450+
columns=pd.Index(list("ABCD")),
451+
index=pd.Index([f"i-{i}" for i in range(30)]),
452+
)
453+
df.to_csv(temp_file, mode="w+b")
454+
tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0))
460455

461456
@pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
462457
@pytest.mark.parametrize("compression_", ["bz2", "xz"])
463-
def test_warning_missing_utf_bom(self, encoding, compression_):
458+
def test_warning_missing_utf_bom(self, encoding, compression_, temp_file):
464459
"""
465460
bz2 and xz do not write the byte order mark (BOM) for utf-16/32.
466461
@@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
473468
columns=pd.Index(list("ABCD")),
474469
index=pd.Index([f"i-{i}" for i in range(30)]),
475470
)
476-
with tm.ensure_clean() as path:
477-
with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
478-
df.to_csv(path, compression=compression_, encoding=encoding)
471+
with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
472+
df.to_csv(temp_file, compression=compression_, encoding=encoding)
479473

480-
# reading should fail (otherwise we wouldn't need the warning)
481-
msg = (
482-
r"UTF-\d+ stream does not start with BOM|"
483-
r"'utf-\d+' codec can't decode byte"
484-
)
485-
with pytest.raises(UnicodeError, match=msg):
486-
pd.read_csv(path, compression=compression_, encoding=encoding)
474+
# reading should fail (otherwise we wouldn't need the warning)
475+
msg = (
476+
r"UTF-\d+ stream does not start with BOM|"
477+
r"'utf-\d+' codec can't decode byte"
478+
)
479+
with pytest.raises(UnicodeError, match=msg):
480+
pd.read_csv(temp_file, compression=compression_, encoding=encoding)
487481

488482

489483
def test_is_fsspec_url():
@@ -514,38 +508,36 @@ def test_is_fsspec_url_chained():
514508

515509

516510
@pytest.mark.parametrize("format", ["csv", "json"])
517-
def test_codecs_encoding(format):
511+
def test_codecs_encoding(format, temp_file):
518512
# GH39247
519513
expected = pd.DataFrame(
520514
1.1 * np.arange(120).reshape((30, 4)),
521515
columns=pd.Index(list("ABCD")),
522516
index=pd.Index([f"i-{i}" for i in range(30)]),
523517
)
524-
with tm.ensure_clean() as path:
525-
with open(path, mode="w", encoding="utf-8") as handle:
526-
getattr(expected, f"to_{format}")(handle)
527-
with open(path, encoding="utf-8") as handle:
528-
if format == "csv":
529-
df = pd.read_csv(handle, index_col=0)
530-
else:
531-
df = pd.read_json(handle)
518+
with open(temp_file, mode="w", encoding="utf-8") as handle:
519+
getattr(expected, f"to_{format}")(handle)
520+
with open(temp_file, encoding="utf-8") as handle:
521+
if format == "csv":
522+
df = pd.read_csv(handle, index_col=0)
523+
else:
524+
df = pd.read_json(handle)
532525
tm.assert_frame_equal(expected, df)
533526

534527

535-
def test_codecs_get_writer_reader():
528+
def test_codecs_get_writer_reader(temp_file):
536529
# GH39247
537530
expected = pd.DataFrame(
538531
1.1 * np.arange(120).reshape((30, 4)),
539532
columns=pd.Index(list("ABCD")),
540533
index=pd.Index([f"i-{i}" for i in range(30)]),
541534
)
542-
with tm.ensure_clean() as path:
543-
with open(path, "wb") as handle:
544-
with codecs.getwriter("utf-8")(handle) as encoded:
545-
expected.to_csv(encoded)
546-
with open(path, "rb") as handle:
547-
with codecs.getreader("utf-8")(handle) as encoded:
548-
df = pd.read_csv(encoded, index_col=0)
535+
with open(temp_file, "wb") as handle:
536+
with codecs.getwriter("utf-8")(handle) as encoded:
537+
expected.to_csv(encoded)
538+
with open(temp_file, "rb") as handle:
539+
with codecs.getreader("utf-8")(handle) as encoded:
540+
df = pd.read_csv(encoded, index_col=0)
549541
tm.assert_frame_equal(expected, df)
550542

551543

@@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg):
572564

573565
@pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
574566
@pytest.mark.parametrize("format", ["csv", "json"])
575-
def test_encoding_errors(encoding_errors, format):
567+
def test_encoding_errors(encoding_errors, format, temp_file):
576568
# GH39450
577569
msg = "'utf-8' codec can't decode byte"
578570
bad_encoding = b"\xe4"
@@ -591,18 +583,17 @@ def test_encoding_errors(encoding_errors, format):
591583
+ b'"}}'
592584
)
593585
reader = partial(pd.read_json, orient="index")
594-
with tm.ensure_clean() as path:
595-
file = Path(path)
596-
file.write_bytes(content)
586+
file = temp_file
587+
file.write_bytes(content)
597588

598-
if encoding_errors != "replace":
599-
with pytest.raises(UnicodeDecodeError, match=msg):
600-
reader(path, encoding_errors=encoding_errors)
601-
else:
602-
df = reader(path, encoding_errors=encoding_errors)
603-
decoded = bad_encoding.decode(errors=encoding_errors)
604-
expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
605-
tm.assert_frame_equal(df, expected)
589+
if encoding_errors != "replace":
590+
with pytest.raises(UnicodeDecodeError, match=msg):
591+
reader(temp_file, encoding_errors=encoding_errors)
592+
else:
593+
df = reader(temp_file, encoding_errors=encoding_errors)
594+
decoded = bad_encoding.decode(errors=encoding_errors)
595+
expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
596+
tm.assert_frame_equal(df, expected)
606597

607598

608599
@pytest.mark.parametrize("encoding_errors", [0, None])
@@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors):
616607
reader(content)
617608

618609

619-
def test_bad_encdoing_errors():
610+
def test_bad_encdoing_errors(temp_file):
620611
# GH 39777
621-
with tm.ensure_clean() as path:
622-
with pytest.raises(LookupError, match="unknown error handler name"):
623-
icom.get_handle(path, "w", errors="bad")
612+
with pytest.raises(LookupError, match="unknown error handler name"):
613+
icom.get_handle(temp_file, "w", errors="bad")
624614

625615

626616
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")

0 commit comments

Comments
 (0)