Skip to content

Commit a59b126

Browse files
zhangbowen-coder, Nikhil-Narayanan, zorexsalvo, star1327p, Aniketsy
authored
TST: Every function ensure_clean in the file /pandas/tests/io/test_common.py and /pandas/tests/io/pytables/test_store.py has been replaced. (#62895)
Co-authored-by: Nikhil <72097440+Nikhil-Narayanan@users.noreply.github.com> Co-authored-by: Zorex Salvo <zorexsalvo@gmail.com> Co-authored-by: Christine P. Chai <star1327p@gmail.com> Co-authored-by: Aniket <148300120+Aniketsy@users.noreply.github.com> Co-authored-by: Wang Haoxiang <2795352227@qq,com> Co-authored-by: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>
1 parent 54c26ec commit a59b126

File tree

2 files changed

+94
-108
lines changed

2 files changed

+94
-108
lines changed

pandas/tests/io/pytables/test_store.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,13 @@
4141
tables = pytest.importorskip("tables")
4242

4343

44-
def test_context(tmp_path):
45-
path1 = tmp_path / "test1.h5"
44+
def test_context(setup_path, tmp_path):
4645
try:
47-
with HDFStore(path1) as tbl:
46+
with HDFStore(tmp_path / setup_path) as tbl:
4847
raise ValueError("blah")
4948
except ValueError:
5049
pass
51-
52-
path2 = tmp_path / "test2.h5"
53-
with HDFStore(path2) as tbl:
50+
with HDFStore(tmp_path / setup_path) as tbl:
5451
tbl["a"] = DataFrame(
5552
1.1 * np.arange(120).reshape((30, 4)),
5653
columns=Index(list("ABCD"), dtype=object),
@@ -979,11 +976,10 @@ def test_copy(propindexes, temp_file):
979976
index=Index([f"i-{i}" for i in range(30)]),
980977
)
981978

982-
path = temp_file
983-
with HDFStore(path) as st:
979+
with HDFStore(temp_file) as st:
984980
st.append("df", df, data_columns=["A"])
985981
with tempfile.NamedTemporaryFile() as new_f:
986-
with HDFStore(path) as store:
982+
with HDFStore(temp_file) as store:
987983
with contextlib.closing(
988984
store.copy(new_f.name, keys=None, propindexes=propindexes)
989985
) as tstore:

pandas/tests/io/test_common.py

Lines changed: 89 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,11 @@ def test_stringify_path_fspath(self):
8686
result = icom.stringify_path(p)
8787
assert result == "foo/bar.csv"
8888

89-
def test_stringify_file_and_path_like(self):
89+
def test_stringify_file_and_path_like(self, temp_file):
9090
# GH 38125: do not stringify file objects that are also path-like
9191
fsspec = pytest.importorskip("fsspec")
92-
with tm.ensure_clean() as path:
93-
with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
94-
assert fsspec_obj == icom.stringify_path(fsspec_obj)
92+
with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj:
93+
assert fsspec_obj == icom.stringify_path(fsspec_obj)
9594

9695
@pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
9796
def test_infer_compression_from_path(self, compression_format, path_type):
@@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath):
338337
("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
339338
],
340339
)
341-
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
340+
def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
342341
if writer_name in ["to_latex"]: # uses Styler implementation
343342
pytest.importorskip("jinja2")
344-
p1 = tm.ensure_clean("string")
345-
p2 = tm.ensure_clean("fspath")
343+
string = str(tmp_path / "string")
344+
fspath = str(tmp_path / "fspath")
346345
df = pd.DataFrame({"A": [1, 2]})
347346

348-
with p1 as string, p2 as fspath:
349-
pytest.importorskip(module)
350-
mypath = CustomFSPath(fspath)
351-
writer = getattr(df, writer_name)
352-
353-
writer(string, **writer_kwargs)
354-
writer(mypath, **writer_kwargs)
355-
with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
356-
if writer_name == "to_excel":
357-
# binary representation of excel contains time creation
358-
# data that causes flaky CI failures
359-
result = pd.read_excel(f_str, **writer_kwargs)
360-
expected = pd.read_excel(f_path, **writer_kwargs)
361-
tm.assert_frame_equal(result, expected)
362-
else:
363-
result = f_str.read()
364-
expected = f_path.read()
365-
assert result == expected
366-
367-
def test_write_fspath_hdf5(self):
347+
pytest.importorskip(module)
348+
mypath = CustomFSPath(fspath)
349+
writer = getattr(df, writer_name)
350+
351+
writer(string, **writer_kwargs)
352+
writer(mypath, **writer_kwargs)
353+
with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
354+
if writer_name == "to_excel":
355+
# binary representation of excel contains time creation
356+
# data that causes flaky CI failures
357+
result = pd.read_excel(f_str, **writer_kwargs)
358+
expected = pd.read_excel(f_path, **writer_kwargs)
359+
tm.assert_frame_equal(result, expected)
360+
else:
361+
result = f_str.read()
362+
expected = f_path.read()
363+
assert result == expected
364+
365+
def test_write_fspath_hdf5(self, tmp_path):
368366
# Same test as write_fspath_all, except HDF5 files aren't
369367
# necessarily byte-for-byte identical for a given dataframe, so we'll
370368
# have to read and compare equality
371369
pytest.importorskip("tables")
372370

373371
df = pd.DataFrame({"A": [1, 2]})
374-
p1 = tm.ensure_clean("string")
375-
p2 = tm.ensure_clean("fspath")
372+
string = str(tmp_path / "string")
373+
fspath = str(tmp_path / "fspath")
376374

377-
with p1 as string, p2 as fspath:
378-
mypath = CustomFSPath(fspath)
379-
df.to_hdf(mypath, key="bar")
380-
df.to_hdf(string, key="bar")
375+
mypath = CustomFSPath(fspath)
376+
df.to_hdf(mypath, key="bar")
377+
df.to_hdf(string, key="bar")
381378

382-
result = pd.read_hdf(fspath, key="bar")
383-
expected = pd.read_hdf(string, key="bar")
379+
result = pd.read_hdf(fspath, key="bar")
380+
expected = pd.read_hdf(string, key="bar")
384381

385382
tm.assert_frame_equal(result, expected)
386383

@@ -432,35 +429,33 @@ def test_next(self, mmap_file):
432429
with pytest.raises(StopIteration, match=r"^$"):
433430
next(wrapper)
434431

435-
def test_unknown_engine(self):
436-
with tm.ensure_clean() as path:
437-
df = pd.DataFrame(
438-
1.1 * np.arange(120).reshape((30, 4)),
439-
columns=pd.Index(list("ABCD")),
440-
index=pd.Index([f"i-{i}" for i in range(30)]),
441-
)
442-
df.to_csv(path)
443-
with pytest.raises(ValueError, match="Unknown engine"):
444-
pd.read_csv(path, engine="pyt")
445-
446-
def test_binary_mode(self):
432+
def test_unknown_engine(self, temp_file):
433+
df = pd.DataFrame(
434+
1.1 * np.arange(120).reshape((30, 4)),
435+
columns=pd.Index(list("ABCD")),
436+
index=pd.Index([f"i-{i}" for i in range(30)]),
437+
)
438+
df.to_csv(temp_file)
439+
with pytest.raises(ValueError, match="Unknown engine"):
440+
pd.read_csv(temp_file, engine="pyt")
441+
442+
def test_binary_mode(self, temp_file):
447443
"""
448444
'encoding' shouldn't be passed to 'open' in binary mode.
449445
450446
GH 35058
451447
"""
452-
with tm.ensure_clean() as path:
453-
df = pd.DataFrame(
454-
1.1 * np.arange(120).reshape((30, 4)),
455-
columns=pd.Index(list("ABCD")),
456-
index=pd.Index([f"i-{i}" for i in range(30)]),
457-
)
458-
df.to_csv(path, mode="w+b")
459-
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
448+
df = pd.DataFrame(
449+
1.1 * np.arange(120).reshape((30, 4)),
450+
columns=pd.Index(list("ABCD")),
451+
index=pd.Index([f"i-{i}" for i in range(30)]),
452+
)
453+
df.to_csv(temp_file, mode="w+b")
454+
tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0))
460455

461456
@pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
462457
@pytest.mark.parametrize("compression_", ["bz2", "xz"])
463-
def test_warning_missing_utf_bom(self, encoding, compression_):
458+
def test_warning_missing_utf_bom(self, encoding, compression_, temp_file):
464459
"""
465460
bz2 and xz do not write the byte order mark (BOM) for utf-16/32.
466461
@@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
473468
columns=pd.Index(list("ABCD")),
474469
index=pd.Index([f"i-{i}" for i in range(30)]),
475470
)
476-
with tm.ensure_clean() as path:
477-
with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
478-
df.to_csv(path, compression=compression_, encoding=encoding)
471+
with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
472+
df.to_csv(temp_file, compression=compression_, encoding=encoding)
479473

480-
# reading should fail (otherwise we wouldn't need the warning)
481-
msg = (
482-
r"UTF-\d+ stream does not start with BOM|"
483-
r"'utf-\d+' codec can't decode byte"
484-
)
485-
with pytest.raises(UnicodeError, match=msg):
486-
pd.read_csv(path, compression=compression_, encoding=encoding)
474+
# reading should fail (otherwise we wouldn't need the warning)
475+
msg = (
476+
r"UTF-\d+ stream does not start with BOM|"
477+
r"'utf-\d+' codec can't decode byte"
478+
)
479+
with pytest.raises(UnicodeError, match=msg):
480+
pd.read_csv(temp_file, compression=compression_, encoding=encoding)
487481

488482

489483
def test_is_fsspec_url():
@@ -514,38 +508,36 @@ def test_is_fsspec_url_chained():
514508

515509

516510
@pytest.mark.parametrize("format", ["csv", "json"])
517-
def test_codecs_encoding(format):
511+
def test_codecs_encoding(format, temp_file):
518512
# GH39247
519513
expected = pd.DataFrame(
520514
1.1 * np.arange(120).reshape((30, 4)),
521515
columns=pd.Index(list("ABCD")),
522516
index=pd.Index([f"i-{i}" for i in range(30)]),
523517
)
524-
with tm.ensure_clean() as path:
525-
with open(path, mode="w", encoding="utf-8") as handle:
526-
getattr(expected, f"to_{format}")(handle)
527-
with open(path, encoding="utf-8") as handle:
528-
if format == "csv":
529-
df = pd.read_csv(handle, index_col=0)
530-
else:
531-
df = pd.read_json(handle)
518+
with open(temp_file, mode="w", encoding="utf-8") as handle:
519+
getattr(expected, f"to_{format}")(handle)
520+
with open(temp_file, encoding="utf-8") as handle:
521+
if format == "csv":
522+
df = pd.read_csv(handle, index_col=0)
523+
else:
524+
df = pd.read_json(handle)
532525
tm.assert_frame_equal(expected, df)
533526

534527

535-
def test_codecs_get_writer_reader():
528+
def test_codecs_get_writer_reader(temp_file):
536529
# GH39247
537530
expected = pd.DataFrame(
538531
1.1 * np.arange(120).reshape((30, 4)),
539532
columns=pd.Index(list("ABCD")),
540533
index=pd.Index([f"i-{i}" for i in range(30)]),
541534
)
542-
with tm.ensure_clean() as path:
543-
with open(path, "wb") as handle:
544-
with codecs.getwriter("utf-8")(handle) as encoded:
545-
expected.to_csv(encoded)
546-
with open(path, "rb") as handle:
547-
with codecs.getreader("utf-8")(handle) as encoded:
548-
df = pd.read_csv(encoded, index_col=0)
535+
with open(temp_file, "wb") as handle:
536+
with codecs.getwriter("utf-8")(handle) as encoded:
537+
expected.to_csv(encoded)
538+
with open(temp_file, "rb") as handle:
539+
with codecs.getreader("utf-8")(handle) as encoded:
540+
df = pd.read_csv(encoded, index_col=0)
549541
tm.assert_frame_equal(expected, df)
550542

551543

@@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg):
572564

573565
@pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
574566
@pytest.mark.parametrize("format", ["csv", "json"])
575-
def test_encoding_errors(encoding_errors, format):
567+
def test_encoding_errors(encoding_errors, format, temp_file):
576568
# GH39450
577569
msg = "'utf-8' codec can't decode byte"
578570
bad_encoding = b"\xe4"
@@ -591,18 +583,17 @@ def test_encoding_errors(encoding_errors, format):
591583
+ b'"}}'
592584
)
593585
reader = partial(pd.read_json, orient="index")
594-
with tm.ensure_clean() as path:
595-
file = Path(path)
596-
file.write_bytes(content)
586+
file = temp_file
587+
file.write_bytes(content)
597588

598-
if encoding_errors != "replace":
599-
with pytest.raises(UnicodeDecodeError, match=msg):
600-
reader(path, encoding_errors=encoding_errors)
601-
else:
602-
df = reader(path, encoding_errors=encoding_errors)
603-
decoded = bad_encoding.decode(errors=encoding_errors)
604-
expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
605-
tm.assert_frame_equal(df, expected)
589+
if encoding_errors != "replace":
590+
with pytest.raises(UnicodeDecodeError, match=msg):
591+
reader(temp_file, encoding_errors=encoding_errors)
592+
else:
593+
df = reader(temp_file, encoding_errors=encoding_errors)
594+
decoded = bad_encoding.decode(errors=encoding_errors)
595+
expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
596+
tm.assert_frame_equal(df, expected)
606597

607598

608599
@pytest.mark.parametrize("encoding_errors", [0, None])
@@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors):
616607
reader(content)
617608

618609

619-
def test_bad_encdoing_errors():
610+
def test_bad_encdoing_errors(temp_file):
620611
# GH 39777
621-
with tm.ensure_clean() as path:
622-
with pytest.raises(LookupError, match="unknown error handler name"):
623-
icom.get_handle(path, "w", errors="bad")
612+
with pytest.raises(LookupError, match="unknown error handler name"):
613+
icom.get_handle(temp_file, "w", errors="bad")
624614

625615

626616
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")

0 commit comments

Comments
 (0)