Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,13 @@
tables = pytest.importorskip("tables")


def test_context(tmp_path):
path1 = tmp_path / "test1.h5"
def test_context(setup_path, tmp_path):
try:
with HDFStore(path1) as tbl:
with HDFStore(tmp_path / setup_path) as tbl:
raise ValueError("blah")
except ValueError:
pass

path2 = tmp_path / "test2.h5"
with HDFStore(path2) as tbl:
with HDFStore(tmp_path / setup_path) as tbl:
tbl["a"] = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
Expand Down Expand Up @@ -979,11 +976,10 @@ def test_copy(propindexes, temp_file):
index=Index([f"i-{i}" for i in range(30)]),
)

path = temp_file
with HDFStore(path) as st:
with HDFStore(temp_file) as st:
st.append("df", df, data_columns=["A"])
with tempfile.NamedTemporaryFile() as new_f:
with HDFStore(path) as store:
with HDFStore(temp_file) as store:
with contextlib.closing(
store.copy(new_f.name, keys=None, propindexes=propindexes)
) as tstore:
Expand Down
188 changes: 89 additions & 99 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,11 @@ def test_stringify_path_fspath(self):
result = icom.stringify_path(p)
assert result == "foo/bar.csv"

def test_stringify_file_and_path_like(self):
def test_stringify_file_and_path_like(self, temp_file):
# GH 38125: do not stringify file objects that are also path-like
fsspec = pytest.importorskip("fsspec")
with tm.ensure_clean() as path:
with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
assert fsspec_obj == icom.stringify_path(fsspec_obj)
with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj:
assert fsspec_obj == icom.stringify_path(fsspec_obj)

@pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
def test_infer_compression_from_path(self, compression_format, path_type):
Expand Down Expand Up @@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath):
("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
],
)
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
if writer_name in ["to_latex"]: # uses Styler implementation
pytest.importorskip("jinja2")
p1 = tm.ensure_clean("string")
p2 = tm.ensure_clean("fspath")
string = str(tmp_path / "string")
fspath = str(tmp_path / "fspath")
df = pd.DataFrame({"A": [1, 2]})

with p1 as string, p2 as fspath:
pytest.importorskip(module)
mypath = CustomFSPath(fspath)
writer = getattr(df, writer_name)

writer(string, **writer_kwargs)
writer(mypath, **writer_kwargs)
with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
if writer_name == "to_excel":
# binary representation of excel contains time creation
# data that causes flaky CI failures
result = pd.read_excel(f_str, **writer_kwargs)
expected = pd.read_excel(f_path, **writer_kwargs)
tm.assert_frame_equal(result, expected)
else:
result = f_str.read()
expected = f_path.read()
assert result == expected

def test_write_fspath_hdf5(self):
pytest.importorskip(module)
mypath = CustomFSPath(fspath)
writer = getattr(df, writer_name)

writer(string, **writer_kwargs)
writer(mypath, **writer_kwargs)
with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
if writer_name == "to_excel":
# binary representation of excel contains time creation
# data that causes flaky CI failures
result = pd.read_excel(f_str, **writer_kwargs)
expected = pd.read_excel(f_path, **writer_kwargs)
tm.assert_frame_equal(result, expected)
else:
result = f_str.read()
expected = f_path.read()
assert result == expected

def test_write_fspath_hdf5(self, tmp_path):
# Same test as write_fspath_all, except HDF5 files aren't
# necessarily byte-for-byte identical for a given dataframe, so we'll
# have to read and compare equality
pytest.importorskip("tables")

df = pd.DataFrame({"A": [1, 2]})
p1 = tm.ensure_clean("string")
p2 = tm.ensure_clean("fspath")
string = str(tmp_path / "string")
fspath = str(tmp_path / "fspath")

with p1 as string, p2 as fspath:
mypath = CustomFSPath(fspath)
df.to_hdf(mypath, key="bar")
df.to_hdf(string, key="bar")
mypath = CustomFSPath(fspath)
df.to_hdf(mypath, key="bar")
df.to_hdf(string, key="bar")

result = pd.read_hdf(fspath, key="bar")
expected = pd.read_hdf(string, key="bar")
result = pd.read_hdf(fspath, key="bar")
expected = pd.read_hdf(string, key="bar")

tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -432,35 +429,33 @@ def test_next(self, mmap_file):
with pytest.raises(StopIteration, match=r"^$"):
next(wrapper)

def test_unknown_engine(self):
with tm.ensure_clean() as path:
df = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=pd.Index(list("ABCD")),
index=pd.Index([f"i-{i}" for i in range(30)]),
)
df.to_csv(path)
with pytest.raises(ValueError, match="Unknown engine"):
pd.read_csv(path, engine="pyt")

def test_binary_mode(self):
def test_unknown_engine(self, temp_file):
df = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=pd.Index(list("ABCD")),
index=pd.Index([f"i-{i}" for i in range(30)]),
)
df.to_csv(temp_file)
with pytest.raises(ValueError, match="Unknown engine"):
pd.read_csv(temp_file, engine="pyt")

def test_binary_mode(self, temp_file):
"""
'encoding' shouldn't be passed to 'open' in binary mode.

GH 35058
"""
with tm.ensure_clean() as path:
df = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=pd.Index(list("ABCD")),
index=pd.Index([f"i-{i}" for i in range(30)]),
)
df.to_csv(path, mode="w+b")
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
df = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=pd.Index(list("ABCD")),
index=pd.Index([f"i-{i}" for i in range(30)]),
)
df.to_csv(temp_file, mode="w+b")
tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0))

@pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
@pytest.mark.parametrize("compression_", ["bz2", "xz"])
def test_warning_missing_utf_bom(self, encoding, compression_):
def test_warning_missing_utf_bom(self, encoding, compression_, temp_file):
"""
bz2 and xz do not write the byte order mark (BOM) for utf-16/32.

Expand All @@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
columns=pd.Index(list("ABCD")),
index=pd.Index([f"i-{i}" for i in range(30)]),
)
with tm.ensure_clean() as path:
with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
df.to_csv(path, compression=compression_, encoding=encoding)
with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
df.to_csv(temp_file, compression=compression_, encoding=encoding)

# reading should fail (otherwise we wouldn't need the warning)
msg = (
r"UTF-\d+ stream does not start with BOM|"
r"'utf-\d+' codec can't decode byte"
)
with pytest.raises(UnicodeError, match=msg):
pd.read_csv(path, compression=compression_, encoding=encoding)
# reading should fail (otherwise we wouldn't need the warning)
msg = (
r"UTF-\d+ stream does not start with BOM|"
r"'utf-\d+' codec can't decode byte"
)
with pytest.raises(UnicodeError, match=msg):
pd.read_csv(temp_file, compression=compression_, encoding=encoding)


def test_is_fsspec_url():
Expand Down Expand Up @@ -514,38 +508,36 @@ def test_is_fsspec_url_chained():


@pytest.mark.parametrize("format", ["csv", "json"])
def test_codecs_encoding(format):
def test_codecs_encoding(format, temp_file):
# GH39247
expected = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=pd.Index(list("ABCD")),
index=pd.Index([f"i-{i}" for i in range(30)]),
)
with tm.ensure_clean() as path:
with open(path, mode="w", encoding="utf-8") as handle:
getattr(expected, f"to_{format}")(handle)
with open(path, encoding="utf-8") as handle:
if format == "csv":
df = pd.read_csv(handle, index_col=0)
else:
df = pd.read_json(handle)
with open(temp_file, mode="w", encoding="utf-8") as handle:
getattr(expected, f"to_{format}")(handle)
with open(temp_file, encoding="utf-8") as handle:
if format == "csv":
df = pd.read_csv(handle, index_col=0)
else:
df = pd.read_json(handle)
tm.assert_frame_equal(expected, df)


def test_codecs_get_writer_reader():
def test_codecs_get_writer_reader(temp_file):
# GH39247
expected = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=pd.Index(list("ABCD")),
index=pd.Index([f"i-{i}" for i in range(30)]),
)
with tm.ensure_clean() as path:
with open(path, "wb") as handle:
with codecs.getwriter("utf-8")(handle) as encoded:
expected.to_csv(encoded)
with open(path, "rb") as handle:
with codecs.getreader("utf-8")(handle) as encoded:
df = pd.read_csv(encoded, index_col=0)
with open(temp_file, "wb") as handle:
with codecs.getwriter("utf-8")(handle) as encoded:
expected.to_csv(encoded)
with open(temp_file, "rb") as handle:
with codecs.getreader("utf-8")(handle) as encoded:
df = pd.read_csv(encoded, index_col=0)
tm.assert_frame_equal(expected, df)


Expand All @@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg):

@pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
@pytest.mark.parametrize("format", ["csv", "json"])
def test_encoding_errors(encoding_errors, format):
def test_encoding_errors(encoding_errors, format, temp_file):
# GH39450
msg = "'utf-8' codec can't decode byte"
bad_encoding = b"\xe4"
Expand All @@ -591,18 +583,17 @@ def test_encoding_errors(encoding_errors, format):
+ b'"}}'
)
reader = partial(pd.read_json, orient="index")
with tm.ensure_clean() as path:
file = Path(path)
file.write_bytes(content)
file = Path(temp_file)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

temp_file should already be a Path

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the correction. I had never used pytest fixtures before, which is why I made this mistake. I will fix it today and update this PR.

file.write_bytes(content)

if encoding_errors != "replace":
with pytest.raises(UnicodeDecodeError, match=msg):
reader(path, encoding_errors=encoding_errors)
else:
df = reader(path, encoding_errors=encoding_errors)
decoded = bad_encoding.decode(errors=encoding_errors)
expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
tm.assert_frame_equal(df, expected)
if encoding_errors != "replace":
with pytest.raises(UnicodeDecodeError, match=msg):
reader(temp_file, encoding_errors=encoding_errors)
else:
df = reader(temp_file, encoding_errors=encoding_errors)
decoded = bad_encoding.decode(errors=encoding_errors)
expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
tm.assert_frame_equal(df, expected)


@pytest.mark.parametrize("encoding_errors", [0, None])
Expand All @@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors):
reader(content)


def test_bad_encdoing_errors():
def test_bad_encdoing_errors(temp_file):
# GH 39777
with tm.ensure_clean() as path:
with pytest.raises(LookupError, match="unknown error handler name"):
icom.get_handle(path, "w", errors="bad")
with pytest.raises(LookupError, match="unknown error handler name"):
icom.get_handle(temp_file, "w", errors="bad")


@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
Expand Down
Loading