@@ -86,12 +86,11 @@ def test_stringify_path_fspath(self):
         result = icom.stringify_path(p)
         assert result == "foo/bar.csv"

-    def test_stringify_file_and_path_like(self):
+    def test_stringify_file_and_path_like(self, temp_file):
         # GH 38125: do not stringify file objects that are also path-like
         fsspec = pytest.importorskip("fsspec")
-        with tm.ensure_clean() as path:
-            with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
-                assert fsspec_obj == icom.stringify_path(fsspec_obj)
+        with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj:
+            assert fsspec_obj == icom.stringify_path(fsspec_obj)

     @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
     def test_infer_compression_from_path(self, compression_format, path_type):
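Throughout this patch, the temp_file fixture replaces the tm.ensure_clean() context manager. For readers following along, here is a minimal sketch of what such a fixture could look like in conftest.py; the shape and names below are an assumption for illustration, not copied from the pandas sources:

    import uuid

    import pytest


    @pytest.fixture
    def temp_file(tmp_path):
        # Hypothetical conftest fixture: hand each test a unique,
        # pre-created pathlib.Path inside pytest's per-test tmp_path
        # directory; cleanup is then pytest's job, not the test's.
        file_path = tmp_path / str(uuid.uuid4())
        file_path.touch()
        return file_path

The payoff is visible in the hunk above: one level of with-nesting drops out of every converted test.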
@@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath):
             ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
         ],
     )
-    def test_write_fspath_all(self, writer_name, writer_kwargs, module):
+    def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
         if writer_name in ["to_latex"]:  # uses Styler implementation
             pytest.importorskip("jinja2")
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        string = str(tmp_path / "string")
+        fspath = str(tmp_path / "fspath")
         df = pd.DataFrame({"A": [1, 2]})

-        with p1 as string, p2 as fspath:
-            pytest.importorskip(module)
-            mypath = CustomFSPath(fspath)
-            writer = getattr(df, writer_name)
-
-            writer(string, **writer_kwargs)
-            writer(mypath, **writer_kwargs)
-            with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
-                if writer_name == "to_excel":
-                    # binary representation of excel contains time creation
-                    # data that causes flaky CI failures
-                    result = pd.read_excel(f_str, **writer_kwargs)
-                    expected = pd.read_excel(f_path, **writer_kwargs)
-                    tm.assert_frame_equal(result, expected)
-                else:
-                    result = f_str.read()
-                    expected = f_path.read()
-                    assert result == expected
-
-    def test_write_fspath_hdf5(self):
+        pytest.importorskip(module)
+        mypath = CustomFSPath(fspath)
+        writer = getattr(df, writer_name)
+
+        writer(string, **writer_kwargs)
+        writer(mypath, **writer_kwargs)
+        with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
+            if writer_name == "to_excel":
+                # binary representation of excel contains time creation
+                # data that causes flaky CI failures
+                result = pd.read_excel(f_str, **writer_kwargs)
+                expected = pd.read_excel(f_path, **writer_kwargs)
+                tm.assert_frame_equal(result, expected)
+            else:
+                result = f_str.read()
+                expected = f_path.read()
+                assert result == expected
+
+    def test_write_fspath_hdf5(self, tmp_path):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
         # have to read and compare equality
         pytest.importorskip("tables")

         df = pd.DataFrame({"A": [1, 2]})
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        string = str(tmp_path / "string")
+        fspath = str(tmp_path / "fspath")

-        with p1 as string, p2 as fspath:
-            mypath = CustomFSPath(fspath)
-            df.to_hdf(mypath, key="bar")
-            df.to_hdf(string, key="bar")
+        mypath = CustomFSPath(fspath)
+        df.to_hdf(mypath, key="bar")
+        df.to_hdf(string, key="bar")

-            result = pd.read_hdf(fspath, key="bar")
-            expected = pd.read_hdf(string, key="bar")
+        result = pd.read_hdf(fspath, key="bar")
+        expected = pd.read_hdf(string, key="bar")

         tm.assert_frame_equal(result, expected)

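The two writer tests above take tmp_path directly because each needs two files rather than one. tmp_path is pytest's built-in fixture: a fresh, writable pathlib.Path directory unique to each test, cleaned up by pytest itself. A standalone sketch of the pattern (the test and file names here are illustrative only):

    import pandas as pd
    import pandas._testing as tm


    def test_roundtrip(tmp_path):
        # tmp_path arrives as an existing, empty directory; build any
        # paths you need under it and write freely.
        target = tmp_path / "frame.csv"
        df = pd.DataFrame({"A": [1, 2]})
        df.to_csv(target)
        tm.assert_frame_equal(pd.read_csv(target, index_col=0), df)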
@@ -432,35 +429,33 @@ def test_next(self, mmap_file):
         with pytest.raises(StopIteration, match=r"^$"):
             next(wrapper)

-    def test_unknown_engine(self):
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path)
-            with pytest.raises(ValueError, match="Unknown engine"):
-                pd.read_csv(path, engine="pyt")
-
-    def test_binary_mode(self):
+    def test_unknown_engine(self, temp_file):
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(temp_file)
+        with pytest.raises(ValueError, match="Unknown engine"):
+            pd.read_csv(temp_file, engine="pyt")
+
+    def test_binary_mode(self, temp_file):
         """
         'encoding' shouldn't be passed to 'open' in binary mode.

         GH 35058
         """
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path, mode="w+b")
-            tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(temp_file, mode="w+b")
+        tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0))

     @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
     @pytest.mark.parametrize("compression_", ["bz2", "xz"])
-    def test_warning_missing_utf_bom(self, encoding, compression_):
+    def test_warning_missing_utf_bom(self, encoding, compression_, temp_file):
         """
         bz2 and xz do not write the byte order mark (BOM) for utf-16/32.

@@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
             columns=pd.Index(list("ABCD")),
             index=pd.Index([f"i-{i}" for i in range(30)]),
         )
-        with tm.ensure_clean() as path:
-            with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
-                df.to_csv(path, compression=compression_, encoding=encoding)
+        with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
+            df.to_csv(temp_file, compression=compression_, encoding=encoding)

-            # reading should fail (otherwise we wouldn't need the warning)
-            msg = (
-                r"UTF-\d+ stream does not start with BOM|"
-                r"'utf-\d+' codec can't decode byte"
-            )
-            with pytest.raises(UnicodeError, match=msg):
-                pd.read_csv(path, compression=compression_, encoding=encoding)
+        # reading should fail (otherwise we wouldn't need the warning)
+        msg = (
+            r"UTF-\d+ stream does not start with BOM|"
+            r"'utf-\d+' codec can't decode byte"
+        )
+        with pytest.raises(UnicodeError, match=msg):
+            pd.read_csv(temp_file, compression=compression_, encoding=encoding)


 def test_is_fsspec_url():
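Background for test_warning_missing_utf_bom, whose conversion ends just above: Python's endianness-unspecified utf-16/utf-32 codecs write a byte order mark first and rely on it when decoding, which is why a stream missing its BOM (as in the bz2/xz paths the test exercises) fails to read back. A quick stdlib-only illustration of the behavior the test leans on:

    import codecs

    # The plain "utf-16" codec prepends a BOM; the decoder reads it to
    # pick the byte order before decoding the payload.
    raw = "abc".encode("utf-16")
    assert raw[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)
    assert raw.decode("utf-16") == "abc"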
@@ -514,38 +508,36 @@ def test_is_fsspec_url_chained():


 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_codecs_encoding(format):
+def test_codecs_encoding(format, temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, mode="w", encoding="utf-8") as handle:
-            getattr(expected, f"to_{format}")(handle)
-        with open(path, encoding="utf-8") as handle:
-            if format == "csv":
-                df = pd.read_csv(handle, index_col=0)
-            else:
-                df = pd.read_json(handle)
+    with open(temp_file, mode="w", encoding="utf-8") as handle:
+        getattr(expected, f"to_{format}")(handle)
+    with open(temp_file, encoding="utf-8") as handle:
+        if format == "csv":
+            df = pd.read_csv(handle, index_col=0)
+        else:
+            df = pd.read_json(handle)
     tm.assert_frame_equal(expected, df)


-def test_codecs_get_writer_reader():
+def test_codecs_get_writer_reader(temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, "wb") as handle:
-            with codecs.getwriter("utf-8")(handle) as encoded:
-                expected.to_csv(encoded)
-        with open(path, "rb") as handle:
-            with codecs.getreader("utf-8")(handle) as encoded:
-                df = pd.read_csv(encoded, index_col=0)
+    with open(temp_file, "wb") as handle:
+        with codecs.getwriter("utf-8")(handle) as encoded:
+            expected.to_csv(encoded)
+    with open(temp_file, "rb") as handle:
+        with codecs.getreader("utf-8")(handle) as encoded:
+            df = pd.read_csv(encoded, index_col=0)
     tm.assert_frame_equal(expected, df)

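For reference, codecs.getwriter and codecs.getreader, exercised by test_codecs_get_writer_reader above, return stream-wrapper classes that encode text into, or decode text out of, an underlying binary handle. A self-contained sketch against an in-memory buffer:

    import codecs
    import io

    buf = io.BytesIO()
    writer = codecs.getwriter("utf-8")(buf)  # wraps the binary stream
    writer.write("héllo")                    # text in, UTF-8 bytes out
    assert buf.getvalue() == "héllo".encode("utf-8")

    buf.seek(0)
    reader = codecs.getreader("utf-8")(buf)  # decodes on the way back
    assert reader.read() == "héllo"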
@@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg):

 @pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_encoding_errors(encoding_errors, format):
+def test_encoding_errors(encoding_errors, format, temp_file):
     # GH39450
     msg = "'utf-8' codec can't decode byte"
     bad_encoding = b"\xe4"
@@ -591,18 +583,17 @@
             + b'"}}'
         )
         reader = partial(pd.read_json, orient="index")
-    with tm.ensure_clean() as path:
-        file = Path(path)
-        file.write_bytes(content)
+    file = temp_file
+    file.write_bytes(content)

-        if encoding_errors != "replace":
-            with pytest.raises(UnicodeDecodeError, match=msg):
-                reader(path, encoding_errors=encoding_errors)
-        else:
-            df = reader(path, encoding_errors=encoding_errors)
-            decoded = bad_encoding.decode(errors=encoding_errors)
-            expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
-            tm.assert_frame_equal(df, expected)
+    if encoding_errors != "replace":
+        with pytest.raises(UnicodeDecodeError, match=msg):
+            reader(temp_file, encoding_errors=encoding_errors)
+    else:
+        df = reader(temp_file, encoding_errors=encoding_errors)
+        decoded = bad_encoding.decode(errors=encoding_errors)
+        expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
+        tm.assert_frame_equal(df, expected)


 @pytest.mark.parametrize("encoding_errors", [0, None])
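The expected frame in the "replace" branch above is built on the fact that the "replace" error handler substitutes U+FFFD, the Unicode replacement character, for each undecodable byte:

    # b"\xe4" is a lone UTF-8 lead byte, so strict decoding raises,
    # while "replace" maps it to a single replacement character.
    assert b"\xe4".decode("utf-8", errors="replace") == "\ufffd"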
@@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors):
         reader(content)


-def test_bad_encdoing_errors():
+def test_bad_encdoing_errors(temp_file):
     # GH 39777
-    with tm.ensure_clean() as path:
-        with pytest.raises(LookupError, match="unknown error handler name"):
-            icom.get_handle(path, "w", errors="bad")
+    with pytest.raises(LookupError, match="unknown error handler name"):
+        icom.get_handle(temp_file, "w", errors="bad")


 @pytest.mark.skipif(WASM, reason="limited file system access on WASM")
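A note on the LookupError that test_bad_encdoing_errors expects: unknown error-handler names are rejected by the stdlib's error-handler registry, which the pandas I/O layer presumably hits (directly or indirectly) when opening the handle; the match string is CPython's own message:

    import codecs

    try:
        codecs.lookup_error("bad")  # "bad" is not a registered handler
    except LookupError as exc:
        assert "unknown error handler name" in str(exc)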