Render blank spreadsheet cells as empty strings instead of "NaN".

Both convert() hunks in _xlsx_converter.py now pass na_rep="" to
DataFrame.to_html(), and a regression test (test_xlsx_blank_cells) with a
new binary fixture checks that "NaN" no longer appears in the Markdown.

NOTE(review): this patch was recovered from a copy whose line breaks had
been flattened; context-line indentation was reconstructed from Python
block structure -- confirm against the target files before applying.

diff --git a/packages/markitdown/src/markitdown/converters/_xlsx_converter.py b/packages/markitdown/src/markitdown/converters/_xlsx_converter.py
index 4186ec773..a122cb217 100644
--- a/packages/markitdown/src/markitdown/converters/_xlsx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_xlsx_converter.py
@@ -84,7 +84,7 @@ def convert(
         md_content = ""
         for s in sheets:
             md_content += f"## {s}\n"
-            html_content = sheets[s].to_html(index=False)
+            html_content = sheets[s].to_html(index=False, na_rep="")
             md_content += (
                 self._html_converter.convert_string(
                     html_content, **kwargs
@@ -146,7 +146,7 @@ def convert(
         md_content = ""
         for s in sheets:
             md_content += f"## {s}\n"
-            html_content = sheets[s].to_html(index=False)
+            html_content = sheets[s].to_html(index=False, na_rep="")
             md_content += (
                 self._html_converter.convert_string(
                     html_content, **kwargs
diff --git a/packages/markitdown/tests/test_files/test_xlsx_blank_cells.xlsx b/packages/markitdown/tests/test_files/test_xlsx_blank_cells.xlsx
new file mode 100644
index 000000000..980b5ada7
Binary files /dev/null and b/packages/markitdown/tests/test_files/test_xlsx_blank_cells.xlsx differ
diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py
index 8e3acc23d..97a17a824 100644
--- a/packages/markitdown/tests/test_module_misc.py
+++ b/packages/markitdown/tests/test_module_misc.py
@@ -382,6 +382,17 @@ def test_exceptions() -> None:
         assert type(exc_info.value.attempts[0].converter).__name__ == "PptxConverter"
 
 
+def test_xlsx_blank_cells() -> None:
+    # Blank cells in .xlsx should render as empty strings, not "NaN"
+    markitdown = MarkItDown()
+    result = markitdown.convert(
+        os.path.join(TEST_FILES_DIR, "test_xlsx_blank_cells.xlsx")
+    )
+    assert "NaN" not in result.markdown
+    assert "Alice" in result.markdown
+    assert "Bob" in result.markdown
+
+
 @pytest.mark.skipif(
     skip_exiftool,
     reason="do not run if exiftool is not installed",