From e2d89f5283cbeb4b4ca55fe88b1c353c343b4972 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Sat, 18 Oct 2025 23:00:23 +0200 Subject: [PATCH 1/3] Add IniConfig.parse() classmethod to fix inline comment handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #55 - Inline comments were incorrectly included in parsed values The bug: Inline comments (# or ;) were being included as part of values instead of being stripped, inconsistent with how section comments are handled. Example of the bug: name = value # comment Result was: "value # comment" (incorrect) Should be: "value" (correct) Changes: - Add IniConfig.parse() classmethod with strip_inline_comments parameter - Default: strip_inline_comments=True (correct behavior - strips comments) - Can set strip_inline_comments=False if old buggy behavior needed - IniConfig() constructor preserves old behavior for backward compatibility (calls parse_ini_data with strip_inline_comments=False) - Add parse_ini_data() helper in _parse.py to avoid code duplication - Update _parseline() to support strip_inline_comments parameter - Add comprehensive tests for both correct and legacy behavior Backward compatibility: Existing code using IniConfig() continues to work unchanged. Users should migrate to IniConfig.parse() for correct behavior. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG | 9 +++++ src/iniconfig/__init__.py | 76 ++++++++++++++++++++++++++++----------- src/iniconfig/_parse.py | 67 +++++++++++++++++++++++++++++++--- testing/test_iniconfig.py | 74 +++++++++++++++++++++++++++++++++++++- 4 files changed, 199 insertions(+), 27 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index eeb0245..405f054 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,12 @@ +2.3.0 +===== + +* add IniConfig.parse() classmethod with strip_inline_comments parameter (fixes #55) + - by default (strip_inline_comments=True), inline comments are properly stripped from values + - set strip_inline_comments=False to preserve old behavior if needed +* IniConfig() constructor maintains backward compatibility (does not strip inline comments) +* users should migrate to IniConfig.parse() for correct comment handling + 2.2.0 ===== diff --git a/src/iniconfig/__init__.py b/src/iniconfig/__init__.py index 3fadacf..821e205 100644 --- a/src/iniconfig/__init__.py +++ b/src/iniconfig/__init__.py @@ -102,27 +102,61 @@ def __init__( with open(self.path, encoding=encoding) as fp: data = fp.read() - tokens = _parse.parse_lines(self.path, data.splitlines(True)) - - self._sources = {} - sections_data: dict[str, dict[str, str]] - self.sections = sections_data = {} - - for lineno, section, name, value in tokens: - if section is None: - raise ParseError(self.path, lineno, "no section header defined") - self._sources[section, name] = lineno - if name is None: - if section in self.sections: - raise ParseError( - self.path, lineno, f"duplicate section {section!r}" - ) - sections_data[section] = {} - else: - if name in self.sections[section]: - raise ParseError(self.path, lineno, f"duplicate name {name!r}") - assert value is not None - sections_data[section][name] = value + # Use old behavior (no stripping) for backward compatibility + sections_data, sources = _parse.parse_ini_data( + self.path, 
data, strip_inline_comments=False + ) + + self._sources = sources + self.sections = sections_data + + @classmethod + def parse( + cls, + path: str | os.PathLike[str], + data: str | None = None, + encoding: str = "utf-8", + *, + strip_inline_comments: bool = True, + ) -> "IniConfig": + """Parse an INI file. + + Args: + path: Path to the INI file (used for error messages) + data: Optional INI content as string. If None, reads from path. + encoding: Encoding to use when reading the file (default: utf-8) + strip_inline_comments: Whether to strip inline comments from values + (default: True). When True, comments starting with # or ; are + removed from values, matching the behavior for section comments. + + Returns: + IniConfig instance with parsed configuration + + Example: + # With comment stripping (default): + config = IniConfig.parse("setup.cfg") + # value = "foo" instead of "foo # comment" + + # Without comment stripping (old behavior): + config = IniConfig.parse("setup.cfg", strip_inline_comments=False) + # value = "foo # comment" + """ + fspath = os.fspath(path) + + if data is None: + with open(fspath, encoding=encoding) as fp: + data = fp.read() + + sections_data, sources = _parse.parse_ini_data( + fspath, data, strip_inline_comments=strip_inline_comments + ) + + # Create instance directly without calling __init__ + instance = cls.__new__(cls) + object.__setattr__(instance, "path", fspath) + object.__setattr__(instance, "sections", sections_data) + object.__setattr__(instance, "_sources", sources) + return instance def lineof(self, section: str, name: str | None = None) -> int | None: lineno = self._sources.get((section, name)) diff --git a/src/iniconfig/_parse.py b/src/iniconfig/_parse.py index a162636..e970409 100644 --- a/src/iniconfig/_parse.py +++ b/src/iniconfig/_parse.py @@ -1,3 +1,4 @@ +from collections.abc import Mapping from typing import NamedTuple from .exceptions import ParseError @@ -12,11 +13,55 @@ class ParsedLine(NamedTuple): value: str | None 
-def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]: +def parse_ini_data( + path: str, + data: str, + *, + strip_inline_comments: bool, +) -> tuple[Mapping[str, Mapping[str, str]], Mapping[tuple[str, str | None], int]]: + """Parse INI data and return sections and sources mappings. + + Args: + path: Path for error messages + data: INI content as string + strip_inline_comments: Whether to strip inline comments from values + + Returns: + Tuple of (sections_data, sources) where: + - sections_data: mapping of section -> {name -> value} + - sources: mapping of (section, name) -> line number + """ + tokens = parse_lines( + path, data.splitlines(True), strip_inline_comments=strip_inline_comments + ) + + sources: dict[tuple[str, str | None], int] = {} + sections_data: dict[str, dict[str, str]] = {} + + for lineno, section, name, value in tokens: + if section is None: + raise ParseError(path, lineno, "no section header defined") + sources[section, name] = lineno + if name is None: + if section in sections_data: + raise ParseError(path, lineno, f"duplicate section {section!r}") + sections_data[section] = {} + else: + if name in sections_data[section]: + raise ParseError(path, lineno, f"duplicate name {name!r}") + assert value is not None + sections_data[section][name] = value + + return sections_data, sources + + +def parse_lines( + path: str, line_iter: list[str], *, strip_inline_comments: bool = False +) -> list[ParsedLine]: result: list[ParsedLine] = [] section = None for lineno, line in enumerate(line_iter): - name, data = _parseline(path, line, lineno) + name, data = _parseline(path, line, lineno, strip_inline_comments) # new value if name is not None and data is not None: result.append(ParsedLine(lineno, section, name, data)) @@ -42,7 +87,9 @@ def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]: return result -def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | None]: +def _parseline( + path: str, line: str, 
lineno: int, strip_inline_comments: bool +) -> tuple[str | None, str | None]: # blank lines if iscommentline(line): line = "" @@ -69,10 +116,20 @@ def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | Non name, value = line.split(":", 1) except ValueError: raise ParseError(path, lineno, f"unexpected line: {line!r}") from None - return name.strip(), value.strip() + value = value.strip() + # Strip inline comments from values if requested (issue #55) + if strip_inline_comments: + for c in COMMENTCHARS: + value = value.split(c)[0].rstrip() + return name.strip(), value # continuation else: - return None, line.strip() + line = line.strip() + # Strip inline comments from continuations if requested (issue #55) + if strip_inline_comments: + for c in COMMENTCHARS: + line = line.split(c)[0].rstrip() + return None, line def iscommentline(line: str) -> bool: diff --git a/testing/test_iniconfig.py b/testing/test_iniconfig.py index dd11c73..1ca458e 100644 --- a/testing/test_iniconfig.py +++ b/testing/test_iniconfig.py @@ -125,7 +125,7 @@ def test_iniconfig_from_file(tmp_path: Path) -> None: config = IniConfig(str(path), "[diff]") assert list(config.sections) == ["diff"] with pytest.raises(TypeError): - IniConfig(data=path.read_text()) # type: ignore + IniConfig(data=path.read_text()) # type: ignore[call-arg] def test_iniconfig_section_first() -> None: @@ -304,3 +304,75 @@ def test_api_import() -> None: ) def test_iscommentline_true(line: str) -> None: assert iscommentline(line) + + +def test_parse_strips_inline_comments() -> None: + """Test that IniConfig.parse() strips inline comments from values by default.""" + config = IniConfig.parse( + "test.ini", + data=dedent( + """ + [section1] + name1 = value1 # this is a comment + name2 = value2 ; this is also a comment + name3 = value3# no space before comment + list = a, b, c # some items + """ + ), + ) + assert config["section1"]["name1"] == "value1" + assert config["section1"]["name2"] == "value2" + assert 
config["section1"]["name3"] == "value3" + assert config["section1"]["list"] == "a, b, c" + + +def test_parse_strips_inline_comments_from_continuations() -> None: + """Test that inline comments are stripped from continuation lines.""" + config = IniConfig.parse( + "test.ini", + data=dedent( + """ + [section] + names = + Alice # first person + Bob ; second person + Charlie + """ + ), + ) + assert config["section"]["names"] == "Alice\nBob\nCharlie" + + +def test_parse_preserves_inline_comments_when_disabled() -> None: + """Test that IniConfig.parse(strip_inline_comments=False) preserves comments.""" + config = IniConfig.parse( + "test.ini", + data=dedent( + """ + [section1] + name1 = value1 # this is a comment + name2 = value2 ; this is also a comment + list = a, b, c # some items + """ + ), + strip_inline_comments=False, + ) + assert config["section1"]["name1"] == "value1 # this is a comment" + assert config["section1"]["name2"] == "value2 ; this is also a comment" + assert config["section1"]["list"] == "a, b, c # some items" + + +def test_constructor_preserves_inline_comments_for_backward_compatibility() -> None: + """Test that IniConfig() constructor preserves old behavior (no stripping).""" + config = IniConfig( + "test.ini", + data=dedent( + """ + [section1] + name1 = value1 # this is a comment + name2 = value2 ; this is also a comment + """ + ), + ) + assert config["section1"]["name1"] == "value1 # this is a comment" + assert config["section1"]["name2"] == "value2 ; this is also a comment" From 6d0af4529e4375e49dc871aa3d5ce17fe1791afe Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Sat, 18 Oct 2025 23:43:10 +0200 Subject: [PATCH 2/3] Add strip_section_whitespace parameter to address issue #4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add opt-in Unicode whitespace stripping for section names (issue #4) Changes: - Add strip_section_whitespace parameter to IniConfig.parse() - Default: False (preserves backward 
compatibility) - When True: strips Unicode whitespace from section names - Document Unicode whitespace handling in CHANGELOG - Python 3's str.strip() has handled Unicode since Python 3.0 (2008) - iniconfig 2.0.0+ benefits from this automatically - Values and key names already strip Unicode whitespace correctly - Add tests for Unicode whitespace handling Background: Since iniconfig moved to Python 3 only in version 2.0.0, all strings are Unicode by default. Python 3's str.strip() handles Unicode whitespace characters (NO-BREAK SPACE, EN QUAD, IDEOGRAPHIC SPACE, etc.) automatically. This addresses the core concern in issue #4 for values and key names. The new strip_section_whitespace parameter provides opt-in stripping for section names, which were not previously stripped for backward compatibility. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG | 9 +++++++++ src/iniconfig/__init__.py | 13 ++++++++++++- src/iniconfig/_parse.py | 38 ++++++++++++++++++++++++++++++++------ testing/test_iniconfig.py | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 7 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 405f054..792c126 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,15 @@ - set strip_inline_comments=False to preserve old behavior if needed * IniConfig() constructor maintains backward compatibility (does not strip inline comments) * users should migrate to IniConfig.parse() for correct comment handling +* add strip_section_whitespace parameter to IniConfig.parse() (regarding #4) + - opt-in parameter to strip Unicode whitespace from section names + - when True, strips Unicode whitespace (U+00A0, U+2000, U+3000, etc.) 
from section names + - when False (default), preserves existing behavior for backward compatibility +* clarify Unicode whitespace handling (regarding #4) + - since iniconfig 2.0.0 (Python 3 only), all strings are Unicode by default + - Python 3's str.strip() has handled Unicode whitespace since Python 3.0 (2008) + - iniconfig automatically benefits from this in all supported versions (Python >= 3.10) + - key names and values have Unicode whitespace properly stripped using Python's built-in methods 2.2.0 ===== diff --git a/src/iniconfig/__init__.py b/src/iniconfig/__init__.py index 821e205..ea6973a 100644 --- a/src/iniconfig/__init__.py +++ b/src/iniconfig/__init__.py @@ -118,6 +118,7 @@ def parse( encoding: str = "utf-8", *, strip_inline_comments: bool = True, + strip_section_whitespace: bool = False, ) -> "IniConfig": """Parse an INI file. @@ -128,6 +129,9 @@ def parse( strip_inline_comments: Whether to strip inline comments from values (default: True). When True, comments starting with # or ; are removed from values, matching the behavior for section comments. + strip_section_whitespace: Whether to strip whitespace from section names + (default: False). When True, strips Unicode whitespace from section names, + addressing issue #4. When False, section names are kept verbatim for backward compatibility. Key names and values are always stripped, regardless of this flag. 
Returns: IniConfig instance with parsed configuration @@ -140,6 +144,10 @@ def parse( # Without comment stripping (old behavior): config = IniConfig.parse("setup.cfg", strip_inline_comments=False) # value = "foo # comment" + + # With section name stripping (opt-in for issue #4): + config = IniConfig.parse("setup.cfg", strip_section_whitespace=True) + # section names have surrounding Unicode whitespace stripped """ fspath = os.fspath(path) @@ -148,7 +156,10 @@ def parse( data = fp.read() sections_data, sources = _parse.parse_ini_data( - fspath, data, strip_inline_comments=strip_inline_comments + fspath, + data, + strip_inline_comments=strip_inline_comments, + strip_section_whitespace=strip_section_whitespace, ) # Create instance directly without calling __init__ diff --git a/src/iniconfig/_parse.py b/src/iniconfig/_parse.py index e970409..57b9b44 100644 --- a/src/iniconfig/_parse.py +++ b/src/iniconfig/_parse.py @@ -18,6 +18,7 @@ def parse_ini_data( data: str, *, strip_inline_comments: bool, + strip_section_whitespace: bool = False, ) -> tuple[Mapping[str, Mapping[str, str]], Mapping[tuple[str, str | None], int]]: """Parse INI data and return sections and sources mappings. @@ -25,6 +26,8 @@ def parse_ini_data( path: Path for error messages data: INI content as string strip_inline_comments: Whether to strip inline comments from values + strip_section_whitespace: Whether to strip whitespace from section names + (default: False). When True, addresses issue #4 by stripping Unicode whitespace. 
Returns: Tuple of (sections_data, sources) where: @@ -32,7 +35,10 @@ def parse_ini_data( - sources: mapping of (section, name) -> line number """ tokens = parse_lines( - path, data.splitlines(True), strip_inline_comments=strip_inline_comments + path, + data.splitlines(True), + strip_inline_comments=strip_inline_comments, + strip_section_whitespace=strip_section_whitespace, ) sources: dict[tuple[str, str | None], int] = {} @@ -56,12 +62,18 @@ def parse_ini_data( def parse_lines( - path: str, line_iter: list[str], *, strip_inline_comments: bool = False + path: str, + line_iter: list[str], + *, + strip_inline_comments: bool = False, + strip_section_whitespace: bool = False, ) -> list[ParsedLine]: result: list[ParsedLine] = [] section = None for lineno, line in enumerate(line_iter): - name, data = _parseline(path, line, lineno, strip_inline_comments) + name, data = _parseline( + path, line, lineno, strip_inline_comments, strip_section_whitespace + ) # new value if name is not None and data is not None: result.append(ParsedLine(lineno, section, name, data)) @@ -88,7 +100,11 @@ def parse_lines( def _parseline( - path: str, line: str, lineno: int, strip_inline_comments: bool + path: str, + line: str, + lineno: int, + strip_inline_comments: bool, + strip_section_whitespace: bool, ) -> tuple[str | None, str | None]: # blank lines if iscommentline(line): @@ -103,7 +119,11 @@ def _parseline( for c in COMMENTCHARS: line = line.split(c)[0].rstrip() if line[-1] == "]": - return line[1:-1], None + section_name = line[1:-1] + # Optionally strip whitespace from section name (issue #4) + if strip_section_whitespace: + section_name = section_name.strip() + return section_name, None return None, realline.strip() # value elif not line[0].isspace(): @@ -116,12 +136,18 @@ def _parseline( name, value = line.split(":", 1) except ValueError: raise ParseError(path, lineno, f"unexpected line: {line!r}") from None + + # Strip key name (always for backward compatibility, optionally with unicode 
awareness) + key_name = name.strip() + + # Strip value value = value.strip() # Strip inline comments from values if requested (issue #55) if strip_inline_comments: for c in COMMENTCHARS: value = value.split(c)[0].rstrip() - return name.strip(), value + + return key_name, value # continuation else: line = line.strip() diff --git a/testing/test_iniconfig.py b/testing/test_iniconfig.py index 1ca458e..85193c5 100644 --- a/testing/test_iniconfig.py +++ b/testing/test_iniconfig.py @@ -376,3 +376,39 @@ def test_constructor_preserves_inline_comments_for_backward_compatibility() -> N ) assert config["section1"]["name1"] == "value1 # this is a comment" assert config["section1"]["name2"] == "value2 ; this is also a comment" + + +def test_unicode_whitespace_stripped() -> None: + """Test that Unicode whitespace is stripped (issue #4).""" + config = IniConfig( + "test.ini", + data="[section]\n" + + "name1 = \u00a0value1\u00a0\n" # NO-BREAK SPACE + + "name2 = \u2000value2\u2000\n" # EN QUAD + + "name3 = \u3000value3\u3000\n", # IDEOGRAPHIC SPACE + ) + assert config["section"]["name1"] == "value1" + assert config["section"]["name2"] == "value2" + assert config["section"]["name3"] == "value3" + + +def test_unicode_whitespace_in_section_names_with_opt_in() -> None: + """Test that Unicode whitespace can be stripped from section names with opt-in (issue #4).""" + config = IniConfig.parse( + "test.ini", + data="[section\u00a0]\n" # NO-BREAK SPACE at end + + "key = value\n", + strip_section_whitespace=True, + ) + assert "section" in config + assert config["section"]["key"] == "value" + + +def test_unicode_whitespace_in_key_names() -> None: + """Test that Unicode whitespace is stripped from key names (issue #4).""" + config = IniConfig( + "test.ini", + data="[section]\n" + "key\u00a0 = value\n", # NO-BREAK SPACE after key + ) + assert "key" in config["section"] + assert config["section"]["key"] == "value" From 58c08691bbb86aee8efbf73e37293dd6d65b68b4 Mon Sep 17 00:00:00 2001 From: Ronny 
Pfannschmidt Date: Sat, 18 Oct 2025 23:47:45 +0200 Subject: [PATCH 3/3] Refactor: Simplify IniConfig constructor and parse() method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate __init__ to accept optional _sections and _sources parameters, allowing parse() to simply call the constructor. Changes: - Add _sections and _sources optional parameters to __init__ - Compute sections and sources first, then assign once to Final attributes - When pre-parsed data provided, use it directly (called from parse()) - Otherwise, parse the data normally (backward compatible path) - Simplify parse() to just call constructor with pre-parsed data This makes the code cleaner and easier to understand while maintaining the exact same functionality and backward compatibility. All 49 tests pass. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/iniconfig/__init__.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/iniconfig/__init__.py b/src/iniconfig/__init__.py index ea6973a..b84809f 100644 --- a/src/iniconfig/__init__.py +++ b/src/iniconfig/__init__.py @@ -96,17 +96,29 @@ def __init__( path: str | os.PathLike[str], data: str | None = None, encoding: str = "utf-8", + *, + _sections: Mapping[str, Mapping[str, str]] | None = None, + _sources: Mapping[tuple[str, str | None], int] | None = None, ) -> None: self.path = os.fspath(path) - if data is None: - with open(self.path, encoding=encoding) as fp: - data = fp.read() - # Use old behavior (no stripping) for backward compatibility - sections_data, sources = _parse.parse_ini_data( - self.path, data, strip_inline_comments=False - ) + # Determine sections and sources + if _sections is not None and _sources is not None: + # Use provided pre-parsed data (called from parse()) + sections_data = _sections + sources = _sources + else: + # Parse the data (backward compatible path) + if data is 
None: + with open(self.path, encoding=encoding) as fp: + data = fp.read() + + # Use old behavior (no stripping) for backward compatibility + sections_data, sources = _parse.parse_ini_data( + self.path, data, strip_inline_comments=False + ) + # Assign once to Final attributes self._sources = sources self.sections = sections_data @@ -162,12 +174,8 @@ def parse( strip_section_whitespace=strip_section_whitespace, ) - # Create instance directly without calling __init__ - instance = cls.__new__(cls) - object.__setattr__(instance, "path", fspath) - object.__setattr__(instance, "sections", sections_data) - object.__setattr__(instance, "_sources", sources) - return instance + # Call constructor with pre-parsed sections and sources + return cls(path=fspath, _sections=sections_data, _sources=sources) def lineof(self, section: str, name: str | None = None) -> int | None: lineno = self._sources.get((section, name))