qraqras · qraqras · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.1.2] - 2026-03-16
+
+### Added
+
+- `LineColumn` struct (`lineno`, `col`) in `text.rs` for representing
+  line/column positions; `lineno` is 1-based, `col` is a 0-based byte offset
+  within the line.
+- `LineIndex` in `text.rs` — a newline-offset lookup table built from source
+  text; converts any `TextSize` byte offset to `LineColumn` in O(log n).
+- `Parsed::line_col(offset: TextSize) -> LineColumn` method for resolving
+  byte offsets in the syntax tree to line/column positions.
+- Python bindings: `LineColumn` class with `lineno` and `col` properties.
+  `col` is expressed in **Unicode codepoints** (a Python `str` index into
+  the line; `ast`'s `col_offset` is a UTF-8 byte offset) rather than raw bytes.
+- Python bindings: `GoogleDocstring.line_col(offset)` and
+  `NumPyDocstring.line_col(offset)` methods; `offset` is a byte offset,
+  typically obtained from `Token.range.start` or `Token.range.end`.
+
 ## [0.1.1] - 2026-03-10
 
 ### Added
@@ -48,5 +66,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Zero external crate dependencies
 - Python bindings via PyO3 (`pydocstring-rs`)
 
+[0.1.2]: https://github.com/qraqras/pydocstring/compare/v0.1.1...v0.1.2
 [0.1.1]: https://github.com/qraqras/pydocstring/compare/v0.1.0...v0.1.1
 [0.1.0]: https://github.com/qraqras/pydocstring/releases/tag/v0.1.0
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pydocstring"
-version = "0.1.1"
+version = "0.1.2"
 edition = "2024"
 authors = ["Ryuma Asai"]
 description = "A zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations"
@@ -12,7 +12,7 @@ readme = "README.md"
 keywords = ["python", "docstring", "parser", "google", "numpy"]
 categories = ["parser-implementations", "development-tools"]
 rust-version = "1.85"
-exclude = [".devcontainer/", ".githooks/", ".github/", "target/", "tests/", "bindings/"]
+exclude = [".devcontainer/", ".githooks/", ".github/", "bindings/", "target/", "tests/"]
 
 [lib]
 name = "pydocstring"

diff --git a/README.md b/README.md
@@ -24,7 +24,7 @@ Python bindings are also available as [`pydocstring-rs`](https://pypi.org/projec
 
 ```toml
 [dependencies]
-pydocstring = "0.1.1"
+pydocstring = "0.1.2"
 ```
 
 ## Usage

diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pydocstring-python"
-version = "0.1.1"
+version = "0.1.2"
 edition = "2024"
 authors = ["Ryuma Asai"]
 description = "Python bindings for pydocstring — a fast docstring parser for Google and NumPy styles"
@@ -12,5 +12,5 @@ name = "pydocstring"
 crate-type = ["cdylib"]
 
 [dependencies]
-pydocstring_core = { package = "pydocstring", version = "0.1.1", path = "../.." }
+pydocstring_core = { package = "pydocstring", version = "0.1.2", path = "../.." }
 pyo3 = { version = "0.24", features = ["extension-module"] }
diff --git a/bindings/python/pydocstring.pyi b/bindings/python/pydocstring.pyi
@@ -63,6 +63,17 @@ class TextRange:
     @property
     def end(self) -> int: ...
 
+class LineColumn:
+    """A line/column position in the source text.
+
+    ``lineno`` is 1-based; ``col`` is a 0-based Unicode codepoint count (a
+    ``str`` index into the line; ``ast``'s ``col_offset`` is in UTF-8 bytes).
+    """
+    @property
+    def lineno(self) -> int: ...
+    @property
+    def col(self) -> int: ...
+
 class Token:
     @property
     def kind(self) -> SyntaxKind: ...
@@ -130,6 +141,13 @@ class GoogleDocstring:
     def source(self) -> str: ...
     def pretty_print(self) -> str: ...
     def to_model(self) -> Docstring: ...
+    def line_col(self, offset: int) -> LineColumn:
+        """Convert a byte offset (typically from ``token.range``) to a LineColumn.
+
+        ``lineno`` is 1-based; ``col`` is 0-based Unicode codepoints.  Raises
+        ``ValueError`` if ``offset`` is out of bounds or splits a character.
+        """
+        ...
 
 # ─── NumPy ───────────────────────────────────────────────────────────────────
 
@@ -186,6 +204,13 @@ class NumPyDocstring:
     def source(self) -> str: ...
     def pretty_print(self) -> str: ...
     def to_model(self) -> Docstring: ...
+    def line_col(self, offset: int) -> LineColumn:
+        """Convert a byte offset (typically from ``token.range``) to a LineColumn.
+
+        ``lineno`` is 1-based; ``col`` is 0-based Unicode codepoints.  Raises
+        ``ValueError`` if ``offset`` is out of bounds or splits a character.
+        """
+        ...
 
 # ─── Model IR ────────────────────────────────────────────────────────────────
 

diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "pydocstring-rs"
-version = "0.1.1"
+version = "0.1.2"
 description = "Python bindings for pydocstring — a zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations"
 license = {text = "MIT"}
 authors = [{name = "Ryuma Asai"}]

diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
@@ -39,6 +39,60 @@ impl From<&TextRange> for PyTextRange {
     }
 }
 
+// ─── LineColumn ─────────────────────────────────────────────────────────────
+
+/// A line/column position in the source text.
+///
+/// `lineno` is 1-based; `col` is the 0-based Unicode codepoint offset from
+/// the start of the line (a Python `str` index, not `ast`'s UTF-8 byte offset).
+#[pyclass(name = "LineColumn", frozen)]
+struct PyLineColumn {
+    #[pyo3(get)]
+    lineno: u32,
+    #[pyo3(get)]
+    col: u32,
+}
+
+#[pymethods]
+impl PyLineColumn {
+    fn __repr__(&self) -> String {
+        format!("LineColumn(lineno={l}, col={c})", l = self.lineno, c = self.col)
+    }
+}
+
+/// Resolve `byte_offset` within `source` to a 1-based line number and a
+/// 0-based column counted in Unicode codepoints from the start of that line.
+///
+/// # Errors
+///
+/// Returns a Python `ValueError` when `byte_offset` exceeds `source.len()` or
+/// does not fall on a UTF-8 character boundary.
+fn byte_offset_to_line_col(source: &str, byte_offset: usize) -> PyResult<PyLineColumn> {
+    let source_len = source.len();
+    if byte_offset > source_len {
+        let message = format!(
+            "offset {} is out of bounds (source length: {})",
+            byte_offset, source_len
+        );
+        return Err(pyo3::exceptions::PyValueError::new_err(message));
+    }
+    // `str::get` yields `None` when the offset would split a multi-byte character.
+    let Some(prefix) = source.get(..byte_offset) else {
+        return Err(pyo3::exceptions::PyValueError::new_err(
+            "offset is not on a UTF-8 character boundary",
+        ));
+    };
+    // Each newline before the offset starts a new line; the current line begins
+    // just past the last one, or at byte 0 when there is none.
+    let newlines_before = prefix.bytes().filter(|&b| b == b'\n').count();
+    let line_start = prefix.rfind('\n').map_or(0, |idx| idx + 1);
+    // The column is the number of codepoints between the line start and the offset.
+    Ok(PyLineColumn {
+        lineno: 1 + newlines_before as u32,
+        col: prefix[line_start..].chars().count() as u32,
+    })
+}
+
 // ─── SyntaxKind ──────────────────────────────────────────────────────────────
 
 /// Syntax node/token kind enum.
@@ -469,6 +523,15 @@ impl PyGoogleDocstring {
             .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("failed to convert to model"))?;
         Ok(PyModelDocstring { inner: doc })
     }
+    /// Resolve a byte offset in the source to a `LineColumn`.
+    ///
+    /// Pass `Token.range.start` or `Token.range.end`.  `lineno` is 1-based;
+    /// `col` is a 0-based Unicode codepoint count, i.e. a Python `str` index
+    /// into the line (unlike `ast`'s `col_offset`, a UTF-8 byte offset).
+    fn line_col(&self, py: Python<'_>, offset: u32) -> PyResult<Py<PyLineColumn>> {
+        byte_offset_to_line_col(&self.source, offset as usize)
+            .and_then(|position| Py::new(py, position))
+    }
     fn __repr__(&self) -> String {
         "GoogleDocstring(...)".to_string()
     }
@@ -640,6 +703,15 @@ impl PyNumPyDocstring {
             .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("failed to convert to model"))?;
         Ok(PyModelDocstring { inner: doc })
     }
+    /// Resolve a byte offset in the source to a `LineColumn`.
+    ///
+    /// Pass `Token.range.start` or `Token.range.end`.  `lineno` is 1-based;
+    /// `col` is a 0-based Unicode codepoint count, i.e. a Python `str` index
+    /// into the line (unlike `ast`'s `col_offset`, a UTF-8 byte offset).
+    fn line_col(&self, py: Python<'_>, offset: u32) -> PyResult<Py<PyLineColumn>> {
+        byte_offset_to_line_col(&self.source, offset as usize)
+            .and_then(|position| Py::new(py, position))
+    }
     fn __repr__(&self) -> String {
         "NumPyDocstring(...)".to_string()
     }
@@ -1607,6 +1679,7 @@ fn pydocstring(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyStyle>()?;
     m.add_class::<PySyntaxKind>()?;
     m.add_class::<PyTextRange>()?;
+    m.add_class::<PyLineColumn>()?;
     m.add_class::<PyToken>()?;
     m.add_class::<PyNode>()?;
     m.add_class::<PyGoogleDocstring>()?;

diff --git a/bindings/python/tests/test_pydocstring.py b/bindings/python/tests/test_pydocstring.py
@@ -419,6 +419,106 @@ def test_convert_google_to_numpy(self):
         assert "----------" in numpy_text
         assert "x : int" in numpy_text
 
+
+class TestLineCol:
+    """Tests for GoogleDocstring.line_col() and NumPyDocstring.line_col()."""
+
+    # ── Google ───────────────────────────────────────────────────────────────
+
+    def test_google_summary_first_line(self):
+        doc = pydocstring.parse_google("Summary.")
+        lc = doc.line_col(doc.summary.range.start)
+        assert lc.lineno == 1
+        assert lc.col == 0
+
+    def test_google_arg_name_lineno(self):
+        src = "Summary.\n\nArgs:\n    x (int): Value."
+        doc = pydocstring.parse_google(src)
+        arg = doc.sections[0].args[0]
+        lc = doc.line_col(arg.name.range.start)
+        assert lc.lineno == 4   # "    x (int): Value." is on line 4
+        assert lc.col == 4      # 4 spaces of indentation
+
+    def test_google_col_is_codepoints_not_bytes(self):
+        # "α" is 2 bytes in UTF-8 but 1 codepoint.
+        # Source: "α.\n\nArgs:\n    x: V."
+        # "x" starts at byte 4+4=... let's compute:
+        # line 1: "α.\n"  → α=2bytes, .=1, \n=1  → line_start line4 = 2+1+1+1 = 5
+        # line 2: "\n"    → 1byte
+        # line 3: "Args:\n" → 6bytes
+        # line 4: "    x: V.\n" → "    x" starts with 4 spaces + x
+        # byte of "x" in line4 = 5+1+6+4 = 16
+        src = "α.\n\nArgs:\n    x: V."
+        doc = pydocstring.parse_google(src)
+        arg = doc.sections[0].args[0]
+        lc = doc.line_col(arg.name.range.start)
+        assert lc.lineno == 4
+        assert lc.col == 4  # 4 spaces → 4 codepoints (bytes == codepoints here)
+
+    def test_google_multibyte_col(self):
+        # Line with multibyte chars before the token.
+        # "αβ: int" as the summary — check col of "int" token text
+        # α=2bytes, β=2bytes, :=1, space=1 → "int" starts at byte 6
+        # but codepoints: α=1, β=1, :=1, space=1 → col=4
+        src = "αβ: int"
+        doc = pydocstring.parse_google(src)
+        # The whole line is treated as summary; check that line_col at byte 6
+        # returns col 4 (codepoints), not 6 (bytes)
+        lc = doc.line_col(6)
+        assert lc.lineno == 1
+        assert lc.col == 4
+
+    def test_google_multiline_lineno(self):
+        src = "Summary.\n\nExtended.\n\nArgs:\n    x: V."
+        doc = pydocstring.parse_google(src)
+        arg = doc.sections[0].args[0]
+        lc = doc.line_col(arg.name.range.start)
+        assert lc.lineno == 6
+
+    def test_google_returns_class(self):
+        lc = pydocstring.parse_google("S.").line_col(0)
+        assert isinstance(lc, pydocstring.LineColumn)
+
+    def test_google_out_of_bounds(self):
+        import pytest
+        doc = pydocstring.parse_google("S.")
+        with pytest.raises(Exception):
+            doc.line_col(9999)
+
+    # ── NumPy ────────────────────────────────────────────────────────────────
+
+    def test_numpy_summary_first_line(self):
+        doc = pydocstring.parse_numpy("Summary.")
+        lc = doc.line_col(doc.summary.range.start)
+        assert lc.lineno == 1
+        assert lc.col == 0
+
+    def test_numpy_param_name_lineno(self):
+        src = "Summary.\n\nParameters\n----------\nx : int\n    Desc."
+        doc = pydocstring.parse_numpy(src)
+        param = doc.sections[0].parameters[0]
+        lc = doc.line_col(param.names[0].range.start)
+        assert lc.lineno == 5   # "x : int" is on line 5
+        assert lc.col == 0
+
+    def test_numpy_multibyte_col(self):
+        # Same multibyte check for NumPy path
+        src = "αβ: int"
+        doc = pydocstring.parse_numpy(src)
+        lc = doc.line_col(6)
+        assert lc.lineno == 1
+        assert lc.col == 4
+
+    def test_numpy_returns_class(self):
+        lc = pydocstring.parse_numpy("S.").line_col(0)
+        assert isinstance(lc, pydocstring.LineColumn)
+
+    def test_numpy_out_of_bounds(self):
+        import pytest
+        doc = pydocstring.parse_numpy("S.")
+        with pytest.raises(Exception):
+            doc.line_col(9999)
+
     def test_emit_free_text_section(self):
         doc = pydocstring.Docstring(
             summary="Brief.",