Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.2] - 2026-03-16

### Added

- `LineColumn` struct (`lineno`, `col`) in `text.rs` for representing
line/column positions; `lineno` is 1-based, `col` is a 0-based byte offset
within the line.
- `LineIndex` in `text.rs` — a newline-offset lookup table built from source
text; converts any `TextSize` byte offset to `LineColumn` in O(log n).
- `Parsed::line_col(offset: TextSize) -> LineColumn` method for resolving
byte offsets in the syntax tree to line/column positions.
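- Python bindings: `LineColumn` class with `lineno` and `col` properties.
`col` is expressed in **Unicode codepoints** (so it indexes directly into
the corresponding Python `str` line) rather than raw bytes. Note that this
differs from Python's `ast` module, whose `col_offset` is a UTF-8 byte offset.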
- Python bindings: `GoogleDocstring.line_col(offset)` and
`NumPyDocstring.line_col(offset)` methods; `offset` is typically obtained
from `Token.range.start` or `Token.range.end`.

## [0.1.1] - 2026-03-10

### Added
Expand Down Expand Up @@ -48,5 +66,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Zero external crate dependencies
- Python bindings via PyO3 (`pydocstring-rs`)

[0.1.2]: https://github.com/qraqras/pydocstring/compare/v0.1.1...v0.1.2
[0.1.1]: https://github.com/qraqras/pydocstring/compare/v0.1.0...v0.1.1
[0.1.0]: https://github.com/qraqras/pydocstring/releases/tag/v0.1.0
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pydocstring"
version = "0.1.1"
version = "0.1.2"
edition = "2024"
authors = ["Ryuma Asai"]
description = "A zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations"
Expand All @@ -12,7 +12,7 @@ readme = "README.md"
keywords = ["python", "docstring", "parser", "google", "numpy"]
categories = ["parser-implementations", "development-tools"]
rust-version = "1.85"
exclude = [".devcontainer/", ".githooks/", ".github/", "target/", "tests/", "bindings/"]
exclude = [".devcontainer/", ".githooks/", ".github/", "bindings/", "target/", "tests/"]

[lib]
name = "pydocstring"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Python bindings are also available as [`pydocstring-rs`](https://pypi.org/projec

```toml
[dependencies]
pydocstring = "0.1.1"
pydocstring = "0.1.2"
```

## Usage
Expand Down
4 changes: 2 additions & 2 deletions bindings/python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pydocstring-python"
version = "0.1.1"
version = "0.1.2"
edition = "2024"
authors = ["Ryuma Asai"]
description = "Python bindings for pydocstring — a fast docstring parser for Google and NumPy styles"
Expand All @@ -12,5 +12,5 @@ name = "pydocstring"
crate-type = ["cdylib"]

[dependencies]
pydocstring_core = { package = "pydocstring", version = "0.1.1", path = "../.." }
pydocstring_core = { package = "pydocstring", version = "0.1.2", path = "../.." }
pyo3 = { version = "0.24", features = ["extension-module"] }
25 changes: 25 additions & 0 deletions bindings/python/pydocstring.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ class TextRange:
@property
def end(self) -> int: ...

class LineColumn:
    """A line/column position in the source text.

    ``lineno`` is 1-based. ``col`` is 0-based and counted in Unicode
    codepoints, so it can be used directly as an index into the
    corresponding line as a Python ``str``.

    Note that this differs from Python's ``ast`` module, whose
    ``col_offset`` is a UTF-8 *byte* offset; the two values diverge on
    lines that contain non-ASCII characters before the position.
    """

    # 1-based line number.
    @property
    def lineno(self) -> int: ...

    # 0-based column, in Unicode codepoints.
    @property
    def col(self) -> int: ...

class Token:
@property
def kind(self) -> SyntaxKind: ...
Expand Down Expand Up @@ -130,6 +141,13 @@ class GoogleDocstring:
def source(self) -> str: ...
def pretty_print(self) -> str: ...
def to_model(self) -> Docstring: ...
def line_col(self, offset: int) -> LineColumn:
    """Resolve a byte offset in the source text to a ``LineColumn``.

    ``offset`` is a byte offset, usually taken from ``token.range.start``
    or ``token.range.end``. In the result, ``lineno`` counts from 1 and
    ``col`` counts Unicode codepoints from 0.

    Raises:
        ValueError: If ``offset`` lies past the end of the source, or
            does not fall on a UTF-8 character boundary.
    """
    ...

# ─── NumPy ───────────────────────────────────────────────────────────────────

Expand Down Expand Up @@ -186,6 +204,13 @@ class NumPyDocstring:
def source(self) -> str: ...
def pretty_print(self) -> str: ...
def to_model(self) -> Docstring: ...
def line_col(self, offset: int) -> LineColumn:
    """Resolve a byte offset in the source text to a ``LineColumn``.

    ``offset`` is a byte offset, usually taken from ``token.range.start``
    or ``token.range.end``. In the result, ``lineno`` counts from 1 and
    ``col`` counts Unicode codepoints from 0.

    Raises:
        ValueError: If ``offset`` lies past the end of the source, or
            does not fall on a UTF-8 character boundary.
    """
    ...

# ─── Model IR ────────────────────────────────────────────────────────────────

Expand Down
2 changes: 1 addition & 1 deletion bindings/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "pydocstring-rs"
version = "0.1.1"
version = "0.1.2"
description = "Python bindings for pydocstring — a zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations"
license = {text = "MIT"}
authors = [{name = "Ryuma Asai"}]
Expand Down
73 changes: 73 additions & 0 deletions bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,60 @@ impl From<&TextRange> for PyTextRange {
}
}

// ─── LineColumn ─────────────────────────────────────────────────────────────

/// A line/column position in the source text, exposed to Python as
/// `LineColumn`.
///
/// `lineno` is 1-based. `col` is the 0-based offset from the start of the
/// line counted in Unicode codepoints, so it can be used directly as an
/// index into the corresponding line as a Python `str`.
///
/// Note that this differs from Python's `ast` module, whose `col_offset` is
/// a UTF-8 *byte* offset.
///
/// Only getters are generated for the fields (`#[pyo3(get)]`), so they are
/// read-only from Python.
#[pyclass(name = "LineColumn", frozen)]
struct PyLineColumn {
    /// 1-based line number.
    #[pyo3(get)]
    lineno: u32,
    /// 0-based column, in Unicode codepoints.
    #[pyo3(get)]
    col: u32,
}

#[pymethods]
impl PyLineColumn {
    /// Python `repr()`, formatted as `LineColumn(lineno=<n>, col=<n>)`.
    fn __repr__(&self) -> String {
        format!("LineColumn(lineno={}, col={})", self.lineno, self.col)
    }
}

/// Translate a byte offset within `source` into a `PyLineColumn`.
///
/// The line number is 1-based: one more than the number of `\n` bytes that
/// precede the offset. The column is the number of Unicode codepoints between
/// the start of that line and the offset, so it is 0-based.
///
/// # Errors
///
/// Returns a `ValueError` when `byte_offset` is greater than `source.len()`,
/// or when it does not land on a UTF-8 character boundary.
fn byte_offset_to_line_col(source: &str, byte_offset: usize) -> PyResult<PyLineColumn> {
    let source_len = source.len();
    if byte_offset > source_len {
        return Err(pyo3::exceptions::PyValueError::new_err(format!(
            "offset {} is out of bounds (source length: {})",
            byte_offset, source_len
        )));
    }
    // Slicing a `str` off a char boundary panics, so reject such offsets
    // before taking the prefix below.
    if !source.is_char_boundary(byte_offset) {
        return Err(pyo3::exceptions::PyValueError::new_err(
            "offset is not on a UTF-8 character boundary",
        ));
    }

    let before = &source[..byte_offset];
    // The current line begins right after the last newline in the prefix.
    // `\n` is a single byte, so that index is always a char boundary.
    let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);
    let newlines = before.bytes().filter(|&byte| byte == b'\n').count();
    let lineno = 1 + newlines as u32;
    let col = before[line_start..].chars().count() as u32;

    Ok(PyLineColumn { lineno, col })
}

// ─── SyntaxKind ──────────────────────────────────────────────────────────────

/// Syntax node/token kind enum.
Expand Down Expand Up @@ -469,6 +523,15 @@ impl PyGoogleDocstring {
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("failed to convert to model"))?;
Ok(PyModelDocstring { inner: doc })
}
/// Convert a byte offset to a `LineColumn` with codepoint-based `col`.
///
/// The offset is typically obtained from `Token.range.start` or
/// `Token.range.end`. `lineno` is 1-based; `col` is 0-based and counted
/// in Unicode codepoints, so it indexes directly into the corresponding
/// line as a Python `str`. (Python's `ast` module reports `col_offset`
/// in UTF-8 bytes instead, so the two differ after non-ASCII text.)
///
/// Raises `ValueError` if `offset` is past the end of the source or does
/// not fall on a UTF-8 character boundary.
fn line_col(&self, py: Python<'_>, offset: u32) -> PyResult<Py<PyLineColumn>> {
    let lc = byte_offset_to_line_col(&self.source, offset as usize)?;
    Py::new(py, lc)
}
fn __repr__(&self) -> String {
"GoogleDocstring(...)".to_string()
}
Expand Down Expand Up @@ -640,6 +703,15 @@ impl PyNumPyDocstring {
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("failed to convert to model"))?;
Ok(PyModelDocstring { inner: doc })
}
/// Convert a byte offset to a `LineColumn` with codepoint-based `col`.
///
/// The offset is typically obtained from `Token.range.start` or
/// `Token.range.end`. `lineno` is 1-based; `col` is 0-based and counted
/// in Unicode codepoints, so it indexes directly into the corresponding
/// line as a Python `str`. (Python's `ast` module reports `col_offset`
/// in UTF-8 bytes instead, so the two differ after non-ASCII text.)
///
/// Raises `ValueError` if `offset` is past the end of the source or does
/// not fall on a UTF-8 character boundary.
fn line_col(&self, py: Python<'_>, offset: u32) -> PyResult<Py<PyLineColumn>> {
    let lc = byte_offset_to_line_col(&self.source, offset as usize)?;
    Py::new(py, lc)
}
fn __repr__(&self) -> String {
"NumPyDocstring(...)".to_string()
}
Expand Down Expand Up @@ -1607,6 +1679,7 @@ fn pydocstring(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyStyle>()?;
m.add_class::<PySyntaxKind>()?;
m.add_class::<PyTextRange>()?;
m.add_class::<PyLineColumn>()?;
m.add_class::<PyToken>()?;
m.add_class::<PyNode>()?;
m.add_class::<PyGoogleDocstring>()?;
Expand Down
100 changes: 100 additions & 0 deletions bindings/python/tests/test_pydocstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,106 @@ def test_convert_google_to_numpy(self):
assert "----------" in numpy_text
assert "x : int" in numpy_text


class TestLineCol:
    """Tests for GoogleDocstring.line_col() and NumPyDocstring.line_col().

    ``line_col`` takes a byte offset and returns a ``LineColumn`` whose
    ``lineno`` is 1-based and whose ``col`` is a 0-based codepoint count.
    """

    # ── Google ───────────────────────────────────────────────────────────────

    def test_google_summary_first_line(self):
        doc = pydocstring.parse_google("Summary.")
        lc = doc.line_col(doc.summary.range.start)
        assert lc.lineno == 1
        assert lc.col == 0

    def test_google_arg_name_lineno(self):
        src = "Summary.\n\nArgs:\n    x (int): Value."
        doc = pydocstring.parse_google(src)
        arg = doc.sections[0].args[0]
        lc = doc.line_col(arg.name.range.start)
        assert lc.lineno == 4  # "    x (int): Value." is on line 4
        assert lc.col == 4  # 4 spaces of indentation

    def test_google_col_is_codepoints_not_bytes(self):
        # "α" is 2 bytes in UTF-8 but 1 codepoint, and it sits on an
        # *earlier* line than the token under test.
        #   line 1: "α.\n"    -> 2 + 1 + 1 = 4 bytes (line 2 starts at byte 4)
        #   line 2: "\n"      -> 1 byte             (line 3 starts at byte 5)
        #   line 3: "Args:\n" -> 6 bytes            (line 4 starts at byte 11)
        #   line 4: "    x: V." -> "x" is at byte 11 + 4 = 15
        # The column must be measured from the start of line 4 only, so the
        # multibyte character on line 1 must not shift it.
        src = "α.\n\nArgs:\n    x: V."
        doc = pydocstring.parse_google(src)
        arg = doc.sections[0].args[0]
        lc = doc.line_col(arg.name.range.start)
        assert lc.lineno == 4
        assert lc.col == 4  # 4 ASCII spaces: bytes == codepoints on this line

    def test_google_multibyte_col(self):
        # Multibyte characters precede the offset on the same line.
        # Bytes:      α=2, β=2, ":"=1, " "=1 -> "int" starts at byte 6
        # Codepoints: α=1, β=1, ":"=1, " "=1 -> col 4
        src = "αβ: int"
        doc = pydocstring.parse_google(src)
        # The whole line is treated as the summary; byte offset 6 must map
        # to col 4 (codepoints), not 6 (bytes).
        lc = doc.line_col(6)
        assert lc.lineno == 1
        assert lc.col == 4

    def test_google_multiline_lineno(self):
        src = "Summary.\n\nExtended.\n\nArgs:\n    x: V."
        doc = pydocstring.parse_google(src)
        arg = doc.sections[0].args[0]
        lc = doc.line_col(arg.name.range.start)
        assert lc.lineno == 6

    def test_google_returns_class(self):
        lc = pydocstring.parse_google("S.").line_col(0)
        assert isinstance(lc, pydocstring.LineColumn)

    def test_google_out_of_bounds(self):
        import pytest

        doc = pydocstring.parse_google("S.")
        with pytest.raises(ValueError, match="out of bounds"):
            doc.line_col(9999)

    def test_google_offset_inside_multibyte_char(self):
        import pytest

        # Byte 1 falls in the middle of the 2-byte "α".
        doc = pydocstring.parse_google("αβ: int")
        with pytest.raises(ValueError, match="character boundary"):
            doc.line_col(1)

    # ── NumPy ────────────────────────────────────────────────────────────────

    def test_numpy_summary_first_line(self):
        doc = pydocstring.parse_numpy("Summary.")
        lc = doc.line_col(doc.summary.range.start)
        assert lc.lineno == 1
        assert lc.col == 0

    def test_numpy_param_name_lineno(self):
        src = "Summary.\n\nParameters\n----------\nx : int\n    Desc."
        doc = pydocstring.parse_numpy(src)
        param = doc.sections[0].parameters[0]
        lc = doc.line_col(param.names[0].range.start)
        assert lc.lineno == 5  # "x : int" is on line 5
        assert lc.col == 0

    def test_numpy_multibyte_col(self):
        # Same multibyte check as the Google path: byte 6 -> col 4.
        src = "αβ: int"
        doc = pydocstring.parse_numpy(src)
        lc = doc.line_col(6)
        assert lc.lineno == 1
        assert lc.col == 4

    def test_numpy_returns_class(self):
        lc = pydocstring.parse_numpy("S.").line_col(0)
        assert isinstance(lc, pydocstring.LineColumn)

    def test_numpy_out_of_bounds(self):
        import pytest

        doc = pydocstring.parse_numpy("S.")
        with pytest.raises(ValueError, match="out of bounds"):
            doc.line_col(9999)

    def test_numpy_offset_inside_multibyte_char(self):
        import pytest

        # Byte 1 falls in the middle of the 2-byte "α".
        doc = pydocstring.parse_numpy("αβ: int")
        with pytest.raises(ValueError, match="character boundary"):
            doc.line_col(1)

def test_emit_free_text_section(self):
doc = pydocstring.Docstring(
summary="Brief.",
Expand Down
Loading
Loading