Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.13'
- name: Install dependencies
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14.0-rc.3']
env:
- TOXENV: py
include:
Expand All @@ -24,9 +24,9 @@ jobs:
TOXENV: py39-parsel

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -51,9 +51,9 @@ jobs:
tox-job: ["twinecheck", "pre-commit", "typing"]

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
10 changes: 8 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
exclude: tests/test_webpages
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.5
rev: v0.13.2
hooks:
- id: ruff
- id: ruff-check
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
1 change: 0 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,3 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

7 changes: 0 additions & 7 deletions MANIFEST.in

This file was deleted.

4 changes: 2 additions & 2 deletions html_text/html_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def selector_to_text(
See html_text.extract_text docstring for description of the approach
and options.
"""
import parsel
import parsel # noqa: PLC0415

if isinstance(sel, parsel.SelectorList):
# if selecting a specific xpath
Expand All @@ -210,7 +210,7 @@ def selector_to_text(

def cleaned_selector(html: lxml.html.HtmlElement | str) -> parsel.Selector:
"""Clean parsel.selector."""
import parsel
import parsel # noqa: PLC0415

try:
tree = _cleaned_html_tree(html)
Expand Down
69 changes: 57 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,44 @@
[build-system]
requires = ["hatchling>=1.27.0"]
build-backend = "hatchling.build"

[project]
name = "html-text"
dynamic = ["version"]
description = "Extract text from HTML"
readme = "README.rst"
license = "MIT"
license-files = ["LICENSE"]
authors = [
{ name = "Konstantin Lopukhin", email = "kostia.lopuhin@gmail.com" },
]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Natural Language :: English",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]
dependencies = [
"lxml",
"lxml-html-clean",
]
requires-python = ">=3.9"

[project.urls]
Homepage = "https://github.com/zytedata/html-text"

[tool.bumpversion]
current_version = "0.7.0"
commit = true
tag = true
tag_name = "{new_version}"

[[tool.bumpversion.files]]
filename = "setup.py"
search = "version=\"{current_version}\""
replace = "version=\"{new_version}\""

[[tool.bumpversion.files]]
filename = "html_text/__init__.py"
search = "__version__ = \"{current_version}\""
Expand All @@ -17,22 +47,37 @@ replace = "__version__ = \"{new_version}\""
[tool.coverage.run]
branch = true

[tool.coverage.report]
exclude_also = [
"if TYPE_CHECKING:",
[tool.hatch.version]
path = "html_text/__init__.py"

[tool.hatch.build.targets.sdist]
include = [
"/html_text",
"/tests",
"/CHANGES.rst",
"/tox.ini",
]

[tool.mypy]
strict = true

[[tool.mypy.overrides]]
module = "tests.*"
check_untyped_defs = true
allow_untyped_defs = true

[tool.ruff.lint]
extend-select = [
# flake8-builtins
"A",
# flake8-async
"ASYNC",
# flake8-bugbear
"B",
# flake8-comprehensions
"C4",
# flake8-commas
"COM",
# pydocstyle
"D",
# flake8-future-annotations
Expand Down Expand Up @@ -87,6 +132,8 @@ extend-select = [
"YTT",
]
ignore = [
# Trailing comma missing
"COM812",
# Missing docstring in public module
"D100",
# Missing docstring in public class
Expand Down Expand Up @@ -141,12 +188,10 @@ ignore = [
"RUF012",
# Use of `assert` detected
"S101",
# Using lxml to parse untrusted data is known to be vulnerable to XML attacks
"S320",
]

[tool.ruff.lint.per-file-ignores]
"html_text/__init__.py" = ["F401"]
[tool.ruff.lint.isort]
split-on-trailing-comma = false

[tool.ruff.lint.pydocstyle]
convention = "pep257"
42 changes: 0 additions & 42 deletions setup.py

This file was deleted.

16 changes: 7 additions & 9 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ envlist = py39,py310,py311,py312,py313,py39-parsel,twinecheck
[testenv]
deps =
pytest
pytest-cov
pytest-cov >= 7.0.0
py39-parsel: parsel
commands =
pip install -U pip
pip install -e .
pytest --cov=html_text --cov-report=html --cov-report=xml --cov-report=term {env:PYTEST_DOC:} {posargs:.}
pytest --cov=html_text --cov-report=xml --cov-report=term-missing {env:PYTEST_DOC:} {posargs:.}

[testenv:py39-parsel]
setenv =
Expand All @@ -32,9 +30,9 @@ skip_install = true
[testenv:typing]
basepython = python3
deps =
mypy==1.15.0
parsel
pytest
types-lxml==2024.12.13
mypy==1.18.2
parsel==1.10.0
pytest==8.4.2
types-lxml==2025.8.25
commands =
mypy --strict {posargs: html_text tests}
mypy {posargs: html_text tests}