diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index cb099b0..f8ead03 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -13,9 +13,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.13' - name: Install dependencies diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ffbe7c1..b02acee 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14.0-rc.3'] env: - TOXENV: py include: @@ -24,9 +24,9 @@ jobs: TOXENV: py39-parsel steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -51,9 +51,9 @@ jobs: tox-job: ["twinecheck", "pre-commit", "typing"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2121a27..5c2af65 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,13 @@ +exclude: tests/test_webpages repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.5 + rev: v0.13.2 hooks: - - id: ruff + - id: ruff-check args: [ --fix ] - id: ruff-format +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/LICENSE b/LICENSE index 36abe10..c34f908 100644 --- a/LICENSE +++ b/LICENSE @@ -8,4 +8,3 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index f3270c6..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,7 +0,0 @@ -include CHANGES.rst -include LICENSE -include README.rst - -recursive-include tests * -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] diff --git a/html_text/html_text.py b/html_text/html_text.py index 68ef215..dd785c8 100644 --- a/html_text/html_text.py +++ b/html_text/html_text.py @@ -191,7 +191,7 @@ def selector_to_text( See html_text.extract_text docstring for description of the approach and options. """ - import parsel + import parsel # noqa: PLC0415 if isinstance(sel, parsel.SelectorList): # if selecting a specific xpath @@ -210,7 +210,7 @@ def selector_to_text( def cleaned_selector(html: lxml.html.HtmlElement | str) -> parsel.Selector: """Clean parsel.selector.""" - import parsel + import parsel # noqa: PLC0415 try: tree = _cleaned_html_tree(html) diff --git a/pyproject.toml b/pyproject.toml index ee1bda8..9d3631e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,44 @@ +[build-system] +requires = ["hatchling>=1.27.0"] +build-backend = "hatchling.build" + +[project] +name = "html-text" +dynamic = ["version"] +description = "Extract text from HTML" +readme = "README.rst" +license = "MIT" +license-files = ["LICENSE"] +authors = [ + { name = "Konstantin Lopukhin", email = "kostia.lopuhin@gmail.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Natural Language :: English", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dependencies = [ + "lxml", + "lxml-html-clean", +] +requires-python = ">=3.9" + +[project.urls] +Homepage = "https://github.com/zytedata/html-text" + [tool.bumpversion] current_version = "0.7.0" commit = true tag = true tag_name = "{new_version}" -[[tool.bumpversion.files]] -filename = "setup.py" -search = "version=\"{current_version}\"" -replace = "version=\"{new_version}\"" - [[tool.bumpversion.files]] filename = "html_text/__init__.py" search = "__version__ = \"{current_version}\"" @@ -17,11 +47,20 @@ replace = "__version__ = \"{new_version}\"" [tool.coverage.run] branch = true -[tool.coverage.report] -exclude_also = [ - "if TYPE_CHECKING:", +[tool.hatch.version] +path = "html_text/__init__.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/html_text", + "/tests", + "/CHANGES.rst", + "/tox.ini", ] +[tool.mypy] +strict = true + [[tool.mypy.overrides]] module = "tests.*" check_untyped_defs = true @@ -29,10 +68,16 @@ allow_untyped_defs = true [tool.ruff.lint] extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", # flake8-bugbear "B", # flake8-comprehensions "C4", + # flake8-commas + "COM", # pydocstyle "D", # flake8-future-annotations @@ -87,6 +132,8 @@ extend-select = [ "YTT", ] ignore = [ + # Trailing comma missing + "COM812", # Missing docstring in public module "D100", # Missing docstring in public class @@ -141,12 +188,10 @@ ignore = [ "RUF012", # Use of `assert` detected "S101", - # Using lxml to parse untrusted data is known to be vulnerable to XML attacks - "S320", ] -[tool.ruff.lint.per-file-ignores] -"html_text/__init__.py" = ["F401"] +[tool.ruff.lint.isort] +split-on-trailing-comma = false [tool.ruff.lint.pydocstyle] convention = "pep257" diff --git a/setup.py b/setup.py deleted file mode 100755 index 492e523..0000000 --- a/setup.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python -from pathlib import Path - -from setuptools import setup - -readme = Path("README.rst").read_text(encoding="utf-8") -history = Path("CHANGES.rst").read_text(encoding="utf-8") - - -setup( - name="html_text", - version="0.7.0", - description="Extract text from HTML", - long_description=readme + "\n\n" + history, - long_description_content_type="text/x-rst", - author="Konstantin Lopukhin", - author_email="kostia.lopuhin@gmail.com", - url="https://github.com/zytedata/html-text", - packages=["html_text"], - package_data={ - "html_text": ["py.typed"], - }, - include_package_data=True, - install_requires=[ - "lxml", - "lxml-html-clean", - ], - license="MIT license", - zip_safe=False, - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - ], -) diff --git a/tox.ini b/tox.ini index 3c71052..d8474ff 100644 --- a/tox.ini +++ b/tox.ini @@ -4,12 +4,10 @@ envlist = py39,py310,py311,py312,py313,py39-parsel,twinecheck [testenv] deps = pytest - pytest-cov + pytest-cov >= 7.0.0 py39-parsel: parsel commands = - pip install -U pip - pip install -e . - pytest --cov=html_text --cov-report=html --cov-report=xml --cov-report=term {env:PYTEST_DOC:} {posargs:.} + pytest --cov=html_text --cov-report=xml --cov-report=term-missing {env:PYTEST_DOC:} {posargs:.} [testenv:py39-parsel] setenv = @@ -32,9 +30,9 @@ skip_install = true [testenv:typing] basepython = python3 deps = - mypy==1.15.0 - parsel - pytest - types-lxml==2024.12.13 + mypy==1.18.2 + parsel==1.10.0 + pytest==8.4.2 + types-lxml==2025.8.25 commands = - mypy --strict {posargs: html_text tests} + mypy {posargs: html_text tests}