Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 48 additions & 17 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,35 +1,66 @@
# Workflow name and triggers.
# NOTE(review): this span is a flattened diff view - removed and added lines
# appear back to back (the two `name:` keys, `branches-ignore:` / `branches:`)
# and the YAML nesting indentation is lost.
name: codecov
name: CI

# Triggers: pull_request and push events, each with a branch filter listing `main`.
on:
pull_request:
branches-ignore:
branches:
- main
push:
branches:
- main

# Jobs.
# NOTE(review): removed and added diff lines are interleaved below with nesting
# indentation lost, so keys such as `run:` / `test:`, `python-version:`,
# `uses:` and step-level `run:` show up twice in a row.
jobs:
run:
test:
# The runner OS is taken from the matrix defined below.
runs-on: ${{ matrix.os }}
strategy:
# One job instance per combination of OS and Python version.
matrix:
os: [ubuntu-latest, macos-latest]
python-version: ['3.13', '3.12', '3.11', '3.10', '3.9']
python-version: ["3.13", "3.12", "3.11", "3.10", "3.9"]
env:
# Exposes the matrix OS to the steps as the OS environment variable.
OS: ${{ matrix.os }}
steps:
# Check out the repository, then provision Python / uv.
- name: Checkout
uses: actions/checkout@v1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
python-version: ${{ matrix.python-version }}
node-version: 16
enable-cache: true

# Installs the matrix Python version through uv.
- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}

# Dependency installation: a pip/setup.py script and a single `uv sync` line both appear here.
- name: Install dependencies
run: |
pip install -r requirements.txt --use-pep517
pip install -r requirements_test.txt --use-pep517
python setup.py sdist bdist_wheel
pip install dist/*.whl
run: uv sync --python ${{ matrix.python-version }}

# NOTE(review): one `run:` line passes --cov / --cov-report=xml explicitly, the
# other is a plain `uv run pytest`. If the plain form is the one in effect, the
# coverage report for the upload step has to come from pytest configuration
# elsewhere in the project - confirm.
- name: Run tests and collect coverage
run: python -m pytest tests/ --cov=./ --cov-report=xml
run: uv run pytest

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v4

# Lint job: on ubuntu-latest, runs ruff's linter and its formatter in check
# mode through `uvx`, both over the whole repository (`.`).
# NOTE(review): nesting indentation is lost in this view.
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5

- name: Ruff lint
run: uvx ruff check .

# `--check` reports files that would be reformatted instead of rewriting them.
- name: Ruff format check
run: uvx ruff format --check .

# Type-check job: on ubuntu-latest, installs `ty` as a uv tool and then runs
# `ty check` against the `obsidiantools` package through `uv run`.
# NOTE(review): nesting indentation is lost in this view.
typecheck:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5

- name: Install ty
run: uv tool install ty

- name: Run ty
run: uv run ty check obsidiantools
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,4 @@ notebooks/
.gitignore
junit.xml
codecov.yml
.claude/settings.local.json
10 changes: 5 additions & 5 deletions obsidiantools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from . import api
from . import md_utils
from . import html_processing
from . import canvas_utils
from . import media_utils
from . import api as api
from . import canvas_utils as canvas_utils
from . import html_processing as html_processing
from . import md_utils as md_utils
from . import media_utils as media_utils
83 changes: 62 additions & 21 deletions obsidiantools/_constants.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,77 @@
# NOTE(review): every constant in this span is assigned twice with the same
# pattern (single-quoted, then double-quoted); the later assignment is the one
# that stays bound.

# WIKILINKS AND EMBEDDED FILES: regex that includes any aliases
# (the inner class accepts everything except "]", so "|alias" text is kept).
# Capture group 1 is the optional leading "!" that marks an embed; capture
# group 2 is everything inside [[ ]]. In re.findall() result tuples these are
# positions 0 and 1.
WIKILINK_REGEX = r'(!)?\[{2}([^\]\]]+)\]{2}'
WIKILINK_REGEX = r"(!)?\[{2}([^\]\]]+)\]{2}"

# TAGS
# TAG_INCLUDE_NESTED_REGEX: "#" not preceded by "(" or a backslash; the capture
# runs on through any non-whitespace characters, so "/"-separated nested parts
# are included. It ends with a negative lookahead for "]]" reachable without
# crossing a "[" - presumably to skip "#" anchors inside [[ ]]; verify.
# TAG_MAIN_ONLY_REGEX: "#" not preceded by "("; the capture has no catch-all
# part, so it stops before any "/".
# NOTE(review): the class [A-z] spans ASCII 0x41-0x7A, so besides letters it
# also matches [ \ ] ^ _ and the backtick. Tags starting with "_" currently
# match only because of that range - confirm intent before tightening it.
TAG_INCLUDE_NESTED_REGEX = r'(?<!\()(?<!\\)#{1}([A-z]+[0-9_\-]*[A-Z0-9]?[^\s]+(?![^\[\[]*\]\]))\/?'
TAG_MAIN_ONLY_REGEX = r'(?<!\()#{1}([A-z]+[0-9_\-]*[A-Z0-9]?)\/?'
TAG_INCLUDE_NESTED_REGEX = (
r"(?<!\()(?<!\\)#{1}([A-z]+[0-9_\-]*[A-Z0-9]?[^\s]+(?![^\[\[]*\]\]))\/?"
)
TAG_MAIN_ONLY_REGEX = r"(?<!\()#{1}([A-z]+[0-9_\-]*[A-Z0-9]?)\/?"

# md links: catch URLs or paths
# Group 1 is the link text and group 2 the target; the AFTER_HTML_PROC variant
# requires the target to be wrapped in <...>.
INLINE_LINK_AFTER_HTML_PROC_REGEX = r'\[([^\]]+)\]\(<([^)]+)>\)'
INLINE_LINK_VIA_MD_ONLY_REGEX = r'\[([^\]]+)\]\(([^)]+)\)'
INLINE_LINK_AFTER_HTML_PROC_REGEX = r"\[([^\]]+)\]\(<([^)]+)>\)"
INLINE_LINK_VIA_MD_ONLY_REGEX = r"\[([^\]]+)\]\(([^)]+)\)"

# helpers:
# NOTE(review): despite its name, WIKILINK_AS_STRING_REGEX matches the inline
# [text](target) form (no capture groups), not [[...]] - confirm the name is
# intentional. EMBEDDED_FILE_LINK_AS_STRING_REGEX matches an optional "!"
# followed by [[...]] and captures the inner text.
WIKILINK_AS_STRING_REGEX = r'\[[^\]]+\]\([^)]+\)'
EMBEDDED_FILE_LINK_AS_STRING_REGEX = r'!?\[{2}([^\]\]]+)\]{2}'
WIKILINK_AS_STRING_REGEX = r"\[[^\]]+\]\([^)]+\)"
EMBEDDED_FILE_LINK_AS_STRING_REGEX = r"!?\[{2}([^\]\]]+)\]{2}"

# Sets of extensions via https://help.obsidian.md/How+to/Embed+files :
# NB: file.ext and file.EXT can exist in same folder, so each set holds every
# extension in both its all-lowercase and all-uppercase spelling. The sets are
# derived from the lowercase spellings to keep the two halves in step.
IMG_EXT_SET = {
    variant
    for ext in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg")
    for variant in (ext, ext.upper())
}
AUDIO_EXT_SET = {
    variant
    for ext in (".mp3", ".webm", ".wav", ".m4a", ".ogg", ".3gp", ".flac")
    for variant in (ext, ext.upper())
}
VIDEO_EXT_SET = {
    variant
    for ext in (".mp4", ".webm", ".ogv", ".mov", ".mkv")
    for variant in (ext, ext.upper())
}
PDF_EXT_SET = {variant for ext in (".pdf",) for variant in (ext, ext.upper())}
# canvas files:
CANVAS_EXT_SET = {variant for ext in (".canvas",) for variant in (ext, ext.upper())}

# metadata df cols order:
# A single list literal; the position of each name in the list is the column
# order it describes.
METADATA_DF_COLS_GENERIC_TYPE = [
    "rel_filepath",
    "abs_filepath",
    "file_exists",
    "n_backlinks",
    "n_wikilinks",
    "n_tags",
    "n_embedded_files",
    "modified_time",
]
74 changes: 39 additions & 35 deletions obsidiantools/_io.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pathlib import Path
from glob import glob
from pathlib import Path

import numpy as np


Expand All @@ -18,16 +19,20 @@ def get_relpaths_from_dir(dir_path: Path, *, extension: str) -> list[Path]:
Returns:
list of Path objects
"""
relpaths_list = [Path(p).relative_to(dir_path)
for p in glob(f"{dir_path}/**/*.{extension}",
recursive=True)]
relpaths_list = [
Path(p).relative_to(dir_path)
for p in glob(f"{dir_path}/**/*.{extension}", recursive=True)
]
return relpaths_list


def get_relpaths_matching_subdirs(dir_path: Path, *,
extension: str,
include_subdirs: list = None,
include_root: bool = True) -> list[Path]:
def get_relpaths_matching_subdirs(
dir_path: Path,
*,
extension: str,
include_subdirs: list = None,
include_root: bool = True,
) -> list[Path]:
"""Get list of relative paths for {extension} files in a given directory,
filtered to include specified subdirectories (with include_subdirs
kwarg). The default arguments align with get_relpaths_from_dir
Expand Down Expand Up @@ -61,34 +66,35 @@ def get_relpaths_matching_subdirs(dir_path: Path, *,
# forward slash consistently here.

if include_subdirs:
include_subdirs_final = [str(Path(i).as_posix())
for i in include_subdirs]
include_subdirs_final = [str(Path(i).as_posix()) for i in include_subdirs]

if not include_subdirs and include_root:
return get_relpaths_from_dir(dir_path,
extension=extension)
return get_relpaths_from_dir(dir_path, extension=extension)
elif not include_subdirs and not include_root:
return [i for i in get_relpaths_from_dir(dir_path,
extension=extension)
if str(i.parent.as_posix()) != '.']
return [
i
for i in get_relpaths_from_dir(dir_path, extension=extension)
if str(i.parent.as_posix()) != "."
]
else:
if include_root:
return [i for i in get_relpaths_from_dir(dir_path,
extension=extension)
if str(i.parent.as_posix())
in include_subdirs_final + ['.']]
return [
i
for i in get_relpaths_from_dir(dir_path, extension=extension)
if str(i.parent.as_posix()) in include_subdirs_final + ["."]
]
else:
return [i for i in get_relpaths_from_dir(dir_path,
extension=extension)
if str(i.parent.as_posix())
in include_subdirs_final]
return [
i
for i in get_relpaths_from_dir(dir_path, extension=extension)
if str(i.parent.as_posix()) in include_subdirs_final
]


def _get_valid_filepaths_by_ext_set(dirpath: Path, *, exts: set[str]) -> list[Path]:
    """Collect the files under ``dirpath`` whose suffix is in ``exts``.

    The directory tree is searched recursively. Suffix matching is an exact,
    case-sensitive membership test, so callers that want both ``.ext`` and
    ``.EXT`` must pass both spellings.

    Args:
        dirpath: root directory to search.
        exts: accepted file suffixes, each including the leading dot.

    Returns:
        Paths relative to ``dirpath``, in the order ``Path.glob`` yields them.
    """
    root = Path(dirpath)
    # is_file() keeps directories that merely look like files (for example a
    # folder named "scans.pdf") out of the result.
    return [
        p.relative_to(root)
        for p in root.glob("**/*")
        if p.suffix in exts and p.is_file()
    ]


Expand All @@ -98,15 +104,13 @@ def _get_shortest_path_by_filename(relpaths_list: list[Path]) -> dict[str, Path]

# get indices of dupe 'filename w/ ext':
_, inverse_ix, counts = np.unique(
np.array(all_file_names_list),
return_inverse=True,
return_counts=True,
axis=0)
np.array(all_file_names_list), return_inverse=True, return_counts=True, axis=0
)
dupe_names_ix = np.where(counts[inverse_ix] > 1)[0]

# get shortest paths via mask:
shortest_paths_arr = np.array(all_file_names_list, dtype=object)
shortest_paths_arr[dupe_names_ix] = np.array(
[str(fpath)
for fpath in relpaths_list])[dupe_names_ix]
return {fn: path for fn, path in zip(shortest_paths_arr, relpaths_list)}
[str(fpath) for fpath in relpaths_list]
)[dupe_names_ix]
return dict(zip(shortest_paths_arr, relpaths_list))
Loading