Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,11 @@ efemel process "**/*.py" --out output/ --hooks hooks/

### Core Patterns & Examples

#### Pattern 1: Basic Dictionary Extraction
#### Pattern 1: Basic Data Extraction

**Input (`app_config.py`):**
```python
# Basic dictionary variables are extracted
# All serializable variables are extracted
app_config = {
"name": "my-app",
"version": "1.0.0",
Expand All @@ -125,11 +125,17 @@ database = {
"name": "app_db"
}

# Simple values are also extracted
app_name = "my-application"
debug_mode = True
max_connections = 100

# Private variables (underscore prefix) are ignored
_internal_config = {"secret": "hidden"}
_debug_flag = False

# Non-dictionary variables are ignored
DEBUG = True
# Non-serializable variables are filtered out
import os # This won't be extracted
```

**Output (`efemel process app_config.py --out configs/`):**
Expand All @@ -146,7 +152,10 @@ DEBUG = True
"host": "localhost",
"port": 5432,
"name": "app_db"
}
},
"app_name": "my-application",
"debug_mode": true,
"max_connections": 100
}
```

Expand Down Expand Up @@ -242,7 +251,7 @@ health_check = {
"retries": 3
}

# Compose services using dict merging
# Compose services using Python dict merging
web_service = {
**base_service,
"image": "nginx:alpine",
Expand Down Expand Up @@ -334,8 +343,8 @@ docker_compose = {
| `--workers` | `-w` | `int` | No | `CPU_COUNT` | Number of parallel workers |
| `--hooks` | `-h` | `str` | No | `None` | Path to hooks file or directory |
| `--flatten` | `-f` | `flag` | No | `False` | Flatten directory structure |
| `--pick` | `-p` | `str` | No | `None` | Pick specific dictionary keys (can be used multiple times) |
| `--unwrap` | `-u` | `str` | No | `None` | Extract specific values from dictionaries, merging them (can be used multiple times) |
| `--pick` | `-p` | `str` | No | `None` | Pick specific keys from the extracted data (can be used multiple times) |
| `--unwrap` | `-u` | `str` | No | `None` | Extract specific values from the processed data, merging them (can be used multiple times) |

### Hook Configuration

Expand Down
16 changes: 10 additions & 6 deletions efemel/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,16 @@ def info():
"--pick",
"-p",
multiple=True,
help="Pick specific dictionary keys to extract (can be used multiple times)",
help="Pick specific keys from the extracted data (can be used multiple times)",
)
@click.option(
"--unwrap",
"-u",
multiple=True,
help="Extract specific values from the processed data dictionary, merging them (can be used multiple times)",
help="Extract specific values from the processed data, merging them (can be used multiple times)",
)
def process(file_pattern, out, flatten, cwd, env, workers, hooks, pick, unwrap):
"""Process Python files and extract public dictionary variables to JSON.
"""Process Python files and extract serializable variables to JSON.

FILE_PATTERN: Glob pattern to match Python files (e.g., "**/*.py")
"""
Expand All @@ -78,12 +78,16 @@ def process(file_pattern, out, flatten, cwd, env, workers, hooks, pick, unwrap):
# Add the flatten_output_path hook to the hooks manager
hooks_manager.add("output_filename", output_filename.flatten_output_path)

hooks_manager.add("process_data", process_data_hooks.skip_private_properties)

if pick:
hooks_manager.add("process_data", process_data_hooks.pick_data(pick))

if unwrap:
hooks_manager.add("process_data", process_data_hooks.unwrap_data(unwrap))

hooks_manager.add("process_data", process_data_hooks.drop_non_json_serializable)

# Load user-defined hooks if a path is specified
if hooks:
if os.path.isfile(hooks):
Expand All @@ -107,13 +111,13 @@ def process(file_pattern, out, flatten, cwd, env, workers, hooks, pick, unwrap):
def process_single_file(file_path: Path, cwd: Path): # Added type hint for clarity
"""Process a single file and return results."""
try:
# Always create output file, even if no dictionaries found
public_dicts = process_py_file(cwd / file_path, env) or {}
# Always create output file, even if no serializable data found
serializable_data = process_py_file(cwd / file_path, env) or {}

(processed_data,) = hooks_manager.call(
"process_data",
{
"data": public_dicts,
"data": serializable_data,
"env": env,
},
return_params=["data"],
Expand Down
10 changes: 10 additions & 0 deletions efemel/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import json


def is_json_serializable(obj):
    """Check if an object is JSON serializable.

    Args:
        obj: Any Python object.

    Returns:
        True if ``json.dumps(obj)`` succeeds, False if it raises one of the
        errors handled below.
    """
    try:
        json.dumps(obj)
    except (TypeError, ValueError, RecursionError):
        # TypeError: unsupported type or key; ValueError: circular reference;
        # RecursionError: nesting deeper than the encoder can walk. In every
        # case the object cannot be written out as JSON, so report False
        # rather than letting the error escape to the caller.
        return False
    return True
55 changes: 55 additions & 0 deletions efemel/hooks/process_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,58 @@
from efemel.helpers import is_json_serializable


def drop_non_json_serializable(context):
"""
Filter out values that are not JSON serializable from the data.

Args:
context: Hook context containing 'data' key with the extracted module data
"""
data = context.get("data", {})

def is_serializable_recursive(obj):
"""Recursively check if an object is JSON serializable."""
match obj:
# Fast path for JSON primitives
case None | bool() | int() | float() | str():
return True
case list() | tuple():
# Recursively check all items in the sequence
return all(is_serializable_recursive(item) for item in obj)
case dict():
# Recursively check all keys and values in the dictionary
return all(isinstance(key, str) and is_serializable_recursive(value) for key, value in obj.items())
# Catchall for any other types - use expensive check
case _:
return is_json_serializable(obj)

# Filter out non-JSON serializable values
filtered_data = {}
for attr_name, attr_value in data.items():
if is_serializable_recursive(attr_value):
filtered_data[attr_name] = attr_value

context["data"] = filtered_data


def skip_private_properties(context):
    """
    Drop every entry whose name begins with an underscore.

    The surviving entries are stored back under ``context["data"]`` as a new
    dict, in their original order; nothing is returned.

    Args:
        context: Hook context containing 'data' key with the extracted module data
    """
    source = context.get("data", {})

    # Anything with a leading underscore is treated as private and left out
    context["data"] = {
        name: value
        for name, value in source.items()
        if not name.startswith("_")
    }


def pick_data(keys):
"""Pick specific keys from the processed python file."""

Expand Down
16 changes: 2 additions & 14 deletions efemel/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,5 @@ def process_py_file(input_path: Path, environment: str = "default"):
sys.modules[module_name] = original_module_in_sys
sys.path = original_sys_path

# Extract public dictionary variables
public_dicts = {}
for attr_name in module.__dict__:
if attr_name.startswith("_"):
continue

attr_value = getattr(module, attr_name)

if not isinstance(attr_value, dict):
continue

public_dicts[attr_name] = attr_value

return public_dicts if public_dicts else None
# Extract all variables from module
return module.__dict__
4 changes: 2 additions & 2 deletions tests/inputs/basic/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Test file for the process command.
"""

# Public dictionaries that should be extracted
# Public data that should be extracted
config = {
"name": "test_app",
"version": "1.0.0",
Expand All @@ -25,7 +25,7 @@
_private_dict = {"secret": "value"}
__dunder_dict = {"internal": "data"}

# Non-dictionary variables (should be ignored)
# Other serializable variables (should be extracted)
APP_NAME = "test_app"
VERSION = "1.0.0"
enabled_features = ["auth", "api"]
Expand Down
4 changes: 2 additions & 2 deletions tests/inputs/basic/test_dir/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Test file with no dictionaries.
Test file with no dictionaries, only other serializable variables.
"""

# Non-dictionary variables (should be ignored)
# Serializable non-dictionary variables (should be extracted)
APP_NAME = "test_app"
VERSION = "1.0.0"
enabled_features = ["auth", "api"]
Expand Down
3 changes: 2 additions & 1 deletion tests/outputs/basic/simple_test.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"config": {
"app": "test",
"version": "1.0"
}
},
"not_a_dict": "string value"
}
8 changes: 7 additions & 1 deletion tests/outputs/basic/test_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,11 @@
"name": "Guest User",
"role": "guest"
}
}
},
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
9 changes: 8 additions & 1 deletion tests/outputs/basic/test_dir/utils.json
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
{}
{
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
3 changes: 2 additions & 1 deletion tests/outputs/flattened/simple_test.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"config": {
"app": "test",
"version": "1.0"
}
},
"not_a_dict": "string value"
}
8 changes: 7 additions & 1 deletion tests/outputs/flattened/test_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,11 @@
"name": "Guest User",
"role": "guest"
}
}
},
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
9 changes: 8 additions & 1 deletion tests/outputs/flattened/test_dir_utils.json
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
{}
{
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
3 changes: 2 additions & 1 deletion tests/outputs/with_hooks/test/simple_test_20250705.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"config": {
"app": "test",
"version": "1.0"
}
},
"not_a_dict": "string value"
}
8 changes: 7 additions & 1 deletion tests/outputs/with_hooks/test/test_data_20250705.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,11 @@
"name": "Guest User",
"role": "guest"
}
}
},
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
9 changes: 8 additions & 1 deletion tests/outputs/with_hooks/test/test_dir/utils_20250705.json
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
{}
{
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
3 changes: 2 additions & 1 deletion tests/outputs/with_hooks_dir/simple_test_1_2_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"config": {
"app": "test",
"version": "1.0"
}
},
"not_a_dict": "string value"
}
8 changes: 7 additions & 1 deletion tests/outputs/with_hooks_dir/test_data_1_2_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,11 @@
"name": "Guest User",
"role": "guest"
}
}
},
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
9 changes: 8 additions & 1 deletion tests/outputs/with_hooks_dir/test_dir/utils_1_2_3.json
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
{}
{
"APP_NAME": "test_app",
"VERSION": "1.0.0",
"enabled_features": [
"auth",
"api"
]
}
2 changes: 2 additions & 0 deletions tests/outputs/with_imports/main.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{
"env": "prod",
"message": "Hello world",
"main": {
"message": "Hello world",
"env": "prod"
Expand Down
Loading
Loading