Skip to content

Commit 190f55f

Browse files
Merge pull request #88 from patrickfleith/bug/86-template-path-error
Removing the .md file for the default card and using a default string instead.
2 parents e6815f6 + 9f525fe commit 190f55f

2 files changed

Lines changed: 20 additions & 32 deletions

File tree

datafast/card_utils.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,20 @@
44
from huggingface_hub import HfApi, DatasetCard, DatasetCardData
55
from huggingface_hub.file_download import hf_hub_download
66

7+
# Default template string embedded directly in code
8+
# This eliminates the need to read from a file
9+
DEFAULT_CARD_TEMPLATE = """---
10+
{{ card_data }}
11+
{{ config_data }}
12+
---
13+
[<img src="https://raw.githubusercontent.com/patrickfleith/datafast/main/assets/datafast-badge-web.png"
14+
alt="Built with Datafast" />](https://github.com/patrickfleith/datafast)
15+
16+
# {{ pretty_name }}
17+
18+
This dataset was generated using Datafast (v{{ datafast_version }}), an open-source package to generate high-quality and diverse synthetic text datasets for LLMs.
19+
"""
20+
721
def extract_readme_metadata(repo_id: str, token: str | None = None) -> str:
822
"""Extracts the metadata from the README.md file of the dataset repository.
923
We have to download the previous README.md file in the repo and extract the metadata from it.
@@ -61,8 +75,7 @@ def extract_dataset_info(repo_id: str, token: str | None = None) -> str:
6175

6276
def _generate_and_upload_dataset_card(
6377
repo_id: str,
64-
token: str | None = None,
65-
template_path: str | None = None
78+
token: str | None = None
6679
) -> None:
6780
"""
6881
Internal implementation that generates and uploads a dataset card to Hugging Face Hub.
@@ -75,24 +88,12 @@ def _generate_and_upload_dataset_card(
7588
2. Full sanitized configuration for reproducibility
7689
3. Datafast version and other metadata
7790
4. Preserved dataset_info from the existing card for proper configuration display
78-
79-
Args:
80-
template_path: Optional custom template path
8191
"""
8292

8393
try:
84-
# Load template
85-
if not template_path:
86-
# Try to find template in utils directory
87-
current_dir = os.path.dirname(__file__)
88-
template_path = os.path.join(current_dir, "datafast_card_template.md")
89-
90-
if not os.path.exists(template_path):
91-
print(f"Template file not found: {template_path}")
92-
return
93-
94-
with open(template_path, "r", encoding="utf-8") as f:
95-
template_str = f.read()
94+
# Use the built-in template string
95+
template_str = DEFAULT_CARD_TEMPLATE
96+
print(f"Using built-in template, length: {len(template_str)} characters")
9697

9798
# Get HF token
9899
if not token:
@@ -152,7 +153,7 @@ def _generate_and_upload_dataset_card(
152153
print("Full traceback:")
153154

154155

155-
def upload_dataset_card(repo_id: str, token: str | None = None, template_path: str | None = None) -> None:
156+
def upload_dataset_card(repo_id: str, token: str | None = None) -> None:
156157
"""
157158
Public interface to generate and upload a dataset card to Hugging Face Hub.
158159
@@ -163,15 +164,13 @@ def upload_dataset_card(repo_id: str, token: str | None = None, template_path: s
163164
Args:
164165
repo_id: The ID of the repository to push to
165166
token: The token to authenticate with the Hugging Face Hub
166-
template_path: Optional custom template path
167167
"""
168168
try:
169169

170170
print(f"Uploading dataset card to repository: {repo_id}")
171171
_generate_and_upload_dataset_card(
172172
repo_id=repo_id,
173-
token=token,
174-
template_path=template_path
173+
token=token
175174
)
176175

177176
except Exception as e:

datafast/datafast_card_template.md

Lines changed: 0 additions & 11 deletions
This file was deleted.

0 commit comments

Comments
 (0)