44from huggingface_hub import HfApi , DatasetCard , DatasetCardData
55from huggingface_hub .file_download import hf_hub_download
66
7+ # Default template string embedded directly in code
8+ # This eliminates the need to read from a file
9+ DEFAULT_CARD_TEMPLATE = """---
10+ {{ card_data }}
11+ {{ config_data }}
12+ ---
13+ [<img src="https://raw.githubusercontent.com/patrickfleith/datafast/main/assets/datafast-badge-web.png"
14+ alt="Built with Datafast" />](https://github.com/patrickfleith/datafast)
15+
16+ # {{ pretty_name }}
17+
18+ This dataset was generated using Datafast (v{{ datafast_version }}), an open-source package to generate high-quality and diverse synthetic text datasets for LLMs.
19+ """
20+
721def extract_readme_metadata (repo_id : str , token : str | None = None ) -> str :
822 """Extracts the metadata from the README.md file of the dataset repository.
923 We have to download the previous README.md file in the repo and extract the metadata from it.
@@ -61,8 +75,7 @@ def extract_dataset_info(repo_id: str, token: str | None = None) -> str:
6175
6276def _generate_and_upload_dataset_card (
6377 repo_id : str ,
64- token : str | None = None ,
65- template_path : str | None = None
78+ token : str | None = None
6679) -> None :
6780 """
6881 Internal implementation that generates and uploads a dataset card to Hugging Face Hub.
@@ -75,24 +88,12 @@ def _generate_and_upload_dataset_card(
7588 2. Full sanitized configuration for reproducibility
7689 3. Datafast version and other metadata
7790 4. Preserved dataset_info from the existing card for proper configuration display
78-
79- Args:
80- template_path: Optional custom template path
8191 """
8292
8393 try :
84- # Load template
85- if not template_path :
86- # Try to find template in utils directory
87- current_dir = os .path .dirname (__file__ )
88- template_path = os .path .join (current_dir , "datafast_card_template.md" )
89-
90- if not os .path .exists (template_path ):
91- print (f"Template file not found: { template_path } " )
92- return
93-
94- with open (template_path , "r" , encoding = "utf-8" ) as f :
95- template_str = f .read ()
94+ # Use the built-in template string
95+ template_str = DEFAULT_CARD_TEMPLATE
96+ print (f"Using built-in template, length: { len (template_str )} characters" )
9697
9798 # Get HF token
9899 if not token :
@@ -152,7 +153,7 @@ def _generate_and_upload_dataset_card(
152153 print ("Full traceback:" )
153154
154155
155- def upload_dataset_card (repo_id : str , token : str | None = None , template_path : str | None = None ) -> None :
156+ def upload_dataset_card (repo_id : str , token : str | None = None ) -> None :
156157 """
157158 Public interface to generate and upload a dataset card to Hugging Face Hub.
158159
@@ -163,15 +164,13 @@ def upload_dataset_card(repo_id: str, token: str | None = None, template_path: s
163164 Args:
164165 repo_id: The ID of the repository to push to
165166 token: The token to authenticate with the Hugging Face Hub
166- template_path: Optional custom template path
167167 """
168168 try :
169169
170170 print (f"Uploading dataset card to repository: { repo_id } " )
171171 _generate_and_upload_dataset_card (
172172 repo_id = repo_id ,
173- token = token ,
174- template_path = template_path
173+ token = token
175174 )
176175
177176 except Exception as e :
0 commit comments