|
8 | 8 | import pandas as pd |
9 | 9 |
|
10 | 10 | import data_designer.config as dd |
11 | | -from data_designer.config.seed import IndexRange |
12 | 11 | from data_designer.interface import DataDesigner |
13 | 12 | from data_designer_e2e_tests.plugins.column_generator.config import DemoColumnGeneratorConfig |
14 | 13 | from data_designer_e2e_tests.plugins.filesystem_seed_reader.config import DemoFileSystemSeedSource |
15 | | -from data_designer_e2e_tests.plugins.markdown_seed_reader.config import MarkdownSectionSeedSource |
16 | 14 | from data_designer_e2e_tests.plugins.regex_filter.config import RegexFilterProcessorConfig |
17 | 15 | from data_designer_e2e_tests.plugins.seed_reader.config import DemoSeedSource |
18 | 16 |
|
@@ -107,44 +105,6 @@ def test_filesystem_seed_reader_plugin(tmp_path: Path) -> None: |
107 | 105 | } |
108 | 106 |
|
109 | 107 |
|
110 | | -def test_markdown_section_seed_reader_plugin_fanout_respects_manifest_selection(tmp_path: Path) -> None: |
111 | | - seed_dir = tmp_path / "markdown-seed" |
112 | | - seed_dir.mkdir() |
113 | | - (seed_dir / "alpha.md").write_text( |
114 | | - "# Alpha Overview\nAlpha intro.\n\n## Alpha Details\nAlpha details.", |
115 | | - encoding="utf-8", |
116 | | - ) |
117 | | - (seed_dir / "beta.md").write_text( |
118 | | - "# Beta Overview\nBeta intro.\n\n## Beta Details\nBeta details.", |
119 | | - encoding="utf-8", |
120 | | - ) |
121 | | - |
122 | | - data_designer = DataDesigner() |
123 | | - |
124 | | - config_builder = dd.DataDesignerConfigBuilder() |
125 | | - config_builder.with_seed_dataset( |
126 | | - MarkdownSectionSeedSource(path=str(seed_dir)), |
127 | | - selection_strategy=IndexRange(start=1, end=1), |
128 | | - ) |
129 | | - config_builder.add_column( |
130 | | - dd.ExpressionColumnConfig( |
131 | | - name="section_summary", |
132 | | - expr="{{ file_name }} :: {{ section_header }}", |
133 | | - ) |
134 | | - ) |
135 | | - |
136 | | - preview = data_designer.preview(config_builder, num_records=2) |
137 | | - dataset = preview.dataset.sort_values("section_index").reset_index(drop=True) |
138 | | - |
139 | | - assert list(dataset["relative_path"]) == ["beta.md", "beta.md"] |
140 | | - assert list(dataset["section_header"]) == ["Beta Overview", "Beta Details"] |
141 | | - assert list(dataset["section_content"]) == ["Beta intro.", "Beta details."] |
142 | | - assert list(dataset["section_summary"]) == [ |
143 | | - "beta.md :: Beta Overview", |
144 | | - "beta.md :: Beta Details", |
145 | | - ] |
146 | | - |
147 | | - |
148 | 108 | def test_processor_plugin() -> None: |
149 | 109 | seed_data = pd.DataFrame( |
150 | 110 | { |
|
0 commit comments