@@ -183,76 +183,163 @@ def _create_answer_entry(self, first_ann: TemporalAnnotation, frames: List[Dict[
183183
184184class HierarchyBuilder (Generic [TemporalAnnotation ]):
185185 """Builds hierarchical nested classifications from temporal annotations."""
186-
186+
def __init__(
    self,
    group_manager: AnnotationGroupManager[TemporalAnnotation],
    value_grouper: ValueGrouper[TemporalAnnotation],
):
    """Store the collaborators and precompute the child -> parent group mapping.

    Args:
        group_manager: Source of the annotation groups and root groups.
        value_grouper: Turns a list of annotations into answer entries.
    """
    self.value_grouper = value_grouper
    self.group_manager = group_manager
    # Must come last: the computation reads self.group_manager.
    self.parent_assignments = self._compute_parent_assignments()
191+
def _compute_parent_assignments(self) -> Dict[str, str]:
    """Choose a single parent group for every non-root group.

    A group is a candidate parent of a child group when every frame of the
    child is contained (per ``TemporalFrame.contains``) by at least one frame
    of the candidate. Among the candidates the best one is selected by, in
    order:

    1. the parent key being a substring of the child key
       (e.g. "sub_radio_question" in "sub_radio_question_2");
    2. the greater known depth (roots are depth 0; a candidate that has not
       been placed yet ranks deepest);
    3. the smaller average frame span.

    Candidates whose own parent chain already leads back to the child are
    rejected, so two groups with mutually containing frames can never become
    each other's parent and fall out of the root-reachable hierarchy.

    Returns:
        Mapping of child group key -> parent group key. Root groups and
        groups without any eligible parent are absent from the mapping.
    """
    manager = self.group_manager
    groups = manager.groups
    root_keys = set(manager.root_groups)
    extract = manager.frame_extractor

    # Extract each annotation's (start, end) and build its frame exactly once
    # instead of redoing it inside the nested containment loops.
    spans_by_group = {
        key: [extract(ann) for ann in anns] for key, anns in groups.items()
    }
    frames_by_group = {
        key: [TemporalFrame(start, end) for start, end in spans]
        for key, spans in spans_by_group.items()
    }

    unplaced_depth = 999  # rank given to a candidate whose depth is not known yet
    assignments: Dict[str, str] = {}
    depth_of = {root_key: 0 for root_key in root_keys}

    def leads_to(start_key, target_key):
        """Return True when following parent links from start_key reaches target_key."""
        visited = set()
        node = start_key
        while node in assignments and node not in visited:
            visited.add(node)
            node = assignments[node]
            if node == target_key:
                return True
        return False

    # One pass is enough: containment never changes and the cycle guard only
    # removes candidates, so a group with no eligible parent now never gains
    # one later. Sorted keys keep the result independent of set/hash ordering.
    for child_key in sorted(set(groups) - root_keys):
        child_frames = frames_by_group[child_key]
        candidates = []

        for parent_key, parent_frames in frames_by_group.items():
            if parent_key == child_key:
                continue
            # A group without annotations cannot contain anything; skipping it
            # also avoids dividing by zero when averaging its spans.
            if not parent_frames:
                continue
            # Taking a descendant as parent would create a cycle.
            if leads_to(parent_key, child_key):
                continue

            fully_contained = all(
                any(parent_frame.contains(child_frame) for parent_frame in parent_frames)
                for child_frame in child_frames
            )
            if not fully_contained:
                continue

            parent_spans = spans_by_group[parent_key]
            avg_span = sum(end - start for start, end in parent_spans) / len(parent_spans)
            name_similarity = 1 if parent_key in child_key else 0
            candidates.append(
                (parent_key, avg_span, depth_of.get(parent_key, unplaced_depth), name_similarity)
            )

        if not candidates:
            continue

        # Prefer name similarity, then the deeper candidate, then the smaller
        # average span; min() keeps the first candidate among exact ties.
        best_parent = min(candidates, key=lambda c: (-c[3], -c[2], c[1]))[0]
        assignments[child_key] = best_parent
        depth_of[child_key] = depth_of.get(best_parent, 0) + 1

    return assignments
273+
def build_hierarchy(self) -> List[Dict[str, Any]]:
    """Assemble one ``{"name", "answer"}`` block per root group.

    Each root group's annotations are grouped into answer entries by the value
    grouper; every entry then receives a ``"classifications"`` key when nested
    classifications are found for its frames.

    Returns:
        One dict per root group, in root-group iteration order.
    """
    manager = self.group_manager
    hierarchy = []

    for root_key in manager.root_groups:
        answers = self.value_grouper.group_by_value(manager.groups[root_key])

        for answer in answers:
            spans = [
                TemporalFrame(span["start"], span["end"])
                for span in answer.get("frames", [])
            ]
            children = self._build_nested_for_frames(spans, root_key)
            # Only add the key when there is something nested to report.
            if children:
                answer["classifications"] = children

        block_for_root = {
            "name": manager.get_group_display_name(root_key),
            "answer": answers,
        }
        hierarchy.append(block_for_root)

    return hierarchy
212295
def _build_nested_for_frames(self, parent_frames: List[TemporalFrame], parent_group_key: str) -> List[Dict[str, Any]]:
    """Recursively build the nested classification blocks under the given parent frames.

    Args:
        parent_frames: Frames of the parent entry that children must fall inside.
        parent_group_key: Key of the parent group; only groups assigned to it in
            ``self.parent_assignments`` are considered.

    Returns:
        One ``{"name", "answer"}`` dict per child group that has at least one
        immediate child annotation inside ``parent_frames``.
    """
    manager = self.group_manager

    # Everything lying within the parent frames; handed to the immediate-children filter.
    within_parent = manager.get_annotations_within_frames(parent_frames, parent_group_key)

    def inside_parent(annotation):
        """Return True when the annotation's frame is contained by any parent frame."""
        first, last = manager.frame_extractor(annotation)
        own_frame = TemporalFrame(first, last)
        return any(parent.contains(own_frame) for parent in parent_frames)

    blocks = []
    for key, annotations in manager.groups.items():
        # The parent itself and root groups are never nested.
        if key == parent_group_key or key in manager.root_groups:
            continue
        # Skip groups whose assigned parent is some other group.
        if self.parent_assignments.get(key) != parent_group_key:
            continue

        inside = [annotation for annotation in annotations if inside_parent(annotation)]
        if not inside:
            continue

        # Keep only immediate children (not strictly contained by other contained annotations).
        direct_children = self._filter_immediate_children(inside, within_parent)
        if not direct_children:
            continue

        answers = self.value_grouper.group_by_value(direct_children)

        # Descend: each answer's own frames become the parent frames one level down.
        for answer in answers:
            answer_frames = [
                TemporalFrame(span["start"], span["end"])
                for span in answer.get("frames", [])
            ]
            deeper = self._build_nested_for_frames(answer_frames, key)
            if deeper:
                answer["classifications"] = deeper

        blocks.append({
            "name": manager.get_group_display_name(key),
            "answer": answers,
        })

    return blocks
257344
258345 def _filter_immediate_children (self , candidates : List [TemporalAnnotation ],
0 commit comments