diff --git a/splitgill/manager.py b/splitgill/manager.py index cfda998..ccfb5be 100644 --- a/splitgill/manager.py +++ b/splitgill/manager.py @@ -764,14 +764,29 @@ def get_field_names(self) -> List[str]: """ latest_index = Index(self.indices.latest, using=self._client.elasticsearch) mapping = latest_index.get_mapping() - parsed_fields = [] - for field_path, field_props in get_in( + parsed_fields = {} + top_level = get_in( [self.indices.latest, 'mappings', 'properties', 'data', 'properties'], mapping.body, default={}, - ).items(): - parsed_field = ParsedField(field_path) - for type_name in field_props['properties'].keys(): - parsed_field.add(type_name, 1) - parsed_fields.append(parsed_field) - return parsed_fields + ) + + def _extract_fields(field_name, field_props, parents=None): + parents = [p for p in (parents or []) if p is not None] + if 'properties' in field_props: + _extract_fields(field_name, field_props['properties'], parents) + else: + for k, v in field_props.items(): + if k.startswith('_') and 'type' in v: + field_path = '.'.join(parents + [field_name]) + parsed_field = parsed_fields.get( + field_path, ParsedField(field_path) + ) + parsed_field.add(k, 1) + parsed_fields[field_path] = parsed_field + else: + _extract_fields(k, v['properties'], parents + [field_name]) + + _extract_fields(None, top_level) + + return list(parsed_fields.values()) diff --git a/tests/test_manager.py b/tests/test_manager.py index 2357486..743b7f2 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -1485,19 +1485,21 @@ def test_get_field_names(splitgill: SplitgillClient): Record.new({'a': 2}), Record.new({'b': 3}), Record.new({'b': 'x'}), - Record.new({'b': 5}), + Record.new({'b': {'d': 1}, 'x': {'y': {'z': 1}}}), Record.new({'c': 'y'}), ] database.ingest(records, commit=True) database.sync() field_names = database.get_field_names() - assert len(field_names) == 4 + assert len(field_names) == 6 expected_fields = [ pf('_id', 3, t=1), pf('a', 4, t=1, n=1), pf('b', 4, t=1, n=1), + pf('b.d', 4, t=1, n=1), pf('c', 3, t=1), + pf('x.y.z', 4, t=1, n=1), ] for f in expected_fields: f.type_counts[ParsedType.UNPARSED] = 1