1414
from typing import Any, Dict, List, Optional

import pytest
from databend_udf.client import UDFClient

from tests.integration.conftest import StageLocation, build_stage_mapping
1920
2021
def _get_listing(
    server_port: int,
    stage: StageLocation,
    pattern: Optional[str] = None,
    max_files: int = 0,
) -> List[Dict[str, Any]]:
    """Call the ``ai_list_files`` UDF on the local test server.

    Args:
        server_port: Port of the UDF server listening on 127.0.0.1.
        stage: Stage to list; bound to the UDF's ``stage_location`` argument.
        pattern: Pattern forwarded to the UDF as-is. ``None`` is passed
            through unchanged (presumably "no filtering" -- confirm against
            the UDF implementation).
        max_files: Limit forwarded to the UDF as-is (presumably 0 means
            "no limit" -- confirm against the UDF implementation).

    Returns:
        Whatever ``UDFClient.call_function`` returns; the tests in this
        module treat it as a list of dict rows.
    """
    client = UDFClient(host="127.0.0.1", port=server_port)

    # UDF signature: ai_list_files(stage_location, pattern, max_files).
    # UDFClient.call_function accepts *args, not a RecordBatch, so the scalar
    # arguments go in positionally and the stage is supplied via the
    # stage_locations mapping.
    stage_mapping = build_stage_mapping(stage, "stage_location")
    return client.call_function(
        "ai_list_files",
        pattern,
        max_files,
        stage_locations=[stage_mapping],
    )
2835
2936
@@ -43,10 +50,9 @@ def test_list_stage_files_content(running_server, memory_stage):
def test_list_stage_files_metadata(running_server, memory_stage):
    """Every listed row carries the stage name, a uri and a last_modified key."""
    rows = _get_listing(running_server, memory_stage)

    # All rows must belong to the stage that was listed (this also fails on
    # an empty listing, so the all() checks below are never vacuous).
    stage_names = {row["stage_name"] for row in rows}
    assert stage_names == {memory_stage.stage_name}

    # Memory stage uri might be just the path if no bucket/root
    assert all("uri" in row for row in rows)
    assert all(row["uri"].endswith(row["path"]) for row in rows)

    # Check that last_modified key exists (value might be None for memory backend)
    assert all("last_modified" in row for row in rows)
5258
@@ -55,22 +61,40 @@ def test_list_stage_files_schema(running_server, memory_stage):
5561 rows = _get_listing (running_server , memory_stage )
5662 for row in rows :
5763 assert "path" in row
58- assert "fullpath " in row
64+ assert "uri " in row
5965 assert "size" in row
6066 assert "last_modified" in row
6167 assert "etag" in row # May be None
6268 assert "content_type" in row # May be None
63-
64- # Verify order implicitly by checking keys list if needed,
69+
70+ # Verify order implicitly by checking keys list if needed,
6571 # but for now just existence is enough as dicts are ordered in Python 3.7+
6672 keys = list (row .keys ())
67- # Expected keys: stage_name, path, fullpath , size, last_modified, etag, content_type
73+ # Expected keys: stage_name, path, uri , size, last_modified, etag, content_type
6874 # Note: stage_name is added by _get_listing or the UDF logic, let's check the core ones
69- assert keys .index ("path" ) < keys .index ("fullpath " )
75+ assert keys .index ("path" ) < keys .index ("uri " )
7076 assert keys .index ("last_modified" ) < keys .index ("etag" )
7177
7278
def test_list_stage_files_truncation(running_server, memory_stage):
    """With max_files=1 the listing contains exactly one row."""
    rows = _get_listing(running_server, memory_stage, max_files=1)
    assert len(rows) == 1
    # The single returned row must still expose the last_modified key.
    assert "last_modified" in rows[0]
83+
84+
def test_list_stage_files_pattern(running_server, memory_stage):
    """The pattern argument restricts the listing to matching paths."""
    # Patterns match against the full path (e.g. "data/file.pdf"), so each
    # extension pattern is expected to select exactly one file.
    for extension in (".pdf", ".docx"):
        rows = _get_listing(
            running_server, memory_stage, pattern=f"data/*{extension}"
        )
        assert len(rows) == 1
        assert rows[0]["path"].endswith(extension)

    rows = _get_listing(running_server, memory_stage, pattern="data/subdir/*")
    # Matches data/subdir/ and data/subdir/note.txt
    assert len(rows) == 2
    paths = {row["path"] for row in rows}
    assert "data/subdir/note.txt" in paths
    assert "data/subdir/" in paths
0 commit comments