diff --git a/sdks/python/apache_beam/io/parquetio_test.py b/sdks/python/apache_beam/io/parquetio_test.py index 78d1db4cc7c2..c7bf687f12cd 100644 --- a/sdks/python/apache_beam/io/parquetio_test.py +++ b/sdks/python/apache_beam/io/parquetio_test.py @@ -564,7 +564,7 @@ def test_dynamic_work_rebalancing(self): # count to row_group_size also being sufficiently large (but the required # ratio to pass varies for values of row_group_size and, somehow, the # version of pyarrow being tested against.) - file_name = self._write_data(count=280, row_group_size=20) + file_name = self._write_data(count=320, row_group_size=20) source = _create_parquet_source(file_name) splits = [split for split in source.split(desired_bundle_size=float('inf'))] diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 0e27d99deb3b..3b195cdbc087 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -145,7 +145,7 @@ def cythonize(*args, **kwargs): pyarrow_dependency = [''] else: pyarrow_dependency = [ - 'pyarrow>=3.0.0,<19.0.0', + 'pyarrow>=6.0.1,<24.0.0', # NOTE(https://github.com/apache/beam/issues/29392): We can remove this # once Beam increases the pyarrow lower bound to a version that fixes CVE. # (lower bound >= 14.0.1) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 093c5212e607..a86111a45c63 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -343,26 +343,23 @@ extras = test commands = bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/prism_runner_test.py {posargs} -[testenv:py{310,311}-pyarrow-{3,9,10,11,12,13,14,15,16,17,18}] +[testenv:py{310,311}-pyarrow-{6,15,16,17,18,19,20,21,22,23}] deps = # As a courtesy to users, test against the oldest allowed version of Pyarrow. # We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated. # Since Pandas 2 requires pyarrow>=7, downgrade pandas for this test. - 3: pyarrow>=3,<4 - 3: pandas<2 - 3: numpy>=1.14.3,<1.27.0 + 6: pyarrow>=6,<7 + 6: pandas<2 # Test against versions of pyarrow released in last ~2 years. - 9: pyarrow>=9,<10 - 9: pandas==2.1.4 - 10: pyarrow>=10,<11 - 11: pyarrow>=11,<12 - 12: pyarrow>=12,<13 - 13: pyarrow>=13,<14 - 14: pyarrow>=14,<15 15: pyarrow>=15,<16 16: pyarrow>=16,<17 17: pyarrow>=17,<18 18: pyarrow>=18,<19 + 19: pyarrow>=19,<20 + 20: pyarrow>=20,<21 + 21: pyarrow>=21,<22 + 22: pyarrow>=22,<23 + 23: pyarrow>=23,<24 numpy==1.26.4 commands = # Log pyarrow and numpy version for debugging