Skip to content

Downloading larger dataset causes memory issues #18

@tk-king

Description

@tk-king

dataset-store | File "/usr/local/lib/python3.10/site-packages/uvicorn/protocols/http/h11_impl.py", line 408, in run_asgi
dataset-store | result = await app( # type: ignore[func-returns-value]
dataset-store | File "/usr/local/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 84, in __call__
dataset-store | return await self.app(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/fastapi/applications.py", line 289, in __call__
dataset-store | await super().__call__(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/applications.py", line 122, in __call__
dataset-store | await self.middleware_stack(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/errors.py", line 184, in __call__
dataset-store | raise exc
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/errors.py", line 162, in __call__
dataset-store | await self.app(scope, receive, _send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/gzip.py", line 24, in __call__
dataset-store | await responder(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/gzip.py", line 44, in __call__
dataset-store | await self.app(scope, receive, self.send_with_gzip)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/cors.py", line 91, in __call__
dataset-store | await self.simple_response(scope, receive, send, request_headers=headers)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/cors.py", line 146, in simple_response
dataset-store | await self.app(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 79, in __call__
dataset-store | raise exc
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 68, in __call__
dataset-store | await self.app(scope, receive, sender)
dataset-store | File "/usr/local/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 20, in __call__
dataset-store | raise e
dataset-store | File "/usr/local/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 17, in __call__
dataset-store | await self.app(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 718, in __call__
dataset-store | await route.handle(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 276, in handle
dataset-store | await self.app(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 69, in app
dataset-store | await response(scope, receive, send)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/responses.py", line 174, in __call__
dataset-store | await self.background()
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/background.py", line 43, in __call__
dataset-store | await task()
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/background.py", line 28, in __call__
dataset-store | await run_in_threadpool(self.func, *self.args, **self.kwargs)
dataset-store | File "/usr/local/lib/python3.10/site-packages/starlette/concurrency.py", line 41, in run_in_threadpool
dataset-store | return await anyio.to_thread.run_sync(func, *args)
dataset-store | File "/usr/local/lib/python3.10/site-packages/anyio/to_thread.py", line 33, in run_sync
dataset-store | return await get_async_backend().run_sync_in_worker_thread(
dataset-store | File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2106, in run_sync_in_worker_thread
dataset-store | return await future
dataset-store | File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 833, in run
dataset-store | result = context.run(func, *args)
dataset-store | File "/app/app/app/controller/downloadController.py", line 85, in downloadDataset
dataset-store | fileCSV, fileName = ctrl.getCSV(project, dataset)
dataset-store | File "/app/app/app/controller/dataset_controller.py", line 435, in getCSV
dataset-store | final_df = pd.merge(final_df, df, how="outer", on='time')
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/reshape/merge.py", line 162, in merge
dataset-store | return op.get_result(copy=copy)
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/reshape/merge.py", line 811, in get_result
dataset-store | result = self._reindex_and_concat(
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/reshape/merge.py", line 802, in _reindex_and_concat
dataset-store | result = concat([left, right], axis=1, copy=copy)
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/reshape/concat.py", line 385, in concat
dataset-store | return op.get_result()
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/reshape/concat.py", line 616, in get_result
dataset-store | new_data = concatenate_managers(
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/internals/concat.py", line 204, in concatenate_managers
dataset-store | return _concat_managers_axis0(mgrs_indexers, axes, copy)
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/internals/concat.py", line 279, in _concat_managers_axis0
dataset-store | nb = blk.copy()
dataset-store | File "/usr/local/lib/python3.10/site-packages/pandas/core/internals/blocks.py", line 540, in copy
dataset-store | values = values.copy()
dataset-store | numpy.core._exceptions._ArrayMemoryError: Unable to allocate 392. MiB for an array with shape (1, 102732298) and data type float32
dataset-store | INFO: 10.1.7.2:38596 - "DELETE /ds/download/877acbf2ec9d0a07303b114a68f9144b6e0cf844cc6bcff HTTP/1.1" 200 OK
dataset-store | INFO: 10.1.7.2:38598 - "POST /ds/download/dataset/643b1324fe391b8ee31d6ef5 HTTP/1.1" 200 OK
dataset-store | INFO: 10.1.7.2:43166 - "GET /ds/download/status/ HTTP/1.1" 200 OK

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions