
Commit 7237474

VER: Release 0.34.0

See release notes.

2 parents 290b82e + 2aaef36

File tree

11 files changed: +309 −106 lines changed


CHANGELOG.md

Lines changed: 11 additions & 0 deletions

@@ -1,5 +1,16 @@
 # Changelog

+## 0.34.0 - 2024-05-14
+
+#### Enhancements
+- Added `pip-system-certs` dependency for Windows platforms to prevent a connection issue in `requests` when behind a proxy
+- Iteration of the `Live` client will now automatically call `Live.stop` when the iterator is destroyed, such as when a for loop is escaped with an exception or `break` statement
+
+#### Bug fixes
+- Fixed an issue where `batch.download` and `batch.download_async` would fail if requested files already existed in the output directory
+- Fixed an issue where `batch.download`, `batch.download_async`, and `timeseries.get_range` could use a lot of memory while streaming data
+- Fixed an issue where reusing a `Live` client with an open output stream would drop DBN records when received at the same time as the `Metadata` header
+
 ## 0.33.0 - 2024-04-16

 #### Enhancements
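
The new iteration behavior means that escaping a `for` loop no longer leaves the live session running. A minimal sketch of the intended usage, assuming the `DATABENTO_API_KEY` environment variable is set; the dataset, schema, and symbol below are illustrative placeholders:

import databento as db

# Sketch only: iteration starts the session, and as of 0.34.0 the
# iterator stops the client when it is destroyed.
client = db.Live()
client.subscribe(
    dataset="GLBX.MDP3",
    schema="trades",
    symbols=["ES.FUT"],
    stype_in="parent",
)

count = 0
for record in client:  # starting iteration starts the session
    print(record)
    count += 1
    if count >= 10:
        break  # leaving the loop now calls Live.stop automatically

Note that per the diff below, calling `Live.start` before iterating raises a `ValueError`, so the loop above is entered without an explicit `start`.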

README.md

Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@ The minimum dependencies as found in the `pyproject.toml` are also listed below:
 - databento-dbn = "0.17.1"
 - numpy = ">=1.23.5"
 - pandas = ">=1.5.3"
+- pip-system-certs = ">=4.0" (Windows only)
 - pyarrow = ">=13.0.0"
 - requests = ">=2.24.0"
 - zstandard = ">=0.21.0"

databento/common/constants.py

Lines changed: 2 additions & 0 deletions

@@ -25,6 +25,8 @@
     x[0]: np.iinfo(x[1]).max for x in InstrumentDefMsg._dtypes if not isinstance(x[1], str)
 }

+HTTP_STREAMING_READ_SIZE: Final = 2**12
+
 SCHEMA_STRUCT_MAP: Final[dict[Schema, type[DBNRecord]]] = {
     Schema.DEFINITION: InstrumentDefMsg,
     Schema.IMBALANCE: ImbalanceMsg,

databento/common/enums.py

Lines changed: 1 addition & 1 deletion

@@ -198,7 +198,7 @@ class RecordFlags(StringyMixin, IntFlag):  # type: ignore
     Represents record flags.

     F_LAST
-        Last message in the packet from the venue for a given `instrument_id`.
+        Marks the last record in a single event for a given `instrument_id`.
     F_TOB
         Indicates a top-of-book message, not an individual order.
     F_SNAPSHOT
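
The reworded `F_LAST` docstring reflects how the flag is typically used: detecting the final record of a single event for an instrument. A minimal sketch, assuming the record passed in exposes an integer `flags` attribute as DBN market-data records (e.g. MBO messages) do:

from databento.common.enums import RecordFlags

def is_end_of_event(record) -> bool:
    # True when this record is the last one in a single event for its
    # instrument_id, per the F_LAST flag semantics described above.
    return bool(RecordFlags(record.flags) & RecordFlags.F_LAST)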

databento/historical/api/batch.py

Lines changed: 29 additions & 18 deletions

@@ -24,6 +24,7 @@
 from databento_dbn import SType
 from requests.auth import HTTPBasicAuth

+from databento.common.constants import HTTP_STREAMING_READ_SIZE
 from databento.common.enums import Delivery
 from databento.common.enums import Packaging
 from databento.common.enums import SplitDuration

@@ -394,8 +395,16 @@ def _download_batch_file(
             headers: dict[str, str] = self._headers.copy()
             if output_path.exists():
                 existing_size = output_path.stat().st_size
-                headers["Range"] = f"bytes={existing_size}-{batch_download_file.size - 1}"
-                mode = "ab"
+                if existing_size < batch_download_file.size:
+                    headers["Range"] = f"bytes={existing_size}-{batch_download_file.size - 1}"
+                    mode = "ab"
+                elif existing_size == batch_download_file.size:
+                    # File exists and is complete
+                    break
+                else:
+                    raise FileExistsError(
+                        f"Batch file {output_path.name} already exists and has a larger than expected size.",
+                    )
             else:
                 mode = "wb"
             try:

@@ -408,7 +417,7 @@ def _download_batch_file(
                 ) as response:
                     check_http_error(response)
                     with open(output_path, mode=mode) as f:
-                        for chunk in response.iter_content(chunk_size=None):
+                        for chunk in response.iter_content(chunk_size=HTTP_STREAMING_READ_SIZE):
                             f.write(chunk)
             except BentoHttpError as exc:
                 if exc.http_status == 429:

@@ -424,24 +433,26 @@ def _download_batch_file(
                     attempts += 1
                     continue  # try again
                 raise BentoError(f"Error downloading file: {exc}") from None
+            else:
+                break

-            logger.debug("Download of %s completed", output_path.name)
-            hash_algo, _, hash_hex = batch_download_file.hash_str.partition(":")
+        logger.debug("Download of %s completed", output_path.name)
+        hash_algo, _, hash_hex = batch_download_file.hash_str.partition(":")

-            if hash_algo == "sha256":
-                output_hash = hashlib.sha256(output_path.read_bytes())
-                if output_hash.hexdigest() != hash_hex:
-                    warn_msg = f"Downloaded file failed checksum validation: {output_path.name}"
-                    logger.warning(warn_msg)
-                    warnings.warn(warn_msg, category=BentoWarning)
-            else:
-                logger.warning(
-                    "Skipping %s checksum because %s is not supported",
-                    output_path.name,
-                    hash_algo,
-                )
+        if hash_algo == "sha256":
+            output_hash = hashlib.sha256(output_path.read_bytes())
+            if output_hash.hexdigest() != hash_hex:
+                warn_msg = f"Downloaded file failed checksum validation: {output_path.name}"
+                logger.warning(warn_msg)
+                warnings.warn(warn_msg, category=BentoWarning)
+        else:
+            logger.warning(
+                "Skipping %s checksum because %s is not supported",
+                output_path.name,
+                hash_algo,
+            )

-            return output_path
+        return output_path


 @dataclass
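
The resume logic above pairs an HTTP `Range` request with append-mode writes and a fixed read size. A standalone sketch of the same pattern using `requests` directly; the URL, expected size, and output path are illustrative placeholders, not part of the Databento API:

from pathlib import Path

import requests

def resume_download(url: str, output_path: Path, expected_size: int) -> None:
    # Resume or start a download, mirroring the batch.py logic above.
    headers: dict[str, str] = {}
    if output_path.exists():
        existing_size = output_path.stat().st_size
        if existing_size == expected_size:
            return  # file is already complete
        if existing_size > expected_size:
            raise FileExistsError(f"{output_path.name} is larger than expected")
        # Request only the missing byte range and append to the partial file
        headers["Range"] = f"bytes={existing_size}-{expected_size - 1}"
        mode = "ab"
    else:
        mode = "wb"

    with requests.get(url, headers=headers, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(output_path, mode=mode) as f:
            for chunk in response.iter_content(chunk_size=2**12):
                f.write(chunk)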

databento/historical/http.py

Lines changed: 4 additions & 2 deletions

@@ -8,6 +8,7 @@
 from os import PathLike
 from typing import IO
 from typing import Any
+from typing import Final

 import aiohttp
 import requests

@@ -16,6 +17,7 @@
 from requests import Response
 from requests.auth import HTTPBasicAuth

+from databento.common.constants import HTTP_STREAMING_READ_SIZE
 from databento.common.dbnstore import DBNStore
 from databento.common.error import BentoClientError
 from databento.common.error import BentoDeprecationWarning

@@ -25,7 +27,7 @@
 from databento.common.system import USER_AGENT


-WARNING_HEADER_FIELD: str = "X-Warning"
+WARNING_HEADER_FIELD: Final = "X-Warning"


 class BentoHttpAPI:

@@ -137,7 +139,7 @@ def _stream(
             writer = open(path, "x+b")

         try:
-            for chunk in response.iter_content(chunk_size=None):
+            for chunk in response.iter_content(chunk_size=HTTP_STREAMING_READ_SIZE):
                 writer.write(chunk)
         except Exception as exc:
             raise BentoError(f"Error streaming response: {exc}") from None

databento/live/client.py

Lines changed: 93 additions & 64 deletions

@@ -115,78 +115,17 @@ def factory() -> _SessionProtocol:
         if not Live._thread.is_alive():
             Live._thread.start()

-    def __aiter__(self) -> Live:
+    def __aiter__(self) -> LiveIterator:
         return iter(self)

-    async def __anext__(self) -> DBNRecord:
-        if not self._dbn_queue.is_enabled():
-            raise ValueError("iteration has not started")
-
-        loop = asyncio.get_running_loop()
-
-        try:
-            return self._dbn_queue.get_nowait()
-        except queue.Empty:
-            while True:
-                try:
-                    return await loop.run_in_executor(
-                        None,
-                        self._dbn_queue.get,
-                        True,
-                        0.1,
-                    )
-                except queue.Empty:
-                    if self._session.is_disconnected():
-                        break
-                finally:
-                    if not self._dbn_queue.is_full() and not self._session.is_reading():
-                        logger.debug(
-                            "resuming reading with %d pending records",
-                            self._dbn_queue.qsize(),
-                        )
-                        self._session.resume_reading()
-
-        self._dbn_queue.disable()
-        await self.wait_for_close()
-        logger.debug("completed async iteration")
-        raise StopAsyncIteration
-
-    def __iter__(self) -> Live:
+    def __iter__(self) -> LiveIterator:
         logger.debug("starting iteration")
         if self._session.is_started():
             logger.error("iteration started after session has started")
             raise ValueError(
                 "Cannot start iteration after streaming has started, records may be missed. Don't call `Live.start` before iterating.",
             )
-        elif self.is_connected():
-            self.start()
-        self._dbn_queue._enabled.set()
-        return self
-
-    def __next__(self) -> DBNRecord:
-        if not self._dbn_queue.is_enabled():
-            raise ValueError("iteration has not started")
-
-        while True:
-            try:
-                record = self._dbn_queue.get(timeout=0.1)
-            except queue.Empty:
-                if self._session.is_disconnected():
-                    break
-            else:
-                return record
-            finally:
-                if not self._dbn_queue.is_full() and not self._session.is_reading():
-                    logger.debug(
-                        "resuming reading with %d pending records",
-                        self._dbn_queue.qsize(),
-                    )
-                    self._session.resume_reading()
-
-        self._dbn_queue.disable()
-        self.block_for_close()
-        logger.debug("completed iteration")
-        raise StopIteration
+        return LiveIterator(self)

     def __repr__(self) -> str:
         name = self.__class__.__name__

@@ -661,3 +600,93 @@ def _map_symbol(self, record: DBNRecord) -> None:
         instrument_id = record.instrument_id
         self._symbology_map[instrument_id] = record.stype_out_symbol
         logger.info("added symbology mapping %s to %d", out_symbol, instrument_id)
+
+
+class LiveIterator:
+    """
+    Iterator class for the `Live` client. Automatically starts the client when
+    created and will stop it when destroyed. This provides context-manager-like
+    behavior to for loops.
+
+    Parameters
+    ----------
+    client : Live
+        The Live client that spawned this LiveIterator.
+
+    """
+
+    def __init__(self, client: Live):
+        client._dbn_queue._enabled.set()
+        client.start()
+        self._client = client
+
+    @property
+    def client(self) -> Live:
+        return self._client
+
+    def __iter__(self) -> LiveIterator:
+        return self
+
+    def __del__(self) -> None:
+        if self.client.is_connected():
+            self.client.stop()
+            self.client.block_for_close()
+            logger.debug("iteration aborted")
+
+    async def __anext__(self) -> DBNRecord:
+        if not self.client._dbn_queue.is_enabled():
+            raise ValueError("iteration has not started")
+
+        loop = asyncio.get_running_loop()
+
+        try:
+            return self.client._dbn_queue.get_nowait()
+        except queue.Empty:
+            while True:
+                try:
+                    return await loop.run_in_executor(
+                        None,
+                        self.client._dbn_queue.get,
+                        True,
+                        0.1,
+                    )
+                except queue.Empty:
+                    if self.client._session.is_disconnected():
+                        break
+                finally:
+                    if not self.client._dbn_queue.is_full() and not self.client._session.is_reading():
+                        logger.debug(
+                            "resuming reading with %d pending records",
+                            self.client._dbn_queue.qsize(),
+                        )
+                        self.client._session.resume_reading()
+
+        self.client._dbn_queue.disable()
+        await self.client.wait_for_close()
+        logger.debug("async iteration completed")
+        raise StopAsyncIteration
+
+    def __next__(self) -> DBNRecord:
+        if not self.client._dbn_queue.is_enabled():
+            raise ValueError("iteration has not started")
+
+        while True:
+            try:
+                record = self.client._dbn_queue.get(timeout=0.1)
+            except queue.Empty:
+                if self.client._session.is_disconnected():
+                    break
+            else:
+                return record
+            finally:
+                if not self.client._dbn_queue.is_full() and not self.client._session.is_reading():
+                    logger.debug(
+                        "resuming reading with %d pending records",
+                        self.client._dbn_queue.qsize(),
+                    )
+                    self.client._session.resume_reading()

+        self.client._dbn_queue.disable()
+        self.client.block_for_close()
+        logger.debug("iteration completed")
+        raise StopIteration
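
Because `Live.__aiter__` now also returns a `LiveIterator`, the same start-on-iterate and stop-on-destroy behavior applies to `async for`. A minimal sketch, assuming `DATABENTO_API_KEY` is set and that the dataset, schema, and symbols below (placeholders) are valid for your entitlements:

import asyncio

import databento as db

async def stream_some_records(limit: int = 10) -> None:
    client = db.Live()
    client.subscribe(
        dataset="GLBX.MDP3",
        schema="mbo",
        symbols=["ES.FUT"],
        stype_in="parent",
    )

    received = 0
    async for record in client:  # __aiter__ returns a LiveIterator
        received += 1
        if received >= limit:
            break  # the iterator stops the client when it is destroyed

asyncio.run(stream_some_records())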

databento/live/session.py

Lines changed: 21 additions & 19 deletions

@@ -11,6 +11,7 @@
 from typing import Final

 import databento_dbn
+from databento_dbn import Metadata
 from databento_dbn import Schema
 from databento_dbn import SType


@@ -195,28 +196,29 @@ def __init__(
         self._user_streams = user_streams

     def _process_dbn(self, data: bytes) -> None:
-        # Do not re-write the metadata to the stream to avoid corruption
-        if not self._metadata or not data.startswith(b"DBN"):
-            for stream, exc_callback in self._user_streams.items():
-                try:
-                    stream.write(data)
-                except Exception as exc:
-                    stream_name = getattr(stream, "name", str(stream))
-                    logger.error(
-                        "error writing %d bytes to `%s` stream",
-                        len(data),
-                        stream_name,
-                        exc_info=exc,
-                    )
-                    if exc_callback is not None:
-                        exc_callback(exc)
+        start_index = 0
+        if data.startswith(b"DBN") and self._metadata:
+            # We have already received metadata for the stream
+            # Set start index to metadata length
+            start_index = int.from_bytes(data[4:8], byteorder="little") + 8
+            self._metadata.check(Metadata.decode(bytes(data[:start_index])))
+        for stream, exc_callback in self._user_streams.items():
+            try:
+                stream.write(data[start_index:])
+            except Exception as exc:
+                stream_name = getattr(stream, "name", str(stream))
+                logger.error(
+                    "error writing %d bytes to `%s` stream",
+                    len(data[start_index:]),
+                    stream_name,
+                    exc_info=exc,
+                )
+                if exc_callback is not None:
+                    exc_callback(exc)
         return super()._process_dbn(data)

     def received_metadata(self, metadata: databento_dbn.Metadata) -> None:
-        if not self._metadata:
-            self._metadata.data = metadata
-        else:
-            self._metadata.check(metadata)
+        self._metadata.data = metadata
         return super().received_metadata(metadata)

     def received_record(self, record: DBNRecord) -> None:
databento/version.py

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-__version__ = "0.33.0"
+__version__ = "0.34.0"
