|
2 | 2 | # Use of this source code is governed by a BSD-style |
3 | 3 | # license that can be found in the LICENSE file. |
4 | 4 |
|
| 5 | +import copy |
| 6 | +import concurrent.futures |
5 | 7 | from datetime import datetime |
6 | 8 | import logging |
7 | 9 | import re |
@@ -378,6 +380,9 @@ def process_http_error(ex): |
378 | 380 | # See `BigQuery Troubleshooting Errors |
379 | 381 | # <https://cloud.google.com/bigquery/troubleshooting-errors>`__ |
380 | 382 |
|
| 383 | + if "cancelled" in ex.message: |
| 384 | + raise QueryTimeout("Reason: {0}".format(ex)) |
| 385 | + |
381 | 386 | raise GenericGBQException("Reason: {0}".format(ex)) |
382 | 387 |
|
383 | 388 | def download_table( |
@@ -406,8 +411,41 @@ def download_table( |
406 | 411 | user_dtypes=dtypes, |
407 | 412 | ) |
408 | 413 |
|
| 414 | + def _wait_for_query_job(self, query_reply, timeout_ms): |
| 415 | + """Wait for query to complete, pausing occasionally to update progress. |
| 416 | +
|
| 417 | + Args: |
| 418 | + query_reply (QueryJob): |
| 419 | + A query job which has started. |
| 420 | +
|
| 421 | + timeout_ms (Optional[int]): |
| 422 | + How long to wait before cancelling the query. |
| 423 | + """ |
| 424 | + # Wait at most 10 seconds so we can show progress. |
| 425 | + # TODO(https://github.com/googleapis/python-bigquery-pandas/issues/327): |
| 426 | + # Include a tqdm progress bar here instead of a stream of log messages. |
| 427 | + timeout_sec = 10.0 |
| 428 | + if timeout_ms: |
| 429 | + timeout_sec = min(timeout_sec, timeout_ms / 1000.0) |
| 430 | + |
| 431 | + while query_reply.state != "DONE": |
| 432 | + self.log_elapsed_seconds(" Elapsed", "s. Waiting...") |
| 433 | + |
| 434 | + if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000: |
| 435 | + self.client.cancel_job( |
| 436 | + query_reply.job_id, location=query_reply.location |
| 437 | + ) |
| 438 | + raise QueryTimeout("Query timeout: {} ms".format(timeout_ms)) |
| 439 | + |
| 440 | + try: |
| 441 | + query_reply.result(timeout=timeout_sec) |
| 442 | + except concurrent.futures.TimeoutError: |
| 443 | + # Use our own timeout logic |
| 444 | + pass |
| 445 | + except self.http_error as ex: |
| 446 | + self.process_http_error(ex) |
| 447 | + |
409 | 448 | def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): |
410 | | - from concurrent.futures import TimeoutError |
411 | 449 | from google.auth.exceptions import RefreshError |
412 | 450 | from google.cloud import bigquery |
413 | 451 | import pandas |
@@ -449,28 +487,11 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): |
449 | 487 | job_id = query_reply.job_id |
450 | 488 | logger.debug("Job ID: %s" % job_id) |
451 | 489 |
|
452 | | - while query_reply.state != "DONE": |
453 | | - self.log_elapsed_seconds(" Elapsed", "s. Waiting...") |
454 | | - |
455 | | - timeout_ms = job_config.get("jobTimeoutMs") or job_config["query"].get( |
456 | | - "timeoutMs" |
457 | | - ) |
458 | | - timeout_ms = int(timeout_ms) if timeout_ms else None |
459 | | - if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000: |
460 | | - raise QueryTimeout("Query timeout: {} ms".format(timeout_ms)) |
461 | | - |
462 | | - timeout_sec = 1.0 |
463 | | - if timeout_ms: |
464 | | - # Wait at most 1 second so we can show progress bar |
465 | | - timeout_sec = min(1.0, timeout_ms / 1000.0) |
466 | | - |
467 | | - try: |
468 | | - query_reply.result(timeout=timeout_sec) |
469 | | - except TimeoutError: |
470 | | - # Use our own timeout logic |
471 | | - pass |
472 | | - except self.http_error as ex: |
473 | | - self.process_http_error(ex) |
| 490 | + timeout_ms = job_config.get("jobTimeoutMs") or job_config["query"].get( |
| 491 | + "timeoutMs" |
| 492 | + ) |
| 493 | + timeout_ms = int(timeout_ms) if timeout_ms else None |
| 494 | + self._wait_for_query_job(query_reply, timeout_ms) |
474 | 495 |
|
475 | 496 | if query_reply.cache_hit: |
476 | 497 | logger.debug("Query done.\nCache hit.\n") |
@@ -673,6 +694,28 @@ def _finalize_dtypes( |
673 | 694 | return df |
674 | 695 |
|
675 | 696 |
|
| 697 | +def _transform_read_gbq_configuration(configuration): |
| 698 | + """ |
| 699 | + For backwards-compatibility, convert any previously client-side only |
| 700 | + parameters such as timeoutMs to the property name expected by the REST API. |
| 701 | +
|
| 702 | + Makes a copy of configuration if changes are needed. |
| 703 | + """ |
| 704 | + |
| 705 | + if configuration is None: |
| 706 | + return None |
| 707 | + |
| 708 | + timeout_ms = configuration.get("query", {}).get("timeoutMs") |
| 709 | + if timeout_ms is not None: |
| 710 | + # Transform timeoutMs to an actual server-side configuration. |
| 711 | + # https://github.com/googleapis/python-bigquery-pandas/issues/479 |
| 712 | + configuration = copy.deepcopy(configuration) |
| 713 | + del configuration["query"]["timeoutMs"] |
| 714 | + configuration["jobTimeoutMs"] = timeout_ms |
| 715 | + |
| 716 | + return configuration |
| 717 | + |
| 718 | + |
676 | 719 | def read_gbq( |
677 | 720 | query_or_table, |
678 | 721 | project_id=None, |
@@ -847,6 +890,8 @@ def read_gbq( |
847 | 890 | if dialect not in ("legacy", "standard"): |
848 | 891 | raise ValueError("'{0}' is not valid for dialect".format(dialect)) |
849 | 892 |
|
| 893 | + configuration = _transform_read_gbq_configuration(configuration) |
| 894 | + |
850 | 895 | if configuration and "query" in configuration and "query" in configuration["query"]: |
851 | 896 | if query_or_table is not None: |
852 | 897 | raise ValueError( |
|
0 commit comments