diff --git a/contents/job-wait.py b/contents/job-wait.py index bec2b09..bb07578 100644 --- a/contents/job-wait.py +++ b/contents/job-wait.py @@ -20,6 +20,9 @@ def wait(): + connection_max_time = 1800 # time in seconds + connection_max_time_bool = False + last_line_number = 0 try: name = environ.get("RD_CONFIG_NAME") namespace = environ.get("RD_CONFIG_NAMESPACE") @@ -31,18 +34,22 @@ def wait(): retries_count = 0 completed = False + while True: common.connect() - + connection_start_time = time.time() + #validate retries - if retries_count != 0: + if retries_count != 0 and not connection_max_time_bool: log.warning("An error occurred - retries: {0}".format(retries_count)) - retries_count = retries_count + 1 + + if not connection_max_time_bool: + retries_count = retries_count + 1 - if retries_count > retries: - log.error("Number of retries exceeded") - completed = True + if retries_count > retries: + log.error("Number of retries exceeded") + completed = True if show_log and not completed: log.debug("Searching for pod associated with job") @@ -69,47 +76,62 @@ def wait(): if ex.status == 200: break else: - log.info("waiting for log") + if not connection_max_time_bool: + log.info("waiting for log") time.sleep(15) if timeout and time.time() - start_time > timeout: # pragma: no cover raise TimeoutError + + if not connection_max_time_bool: + log.info("Fetching logs from pod: {0}".format(pod_name)) - log.info("Fetching logs from pod: {0}".format(pod_name)) - - if retries_count == 1: + if retries_count == 1 and not connection_max_time_bool: log.info("========================== job log start ==========================") + connection_max_time_bool = False + current_line_number = 0 w = watch.Watch() for line in w.stream(core_v1.read_namespaced_pod_log, name=pod_name, namespace=namespace): - log.info(line.encode('ascii', 'ignore')) - - #check status job - batch_v1 = client.BatchV1Api() - - api_response = batch_v1.read_namespaced_job( - name, - namespace, - pretty="True" - ) - log.debug(api_response) - - if api_response.status.conditions: - for condition in api_response.status.conditions: - if condition.type == "Failed": - completed = True - - if api_response.status.completion_time: - completed = True - - if completed: - if show_log: - log.info("=========================== job log end ===========================") - break - log.info("Waiting for job completion") - time.sleep(sleep) + if current_line_number > last_line_number or last_line_number == 0: + log.info(line.encode('ascii', 'ignore')) + last_line_number = current_line_number + + current_line_number += 1 + + connection_elapsed_time = time.time() - connection_start_time + if connection_elapsed_time >= connection_max_time: + connection_max_time_bool = True + break + + if not connection_max_time_bool: + #check status job + batch_v1 = client.BatchV1Api() + + api_response = batch_v1.read_namespaced_job( + name, + namespace, + pretty="True" + ) + log.debug(api_response) + + if api_response.status.conditions: + for condition in api_response.status.conditions: + if condition.type == "Failed": + completed = True + + if api_response.status.completion_time: + completed = True + + if completed: + if show_log: + log.info("=========================== job log end ===========================") + break + + log.info("Waiting for job completion") + time.sleep(sleep) if api_response.status.succeeded: