Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
14 changes: 11 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,23 @@
import traceback
import logging
import glob
import random
from datetime import datetime, timedelta
import numpy as np
from utilsAPI import getAPIURL, getWorkerType, getErrorLogBool, getASInstance, unprotect_current_instance, get_number_of_pending_trials
from utilsAPI import (getAPIURL, getWorkerType, getErrorLogBool, getASInstance,
unprotect_current_instance, get_number_of_pending_trials,
getAppPullWaitTimeAndJitter, getLogLevel)
from utilsAuth import getToken
from utils import (getDataDirectory, checkTime, checkResourceUsage,
sendStatusEmail, checkForTrialsWithStatus,
getCommitHash, getHostname, postLocalClientInfo,
postProcessedDuration, makeRequestWithRetry,
writeToErrorLog)

log_level = getLogLevel()

logging.basicConfig(format="[%(asctime)s] [%(levelname)s] %(message)s",
level=logging.INFO,
level=log_level,
datefmt='%Y-%m-%d %H:%M:%S',
force=True)

Expand All @@ -30,6 +35,7 @@

# Boolean ERROR_LOG setting, as returned by getErrorLogBool().
ERROR_LOG = getErrorLogBool()
# Path of the JSON error log file (presumably consumed by writeToErrorLog -- TODO confirm).
error_log_path = "/data/error_log.json"
# Base delay and +/- jitter, in seconds, used to sleep between polls when
# the API answers 404 (no trial available).
wait_base_time, wait_jitter = getAppPullWaitTimeAndJitter()

# if true, will delete entire data directory when finished with a trial
isDocker = True
Expand Down Expand Up @@ -84,7 +90,9 @@
if r.status_code == 404:
logging.info(f"...pulling {workerType} trials from {API_URL} "
f"using commit {getCommitHash()}")
time.sleep(1)
wait_time = wait_base_time + random.uniform(-wait_jitter, wait_jitter)
time.sleep(wait_time)
logging.debug(f'waiting {wait_time} seconds')

# When using autoscaling, we will remove the instance scale-in protection if it hasn't
# pulled a trial recently and there are no actively recording trials
Expand Down
13 changes: 13 additions & 0 deletions utilsAPI.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import boto3
import requests
import logging

from decouple import config
from datetime import datetime, timedelta
Expand Down Expand Up @@ -51,6 +52,18 @@ def getStatusEmails():
def getErrorLogBool():
    """Return the ``ERROR_LOG`` configuration setting.

    The value is looked up through ``config`` with ``cast=bool``; ``False``
    is used when the setting is not defined.
    """
    error_log_enabled = config('ERROR_LOG', default=False, cast=bool)
    return error_log_enabled

def getAppPullWaitTimeAndJitter():
    """Return the polling wait time and its jitter, both in seconds.

    Reads ``APP_PULL_WAIT_TIME`` (default 5.0) and
    ``APP_PULL_WAIT_TIME_JITTER`` (default 1.0) through ``config`` as floats.

    The caller sleeps for ``wait_time + uniform(-jitter, jitter)``, and
    ``time.sleep`` raises ``ValueError`` on a negative argument. The values
    are therefore sanitised so that ``wait_time - jitter`` is never negative:
    a negative wait time becomes 0, the jitter is taken as a magnitude, and
    the jitter is capped at the wait time. A warning is logged whenever a
    configured value had to be adjusted.

    Returns:
        tuple: ``(wait_time, jitter)`` as floats with
        ``0 <= jitter <= wait_time``.
    """
    # Named wait_time rather than time to avoid shadowing the stdlib module.
    wait_time = config('APP_PULL_WAIT_TIME', default=5.0, cast=float)
    jitter = config('APP_PULL_WAIT_TIME_JITTER', default=1.0, cast=float)

    if wait_time < 0:
        logging.warning("APP_PULL_WAIT_TIME=%s is negative; using 0 instead",
                        wait_time)
        wait_time = 0.0

    # uniform(-j, j) is symmetric, so the sign of the jitter carries no meaning.
    jitter = abs(jitter)
    if jitter > wait_time:
        logging.warning("APP_PULL_WAIT_TIME_JITTER=%s exceeds the wait time "
                        "%s; capping it to avoid negative sleeps",
                        jitter, wait_time)
        jitter = wait_time

    return wait_time, jitter

def getLogLevel():
    """Return the numeric logging level selected by the ``LOG_LEVEL`` setting.

    ``LOG_LEVEL`` is read through ``config`` (default ``'INFO'``), stripped of
    surrounding whitespace and upper-cased, then resolved against the
    constants of the ``logging`` module (``DEBUG``, ``INFO``, ``WARNING``...).

    Returns:
        int: the matching ``logging`` level, or ``logging.INFO`` when the
        setting does not name a valid level.
    """
    log_level_str = config('LOG_LEVEL', default='INFO')
    candidate = getattr(logging, str(log_level_str).strip().upper(), None)

    # getattr can resolve to any module attribute (e.g. BASIC_FORMAT is a
    # string); only genuine integer level constants are acceptable here.
    if isinstance(candidate, int) and not isinstance(candidate, bool):
        return candidate

    return logging.INFO

def getASInstance():
try:
# Check if the ECS_CONTAINER_METADATA_FILE environment variable exists
Expand Down
Loading