Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 49 additions & 18 deletions server/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,46 +1,77 @@
FROM python:3.7-stretch
FROM python:3.8 as tf

## This part installs bazel, which we can later use to rebuild library packages for M1 support
#RUN apt-get update
#RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential openjdk-11-jdk zip unzip wget
#RUN wget https://github.com/bazelbuild/bazel/releases/download/4.2.1/bazel-4.2.1-dist.zip
#RUN mkdir bazel-4.2.1
#RUN unzip -d ./bazel-4.2.1 bazel-4.2.1-dist.zip
#WORKDIR /bazel-4.2.1
#RUN env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh
#RUN cp output/bazel /usr/local/bin
#WORKDIR /
#RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -y
#RUN cargo install py-spy \

ARG M1
ARG BUILD_ENV
ARG OVERCOOKED_BRANCH
ARG HARL_BRANCH
ARG GRAPHICS

#
WORKDIR /app

#
# Install non-chai dependencies
COPY ./requirements.txt ./requirements.txt
RUN pip install -r requirements.txt

# Install eventlet production server if production build
RUN if [ "$BUILD_ENV" = "production" ] ; then pip install eventlet ; fi
RUN pip install --upgrade pip
RUN apt-get update && apt-get -y install cmake

# Dockerfiles have no `if` instruction and COPY cannot be made conditional,
# so ship both requirement files and choose between them inside a RUN step.
# As in the original condition, any M1 value other than "false" selects the
# M1 requirements.
COPY ./requirements.txt ./requirements_m1.txt ./
RUN if [ "$M1" != "false" ]; then cp ./requirements_m1.txt ./requirements.txt ; fi

# Clone chai code
# Here is the problem: the prebuilt wheel is ray 1.1.0, which works, but then we need
# to change some parts of the code; the other option is to rebuild version 0.8.5.
# M1 builds only: install prebuilt aarch64 wheels, then the remaining
# requirements. The conditional must live inside the RUN shell command,
# because `if` is not a Dockerfile instruction.
# NOTE(review): the py_spy wheel is presumably shipped inside the ray zip - confirm.
RUN if [ "$M1" = "true" ]; then wget https://github.com/ray-project/ray/files/5741700/ray-1.1.0-cp38-aarch64.zip ; fi
RUN if [ "$M1" = "true" ]; then unzip ray-1.1.0-cp38-aarch64.zip ; fi
RUN if [ "$M1" = "true" ]; then pip install ray-1.1.0-cp38-cp38-linux_aarch64.whl ; fi
RUN if [ "$M1" = "true" ]; then pip install py_spy-0.3.3-cp38-cp38-linux_aarch64.whl ; fi
RUN if [ "$M1" = "true" ]; then pip install tensorflow==2.6.0 -f https://tf.kmtea.eu/whl/stable.html ; fi
RUN if [ "$M1" = "true" ]; then pip install -r requirements.txt ; fi
##
## Install eventlet production server if production build
RUN if [ "$BUILD_ENV" = "production" ] ; then pip install eventlet ; fi
##
## Clone chai code
RUN git clone https://github.com/HumanCompatibleAI/overcooked_ai.git --branch $OVERCOOKED_BRANCH --single-branch /overcooked_ai
RUN git clone https://github.com/HumanCompatibleAI/human_aware_rl.git --branch $HARL_BRANCH --single-branch /human_aware_rl
##
# COPY cannot be conditional: stage the patched setup.py in the image, then
# overwrite the cloned one only for M1 builds.
COPY ./setup_corrections/setup.py /tmp/setup_corrections/setup.py
RUN if [ "$M1" = "true" ]; then cp /tmp/setup_corrections/setup.py /human_aware_rl/setup.py ; fi

# Dummy data_dir so things don't break
# I've started upgrading ray to version 1.1.0 (it requires using a different, upgraded class for the RNN)
##COPY ./setup_corrections/ppo_rlib.py /human_aware_rl/human_aware_rl/ppo/ppo_rllib.py

## Dummy data_dir so things don't break
RUN echo "import os; DATA_DIR=os.path.abspath('.')" >> /human_aware_rl/human_aware_rl/data_dir.py
##

# Install chai dependencies
### Install chai dependencies
RUN pip install -e /overcooked_ai
RUN pip install -e /human_aware_rl

RUN apt-get update
##
RUN apt-get install -y libgl1-mesa-dev
##

# Copy over remaining files
### Copy over remaining files
COPY ./static ./static
COPY ./*.py ./
COPY ./graphics/$GRAPHICS ./static/js/graphics.js
COPY ./config.json ./config.json



# Set environment variables that will be used by app.py
##
## Set environment variables that will be used by app.py
ENV HOST 0.0.0.0
ENV PORT 5000
ENV CONF_PATH config.json

# Do the thing
##
## Do the thing
EXPOSE 5000
CMD ["python", "-u", "app.py"]
21 changes: 21 additions & 0 deletions server/requirements_m1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
certifi==2020.6.20
click==8.0
dnspython==1.16.0
dill==0.3.2
Flask==2.1.0
Flask-SocketIO==4.3.0
greenlet==0.4.16
itsdangerous==2.0
Jinja2==3.1.0
MarkupSafe==2.0
monotonic==1.5
python-engineio==3.13.0
python-socketio==4.6.0
six==1.15.0
Werkzeug==2.0.3
requests==2.23.0
pygame==2.1.0
tabulate
dm-tree
opencv-python
keras==2.6
202 changes: 202 additions & 0 deletions server/setup_corrections/ppo_rlib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
import numpy as np
import tensorflow as tf


class RllibPPOModel(TFModelV2):
    """
    Model that will map environment states to action probabilities. Will be shared across agents.

    The network is an optional stack of conv layers, followed by optional dense
    hidden layers, followed by two linear heads: action logits and a scalar value.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
        """
        Build the Keras graph and register its variables with RLlib.

        Arguments:
            obs_space: observation space; only its `shape` is read here
            action_space: forwarded unchanged to the base class
            num_outputs: number of action logits produced by the policy head
            model_config: RLlib model config dict carrying the custom params
                NUM_HIDDEN_LAYERS, SIZE_HIDDEN_LAYERS, NUM_FILTERS,
                NUM_CONV_LAYERS and D2RL
            name: model name, forwarded to the base class
            **kwargs: accepted for call compatibility and ignored
        """
        super().__init__(obs_space, action_space, num_outputs, model_config, name)

        # Params we got to pass in from the call to "run". Older RLlib releases
        # expose them as "custom_options"; newer ones use "custom_model_config"
        # (and may leave a non-dict placeholder under the old key), so fall back.
        custom_params = model_config.get("custom_options")
        if not isinstance(custom_params, dict):
            custom_params = model_config["custom_model_config"]

        ## Parse custom network params
        num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"]
        size_hidden_layers = custom_params["SIZE_HIDDEN_LAYERS"]
        num_filters = custom_params["NUM_FILTERS"]
        num_convs = custom_params["NUM_CONV_LAYERS"]
        d2rl = custom_params["D2RL"]
        assert isinstance(d2rl, bool), "D2RL must be a bool"

        ## Create graph of custom network. It will be under a shared tf scope such that all agents
        ## use the same model
        self.inputs = tf.keras.Input(shape=obs_space.shape, name="observations")
        out = self.inputs

        # Apply initial conv layer with a larger kernel (why?)
        if num_convs > 0:
            out = tf.keras.layers.Conv2D(
                filters=num_filters,
                kernel_size=[5, 5],
                padding="same",
                activation=tf.nn.leaky_relu,
                name="conv_initial",
            )(out)

        # Apply remaining conv layers, if any. Only the last one uses "valid" padding.
        for i in range(0, num_convs - 1):
            padding = "same" if i < num_convs - 2 else "valid"
            out = tf.keras.layers.Conv2D(
                filters=num_filters,
                kernel_size=[3, 3],
                padding=padding,
                activation=tf.nn.leaky_relu,
                name="conv_{}".format(i),
            )(out)

        # Apply dense hidden layers, if any
        conv_out = tf.keras.layers.Flatten()(out)
        out = conv_out
        for i in range(num_hidden_layers):
            # With D2RL enabled, every hidden layer after the first also sees
            # the flattened conv features.
            if i > 0 and d2rl:
                out = tf.keras.layers.Concatenate()([out, conv_out])
            out = tf.keras.layers.Dense(size_hidden_layers)(out)
            out = tf.keras.layers.LeakyReLU()(out)

        # Linear last layer for action distribution logits
        layer_out = tf.keras.layers.Dense(self.num_outputs)(out)

        # Linear last layer for value function branch of model
        value_out = tf.keras.layers.Dense(1)(out)

        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
        self.register_variables(self.base_model.variables)

    def forward(self, input_dict, state=None, seq_lens=None):
        """
        Run the forward pass on `input_dict["obs"]`.

        Stores the value-head output for a later `value_function()` call and
        returns the action logits together with `state`, passed through unchanged.
        """
        model_out, self._value_out = self.base_model(input_dict["obs"])
        return model_out, state

    def value_function(self):
        """
        Returns the value output of the most recent forward pass, flattened to 1-D.
        """
        return tf.reshape(self._value_out, [-1])


class RllibLSTMPPOModel(RecurrentNetwork):
    """
    Model that will map encoded environment observations to action logits

                                                         |_______|
                                                     /-> | value |
             ___________     _________     ________ /    |_______|
    state -> | conv_net | -> | fc_net | -> | lstm |
             |__________|    |________|    |______| \\    |_______________|
                                            /    \\  \\-> | action_logits |
                                          h_in   c_in    |_______________|
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
        """
        Build the Keras graph and register its variables with RLlib.

        Arguments:
            obs_space: observation space; only its `shape` is read here
            action_space: forwarded unchanged to the base class
            num_outputs: number of action logits produced by the policy head
            model_config: RLlib model config dict carrying the custom params
                NUM_HIDDEN_LAYERS, SIZE_HIDDEN_LAYERS, NUM_FILTERS,
                NUM_CONV_LAYERS and CELL_SIZE
            name: model name, forwarded to the base class
            **kwargs: accepted for call compatibility and ignored
        """
        super().__init__(obs_space, action_space, num_outputs, model_config, name)

        # Params we passed in from rllib client. Older RLlib releases expose
        # them as "custom_options"; newer ones use "custom_model_config" (and
        # may leave a non-dict placeholder under the old key), so fall back.
        custom_params = model_config.get("custom_options")
        if not isinstance(custom_params, dict):
            custom_params = model_config["custom_model_config"]

        ## Parse custom network params
        num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"]
        size_hidden_layers = custom_params["SIZE_HIDDEN_LAYERS"]
        num_filters = custom_params["NUM_FILTERS"]
        num_convs = custom_params["NUM_CONV_LAYERS"]
        cell_size = custom_params["CELL_SIZE"]

        ### Create graph of the model ###
        flattened_dim = np.prod(obs_space.shape)

        # Need an extra batch dimension (None) for time dimension
        flattened_obs_inputs = tf.keras.Input(shape=(None, flattened_dim), name="input")
        lstm_h_in = tf.keras.Input(shape=(cell_size,), name="h_in")
        lstm_c_in = tf.keras.Input(shape=(cell_size,), name="c_in")
        seq_in = tf.keras.Input(shape=(), name="seq_in", dtype=tf.int32)

        # Restore initial observation shape
        obs_inputs = tf.keras.layers.Reshape(target_shape=(-1, *obs_space.shape))(flattened_obs_inputs)
        out = obs_inputs

        ## Initial "vision" network

        # Apply initial conv layer with a larger kernel (why?)
        if num_convs > 0:
            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
                filters=num_filters,
                kernel_size=[5, 5],
                padding="same",
                activation=tf.nn.leaky_relu,
                name="conv_initial",
            ))(out)

        # Apply remaining conv layers, if any. Only the last one uses "valid" padding.
        for i in range(0, num_convs - 1):
            padding = "same" if i < num_convs - 2 else "valid"
            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
                filters=num_filters,
                kernel_size=[3, 3],
                padding=padding,
                activation=tf.nn.leaky_relu,
                name="conv_{}".format(i),
            ))(out)

        # Flatten spatial features
        out = tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten())(out)

        # Apply dense hidden layers, if any
        for i in range(num_hidden_layers):
            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(
                units=size_hidden_layers,
                activation=tf.nn.leaky_relu,
                name="fc_{0}".format(i),
            ))(out)

        ## LSTM network
        lstm_out, h_out, c_out = tf.keras.layers.LSTM(
            cell_size, return_sequences=True, return_state=True, name="lstm"
        )(
            inputs=out,
            mask=tf.sequence_mask(seq_in),
            initial_state=[lstm_h_in, lstm_c_in],
        )

        # Linear last layer for action distribution logits
        layer_out = tf.keras.layers.Dense(self.num_outputs, name="logits")(lstm_out)

        # Linear last layer for value function branch of model
        value_out = tf.keras.layers.Dense(1, name="values")(lstm_out)

        self.cell_size = cell_size
        self.base_model = tf.keras.Model(
            inputs=[flattened_obs_inputs, seq_in, lstm_h_in, lstm_c_in],
            outputs=[layer_out, value_out, h_out, c_out],
        )
        self.register_variables(self.base_model.variables)

    def forward_rnn(self, inputs, state, seq_lens):
        """
        Run the forward pass of the model

        Arguments:
            inputs: np.array of shape [BATCH, T, obs_shape]
            state: list of np.arrays [h_in, c_in] each of shape [BATCH, self.cell_size]
            seq_lens: np.array of shape [BATCH] where the ith element is the length of the ith sequence

        Output:
            model_out: tensor of shape [BATCH, T, self.num_outputs] representing action logits
            state: list of tensors [h_out, c_out] each of shape [BATCH, self.cell_size]
        """
        # The Keras model was declared with four flat inputs
        # (obs, seq_lens, h_in, c_in), so unpack `state` instead of nesting it.
        model_out, self._value_out, h_out, c_out = self.base_model([inputs, seq_lens, *state])

        return model_out, [h_out, c_out]

    def value_function(self):
        """
        Returns a tensor of shape [BATCH * T] representing the value function for the most recent forward pass
        """
        return tf.reshape(self._value_out, [-1])

    def get_initial_state(self):
        """
        Returns the initial hidden state for the LSTM
        """
        return [
            np.zeros(self.cell_size, np.float32),
            np.zeros(self.cell_size, np.float32),
        ]
23 changes: 23 additions & 0 deletions server/setup_corrections/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env python

from setuptools import setup, find_packages

# Runtime dependencies installed together with the package; the pinned
# versions are kept exactly as specified.
INSTALL_REQUIRES = [
    'GitPython',
    'memory_profiler',
    'sacred',
    'pymongo',
    'dill',
    'matplotlib',
    'requests',
    'numpy==1.19.5',
    'seaborn==0.9.0',
    'pygame==2.1.0',
]

# Package metadata for human_aware_rl; packages are discovered automatically.
setup(
    name='human_aware_rl',
    version='0.0.1',
    description='This package has shared components.',
    author='Micah Carroll',
    author_email='micah.d.carroll@berkeley.edu',
    packages=find_packages(),
    install_requires=INSTALL_REQUIRES,
)