From addab68e0b22cb3d1522e9879b76e4fb4e3597ce Mon Sep 17 00:00:00 2001
From: alexlichtenstein
Date: Thu, 2 Jun 2022 18:10:22 +0200
Subject: [PATCH] M1 deployment progress

---
 server/Dockerfile                    | 67 ++++++---
 server/requirements_m1.txt           | 21 +++
 server/setup_corrections/ppo_rlib.py | 202 +++++++++++++++++++++++++++
 server/setup_corrections/setup.py    | 23 +++
 4 files changed, 295 insertions(+), 18 deletions(-)
 create mode 100644 server/requirements_m1.txt
 create mode 100644 server/setup_corrections/ppo_rlib.py
 create mode 100644 server/setup_corrections/setup.py

diff --git a/server/Dockerfile b/server/Dockerfile
index fcd67d3..d48b2c2 100644
--- a/server/Dockerfile
+++ b/server/Dockerfile
@@ -1,46 +1,77 @@
-FROM python:3.7-stretch
+FROM python:3.8 as tf
+## This part of the code installs bazel, which we can later use to rebuild library packages for M1 support
+#RUN apt-get update
+#RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential openjdk-11-jdk zip unzip wget
+#RUN wget https://github.com/bazelbuild/bazel/releases/download/4.2.1/bazel-4.2.1-dist.zip
+#RUN mkdir bazel-4.2.1
+#RUN unzip -d ./bazel-4.2.1 bazel-4.2.1-dist.zip
+#WORKDIR /bazel-4.2.1
+#RUN env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh
+#RUN cp output/bazel /usr/local/bin
+#WORKDIR /
+#RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -y
+#RUN cargo install py-spy \
+
+ARG M1
 ARG BUILD_ENV
 ARG OVERCOOKED_BRANCH
 ARG HARL_BRANCH
 ARG GRAPHICS
-
+#
 WORKDIR /app
-
+#
 # Install non-chai dependencies
-COPY ./requirements.txt ./requirements.txt
-RUN pip install -r requirements.txt
-# Install eventlet production server if production build
-RUN if [ "$BUILD_ENV" = "production" ] ; then pip install eventlet ; fi
+RUN pip install --upgrade pip
+RUN apt-get update && apt-get -y install cmake
+
+# Copy both requirements variants and pick the M1 one at build time if requested
+COPY ./requirements*.txt ./
+RUN if [ "$M1" = "true" ] ; then cp requirements_m1.txt requirements.txt ; fi
 
-# Clone chai code
+# Here comes the problem: this is a prebuilt 1.1.0 wheel, which will work, but then we will need
+# to change some parts of the code; the other way is to rebuild version 0.8.5
+RUN if [ "$M1" = "true" ] ; then wget https://github.com/ray-project/ray/files/5741700/ray-1.1.0-cp38-aarch64.zip ; fi
+RUN if [ "$M1" = "true" ] ; then unzip ray-1.1.0-cp38-aarch64.zip ; fi
+RUN if [ "$M1" = "true" ] ; then pip install ray-1.1.0-cp38-cp38-linux_aarch64.whl ; fi
+RUN if [ "$M1" = "true" ] ; then pip install py_spy-0.3.3-cp38-cp38-linux_aarch64.whl ; fi
+RUN if [ "$M1" = "true" ] ; then pip install tensorflow==2.6.0 -f https://tf.kmtea.eu/whl/stable.html ; fi
+RUN if [ "$M1" = "true" ] ; then pip install -r requirements.txt ; fi
+##
+## Install eventlet production server if production build
+RUN if [ "$BUILD_ENV" = "production" ] ; then pip install eventlet ; fi
+##
+## Clone chai code
 RUN git clone https://github.com/HumanCompatibleAI/overcooked_ai.git --branch $OVERCOOKED_BRANCH --single-branch /overcooked_ai
 RUN git clone https://github.com/HumanCompatibleAI/human_aware_rl.git --branch $HARL_BRANCH --single-branch /human_aware_rl
+##
+# Stage the corrected setup.py, then swap it into the cloned repo only for M1 builds
+COPY ./setup_corrections/setup.py ./setup_corrections/setup.py
+RUN if [ "$M1" = "true" ] ; then cp ./setup_corrections/setup.py /human_aware_rl/setup.py ; fi
 
-# Dummy data_dir so things don't break
+# I've started upgrading ray to version 1.1.0 (it requires using a different (upgraded) class for the RNN)
+##COPY ./setup_corrections/ppo_rlib.py /human_aware_rl/human_aware_rl/ppo/ppo_rllib.py
+
+## Dummy data_dir so things don't break
 RUN echo "import os; DATA_DIR=os.path.abspath('.')" >> /human_aware_rl/human_aware_rl/data_dir.py
+##
 
-# Install chai dependencies
+### Install chai dependencies
 RUN pip install -e /overcooked_ai
 RUN pip install -e /human_aware_rl
-RUN apt-get update
+##
 RUN apt-get install -y libgl1-mesa-dev
+##
 
-# Copy over remaining files
+### Copy over remaining files
 COPY ./static ./static
 COPY ./*.py ./
 COPY ./graphics/$GRAPHICS ./static/js/graphics.js
 COPY ./config.json ./config.json
-
-
-
-# Set environment variables that will be used by app.py
+##
+## Set environment variables that will be used by app.py
 ENV HOST 0.0.0.0
 ENV PORT 5000
 ENV CONF_PATH config.json
-
-# Do the thing
+##
+## Do the thing
 EXPOSE 5000
 CMD ["python", "-u", "app.py"]
\ No newline at end of file
diff --git a/server/requirements_m1.txt b/server/requirements_m1.txt
new file mode 100644
index 0000000..bcd3a84
--- /dev/null
+++ b/server/requirements_m1.txt
@@ -0,0 +1,21 @@
+certifi==2020.6.20
+click==8.0
+dnspython==1.16.0
+dill==0.3.2
+Flask==2.1.0
+Flask-SocketIO==4.3.0
+greenlet==0.4.16
+itsdangerous==2.0
+Jinja2==3.1.0
+MarkupSafe==2.0
+monotonic==1.5
+python-engineio==3.13.0
+python-socketio==4.6.0
+six==1.15.0
+Werkzeug==2.0.3
+requests==2.23.0
+pygame==2.1.0
+tabulate
+dm-tree
+opencv-python
+keras==2.6
\ No newline at end of file
diff --git a/server/setup_corrections/ppo_rlib.py b/server/setup_corrections/ppo_rlib.py
new file mode 100644
index 0000000..34986aa
--- /dev/null
+++ b/server/setup_corrections/ppo_rlib.py
@@ -0,0 +1,202 @@
+from ray.rllib.models.tf.tf_modelv2 import TFModelV2
+from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
+import numpy as np
+import tensorflow as tf
+
+
+class RllibPPOModel(TFModelV2):
+    """
+    Model that will map environment states to action probabilities. Will be shared across agents
+    """
+
+    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
+
+        super(RllibPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name)
+
+        # params we need to pass in from the call to "run"
+        custom_params = model_config["custom_options"]
+
+        ## Parse custom network params
+        num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"]
+        size_hidden_layers = custom_params["SIZE_HIDDEN_LAYERS"]
+        num_filters = custom_params["NUM_FILTERS"]
+        num_convs = custom_params["NUM_CONV_LAYERS"]
+        d2rl = custom_params["D2RL"]
+        assert type(d2rl) == bool
+
+        ## Create graph of custom network. It will live under a shared tf scope such that all agents
+        ## use the same model
+        self.inputs = tf.keras.Input(shape=obs_space.shape, name="observations")
+        out = self.inputs
+
+        # Apply initial conv layer with a larger kernel (why?)
+        if num_convs > 0:
+            out = tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[5, 5],
+                padding="same",
+                activation=tf.nn.leaky_relu,
+                name="conv_initial"
+            )(out)
+
+        # Apply remaining conv layers, if any
+        for i in range(0, num_convs - 1):
+            padding = "same" if i < num_convs - 2 else "valid"
+            out = tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[3, 3],
+                padding=padding,
+                activation=tf.nn.leaky_relu,
+                name="conv_{}".format(i)
+            )(out)
+
+        # Apply dense hidden layers, if any
+        conv_out = tf.keras.layers.Flatten()(out)
+        out = conv_out
+        for i in range(num_hidden_layers):
+            if i > 0 and d2rl:
+                out = tf.keras.layers.Concatenate()([out, conv_out])
+            out = tf.keras.layers.Dense(size_hidden_layers)(out)
+            out = tf.keras.layers.LeakyReLU()(out)
+
+        # Linear last layer for action distribution logits
+        layer_out = tf.keras.layers.Dense(self.num_outputs)(out)
+
+        # Linear last layer for value function branch of model
+        value_out = tf.keras.layers.Dense(1)(out)
+
+        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
+        self.register_variables(self.base_model.variables)
+
+    def forward(self, input_dict, state=None, seq_lens=None):
+        model_out, self._value_out = self.base_model(input_dict["obs"])
+        return model_out, state
+
+    def value_function(self):
+        return tf.reshape(self._value_out, [-1])
+
+
+class RllibLSTMPPOModel(RecurrentNetwork):
+    """
+    Model that will map encoded environment observations to action logits
+
+                                                          |_______|
+                                                     /->  | value |
+              ___________     ________     ______  /     |_______|
+    state -> | conv_net | -> | fc_net | -> | lstm |
+             |__________|    |________|    |______| \\
+                                            /      \\  \\    |_______________|
+                                          h_in    c_in \\-> | action_logits |
+                                                            |_______________|
+    """
+
+    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
+        super(RllibLSTMPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name)
+
+        # params we passed in from rllib client
+        custom_params = model_config["custom_options"]
+
+        ## Parse custom network params
+        num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"]
+        size_hidden_layers = custom_params["SIZE_HIDDEN_LAYERS"]
+        num_filters = custom_params["NUM_FILTERS"]
+        num_convs = custom_params["NUM_CONV_LAYERS"]
+        cell_size = custom_params["CELL_SIZE"]
+
+        ### Create graph of the model ###
+        flattened_dim = np.prod(obs_space.shape)
+
+        # Need an extra batch dimension (None) for time dimension
+        flattened_obs_inputs = tf.keras.Input(shape=(None, flattened_dim), name="input")
+        lstm_h_in = tf.keras.Input(shape=(cell_size,), name="h_in")
+        lstm_c_in = tf.keras.Input(shape=(cell_size,), name="c_in")
+        seq_in = tf.keras.Input(shape=(), name="seq_in", dtype=tf.int32)
+
+        # Restore initial observation shape
+        obs_inputs = tf.keras.layers.Reshape(target_shape=(-1, *obs_space.shape))(flattened_obs_inputs)
+        out = obs_inputs
+
+        ## Initial "vision" network
+
+        # Apply initial conv layer with a larger kernel (why?)
+        if num_convs > 0:
+            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[5, 5],
+                padding="same",
+                activation=tf.nn.leaky_relu,
+                name="conv_initial"
+            ))(out)
+
+        # Apply remaining conv layers, if any
+        for i in range(0, num_convs - 1):
+            padding = "same" if i < num_convs - 2 else "valid"
+            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[3, 3],
+                padding=padding,
+                activation=tf.nn.leaky_relu,
+                name="conv_{}".format(i)
+            ))(out)
+
+        # Flatten spatial features
+        out = tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten())(out)
+
+        # Apply dense hidden layers, if any
+        for i in range(num_hidden_layers):
+            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(
+                units=size_hidden_layers,
+                activation=tf.nn.leaky_relu,
+                name="fc_{0}".format(i)
+            ))(out)
+
+        ## LSTM network
+        lstm_out, h_out, c_out = tf.keras.layers.LSTM(cell_size, return_sequences=True, return_state=True, name="lstm")(
+            inputs=out,
+            mask=tf.sequence_mask(seq_in),
+            initial_state=[lstm_h_in, lstm_c_in]
+        )
+
+        # Linear last layer for action distribution logits
+        layer_out = tf.keras.layers.Dense(self.num_outputs, name="logits")(lstm_out)
+
+        # Linear last layer for value function branch of model
+        value_out = tf.keras.layers.Dense(1, name="values")(lstm_out)
+
+        self.cell_size = cell_size
+        self.base_model = tf.keras.Model(
+            inputs=[flattened_obs_inputs, seq_in, lstm_h_in, lstm_c_in],
+            outputs=[layer_out, value_out, h_out, c_out]
+        )
+        self.register_variables(self.base_model.variables)
+
+    def forward_rnn(self, inputs, state, seq_lens):
+        """
+        Run the forward pass of the model
+
+        Arguments:
+            inputs: np.array of shape [BATCH, T, obs_shape]
+            state: list of np.arrays [h_in, c_in] each of shape [BATCH, self.cell_size]
+            seq_lens: np.array of shape [BATCH] where the ith element is the length of the ith sequence
+
+        Output:
+            model_out: tensor of shape [BATCH, T, self.num_outputs] representing action logits
+            state: list of tensors [h_out, c_out] each of shape [BATCH, self.cell_size]
+        """
+        model_out, self._value_out, h_out, c_out = self.base_model([inputs, seq_lens, state])
+
+        return model_out, [h_out, c_out]
+
+    def value_function(self):
+        """
+        Returns a tensor of shape [BATCH * T] representing the value function for the most recent forward pass
+        """
+        return tf.reshape(self._value_out, [-1])
+
+    def get_initial_state(self):
+        """
+        Returns the initial hidden state for the LSTM
+        """
+        return [
+            np.zeros(self.cell_size, np.float32),
+            np.zeros(self.cell_size, np.float32),
+        ]
\ No newline at end of file
diff --git a/server/setup_corrections/setup.py b/server/setup_corrections/setup.py
new file mode 100644
index 0000000..8561076
--- /dev/null
+++ b/server/setup_corrections/setup.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='human_aware_rl',
+      version='0.0.1',
+      description='This package has shared components.',
+      author='Micah Carroll',
+      author_email='micah.d.carroll@berkeley.edu',
+      packages=find_packages(),
+      install_requires=[
+          'GitPython',
+          'memory_profiler',
+          'sacred',
+          'pymongo',
+          'dill',
+          'matplotlib',
+          'requests',
+          'numpy==1.19.5',
+          'seaborn==0.9.0',
+          'pygame==2.1.0',
+      ],
+      )
\ No newline at end of file
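
For reference, the new M1 build argument can be exercised with an ordinary docker build run from the repository root. The invocation below is only a sketch: the branch names, the graphics file name (which must match a file under server/graphics/) and the image tag are illustrative placeholders, not values fixed by this patch. Run on an Apple-silicon host (or any arm64 builder), it produces the linux/arm64 image that the aarch64 wheels above are meant for.

    # Hypothetical build command; OVERCOOKED_BRANCH, HARL_BRANCH, GRAPHICS and the tag are placeholders
    docker build \
      --build-arg M1=true \
      --build-arg BUILD_ENV=production \
      --build-arg OVERCOOKED_BRANCH=master \
      --build-arg HARL_BRANCH=master \
      --build-arg GRAPHICS=overcooked_graphics.js \
      -t overcooked-server:m1 \
      server

Leaving M1 unset (or setting it to anything other than "true") keeps the stock requirements.txt and skips the aarch64 ray, py-spy and tensorflow wheel installs.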