From addab68e0b22cb3d1522e9879b76e4fb4e3597ce Mon Sep 17 00:00:00 2001
From: alexlichtenstein
Date: Thu, 2 Jun 2022 18:10:22 +0200
Subject: [PATCH] M1 deployment progress

---
 server/Dockerfile                    | 67 ++++++---
 server/requirements_m1.txt           | 21 +++
 server/setup_corrections/ppo_rlib.py | 202 +++++++++++++++++++++++++++
 server/setup_corrections/setup.py    | 23 +++
 4 files changed, 295 insertions(+), 18 deletions(-)
 create mode 100644 server/requirements_m1.txt
 create mode 100644 server/setup_corrections/ppo_rlib.py
 create mode 100644 server/setup_corrections/setup.py

diff --git a/server/Dockerfile b/server/Dockerfile
index fcd67d3..d48b2c2 100644
--- a/server/Dockerfile
+++ b/server/Dockerfile
@@ -1,46 +1,77 @@
-FROM python:3.7-stretch
+FROM python:3.8 as tf
+## This part of the code installs bazel, which we can later use to rebuild library packages for M1 support
+#RUN apt-get update
+#RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential openjdk-11-jdk zip unzip wget
+#RUN wget https://github.com/bazelbuild/bazel/releases/download/4.2.1/bazel-4.2.1-dist.zip
+#RUN mkdir bazel-4.2.1
+#RUN unzip -d ./bazel-4.2.1 bazel-4.2.1-dist.zip
+#WORKDIR /bazel-4.2.1
+#RUN env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh
+#RUN cp output/bazel /usr/local/bin
+#WORKDIR /
+#RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -y
+#RUN cargo install py-spy \
+
+ARG M1
 ARG BUILD_ENV
 ARG OVERCOOKED_BRANCH
 ARG HARL_BRANCH
 ARG GRAPHICS
-
+#
 WORKDIR /app
-
+#
 # Install non-chai dependencies
-COPY ./requirements.txt ./requirements.txt
-RUN pip install -r requirements.txt
-# Install eventlet production server if production build
-RUN if [ "$BUILD_ENV" = "production" ] ; then pip install eventlet ; fi
+RUN pip install --upgrade pip
+RUN apt-get update && apt-get -y install cmake
+
+# Copy both requirements variants and pick the M1 one at build time if requested
+COPY ./requirements*.txt ./
+RUN if [ "$M1" = "true" ] ; then cp requirements_m1.txt requirements.txt ; fi
 
-# Clone chai code
+# Here comes the problem: this is a prebuilt 1.1.0 wheel, which will work, but then we will need
+# to change some parts of the code; the other way is to rebuild version 0.8.5
+RUN if [ "$M1" = "true" ] ; then wget https://github.com/ray-project/ray/files/5741700/ray-1.1.0-cp38-aarch64.zip ; fi
+RUN if [ "$M1" = "true" ] ; then unzip ray-1.1.0-cp38-aarch64.zip ; fi
+RUN if [ "$M1" = "true" ] ; then pip install ray-1.1.0-cp38-cp38-linux_aarch64.whl ; fi
+RUN if [ "$M1" = "true" ] ; then pip install py_spy-0.3.3-cp38-cp38-linux_aarch64.whl ; fi
+RUN if [ "$M1" = "true" ] ; then pip install tensorflow==2.6.0 -f https://tf.kmtea.eu/whl/stable.html ; fi
+RUN if [ "$M1" = "true" ] ; then pip install -r requirements.txt ; fi
+##
+## Install eventlet production server if production build
+RUN if [ "$BUILD_ENV" = "production" ] ; then pip install eventlet ; fi
+##
+## Clone chai code
 RUN git clone https://github.com/HumanCompatibleAI/overcooked_ai.git --branch $OVERCOOKED_BRANCH --single-branch /overcooked_ai
 RUN git clone https://github.com/HumanCompatibleAI/human_aware_rl.git --branch $HARL_BRANCH --single-branch /human_aware_rl
+##
+# Stage the corrected setup.py, then swap it into the cloned repo only for M1 builds
+COPY ./setup_corrections/setup.py ./setup_corrections/setup.py
+RUN if [ "$M1" = "true" ] ; then cp ./setup_corrections/setup.py /human_aware_rl/setup.py ; fi
 
-# Dummy data_dir so things don't break
+# I've started upgrading ray to version 1.1.0 (it requires using a different (upgraded) class for the RNN)
+##COPY ./setup_corrections/ppo_rlib.py /human_aware_rl/human_aware_rl/ppo/ppo_rllib.py
+
+## Dummy data_dir so things don't break
 RUN echo "import os; DATA_DIR=os.path.abspath('.')" >> /human_aware_rl/human_aware_rl/data_dir.py
+##
 
-# Install chai dependencies
+### Install chai dependencies
 RUN pip install -e /overcooked_ai
 RUN pip install -e /human_aware_rl
-RUN apt-get update
+##
 RUN apt-get install -y libgl1-mesa-dev
+##
 
-# Copy over remaining files
+### Copy over remaining files
 COPY ./static ./static
 COPY ./*.py ./
 COPY ./graphics/$GRAPHICS ./static/js/graphics.js
 COPY ./config.json ./config.json
-
-
-
-# Set environment variables that will be used by app.py
+##
+## Set environment variables that will be used by app.py
 ENV HOST 0.0.0.0
 ENV PORT 5000
 ENV CONF_PATH config.json
-
-# Do the thing
+##
+## Do the thing
 EXPOSE 5000
 CMD ["python", "-u", "app.py"]
\ No newline at end of file
diff --git a/server/requirements_m1.txt b/server/requirements_m1.txt
new file mode 100644
index 0000000..bcd3a84
--- /dev/null
+++ b/server/requirements_m1.txt
@@ -0,0 +1,21 @@
+certifi==2020.6.20
+click==8.0
+dnspython==1.16.0
+dill==0.3.2
+Flask==2.1.0
+Flask-SocketIO==4.3.0
+greenlet==0.4.16
+itsdangerous==2.0
+Jinja2==3.1.0
+MarkupSafe==2.0
+monotonic==1.5
+python-engineio==3.13.0
+python-socketio==4.6.0
+six==1.15.0
+Werkzeug==2.0.3
+requests==2.23.0
+pygame==2.1.0
+tabulate
+dm-tree
+opencv-python
+keras==2.6
\ No newline at end of file
diff --git a/server/setup_corrections/ppo_rlib.py b/server/setup_corrections/ppo_rlib.py
new file mode 100644
index 0000000..34986aa
--- /dev/null
+++ b/server/setup_corrections/ppo_rlib.py
@@ -0,0 +1,202 @@
+from ray.rllib.models.tf.tf_modelv2 import TFModelV2
+from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
+import numpy as np
+import tensorflow as tf
+
+
+class RllibPPOModel(TFModelV2):
+    """
+    Model that will map environment states to action probabilities. Will be shared across agents
+    """
+
+    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
+
+        super(RllibPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name)
+
+        # params we need to pass in from the call to "run"
+        custom_params = model_config["custom_options"]
+
+        ## Parse custom network params
+        num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"]
+        size_hidden_layers = custom_params["SIZE_HIDDEN_LAYERS"]
+        num_filters = custom_params["NUM_FILTERS"]
+        num_convs = custom_params["NUM_CONV_LAYERS"]
+        d2rl = custom_params["D2RL"]
+        assert type(d2rl) == bool
+
+        ## Create graph of custom network. It will live under a shared tf scope such that all agents
+        ## use the same model
+        self.inputs = tf.keras.Input(shape=obs_space.shape, name="observations")
+        out = self.inputs
+
+        # Apply initial conv layer with a larger kernel (why?)
+        if num_convs > 0:
+            out = tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[5, 5],
+                padding="same",
+                activation=tf.nn.leaky_relu,
+                name="conv_initial"
+            )(out)
+
+        # Apply remaining conv layers, if any
+        for i in range(0, num_convs - 1):
+            padding = "same" if i < num_convs - 2 else "valid"
+            out = tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[3, 3],
+                padding=padding,
+                activation=tf.nn.leaky_relu,
+                name="conv_{}".format(i)
+            )(out)
+
+        # Apply dense hidden layers, if any
+        conv_out = tf.keras.layers.Flatten()(out)
+        out = conv_out
+        for i in range(num_hidden_layers):
+            if i > 0 and d2rl:
+                out = tf.keras.layers.Concatenate()([out, conv_out])
+            out = tf.keras.layers.Dense(size_hidden_layers)(out)
+            out = tf.keras.layers.LeakyReLU()(out)
+
+        # Linear last layer for action distribution logits
+        layer_out = tf.keras.layers.Dense(self.num_outputs)(out)
+
+        # Linear last layer for value function branch of model
+        value_out = tf.keras.layers.Dense(1)(out)
+
+        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
+        self.register_variables(self.base_model.variables)
+
+    def forward(self, input_dict, state=None, seq_lens=None):
+        model_out, self._value_out = self.base_model(input_dict["obs"])
+        return model_out, state
+
+    def value_function(self):
+        return tf.reshape(self._value_out, [-1])
+
+
+class RllibLSTMPPOModel(RecurrentNetwork):
+    """
+    Model that will map encoded environment observations to action logits
+
+                                                          |_______|
+                                                     /->  | value |
+              ___________     ________     ______  /     |_______|
+    state -> | conv_net | -> | fc_net | -> | lstm |
+             |__________|    |________|    |______| \\
+                                            /      \\  \\    |_______________|
+                                          h_in    c_in \\-> | action_logits |
+                                                            |_______________|
+    """
+
+    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
+        super(RllibLSTMPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name)
+
+        # params we passed in from rllib client
+        custom_params = model_config["custom_options"]
+
+        ## Parse custom network params
+        num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"]
+        size_hidden_layers = custom_params["SIZE_HIDDEN_LAYERS"]
+        num_filters = custom_params["NUM_FILTERS"]
+        num_convs = custom_params["NUM_CONV_LAYERS"]
+        cell_size = custom_params["CELL_SIZE"]
+
+        ### Create graph of the model ###
+        flattened_dim = np.prod(obs_space.shape)
+
+        # Need an extra batch dimension (None) for time dimension
+        flattened_obs_inputs = tf.keras.Input(shape=(None, flattened_dim), name="input")
+        lstm_h_in = tf.keras.Input(shape=(cell_size,), name="h_in")
+        lstm_c_in = tf.keras.Input(shape=(cell_size,), name="c_in")
+        seq_in = tf.keras.Input(shape=(), name="seq_in", dtype=tf.int32)
+
+        # Restore initial observation shape
+        obs_inputs = tf.keras.layers.Reshape(target_shape=(-1, *obs_space.shape))(flattened_obs_inputs)
+        out = obs_inputs
+
+        ## Initial "vision" network
+
+        # Apply initial conv layer with a larger kernel (why?)
+        if num_convs > 0:
+            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[5, 5],
+                padding="same",
+                activation=tf.nn.leaky_relu,
+                name="conv_initial"
+            ))(out)
+
+        # Apply remaining conv layers, if any
+        for i in range(0, num_convs - 1):
+            padding = "same" if i < num_convs - 2 else "valid"
+            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
+                filters=num_filters,
+                kernel_size=[3, 3],
+                padding=padding,
+                activation=tf.nn.leaky_relu,
+                name="conv_{}".format(i)
+            ))(out)
+
+        # Flatten spatial features
+        out = tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten())(out)
+
+        # Apply dense hidden layers, if any
+        for i in range(num_hidden_layers):
+            out = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(
+                units=size_hidden_layers,
+                activation=tf.nn.leaky_relu,
+                name="fc_{0}".format(i)
+            ))(out)
+
+        ## LSTM network
+        lstm_out, h_out, c_out = tf.keras.layers.LSTM(cell_size, return_sequences=True, return_state=True, name="lstm")(
+            inputs=out,
+            mask=tf.sequence_mask(seq_in),
+            initial_state=[lstm_h_in, lstm_c_in]
+        )
+
+        # Linear last layer for action distribution logits
+        layer_out = tf.keras.layers.Dense(self.num_outputs, name="logits")(lstm_out)
+
+        # Linear last layer for value function branch of model
+        value_out = tf.keras.layers.Dense(1, name="values")(lstm_out)
+
+        self.cell_size = cell_size
+        self.base_model = tf.keras.Model(
+            inputs=[flattened_obs_inputs, seq_in, lstm_h_in, lstm_c_in],
+            outputs=[layer_out, value_out, h_out, c_out]
+        )
+        self.register_variables(self.base_model.variables)
+
+    def forward_rnn(self, inputs, state, seq_lens):
+        """
+        Run the forward pass of the model
+
+        Arguments:
+            inputs: np.array of shape [BATCH, T, obs_shape]
+            state: list of np.arrays [h_in, c_in] each of shape [BATCH, self.cell_size]
+            seq_lens: np.array of shape [BATCH] where the ith element is the length of the ith sequence
+
+        Output:
+            model_out: tensor of shape [BATCH, T, self.num_outputs] representing action logits
+            state: list of tensors [h_out, c_out] each of shape [BATCH, self.cell_size]
+        """
+        model_out, self._value_out, h_out, c_out = self.base_model([inputs, seq_lens, state])
+
+        return model_out, [h_out, c_out]
+
+    def value_function(self):
+        """
+        Returns a tensor of shape [BATCH * T] representing the value function for the most recent forward pass
+        """
+        return tf.reshape(self._value_out, [-1])
+
+    def get_initial_state(self):
+        """
+        Returns the initial hidden state for the LSTM
+        """
+        return [
+            np.zeros(self.cell_size, np.float32),
+            np.zeros(self.cell_size, np.float32),
+        ]
\ No newline at end of file
diff --git a/server/setup_corrections/setup.py b/server/setup_corrections/setup.py
new file mode 100644
index 0000000..8561076
--- /dev/null
+++ b/server/setup_corrections/setup.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='human_aware_rl',
+      version='0.0.1',
+      description='This package has shared components.',
+      author='Micah Carroll',
+      author_email='micah.d.carroll@berkeley.edu',
+      packages=find_packages(),
+      install_requires=[
+          'GitPython',
+          'memory_profiler',
+          'sacred',
+          'pymongo',
+          'dill',
+          'matplotlib',
+          'requests',
+          'numpy==1.19.5',
+          'seaborn==0.9.0',
+          'pygame==2.1.0',
+      ],
+      )
\ No newline at end of file
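
For reference, the new M1 build argument can be exercised with an ordinary docker build run from the repository root. The invocation below is only a sketch: the branch names, the graphics file name (which must match a file under server/graphics/) and the image tag are illustrative placeholders, not values fixed by this patch. Run on an Apple-silicon host (or any arm64 builder), it produces the linux/arm64 image that the aarch64 wheels above are meant for.

    # Hypothetical build command; OVERCOOKED_BRANCH, HARL_BRANCH, GRAPHICS and the tag are placeholders
    docker build \
      --build-arg M1=true \
      --build-arg BUILD_ENV=production \
      --build-arg OVERCOOKED_BRANCH=master \
      --build-arg HARL_BRANCH=master \
      --build-arg GRAPHICS=overcooked_graphics.js \
      -t overcooked-server:m1 \
      server

Leaving M1 unset (or setting it to anything other than "true") keeps the stock requirements.txt and skips the aarch64 ray, py-spy and tensorflow wheel installs.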