diff --git a/RL01. Basic Example .ipynb b/RL01. Basic Example .ipynb index 8935f22..2e5588e 100644 --- a/RL01. Basic Example .ipynb +++ b/RL01. Basic Example .ipynb @@ -10,9 +10,7 @@ "## Create conda environment\n", "Note that you must use python 2 for compatibility with keras-rl. Everything else supports python 3. \n", "\n", - "`conda create --name RLEnvPython2 python=2.7 numpy scipy matplotlib jupyter h5py`\n", - "\n", - "`source activate RLEnvPython2`\n", + "`yes | conda install numpy scipy matplotlib jupyter h5py`\n", "\n", "\n", "## Now install the gym: \n", @@ -24,7 +22,7 @@ "\n", "`pip install -e .`\n", "\n", - "`brew install cmake boost boost-python sdl2 swig wget`\n", + "`brew update && brew install cmake boost boost-python sdl2 swig wget`\n", "\n", "`pip install -e '.[all]'`\n", "\n", @@ -58,7 +56,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 09:39:14,754] Making new env: CartPole-v0\n" + "[2017-10-23 14:12:46,816] Making new env: CartPole-v0\n" ] } ], @@ -78,14 +76,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 09:39:23,762] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\n" + "[2017-10-23 14:12:48,583] You are calling 'step()' even though this environment has already returned done = True. 
You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Successfully Completed 28.0 steps before falling!\n" + "Successfully Completed 22.0 steps before falling!\n" ] } ], @@ -160,8 +158,7 @@ "\n", "from rl.agents.dqn import DQNAgent\n", "from rl.policy import BoltzmannQPolicy\n", - "from rl.memory import SequentialMemory\n", - "\n" + "from rl.memory import SequentialMemory" ] }, { @@ -173,7 +170,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 09:39:38,985] Making new env: CartPole-v0\n" + "[2017-10-23 14:12:54,968] Making new env: CartPole-v0\n" ] } ], @@ -198,7 +195,7 @@ "text": [ "Box(4,)\n", "Discrete(2)\n", - "{'video.frames_per_second': 50, 'render.modes': ['human', 'rgb_array']}\n" + "{'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n" ] } ], @@ -286,16 +283,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/lucaswood/anaconda3/envs/py27/lib/python2.7/site-packages/rl/memory.py:29: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!\n", + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/memory.py:29: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!\n", " warnings.warn('Not enough entries to sample without replacement. 
Consider increasing your warm-up phase to avoid oversampling!')\n" ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -307,11 +314,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing for 3 episodes ...\n", + "Episode 1: reward: 162.000, steps: 162\n", + "Episode 2: reward: 184.000, steps: 184\n", + "Episode 3: reward: 178.000, steps: 178\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Finally, evaluate our algorithm for 5 episodes.\n", "dqn.test(env, nb_episodes=3, visualize=True)" @@ -329,11 +355,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], "source": [ "from __future__ import division\n", "\n", @@ -358,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -386,11 +418,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2017-10-23 14:26:42,996] Making new env: Breakout-v0\n" + ] + } + ], "source": [ "# Get the environment and extract the number of actions.\n", "env = gym.make('Breakout-v0')\n", @@ -401,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -422,11 +460,60 @@ }, { "cell_type": "code", - 
"execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "permute_1 (Permute) (None, 84, 84, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_1 (Conv2D) (None, 20, 20, 32) 8224 \n", + "_________________________________________________________________\n", + "activation_1 (Activation) (None, 20, 20, 32) 0 \n", + "_________________________________________________________________\n", + "conv2d_2 (Conv2D) (None, 9, 9, 64) 32832 \n", + "_________________________________________________________________\n", + "activation_2 (Activation) (None, 9, 9, 64) 0 \n", + "_________________________________________________________________\n", + "conv2d_3 (Conv2D) (None, 7, 7, 64) 36928 \n", + "_________________________________________________________________\n", + "activation_3 (Activation) (None, 7, 7, 64) 0 \n", + "_________________________________________________________________\n", + "flatten_1 (Flatten) (None, 3136) 0 \n", + "_________________________________________________________________\n", + "dense_1 (Dense) (None, 512) 1606144 \n", + "_________________________________________________________________\n", + "activation_4 (Activation) (None, 512) 0 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 4) 2052 \n", + "_________________________________________________________________\n", + "activation_5 (Activation) (None, 4) 0 \n", + "=================================================================\n", + "Total params: 1,686,180\n", + "Trainable params: 1,686,180\n", + "Non-trainable params: 0\n", + 
"_________________________________________________________________\n", + "None\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (8, 8), strides=(4, 4))`\n", + " \"\"\"Entry point for launching an IPython kernel.\n", + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (4, 4), strides=(2, 2))`\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n", + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:5: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), strides=(1, 1))`\n", + " \"\"\"\n" + ] + } + ], "source": [ "model.add(Convolution2D(32, 8, 8, subsample=(4, 4)))\n", "model.add(Activation('relu'))\n", @@ -444,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -480,52 +567,75 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved weights to %s large_data/breakout_LinAnnPol_GreedyQ.h5f\n", + "1000000\n", + "CPU times: user 6.71 s, sys: 678 ms, total: 7.38 s\n", + "Wall time: 3.46 s\n" + ] + } + ], "source": [ "%%time\n", + "model_path = 'large_data/breakout_LinAnnPol_GreedyQ.h5f'\n", "# Okay, now it's time to learn something! We capture the interrupt exception so that training\n", "# can be prematurely aborted. 
Notice that you can use the built-in Keras callbacks!\n", "steps_per_run = 1000000\n", "runs += steps_per_run\n", - "dqn.fit(env, nb_steps=steps_per_run, visualize=False, verbose=0)\n", - "print(runs)\n" + "try:\n", + " dqn.fit(env, nb_steps=steps_per_run, visualize=False, verbose=0)\n", + "except KeyboardInterrupt:\n", + " print(\"Model interrupted, triggering save.\")\n", + " pass\n", + "finally:\n", + " dqn.save_weights(model_path, overwrite=True)\n", + " print(\"Saved weights to %s\", model_path)\n", + " print(runs)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "dqn.save_weights('large_data/breakout_LinAnnPol_GreedyQ.h5f', overwrite=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2017-10-23 14:29:15,224] Making new env: Breakout-v0\n" + ] + } + ], "source": [ - "dqn.test(env, nb_episodes=1, visualize=True)" + "env.close()\n", + "env = gym.make('Breakout-v0')\n", + "np.random.seed(123)\n", + "env.seed(123)\n", + "nb_actions = env.action_space.n\n", + "dqn.nb_actions = nb_actions" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing for 5 episodes ...\n" + ] + } + ], "source": [ - "env.close()" + "dqn.test(env, nb_episodes=5, nb_max_start_steps=1, nb_max_episode_steps=100)" ] }, { @@ -664,21 +774,21 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", 
"name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.13" + "pygments_lexer": "ipython3", + "version": "3.6.1" } }, "nbformat": 4,