From 6ee274524e51d56b919e2e316a84e545970a4ea2 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Mon, 23 Oct 2017 11:06:22 -0500 Subject: [PATCH 1/3] Python3 works --- RL01. Basic Example .ipynb | 220 ++++++++++++++++++++++++++++--------- 1 file changed, 168 insertions(+), 52 deletions(-) diff --git a/RL01. Basic Example .ipynb b/RL01. Basic Example .ipynb index 8935f22..eb8d07d 100644 --- a/RL01. Basic Example .ipynb +++ b/RL01. Basic Example .ipynb @@ -58,7 +58,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 09:39:14,754] Making new env: CartPole-v0\n" + "[2017-10-23 10:36:41,614] Making new env: CartPole-v0\n" ] } ], @@ -78,14 +78,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 09:39:23,762] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\n" + "[2017-10-23 10:36:43,124] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Successfully Completed 28.0 steps before falling!\n" + "Successfully Completed 14.0 steps before falling!\n" ] } ], @@ -160,8 +160,7 @@ "\n", "from rl.agents.dqn import DQNAgent\n", "from rl.policy import BoltzmannQPolicy\n", - "from rl.memory import SequentialMemory\n", - "\n" + "from rl.memory import SequentialMemory" ] }, { @@ -173,7 +172,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 09:39:38,985] Making new env: CartPole-v0\n" + "[2017-10-23 10:36:47,143] Making new env: CartPole-v0\n" ] } ], @@ -198,7 +197,7 @@ "text": [ "Box(4,)\n", "Discrete(2)\n", - "{'video.frames_per_second': 50, 'render.modes': ['human', 'rgb_array']}\n" + "{'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n" ] } ], @@ -264,9 +263,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and\n", @@ -286,16 +283,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/lucaswood/anaconda3/envs/py27/lib/python2.7/site-packages/rl/memory.py:29: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!\n", + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/memory.py:29: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!\n", " warnings.warn('Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!')\n" ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -307,11 +314,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing for 3 episodes ...\n", + "Episode 1: reward: 162.000, steps: 162\n", + "Episode 2: reward: 184.000, steps: 184\n", + "Episode 3: reward: 178.000, steps: 178\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Finally, evaluate our algorithm for 5 episodes.\n", "dqn.test(env, nb_episodes=3, visualize=True)" @@ -329,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "collapsed": true }, @@ -358,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "collapsed": true }, @@ -386,11 +412,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2017-10-23 10:37:56,282] Making new env: Breakout-v0\n" + ] + } + ], "source": [ "# Get the environment and extract the number of actions.\n", "env = gym.make('Breakout-v0')\n", @@ -401,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "collapsed": true }, @@ -422,11 +454,60 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "permute_1 (Permute) (None, 84, 84, 4) 0 \n", + "_________________________________________________________________\n", + "conv2d_1 (Conv2D) (None, 20, 20, 32) 8224 \n", + "_________________________________________________________________\n", + "activation_5 (Activation) (None, 20, 20, 32) 0 \n", + "_________________________________________________________________\n", + "conv2d_2 (Conv2D) (None, 9, 9, 64) 32832 \n", + "_________________________________________________________________\n", + "activation_6 (Activation) (None, 9, 9, 64) 0 \n", + "_________________________________________________________________\n", + "conv2d_3 (Conv2D) (None, 7, 7, 64) 36928 \n", + "_________________________________________________________________\n", + "activation_7 (Activation) (None, 7, 7, 64) 0 \n", + "_________________________________________________________________\n", + "flatten_2 (Flatten) (None, 3136) 0 \n", + "_________________________________________________________________\n", + "dense_5 (Dense) (None, 512) 1606144 \n", + "_________________________________________________________________\n", + "activation_8 (Activation) (None, 512) 0 \n", + "_________________________________________________________________\n", + "dense_6 (Dense) (None, 4) 2052 \n", + "_________________________________________________________________\n", + "activation_9 (Activation) (None, 4) 0 \n", + "=================================================================\n", + "Total params: 1,686,180\n", + "Trainable params: 1,686,180\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (8, 8), strides=(4, 4))`\n", + " \"\"\"Entry point for launching an IPython kernel.\n", + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (4, 4), strides=(2, 2))`\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n", + "/Users/lucaswood/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:5: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), strides=(1, 1))`\n", + " \"\"\"\n" + ] + } + ], "source": [ "model.add(Convolution2D(32, 8, 8, subsample=(4, 4)))\n", "model.add(Activation('relu'))\n", @@ -444,7 +525,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "collapsed": true }, @@ -480,27 +561,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2000000\n", + "CPU times: user 2min 19s, sys: 45.4 s, total: 3min 4s\n", + "Wall time: 4min 20s\n" + ] + } + ], "source": [ "%%time\n", "# Okay, now it's time to learn something! We capture the interrupt exception so that training\n", "# can be prematurely aborted. Notice that you can use the built-in Keras callbacks!\n", "steps_per_run = 1000000\n", "runs += steps_per_run\n", - "dqn.fit(env, nb_steps=steps_per_run, visualize=False, verbose=0)\n", + "dqn.fit(env, nb_steps=steps_per_run, visualize=True, verbose=0)\n", "print(runs)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 21, + "metadata": {}, "outputs": [], "source": [ "dqn.save_weights('large_data/breakout_LinAnnPol_GreedyQ.h5f', overwrite=True)" @@ -508,18 +595,47 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing for 1 episodes ...\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdqn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnb_episodes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvisualize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/core.py\u001b[0m in \u001b[0;36mtest\u001b[0;34m(self, env, nb_episodes, action_repetition, callbacks, visualize, nb_max_episode_steps, nb_max_start_steps, start_step_policy, verbose)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepisode_step\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 307\u001b[0;31m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 308\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocessor\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocessor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_action\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/agents/dqn.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, observation)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0;31m# Select an action.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[0mstate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmemory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_recent_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 218\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpolicy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mselect_action\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mq_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mq_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/agents/dqn.py\u001b[0m in \u001b[0;36mcompute_q_values\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_batch_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 70\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnb_actions\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/agents/dqn.py\u001b[0m in \u001b[0;36mcompute_batch_q_values\u001b[0;34m(self, state_batch)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_batch_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_state_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_on_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnb_actions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/keras/models.py\u001b[0m in \u001b[0;36mpredict_on_batch\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 921\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 922\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 923\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_on_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 924\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 925\u001b[0m def train_on_batch(self, x, y, class_weight=None,\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict_on_batch\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 1626\u001b[0m \u001b[0mins\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_predict_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1628\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1629\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1630\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 2266\u001b[0m updated = session.run(self.outputs + [self.updates_op],\n\u001b[1;32m 2267\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2268\u001b[0;31m **self.session_kwargs)\n\u001b[0m\u001b[1;32m 2269\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mupdated\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2270\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 777\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 778\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 779\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 780\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 980\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 981\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m--> 982\u001b[0;31m feed_dict_string, options, run_metadata)\n\u001b[0m\u001b[1;32m 983\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 984\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1030\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1031\u001b[0m return self._do_call(_run_fn, self._session, feed_dict, fetch_list,\n\u001b[0;32m-> 1032\u001b[0;31m target_list, options, run_metadata)\n\u001b[0m\u001b[1;32m 1033\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1034\u001b[0m return self._do_call(_prun_fn, self._session, handle, feed_dict,\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1037\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1039\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1040\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1019\u001b[0m return tf_session.TF_Run(session, options,\n\u001b[1;32m 1020\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1021\u001b[0;31m status, run_metadata)\n\u001b[0m\u001b[1;32m 1022\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], "source": [ "dqn.test(env, nb_episodes=1, visualize=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": { "collapsed": true }, @@ -664,21 +780,21 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.13" + "pygments_lexer": "ipython3", + "version": "3.6.1" } }, "nbformat": 4, From 5177b55b4bba673f3aa9ad1c5c7b24da7b052038 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Mon, 23 Oct 2017 14:05:34 -0500 Subject: [PATCH 2/3] udpate firsted cell for python3 --- RL01. Basic Example .ipynb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/RL01. Basic Example .ipynb b/RL01. Basic Example .ipynb index eb8d07d..ce28cbb 100644 --- a/RL01. Basic Example .ipynb +++ b/RL01. Basic Example .ipynb @@ -10,9 +10,7 @@ "## Create conda environment\n", "Note that you must use python 2 for compatibility with keras-rl. Everything else supports python 3. \n", "\n", - "`conda create --name RLEnvPython2 python=2.7 numpy scipy matplotlib jupyter h5py`\n", - "\n", - "`source activate RLEnvPython2`\n", + "`yes | conda install numpy scipy matplotlib jupyter h5py`\n", "\n", "\n", "## Now install the gym: \n", @@ -24,7 +22,7 @@ "\n", "`pip install -e .`\n", "\n", - "`brew install cmake boost boost-python sdl2 swig wget`\n", + "`brew update && brew install cmake boost boost-python sdl2 swig wget`\n", "\n", "`pip install -e '.[all]'`\n", "\n", @@ -263,7 +261,9 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and\n", @@ -587,7 +587,9 @@ { "cell_type": "code", "execution_count": 21, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "dqn.save_weights('large_data/breakout_LinAnnPol_GreedyQ.h5f', overwrite=True)" From ad733261f466eb487da93d25cce21a91df59bce0 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Mon, 23 Oct 2017 14:30:02 -0500 Subject: [PATCH 3/3] Test doesn't work no matter what - we need to look into why test doesn't work but an identical fit does --- RL01. Basic Example .ipynb | 136 +++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 72 deletions(-) diff --git a/RL01. Basic Example .ipynb b/RL01. Basic Example .ipynb index ce28cbb..2e5588e 100644 --- a/RL01. Basic Example .ipynb +++ b/RL01. Basic Example .ipynb @@ -56,7 +56,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 10:36:41,614] Making new env: CartPole-v0\n" + "[2017-10-23 14:12:46,816] Making new env: CartPole-v0\n" ] } ], @@ -76,14 +76,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 10:36:43,124] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\n" + "[2017-10-23 14:12:48,583] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Successfully Completed 14.0 steps before falling!\n" + "Successfully Completed 22.0 steps before falling!\n" ] } ], @@ -170,7 +170,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 10:36:47,143] Making new env: CartPole-v0\n" + "[2017-10-23 14:12:54,968] Making new env: CartPole-v0\n" ] } ], @@ -355,11 +355,17 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], "source": [ "from __future__ import division\n", "\n", @@ -384,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -412,14 +418,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[2017-10-23 10:37:56,282] Making new env: Breakout-v0\n" + "[2017-10-23 14:26:42,996] Making new env: Breakout-v0\n" ] } ], @@ -433,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -454,7 +460,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -468,25 +474,25 @@ "_________________________________________________________________\n", "conv2d_1 (Conv2D) (None, 20, 20, 32) 8224 \n", "_________________________________________________________________\n", - "activation_5 (Activation) (None, 20, 20, 32) 0 \n", + "activation_1 (Activation) (None, 20, 20, 32) 0 \n", "_________________________________________________________________\n", "conv2d_2 (Conv2D) (None, 9, 9, 64) 32832 \n", "_________________________________________________________________\n", - "activation_6 (Activation) (None, 9, 9, 64) 0 \n", + "activation_2 (Activation) (None, 9, 9, 64) 0 \n", "_________________________________________________________________\n", "conv2d_3 (Conv2D) (None, 7, 7, 64) 36928 \n", "_________________________________________________________________\n", - "activation_7 (Activation) (None, 7, 7, 64) 0 \n", + "activation_3 (Activation) (None, 7, 7, 64) 0 \n", "_________________________________________________________________\n", - "flatten_2 (Flatten) (None, 3136) 0 \n", + "flatten_1 (Flatten) (None, 3136) 0 \n", "_________________________________________________________________\n", - "dense_5 (Dense) (None, 512) 1606144 \n", + "dense_1 (Dense) (None, 512) 1606144 \n", "_________________________________________________________________\n", - "activation_8 (Activation) (None, 512) 0 \n", + "activation_4 (Activation) (None, 512) 0 \n", "_________________________________________________________________\n", - "dense_6 (Dense) (None, 4) 2052 \n", + "dense_2 (Dense) (None, 4) 2052 \n", "_________________________________________________________________\n", - "activation_9 (Activation) (None, 4) 0 \n", + "activation_5 (Activation) (None, 4) 0 \n", "=================================================================\n", "Total params: 1,686,180\n", "Trainable params: 1,686,180\n", @@ -525,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -561,89 +567,75 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2000000\n", - "CPU times: user 2min 19s, sys: 45.4 s, total: 3min 4s\n", - "Wall time: 4min 20s\n" + "Saved weights to %s large_data/breakout_LinAnnPol_GreedyQ.h5f\n", + "1000000\n", + "CPU times: user 6.71 s, sys: 678 ms, total: 7.38 s\n", + "Wall time: 3.46 s\n" ] } ], "source": [ "%%time\n", + "model_path = 'large_data/breakout_LinAnnPol_GreedyQ.h5f'\n", "# Okay, now it's time to learn something! We capture the interrupt exception so that training\n", "# can be prematurely aborted. Notice that you can use the built-in Keras callbacks!\n", "steps_per_run = 1000000\n", "runs += steps_per_run\n", - "dqn.fit(env, nb_steps=steps_per_run, visualize=True, verbose=0)\n", - "print(runs)\n" + "try:\n", + " dqn.fit(env, nb_steps=steps_per_run, visualize=False, verbose=0)\n", + "except KeyboardInterrupt:\n", + " print(\"Model interupted, triggering save.\")\n", + " pass\n", + "finally:\n", + " dqn.save_weights(model_path, overwrite=True)\n", + " print(\"Saved weights to %s\", model_path)\n", + " print(runs)" ] }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2017-10-23 14:29:15,224] Making new env: Breakout-v0\n" + ] + } + ], "source": [ - "dqn.save_weights('large_data/breakout_LinAnnPol_GreedyQ.h5f', overwrite=True)" + "env.close()\n", + "env = gym.make('Breakout-v0')\n", + "np.random.seed(123)\n", + "env.seed(123)\n", + "nb_actions = env.action_space.n\n", + "dqn.nb_actions = nb_actions" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Testing for 1 episodes ...\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdqn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnb_episodes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvisualize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/core.py\u001b[0m in \u001b[0;36mtest\u001b[0;34m(self, env, nb_episodes, action_repetition, callbacks, visualize, nb_max_episode_steps, nb_max_start_steps, start_step_policy, verbose)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepisode_step\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 307\u001b[0;31m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 308\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocessor\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocessor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_action\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/agents/dqn.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, observation)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0;31m# Select an action.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[0mstate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmemory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_recent_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 218\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpolicy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mselect_action\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mq_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mq_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/agents/dqn.py\u001b[0m in \u001b[0;36mcompute_q_values\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_batch_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 70\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnb_actions\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/rl/agents/dqn.py\u001b[0m in \u001b[0;36mcompute_batch_q_values\u001b[0;34m(self, state_batch)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_batch_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_state_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_on_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnb_actions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/keras/models.py\u001b[0m in \u001b[0;36mpredict_on_batch\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 921\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 922\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 923\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_on_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 924\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 925\u001b[0m def train_on_batch(self, x, y, class_weight=None,\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict_on_batch\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 1626\u001b[0m \u001b[0mins\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_predict_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1628\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1629\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1630\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 2266\u001b[0m updated = session.run(self.outputs + [self.updates_op],\n\u001b[1;32m 2267\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2268\u001b[0;31m **self.session_kwargs)\n\u001b[0m\u001b[1;32m 2269\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mupdated\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2270\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 777\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 778\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 779\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 780\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 980\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 981\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m--> 982\u001b[0;31m feed_dict_string, options, run_metadata)\n\u001b[0m\u001b[1;32m 983\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 984\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1030\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1031\u001b[0m return self._do_call(_run_fn, self._session, feed_dict, fetch_list,\n\u001b[0;32m-> 1032\u001b[0;31m target_list, options, run_metadata)\n\u001b[0m\u001b[1;32m 1033\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1034\u001b[0m return self._do_call(_prun_fn, self._session, handle, feed_dict,\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1037\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1039\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1040\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/lucaswood/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1019\u001b[0m return tf_session.TF_Run(session, options,\n\u001b[1;32m 1020\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1021\u001b[0;31m status, run_metadata)\n\u001b[0m\u001b[1;32m 1022\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "Testing for 5 episodes ...\n" ] } ], "source": [ - "dqn.test(env, nb_episodes=1, visualize=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "env.close()" + "dqn.test(env, nb_episodes=5, nb_max_start_steps=1, nb_max_episode_steps=100)" ] }, {