45 changes: 24 additions & 21 deletions docs/features/reasoning_outputs.md
@@ -2,7 +2,10 @@

vLLM offers support for reasoning models like [DeepSeek R1](https://huggingface.co/deepseek-ai/DeepSeek-R1), which are designed to generate outputs containing both reasoning steps and final conclusions.

-Reasoning models return an additional `reasoning_content` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models.
+Reasoning models return an additional `reasoning` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models.

+!!! warning
+    `reasoning` used to be called `reasoning_content`. For now, `reasoning_content` will continue to work. However, we encourage you to migrate to `reasoning` in case `reasoning_content` is removed in future.
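
While both field names are accepted, client code that must run against both older and newer vLLM servers can prefer the new field and fall back to the deprecated one. A minimal sketch, not part of this diff, assuming the OpenAI Python client and the `client`, `model`, and `messages` objects set up as in the surrounding examples:

```python
# Prefer the new `reasoning` field and fall back to the deprecated
# `reasoning_content` on older servers. The OpenAI Python client exposes
# extra (non-standard) response fields as attributes, so getattr covers both.
response = client.chat.completions.create(model=model, messages=messages)
message = response.choices[0].message

reasoning = getattr(message, "reasoning", None) or getattr(
    message, "reasoning_content", None
)
content = message.content
```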

## Supported Models

@@ -61,18 +64,18 @@ Next, make a request to the model that should return the reasoning content in th…
# extra_body={"chat_template_kwargs": {"enable_thinking": False}}
response = client.chat.completions.create(model=model, messages=messages)

-reasoning_content = response.choices[0].message.reasoning_content
+reasoning = response.choices[0].message.reasoning
content = response.choices[0].message.content

print("reasoning_content:", reasoning_content)
print("reasoning:", reasoning)
print("content:", content)
```

-The `reasoning_content` field contains the reasoning steps that led to the final conclusion, while the `content` field contains the final conclusion.
+The `reasoning` field contains the reasoning steps that led to the final conclusion, while the `content` field contains the final conclusion.

## Streaming chat completions

-Streaming chat completions are also supported for reasoning models. The `reasoning_content` field is available in the `delta` field in [chat completion response chunks](https://platform.openai.com/docs/api-reference/chat/streaming).
+Streaming chat completions are also supported for reasoning models. The `reasoning` field is available in the `delta` field in [chat completion response chunks](https://platform.openai.com/docs/api-reference/chat/streaming).

??? console "Json"

@@ -88,7 +91,7 @@ Streaming chat completions are also supported for reasoni…
"index": 0,
"delta": {
"role": "assistant",
"reasoning_content": "is",
"reasoning": "is",
},
"logprobs": null,
"finish_reason": null
@@ -97,7 +100,7 @@ Streaming chat completions are also supported for reasoni…
}
```

-OpenAI Python client library does not officially support `reasoning_content` attribute for streaming output. But the client supports extra attributes in the response. You can use `hasattr` to check if the `reasoning_content` attribute is present in the response. For example:
+OpenAI Python client library does not officially support `reasoning` attribute for streaming output. But the client supports extra attributes in the response. You can use `hasattr` to check if the `reasoning` attribute is present in the response. For example:

??? code

@@ -127,22 +130,22 @@ OpenAI Python client library does not officially support `reasoning_content` att…
)

print("client: Start streaming chat completions...")
-printed_reasoning_content = False
+printed_reasoning = False
printed_content = False

for chunk in stream:
-# Safely extract reasoning_content and content from delta,
+# Safely extract reasoning and content from delta,
# defaulting to None if attributes don't exist or are empty strings
-reasoning_content = (
-    getattr(chunk.choices[0].delta, "reasoning_content", None) or None
+reasoning = (
+    getattr(chunk.choices[0].delta, "reasoning", None) or None
)
content = getattr(chunk.choices[0].delta, "content", None) or None

-if reasoning_content is not None:
-    if not printed_reasoning_content:
-        printed_reasoning_content = True
-        print("reasoning_content:", end="", flush=True)
-    print(reasoning_content, end="", flush=True)
+if reasoning is not None:
+    if not printed_reasoning:
+        printed_reasoning = True
+        print("reasoning:", end="", flush=True)
+    print(reasoning, end="", flush=True)
elif content is not None:
if not printed_content:
printed_content = True
@@ -151,11 +154,11 @@ OpenAI Python client library does not officially support `reasoning_content` att…
print(content, end="", flush=True)
```

-Remember to check whether the `reasoning_content` exists in the response before accessing it. You could check out the [example](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py).
+Remember to check whether the `reasoning` exists in the response before accessing it. You could check out the [example](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py).

## Tool Calling

-The reasoning content is also available when both tool calling and the reasoning parser are enabled. Additionally, tool calling only parses functions from the `content` field, not from the `reasoning_content`.
+The reasoning content is also available when both tool calling and the reasoning parser are enabled. Additionally, tool calling only parses functions from the `content` field, not from the `reasoning`.

??? code

@@ -192,7 +195,7 @@ The reasoning content is also available when both tool calling and the reasoning…
print(response)
tool_call = response.choices[0].message.tool_calls[0].function

print(f"reasoning_content: {response.choices[0].message.reasoning_content}")
print(f"reasoning: {response.choices[0].message.reasoning}")
print(f"Function called: {tool_call.name}")
print(f"Arguments: {tool_call.arguments}")
```
@@ -224,7 +227,7 @@ You can add a new `ReasoningParser` similar to [vllm/reasoning/deepseek_r1_reaso…
def __init__(self, tokenizer: AnyTokenizer):
super().__init__(tokenizer)

-def extract_reasoning_content_streaming(
+def extract_reasoning_streaming(
self,
previous_text: str,
current_text: str,
@@ -241,7 +244,7 @@ You can add a new `ReasoningParser` similar to [vllm/reasoning/deepseek_r1_reaso…
previously been parsed and extracted (see constructor)
"""

-def extract_reasoning_content(
+def extract_reasoning(
self,
model_output: str,
request: ChatCompletionRequest | ResponsesRequest,
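
For illustration, a minimal sketch of what a concrete non-streaming `extract_reasoning` might look like for a model that wraps its reasoning in `<think>...</think>` tags. The delimiters and the `(reasoning, content)` return pair below mirror the DeepSeek R1 parser but are assumptions, not a pinned API:

```python
def extract_reasoning(
    self,
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]:
    # Assumed delimiters; a real parser derives these from the model's
    # chat template / special tokens.
    if "</think>" not in model_output:
        # No closing tag: treat the whole output as reasoning so far.
        return model_output, None
    reasoning, _, content = model_output.partition("</think>")
    reasoning = reasoning.removeprefix("<think>").strip()
    return reasoning or None, content.strip() or None
```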
2 changes: 1 addition & 1 deletion docs/features/structured_outputs.md
@@ -204,7 +204,7 @@ Note that you can use reasoning with any provided structured outputs feature. Th…
}
},
)
print("reasoning_content: ", completion.choices[0].message.reasoning_content)
print("reasoning: ", completion.choices[0].message.reasoning)
print("content: ", completion.choices[0].message.content)
```

---
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
An example demonstrates how to use tool calling with reasoning models
-like QwQ-32B. The reasoning_content will not be parsed by the tool
+like QwQ-32B. The reasoning will not be parsed by the tool
calling process; only the final output will be parsed.

To run this example, you need to start the vLLM server with both
@@ -78,7 +78,7 @@ def get_current_weather(city: str, state: str, unit: "str"):


def extract_reasoning_and_calls(chunks: list):
-reasoning_content = ""
+reasoning = ""
tool_call_idx = -1
arguments = []
function_names = []
@@ -97,9 +97,9 @@ def extract_reasoning_and_calls(chunks: list):
if tool_call.function.arguments:
arguments[tool_call_idx] += tool_call.function.arguments
else:
-if hasattr(chunk.choices[0].delta, "reasoning_content"):
-    reasoning_content += chunk.choices[0].delta.reasoning_content
-return reasoning_content, arguments, function_names
+if hasattr(chunk.choices[0].delta, "reasoning"):
+    reasoning += chunk.choices[0].delta.reasoning
+return reasoning, arguments, function_names


def main():
@@ -115,7 +115,7 @@ def main():
tool_calls = client.chat.completions.create(
messages=messages, model=model, tools=tools
)
print(f"reasoning_content: {tool_calls.choices[0].message.reasoning_content}")
print(f"reasoning: {tool_calls.choices[0].message.reasoning}")
print(f"function name: {tool_calls.choices[0].message.tool_calls[0].function.name}")
print(
f"function arguments: "
@@ -129,9 +129,9 @@ def main():

chunks = list(tool_calls_stream)

-reasoning_content, arguments, function_names = extract_reasoning_and_calls(chunks)
+reasoning, arguments, function_names = extract_reasoning_and_calls(chunks)

print(f"reasoning_content: {reasoning_content}")
print(f"reasoning: {reasoning}")
print(f"function name: {function_names[0]}")
print(f"function arguments: {arguments[0]}")

@@ -144,7 +144,7 @@ def main():
)

tool_call = tool_calls.choices[0].message.tool_calls[0].function
print(f"reasoning_content: {tool_calls.choices[0].message.reasoning_content}")
print(f"reasoning: {tool_calls.choices[0].message.reasoning}")
print(f"function name: {tool_call.name}")
print(f"function arguments: {tool_call.arguments}")
print("----------Stream Generate With Named Function Calling--------------")
@@ -159,8 +159,8 @@ def main():

chunks = list(tool_calls_stream)

-reasoning_content, arguments, function_names = extract_reasoning_and_calls(chunks)
-print(f"reasoning_content: {reasoning_content}")
+reasoning, arguments, function_names = extract_reasoning_and_calls(chunks)
+print(f"reasoning: {reasoning}")
print(f"function name: {function_names[0]}")
print(f"function arguments: {arguments[0]}")
print("\n\n")
---
@@ -38,10 +38,10 @@ def main():
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
response = client.chat.completions.create(model=model, messages=messages)

-reasoning_content = response.choices[0].message.reasoning_content
+reasoning = response.choices[0].message.reasoning
content = response.choices[0].message.content

print("reasoning_content for Round 1:", reasoning_content)
print("reasoning for Round 1:", reasoning)
print("content for Round 1:", content)

# Round 2
@@ -54,10 +54,10 @@ def main():
)
response = client.chat.completions.create(model=model, messages=messages)

-reasoning_content = response.choices[0].message.reasoning_content
+reasoning = response.choices[0].message.reasoning
content = response.choices[0].message.content

print("reasoning_content for Round 2:", reasoning_content)
print("reasoning for Round 2:", reasoning)
print("content for Round 2:", content)


---
@@ -20,7 +20,7 @@
where you want to display chat completions to the user as they are generated
by the model.

-Remember to check content and reasoning_content exist in `ChatCompletionChunk`,
+Remember to check content and reasoning exist in `ChatCompletionChunk`,
content may not exist leading to errors if you try to access it.
"""

@@ -47,22 +47,20 @@ def main():
stream = client.chat.completions.create(model=model, messages=messages, stream=True)

print("client: Start streaming chat completions...")
-printed_reasoning_content = False
+printed_reasoning = False
printed_content = False

for chunk in stream:
-# Safely extract reasoning_content and content from delta,
+# Safely extract reasoning and content from delta,
# defaulting to None if attributes don't exist or are empty strings
-reasoning_content = (
-    getattr(chunk.choices[0].delta, "reasoning_content", None) or None
-)
+reasoning = getattr(chunk.choices[0].delta, "reasoning", None) or None
content = getattr(chunk.choices[0].delta, "content", None) or None

-if reasoning_content is not None:
-    if not printed_reasoning_content:
-        printed_reasoning_content = True
-        print("reasoning_content:", end="", flush=True)
-    print(reasoning_content, end="", flush=True)
+if reasoning is not None:
+    if not printed_reasoning:
+        printed_reasoning = True
+        print("reasoning:", end="", flush=True)
+    print(reasoning, end="", flush=True)
elif content is not None:
if not printed_content:
printed_content = True
8 changes: 4 additions & 4 deletions examples/online_serving/streamlit_openai_chatbot_webserver.py
@@ -159,8 +159,8 @@ def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None…
for chunk in response:
delta = chunk.choices[0].delta
# Stream reasoning first
-if reason and hasattr(delta, "reasoning_content") and live_think:
-    rc = delta.reasoning_content
+if reason and hasattr(delta, "reasoning") and live_think:
+    rc = delta.reasoning
if rc:
think_text += rc
live_think.markdown(think_text + "▌")
@@ -262,8 +262,8 @@ def server_supports_reasoning():
messages=[{"role": "user", "content": "Hi"}],
stream=False,
)
-return hasattr(resp.choices[0].message, "reasoning_content") and bool(
-    resp.choices[0].message.reasoning_content
+return hasattr(resp.choices[0].message, "reasoning") and bool(
+    resp.choices[0].message.reasoning
)


---
@@ -33,7 +33,7 @@ async def print_stream_response(
async for chunk in stream_response:
delta = chunk.choices[0].delta

-reasoning_chunk_text: str | None = getattr(delta, "reasoning_content", None)
+reasoning_chunk_text: str | None = getattr(delta, "reasoning", None)
content_chunk_text = delta.content

if args.reasoning:
@@ -255,8 +255,8 @@ async def cli():
for constraint, response in zip(constraints, results):
print(f"\n\n{constraint}:")
message = response.choices[0].message
-if args.reasoning and hasattr(message, "reasoning_content"):
-    print(f" Reasoning: {message.reasoning_content or ''}")
+if args.reasoning and hasattr(message, "reasoning"):
+    print(f" Reasoning: {message.reasoning or ''}")
print(f" Content: {message.content!r}")


14 changes: 7 additions & 7 deletions tests/entrypoints/openai/test_chat_with_tool_reasoning.py
@@ -80,7 +80,7 @@ async def client(server):


def extract_reasoning_and_calls(chunks: list):
-reasoning_content = ""
+reasoning = ""
tool_call_idx = -1
arguments = []
function_names = []
@@ -99,9 +99,9 @@ def extract_reasoning_and_calls(chunks: list):
if tool_call.function.arguments:
arguments[tool_call_idx] += tool_call.function.arguments
else:
-if hasattr(chunk.choices[0].delta, "reasoning_content"):
-    reasoning_content += chunk.choices[0].delta.reasoning_content
-return reasoning_content, arguments, function_names
+if hasattr(chunk.choices[0].delta, "reasoning"):
+    reasoning += chunk.choices[0].delta.reasoning
+return reasoning, arguments, function_names


# test streaming
@@ -119,8 +119,8 @@ async def test_chat_streaming_of_tool_and_reasoning(client: openai.AsyncOpenAI):
async for chunk in stream:
chunks.append(chunk)

-reasoning_content, arguments, function_names = extract_reasoning_and_calls(chunks)
-assert len(reasoning_content) > 0
+reasoning, arguments, function_names = extract_reasoning_and_calls(chunks)
+assert len(reasoning) > 0
assert len(function_names) > 0 and function_names[0] == FUNC_NAME
assert len(arguments) > 0 and arguments[0] == FUNC_ARGS

@@ -136,6 +136,6 @@ async def test_chat_full_of_tool_and_reasoning(client: openai.AsyncOpenAI):
stream=False,
)

-assert len(tool_calls.choices[0].message.reasoning_content) > 0
+assert len(tool_calls.choices[0].message.reasoning) > 0
assert tool_calls.choices[0].message.tool_calls[0].function.name == FUNC_NAME
assert tool_calls.choices[0].message.tool_calls[0].function.arguments == FUNC_ARGS
---
@@ -180,8 +180,8 @@ async def test_function_tool_use(
extra_body={"chat_template_kwargs": {"enable_thinking": enable_thinking}},
)
if enable_thinking:
-assert chat_completion.choices[0].message.reasoning_content is not None
-assert chat_completion.choices[0].message.reasoning_content != ""
+assert chat_completion.choices[0].message.reasoning is not None
+assert chat_completion.choices[0].message.reasoning != ""
assert chat_completion.choices[0].message.tool_calls is not None
assert len(chat_completion.choices[0].message.tool_calls) > 0
else:
@@ -200,9 +200,9 @@ async def test_function_tool_use(
async for chunk in output_stream:
if chunk.choices:
if enable_thinking and getattr(
-chunk.choices[0].delta, "reasoning_content", None
+chunk.choices[0].delta, "reasoning", None
):
-reasoning.append(chunk.choices[0].delta.reasoning_content)
+reasoning.append(chunk.choices[0].delta.reasoning)
if chunk.choices[0].delta.tool_calls:
output.extend(chunk.choices[0].delta.tool_calls)

10 changes: 5 additions & 5 deletions tests/entrypoints/openai/test_run_batch.py
@@ -232,9 +232,9 @@ def test_reasoning_parser():
assert isinstance(line_dict, dict)
assert line_dict["error"] is None

-# Check that reasoning_content is present and not empty
-reasoning_content = line_dict["response"]["body"]["choices"][0]["message"][
-    "reasoning_content"
+# Check that reasoning is present and not empty
+reasoning = line_dict["response"]["body"]["choices"][0]["message"][
+    "reasoning"
]
-assert reasoning_content is not None
-assert len(reasoning_content) > 0
+assert reasoning is not None
+assert len(reasoning) > 0