192 changes: 104 additions & 88 deletions src/app/endpoints/query_v2.py
@@ -8,7 +8,15 @@

from fastapi import APIRouter, Depends, Request
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseMCPApprovalRequest,
OpenAIResponseMCPApprovalResponse,
OpenAIResponseObject,
OpenAIResponseOutput,
OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack_client import AsyncLlamaStackClient

@@ -41,6 +49,7 @@
get_topic_summary_system_prompt,
)
from utils.mcp_headers import mcp_headers_dependency
from utils.query import parse_arguments_string
from utils.responses import extract_text_from_response_output_item
from utils.shields import (
append_turn_to_conversation,
@@ -73,153 +82,160 @@


def _build_tool_call_summary( # pylint: disable=too-many-return-statements,too-many-branches
output_item: Any,
output_item: OpenAIResponseOutput,
) -> tuple[Optional[ToolCallSummary], Optional[ToolResultSummary]]:
"""Translate applicable Responses API tool outputs into ``ToolCallSummary`` records.
"""Translate Responses API tool outputs into ToolCallSummary and ToolResultSummary records.
The OpenAI ``response.output`` array may contain any ``OpenAIResponseOutput`` variant:
``message``, ``function_call``, ``file_search_call``, ``web_search_call``, ``mcp_call``,
``mcp_list_tools``, or ``mcp_approval_request``. The OpenAI spec supports more types,
but as Llama Stack does not support them yet, they are not considered here.
Processes OpenAI response output items and extracts tool call and result information.
Args:
output_item: An OpenAIResponseOutput item from the response.output array
Returns:
A tuple of (ToolCallSummary, ToolResultSummary); either element may be None
if the current Llama Stack Responses API does not provide the information.
Supported tool types:
- function_call: Function tool calls with parsed arguments (no result)
- file_search_call: File search operations with results
- web_search_call: Web search operations (incomplete)
- mcp_call: MCP calls with server labels
- mcp_list_tools: MCP server tool listings
- mcp_approval_request: MCP approval requests (no result)
- mcp_approval_response: MCP approval responses (no call)
"""
item_type = getattr(output_item, "type", None)

if item_type == "function_call":
parsed_arguments = getattr(output_item, "arguments", "")
if isinstance(parsed_arguments, dict):
args = parsed_arguments
else:
args = {"arguments": parsed_arguments}

call_id = getattr(output_item, "id", None) or getattr(
output_item, "call_id", None
)
item = cast(OpenAIResponseOutputMessageFunctionToolCall, output_item)
return (
ToolCallSummary(
id=str(call_id),
name=getattr(output_item, "name", "function_call"),
args=args,
id=item.call_id,
name=item.name,
args=parse_arguments_string(item.arguments),
type="function_call",
),
None,
None, # not supported by Responses API at all
)

if item_type == "file_search_call":
args = {
"queries": list(getattr(output_item, "queries", [])),
"status": getattr(output_item, "status", None),
}
results = getattr(output_item, "results", None)
response_payload: Optional[Any] = None
if results is not None:
# Store only the essential result metadata to avoid large payloads
item = cast(OpenAIResponseOutputMessageFileSearchToolCall, output_item)
response_payload: Optional[dict[str, Any]] = None
if item.results is not None:
response_payload = {
"results": [
{
"file_id": (
getattr(result, "file_id", None)
if not isinstance(result, dict)
else result.get("file_id")
),
"filename": (
getattr(result, "filename", None)
if not isinstance(result, dict)
else result.get("filename")
),
"score": (
getattr(result, "score", None)
if not isinstance(result, dict)
else result.get("score")
),
}
for result in results
]
"results": [result.model_dump() for result in item.results]
}
Comment on lines +123 to 127
Contributor


⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's read the specific file and understand the context around lines 123-127
cat -n src/app/endpoints/query_v2.py | head -150 | tail -50

Repository: lightspeed-core/lightspeed-stack

Length of output: 208


🏁 Script executed:

# Also check the full file structure to understand what we're working with
wc -l src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 109


🏁 Script executed:

# Let's search for the item.results usage and understand its type
rg "item\.results" src/app/endpoints/query_v2.py -B 3 -A 3

Repository: lightspeed-core/lightspeed-stack

Length of output: 717


🏁 Script executed:

# Search for model_dump usage in this file to understand patterns
rg "model_dump" src/app/endpoints/query_v2.py -B 2 -A 2

Repository: lightspeed-core/lightspeed-stack

Length of output: 267


🏁 Script executed:

# Look for any dict handling with results in this file
rg "results.*dict|dict.*results" src/app/endpoints/query_v2.py -B 2 -A 2

Repository: lightspeed-core/lightspeed-stack

Length of output: 58


🏁 Script executed:

# Check type annotations and imports to understand data types
head -50 src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 1680


🏁 Script executed:

# Let's get more context around the code and understand the full function
sed -n '100,150p' src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 2045


🏁 Script executed:

# Search for more result processing patterns to find where dicts are allegedly handled
rg "results" src/app/endpoints/query_v2.py -B 2 -A 2 | head -100

Repository: lightspeed-core/lightspeed-stack

Length of output: 2144


🏁 Script executed:

# Check if there's any handling that creates dicts from results
rg '"results"' src/app/endpoints/query_v2.py -B 3 -A 3

Repository: lightspeed-core/lightspeed-stack

Length of output: 862


🏁 Script executed:

# Let's look for the type definition - check if there are any type stubs or if we can infer from usage
rg "OpenAIResponseOutputMessageFileSearchToolCall" . -B 2 -A 2

Repository: lightspeed-core/lightspeed-stack

Length of output: 806


🏁 Script executed:

# Check if there's a models or type definition file in the project
find . -name "*.py" -type f | grep -E "(models|types)" | head -20

Repository: lightspeed-core/lightspeed-stack

Length of output: 757


🏁 Script executed:

# Let's see the full context of where dicts are handled
sed -n '550,650p' src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 4838


🏁 Script executed:

# Get the full section of the dict handling code
rg -A 10 "Handle both object and dict access" src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 1262


🏁 Script executed:

# Let's understand when results might be dicts vs objects
sed -n '540,580p' src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 2027


🏁 Script executed:

# Check the extraction_text_from_response_output_item function to understand the pattern better
ast-grep --pattern 'def extract_text_from_response_output_item($_) {
  $$$
}'

Repository: lightspeed-core/lightspeed-stack

Length of output: 58


Handle dict results to avoid AttributeError.
result.model_dump() will fail if item.results contains dicts. Other parts of this same file already handle this inconsistency (e.g., lines ~540-580 use isinstance(result, dict) checks for the same results field), and the token usage function documents that llama_stack returns both dicts and objects due to internal type inconsistencies. Use a robust check to handle both cases:

🐛 Robust result handling
-            response_payload = {
-                "results": [result.model_dump() for result in item.results]
-            }
+            response_payload = {
+                "results": [
+                    result if isinstance(result, dict) else result.model_dump()
+                    for result in item.results
+                ]
+            }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
response_payload: Optional[dict[str, Any]] = None
if item.results is not None:
response_payload = {
"results": [
{
"file_id": (
getattr(result, "file_id", None)
if not isinstance(result, dict)
else result.get("file_id")
),
"filename": (
getattr(result, "filename", None)
if not isinstance(result, dict)
else result.get("filename")
),
"score": (
getattr(result, "score", None)
if not isinstance(result, dict)
else result.get("score")
),
}
for result in results
]
"results": [result.model_dump() for result in item.results]
}
response_payload: Optional[dict[str, Any]] = None
if item.results is not None:
response_payload = {
"results": [
result if isinstance(result, dict) else result.model_dump()
for result in item.results
]
}
🤖 Prompt for AI Agents
In `@src/app/endpoints/query_v2.py` around lines 123 - 127, The current
construction of response_payload assumes every entry in item.results has
model_dump(), causing AttributeError for dict entries; update the logic that
builds response_payload (where response_payload is set from item.results) to
iterate results and for each result use it directly if isinstance(result, dict)
otherwise call result.model_dump(); ensure this change is applied to the block
that creates the {"results": [...] } payload so both dicts and model objects are
handled safely.
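
A minimal standalone sketch (not code from this PR) of the pattern the review suggests: serialize each file-search result whether it arrives as a plain dict or as a Pydantic model. FileSearchResult below is a hypothetical stand-in for the llama-stack result type, used only to make the example runnable.

from typing import Any

from pydantic import BaseModel


class FileSearchResult(BaseModel):
    file_id: str
    filename: str
    score: float


def serialize_results(results: list[Any]) -> list[dict[str, Any]]:
    """Return plain dicts regardless of whether entries are dicts or models."""
    return [r if isinstance(r, dict) else r.model_dump() for r in results]


if __name__ == "__main__":
    mixed = [
        {"file_id": "f1", "filename": "doc.md", "score": 0.9},           # dict entry
        FileSearchResult(file_id="f2", filename="notes.md", score=0.7),  # model entry
    ]
    print(serialize_results(mixed))

The isinstance guard is the whole fix: dict entries pass through untouched, model entries are converted with model_dump(), so the payload ends up JSON-serializable either way.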

return ToolCallSummary(
id=str(getattr(output_item, "id")),
id=item.id,
name=DEFAULT_RAG_TOOL,
args=args,
args={"queries": item.queries},
type="file_search_call",
), ToolResultSummary(
id=str(getattr(output_item, "id")),
status=str(getattr(output_item, "status", None)),
content=json.dumps(response_payload) if response_payload else None,
id=item.id,
status=item.status,
content=json.dumps(response_payload) if response_payload else "",
type="file_search_call",
round=1,
)

# Incomplete OpenAI Responses API definition in LLS: action attribute not supported yet
if item_type == "web_search_call":
args = {"status": getattr(output_item, "status", None)}
item = cast(OpenAIResponseOutputMessageWebSearchToolCall, output_item)
return (
ToolCallSummary(
id=str(getattr(output_item, "id")),
id=item.id,
name="web_search",
args=args,
args={},
type="web_search_call",
),
None,
ToolResultSummary(
id=item.id,
status=item.status,
content="",
type="web_search_call",
round=1,
),
)

if item_type == "mcp_call":
parsed_arguments = getattr(output_item, "arguments", "")
args = {"arguments": parsed_arguments}
server_label = getattr(output_item, "server_label", None)
if server_label:
args["server_label"] = server_label
error = getattr(output_item, "error", None)
if error:
args["error"] = error
item = cast(OpenAIResponseOutputMessageMCPCall, output_item)
args = parse_arguments_string(item.arguments)
if item.server_label:
args["server_label"] = item.server_label
content = item.error if item.error else (item.output if item.output else "")

return ToolCallSummary(
id=str(getattr(output_item, "id")),
name=getattr(output_item, "name", "mcp_call"),
id=item.id,
name=item.name,
args=args,
type="mcp_call",
), ToolResultSummary(
id=str(getattr(output_item, "id")),
status=str(getattr(output_item, "status", None)),
content=getattr(output_item, "output", ""),
id=item.id,
status="success" if item.error is None else "failure",
content=content,
type="mcp_call",
Comment on lines +165 to 176
Contributor


⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# Find the ToolResultSummary definition
rg "class ToolResultSummary" -A 15

Repository: lightspeed-core/lightspeed-stack

Length of output: 1059


🏁 Script executed:

# Check the context around line 165-176 in query_v2.py
cat -n src/app/endpoints/query_v2.py | sed -n '155,185p'

Repository: lightspeed-core/lightspeed-stack

Length of output: 1287


🏁 Script executed:

# Find what item's type is
rg "def.*item.*:" src/app/endpoints/query_v2.py -B 5 -A 1 | head -40

Repository: lightspeed-core/lightspeed-stack

Length of output: 58


🏁 Script executed:

# Check if json is already imported
grep -n "^import json\|^from json" src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 89


🏁 Script executed:

# Search for how ToolResultSummary is used downstream
rg "ToolResultSummary" src/app/endpoints/query_v2.py -B 2 -A 2

Repository: lightspeed-core/lightspeed-stack

Length of output: 1964


🏁 Script executed:

# Find OpenAIResponseOutputMessageMCPCall definition to see item.error and item.output types
rg "class OpenAIResponseOutputMessageMCPCall" -A 20

Repository: lightspeed-core/lightspeed-stack

Length of output: 58


🏁 Script executed:

# Also check if there are type hints for error and output fields
rg "OpenAIResponseOutputMessageMCPCall\|MCPCall" --type py -A 15 | head -60

Repository: lightspeed-core/lightspeed-stack

Length of output: 58


🏁 Script executed:

# Check the actual Llama Stack client types
fd "llama_stack" --type f -name "*.py" | xargs rg "MCPCall" -A 5 2>/dev/null | head -50

Repository: lightspeed-core/lightspeed-stack

Length of output: 1169


🏁 Script executed:

# Let's search for imports related to OpenAI response types
rg "from.*import.*OpenAIResponseOutput" -A 2

Repository: lightspeed-core/lightspeed-stack

Length of output: 58


🏁 Script executed:

# Check the imports at the top of query_v2.py to see where types come from
head -50 src/app/endpoints/query_v2.py

Repository: lightspeed-core/lightspeed-stack

Length of output: 1680


🏁 Script executed:

# Search for Llama Stack client imports to understand the types
rg "from llama_stack" src/app/endpoints/query_v2.py -A 10

Repository: lightspeed-core/lightspeed-stack

Length of output: 861


🏁 Script executed:

# Check if there's any documentation or usage that shows item.output/item.error types
rg "item\.output|item\.error" src/app/endpoints/query_v2.py -B 5 -A 2

Repository: lightspeed-core/lightspeed-stack

Length of output: 693


🏁 Script executed:

# Let's check what ToolResultSummary.content is expecting and how it's used
rg "ToolResultSummary" src/ -A 3 -B 1 | grep -A 10 "content"

Repository: lightspeed-core/lightspeed-stack

Length of output: 2595


🏁 Script executed:

# Check downstream usage - see if content is serialized to JSON later
rg "tool_results\|ToolResultSummary" src/app/endpoints/query_v2.py -A 3 | tail -30

Repository: lightspeed-core/lightspeed-stack

Length of output: 58


Serialize MCP output to string to match ToolResultSummary.content field type.
The ToolResultSummary.content field is typed as str. If item.error or item.output are dicts or lists from the Llama Stack API, they will be passed directly without serialization, causing type mismatches. This is inconsistent with other tool result types in the same function (web search, MCP list tools, and approval requests all use json.dumps() for structured content).

🧾 Serialize non-string outputs
-        content = item.error if item.error else (item.output if item.output else "")
+        raw_content = item.error if item.error is not None else item.output
+        if raw_content is None:
+            content = ""
+        elif isinstance(raw_content, str):
+            content = raw_content
+        else:
+            content = json.dumps(raw_content)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
content = item.error if item.error else (item.output if item.output else "")
return ToolCallSummary(
id=str(getattr(output_item, "id")),
name=getattr(output_item, "name", "mcp_call"),
id=item.id,
name=item.name,
args=args,
type="mcp_call",
), ToolResultSummary(
id=str(getattr(output_item, "id")),
status=str(getattr(output_item, "status", None)),
content=getattr(output_item, "output", ""),
id=item.id,
status="success" if item.error is None else "failure",
content=content,
type="mcp_call",
raw_content = item.error if item.error is not None else item.output
if raw_content is None:
content = ""
elif isinstance(raw_content, str):
content = raw_content
else:
content = json.dumps(raw_content)
return ToolCallSummary(
id=item.id,
name=item.name,
args=args,
type="mcp_call",
), ToolResultSummary(
id=item.id,
status="success" if item.error is None else "failure",
content=content,
type="mcp_call",
🤖 Prompt for AI Agents
In `@src/app/endpoints/query_v2.py` around lines 165 - 176, The
ToolResultSummary.content currently assigns item.error or item.output directly
(via the local variable content), which can be dict/list and violates the str
type; update the code that builds content (used when constructing
ToolResultSummary in this block) to serialize non-string values with json.dumps
(e.g., if not isinstance(content, str): content = json.dumps(content)) before
passing to ToolResultSummary so item.error/item.output are always strings;
ensure to import json if not already and preserve the existing
"success"/"failure" logic and the surrounding ToolCallSummary/ToolResultSummary
construction.
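
A standalone sketch of the serialization rule the review asks for (an illustration under assumptions, not the project's actual code): ToolResultSummary.content is typed as str, so any dict or list payload coming back from the MCP call has to go through json.dumps before assignment.

import json
from typing import Any


def to_content(error: Any, output: Any) -> str:
    """Prefer error over output, then coerce the chosen value to a string."""
    raw = error if error is not None else output
    if raw is None:
        return ""
    if isinstance(raw, str):
        return raw
    return json.dumps(raw)


# A plain string passes through; structured data is JSON-encoded.
assert to_content(None, "tool output") == "tool output"
assert to_content(None, {"rows": [1, 2]}) == '{"rows": [1, 2]}'
assert to_content("boom", None) == "boom"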

round=1,
)

if item_type == "mcp_list_tools":
tool_names: list[str] = []
for tool in getattr(output_item, "tools", []):
if hasattr(tool, "name"):
tool_names.append(str(getattr(tool, "name")))
elif isinstance(tool, dict) and tool.get("name"):
tool_names.append(str(tool.get("name")))
args = {
"server_label": getattr(output_item, "server_label", None),
"tools": tool_names,
item = cast(OpenAIResponseOutputMessageMCPListTools, output_item)
tools_info = [
{
"name": tool.name,
"description": tool.description,
"input_schema": tool.input_schema,
}
for tool in item.tools
]
content_dict = {
"server_label": item.server_label,
"tools": tools_info,
}
return (
ToolCallSummary(
id=str(getattr(output_item, "id")),
id=item.id,
name="mcp_list_tools",
args=args,
args={"server_label": item.server_label},
type="mcp_list_tools",
),
None,
ToolResultSummary(
id=item.id,
status="success",
content=json.dumps(content_dict),
type="mcp_list_tools",
round=1,
),
)

if item_type == "mcp_approval_request":
parsed_arguments = getattr(output_item, "arguments", "")
args = {"arguments": parsed_arguments}
server_label = getattr(output_item, "server_label", None)
if server_label:
args["server_label"] = server_label
item = cast(OpenAIResponseMCPApprovalRequest, output_item)
args = parse_arguments_string(item.arguments)
return (
ToolCallSummary(
id=str(getattr(output_item, "id")),
name=getattr(output_item, "name", "mcp_approval_request"),
id=item.id,
name=item.name,
args=args,
type="tool_call",
),
None,
)

if item_type == "mcp_approval_response":
item = cast(OpenAIResponseMCPApprovalResponse, output_item)
content_dict = {}
if item.reason:
content_dict["reason"] = item.reason
return (
None,
ToolResultSummary(
id=item.approval_request_id,
status="success" if item.approve else "denied",
content=json.dumps(content_dict),
type="mcp_approval_response",
round=1,
),
)

return None, None
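
The diff imports parse_arguments_string from utils.query, but its implementation is not part of this hunk. A plausible sketch follows, purely an assumption inferred from its call sites above (it receives a tool call's raw arguments string and must return a dict suitable for ToolCallSummary.args).

# Hypothetical sketch of utils.query.parse_arguments_string — the real helper is
# not shown in this diff; this only mirrors how the function is used above.
import json
from typing import Any, Optional


def parse_arguments_string(arguments: Optional[str]) -> dict[str, Any]:
    """Parse a tool-call arguments string into a dict.

    Falls back to wrapping the raw value when it is not a JSON object.
    """
    if not arguments:
        return {}
    try:
        parsed = json.loads(arguments)
    except (TypeError, ValueError):
        return {"arguments": arguments}
    if isinstance(parsed, dict):
        return parsed
    return {"arguments": arguments}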


3 changes: 1 addition & 2 deletions src/app/endpoints/streaming_query.py
@@ -10,7 +10,6 @@
from typing import (
Annotated,
Any,
AsyncGenerator,
AsyncIterator,
Iterator,
Optional,
@@ -369,7 +368,7 @@ def generic_llm_error(error: Exception, media_type: str) -> str:
)


async def stream_http_error(error: AbstractErrorResponse) -> AsyncGenerator[str, None]:
def stream_http_error(error: AbstractErrorResponse) -> Iterator[str]:
"""
Yield an SSE-formatted error response for generic LLM or API errors.

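
The streaming_query.py change turns stream_http_error from an async generator into a plain synchronous iterator of SSE strings. A brief illustrative sketch (names and payload shape are assumptions, not the project's actual SSE schema): a generator with no awaits does not need to be async, and sync iterators can still be handed to an async streaming response.

import json
from typing import Iterator


def stream_http_error_sketch(status: int, message: str) -> Iterator[str]:
    """Yield a single SSE-formatted error event."""
    payload = {"status": status, "detail": message}
    yield f"data: {json.dumps(payload)}\n\n"


# Nothing inside is awaited, so a plain generator suffices.
for chunk in stream_http_error_sketch(500, "LLM backend unavailable"):
    print(chunk, end="")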