
Commit 6296016

Fix an issue where LLM responses are not streamed or rendered properly in the AI Assistant. #9734
* Address CodeRabbit review feedback for streaming and SQL extraction.

  - Anthropic: preserve separators between text blocks in streaming to
    match _parse_response() behavior.
  - Docker: validate that the API URL points to a loopback address to
    constrain the request surface.
  - Docker/OpenAI: raise LLMClientError on empty streams instead of
    yielding blank LLMResponse objects, matching non-streaming behavior.
  - SQL extraction: strip trailing semicolons before joining blocks to
    avoid double semicolons in output.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Address remaining CodeRabbit review feedback for streaming and rendering.

  - Use a distinct 3-tuple ('complete', text, messages) for completion
    events to avoid ambiguity with ('tool_use', [...]) 2-tuples in chat
    streaming.
  - Pass conversation history from the request into
    chat_with_database_stream() so follow-up NLQ turns retain context.
  - Add re.IGNORECASE to the SQL fence regex for case-insensitive matching.
  - Render MarkdownContent as a block element instead of a span to avoid
    invalid DOM when the response contains paragraphs, lists, or tables.
  - Keep the stop notice as a separate message instead of appending it to
    partial markdown, preventing it from being swallowed by open code
    fences.
  - Snapshot streamingIdRef before setMessages in the error handler to
    avoid a race condition where the ref is cleared before React executes
    the updater.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Address CodeRabbit review feedback for streaming providers and history.

  - Fix a critical NameError: use self._api_url instead of the undefined
    API_URL in the anthropic and openai streaming _process_stream()
    methods.
  - Match the sync path's auth handling: conditionally set API key headers
    in the streaming paths for both the anthropic and openai providers.
  - Remove the unconditional temperature from the openai streaming payload
    to match the sync path's compatibility approach.
  - Add URL scheme validation to OllamaClient.__init__ to prevent unsafe
    local/resource access via non-http schemes.
  - Guard the ollama streaming finalizer: raise an error when the stream
    drops without a done frame and no content was received.
  - Update the chat.py type hint and docstring for the 3-tuple completion
    event.
  - Serialize and return the filtered conversation history in the complete
    SSE event so the client can round-trip it on follow-up turns.
  - Store and send conversation history from NLQChatPanel; clear it on
    conversation reset.
  - Fix the JSON-fallback SQL render path: clear content when SQL was
    extracted without fenced blocks so ChatMessage uses the sql-only
    renderer.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix a missing closing brace in the NLQChatPanel switch statement.

  Adding block scoping to the error case introduced an unmatched brace
  that prevented the switch statement from closing properly, causing an
  eslint parse error.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix the missing compaction module and the SQL extraction test.

  - Replace compaction module imports with inline history deserialization
    and filtering, since compaction.py is on a different branch.
  - Add rstrip(';') to the SQL extraction test to match production code,
    fixing a double-semicolon assertion failure.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix SQL extraction test expected values after the rstrip(';') change.

  The rstrip(';') applied to each block before joining means single blocks
  and the last block in multi-block joins no longer have trailing
  semicolons. Update the expected values to match.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Strictly guard the Ollama stream: raise if no terminal done frame is
  received.

  Truncated content from a dropped connection should not be treated as a
  complete response, even if partial text was streamed. Always raise when
  final_data is None, matching CodeRabbit's recommendation.
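For context, the SQL extraction fixes above amount to a case-insensitive fence regex plus per-block semicolon stripping. A minimal sketch, with a hypothetical helper name and pattern (not pgAdmin's actual code):

    import re

    # Hypothetical sketch of the extraction described in this commit;
    # the real helper in pgAdmin may differ in name and pattern.
    SQL_FENCE_RE = re.compile(r"```sql\s*(.*?)```",
                              re.IGNORECASE | re.DOTALL)

    def extract_sql(markdown_text: str) -> str:
        """Join all fenced SQL blocks into a single statement string."""
        blocks = [b.strip() for b in SQL_FENCE_RE.findall(markdown_text)]
        # rstrip(';') on each block so joining with ';' cannot yield
        # double semicolons; single and last blocks end without one.
        return ";\n\n".join(b.rstrip(';') for b in blocks)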
1 parent e347210 commit 6296016

14 files changed

Lines changed: 1881 additions & 223 deletions


docs/en_US/release_notes_9_14.rst

Lines changed: 1 addition & 0 deletions
@@ -41,5 +41,6 @@ Bug fixes
 | `Issue #9721 <https://github.com/pgadmin-org/pgadmin4/issues/9721>`_ - Fixed an issue where permissions page is not completely accessible on full scroll.
 | `Issue #9729 <https://github.com/pgadmin-org/pgadmin4/issues/9729>`_ - Fixed an issue where some LLM models would not use database tools in the AI assistant, instead returning text descriptions of tool calls.
 | `Issue #9732 <https://github.com/pgadmin-org/pgadmin4/issues/9732>`_ - Improve the AI Assistant user prompt to be more descriptive of the actual functionality.
+| `Issue #9734 <https://github.com/pgadmin-org/pgadmin4/issues/9734>`_ - Fixed an issue where LLM responses are not streamed or rendered properly in the AI Assistant.
 | `Issue #9736 <https://github.com/pgadmin-org/pgadmin4/issues/9736>`_ - Fix an issue where the AI Assistant was not retaining conversation context between messages, with chat history compaction to manage token budgets.
 | `Issue #9740 <https://github.com/pgadmin-org/pgadmin4/issues/9740>`_ - Fixed an issue where the AI Assistant input textbox sometimes swallows the first character of input.

web/pgadmin/llm/chat.py

Lines changed: 115 additions & 2 deletions
@@ -14,10 +14,11 @@
 """
 
 import json
-from typing import Optional
+from collections.abc import Generator
+from typing import Optional, Union
 
 from pgadmin.llm.client import get_llm_client, is_llm_available
-from pgadmin.llm.models import Message, StopReason
+from pgadmin.llm.models import Message, LLMResponse, StopReason
 from pgadmin.llm.tools import DATABASE_TOOLS, execute_tool, DatabaseToolError
 from pgadmin.llm.utils import get_max_tool_iterations
 
@@ -153,6 +154,118 @@ def chat_with_database(
     )
 
 
+def chat_with_database_stream(
+    user_message: str,
+    sid: int,
+    did: int,
+    conversation_history: Optional[list[Message]] = None,
+    system_prompt: Optional[str] = None,
+    max_tool_iterations: Optional[int] = None,
+    provider: Optional[str] = None,
+    model: Optional[str] = None
+) -> Generator[Union[str, tuple], None, None]:
+    """
+    Stream an LLM chat conversation with database tool access.
+
+    Like chat_with_database, but yields text chunks as the final
+    response streams in. During tool-use iterations, no text is
+    yielded (tools are executed silently).
+
+    Yields:
+        str: Text content chunks from the final LLM response.
+
+        The last item yielded is a 3-tuple of
+        ('complete', final_response_text, updated_conversation_history).
+
+    Raises:
+        LLMClientError: If the LLM request fails.
+        RuntimeError: If LLM is not available or max iterations exceeded.
+    """
+    if not is_llm_available():
+        raise RuntimeError("LLM is not configured. Please configure an LLM "
+                           "provider in Preferences > AI.")
+
+    client = get_llm_client(provider=provider, model=model)
+    if not client:
+        raise RuntimeError("Failed to create LLM client")
+
+    messages = list(conversation_history) if conversation_history else []
+    messages.append(Message.user(user_message))
+
+    if system_prompt is None:
+        system_prompt = DEFAULT_SYSTEM_PROMPT
+
+    if max_tool_iterations is None:
+        max_tool_iterations = get_max_tool_iterations()
+
+    iteration = 0
+    while iteration < max_tool_iterations:
+        iteration += 1
+
+        # Stream the LLM response, yielding text chunks as they arrive
+        response = None
+        for item in client.chat_stream(
+            messages=messages,
+            tools=DATABASE_TOOLS,
+            system_prompt=system_prompt
+        ):
+            if isinstance(item, LLMResponse):
+                response = item
+            elif isinstance(item, str):
+                yield item
+
+        if response is None:
+            raise RuntimeError("No response received from LLM")
+
+        messages.append(response.to_message())
+
+        if response.stop_reason != StopReason.TOOL_USE:
+            # Final response - yield a 3-tuple to distinguish from
+            # the 2-tuple tool_use event
+            yield ('complete', response.content, messages)
+            return
+
+        # Signal that tools are being executed so the caller can
+        # reset streaming state and show a thinking indicator
+        yield ('tool_use', [tc.name for tc in response.tool_calls])
+
+        # Execute tool calls
+        tool_results = []
+        for tool_call in response.tool_calls:
+            try:
+                result = execute_tool(
+                    tool_name=tool_call.name,
+                    arguments=tool_call.arguments,
+                    sid=sid,
+                    did=did
+                )
+                tool_results.append(Message.tool_result(
+                    tool_call_id=tool_call.id,
+                    content=json.dumps(result, default=str),
+                    is_error=False
+                ))
+            except (DatabaseToolError, ValueError) as e:
+                tool_results.append(Message.tool_result(
+                    tool_call_id=tool_call.id,
+                    content=json.dumps({"error": str(e)}),
+                    is_error=True
+                ))
+            except Exception as e:
+                tool_results.append(Message.tool_result(
+                    tool_call_id=tool_call.id,
+                    content=json.dumps({
+                        "error": f"Unexpected error: {str(e)}"
+                    }),
+                    is_error=True
+                ))
+
+        messages.extend(tool_results)
+
+    raise RuntimeError(
+        f"Exceeded maximum tool iterations ({max_tool_iterations})"
+    )
+
+
 def single_query(
     question: str,
     sid: int,
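As a usage note, here is a minimal sketch of how a caller might consume this generator. The render_partial, show_thinking, and save_history helpers are hypothetical placeholders; pgAdmin's actual SSE endpoint wires these events differently:

    # Hypothetical consumer loop for chat_with_database_stream().
    for item in chat_with_database_stream("How many tables?", sid=1, did=2):
        if isinstance(item, str):
            render_partial(item)           # incremental text chunk
        elif item[0] == 'tool_use':
            show_thinking(item[1])         # names of tools being executed
        elif item[0] == 'complete':
            _, final_text, history = item  # terminal 3-tuple event
            save_history(history)          # round-trip on the next turn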

web/pgadmin/llm/client.py

Lines changed: 44 additions & 1 deletion
@@ -10,7 +10,8 @@
 """Base LLM client interface and factory."""
 
 from abc import ABC, abstractmethod
-from typing import Optional
+from collections.abc import Generator
+from typing import Optional, Union
 
 from pgadmin.llm.models import (
     Message, Tool, LLMResponse, LLMError
@@ -74,6 +75,48 @@ def chat(
         """
         pass
 
+    def chat_stream(
+        self,
+        messages: list[Message],
+        tools: Optional[list[Tool]] = None,
+        system_prompt: Optional[str] = None,
+        max_tokens: int = 4096,
+        temperature: float = 0.0,
+        **kwargs
+    ) -> Generator[Union[str, LLMResponse], None, None]:
+        """
+        Stream a chat response from the LLM.
+
+        Yields text chunks (str) as they arrive, then yields
+        a final LLMResponse with the complete response metadata.
+
+        The default implementation falls back to non-streaming chat().
+
+        Args:
+            messages: List of conversation messages.
+            tools: Optional list of tools the LLM can use.
+            system_prompt: Optional system prompt to set context.
+            max_tokens: Maximum tokens in the response.
+            temperature: Sampling temperature (0.0 = deterministic).
+            **kwargs: Additional provider-specific parameters.
+
+        Yields:
+            str: Text content chunks as they arrive.
+            LLMResponse: Final response with complete metadata (last item).
+        """
+        # Default: fall back to non-streaming
+        response = self.chat(
+            messages=messages,
+            tools=tools,
+            system_prompt=system_prompt,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            **kwargs
+        )
+        if response.content:
+            yield response.content
+        yield response
+
     def validate_connection(self) -> tuple[bool, Optional[str]]:
         """
         Validate the connection to the LLM provider.
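To illustrate the default fallback contract, a hedged example that assumes a provider is configured and uses only names visible in this diff:

    # With a provider that does not override chat_stream(), the base
    # class yields the full content once, then the LLMResponse itself.
    client = get_llm_client()
    chunks, final = [], None
    for item in client.chat_stream(messages=[Message.user("Hello")]):
        if isinstance(item, LLMResponse):
            final = item            # last item carries full metadata
        else:
            chunks.append(item)     # zero or one str chunk in fallback
    assert final is not None
    assert "".join(chunks) == (final.content or "")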

web/pgadmin/llm/prompts/nlq.py

Lines changed: 6 additions & 9 deletions
@@ -35,13 +35,10 @@
 - Use explicit column names instead of SELECT *
 - For UPDATE/DELETE, always include WHERE clauses
 
-Once you have explored the database structure using the tools above, \
-provide your final answer as a JSON object in this exact format:
-{"sql": "YOUR SQL QUERY HERE", "explanation": "Brief explanation"}
-
-Rules for the final response:
-- Return ONLY the JSON object, no other text
-- No markdown code blocks
-- If you need clarification, set "sql" to null and put \
-your question in "explanation"
+Response format:
+- Always put SQL in fenced code blocks with the sql language tag
+- You may include multiple SQL blocks if the request needs \
+multiple statements
+- Briefly explain what each query does
+- If you need clarification, just ask — no code blocks needed
 """
