Commit ffdc89a

Merge pull request #8 from SecrinLabs/feature/streaming-token

token streaming support

2 parents 8435b24 + 3d64ba9

5 files changed: 196 additions & 3 deletions

apps/api/routes/v1/ask.py

Lines changed: 22 additions & 0 deletions
@@ -1,7 +1,9 @@
 import logging
+import json
 from typing import Any
 
 from fastapi import APIRouter, HTTPException, status
+from fastapi.responses import StreamingResponse
 
 from apps.api.routes.v1.schemas.qa import (
     QARequest,
@@ -15,6 +17,7 @@
 from packages.memory.services.issue_analysis import IssueAnalyzer
 from packages.database.graph.graph import neo4j_client
 from packages.config import Settings
+from packages.config.feature_flags import is_feature_enabled, FeatureFlag
 
 router = APIRouter(prefix="/ask", tags=["Question Answering"])
 settings = Settings()
@@ -107,6 +110,12 @@ async def analyze_issue(request: IssueRequest):
     try:
         logger.info(f"Analyzing issue: {request.title}")
 
+        if is_feature_enabled(FeatureFlag.ENABLE_TOKEN_STREAMING):
+            return StreamingResponse(
+                _stream_issue_analysis(request.title, request.body),
+                media_type="text/event-stream"
+            )
+
         result = issue_analyzer.analyze_issue(request.title, request.body)
 
         if "error" in result:
@@ -131,3 +140,16 @@ async def analyze_issue(request: IssueRequest):
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="An unexpected error occurred. Please try again later.",
         )
+
+
+def _stream_issue_analysis(title: str, body: str):
+    """Generator for streaming issue analysis."""
+    try:
+        for chunk in issue_analyzer.analyze_issue_stream(title, body):
+            yield f"data: {json.dumps(chunk)}\n\n"
+    except Exception as e:
+        logger.exception("Error during streaming issue analysis")
+        yield f"data: {json.dumps({'error': str(e)})}\n\n"
+    finally:
+        yield "data: [DONE]\n\n"
+
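For reference, a minimal client sketch for consuming this SSE stream. The URL is an assumption (the route decorator for analyze_issue is not shown in this diff); the title/body request fields match IssueRequest as used above.

import json
import requests

# Hypothetical URL; substitute the actual route registered for analyze_issue.
resp = requests.post(
    "http://localhost:8000/v1/ask/issue",
    json={"title": "Login fails after upgrade", "body": "Users report 401s..."},
    stream=True,
)
for raw in resp.iter_lines():
    if not raw:
        continue
    line = raw.decode("utf-8")
    if not line.startswith("data: "):
        continue
    data = line[len("data: "):]
    if data == "[DONE]":          # terminal sentinel from _stream_issue_analysis
        break
    event = json.loads(data)      # {"context_used": [...]}, {"chunk": "..."}, or {"error": "..."}
    print(event)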

packages/config/feature_flags.py

Lines changed: 5 additions & 0 deletions
@@ -40,6 +40,7 @@ class FeatureFlag(str, Enum):
     ENABLE_AUTO_INDEXING = "enable_auto_indexing"
     ENABLE_SMART_RETRY = "enable_smart_retry"
     ENABLE_MULTIMODAL_EMBEDDINGS = "enable_multimodal_embeddings"
+    ENABLE_TOKEN_STREAMING = "enable_token_streaming"
 
 
 class FeatureFlagConfig(BaseModel):
@@ -150,6 +151,10 @@ def _initialize_defaults(self) -> Dict[FeatureFlag, FeatureFlagConfig]:
                 enabled=False,
                 environments=["development"]
             ),
+            FeatureFlag.ENABLE_TOKEN_STREAMING: FeatureFlagConfig(
+                enabled=False,
+                environments=["development", "staging", "production"]
+            ),
         }
 
     def is_enabled(self, flag: FeatureFlag) -> bool:
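The new flag ships disabled by default in all three environments, so callers must opt in at runtime. A minimal gate, mirroring the import added to ask.py (how flags are toggled per environment is not shown in this diff):

from packages.config.feature_flags import is_feature_enabled, FeatureFlag

if is_feature_enabled(FeatureFlag.ENABLE_TOKEN_STREAMING):
    ...  # stream tokens to the client
else:
    ...  # fall back to the blocking, single-response path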

packages/memory/llm/base.py

Lines changed: 35 additions & 1 deletion
@@ -4,7 +4,7 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import List, Any, Optional
+from typing import List, Any, Optional, Iterator
 
 
 class BaseLLMProvider(ABC):
@@ -45,6 +45,26 @@ def generate_answer(
             Exception: If generation fails
         """
         pass
+
+    def stream_answer(
+        self,
+        question: str,
+        context_items: List[Any],
+        search_type: str
+    ) -> Iterator[str]:
+        """
+        Stream an answer to the question using provided context.
+
+        Args:
+            question: User's question
+            context_items: List of search results to use as context
+            search_type: Type of search performed (vector/hybrid)
+
+        Returns:
+            Iterator yielding generated answer chunks
+        """
+        prompt = self._build_prompt(question, context_items, search_type)
+        return self.stream_text(prompt)
 
     @abstractmethod
     def generate_text(self, prompt: str, system_prompt: Optional[str] = None) -> str:
@@ -59,6 +79,20 @@ def generate_text(self, prompt: str, system_prompt: Optional[str] = None) -> str:
             Generated text
         """
         pass
+
+    @abstractmethod
+    def stream_text(self, prompt: str, system_prompt: Optional[str] = None) -> Iterator[str]:
+        """
+        Stream text from a raw prompt.
+
+        Args:
+            prompt: The prompt to send to the LLM
+            system_prompt: Optional system prompt
+
+        Returns:
+            Iterator yielding generated text chunks
+        """
+        pass
 
     @abstractmethod
     def get_provider_name(self) -> str:
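To illustrate the contract, a toy subclass sketch. EchoProvider is hypothetical; stream_answer also relies on a _build_prompt helper that this diff does not show, and generate_answer's signature is assumed to mirror stream_answer's:

from typing import Any, Iterator, List, Optional

class EchoProvider(BaseLLMProvider):
    """Hypothetical provider that 'streams' the prompt back one word at a time."""

    def generate_answer(self, question: str, context_items: List[Any], search_type: str) -> str:
        # Assumed signature, mirroring stream_answer above.
        return self.generate_text(question)

    def generate_text(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        return prompt

    def stream_text(self, prompt: str, system_prompt: Optional[str] = None) -> Iterator[str]:
        for word in prompt.split():
            yield word + " "  # each chunk is a partial-answer fragment

    def get_provider_name(self) -> str:
        return "echo"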

packages/memory/llm/providers/ollama.py

Lines changed: 54 additions & 1 deletion
@@ -3,7 +3,8 @@
 """
 
 import requests
-from typing import List, Any, Optional
+import json
+from typing import List, Any, Optional, Iterator
 from packages.memory.llm.base import BaseLLMProvider
 from packages.config.settings import Settings
 
@@ -118,6 +119,58 @@ def generate_text(self, prompt: str, system_prompt: Optional[str] = None) -> str:
             )
         except requests.exceptions.RequestException as e:
             raise Exception(f"Ollama API error: {e}")
+
+    def stream_text(self, prompt: str, system_prompt: Optional[str] = None) -> Iterator[str]:
+        """
+        Stream text using Ollama.
+
+        Args:
+            prompt: The prompt to send
+            system_prompt: Optional system prompt
+
+        Returns:
+            Iterator yielding generated text chunks
+        """
+        try:
+            payload = {
+                "model": self.model,
+                "prompt": prompt,
+                "stream": True,
+                "options": {
+                    "temperature": self.temperature,
+                    "num_predict": self.max_tokens
+                }
+            }
+
+            if system_prompt:
+                payload["system"] = system_prompt
+
+            response = requests.post(
+                f"{self.base_url}/api/generate",
+                json=payload,
+                timeout=self.timeout,
+                stream=True
+            )
+            response.raise_for_status()
+
+            for line in response.iter_lines():
+                if line:
+                    try:
+                        json_response = json.loads(line)
+                        if "response" in json_response:
+                            yield json_response["response"]
+                        if json_response.get("done", False):
+                            break
+                    except json.JSONDecodeError:
+                        continue
+
+        except requests.exceptions.Timeout:
+            raise TimeoutError(
+                f"Ollama request timed out after {self.timeout}s. "
+                "Try increasing LLM_TIMEOUT or reducing context size."
+            )
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Ollama API error: {e}")
 
     def get_provider_name(self) -> str:
         """Return provider name."""

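Ollama's /api/generate endpoint streams newline-delimited JSON objects of the form {"response": "<token text>", "done": false}, with a final record where "done" is true; the iter_lines loop above yields only the "response" fragments. A hedged usage sketch (the provider class name and its construction are assumptions; neither appears in this diff):

provider = OllamaProvider()  # hypothetical construction; config comes from Settings in this codebase
for fragment in provider.stream_text("Summarize the failing test in auth_test.py"):
    print(fragment, end="", flush=True)  # fragments concatenate into the full completion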
packages/memory/services/issue_analysis.py

Lines changed: 80 additions & 1 deletion
@@ -1,4 +1,4 @@
-from typing import Dict, Any, List, Optional
+from typing import Dict, Any, List, Optional, Iterator
 import logging
 from packages.memory.services.graph_service import GraphService
 from packages.memory.llm import BaseLLMProvider
@@ -104,6 +104,85 @@ def analyze_issue(self, title: str, body: str) -> Dict[str, Any]:
             "context_used": [self._format_item_summary(item) for item in all_context]
         }
 
+    def analyze_issue_stream(self, title: str, body: str) -> Iterator[Dict[str, Any]]:
+        """
+        Analyze an issue and stream the report.
+
+        Args:
+            title: Issue title
+            body: Issue body/description
+
+        Returns:
+            Iterator yielding chunks of the report or context info
+        """
+        issue_text = f"{title}\n\n{body}"
+        logger.info(f"Analyzing issue (streaming): {title}")
+
+        # 1. Search for relevant code (Functions, Classes, Files)
+        code_context = self.graph_service.hybrid_search(
+            query_text=issue_text,
+            node_type="Function",
+            limit=5
+        )
+
+        # Also search for Files directly
+        file_context = self.graph_service.hybrid_search(
+            query_text=issue_text,
+            node_type="File",
+            limit=3
+        )
+
+        # 2. Search for relevant history (Commits)
+        commit_context = self.graph_service.hybrid_search(
+            query_text=issue_text,
+            node_type="Commit",
+            limit=5
+        )
+
+        # Combine context
+        all_context = code_context + file_context + commit_context
+
+        if not all_context:
+            yield {"error": "No relevant context found in the knowledge graph."}
+            return
+
+        # Yield context info first
+        yield {
+            "context_used": [self._format_item_summary(item) for item in all_context]
+        }
+
+        # 3. Generate Report using LLM
+        system_prompt = """
+        You are an expert software engineer and debugger.
+        You are given a GitHub issue description and a set of relevant code snippets and commit history from the project's Knowledge Graph.
+
+        Your task is to analyze the issue and provide a detailed report containing:
+        1. **Root Cause Analysis**: What is likely causing the issue based on the code and history?
+        2. **Affected Areas**: Which files, classes, or functions are involved?
+        3. **Suggested Fix**: How can this be fixed? Provide code snippets if possible.
+        4. **Relevant History**: Are there recent commits that might have introduced this?
+
+        Be specific. Reference the filenames and function names provided in the context.
+        """
+
+        # Format context for LLM
+        context_str = self._format_context(all_context)
+
+        prompt = f"""
+        ISSUE TITLE: {title}
+
+        ISSUE BODY:
+        {body}
+
+        RELEVANT CONTEXT FROM KNOWLEDGE GRAPH:
+        {context_str}
+
+        Please provide your analysis report.
+        """
+
+        for chunk in self.llm_provider.stream_text(prompt=prompt, system_prompt=system_prompt):
+            yield {"chunk": chunk}
+
     def _format_context(self, items: List[Any]) -> str:
         output = []
         for item in items:
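The generator yields plain dicts — one context_used event, then a stream of chunk events (or a single error event) — which the route layer wraps as SSE. A consumer sketch (the IssueAnalyzer constructor arguments are hypothetical wiring; they are not shown in this diff):

analyzer = IssueAnalyzer(graph_service, llm_provider)  # hypothetical construction
report_parts = []
for event in analyzer.analyze_issue_stream("Login fails", "Users report 401s after upgrading."):
    if "error" in event:
        raise RuntimeError(event["error"])
    if "context_used" in event:
        print(f"Grounded on {len(event['context_used'])} graph items")
    elif "chunk" in event:
        report_parts.append(event["chunk"])
print("".join(report_parts))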
