@@ -142,9 +142,9 @@ def chat(
             raise
         except Exception as e:
             raise LLMClientError(LLMError(
-                message=f"Request failed: {str(e)}",
+                message=f"Request failed: {e!s}",
                 provider=self.provider_name
-            ))
+            )) from e

     def _chat_responses(
         self,
@@ -165,9 +165,9 @@ def _chat_responses(
             raise
         except Exception as e:
             raise LLMClientError(LLMError(
-                message=f"Request failed: {str(e)}",
+                message=f"Request failed: {e!s}",
                 provider=self.provider_name
-            ))
+            )) from e

     def _should_use_responses_api(self, error: LLMClientError) -> bool:
         """Check if the error indicates we should use the Responses API."""
@@ -400,6 +400,22 @@ def _make_request(self, payload: dict) -> dict:
             retryable=True
         ))

+    def _raise_max_tokens_error(self, input_tokens: int):
+        """Raise an error when a response is truncated due to token limit."""
+        raise LLMClientError(LLMError(
+            message=f'Response truncated due to token limit '
+                    f'(input: {input_tokens} tokens). '
+                    f'The request is too large for model '
+                    f'{self._model}. '
+                    f'Try using a model with a larger context '
+                    f'window, or analyze a smaller scope (e.g., a '
+                    f'specific schema instead of the entire '
+                    f'database).',
+            code='max_tokens',
+            provider=self.provider_name,
+            retryable=False
+        ))
+
     def _parse_response(self, data: dict) -> LLMResponse:
         """Parse the Chat Completions API response into an LLMResponse."""
         # Check for API-level errors in the response
@@ -470,20 +486,7 @@ def _parse_response(self, data: dict) -> LLMResponse:
         # Check for problematic responses
         if not content and not tool_calls:
             if stop_reason == StopReason.MAX_TOKENS:
-                input_tokens = usage.input_tokens
-                raise LLMClientError(LLMError(
-                    message=f'Response truncated due to token limit '
-                            f'(input: {input_tokens} tokens). '
-                            f'The request is too large for model '
-                            f'{self._model}. '
-                            f'Try using a model with a larger context '
-                            f'window, or analyze a smaller scope (e.g., a '
-                            f'specific schema instead of the entire '
-                            f'database).',
-                    code='max_tokens',
-                    provider=self.provider_name,
-                    retryable=False
-                ))
+                self._raise_max_tokens_error(usage.input_tokens)
             elif finish_reason and finish_reason not in ('stop', 'tool_calls'):
                 raise LLMClientError(LLMError(
                     message=(f'Empty response with finish reason: '
@@ -562,20 +565,7 @@ def _parse_responses_response(self, data: dict) -> LLMResponse:
         # Check for problematic responses
         if not content and not tool_calls:
             if stop_reason == StopReason.MAX_TOKENS:
-                input_tokens = usage.input_tokens
-                raise LLMClientError(LLMError(
-                    message=f'Response truncated due to token limit '
-                            f'(input: {input_tokens} tokens). '
-                            f'The request is too large for model '
-                            f'{self._model}. '
-                            f'Try using a model with a larger context '
-                            f'window, or analyze a smaller scope (e.g., a '
-                            f'specific schema instead of the entire '
-                            f'database).',
-                    code='max_tokens',
-                    provider=self.provider_name,
-                    retryable=False
-                ))
+                self._raise_max_tokens_error(usage.input_tokens)

         return LLMResponse(
             content=content,
@@ -607,9 +597,9 @@ def chat_stream(
                 raise
             except Exception as e:
                 raise LLMClientError(LLMError(
-                    message=f"Streaming request failed: {str(e)}",
+                    message=f"Streaming request failed: {e!s}",
                     provider=self.provider_name
-                ))
+                )) from e
             return

         # Try Chat Completions API first
@@ -633,9 +623,9 @@ def chat_stream(
             raise
         except Exception as e:
             raise LLMClientError(LLMError(
-                message=f"Streaming request failed: {str(e)}",
+                message=f"Streaming request failed: {e!s}",
                 provider=self.provider_name
-            ))
+            )) from e

     def _process_stream(
         self, payload: dict
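Two changes recur across these hunks: `{e!s}` is the f-string conversion-spec equivalent of `str(e)`, and `raise ... from e` chains the original exception as `__cause__`, so the underlying provider failure stays visible in tracebacks instead of being swallowed by the wrapper. A minimal standalone sketch of the chaining pattern follows; the `LLMError` fields and the `make_request` stand-in are assumptions for illustration, not the project's actual definitions:

from dataclasses import dataclass


@dataclass
class LLMError:
    """Simplified stand-in for the project's error type (fields assumed)."""
    message: str
    provider: str


class LLMClientError(Exception):
    def __init__(self, error: LLMError):
        super().__init__(error.message)
        self.error = error


def make_request():
    # Hypothetical provider call that fails.
    raise TimeoutError("connection timed out")


try:
    try:
        make_request()
    except LLMClientError:
        raise  # already wrapped; propagate as-is
    except Exception as e:
        # "from e" sets __cause__, so the traceback shows the original
        # TimeoutError beneath the wrapper rather than discarding it.
        raise LLMClientError(LLMError(
            message=f"Request failed: {e!s}",
            provider="openai",
        )) from e
except LLMClientError as wrapped:
    print(wrapped)                  # Request failed: connection timed out
    print(repr(wrapped.__cause__))  # TimeoutError('connection timed out')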