Add merge_reasoning_content_in_choices option to model resource

Nicholas Cecere · Nicholas Cecere · commit 55c3ce1f3071 · 2025-03-13T21:20:31.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,16 +5,23 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.6] - 2025-03-13
+
+### Added
+- Added new `merge_reasoning_content_in_choices` option to model resource - Boolean to merge reasoning content into the model's choices (default: false)
+
+## [0.2.5] - 2025-03-13
+
+### Fixed
+- Fixed issue where `thinking_budget_tokens` was being added to models that don't have `thinking_enabled = true`
+
 ## [0.2.4] - 2025-03-13
 
 ### Added
 - Added new `thinking` capability to model resource with configurable parameters:
   - `thinking_enabled` - Boolean to enable/disable thinking capability (default: false)
   - `thinking_budget_tokens` - Integer to set token budget for thinking (default: 1024)
 
-### Fixed
-- Fixed issue where `thinking_budget_tokens` was being added to models that don't have `thinking_enabled = true`
-
 ## [0.2.2] - 2025-02-06
 
 ### Added
diff --git a/docs/resources/model.md b/docs/resources/model.md
@@ -17,6 +17,7 @@ resource "litellm_model" "gpt4" {
   reasoning_effort    = "medium"
   thinking_enabled    = true
   thinking_budget_tokens = 1024
+  merge_reasoning_content_in_choices = true
   tpm                 = 100000
   rpm                 = 1000
   
@@ -70,6 +71,8 @@ The following arguments are supported:
 
 * `thinking_budget_tokens` - (Optional) Sets the token budget for the model's thinking capability. Default is `1024`.
 
+* `merge_reasoning_content_in_choices` - (Optional) When set to `true`, merges reasoning content into the model's choices. Default is `false`.
+
 * `input_cost_per_million_tokens` - (Optional) Cost per million input tokens. This will be automatically converted to the per-token cost required by the API.
 
 * `output_cost_per_million_tokens` - (Optional) Cost per million output tokens. This will be automatically converted to the per-token cost required by the API.
diff --git a/litellm/resource_model.go b/litellm/resource_model.go
@@ -52,6 +52,10 @@ func resourceLiteLLMModel() *schema.Resource {
 					return !d.Get("thinking_enabled").(bool)
 				},
 			},
+			"merge_reasoning_content_in_choices": {
+				Type:     schema.TypeBool,
+				Optional: true,
+			},
 			"model_api_key": {
 				Type:      schema.TypeString,
 				Optional:  true,
diff --git a/litellm/resource_model_crud.go b/litellm/resource_model_crud.go
@@ -47,27 +47,28 @@ func createOrUpdateModel(d *schema.ResourceData, m interface{}, isUpdate bool) e
 	modelReq := ModelRequest{
 		ModelName: d.Get("model_name").(string),
 		LiteLLMParams: LiteLLMParams{
-			CustomLLMProvider:   customLLMProvider,
-			TPM:                 d.Get("tpm").(int),
-			RPM:                 d.Get("rpm").(int),
-			APIKey:              d.Get("model_api_key").(string),
-			APIBase:             d.Get("model_api_base").(string),
-			APIVersion:          d.Get("api_version").(string),
-			Model:               modelName,
-			InputCostPerToken:   inputCostPerToken,
-			OutputCostPerToken:  outputCostPerToken,
-			InputCostPerPixel:   d.Get("input_cost_per_pixel").(float64),
-			OutputCostPerPixel:  d.Get("output_cost_per_pixel").(float64),
-			InputCostPerSecond:  d.Get("input_cost_per_second").(float64),
-			OutputCostPerSecond: d.Get("output_cost_per_second").(float64),
-			AWSAccessKeyID:      d.Get("aws_access_key_id").(string),
-			AWSSecretAccessKey:  d.Get("aws_secret_access_key").(string),
-			AWSRegionName:       d.Get("aws_region_name").(string),
-			VertexProject:       d.Get("vertex_project").(string),
-			VertexLocation:      d.Get("vertex_location").(string),
-			VertexCredentials:   d.Get("vertex_credentials").(string),
-			ReasoningEffort:     d.Get("reasoning_effort").(string),
-			Thinking:            thinking,
+			CustomLLMProvider:              customLLMProvider,
+			TPM:                            d.Get("tpm").(int),
+			RPM:                            d.Get("rpm").(int),
+			APIKey:                         d.Get("model_api_key").(string),
+			APIBase:                        d.Get("model_api_base").(string),
+			APIVersion:                     d.Get("api_version").(string),
+			Model:                          modelName,
+			InputCostPerToken:              inputCostPerToken,
+			OutputCostPerToken:             outputCostPerToken,
+			InputCostPerPixel:              d.Get("input_cost_per_pixel").(float64),
+			OutputCostPerPixel:             d.Get("output_cost_per_pixel").(float64),
+			InputCostPerSecond:             d.Get("input_cost_per_second").(float64),
+			OutputCostPerSecond:            d.Get("output_cost_per_second").(float64),
+			AWSAccessKeyID:                 d.Get("aws_access_key_id").(string),
+			AWSSecretAccessKey:             d.Get("aws_secret_access_key").(string),
+			AWSRegionName:                  d.Get("aws_region_name").(string),
+			VertexProject:                  d.Get("vertex_project").(string),
+			VertexLocation:                 d.Get("vertex_location").(string),
+			VertexCredentials:              d.Get("vertex_credentials").(string),
+			ReasoningEffort:                d.Get("reasoning_effort").(string),
+			Thinking:                       thinking,
+			MergeReasoningContentInChoices: d.Get("merge_reasoning_content_in_choices").(bool),
 		},
 		ModelInfo: ModelInfo{
 			ID:        modelID,
@@ -166,6 +167,9 @@ func resourceLiteLLMModelRead(d *schema.ResourceData, m interface{}) error {
 		// Don't set thinking_budget_tokens when thinking is not enabled
 	}
 
+	// Handle merge_reasoning_content_in_choices
+	d.Set("merge_reasoning_content_in_choices", modelResp.LiteLLMParams.MergeReasoningContentInChoices)
+
 	return nil
 }
 
diff --git a/litellm/types.go b/litellm/types.go
@@ -45,27 +45,28 @@ type TeamResponse struct {
 
 // LiteLLMParams represents the parameters for LiteLLM.
 type LiteLLMParams struct {
-	CustomLLMProvider   string                 `json:"custom_llm_provider"`
-	TPM                 int                    `json:"tpm,omitempty"`
-	RPM                 int                    `json:"rpm,omitempty"`
-	ReasoningEffort     string                 `json:"reasoning_effort,omitempty"`
-	Thinking            map[string]interface{} `json:"thinking,omitempty"`
-	APIKey              string                 `json:"api_key,omitempty"`
-	APIBase             string                 `json:"api_base,omitempty"`
-	APIVersion          string                 `json:"api_version,omitempty"`
-	Model               string                 `json:"model"`
-	InputCostPerToken   float64                `json:"input_cost_per_token,omitempty"`
-	OutputCostPerToken  float64                `json:"output_cost_per_token,omitempty"`
-	InputCostPerPixel   float64                `json:"input_cost_per_pixel,omitempty"`
-	OutputCostPerPixel  float64                `json:"output_cost_per_pixel,omitempty"`
-	InputCostPerSecond  float64                `json:"input_cost_per_second,omitempty"`
-	OutputCostPerSecond float64                `json:"output_cost_per_second,omitempty"`
-	AWSAccessKeyID      string                 `json:"aws_access_key_id,omitempty"`
-	AWSSecretAccessKey  string                 `json:"aws_secret_access_key,omitempty"`
-	AWSRegionName       string                 `json:"aws_region_name,omitempty"`
-	VertexProject       string                 `json:"vertex_project,omitempty"`
-	VertexLocation      string                 `json:"vertex_location,omitempty"`
-	VertexCredentials   string                 `json:"vertex_credentials,omitempty"`
+	CustomLLMProvider              string                 `json:"custom_llm_provider"`
+	TPM                            int                    `json:"tpm,omitempty"`
+	RPM                            int                    `json:"rpm,omitempty"`
+	ReasoningEffort                string                 `json:"reasoning_effort,omitempty"`
+	Thinking                       map[string]interface{} `json:"thinking,omitempty"`
+	MergeReasoningContentInChoices bool                   `json:"merge_reasoning_content_in_choices,omitempty"`
+	APIKey                         string                 `json:"api_key,omitempty"`
+	APIBase                        string                 `json:"api_base,omitempty"`
+	APIVersion                     string                 `json:"api_version,omitempty"`
+	Model                          string                 `json:"model"`
+	InputCostPerToken              float64                `json:"input_cost_per_token,omitempty"`
+	OutputCostPerToken             float64                `json:"output_cost_per_token,omitempty"`
+	InputCostPerPixel              float64                `json:"input_cost_per_pixel,omitempty"`
+	OutputCostPerPixel             float64                `json:"output_cost_per_pixel,omitempty"`
+	InputCostPerSecond             float64                `json:"input_cost_per_second,omitempty"`
+	OutputCostPerSecond            float64                `json:"output_cost_per_second,omitempty"`
+	AWSAccessKeyID                 string                 `json:"aws_access_key_id,omitempty"`
+	AWSSecretAccessKey             string                 `json:"aws_secret_access_key,omitempty"`
+	AWSRegionName                  string                 `json:"aws_region_name,omitempty"`
+	VertexProject                  string                 `json:"vertex_project,omitempty"`
+	VertexLocation                 string                 `json:"vertex_location,omitempty"`
+	VertexCredentials              string                 `json:"vertex_credentials,omitempty"`
 }
 
 // ModelInfo represents information about a model.