Add thinking capability to model resource

Nicholas Cecere · Nicholas Cecere · commit 051be03c1b62 · 2025-03-13T09:24:18.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.4] - 2025-03-13
+
+### Added
+- New `thinking` capability on the `litellm_model` resource, configured through two optional arguments:
+  - `thinking_enabled` - Boolean that enables or disables the thinking capability (default: false)
+  - `thinking_budget_tokens` - Integer token budget for thinking; only sent when `thinking_enabled` is true (default: 1024)
+
 ## [0.2.2] - 2025-02-06
 
 ### Added
diff --git a/docs/resources/model.md b/docs/resources/model.md
@@ -15,6 +15,8 @@ resource "litellm_model" "gpt4" {
   tier                = "paid"
   mode                = "completion"
   reasoning_effort    = "medium"
+  thinking_enabled    = true
+  thinking_budget_tokens = 1024
   tpm                 = 100000
   rpm                 = 1000
   
@@ -64,6 +66,10 @@ The following arguments are supported:
   * `medium`
   * `high`
 
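+* `thinking_enabled` - (Optional) Enables the model's thinking capability. When `true`, a `thinking` configuration of type `enabled` is sent to LiteLLM together with the budget from `thinking_budget_tokens`; when `false`, no `thinking` configuration is sent. Default is `false`.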
+
+* `thinking_budget_tokens` - (Optional) Token budget for the model's thinking capability. Only used when `thinking_enabled` is `true`. Default is `1024`.
+
 * `input_cost_per_million_tokens` - (Optional) Cost per million input tokens. This will be automatically converted to the per-token cost required by the API.
 
 * `output_cost_per_million_tokens` - (Optional) Cost per million output tokens. This will be automatically converted to the per-token cost required by the API.
diff --git a/litellm/resource_model.go b/litellm/resource_model.go
@@ -38,6 +38,16 @@ func resourceLiteLLMModel() *schema.Resource {
 					"high",
 				}, false),
 			},
+			"thinking_enabled": {
+				Type:     schema.TypeBool,
+				Optional: true,
+				Default:  false,
+			},
+			"thinking_budget_tokens": {
+				Type:     schema.TypeInt,
+				Optional: true,
+				Default:  1024,
+			},
 			"model_api_key": {
 				Type:      schema.TypeString,
 				Optional:  true,
diff --git a/litellm/resource_model_crud.go b/litellm/resource_model_crud.go
@@ -35,6 +35,15 @@ func createOrUpdateModel(d *schema.ResourceData, m interface{}, isUpdate bool) e
 		modelID = uuid.New().String()
 	}
 
+	// Create thinking configuration if enabled
+	var thinking map[string]interface{}
+	if d.Get("thinking_enabled").(bool) {
+		thinking = map[string]interface{}{
+			"type":          "enabled",
+			"budget_tokens": d.Get("thinking_budget_tokens").(int),
+		}
+	}
+
 	modelReq := ModelRequest{
 		ModelName: d.Get("model_name").(string),
 		LiteLLMParams: LiteLLMParams{
@@ -58,6 +67,7 @@ func createOrUpdateModel(d *schema.ResourceData, m interface{}, isUpdate bool) e
 			VertexLocation:      d.Get("vertex_location").(string),
 			VertexCredentials:   d.Get("vertex_credentials").(string),
 			ReasoningEffort:     d.Get("reasoning_effort").(string),
+			Thinking:            thinking,
 		},
 		ModelInfo: ModelInfo{
 			ID:        modelID,
@@ -140,6 +150,18 @@ func resourceLiteLLMModelRead(d *schema.ResourceData, m interface{}) error {
 	d.Set("input_cost_per_million_tokens", d.Get("input_cost_per_million_tokens"))
 	d.Set("output_cost_per_million_tokens", d.Get("output_cost_per_million_tokens"))
 
+	// Handle thinking configuration
+	if modelResp.LiteLLMParams.Thinking != nil {
+		if thinkingType, ok := modelResp.LiteLLMParams.Thinking["type"].(string); ok && thinkingType == "enabled" {
+			d.Set("thinking_enabled", true)
+			if budgetTokens, ok := modelResp.LiteLLMParams.Thinking["budget_tokens"].(float64); ok {
+				d.Set("thinking_budget_tokens", int(budgetTokens))
+			}
+		}
+	} else {
+		d.Set("thinking_enabled", false)
+	}
+
 	return nil
 }
 
diff --git a/litellm/types.go b/litellm/types.go
@@ -45,26 +45,27 @@ type TeamResponse struct {
 
 // LiteLLMParams represents the parameters for LiteLLM.
 type LiteLLMParams struct {
-	CustomLLMProvider   string  `json:"custom_llm_provider"`
-	TPM                 int     `json:"tpm,omitempty"`
-	RPM                 int     `json:"rpm,omitempty"`
-	ReasoningEffort     string  `json:"reasoning_effort,omitempty"`
-	APIKey              string  `json:"api_key,omitempty"`
-	APIBase             string  `json:"api_base,omitempty"`
-	APIVersion          string  `json:"api_version,omitempty"`
-	Model               string  `json:"model"`
-	InputCostPerToken   float64 `json:"input_cost_per_token,omitempty"`
-	OutputCostPerToken  float64 `json:"output_cost_per_token,omitempty"`
-	InputCostPerPixel   float64 `json:"input_cost_per_pixel,omitempty"`
-	OutputCostPerPixel  float64 `json:"output_cost_per_pixel,omitempty"`
-	InputCostPerSecond  float64 `json:"input_cost_per_second,omitempty"`
-	OutputCostPerSecond float64 `json:"output_cost_per_second,omitempty"`
-	AWSAccessKeyID      string  `json:"aws_access_key_id,omitempty"`
-	AWSSecretAccessKey  string  `json:"aws_secret_access_key,omitempty"`
-	AWSRegionName       string  `json:"aws_region_name,omitempty"`
-	VertexProject       string  `json:"vertex_project,omitempty"`
-	VertexLocation      string  `json:"vertex_location,omitempty"`
-	VertexCredentials   string  `json:"vertex_credentials,omitempty"`
+	CustomLLMProvider   string                 `json:"custom_llm_provider"`
+	TPM                 int                    `json:"tpm,omitempty"`
+	RPM                 int                    `json:"rpm,omitempty"`
+	ReasoningEffort     string                 `json:"reasoning_effort,omitempty"`
+	Thinking            map[string]interface{} `json:"thinking,omitempty"`
+	APIKey              string                 `json:"api_key,omitempty"`
+	APIBase             string                 `json:"api_base,omitempty"`
+	APIVersion          string                 `json:"api_version,omitempty"`
+	Model               string                 `json:"model"`
+	InputCostPerToken   float64                `json:"input_cost_per_token,omitempty"`
+	OutputCostPerToken  float64                `json:"output_cost_per_token,omitempty"`
+	InputCostPerPixel   float64                `json:"input_cost_per_pixel,omitempty"`
+	OutputCostPerPixel  float64                `json:"output_cost_per_pixel,omitempty"`
+	InputCostPerSecond  float64                `json:"input_cost_per_second,omitempty"`
+	OutputCostPerSecond float64                `json:"output_cost_per_second,omitempty"`
+	AWSAccessKeyID      string                 `json:"aws_access_key_id,omitempty"`
+	AWSSecretAccessKey  string                 `json:"aws_secret_access_key,omitempty"`
+	AWSRegionName       string                 `json:"aws_region_name,omitempty"`
+	VertexProject       string                 `json:"vertex_project,omitempty"`
+	VertexLocation      string                 `json:"vertex_location,omitempty"`
+	VertexCredentials   string                 `json:"vertex_credentials,omitempty"`
 }
 
 // ModelInfo represents information about a model.