Skip to content

Commit a932028

Browse files
author
Nicholas Cecere
committed
Implement retry mechanism with exponential backoff for model read operations
1 parent a59cdf3 commit a932028

File tree

2 files changed

+48
-7
lines changed

2 files changed

+48
-7
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.3.0] - 2025-04-23
9+
10+
### Fixed
11+
- Implemented retry mechanism with exponential backoff for model read operations
12+
- Added detailed logging for retry attempts
13+
- Improved error handling for "model not found" errors
14+
815
## [0.2.9] - 2025-04-23
916

1017
### Fixed

litellm/resource_model_crud.go

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,50 @@ package litellm
22

33
import (
	"fmt"
	"log"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
)
1011

12+
// retryModelRead attempts to read a model with exponential backoff
13+
func retryModelRead(d *schema.ResourceData, m interface{}, maxRetries int) error {
14+
var err error
15+
delay := 1 * time.Second
16+
maxDelay := 10 * time.Second
17+
18+
for i := 0; i < maxRetries; i++ {
19+
log.Printf("[INFO] Attempting to read model (attempt %d/%d)", i+1, maxRetries)
20+
21+
err = resourceLiteLLMModelRead(d, m)
22+
if err == nil {
23+
log.Printf("[INFO] Successfully read model after %d attempts", i+1)
24+
return nil
25+
}
26+
27+
// Check if this is a "model not found" error
28+
if err.Error() != "failed to read model: API request failed: Status: 400 Bad Request, Response: {\"detail\":{\"error\":\"Model id = "+d.Id()+" not found on litellm proxy\"}}, Request: null" {
29+
// If it's a different error, don't retry
30+
return err
31+
}
32+
33+
if i < maxRetries-1 {
34+
log.Printf("[INFO] Model not found yet, retrying in %v...", delay)
35+
time.Sleep(delay)
36+
37+
// Exponential backoff with a maximum delay
38+
delay *= 2
39+
if delay > maxDelay {
40+
delay = maxDelay
41+
}
42+
}
43+
}
44+
45+
log.Printf("[WARN] Failed to read model after %d attempts: %v", maxRetries, err)
46+
return err
47+
}
48+
1149
const (
1250
endpointModelNew = "/model/new"
1351
endpointModelUpdate = "/model/update"
@@ -102,13 +140,9 @@ func createOrUpdateModel(d *schema.ResourceData, m interface{}, isUpdate bool) e
102140

103141
d.SetId(modelID)
104142

105-
// Add a delay to allow the model to be registered in the LiteLLM proxy
106-
fmt.Printf("Model created with ID %s. Waiting 5 seconds for LiteLLM proxy to register the model...\n", modelID)
107-
time.Sleep(5 * time.Second)
108-
fmt.Printf("Wait complete. Proceeding to read the model...\n")
109-
110-
// Read back the resource to ensure the state is consistent
111-
return resourceLiteLLMModelRead(d, m)
143+
log.Printf("[INFO] Model created with ID %s. Starting retry mechanism to read the model...", modelID)
144+
// Read back the resource with retries to ensure the state is consistent
145+
return retryModelRead(d, m, 5)
112146
}
113147

114148
func resourceLiteLLMModelCreate(d *schema.ResourceData, m interface{}) error {

0 commit comments

Comments
 (0)