Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 85 additions & 3 deletions internal/provider/openai/cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,33 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
},
"audio": {
"whisper-1": 0.006,
"tts-1": 0.015,
"tts-1-hd": 0.03,

"tts-1": 0.015,
"tts-1-hd": 0.03,

"gpt-4o-transcribe": 0.006,
"gpt-4o-transcribe-diarize": 0.006,
"gpt-4o-mini-transcribe": 0.003,

"gpt-4o-mini-tts": 0.012,
},
"transcription-input": {
"gpt-4o-transcribe": 0.0025,
"gpt-4o-transcribe-diarize": 0.0025,
"gpt-4o-mini-transcribe": 0.00125,
},
"transcription-output": {
"gpt-4o-transcribe": 0.01,
"gpt-4o-transcribe-diarize": 0.01,
"gpt-4o-mini-transcribe": 0.005,
},
"video": { // $ per sec
"sora-2": 0.1,
"sora-2-pro": 0.30,
"sora-2-720": 0.1,
"sora-2-pro-720": 0.30,
"sora-2-pro-1024": 0.5,
"sora-2-pro-1080": 0.7,
},
"completion": {
"gpt-image-1.5": 0.010,
Expand Down Expand Up @@ -649,7 +674,30 @@ func prepareGptImageQuality(quality string) (string, error) {
return quality, nil
}

func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string) (float64, error) {
func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string, usage *TranscriptionResponseUsage) (float64, error) {
if usage != nil {
inputTokens := usage.InputTokens
costMap, ok := ce.tokenCostMap["transcription-input"]
if !ok {
return 0, errors.New("transcription input token cost map is not provided")
}
inputCost, ok := costMap[model]
if !ok {
return 0, errors.New("model is not present in the transcription input token cost map")
}

outputTokens := usage.OutputTokens
costMap, ok = ce.tokenCostMap["transcription-output"]
if !ok {
return 0, errors.New("transcription output token cost map is not provided")
}
outputCost, ok := costMap[model]
if !ok {
return 0, errors.New("model is not present in the transcription output token cost map")
}

return (float64(inputTokens)/1000)*inputCost + (float64(outputTokens)/1000)*outputCost, nil
}
costMap, ok := ce.tokenCostMap["audio"]
if !ok {
return 0, errors.New("audio cost map is not provided")
Expand Down Expand Up @@ -769,6 +817,40 @@ func (ce *CostEstimator) EstimateResponseApiToolCreateContainerCost(req *Respons
return totalCost, nil
}

func (ce *CostEstimator) EstimateVideoCost(metadata *VideoResponseMetadata) (float64, error) {
if metadata == nil {
return 0, errors.New("metadata is nil")
}
costMap, ok := ce.tokenCostMap["video"]
if !ok {
return 0, errors.New("video cost map is not provided")
}
model := metadata.Model
size, err := normalizedVideoSize(metadata.Size)
if err != nil {
return 0, err
}
costKey := fmt.Sprintf("%s-%s", model, size)
cost, ok := costMap[costKey]
if !ok {
return 0, errors.New("model with provided size is not present in the video cost map")
}
return cost * metadata.GetSecondsAsFloat(), nil
Comment on lines +820 to +838
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Support size-less video pricing before forcing model-size lookup.

The new cost map contains plain keys like sora-2 and sora-2-pro, but this implementation always requires metadata.Size and always looks up model-size. Any response without a size will now error and record $0, even though you already have a fallback price configured.

💡 Suggested fix
 func (ce *CostEstimator) EstimateVideoCost(metadata *VideoResponseMetadata) (float64, error) {
 	if metadata == nil {
 		return 0, errors.New("metadata is nil")
 	}
 	costMap, ok := ce.tokenCostMap["video"]
 	if !ok {
 		return 0, errors.New("video cost map is not provided")
 	}
 	model := metadata.Model
-	size, err := normalizedVideoSize(metadata.Size)
-	if err != nil {
-		return 0, err
-	}
-	costKey := fmt.Sprintf("%s-%s", model, size)
-	cost, ok := costMap[costKey]
+	costKey := model
+	if metadata.Size != "" {
+		size, err := normalizedVideoSize(metadata.Size)
+		if err != nil {
+			return 0, err
+		}
+		costKey = fmt.Sprintf("%s-%s", model, size)
+	}
+	cost, ok := costMap[costKey]
 	if !ok {
 		return 0, errors.New("model with provided size is not present in the video cost map")
 	}
 	return cost * metadata.GetSecondsAsFloat(), nil
 }

Also applies to: 841-852

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/provider/openai/cost.go` around lines 820 - 838, EstimateVideoCost
currently mandates a model-size lookup and errors when metadata.Size is absent,
but the cost map may contain fallback keys like "sora-2" (model-only). Change
EstimateVideoCost to try lookups in order: 1) if size is present/normalized, try
"model-size"; 2) if that fails (or size missing/normalization returns an
empty/expected-error), try the model-only key "model"; and only return an error
if neither key exists in ce.tokenCostMap["video"]. Handle normalization errors
by treating missing size as absent (do not immediately return), and update the
same lookup logic in the analogous image pricing function (the one around lines
841-852) so both video and image cost resolution use the model-then-model-only
fallback.

}

// normalizedVideoSize maps a raw "WxH" resolution string onto the short
// height label used as the suffix of video cost-map keys ("720", "1024",
// "1080"). Portrait and landscape orientations of the same resolution map to
// the same label; any other input yields an error.
func normalizedVideoSize(size string) (string, error) {
	if size == "720x1280" || size == "1280x720" {
		return "720", nil
	}
	if size == "1024x1792" || size == "1792x1024" {
		return "1024", nil
	}
	if size == "1080x1920" || size == "1920x1080" {
		return "1080", nil
	}
	return "", errors.New("size is not valid")
}

var reasoningModelPrefix = []string{"gpt-5", "o1", "o2", "o3"}

func extendedToolType(toolType, model string) string {
Expand Down
57 changes: 57 additions & 0 deletions internal/provider/openai/types.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package openai

import "strconv"

type ResponseRequest struct {
Background *bool `json:"background,omitzero"`
Conversation *any `json:"conversation,omitzero"`
Expand Down Expand Up @@ -89,3 +91,58 @@ type ImageResponseMetadata struct {
Size string `json:"size,omitempty"`
Usage ImageResponseUsage `json:"usage,omitempty"`
}

// VideoResponseMetadata carries the billing-relevant fields of a video
// generation response: the model used, the output resolution ("WxH"), and
// the clip duration as a decimal string.
type VideoResponseMetadata struct {
	Model   string `json:"model,omitempty"`
	Size    string `json:"size,omitempty"`
	Seconds string `json:"seconds,omitempty"`
}

// GetSecondsAsFloat parses the Seconds field into a float64.
// NOTE(review): a missing or malformed value is coerced to 0, so downstream
// cost estimates treat such clips as zero-length — confirm this is intended.
func (v *VideoResponseMetadata) GetSecondsAsFloat() float64 {
	secs, err := strconv.ParseFloat(v.Seconds, 64)
	if err != nil {
		return 0
	}
	return secs
}
Comment on lines +101 to +106
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't silently coerce invalid video duration to $0.

If seconds is missing or malformed, this returns 0 and EstimateVideoCost under-bills without any error. Please return an error here, or make EstimateVideoCost validate the raw field before multiplying.

💡 Suggested direction
-func (v *VideoResponseMetadata) GetSecondsAsFloat() float64 {
-	if secondsFloat, err := strconv.ParseFloat(v.Seconds, 64); err == nil {
-		return secondsFloat
-	}
-	return 0
+func (v *VideoResponseMetadata) GetSecondsAsFloat() (float64, error) {
+	return strconv.ParseFloat(v.Seconds, 64)
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/provider/openai/types.go` around lines 101 - 106, The current
GetSecondsAsFloat silently returns 0 on parse failure which causes
EstimateVideoCost to under-bill; change GetSecondsAsFloat to return (float64,
error) (or add a new GetSecondsAsFloatSafe that returns (float64, error)) and
propagate/handle the error in callers like EstimateVideoCost and any other call
sites, validating v.Seconds before using it and returning/propagating the parse
error instead of treating malformed or missing seconds as 0 so billing is
correct.


// TranscriptionResponseUsageInputTokenDetails breaks the input token count
// down by modality (text vs. audio).
type TranscriptionResponseUsageInputTokenDetails struct {
	TextTokens  int `json:"text_tokens,omitempty"`
	AudioTokens int `json:"audio_tokens,omitempty"`
}

// TranscriptionResponseUsage is the token-usage payload attached to a
// transcription response or to the final chunk of a transcription stream.
type TranscriptionResponseUsage struct {
	Type              string                                      `json:"type"`
	TotalTokens       int                                         `json:"total_tokens,omitempty"`
	InputTokens       int                                         `json:"input_tokens,omitempty"`
	InputTokenDetails TranscriptionResponseUsageInputTokenDetails `json:"input_token_details,omitempty"`
	OutputTokens      int                                         `json:"output_tokens,omitempty"`
}

// TranscriptionResponse is a non-streaming transcription result.
type TranscriptionResponse struct {
	Text  string                     `json:"text,omitempty"`
	Usage TranscriptionResponseUsage `json:"usage,omitempty"`
}

// TranscriptionStreamChunk is a single server-sent event of a transcription
// stream. Type discriminates delta, segment, and done events; Usage is
// populated on the terminal chunk.
type TranscriptionStreamChunk struct {
	Type  string                     `json:"type"`
	Delta string                     `json:"delta,omitempty"`
	Text  string                     `json:"text,omitempty"`
	Usage TranscriptionResponseUsage `json:"usage,omitempty"`
}

// IsDone reports whether this chunk terminates the stream.
func (c *TranscriptionStreamChunk) IsDone() bool {
	return c.Type == "transcript.text.done"
}

// IsDelta reports whether this chunk carries an incremental text fragment.
func (c *TranscriptionStreamChunk) IsDelta() bool {
	return c.Type == "transcript.text.delta"
}

// IsSegment reports whether this chunk is a segment event.
func (c *TranscriptionStreamChunk) IsSegment() bool {
	return c.Type == "transcript.text.segment"
}

// GetText returns the chunk's payload text: the incremental Delta for delta
// events, the Text field for everything else.
func (c *TranscriptionStreamChunk) GetText() string {
	text := c.Text
	if c.IsDelta() {
		text = c.Delta
	}
	return text
}
15 changes: 13 additions & 2 deletions internal/server/web/proxy/audio.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ func getContentType(format string) string {

func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc {
return func(c *gin.Context) {
model := c.PostForm("model")
if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" || model == "gpt-4o-mini-transcribe" {
processGPTTranscriptions(c, prod, client, e, model)
return
}

log := util.GetLogFromCtx(c)
telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1)

Expand Down Expand Up @@ -291,7 +297,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha
}

if err == nil {
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"))
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil)
if err != nil {
telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.estimate_total_cost_error", nil, 1)
logError(log, "error when estimating openai cost", prod, err)
Expand Down Expand Up @@ -333,6 +339,11 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha

func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc {
return func(c *gin.Context) {
model := c.PostForm("model")
if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" || model == "gpt-4o-mini-transcribe" {
processGPTTranslations(c, prod, client, e, model)
return
}
log := util.GetLogFromCtx(c)
telemetry.Incr("bricksllm.proxy.get_translations_handler.requests", nil, 1)

Expand Down Expand Up @@ -451,7 +462,7 @@ func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.Hand
}

if err == nil {
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"))
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil)
if err != nil {
telemetry.Incr("bricksllm.proxy.get_translations_handler.estimate_total_cost_error", nil, 1)
logError(log, "error when estimating openai cost", prod, err)
Expand Down
Loading
Loading