diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index 777ab31..a50a930 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -149,8 +149,33 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ }, "audio": { "whisper-1": 0.006, - "tts-1": 0.015, - "tts-1-hd": 0.03, + + "tts-1": 0.015, + "tts-1-hd": 0.03, + + "gpt-4o-transcribe": 0.006, + "gpt-4o-transcribe-diarize": 0.006, + "gpt-4o-mini-transcribe": 0.003, + + "gpt-4o-mini-tts": 0.012, + }, + "transcription-input": { + "gpt-4o-transcribe": 0.0025, + "gpt-4o-transcribe-diarize": 0.0025, + "gpt-4o-mini-transcribe": 0.00125, + }, + "transcription-output": { + "gpt-4o-transcribe": 0.01, + "gpt-4o-transcribe-diarize": 0.01, + "gpt-4o-mini-transcribe": 0.005, + }, + "video": { // $ per sec + "sora-2": 0.1, + "sora-2-pro": 0.30, + "sora-2-720": 0.1, + "sora-2-pro-720": 0.30, + "sora-2-pro-1024": 0.5, + "sora-2-pro-1080": 0.7, }, "completion": { "gpt-image-1.5": 0.010, @@ -649,7 +674,30 @@ func prepareGptImageQuality(quality string) (string, error) { return quality, nil } -func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string) (float64, error) { +func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string, usage *TranscriptionResponseUsage) (float64, error) { + if usage != nil { + inputTokens := usage.InputTokens + costMap, ok := ce.tokenCostMap["transcription-input"] + if !ok { + return 0, errors.New("transcription input token cost map is not provided") + } + inputCost, ok := costMap[model] + if !ok { + return 0, errors.New("model is not present in the transcription input token cost map") + } + + outputTokens := usage.OutputTokens + costMap, ok = ce.tokenCostMap["transcription-output"] + if !ok { + return 0, errors.New("transcription output token cost map is not provided") + } + outputCost, ok := costMap[model] + if !ok { + return 0, errors.New("model is not present in the transcription output token 
cost map") + } + + return (float64(inputTokens)/1000)*inputCost + (float64(outputTokens)/1000)*outputCost, nil + } costMap, ok := ce.tokenCostMap["audio"] if !ok { return 0, errors.New("audio cost map is not provided") @@ -769,6 +817,40 @@ func (ce *CostEstimator) EstimateResponseApiToolCreateContainerCost(req *Respons return totalCost, nil } +func (ce *CostEstimator) EstimateVideoCost(metadata *VideoResponseMetadata) (float64, error) { + if metadata == nil { + return 0, errors.New("metadata is nil") + } + costMap, ok := ce.tokenCostMap["video"] + if !ok { + return 0, errors.New("video cost map is not provided") + } + model := metadata.Model + size, err := normalizedVideoSize(metadata.Size) + if err != nil { + return 0, err + } + costKey := fmt.Sprintf("%s-%s", model, size) + cost, ok := costMap[costKey] + if !ok { + return 0, errors.New("model with provided size is not present in the video cost map") + } + return cost * metadata.GetSecondsAsFloat(), nil +} + +func normalizedVideoSize(size string) (string, error) { + switch size { + case "720x1280", "1280x720": + return "720", nil + case "1024x1792", "1792x1024": + return "1024", nil + case "1080x1920", "1920x1080": + return "1080", nil + default: + return "", errors.New("size is not valid") + } +} + var reasoningModelPrefix = []string{"gpt-5", "o1", "o2", "o3"} func extendedToolType(toolType, model string) string { diff --git a/internal/provider/openai/types.go b/internal/provider/openai/types.go index 3f2dae3..aa24f06 100644 --- a/internal/provider/openai/types.go +++ b/internal/provider/openai/types.go @@ -1,5 +1,7 @@ package openai +import "strconv" + type ResponseRequest struct { Background *bool `json:"background,omitzero"` Conversation *any `json:"conversation,omitzero"` @@ -89,3 +91,58 @@ type ImageResponseMetadata struct { Size string `json:"size,omitempty"` Usage ImageResponseUsage `json:"usage,omitempty"` } + +type VideoResponseMetadata struct { + Model string `json:"model,omitempty"` + Size string 
`json:"size,omitempty"` + Seconds string `json:"seconds,omitempty"` +} + +func (v *VideoResponseMetadata) GetSecondsAsFloat() float64 { + if secondsFloat, err := strconv.ParseFloat(v.Seconds, 64); err == nil { + return secondsFloat + } + return 0 +} + +type TranscriptionResponseUsageInputTokenDetails struct { + TextTokens int `json:"text_tokens,omitempty"` + AudioTokens int `json:"audio_tokens,omitempty"` +} +type TranscriptionResponseUsage struct { + Type string `json:"type"` + TotalTokens int `json:"total_tokens,omitempty"` + InputTokens int `json:"input_tokens,omitempty"` + InputTokenDetails TranscriptionResponseUsageInputTokenDetails `json:"input_token_details,omitempty"` + OutputTokens int `json:"output_tokens,omitempty"` +} +type TranscriptionResponse struct { + Text string `json:"text,omitempty"` + Usage TranscriptionResponseUsage `json:"usage,omitempty"` +} + +type TranscriptionStreamChunk struct { + Type string `json:"type"` + Delta string `json:"delta,omitempty"` + Text string `json:"text,omitempty"` + Usage TranscriptionResponseUsage `json:"usage,omitempty"` +} + +func (c *TranscriptionStreamChunk) IsDone() bool { + return c.Type == "transcript.text.done" +} + +func (c *TranscriptionStreamChunk) IsDelta() bool { + return c.Type == "transcript.text.delta" +} + +func (c *TranscriptionStreamChunk) IsSegment() bool { + return c.Type == "transcript.text.segment" +} + +func (c *TranscriptionStreamChunk) GetText() string { + if c.IsDelta() { + return c.Delta + } + return c.Text +} diff --git a/internal/server/web/proxy/audio.go b/internal/server/web/proxy/audio.go index af6e2a9..738a59a 100644 --- a/internal/server/web/proxy/audio.go +++ b/internal/server/web/proxy/audio.go @@ -169,6 +169,12 @@ func getContentType(format string) string { func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { + model := c.PostForm("model") + if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" 
|| model == "gpt-4o-mini-transcribe" { + processGPTTranscriptions(c, prod, client, e, model) + return + } + log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1) @@ -291,7 +297,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha } if err == nil { - cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model")) + cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil) if err != nil { telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.estimate_total_cost_error", nil, 1) logError(log, "error when estimating openai cost", prod, err) @@ -333,6 +339,11 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { + model := c.PostForm("model") + if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" || model == "gpt-4o-mini-transcribe" { + processGPTTranslations(c, prod, client, e, model) + return + } log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_translations_handler.requests", nil, 1) @@ -451,7 +462,7 @@ func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.Hand } if err == nil { - cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model")) + cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil) if err != nil { telemetry.Incr("bricksllm.proxy.get_translations_handler.estimate_total_cost_error", nil, 1) logError(log, "error when estimating openai cost", prod, err) diff --git a/internal/server/web/proxy/audio_extended.go b/internal/server/web/proxy/audio_extended.go new file mode 100644 index 0000000..70cb970 --- /dev/null +++ b/internal/server/web/proxy/audio_extended.go @@ -0,0 +1,308 @@ +package proxy + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + 
"mime/multipart" + "net/http" + "time" + + "github.com/bricks-cloud/bricksllm/internal/provider/openai" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + goopenai "github.com/sashabaranov/go-openai" + "go.uber.org/zap" +) + +const ( + transcriptionsUrl = "https://api.openai.com/v1/audio/transcriptions" + translationsUrl = "https://api.openai.com/v1/audio/translations" +) + +func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model string) { + processGPTAudio(ginCtx, prod, client, e, model, transcriptionsUrl, "transcriptions") +} + +func processGPTTranslations(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model string) { + processGPTAudio(ginCtx, prod, client, e, model, translationsUrl, "translations") +} + +func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model, url, handler string) { + log := util.GetLogFromCtx(ginCtx) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.requests", handler), nil, 1) + + if ginCtx.Request == nil { + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] request is empty") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), ginCtx.GetDuration("requestTimeout")) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, url, ginCtx.Request.Body) + if err != nil { + logError(log, "error when creating transcriptions/translation openai http request", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai transcriptions/translation http request") + return + } + + copyHttpHeaders(ginCtx.Request, req, ginCtx.GetBool("removeUserAgent")) + + isStreaming := ginCtx.PostForm("stream") == "True" || ginCtx.PostForm("stream") == "true" + + if isStreaming { + req.Header.Set("Accept", "*/*") + req.Header.Set("Cache-Control", "no-cache") + 
req.Header.Set("Connection", "keep-alive") + } + + if !isStreaming { + modifyGPTTranscriptionsRequest(ginCtx, prod, log, req, handler) + } + + start := time.Now() + res, err := client.Do(req) + if err != nil { + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.http_client_error", handler), nil, 1) + + logError(log, "error when sending transcriptions/translation request to openai", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to send transcriptions/translation request to openai") + return + } + + defer res.Body.Close() + + for name, values := range res.Header { + for _, value := range values { + ginCtx.Header(name, value) + } + } + + if res.StatusCode == http.StatusOK && !isStreaming { + dur := time.Since(start) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.latency", handler), dur, nil, 1) + readBytes, err := io.ReadAll(res.Body) + if err != nil { + logError(log, "error when reading openai http transcriptions/translation response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + var cost float64 = 0 + resp := &openai.TranscriptionResponse{} + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.success", handler), nil, 1) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.success_latency", handler), dur, nil, 1) + + err = json.Unmarshal(readBytes, resp) + if err != nil { + logError(log, "error when unmarshalling openai http response api response body", prod, err) + } + + if err == nil { + cost, err = e.EstimateTranscriptionCost(0, model, &resp.Usage) + if err != nil { + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.estimate_total_cost_error", handler), nil, 1) + logError(log, "error when estimating openai cost", prod, err) + } + } + + ginCtx.Set("costInUsd", cost) + + contentType := "application/json" + bytesToSend := readBytes + if ginCtx.PostForm("response_format") == "text" { + contentType = 
"text/plain; charset=utf-8" + bytesToSend = []byte(resp.Text + "\n") + } + + ginCtx.Data(res.StatusCode, contentType, bytesToSend) + return + } + + if res.StatusCode != http.StatusOK { + dur := time.Since(start) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.error_latency", handler), dur, nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.error_response", handler), nil, 1) + + readBytes, err := io.ReadAll(res.Body) + if err != nil { + logError(log, "error when reading openai transcriptions/translation response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai transcriptions/translation response body") + return + } + + errorRes := &goopenai.ErrorResponse{} + err = json.Unmarshal(readBytes, errorRes) + if err != nil { + logError(log, "error when unmarshalling openai transcriptions/translation error response body", prod, err) + } + + logOpenAiError(log, prod, errorRes) + + ginCtx.Data(res.StatusCode, "application/json", readBytes) + return + } + + buffer := bufio.NewReader(res.Body) + content := "" + streamingResponse := [][]byte{} + + streamCost := 0.0 + + defer func() { + ginCtx.Set("content", content) + ginCtx.Set("streaming_response", bytes.Join(streamingResponse, []byte{'\n'})) + + ginCtx.Set("costInUsd", streamCost) + }() + + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.streaming_response", handler), nil, 1) + ginCtx.Stream(func(w io.Writer) bool { + raw, err := buffer.ReadBytes('\n') + if err != nil { + if err == io.EOF { + return false + } + + if errors.Is(err, context.DeadlineExceeded) { + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.context_deadline_exceeded_error", handler), nil, 1) + logError(log, "context deadline exceeded when reading bytes from openai transcriptions/translation response", prod, err) + + return false + } + + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.read_bytes_error", handler), nil, 1) + logError(log, "error 
when reading bytes from openai transcriptions/translation response", prod, err) + + apiErr := &goopenai.ErrorResponse{ + Error: &goopenai.APIError{ + Type: "bricksllm_error", + Message: err.Error(), + }, + } + + errBytes, err := json.Marshal(apiErr) + if err != nil { + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.json_marshal_error", handler), nil, 1) + logError(log, "error when marshalling bytes for openai streaming transcriptions/translation error response", prod, err) + return false + } + + ginCtx.SSEvent("", string(errBytes)) + ginCtx.SSEvent("", " [DONE]") + return false + } + streamingResponse = append(streamingResponse, raw) + + noSpaceLine := bytes.TrimSpace(raw) + if !bytes.HasPrefix(noSpaceLine, headerData) { + return true + } + + noPrefixLine := bytes.TrimPrefix(noSpaceLine, headerData) + noPrefixLine = bytes.TrimSpace(noPrefixLine) + ginCtx.SSEvent("", " "+string(noPrefixLine)) + + if string(noPrefixLine) == "[DONE]" { + return false + } + chunk := &openai.TranscriptionStreamChunk{} + err = json.Unmarshal(noPrefixLine, chunk) + if err != nil { + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.completion_response_unmarshall_error", handler), nil, 1) + logError(log, "error when unmarshalling openai transcriptions/translation stream response", prod, err) + } + if err == nil { + textDelta := chunk.GetText() + if len(textDelta) > 0 { + content += textDelta + } + if chunk.IsDone() { + content = chunk.GetText() + streamCost, err = e.EstimateTranscriptionCost(0, model, &chunk.Usage) + } + } + return true + }) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.streaming_latency", handler), time.Since(start), nil, 1) +} + +func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request, handler string) { + var b bytes.Buffer + writer := multipart.NewWriter(&b) + defer writer.Close() + + responseFormat := c.PostForm("response_format") + if responseFormat == "text" { + responseFormat = "json" + 
}
+
+	err := writePostFields(c, writer, map[string]string{
+		"response_format": responseFormat,
+	})
+	if err != nil {
+		telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.write_field_to_buffer_error", handler), nil, 1)
+		logError(log, "error when writing field to buffer", prod, err)
+		JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot write field to buffer")
+		return
+	}
+
+	var form TransriptionForm
+	c.ShouldBind(&form)
+
+	if form.File != nil {
+		fieldWriter, err := writer.CreateFormFile("file", form.File.Filename)
+		if err != nil {
+			telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.create_transcription_file_error", handler), nil, 1)
+			logError(log, "error when creating transcriptions/translation file", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot create transcriptions/translation file")
+			return
+		}
+
+		opened, err := form.File.Open()
+		if err != nil {
+			telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.open_transcription_file_error", handler), nil, 1)
+			logError(log, "error when opening transcriptions/translation file", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot open transcriptions/translation file")
+			return
+		}
+
+		_, err = io.Copy(fieldWriter, opened)
+		if err != nil {
+			telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.copy_transcription_file_error", handler), nil, 1)
+			logError(log, "error when copying transcriptions/translation file", prod, err)
+			JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot copy transcriptions/translation file")
+			return
+		}
+	}
+
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+	req.Body = io.NopCloser(&b)
+}
+
+func writePostFields(c *gin.Context, writer *multipart.Writer, overWrites map[string]string) error {
+	for k, v := range c.Request.PostForm {
+		if len(v) == 0 {
+			continue
+		}
+		val := v[0]
+		if len(overWrites) != 0 {
+			if ow := overWrites[k]; len(ow) != 0 {
+				val = ow
+			}
+		}
+		if len(val) == 0
{ + continue + } + err := writer.WriteField(k, val) + if err != nil { + return err + } + } + return nil +} diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index a994f72..7a47387 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -49,7 +49,7 @@ type keyStorage interface { } type estimator interface { - EstimateTranscriptionCost(secs float64, model string) (float64, error) + EstimateTranscriptionCost(secs float64, model string, usage *openai.TranscriptionResponseUsage) (float64, error) EstimateSpeechCost(input string, model string) (float64, error) EstimateChatCompletionPromptCostWithTokenCounts(r *goopenai.ChatCompletionRequest) (int, float64, error) EstimateEmbeddingsCost(r *goopenai.EmbeddingRequest) (float64, error) @@ -61,6 +61,7 @@ type estimator interface { EstimateResponseApiTotalCost(model string, usage responsesOpenai.ResponseUsage) (float64, error) EstimateResponseApiToolCallsCost(tools []responsesOpenai.ToolUnion, model string) (float64, error) EstimateResponseApiToolCreateContainerCost(req *openai.ResponseRequest) (float64, error) + EstimateVideoCost(metadata *openai.VideoResponseMetadata) (float64, error) } type azureEstimator interface { diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index 3a6cb82..4740867 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -104,6 +104,15 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.POST("/api/providers/openai/v1/audio/transcriptions", getTranscriptionsHandler(prod, client, e)) router.POST("/api/providers/openai/v1/audio/translations", getTranslationsHandler(prod, client, e)) + // videos + router.POST("/api/providers/openai/v1/videos", getVideoHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/videos/edits", getVideoHandler(prod, client, e)) + 
router.POST("/api/providers/openai/v1/videos/extensions", getVideoHandler(prod, client, e))
+	router.GET("/api/providers/openai/v1/videos/:video_id", getVideoHandler(prod, client, e))
+	router.DELETE("/api/providers/openai/v1/videos/:video_id", getVideoHandler(prod, client, e))
+	router.POST("/api/providers/openai/v1/videos/:video_id/remix", getVideoHandler(prod, client, e))
+	router.GET("/api/providers/openai/v1/videos/:video_id/content", getVideoHandler(prod, client, e))
+
 	// completions
 	router.POST("/api/providers/openai/v1/chat/completions", getChatCompletionHandler(prod, private, client, e))
diff --git a/internal/server/web/proxy/video.go b/internal/server/web/proxy/video.go
new file mode 100644
index 0000000..7e35130
--- /dev/null
+++ b/internal/server/web/proxy/video.go
@@ -0,0 +1,132 @@
+package proxy
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/bricks-cloud/bricksllm/internal/provider/openai"
+	"github.com/bricks-cloud/bricksllm/internal/telemetry"
+	"github.com/bricks-cloud/bricksllm/internal/util"
+	"github.com/gin-gonic/gin"
+	goopenai "github.com/sashabaranov/go-openai"
+)
+
+func getVideoHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc {
+	return func(ginCtx *gin.Context) {
+		log := util.GetLogFromCtx(ginCtx)
+		telemetry.Incr("bricksllm.proxy.get_video_handler.requests", nil, 1)
+
+		if ginCtx == nil || ginCtx.Request == nil {
+			JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] context is empty")
+			return
+		}
+
+		ctx, cancel := context.WithTimeout(ginCtx.Request.Context(), ginCtx.GetDuration("requestTimeout"))
+		defer cancel()
+
+		videoURL, err := constructVideoURL(ginCtx.Request.URL.Path)
+		if err != nil {
+			logError(log, "failed to construct video URL", prod, err)
+			JSON(ginCtx, http.StatusBadRequest, "[BricksLLM] invalid video request")
+			return
+		}
+
+		req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, videoURL, ginCtx.Request.Body)
+		if
err != nil { + logError(log, "error when creating openai http request", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request") + return + } + + copyHttpHeaders(ginCtx.Request, req, ginCtx.GetBool("removeUserAgent")) + + start := time.Now() + res, err := client.Do(req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_video_handler.http_client_error", nil, 1) + + logError(log, "error when sending http request to openai", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai") + return + } + defer res.Body.Close() + + for name, values := range res.Header { + for _, value := range values { + ginCtx.Header(name, value) + } + } + + if res.StatusCode != http.StatusOK { + dur := time.Since(start) + telemetry.Timing("bricksllm.proxy.get_video_handler.error_latency", dur, nil, 1) + telemetry.Incr("bricksllm.proxy.get_video_handler.error_response", nil, 1) + + bytes, err2 := io.ReadAll(res.Body) + if err2 != nil { + logError(log, "error when reading openai http video response body", prod, err2) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + + errorRes := &goopenai.ErrorResponse{} + err2 = json.Unmarshal(bytes, errorRes) + if err2 != nil { + logError(log, "error when unmarshalling openai video error response body", prod, err2) + } + + logOpenAiError(log, prod, errorRes) + + ginCtx.Data(res.StatusCode, "application/json", bytes) + return + } + + dur := time.Since(start) + telemetry.Timing("bricksllm.proxy.get_video_handler.latency", dur, nil, 1) + + bytes, err := io.ReadAll(res.Body) + if err != nil { + logError(log, "error when reading openai http video response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + + var cost float64 = 0 + respMetadata := &openai.VideoResponseMetadata{} + 
telemetry.Incr("bricksllm.proxy.get_video_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_video_handler.success_latency", dur, nil, 1) + + err = json.Unmarshal(bytes, respMetadata) + if err != nil { + logError(log, "error when unmarshalling openai http video response body", prod, err) + } + + isPaidRequest := ginCtx.Request.Method == http.MethodPost + if err == nil && isPaidRequest { + cost, err = e.EstimateVideoCost(respMetadata) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_video_handler.estimate_cost_error", nil, 1) + logError(log, "error when estimating video cost", prod, err) + } + } + ginCtx.Set("costInUsd", cost) + ginCtx.Data(res.StatusCode, res.Header.Get("Content-Type"), bytes) + return + } +} + +func constructVideoURL(fullPath string) (string, error) { + if fullPath == "" { + return "", errors.New("empty full path") + } + if !strings.HasPrefix(fullPath, "/api/providers/openai") { + return "", errors.New("invalid path prefix") + } + path := strings.TrimPrefix(fullPath, "/api/providers/openai") + return "https://api.openai.com" + path, nil +}