Create Transcription - Requesty Docs

Create transcription

curl --request POST \
  --url https://router.requesty.ai/v1/audio/transcriptions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: multipart/form-data' \
  --form file='@example-file' \
  --form model=openai/gpt-4o-transcribe \
  --form 'language=<string>'

import requests

url = "https://router.requesty.ai/v1/audio/transcriptions"

files = { "file": ("example-file", open("example-file", "rb")) }
payload = {
    "model": "openai/gpt-4o-transcribe",
    "language": "<string>"
}
headers = {"Authorization": "Bearer <token>"}

response = requests.post(url, data=payload, files=files, headers=headers)

print(response.text)

const form = new FormData();
form.append('file', '<string>');
form.append('model', 'openai/gpt-4o-transcribe');
form.append('language', '<string>');

const options = {method: 'POST', headers: {Authorization: 'Bearer <token>'}};

options.body = form;

fetch('https://router.requesty.ai/v1/audio/transcriptions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://router.requesty.ai/v1/audio/transcriptions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => "-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--",
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: multipart/form-data"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

func main() {

	url := "https://router.requesty.ai/v1/audio/transcriptions"

	payload := strings.NewReader("-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--")

	req, _ := http.NewRequest("POST", url, payload)

	req.Header.Add("Authorization", "Bearer <token>")

	res, _ := http.DefaultClient.Do(req)

	defer res.Body.Close()
	body, _ := io.ReadAll(res.Body)

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://router.requesty.ai/v1/audio/transcriptions")
  .header("Authorization", "Bearer <token>")
  .body("-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://router.requesty.ai/v1/audio/transcriptions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request.body = "-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--"

response = http.request(request)
puts response.read_body

{
  "text": "Hello, world.",
  "usage": {
    "type": "tokens",
    "input_tokens": 123,
    "output_tokens": 123,
    "total_tokens": 123,
    "input_token_details": {
      "audio_tokens": 123,
      "text_tokens": 123
    }
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

POST

audio

transcriptions

Create transcription

curl --request POST \
  --url https://router.requesty.ai/v1/audio/transcriptions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: multipart/form-data' \
  --form file='@example-file' \
  --form model=openai/gpt-4o-transcribe \
  --form 'language=<string>'

import requests

url = "https://router.requesty.ai/v1/audio/transcriptions"

files = { "file": ("example-file", open("example-file", "rb")) }
payload = {
    "model": "openai/gpt-4o-transcribe",
    "language": "<string>"
}
headers = {"Authorization": "Bearer <token>"}

response = requests.post(url, data=payload, files=files, headers=headers)

print(response.text)

const form = new FormData();
form.append('file', '<string>');
form.append('model', 'openai/gpt-4o-transcribe');
form.append('language', '<string>');

const options = {method: 'POST', headers: {Authorization: 'Bearer <token>'}};

options.body = form;

fetch('https://router.requesty.ai/v1/audio/transcriptions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://router.requesty.ai/v1/audio/transcriptions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => "-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--",
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: multipart/form-data"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

func main() {

	url := "https://router.requesty.ai/v1/audio/transcriptions"

	payload := strings.NewReader("-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--")

	req, _ := http.NewRequest("POST", url, payload)

	req.Header.Add("Authorization", "Bearer <token>")

	res, _ := http.DefaultClient.Do(req)

	defer res.Body.Close()
	body, _ := io.ReadAll(res.Body)

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://router.requesty.ai/v1/audio/transcriptions")
  .header("Authorization", "Bearer <token>")
  .body("-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://router.requesty.ai/v1/audio/transcriptions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request.body = "-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example-file\"\r\nContent-Type: application/octet-stream\r\n\r\n<string>\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"model\"\r\n\r\nopenai/gpt-4o-transcribe\r\n-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n<string>\r\n-----011000010111000001101001--"

response = http.request(request)
puts response.read_body

{
  "text": "Hello, world.",
  "usage": {
    "type": "tokens",
    "input_tokens": 123,
    "output_tokens": 123,
    "total_tokens": 123,
    "input_token_details": {
      "audio_tokens": 123,
      "text_tokens": 123
    }
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

{
  "error": {
    "message": "<string>"
  }
}

Transcribe audio into text using OpenAI’s speech-to-text models through Requesty’s routing.

Base URL

https://router.requesty.ai/v1/audio/transcriptions

Authentication

Include your Requesty API key in the request headers:

Authorization: Bearer YOUR_REQUESTY_API_KEY

Example Request

The endpoint accepts multipart/form-data. Send the audio as the file field and the model identifier as the model field.

curl https://router.requesty.ai/v1/audio/transcriptions \
  -H "Authorization: Bearer YOUR_REQUESTY_API_KEY" \
  -F "model=openai/gpt-4o-transcribe" \
  -F "file=@./meeting.mp3"

Example response:

{
  "text": "Hello, this is a transcription of the audio.",
  "usage": {
    "type": "tokens",
    "input_tokens": 14,
    "output_tokens": 11,
    "total_tokens": 25,
    "input_token_details": {
      "audio_tokens": 14,
      "text_tokens": 0
    }
  }
}

OpenAI SDK

The endpoint is fully compatible with the OpenAI SDK. Just point the client at Requesty’s base URL:

from openai import OpenAI

client = OpenAI(
    base_url="https://router.requesty.ai/v1",
    api_key="YOUR_REQUESTY_API_KEY",
)

with open("meeting.mp3", "rb") as audio:
    transcript = client.audio.transcriptions.create(
        model="openai/gpt-4o-transcribe",
        file=audio,
    )

print(transcript.text)

import OpenAI from "openai";
import fs from "node:fs";

const client = new OpenAI({
  baseURL: "https://router.requesty.ai/v1",
  apiKey: process.env.REQUESTY_API_KEY,
});

const transcript = await client.audio.transcriptions.create({
  model: "openai/gpt-4o-transcribe",
  file: fs.createReadStream("meeting.mp3"),
});

console.log(transcript.text);

Supported Models

Browse the full catalog on the Transcription model library. The OpenAI transcription models available today are:

Model	Best for	Billing
`openai/gpt-4o-transcribe`	Highest accuracy, multilingual	Token-based
`openai/gpt-4o-mini-transcribe`	Fast and cost-efficient	Token-based
`openai/whisper-1`	Drop-in replacement for legacy Whisper	Duration-based (per second of audio)

Date-pinned snapshots (for example openai/gpt-4o-mini-transcribe-2025-12-15) are also available when you need a stable model version.

Supported Audio Formats

The file field accepts the following formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. The maximum upload size per request is 32 MB. For longer recordings, split the audio into chunks and concatenate the resulting transcripts on your side.

Language Hint

Set language to the ISO 639-1 code of the spoken language to improve accuracy and latency. When omitted, the model auto-detects the language.

curl https://router.requesty.ai/v1/audio/transcriptions \
  -H "Authorization: Bearer YOUR_REQUESTY_API_KEY" \
  -F "model=openai/gpt-4o-transcribe" \
  -F "language=fr" \
  -F "file=@./conference.m4a"

Response Format

Set response_format to control the output shape. Supported values:

Value	Description
`json`	(default) JSON object with `text` and `usage`
`verbose_json`	JSON object with `task`, `language`, `duration`, and `text`, plus timestamp data (`words` and/or `segments`)

Default JSON (`json`)

When response_format is omitted or set to json, the response is a JSON object with text and a usage block:

Token usage (`gpt-4o-transcribe`, `gpt-4o-mini-transcribe`)

{
  "text": "Hello, world.",
  "usage": {
    "type": "tokens",
    "input_tokens": 14,
    "output_tokens": 11,
    "total_tokens": 25,
    "input_token_details": {
      "audio_tokens": 14,
      "text_tokens": 0
    }
  }
}

Duration usage (`whisper-1`)

{
  "text": "Hello, world.",
  "usage": {
    "type": "duration",
    "seconds": 4.2
  }
}

Use the usage.type discriminator ("tokens" or "duration") to decide how to render or aggregate usage on your side.

Verbose JSON with Timestamps (`verbose_json`)

Use response_format=verbose_json together with timestamp_granularities[] to get word-level and/or segment-level timestamps. This is especially useful for subtitle generation, audio alignment, and video localization. timestamp_granularities[] accepts word, segment, or both (OpenAI whisper-1 supports both simultaneously).

mistral/voxtral-mini-latest supports only a single timestamp_granularities[] value per request. If you need both word and segment timestamps with Mistral, make two separate requests.

curl https://router.requesty.ai/v1/audio/transcriptions \
  -H "Authorization: Bearer YOUR_REQUESTY_API_KEY" \
  -F "model=openai/whisper-1" \
  -F "response_format=verbose_json" \
  -F "timestamp_granularities[]=word" \
  -F "timestamp_granularities[]=segment" \
  -F "file=@./meeting.mp3"

transcript = client.audio.transcriptions.create(
    model="openai/whisper-1",
    file=audio,
    response_format="verbose_json",
    timestamp_granularities=["word", "segment"],
)

for word in transcript.words:
    print(f"{word.start:.2f}s - {word.end:.2f}s: {word.word}")

Example response:

{
  "task": "transcribe",
  "language": "english",
  "duration": 4.2,
  "text": "Hello, world.",
  "words": [
    { "word": "Hello,", "start": 0.0, "end": 0.52 },
    { "word": "world.", "start": 0.52, "end": 1.04 }
  ],
  "segments": [
    {
      "id": 0,
      "start": 0.0,
      "end": 4.2,
      "text": " Hello, world.",
      "tokens": [50364, 2425, 11, 1002, 13, 50574],
      "temperature": 0.0,
      "avg_logprob": -0.28,
      "compression_ratio": 0.7,
      "no_speech_prob": 0.1
    }
  ]
}

The verbose_json response format with timestamp_granularities is supported for openai/whisper-1 and mistral/voxtral-mini-latest. The newer gpt-4o-transcribe models return timestamps through their own response schema.

Pricing

Transcription models are priced either per token of input audio (for gpt-4o-transcribe and gpt-4o-mini-transcribe) or per second of input audio (for whisper-1). The exact rate per model is on the Transcription model library. Charges appear in your usage dashboard immediately after the request completes.

Error Handling

The API returns standard HTTP status codes:

200 Success
400 Bad Request (missing file or model, unsupported audio format)
401 Unauthorized (invalid API key)
404 Model not found or not approved for your organization
413 Payload Too Large (audio file exceeds 32 MB)
429 Rate limited
500 Internal Server Error

This endpoint is fully compatible with the OpenAI Audio Transcriptions API. You can use the OpenAI SDK’s client.audio.transcriptions.create() method directly.

To go the other direction and turn text into audio, use the Create Speech endpoint.

Authorizations

Authorization

string

header

required

API key for authentication

Body

multipart/form-data

file

required

The audio file to transcribe. Supported formats are flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, and webm. Maximum upload size is 32 MB.

model

string

required

The speech-to-text model to use, prefixed with the provider slug. Currently only OpenAI models are supported.

Example:

"openai/gpt-4o-transcribe"

language

string

The language of the input audio in ISO 639-1 format (for example, en, fr, ja). Supplying the language improves accuracy and latency. Auto-detected when omitted.

Response

Transcription result

text

string

required

The transcribed text.

Example:

"Hello, world."

usage

object

required

Usage stats for the transcription. The shape depends on how the model is billed: token-based (gpt-4o-transcribe, gpt-4o-mini-transcribe) or duration-based (whisper-1).

Option 1
Option 2

Show child attributes

Last modified on June 8, 2026

Create Speech Create Image

⌘I

​Base URL

​Authentication

​Example Request

​OpenAI SDK

​Supported Models

​Supported Audio Formats

​Language Hint

​Response Format

​Default JSON (json)

​Token usage (gpt-4o-transcribe, gpt-4o-mini-transcribe)

​Duration usage (whisper-1)

​Verbose JSON with Timestamps (verbose_json)

​Pricing

​Error Handling

Authorizations

Body

Response

Base URL

Authentication

Example Request

OpenAI SDK

Supported Models

Supported Audio Formats

Language Hint

Response Format

Default JSON (`json`)

Token usage (`gpt-4o-transcribe`, `gpt-4o-mini-transcribe`)

Duration usage (`whisper-1`)

Verbose JSON with Timestamps (`verbose_json`)

Pricing

Error Handling