Stream Speech

# Request streamed speech. The response body is binary WAV audio
# (Content-Type: audio/x-wav), so it is written to a file rather than
# dumped to the terminal.
curl --request POST \
  --url https://api.bland.ai/v1/speak/stream \
  --header 'Content-Type: application/json' \
  --header 'authorization: <authorization>' \
  --output speech.wav \
  --data '
{
  "text": "<string>",
  "voice_id": "<string>",
  "output_format": "<string>",
  "language": "<string>",
  "consistency": 123,
  "expressiveness": 123,
  "boost": 123
}
'

import requests

url = "https://api.bland.ai/v1/speak/stream"

# JSON request body; replace the placeholder values with real ones.
payload = {
    "text": "<string>",
    "voice_id": "<string>",
    "output_format": "<string>",
    "language": "<string>",
    "consistency": 123,
    "expressiveness": 123,
    "boost": 123
}
headers = {
    "authorization": "<authorization>",
    "Content-Type": "application/json"
}

# stream=True keeps requests from buffering the whole body, so audio chunks
# can be consumed as they arrive.
with requests.post(url, json=payload, headers=headers, stream=True) as response:
    content_type = response.headers.get("Content-Type", "")
    if content_type.startswith("audio/"):
        # Success: the body is binary WAV data, so write raw bytes to disk
        # instead of decoding them as text.
        with open("speech.wav", "wb") as audio_file:
            for chunk in response.iter_content(chunk_size=8192):
                audio_file.write(chunk)
    else:
        # Anything else (e.g. the JSON error document) is safe to print.
        print(response.text)

// Request configuration; replace the placeholder values with real ones.
const options = {
  method: 'POST',
  headers: {authorization: '<authorization>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    text: '<string>',
    voice_id: '<string>',
    output_format: '<string>',
    language: '<string>',
    consistency: 123,
    expressiveness: 123,
    boost: 123
  })
};

fetch('https://api.bland.ai/v1/speak/stream', options)
  .then(async res => {
    const contentType = res.headers.get('content-type') || '';

    // A non-audio response is an error document, not WAV data.
    if (!contentType.startsWith('audio/')) {
      console.error(await res.text());
      return;
    }

    // Success: the body is chunked binary audio, so read it as a byte
    // stream instead of parsing it as JSON.
    const reader = res.body.getReader();
    let receivedBytes = 0;
    for (;;) {
      const {done, value} = await reader.read();
      if (done) break;
      // `value` is a Uint8Array chunk; hand it to a player or file writer here.
      receivedBytes += value.byteLength;
    }
    console.log(`Received ${receivedBytes} bytes of WAV audio`);
  })
  .catch(err => console.error(err));

<?php

$curl = curl_init();

// Configure the POST request; replace the placeholder values with real ones.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.bland.ai/v1/speak/stream",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'voice_id' => '<string>',
    'output_format' => '<string>',
    'language' => '<string>',
    'consistency' => 123,
    'expressiveness' => 123,
    'boost' => 123
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "authorization: <authorization>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
// The content type must be read before the handle is closed.
$contentType = (string) curl_getinfo($curl, CURLINFO_CONTENT_TYPE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif (strpos($contentType, 'audio/') === 0) {
  // Success: the body is binary WAV data, so save it rather than echoing it.
  file_put_contents('speech.wav', $response);
  echo "Saved " . strlen($response) . " bytes to speech.wav";
} else {
  // Non-audio responses (e.g. the JSON error document) are printed as-is.
  echo $response;
}

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

// run sends the Stream Speech request and writes the returned audio to
// speech.wav. It returns an error instead of panicking when the request
// fails or the server answers with something other than audio.
func run() error {
	url := "https://api.bland.ai/v1/speak/stream"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice_id\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"language\": \"<string>\",\n  \"consistency\": 123,\n  \"expressiveness\": 123,\n  \"boost\": 123\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("building request: %w", err)
	}

	req.Header.Add("authorization", "<authorization>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil here, so it must not be touched.
		return fmt.Errorf("sending request: %w", err)
	}
	defer res.Body.Close()

	// A non-audio response is an error document; surface it as text.
	if !strings.HasPrefix(res.Header.Get("Content-Type"), "audio/") {
		body, readErr := io.ReadAll(res.Body)
		if readErr != nil {
			return fmt.Errorf("reading error response: %w", readErr)
		}
		return fmt.Errorf("unexpected response (%s): %s", res.Status, string(body))
	}

	out, err := os.Create("speech.wav")
	if err != nil {
		return fmt.Errorf("creating output file: %w", err)
	}
	defer out.Close()

	// io.Copy writes each chunk as it arrives instead of buffering the
	// whole stream in memory.
	written, err := io.Copy(out, res.Body)
	if err != nil {
		return fmt.Errorf("writing audio: %w", err)
	}

	fmt.Printf("Saved %d bytes to speech.wav\n", written)
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// JSON request body, assembled line by line for readability; replace the
// placeholder values with real ones.
String requestBody = "{\n"
  + "  \"text\": \"<string>\",\n"
  + "  \"voice_id\": \"<string>\",\n"
  + "  \"output_format\": \"<string>\",\n"
  + "  \"language\": \"<string>\",\n"
  + "  \"consistency\": 123,\n"
  + "  \"expressiveness\": 123,\n"
  + "  \"boost\": 123\n"
  + "}";

// NOTE(review): asString() reads the body as text, while a successful
// response is documented as binary audio/x-wav. Confirm whether a binary
// response method should be used with the Unirest version in this project.
HttpResponse<String> response = Unirest.post("https://api.bland.ai/v1/speak/stream")
  .header("Content-Type", "application/json")
  .header("authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.bland.ai/v1/speak/stream")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# Build the POST request; replace the placeholder values with real ones.
request = Net::HTTP::Post.new(url)
request["authorization"] = '<authorization>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"text\": \"<string>\",\n  \"voice_id\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"language\": \"<string>\",\n  \"consistency\": 123,\n  \"expressiveness\": 123,\n  \"boost\": 123\n}"

# The block form yields the response before its body is read, so the audio
# can be consumed chunk by chunk as it arrives.
http.request(request) do |response|
  if response['Content-Type'].to_s.start_with?('audio/')
    # Success: the body is binary WAV data, so write raw bytes to disk.
    File.open('speech.wav', 'wb') do |file|
      response.read_body { |chunk| file.write(chunk) }
    end
  else
    # Non-audio responses (e.g. the JSON error document) are printed as text.
    warn response.read_body
  end
end

HTTP/1.1 200 OK
Content-Type: audio/x-wav
Transfer-Encoding: chunked
x-latency: 396
x-cost: 0.001

<chunked WAV with placeholder header sizes followed by PCM16 data>

{
  "data": null,
  "errors": [
    { "error": "Service Not Supported", "message": "This endpoint only supports Bland's Beige Voices" }
  ]
}

POST

speak

stream

Stream Speech

# Request streamed speech. The response body is binary WAV audio
# (Content-Type: audio/x-wav), so it is written to a file rather than
# dumped to the terminal.
curl --request POST \
  --url https://api.bland.ai/v1/speak/stream \
  --header 'Content-Type: application/json' \
  --header 'authorization: <authorization>' \
  --output speech.wav \
  --data '
{
  "text": "<string>",
  "voice_id": "<string>",
  "output_format": "<string>",
  "language": "<string>",
  "consistency": 123,
  "expressiveness": 123,
  "boost": 123
}
'

import requests

url = "https://api.bland.ai/v1/speak/stream"

# JSON request body; replace the placeholder values with real ones.
payload = {
    "text": "<string>",
    "voice_id": "<string>",
    "output_format": "<string>",
    "language": "<string>",
    "consistency": 123,
    "expressiveness": 123,
    "boost": 123
}
headers = {
    "authorization": "<authorization>",
    "Content-Type": "application/json"
}

# stream=True keeps requests from buffering the whole body, so audio chunks
# can be consumed as they arrive.
with requests.post(url, json=payload, headers=headers, stream=True) as response:
    content_type = response.headers.get("Content-Type", "")
    if content_type.startswith("audio/"):
        # Success: the body is binary WAV data, so write raw bytes to disk
        # instead of decoding them as text.
        with open("speech.wav", "wb") as audio_file:
            for chunk in response.iter_content(chunk_size=8192):
                audio_file.write(chunk)
    else:
        # Anything else (e.g. the JSON error document) is safe to print.
        print(response.text)

// Request configuration; replace the placeholder values with real ones.
const options = {
  method: 'POST',
  headers: {authorization: '<authorization>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    text: '<string>',
    voice_id: '<string>',
    output_format: '<string>',
    language: '<string>',
    consistency: 123,
    expressiveness: 123,
    boost: 123
  })
};

fetch('https://api.bland.ai/v1/speak/stream', options)
  .then(async res => {
    const contentType = res.headers.get('content-type') || '';

    // A non-audio response is an error document, not WAV data.
    if (!contentType.startsWith('audio/')) {
      console.error(await res.text());
      return;
    }

    // Success: the body is chunked binary audio, so read it as a byte
    // stream instead of parsing it as JSON.
    const reader = res.body.getReader();
    let receivedBytes = 0;
    for (;;) {
      const {done, value} = await reader.read();
      if (done) break;
      // `value` is a Uint8Array chunk; hand it to a player or file writer here.
      receivedBytes += value.byteLength;
    }
    console.log(`Received ${receivedBytes} bytes of WAV audio`);
  })
  .catch(err => console.error(err));

<?php

$curl = curl_init();

// Configure the POST request; replace the placeholder values with real ones.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.bland.ai/v1/speak/stream",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'voice_id' => '<string>',
    'output_format' => '<string>',
    'language' => '<string>',
    'consistency' => 123,
    'expressiveness' => 123,
    'boost' => 123
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "authorization: <authorization>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
// The content type must be read before the handle is closed.
$contentType = (string) curl_getinfo($curl, CURLINFO_CONTENT_TYPE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif (strpos($contentType, 'audio/') === 0) {
  // Success: the body is binary WAV data, so save it rather than echoing it.
  file_put_contents('speech.wav', $response);
  echo "Saved " . strlen($response) . " bytes to speech.wav";
} else {
  // Non-audio responses (e.g. the JSON error document) are printed as-is.
  echo $response;
}

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

// run sends the Stream Speech request and writes the returned audio to
// speech.wav. It returns an error instead of panicking when the request
// fails or the server answers with something other than audio.
func run() error {
	url := "https://api.bland.ai/v1/speak/stream"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice_id\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"language\": \"<string>\",\n  \"consistency\": 123,\n  \"expressiveness\": 123,\n  \"boost\": 123\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("building request: %w", err)
	}

	req.Header.Add("authorization", "<authorization>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil here, so it must not be touched.
		return fmt.Errorf("sending request: %w", err)
	}
	defer res.Body.Close()

	// A non-audio response is an error document; surface it as text.
	if !strings.HasPrefix(res.Header.Get("Content-Type"), "audio/") {
		body, readErr := io.ReadAll(res.Body)
		if readErr != nil {
			return fmt.Errorf("reading error response: %w", readErr)
		}
		return fmt.Errorf("unexpected response (%s): %s", res.Status, string(body))
	}

	out, err := os.Create("speech.wav")
	if err != nil {
		return fmt.Errorf("creating output file: %w", err)
	}
	defer out.Close()

	// io.Copy writes each chunk as it arrives instead of buffering the
	// whole stream in memory.
	written, err := io.Copy(out, res.Body)
	if err != nil {
		return fmt.Errorf("writing audio: %w", err)
	}

	fmt.Printf("Saved %d bytes to speech.wav\n", written)
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// JSON request body, assembled line by line for readability; replace the
// placeholder values with real ones.
String requestBody = "{\n"
  + "  \"text\": \"<string>\",\n"
  + "  \"voice_id\": \"<string>\",\n"
  + "  \"output_format\": \"<string>\",\n"
  + "  \"language\": \"<string>\",\n"
  + "  \"consistency\": 123,\n"
  + "  \"expressiveness\": 123,\n"
  + "  \"boost\": 123\n"
  + "}";

// NOTE(review): asString() reads the body as text, while a successful
// response is documented as binary audio/x-wav. Confirm whether a binary
// response method should be used with the Unirest version in this project.
HttpResponse<String> response = Unirest.post("https://api.bland.ai/v1/speak/stream")
  .header("Content-Type", "application/json")
  .header("authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.bland.ai/v1/speak/stream")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# Build the POST request; replace the placeholder values with real ones.
request = Net::HTTP::Post.new(url)
request["authorization"] = '<authorization>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"text\": \"<string>\",\n  \"voice_id\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"language\": \"<string>\",\n  \"consistency\": 123,\n  \"expressiveness\": 123,\n  \"boost\": 123\n}"

# The block form yields the response before its body is read, so the audio
# can be consumed chunk by chunk as it arrives.
http.request(request) do |response|
  if response['Content-Type'].to_s.start_with?('audio/')
    # Success: the body is binary WAV data, so write raw bytes to disk.
    File.open('speech.wav', 'wb') do |file|
      response.read_body { |chunk| file.write(chunk) }
    end
  else
    # Non-audio responses (e.g. the JSON error document) are printed as text.
    warn response.read_body
  end
end

HTTP/1.1 200 OK
Content-Type: audio/x-wav
Transfer-Encoding: chunked
x-latency: 396
x-cost: 0.001

<chunked WAV with placeholder header sizes followed by PCM16 data>

{
  "data": null,
  "errors": [
    { "error": "Service Not Supported", "message": "This endpoint only supports Bland's Beige Voices" }
  ]
}

Overview

Streams synthesized audio to the client as it is generated, using Transfer-Encoding: chunked. Functionally equivalent to Speak with stream: true, but mounted as its own canonical endpoint for parity with our internal services and to avoid prefix conflicts with the non-streaming handler. Use this endpoint when you care about time-to-first-byte (live preview, voice assistants, IVRs). Use Speak when you want to buffer the full audio before delivering it.

Requires a Bland TTS voice (BTTS, BTTS_V2, or BTTS_V3). Other voice services are not supported on this endpoint.

Every generation is automatically stored and retrievable via List TTS Generations and Get TTS Generation, the same as the non-streaming endpoint.

Headers

authorization

string

required

Your API key for authentication.

Body Parameters

text

string

required

The text to synthesize. Maximum 5,000 characters per request. Supports pause markers in the form <|N|>, where N is the pause length in seconds (0.1-10.0).

voice_id

string

required

ID of the Bland TTS voice to use. Pass either the voice UUID from List Voices or a curated voice name.

output_format

string

default:"pcm_44100"

Audio container/sample rate.

Allowed values: pcm_8000, pcm_16000, pcm_24000, pcm_44100, ulaw_8000

language

string

Language code. Defaults to the voice’s primary language.

consistency

number

V1: float 0.0-1.0 (higher = more consistent). V2/V3: integer 1-32 (lower = more consistent).

expressiveness

number

V1 only. Float 0.0-1.0.

boost

integer

V2/V3 only. 0 or 1.

Streaming response format

The response is a single WAV file delivered in chunks:

First 44 bytes: a standard WAV header with placeholder sizes. Bytes 4-7 (RIFF chunk size) and 40-43 (data chunk size) are both filled with 0xFFFFFFFF because the final length is not yet known.
Subsequent chunks: raw PCM16 audio data, written as it is synthesized.
After the stream closes: the client can patch the WAV header in place, setting bytes 4-7 to the total file size minus 8 and bytes 40-43 to the total data chunk size. Most decoders ignore the placeholder sizes and play the file fine without the patch, but tools that strictly validate the header will need it.

Content-Type is audio/x-wav. No Content-Length header is sent.

x-latency

string

Time in milliseconds from request to first audio byte.

x-cost

string

Cost in USD for the synthesis.

Transfer-Encoding

string

Always chunked.

HTTP/1.1 200 OK
Content-Type: audio/x-wav
Transfer-Encoding: chunked
x-latency: 396
x-cost: 0.001

<chunked WAV with placeholder header sizes followed by PCM16 data>

{
  "data": null,
  "errors": [
    { "error": "Service Not Supported", "message": "This endpoint only supports Bland's Beige Voices" }
  ]
}

Docs for agents: llms.txt

Speak Mint Stream Input Token

⌘I

Basic Tutorials

Calls

Voices & Text to Speech

Conversational Pathways

Knowledge Bases

Numbers

Blocked Numbers

Widgets

Tools

Contacts

Memory

Node tests

Agent Testing

Evals

Guard Rails

Web Agents

Live Translation

Custom Twilio Accounts

Batches

Prompts

Account

Organizations

Messaging

SIP Trunks

Custom Dialing Pools

Personas

Citation Schemas

Alarms

Triage

Stream Speech

Overview

Headers

Body Parameters

Streaming response format

​Overview

​Headers

​Body Parameters

​Streaming response format

Overview

Headers

Body Parameters

Streaming response format