Loading...
Loading...
Generate speech, music, and sound effects using ModelsLab's v7 Voice API. Supports text-to-speech, speech-to-text, speech-to-speech, music generation, sound effects, dubbing, song extension, and song inpainting via ElevenLabs and Inworld models.
npx skill4agent add modelslab/skills modelslab-audio-generation

- POST https://modelslab.com/api/v7/voice/text-to-speech
- POST https://modelslab.com/api/v7/voice/speech-to-text
- POST https://modelslab.com/api/v7/voice/speech-to-speech
- POST https://modelslab.com/api/v7/voice/music-gen
- POST https://modelslab.com/api/v7/voice/sound-generation
- POST https://modelslab.com/api/v7/voice/create-dubbing
- POST https://modelslab.com/api/v7/voice/song-extender
- POST https://modelslab.com/api/v7/voice/song-inpaint
- POST https://modelslab.com/api/v7/voice/fetch/{id}

Note: v6 endpoints (`/api/v6/voice/text_to_speech`, etc.) still work, but v7 is the current version. Parameter names have changed in v7 (e.g., `text` is now `prompt`, `audio` is now `init_audio`).
# Search audio/voice models
modelslab models search --feature audio_gen
# Search by provider
modelslab models search --search "eleven"
# Get model details
modelslab models detail --id eleven_multilingual_v2

| model_id | Name | Use With |
|---|---|---|
| `eleven_multilingual_v2` | ElevenLabs Multilingual v2 | text-to-speech |
| `eleven_english_sts_v2` | ElevenLabs Voice Changer | speech-to-speech |
| `scribe_v1` | ElevenLabs Scribe | speech-to-text |
| `eleven_sound_effect` | ElevenLabs Sound Effects | sound-generation |
| `music_v1` | ElevenLabs Music | music-gen |
| | Inworld TTS | text-to-speech |
import requests
import time
def text_to_speech(
    text,
    api_key,
    voice_id="21m00Tcm4TlvDq8ikWAM",
    model_id="eleven_multilingual_v2",
    request_timeout=30,
):
    """Convert text to speech.

    Args:
        text: The text to convert to speech
        api_key: Your ModelsLab API key
        voice_id: ElevenLabs voice ID (see Available Voices below)
        model_id: TTS model to use
        request_timeout: Seconds to wait for the HTTP response before
            requests gives up and raises a timeout error

    Returns:
        The first item of the response's "output" list when the API reports
        "success" (the usage example treats it as the audio URL), or the
        value returned by poll_audio_result when the job is still
        "processing".

    Raises:
        Exception: If the API reports any other status (or none at all).
    """
    response = requests.post(
        "https://modelslab.com/api/v7/voice/text-to-speech",
        json={
            "key": api_key,
            "prompt": text,  # v7 uses "prompt" not "text"
            "voice_id": voice_id,
            "model_id": model_id,
        },
        # Without an explicit timeout, requests can wait forever on a
        # stalled connection.
        timeout=request_timeout,
    )
    data = response.json()

    # .get() so a payload without "status" falls through to the error
    # branch instead of surfacing as a bare KeyError.
    status = data.get("status")
    if status == "success":
        return data["output"][0]
    if status == "processing":
        return poll_audio_result(data["id"], api_key)
    raise Exception(f"Error: {data.get('message', 'Unknown error')}")
# Usage
audio_url = text_to_speech(
    "Hello! Welcome to ModelsLab. This is a test of our text-to-speech API.",
    "your_api_key"
)
print(f"Audio URL: {audio_url}")


def speech_to_text(audio_url, api_key, model_id="scribe_v1", request_timeout=30):
    """Transcribe speech from audio to text.

    Args:
        audio_url: URL of audio file (must be publicly accessible)
        api_key: Your ModelsLab API key
        model_id: STT model to use
        request_timeout: Seconds to wait for the HTTP response before
            requests gives up and raises a timeout error

    Returns:
        The first item of the response's "output" list when the API reports
        "success", or the value returned by poll_audio_result when the job
        is still "processing".

    Raises:
        Exception: If the API reports any other status (or none at all).
    """
    response = requests.post(
        "https://modelslab.com/api/v7/voice/speech-to-text",
        json={
            "key": api_key,
            "init_audio": audio_url,  # v7 uses "init_audio" not "audio"
            "model_id": model_id,
        },
        # Without an explicit timeout, requests can wait forever on a
        # stalled connection.
        timeout=request_timeout,
    )
    data = response.json()

    status = data.get("status")
    if status == "success":
        return data["output"][0]
    if status == "processing":
        return poll_audio_result(data["id"], api_key)
    # Fall back to a readable message so the exception is never Exception(None).
    raise Exception(data.get("message", "Unknown error"))
# Transcribe audio
result = speech_to_text(
    "https://example.com/speech.mp3",
    "your_api_key"
)
print(f"Transcription: {result}")


def speech_to_speech(
    audio_url,
    voice_id,
    api_key,
    model_id="eleven_english_sts_v2",
    request_timeout=30,
):
    """Convert voice characteristics in audio.

    Args:
        audio_url: URL of the source audio
        voice_id: Target ElevenLabs voice ID
        api_key: Your ModelsLab API key
        model_id: Voice conversion model
        request_timeout: Seconds to wait for the HTTP response before
            requests gives up and raises a timeout error

    Returns:
        The first item of the response's "output" list when the API reports
        "success", or the value returned by poll_audio_result when the job
        is still "processing".

    Raises:
        Exception: If the API reports any other status (or none at all).
    """
    response = requests.post(
        "https://modelslab.com/api/v7/voice/speech-to-speech",
        json={
            "key": api_key,
            "init_audio": audio_url,
            "voice_id": voice_id,
            "model_id": model_id,
        },
        # Without an explicit timeout, requests can wait forever on a
        # stalled connection.
        timeout=request_timeout,
    )
    data = response.json()

    status = data.get("status")
    if status == "success":
        return data["output"][0]
    if status == "processing":
        return poll_audio_result(data["id"], api_key)
    # Report API errors instead of silently returning None, matching
    # text_to_speech and speech_to_text.
    raise Exception(data.get("message", "Unknown error"))


def generate_sound_effect(
    description,
    api_key,
    model_id="eleven_sound_effect",
    request_timeout=30,
):
    """Generate a sound effect from a text description.

    Args:
        description: What sound to generate
        api_key: Your ModelsLab API key
        model_id: Sound effects model
        request_timeout: Seconds to wait for the HTTP response before
            requests gives up and raises a timeout error

    Returns:
        The first item of the response's "output" list when the API reports
        "success", or the value returned by poll_audio_result when the job
        is still "processing".

    Raises:
        Exception: If the API reports any other status (or none at all).
    """
    response = requests.post(
        "https://modelslab.com/api/v7/voice/sound-generation",
        json={
            "key": api_key,
            "prompt": description,
            "model_id": model_id,
        },
        timeout=request_timeout,
    )
    data = response.json()

    status = data.get("status")
    if status == "success":
        return data["output"][0]
    if status == "processing":
        return poll_audio_result(data["id"], api_key)
    # Report API errors instead of silently returning None.
    raise Exception(data.get("message", "Unknown error"))
# Generate door slam sound
sfx_url = generate_sound_effect(
    "Heavy wooden door slamming shut",
    "your_api_key"
)


def generate_music(prompt, api_key, model_id="music_v1", request_timeout=30):
    """Generate music from a text description.

    Args:
        prompt: Description of music style/mood
        api_key: Your ModelsLab API key
        model_id: Music generation model
        request_timeout: Seconds to wait for the HTTP response before
            requests gives up and raises a timeout error

    Returns:
        The first item of the response's "output" list when the API reports
        "success", or the value returned by poll_audio_result when the job
        is still "processing".

    Raises:
        Exception: If the API reports any other status (or none at all).
    """
    response = requests.post(
        "https://modelslab.com/api/v7/voice/music-gen",
        json={
            "key": api_key,
            "prompt": prompt,
            "model_id": model_id,
        },
        # Without an explicit timeout, requests can wait forever on a
        # stalled connection.
        timeout=request_timeout,
    )
    data = response.json()

    status = data.get("status")
    if status == "success":
        return data["output"][0]
    if status == "processing":
        return poll_audio_result(data["id"], api_key)
    # Report API errors instead of silently returning None, matching
    # text_to_speech and speech_to_text.
    raise Exception(data.get("message", "Unknown error"))
# Generate background music
music_url = generate_music(
    "Upbeat electronic music with a driving beat, perfect for a tech startup video",
    "your_api_key"
)
print(f"Music: {music_url}")


def poll_audio_result(request_id, api_key, timeout=300, interval=5, request_timeout=30):
    """Poll for async audio generation results.

    Args:
        request_id: The "id" returned by the initial request while its
            status was "processing"
        api_key: Your ModelsLab API key
        timeout: Overall number of seconds to keep polling
        interval: Seconds to sleep between polls
        request_timeout: Seconds to wait for each HTTP response before
            requests gives up and raises a timeout error

    Returns:
        The first item of the fetch response's "output" list once the API
        reports "success".

    Raises:
        Exception: If the API reports a failure, or if no result arrives
            before the overall timeout elapses.
    """
    # A monotonic clock keeps the deadline immune to system clock changes.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        fetch = requests.post(
            f"https://modelslab.com/api/v7/voice/fetch/{request_id}",
            json={"key": api_key},
            timeout=request_timeout,
        )
        result = fetch.json()

        status = result.get("status")
        if status == "success":
            return result["output"][0]
        # NOTE(review): "error" is treated as terminal here, like "failed",
        # on the assumption that the fetch endpoint can report it; otherwise
        # such a response would be re-polled until the deadline.
        if status in ("failed", "error"):
            raise Exception(result.get("message", "Generation failed"))
        time.sleep(interval)
    raise Exception("Timeout waiting for audio generation")

| Voice ID | Name | Style |
|---|---|---|
| Rachel | Neutral, calm |
| Domi | Confident |
| Bella | Soft, warm |
| Antoni | Well-rounded |
| Elli | Young, clear |
| Josh | Deep, warm |
| Arnold | Strong |
| Adam | Deep, narrative |
| Sam | Dynamic |
| Parameter | Type | Required | Description |
|---|---|---|---|
| `prompt` | string | Yes | Text to convert to speech |
| `voice_id` | string | Yes | ElevenLabs voice identifier |
| `model_id` | string | Yes | TTS model (e.g., `eleven_multilingual_v2`) |
| | float | No | Voice variation |
| `webhook` | string | No | Async notification URL |
| Parameter | Type | Required | Description |
|---|---|---|---|
| `init_audio` | string | Yes | URL of audio to transcribe |
| `model_id` | string | Yes | STT model (e.g., `scribe_v1`) |
| Parameter | Type | Required | Description |
|---|---|---|---|
| `prompt` | string | Yes | Sound effect description |
| `model_id` | string | Yes | SFX model (e.g., `eleven_sound_effect`) |
| v6 Parameter | v7 Parameter | Notes |
|---|---|---|
| `text` | `prompt` | TTS text input |
| `audio` | `init_audio` | STT/STS audio input |
| | | Voice-to-voice source |
| (not required) | `model_id` | Now required on all endpoints |
# Example request payload that includes a webhook URL and a track_id.
payload = {
    "key": api_key,
    "prompt": "...",
    "model_id": "eleven_multilingual_v2",
    "webhook": "https://yourserver.com/webhook/audio",
    "track_id": "audio_001"
}

# Error handling: text_to_speech raises Exception on failure, so wrap the call.
try:
    audio = text_to_speech(text, api_key)
    print(f"Audio generated: {audio}")
except Exception as e:
    print(f"Audio generation failed: {e}")

- modelslab-model-discovery
- modelslab-video-generation
- modelslab-chat-generation
- modelslab-webhooks