Loading...
Loading...
Chat with LLM models using ModelsLab's OpenAI-compatible Chat Completions API. Supports 60+ models including DeepSeek R1, Meta Llama, Google Gemini, Qwen, and Mistral with streaming, function calling, and structured outputs.
npx skill4agent add modelslab/skills modelslab-chat-generation

POST https://modelslab.com/api/v7/llm/chat/completions

import requests
def chat(message, api_key, model="meta-llama-3-8B-instruct"):
    """Send a single-turn chat completion request.

    Args:
        message: The user message.
        api_key: Your ModelsLab API key.
        model: LLM model ID (use ``modelslab models search --feature llmaster``
            to find models).

    Returns:
        The assistant's reply text.

    Raises:
        Exception: If the API response contains no choices.
    """
    response = requests.post(
        "https://modelslab.com/api/v7/llm/chat/completions",
        json={
            "key": api_key,
            "model_id": model,
            "messages": [
                {"role": "user", "content": message}
            ],
            "max_tokens": 200,
            "temperature": 0.7
        },
        timeout=60,  # fail fast instead of hanging forever on network stalls
    )
    data = response.json()
    if "choices" in data:
        return data["choices"][0]["message"]["content"]
    else:
        raise Exception(f"Error: {data.get('message', 'Unknown error')}")
# Usage
reply = chat(
    "Explain quantum computing in simple terms.",
    "your_api_key"
)
print(reply)


def chat_with_system(system_prompt, message, api_key, model="meta-llama-3-8B-instruct"):
    """Chat with a system prompt for role/behavior control.

    Args:
        system_prompt: Instructions that set the assistant's role and behavior.
        message: The user message.
        api_key: Your ModelsLab API key.
        model: LLM model ID.

    Returns:
        The assistant's reply text.

    Raises:
        Exception: If the API response contains no choices.
    """
    response = requests.post(
        "https://modelslab.com/api/v7/llm/chat/completions",
        json={
            "key": api_key,
            "model_id": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message}
            ],
            "max_tokens": 500,
            "temperature": 0.7
        },
        timeout=60,  # fail fast instead of hanging forever on network stalls
    )
    data = response.json()
    if "choices" in data:
        return data["choices"][0]["message"]["content"]
    # Surface API errors instead of silently returning None (matches chat())
    raise Exception(f"Error: {data.get('message', 'Unknown error')}")
# Usage
reply = chat_with_system(
    "You are a Python expert. Give concise code examples.",
    "How do I read a CSV file?",
    "your_api_key"
)


def conversation(messages, api_key, model="meta-llama-3-8B-instruct"):
    """Send a multi-turn conversation.

    Args:
        messages: Full message history — a list of dicts with ``role``
            (system/user/assistant) and ``content`` keys.
        api_key: Your ModelsLab API key.
        model: LLM model ID.

    Returns:
        The raw parsed JSON response dict (caller extracts ``choices``).
    """
    response = requests.post(
        "https://modelslab.com/api/v7/llm/chat/completions",
        json={
            "key": api_key,
            "model_id": model,
            "messages": messages,
            "max_tokens": 500,
            "temperature": 0.7
        },
        timeout=60,  # fail fast instead of hanging forever on network stalls
    )
    return response.json()
# Multi-turn example
# Prior assistant turns are included in the history so the model keeps context
# across the conversation; the API is stateless and sees only this list.
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is Python?"},
{"role": "assistant", "content": "Python is a high-level programming language..."},
{"role": "user", "content": "Show me a hello world example."}
]
# conversation() returns the raw JSON dict, not just the reply text
result = conversation(messages, "your_api_key")
print(result["choices"][0]["message"]["content"])

from openai import OpenAI
# ModelsLab exposes an OpenAI-compatible endpoint, so the official OpenAI SDK
# works by pointing base_url at ModelsLab and using a ModelsLab API key.
client = OpenAI(
base_url="https://modelslab.com/api/v7/llm",
api_key="your_modelslab_api_key"
)
# Note: the SDK uses `model`, whereas the raw API examples use `model_id`.
response = client.chat.completions.create(
model="meta-llama-3-8B-instruct",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
],
max_tokens=100,
temperature=0.7
)
print(response.choices[0].message.content)

curl -X POST "https://modelslab.com/api/v7/llm/chat/completions" \
-H "Content-Type: application/json" \
-d '{
"key": "your_api_key",
"model_id": "meta-llama-3-8B-instruct",
"messages": [
{"role": "user", "content": "Say hello in one sentence."}
],
"max_tokens": 50,
"temperature": 0.7
}'

# Search all chat/LLM models
modelslab models search --feature llmaster
# Search by provider
modelslab models search --search "deepseek"
modelslab models search --search "llama"
modelslab models search --search "gemini"
# Get model details
modelslab models detail --id meta-llama-3-8B-instruct

Popular model IDs:
- meta-llama-3-8B-instruct
- meta-llama-Llama-3.3-70B-Instruct-Turbo
- meta-llama-Meta-Llama-3.1-405B-Instruct-Turbo
- deepseek-ai-DeepSeek-R1-Distill-Llama-70B
- deepseek-ai-DeepSeek-V3
- gemini-2.0-flash-001
- gemini-2.5-pro
- Qwen-Qwen2.5-72B-Instruct-Turbo
- Qwen-Qwen2.5-Coder-32B-Instruct
- mistralai-Mixtral-8x7B-Instruct-v0.1
- mistralai-Mistral-Small-24B-Instruct-2501

| Parameter | Type | Required | Description |
|---|---|---|---|
| model_id | string | Yes | LLM model identifier (alias: model) |
| messages | array | Yes | Array of message objects with role and content |
| temperature | float | No | Sampling temperature (0-2, default varies by model) |
| max_tokens | integer | No | Maximum tokens to generate |
| top_p | float | No | Nucleus sampling (0-1) |
| top_k | integer | No | Top-k sampling |
| frequency_penalty | float | No | Frequency penalty (-2 to 2) |
| presence_penalty | float | No | Presence penalty (-2 to 2) |
| stream | boolean | No | Enable streaming responses (alias: streaming) |
| n | integer | No | Number of completions (1-10) |
| stop | array | No | Stop sequences (max 4) |
| seed | integer | No | Random seed for reproducibility |
| tools | array | No | Function/tool definitions |
| tool_choice | string | No | Tool selection strategy |
| response_format | object | No | Structured output format specification |
{
"id": "gen-...",
"model": "meta-llama-3-8B-instruct",
"object": "chat.completion",
"created": 1771658583,
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": "Hello! How can I help you today?"
}
}
],
"usage": {
"prompt_tokens": 17,
"completion_tokens": 10,
"total_tokens": 27
}
}def safe_chat(message, api_key, model="meta-llama-3-8B-instruct"):
"""Chat with error handling."""
try:
response = requests.post(
"https://modelslab.com/api/v7/llm/chat/completions",
json={
"key": api_key,
"model_id": model,
"messages": [{"role": "user", "content": message}],
"max_tokens": 200
}
)
data = response.json()
if "choices" in data:
return data["choices"][0]["message"]["content"]
elif data.get("status") == "error":
raise Exception(data.get("message", "Unknown error"))
else:
raise Exception(f"Unexpected response: {data}")
except requests.exceptions.RequestException as e:
raise Exception(f"Network error: {e}")
try:
reply = safe_chat("Hello!", "your_api_key")
print(reply)
except Exception as e:
print(f"Chat failed: {e}")modelslab-model-discoverymodelslab-account-managementmodelslab-image-generation