Build and deploy applications on inference.sh. Use when getting started, understanding the platform, creating apps, configuring resources, or needing an overview of inference.sh app development. Supports both Python and Node.js. Triggers: inference.sh app, belt app, inf.yml, inference.py, inference.js, deploy app, app development, build app, create app, GPU app, VRAM, app resources, app secrets, app integrations, multi-function app
## Install this skill

```bash
npx skill4agent add inference-sh-skills/skills building-inferencesh-apps
```

## Key gotchas

- Every app is defined by `inf.yml` (configuration) plus `inference.py` or `inference.js` (logic); `belt app init` scaffolds them, along with `__init__.py` / `package.json`. See `PROVIDER_STRUCTURE.md` for multi-app providers.
- `output_meta` requires your output class to extend `BaseAppOutput`; a plain pydantic `BaseModel` will not work.
- Always `cd` into the app directory before running `belt` commands; the working directory does not persist between commands.
- Log progress with `self.logger.info(...)` inside `run()`.

## Provider structure

- Each app directory needs an `__init__.py` containing `from .inference import App`.
- Put shared code in `provider/shared_helper.py` and import it with `from .shared_helper import func`.
- Symlink it into each app: `provider/app-name/shared_helper.py -> ../shared_helper.py`.
- Keep `provider/app-name/__init__.py` in place so the package imports resolve.

## Install the belt CLI

```bash
curl -fsSL https://cli.inference.sh | sh
belt update   # Update CLI
belt login # Authenticate
belt me       # Check current user
```

## Create an app

`belt app init` scaffolds `inf.yml` and the language entrypoint. Node.js apps get `"type": "module"` in `package.json`.

```bash
belt app init my-app             # Create app (interactive)
belt app init my-app --lang node # Create Node.js app
```

`belt app init my-app` generates `inference.py` (or `inference.js` for Node.js), `inf.yml`, and `requirements.txt` (or `package.json`).

## Test locally

```bash
cd my-app                 # ALWAYS cd into app dir first
belt app test --save-example # Generate sample input from schema
belt app test # Run with input.json
belt app test --input '{"prompt": "hello"}'  # Or inline JSON
```

## Deploy

```bash
cd my-app                 # cd again — cwd doesn't persist
belt app deploy --dry-run # Validate first
belt app deploy           # Deploy for real
```

To see `output_meta` in the response, run the deployed app with `--json`:

```bash
belt app run user/app --json --input '{"prompt": "hello"}'
```

`output_meta` is only emitted when the output class extends `BaseAppOutput` rather than a plain `BaseModel`.

```bash
# Other useful commands
belt app run user/app --input input.json
belt app sample user/app
belt app sample user/app --save input.json
```
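A minimal `input.json` for the example app below; `belt app sample user/app --save input.json` generates one matching your app's actual schema:

```json
{
  "prompt": "hello"
}
```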
## Python app structure

```python
from inferencesh import BaseApp, BaseAppInput, BaseAppOutput
from pydantic import Field


class AppSetup(BaseAppInput):
    """Setup parameters — triggers re-init when changed"""
    model_id: str = Field(default="gpt2", description="Model to load")


class AppInput(BaseAppInput):
    prompt: str = Field(description="Input prompt")


class AppOutput(BaseAppOutput):
    result: str = Field(description="Output result")


class App(BaseApp):
    async def setup(self, config: AppSetup):
        """Runs once when worker starts or config changes"""
        self.model = load_model(config.model_id)

    async def run(self, input_data: AppInput) -> AppOutput:
        """Default function — runs for each request"""
        self.logger.info(f"Processing prompt: {input_data.prompt[:50]}")
        result = self.model.generate(input_data.prompt)
        self.logger.info("Generation complete")
        return AppOutput(result=result)

    async def unload(self):
        """Cleanup on shutdown"""
        pass

    async def on_cancel(self):
        """Called when user cancels — for long-running tasks"""
        return True
```
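`on_cancel` only helps if `run()` cooperates. A minimal sketch of cooperative cancellation; the `self._cancelled` flag and the stepwise `generate_step` call are illustrative, not part of the SDK:

```python
class App(BaseApp):
    async def setup(self, config: AppSetup):
        self._cancelled = False
        self.model = load_model(config.model_id)

    async def run(self, input_data: AppInput) -> AppOutput:
        self._cancelled = False
        parts = []
        for step in range(100):
            if self._cancelled:
                self.logger.info(f"Cancelled at step {step}")
                break
            # hypothetical stepwise API standing in for incremental generation
            parts.append(self.model.generate_step(input_data.prompt))
        return AppOutput(result="".join(parts))

    async def on_cancel(self):
        self._cancelled = True  # run() checks this between steps
        return True
```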
## Node.js app structure

```js
import { z } from "zod";

export const AppSetup = z.object({
  modelId: z.string().default("gpt2").describe("Model to load"),
});

export const RunInput = z.object({
  prompt: z.string().describe("Input prompt"),
});

export const RunOutput = z.object({
  result: z.string().describe("Output result"),
});

export class App {
  async setup(config) {
    /** Runs once when worker starts or config changes */
    this.model = loadModel(config.modelId);
  }

  async run(inputData) {
    /** Default function — runs for each request */
    return { result: "done" };
  }

  async unload() {
    /** Cleanup on shutdown */
  }

  async onCancel() {
    /** Called when user cancels — for long-running tasks */
    return true;
  }
}
```

## Multi-function apps

Extra public methods become callable functions, each with schemas named `{PascalName}Input` / `{PascalName}Output`. Methods prefixed with `_`, the lifecycle hooks (`setup`, `unload`, `on_cancel` / `onCancel`), and the `constructor` are not exposed. Callers select a function with `"function": "method_name"`; `default_function` in `inf.yml` sets which method runs when none is specified (default: `run`).
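A sketch of a second function following that convention; `upscale` is a hypothetical function name, and its schemas must be called `UpscaleInput` / `UpscaleOutput`:

```python
class UpscaleInput(BaseAppInput):
    image_url: str = Field(description="Image to upscale")


class UpscaleOutput(BaseAppOutput):
    image_url: str = Field(description="Upscaled image URL")


class App(BaseApp):
    async def run(self, input_data: AppInput) -> AppOutput:
        ...  # default function

    async def upscale(self, input_data: UpscaleInput) -> UpscaleOutput:
        ...  # invoked with "function": "upscale"
```

With `default_function: upscale` in `inf.yml`, requests that name no function would run `upscale` instead of `run`.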
## Calling an external API

```python
import os

import httpx
from inferencesh import BaseApp, BaseAppInput, BaseAppOutput, File
from inferencesh.models.usage import OutputMeta, ImageMeta  # or TextMeta, AudioMeta, etc.
from pydantic import Field


class AppInput(BaseAppInput):
    prompt: str = Field(description="Input prompt")


class AppOutput(BaseAppOutput):  # NOT BaseModel — output_meta requires this
    image: File = Field(description="Generated image")


class App(BaseApp):
    async def setup(self, config):
        self.api_key = os.environ["API_KEY"]
        self.client = httpx.AsyncClient(timeout=120)

    async def run(self, input_data: AppInput) -> AppOutput:
        self.logger.info(f"Calling API with prompt: {input_data.prompt[:80]}")
        response = await self.client.post(
            "https://api.example.com/generate",
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={"prompt": input_data.prompt},
        )
        response.raise_for_status()

        # Write output file
        output_path = "/tmp/output.png"
        with open(output_path, "wb") as f:
            f.write(response.content)

        # Read actual dimensions (don't hardcode!)
        from PIL import Image
        with Image.open(output_path) as img:
            width, height = img.size

        self.logger.info(f"Generated {width}x{height} image")
        return AppOutput(
            image=File(path=output_path),
            output_meta=OutputMeta(
                outputs=[ImageMeta(width=width, height=height, count=1)]
            ),
        )

    async def unload(self):
        await self.client.aclose()
```

## Project structure

Python:

```
my-app/
├── inf.yml # Configuration
├── inference.py # App logic
├── requirements.txt # Python packages (pip)
└── packages.txt     # System packages (apt) — optional
```

Node.js:

```
my-app/
├── inf.yml # Configuration
├── src/
│ └── inference.js # App logic
├── package.json # Node.js packages (npm/pnpm)
└── packages.txt     # System packages (apt) — optional
```

## inf.yml

```yaml
name: my-app
description: What my app does
category: image
kernel: python-3.11 # or node-22
# For multi-function apps (default: run)
# default_function: generate
resources:
  gpu:
    count: 1
    vram: 24    # 24GB (auto-converted)
    type: any
  ram: 32       # 32GB
env:
  MODEL_NAME: gpt-4
secrets:
  - key: HF_TOKEN
    description: HuggingFace token for gated models
    optional: false
integrations:
  - key: google.sheets
    description: Access to Google Sheets
    optional: true
```

`vram` and `ram` are plain GB values (e.g. `24` or `80`) and are auto-converted. `gpu.type` is one of `any`, `nvidia`, `amd`, `apple`, or `none`. Note: Currently only NVIDIA CUDA GPUs are supported.
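Secrets are read inside the app the same way `API_KEY` is read in the external-API example above; this sketch assumes declared secrets are injected as environment variables under their `key` (`load_gated_model` is a hypothetical loader):

```python
import os


class App(BaseApp):
    async def setup(self, config):
        # HF_TOKEN is declared under secrets: in inf.yml with optional: false,
        # so we expect it to be set here (assumption: secrets arrive as env vars)
        token = os.environ["HF_TOKEN"]
        self.model = load_gated_model("org/gated-model", token=token)
```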
`category` is one of `image`, `video`, `audio`, `text`, `chat`, `3d`, or `other`.

CPU-only apps:

```yaml
resources:
  gpu:
    count: 0
    type: none
  ram: 4
```

## Dependencies

`requirements.txt` (Python packages, pip):

```
torch>=2.0
transformers
accelerate
```

`package.json` (Node.js packages, npm/pnpm):

```json
{
"type": "module",
"dependencies": {
"zod": "^3.23.0",
"sharp": "^0.33.0"
}
}packages.txtffmpeg
libgl1-mesa-glx
```
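Packages listed there are installed with apt and available on the worker's `PATH`. A sketch of shelling out to `ffmpeg` from `run()` (file paths and flags are illustrative):

```python
import asyncio


async def extract_audio(src: str, dst: str) -> None:
    # ffmpeg comes from packages.txt (apt), not from pip
    proc = await asyncio.create_subprocess_exec(
        "ffmpeg", "-y", "-i", src, "-vn", "-acodec", "libmp3lame", dst,
        stdout=asyncio.subprocess.DEVNULL,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(f"ffmpeg failed: {stderr.decode()[-500:]}")
```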
## Base images

| Type | Image |
|---|---|
| GPU | |
| CPU | |
## Device selection (GPU apps)

Use `accelerate` for device selection rather than calling `torch.cuda.is_available()` by hand:

```python
from accelerate import Accelerator
accelerator = Accelerator()
self.device = accelerator.device
```

Move models to that device with `.to(device)` (or use `device_map` when loading large models):

```python
self.model = SomeModel.from_pretrained("org/model")
self.model = self.model.to(device=self.device, dtype=torch.float16)
```

Remember to add `accelerate` to `requirements.txt`.
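Putting those pieces together in `setup()`; `SomeModel` stands in for your actual model class:

```python
import torch
from accelerate import Accelerator
from inferencesh import BaseApp


class App(BaseApp):
    async def setup(self, config):
        # accelerate picks cuda / mps / cpu without manual capability checks
        self.device = Accelerator().device
        self.model = SomeModel.from_pretrained("org/model").to(
            device=self.device, dtype=torch.float16
        )
```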