Loading...
Loading...
Self-hosted Firecrawl deployment, troubleshooting, and best practices. TRIGGERS - firecrawl, self-hosted scraping, web scrape, scraper wrapper, littleblack, ZeroTier scraping.
npx skill4agent add terrylica/cc-skills firecrawl-self-hosted┌─────────────────────────────────────────────────────────────────┐
│ LittleBlack (172.25.236.1) │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Client │───▶│ Scraper │───▶│ Firecrawl │ │
│ │ (curl) │ │ Wrapper :3003│ │ API :3002 │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │ │
│ │ │ ▼ │
│ │ │ ┌──────────────┐ │
│ │ │ │ Playwright │ │
│ │ │ │ Service │ │
│ │ │ └──────────────┘ │
│ │ │ │ │
│ │ ▼ ▼ │
│ │ ┌──────────────┐ ┌──────────────┐ │
│ │ │ Caddy :8080 │ │ Redis │ │
│ │ │ (files) │ │ RabbitMQ │ │
│ ▼ └──────────────┘ └──────────────┘ │
│ ┌──────────────┐ │
│ │ Output URL │◀── http://172.25.236.1:8080/NAME-TS.md │
│ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘| Port | Service | Type | Purpose |
|---|---|---|---|
| 3002 | Firecrawl API | Docker | Core scraping engine |
| 3003 | Scraper Wrapper | Bun | Saves to file, returns URL |
| 8080 | Caddy | Binary | Serves saved markdown |
curl "http://172.25.236.1:3003/scrape?url=URL&name=NAME"{
"url": "http://172.25.236.1:8080/NAME-TIMESTAMP.md",
"file": "NAME-TIMESTAMP.md"
}curl -s -X POST http://172.25.236.1:3002/v1/scrape \
-H "Content-Type: application/json" \
-d '{"url":"URL","formats":["markdown"],"waitFor":5000}' \
| jq -r '.data.markdown'# All containers running?
ssh littleblack 'docker ps --filter "name=firecrawl" --format "{{.Names}}: {{.Status}}"'
# API responding?
ssh littleblack 'curl -s -o /dev/null -w "%{http_code}" http://localhost:3002/v1/scrape'
# Expected: 401 (no payload) or 200 (with payload)
# Wrapper responding?
curl -s -o /dev/null -w "%{http_code}" "http://172.25.236.1:3003/health"# systemd services
ssh littleblack "systemctl --user status firecrawl firecrawl-scraper caddy-firecrawl"
# Docker container details
ssh littleblack 'docker ps -a --filter "name=firecrawl" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"'
# Logs (live)
ssh littleblack "journalctl --user -u firecrawl -u firecrawl-scraper -u caddy-firecrawl -f"no# Check container status
ssh littleblack 'docker ps -a --filter "name=firecrawl"'
# Check restart policy
ssh littleblack 'docker inspect --format "{{.Name}}: {{.HostConfig.RestartPolicy.Name}}" $(docker ps -a --filter "name=firecrawl" -q)'restart: unless-stoppeddocker-compose.yaml# ~/firecrawl/docker-compose.yaml
# Shared defaults; every service below pulls these in with `<<: *common-service`.
x-common-service: &common-service
  networks:
    - backend
  # Without a restart policy a container that exits stays down.
  restart: unless-stopped  # CRITICAL: Add this line
  logging:
    driver: "json-file"
    options:
      max-size: "1G"
      max-file: "4"

services:
  playwright-service:
    <<: *common-service
    # ... rest of config
  api:
    <<: *common-service
    # ... rest of config
  redis:
    <<: *common-service
    # ... rest of config
  rabbitmq:
    <<: *common-service
# ... rest of configssh littleblack 'cd ~/firecrawl && docker compose up -d --force-recreate'ssh littleblack 'docker inspect --format "{{.Name}}: RestartPolicy={{.HostConfig.RestartPolicy.Name}}" $(docker ps -a --filter "name=firecrawl" -q)'
# All should show: RestartPolicy=unless-stoppedssh littleblack "systemctl --user status firecrawl-scraper"ssh littleblack "systemctl --user restart firecrawl-scraper"ssh littleblack "systemctl --user status caddy-firecrawl"
curl -I http://172.25.236.1:8080/ssh littleblack "systemctl --user restart caddy-firecrawl"# From local machine
ping 172.25.236.1
# Check ZeroTier status
zerotier-cli listnetworkscd ~
git clone https://github.com/mendableai/firecrawl.git
cd firecrawlx-common-service: &common-service
networks:
- backend
restart: unless-stopped # <-- ADD THIS
logging:
driver: "json-file"
options:
max-size: "1G"
max-file: "4"services:
api:
<<: *common-service
# ...
playwright-service:
<<: *common-service
# ...
redis:
<<: *common-service
# ...
rabbitmq:
<<: *common-service
# ....envcp .env.example .env# .env
NUM_WORKERS_PER_QUEUE=2
PORT=3002
HOST=0.0.0.0
REDIS_URL=redis://redis:6379
REDIS_RATE_LIMIT_URL=redis://redis:6379docker compose up -ddocker inspect --format "{{.Name}}: RestartPolicy={{.HostConfig.RestartPolicy.Name}}" \
$(docker ps -a --filter "name=firecrawl" -q)unless-stopped~/firecrawl-scraper.tsimport { serve } from "bun";
import { $ } from "bun";
const FIRECRAWL_API = "http://localhost:3002";
const OUTPUT_DIR = "/home/kab/firecrawl-output";
/**
 * HTTP wrapper in front of the Firecrawl API, listening on port 3003.
 *
 * Routes:
 *   /health                      -> 200 "OK"
 *   /scrape?url=URL[&name=NAME]  -> asks Firecrawl for the page as markdown,
 *                                   writes it to OUTPUT_DIR/NAME-TIMESTAMP.md
 *                                   and returns { url, file } as JSON
 *   anything else                -> 404 "Not Found"
 *
 * Error responses from /scrape:
 *   400 - `url` query parameter missing
 *   502 - the Firecrawl request failed or did not return JSON
 *   500 - Firecrawl answered but the payload contained no markdown
 */
serve({
  port: 3003,
  async fetch(req) {
    const url = new URL(req.url);

    if (url.pathname === "/health") {
      return new Response("OK", { status: 200 });
    }

    if (url.pathname === "/scrape") {
      const targetUrl = url.searchParams.get("url");
      if (!targetUrl) {
        return Response.json(
          { error: "url parameter required" },
          { status: 400 },
        );
      }

      // `name` is caller-controlled and ends up in both a filesystem path and
      // the returned URL. Restrict it to filename-safe characters so a value
      // such as "../../x" cannot escape OUTPUT_DIR, and cap its length.
      const rawName = url.searchParams.get("name") || "scraped";
      const name = rawName.replace(/[^A-Za-z0-9._-]/g, "_").slice(0, 100);

      // A refused connection or a non-JSON body would otherwise throw out of
      // the handler; report it as a gateway error instead.
      let data;
      try {
        const response = await fetch(`${FIRECRAWL_API}/v1/scrape`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            url: targetUrl,
            formats: ["markdown"],
            waitFor: 5000,
          }),
        });
        data = await response.json();
      } catch (err) {
        return Response.json(
          { error: "Firecrawl API request failed", detail: String(err) },
          { status: 502 },
        );
      }

      const markdown = data?.data?.markdown;
      if (!markdown) {
        return Response.json(
          { error: "No markdown returned" },
          { status: 500 },
        );
      }

      // ISO timestamp with ":" and "." replaced so it is safe in a filename.
      const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
      const filename = `${name}-${timestamp}.md`;
      const filepath = `${OUTPUT_DIR}/${filename}`;
      await Bun.write(filepath, markdown);

      return Response.json({
        url: `http://172.25.236.1:8080/${filename}`,
        file: filename,
      });
    }

    return new Response("Not Found", { status: 404 });
  },
});
~/.config/systemd/user/firecrawl-scraper.service[Unit]
Description=Firecrawl Scraper Wrapper
After=network.target
[Service]
Type=simple
WorkingDirectory=/home/kab
ExecStart=/home/kab/.bun/bin/bun run firecrawl-scraper.ts
Restart=always
RestartSec=5
[Install]
WantedBy=default.targetsystemctl --user daemon-reload
systemctl --user enable --now firecrawl-scraper# Download and extract (check releases for current version)
wget https://github.com/caddyserver/caddy/releases/download/v<version>/caddy_<version>_linux_amd64.tar.gz # SSoT-OK
tar xzf caddy_*.tar.gz
chmod +x caddy~/.config/systemd/user/caddy-firecrawl.service[Unit]
Description=Caddy Firecrawl File Server
After=network.target
[Service]
Type=simple
WorkingDirectory=/home/kab
ExecStart=/home/kab/caddy file-server --root /home/kab/firecrawl-output --listen :8080 --browse
Restart=always
RestartSec=5
[Install]
WantedBy=default.targetsystemctl --user daemon-reload
systemctl --user enable --now caddy-firecrawlrestart: unless-stoppednoservices:
api:
image: firecrawl/api
# Missing restart policy = container dies and stays deadservices:
api:
image: firecrawl/api
restart: unless-stopped # Auto-restart on crash or signalrestart: unless-stoppedx-common-service: &common-service
restart: unless-stopped
logging:
driver: "json-file"
options:
max-size: "1G"
max-file: "4"
services:
api:
<<: *common-service
# ...docker compose up -ddocker inspect --format "{{.Name}}: {{.HostConfig.RestartPolicy.Name}}" \
$(docker ps -a --filter "name=firecrawl" -q)Restart=always[Service]
Restart=always
RestartSec=5# Add to crontab
# cron jobs run without the login session environment, so XDG_RUNTIME_DIR is
# set explicitly to let `systemctl --user` reach the user service manager.
*/5 * * * * curl -sf http://localhost:3002/health || XDG_RUNTIME_DIR=/run/user/$(id -u) systemctl --user restart firecrawl
| Path on LittleBlack | Purpose |
|---|---|
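| `~/firecrawl/` | Firecrawl Docker deployment |
| `~/firecrawl/docker-compose.yaml` | Docker orchestration (EDIT THIS) |
| `~/firecrawl/.env` | Environment configuration |
| `~/firecrawl-scraper.ts` | Bun wrapper script |
| `~/firecrawl-output/` | Saved markdown files (Caddy root) |
| `~/caddy` | Caddy binary |
| `~/.config/systemd/user/` | User systemd services |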
# Quick-reference commands. Each one runs on the littleblack host over SSH;
# the single quotes keep the whole command line unexpanded locally so it is
# interpreted by the remote shell.
# Full restart (all services)
# First the Docker Compose stack, then the two user-level systemd units.
ssh littleblack 'cd ~/firecrawl && docker compose restart'
ssh littleblack 'systemctl --user restart firecrawl-scraper caddy-firecrawl'
# Check everything
# The systemctl status part only runs if `docker ps` succeeds (joined with &&).
ssh littleblack 'docker ps --filter "name=firecrawl" && systemctl --user status firecrawl-scraper caddy-firecrawl --no-pager'
# Logs (last 100 lines)
# Container logs for the API, then the journal for the wrapper service.
ssh littleblack 'docker logs firecrawl-api-1 --tail 100'
ssh littleblack 'journalctl --user -u firecrawl-scraper --no-pager -n 100'
# Force recreate with new config
# Use this instead of the plain restart above after editing docker-compose.yaml.
ssh littleblack 'cd ~/firecrawl && docker compose up -d --force-recreate'
# Verify restart policies
# $(docker ps ... -q) is evaluated on littleblack and supplies the container IDs.
ssh littleblack 'docker inspect --format "{{.Name}}: RestartPolicy={{.HostConfig.RestartPolicy.Name}}" $(docker ps -a --filter "name=firecrawl" -q)'