Loading...
Loading...
Run, monitor, analyze, and debug LLM evaluations via nemo-evaluator-launcher. Covers running evaluations, checking status and live progress, debugging failed runs, exporting artifacts and logs, and analyzing results. ALWAYS triggers on mentions of running evaluations, checking progress, debugging failed evals, analyzing or analysing runs or results, run directories or artifact paths on clusters, Slurm job issues, invocation IDs, or inspecting logs (client logs, server logs, SSH to cluster, tail logs, grep logs). Do NOT use for creating or modifying evaluation configs.
npx skill4agent add nvidia/skills launching-evals
# Run evaluation
uv run nemo-evaluator-launcher run --config <path.yaml>
uv run nemo-evaluator-launcher run --config <path.yaml> -t <a_single_task_to_be_run_by_name>
uv run nemo-evaluator-launcher run --config <path.yaml> -t <task_name_1> -t <task_name_2> ...
uv run nemo-evaluator-launcher run --config <path.yaml> -o evaluation.nemo_evaluator_config.config.params.limit_samples=10 ...
# Preview the resolved config and the sbatch script without running the evaluation
uv run nemo-evaluator-launcher run --config <path.yaml> --dry-run
# Check status (--json for machine-readable output)
uv run nemo-evaluator-launcher status <invocation_id> --json
# Get evaluation run info (output paths, slurm job IDs, cluster hostname, etc.)
uv run nemo-evaluator-launcher info <invocation_id>
# Copy just the logs (quick — good for debugging)
uv run nemo-evaluator-launcher info <invocation_id> --copy-logs ./evaluation-results/
# For artifacts: use `nel info` to discover paths. If remote, SSH to explore and rsync what you need.
# If local, just read directly from the paths shown by `nel info`.
# ssh <user>@<hostname> "ls <artifacts_path>/"
# rsync -avzP <user>@<hostname>:<artifacts_path>/{results.yml,eval_factory_metrics.json,config.yml} ./evaluation-results/<invocation_id>.<job_index>/artifacts/
# Resume a failed/interrupted run (re-sbatches existing run.sub in the original run directory)
uv run nemo-evaluator-launcher resume <invocation_id>
# List past runs
uv run nemo-evaluator-launcher ls runs --since 1d
# List available evaluation tasks (by default, only shows tasks from the latest released containers)
uv run nemo-evaluator-launcher ls tasks
uv run nemo-evaluator-launcher ls tasks --from_container nvcr.io/nvidia/eval-factory/simple-evals:26.03
`nel-assistant` `config.yml` `references/run-evaluation.md` `nel run` `references/check-progress.md` `SUCCESS` `references/analyze-results.md` `FAILED` `references/debug-failed-runs.md` `references/benchmarks/` `account` `coreai_dlalgo_compeval` `coreai_dlalgo_llm` `HF_HUB_OFFLINE=1` `python3 -m venv hf_cli && source hf_cli/bin/activate && pip install huggingface_hub` `HF_HOME=/lustre/fsw/portfolios/coreai/users/<username>/cache/huggingface hf download <model>` `LocalEntryNotFoundError` `data_parallel_size` `dp_size=1` `num_nodes=8` `dp_size` `payload_modifier` `params_to_remove` `[max_tokens, max_completion_tokens]` `python:3.12-slim` `git` `auto_export.launcher_install_cmd` `apt-get update -qq && apt-get install -qq -y git && pip install "nemo-evaluator-launcher[all] @ git+...#subdirectory=packages/nemo-evaluator-launcher"` `nemo-evaluator-launcher export --dest local` `processed_results.json` `--copy_logs` `--copy-artifacts` `nel info --copy-artifacts` `nel info` `nel info`