# shinka-setup
Create ShinkaEvolve task scaffolds from a target directory and task description, producing `evaluate.py` and `initial.<ext>` (multi-language). Use when asked to set up new ShinkaEvolve tasks, evaluation harnesses, or baseline programs for ShinkaEvolve.
npx skill4agent add sakanaai/shinkaevolve shinka-setup

Smoke test an evaluator locally with: `python evaluate.py --program_path initial.<ext> --results_dir /tmp/shinka_eval_smoke`

`evaluate.py` must produce `metrics.json` (keys: `combined_score`, `public`, `private`, `extra_data`, `text_feedback`) and `correct.json` (keys: `correct`, `error`). Point `init_program_path` at `initial.<ext>` and launch with `shinka-run` / `run_evo.py` (configured via `shinka.yaml`).

| File | Purpose |
|---|---|
| Starting solution in the chosen language with an evolve region that LLMs mutate |
| Scores candidates and emits metrics/correctness outputs that guide selection |
| (Optional) Launches the evolution loop |
| (Optional) Config: generations, islands, LLM models, patch types, etc. |
# Check if shinka is available in workspace environment
python -c "import shinka"
# If not, install from PyPI
pip install shinka-evolve
# Or with uv
uv pip install shinka-evolve

`initial.<ext>`

| |
|---|---|
| |
| |
| |
| |
| |
| |
| |
`evaluate.py` either calls `run_shinka_eval` (with `experiment_fn_name`) or runs the candidate via `subprocess` and writes `metrics.json` / `correct.json` itself. Set `evo_config.language` and `evo_config.init_program_path` to match `initial.<ext>`.

import random
# EVOLVE-BLOCK-START
def advanced_algo():
# Implement the evolving algorithm here.
return 0.0, ""
# EVOLVE-BLOCK-END
def solve_problem(params):
return advanced_algo()
def run_experiment(random_seed: int | None = None, **kwargs):
"""Main entrypoint called by evaluator."""
if random_seed is not None:
random.seed(random_seed)
score, text = solve_problem(kwargs)
return float(score), textinitial.<ext>evaluate.pyevaluate.pyrun_shinka_evalimport argparse
import numpy as np
from shinka.core import run_shinka_eval # required for results storage
def get_kwargs(run_idx: int) -> dict:
return {"random_seed": int(np.random.randint(0, 1_000_000_000))}
def aggregate_fn(results: list) -> dict:
scores = [r[0] for r in results]
texts = [r[1] for r in results if len(r) > 1]
combined_score = float(np.mean(scores))
text = texts[0] if texts else ""
return {
"combined_score": combined_score,
"public": {},
"private": {},
"extra_data": {},
"text_feedback": text,
}
def validate_fn(result):
# Return (True, None) or (False, "reason")
return True, None
def main(program_path: str, results_dir: str):
metrics, correct, err = run_shinka_eval(
program_path=program_path,
results_dir=results_dir,
experiment_fn_name="run_experiment",
num_runs=3,
get_experiment_kwargs=get_kwargs,
aggregate_metrics_fn=aggregate_fn,
validate_fn=validate_fn, # Optional
)
if not correct:
raise RuntimeError(err or "Evaluation failed")
if __name__ == "__main__":
# argparse program path & dir
parser = argparse.ArgumentParser()
parser.add_argument("--program_path", required=True)
parser.add_argument("--results_dir", required=True)
args = parser.parse_args()
main(program_path=args.program_path, results_dir=args.results_dir)evaluate.pyinitial.<ext>import argparse
import json
import os
from pathlib import Path
def main(program_path: str, results_dir: str):
os.makedirs(results_dir, exist_ok=True)
# 1) Execute candidate program_path (subprocess / runtime-specific call)
# 2) Compute task metrics + correctness
metrics = {
"combined_score": 0.0,
"public": {},
"private": {},
"extra_data": {},
"text_feedback": "",
}
correct = False
error = ""
(Path(results_dir) / "metrics.json").write_text(
json.dumps(metrics, indent=2), encoding="utf-8"
)
(Path(results_dir) / "correct.json").write_text(
json.dumps({"correct": correct, "error": error}, indent=2), encoding="utf-8"
)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--program_path", required=True)
parser.add_argument("--results_dir", required=True)
args = parser.parse_args()
main(program_path=args.program_path, results_dir=args.results_dir)run_evo.pyskills/shinka-setup/scripts/run_evo.pyshinka.yamlskills/shinka-setup/scripts/shinka.yamlcombined_scorepublicprivateextra_datatext_feedbackexperiment_fn_nameinitial.pyinitial.<ext>combined_score