Loading...
Loading...
Use when the user wants to create a dataset, generate synthetic data, or build a data generation pipeline.
npx skill4agent add nvidia-nemo/datadesigner data-designer
`workflows/interactive.md` `workflows/autopilot.md` `references/seed-datasets.md` `references/person-sampling.md` `sampler_type="category"` `params=dd.CategorySamplerParams(...)` `prompt` `system_prompt` `expr` `{{ column_name }}` `{{ column_name.field }}` `SamplerColumnConfig` `params` `sampler_params` `LLMJudgeColumnConfig` `{reasoning: str, score: int}` `.score` `quality` `correctness` `{{ quality.correctness.score }}` `{{ quality.correctness }}` `data-designer` `data-designer` `load_config_builder()` `DataDesignerConfigBuilder` `customer_reviews.py`
# /// script
# dependencies = [
# "data-designer", # always required
# "pydantic", # only if this script imports from pydantic
# # add additional dependencies here
# ]
# ///
import data_designer.config as dd
from pydantic import BaseModel, Field
# Use Pydantic models when the output needs to conform to a specific schema
class MyStructuredOutput(BaseModel):
    # Template schema with two placeholder fields; rename them and replace the
    # "..." descriptions with real ones before use.
    # NOTE(review): documented with comments rather than a class docstring on
    # purpose -- Pydantic copies a model's docstring into the generated JSON
    # schema's "description", which would alter the schema this model produces.
    field_one: str = Field(description="...")
    field_two: int = Field(description="...")
# Use custom generators when built-in column types aren't enough
@dd.custom_column_generator(
    required_columns=["col_a"],
    side_effect_columns=["extra_col"],
)
def generator_function(row: dict) -> dict:
    """Template custom column generator: fill in values on ``row`` and return it.

    As written, it stores a placeholder under ``"name_in_custom_column_config"``
    and another under ``"extra_col"``, mutating ``row`` in place.

    NOTE(review): presumably ``required_columns`` lists the columns this
    function reads, ``side_effect_columns`` lists extra columns it writes, and
    the ``"name_in_custom_column_config"`` key must match the name given to the
    custom column in the config -- confirm against the Data Designer docs.

    Args:
        row: The record being generated, as a plain dict keyed by column name.

    Returns:
        The same ``row`` object, after the new keys have been set.
    """
    # add custom logic here that depends on "col_a" and update row in place
    row["name_in_custom_column_config"] = "custom value"
    row["extra_col"] = "extra value"
    return row
def load_config_builder() -> dd.DataDesignerConfigBuilder:
    """Create and return the config builder for this script.

    The template returns an empty ``dd.DataDesignerConfigBuilder``; the
    commented lines below mark where a seed dataset, columns, and processors
    are meant to be registered.

    NOTE(review): presumably the Data Designer tooling looks this function up
    by name, so keep the name and the zero-argument signature -- confirm.

    Returns:
        The ``dd.DataDesignerConfigBuilder`` instance created here.
    """
    config_builder = dd.DataDesignerConfigBuilder()
    # Seed dataset (only if the user explicitly mentions a seed dataset path)
    # config_builder.with_seed_dataset(dd.LocalFileSeedSource(path="path/to/seed.parquet"))
    # config_builder.add_column(...)
    # config_builder.add_processor(...)
    return config_builder