Configure generators¶
Goal: pick and configure a generator implementation for your run.
When to use this:
Use this guide when generation is the main variable you are changing and you already know the task you want to run.
Procedure¶
Use this chooser when generation is the variable you are changing and the rest of the runtime should remain stable.
flowchart TD
A["What should produce the candidate?"] --> B{"Deterministic local example?"}
B -->|Yes| C["builtin/demo_generator"]
B -->|No| D{"External model or graph runtime?"}
D -->|Yes| E["Provider adapter"]
D -->|No| F["Custom Generator"]
The choice is mainly about where candidate production lives, not whether Themis still owns fan-out, storage, and inspection.
Use the builtin demo generator for deterministic tutorials, smoke tests, and local examples.
Use provider adapters when Themis should still own fan-out, reduction, storage, and inspection, but an external model or graph runtime should produce the candidate output.
Prompt-focused experiments:
- set `GenerationConfig.prompt_spec` when you want prompt instructions, prefixes, suffixes, or generic prompt blocks to be part of the experiment identity
- `PromptSpec.blocks` is intentionally generic prompt material, not an example-specific feature
- prompt specs flow into `GenerateContext`, so custom generators can consume them directly
- provider-backed adapters such as OpenAI also consume prompt specs, which means prompt changes invalidate generation-stage cache reuse as expected
Review these example sources:
from __future__ import annotations
from themis import Experiment
from themis.core.config import EvaluationConfig, GenerationConfig, StorageConfig
from themis.core.models import Case, Dataset, GenerationResult
class CustomGenerator:
    """Tiny hand-written generator for the custom-generator example.

    It is written to satisfy the Generator protocol: it carries a
    ``component_id`` and ``version``, reports a ``fingerprint()``, and
    implements an async ``generate()``.
    """

    # Identity attributes declared on the class itself.
    component_id = "generator/custom_example"
    version = "1.0"

    def fingerprint(self) -> str:
        """Return the constant fingerprint string of this generator."""
        fixed_fingerprint = "custom-example-generator"
        return fixed_fingerprint

    async def generate(self, case: Case, ctx: object) -> GenerationResult:
        """Produce a single candidate whose answer is always ``"4"``.

        Args:
            case: The case being generated for; only ``case.case_id`` is read.
            ctx: Generation context; intentionally unused by this example.

        Returns:
            A ``GenerationResult`` whose ``candidate_id`` is derived from the
            case id and whose ``final_output`` is ``{"answer": "4"}``.
        """
        del ctx  # The example deliberately ignores the context.
        candidate_id = f"{case.case_id}-candidate"
        answer_payload = {"answer": "4"}
        return GenerationResult(
            candidate_id=candidate_id,
            final_output=answer_payload,
        )
def run_example() -> dict[str, object]:
    """Run a one-case experiment driven by a ``CustomGenerator`` instance.

    Returns:
        A dict holding the run's ``run_id`` and the ``.value`` of its status.
    """
    generation_cfg = GenerationConfig(generator=CustomGenerator())
    evaluation_cfg = EvaluationConfig(
        metrics=["builtin/exact_match"],
        parsers=["builtin/json_identity"],
    )
    storage_cfg = StorageConfig(store="memory")

    # A single arithmetic case whose expected output matches what
    # CustomGenerator always returns.
    sample_case = Case(
        case_id="case-1",
        input={"question": "2+2"},
        expected_output={"answer": "4"},
    )
    sample_dataset = Dataset(dataset_id="sample", cases=[sample_case])

    experiment = Experiment(
        generation=generation_cfg,
        evaluation=evaluation_cfg,
        storage=storage_cfg,
        datasets=[sample_dataset],
    )
    outcome = experiment.run()

    summary = {"run_id": outcome.run_id, "status": outcome.status.value}
    return summary
if __name__ == "__main__":
    # Script entry point: run the example and print its summary dict.
    summary = run_example()
    print(summary)
from __future__ import annotations
from types import SimpleNamespace
from themis import Experiment, InMemoryRunStore
from themis.adapters import openai
from themis.core.config import EvaluationConfig, GenerationConfig, StorageConfig
from themis.core.models import Case, Dataset
class _FakeResponses:
    """Fake ``responses`` endpoint that always answers ``"4"``."""

    async def create(self, **kwargs: object) -> object:
        """Return a canned response object.

        Args:
            **kwargs: Arbitrary request keyword arguments; they are echoed
                back under the ``"request"`` key of ``model_dump()``.

        Returns:
            A ``SimpleNamespace`` carrying ``id``, ``output_text``, ``usage``,
            ``headers`` and a ``model_dump`` callable.
        """

        def _dump(mode="json"):
            # ``mode`` is accepted but has no effect on the returned dict.
            return {"request": kwargs, "output_text": "4"}

        token_usage = SimpleNamespace(input_tokens=3, output_tokens=1)
        rate_limit_headers = {"x-ratelimit-limit-requests": "60"}
        return SimpleNamespace(
            id="resp-1",
            output_text="4",
            usage=token_usage,
            headers=rate_limit_headers,
            model_dump=_dump,
        )
class _FakeClient:
    """Fake client exposing only a ``responses`` attribute."""

    @property
    def responses(self) -> _FakeResponses:
        """Return a fresh ``_FakeResponses`` on every access."""
        fake_endpoint = _FakeResponses()
        return fake_endpoint
def run_example() -> dict[str, object]:
    """Run the OpenAI adapter against a fake, injected client.

    Returns:
        A dict with the run's ``run_id``, the ``.value`` of its status, and
        the sorted artifact keys of the first generated candidate (an empty
        list when the candidate has no artifacts).

    Raises:
        RuntimeError: If the run produced no case results, or the first case
            has no generated candidates.
    """
    store = InMemoryRunStore()
    generator = openai(
        "gpt-fake",
        client=_FakeClient(),
        instructions="Answer with only the final number.",
    )

    generation_cfg = GenerationConfig(generator=generator)
    evaluation_cfg = EvaluationConfig()
    storage_cfg = StorageConfig(store="memory")
    sample_dataset = Dataset(
        dataset_id="sample",
        cases=[Case(case_id="case-1", input={"question": "2+2"})],
    )
    experiment = Experiment(
        generation=generation_cfg,
        evaluation=evaluation_cfg,
        storage=storage_cfg,
        datasets=[sample_dataset],
        seeds=[7],
    )
    result = experiment.run(store=store)

    # Fail loudly rather than with an IndexError when the run came back empty.
    if not result.cases:
        raise RuntimeError("OpenAI example expected at least one case result")
    first_case = result.cases[0]
    if not first_case.generated_candidates:
        raise RuntimeError("OpenAI example expected at least one generated candidate")

    candidate = first_case.generated_candidates[0]
    if candidate.artifacts is None:
        artifact_keys = []
    else:
        artifact_keys = sorted(candidate.artifacts)

    return {
        "run_id": result.run_id,
        "status": result.status.value,
        "artifact_keys": artifact_keys,
    }
if __name__ == "__main__":
    # Script entry point: run the OpenAI example and print its summary dict.
    summary = run_example()
    print(summary)
from __future__ import annotations
from types import SimpleNamespace
from themis import Experiment
from themis.adapters import vllm
from themis.core.config import EvaluationConfig, GenerationConfig, StorageConfig
from themis.core.models import Case, Dataset
class _FakeResponses:
    """Fake ``responses`` endpoint for the vLLM example; always answers ``"4"``."""

    async def create(self, **kwargs: object) -> object:
        """Return a canned response object.

        Args:
            **kwargs: Arbitrary request keyword arguments; they are echoed
                back under the ``"request"`` key of ``model_dump()``.

        Returns:
            A ``SimpleNamespace`` carrying ``id``, ``output_text``, ``usage``,
            ``headers`` and a ``model_dump`` callable.
        """

        def _dump(mode="json"):
            # ``mode`` is accepted but has no effect on the returned dict.
            return {"request": kwargs, "output_text": "4"}

        token_usage = SimpleNamespace(input_tokens=3, output_tokens=1)
        rate_limit_headers = {"x-ratelimit-limit-requests": "120"}
        return SimpleNamespace(
            id="vllm-resp-1",
            output_text="4",
            usage=token_usage,
            headers=rate_limit_headers,
            model_dump=_dump,
        )
class _FakeChatCompletions:
    """Fake chat-completions endpoint that always replies with ``"4"``."""

    async def create(self, **kwargs: object) -> object:
        """Return a canned chat-completion object.

        Args:
            **kwargs: Arbitrary request keyword arguments; they are echoed
                back under the ``"request"`` key of ``model_dump()``.

        Returns:
            A ``SimpleNamespace`` carrying ``id``, ``choices``, ``usage``,
            ``headers`` and a ``model_dump`` callable.
        """

        def _dump(mode="json"):
            # ``mode`` is accepted but has no effect on the returned dict.
            return {
                "request": kwargs,
                "choices": [{"message": {"content": "4"}}],
            }

        reply = SimpleNamespace(content="4")
        only_choice = SimpleNamespace(message=reply)
        token_usage = SimpleNamespace(prompt_tokens=3, completion_tokens=1)
        rate_limit_headers = {"x-ratelimit-limit-requests": "120"}
        return SimpleNamespace(
            id="chat-1",
            choices=[only_choice],
            usage=token_usage,
            headers=rate_limit_headers,
            model_dump=_dump,
        )
class _FakeChat:
    """Fake ``chat`` namespace exposing only ``completions``."""

    @property
    def completions(self) -> _FakeChatCompletions:
        """Return a fresh ``_FakeChatCompletions`` on every access."""
        fake_completions = _FakeChatCompletions()
        return fake_completions
class _FakeClient:
    """Fake client exposing both a ``responses`` and a ``chat`` attribute."""

    @property
    def responses(self) -> _FakeResponses:
        """Return a fresh ``_FakeResponses`` on every access."""
        fake_responses = _FakeResponses()
        return fake_responses

    @property
    def chat(self) -> _FakeChat:
        """Return a fresh ``_FakeChat`` on every access."""
        fake_chat = _FakeChat()
        return fake_chat
def run_example() -> dict[str, object]:
    """Run the vLLM adapter against a fake, injected client.

    The generator is created with ``api_mode="chat_completions"``.

    Returns:
        A dict with the run's ``run_id``, the ``.value`` of its status, and
        the generator's ``api_mode``.
    """
    generator = vllm(
        "fake-vllm",
        base_url="http://localhost:8000/v1",
        client=_FakeClient(),
        api_mode="chat_completions",
    )

    generation_cfg = GenerationConfig(generator=generator)
    evaluation_cfg = EvaluationConfig()
    storage_cfg = StorageConfig(store="memory")
    sample_dataset = Dataset(
        dataset_id="sample",
        cases=[Case(case_id="case-1", input={"question": "2+2"})],
    )
    experiment = Experiment(
        generation=generation_cfg,
        evaluation=evaluation_cfg,
        storage=storage_cfg,
        datasets=[sample_dataset],
        seeds=[7],
    )
    outcome = experiment.run()

    summary = {
        "run_id": outcome.run_id,
        "status": outcome.status.value,
        "api_mode": generator.api_mode,
    }
    return summary
if __name__ == "__main__":
    # Script entry point: run the vLLM example and print its summary dict.
    summary = run_example()
    print(summary)
from __future__ import annotations
from themis import Experiment
from themis.adapters import langgraph
from themis.core.config import EvaluationConfig, GenerationConfig, StorageConfig
from themis.core.models import Case, Dataset
class _FakeGraph:
    """Fake graph object offering ``ainvoke`` and ``astream_events``."""

    async def ainvoke(self, payload: object) -> object:
        """Return a fixed answer of ``"4"`` together with the given payload."""
        final_state = {"answer": "4", "input": payload}
        return final_state

    async def astream_events(self, payload: object, *, version: str):
        """Yield exactly one ``"plan"`` step event for the given payload.

        Args:
            payload: Echoed back under ``data["input"]`` of the event.
            version: Accepted as a keyword-only argument but unused.
        """
        del version  # Part of the call signature only; not consulted here.
        plan_event = {
            "name": "plan",
            "event": "step",
            "data": {"input": payload, "output": {"proposed_answer": "4"}},
        }
        yield plan_event
def run_example() -> dict[str, object]:
    """Run the LangGraph adapter against a fake graph.

    Returns:
        A dict with the run's ``run_id``, the ``.value`` of its status, and
        ``trace_steps``: the number of entries in the first generated
        candidate's ``trace`` (0 when the trace is empty or ``None``).

    Raises:
        RuntimeError: If the run produced no case results, or the first case
            has no generated candidates.
    """
    generator = langgraph(_FakeGraph(), graph_id="fake-graph", output_key="answer")
    experiment = Experiment(
        generation=GenerationConfig(generator=generator),
        evaluation=EvaluationConfig(),
        storage=StorageConfig(store="memory"),
        datasets=[
            Dataset(
                dataset_id="sample",
                cases=[Case(case_id="case-1", input={"question": "2+2"})],
            )
        ],
        seeds=[7],
    )
    result = experiment.run()

    # Guard the lookups so an empty run fails with a descriptive error
    # instead of an opaque IndexError.
    if not result.cases:
        raise RuntimeError("LangGraph example expected at least one case result")
    first_case = result.cases[0]
    if not first_case.generated_candidates:
        raise RuntimeError(
            "LangGraph example expected at least one generated candidate"
        )

    candidate = first_case.generated_candidates[0]
    # ``trace`` may be None; treat that the same as an empty trace.
    trace_steps = len(candidate.trace or [])
    return {
        "run_id": result.run_id,
        "status": result.status.value,
        "trace_steps": trace_steps,
    }
if __name__ == "__main__":
    # Script entry point: run the LangGraph example and print its summary dict.
    summary = run_example()
    print(summary)
Provider-backed examples should be runnable with injected fake clients in docs/tests, even when real deployments need optional extras.
Variants¶
| Variant | Best when | Tradeoff | Related APIs / commands |
|---|---|---|---|
| Builtin deterministic runs | You want tutorials, smoke tests, or fixture-backed examples without external providers | Not representative of production model behavior | builtin/demo_generator |
| Provider-backed generation | An external endpoint or graph runtime should generate outputs while Themis owns the rest of the run | Requires provider extras, clients, or service setup | themis.adapters.openai(...), themis.adapters.vllm(...), themis.adapters.langgraph(...) |
| Fully custom generation | Candidate production logic belongs entirely in your own code | Highest implementation effort | Generator |
| Prompt-only experiment change | The generator stays fixed and prompt material is the only experiment variable | Less useful when provider or generator behavior also needs to change | GenerationConfig.prompt_spec, PromptSpec.blocks |
Expected result¶
You should know which generator style matches your run and what prerequisites or optional extras are required.