Skip to content
9 changes: 9 additions & 0 deletions vllm/config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,15 @@ def __post_init__(
architectures, self.runner_type, self.convert
)

if (
is_pooling_model
and not is_generative_model
and self.runner_type in ("draft", "generate")
):
raise ValueError(
f"Embedding models do not support `--runner {self.runner_type}`. "
"Use `--runner pooling` or `--runner auto` for embedding models."
)
if self.runner_type == "generate" and not is_generative_model:
generate_converts = _RUNNER_CONVERTS["generate"]
if self.convert_type not in generate_converts:
Expand Down
Loading