Run inference on thousands of models through Hugging Face Inference Providers directly from the command line.
curl -LsSf https://hf.co/cli/install.sh | bash
hf extensions install hf-inference
hf inference run "What is the capital of France?" --model Qwen/Qwen3.5-35B-A3B
hf inference run "Explain quicksort" --model moonshotai/Kimi-K2.5 --stream
hf inference run "Translate to French: hello world" --model Qwen/Qwen3.5-35B-A3B --provider cheapest

Pick a specific provider or routing policy with --provider:
hf inference run "Hello" --model moonshotai/Kimi-K2.5 --provider novita
hf inference run "Hello" --model Qwen/Qwen3.5-35B-A3B --provider cheapest
hf inference run "Hello" --model Qwen/Qwen3.5-35B-A3B --provider fastest

Pipe input via stdin:
cat article.txt | hf inference run --model Qwen/Qwen3.5-35B-A3B --system-prompt "Summarize this"

hf inference list
hf inference list --provider novita
hf inference list --search qwen -n 5
hf inference list --format json
hf inference list -q  # model IDs only

hf inference info moonshotai/Kimi-K2.5

PROVIDER     STATUS CONTEXT INPUT $/M OUTPUT $/M TOOLS STRUCTURED
------------ ------ ------- --------- ---------- ----- ----------
fireworks-ai live 262144 yes no
novita live 262144 0.6 3 yes no
together live 262144 0.5 2.8 yes no
| Variable | Purpose |
|---|---|
| HF_TOKEN | Hugging Face API token (also reads from ~/.cache/huggingface/token) |
