{"models":[{"id":"groq-qwen","provider":"groq","name":"Qwen3-32B","wave9_quality":1,"cost_per_1k_tokens":0,"best_for":["classification","creative","structured-output"],"latency_class":"ultra-fast","tier_required":"starter","notes":"Empirically best quality at $0 cost. Default for classification tasks."},{"id":"claude-sonnet-4-6","provider":"anthropic","name":"Claude Sonnet 4.6","wave9_quality":0.927,"cost_per_1k_tokens":0.071,"best_for":["complex","multi-file","architectural-decisions"],"latency_class":"medium","tier_required":"scale","notes":"Refusal-corrected #1 quality. Reserved for complex tasks."},{"id":"groq-llama4","provider":"groq","name":"Llama 4","wave9_quality":0.92,"cost_per_1k_tokens":0,"best_for":["research","factual-retrieval","summarization"],"latency_class":"fast","tier_required":"starter","notes":"Free research-grade model. Strong on factual tasks."},{"id":"claude-haiku-4-5","provider":"anthropic","name":"Claude Haiku 4.5","wave9_quality":0.887,"cost_per_1k_tokens":0.004,"best_for":["routine","simple-generation","fast-response"],"latency_class":"fast","tier_required":"starter","notes":"Best Anthropic model for latency-sensitive routine tasks."},{"id":"deepseek","provider":"deepseek","name":"DeepSeek-Chat","wave9_quality":0.881,"cost_per_1k_tokens":0.0005,"best_for":["code-edit","file-patch","code-review"],"latency_class":"fast","tier_required":"starter","notes":"Best cost-performance for code tasks at $0.0005/1K tokens."},{"id":"deepseek-r1","provider":"deepseek","name":"DeepSeek-R1","wave9_quality":0.874,"cost_per_1k_tokens":0.004,"best_for":["reasoning","multi-step-deduction","math"],"latency_class":"medium","tier_required":"pro","notes":"27× cheaper than Opus for reasoning tasks."}],"updated_at":"2026-05-09","benchmark":"NEXUS-Bench v0 wave-9","note":"Quality scores are empirical Brier-calibrated measurements, not marketing claims.","routing_default":"Pareto-optimal: cheapest model meeting quality_threshold for task_type"}