openrouter-routing-rules
Implement intelligent model routing based on request characteristics. Use when optimizing for cost, speed, or quality per request. Trigger with phrases like 'openrouter routing', 'model selection', 'smart routing', 'dynamic model'.
Install
mkdir -p .claude/skills/openrouter-routing-rules && curl -L -o skill.zip "https://mcp.directory/api/skills/download/8027" && unzip -o skill.zip -d .claude/skills/openrouter-routing-rules && rm skill.zip
Installs to .claude/skills/openrouter-routing-rules
About this skill
OpenRouter Routing Rules
Overview
Beyond simple task-based model selection, production systems need configurable routing rules that consider user tier, cost budget, time of day, model availability, and feature requirements. This skill covers building a rules engine for OpenRouter model selection with config-driven rules, dynamic conditions, and override capabilities.
Rules Engine
import os, json, time
from dataclasses import dataclass
from typing import Optional, Callable
from openai import OpenAI
# Shared client pointed at OpenRouter's API base URL. The key is read from the
# environment when this statement runs, so an unset OPENROUTER_API_KEY raises
# KeyError here rather than on the first request.
client = OpenAI(
    api_key=os.environ["OPENROUTER_API_KEY"],
    base_url="https://openrouter.ai/api/v1",
    default_headers={
        "HTTP-Referer": "https://my-app.com",
        "X-Title": "my-app",
    },
)
@dataclass
class RoutingContext:
    """Per-request facts that routing rules are evaluated against."""

    # Subscription level: "free" | "basic" | "pro" | "enterprise"
    user_tier: str = "free"
    # Kind of work requested: "chat" | "code" | "analysis" | "classification"
    task_type: str = "general"
    # Remaining daily budget in dollars
    budget_remaining: float = 0.0
    # Estimated number of prompt tokens
    prompt_tokens_est: int = 0
    # True when the request requires function calling
    needs_tools: bool = False
    # True when the request requires image input
    needs_vision: bool = False
    # Latency SLA in milliseconds
    max_latency_ms: int = 30000
@dataclass
class RoutingRule:
    """A named predicate plus the model settings to use when it matches."""

    name: str
    # Lower number = higher priority.
    priority: int
    # Predicate over the routing context. The context type is quoted so this
    # class can be defined without that name being resolvable yet.
    condition: Callable[["RoutingContext"], bool]
    # Primary model ID for requests matched by this rule.
    model: str
    # Optional ordered fallback model IDs; None means "no fallbacks".
    fallbacks: Optional[list[str]] = None
    # Completion token cap applied to requests matched by this rule.
    max_tokens: int = 1024

    def matches(self, ctx: "RoutingContext") -> bool:
        """Return True when this rule's condition accepts ``ctx``.

        The condition's result is coerced to ``bool`` so the declared return
        type holds even for predicates that return truthy/falsy objects.
        A condition that raises is treated as "no match" (best effort), but
        the failure is logged so a broken rule does not go unnoticed.
        """
        try:
            return bool(self.condition(ctx))
        except Exception:
            # Local import keeps this block self-contained.
            import logging

            logging.getLogger(__name__).exception(
                "Routing rule %r condition raised; treating as no match",
                self.name,
            )
            return False
# Routing table. Entries are listed in ascending priority for readability;
# the evaluator sorts by `priority`, so list order only breaks ties.
RULES = [
    # 1. Free tier is pinned to a free model with a small token cap.
    # NOTE(review): the fallback ID below has no ":free" suffix, unlike the
    # primary — confirm it is actually zero-cost before relying on it.
    RoutingRule(
        priority=1,
        name="free-tier",
        condition=lambda c: c.user_tier == "free",
        model="google/gemma-2-9b-it:free",
        fallbacks=["meta-llama/llama-3.1-8b-instruct"],
        max_tokens=512,
    ),
    # 2. Under one dollar of budget left (enterprise exempt) -> cheap models.
    RoutingRule(
        priority=2,
        name="low-budget",
        condition=lambda c: c.budget_remaining < 1.0 and c.user_tier != "enterprise",
        model="openai/gpt-4o-mini",
        fallbacks=["meta-llama/llama-3.1-8b-instruct"],
        max_tokens=512,
    ),
    # 3. Function calling requested -> tool-capable models.
    RoutingRule(
        priority=3,
        name="tools-required",
        condition=lambda c: c.needs_tools,
        model="openai/gpt-4o",
        fallbacks=["anthropic/claude-3.5-sonnet"],
    ),
    # 4. Image input requested.
    RoutingRule(
        priority=4,
        name="vision-required",
        condition=lambda c: c.needs_vision,
        model="openai/gpt-4o",
        fallbacks=["anthropic/claude-3.5-sonnet", "google/gemini-2.0-flash-001"],
    ),
    # 5. Code tasks -> Claude first.
    RoutingRule(
        priority=5,
        name="code-tasks",
        condition=lambda c: c.task_type == "code",
        model="anthropic/claude-3.5-sonnet",
        fallbacks=["openai/gpt-4o"],
    ),
    # 6. Latency SLA under 3000 ms -> fast models.
    RoutingRule(
        priority=6,
        name="low-latency",
        condition=lambda c: c.max_latency_ms < 3000,
        model="openai/gpt-4o-mini",
        fallbacks=["anthropic/claude-3-haiku"],
    ),
    # 7. Enterprise requests not claimed by an earlier rule get premium models.
    RoutingRule(
        priority=7,
        name="enterprise-default",
        condition=lambda c: c.user_tier == "enterprise",
        model="anthropic/claude-3.5-sonnet",
        fallbacks=["openai/gpt-4o", "openai/gpt-4o-mini"],
    ),
    # 8. Catch-all: its condition is always true.
    RoutingRule(
        priority=99,
        name="default",
        condition=lambda c: True,
        model="openai/gpt-4o-mini",
        fallbacks=["meta-llama/llama-3.1-8b-instruct"],
    ),
]
def evaluate_rules(ctx: RoutingContext) -> RoutingRule:
    """Return the first rule, in ascending ``priority`` order, that matches ``ctx``.

    ``RULES`` is re-sorted on every call. When no rule matches, the rule with
    the largest priority number is returned as the catch-all. An empty
    ``RULES`` raises ``IndexError``.
    """
    by_priority = sorted(RULES, key=lambda candidate: candidate.priority)
    last_resort = by_priority[-1]  # Default catch-all
    return next(
        (candidate for candidate in by_priority if candidate.matches(ctx)),
        last_resort,
    )
Config-Driven Rules (JSON)
# Declarative rule table kept JSON-shaped. In "conditions", a scalar value is
# compared for equality and a list means "any of these values".
RULES_CONFIG = {
    "rules": [
        # Free tier: free model, small token cap.
        {
            "name": "free-tier",
            "priority": 1,
            "conditions": {"user_tier": "free"},
            "model": "google/gemma-2-9b-it:free",
            "max_tokens": 512,
        },
        # Code tasks for pro and enterprise tiers.
        {
            "name": "code-pro",
            "priority": 5,
            "conditions": {
                "task_type": "code",
                "user_tier": ["pro", "enterprise"],
            },
            "model": "anthropic/claude-3.5-sonnet",
            "max_tokens": 2048,
        },
        # Catch-all: empty conditions, no explicit max_tokens.
        {
            "name": "default",
            "priority": 99,
            "conditions": {},
            "model": "openai/gpt-4o-mini",
        },
    ]
}
def match_config_rule(ctx: "RoutingContext", rule_config: dict) -> bool:
    """Return True when ``ctx`` satisfies every condition in ``rule_config``.

    Args:
        ctx: Object whose attributes are compared against the conditions.
        rule_config: One rule entry. Its optional ``"conditions"`` mapping
            goes from attribute name to either a single expected value
            (equality) or a list/tuple/set of accepted values (membership).

    Returns:
        True if all conditions hold. A rule with missing, empty, or ``None``
        conditions matches everything. A condition naming an attribute that
        ``ctx`` lacks compares against ``None``.
    """
    # `or {}` also covers an explicit `"conditions": null` in the config,
    # which would otherwise fail on `.items()`.
    conditions = rule_config.get("conditions") or {}
    for key, expected in conditions.items():
        actual = getattr(ctx, key, None)
        if isinstance(expected, (list, tuple, set, frozenset)):
            if actual not in expected:
                return False
        elif actual != expected:
            return False
    return True
Routed Completion
def routed_completion(messages: list[dict], ctx: RoutingContext, **kwargs):
    """Run a chat completion using the model chosen by the routing rules.

    Args:
        messages: Chat messages passed to the completions API.
        ctx: Routing context used to select the rule.
        **kwargs: Extra arguments forwarded to ``client.chat.completions.create``.
            Caller-supplied values take precedence over the rule's defaults
            (for example ``max_tokens``). A caller-supplied ``extra_body`` is
            merged with the rule's fallback settings instead of colliding
            with them.

    Returns:
        Dict with ``content`` (first choice's message content), ``model``
        (model reported by the response), ``rule`` (matched rule name) and
        ``tokens`` (prompt + completion tokens, or 0 when the response
        carries no usage data).
    """
    rule = evaluate_rules(ctx)

    # Copy so the caller's dict is never mutated.
    extra_body = dict(kwargs.pop("extra_body", None) or {})
    if rule.fallbacks:
        # setdefault keeps any routing values the caller set explicitly.
        extra_body.setdefault("models", [rule.model, *rule.fallbacks])
        extra_body.setdefault("route", "fallback")

    request = {
        "model": rule.model,
        "messages": messages,
        "max_tokens": rule.max_tokens,
    }
    if extra_body:
        request["extra_body"] = extra_body
    # Applied last so explicit caller arguments override rule defaults rather
    # than raising a duplicate-keyword TypeError.
    request.update(kwargs)

    response = client.chat.completions.create(**request)

    usage = response.usage
    tokens = usage.prompt_tokens + usage.completion_tokens if usage else 0
    return {
        "content": response.choices[0].message.content,
        "model": response.model,
        "rule": rule.name,
        "tokens": tokens,
    }
# Usage: route a pro-tier coding request and report which rule and model served it.
ctx = RoutingContext(
    task_type="code",
    user_tier="pro",
    budget_remaining=50.0,
)
result = routed_completion(
    messages=[{"role": "user", "content": "Refactor this function..."}],
    ctx=ctx,
)
print(f"Rule: {result['rule']}, Model: {result['model']}")
A/B Testing Rules
import random
def ab_test_routing(
    ctx: RoutingContext,
    test_name: str,
    variant_b_pct: float = 0.10,
    variant_b_model: str = "openai/gpt-4o",
):
    """Route a fraction of traffic to a variant-B model for comparison.

    Args:
        ctx: Routing context used to pick the control rule.
        test_name: Label for the experiment. This function does not read it;
            it is accepted so callers can carry it alongside the decision.
        variant_b_pct: Probability (0.0-1.0) of returning the variant rule.
        variant_b_model: Model ID used for variant B.

    Returns:
        The control rule, or a copy of it named ``"<rule>:variant-b"`` with
        ``model`` swapped to ``variant_b_model``.

    NOTE(review): the variant is applied regardless of which rule matched, so
    free-tier or low-budget traffic can be sent to ``variant_b_model`` —
    confirm that is intended before enabling in production.
    """
    # Local import: only `dataclass` is imported at file level.
    from dataclasses import replace

    rule = evaluate_rules(ctx)

    # A variant identical to the control would only mislabel traffic.
    if rule.model == variant_b_model:
        return rule

    if random.random() < variant_b_pct:
        # Copy every field of the control rule, changing only name and model.
        return replace(
            rule,
            name=f"{rule.name}:variant-b",
            model=variant_b_model,
        )
    return rule
Error Handling
| Error | Cause | Fix |
|---|---|---|
| No rule matched | Missing default catch-all | Always include a priority=99 default rule |
| Rule condition error | Dynamic check raised exception | Wrap condition in try/except; return False on error |
| Wrong model selected | Rule priority incorrect | Log matching rule name; review priority ordering |
| Config parse error | Invalid JSON rule definition | Validate config at startup; fail fast |
Enterprise Considerations
- Store rules in a config file or database for hot-reloading without redeployment
- Log every routing decision (rule name, model, context) for analytics and debugging
- Use A/B testing to validate rule changes before full rollout
- Always include a default catch-all rule with a reliable, affordable model
- Version your rule configurations and track changes alongside code deployments
- Combine routing rules with budget enforcement (see openrouter-cost-controls)
References
More by jeremylongshore
View all skills by jeremylongshore →
You might also like
flutter-development
aj-geddes
Build beautiful cross-platform mobile apps with Flutter and Dart. Covers widgets, state management with Provider/BLoC, navigation, API integration, and material design.
drawio-diagrams-enhanced
jgtolentino
Create professional draw.io (diagrams.net) diagrams in XML format (.drawio files) with integrated PMP/PMBOK methodologies, extensive visual asset libraries, and industry-standard professional templates. Use this skill when users ask to create flowcharts, swimlane diagrams, cross-functional flowcharts, org charts, network diagrams, UML diagrams, BPMN, project management diagrams (WBS, Gantt, PERT, RACI), risk matrices, stakeholder maps, or any other visual diagram in draw.io format. This skill includes access to custom shape libraries for icons, clipart, and professional symbols.
ui-ux-pro-max
nextlevelbuilder
"UI/UX design intelligence. 50 styles, 21 palettes, 50 font pairings, 20 charts, 8 stacks (React, Next.js, Vue, Svelte, SwiftUI, React Native, Flutter, Tailwind). Actions: plan, build, create, design, implement, review, fix, improve, optimize, enhance, refactor, check UI/UX code. Projects: website, landing page, dashboard, admin panel, e-commerce, SaaS, portfolio, blog, mobile app, .html, .tsx, .vue, .svelte. Elements: button, modal, navbar, sidebar, card, table, form, chart. Styles: glassmorphism, claymorphism, minimalism, brutalism, neumorphism, bento grid, dark mode, responsive, skeuomorphism, flat design. Topics: color palette, accessibility, animation, layout, typography, font pairing, spacing, hover, shadow, gradient."
godot
bfollington
This skill should be used when working on Godot Engine projects. It provides specialized knowledge of Godot's file formats (.gd, .tscn, .tres), architecture patterns (component-based, signal-driven, resource-based), common pitfalls, validation tools, code templates, and CLI workflows. The `godot` command is available for running the game, validating scripts, importing resources, and exporting builds. Use this skill for tasks involving Godot game development, debugging scene/resource files, implementing game systems, or creating new Godot components.
nano-banana-pro
garg-aayush
Generate and edit images using Google's Nano Banana Pro (Gemini 3 Pro Image) API. Use when the user asks to generate, create, edit, modify, change, alter, or update images. Also use when user references an existing image file and asks to modify it in any way (e.g., "modify this image", "change the background", "replace X with Y"). Supports both text-to-image generation and image-to-image editing with configurable resolution (1K default, 2K, or 4K for high resolution). DO NOT read the image file first - use this skill directly with the --input-image parameter.
fastapi-templates
wshobson
Create production-ready FastAPI projects with async patterns, dependency injection, and comprehensive error handling. Use when building new FastAPI applications or setting up backend API projects.
Related MCP Servers
Browse all servers
Web Reader MCP Server — Z.AI MCP for powerful web content extraction: full-page retrieval and structured data for Claude
Web Search MCP Server delivers powerful Z.AI web search and real-time info retrieval for MCP-compatible clients like Cla
Zread MCP Server: Z.AI-powered MCP for Claude Code, Cline and MCP clients — access docs, code & open-source repo knowled
Extend your developer tools with GitHub MCP Server for advanced automation, supporting GitHub Student and student packag
Boost productivity with Task Master: an AI-powered tool for project management and agile development workflows, integrat
Optimize your codebase for AI with Repomix—transform, compress, and secure repos for easier analysis with modern AI tool
Stay ahead of the MCP ecosystem
Get weekly updates on new skills and servers.