mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
fix: combo 503 cooldown wait before fallthrough + 406 on disabled creds (#382)
- fixes #335: on a transient 503/502/504, wait for the short cooldown (up to 5s) before falling through to the next combo model, giving the provider a chance to recover rather than skipping it immediately
- fixes #334: when all combo models have no active credentials, return 503 (Service Unavailable) instead of 406 (Not Acceptable), which is more accurate and retryable by clients
This commit is contained in:
@@ -74,13 +74,22 @@ export async function handleComboChat({ body, models, handleSingleModel, log })
|
||||
}
|
||||
|
||||
// Check if should fallback to next model
|
||||
const { shouldFallback } = checkFallbackError(result.status, errorText);
|
||||
|
||||
const { shouldFallback, cooldownMs } = checkFallbackError(result.status, errorText);
|
||||
|
||||
if (!shouldFallback) {
|
||||
log.warn("COMBO", `Model ${modelStr} failed (no fallback)`, { status: result.status });
|
||||
return result;
|
||||
}
|
||||
|
||||
// For transient errors (503/502/504), wait for cooldown before falling through
|
||||
// so a briefly-overloaded provider gets a chance to recover rather than being
|
||||
// skipped immediately (fixes: combo falls through on transient 503)
|
||||
if (cooldownMs && cooldownMs > 0 && cooldownMs <= 5000 &&
|
||||
(result.status === 503 || result.status === 502 || result.status === 504)) {
|
||||
log.info("COMBO", `Model ${modelStr} transient ${result.status}, waiting ${cooldownMs}ms before next`);
|
||||
await new Promise(r => setTimeout(r, cooldownMs));
|
||||
}
|
||||
|
||||
// Fallback to next model
|
||||
lastError = errorText || String(result.status);
|
||||
if (!lastStatus) lastStatus = result.status;
|
||||
@@ -94,7 +103,11 @@ export async function handleComboChat({ body, models, handleSingleModel, log })
|
||||
}
|
||||
|
||||
// All models failed
|
||||
const status = 406;
|
||||
// Use 503 (Service Unavailable) rather than 406 (Not Acceptable) — 406 implies
|
||||
// the request itself is invalid, but here the providers are simply unavailable
|
||||
// or have no active credentials. 503 is more accurate and retryable by clients.
|
||||
const allDisabled = lastError && lastError.toLowerCase().includes("no credentials");
|
||||
const status = allDisabled ? 503 : (lastStatus || 503);
|
||||
const msg = lastError || "All combo models unavailable";
|
||||
|
||||
if (earliestRetryAfter) {
|
||||
|
||||
Reference in New Issue
Block a user