fix: combo waits out 503 cooldown before fallthrough + returns 503 instead of 406 on disabled creds (#382)

- fixes #335: on transient 503/502/504, wait for a short cooldown (up to
  5s) before falling through to the next combo model, giving the provider
  a chance to recover rather than immediately skipping it
- fixes #334: when all combo models have no active credentials, return
  503 (Service Unavailable) instead of 406 (Not Acceptable), which is
  more accurate and retriable by clients
This commit is contained in:
Anurag Saxena
2026-03-22 22:24:58 -04:00
committed by GitHub
parent 8c0b4a3e84
commit 4774150ca7

View File

@@ -74,13 +74,22 @@ export async function handleComboChat({ body, models, handleSingleModel, log })
}
// Check if should fallback to next model
const { shouldFallback } = checkFallbackError(result.status, errorText);
const { shouldFallback, cooldownMs } = checkFallbackError(result.status, errorText);
if (!shouldFallback) {
log.warn("COMBO", `Model ${modelStr} failed (no fallback)`, { status: result.status });
return result;
}
// For transient errors (503/502/504), wait for cooldown before falling through
// so a briefly-overloaded provider gets a chance to recover rather than being
// skipped immediately (fixes: combo falls through on transient 503)
if (cooldownMs && cooldownMs > 0 && cooldownMs <= 5000 &&
(result.status === 503 || result.status === 502 || result.status === 504)) {
log.info("COMBO", `Model ${modelStr} transient ${result.status}, waiting ${cooldownMs}ms before next`);
await new Promise(r => setTimeout(r, cooldownMs));
}
// Fallback to next model
lastError = errorText || String(result.status);
if (!lastStatus) lastStatus = result.status;
@@ -94,7 +103,11 @@ export async function handleComboChat({ body, models, handleSingleModel, log })
}
// All models failed
const status = 406;
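// Use 503 (Service Unavailable) rather than 406 (Not Acceptable): 406 signals a
// content-negotiation failure for the client's request, but here the providers are
// simply unavailable or have no active credentials. 503 is more accurate and
// retryable by clients. A recorded upstream failure status (lastStatus) takes
// precedence unless the last error reports missing credentials.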
const allDisabled = lastError && lastError.toLowerCase().includes("no credentials");
const status = allDisabled ? 503 : (lastStatus || 503);
const msg = lastError || "All combo models unavailable";
if (earliestRetryAfter) {