mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
feat(auth): add model-level rate limit locking for multi-bucket providers (#120)
Providers like Antigravity maintain separate quota buckets per model family (e.g. Claude vs Gemini). Previously, a 429 on claude-opus locked the entire account, blocking gemini-pro requests even though the Gemini quota was still available. This adds in-memory per-model locking so that only the rate-limited model is skipped during account selection while other models remain accessible.
Changes:
- Add model-aware lock tracking in auth.js (Map<connectionId:model, expiry>)
- Pass model context from chat handler to auth service
- Multi-bucket behavior gated to known providers (MULTI_BUCKET_PROVIDERS set)
- No database schema changes — locks are in-memory and clear on restart
Closes #110
This commit is contained in:
@@ -120,7 +120,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re
|
||||
let lastStatus = null;
|
||||
|
||||
while (true) {
|
||||
const credentials = await getProviderCredentials(provider, excludeConnectionId);
|
||||
const credentials = await getProviderCredentials(provider, excludeConnectionId, model);
|
||||
|
||||
// All accounts unavailable
|
||||
if (!credentials || credentials.allRateLimited) {
|
||||
@@ -170,7 +170,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re
|
||||
if (result.success) return result.response;
|
||||
|
||||
// Mark account unavailable (auto-calculates cooldown with exponential backoff)
|
||||
const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider);
|
||||
const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model);
|
||||
|
||||
if (shouldFallback) {
|
||||
log.warn("AUTH", `Account ${accountId}... unavailable (${result.status}), trying fallback`);
|
||||
|
||||
@@ -5,13 +5,70 @@ import * as log from "../utils/logger.js";
|
||||
// Mutex to prevent race conditions during account selection
|
||||
let selectionMutex = Promise.resolve();
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Model-level rate-limit locking (in-memory)
|
||||
//
|
||||
// Providers like Antigravity maintain *separate* quota buckets per model
|
||||
// family (e.g. Claude vs Gemini). When a 429 arrives for one model, the
|
||||
// standard account-level DB lock would block ALL models on that account,
|
||||
// wasting quota that is still available for other model families.
|
||||
//
|
||||
// This module tracks model-specific locks in memory so that only the
|
||||
// affected model is skipped during account selection while the rest of the
|
||||
// account's quota remains accessible.
|
||||
//
|
||||
// Locks are intentionally in-memory: they clear on restart and require no
|
||||
// database schema migration.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Providers known to have independent per-model quota buckets.
 * Consulted through isMultiBucketProvider(); model-level locks are only
 * applied and honoured for providers listed here.
 */
|
||||
const MULTI_BUCKET_PROVIDERS = new Set(["antigravity"]);
|
||||
|
||||
/**
 * Map<"connectionId:model", expiryTimestamp>
 * The key is `${connectionId}:${model}`; the value is the epoch-millisecond
 * instant (Date.now() + duration) at which the lock stops applying.
 * Entries are not swept proactively: isModelLocked() deletes an entry the
 * first time it is looked up after its expiry has passed.
 */
|
||||
const modelLocks = new Map();
|
||||
|
||||
/**
 * Default lock duration for model-level rate limits (5 minutes).
 * Used by markAccountUnavailable() when the computed cooldown is not > 0.
 */
|
||||
const DEFAULT_MODEL_LOCK_MS = 5 * 60 * 1000;
|
||||
|
||||
/**
 * Report whether `model` is currently rate-limit locked on `connectionId`.
 *
 * Looks up the `"connectionId:model"` entry in `modelLocks`. An entry whose
 * expiry has already passed is deleted as a side effect of the lookup, so
 * stale locks are pruned lazily rather than by a timer.
 *
 * @param {string} connectionId - Connection to inspect
 * @param {string} model - Model name to look up
 * @returns {boolean} true only while an unexpired lock exists for the pair
 */
function isModelLocked(connectionId, model) {
  if (connectionId && model) {
    const lockKey = `${connectionId}:${model}`;
    const lockedUntil = modelLocks.get(lockKey);

    if (lockedUntil) {
      const stillLocked = lockedUntil > Date.now();
      if (stillLocked) return true;

      // Expiry has passed: drop the stale entry before reporting "unlocked".
      modelLocks.delete(lockKey);
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Lock a model on a specific connection for `durationMs` milliseconds.
 *
 * Stores `Date.now() + durationMs` in `modelLocks` under the key
 * `"connectionId:model"` and logs the lock at warn level.
 *
 * The call is ignored when the connection id or model is missing, or when
 * `durationMs` is not a finite positive number. Such a duration can never
 * yield an active lock, and a NaN expiry would stay in the map forever
 * because isModelLocked() treats it as "no lock" before reaching its cleanup.
 *
 * @param {string|number} connectionId - Connection the lock applies to
 * @param {string} model - Model name to lock
 * @param {number} durationMs - Lock lifetime in milliseconds
 * @returns {void}
 */
function lockModel(connectionId, model, durationMs) {
  if (!connectionId || !model) return;
  if (!Number.isFinite(durationMs) || durationMs <= 0) return;

  const key = `${connectionId}:${model}`;
  modelLocks.set(key, Date.now() + durationMs);

  // Coerce before slicing so a non-string id cannot throw after the lock is stored.
  const shortId = String(connectionId).slice(0, 8);
  log.warn("AUTH", `Model lock: ${model} on ${shortId} for ${Math.round(durationMs / 1000)}s`);
}
|
||||
|
||||
/**
 * Tell whether `provider` is listed in MULTI_BUCKET_PROVIDERS, i.e. is
 * treated as having a separate quota bucket per model.
 *
 * @param {string|null} provider - Provider name to test
 * @returns {boolean} true when the provider is in the multi-bucket set
 */
function isMultiBucketProvider(provider) {
  const usesPerModelBuckets = MULTI_BUCKET_PROVIDERS.has(provider);
  return usesPerModelBuckets;
}
|
||||
|
||||
/**
|
||||
* Get provider credentials from localDb
|
||||
* Filters out unavailable accounts and returns the selected account based on strategy
|
||||
* @param {string} provider - Provider name
|
||||
* @param {string|null} excludeConnectionId - Connection ID to exclude (for retry with next account)
|
||||
* @param {string|null} model - Model name for per-model rate limit filtering
|
||||
*/
|
||||
export async function getProviderCredentials(provider, excludeConnectionId = null) {
|
||||
export async function getProviderCredentials(provider, excludeConnectionId = null, model = null) {
|
||||
// Acquire mutex to prevent race conditions
|
||||
const currentMutex = selectionMutex;
|
||||
let resolveMutex;
|
||||
@@ -21,7 +78,7 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
|
||||
await currentMutex;
|
||||
|
||||
const connections = await getProviderConnections({ provider, isActive: true });
|
||||
log.debug("AUTH", `${provider} | total connections: ${connections.length}, excludeId: ${excludeConnectionId || "none"}`);
|
||||
log.debug("AUTH", `${provider} | total connections: ${connections.length}, excludeId: ${excludeConnectionId || "none"}, model: ${model || "any"}`);
|
||||
|
||||
if (connections.length === 0) {
|
||||
// Check all connections (including inactive) to see if rate limited
|
||||
@@ -43,9 +100,11 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
|
||||
}
|
||||
|
||||
// Filter out unavailable accounts and excluded connection
|
||||
const multiBucket = isMultiBucketProvider(provider);
|
||||
const availableConnections = connections.filter(c => {
|
||||
if (excludeConnectionId && c.id === excludeConnectionId) return false;
|
||||
if (isAccountUnavailable(c.rateLimitedUntil)) return false;
|
||||
if (multiBucket && model && isModelLocked(c.id, model)) return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
@@ -53,8 +112,9 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
|
||||
connections.forEach(c => {
|
||||
const excluded = excludeConnectionId && c.id === excludeConnectionId;
|
||||
const rateLimited = isAccountUnavailable(c.rateLimitedUntil);
|
||||
if (excluded || rateLimited) {
|
||||
log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}`);
|
||||
const modelLocked = multiBucket && model && isModelLocked(c.id, model);
|
||||
if (excluded || rateLimited || modelLocked) {
|
||||
log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""} ${modelLocked ? `modelLocked(${model})` : ""}`);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -73,6 +133,15 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
|
||||
lastErrorCode: earliestConn?.errorCode || null
|
||||
};
|
||||
}
|
||||
if (multiBucket && model) {
|
||||
log.warn("AUTH", `${provider} | all accounts model-locked for ${model}`);
|
||||
return {
|
||||
allRateLimited: true,
|
||||
retryAfter: new Date(Date.now() + 60000).toISOString(),
|
||||
retryAfterHuman: "reset after 1m",
|
||||
lastError: `All accounts rate limited for model ${model}`
|
||||
};
|
||||
}
|
||||
log.warn("AUTH", `${provider} | all ${connections.length} accounts unavailable`);
|
||||
return null;
|
||||
}
|
||||
@@ -144,10 +213,17 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark account as unavailable — reads backoffLevel from DB, calculates cooldown with exponential backoff, saves new level
|
||||
* Mark account as unavailable — reads backoffLevel from DB, calculates cooldown with exponential backoff, saves new level.
|
||||
* For multi-bucket providers (e.g. Antigravity), 429 errors lock only the specific model in memory
|
||||
* rather than the entire account in the database.
|
||||
* @param {string} connectionId
|
||||
* @param {number} status - HTTP status code from upstream
|
||||
* @param {string} errorText
|
||||
* @param {string|null} provider
|
||||
* @param {string|null} model - The specific model that triggered the error
|
||||
* @returns {{ shouldFallback: boolean, cooldownMs: number }}
|
||||
*/
|
||||
export async function markAccountUnavailable(connectionId, status, errorText, provider = null) {
|
||||
export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null) {
|
||||
// Read current connection to get backoffLevel
|
||||
const connections = await getProviderConnections({ provider });
|
||||
const conn = connections.find(c => c.id === connectionId);
|
||||
@@ -156,6 +232,12 @@ export async function markAccountUnavailable(connectionId, status, errorText, pr
|
||||
const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel);
|
||||
if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 };
|
||||
|
||||
if (isMultiBucketProvider(provider) && status === 429 && model) {
|
||||
const lockDuration = cooldownMs > 0 ? cooldownMs : DEFAULT_MODEL_LOCK_MS;
|
||||
lockModel(connectionId, model, lockDuration);
|
||||
return { shouldFallback: true, cooldownMs: 0 };
|
||||
}
|
||||
|
||||
const rateLimitedUntil = getUnavailableUntil(cooldownMs);
|
||||
const reason = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user