feat(auth): add model-level rate limit locking for multi-bucket providers (#120)

Providers like Antigravity maintain separate quota buckets per model family
(e.g. Claude vs Gemini). A 429 on claude-opus previously locked the entire
account, blocking gemini-pro requests even though the Gemini quota was still
available.

This adds in-memory per-model locking so that only the specific model is
skipped during account selection while other models remain accessible.

Changes:
- Add model-aware lock tracking in auth.js (Map<connectionId:model, expiry>)
- Pass model context from chat handler to auth service
- Multi-bucket behavior gated to known providers (MULTI_BUCKET_PROVIDERS set)
- No database schema changes — locks are in-memory and clear on restart

Closes #110
This commit is contained in:
Nick Roth
2026-02-14 22:35:13 -06:00
committed by GitHub
parent 69131295db
commit 202fee714b
2 changed files with 90 additions and 8 deletions

View File

@@ -120,7 +120,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re
let lastStatus = null;
while (true) {
const credentials = await getProviderCredentials(provider, excludeConnectionId);
const credentials = await getProviderCredentials(provider, excludeConnectionId, model);
// All accounts unavailable
if (!credentials || credentials.allRateLimited) {
@@ -170,7 +170,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re
if (result.success) return result.response;
// Mark account unavailable (auto-calculates cooldown with exponential backoff)
const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider);
const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model);
if (shouldFallback) {
log.warn("AUTH", `Account ${accountId}... unavailable (${result.status}), trying fallback`);

View File

@@ -5,13 +5,70 @@ import * as log from "../utils/logger.js";
// Mutex to prevent race conditions during account selection.
// Holds a promise; getProviderCredentials() captures the current value and
// awaits it before loading connections.
let selectionMutex = Promise.resolve();
// ---------------------------------------------------------------------------
// Model-level rate-limit locking (in-memory)
//
// Providers like Antigravity maintain *separate* quota buckets per model
// family (e.g. Claude vs Gemini). When a 429 arrives for one model, the
// standard account-level DB lock would block ALL models on that account,
// wasting quota that is still available for other model families.
//
// This module tracks model-specific locks in memory so that only the
// affected model is skipped during account selection while the rest of the
// account's quota remains accessible.
//
// Locks are intentionally in-memory: they clear on restart and require no
// database schema migration.
// ---------------------------------------------------------------------------
/** Providers known to have independent per-model quota buckets */
const MULTI_BUCKET_PROVIDERS = new Set(["antigravity"]);
/** Map<"connectionId:model", expiryTimestamp> */
const modelLocks = new Map();
/** Default lock duration for model-level rate limits (5 minutes) */
const DEFAULT_MODEL_LOCK_MS = 5 * 60 * 1000;
/**
 * Tell whether `model` is currently under a rate-limit lock on `connectionId`.
 *
 * Looks up the "connectionId:model" entry in the in-memory `modelLocks` map.
 * An entry whose expiry has passed is removed as a side effect of the lookup,
 * so no separate sweep is needed.
 *
 * @param {string} connectionId - Connection to check; a falsy value yields false
 * @param {string} model - Model name to check; a falsy value yields false
 * @returns {boolean} true only while an unexpired lock exists for the pair
 */
function isModelLocked(connectionId, model) {
  const hasBothIds = Boolean(connectionId) && Boolean(model);
  if (!hasBothIds) {
    return false;
  }

  const lockKey = `${connectionId}:${model}`;
  const lockedUntil = modelLocks.get(lockKey);
  if (!lockedUntil) {
    return false;
  }

  const stillActive = lockedUntil > Date.now();
  if (!stillActive) {
    // Lazy cleanup: drop the stale entry the moment it is observed.
    modelLocks.delete(lockKey);
  }
  return stillActive;
}
/**
 * Lock a model on a specific connection for `durationMs` milliseconds.
 *
 * The lock is stored in the in-memory `modelLocks` map under the key
 * "connectionId:model" as an absolute expiry timestamp. Locking the same pair
 * again overwrites the previous expiry.
 *
 * @param {string} connectionId - Connection whose model should be locked; no-op when falsy
 * @param {string} model - Model name to lock; no-op when falsy
 * @param {number} [durationMs=DEFAULT_MODEL_LOCK_MS] - Lock length in milliseconds.
 *   Missing, non-finite, or non-positive values fall back to DEFAULT_MODEL_LOCK_MS,
 *   because a NaN expiry would be stored yet never be treated as locked by
 *   isModelLocked(), silently dropping the lock.
 * @returns {void}
 */
function lockModel(connectionId, model, durationMs = DEFAULT_MODEL_LOCK_MS) {
  if (!connectionId || !model) return;

  const effectiveMs = Number.isFinite(durationMs) && durationMs > 0
    ? durationMs
    : DEFAULT_MODEL_LOCK_MS;

  const key = `${connectionId}:${model}`;
  modelLocks.set(key, Date.now() + effectiveMs);

  // String() keeps the log line from throwing when the id is not a string.
  const shortId = String(connectionId).slice(0, 8);
  log.warn("AUTH", `Model lock: ${model} on ${shortId} for ${Math.round(effectiveMs / 1000)}s`);
}
/**
 * Report whether `provider` keeps a separate quota bucket per model.
 *
 * @param {string|null} provider - Provider name to look up
 * @returns {boolean} true when the name is a member of MULTI_BUCKET_PROVIDERS
 */
function isMultiBucketProvider(provider) {
  const hasPerModelBuckets = MULTI_BUCKET_PROVIDERS.has(provider);
  return hasPerModelBuckets;
}
/**
* Get provider credentials from localDb
* Filters out unavailable accounts and returns the selected account based on strategy
* @param {string} provider - Provider name
* @param {string|null} excludeConnectionId - Connection ID to exclude (for retry with next account)
* @param {string|null} model - Model name for per-model rate limit filtering
*/
export async function getProviderCredentials(provider, excludeConnectionId = null) {
export async function getProviderCredentials(provider, excludeConnectionId = null, model = null) {
// Acquire mutex to prevent race conditions
const currentMutex = selectionMutex;
let resolveMutex;
@@ -21,7 +78,7 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
await currentMutex;
const connections = await getProviderConnections({ provider, isActive: true });
log.debug("AUTH", `${provider} | total connections: ${connections.length}, excludeId: ${excludeConnectionId || "none"}`);
log.debug("AUTH", `${provider} | total connections: ${connections.length}, excludeId: ${excludeConnectionId || "none"}, model: ${model || "any"}`);
if (connections.length === 0) {
// Check all connections (including inactive) to see if rate limited
@@ -43,9 +100,11 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
}
// Filter out unavailable accounts and excluded connection
const multiBucket = isMultiBucketProvider(provider);
const availableConnections = connections.filter(c => {
if (excludeConnectionId && c.id === excludeConnectionId) return false;
if (isAccountUnavailable(c.rateLimitedUntil)) return false;
if (multiBucket && model && isModelLocked(c.id, model)) return false;
return true;
});
@@ -53,8 +112,9 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
connections.forEach(c => {
const excluded = excludeConnectionId && c.id === excludeConnectionId;
const rateLimited = isAccountUnavailable(c.rateLimitedUntil);
if (excluded || rateLimited) {
log.debug("AUTH", `${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}`);
const modelLocked = multiBucket && model && isModelLocked(c.id, model);
if (excluded || rateLimited || modelLocked) {
log.debug("AUTH", `${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""} ${modelLocked ? `modelLocked(${model})` : ""}`);
}
});
@@ -73,6 +133,15 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
lastErrorCode: earliestConn?.errorCode || null
};
}
if (multiBucket && model) {
log.warn("AUTH", `${provider} | all accounts model-locked for ${model}`);
return {
allRateLimited: true,
retryAfter: new Date(Date.now() + 60000).toISOString(),
retryAfterHuman: "reset after 1m",
lastError: `All accounts rate limited for model ${model}`
};
}
log.warn("AUTH", `${provider} | all ${connections.length} accounts unavailable`);
return null;
}
@@ -144,10 +213,17 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
}
/**
* Mark account as unavailable — reads backoffLevel from DB, calculates cooldown with exponential backoff, saves new level
* Mark account as unavailable — reads backoffLevel from DB, calculates cooldown with exponential backoff, saves new level.
* For multi-bucket providers (e.g. Antigravity), 429 errors lock only the specific model in memory
* rather than the entire account in the database.
* @param {string} connectionId
* @param {number} status - HTTP status code from upstream
* @param {string} errorText
* @param {string|null} provider
* @param {string|null} model - The specific model that triggered the error
* @returns {{ shouldFallback: boolean, cooldownMs: number }}
*/
export async function markAccountUnavailable(connectionId, status, errorText, provider = null) {
export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null) {
// Read current connection to get backoffLevel
const connections = await getProviderConnections({ provider });
const conn = connections.find(c => c.id === connectionId);
@@ -156,6 +232,12 @@ export async function markAccountUnavailable(connectionId, status, errorText, pr
const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel);
if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 };
if (isMultiBucketProvider(provider) && status === 429 && model) {
const lockDuration = cooldownMs > 0 ? cooldownMs : DEFAULT_MODEL_LOCK_MS;
lockModel(connectionId, model, lockDuration);
return { shouldFallback: true, cooldownMs: 0 };
}
const rateLimitedUntil = getUnavailableUntil(cooldownMs);
const reason = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error";