feat(auth): add model-level rate limit locking for multi-bucket providers (#120)

Providers like Antigravity maintain separate quota buckets per model family (e.g. Claude vs Gemini). Previously, a 429 on claude-opus locked the entire account, blocking gemini-pro requests even though the Gemini quota was still available. This adds in-memory per-model locking so that only the rate-limited model is skipped during account selection while other models remain accessible.

Changes:
- Add model-aware lock tracking in auth.js (Map<connectionId:model, expiry>)
- Pass model context from the chat handler to the auth service
- Gate multi-bucket behavior to known providers (MULTI_BUCKET_PROVIDERS set)
- No database schema changes — locks are in-memory and clear on restart

Closes #110
2026-05-08 12:01:28 +00:00 · 2026-02-14 22:35:13 -06:00
parent 69131295db
commit 202fee714b
2 changed files with 90 additions and 8 deletions
--- a/src/sse/handlers/chat.js
+++ b/src/sse/handlers/chat.js
@@ -120,7 +120,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re
  let lastStatus = null;

  while (true) {
-    const credentials = await getProviderCredentials(provider, excludeConnectionId);
+    const credentials = await getProviderCredentials(provider, excludeConnectionId, model);

    // All accounts unavailable
    if (!credentials || credentials.allRateLimited) {
@@ -170,7 +170,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re
    if (result.success) return result.response;

    // Mark account unavailable (auto-calculates cooldown with exponential backoff)
-    const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider);
+    const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model);
    
    if (shouldFallback) {
      log.warn("AUTH", `Account ${accountId}... unavailable (${result.status}), trying fallback`);
--- a/src/sse/services/auth.js
+++ b/src/sse/services/auth.js
@@ -5,13 +5,70 @@ import * as log from "../utils/logger.js";
 // Mutex to prevent race conditions during account selection
 let selectionMutex = Promise.resolve();

+// ---------------------------------------------------------------------------
+// Model-level rate-limit locking (in-memory)
+//
+// Providers like Antigravity maintain *separate* quota buckets per model
+// family (e.g. Claude vs Gemini). When a 429 arrives for one model, the
+// standard account-level DB lock would block ALL models on that account,
+// wasting quota that is still available for other model families.
+//
+// This module tracks model-specific locks in memory so that only the
+// affected model is skipped during account selection while the rest of the
+// account's quota remains accessible.
+//
+// Locks are intentionally in-memory: they clear on restart and require no
+// database schema migration.
+// ---------------------------------------------------------------------------
+
+/** Provider names treated as having independent per-model quota buckets; membership gates the model-level locking. */
+const MULTI_BUCKET_PROVIDERS = new Set(["antigravity"]);
+
+/** Map<"connectionId:model", expiryTimestamp>; the expiry is an epoch-millisecond value compared against Date.now(). */
+const modelLocks = new Map();
+
+/** Fallback duration for a model-level lock (5 minutes, in ms), used when no positive cooldown is available. */
+const DEFAULT_MODEL_LOCK_MS = 5 * 60 * 1000;
+
+/**
+ * Report whether `model` currently holds an unexpired lock on `connectionId`.
+ * Returns false for a missing id or model; a lock found past its expiry is removed on lookup.
+ */
+function isModelLocked(connectionId, model) {
+  if (!(connectionId && model)) return false;
+  const lockKey = `${connectionId}:${model}`;
+  const lockedUntil = modelLocks.get(lockKey);
+  if (!lockedUntil) return false;
+  const stillActive = lockedUntil > Date.now();
+  if (!stillActive) modelLocks.delete(lockKey); // drop the stale entry
+  return stillActive;
+}
+
+/**
+ * Record an in-memory lock for `model` on `connectionId` expiring `durationMs` ms from now, then log a warning. No-op when either id is falsy.
+ */
+function lockModel(connectionId, model, durationMs) {
+  if (!(connectionId && model)) return;
+  const lockedUntil = Date.now() + durationMs;
+  modelLocks.set(`${connectionId}:${model}`, lockedUntil);
+  log.warn("AUTH", `Model lock: ${model} on ${connectionId.slice(0, 8)} for ${Math.round(durationMs / 1000)}s`);
+}
+
+/** Tell whether `provider` is one of the known providers with per-model quota buckets. */
+function isMultiBucketProvider(provider) {
+  // Plain membership test against the module-level set of provider names.
+  const hasPerModelBuckets = MULTI_BUCKET_PROVIDERS.has(provider);
+  return hasPerModelBuckets;
+}
+
 /**
 * Get provider credentials from localDb
 * Filters out unavailable accounts and returns the selected account based on strategy
 * @param {string} provider - Provider name
 * @param {string|null} excludeConnectionId - Connection ID to exclude (for retry with next account)
+ * @param {string|null} model - Model name for per-model rate limit filtering
 */
-export async function getProviderCredentials(provider, excludeConnectionId = null) {
+export async function getProviderCredentials(provider, excludeConnectionId = null, model = null) {
  // Acquire mutex to prevent race conditions
  const currentMutex = selectionMutex;
  let resolveMutex;
@@ -21,7 +78,7 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
    await currentMutex;

    const connections = await getProviderConnections({ provider, isActive: true });
-    log.debug("AUTH", `${provider} | total connections: ${connections.length}, excludeId: ${excludeConnectionId || "none"}`);
+    log.debug("AUTH", `${provider} | total connections: ${connections.length}, excludeId: ${excludeConnectionId || "none"}, model: ${model || "any"}`);

    if (connections.length === 0) {
      // Check all connections (including inactive) to see if rate limited
@@ -43,9 +100,11 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
    }

    // Filter out unavailable accounts and excluded connection
+    const multiBucket = isMultiBucketProvider(provider);
    const availableConnections = connections.filter(c => {
      if (excludeConnectionId && c.id === excludeConnectionId) return false;
      if (isAccountUnavailable(c.rateLimitedUntil)) return false;
+      if (multiBucket && model && isModelLocked(c.id, model)) return false;
      return true;
    });

@@ -53,8 +112,9 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
    connections.forEach(c => {
      const excluded = excludeConnectionId && c.id === excludeConnectionId;
      const rateLimited = isAccountUnavailable(c.rateLimitedUntil);
-      if (excluded || rateLimited) {
-        log.debug("AUTH", `  → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}`);
+      const modelLocked = multiBucket && model && isModelLocked(c.id, model);
+      if (excluded || rateLimited || modelLocked) {
+        log.debug("AUTH", `  → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""} ${modelLocked ? `modelLocked(${model})` : ""}`);
      }
    });

@@ -73,6 +133,15 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
          lastErrorCode: earliestConn?.errorCode || null
        };
      }
+      if (multiBucket && model) {
+        log.warn("AUTH", `${provider} | all accounts model-locked for ${model}`);
+        return {
+          allRateLimited: true,
+          retryAfter: new Date(Date.now() + 60000).toISOString(),
+          retryAfterHuman: "reset after 1m",
+          lastError: `All accounts rate limited for model ${model}`
+        };
+      }
      log.warn("AUTH", `${provider} | all ${connections.length} accounts unavailable`);
      return null;
    }
@@ -144,10 +213,17 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
 }

 /**
- * Mark account as unavailable — reads backoffLevel from DB, calculates cooldown with exponential backoff, saves new level
+ * Mark account as unavailable — reads backoffLevel from DB, calculates cooldown with exponential backoff, saves new level.
+ * For multi-bucket providers (e.g. Antigravity), 429 errors lock only the specific model in memory
+ * rather than the entire account in the database.
+ * @param {string} connectionId
+ * @param {number} status - HTTP status code from upstream
+ * @param {string} errorText
+ * @param {string|null} provider
+ * @param {string|null} model - The specific model that triggered the error
 * @returns {{ shouldFallback: boolean, cooldownMs: number }}
 */
-export async function markAccountUnavailable(connectionId, status, errorText, provider = null) {
+export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null) {
  // Read current connection to get backoffLevel
  const connections = await getProviderConnections({ provider });
  const conn = connections.find(c => c.id === connectionId);
@@ -156,6 +232,12 @@ export async function markAccountUnavailable(connectionId, status, errorText, pr
  const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel);
  if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 };

+  if (isMultiBucketProvider(provider) && status === 429 && model) {
+    const lockDuration = cooldownMs > 0 ? cooldownMs : DEFAULT_MODEL_LOCK_MS;
+    lockModel(connectionId, model, lockDuration);
+    return { shouldFallback: true, cooldownMs: 0 };
+  }
+
  const rateLimitedUntil = getUnavailableUntil(cooldownMs);
  const reason = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error";