feat: Add Google Cloud Vertex AI provider support (vertex, vertex-partner)

Co-authored-by: Quan <quanle96@outlook.com>
PR: https://github.com/decolua/9router/pull/298

Thanks to @kwanLeeFrmVi for the original implementation. Here is a summary
of changes made during review integration:

- Replaced google-auth-library with jose (already a project dependency)
  for SA JSON -> OAuth2 Bearer token minting (RS256 JWT assertion flow)
- Moved auth logic (parseSaJson, refreshVertexToken, token cache) from
  executor into open-sse/services/tokenRefresh.js to match project pattern
- Fixed executor to use proxyAwareFetch instead of raw fetch (proxy support)
- Simplified buildUrl: use global aiplatform.googleapis.com endpoint for
  both vertex (Gemini) and vertex-partner; removed region/modelFamily fields
- Added auto-detection of GCP project_id from raw API key via probe request
  (vertex-partner only, cached per key)
- Added vertex/vertex-partner cases to /api/providers/validate/route.js
- Updated model lists based on live testing:
  - vertex: gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview,
    gemini-3-flash-preview, gemini-2.5-flash (removed gemini-2.5-pro: 404)
  - vertex-partner: deepseek-v3.2, qwen3-next-80b (instruct+thinking),
    glm-5 (removed Mistral/Llama: not enabled in test project)
  - gemini provider: added gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview
- Removed bun.lock (project uses npm/package-lock.json)
- Removed region and modelFamily UI fields (global endpoint, auto-detect)
- Kiro token auto-refresh on AccessDeniedException (from commit 2)

Made-with: Cursor
This commit is contained in:
Quan
2026-03-14 11:37:23 +07:00
committed by decolua
parent 05fc8e9ed9
commit 39f651f5be
14 changed files with 333 additions and 9 deletions

View File

@@ -166,7 +166,10 @@ export const PROVIDER_MODELS = {
{ id: "claude-3-5-sonnet-20241022", name: "Claude 3.5 Sonnet" },
],
gemini: [
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
{ id: "gemini-3-pro-preview", name: "Gemini 3 Pro Preview" },
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
{ id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
@@ -311,6 +314,18 @@ export const PROVIDER_MODELS = {
{ id: "glm-4.7-flash", name: "GLM 4.7 Flash" },
{ id: "qwen3.5", name: "Qwen3.5" },
],
vertex: [
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
],
"vertex-partner": [
{ id: "deepseek-ai/deepseek-v3.2-maas", name: "DeepSeek V3.2 (Vertex)" },
{ id: "qwen/qwen3-next-80b-a3b-thinking-maas", name: "Qwen3 Next 80B Thinking (Vertex)" },
{ id: "qwen/qwen3-next-80b-a3b-instruct-maas", name: "Qwen3 Next 80B Instruct (Vertex)" },
{ id: "zai-org/glm-5-maas", name: "GLM-5 (Vertex)" },
],
};
// Helper functions
@@ -358,6 +373,8 @@ const OAUTH_ALIASES = {
"kimi-coding": "kmc",
kilocode: "kc",
cline: "cl",
vertex: "vertex",
"vertex-partner": "vertex-partner",
};
// Derived from PROVIDERS — no need to maintain manually

View File

@@ -298,4 +298,16 @@ export const PROVIDERS = {
baseUrl: "http://localhost:11434/api/chat",
format: "ollama"
},
// Vertex AI - Gemini models via Service Account JSON
// baseUrl is not used; VertexExecutor.buildUrl() constructs it dynamically
vertex: {
baseUrl: "https://aiplatform.googleapis.com",
format: "gemini"
},
// Vertex AI - Partner models (DeepSeek, Qwen, GLM) via SA JSON
// Uses the OpenAI-compatible global endpoint (see VertexExecutor.buildUrl)
"vertex-partner": {
baseUrl: "https://aiplatform.googleapis.com",
format: "openai"
},
};

View File

@@ -5,6 +5,7 @@ import { IFlowExecutor } from "./iflow.js";
import { KiroExecutor } from "./kiro.js";
import { CodexExecutor } from "./codex.js";
import { CursorExecutor } from "./cursor.js";
import { VertexExecutor } from "./vertex.js";
import { DefaultExecutor } from "./default.js";
const executors = {
@@ -15,7 +16,9 @@ const executors = {
kiro: new KiroExecutor(),
codex: new CodexExecutor(),
cursor: new CursorExecutor(),
cu: new CursorExecutor() // Alias for cursor
cu: new CursorExecutor(), // Alias for cursor
vertex: new VertexExecutor("vertex"),
"vertex-partner": new VertexExecutor("vertex-partner"),
};
const defaultCache = new Map();
@@ -38,4 +41,5 @@ export { IFlowExecutor } from "./iflow.js";
export { KiroExecutor } from "./kiro.js";
export { CodexExecutor } from "./codex.js";
export { CursorExecutor } from "./cursor.js";
export { VertexExecutor } from "./vertex.js";
export { DefaultExecutor } from "./default.js";

View File

@@ -0,0 +1,120 @@
import { BaseExecutor } from "./base.js";
import { PROVIDERS } from "../config/providers.js";
import { parseVertexSaJson, refreshVertexToken } from "../services/tokenRefresh.js";
import { proxyAwareFetch } from "../utils/proxyFetch.js";
// Cache project IDs resolved from raw API keys { apiKey → projectId }
const projectIdCache = new Map();

/**
 * Resolve the GCP project ID associated with a raw Vertex API key.
 *
 * Sends a deliberately-invalid "__probe__" model request; Google's error
 * message embeds the key's project path ("projects/{id}/..."), which is
 * parsed out here. Successful lookups are cached per key.
 *
 * @param {string} apiKey - Raw Vertex AI API key (not SA JSON).
 * @returns {Promise<string|null>} The project ID, or null if it could not be
 *   extracted from the probe response. Network errors propagate to the caller.
 */
async function resolveProjectId(apiKey) {
  if (projectIdCache.has(apiKey)) return projectIdCache.get(apiKey);
  // Build the probe URL via searchParams so the key is always URL-encoded
  // (a malformed or pasted key can no longer corrupt the query string).
  const probeUrl = new URL(
    "https://aiplatform.googleapis.com/v1/publishers/google/models/__probe__:generateContent"
  );
  probeUrl.searchParams.set("key", apiKey);
  const res = await fetch(probeUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: "{}",
  });
  const json = await res.json().catch(() => null);
  // The error payload may be a bare object or a one-element array depending on endpoint.
  const msg = json?.[0]?.error?.message || json?.error?.message || "";
  const match = msg.match(/projects\/([^/]+)\//);
  const projectId = match?.[1] || null;
  if (projectId) projectIdCache.set(apiKey, projectId);
  return projectId;
}
/**
 * VertexExecutor - Google Cloud Vertex AI
 *
 * "vertex" → Gemini models via the global publishers endpoint
 * "vertex-partner" → Partner models (DeepSeek, Qwen, GLM, ...) via the
 *   global OpenAI-compatible endpoint
 *
 * Auth: SA JSON (stored as apiKey) → JWT assertion → Bearer token (via jose);
 * the token is minted/cached in tokenRefresh.js, not here. Alternatively a
 * raw API key is carried as an encoded ?key= query parameter.
 */
export class VertexExecutor extends BaseExecutor {
  constructor(providerId = "vertex") {
    super(providerId, PROVIDERS[providerId] || {});
  }

  /**
   * Build the Vertex request URL for the given model.
   * Raw API keys are appended as a URL-encoded ?key= parameter; SA JSON
   * auth uses a Bearer header instead (see buildHeaders).
   * @throws {Error} for vertex-partner when no project_id is available.
   */
  buildUrl(model, stream, urlIndex = 0, credentials = null) {
    const saJson = parseVertexSaJson(credentials?.apiKey);
    const rawKey = !saJson ? credentials?.apiKey : null;
    const projectId = saJson?.project_id || credentials?.providerSpecificData?.projectId;
    if (this.provider === "vertex-partner") {
      // Partner models require project_id in path regardless of auth method
      if (!projectId) throw new Error("Vertex partner models require a project_id. Add it in providerSpecificData or use Service Account JSON.");
      // Encode path/query components so malformed values cannot corrupt the URL
      const url = `https://aiplatform.googleapis.com/v1/projects/${encodeURIComponent(projectId)}/locations/global/endpoints/openapi/chat/completions`;
      return rawKey ? `${url}?key=${encodeURIComponent(rawKey)}` : url;
    }
    // Gemini on Vertex: always use global publishers endpoint
    const action = stream ? "streamGenerateContent" : "generateContent";
    // NOTE(review): Google's SSE framing normally requires ?alt=sse on
    // streamGenerateContent; without it the API streams a JSON array.
    // Confirm which format the response parser expects before changing.
    let url = `https://aiplatform.googleapis.com/v1/publishers/google/models/${model}:${action}`;
    if (rawKey) url += `?key=${encodeURIComponent(rawKey)}`;
    return url;
  }

  /**
   * JSON Content-Type plus optional Bearer auth.
   * The Authorization header is set only for the SA JSON flow; raw keys
   * travel in the URL (?key=) and must not be sent as a Bearer token.
   */
  buildHeaders(credentials, stream = true) {
    const headers = { "Content-Type": "application/json" };
    // Only set Bearer token if using SA JSON flow (raw key goes in URL ?key=)
    if (credentials.accessToken) {
      headers["Authorization"] = `Bearer ${credentials.accessToken}`;
    }
    if (stream) headers["Accept"] = "text/event-stream";
    return headers;
  }

  /**
   * Mint (or reuse) a Bearer token from the stored SA JSON.
   * Returns null when the apiKey is not SA JSON — raw-key auth has nothing to refresh.
   */
  async refreshCredentials(credentials, log) {
    const saJson = parseVertexSaJson(credentials?.apiKey);
    if (!saJson) return null;
    const result = await refreshVertexToken(saJson, log);
    if (!result) return null;
    return { accessToken: result.accessToken, expiresAt: result.expiresAt };
  }

  /**
   * Execute a request against Vertex AI through the proxy-aware fetch.
   * Side effect: mutates `credentials` (accessToken and
   * providerSpecificData.projectId) so resolved auth state is reused
   * by buildUrl/buildHeaders and by subsequent calls.
   */
  async execute({ model, body, stream, credentials, signal, log, proxyOptions = null }) {
    const saJson = parseVertexSaJson(credentials?.apiKey);
    // SA JSON flow: mint Bearer token (cached in tokenRefresh.js)
    if (saJson) {
      const result = await refreshVertexToken(saJson, log);
      if (!result?.accessToken) throw new Error("Vertex: failed to mint access token from Service Account JSON");
      credentials.accessToken = result.accessToken;
    }
    // vertex-partner with raw key: auto-resolve project_id if not provided
    if (this.provider === "vertex-partner" && !saJson && !credentials?.providerSpecificData?.projectId) {
      const projectId = await resolveProjectId(credentials.apiKey);
      if (!projectId) throw new Error("Vertex: could not resolve project_id from API key. Please add it manually in provider settings.");
      log?.debug?.("VERTEX", `Resolved project_id: ${projectId}`);
      credentials.providerSpecificData = { ...credentials.providerSpecificData, projectId };
    }
    const url = this.buildUrl(model, stream, 0, credentials);
    const headers = this.buildHeaders(credentials, stream);
    const transformedBody = this.transformRequest(model, body, stream, credentials);
    const response = await proxyAwareFetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(transformedBody),
      signal,
    }, proxyOptions);
    return { response, url, headers, transformedBody };
  }
}

export default VertexExecutor;

View File

@@ -46,6 +46,10 @@ const ALIAS_TO_PROVIDER_ID = {
ch: "chutes",
chutes: "chutes",
cursor: "cursor",
vx: "vertex",
vertex: "vertex",
vxp: "vertex-partner",
"vertex-partner": "vertex-partner",
};
/**

View File

@@ -298,6 +298,12 @@ export function buildProviderHeaders(provider, credentials, stream = true, body
// Claude-compatible API providers use x-api-key
headers["x-api-key"] = credentials.apiKey;
break;
case "vertex":
case "vertex-partner":
// Vertex auth is handled by VertexExecutor.buildHeaders() after async token minting
// Do NOT set Authorization here; it would leak the raw SA JSON as Bearer token
break;
default:
headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`;

View File

@@ -497,6 +497,13 @@ export async function getAccessToken(provider, credentials, log) {
log
);
case "vertex":
case "vertex-partner": {
const saJson = parseVertexSaJson(credentials.apiKey);
if (!saJson) return null;
return await refreshVertexToken(saJson, log);
}
default:
log?.warn?.("TOKEN_REFRESH", `Unsupported provider for token refresh: ${provider}`);
return null;
@@ -534,6 +541,12 @@ export async function refreshTokenByProvider(provider, credentials, log) {
credentials.providerSpecificData,
log
);
case "vertex":
case "vertex-partner": {
const saJson = parseVertexSaJson(credentials.apiKey);
if (!saJson) return null;
return refreshVertexToken(saJson, log);
}
default:
return refreshAccessToken(provider, credentials.refreshToken, credentials, log);
}
@@ -613,6 +626,81 @@ export async function getAllAccessTokens(userInfo, log) {
return results;
}
/**
 * Parse a Vertex AI Service Account JSON blob stored in the apiKey field.
 *
 * @param {*} apiKey - Candidate value; anything other than a JSON string
 *   describing a complete service account yields null.
 * @returns {object|null} The parsed SA object when it has type
 *   "service_account" plus client_email, private_key and project_id;
 *   otherwise null (raw API keys fall through here).
 */
export function parseVertexSaJson(apiKey) {
  if (typeof apiKey !== "string") return null;
  let candidate;
  try {
    candidate = JSON.parse(apiKey);
  } catch {
    return null;
  }
  const isCompleteServiceAccount =
    candidate?.type === "service_account" &&
    Boolean(candidate.client_email) &&
    Boolean(candidate.private_key) &&
    Boolean(candidate.project_id);
  return isCompleteServiceAccount ? candidate : null;
}
// Cache Vertex tokens keyed by service account email { token, expiresAt }
const vertexTokenCache = new Map();

/**
 * Mint a short-lived OAuth2 Bearer token for Google Cloud Vertex AI using
 * Service Account JSON + jose (RS256 JWT assertion flow).
 * Tokens are cached per client_email and reused until 5 minutes before expiry.
 *
 * @param {object} saJson - Parsed service-account JSON (client_email, private_key).
 * @param {object} [log] - Optional logger with debug/info/error methods.
 * @returns {Promise<{accessToken: string, expiresAt: number}|null>} null on any failure.
 */
export async function refreshVertexToken(saJson, log) {
  const email = saJson.client_email;
  const fiveMinutesMs = 5 * 60 * 1000;
  const hit = vertexTokenCache.get(email);
  // Serve from cache while the token has more than the safety buffer remaining.
  if (hit && hit.expiresAt - Date.now() > fiveMinutesMs) {
    return { accessToken: hit.token, expiresAt: hit.expiresAt };
  }
  try {
    const { SignJWT, importPKCS8 } = await import("jose");
    log?.debug?.("TOKEN_REFRESH", `Vertex minting token for ${email}`);
    // Stored keys may carry escaped "\n"; normalize to real newlines before import.
    const pem = saJson.private_key.replace(/\\n/g, "\n");
    const signingKey = await importPKCS8(pem, "RS256");
    const issuedAt = Math.floor(Date.now() / 1000);
    const assertion = await new SignJWT({ scope: "https://www.googleapis.com/auth/cloud-platform" })
      .setProtectedHeader({ alg: "RS256" })
      .setIssuer(email)
      .setAudience("https://oauth2.googleapis.com/token")
      .setIssuedAt(issuedAt)
      .setExpirationTime(issuedAt + 3600)
      .sign(signingKey);
    // Exchange the signed assertion for an access token at Google's token endpoint.
    const tokenRes = await fetch("https://oauth2.googleapis.com/token", {
      method: "POST",
      headers: { "Content-Type": "application/x-www-form-urlencoded" },
      body: new URLSearchParams({
        grant_type: "urn:ietf:params:oauth:grant-type:jwt-bearer",
        assertion,
      }),
    });
    if (!tokenRes.ok) {
      log?.error?.("TOKEN_REFRESH", `Vertex token mint failed: ${await tokenRes.text()}`);
      return null;
    }
    const { access_token, expires_in } = await tokenRes.json();
    const expiresAt = Date.now() + (expires_in ?? 3600) * 1000;
    vertexTokenCache.set(email, { token: access_token, expiresAt });
    log?.info?.("TOKEN_REFRESH", `Vertex token minted for ${email}`);
    return { accessToken: access_token, expiresAt };
  } catch (error) {
    log?.error?.("TOKEN_REFRESH", `Vertex token error: ${error.message}`);
    return null;
  }
}
/**
* Refresh token with retry and exponential backoff
* Retries on failure with increasing delay: 1s, 2s, 3s...

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.8 KiB

BIN
public/providers/vertex.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.8 KiB

View File

@@ -1685,6 +1685,7 @@ function AddApiKeyModal({ isOpen, provider, providerName, isCompatible, isAnthro
priority: formData.priority,
proxyPoolId: formData.proxyPoolId === NONE_PROXY_POOL_VALUE ? null : formData.proxyPoolId,
testStatus: isValid ? "active" : "unknown",
providerSpecificData: undefined
});
} finally {
setSaving(false);

View File

@@ -3,7 +3,7 @@ import { getProviderConnectionById } from "@/models";
import { isOpenAICompatibleProvider, isAnthropicCompatibleProvider } from "@/shared/constants/providers";
import { KiroService } from "@/lib/oauth/services/kiro";
import { GEMINI_CONFIG } from "@/lib/oauth/constants/oauth";
import { refreshGoogleToken, updateProviderCredentials } from "@/sse/services/tokenRefresh";
import { refreshGoogleToken, updateProviderCredentials, refreshKiroToken } from "@/sse/services/tokenRefresh";
const GEMINI_CLI_MODELS_URL = "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels";
@@ -258,22 +258,56 @@ export async function GET(request, { params }) {
// Kiro: Try dynamic model fetching first
if (connection.provider === "kiro") {
let warning;
try {
const kiroService = new KiroService();
const profileArn = connection.providerSpecificData?.profileArn;
const accessToken = connection.accessToken;
const refreshToken = connection.refreshToken;
if (accessToken && profileArn) {
const models = await kiroService.listAvailableModels(accessToken, profileArn);
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models
});
try {
const models = await kiroService.listAvailableModels(accessToken, profileArn);
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models
});
} catch (error) {
if (error.message.includes("AccessDeniedException") && refreshToken) {
console.log("Kiro token invalid/expired. Attempting refresh...");
const refreshed = await refreshKiroToken(refreshToken, connection.providerSpecificData);
if (refreshed?.accessToken) {
await updateProviderCredentials(connection.id, {
accessToken: refreshed.accessToken,
refreshToken: refreshed.refreshToken || refreshToken,
expiresIn: refreshed.expiresIn,
});
const models = await kiroService.listAvailableModels(refreshed.accessToken, profileArn);
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models
});
}
}
throw error; // Let outer catch handle it
}
}
} catch (error) {
warning = `Failed to fetch Kiro models: ${error.message}`;
console.log("Failed to fetch Kiro models dynamically, falling back to static:", error.message);
}
// Return empty dynamic list so UI falls back to static provider models.
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models: [],
warning,
});
}
if (connection.provider === "gemini-cli") {

View File

@@ -204,6 +204,38 @@ export async function POST(request) {
break;
}
case "vertex": {
// Raw key: probe the global endpoint — a valid key yields 404 (unknown model), an invalid key 401/403
// SA JSON: validate that the required service-account fields are present
const saJson = (() => { try { const p = JSON.parse(apiKey); return p.type === "service_account" ? p : null; } catch { return null; } })();
if (saJson) {
// Validate SA JSON has required fields
isValid = !!(saJson.client_email && saJson.private_key && saJson.project_id);
} else {
// Raw key: probe Vertex — 404 means key is valid (model just doesn't exist), 401 means invalid key
const probeRes = await fetch(
`https://aiplatform.googleapis.com/v1/publishers/google/models/__probe__:generateContent?key=${apiKey}`,
{ method: "POST", headers: { "Content-Type": "application/json" }, body: "{}" }
);
isValid = probeRes.status !== 401 && probeRes.status !== 403;
}
break;
}
case "vertex-partner": {
const saJson = (() => { try { const p = JSON.parse(apiKey); return p.type === "service_account" ? p : null; } catch { return null; } })();
if (saJson) {
isValid = !!(saJson.client_email && saJson.private_key && saJson.project_id);
} else {
const probeRes = await fetch(
`https://aiplatform.googleapis.com/v1/publishers/google/models/__probe__:generateContent?key=${apiKey}`,
{ method: "POST", headers: { "Content-Type": "application/json" }, body: "{}" }
);
isValid = probeRes.status !== 401 && probeRes.status !== 403;
}
break;
}
default:
return NextResponse.json({ error: "Provider validation not supported" }, { status: 400 });
}

View File

@@ -51,6 +51,8 @@ export const APIKEY_PROVIDERS = {
chutes: { id: "chutes", alias: "ch", name: "Chutes AI", icon: "water_drop", color: "#ffffffff", textIcon: "CH", website: "https://chutes.ai" },
ollama: { id: "ollama", alias: "ollama", name: "Ollama Cloud", icon: "cloud", color: "#ffffffff", textIcon: "OL", website: "https://ollama.com" },
"ollama-local": { id: "ollama-local", alias: "ollama-local", name: "Ollama Local", icon: "cloud", color: "#ffffffff", textIcon: "OL", website: "https://ollama.com" },
vertex: { id: "vertex", alias: "vx", name: "Vertex AI", icon: "cloud", color: "#4285F4", textIcon: "VX", website: "https://cloud.google.com/vertex-ai" },
"vertex-partner": { id: "vertex-partner", alias: "vxp", name: "Vertex Partner", icon: "cloud", color: "#34A853", textIcon: "VP", website: "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models" },
};
export const OPENAI_COMPATIBLE_PREFIX = "openai-compatible-";

View File

@@ -19,7 +19,8 @@ import {
getAccessToken as _getAccessToken,
refreshTokenByProvider as _refreshTokenByProvider,
formatProviderCredentials as _formatProviderCredentials,
getAllAccessTokens as _getAllAccessTokens
getAllAccessTokens as _getAllAccessTokens,
refreshKiroToken as _refreshKiroToken
} from "open-sse/services/tokenRefresh.js";
export const TOKEN_EXPIRY_BUFFER_MS = BUFFER_MS;
@@ -50,6 +51,9 @@ export const refreshGitHubToken = (refreshToken) =>
export const refreshCopilotToken = (githubAccessToken) =>
_refreshCopilotToken(githubAccessToken, log);
export const refreshKiroToken = (refreshToken, providerSpecificData) =>
_refreshKiroToken(refreshToken, providerSpecificData, log);
export const getAccessToken = (provider, credentials) =>
_getAccessToken(provider, credentials, log);