feat: Add Google Cloud Vertex AI provider support (vertex, vertex-partner)

Co-authored-by: Quan <quanle96@outlook.com>
PR: https://github.com/decolua/9router/pull/298

Thanks to @kwanLeeFrmVi for the original implementation. Here is a summary
of changes made during review integration:

- Replaced google-auth-library with jose (already a project dependency)
  for SA JSON -> OAuth2 Bearer token minting (RS256 JWT assertion flow)
- Moved auth logic (parseSaJson, refreshVertexToken, token cache) from
  executor into open-sse/services/tokenRefresh.js to match project pattern
- Fixed executor to use proxyAwareFetch instead of raw fetch (proxy support)
- Simplified buildUrl: use global aiplatform.googleapis.com endpoint for
  both vertex (Gemini) and vertex-partner; removed region/modelFamily fields
- Added auto-detection of GCP project_id from raw API key via probe request
  (vertex-partner only, cached per key)
- Added vertex/vertex-partner cases to /api/providers/validate/route.js
- Updated model lists based on live testing:
  - vertex: gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview,
    gemini-3-flash-preview, gemini-2.5-flash (removed gemini-2.5-pro: 404)
  - vertex-partner: deepseek-v3.2, qwen3-next-80b (instruct+thinking),
    glm-5 (removed Mistral/Llama: not enabled in test project)
  - gemini provider: added gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview
- Removed bun.lock (project uses npm/package-lock.json)
- Removed region and modelFamily UI fields (global endpoint, auto-detect)
- Kiro token auto-refresh on AccessDeniedException (from commit 2)

Made-with: Cursor
This commit is contained in:
Quan
2026-03-14 11:37:23 +07:00
committed by decolua
parent 05fc8e9ed9
commit 39f651f5be
14 changed files with 333 additions and 9 deletions

View File

@@ -166,7 +166,10 @@ export const PROVIDER_MODELS = {
{ id: "claude-3-5-sonnet-20241022", name: "Claude 3.5 Sonnet" },
],
gemini: [
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
{ id: "gemini-3-pro-preview", name: "Gemini 3 Pro Preview" },
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
{ id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
@@ -311,6 +314,18 @@ export const PROVIDER_MODELS = {
{ id: "glm-4.7-flash", name: "GLM 4.7 Flash" },
{ id: "qwen3.5", name: "Qwen3.5" },
],
vertex: [
{ id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
{ id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
],
"vertex-partner": [
{ id: "deepseek-ai/deepseek-v3.2-maas", name: "DeepSeek V3.2 (Vertex)" },
{ id: "qwen/qwen3-next-80b-a3b-thinking-maas", name: "Qwen3 Next 80B Thinking (Vertex)" },
{ id: "qwen/qwen3-next-80b-a3b-instruct-maas", name: "Qwen3 Next 80B Instruct (Vertex)" },
{ id: "zai-org/glm-5-maas", name: "GLM-5 (Vertex)" },
],
};
// Helper functions
@@ -358,6 +373,8 @@ const OAUTH_ALIASES = {
"kimi-coding": "kmc",
kilocode: "kc",
cline: "cl",
vertex: "vertex",
"vertex-partner": "vertex-partner",
};
// Derived from PROVIDERS — no need to maintain manually

View File

@@ -298,4 +298,16 @@ export const PROVIDERS = {
baseUrl: "http://localhost:11434/api/chat",
format: "ollama"
},
// Vertex AI - Gemini models via Service Account JSON
// baseUrl is not used; VertexExecutor.buildUrl() constructs it dynamically
vertex: {
baseUrl: "https://aiplatform.googleapis.com",
format: "gemini"
},
// Vertex AI - Partner models (DeepSeek, Qwen, GLM) via SA JSON
// Uses the OpenAI-compatible global endpoint (see VertexExecutor.buildUrl)
"vertex-partner": {
baseUrl: "https://aiplatform.googleapis.com",
format: "openai"
},
};

View File

@@ -5,6 +5,7 @@ import { IFlowExecutor } from "./iflow.js";
import { KiroExecutor } from "./kiro.js";
import { CodexExecutor } from "./codex.js";
import { CursorExecutor } from "./cursor.js";
import { VertexExecutor } from "./vertex.js";
import { DefaultExecutor } from "./default.js";
const executors = {
@@ -15,7 +16,9 @@ const executors = {
kiro: new KiroExecutor(),
codex: new CodexExecutor(),
cursor: new CursorExecutor(),
cu: new CursorExecutor() // Alias for cursor
cu: new CursorExecutor(), // Alias for cursor
vertex: new VertexExecutor("vertex"),
"vertex-partner": new VertexExecutor("vertex-partner"),
};
const defaultCache = new Map();
@@ -38,4 +41,5 @@ export { IFlowExecutor } from "./iflow.js";
export { KiroExecutor } from "./kiro.js";
export { CodexExecutor } from "./codex.js";
export { CursorExecutor } from "./cursor.js";
export { VertexExecutor } from "./vertex.js";
export { DefaultExecutor } from "./default.js";

View File

@@ -0,0 +1,120 @@
import { BaseExecutor } from "./base.js";
import { PROVIDERS } from "../config/providers.js";
import { parseVertexSaJson, refreshVertexToken } from "../services/tokenRefresh.js";
import { proxyAwareFetch } from "../utils/proxyFetch.js";
// Cache project IDs resolved from raw API keys { apiKey → projectId }
const projectIdCache = new Map();

/**
 * Resolve the GCP project ID associated with a raw Vertex API key.
 *
 * Sends a deliberately-invalid "__probe__" model request; Google's error
 * message embeds the key's project path ("projects/{id}/..."), which is
 * parsed out here. Successful lookups are cached per key.
 *
 * @param {string} apiKey - Raw Vertex AI API key (not SA JSON).
 * @returns {Promise<string|null>} The project ID, or null if it could not be
 *   extracted from the probe response. Network errors propagate to the caller.
 */
async function resolveProjectId(apiKey) {
  if (projectIdCache.has(apiKey)) return projectIdCache.get(apiKey);
  // Build the probe URL via searchParams so the key is always URL-encoded
  // (a malformed or pasted key can no longer corrupt the query string).
  const probeUrl = new URL(
    "https://aiplatform.googleapis.com/v1/publishers/google/models/__probe__:generateContent"
  );
  probeUrl.searchParams.set("key", apiKey);
  const res = await fetch(probeUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: "{}",
  });
  const json = await res.json().catch(() => null);
  // The error payload may be a bare object or a one-element array depending on endpoint.
  const msg = json?.[0]?.error?.message || json?.error?.message || "";
  const match = msg.match(/projects\/([^/]+)\//);
  const projectId = match?.[1] || null;
  if (projectId) projectIdCache.set(apiKey, projectId);
  return projectId;
}
/**
 * VertexExecutor - Google Cloud Vertex AI
 *
 * "vertex" → Gemini models via the global publishers endpoint
 * "vertex-partner" → Partner models (DeepSeek, Qwen, GLM, ...) via the
 *   global OpenAI-compatible endpoint
 *
 * Auth: SA JSON (stored as apiKey) → JWT assertion → Bearer token (via jose);
 * the token is minted/cached in tokenRefresh.js, not here. Alternatively a
 * raw API key is carried as an encoded ?key= query parameter.
 */
export class VertexExecutor extends BaseExecutor {
  constructor(providerId = "vertex") {
    super(providerId, PROVIDERS[providerId] || {});
  }

  /**
   * Build the Vertex request URL for the given model.
   * Raw API keys are appended as a URL-encoded ?key= parameter; SA JSON
   * auth uses a Bearer header instead (see buildHeaders).
   * @throws {Error} for vertex-partner when no project_id is available.
   */
  buildUrl(model, stream, urlIndex = 0, credentials = null) {
    const saJson = parseVertexSaJson(credentials?.apiKey);
    const rawKey = !saJson ? credentials?.apiKey : null;
    const projectId = saJson?.project_id || credentials?.providerSpecificData?.projectId;
    if (this.provider === "vertex-partner") {
      // Partner models require project_id in path regardless of auth method
      if (!projectId) throw new Error("Vertex partner models require a project_id. Add it in providerSpecificData or use Service Account JSON.");
      // Encode path/query components so malformed values cannot corrupt the URL
      const url = `https://aiplatform.googleapis.com/v1/projects/${encodeURIComponent(projectId)}/locations/global/endpoints/openapi/chat/completions`;
      return rawKey ? `${url}?key=${encodeURIComponent(rawKey)}` : url;
    }
    // Gemini on Vertex: always use global publishers endpoint
    const action = stream ? "streamGenerateContent" : "generateContent";
    // NOTE(review): Google's SSE framing normally requires ?alt=sse on
    // streamGenerateContent; without it the API streams a JSON array.
    // Confirm which format the response parser expects before changing.
    let url = `https://aiplatform.googleapis.com/v1/publishers/google/models/${model}:${action}`;
    if (rawKey) url += `?key=${encodeURIComponent(rawKey)}`;
    return url;
  }

  /**
   * JSON Content-Type plus optional Bearer auth.
   * The Authorization header is set only for the SA JSON flow; raw keys
   * travel in the URL (?key=) and must not be sent as a Bearer token.
   */
  buildHeaders(credentials, stream = true) {
    const headers = { "Content-Type": "application/json" };
    // Only set Bearer token if using SA JSON flow (raw key goes in URL ?key=)
    if (credentials.accessToken) {
      headers["Authorization"] = `Bearer ${credentials.accessToken}`;
    }
    if (stream) headers["Accept"] = "text/event-stream";
    return headers;
  }

  /**
   * Mint (or reuse) a Bearer token from the stored SA JSON.
   * Returns null when the apiKey is not SA JSON — raw-key auth has nothing to refresh.
   */
  async refreshCredentials(credentials, log) {
    const saJson = parseVertexSaJson(credentials?.apiKey);
    if (!saJson) return null;
    const result = await refreshVertexToken(saJson, log);
    if (!result) return null;
    return { accessToken: result.accessToken, expiresAt: result.expiresAt };
  }

  /**
   * Execute a request against Vertex AI through the proxy-aware fetch.
   * Side effect: mutates `credentials` (accessToken and
   * providerSpecificData.projectId) so resolved auth state is reused
   * by buildUrl/buildHeaders and by subsequent calls.
   */
  async execute({ model, body, stream, credentials, signal, log, proxyOptions = null }) {
    const saJson = parseVertexSaJson(credentials?.apiKey);
    // SA JSON flow: mint Bearer token (cached in tokenRefresh.js)
    if (saJson) {
      const result = await refreshVertexToken(saJson, log);
      if (!result?.accessToken) throw new Error("Vertex: failed to mint access token from Service Account JSON");
      credentials.accessToken = result.accessToken;
    }
    // vertex-partner with raw key: auto-resolve project_id if not provided
    if (this.provider === "vertex-partner" && !saJson && !credentials?.providerSpecificData?.projectId) {
      const projectId = await resolveProjectId(credentials.apiKey);
      if (!projectId) throw new Error("Vertex: could not resolve project_id from API key. Please add it manually in provider settings.");
      log?.debug?.("VERTEX", `Resolved project_id: ${projectId}`);
      credentials.providerSpecificData = { ...credentials.providerSpecificData, projectId };
    }
    const url = this.buildUrl(model, stream, 0, credentials);
    const headers = this.buildHeaders(credentials, stream);
    const transformedBody = this.transformRequest(model, body, stream, credentials);
    const response = await proxyAwareFetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(transformedBody),
      signal,
    }, proxyOptions);
    return { response, url, headers, transformedBody };
  }
}

export default VertexExecutor;

View File

@@ -46,6 +46,10 @@ const ALIAS_TO_PROVIDER_ID = {
ch: "chutes",
chutes: "chutes",
cursor: "cursor",
vx: "vertex",
vertex: "vertex",
vxp: "vertex-partner",
"vertex-partner": "vertex-partner",
};
/**

View File

@@ -298,6 +298,12 @@ export function buildProviderHeaders(provider, credentials, stream = true, body
// Claude-compatible API providers use x-api-key
headers["x-api-key"] = credentials.apiKey;
break;
case "vertex":
case "vertex-partner":
// Vertex auth is handled by VertexExecutor.buildHeaders() after async token minting
// Do NOT set Authorization here; it would leak the raw SA JSON as Bearer token
break;
default:
headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`;

View File

@@ -497,6 +497,13 @@ export async function getAccessToken(provider, credentials, log) {
log
);
case "vertex":
case "vertex-partner": {
const saJson = parseVertexSaJson(credentials.apiKey);
if (!saJson) return null;
return await refreshVertexToken(saJson, log);
}
default:
log?.warn?.("TOKEN_REFRESH", `Unsupported provider for token refresh: ${provider}`);
return null;
@@ -534,6 +541,12 @@ export async function refreshTokenByProvider(provider, credentials, log) {
credentials.providerSpecificData,
log
);
case "vertex":
case "vertex-partner": {
const saJson = parseVertexSaJson(credentials.apiKey);
if (!saJson) return null;
return refreshVertexToken(saJson, log);
}
default:
return refreshAccessToken(provider, credentials.refreshToken, credentials, log);
}
@@ -613,6 +626,81 @@ export async function getAllAccessTokens(userInfo, log) {
return results;
}
/**
 * Parse a Vertex AI Service Account JSON blob stored in the apiKey field.
 *
 * @param {*} apiKey - Candidate value; anything other than a JSON string
 *   describing a complete service account yields null.
 * @returns {object|null} The parsed SA object when it has type
 *   "service_account" plus client_email, private_key and project_id;
 *   otherwise null (raw API keys fall through here).
 */
export function parseVertexSaJson(apiKey) {
  if (typeof apiKey !== "string") return null;
  let candidate;
  try {
    candidate = JSON.parse(apiKey);
  } catch {
    return null;
  }
  const isCompleteServiceAccount =
    candidate?.type === "service_account" &&
    Boolean(candidate.client_email) &&
    Boolean(candidate.private_key) &&
    Boolean(candidate.project_id);
  return isCompleteServiceAccount ? candidate : null;
}
// Cache Vertex tokens keyed by service account email { token, expiresAt }
const vertexTokenCache = new Map();

/**
 * Mint a short-lived OAuth2 Bearer token for Google Cloud Vertex AI using
 * Service Account JSON + jose (RS256 JWT assertion flow).
 * Tokens are cached per client_email and reused until 5 minutes before expiry.
 *
 * @param {object} saJson - Parsed service-account JSON (client_email, private_key).
 * @param {object} [log] - Optional logger with debug/info/error methods.
 * @returns {Promise<{accessToken: string, expiresAt: number}|null>} null on any failure.
 */
export async function refreshVertexToken(saJson, log) {
  const email = saJson.client_email;
  const fiveMinutesMs = 5 * 60 * 1000;
  const hit = vertexTokenCache.get(email);
  // Serve from cache while the token has more than the safety buffer remaining.
  if (hit && hit.expiresAt - Date.now() > fiveMinutesMs) {
    return { accessToken: hit.token, expiresAt: hit.expiresAt };
  }
  try {
    const { SignJWT, importPKCS8 } = await import("jose");
    log?.debug?.("TOKEN_REFRESH", `Vertex minting token for ${email}`);
    // Stored keys may carry escaped "\n"; normalize to real newlines before import.
    const pem = saJson.private_key.replace(/\\n/g, "\n");
    const signingKey = await importPKCS8(pem, "RS256");
    const issuedAt = Math.floor(Date.now() / 1000);
    const assertion = await new SignJWT({ scope: "https://www.googleapis.com/auth/cloud-platform" })
      .setProtectedHeader({ alg: "RS256" })
      .setIssuer(email)
      .setAudience("https://oauth2.googleapis.com/token")
      .setIssuedAt(issuedAt)
      .setExpirationTime(issuedAt + 3600)
      .sign(signingKey);
    // Exchange the signed assertion for an access token at Google's token endpoint.
    const tokenRes = await fetch("https://oauth2.googleapis.com/token", {
      method: "POST",
      headers: { "Content-Type": "application/x-www-form-urlencoded" },
      body: new URLSearchParams({
        grant_type: "urn:ietf:params:oauth:grant-type:jwt-bearer",
        assertion,
      }),
    });
    if (!tokenRes.ok) {
      log?.error?.("TOKEN_REFRESH", `Vertex token mint failed: ${await tokenRes.text()}`);
      return null;
    }
    const { access_token, expires_in } = await tokenRes.json();
    const expiresAt = Date.now() + (expires_in ?? 3600) * 1000;
    vertexTokenCache.set(email, { token: access_token, expiresAt });
    log?.info?.("TOKEN_REFRESH", `Vertex token minted for ${email}`);
    return { accessToken: access_token, expiresAt };
  } catch (error) {
    log?.error?.("TOKEN_REFRESH", `Vertex token error: ${error.message}`);
    return null;
  }
}
/**
* Refresh token with retry and exponential backoff
* Retries on failure with increasing delay: 1s, 2s, 3s...

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.8 KiB

BIN
public/providers/vertex.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.8 KiB

View File

@@ -1685,6 +1685,7 @@ function AddApiKeyModal({ isOpen, provider, providerName, isCompatible, isAnthro
priority: formData.priority,
proxyPoolId: formData.proxyPoolId === NONE_PROXY_POOL_VALUE ? null : formData.proxyPoolId,
testStatus: isValid ? "active" : "unknown",
providerSpecificData: undefined
});
} finally {
setSaving(false);

View File

@@ -3,7 +3,7 @@ import { getProviderConnectionById } from "@/models";
import { isOpenAICompatibleProvider, isAnthropicCompatibleProvider } from "@/shared/constants/providers";
import { KiroService } from "@/lib/oauth/services/kiro";
import { GEMINI_CONFIG } from "@/lib/oauth/constants/oauth";
import { refreshGoogleToken, updateProviderCredentials } from "@/sse/services/tokenRefresh";
import { refreshGoogleToken, updateProviderCredentials, refreshKiroToken } from "@/sse/services/tokenRefresh";
const GEMINI_CLI_MODELS_URL = "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels";
@@ -258,22 +258,56 @@ export async function GET(request, { params }) {
// Kiro: Try dynamic model fetching first
if (connection.provider === "kiro") {
let warning;
try {
const kiroService = new KiroService();
const profileArn = connection.providerSpecificData?.profileArn;
const accessToken = connection.accessToken;
const refreshToken = connection.refreshToken;
if (accessToken && profileArn) {
const models = await kiroService.listAvailableModels(accessToken, profileArn);
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models
});
try {
const models = await kiroService.listAvailableModels(accessToken, profileArn);
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models
});
} catch (error) {
if (error.message.includes("AccessDeniedException") && refreshToken) {
console.log("Kiro token invalid/expired. Attempting refresh...");
const refreshed = await refreshKiroToken(refreshToken, connection.providerSpecificData);
if (refreshed?.accessToken) {
await updateProviderCredentials(connection.id, {
accessToken: refreshed.accessToken,
refreshToken: refreshed.refreshToken || refreshToken,
expiresIn: refreshed.expiresIn,
});
const models = await kiroService.listAvailableModels(refreshed.accessToken, profileArn);
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models
});
}
}
throw error; // Let outer catch handle it
}
}
} catch (error) {
warning = `Failed to fetch Kiro models: ${error.message}`;
console.log("Failed to fetch Kiro models dynamically, falling back to static:", error.message);
}
// Return empty dynamic list so UI falls back to static provider models.
return NextResponse.json({
provider: connection.provider,
connectionId: connection.id,
models: [],
warning,
});
}
if (connection.provider === "gemini-cli") {

View File

@@ -204,6 +204,38 @@ export async function POST(request) {
break;
}
case "vertex": {
// Raw key: probe the global endpoint — a valid key yields 404 (unknown model), an invalid key 401/403
// SA JSON: validate that the required service-account fields are present
const saJson = (() => { try { const p = JSON.parse(apiKey); return p.type === "service_account" ? p : null; } catch { return null; } })();
if (saJson) {
// Validate SA JSON has required fields
isValid = !!(saJson.client_email && saJson.private_key && saJson.project_id);
} else {
// Raw key: probe Vertex — 404 means key is valid (model just doesn't exist), 401 means invalid key
const probeRes = await fetch(
`https://aiplatform.googleapis.com/v1/publishers/google/models/__probe__:generateContent?key=${apiKey}`,
{ method: "POST", headers: { "Content-Type": "application/json" }, body: "{}" }
);
isValid = probeRes.status !== 401 && probeRes.status !== 403;
}
break;
}
case "vertex-partner": {
const saJson = (() => { try { const p = JSON.parse(apiKey); return p.type === "service_account" ? p : null; } catch { return null; } })();
if (saJson) {
isValid = !!(saJson.client_email && saJson.private_key && saJson.project_id);
} else {
const probeRes = await fetch(
`https://aiplatform.googleapis.com/v1/publishers/google/models/__probe__:generateContent?key=${apiKey}`,
{ method: "POST", headers: { "Content-Type": "application/json" }, body: "{}" }
);
isValid = probeRes.status !== 401 && probeRes.status !== 403;
}
break;
}
default:
return NextResponse.json({ error: "Provider validation not supported" }, { status: 400 });
}

View File

@@ -51,6 +51,8 @@ export const APIKEY_PROVIDERS = {
chutes: { id: "chutes", alias: "ch", name: "Chutes AI", icon: "water_drop", color: "#ffffffff", textIcon: "CH", website: "https://chutes.ai" },
ollama: { id: "ollama", alias: "ollama", name: "Ollama Cloud", icon: "cloud", color: "#ffffffff", textIcon: "OL", website: "https://ollama.com" },
"ollama-local": { id: "ollama-local", alias: "ollama-local", name: "Ollama Local", icon: "cloud", color: "#ffffffff", textIcon: "OL", website: "https://ollama.com" },
vertex: { id: "vertex", alias: "vx", name: "Vertex AI", icon: "cloud", color: "#4285F4", textIcon: "VX", website: "https://cloud.google.com/vertex-ai" },
"vertex-partner": { id: "vertex-partner", alias: "vxp", name: "Vertex Partner", icon: "cloud", color: "#34A853", textIcon: "VP", website: "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models" },
};
export const OPENAI_COMPATIBLE_PREFIX = "openai-compatible-";

View File

@@ -19,7 +19,8 @@ import {
getAccessToken as _getAccessToken,
refreshTokenByProvider as _refreshTokenByProvider,
formatProviderCredentials as _formatProviderCredentials,
getAllAccessTokens as _getAllAccessTokens
getAllAccessTokens as _getAllAccessTokens,
refreshKiroToken as _refreshKiroToken
} from "open-sse/services/tokenRefresh.js";
export const TOKEN_EXPIRY_BUFFER_MS = BUFFER_MS;
@@ -50,6 +51,9 @@ export const refreshGitHubToken = (refreshToken) =>
export const refreshCopilotToken = (githubAccessToken) =>
_refreshCopilotToken(githubAccessToken, log);
export const refreshKiroToken = (refreshToken, providerSpecificData) =>
_refreshKiroToken(refreshToken, providerSpecificData, log);
export const getAccessToken = (provider, credentials) =>
_getAccessToken(provider, credentials, log);