Enhance chat handling and introduce Caveman feature

- Refactored handleChatCore to include Caveman functionality, allowing for terse-style system prompts to reduce output token usage.
- Updated APIPageClient to manage Caveman settings, including enabling/disabling and selecting compression levels.
- Adjusted AntigravityExecutor to consolidate function declarations for compatibility with Gemini.
- Removed unnecessary console logs during translator initialization across multiple routes.
This commit is contained in:
decolua
2026-04-30 18:00:38 +07:00
parent b3fb20e237
commit 936d65ae1c
14 changed files with 265 additions and 48 deletions

View File

@@ -67,24 +67,17 @@ export class AntigravityExecutor extends BaseExecutor {
let tools = body.request?.tools;
if (tools && tools.length > 0) {
tools = tools
.map(group => {
if (!group.functionDeclarations) return group;
const cleanedDeclarations = group.functionDeclarations.map(fn => ({
...fn,
name: sanitizeFunctionName(fn.name),
parameters: fn.parameters
? cleanJSONSchemaForAntigravity(structuredClone(fn.parameters))
: { type: "object", properties: { reason: { type: "string", description: "Brief explanation" } }, required: ["reason"] }
}));
return {
...group,
functionDeclarations: cleanedDeclarations
};
})
.filter(group => group.functionDeclarations?.length > 0)
.slice(0, 1);
// Merge all groups into a single functionDeclarations group (Gemini expects 1 group)
const allDeclarations = tools.flatMap(group =>
(group.functionDeclarations || []).map(fn => ({
...fn,
name: sanitizeFunctionName(fn.name),
parameters: fn.parameters
? cleanJSONSchemaForAntigravity(structuredClone(fn.parameters))
: { type: "object", properties: { reason: { type: "string", description: "Brief explanation" } }, required: ["reason"] }
}))
);
tools = allDeclarations.length > 0 ? [{ functionDeclarations: allDeclarations }] : [];
}
const { tools: _originalTools, toolConfig: _originalToolConfig, ...requestWithoutTools } = body.request || {};

View File

@@ -16,6 +16,8 @@ import { handleForcedSSEToJson } from "./chatCore/sseToJsonHandler.js";
import { handleNonStreamingResponse } from "./chatCore/nonStreamingHandler.js";
import { handleStreamingResponse, buildOnStreamComplete } from "./chatCore/streamingHandler.js";
import { detectClientTool, isNativePassthrough } from "../utils/clientDetector.js";
import { injectCaveman } from "../rtk/caveman.js";
import { compressMessages, formatRtkLog } from "../rtk/index.js";
/**
* Core chat handler - shared between SSE and Worker
@@ -24,7 +26,7 @@ import { detectClientTool, isNativePassthrough } from "../utils/clientDetector.j
* @param {object} options.credentials - Provider credentials
* @param {string} options.sourceFormatOverride - Override detected source format (e.g. "openai-responses")
*/
export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, rtkEnabled, sourceFormatOverride, providerThinking }) {
export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, rtkEnabled, cavemanEnabled, cavemanLevel, sourceFormatOverride, providerThinking }) {
const { provider, model } = modelInfo;
const requestStartTime = Date.now();
@@ -82,7 +84,7 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
log?.debug?.("PASSTHROUGH", `${clientTool}${provider} | native lossless`);
translatedBody = { ...body, model };
} else {
translatedBody = translateRequest(sourceFormat, targetFormat, model, body, stream, credentials, provider, reqLogger, stripList, connectionId, rtkEnabled, clientTool);
translatedBody = translateRequest(sourceFormat, targetFormat, model, body, stream, credentials, provider, reqLogger, stripList, connectionId, clientTool);
if (!translatedBody) {
trackPendingRequest(model, provider, connectionId, false, true);
return createErrorResult(HTTP_STATUS.BAD_REQUEST, `Failed to translate request for ${sourceFormat}${targetFormat}`);
@@ -92,6 +94,21 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
translatedBody.model = model;
}
// Token savers: applied at the final body just before dispatch
// Covers both passthrough (source shape) and translated (target shape) flows
const finalFormat = passthrough ? sourceFormat : targetFormat;
// RTK: compress tool_result content
const rtkStats = compressMessages(translatedBody, rtkEnabled);
const rtkLine = formatRtkLog(rtkStats);
if (rtkLine) console.log(rtkLine);
// Caveman: inject terse-style system prompt
if (cavemanEnabled && cavemanLevel) {
injectCaveman(translatedBody, finalFormat, cavemanLevel);
log?.debug?.("CAVEMAN", `${cavemanLevel} | ${finalFormat}`);
}
const executor = getExecutor(provider);
trackPendingRequest(model, provider, connectionId, true);
appendRequestLog({ model, provider, connectionId, status: "PENDING" }).catch(() => {});

100
open-sse/rtk/caveman.js Normal file
View File

@@ -0,0 +1,100 @@
// Caveman injector: appends a caveman-style instruction into the system message
// of the final request body, just before it is dispatched to the provider executor.
// Dispatches by format so it works for both translated and native-passthrough flows.
import { FORMATS } from "../translator/formats.js";
import { CAVEMAN_PROMPTS } from "./cavemanPrompts.js";
const SEP = "\n\n";

/**
 * Append a caveman-style terseness instruction to the system prompt of the
 * final request body, just before dispatch to the provider executor.
 * Dispatches on wire format so both translated and native-passthrough bodies work.
 *
 * @param {object} body - Final request body (mutated in place).
 * @param {string} format - One of FORMATS.* describing the body's shape.
 * @param {string} level - Caveman intensity level (key into CAVEMAN_PROMPTS).
 */
export function injectCaveman(body, format, level) {
  const prompt = CAVEMAN_PROMPTS[level];
  if (!body || !prompt) return;

  if (format === FORMATS.CLAUDE) {
    injectClaudeSystem(body, prompt);
    return;
  }

  // Gemini-family shapes all carry a systemInstruction structure.
  // Antigravity nests the Gemini shape under body.request — handled downstream.
  const geminiFamily = [FORMATS.GEMINI, FORMATS.GEMINI_CLI, FORMATS.VERTEX, FORMATS.ANTIGRAVITY];
  if (geminiFamily.includes(format)) {
    injectGeminiSystem(body, prompt);
    return;
  }

  // Everything else is OpenAI-shaped (chat, responses, codex, cursor, kiro, ollama).
  injectMessagesSystem(body, prompt);
}
// OpenAI-shaped: messages[] (chat) or input[] (responses) or instructions (responses string)
function injectMessagesSystem(body, prompt) {
// OpenAI Responses API: top-level string field
if (typeof body.instructions === "string") {
body.instructions = body.instructions
? `${body.instructions}${SEP}${prompt}`
: prompt;
return;
}
const arr = Array.isArray(body.messages) ? body.messages
: Array.isArray(body.input) ? body.input
: null;
if (!arr) return;
const idx = arr.findIndex(m => m && (m.role === "system" || m.role === "developer"));
if (idx >= 0) {
appendToOpenAIMessage(arr[idx], prompt);
} else {
arr.unshift({ role: "system", content: prompt });
}
}
/**
 * Append prompt text to a single OpenAI-style message, handling the three
 * content shapes: string, Responses-style parts array, or absent/other.
 *
 * @param {object} msg - Message object (mutated in place).
 * @param {string} prompt - Text to append.
 */
function appendToOpenAIMessage(msg, prompt) {
  const { content } = msg;
  if (Array.isArray(content)) {
    // Responses-style array of parts {type:"input_text"|"text", text}
    content.push({ type: "input_text", text: prompt });
  } else if (typeof content === "string") {
    msg.content = `${content}${SEP}${prompt}`;
  } else {
    msg.content = prompt;
  }
}
// Claude shape: body.system as string | array of {type:"text", text}
// Insert before the last cache_control block to keep caveman inside the cached prefix.
/**
 * Claude shape: body.system as string | array of {type:"text", text} blocks.
 * When an array contains cache_control markers, the new block is inserted
 * before the last marked block so the prompt stays inside the cached prefix.
 *
 * @param {object} body - Claude request body (mutated in place).
 * @param {string} prompt - Text to inject.
 */
function injectClaudeSystem(body, prompt) {
  const sys = body.system;

  if (typeof sys === "string" && sys.length > 0) {
    body.system = `${sys}${SEP}${prompt}`;
    return;
  }

  if (Array.isArray(sys)) {
    const textBlock = { type: "text", text: prompt };
    const cacheIdx = sys.findLastIndex(entry => Boolean(entry?.cache_control));
    if (cacheIdx >= 0) {
      // Slot in before the final cached block (keeps the cached prefix stable).
      sys.splice(cacheIdx, 0, textBlock);
    } else {
      sys.push(textBlock);
    }
    return;
  }

  // Missing or empty system: the prompt becomes the whole system string.
  body.system = prompt;
}
// Gemini shape: body.system_instruction | body.systemInstruction | body.request.systemInstruction
// Each shape: { parts: [{ text }] }
/**
 * Gemini shape: body.system_instruction | body.systemInstruction |
 * body.request.systemInstruction, each { parts: [{ text }] }.
 * Preserves whichever key casing the request already uses; defaults to camelCase.
 *
 * @param {object} body - Gemini-family request body (mutated in place).
 * @param {string} prompt - Text to inject as a new part.
 */
function injectGeminiSystem(body, prompt) {
  // Antigravity nests the Gemini request under body.request; plain Gemini does not.
  const target = body.request && typeof body.request === "object" ? body.request : body;
  const key = Object.hasOwn(target, "system_instruction")
    ? "system_instruction"
    : "systemInstruction";

  const existing = target[key];
  if (existing && Array.isArray(existing.parts)) {
    existing.parts.push({ text: prompt });
  } else {
    target[key] = { parts: [{ text: prompt }] };
  }
}

View File

@@ -0,0 +1,35 @@
// Caveman intensity-level prompts injected into system message to reduce output tokens.
// Adapted from caveman skill (https://github.com/JuliusBrussee/caveman).
// Supported caveman intensity levels, from mildest to most aggressive.
export const CAVEMAN_LEVELS = {
  LITE: "lite",
  FULL: "full",
  ULTRA: "ultra",
};

// Guardrail sentence shared by every level: content that must never be
// compressed, and situations that temporarily fall back to normal prose.
const PRESERVE_RULES = "Code blocks, file paths, commands, errors, URLs: keep exact. Security warnings, irreversible action confirmations, multi-step ordered sequences: write normal. Resume terse style after.";

// Persistence reminder appended to every level.
const STAY_ACTIVE = "Active every response until user asks for normal mode.";

// Assemble one prompt from its rule sentences, single-space separated.
const buildPrompt = (...rules) => rules.join(" ");

// Final prompt text per level, injected into the system message.
export const CAVEMAN_PROMPTS = {
  [CAVEMAN_LEVELS.LITE]: buildPrompt(
    "Respond tersely. Keep grammar and full sentences but drop filler, hedging and pleasantries (just/really/basically/sure/of course/I'd be happy to).",
    "Pattern: state the thing, the action, the reason. Then next step.",
    PRESERVE_RULES,
    STAY_ACTIVE,
  ),
  [CAVEMAN_LEVELS.FULL]: buildPrompt(
    "Respond like terse caveman. All technical substance stay exact, only fluff die.",
    "Drop: articles (a/an/the), filler (just/really/basically/actually/simply), pleasantries, hedging. Fragments OK. Short synonyms (big not extensive, fix not implement a solution for).",
    "Pattern: [thing] [action] [reason]. [next step].",
    PRESERVE_RULES,
    STAY_ACTIVE,
  ),
  [CAVEMAN_LEVELS.ULTRA]: buildPrompt(
    "Respond ultra-terse. Maximum compression. Telegraphic.",
    "Abbreviate (DB/auth/config/req/res/fn/impl), strip conjunctions, use arrows for causality (X → Y). One word when one word enough.",
    "Pattern: [thing] → [result]. [fix].",
    PRESERVE_RULES,
    STAY_ACTIVE,
  ),
};

View File

@@ -5,7 +5,6 @@ import { cloakClaudeTools } from "../utils/claudeCloaking.js";
import { filterToOpenAIFormat } from "./helpers/openaiHelper.js";
import { normalizeThinkingConfig } from "../services/provider.js";
import { AntigravityExecutor } from "../executors/antigravity.js";
import { compressMessages, formatRtkLog } from "../rtk/index.js";
// Registry for translators
const requestRegistry = new Map();
@@ -71,17 +70,10 @@ function stripContentTypes(body, stripList = []) {
}
// Translate request: source -> openai -> target
export function translateRequest(sourceFormat, targetFormat, model, body, stream = true, credentials = null, provider = null, reqLogger = null, stripList = [], connectionId = null, rtkEnabled = false, clientTool = null) {
export function translateRequest(sourceFormat, targetFormat, model, body, stream = true, credentials = null, provider = null, reqLogger = null, stripList = [], connectionId = null, clientTool = null) {
ensureInitialized();
let result = body;
// RTK: compress tool_result content before any translation (shape-agnostic)
const rtkStats = compressMessages(result, rtkEnabled);
if (rtkStats) {
const line = formatRtkLog(rtkStats);
if (line) console.log(line);
}
// Strip explicit content types (opt-in via strip[] in PROVIDER_MODELS entry)
stripContentTypes(result, stripList);