feat: Implement buffer addition to usage tracking for improved context handling

This commit is contained in:
decolua
2026-02-03 10:39:20 +07:00
parent df0e1d6485
commit 7881db81ec
3 changed files with 92 additions and 44 deletions

View File

@@ -3,6 +3,7 @@ import { translateRequest, needsTranslation } from "../translator/index.js";
import { FORMATS } from "../translator/formats.js";
import { createSSETransformStreamWithLogger, createPassthroughStreamWithLogger, COLORS } from "../utils/stream.js";
import { createStreamController, pipeWithDisconnect } from "../utils/streamHandler.js";
import { addBufferToUsage } from "../utils/usageTracking.js";
import { refreshWithRetry } from "../services/tokenRefresh.js";
import { createRequestLogger } from "../utils/requestLogger.js";
import { getModelTargetFormat, PROVIDER_ID_TO_ALIAS } from "../config/providerModels.js";
@@ -434,6 +435,11 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
? translateNonStreamingResponse(responseBody, targetFormat, sourceFormat)
: responseBody;
// Add buffer to usage for client (to prevent CLI context errors)
if (translatedResponse?.usage) {
translatedResponse.usage = addBufferToUsage(translatedResponse.usage);
}
return {
success: true,
response: new Response(JSON.stringify(translatedResponse), {

View File

@@ -1,7 +1,7 @@
import { translateResponse, initState } from "../translator/index.js";
import { FORMATS } from "../translator/formats.js";
import { trackPendingRequest, appendRequestLog } from "@/lib/usageDb.js";
import { extractUsage, hasValidUsage, estimateUsage, logUsage, COLORS } from "./usageTracking.js";
import { extractUsage, hasValidUsage, estimateUsage, logUsage, addBufferToUsage, COLORS } from "./usageTracking.js";
// Re-export COLORS for backward compatibility
export { COLORS };
@@ -138,17 +138,22 @@ export function createSSEStream(options = {}) {
// Extract usage from chunk
const extracted = extractUsage(parsed);
if (extracted) {
usage = extracted;
usage = extracted; // Keep original usage for logging
}
// Inject estimated usage into final chunk (has finish_reason but no valid usage)
const isFinishChunk = parsed.choices?.[0]?.finish_reason;
if (isFinishChunk && !hasValidUsage(parsed.usage)) {
const estimated = estimateUsage(body, totalContentLength, FORMATS.OPENAI);
parsed.usage = estimated;
parsed.usage = estimated; // Already has buffer from formatUsage
output = `data: ${JSON.stringify(parsed)}\n`;
usage = estimated;
injectedUsage = true;
} else if (isFinishChunk && usage) {
// Add buffer to usage for client (but keep original for logging)
parsed.usage = addBufferToUsage(usage);
output = `data: ${JSON.stringify(parsed)}\n`;
injectedUsage = true;
}
} catch { }
}
@@ -181,16 +186,36 @@ export function createSSEStream(options = {}) {
}
// Track content length for estimation (from various formats)
const content = parsed.delta?.text || // Claude
parsed.choices?.[0]?.delta?.content || // OpenAI
parsed.candidates?.[0]?.content?.parts?.[0]?.text; // Gemini
if (content && typeof content === "string") {
totalContentLength += content.length;
// Include both regular content and reasoning/thinking content
// Claude format
if (parsed.delta?.text) {
totalContentLength += parsed.delta.text.length;
}
if (parsed.delta?.thinking) {
totalContentLength += parsed.delta.thinking.length;
}
// OpenAI format
if (parsed.choices?.[0]?.delta?.content) {
totalContentLength += parsed.choices[0].delta.content.length;
}
if (parsed.choices?.[0]?.delta?.reasoning_content) {
totalContentLength += parsed.choices[0].delta.reasoning_content.length;
}
// Gemini format - may have multiple parts
if (parsed.candidates?.[0]?.content?.parts) {
for (const part of parsed.candidates[0].content.parts) {
if (part.text && typeof part.text === "string") {
totalContentLength += part.text.length;
}
}
}
// Extract usage
const extracted = extractUsage(parsed);
if (extracted) state.usage = extracted;
if (extracted) state.usage = extracted; // Keep original usage for logging
// Translate: targetFormat -> openai -> sourceFormat
const translated = translateResponse(targetFormat, sourceFormat, parsed, state);
@@ -209,8 +234,11 @@ export function createSSEStream(options = {}) {
const isFinishChunk = item.type === "message_delta" || item.choices?.[0]?.finish_reason;
if (state.finishReason && isFinishChunk && !hasValidUsage(item.usage) && totalContentLength > 0) {
const estimated = estimateUsage(body, totalContentLength, sourceFormat);
item.usage = estimated;
item.usage = estimated; // Already has buffer from formatUsage
state.usage = estimated;
} else if (state.finishReason && isFinishChunk && state.usage) {
// Add buffer to usage for client (but keep original in state.usage for logging)
item.usage = addBufferToUsage(state.usage);
}
const output = formatSSE(item, sourceFormat);

View File

@@ -15,11 +15,42 @@ export const COLORS = {
cyan: "\x1b[36m"
};
// Buffer tokens to prevent context errors: reported input usage is padded so
// CLI clients leave headroom and don't hit context-limit errors mid-session.
const BUFFER_TOKENS = 2000;
// Get HH:MM:SS timestamp (24-hour, zero-padded)
function getTimeString() {
  return new Date().toLocaleTimeString("en-US", { hour12: false, hour: "2-digit", minute: "2-digit", second: "2-digit" });
}
/**
 * Add buffer tokens to usage to prevent context errors.
 *
 * Pads the input-side counters (Claude `input_tokens`, OpenAI `prompt_tokens`)
 * and `total_tokens` when present. Returns a shallow copy — the caller's
 * original usage object is never mutated, so it stays valid for logging.
 *
 * @param {object} usage - Usage object (any format)
 * @returns {object} New usage object with buffer added; non-object input is
 *   returned unchanged
 */
export function addBufferToUsage(usage) {
  if (!usage || typeof usage !== "object") return usage;
  const result = { ...usage };
  // Only pad finite numeric values; malformed fields (strings, null, NaN)
  // pass through untouched instead of string-concatenating or producing NaN.
  const pad = (value) => (Number.isFinite(value) ? value + BUFFER_TOKENS : value);
  // Claude format
  if (result.input_tokens !== undefined) {
    result.input_tokens = pad(result.input_tokens);
  }
  // OpenAI format
  if (result.prompt_tokens !== undefined) {
    result.prompt_tokens = pad(result.prompt_tokens);
  }
  // Update total_tokens if exists (keeps totals consistent with the padding)
  if (result.total_tokens !== undefined) {
    result.total_tokens = pad(result.total_tokens);
  }
  return result;
}
/**
* Normalize usage object - ensure all values are valid numbers
*/
@@ -120,43 +151,22 @@ export function extractUsage(chunk) {
/**
* Estimate input tokens from request body
* Calculate total body size for more accurate estimation
*/
/**
 * Estimate input tokens from request body.
 *
 * Serializes the entire body — messages, tools, system prompt, thinking
 * config, and any provider-specific extras — in one pass for a more accurate
 * estimate than counting individual fields.
 *
 * @param {object} body - Request body (any provider format)
 * @returns {number} Estimated input token count; 0 when body is missing,
 *   not an object, or cannot be serialized
 */
export function estimateInputTokens(body) {
  if (!body || typeof body !== "object") return 0;
  try {
    // Calculate total body size (includes messages, tools, system, thinking config, etc.)
    const totalChars = JSON.stringify(body).length;
    // Estimate: ~4 chars per token (rough average across all tokenizers)
    return Math.ceil(totalChars / 4);
  } catch {
    // JSON.stringify can throw (e.g. circular references, BigInt) — fall back to 0.
    return 0;
  }
}
/**
@@ -176,16 +186,20 @@ export function estimateOutputTokens(contentLength) {
/**
 * Format estimated token counts as a usage object in the target format,
 * with the context buffer already applied via addBufferToUsage.
 *
 * The `estimated: true` flag marks the object as a heuristic estimate so
 * downstream logging can distinguish it from provider-reported usage.
 *
 * @param {number} inputTokens - Estimated input token count
 * @param {number} outputTokens - Estimated output token count
 * @param {string} targetFormat - One of FORMATS.* (claude, openai, gemini, ...)
 * @returns {object} Buffered usage object in Claude or OpenAI shape
 */
export function formatUsage(inputTokens, outputTokens, targetFormat) {
  // Claude format uses input_tokens/output_tokens
  if (targetFormat === FORMATS.CLAUDE) {
    return addBufferToUsage({
      input_tokens: inputTokens,
      output_tokens: outputTokens,
      estimated: true
    });
  }
  // Default: OpenAI format (works for openai, gemini, responses, etc.)
  return addBufferToUsage({
    prompt_tokens: inputTokens,
    completion_tokens: outputTokens,
    total_tokens: inputTokens + outputTokens,
    estimated: true
  });
}
/**