mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
feat: Implement buffer addition to usage tracking for improved context handling
This commit is contained in:
@@ -3,6 +3,7 @@ import { translateRequest, needsTranslation } from "../translator/index.js";
|
||||
import { FORMATS } from "../translator/formats.js";
|
||||
import { createSSETransformStreamWithLogger, createPassthroughStreamWithLogger, COLORS } from "../utils/stream.js";
|
||||
import { createStreamController, pipeWithDisconnect } from "../utils/streamHandler.js";
|
||||
import { addBufferToUsage } from "../utils/usageTracking.js";
|
||||
import { refreshWithRetry } from "../services/tokenRefresh.js";
|
||||
import { createRequestLogger } from "../utils/requestLogger.js";
|
||||
import { getModelTargetFormat, PROVIDER_ID_TO_ALIAS } from "../config/providerModels.js";
|
||||
@@ -434,6 +435,11 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
|
||||
? translateNonStreamingResponse(responseBody, targetFormat, sourceFormat)
|
||||
: responseBody;
|
||||
|
||||
// Add buffer to usage for client (to prevent CLI context errors)
|
||||
if (translatedResponse?.usage) {
|
||||
translatedResponse.usage = addBufferToUsage(translatedResponse.usage);
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
response: new Response(JSON.stringify(translatedResponse), {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { translateResponse, initState } from "../translator/index.js";
|
||||
import { FORMATS } from "../translator/formats.js";
|
||||
import { trackPendingRequest, appendRequestLog } from "@/lib/usageDb.js";
|
||||
import { extractUsage, hasValidUsage, estimateUsage, logUsage, COLORS } from "./usageTracking.js";
|
||||
import { extractUsage, hasValidUsage, estimateUsage, logUsage, addBufferToUsage, COLORS } from "./usageTracking.js";
|
||||
|
||||
// Re-export COLORS for backward compatibility
|
||||
export { COLORS };
|
||||
@@ -138,17 +138,22 @@ export function createSSEStream(options = {}) {
|
||||
// Extract usage from chunk
|
||||
const extracted = extractUsage(parsed);
|
||||
if (extracted) {
|
||||
usage = extracted;
|
||||
usage = extracted; // Keep original usage for logging
|
||||
}
|
||||
|
||||
// Inject estimated usage into final chunk (has finish_reason but no valid usage)
|
||||
const isFinishChunk = parsed.choices?.[0]?.finish_reason;
|
||||
if (isFinishChunk && !hasValidUsage(parsed.usage)) {
|
||||
const estimated = estimateUsage(body, totalContentLength, FORMATS.OPENAI);
|
||||
parsed.usage = estimated;
|
||||
parsed.usage = estimated; // Already has buffer from formatUsage
|
||||
output = `data: ${JSON.stringify(parsed)}\n`;
|
||||
usage = estimated;
|
||||
injectedUsage = true;
|
||||
} else if (isFinishChunk && usage) {
|
||||
// Add buffer to usage for client (but keep original for logging)
|
||||
parsed.usage = addBufferToUsage(usage);
|
||||
output = `data: ${JSON.stringify(parsed)}\n`;
|
||||
injectedUsage = true;
|
||||
}
|
||||
} catch { }
|
||||
}
|
||||
@@ -181,16 +186,36 @@ export function createSSEStream(options = {}) {
|
||||
}
|
||||
|
||||
// Track content length for estimation (from various formats)
|
||||
const content = parsed.delta?.text || // Claude
|
||||
parsed.choices?.[0]?.delta?.content || // OpenAI
|
||||
parsed.candidates?.[0]?.content?.parts?.[0]?.text; // Gemini
|
||||
if (content && typeof content === "string") {
|
||||
totalContentLength += content.length;
|
||||
// Include both regular content and reasoning/thinking content
|
||||
|
||||
// Claude format
|
||||
if (parsed.delta?.text) {
|
||||
totalContentLength += parsed.delta.text.length;
|
||||
}
|
||||
if (parsed.delta?.thinking) {
|
||||
totalContentLength += parsed.delta.thinking.length;
|
||||
}
|
||||
|
||||
// OpenAI format
|
||||
if (parsed.choices?.[0]?.delta?.content) {
|
||||
totalContentLength += parsed.choices[0].delta.content.length;
|
||||
}
|
||||
if (parsed.choices?.[0]?.delta?.reasoning_content) {
|
||||
totalContentLength += parsed.choices[0].delta.reasoning_content.length;
|
||||
}
|
||||
|
||||
// Gemini format - may have multiple parts
|
||||
if (parsed.candidates?.[0]?.content?.parts) {
|
||||
for (const part of parsed.candidates[0].content.parts) {
|
||||
if (part.text && typeof part.text === "string") {
|
||||
totalContentLength += part.text.length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract usage
|
||||
const extracted = extractUsage(parsed);
|
||||
if (extracted) state.usage = extracted;
|
||||
if (extracted) state.usage = extracted; // Keep original usage for logging
|
||||
|
||||
// Translate: targetFormat -> openai -> sourceFormat
|
||||
const translated = translateResponse(targetFormat, sourceFormat, parsed, state);
|
||||
@@ -209,8 +234,11 @@ export function createSSEStream(options = {}) {
|
||||
const isFinishChunk = item.type === "message_delta" || item.choices?.[0]?.finish_reason;
|
||||
if (state.finishReason && isFinishChunk && !hasValidUsage(item.usage) && totalContentLength > 0) {
|
||||
const estimated = estimateUsage(body, totalContentLength, sourceFormat);
|
||||
item.usage = estimated;
|
||||
item.usage = estimated; // Already has buffer from formatUsage
|
||||
state.usage = estimated;
|
||||
} else if (state.finishReason && isFinishChunk && state.usage) {
|
||||
// Add buffer to usage for client (but keep original in state.usage for logging)
|
||||
item.usage = addBufferToUsage(state.usage);
|
||||
}
|
||||
|
||||
const output = formatSSE(item, sourceFormat);
|
||||
|
||||
@@ -15,11 +15,42 @@ export const COLORS = {
|
||||
cyan: "\x1b[36m"
|
||||
};
|
||||
|
||||
// Buffer tokens to prevent context errors
|
||||
const BUFFER_TOKENS = 2000;
|
||||
|
||||
/**
 * Format a time of day as a zero-padded, 24-hour HH:MM:SS string (local time).
 *
 * @param {Date} [date] - Moment to format; defaults to the current time.
 * @returns {string} Time string such as "09:05:42".
 */
function getTimeString(date = new Date()) {
  // hourCycle "h23" pins hours to 00-23. `hour12: false` alone lets some ICU
  // builds choose the h24 cycle for en-US, which prints midnight as "24:xx:xx".
  return date.toLocaleTimeString("en-US", {
    hourCycle: "h23",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit"
  });
}
|
||||
|
||||
/**
 * Add buffer tokens to the input-side counters of a usage object to prevent
 * context errors on the client.
 *
 * Returns a shallow copy; the object passed in is never mutated. The buffer is
 * added to whichever of these fields are present:
 *   - input_tokens  (Claude format)
 *   - prompt_tokens (OpenAI format)
 *   - total_tokens
 * Every other field is copied through unchanged.
 *
 * @param {object} usage - Usage object (any format)
 * @param {number} [bufferTokens=BUFFER_TOKENS] - Number of tokens to add
 * @returns {object} Usage with buffer added; non-object input is returned as-is
 */
export function addBufferToUsage(usage, bufferTokens = BUFFER_TOKENS) {
  if (!usage || typeof usage !== "object") return usage;

  const result = { ...usage };

  for (const field of ["input_tokens", "prompt_tokens", "total_tokens"]) {
    const current = result[field];
    if (current === undefined) continue;

    // Coerce before adding: `+=` on a numeric string such as "100" would
    // concatenate ("1002000") instead of summing.
    const numeric = Number(current);

    // Leave values that are not finite numbers exactly as they were.
    if (!Number.isFinite(numeric)) continue;

    result[field] = numeric + bufferTokens;
  }

  return result;
}
|
||||
|
||||
/**
|
||||
* Normalize usage object - ensure all values are valid numbers
|
||||
*/
|
||||
@@ -120,43 +151,22 @@ export function extractUsage(chunk) {
|
||||
|
||||
/**
 * Estimate input tokens from request body.
 *
 * The estimate is based on the size of the whole serialized body, so messages,
 * tools, system prompt, thinking config and any other fields all contribute.
 *
 * @param {object} body - Request body
 * @returns {number} Estimated token count; 0 when body is not an object or
 *   cannot be serialized
 */
export function estimateInputTokens(body) {
  if (!body || typeof body !== "object") return 0;

  try {
    // Calculate total body size (includes messages, tools, system, thinking config, etc.)
    const totalChars = JSON.stringify(body).length;

    // Estimate: ~4 chars per token (rough average across all tokenizers)
    return Math.ceil(totalChars / 4);
  } catch {
    // Fallback if stringify fails (e.g. circular references or BigInt values)
    return 0;
  }
}
|
||||
|
||||
/**
|
||||
@@ -176,16 +186,20 @@ export function estimateOutputTokens(contentLength) {
|
||||
export function formatUsage(inputTokens, outputTokens, targetFormat) {
  // Builds an estimated usage object (flagged with `estimated: true`) in the
  // field naming of the target format, then passes it through
  // addBufferToUsage so the result already includes the buffer.

  // Claude format uses input_tokens/output_tokens
  if (targetFormat === FORMATS.CLAUDE) {
    return addBufferToUsage({
      input_tokens: inputTokens,
      output_tokens: outputTokens,
      estimated: true
    });
  }

  // Default: OpenAI format (works for openai, gemini, responses, etc.)
  return addBufferToUsage({
    prompt_tokens: inputTokens,
    completion_tokens: outputTokens,
    total_tokens: inputTokens + outputTokens,
    estimated: true
  });
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user