feat: Add token buffer to client-reported usage to prevent CLI context errors

2026-05-08 12:01:28 +00:00 · 2026-02-03 10:39:20 +07:00
parent df0e1d6485
commit 7881db81ec
3 changed files with 92 additions and 44 deletions
--- a/open-sse/handlers/chatCore.js
+++ b/open-sse/handlers/chatCore.js
@@ -3,6 +3,7 @@ import { translateRequest, needsTranslation } from "../translator/index.js";
 import { FORMATS } from "../translator/formats.js";
 import { createSSETransformStreamWithLogger, createPassthroughStreamWithLogger, COLORS } from "../utils/stream.js";
 import { createStreamController, pipeWithDisconnect } from "../utils/streamHandler.js";
+import { addBufferToUsage } from "../utils/usageTracking.js";
 import { refreshWithRetry } from "../services/tokenRefresh.js";
 import { createRequestLogger } from "../utils/requestLogger.js";
 import { getModelTargetFormat, PROVIDER_ID_TO_ALIAS } from "../config/providerModels.js";
@@ -434,6 +435,11 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
      ? translateNonStreamingResponse(responseBody, targetFormat, sourceFormat)
      : responseBody;

+    // Add buffer to usage for client (to prevent CLI context errors)
+    if (translatedResponse?.usage) {
+      translatedResponse.usage = addBufferToUsage(translatedResponse.usage);
+    }
+
    return {
      success: true,
      response: new Response(JSON.stringify(translatedResponse), {
--- a/open-sse/utils/stream.js
+++ b/open-sse/utils/stream.js
@@ -1,7 +1,7 @@
 import { translateResponse, initState } from "../translator/index.js";
 import { FORMATS } from "../translator/formats.js";
 import { trackPendingRequest, appendRequestLog } from "@/lib/usageDb.js";
-import { extractUsage, hasValidUsage, estimateUsage, logUsage, COLORS } from "./usageTracking.js";
+import { extractUsage, hasValidUsage, estimateUsage, logUsage, addBufferToUsage, COLORS } from "./usageTracking.js";

 // Re-export COLORS for backward compatibility
 export { COLORS };
@@ -138,17 +138,22 @@ export function createSSEStream(options = {}) {
              // Extract usage from chunk
              const extracted = extractUsage(parsed);
              if (extracted) {
-                usage = extracted;
+                usage = extracted; // Keep original usage for logging
              }

              // Inject estimated usage into final chunk (has finish_reason but no valid usage)
              const isFinishChunk = parsed.choices?.[0]?.finish_reason;
              if (isFinishChunk && !hasValidUsage(parsed.usage)) {
                const estimated = estimateUsage(body, totalContentLength, FORMATS.OPENAI);
-                parsed.usage = estimated;
+                parsed.usage = estimated; // Already has buffer from formatUsage
                output = `data: ${JSON.stringify(parsed)}\n`;
                usage = estimated;
                injectedUsage = true;
+              } else if (isFinishChunk && usage) {
+                // Add buffer to usage for client (but keep original for logging)
+                parsed.usage = addBufferToUsage(usage);
+                output = `data: ${JSON.stringify(parsed)}\n`;
+                injectedUsage = true;
              }
            } catch { }
          }
@@ -181,16 +186,36 @@ export function createSSEStream(options = {}) {
        }

        // Track content length for estimation (from various formats)
-        const content = parsed.delta?.text || // Claude
-          parsed.choices?.[0]?.delta?.content || // OpenAI
-          parsed.candidates?.[0]?.content?.parts?.[0]?.text; // Gemini
-        if (content && typeof content === "string") {
-          totalContentLength += content.length;
+        // Include both regular content and reasoning/thinking content
+        
+        // Claude format
+        if (parsed.delta?.text) {
+          totalContentLength += parsed.delta.text.length;
+        }
+        if (parsed.delta?.thinking) {
+          totalContentLength += parsed.delta.thinking.length;
+        }
+        
+        // OpenAI format
+        if (parsed.choices?.[0]?.delta?.content) {
+          totalContentLength += parsed.choices[0].delta.content.length;
+        }
+        if (parsed.choices?.[0]?.delta?.reasoning_content) {
+          totalContentLength += parsed.choices[0].delta.reasoning_content.length;
+        }
+        
+        // Gemini format - may have multiple parts
+        if (parsed.candidates?.[0]?.content?.parts) {
+          for (const part of parsed.candidates[0].content.parts) {
+            if (part.text && typeof part.text === "string") {
+              totalContentLength += part.text.length;
+            }
+          }
        }

        // Extract usage
        const extracted = extractUsage(parsed);
-        if (extracted) state.usage = extracted;
+        if (extracted) state.usage = extracted; // Keep original usage for logging

        // Translate: targetFormat -> openai -> sourceFormat
        const translated = translateResponse(targetFormat, sourceFormat, parsed, state);
@@ -209,8 +234,11 @@ export function createSSEStream(options = {}) {
            const isFinishChunk = item.type === "message_delta" || item.choices?.[0]?.finish_reason;
            if (state.finishReason && isFinishChunk && !hasValidUsage(item.usage) && totalContentLength > 0) {
              const estimated = estimateUsage(body, totalContentLength, sourceFormat);
-              item.usage = estimated;
+              item.usage = estimated; // Already has buffer from formatUsage
              state.usage = estimated;
+            } else if (state.finishReason && isFinishChunk && state.usage) {
+              // Add buffer to usage for client (but keep original in state.usage for logging)
+              item.usage = addBufferToUsage(state.usage);
            }

            const output = formatSSE(item, sourceFormat);
--- a/open-sse/utils/usageTracking.js
+++ b/open-sse/utils/usageTracking.js
@@ -15,11 +15,42 @@ export const COLORS = {
  cyan: "\x1b[36m"
 };

+// Extra tokens added to reported input/prompt (and total) token counts to prevent client context errors
+const BUFFER_TOKENS = 2000;
+
 // Get HH:MM:SS timestamp
 function getTimeString() {
  return new Date().toLocaleTimeString("en-US", { hour12: false, hour: "2-digit", minute: "2-digit", second: "2-digit" });
 }

+/**
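+ * Return a shallow copy of usage with BUFFER_TOKENS added to input_tokens, prompt_tokens and total_tokens (whichever are present) to prevent context errors
+ * @param {object} usage - Usage object; only Claude-style (input_tokens) and OpenAI-style (prompt_tokens, total_tokens) fields are adjusted
+ * @returns {object} New usage object with the buffer added; a falsy or non-object argument is returned unchanged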
+ */
+export function addBufferToUsage(usage) {
+  if (!usage || typeof usage !== "object") return usage;
+
+  const result = { ...usage };
+
+  // Claude format
+  if (result.input_tokens !== undefined) {
+    result.input_tokens += BUFFER_TOKENS;
+  }
+
+  // OpenAI format
+  if (result.prompt_tokens !== undefined) {
+    result.prompt_tokens += BUFFER_TOKENS;
+  }
+
+  // Update total_tokens, if it exists, so it stays consistent with the buffered input count
+  if (result.total_tokens !== undefined) {
+    result.total_tokens += BUFFER_TOKENS;
+  }
+
+  return result;
+}
+
 /**
 * Normalize usage object - ensure all values are valid numbers
 */
@@ -120,43 +151,22 @@ export function extractUsage(chunk) {

 /**
 * Estimate input tokens from request body
+ * Uses the length of the whole JSON-serialized body as the character count (JSON keys and punctuation are included)
 */
 export function estimateInputTokens(body) {
  if (!body || typeof body !== "object") return 0;

-  let totalChars = 0;
+  try {
+    // Calculate total body size (includes messages, tools, system, thinking config, etc.)
+    const bodyStr = JSON.stringify(body);
+    const totalChars = bodyStr.length;

-  // Count messages
-  if (Array.isArray(body.messages)) {
-    for (const msg of body.messages) {
-      if (msg.content) {
-        if (typeof msg.content === "string") {
-          totalChars += msg.content.length;
-        } else if (Array.isArray(msg.content)) {
-          for (const part of msg.content) {
-            if (part.text) totalChars += part.text.length;
-            if (part.type === "image_url") totalChars += 85; // Rough estimate for images
-          }
-        }
-      }
-      if (msg.role) totalChars += msg.role.length;
-    }
+    // Estimate: ~4 chars per token (rough average across all tokenizers)
+    return Math.ceil(totalChars / 4);
+  } catch (err) {
+    // JSON.stringify throws on circular references or BigInt values; fall back to an estimate of 0
+    return 0;
  }
-
-  // Count tools/functions
-  if (Array.isArray(body.tools)) {
-    totalChars += JSON.stringify(body.tools).length;
-  } else if (Array.isArray(body.functions)) {
-    totalChars += JSON.stringify(body.functions).length;
-  }
-
-  // Count system prompt
-  if (body.system) {
-    totalChars += typeof body.system === "string" ? body.system.length : JSON.stringify(body.system).length;
-  }
-
-  // Estimate: ~4 chars per token (rough average across all tokenizers)
-  return Math.ceil(totalChars / 4);
 }

 /**
@@ -176,16 +186,20 @@ export function estimateOutputTokens(contentLength) {
 export function formatUsage(inputTokens, outputTokens, targetFormat) {
  // Claude format uses input_tokens/output_tokens
  if (targetFormat === FORMATS.CLAUDE) {
-    return { input_tokens: inputTokens, output_tokens: outputTokens, estimated: true };
+    return addBufferToUsage({ 
+      input_tokens: inputTokens, 
+      output_tokens: outputTokens, 
+      estimated: true 
+    });
  }

  // Default: OpenAI format (works for openai, gemini, responses, etc.)
-  return {
+  return addBufferToUsage({
    prompt_tokens: inputTokens,
    completion_tokens: outputTokens,
    total_tokens: inputTokens + outputTokens,
    estimated: true
-  };
+  });
 }

 /**