feat: Update response handling and logging for improved usage tracking

This commit is contained in:
decolua
2026-02-03 10:22:43 +07:00
parent a33924b336
commit df0e1d6485
7 changed files with 348 additions and 152 deletions

View File

@@ -468,9 +468,9 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
// For Codex, translate FROM openai-responses TO openai (client's expected format)
const responseSourceFormat = needsCodexTranslation ? 'openai-responses' : targetFormat;
const responseTargetFormat = needsCodexTranslation ? 'openai' : sourceFormat;
transformStream = createSSETransformStreamWithLogger(responseSourceFormat, responseTargetFormat, provider, reqLogger, toolNameMap, model, connectionId);
transformStream = createSSETransformStreamWithLogger(responseSourceFormat, responseTargetFormat, provider, reqLogger, toolNameMap, model, connectionId, body);
} else {
transformStream = createPassthroughStreamWithLogger(provider, reqLogger, model, connectionId);
transformStream = createPassthroughStreamWithLogger(provider, reqLogger, model, connectionId, body);
}
// Pipe response through transform with disconnect detection

View File

@@ -146,6 +146,8 @@ export function convertKiroToOpenAI(chunk, state) {
// Handle completion/done events
if (eventType === "messageStopEvent" || eventType === "done" || data.messageStopEvent) {
state.finishReason = "stop"; // Mark for usage injection in stream.js
const openaiChunk = {
id: state.responseId,
object: "chat.completion.chunk",

View File

@@ -505,6 +505,8 @@ export function openaiResponsesToOpenAIResponse(chunk, state) {
if (!state.finishReasonSent) {
state.finishReasonSent = true;
state.finishReason = "stop"; // Mark for usage injection in stream.js
const finalChunk = {
id: state.chatId,
object: "chat.completion.chunk",

View File

@@ -192,7 +192,10 @@ export function openaiToClaudeResponse(chunk, state) {
});
}
// Use tracked usage or default to 0
// Mark finish for later usage injection in stream.js
state.finishReason = choice.finish_reason;
// Use tracked usage (will be estimated in stream.js if not valid)
const finalUsage = state.usage || { input_tokens: 0, output_tokens: 0 };
results.push({
type: "message_delta",

View File

@@ -1,130 +1,15 @@
import { translateResponse, initState } from "../translator/index.js";
import { FORMATS } from "../translator/formats.js";
import { saveRequestUsage, trackPendingRequest, appendRequestLog } from "@/lib/usageDb.js";
import { trackPendingRequest, appendRequestLog } from "@/lib/usageDb.js";
import { extractUsage, hasValidUsage, estimateUsage, logUsage, COLORS } from "./usageTracking.js";
// Re-export COLORS for backward compatibility
export { COLORS };
// Singleton TextEncoder/Decoder for performance (reuse across all streams)
const sharedDecoder = new TextDecoder();
const sharedEncoder = new TextEncoder();
// Get HH:MM:SS timestamp
function getTimeString() {
return new Date().toLocaleTimeString("en-US", { hour12: false, hour: "2-digit", minute: "2-digit", second: "2-digit" });
}
// Extract usage from any format (Claude, OpenAI, Gemini, Responses API)
function extractUsage(chunk) {
if (!chunk || typeof chunk !== "object") return null;
// Claude format (message_delta event)
if (chunk.type === "message_delta" && chunk.usage && typeof chunk.usage === 'object') {
return normalizeUsage({
prompt_tokens: chunk.usage.input_tokens || 0,
completion_tokens: chunk.usage.output_tokens || 0,
cache_read_input_tokens: chunk.usage.cache_read_input_tokens,
cache_creation_input_tokens: chunk.usage.cache_creation_input_tokens
});
}
// OpenAI Responses API format (response.completed or response.done)
if ((chunk.type === "response.completed" || chunk.type === "response.done") && chunk.response?.usage && typeof chunk.response.usage === 'object') {
const usage = chunk.response.usage;
return normalizeUsage({
prompt_tokens: usage.input_tokens || usage.prompt_tokens || 0,
completion_tokens: usage.output_tokens || usage.completion_tokens || 0,
cached_tokens: usage.input_tokens_details?.cached_tokens,
reasoning_tokens: usage.output_tokens_details?.reasoning_tokens
});
}
// OpenAI format
if (chunk.usage && typeof chunk.usage === 'object' && chunk.usage.prompt_tokens !== undefined) {
return normalizeUsage({
prompt_tokens: chunk.usage.prompt_tokens,
completion_tokens: chunk.usage.completion_tokens || 0,
cached_tokens: chunk.usage.prompt_tokens_details?.cached_tokens,
reasoning_tokens: chunk.usage.completion_tokens_details?.reasoning_tokens
});
}
// Gemini format (Antigravity)
if (chunk.usageMetadata && typeof chunk.usageMetadata === 'object') {
return normalizeUsage({
prompt_tokens: chunk.usageMetadata?.promptTokenCount || 0,
completion_tokens: chunk.usageMetadata?.candidatesTokenCount || 0,
cached_tokens: chunk.usageMetadata?.cachedContentTokenCount,
reasoning_tokens: chunk.usageMetadata?.thoughtsTokenCount
});
}
return null;
}
function normalizeUsage(usage) {
if (!usage || typeof usage !== "object" || Array.isArray(usage)) return null;
const normalized = {};
const assignNumber = (key, value) => {
if (value === undefined || value === null) return;
const numeric = Number(value);
if (Number.isFinite(numeric)) normalized[key] = numeric;
};
assignNumber("prompt_tokens", usage?.prompt_tokens);
assignNumber("completion_tokens", usage?.completion_tokens);
assignNumber("cache_read_input_tokens", usage?.cache_read_input_tokens);
assignNumber("cache_creation_input_tokens", usage?.cache_creation_input_tokens);
assignNumber("cached_tokens", usage?.cached_tokens);
assignNumber("reasoning_tokens", usage?.reasoning_tokens);
if (Object.keys(normalized).length === 0) return null;
return normalized;
}
// ANSI color codes
export const COLORS = {
reset: "\x1b[0m",
red: "\x1b[31m",
green: "\x1b[32m",
yellow: "\x1b[33m",
blue: "\x1b[34m",
cyan: "\x1b[36m"
};
// Log usage with cache info (green color)
function logUsage(provider, usage, model = null, connectionId = null) {
if (!usage || typeof usage !== 'object') return;
const p = provider?.toUpperCase() || "UNKNOWN";
// Support both formats:
// - OpenAI: prompt_tokens, completion_tokens
// - Claude: input_tokens, output_tokens
const inTokens = usage?.prompt_tokens || usage?.input_tokens || 0;
const outTokens = usage?.completion_tokens || usage?.output_tokens || 0;
let msg = `[${getTimeString()}] 📊 [USAGE] ${p} | in=${inTokens} | out=${outTokens}`;
if (connectionId) msg += ` | account=${connectionId.slice(0, 8)}...`;
// Support both formats: cache_read_input_tokens (Claude) and cached_tokens (OpenAI/Gemini)
const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens;
if (cacheRead) msg += ` | cache_read=${cacheRead}`;
if (usage.cache_creation_input_tokens) msg += ` | cache_write=${usage.cache_creation_input_tokens}`;
if (usage.reasoning_tokens) msg += ` | reasoning=${usage.reasoning_tokens}`;
console.log(`${COLORS.green}${msg}${COLORS.reset}`);
// Log to log.txt
appendRequestLog({ model, provider, connectionId, tokens: usage, status: "200 OK" }).catch(() => { });
// Save to DB
saveRequestUsage({
provider: provider || "unknown",
model: model || "unknown",
tokens: usage,
timestamp: new Date().toISOString(),
connectionId: connectionId || undefined
}).catch(err => {
console.error("Failed to save usage stats:", err.message);
});
}
// Parse SSE data line (optimized - reduce string operations)
function parseSSELine(line) {
if (!line || line.charCodeAt(0) !== 100) return null; // 'd' = 100
@@ -199,6 +84,7 @@ const STREAM_MODE = {
* @param {object} options.reqLogger - Request logger instance
* @param {string} options.model - Model name
* @param {string} options.connectionId - Connection ID for usage tracking
* @param {object} options.body - Request body (for input token estimation)
*/
export function createSSEStream(options = {}) {
const {
@@ -209,7 +95,8 @@ export function createSSEStream(options = {}) {
reqLogger = null,
toolNameMap = null,
model = null,
connectionId = null
connectionId = null,
body = null
} = options;
let buffer = "";
@@ -218,6 +105,9 @@ export function createSSEStream(options = {}) {
// State for translate mode
const state = mode === STREAM_MODE.TRANSLATE ? { ...initState(sourceFormat), provider, toolNameMap } : null;
// Track content length for usage estimation (both modes)
let totalContentLength = 0;
return new TransformStream({
transform(chunk, controller) {
const text = sharedDecoder.decode(chunk, { stream: true });
@@ -232,20 +122,46 @@ export function createSSEStream(options = {}) {
// Passthrough mode: normalize and forward
if (mode === STREAM_MODE.PASSTHROUGH) {
let output;
let injectedUsage = false;
if (trimmed.startsWith("data:") && trimmed.slice(5).trim() !== "[DONE]") {
try {
const parsed = JSON.parse(trimmed.slice(5).trim());
// Track content length for estimation
const content = parsed.choices?.[0]?.delta?.content || parsed.choices?.[0]?.delta?.reasoning_content;
if (content && typeof content === "string") {
totalContentLength += content.length;
}
// Extract usage from chunk
const extracted = extractUsage(parsed);
if (extracted) usage = extracted;
if (extracted) {
usage = extracted;
}
// Inject estimated usage into final chunk (has finish_reason but no valid usage)
const isFinishChunk = parsed.choices?.[0]?.finish_reason;
if (isFinishChunk && !hasValidUsage(parsed.usage)) {
const estimated = estimateUsage(body, totalContentLength, FORMATS.OPENAI);
parsed.usage = estimated;
output = `data: ${JSON.stringify(parsed)}\n`;
usage = estimated;
injectedUsage = true;
}
} catch { }
}
// Normalize: ensure "data: " has space
let output;
if (line.startsWith("data:") && !line.startsWith("data: ")) {
output = "data: " + line.slice(5) + "\n";
} else {
output = line + "\n";
// Normalize if not already injected
if (!injectedUsage) {
if (line.startsWith("data:") && !line.startsWith("data: ")) {
output = "data: " + line.slice(5) + "\n";
} else {
output = line + "\n";
}
}
reqLogger?.appendConvertedChunk?.(output);
controller.enqueue(sharedEncoder.encode(output));
continue;
@@ -264,6 +180,14 @@ export function createSSEStream(options = {}) {
continue;
}
// Track content length for estimation (from various formats)
const content = parsed.delta?.text || // Claude
parsed.choices?.[0]?.delta?.content || // OpenAI
parsed.candidates?.[0]?.content?.parts?.[0]?.text; // Gemini
if (content && typeof content === "string") {
totalContentLength += content.length;
}
// Extract usage
const extracted = extractUsage(parsed);
if (extracted) state.usage = extracted;
@@ -281,6 +205,14 @@ export function createSSEStream(options = {}) {
if (translated?.length > 0) {
for (const item of translated) {
// Inject estimated usage if finish chunk has no valid usage
const isFinishChunk = item.type === "message_delta" || item.choices?.[0]?.finish_reason;
if (state.finishReason && isFinishChunk && !hasValidUsage(item.usage) && totalContentLength > 0) {
const estimated = estimateUsage(body, totalContentLength, sourceFormat);
item.usage = estimated;
state.usage = estimated;
}
const output = formatSSE(item, sourceFormat);
reqLogger?.appendConvertedChunk?.(output);
controller.enqueue(sharedEncoder.encode(output));
@@ -304,10 +236,15 @@ export function createSSEStream(options = {}) {
reqLogger?.appendConvertedChunk?.(output);
controller.enqueue(sharedEncoder.encode(output));
}
if (usage && typeof usage === 'object') {
// Estimate usage if provider didn't return valid usage (PASSTHROUGH is always OpenAI format)
if (!hasValidUsage(usage) && totalContentLength > 0) {
usage = estimateUsage(body, totalContentLength, FORMATS.OPENAI);
}
if (hasValidUsage(usage)) {
logUsage(provider, usage, model, connectionId);
} else {
// No usage data available - still mark request as completed
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { });
}
return;
@@ -361,10 +298,14 @@ export function createSSEStream(options = {}) {
reqLogger?.appendConvertedChunk?.(doneOutput);
controller.enqueue(sharedEncoder.encode(doneOutput));
if (state?.usage && typeof state.usage === 'object') {
// Estimate usage if provider didn't return valid usage (for translate mode)
if (!hasValidUsage(state?.usage) && totalContentLength > 0) {
state.usage = estimateUsage(body, totalContentLength, sourceFormat);
}
if (hasValidUsage(state?.usage)) {
logUsage(state.provider || targetFormat, state.usage, model, connectionId);
} else {
// No usage data available - still mark request as completed
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { });
}
} catch (error) {
@@ -375,7 +316,7 @@ export function createSSEStream(options = {}) {
}
// Convenience functions for backward compatibility
export function createSSETransformStreamWithLogger(targetFormat, sourceFormat, provider = null, reqLogger = null, toolNameMap = null, model = null, connectionId = null) {
export function createSSETransformStreamWithLogger(targetFormat, sourceFormat, provider = null, reqLogger = null, toolNameMap = null, model = null, connectionId = null, body = null) {
return createSSEStream({
mode: STREAM_MODE.TRANSLATE,
targetFormat,
@@ -384,16 +325,18 @@ export function createSSETransformStreamWithLogger(targetFormat, sourceFormat, p
reqLogger,
toolNameMap,
model,
connectionId
connectionId,
body
});
}
export function createPassthroughStreamWithLogger(provider = null, reqLogger = null, model = null, connectionId = null) {
export function createPassthroughStreamWithLogger(provider = null, reqLogger = null, model = null, connectionId = null, body = null) {
return createSSEStream({
mode: STREAM_MODE.PASSTHROUGH,
provider,
reqLogger,
model,
connectionId
connectionId,
body
});
}

View File

@@ -0,0 +1,249 @@
/**
* Token Usage Tracking - Extract, normalize, estimate and log token usage
*/
import { saveRequestUsage, appendRequestLog } from "@/lib/usageDb.js";
import { FORMATS } from "../translator/formats.js";
// ANSI escape codes used to colorize console log output.
// `reset` must terminate every colored segment or the color bleeds
// into subsequent terminal output.
export const COLORS = {
  reset: "\x1b[0m",
  red: "\x1b[31m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  cyan: "\x1b[36m"
};
// Current wall-clock time formatted as HH:MM:SS (24-hour clock).
function getTimeString() {
  const timeOptions = { hour12: false, hour: "2-digit", minute: "2-digit", second: "2-digit" };
  return new Date().toLocaleTimeString("en-US", timeOptions);
}
/**
 * Normalize a raw usage object into a plain record of finite numbers.
 * Fields outside the known set and values that do not coerce to a finite
 * number are dropped. Returns null for non-objects, arrays, or when no
 * usable field remains.
 */
export function normalizeUsage(usage) {
  if (!usage || typeof usage !== "object" || Array.isArray(usage)) return null;
  const KNOWN_FIELDS = [
    "prompt_tokens",
    "completion_tokens",
    "cache_read_input_tokens",
    "cache_creation_input_tokens",
    "cached_tokens",
    "reasoning_tokens"
  ];
  const normalized = {};
  for (const field of KNOWN_FIELDS) {
    const raw = usage[field];
    if (raw === undefined || raw === null) continue;
    const numeric = Number(raw);
    if (Number.isFinite(numeric)) normalized[field] = numeric;
  }
  return Object.keys(normalized).length > 0 ? normalized : null;
}
/**
 * Report whether a usage object carries real token data.
 * "Valid" means at least one known token field is a number greater than
 * zero; nullish values, empty objects, non-numeric fields and all-zero
 * counters are all treated as invalid.
 */
export function hasValidUsage(usage) {
  if (!usage || typeof usage !== "object") return false;
  const KNOWN_TOKEN_FIELDS = [
    "prompt_tokens", "completion_tokens", "total_tokens", // OpenAI
    "input_tokens", "output_tokens",                      // Claude
    "promptTokenCount", "candidatesTokenCount"            // Gemini
  ];
  return KNOWN_TOKEN_FIELDS.some(
    (field) => typeof usage[field] === "number" && usage[field] > 0
  );
}
/**
 * Extract and normalize token usage from a streaming chunk in any of the
 * supported wire formats: Claude (message_delta), OpenAI Responses API
 * (response.completed/done), OpenAI chat completions, and Gemini
 * (usageMetadata). Returns a normalized usage record or null when the
 * chunk carries no usage information.
 */
export function extractUsage(chunk) {
  if (!chunk || typeof chunk !== "object") return null;

  // Claude streaming: usage rides on the message_delta event
  if (chunk.type === "message_delta" && chunk.usage && typeof chunk.usage === "object") {
    const u = chunk.usage;
    return normalizeUsage({
      prompt_tokens: u.input_tokens || 0,
      completion_tokens: u.output_tokens || 0,
      cache_read_input_tokens: u.cache_read_input_tokens,
      cache_creation_input_tokens: u.cache_creation_input_tokens
    });
  }

  // OpenAI Responses API: usage nested under `response` on the final event
  const isResponsesFinal = chunk.type === "response.completed" || chunk.type === "response.done";
  if (isResponsesFinal && chunk.response?.usage && typeof chunk.response.usage === "object") {
    const u = chunk.response.usage;
    return normalizeUsage({
      prompt_tokens: u.input_tokens || u.prompt_tokens || 0,
      completion_tokens: u.output_tokens || u.completion_tokens || 0,
      cached_tokens: u.input_tokens_details?.cached_tokens,
      reasoning_tokens: u.output_tokens_details?.reasoning_tokens
    });
  }

  // OpenAI chat completions: top-level usage keyed by prompt_tokens
  if (chunk.usage && typeof chunk.usage === "object" && chunk.usage.prompt_tokens !== undefined) {
    const u = chunk.usage;
    return normalizeUsage({
      prompt_tokens: u.prompt_tokens,
      completion_tokens: u.completion_tokens || 0,
      cached_tokens: u.prompt_tokens_details?.cached_tokens,
      reasoning_tokens: u.completion_tokens_details?.reasoning_tokens
    });
  }

  // Gemini (Antigravity): camelCase counters under usageMetadata
  if (chunk.usageMetadata && typeof chunk.usageMetadata === "object") {
    const meta = chunk.usageMetadata;
    return normalizeUsage({
      prompt_tokens: meta.promptTokenCount || 0,
      completion_tokens: meta.candidatesTokenCount || 0,
      cached_tokens: meta.cachedContentTokenCount,
      reasoning_tokens: meta.thoughtsTokenCount
    });
  }

  return null;
}
/**
 * Estimate input (prompt) tokens from a chat request body.
 *
 * Sums characters across message content (string or multi-part array form),
 * tool/function declarations and the top-level system prompt, then applies
 * the rough heuristic of ~4 characters per token.
 *
 * @param {object} body - Request body (OpenAI/Claude-style)
 * @returns {number} Estimated prompt token count; 0 for empty/invalid input
 */
export function estimateInputTokens(body) {
  if (!body || typeof body !== "object") return 0;
  let totalChars = 0;
  // Count messages
  if (Array.isArray(body.messages)) {
    for (const msg of body.messages) {
      if (!msg || typeof msg !== "object") continue; // tolerate malformed entries
      if (msg.content) {
        if (typeof msg.content === "string") {
          totalChars += msg.content.length;
        } else if (Array.isArray(msg.content)) {
          for (const part of msg.content) {
            if (!part || typeof part !== "object") continue;
            // Type-guard `text`: a truthy non-string would poison the sum with NaN
            if (typeof part.text === "string") totalChars += part.text.length;
            if (part.type === "image_url") totalChars += 85; // Rough estimate for images
          }
        }
      }
      if (typeof msg.role === "string") totalChars += msg.role.length;
    }
  }
  // Count tools/functions (OpenAI `tools` preferred over legacy `functions`)
  if (Array.isArray(body.tools)) {
    totalChars += JSON.stringify(body.tools).length;
  } else if (Array.isArray(body.functions)) {
    totalChars += JSON.stringify(body.functions).length;
  }
  // Count system prompt (Claude-style top-level `system`, string or structured)
  if (body.system) {
    totalChars += typeof body.system === "string" ? body.system.length : JSON.stringify(body.system).length;
  }
  // Estimate: ~4 chars per token (rough average across all tokenizers)
  return Math.ceil(totalChars / 4);
}
/**
 * Estimate output (completion) tokens from the accumulated content length.
 * Non-positive or missing lengths yield 0; any positive length yields at
 * least 1 token (~4 characters per token heuristic).
 */
export function estimateOutputTokens(contentLength) {
  if (!contentLength || contentLength <= 0) return 0;
  const approximate = Math.floor(contentLength / 4);
  return approximate < 1 ? 1 : approximate;
}
/**
 * Shape an estimated usage object for the given target format.
 * Claude clients get input_tokens/output_tokens; everything else
 * (OpenAI, Gemini, Responses API) gets the OpenAI field names with a
 * computed total. The `estimated: true` flag marks the values as
 * heuristic rather than provider-reported.
 * @param {number} inputTokens - Input/prompt tokens
 * @param {number} outputTokens - Output/completion tokens
 * @param {string} targetFormat - Target format from FORMATS
 */
export function formatUsage(inputTokens, outputTokens, targetFormat) {
  if (targetFormat === FORMATS.CLAUDE) {
    return { input_tokens: inputTokens, output_tokens: outputTokens, estimated: true };
  }
  const totalTokens = inputTokens + outputTokens;
  return {
    prompt_tokens: inputTokens,
    completion_tokens: outputTokens,
    total_tokens: totalTokens,
    estimated: true
  };
}
/**
 * Estimate full usage when provider doesn't return it.
 * Combines the request-body input estimate with the streamed-content
 * output estimate, shaped for the requested wire format.
 * @param {object} body - Request body for input token estimation
 * @param {number} contentLength - Content length for output token estimation
 * @param {string} targetFormat - Target format from FORMATS constant
 * @returns {object} Usage object shaped for targetFormat, tagged `estimated: true`
 */
export function estimateUsage(body, contentLength, targetFormat = FORMATS.OPENAI) {
  return formatUsage(
    estimateInputTokens(body),
    estimateOutputTokens(contentLength),
    targetFormat
  );
}
/**
 * Log token usage to the console (green) and persist it best-effort to the
 * usage DB and request log. Accepts both OpenAI (prompt/completion) and
 * Claude (input/output) token field names; invalid usage objects are
 * silently ignored, and persistence failures never propagate.
 */
export function logUsage(provider, usage, model = null, connectionId = null) {
  if (!usage || typeof usage !== "object") return;
  const providerLabel = provider?.toUpperCase() || "UNKNOWN";
  // Token counts may arrive in OpenAI or Claude field naming.
  const inputTokens = usage?.prompt_tokens || usage?.input_tokens || 0;
  const outputTokens = usage?.completion_tokens || usage?.output_tokens || 0;
  const accountLabel = connectionId ? connectionId.slice(0, 8) + "..." : "unknown";
  // Unify cache/reasoning counters across formats up front.
  const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens;
  const cacheCreation = usage.cache_creation_input_tokens;
  const reasoning = usage.reasoning_tokens;
  let line = `[${getTimeString()}] 📊 ${COLORS.green}[USAGE] ${providerLabel} | in=${inputTokens} | out=${outputTokens} | account=${accountLabel}${COLORS.reset}`;
  if (usage.estimated) {
    // Heuristic values get a visible yellow marker.
    line += ` ${COLORS.yellow}(estimated)${COLORS.reset}`;
  }
  if (cacheRead) line += ` | cache_read=${cacheRead}`;
  if (cacheCreation) line += ` | cache_create=${cacheCreation}`;
  if (reasoning) line += ` | reasoning=${reasoning}`;
  console.log(line);
  // Best-effort persistence: a DB failure must never break the stream path.
  const tokens = {
    input: inputTokens,
    output: outputTokens,
    cacheRead: cacheRead || 0,
    cacheCreation: cacheCreation || 0,
    reasoning: reasoning || 0
  };
  saveRequestUsage({ model, provider, connectionId, tokens }).catch(() => { });
  appendRequestLog({ model, provider, connectionId, tokens, status: "200 OK" }).catch(() => { });
}

View File

@@ -17,7 +17,7 @@ export default function RequestLogger() {
if (autoRefresh) {
interval = setInterval(() => {
fetchLogs(false);
}, 500);
}, 3000);
}
return () => clearInterval(interval);
}, [autoRefresh]);
@@ -43,17 +43,15 @@ export default function RequestLogger() {
<h2 className="text-xl font-semibold">Request Logs</h2>
<div className="flex items-center gap-2">
<label className="text-sm font-medium text-text-muted flex items-center gap-2 cursor-pointer">
<span>Auto Refresh (500ms)</span>
<span>Auto Refresh (3s)</span>
<div
onClick={() => setAutoRefresh(!autoRefresh)}
className={`relative inline-flex h-5 w-9 items-center rounded-full transition-colors focus:outline-none ${
autoRefresh ? "bg-primary" : "bg-bg-subtle border border-border"
}`}
className={`relative inline-flex h-5 w-9 items-center rounded-full transition-colors focus:outline-none ${autoRefresh ? "bg-primary" : "bg-bg-subtle border border-border"
}`}
>
<span
className={`inline-block h-3 w-3 transform rounded-full bg-white transition-transform ${
autoRefresh ? "translate-x-5" : "translate-x-1"
}`}
className={`inline-block h-3 w-3 transform rounded-full bg-white transition-transform ${autoRefresh ? "translate-x-5" : "translate-x-1"
}`}
/>
</div>
</label>
@@ -101,11 +99,10 @@ export default function RequestLogger() {
<td className="px-3 py-1.5 border-r border-border truncate max-w-[150px]" title={parts[3]}>{parts[3]}</td>
<td className="px-3 py-1.5 border-r border-border text-right text-primary">{parts[4]}</td>
<td className="px-3 py-1.5 border-r border-border text-right text-success">{parts[5]}</td>
<td className={`px-3 py-1.5 font-bold ${
isSuccess ? 'text-success' :
isFailed ? 'text-error' :
'text-primary animate-pulse'
}`}>
<td className={`px-3 py-1.5 font-bold ${isSuccess ? 'text-success' :
isFailed ? 'text-error' :
'text-primary animate-pulse'
}`}>
{status}
</td>
</tr>