feat: Enhance usage tracking across response handlers

decolua
2026-02-03 00:25:23 +07:00
parent c1e88a9d01
commit a33924b336
8 changed files with 299 additions and 66 deletions
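
Every handler touched below converges on the same target: the final streamed chunk (the one carrying finish_reason) gets an OpenAI-style usage object attached when the upstream provider reports token counts, or when they can be estimated. A minimal sketch of that target shape, with illustrative values only (the id, model name, and numbers are hypothetical, not taken from any real response):

// Illustrative only: the final-chunk shape the handlers below emit; all values are made up.
const finalChunk = {
  id: "chatcmpl-example",                  // hypothetical id
  object: "chat.completion.chunk",
  created: Math.floor(Date.now() / 1000),
  model: "example-model",                  // hypothetical model name
  choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
  usage: {
    prompt_tokens: 1200,                   // all prompt-side tokens
    completion_tokens: 350,
    total_tokens: 1550,
    prompt_tokens_details: { cached_tokens: 800 }   // only when cache tokens exist
  }
};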

View File

@@ -98,9 +98,16 @@ export class KiroExecutor extends BaseExecutor {
if (!event) continue;
const eventType = event.headers[":event-type"] || "";
// Track total content length for token estimation
if (!state.totalContentLength) state.totalContentLength = 0;
if (!state.contextUsagePercentage) state.contextUsagePercentage = 0;
// Handle assistantResponseEvent
if (eventType === "assistantResponseEvent" && event.payload?.content) {
const content = event.payload.content;
state.totalContentLength += content.length;
const chunk = {
id: responseId,
object: "chat.completion.chunk",
@@ -109,8 +116,8 @@ export class KiroExecutor extends BaseExecutor {
choices: [{
index: 0,
delta: chunkIndex === 0
? { role: "assistant", content: event.payload.content }
: { content: event.payload.content },
? { role: "assistant", content }
: { content },
finish_reason: null
}]
};
@@ -233,25 +240,79 @@ export class KiroExecutor extends BaseExecutor {
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`));
}
// Detect end of stream
if ((eventType === "meteringEvent" || eventType === "contextUsageEvent") && !state.endDetected) {
state.endDetected = true;
if (!state.finishEmitted) {
state.finishEmitted = true;
const finishChunk = {
id: responseId,
object: "chat.completion.chunk",
created,
model,
choices: [{
index: 0,
delta: {},
finish_reason: state.hasToolCalls ? "tool_calls" : "stop"
}]
};
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finishChunk)}\n\n`));
// Handle contextUsageEvent to extract contextUsagePercentage
if (eventType === "contextUsageEvent" && event.payload?.contextUsagePercentage) {
state.contextUsagePercentage = event.payload.contextUsagePercentage;
// Mark that we received context usage event
state.hasContextUsage = true;
}
// Handle meteringEvent - mark that we received it
if (eventType === "meteringEvent") {
state.hasMeteringEvent = true;
}
// Handle metricsEvent for token usage
if (eventType === "metricsEvent") {
// Extract usage data from metricsEvent payload
const metrics = event.payload?.metricsEvent || event.payload;
if (metrics && typeof metrics === 'object') {
const inputTokens = metrics.inputTokens || 0;
const outputTokens = metrics.outputTokens || 0;
if (inputTokens > 0 || outputTokens > 0) {
state.usage = {
prompt_tokens: inputTokens,
completion_tokens: outputTokens,
total_tokens: inputTokens + outputTokens
};
}
}
}
// Emit final chunk only after receiving BOTH meteringEvent AND contextUsageEvent
if (state.hasMeteringEvent && state.hasContextUsage && !state.finishEmitted) {
state.finishEmitted = true;
// Estimate tokens if not available from events
if (!state.usage) {
// Estimate output tokens from content length
const estimatedOutputTokens = state.totalContentLength > 0
? Math.max(1, Math.floor(state.totalContentLength / 4))
: 0;
// Estimate input tokens from contextUsagePercentage
// Kiro models typically have a 200k context window
const estimatedInputTokens = state.contextUsagePercentage > 0
? Math.floor(state.contextUsagePercentage * 200000 / 100)
: 0;
state.usage = {
prompt_tokens: estimatedInputTokens,
completion_tokens: estimatedOutputTokens,
total_tokens: estimatedInputTokens + estimatedOutputTokens
};
}
const finishChunk = {
id: responseId,
object: "chat.completion.chunk",
created,
model,
choices: [{
index: 0,
delta: {},
finish_reason: state.hasToolCalls ? "tool_calls" : "stop"
}]
};
// Include usage in final chunk if available
if (state.usage) {
finishChunk.usage = state.usage;
}
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finishChunk)}\n\n`));
}
}
if (iterations >= maxIterations) {
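
For reference, a quick sketch of how the fallback estimation above plays out. The character count and percentage are made up; the 200k context window is the assumption already stated in the comment above:

// Hypothetical inputs: 2,000 characters of streamed assistant text,
// and a contextUsageEvent that reported contextUsagePercentage = 12.
const totalContentLength = 2000;
const contextUsagePercentage = 12;

const estimatedOutputTokens = Math.max(1, Math.floor(totalContentLength / 4));     // 500
const estimatedInputTokens = Math.floor(contextUsagePercentage * 200000 / 100);    // 24000

// state.usage = { prompt_tokens: 24000, completion_tokens: 500, total_tokens: 24500 }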

View File

@@ -12,7 +12,6 @@ export function adjustMaxTokens(body) {
// Tool calls with large content (like writing files) need more tokens
if (body.tools && Array.isArray(body.tools) && body.tools.length > 0) {
if (maxTokens < DEFAULT_MIN_TOKENS) {
console.log(`[AUTO-ADJUST] max_tokens: ${maxTokens} → ${DEFAULT_MIN_TOKENS} (tool calling detected)`);
maxTokens = DEFAULT_MIN_TOKENS;
}
}

View File

@@ -92,9 +92,34 @@ export function claudeToOpenAIResponse(chunk, state) {
}
case "message_delta": {
// Extract usage from message_delta event (Claude native format)
// Normalize to OpenAI format (prompt_tokens/completion_tokens) for consistent logging
if (chunk.usage && typeof chunk.usage === "object") {
const inputTokens = typeof chunk.usage.input_tokens === "number" ? chunk.usage.input_tokens : 0;
const outputTokens = typeof chunk.usage.output_tokens === "number" ? chunk.usage.output_tokens : 0;
const cacheReadTokens = typeof chunk.usage.cache_read_input_tokens === "number" ? chunk.usage.cache_read_input_tokens : 0;
const cacheCreationTokens = typeof chunk.usage.cache_creation_input_tokens === "number" ? chunk.usage.cache_creation_input_tokens : 0;
// Use OpenAI format keys for consistent logging in stream.js
state.usage = {
prompt_tokens: inputTokens,
completion_tokens: outputTokens,
input_tokens: inputTokens,
output_tokens: outputTokens
};
// Store cache tokens if present
if (cacheReadTokens > 0) {
state.usage.cache_read_input_tokens = cacheReadTokens;
}
if (cacheCreationTokens > 0) {
state.usage.cache_creation_input_tokens = cacheCreationTokens;
}
}
if (chunk.delta?.stop_reason) {
state.finishReason = convertStopReason(chunk.delta.stop_reason);
results.push({
const finalChunk = {
id: `chatcmpl-${state.messageId}`,
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
@@ -104,7 +129,41 @@ export function claudeToOpenAIResponse(chunk, state) {
delta: {},
finish_reason: state.finishReason
}]
});
};
// Include usage in final chunk if available
if (state.usage && typeof state.usage === "object") {
const inputTokens = state.usage.input_tokens || 0;
const outputTokens = state.usage.output_tokens || 0;
const cachedTokens = state.usage.cache_read_input_tokens || 0;
const cacheCreationTokens = state.usage.cache_creation_input_tokens || 0;
// prompt_tokens = input_tokens + cache_read + cache_creation (all prompt-side tokens)
// completion_tokens = output_tokens
// total_tokens = prompt_tokens + completion_tokens
const promptTokens = inputTokens + cachedTokens + cacheCreationTokens;
const completionTokens = outputTokens;
const totalTokens = promptTokens + completionTokens;
finalChunk.usage = {
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
total_tokens: totalTokens
};
// Add prompt_tokens_details if cached tokens exist
if (cachedTokens > 0 || cacheCreationTokens > 0) {
finalChunk.usage.prompt_tokens_details = {};
if (cachedTokens > 0) {
finalChunk.usage.prompt_tokens_details.cached_tokens = cachedTokens;
}
if (cacheCreationTokens > 0) {
finalChunk.usage.prompt_tokens_details.cache_creation_tokens = cacheCreationTokens;
}
}
}
results.push(finalChunk);
state.finishReasonSent = true;
}
break;
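
A worked example of the aggregation above, with made-up token counts, showing how a Claude message_delta usage payload is folded into OpenAI-style usage on the final chunk:

// Hypothetical usage reported by a Claude message_delta event.
const claudeUsage = {
  input_tokens: 100,
  output_tokens: 50,
  cache_read_input_tokens: 2000,
  cache_creation_input_tokens: 300
};
// prompt_tokens     = 100 + 2000 + 300 = 2400   (all prompt-side tokens)
// completion_tokens = 50
// total_tokens      = 2450
// prompt_tokens_details = { cached_tokens: 2000, cache_creation_tokens: 300 }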

View File

@@ -160,14 +160,56 @@ export function geminiToOpenAIResponse(chunk, state) {
}
}
// Finish reason
// Usage metadata - extract before finish reason so we can include it
const usageMeta = response.usageMetadata || chunk.usageMetadata;
if (usageMeta && typeof usageMeta === "object") {
const cachedTokens = typeof usageMeta.cachedContentTokenCount === "number" ? usageMeta.cachedContentTokenCount : 0;
const promptTokenCountRaw = typeof usageMeta.promptTokenCount === "number" ? usageMeta.promptTokenCount : 0;
const thoughtsTokens = typeof usageMeta.thoughtsTokenCount === "number" ? usageMeta.thoughtsTokenCount : 0;
let candidatesTokens = typeof usageMeta.candidatesTokenCount === "number" ? usageMeta.candidatesTokenCount : 0;
const totalTokens = typeof usageMeta.totalTokenCount === "number" ? usageMeta.totalTokenCount : 0;
// prompt_tokens = promptTokenCount (includes cached tokens, matching claude-to-openai.js behavior)
const promptTokens = promptTokenCountRaw;
// Fallback calculation if candidatesTokenCount is 0 but totalTokenCount exists
if (candidatesTokens === 0 && totalTokens > 0) {
candidatesTokens = totalTokens - promptTokenCountRaw - thoughtsTokens;
if (candidatesTokens < 0) candidatesTokens = 0;
}
// completion_tokens = candidatesTokenCount + thoughtsTokenCount (match Go code)
const completionTokens = candidatesTokens + thoughtsTokens;
state.usage = {
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
total_tokens: totalTokens
};
// Add prompt_tokens_details if cached tokens exist
if (cachedTokens > 0) {
state.usage.prompt_tokens_details = {
cached_tokens: cachedTokens
};
}
// Add completion_tokens_details if reasoning tokens exist
if (thoughtsTokens > 0) {
state.usage.completion_tokens_details = {
reasoning_tokens: thoughtsTokens
};
}
}
// Finish reason - include usage in final chunk
if (candidate.finishReason) {
let finishReason = candidate.finishReason.toLowerCase();
if (finishReason === "stop" && state.toolCalls.size > 0) {
finishReason = "tool_calls";
}
results.push({
const finalChunk = {
id: `chatcmpl-${state.messageId}`,
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
@@ -177,24 +219,15 @@ export function geminiToOpenAIResponse(chunk, state) {
delta: {},
finish_reason: finishReason
}]
});
state.finishReason = finishReason;
}
// Usage metadata
const usage = response.usageMetadata || chunk.usageMetadata;
if (usage && typeof usage === 'object') {
const promptTokens = (usage.promptTokenCount || 0) + (usage.thoughtsTokenCount || 0);
state.usage = {
prompt_tokens: promptTokens,
completion_tokens: usage.candidatesTokenCount || 0,
total_tokens: usage.totalTokenCount || 0
};
if (usage.thoughtsTokenCount > 0) {
state.usage.completion_tokens_details = {
reasoning_tokens: usage.thoughtsTokenCount
};
// Include usage in final chunk for downstream translators
if (state.usage) {
finalChunk.usage = state.usage;
}
results.push(finalChunk);
state.finishReason = finishReason;
}
return results.length > 0 ? results : null;
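
To make the mapping above concrete, a sketch with hypothetical usageMetadata values, including the fallback path where candidatesTokenCount comes back as 0:

// Hypothetical Gemini usageMetadata on a streamed chunk.
const usageMetadata = {
  promptTokenCount: 900,          // includes cached tokens
  cachedContentTokenCount: 400,
  thoughtsTokenCount: 120,
  candidatesTokenCount: 0,        // missing/zero in this chunk
  totalTokenCount: 1300
};
// Fallback: candidatesTokens = 1300 - 900 - 120 = 280
// completion_tokens = 280 + 120 = 400
// state.usage = {
//   prompt_tokens: 900,
//   completion_tokens: 400,
//   total_tokens: 1300,
//   prompt_tokens_details: { cached_tokens: 400 },
//   completion_tokens_details: { reasoning_tokens: 120 }
// }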

View File

@@ -17,9 +17,6 @@ export function convertKiroToOpenAI(chunk, state) {
if (chunk.object === "chat.completion.chunk" && chunk.choices) {
return chunk;
}
console.log("chunk", chunk);
// Handle string chunk (raw SSE data)
let data = chunk;
@@ -161,6 +158,11 @@ export function convertKiroToOpenAI(chunk, state) {
}]
};
// Include usage in final chunk if available
if (state.usage && typeof state.usage === "object") {
openaiChunk.usage = state.usage;
}
return openaiChunk;
}

View File

@@ -474,9 +474,38 @@ export function openaiResponsesToOpenAIResponse(chunk, state) {
// Response completed
if (eventType === "response.completed") {
// Extract usage from response.completed event
const responseUsage = data.response?.usage;
if (responseUsage && typeof responseUsage === "object") {
const inputTokens = responseUsage.input_tokens || responseUsage.prompt_tokens || 0;
const outputTokens = responseUsage.output_tokens || responseUsage.completion_tokens || 0;
const cacheReadTokens = responseUsage.cache_read_input_tokens || 0;
const cacheCreationTokens = responseUsage.cache_creation_input_tokens || 0;
// prompt_tokens = input_tokens + cache_read + cache_creation (all prompt-side tokens)
const promptTokens = inputTokens + cacheReadTokens + cacheCreationTokens;
state.usage = {
prompt_tokens: promptTokens,
completion_tokens: outputTokens,
total_tokens: promptTokens + outputTokens
};
// Add prompt_tokens_details if cache tokens exist
if (cacheReadTokens > 0 || cacheCreationTokens > 0) {
state.usage.prompt_tokens_details = {};
if (cacheReadTokens > 0) {
state.usage.prompt_tokens_details.cached_tokens = cacheReadTokens;
}
if (cacheCreationTokens > 0) {
state.usage.prompt_tokens_details.cache_creation_tokens = cacheCreationTokens;
}
}
}
if (!state.finishReasonSent) {
state.finishReasonSent = true;
return {
const finalChunk = {
id: state.chatId,
object: "chat.completion.chunk",
created: state.created,
@@ -487,6 +516,13 @@ export function openaiResponsesToOpenAIResponse(chunk, state) {
finish_reason: "stop"
}]
};
// Include usage in final chunk if available
if (state.usage && typeof state.usage === "object") {
finalChunk.usage = state.usage;
}
return finalChunk;
}
return null;
}

View File

@@ -33,6 +33,40 @@ export function openaiToClaudeResponse(chunk, state) {
const choice = chunk.choices[0];
const delta = choice.delta;
// Track usage from OpenAI chunk if available
if (chunk.usage && typeof chunk.usage === "object") {
const promptTokens = typeof chunk.usage.prompt_tokens === "number" ? chunk.usage.prompt_tokens : 0;
const outputTokens = typeof chunk.usage.completion_tokens === "number" ? chunk.usage.completion_tokens : 0;
// Extract cache tokens from prompt_tokens_details
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens;
const cacheCreationTokens = chunk.usage.prompt_tokens_details?.cache_creation_tokens;
const cacheReadTokens = typeof cachedTokens === "number" ? cachedTokens : 0;
const cacheCreateTokens = typeof cacheCreationTokens === "number" ? cacheCreationTokens : 0;
// input_tokens = prompt_tokens - cached_tokens - cache_creation_tokens
// Because OpenAI's prompt_tokens includes all prompt-side tokens
const inputTokens = promptTokens - cacheReadTokens - cacheCreateTokens;
state.usage = {
input_tokens: inputTokens,
output_tokens: outputTokens
};
// Add cache_read_input_tokens if present
if (cacheReadTokens > 0) {
state.usage.cache_read_input_tokens = cacheReadTokens;
}
// Add cache_creation_input_tokens if present
if (cacheCreateTokens > 0) {
state.usage.cache_creation_input_tokens = cacheCreateTokens;
}
// Note: completion_tokens_details.reasoning_tokens is already included in output_tokens
// No need to add separately as Claude expects total output_tokens
}
// First chunk - ALWAYS send message_start first
if (!state.messageStartSent) {
state.messageStartSent = true;
@@ -158,10 +192,12 @@ export function openaiToClaudeResponse(chunk, state) {
});
}
// Use tracked usage or default to 0
const finalUsage = state.usage || { input_tokens: 0, output_tokens: 0 };
results.push({
type: "message_delta",
delta: { stop_reason: convertFinishReason(choice.finish_reason) },
usage: { output_tokens: 0 }
usage: finalUsage
});
results.push({ type: "message_stop" });
}
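
The reverse direction subtracts cache tokens back out, since OpenAI's prompt_tokens already includes them. Using the same hypothetical numbers as the Claude-to-OpenAI example above to show the round trip:

// Hypothetical OpenAI usage arriving on a chunk, converted back to Claude-style fields.
const openaiUsage = {
  prompt_tokens: 2400,
  completion_tokens: 50,
  prompt_tokens_details: { cached_tokens: 2000, cache_creation_tokens: 300 }
};
// input_tokens = 2400 - 2000 - 300 = 100
// state.usage = {
//   input_tokens: 100,
//   output_tokens: 50,
//   cache_read_input_tokens: 2000,
//   cache_creation_input_tokens: 300
// }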

View File

@@ -43,12 +43,13 @@ function extractUsage(chunk) {
reasoning_tokens: chunk.usage.completion_tokens_details?.reasoning_tokens
});
}
// Gemini format
// Gemini format (Antigravity)
if (chunk.usageMetadata && typeof chunk.usageMetadata === 'object') {
return normalizeUsage({
prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
reasoning_tokens: chunk.usageMetadata.thoughtsTokenCount
prompt_tokens: chunk.usageMetadata?.promptTokenCount || 0,
completion_tokens: chunk.usageMetadata?.candidatesTokenCount || 0,
cached_tokens: chunk.usageMetadata?.cachedContentTokenCount,
reasoning_tokens: chunk.usageMetadata?.thoughtsTokenCount
});
}
return null;
@@ -90,21 +91,27 @@ function logUsage(provider, usage, model = null, connectionId = null) {
if (!usage || typeof usage !== 'object') return;
const p = provider?.toUpperCase() || "UNKNOWN";
const inTokens = usage?.prompt_tokens || 0;
const outTokens = usage?.completion_tokens || 0;
// Support both formats:
// - OpenAI: prompt_tokens, completion_tokens
// - Claude: input_tokens, output_tokens
const inTokens = usage?.prompt_tokens || usage?.input_tokens || 0;
const outTokens = usage?.completion_tokens || usage?.output_tokens || 0;
let msg = `[${getTimeString()}] 📊 [USAGE] ${p} | in=${inTokens} | out=${outTokens}`;
if (connectionId) msg += ` | account=${connectionId.slice(0, 8)}...`;
// Support both formats: cache_read_input_tokens (Claude) and cached_tokens (OpenAI/Gemini)
const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens;
if (cacheRead) msg += ` | cache_read=${cacheRead}`;
if (usage.cache_creation_input_tokens) msg += ` | cache_write=${usage.cache_creation_input_tokens}`;
if (usage.cache_read_input_tokens) msg += ` | cache_read=${usage.cache_read_input_tokens}`;
if (usage.cached_tokens) msg += ` | cached=${usage.cached_tokens}`;
if (usage.reasoning_tokens) msg += ` | reasoning=${usage.reasoning_tokens}`;
console.log(`${COLORS.green}${msg}${COLORS.reset}`);
// Log to log.txt
appendRequestLog({ model, provider, connectionId, tokens: usage, status: "200 OK" }).catch(() => {});
appendRequestLog({ model, provider, connectionId, tokens: usage, status: "200 OK" }).catch(() => { });
// Save to DB
saveRequestUsage({
@@ -147,7 +154,7 @@ export function formatSSE(data, sourceFormat) {
if (data === null || data === undefined) {
return "data: null\n\n";
}
if (data && data.done) return "data: [DONE]\n\n";
// OpenAI Responses API format: has event field
@@ -230,7 +237,7 @@ export function createSSEStream(options = {}) {
const parsed = JSON.parse(trimmed.slice(5).trim());
const extracted = extractUsage(parsed);
if (extracted) usage = extracted;
} catch {}
} catch { }
}
// Normalize: ensure "data: " has space
let output;
@@ -263,7 +270,7 @@ export function createSSEStream(options = {}) {
// Translate: targetFormat -> openai -> sourceFormat
const translated = translateResponse(targetFormat, sourceFormat, parsed, state);
// Log OpenAI intermediate chunks (if available)
if (translated?._openaiIntermediate) {
for (const item of translated._openaiIntermediate) {
@@ -271,7 +278,7 @@ export function createSSEStream(options = {}) {
reqLogger?.appendOpenAIChunk?.(openaiOutput);
}
}
if (translated?.length > 0) {
for (const item of translated) {
const output = formatSSE(item, sourceFormat);
@@ -297,11 +304,11 @@ export function createSSEStream(options = {}) {
reqLogger?.appendConvertedChunk?.(output);
controller.enqueue(sharedEncoder.encode(output));
}
if (usage && typeof usage === 'object') {
if (usage && typeof usage === 'object') {
logUsage(provider, usage, model, connectionId);
} else {
// No usage data available - still mark request as completed
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => {});
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { });
}
return;
}
@@ -311,7 +318,7 @@ if (usage && typeof usage === 'object') {
const parsed = parseSSELine(buffer.trim());
if (parsed && !parsed.done) {
const translated = translateResponse(targetFormat, sourceFormat, parsed, state);
// Log OpenAI intermediate chunks
if (translated?._openaiIntermediate) {
for (const item of translated._openaiIntermediate) {
@@ -319,7 +326,7 @@ if (usage && typeof usage === 'object') {
reqLogger?.appendOpenAIChunk?.(openaiOutput);
}
}
if (translated?.length > 0) {
for (const item of translated) {
const output = formatSSE(item, sourceFormat);
@@ -332,7 +339,7 @@ if (usage && typeof usage === 'object') {
// Flush remaining events (only once at stream end)
const flushed = translateResponse(targetFormat, sourceFormat, null, state);
// Log OpenAI intermediate chunks for flushed events
if (flushed?._openaiIntermediate) {
for (const item of flushed._openaiIntermediate) {
@@ -340,7 +347,7 @@ if (usage && typeof usage === 'object') {
reqLogger?.appendOpenAIChunk?.(openaiOutput);
}
}
if (flushed?.length > 0) {
for (const item of flushed) {
const output = formatSSE(item, sourceFormat);
@@ -354,11 +361,11 @@ if (usage && typeof usage === 'object') {
reqLogger?.appendConvertedChunk?.(doneOutput);
controller.enqueue(sharedEncoder.encode(doneOutput));
if (state?.usage && typeof state.usage === 'object') {
if (state?.usage && typeof state.usage === 'object') {
logUsage(state.provider || targetFormat, state.usage, model, connectionId);
} else {
// No usage data available - still mark request as completed
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => {});
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { });
}
} catch (error) {
console.log("Error in flush:", error);