mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
feat: Enhance usage tracking across response handlers
This commit is contained in:
@@ -98,9 +98,16 @@ export class KiroExecutor extends BaseExecutor {
|
||||
if (!event) continue;
|
||||
|
||||
const eventType = event.headers[":event-type"] || "";
|
||||
|
||||
// Track total content length for token estimation
|
||||
if (!state.totalContentLength) state.totalContentLength = 0;
|
||||
if (!state.contextUsagePercentage) state.contextUsagePercentage = 0;
|
||||
|
||||
// Handle assistantResponseEvent
|
||||
if (eventType === "assistantResponseEvent" && event.payload?.content) {
|
||||
const content = event.payload.content;
|
||||
state.totalContentLength += content.length;
|
||||
|
||||
const chunk = {
|
||||
id: responseId,
|
||||
object: "chat.completion.chunk",
|
||||
@@ -109,8 +116,8 @@ export class KiroExecutor extends BaseExecutor {
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: chunkIndex === 0
|
||||
? { role: "assistant", content: event.payload.content }
|
||||
: { content: event.payload.content },
|
||||
? { role: "assistant", content }
|
||||
: { content },
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
@@ -233,25 +240,79 @@ export class KiroExecutor extends BaseExecutor {
|
||||
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`));
|
||||
}
|
||||
|
||||
// Detect end of stream
|
||||
if ((eventType === "meteringEvent" || eventType === "contextUsageEvent") && !state.endDetected) {
|
||||
state.endDetected = true;
|
||||
if (!state.finishEmitted) {
|
||||
state.finishEmitted = true;
|
||||
const finishChunk = {
|
||||
id: responseId,
|
||||
object: "chat.completion.chunk",
|
||||
created,
|
||||
model,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {},
|
||||
finish_reason: state.hasToolCalls ? "tool_calls" : "stop"
|
||||
}]
|
||||
};
|
||||
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finishChunk)}\n\n`));
|
||||
// Handle contextUsageEvent to extract contextUsagePercentage
|
||||
if (eventType === "contextUsageEvent" && event.payload?.contextUsagePercentage) {
|
||||
state.contextUsagePercentage = event.payload.contextUsagePercentage;
|
||||
// Mark that we received context usage event
|
||||
state.hasContextUsage = true;
|
||||
}
|
||||
|
||||
// Handle meteringEvent - mark that we received it
|
||||
if (eventType === "meteringEvent") {
|
||||
state.hasMeteringEvent = true;
|
||||
}
|
||||
|
||||
// Handle metricsEvent for token usage
|
||||
if (eventType === "metricsEvent") {
|
||||
// Extract usage data from metricsEvent payload
|
||||
const metrics = event.payload?.metricsEvent || event.payload;
|
||||
if (metrics && typeof metrics === 'object') {
|
||||
const inputTokens = metrics.inputTokens || 0;
|
||||
const outputTokens = metrics.outputTokens || 0;
|
||||
|
||||
if (inputTokens > 0 || outputTokens > 0) {
|
||||
state.usage = {
|
||||
prompt_tokens: inputTokens,
|
||||
completion_tokens: outputTokens,
|
||||
total_tokens: inputTokens + outputTokens
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit final chunk only after receiving BOTH meteringEvent AND contextUsageEvent
|
||||
if (state.hasMeteringEvent && state.hasContextUsage && !state.finishEmitted) {
|
||||
state.finishEmitted = true;
|
||||
|
||||
// Estimate tokens if not available from events
|
||||
if (!state.usage) {
|
||||
// Estimate output tokens from content length
|
||||
const estimatedOutputTokens = state.totalContentLength > 0
|
||||
? Math.max(1, Math.floor(state.totalContentLength / 4))
|
||||
: 0;
|
||||
|
||||
// Estimate input tokens from contextUsagePercentage
|
||||
// Kiro models typically have 200k context window
|
||||
const estimatedInputTokens = state.contextUsagePercentage > 0
|
||||
? Math.floor(state.contextUsagePercentage * 200000 / 100)
|
||||
: 0;
|
||||
|
||||
state.usage = {
|
||||
prompt_tokens: estimatedInputTokens,
|
||||
completion_tokens: estimatedOutputTokens,
|
||||
total_tokens: estimatedInputTokens + estimatedOutputTokens
|
||||
};
|
||||
}
|
||||
|
||||
const finishChunk = {
|
||||
id: responseId,
|
||||
object: "chat.completion.chunk",
|
||||
created,
|
||||
model,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {},
|
||||
finish_reason: state.hasToolCalls ? "tool_calls" : "stop"
|
||||
}]
|
||||
};
|
||||
|
||||
// Include usage in final chunk if available
|
||||
if (state.usage) {
|
||||
finishChunk.usage = state.usage;
|
||||
}
|
||||
|
||||
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finishChunk)}\n\n`));
|
||||
}
|
||||
}
|
||||
|
||||
if (iterations >= maxIterations) {
|
||||
|
||||
@@ -12,7 +12,6 @@ export function adjustMaxTokens(body) {
|
||||
// Tool calls with large content (like writing files) need more tokens
|
||||
if (body.tools && Array.isArray(body.tools) && body.tools.length > 0) {
|
||||
if (maxTokens < DEFAULT_MIN_TOKENS) {
|
||||
console.log(`[AUTO-ADJUST] max_tokens: ${maxTokens} → ${DEFAULT_MIN_TOKENS} (tool calling detected)`);
|
||||
maxTokens = DEFAULT_MIN_TOKENS;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,9 +92,34 @@ export function claudeToOpenAIResponse(chunk, state) {
|
||||
}
|
||||
|
||||
case "message_delta": {
|
||||
// Extract usage from message_delta event (Claude native format)
|
||||
// Normalize to OpenAI format (prompt_tokens/completion_tokens) for consistent logging
|
||||
if (chunk.usage && typeof chunk.usage === "object") {
|
||||
const inputTokens = typeof chunk.usage.input_tokens === "number" ? chunk.usage.input_tokens : 0;
|
||||
const outputTokens = typeof chunk.usage.output_tokens === "number" ? chunk.usage.output_tokens : 0;
|
||||
const cacheReadTokens = typeof chunk.usage.cache_read_input_tokens === "number" ? chunk.usage.cache_read_input_tokens : 0;
|
||||
const cacheCreationTokens = typeof chunk.usage.cache_creation_input_tokens === "number" ? chunk.usage.cache_creation_input_tokens : 0;
|
||||
|
||||
// Use OpenAI format keys for consistent logging in stream.js
|
||||
state.usage = {
|
||||
prompt_tokens: inputTokens,
|
||||
completion_tokens: outputTokens,
|
||||
input_tokens: inputTokens,
|
||||
output_tokens: outputTokens
|
||||
};
|
||||
|
||||
// Store cache tokens if present
|
||||
if (cacheReadTokens > 0) {
|
||||
state.usage.cache_read_input_tokens = cacheReadTokens;
|
||||
}
|
||||
if (cacheCreationTokens > 0) {
|
||||
state.usage.cache_creation_input_tokens = cacheCreationTokens;
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk.delta?.stop_reason) {
|
||||
state.finishReason = convertStopReason(chunk.delta.stop_reason);
|
||||
results.push({
|
||||
const finalChunk = {
|
||||
id: `chatcmpl-${state.messageId}`,
|
||||
object: "chat.completion.chunk",
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
@@ -104,7 +129,41 @@ export function claudeToOpenAIResponse(chunk, state) {
|
||||
delta: {},
|
||||
finish_reason: state.finishReason
|
||||
}]
|
||||
});
|
||||
};
|
||||
|
||||
// Include usage in final chunk if available
|
||||
if (state.usage && typeof state.usage === "object") {
|
||||
const inputTokens = state.usage.input_tokens || 0;
|
||||
const outputTokens = state.usage.output_tokens || 0;
|
||||
const cachedTokens = state.usage.cache_read_input_tokens || 0;
|
||||
const cacheCreationTokens = state.usage.cache_creation_input_tokens || 0;
|
||||
|
||||
// prompt_tokens = input_tokens + cache_read + cache_creation (all prompt-side tokens)
|
||||
// completion_tokens = output_tokens
|
||||
// total_tokens = prompt_tokens + completion_tokens
|
||||
const promptTokens = inputTokens + cachedTokens + cacheCreationTokens;
|
||||
const completionTokens = outputTokens;
|
||||
const totalTokens = promptTokens + completionTokens;
|
||||
|
||||
finalChunk.usage = {
|
||||
prompt_tokens: promptTokens,
|
||||
completion_tokens: completionTokens,
|
||||
total_tokens: totalTokens
|
||||
};
|
||||
|
||||
// Add prompt_tokens_details if cached tokens exist
|
||||
if (cachedTokens > 0 || cacheCreationTokens > 0) {
|
||||
finalChunk.usage.prompt_tokens_details = {};
|
||||
if (cachedTokens > 0) {
|
||||
finalChunk.usage.prompt_tokens_details.cached_tokens = cachedTokens;
|
||||
}
|
||||
if (cacheCreationTokens > 0) {
|
||||
finalChunk.usage.prompt_tokens_details.cache_creation_tokens = cacheCreationTokens;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.push(finalChunk);
|
||||
state.finishReasonSent = true;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -160,14 +160,56 @@ export function geminiToOpenAIResponse(chunk, state) {
|
||||
}
|
||||
}
|
||||
|
||||
// Finish reason
|
||||
// Usage metadata - extract before finish reason so we can include it
|
||||
const usageMeta = response.usageMetadata || chunk.usageMetadata;
|
||||
if (usageMeta && typeof usageMeta === "object") {
|
||||
const cachedTokens = typeof usageMeta.cachedContentTokenCount === "number" ? usageMeta.cachedContentTokenCount : 0;
|
||||
const promptTokenCountRaw = typeof usageMeta.promptTokenCount === "number" ? usageMeta.promptTokenCount : 0;
|
||||
const thoughtsTokens = typeof usageMeta.thoughtsTokenCount === "number" ? usageMeta.thoughtsTokenCount : 0;
|
||||
let candidatesTokens = typeof usageMeta.candidatesTokenCount === "number" ? usageMeta.candidatesTokenCount : 0;
|
||||
const totalTokens = typeof usageMeta.totalTokenCount === "number" ? usageMeta.totalTokenCount : 0;
|
||||
|
||||
// prompt_tokens = promptTokenCount (includes cached tokens, matching claude-to-openai.js behavior)
|
||||
const promptTokens = promptTokenCountRaw;
|
||||
|
||||
// Fallback calculation if candidatesTokenCount is 0 but totalTokenCount exists
|
||||
if (candidatesTokens === 0 && totalTokens > 0) {
|
||||
candidatesTokens = totalTokens - promptTokenCountRaw - thoughtsTokens;
|
||||
if (candidatesTokens < 0) candidatesTokens = 0;
|
||||
}
|
||||
|
||||
// completion_tokens = candidatesTokenCount + thoughtsTokenCount (match Go code)
|
||||
const completionTokens = candidatesTokens + thoughtsTokens;
|
||||
|
||||
state.usage = {
|
||||
prompt_tokens: promptTokens,
|
||||
completion_tokens: completionTokens,
|
||||
total_tokens: totalTokens
|
||||
};
|
||||
|
||||
// Add prompt_tokens_details if cached tokens exist
|
||||
if (cachedTokens > 0) {
|
||||
state.usage.prompt_tokens_details = {
|
||||
cached_tokens: cachedTokens
|
||||
};
|
||||
}
|
||||
|
||||
// Add completion_tokens_details if reasoning tokens exist
|
||||
if (thoughtsTokens > 0) {
|
||||
state.usage.completion_tokens_details = {
|
||||
reasoning_tokens: thoughtsTokens
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Finish reason - include usage in final chunk
|
||||
if (candidate.finishReason) {
|
||||
let finishReason = candidate.finishReason.toLowerCase();
|
||||
if (finishReason === "stop" && state.toolCalls.size > 0) {
|
||||
finishReason = "tool_calls";
|
||||
}
|
||||
|
||||
results.push({
|
||||
const finalChunk = {
|
||||
id: `chatcmpl-${state.messageId}`,
|
||||
object: "chat.completion.chunk",
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
@@ -177,24 +219,15 @@ export function geminiToOpenAIResponse(chunk, state) {
|
||||
delta: {},
|
||||
finish_reason: finishReason
|
||||
}]
|
||||
});
|
||||
state.finishReason = finishReason;
|
||||
}
|
||||
|
||||
// Usage metadata
|
||||
const usage = response.usageMetadata || chunk.usageMetadata;
|
||||
if (usage && typeof usage === 'object') {
|
||||
const promptTokens = (usage.promptTokenCount || 0) + (usage.thoughtsTokenCount || 0);
|
||||
state.usage = {
|
||||
prompt_tokens: promptTokens,
|
||||
completion_tokens: usage.candidatesTokenCount || 0,
|
||||
total_tokens: usage.totalTokenCount || 0
|
||||
};
|
||||
if (usage.thoughtsTokenCount > 0) {
|
||||
state.usage.completion_tokens_details = {
|
||||
reasoning_tokens: usage.thoughtsTokenCount
|
||||
};
|
||||
|
||||
// Include usage in final chunk for downstream translators
|
||||
if (state.usage) {
|
||||
finalChunk.usage = state.usage;
|
||||
}
|
||||
|
||||
results.push(finalChunk);
|
||||
state.finishReason = finishReason;
|
||||
}
|
||||
|
||||
return results.length > 0 ? results : null;
|
||||
|
||||
@@ -17,9 +17,6 @@ export function convertKiroToOpenAI(chunk, state) {
|
||||
if (chunk.object === "chat.completion.chunk" && chunk.choices) {
|
||||
return chunk;
|
||||
}
|
||||
|
||||
|
||||
console.log("chunk", chunk);
|
||||
|
||||
// Handle string chunk (raw SSE data)
|
||||
let data = chunk;
|
||||
@@ -161,6 +158,11 @@ export function convertKiroToOpenAI(chunk, state) {
|
||||
}]
|
||||
};
|
||||
|
||||
// Include usage in final chunk if available
|
||||
if (state.usage && typeof state.usage === "object") {
|
||||
openaiChunk.usage = state.usage;
|
||||
}
|
||||
|
||||
return openaiChunk;
|
||||
}
|
||||
|
||||
|
||||
@@ -474,9 +474,38 @@ export function openaiResponsesToOpenAIResponse(chunk, state) {
|
||||
|
||||
// Response completed
|
||||
if (eventType === "response.completed") {
|
||||
// Extract usage from response.completed event
|
||||
const responseUsage = data.response?.usage;
|
||||
if (responseUsage && typeof responseUsage === "object") {
|
||||
const inputTokens = responseUsage.input_tokens || responseUsage.prompt_tokens || 0;
|
||||
const outputTokens = responseUsage.output_tokens || responseUsage.completion_tokens || 0;
|
||||
const cacheReadTokens = responseUsage.cache_read_input_tokens || 0;
|
||||
const cacheCreationTokens = responseUsage.cache_creation_input_tokens || 0;
|
||||
|
||||
// prompt_tokens = input_tokens + cache_read + cache_creation (all prompt-side tokens)
|
||||
const promptTokens = inputTokens + cacheReadTokens + cacheCreationTokens;
|
||||
|
||||
state.usage = {
|
||||
prompt_tokens: promptTokens,
|
||||
completion_tokens: outputTokens,
|
||||
total_tokens: promptTokens + outputTokens
|
||||
};
|
||||
|
||||
// Add prompt_tokens_details if cache tokens exist
|
||||
if (cacheReadTokens > 0 || cacheCreationTokens > 0) {
|
||||
state.usage.prompt_tokens_details = {};
|
||||
if (cacheReadTokens > 0) {
|
||||
state.usage.prompt_tokens_details.cached_tokens = cacheReadTokens;
|
||||
}
|
||||
if (cacheCreationTokens > 0) {
|
||||
state.usage.prompt_tokens_details.cache_creation_tokens = cacheCreationTokens;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!state.finishReasonSent) {
|
||||
state.finishReasonSent = true;
|
||||
return {
|
||||
const finalChunk = {
|
||||
id: state.chatId,
|
||||
object: "chat.completion.chunk",
|
||||
created: state.created,
|
||||
@@ -487,6 +516,13 @@ export function openaiResponsesToOpenAIResponse(chunk, state) {
|
||||
finish_reason: "stop"
|
||||
}]
|
||||
};
|
||||
|
||||
// Include usage in final chunk if available
|
||||
if (state.usage && typeof state.usage === "object") {
|
||||
finalChunk.usage = state.usage;
|
||||
}
|
||||
|
||||
return finalChunk;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -33,6 +33,40 @@ export function openaiToClaudeResponse(chunk, state) {
|
||||
const choice = chunk.choices[0];
|
||||
const delta = choice.delta;
|
||||
|
||||
// Track usage from OpenAI chunk if available
|
||||
if (chunk.usage && typeof chunk.usage === "object") {
|
||||
const promptTokens = typeof chunk.usage.prompt_tokens === "number" ? chunk.usage.prompt_tokens : 0;
|
||||
const outputTokens = typeof chunk.usage.completion_tokens === "number" ? chunk.usage.completion_tokens : 0;
|
||||
|
||||
// Extract cache tokens from prompt_tokens_details
|
||||
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens;
|
||||
const cacheCreationTokens = chunk.usage.prompt_tokens_details?.cache_creation_tokens;
|
||||
const cacheReadTokens = typeof cachedTokens === "number" ? cachedTokens : 0;
|
||||
const cacheCreateTokens = typeof cacheCreationTokens === "number" ? cacheCreationTokens : 0;
|
||||
|
||||
// input_tokens = prompt_tokens - cached_tokens - cache_creation_tokens
|
||||
// Because OpenAI's prompt_tokens includes all prompt-side tokens
|
||||
const inputTokens = promptTokens - cacheReadTokens - cacheCreateTokens;
|
||||
|
||||
state.usage = {
|
||||
input_tokens: inputTokens,
|
||||
output_tokens: outputTokens
|
||||
};
|
||||
|
||||
// Add cache_read_input_tokens if present
|
||||
if (cacheReadTokens > 0) {
|
||||
state.usage.cache_read_input_tokens = cacheReadTokens;
|
||||
}
|
||||
|
||||
// Add cache_creation_input_tokens if present
|
||||
if (cacheCreateTokens > 0) {
|
||||
state.usage.cache_creation_input_tokens = cacheCreateTokens;
|
||||
}
|
||||
|
||||
// Note: completion_tokens_details.reasoning_tokens is already included in output_tokens
|
||||
// No need to add separately as Claude expects total output_tokens
|
||||
}
|
||||
|
||||
// First chunk - ALWAYS send message_start first
|
||||
if (!state.messageStartSent) {
|
||||
state.messageStartSent = true;
|
||||
@@ -158,10 +192,12 @@ export function openaiToClaudeResponse(chunk, state) {
|
||||
});
|
||||
}
|
||||
|
||||
// Use tracked usage or default to 0
|
||||
const finalUsage = state.usage || { input_tokens: 0, output_tokens: 0 };
|
||||
results.push({
|
||||
type: "message_delta",
|
||||
delta: { stop_reason: convertFinishReason(choice.finish_reason) },
|
||||
usage: { output_tokens: 0 }
|
||||
usage: finalUsage
|
||||
});
|
||||
results.push({ type: "message_stop" });
|
||||
}
|
||||
|
||||
@@ -43,12 +43,13 @@ function extractUsage(chunk) {
|
||||
reasoning_tokens: chunk.usage.completion_tokens_details?.reasoning_tokens
|
||||
});
|
||||
}
|
||||
// Gemini format
|
||||
// Gemini format (Antigravity)
|
||||
if (chunk.usageMetadata && typeof chunk.usageMetadata === 'object') {
|
||||
return normalizeUsage({
|
||||
prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
|
||||
completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
|
||||
reasoning_tokens: chunk.usageMetadata.thoughtsTokenCount
|
||||
prompt_tokens: chunk.usageMetadata?.promptTokenCount || 0,
|
||||
completion_tokens: chunk.usageMetadata?.candidatesTokenCount || 0,
|
||||
cached_tokens: chunk.usageMetadata?.cachedContentTokenCount,
|
||||
reasoning_tokens: chunk.usageMetadata?.thoughtsTokenCount
|
||||
});
|
||||
}
|
||||
return null;
|
||||
@@ -90,21 +91,27 @@ function logUsage(provider, usage, model = null, connectionId = null) {
|
||||
if (!usage || typeof usage !== 'object') return;
|
||||
|
||||
const p = provider?.toUpperCase() || "UNKNOWN";
|
||||
const inTokens = usage?.prompt_tokens || 0;
|
||||
const outTokens = usage?.completion_tokens || 0;
|
||||
|
||||
// Support both formats:
|
||||
// - OpenAI: prompt_tokens, completion_tokens
|
||||
// - Claude: input_tokens, output_tokens
|
||||
const inTokens = usage?.prompt_tokens || usage?.input_tokens || 0;
|
||||
const outTokens = usage?.completion_tokens || usage?.output_tokens || 0;
|
||||
|
||||
let msg = `[${getTimeString()}] 📊 [USAGE] ${p} | in=${inTokens} | out=${outTokens}`;
|
||||
if (connectionId) msg += ` | account=${connectionId.slice(0, 8)}...`;
|
||||
|
||||
// Support both formats: cache_read_input_tokens (Claude) and cached_tokens (OpenAI/Gemini)
|
||||
const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens;
|
||||
if (cacheRead) msg += ` | cache_read=${cacheRead}`;
|
||||
|
||||
if (usage.cache_creation_input_tokens) msg += ` | cache_write=${usage.cache_creation_input_tokens}`;
|
||||
if (usage.cache_read_input_tokens) msg += ` | cache_read=${usage.cache_read_input_tokens}`;
|
||||
if (usage.cached_tokens) msg += ` | cached=${usage.cached_tokens}`;
|
||||
if (usage.reasoning_tokens) msg += ` | reasoning=${usage.reasoning_tokens}`;
|
||||
|
||||
console.log(`${COLORS.green}${msg}${COLORS.reset}`);
|
||||
|
||||
// Log to log.txt
|
||||
appendRequestLog({ model, provider, connectionId, tokens: usage, status: "200 OK" }).catch(() => {});
|
||||
appendRequestLog({ model, provider, connectionId, tokens: usage, status: "200 OK" }).catch(() => { });
|
||||
|
||||
// Save to DB
|
||||
saveRequestUsage({
|
||||
@@ -147,7 +154,7 @@ export function formatSSE(data, sourceFormat) {
|
||||
if (data === null || data === undefined) {
|
||||
return "data: null\n\n";
|
||||
}
|
||||
|
||||
|
||||
if (data && data.done) return "data: [DONE]\n\n";
|
||||
|
||||
// OpenAI Responses API format: has event field
|
||||
@@ -230,7 +237,7 @@ export function createSSEStream(options = {}) {
|
||||
const parsed = JSON.parse(trimmed.slice(5).trim());
|
||||
const extracted = extractUsage(parsed);
|
||||
if (extracted) usage = extracted;
|
||||
} catch {}
|
||||
} catch { }
|
||||
}
|
||||
// Normalize: ensure "data: " has space
|
||||
let output;
|
||||
@@ -263,7 +270,7 @@ export function createSSEStream(options = {}) {
|
||||
|
||||
// Translate: targetFormat -> openai -> sourceFormat
|
||||
const translated = translateResponse(targetFormat, sourceFormat, parsed, state);
|
||||
|
||||
|
||||
// Log OpenAI intermediate chunks (if available)
|
||||
if (translated?._openaiIntermediate) {
|
||||
for (const item of translated._openaiIntermediate) {
|
||||
@@ -271,7 +278,7 @@ export function createSSEStream(options = {}) {
|
||||
reqLogger?.appendOpenAIChunk?.(openaiOutput);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (translated?.length > 0) {
|
||||
for (const item of translated) {
|
||||
const output = formatSSE(item, sourceFormat);
|
||||
@@ -297,11 +304,11 @@ export function createSSEStream(options = {}) {
|
||||
reqLogger?.appendConvertedChunk?.(output);
|
||||
controller.enqueue(sharedEncoder.encode(output));
|
||||
}
|
||||
if (usage && typeof usage === 'object') {
|
||||
if (usage && typeof usage === 'object') {
|
||||
logUsage(provider, usage, model, connectionId);
|
||||
} else {
|
||||
// No usage data available - still mark request as completed
|
||||
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => {});
|
||||
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { });
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -311,7 +318,7 @@ if (usage && typeof usage === 'object') {
|
||||
const parsed = parseSSELine(buffer.trim());
|
||||
if (parsed && !parsed.done) {
|
||||
const translated = translateResponse(targetFormat, sourceFormat, parsed, state);
|
||||
|
||||
|
||||
// Log OpenAI intermediate chunks
|
||||
if (translated?._openaiIntermediate) {
|
||||
for (const item of translated._openaiIntermediate) {
|
||||
@@ -319,7 +326,7 @@ if (usage && typeof usage === 'object') {
|
||||
reqLogger?.appendOpenAIChunk?.(openaiOutput);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (translated?.length > 0) {
|
||||
for (const item of translated) {
|
||||
const output = formatSSE(item, sourceFormat);
|
||||
@@ -332,7 +339,7 @@ if (usage && typeof usage === 'object') {
|
||||
|
||||
// Flush remaining events (only once at stream end)
|
||||
const flushed = translateResponse(targetFormat, sourceFormat, null, state);
|
||||
|
||||
|
||||
// Log OpenAI intermediate chunks for flushed events
|
||||
if (flushed?._openaiIntermediate) {
|
||||
for (const item of flushed._openaiIntermediate) {
|
||||
@@ -340,7 +347,7 @@ if (usage && typeof usage === 'object') {
|
||||
reqLogger?.appendOpenAIChunk?.(openaiOutput);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (flushed?.length > 0) {
|
||||
for (const item of flushed) {
|
||||
const output = formatSSE(item, sourceFormat);
|
||||
@@ -354,11 +361,11 @@ if (usage && typeof usage === 'object') {
|
||||
reqLogger?.appendConvertedChunk?.(doneOutput);
|
||||
controller.enqueue(sharedEncoder.encode(doneOutput));
|
||||
|
||||
if (state?.usage && typeof state.usage === 'object') {
|
||||
if (state?.usage && typeof state.usage === 'object') {
|
||||
logUsage(state.provider || targetFormat, state.usage, model, connectionId);
|
||||
} else {
|
||||
// No usage data available - still mark request as completed
|
||||
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => {});
|
||||
appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { });
|
||||
}
|
||||
} catch (error) {
|
||||
console.log("Error in flush:", error);
|
||||
|
||||
Reference in New Issue
Block a user