lmstudio: Fix context wheel by including token usage in streaming responses

Add stream_options with include_usage: true to the ChatCompletionRequest so
LM Studio returns token usage in streaming responses. Previously, without
this field, the API never included usage data, so the context wheel had nothing
to display.

Also move usage handling in the event mapper to run before the empty-choices
guard. OpenAI-compatible servers send the final usage summary as a chunk with
an empty choices array, so the old guard was discarding usage data instead of
emitting a UsageUpdate event.

Fixes #53790
This commit is contained in:
Gabriele Ancillai 2026-05-27 12:40:59 -05:00
parent e25458243b
commit da6f241ff2
2 changed files with 25 additions and 14 deletions

View file

@ -1,4 +1,4 @@
use anyhow::{Result, anyhow};
use anyhow::Result;
use collections::HashMap;
use credentials_provider::CredentialsProvider;
use fs::Fs;
@ -413,6 +413,9 @@ impl LmStudioLanguageModel {
model: self.model.name.clone(),
messages,
stream: true,
stream_options: Some(lmstudio::StreamOptions {
include_usage: true,
}),
max_tokens: Some(-1),
stop: Some(request.stop),
// In LM Studio you can configure specific settings you'd like to use for your model.
@ -558,13 +561,23 @@ impl LmStudioEventMapper {
&mut self,
event: lmstudio::ResponseStreamEvent,
) -> Vec<Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
let mut events = Vec::new();
if let Some(usage) = event.usage {
events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
input_tokens: usage.prompt_tokens,
output_tokens: usage.completion_tokens,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
})));
}
// The final usage summary chunk from OpenAI-compatible servers has an empty choices array.
// Return accumulated events instead of treating it as an error.
let Some(choice) = event.choices.into_iter().next() else {
return vec![Err(LanguageModelCompletionError::from(anyhow!(
"Response contained no choices"
)))];
return events;
};
let mut events = Vec::new();
if let Some(content) = choice.delta.content {
events.push(Ok(LanguageModelCompletionEvent::Text(content)));
}
@ -603,15 +616,6 @@ impl LmStudioEventMapper {
}
}
if let Some(usage) = event.usage {
events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
input_tokens: usage.prompt_tokens,
output_tokens: usage.completion_tokens,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
})));
}
match choice.finish_reason.as_deref() {
Some("stop") => {
events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));

View file

@ -205,12 +205,19 @@ pub struct FunctionContent {
pub arguments: String,
}
/// Streaming options serialized into the `stream_options` field of a
/// [`ChatCompletionRequest`].
///
/// This type only derives `Serialize`: it is written into outgoing requests
/// and is not parsed back from responses.
#[derive(Serialize, Debug)]
pub struct StreamOptions {
/// When `true`, asks the server to report token usage as part of the
/// streaming response. OpenAI-compatible servers deliver that usage summary
/// in a final chunk whose `choices` array is empty.
pub include_usage: bool,
}
#[derive(Serialize, Debug)]
pub struct ChatCompletionRequest {
pub model: String,
pub messages: Vec<ChatMessage>,
pub stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub stream_options: Option<StreamOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_tokens: Option<i32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub stop: Option<Vec<String>>,