mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
lmstudio: Fix context wheel by including token usage in streaming responses
Add stream_options with include_usage: true to the ChatCompletionRequest so LM Studio returns token usage in streaming responses. Previously, without this field, the API never included usage data, so the context wheel had nothing to display. Also move usage handling in the event mapper to run before the empty-choices guard. OpenAI-compatible servers send the final usage summary as a chunk with an empty choices array, so the old guard was discarding usage data instead of emitting a UsageUpdate event. Fixes #53790
This commit is contained in:
parent
e25458243b
commit
da6f241ff2
2 changed files with 25 additions and 14 deletions
|
|
@@ -1,4 +1,4 @@
|
|||
use anyhow::{Result, anyhow};
|
||||
use anyhow::Result;
|
||||
use collections::HashMap;
|
||||
use credentials_provider::CredentialsProvider;
|
||||
use fs::Fs;
|
||||
|
|
@@ -413,6 +413,9 @@ impl LmStudioLanguageModel {
|
|||
model: self.model.name.clone(),
|
||||
messages,
|
||||
stream: true,
|
||||
stream_options: Some(lmstudio::StreamOptions {
|
||||
include_usage: true,
|
||||
}),
|
||||
max_tokens: Some(-1),
|
||||
stop: Some(request.stop),
|
||||
// In LM Studio you can configure specific settings you'd like to use for your model.
|
||||
|
|
@@ -558,13 +561,23 @@ impl LmStudioEventMapper {
|
|||
&mut self,
|
||||
event: lmstudio::ResponseStreamEvent,
|
||||
) -> Vec<Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
|
||||
let mut events = Vec::new();
|
||||
|
||||
if let Some(usage) = event.usage {
|
||||
events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
|
||||
input_tokens: usage.prompt_tokens,
|
||||
output_tokens: usage.completion_tokens,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
})));
|
||||
}
|
||||
|
||||
// The final usage summary chunk from OpenAI-compatible servers has an empty choices array.
|
||||
// Return accumulated events instead of treating it as an error.
|
||||
let Some(choice) = event.choices.into_iter().next() else {
|
||||
return vec![Err(LanguageModelCompletionError::from(anyhow!(
|
||||
"Response contained no choices"
|
||||
)))];
|
||||
return events;
|
||||
};
|
||||
|
||||
let mut events = Vec::new();
|
||||
if let Some(content) = choice.delta.content {
|
||||
events.push(Ok(LanguageModelCompletionEvent::Text(content)));
|
||||
}
|
||||
|
|
@@ -603,15 +616,6 @@ impl LmStudioEventMapper {
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(usage) = event.usage {
|
||||
events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
|
||||
input_tokens: usage.prompt_tokens,
|
||||
output_tokens: usage.completion_tokens,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
})));
|
||||
}
|
||||
|
||||
match choice.finish_reason.as_deref() {
|
||||
Some("stop") => {
|
||||
events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
|
||||
|
|
|
|||
|
|
@@ -205,12 +205,19 @@ pub struct FunctionContent {
|
|||
pub arguments: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct StreamOptions {
|
||||
pub include_usage: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct ChatCompletionRequest {
|
||||
pub model: String,
|
||||
pub messages: Vec<ChatMessage>,
|
||||
pub stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub stream_options: Option<StreamOptions>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_tokens: Option<i32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub stop: Option<Vec<String>>,
|
||||
|
|
|
|||
Loading…
Reference in a new issue