mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
Add stream_options.include_usage for OpenAI-compatible API token usage (#45812)
## Summary
This PR enables token usage reporting in streaming responses for
OpenAI-compatible APIs (OpenAI, xAI/Grok, OpenRouter, etc.).
## Problem
Currently, the token counter UI in the Agent Panel doesn't display usage
for some OpenAI-compatible providers because they don't return usage
data during streaming by default. According to OpenAI's API
documentation, the `stream_options.include_usage` parameter must be set
to `true` to receive usage statistics in streaming responses.
## Solution
- Added a `StreamOptions` struct with an `include_usage` field to the
`open_ai` crate
- Added a `stream_options` field to the `Request` struct
- Automatically set `stream_options: { include_usage: true }` when
`stream: true`
- Updated the non-streaming edit_prediction requests to pass
`stream_options: None`
## Testing
Tested with xAI Grok models - token counter now correctly shows usage
after sending a message.
## References
- [OpenAI Chat Completions API -
stream_options](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options)
- [xAI API Documentation](https://docs.x.ai/api)
This commit is contained in:
parent
8a25373fb9
commit
905d28cc54
4 changed files with 23 additions and 0 deletions
|
|
@ -137,6 +137,7 @@ impl Mercury {
|
|||
content: open_ai::MessageContent::Plain(prompt),
|
||||
}],
|
||||
stream: false,
|
||||
stream_options: None,
|
||||
max_completion_tokens: None,
|
||||
stop: vec![],
|
||||
temperature: None,
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ impl PlainOpenAiClient {
|
|||
model: model.to_string(),
|
||||
messages,
|
||||
stream: false,
|
||||
stream_options: None,
|
||||
max_completion_tokens: Some(max_tokens),
|
||||
stop: Vec::new(),
|
||||
temperature: None,
|
||||
|
|
@ -490,6 +491,7 @@ impl BatchingOpenAiClient {
|
|||
model: serializable_request.model,
|
||||
messages,
|
||||
stream: false,
|
||||
stream_options: None,
|
||||
max_completion_tokens: Some(serializable_request.max_tokens),
|
||||
stop: Vec::new(),
|
||||
temperature: None,
|
||||
|
|
|
|||
|
|
@ -506,6 +506,11 @@ pub fn into_open_ai(
|
|||
model: model_id.into(),
|
||||
messages,
|
||||
stream,
|
||||
stream_options: if stream {
|
||||
Some(open_ai::StreamOptions::default())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
stop: request.stop,
|
||||
temperature: request.temperature.or(Some(1.0)),
|
||||
max_completion_tokens: max_output_tokens,
|
||||
|
|
|
|||
|
|
@ -295,12 +295,27 @@ impl Model {
|
|||
}
|
||||
}
|
||||
|
||||
/// Options serialized under the `stream_options` field of a `Request`.
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct StreamOptions {
|
||||
/// When `true`, asks the API to report token usage in streaming responses.
pub include_usage: bool,
|
||||
}
|
||||
|
||||
// Hand-written instead of derived: a derived `Default` would yield
// `include_usage: false`, whereas the default here opts in to usage reporting.
impl Default for StreamOptions {
|
||||
/// Returns options with `include_usage` set to `true`.
fn default() -> Self {
|
||||
Self {
|
||||
include_usage: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct Request {
|
||||
pub model: String,
|
||||
pub messages: Vec<RequestMessage>,
|
||||
pub stream: bool,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub stream_options: Option<StreamOptions>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub max_completion_tokens: Option<u64>,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub stop: Vec<String>,
|
||||
|
|
|
|||
Loading…
Reference in a new issue