mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
bedrock: Always use 1M context window for anthropic models (#56195) (cherry-pick to preview) (#56201)
Cherry-pick of #56195 to preview

----

Closes #49617

Release Notes:

- bedrock: Always use 1M context windows for Anthropic models

Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
This commit is contained in:
parent
daa7856706
commit
915fbde0ae
6 changed files with 10 additions and 76 deletions
|
|
@ -32,8 +32,6 @@ use thiserror::Error;
|
|||
|
||||
pub use crate::models::*;
|
||||
|
||||
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
|
||||
|
||||
pub async fn stream_completion(
|
||||
client: bedrock::Client,
|
||||
request: Request,
|
||||
|
|
@ -70,13 +68,6 @@ pub async fn stream_completion(
|
|||
_ => {}
|
||||
}
|
||||
|
||||
if request.allow_extended_context {
|
||||
additional_fields.insert(
|
||||
"anthropic_beta".to_string(),
|
||||
Document::Array(vec![Document::String(CONTEXT_1M_BETA_HEADER.to_string())]),
|
||||
);
|
||||
}
|
||||
|
||||
if !additional_fields.is_empty() {
|
||||
response = response.additional_model_request_fields(Document::Object(additional_fields));
|
||||
}
|
||||
|
|
@ -211,7 +202,6 @@ pub struct Request {
|
|||
pub temperature: Option<f32>,
|
||||
pub top_k: Option<u32>,
|
||||
pub top_p: Option<f32>,
|
||||
pub allow_extended_context: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
|
|
|||
|
|
@ -384,19 +384,15 @@ impl Model {
|
|||
}
|
||||
|
||||
pub fn max_token_count(&self) -> u64 {
|
||||
self.max_tokens()
|
||||
}
|
||||
|
||||
pub fn max_tokens(&self) -> u64 {
|
||||
match self {
|
||||
Self::ClaudeHaiku4_5
|
||||
| Self::ClaudeSonnet4
|
||||
| Self::ClaudeSonnet4_5
|
||||
| Self::ClaudeOpus4_1
|
||||
| Self::ClaudeOpus4_5
|
||||
| Self::ClaudeOpus4_6
|
||||
| Self::ClaudeOpus4_7
|
||||
| Self::ClaudeSonnet4_6 => 200_000,
|
||||
| Self::ClaudeSonnet4_6 => 1_000_000,
|
||||
Self::ClaudeOpus4_1 => 200_000,
|
||||
Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
|
||||
Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
|
||||
Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
|
||||
|
|
@ -526,18 +522,6 @@ impl Model {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn supports_extended_context(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::ClaudeSonnet4
|
||||
| Self::ClaudeSonnet4_5
|
||||
| Self::ClaudeOpus4_5
|
||||
| Self::ClaudeOpus4_6
|
||||
| Self::ClaudeOpus4_7
|
||||
| Self::ClaudeSonnet4_6
|
||||
)
|
||||
}
|
||||
|
||||
pub fn supports_caching(&self) -> bool {
|
||||
match self {
|
||||
Self::ClaudeHaiku4_5
|
||||
|
|
@ -1040,11 +1024,11 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_max_tokens() {
|
||||
assert_eq!(Model::ClaudeSonnet4_5.max_tokens(), 200_000);
|
||||
assert_eq!(Model::ClaudeOpus4_6.max_tokens(), 200_000);
|
||||
assert_eq!(Model::Llama4Scout17B.max_tokens(), 128_000);
|
||||
assert_eq!(Model::NovaPremier.max_tokens(), 1_000_000);
|
||||
fn test_max_token_count() {
|
||||
assert_eq!(Model::ClaudeSonnet4_5.max_token_count(), 1_000_000);
|
||||
assert_eq!(Model::ClaudeOpus4_6.max_token_count(), 1_000_000);
|
||||
assert_eq!(Model::Llama4Scout17B.max_token_count(), 128_000);
|
||||
assert_eq!(Model::NovaPremier.max_token_count(), 1_000_000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -113,7 +113,6 @@ pub struct AmazonBedrockSettings {
|
|||
pub role_arn: Option<String>,
|
||||
pub authentication_method: Option<BedrockAuthMethod>,
|
||||
pub allow_global: Option<bool>,
|
||||
pub allow_extended_context: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, EnumIter, IntoStaticStr, JsonSchema)]
|
||||
|
|
@ -386,13 +385,6 @@ impl State {
|
|||
.and_then(|s| s.allow_global)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn get_allow_extended_context(&self) -> bool {
|
||||
self.settings
|
||||
.as_ref()
|
||||
.and_then(|s| s.allow_extended_context)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BedrockLanguageModelProvider {
|
||||
|
|
@ -718,14 +710,9 @@ impl LanguageModel for BedrockModel {
|
|||
LanguageModelCompletionError,
|
||||
>,
|
||||
> {
|
||||
let (region, allow_global, allow_extended_context) =
|
||||
cx.read_entity(&self.state, |state, _cx| {
|
||||
(
|
||||
state.get_region(),
|
||||
state.get_allow_global(),
|
||||
state.get_allow_extended_context(),
|
||||
)
|
||||
});
|
||||
let (region, allow_global) = cx.read_entity(&self.state, |state, _cx| {
|
||||
(state.get_region(), state.get_allow_global())
|
||||
});
|
||||
|
||||
let model_id = match self.model.cross_region_inference_id(&region, allow_global) {
|
||||
Ok(s) => s,
|
||||
|
|
@ -736,8 +723,6 @@ impl LanguageModel for BedrockModel {
|
|||
|
||||
let deny_tool_calls = request.tool_choice == Some(LanguageModelToolChoice::None);
|
||||
|
||||
let use_extended_context = allow_extended_context && self.model.supports_extended_context();
|
||||
|
||||
let request = match into_bedrock(
|
||||
request,
|
||||
model_id,
|
||||
|
|
@ -746,7 +731,6 @@ impl LanguageModel for BedrockModel {
|
|||
self.model.thinking_mode(),
|
||||
self.model.supports_caching(),
|
||||
self.model.supports_tool_use(),
|
||||
use_extended_context,
|
||||
) {
|
||||
Ok(request) => request,
|
||||
Err(err) => return futures::future::ready(Err(err.into())).boxed(),
|
||||
|
|
@ -839,7 +823,6 @@ pub fn into_bedrock(
|
|||
thinking_mode: BedrockModelMode,
|
||||
supports_caching: bool,
|
||||
supports_tool_use: bool,
|
||||
allow_extended_context: bool,
|
||||
) -> Result<bedrock::Request> {
|
||||
let mut new_messages: Vec<BedrockMessage> = Vec::new();
|
||||
let mut system_message = String::new();
|
||||
|
|
@ -1144,7 +1127,6 @@ pub fn into_bedrock(
|
|||
temperature: request.temperature.or(Some(default_temperature)),
|
||||
top_k: None,
|
||||
top_p: None,
|
||||
allow_extended_context,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ impl settings::Settings for AllLanguageModelSettings {
|
|||
role_arn: None, // todo(was never a setting for this...)
|
||||
authentication_method: bedrock.authentication_method.map(Into::into),
|
||||
allow_global: bedrock.allow_global,
|
||||
allow_extended_context: bedrock.allow_extended_context,
|
||||
},
|
||||
deepseek: DeepSeekSettings {
|
||||
api_url: deepseek.api_url.unwrap(),
|
||||
|
|
|
|||
|
|
@ -65,8 +65,6 @@ pub struct AmazonBedrockSettingsContent {
|
|||
pub profile: Option<String>,
|
||||
pub authentication_method: Option<BedrockAuthMethodContent>,
|
||||
pub allow_global: Option<bool>,
|
||||
/// Enable the 1M token extended context window beta for supported Anthropic models.
|
||||
pub allow_extended_context: Option<bool>,
|
||||
}
|
||||
|
||||
#[with_fallible_options]
|
||||
|
|
|
|||
|
|
@ -150,25 +150,6 @@ We will support Cross-Region inference for each of the models on a best-effort b
|
|||
|
||||
For the most up-to-date supported regions and models, refer to the [Supported Models and Regions for Cross Region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html).
|
||||
|
||||
#### Extended Context Window {#bedrock-extended-context}
|
||||
|
||||
Anthropic models on Bedrock support a 1M token extended context window through the `anthropic_beta` API parameter. To enable this feature, set `"allow_extended_context": true` in your Bedrock configuration:
|
||||
|
||||
```json [settings]
|
||||
{
|
||||
"language_models": {
|
||||
"bedrock": {
|
||||
"authentication_method": "named_profile",
|
||||
"region": "your-aws-region",
|
||||
"profile": "your-profile-name",
|
||||
"allow_extended_context": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Zed enables extended context for supported models (Claude Sonnet 4.5, Claude Opus 4.6, and Claude Opus 4.7). Extended context usage may increase API costs—refer to AWS Bedrock pricing for details.
|
||||
|
||||
#### Image Support {#bedrock-image-support}
|
||||
|
||||
Bedrock models that support vision (Claude 3 and later, Amazon Nova Pro and Lite, Meta Llama 3.2 Vision models, Mistral Pixtral) can receive images in conversations and tool results.
|
||||
|
|
|
|||
Loading…
Reference in a new issue