bedrock: Always use 1M context window for anthropic models (#56195) (cherry-pick to preview) (#56201)

Cherry-pick of #56195 to preview

----
Closes #49617

Release Notes:

- bedrock: Always use the 1M context window for Anthropic models that offer it (Claude Opus 4.1 remains at 200K)

Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
This commit is contained in:
zed-zippy[bot] 2026-05-08 16:13:12 +00:00 committed by GitHub
parent daa7856706
commit 915fbde0ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 10 additions and 76 deletions

View file

@ -32,8 +32,6 @@ use thiserror::Error;
pub use crate::models::*;
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
pub async fn stream_completion(
client: bedrock::Client,
request: Request,
@ -70,13 +68,6 @@ pub async fn stream_completion(
_ => {}
}
if request.allow_extended_context {
additional_fields.insert(
"anthropic_beta".to_string(),
Document::Array(vec![Document::String(CONTEXT_1M_BETA_HEADER.to_string())]),
);
}
if !additional_fields.is_empty() {
response = response.additional_model_request_fields(Document::Object(additional_fields));
}
@ -211,7 +202,6 @@ pub struct Request {
pub temperature: Option<f32>,
pub top_k: Option<u32>,
pub top_p: Option<f32>,
pub allow_extended_context: bool,
}
#[derive(Debug, Serialize, Deserialize)]

View file

@ -384,19 +384,15 @@ impl Model {
}
pub fn max_token_count(&self) -> u64 {
self.max_tokens()
}
pub fn max_tokens(&self) -> u64 {
match self {
Self::ClaudeHaiku4_5
| Self::ClaudeSonnet4
| Self::ClaudeSonnet4_5
| Self::ClaudeOpus4_1
| Self::ClaudeOpus4_5
| Self::ClaudeOpus4_6
| Self::ClaudeOpus4_7
| Self::ClaudeSonnet4_6 => 200_000,
| Self::ClaudeSonnet4_6 => 1_000_000,
Self::ClaudeOpus4_1 => 200_000,
Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
@ -526,18 +522,6 @@ impl Model {
}
}
pub fn supports_extended_context(&self) -> bool {
matches!(
self,
Self::ClaudeSonnet4
| Self::ClaudeSonnet4_5
| Self::ClaudeOpus4_5
| Self::ClaudeOpus4_6
| Self::ClaudeOpus4_7
| Self::ClaudeSonnet4_6
)
}
pub fn supports_caching(&self) -> bool {
match self {
Self::ClaudeHaiku4_5
@ -1040,11 +1024,11 @@ mod tests {
}
#[test]
fn test_max_tokens() {
assert_eq!(Model::ClaudeSonnet4_5.max_tokens(), 200_000);
assert_eq!(Model::ClaudeOpus4_6.max_tokens(), 200_000);
assert_eq!(Model::Llama4Scout17B.max_tokens(), 128_000);
assert_eq!(Model::NovaPremier.max_tokens(), 1_000_000);
fn test_max_token_count() {
assert_eq!(Model::ClaudeSonnet4_5.max_token_count(), 1_000_000);
assert_eq!(Model::ClaudeOpus4_6.max_token_count(), 1_000_000);
assert_eq!(Model::Llama4Scout17B.max_token_count(), 128_000);
assert_eq!(Model::NovaPremier.max_token_count(), 1_000_000);
}
#[test]

View file

@ -113,7 +113,6 @@ pub struct AmazonBedrockSettings {
pub role_arn: Option<String>,
pub authentication_method: Option<BedrockAuthMethod>,
pub allow_global: Option<bool>,
pub allow_extended_context: Option<bool>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, EnumIter, IntoStaticStr, JsonSchema)]
@ -386,13 +385,6 @@ impl State {
.and_then(|s| s.allow_global)
.unwrap_or(false)
}
fn get_allow_extended_context(&self) -> bool {
self.settings
.as_ref()
.and_then(|s| s.allow_extended_context)
.unwrap_or(false)
}
}
pub struct BedrockLanguageModelProvider {
@ -718,14 +710,9 @@ impl LanguageModel for BedrockModel {
LanguageModelCompletionError,
>,
> {
let (region, allow_global, allow_extended_context) =
cx.read_entity(&self.state, |state, _cx| {
(
state.get_region(),
state.get_allow_global(),
state.get_allow_extended_context(),
)
});
let (region, allow_global) = cx.read_entity(&self.state, |state, _cx| {
(state.get_region(), state.get_allow_global())
});
let model_id = match self.model.cross_region_inference_id(&region, allow_global) {
Ok(s) => s,
@ -736,8 +723,6 @@ impl LanguageModel for BedrockModel {
let deny_tool_calls = request.tool_choice == Some(LanguageModelToolChoice::None);
let use_extended_context = allow_extended_context && self.model.supports_extended_context();
let request = match into_bedrock(
request,
model_id,
@ -746,7 +731,6 @@ impl LanguageModel for BedrockModel {
self.model.thinking_mode(),
self.model.supports_caching(),
self.model.supports_tool_use(),
use_extended_context,
) {
Ok(request) => request,
Err(err) => return futures::future::ready(Err(err.into())).boxed(),
@ -839,7 +823,6 @@ pub fn into_bedrock(
thinking_mode: BedrockModelMode,
supports_caching: bool,
supports_tool_use: bool,
allow_extended_context: bool,
) -> Result<bedrock::Request> {
let mut new_messages: Vec<BedrockMessage> = Vec::new();
let mut system_message = String::new();
@ -1144,7 +1127,6 @@ pub fn into_bedrock(
temperature: request.temperature.or(Some(default_temperature)),
top_k: None,
top_p: None,
allow_extended_context,
})
}

View file

@ -61,7 +61,6 @@ impl settings::Settings for AllLanguageModelSettings {
role_arn: None, // todo(was never a setting for this...)
authentication_method: bedrock.authentication_method.map(Into::into),
allow_global: bedrock.allow_global,
allow_extended_context: bedrock.allow_extended_context,
},
deepseek: DeepSeekSettings {
api_url: deepseek.api_url.unwrap(),

View file

@ -65,8 +65,6 @@ pub struct AmazonBedrockSettingsContent {
pub profile: Option<String>,
pub authentication_method: Option<BedrockAuthMethodContent>,
pub allow_global: Option<bool>,
/// Enable the 1M token extended context window beta for supported Anthropic models.
pub allow_extended_context: Option<bool>,
}
#[with_fallible_options]

View file

@ -150,25 +150,6 @@ We will support Cross-Region inference for each of the models on a best-effort b
For the most up-to-date supported regions and models, refer to the [Supported Models and Regions for Cross Region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html).
#### Extended Context Window {#bedrock-extended-context}
Supported Anthropic models on Bedrock offer a 1M-token extended context window, which Zed requests through the `anthropic_beta` request field. To enable this feature, set `"allow_extended_context": true` in your Bedrock configuration:
```json [settings]
{
"language_models": {
"bedrock": {
"authentication_method": "named_profile",
"region": "your-aws-region",
"profile": "your-profile-name",
"allow_extended_context": true
}
}
}
```
Zed only enables extended context for models that support it (Claude Sonnet 4, Claude Sonnet 4.5, Claude Sonnet 4.6, Claude Opus 4.5, Claude Opus 4.6, and Claude Opus 4.7). Extended context usage may increase API costs—refer to AWS Bedrock pricing for details.
#### Image Support {#bedrock-image-support}
Bedrock models that support vision (Claude 3 and later, Amazon Nova Pro and Lite, Meta Llama 3.2 Vision models, Mistral Pixtral) can receive images in conversations and tool results.