bedrock: Add system-prompt cache anchor on caching-capable models (#56474)

The Bedrock Converse API supports placing `CachePoint` blocks inside the
`system` field, but we were sending the system prompt as a single
`SystemContentBlock::Text`, which leaves the system tokens dependent on
whatever message-level breakpoint happens to fall within the 20-block
lookback window.

This widens `bedrock::Request.system` from `Option<String>` to
`Vec<BedrockSystemContentBlock>` and has `into_bedrock` emit
`[Text(system), CachePoint(Default)]` whenever the system prompt is
non-empty and the model supports prompt caching (an empty system prompt
yields no system blocks at all). The system prompt now anchors its own
cache prefix, on top of the existing tool-list anchor and per-message
breakpoint, so a stable system prompt keeps producing cache hits even
when earlier conversation turns change.

Bedrock does not support automatic caching or the 1-hour TTL, so the
default 5-minute ephemeral cache is the only option for this provider.

Release Notes:

- Improved Bedrock prompt cache utilization by anchoring the system
prompt as its own cache prefix
This commit is contained in:
Richard Feldman 2026-05-12 18:46:29 -04:00 committed by GitHub
parent 249f427f10
commit 800a795545
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 22 additions and 6 deletions

View file

@ -84,10 +84,8 @@ pub async fn stream_completion(
response = response.inference_config(inference_config);
if let Some(system) = request.system {
if !system.is_empty() {
response = response.system(BedrockSystemContentBlock::Text(system));
}
for system_block in request.system {
response = response.system(system_block);
}
if let Some(guardrail_id) = &request.guardrail_identifier {
@ -207,7 +205,11 @@ pub struct Request {
pub messages: Vec<BedrockMessage>,
pub tools: Option<BedrockToolConfig>,
pub thinking: Option<Thinking>,
pub system: Option<String>,
/// System content blocks in prefix order. Typically empty (no system
/// prompt), `[Text(...)]`, or, when the model supports prompt caching,
/// `[Text(...), CachePoint(...)]` so the system prompt anchors its own cache
/// prefix independent of tools and messages.
pub system: Vec<BedrockSystemContentBlock>,
pub metadata: Option<Metadata>,
pub stop_sequences: Vec<String>,
pub temperature: Option<f32>,

View file

@ -7,6 +7,7 @@ use aws_config::stalled_stream_protection::StalledStreamProtectionConfig;
use aws_config::{BehaviorVersion, Region};
use aws_credential_types::{Credentials, Token};
use aws_http_client::AwsHttpClient;
use bedrock::BedrockSystemContentBlock;
use bedrock::bedrock_client::Client as BedrockClient;
use bedrock::bedrock_client::config::timeout::TimeoutConfig;
use bedrock::bedrock_client::types::{
@ -1104,11 +1105,24 @@ pub fn into_bedrock(
)
};
let mut system_blocks: Vec<BedrockSystemContentBlock> = Vec::new();
if !system_message.is_empty() {
system_blocks.push(BedrockSystemContentBlock::Text(system_message));
if supports_caching {
system_blocks.push(BedrockSystemContentBlock::CachePoint(
CachePointBlock::builder()
.r#type(CachePointType::Default)
.build()
.context("failed to build system cache point block")?,
));
}
}
Ok(bedrock::Request {
model,
messages: new_messages,
max_tokens: max_output_tokens,
system: Some(system_message),
system: system_blocks,
tools: tool_config,
thinking: if request.thinking_allowed {
match thinking_mode {