mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
bedrock: Add system-prompt cache anchor on caching-capable models (#56474)
The Bedrock Converse API supports placing `CachePoint` blocks inside the `system` field, but we were sending the system prompt as a single `SystemContentBlock::Text`, which leaves the system tokens dependent on whatever message-level breakpoint happens to fall within the 20-block lookback window. This widens `bedrock::Request.system` from `Option<String>` to `Vec<BedrockSystemContentBlock>` and has `into_bedrock` emit `[Text(system), CachePoint(Default)]` whenever the system prompt is non-empty and the model supports prompt caching (a non-empty prompt on a model without caching support is sent as `[Text(system)]` alone, and an empty prompt produces no system blocks at all). The system prompt now anchors its own cache prefix, on top of the existing tool-list anchor and per-message breakpoint, so a stable system prompt keeps producing cache hits even when earlier conversation turns change. Bedrock does not support automatic caching or the 1-hour TTL, so the default 5-minute ephemeral cache is the only option for this provider. Release Notes: - Improved Bedrock prompt cache utilization by anchoring the system prompt as its own cache prefix.
This commit is contained in:
parent
249f427f10
commit
800a795545
2 changed files with 22 additions and 6 deletions
|
|
@ -84,10 +84,8 @@ pub async fn stream_completion(
|
|||
|
||||
response = response.inference_config(inference_config);
|
||||
|
||||
if let Some(system) = request.system {
|
||||
if !system.is_empty() {
|
||||
response = response.system(BedrockSystemContentBlock::Text(system));
|
||||
}
|
||||
for system_block in request.system {
|
||||
response = response.system(system_block);
|
||||
}
|
||||
|
||||
if let Some(guardrail_id) = &request.guardrail_identifier {
|
||||
|
|
@ -207,7 +205,11 @@ pub struct Request {
|
|||
pub messages: Vec<BedrockMessage>,
|
||||
pub tools: Option<BedrockToolConfig>,
|
||||
pub thinking: Option<Thinking>,
|
||||
pub system: Option<String>,
|
||||
/// System content blocks in prefix order. Typically `[Text(...)]` or, when
|
||||
/// the model supports prompt caching, `[Text(...), CachePoint(...)]` so the
|
||||
/// system prompt anchors its own cache prefix independent of tools and
|
||||
/// messages.
|
||||
pub system: Vec<BedrockSystemContentBlock>,
|
||||
pub metadata: Option<Metadata>,
|
||||
pub stop_sequences: Vec<String>,
|
||||
pub temperature: Option<f32>,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use aws_config::stalled_stream_protection::StalledStreamProtectionConfig;
|
|||
use aws_config::{BehaviorVersion, Region};
|
||||
use aws_credential_types::{Credentials, Token};
|
||||
use aws_http_client::AwsHttpClient;
|
||||
use bedrock::BedrockSystemContentBlock;
|
||||
use bedrock::bedrock_client::Client as BedrockClient;
|
||||
use bedrock::bedrock_client::config::timeout::TimeoutConfig;
|
||||
use bedrock::bedrock_client::types::{
|
||||
|
|
@ -1104,11 +1105,24 @@ pub fn into_bedrock(
|
|||
)
|
||||
};
|
||||
|
||||
let mut system_blocks: Vec<BedrockSystemContentBlock> = Vec::new();
|
||||
if !system_message.is_empty() {
|
||||
system_blocks.push(BedrockSystemContentBlock::Text(system_message));
|
||||
if supports_caching {
|
||||
system_blocks.push(BedrockSystemContentBlock::CachePoint(
|
||||
CachePointBlock::builder()
|
||||
.r#type(CachePointType::Default)
|
||||
.build()
|
||||
.context("failed to build system cache point block")?,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(bedrock::Request {
|
||||
model,
|
||||
messages: new_messages,
|
||||
max_tokens: max_output_tokens,
|
||||
system: Some(system_message),
|
||||
system: system_blocks,
|
||||
tools: tool_config,
|
||||
thinking: if request.thinking_allowed {
|
||||
match thinking_mode {
|
||||
|
|
|
|||
Loading…
Reference in a new issue