mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
language_models: Add image support for Bedrock (#47673)
Closes #N/A (no existing issue - implemented to enable image input for Bedrock models)

This PR enables the "@" image mention feature for Bedrock models that support vision capabilities.

**Changes:**
- Added `supports_images()` method to Bedrock `Model` enum
- Wired up image support in the Bedrock language model provider
- Added `MessageContent::Image` handling to convert base64 images to Bedrock's expected format
- Added tool result image support

**Supported models:** Claude 3/3.5/4 family, Amazon Nova Pro/Lite, Meta Llama 3.2 Vision, Mistral Pixtral

Release Notes:
- Added image input support for Amazon Bedrock models with vision capabilities
This commit is contained in:
parent
c131713901
commit
13a9386a29
5 changed files with 104 additions and 14 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -9071,6 +9071,7 @@ dependencies = [
|
|||
"aws-config",
|
||||
"aws-credential-types",
|
||||
"aws_http_client",
|
||||
"base64 0.22.1",
|
||||
"bedrock",
|
||||
"chrono",
|
||||
"client",
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ pub use bedrock::operation::converse_stream::ConverseStreamInput as BedrockStrea
|
|||
pub use bedrock::types::{
|
||||
ContentBlock as BedrockRequestContent, ConversationRole as BedrockRole,
|
||||
ConverseOutput as BedrockResponse, ConverseStreamOutput as BedrockStreamingResponse,
|
||||
ImageBlock as BedrockImageBlock, Message as BedrockMessage,
|
||||
ImageBlock as BedrockImageBlock, ImageFormat as BedrockImageFormat,
|
||||
ImageSource as BedrockImageSource, Message as BedrockMessage,
|
||||
ReasoningContentBlock as BedrockThinkingBlock, ReasoningTextBlock as BedrockThinkingTextBlock,
|
||||
ResponseStream as BedrockResponseStream, SystemContentBlock as BedrockSystemContentBlock,
|
||||
ToolResultBlock as BedrockToolResultBlock,
|
||||
|
|
|
|||
|
|
@ -551,6 +551,46 @@ impl Model {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn supports_images(&self) -> bool {
|
||||
match self {
|
||||
// Anthropic Claude 3+ models (all support vision)
|
||||
Self::Claude3Opus
|
||||
| Self::Claude3Sonnet
|
||||
| Self::Claude3_5Sonnet
|
||||
| Self::Claude3_5SonnetV2
|
||||
| Self::Claude3_7Sonnet
|
||||
| Self::Claude3_7SonnetThinking
|
||||
| Self::ClaudeOpus4
|
||||
| Self::ClaudeOpus4Thinking
|
||||
| Self::ClaudeOpus4_1
|
||||
| Self::ClaudeOpus4_1Thinking
|
||||
| Self::ClaudeOpus4_5
|
||||
| Self::ClaudeOpus4_5Thinking
|
||||
| Self::ClaudeSonnet4
|
||||
| Self::ClaudeSonnet4Thinking
|
||||
| Self::ClaudeSonnet4_5
|
||||
| Self::ClaudeSonnet4_5Thinking
|
||||
| Self::Claude3_5Haiku
|
||||
| Self::ClaudeHaiku4_5
|
||||
| Self::Claude3Haiku => true,
|
||||
|
||||
// Amazon Nova visual models
|
||||
Self::AmazonNovaPro | Self::AmazonNovaLite => true,
|
||||
|
||||
// Meta Llama 3.2 Vision models
|
||||
Self::MetaLlama3211BInstructV1 | Self::MetaLlama3290BInstructV1 => true,
|
||||
|
||||
// Mistral Pixtral (visual model)
|
||||
Self::MistralPixtralLarge2502V1 => true,
|
||||
|
||||
// Custom models default to no image support
|
||||
Self::Custom { .. } => false,
|
||||
|
||||
// All other models don't support images
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn supports_caching(&self) -> bool {
|
||||
match self {
|
||||
// Only Claude models on Bedrock support caching
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ anyhow.workspace = true
|
|||
aws-config = { workspace = true, features = ["behavior-version-latest"] }
|
||||
aws-credential-types = { workspace = true, features = ["hardcoded-credentials"] }
|
||||
aws_http_client.workspace = true
|
||||
base64.workspace = true
|
||||
bedrock = { workspace = true, features = ["schemars"] }
|
||||
chrono.workspace = true
|
||||
client.workspace = true
|
||||
|
|
|
|||
|
|
@ -13,11 +13,12 @@ use bedrock::bedrock_client::types::{
|
|||
ReasoningContentBlockDelta, StopReason,
|
||||
};
|
||||
use bedrock::{
|
||||
BedrockAnyToolChoice, BedrockAutoToolChoice, BedrockBlob, BedrockError, BedrockInnerContent,
|
||||
BedrockMessage, BedrockModelMode, BedrockStreamingResponse, BedrockThinkingBlock,
|
||||
BedrockThinkingTextBlock, BedrockTool, BedrockToolChoice, BedrockToolConfig,
|
||||
BedrockToolInputSchema, BedrockToolResultBlock, BedrockToolResultContentBlock,
|
||||
BedrockToolResultStatus, BedrockToolSpec, BedrockToolUseBlock, Model, value_to_aws_document,
|
||||
BedrockAnyToolChoice, BedrockAutoToolChoice, BedrockBlob, BedrockError, BedrockImageBlock,
|
||||
BedrockImageFormat, BedrockImageSource, BedrockInnerContent, BedrockMessage, BedrockModelMode,
|
||||
BedrockStreamingResponse, BedrockThinkingBlock, BedrockThinkingTextBlock, BedrockTool,
|
||||
BedrockToolChoice, BedrockToolConfig, BedrockToolInputSchema, BedrockToolResultBlock,
|
||||
BedrockToolResultContentBlock, BedrockToolResultStatus, BedrockToolSpec, BedrockToolUseBlock,
|
||||
Model, value_to_aws_document,
|
||||
};
|
||||
use collections::{BTreeMap, HashMap};
|
||||
use credentials_provider::CredentialsProvider;
|
||||
|
|
@ -636,7 +637,7 @@ impl LanguageModel for BedrockModel {
|
|||
}
|
||||
|
||||
fn supports_images(&self) -> bool {
|
||||
false
|
||||
self.model.supports_images()
|
||||
}
|
||||
|
||||
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
|
||||
|
|
@ -835,7 +836,7 @@ pub fn into_bedrock(
|
|||
.context("failed to build Bedrock tool use block")
|
||||
.log_err()
|
||||
.map(BedrockInnerContent::ToolUse)
|
||||
},
|
||||
}
|
||||
MessageContent::ToolResult(tool_result) => {
|
||||
BedrockToolResultBlock::builder()
|
||||
.tool_use_id(tool_result.tool_use_id.to_string())
|
||||
|
|
@ -843,11 +844,42 @@ pub fn into_bedrock(
|
|||
LanguageModelToolResultContent::Text(text) => {
|
||||
BedrockToolResultContentBlock::Text(text.to_string())
|
||||
}
|
||||
LanguageModelToolResultContent::Image(_) => {
|
||||
BedrockToolResultContentBlock::Text(
|
||||
// TODO: Bedrock image support
|
||||
"[Tool responded with an image, but Zed doesn't support these in Bedrock models yet]".to_string()
|
||||
)
|
||||
LanguageModelToolResultContent::Image(image) => {
|
||||
use base64::Engine;
|
||||
|
||||
match base64::engine::general_purpose::STANDARD
|
||||
.decode(image.source.as_bytes())
|
||||
{
|
||||
Ok(image_bytes) => {
|
||||
match BedrockImageBlock::builder()
|
||||
.format(BedrockImageFormat::Png)
|
||||
.source(BedrockImageSource::Bytes(
|
||||
BedrockBlob::new(image_bytes),
|
||||
))
|
||||
.build()
|
||||
{
|
||||
Ok(image_block) => {
|
||||
BedrockToolResultContentBlock::Image(
|
||||
image_block,
|
||||
)
|
||||
}
|
||||
Err(err) => {
|
||||
BedrockToolResultContentBlock::Text(
|
||||
format!(
|
||||
"[Failed to build image block: {}]",
|
||||
err
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
BedrockToolResultContentBlock::Text(format!(
|
||||
"[Failed to decode tool result image: {}]",
|
||||
err
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
.status({
|
||||
|
|
@ -862,7 +894,22 @@ pub fn into_bedrock(
|
|||
.log_err()
|
||||
.map(BedrockInnerContent::ToolResult)
|
||||
}
|
||||
_ => None,
|
||||
MessageContent::Image(image) => {
|
||||
use base64::Engine;
|
||||
|
||||
let image_bytes = base64::engine::general_purpose::STANDARD
|
||||
.decode(image.source.as_bytes())
|
||||
.context("failed to decode base64 image data")
|
||||
.log_err()?;
|
||||
|
||||
BedrockImageBlock::builder()
|
||||
.format(BedrockImageFormat::Png)
|
||||
.source(BedrockImageSource::Bytes(BedrockBlob::new(image_bytes)))
|
||||
.build()
|
||||
.context("failed to build Bedrock image block")
|
||||
.log_err()
|
||||
.map(BedrockInnerContent::Image)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if message.cache && supports_caching {
|
||||
|
|
|
|||
Loading…
Reference in a new issue