mirror of
https://github.com/zed-industries/zed.git
synced 2026-05-31 19:05:00 +07:00
agent: Support image output from MCP tools (#57134)
Release Notes: - agent: Support image output from MCP tools
This commit is contained in:
parent
c352cad169
commit
da43bdb648
5 changed files with 225 additions and 122 deletions
|
|
@ -26,10 +26,10 @@ use gpui::{
|
|||
use indoc::indoc;
|
||||
use language_model::{
|
||||
CompletionIntent, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
|
||||
LanguageModelId, LanguageModelProviderId, LanguageModelProviderName, LanguageModelRegistry,
|
||||
LanguageModelRequest, LanguageModelRequestMessage, LanguageModelToolResult,
|
||||
LanguageModelToolSchemaFormat, LanguageModelToolUse, MessageContent, Role, StopReason,
|
||||
TokenUsage,
|
||||
LanguageModelId, LanguageModelImageExt, LanguageModelProviderId, LanguageModelProviderName,
|
||||
LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
|
||||
LanguageModelToolResult, LanguageModelToolSchemaFormat, LanguageModelToolUse, MessageContent,
|
||||
Role, StopReason, TokenUsage,
|
||||
fake_provider::{FakeLanguageModel, FakeLanguageModelProvider},
|
||||
};
|
||||
use pretty_assertions::assert_eq;
|
||||
|
|
@ -1656,6 +1656,7 @@ async fn test_mcp_tool_multi_content_response(cx: &mut TestAppContext) {
|
|||
|
||||
let (tool_call_params, tool_call_response) = mcp_tool_calls.next().await.unwrap();
|
||||
assert_eq!(tool_call_params.name, "screenshot");
|
||||
let image_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
|
||||
tool_call_response
|
||||
.send(context_server::types::CallToolResponse {
|
||||
content: vec![
|
||||
|
|
@ -1663,7 +1664,7 @@ async fn test_mcp_tool_multi_content_response(cx: &mut TestAppContext) {
|
|||
text: "Some text".into(),
|
||||
},
|
||||
context_server::types::ToolResponseContent::Image {
|
||||
data: "aGVsbG8=".into(),
|
||||
data: image_data.into(),
|
||||
mime_type: "image/png".into(),
|
||||
},
|
||||
context_server::types::ToolResponseContent::Text {
|
||||
|
|
@ -1691,13 +1692,25 @@ async fn test_mcp_tool_multi_content_response(cx: &mut TestAppContext) {
|
|||
})
|
||||
.expect("expected a tool result");
|
||||
assert_eq!(tool_result.tool_use_id, "tool_1".into());
|
||||
assert_eq!(tool_result.content.len(), 2);
|
||||
assert_eq!(tool_result.content.len(), 3);
|
||||
assert_eq!(
|
||||
tool_result.content[0],
|
||||
language_model::LanguageModelToolResultContent::Text(Arc::from("Some text"))
|
||||
);
|
||||
let expected_image =
|
||||
language_model::LanguageModelImage::from_base64_image(image_data, "image/png")
|
||||
.expect("image conversion should not error")
|
||||
.expect("image conversion should succeed");
|
||||
assert_eq!(
|
||||
tool_result.content[0],
|
||||
language_model::LanguageModelToolResultContent::Text(Arc::from("Some text"))
|
||||
);
|
||||
assert_eq!(
|
||||
tool_result.content[1],
|
||||
language_model::LanguageModelToolResultContent::Image(expected_image)
|
||||
);
|
||||
assert_eq!(
|
||||
tool_result.content[2],
|
||||
language_model::LanguageModelToolResultContent::Text(Arc::from("Some more text"))
|
||||
);
|
||||
fake_model.end_last_completion_stream();
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use collections::{BTreeMap, HashMap};
|
|||
use context_server::{ContextServerId, client::NotificationSubscription};
|
||||
use futures::FutureExt as _;
|
||||
use gpui::{App, AppContext, AsyncApp, Context, Entity, EventEmitter, SharedString, Task};
|
||||
use language_model::LanguageModelToolResultContent;
|
||||
use language_model::{LanguageModelImage, LanguageModelImageExt, LanguageModelToolResultContent};
|
||||
use project::context_server_store::{ContextServerStatus, ContextServerStore};
|
||||
use std::sync::Arc;
|
||||
use util::ResultExt;
|
||||
|
|
@ -346,7 +346,7 @@ impl AnyAgentTool for ContextServerTool {
|
|||
let authorize =
|
||||
event_stream.authorize_third_party_tool(initial_title, tool_id, display_name, cx);
|
||||
|
||||
cx.spawn(async move |_cx| {
|
||||
cx.spawn(async move |cx| {
|
||||
let input = input
|
||||
.recv()
|
||||
.await
|
||||
|
|
@ -394,15 +394,50 @@ impl AnyAgentTool for ContextServerTool {
|
|||
}
|
||||
|
||||
let mut llm_output = Vec::new();
|
||||
let mut tool_call_content = Vec::new();
|
||||
let mut concatenated_text = String::new();
|
||||
for content in response.content {
|
||||
match content {
|
||||
context_server::types::ToolResponseContent::Text { text } => {
|
||||
concatenated_text.push_str(&text);
|
||||
tool_call_content.push(acp::ToolCallContent::Content(acp::Content::new(
|
||||
acp::ContentBlock::Text(acp::TextContent::new(text.clone())),
|
||||
)));
|
||||
llm_output.push(LanguageModelToolResultContent::Text(text.into()));
|
||||
}
|
||||
context_server::types::ToolResponseContent::Image { .. } => {
|
||||
log::warn!("Ignoring image content from tool response");
|
||||
context_server::types::ToolResponseContent::Image { data, mime_type } => {
|
||||
tool_call_content.push(acp::ToolCallContent::Content(acp::Content::new(
|
||||
acp::ContentBlock::Image(acp::ImageContent::new(
|
||||
data.clone(),
|
||||
mime_type.clone(),
|
||||
)),
|
||||
)));
|
||||
let language_model_image = cx
|
||||
.background_spawn({
|
||||
let mime_type = mime_type.clone();
|
||||
async move {
|
||||
LanguageModelImage::from_base64_image(&data, &mime_type)
|
||||
}
|
||||
})
|
||||
.await;
|
||||
match language_model_image {
|
||||
Ok(Some(image)) => {
|
||||
llm_output.push(LanguageModelToolResultContent::Image(image));
|
||||
}
|
||||
Ok(None) => {
|
||||
log::warn!(
|
||||
"Skipping MCP tool response image with MIME type `{}` because it cannot be converted for language model input",
|
||||
mime_type
|
||||
);
|
||||
}
|
||||
Err(error) => {
|
||||
log::warn!(
|
||||
"Failed to convert MCP tool response image with MIME type `{}` for language model input: {:#}",
|
||||
mime_type,
|
||||
error
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
context_server::types::ToolResponseContent::Audio { .. } => {
|
||||
log::warn!("Ignoring audio content from tool response");
|
||||
|
|
@ -415,6 +450,10 @@ impl AnyAgentTool for ContextServerTool {
|
|||
}
|
||||
}
|
||||
}
|
||||
if !tool_call_content.is_empty() {
|
||||
event_stream
|
||||
.update_fields(acp::ToolCallUpdateFields::new().content(tool_call_content));
|
||||
}
|
||||
let raw_output = serde_json::Value::String(concatenated_text);
|
||||
Ok(AgentToolOutput {
|
||||
raw_output,
|
||||
|
|
|
|||
|
|
@ -6585,6 +6585,32 @@ impl ThreadView {
|
|||
)
|
||||
}),
|
||||
)
|
||||
.when(!use_card_layout, |this| {
|
||||
let button_id =
|
||||
SharedString::from(format!("tool_output-collapse-{:?}", tool_call.id));
|
||||
let tool_call_id = tool_call.id.clone();
|
||||
|
||||
this.child(
|
||||
div()
|
||||
.ml(rems(0.4))
|
||||
.px_3p5()
|
||||
.pt_2()
|
||||
.border_l_1()
|
||||
.border_color(self.tool_card_border_color(cx))
|
||||
.child(
|
||||
IconButton::new(button_id, IconName::ChevronUp)
|
||||
.full_width()
|
||||
.style(ButtonStyle::Outlined)
|
||||
.icon_color(Color::Muted)
|
||||
.on_click(cx.listener({
|
||||
move |this: &mut Self, _, _, cx: &mut Context<Self>| {
|
||||
this.expanded_tool_calls.remove(&tool_call_id);
|
||||
cx.notify();
|
||||
}
|
||||
})),
|
||||
),
|
||||
)
|
||||
})
|
||||
.into_any(),
|
||||
ToolCallStatus::Rejected => Empty.into_any(),
|
||||
}
|
||||
|
|
@ -7580,7 +7606,6 @@ impl ThreadView {
|
|||
} else if let Some(markdown) = content.markdown() {
|
||||
self.render_markdown_output(
|
||||
markdown.clone(),
|
||||
tool_call.id.clone(),
|
||||
context_ix,
|
||||
card_layout,
|
||||
window,
|
||||
|
|
@ -7724,14 +7749,11 @@ impl ThreadView {
|
|||
fn render_markdown_output(
|
||||
&self,
|
||||
markdown: Entity<Markdown>,
|
||||
tool_call_id: acp::ToolCallId,
|
||||
context_ix: usize,
|
||||
card_layout: bool,
|
||||
window: &Window,
|
||||
cx: &Context<Self>,
|
||||
) -> AnyElement {
|
||||
let button_id = SharedString::from(format!("tool_output-{:?}", tool_call_id));
|
||||
|
||||
v_flex()
|
||||
.gap_2()
|
||||
.map(|this| {
|
||||
|
|
@ -7754,20 +7776,6 @@ impl ThreadView {
|
|||
MarkdownStyle::themed(MarkdownFont::Agent, window, cx),
|
||||
cx,
|
||||
))
|
||||
.when(!card_layout, |this| {
|
||||
this.child(
|
||||
IconButton::new(button_id, IconName::ChevronUp)
|
||||
.full_width()
|
||||
.style(ButtonStyle::Outlined)
|
||||
.icon_color(Color::Muted)
|
||||
.on_click(cx.listener({
|
||||
move |this: &mut Self, _, _, cx: &mut Context<Self>| {
|
||||
this.expanded_tool_calls.remove(&tool_call_id);
|
||||
cx.notify();
|
||||
}
|
||||
})),
|
||||
)
|
||||
})
|
||||
.into_any_element()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use std::io::{Cursor, Write};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use base64::write::EncoderWriter;
|
||||
use anyhow::{Result, anyhow};
|
||||
use base64::{Engine as _, write::EncoderWriter};
|
||||
use gpui::{
|
||||
App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, Size, Task, point, px, size,
|
||||
};
|
||||
|
|
@ -29,6 +29,7 @@ const MAX_IMAGE_DOWNSCALE_PASSES: usize = 8;
|
|||
pub trait LanguageModelImageExt {
|
||||
const FORMAT: ImageFormat;
|
||||
fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<LanguageModelImage>>;
|
||||
fn from_base64_image(data: &str, mime_type: &str) -> Result<Option<LanguageModelImage>>;
|
||||
}
|
||||
|
||||
impl LanguageModelImageExt for LanguageModelImage {
|
||||
|
|
@ -36,93 +37,104 @@ impl LanguageModelImageExt for LanguageModelImage {
|
|||
|
||||
fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<LanguageModelImage>> {
|
||||
cx.background_spawn(async move {
|
||||
let image_bytes = Cursor::new(data.bytes());
|
||||
let dynamic_image = match data.format() {
|
||||
ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
|
||||
.and_then(image::DynamicImage::from_decoder),
|
||||
ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
|
||||
.and_then(image::DynamicImage::from_decoder),
|
||||
ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
|
||||
.and_then(image::DynamicImage::from_decoder),
|
||||
ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
|
||||
.and_then(image::DynamicImage::from_decoder),
|
||||
ImageFormat::Bmp => image::codecs::bmp::BmpDecoder::new(image_bytes)
|
||||
.and_then(image::DynamicImage::from_decoder),
|
||||
ImageFormat::Tiff => image::codecs::tiff::TiffDecoder::new(image_bytes)
|
||||
.and_then(image::DynamicImage::from_decoder),
|
||||
_ => return None,
|
||||
}
|
||||
.log_err()?;
|
||||
|
||||
let width = dynamic_image.width();
|
||||
let height = dynamic_image.height();
|
||||
let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));
|
||||
|
||||
// First apply any provider-specific dimension constraints we know about (Anthropic).
|
||||
let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
|
||||
|| image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
|
||||
{
|
||||
let new_bounds = ObjectFit::ScaleDown.get_bounds(
|
||||
gpui::Bounds {
|
||||
origin: point(px(0.0), px(0.0)),
|
||||
size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
|
||||
},
|
||||
image_size,
|
||||
);
|
||||
dynamic_image.resize(
|
||||
new_bounds.size.width.into(),
|
||||
new_bounds.size.height.into(),
|
||||
image::imageops::FilterType::Triangle,
|
||||
)
|
||||
} else {
|
||||
dynamic_image
|
||||
let format = match data.format() {
|
||||
ImageFormat::Png => image::ImageFormat::Png,
|
||||
ImageFormat::Jpeg => image::ImageFormat::Jpeg,
|
||||
ImageFormat::Webp => image::ImageFormat::WebP,
|
||||
ImageFormat::Gif => image::ImageFormat::Gif,
|
||||
ImageFormat::Bmp => image::ImageFormat::Bmp,
|
||||
ImageFormat::Tiff => image::ImageFormat::Tiff,
|
||||
ImageFormat::Ico => image::ImageFormat::Ico,
|
||||
ImageFormat::Pnm => image::ImageFormat::Pnm,
|
||||
ImageFormat::Svg => return None,
|
||||
};
|
||||
|
||||
// Then enforce a default per-image size cap on the encoded PNG bytes.
|
||||
//
|
||||
// We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
|
||||
// The upstream provider limit we want to respect is effectively on the binary image
|
||||
// payload size, so we enforce against the encoded PNG bytes before base64 encoding.
|
||||
let mut encoded_png = encode_png_bytes(&processed_image).log_err()?;
|
||||
for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
|
||||
if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
|
||||
break;
|
||||
}
|
||||
|
||||
// Scale down geometrically to converge quickly. We don't know the final PNG size
|
||||
// as a function of pixels, so we iteratively shrink.
|
||||
let (w, h) = processed_image.dimensions();
|
||||
if w <= 1 || h <= 1 {
|
||||
break;
|
||||
}
|
||||
|
||||
// Shrink by ~15% each pass (0.85). This is a compromise between speed and
|
||||
// preserving image detail.
|
||||
let new_w = ((w as f32) * 0.85).round().max(1.0) as u32;
|
||||
let new_h = ((h as f32) * 0.85).round().max(1.0) as u32;
|
||||
|
||||
processed_image =
|
||||
processed_image.resize(new_w, new_h, image::imageops::FilterType::Triangle);
|
||||
encoded_png = encode_png_bytes(&processed_image).log_err()?;
|
||||
}
|
||||
|
||||
if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
|
||||
// Still too large after multiple passes; treat as non-convertible for now.
|
||||
// (Provider-specific handling can be introduced later.)
|
||||
return None;
|
||||
}
|
||||
|
||||
// Now base64 encode the PNG bytes.
|
||||
let base64_image = encode_bytes_as_base64(encoded_png.as_slice()).log_err()?;
|
||||
|
||||
// SAFETY: The base64 encoder should not produce non-UTF8.
|
||||
let source = unsafe { String::from_utf8_unchecked(base64_image) };
|
||||
|
||||
Some(LanguageModelImage {
|
||||
source: source.into(),
|
||||
})
|
||||
let dynamic_image =
|
||||
image::load_from_memory_with_format(data.bytes(), format).log_err()?;
|
||||
language_model_image_from_dynamic_image(dynamic_image)
|
||||
.log_err()
|
||||
.flatten()
|
||||
})
|
||||
}
|
||||
|
||||
fn from_base64_image(data: &str, mime_type: &str) -> Result<Option<LanguageModelImage>> {
|
||||
let format = image::ImageFormat::from_mime_type(mime_type)
|
||||
.ok_or_else(|| anyhow!("unsupported image MIME type `{}`", mime_type))?;
|
||||
let bytes = base64::engine::general_purpose::STANDARD.decode(data.as_bytes())?;
|
||||
let dynamic_image = image::load_from_memory_with_format(&bytes, format)?;
|
||||
language_model_image_from_dynamic_image(dynamic_image)
|
||||
}
|
||||
}
|
||||
|
||||
fn language_model_image_from_dynamic_image(
|
||||
dynamic_image: image::DynamicImage,
|
||||
) -> Result<Option<LanguageModelImage>> {
|
||||
let width = dynamic_image.width();
|
||||
let height = dynamic_image.height();
|
||||
let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));
|
||||
|
||||
// First apply any provider-specific dimension constraints we know about (Anthropic).
|
||||
let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
|
||||
|| image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
|
||||
{
|
||||
let new_bounds = ObjectFit::ScaleDown.get_bounds(
|
||||
gpui::Bounds {
|
||||
origin: point(px(0.0), px(0.0)),
|
||||
size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
|
||||
},
|
||||
image_size,
|
||||
);
|
||||
dynamic_image.resize(
|
||||
new_bounds.size.width.into(),
|
||||
new_bounds.size.height.into(),
|
||||
image::imageops::FilterType::Triangle,
|
||||
)
|
||||
} else {
|
||||
dynamic_image
|
||||
};
|
||||
|
||||
// Then enforce a default per-image size cap on the encoded PNG bytes.
|
||||
//
|
||||
// We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
|
||||
// The upstream provider limit we want to respect is effectively on the binary image
|
||||
// payload size, so we enforce against the encoded PNG bytes before base64 encoding.
|
||||
let mut encoded_png = encode_png_bytes(&processed_image)?;
|
||||
for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
|
||||
if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
|
||||
break;
|
||||
}
|
||||
|
||||
// Scale down geometrically to converge quickly. We don't know the final PNG size
|
||||
// as a function of pixels, so we iteratively shrink.
|
||||
let (width, height) = processed_image.dimensions();
|
||||
if width <= 1 || height <= 1 {
|
||||
break;
|
||||
}
|
||||
|
||||
// Shrink by ~15% each pass (0.85). This is a compromise between speed and
|
||||
// preserving image detail.
|
||||
let new_width = ((width as f32) * 0.85).round().max(1.0) as u32;
|
||||
let new_height = ((height as f32) * 0.85).round().max(1.0) as u32;
|
||||
|
||||
processed_image =
|
||||
processed_image.resize(new_width, new_height, image::imageops::FilterType::Triangle);
|
||||
encoded_png = encode_png_bytes(&processed_image)?;
|
||||
}
|
||||
|
||||
if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
|
||||
// Still too large after multiple passes; treat as non-convertible for now.
|
||||
// (Provider-specific handling can be introduced later.)
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Now base64 encode the PNG bytes.
|
||||
let base64_image = encode_bytes_as_base64(encoded_png.as_slice())?;
|
||||
|
||||
// SAFETY: The base64 encoder should not produce non-UTF8.
|
||||
let source = unsafe { String::from_utf8_unchecked(base64_image) };
|
||||
|
||||
Ok(Some(LanguageModelImage {
|
||||
source: source.into(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn encode_png_bytes(image: &image::DynamicImage) -> Result<Vec<u8>> {
|
||||
|
|
@ -162,7 +174,6 @@ pub fn gpui_size_to_image_size(size: Size<DevicePixels>) -> ImageSize {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use base64::Engine as _;
|
||||
use gpui::TestAppContext;
|
||||
|
||||
fn base64_to_png_bytes(base64: &str) -> Vec<u8> {
|
||||
|
|
@ -202,13 +213,46 @@ mod tests {
|
|||
raw_png.len()
|
||||
);
|
||||
|
||||
let image = Arc::new(gpui::Image::from_bytes(ImageFormat::Png, raw_png));
|
||||
let image = Arc::new(gpui::Image::from_bytes(ImageFormat::Png, raw_png.clone()));
|
||||
let lm_image = cx
|
||||
.update(|cx| LanguageModelImage::from_image(Arc::clone(&image), cx))
|
||||
.await
|
||||
.expect("from_image should succeed");
|
||||
|
||||
let decoded_png = base64_to_png_bytes(lm_image.source.as_ref());
|
||||
assert_downscaled_from_original(lm_image.source.as_ref(), 4096, 4096);
|
||||
|
||||
let base64_png = base64::engine::general_purpose::STANDARD.encode(raw_png);
|
||||
let lm_image = LanguageModelImage::from_base64_image(&base64_png, "image/png")
|
||||
.expect("from_base64_image should not error")
|
||||
.expect("from_base64_image should succeed");
|
||||
|
||||
assert_downscaled_from_original(lm_image.source.as_ref(), 4096, 4096);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_base64_image_converts_jpeg_to_png() {
|
||||
use image::ImageEncoder as _;
|
||||
|
||||
let mut jpeg_bytes = Vec::new();
|
||||
image::codecs::jpeg::JpegEncoder::new(&mut jpeg_bytes)
|
||||
.write_image(&[255, 0, 0], 1, 1, image::ExtendedColorType::Rgb8)
|
||||
.expect("encode jpeg");
|
||||
let jpeg_data = base64::engine::general_purpose::STANDARD.encode(jpeg_bytes);
|
||||
|
||||
let image = LanguageModelImage::from_base64_image(&jpeg_data, "image/jpeg")
|
||||
.expect("from_base64_image should not error")
|
||||
.expect("from_base64_image should succeed");
|
||||
let png_bytes = base64_to_png_bytes(image.source.as_ref());
|
||||
|
||||
assert_eq!(
|
||||
image::guess_format(&png_bytes).expect("guess image format"),
|
||||
image::ImageFormat::Png
|
||||
);
|
||||
assert_eq!(png_dimensions(&png_bytes), (1, 1));
|
||||
}
|
||||
|
||||
fn assert_downscaled_from_original(base64_png: &str, width: u32, height: u32) {
|
||||
let decoded_png = base64_to_png_bytes(base64_png);
|
||||
assert!(
|
||||
decoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES,
|
||||
"Encoded PNG should be ≤ {} bytes after downscale, but was {} bytes",
|
||||
|
|
@ -216,12 +260,12 @@ mod tests {
|
|||
decoded_png.len()
|
||||
);
|
||||
|
||||
let (w, h) = png_dimensions(&decoded_png);
|
||||
let (downsized_width, downsized_height) = png_dimensions(&decoded_png);
|
||||
assert!(
|
||||
w < 4096 && h < 4096,
|
||||
downsized_width < width && downsized_height < height,
|
||||
"Dimensions should have shrunk: got {}×{}",
|
||||
w,
|
||||
h
|
||||
downsized_width,
|
||||
downsized_height
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -438,7 +438,6 @@ mod tests {
|
|||
// Test image object
|
||||
let json = serde_json::json!({
|
||||
"source": "base64encodedimagedata",
|
||||
"size": {"width": 100, "height": 200}
|
||||
});
|
||||
let content: LanguageModelToolResultContent = serde_json::from_value(json).unwrap();
|
||||
match content {
|
||||
|
|
|
|||
Loading…
Reference in a new issue