gpui: Take advantage of unified memory on macOS (#49236)

Third attempt to land this improvement: (#45577, #44273)

The previous PR didn’t work on Intel MacBooks because I made a wrong
assumption about the unified memory check. `has_unified_memory` only
tells us that the CPU and GPU share memory. It does not mean we’re
running on an Apple GPU family.

Memoryless textures are only supported on Apple GPUs. Some Intel Macs
report unified memory, but they don’t support memoryless textures, which
is why the previous change failed there.

So instead of relying on unified memory, we now explicitly check that
we’re running on an Apple GPU family before enabling memoryless
textures.

Before you mark this PR as ready for review, make sure that you have:
- [ ] Added a solid test coverage and/or screenshots from doing manual
testing
- [X] Done a self-review taking into account security and performance
aspects
- [ ] Aligned any UI changes with the [UI
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)

Release Notes:

- Reduced memory usage on macOS

---------

Signed-off-by: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com>
This commit is contained in:
Marco Mihai Condrache 2026-02-21 16:53:12 +01:00 committed by GitHub
parent 5e3efc640e
commit c8656ac96d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 83 additions and 42 deletions

33
Cargo.lock generated
View file

@ -3755,9 +3755,9 @@ dependencies = [
[[package]]
name = "core-graphics2"
version = "0.4.1"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e4583956b9806b69f73fcb23aee05eb3620efc282972f08f6a6db7504f8334d"
checksum = "4416167a69126e617f8d0a214af0e3c1dbdeffcb100ddf72dcd1a1ac9893c146"
dependencies = [
"bitflags 2.10.0",
"block",
@ -3789,16 +3789,16 @@ dependencies = [
[[package]]
name = "core-video"
version = "0.4.3"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d45e71d5be22206bed53c3c3cb99315fc4c3d31b8963808c6bc4538168c4f8ef"
checksum = "139679cc63eb9504bdbe37e37874b0247136177655f0008588781e90863afa62"
dependencies = [
"block",
"core-foundation 0.10.0",
"core-graphics2",
"io-surface",
"libc",
"metal 0.29.0",
"metal",
]
[[package]]
@ -7407,7 +7407,7 @@ dependencies = [
"lyon",
"mach2 0.5.0",
"media",
"metal 0.29.0",
"metal",
"naga",
"num_cpus",
"objc",
@ -7525,7 +7525,7 @@ dependencies = [
"log",
"mach2 0.5.0",
"media",
"metal 0.29.0",
"metal",
"objc",
"parking_lot",
"pathfinder_geometry",
@ -10106,7 +10106,7 @@ dependencies = [
"core-video",
"ctor",
"foreign-types 0.5.0",
"metal 0.29.0",
"metal",
"objc",
]
@ -10189,21 +10189,6 @@ dependencies = [
"ttf-parser 0.20.0",
]
[[package]]
name = "metal"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.10.0",
"block",
"core-graphics-types 0.1.3",
"foreign-types 0.5.0",
"log",
"objc",
"paste",
]
[[package]]
name = "metal"
version = "0.33.0"
@ -19634,7 +19619,7 @@ dependencies = [
"libc",
"libloading",
"log",
"metal 0.33.0",
"metal",
"naga",
"ndk-sys",
"objc",

View file

@ -523,7 +523,7 @@ cocoa-foundation = "=0.2.0"
convert_case = "0.8.0"
core-foundation = "=0.10.0"
core-foundation-sys = "0.8.6"
core-video = { version = "0.4.3", features = ["metal"] }
core-video = { version = "0.5.2", features = ["metal"] }
cpal = "0.17"
crash-handler = "0.6"
criterion = { version = "0.5", features = ["html_reports"] }
@ -574,7 +574,7 @@ log = { version = "0.4.16", features = ["kv_unstable_serde", "serde"] }
lsp-types = { git = "https://github.com/zed-industries/lsp-types", rev = "a4f410987660bf560d1e617cb78117c6b6b9f599" }
mach2 = "0.5"
markup5ever_rcdom = "0.3.0"
metal = "0.29"
metal = "0.33"
minidumper = "0.8"
moka = { version = "0.12.10", features = ["sync"] }
naga = { version = "28.0", features = ["wgsl-in"] }

View file

@ -13,9 +13,10 @@ use std::borrow::Cow;
pub(crate) struct MetalAtlas(Mutex<MetalAtlasState>);
impl MetalAtlas {
pub(crate) fn new(device: Device) -> Self {
pub(crate) fn new(device: Device, is_apple_gpu: bool) -> Self {
MetalAtlas(Mutex::new(MetalAtlasState {
device: AssertSend(device),
is_apple_gpu,
monochrome_textures: Default::default(),
polychrome_textures: Default::default(),
tiles_by_key: Default::default(),
@ -29,6 +30,7 @@ impl MetalAtlas {
struct MetalAtlasState {
device: AssertSend<Device>,
is_apple_gpu: bool,
monochrome_textures: AtlasTextureList<MetalAtlasTexture>,
polychrome_textures: AtlasTextureList<MetalAtlasTexture>,
tiles_by_key: FxHashMap<AtlasKey, AtlasTile>,
@ -149,6 +151,13 @@ impl MetalAtlasState {
}
texture_descriptor.set_pixel_format(pixel_format);
texture_descriptor.set_usage(usage);
// Shared memory mode can be used only on Apple GPU families
// https://developer.apple.com/documentation/metal/mtlresourceoptions/storagemodeshared
texture_descriptor.set_storage_mode(if self.is_apple_gpu {
metal::MTLStorageMode::Shared
} else {
metal::MTLStorageMode::Managed
});
let metal_texture = self.device.new_texture(&texture_descriptor);
let texture_list = match kind {

View file

@ -21,7 +21,7 @@ use core_video::{
};
use foreign_types::{ForeignType, ForeignTypeRef};
use metal::{
CAMetalLayer, CommandQueue, MTLPixelFormat, MTLResourceOptions, NSRange,
CAMetalLayer, CommandQueue, MTLGPUFamily, MTLPixelFormat, MTLResourceOptions, NSRange,
RenderPassColorAttachmentDescriptorRef,
};
use objc::{self, msg_send, sel, sel_impl};
@ -78,12 +78,22 @@ impl InstanceBufferPool {
self.buffers.clear();
}
pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer {
pub(crate) fn acquire(
&mut self,
device: &metal::Device,
unified_memory: bool,
) -> InstanceBuffer {
let buffer = self.buffers.pop().unwrap_or_else(|| {
device.new_buffer(
self.buffer_size as u64,
MTLResourceOptions::StorageModeManaged,
)
let options = if unified_memory {
MTLResourceOptions::StorageModeShared
// Buffers are write only which can benefit from the combined cache
// https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined
| MTLResourceOptions::CPUCacheModeWriteCombined
} else {
MTLResourceOptions::StorageModeManaged
};
device.new_buffer(self.buffer_size as u64, options)
});
InstanceBuffer {
metal_buffer: buffer,
@ -101,6 +111,8 @@ impl InstanceBufferPool {
pub(crate) struct MetalRenderer {
device: metal::Device,
layer: metal::MetalLayer,
is_apple_gpu: bool,
is_unified_memory: bool,
presents_with_transaction: bool,
command_queue: CommandQueue,
paths_rasterization_pipeline_state: metal::RenderPipelineState,
@ -186,6 +198,15 @@ impl MetalRenderer {
output
}
// Shared memory can be used only if CPU and GPU share the same memory space.
// https://developer.apple.com/documentation/metal/setting-resource-storage-modes
let is_unified_memory = device.has_unified_memory();
// Apple GPU families support memoryless textures, which can significantly reduce
// memory usage by keeping render targets in on-chip tile memory instead of
// allocating backing store in system memory.
// https://developer.apple.com/documentation/metal/mtlgpufamily
let is_apple_gpu = device.supports_family(MTLGPUFamily::Apple1);
let unit_vertices = [
to_float2_bits(point(0., 0.)),
to_float2_bits(point(1., 0.)),
@ -197,7 +218,12 @@ impl MetalRenderer {
let unit_vertices = device.new_buffer_with_data(
unit_vertices.as_ptr() as *const c_void,
mem::size_of_val(&unit_vertices) as u64,
MTLResourceOptions::StorageModeManaged,
if is_unified_memory {
MTLResourceOptions::StorageModeShared
| MTLResourceOptions::CPUCacheModeWriteCombined
} else {
MTLResourceOptions::StorageModeManaged
},
);
let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state(
@ -267,7 +293,7 @@ impl MetalRenderer {
);
let command_queue = device.new_command_queue();
let sprite_atlas = Arc::new(MetalAtlas::new(device.clone()));
let sprite_atlas = Arc::new(MetalAtlas::new(device.clone(), is_apple_gpu));
let core_video_texture_cache =
CVMetalTextureCache::new(None, device.clone(), None).unwrap();
@ -275,6 +301,8 @@ impl MetalRenderer {
device,
layer,
presents_with_transaction: false,
is_apple_gpu,
is_unified_memory,
command_queue,
paths_rasterization_pipeline_state,
path_sprites_pipeline_state,
@ -344,14 +372,23 @@ impl MetalRenderer {
texture_descriptor.set_width(size.width.0 as u64);
texture_descriptor.set_height(size.height.0 as u64);
texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm);
texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
texture_descriptor
.set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead);
self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor));
if self.path_sample_count > 1 {
// https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus
// Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon
let storage_mode = if self.is_apple_gpu {
metal::MTLStorageMode::Memoryless
} else {
metal::MTLStorageMode::Private
};
let msaa_descriptor = texture_descriptor;
msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample);
msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
msaa_descriptor.set_storage_mode(storage_mode);
msaa_descriptor.set_sample_count(self.path_sample_count as _);
self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor));
} else {
@ -385,7 +422,10 @@ impl MetalRenderer {
};
loop {
let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
let mut instance_buffer = self
.instance_buffer_pool
.lock()
.acquire(&self.device, self.is_unified_memory);
let command_buffer =
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
@ -449,7 +489,10 @@ impl MetalRenderer {
.ok_or_else(|| anyhow::anyhow!("Failed to get drawable for render_to_image"))?;
loop {
let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
let mut instance_buffer = self
.instance_buffer_pool
.lock()
.acquire(&self.device, self.is_unified_memory);
let command_buffer =
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
@ -646,10 +689,14 @@ impl MetalRenderer {
command_encoder.end_encoding();
instance_buffer.metal_buffer.did_modify_range(NSRange {
location: 0,
length: instance_offset as NSUInteger,
});
if !self.is_unified_memory {
// Sync the instance buffer to the GPU
instance_buffer.metal_buffer.did_modify_range(NSRange {
location: 0,
length: instance_offset as NSUInteger,
});
}
Ok(command_buffer.to_owned())
}