mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
gpui: Take advantage of unified memory on macOS (#49236)
Third attempt to land this improvement: (#45577, #44273) The previous PR didn’t work on Intel MacBooks because I made a wrong assumption about the unified memory check. `has_unified_memory` only tells us that the CPU and GPU share memory. It does not mean we’re running on an Apple GPU family. Memoryless textures are only supported on Apple GPUs. Some Intel Macs report unified memory, but they don’t support memoryless textures, which is why the previous change failed there. So instead of relying on unified memory, we now explicitly check that we’re running on an Apple GPU family before enabling memoryless textures. Before you mark this PR as ready for review, make sure that you have: - [ ] Added a solid test coverage and/or screenshots from doing manual testing - [X] Done a self-review taking into account security and performance aspects - [ ] Aligned any UI changes with the [UI checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) Release Notes: - Reduced memory usage on macOS --------- Signed-off-by: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com>
This commit is contained in:
parent
5e3efc640e
commit
c8656ac96d
4 changed files with 83 additions and 42 deletions
33
Cargo.lock
generated
33
Cargo.lock
generated
|
|
@ -3755,9 +3755,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "core-graphics2"
|
||||
version = "0.4.1"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e4583956b9806b69f73fcb23aee05eb3620efc282972f08f6a6db7504f8334d"
|
||||
checksum = "4416167a69126e617f8d0a214af0e3c1dbdeffcb100ddf72dcd1a1ac9893c146"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"block",
|
||||
|
|
@ -3789,16 +3789,16 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "core-video"
|
||||
version = "0.4.3"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d45e71d5be22206bed53c3c3cb99315fc4c3d31b8963808c6bc4538168c4f8ef"
|
||||
checksum = "139679cc63eb9504bdbe37e37874b0247136177655f0008588781e90863afa62"
|
||||
dependencies = [
|
||||
"block",
|
||||
"core-foundation 0.10.0",
|
||||
"core-graphics2",
|
||||
"io-surface",
|
||||
"libc",
|
||||
"metal 0.29.0",
|
||||
"metal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -7407,7 +7407,7 @@ dependencies = [
|
|||
"lyon",
|
||||
"mach2 0.5.0",
|
||||
"media",
|
||||
"metal 0.29.0",
|
||||
"metal",
|
||||
"naga",
|
||||
"num_cpus",
|
||||
"objc",
|
||||
|
|
@ -7525,7 +7525,7 @@ dependencies = [
|
|||
"log",
|
||||
"mach2 0.5.0",
|
||||
"media",
|
||||
"metal 0.29.0",
|
||||
"metal",
|
||||
"objc",
|
||||
"parking_lot",
|
||||
"pathfinder_geometry",
|
||||
|
|
@ -10106,7 +10106,7 @@ dependencies = [
|
|||
"core-video",
|
||||
"ctor",
|
||||
"foreign-types 0.5.0",
|
||||
"metal 0.29.0",
|
||||
"metal",
|
||||
"objc",
|
||||
]
|
||||
|
||||
|
|
@ -10189,21 +10189,6 @@ dependencies = [
|
|||
"ttf-parser 0.20.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metal"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"block",
|
||||
"core-graphics-types 0.1.3",
|
||||
"foreign-types 0.5.0",
|
||||
"log",
|
||||
"objc",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metal"
|
||||
version = "0.33.0"
|
||||
|
|
@ -19634,7 +19619,7 @@ dependencies = [
|
|||
"libc",
|
||||
"libloading",
|
||||
"log",
|
||||
"metal 0.33.0",
|
||||
"metal",
|
||||
"naga",
|
||||
"ndk-sys",
|
||||
"objc",
|
||||
|
|
|
|||
|
|
@ -523,7 +523,7 @@ cocoa-foundation = "=0.2.0"
|
|||
convert_case = "0.8.0"
|
||||
core-foundation = "=0.10.0"
|
||||
core-foundation-sys = "0.8.6"
|
||||
core-video = { version = "0.4.3", features = ["metal"] }
|
||||
core-video = { version = "0.5.2", features = ["metal"] }
|
||||
cpal = "0.17"
|
||||
crash-handler = "0.6"
|
||||
criterion = { version = "0.5", features = ["html_reports"] }
|
||||
|
|
@ -574,7 +574,7 @@ log = { version = "0.4.16", features = ["kv_unstable_serde", "serde"] }
|
|||
lsp-types = { git = "https://github.com/zed-industries/lsp-types", rev = "a4f410987660bf560d1e617cb78117c6b6b9f599" }
|
||||
mach2 = "0.5"
|
||||
markup5ever_rcdom = "0.3.0"
|
||||
metal = "0.29"
|
||||
metal = "0.33"
|
||||
minidumper = "0.8"
|
||||
moka = { version = "0.12.10", features = ["sync"] }
|
||||
naga = { version = "28.0", features = ["wgsl-in"] }
|
||||
|
|
|
|||
|
|
@ -13,9 +13,10 @@ use std::borrow::Cow;
|
|||
pub(crate) struct MetalAtlas(Mutex<MetalAtlasState>);
|
||||
|
||||
impl MetalAtlas {
|
||||
pub(crate) fn new(device: Device) -> Self {
|
||||
pub(crate) fn new(device: Device, is_apple_gpu: bool) -> Self {
|
||||
MetalAtlas(Mutex::new(MetalAtlasState {
|
||||
device: AssertSend(device),
|
||||
is_apple_gpu,
|
||||
monochrome_textures: Default::default(),
|
||||
polychrome_textures: Default::default(),
|
||||
tiles_by_key: Default::default(),
|
||||
|
|
@ -29,6 +30,7 @@ impl MetalAtlas {
|
|||
|
||||
struct MetalAtlasState {
|
||||
device: AssertSend<Device>,
|
||||
is_apple_gpu: bool,
|
||||
monochrome_textures: AtlasTextureList<MetalAtlasTexture>,
|
||||
polychrome_textures: AtlasTextureList<MetalAtlasTexture>,
|
||||
tiles_by_key: FxHashMap<AtlasKey, AtlasTile>,
|
||||
|
|
@ -149,6 +151,13 @@ impl MetalAtlasState {
|
|||
}
|
||||
texture_descriptor.set_pixel_format(pixel_format);
|
||||
texture_descriptor.set_usage(usage);
|
||||
// Shared memory mode can be used only on Apple GPU families
|
||||
// https://developer.apple.com/documentation/metal/mtlresourceoptions/storagemodeshared
|
||||
texture_descriptor.set_storage_mode(if self.is_apple_gpu {
|
||||
metal::MTLStorageMode::Shared
|
||||
} else {
|
||||
metal::MTLStorageMode::Managed
|
||||
});
|
||||
let metal_texture = self.device.new_texture(&texture_descriptor);
|
||||
|
||||
let texture_list = match kind {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ use core_video::{
|
|||
};
|
||||
use foreign_types::{ForeignType, ForeignTypeRef};
|
||||
use metal::{
|
||||
CAMetalLayer, CommandQueue, MTLPixelFormat, MTLResourceOptions, NSRange,
|
||||
CAMetalLayer, CommandQueue, MTLGPUFamily, MTLPixelFormat, MTLResourceOptions, NSRange,
|
||||
RenderPassColorAttachmentDescriptorRef,
|
||||
};
|
||||
use objc::{self, msg_send, sel, sel_impl};
|
||||
|
|
@ -78,12 +78,22 @@ impl InstanceBufferPool {
|
|||
self.buffers.clear();
|
||||
}
|
||||
|
||||
pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer {
|
||||
pub(crate) fn acquire(
|
||||
&mut self,
|
||||
device: &metal::Device,
|
||||
unified_memory: bool,
|
||||
) -> InstanceBuffer {
|
||||
let buffer = self.buffers.pop().unwrap_or_else(|| {
|
||||
device.new_buffer(
|
||||
self.buffer_size as u64,
|
||||
MTLResourceOptions::StorageModeManaged,
|
||||
)
|
||||
let options = if unified_memory {
|
||||
MTLResourceOptions::StorageModeShared
|
||||
// Buffers are write only which can benefit from the combined cache
|
||||
// https://developer.apple.com/documentation/metal/mtlresourceoptions/cpucachemodewritecombined
|
||||
| MTLResourceOptions::CPUCacheModeWriteCombined
|
||||
} else {
|
||||
MTLResourceOptions::StorageModeManaged
|
||||
};
|
||||
|
||||
device.new_buffer(self.buffer_size as u64, options)
|
||||
});
|
||||
InstanceBuffer {
|
||||
metal_buffer: buffer,
|
||||
|
|
@ -101,6 +111,8 @@ impl InstanceBufferPool {
|
|||
pub(crate) struct MetalRenderer {
|
||||
device: metal::Device,
|
||||
layer: metal::MetalLayer,
|
||||
is_apple_gpu: bool,
|
||||
is_unified_memory: bool,
|
||||
presents_with_transaction: bool,
|
||||
command_queue: CommandQueue,
|
||||
paths_rasterization_pipeline_state: metal::RenderPipelineState,
|
||||
|
|
@ -186,6 +198,15 @@ impl MetalRenderer {
|
|||
output
|
||||
}
|
||||
|
||||
// Shared memory can be used only if CPU and GPU share the same memory space.
|
||||
// https://developer.apple.com/documentation/metal/setting-resource-storage-modes
|
||||
let is_unified_memory = device.has_unified_memory();
|
||||
// Apple GPU families support memoryless textures, which can significantly reduce
|
||||
// memory usage by keeping render targets in on-chip tile memory instead of
|
||||
// allocating backing store in system memory.
|
||||
// https://developer.apple.com/documentation/metal/mtlgpufamily
|
||||
let is_apple_gpu = device.supports_family(MTLGPUFamily::Apple1);
|
||||
|
||||
let unit_vertices = [
|
||||
to_float2_bits(point(0., 0.)),
|
||||
to_float2_bits(point(1., 0.)),
|
||||
|
|
@ -197,7 +218,12 @@ impl MetalRenderer {
|
|||
let unit_vertices = device.new_buffer_with_data(
|
||||
unit_vertices.as_ptr() as *const c_void,
|
||||
mem::size_of_val(&unit_vertices) as u64,
|
||||
MTLResourceOptions::StorageModeManaged,
|
||||
if is_unified_memory {
|
||||
MTLResourceOptions::StorageModeShared
|
||||
| MTLResourceOptions::CPUCacheModeWriteCombined
|
||||
} else {
|
||||
MTLResourceOptions::StorageModeManaged
|
||||
},
|
||||
);
|
||||
|
||||
let paths_rasterization_pipeline_state = build_path_rasterization_pipeline_state(
|
||||
|
|
@ -267,7 +293,7 @@ impl MetalRenderer {
|
|||
);
|
||||
|
||||
let command_queue = device.new_command_queue();
|
||||
let sprite_atlas = Arc::new(MetalAtlas::new(device.clone()));
|
||||
let sprite_atlas = Arc::new(MetalAtlas::new(device.clone(), is_apple_gpu));
|
||||
let core_video_texture_cache =
|
||||
CVMetalTextureCache::new(None, device.clone(), None).unwrap();
|
||||
|
||||
|
|
@ -275,6 +301,8 @@ impl MetalRenderer {
|
|||
device,
|
||||
layer,
|
||||
presents_with_transaction: false,
|
||||
is_apple_gpu,
|
||||
is_unified_memory,
|
||||
command_queue,
|
||||
paths_rasterization_pipeline_state,
|
||||
path_sprites_pipeline_state,
|
||||
|
|
@ -344,14 +372,23 @@ impl MetalRenderer {
|
|||
texture_descriptor.set_width(size.width.0 as u64);
|
||||
texture_descriptor.set_height(size.height.0 as u64);
|
||||
texture_descriptor.set_pixel_format(metal::MTLPixelFormat::BGRA8Unorm);
|
||||
texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
|
||||
texture_descriptor
|
||||
.set_usage(metal::MTLTextureUsage::RenderTarget | metal::MTLTextureUsage::ShaderRead);
|
||||
self.path_intermediate_texture = Some(self.device.new_texture(&texture_descriptor));
|
||||
|
||||
if self.path_sample_count > 1 {
|
||||
// https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus
|
||||
// Rendering MSAA textures are done in a single pass, so we can use memory-less storage on Apple Silicon
|
||||
let storage_mode = if self.is_apple_gpu {
|
||||
metal::MTLStorageMode::Memoryless
|
||||
} else {
|
||||
metal::MTLStorageMode::Private
|
||||
};
|
||||
|
||||
let msaa_descriptor = texture_descriptor;
|
||||
msaa_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample);
|
||||
msaa_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
|
||||
msaa_descriptor.set_storage_mode(storage_mode);
|
||||
msaa_descriptor.set_sample_count(self.path_sample_count as _);
|
||||
self.path_intermediate_msaa_texture = Some(self.device.new_texture(&msaa_descriptor));
|
||||
} else {
|
||||
|
|
@ -385,7 +422,10 @@ impl MetalRenderer {
|
|||
};
|
||||
|
||||
loop {
|
||||
let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
|
||||
let mut instance_buffer = self
|
||||
.instance_buffer_pool
|
||||
.lock()
|
||||
.acquire(&self.device, self.is_unified_memory);
|
||||
|
||||
let command_buffer =
|
||||
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
|
||||
|
|
@ -449,7 +489,10 @@ impl MetalRenderer {
|
|||
.ok_or_else(|| anyhow::anyhow!("Failed to get drawable for render_to_image"))?;
|
||||
|
||||
loop {
|
||||
let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
|
||||
let mut instance_buffer = self
|
||||
.instance_buffer_pool
|
||||
.lock()
|
||||
.acquire(&self.device, self.is_unified_memory);
|
||||
|
||||
let command_buffer =
|
||||
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
|
||||
|
|
@ -646,10 +689,14 @@ impl MetalRenderer {
|
|||
|
||||
command_encoder.end_encoding();
|
||||
|
||||
instance_buffer.metal_buffer.did_modify_range(NSRange {
|
||||
location: 0,
|
||||
length: instance_offset as NSUInteger,
|
||||
});
|
||||
if !self.is_unified_memory {
|
||||
// Sync the instance buffer to the GPU
|
||||
instance_buffer.metal_buffer.did_modify_range(NSRange {
|
||||
location: 0,
|
||||
length: instance_offset as NSUInteger,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(command_buffer.to_owned())
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue