Fix multibuffer chunk splitting at UTF-8 boundaries (#57641)

Don't panic when a diff transform boundary falls inside a multi-byte
UTF-8 character. Round split points up to the next char boundary and
advance past any transform boundaries skipped by that adjustment.

Closes FR-16

Release Notes:

- Fixed a crash when deleting words near inline diff boundaries
containing multi-byte characters
This commit is contained in:
Oleksiy Syvokon 2026-05-27 20:57:15 +03:00 committed by GitHub
parent 710228e3dd
commit 7ae24463e1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 51 additions and 4 deletions

View file

@ -7934,7 +7934,11 @@ impl<'a> Iterator for MultiBufferChunks<'a> {
if self.range.start >= self.range.end { if self.range.start >= self.range.end {
return None; return None;
} }
if self.range.start == self.diff_transforms.end().0 { while self
.diff_transforms
.item()
.is_some_and(|_| self.range.start >= self.diff_transforms.end().0)
{
self.diff_transforms.next(); self.diff_transforms.next();
} }
@ -7961,10 +7965,17 @@ impl<'a> Iterator for MultiBufferChunks<'a> {
let chunk_end = self.range.start + chunk.text.len(); let chunk_end = self.range.start + chunk.text.len();
let diff_transform_end = diff_transform_end.min(self.range.end); let diff_transform_end = diff_transform_end.min(self.range.end);
if diff_transform_end < chunk_end { let split_idx = if diff_transform_end < chunk_end {
let split_idx = diff_transform_end - self.range.start; chunk
.text
.ceil_char_boundary(diff_transform_end - self.range.start)
} else {
chunk.text.len()
};
if split_idx < chunk.text.len() {
let (before, after) = chunk.text.split_at(split_idx); let (before, after) = chunk.text.split_at(split_idx);
self.range.start = diff_transform_end; self.range.start += split_idx;
let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1); let mask = 1u128.unbounded_shl(split_idx as u32).wrapping_sub(1);
let chars = chunk.chars & mask; let chars = chunk.chars & mask;
let tabs = chunk.tabs & mask; let tabs = chunk.tabs & mask;

View file

@ -1527,6 +1527,42 @@ async fn test_basic_diff_hunks(cx: &mut TestAppContext) {
); );
} }
/// Regression test for FR-16: collecting the text of a snapshot must succeed
/// even when a diff transform boundary lands in the middle of a multi-byte
/// UTF-8 character.
///
/// The multibuffer wraps a single buffer containing `"タx"`. Its snapshot's
/// transform tree is then replaced by hand with two `BufferContent`
/// transforms whose byte lengths are 2 and `"タx".len() - 2`, so the seam
/// between them sits two bytes into the leading `'タ'`.
#[gpui::test]
fn test_text_for_range_with_diff_transform_boundary_inside_multibyte_character(cx: &mut App) {
    const CONTENTS: &str = "タx";
    // Byte length of the first transform, i.e. where the seam is placed.
    const SEAM_OFFSET: usize = 2;

    let buffer = cx.new(|cx| Buffer::local(CONTENTS, cx));
    let multibuffer = cx.new(|cx| MultiBuffer::singleton(buffer, cx));
    let mut snapshot = multibuffer.read(cx).snapshot(cx);

    // FR-16 showed a diff transform boundary two bytes into the leading 'タ'.
    // Build that transform tree directly so this test stays focused on chunk iteration.
    let mut transforms = SumTree::default();
    for byte_len in [SEAM_OFFSET, CONTENTS.len() - SEAM_OFFSET] {
        // Only the byte length of each summary matters here, so derive it
        // from ASCII filler text of the desired length.
        let filler = "x".repeat(byte_len);
        transforms.push(
            DiffTransform::BufferContent {
                summary: MBTextSummary::from(TextSummary::from(filler.as_str())),
                inserted_hunk_info: None,
            },
            (),
        );
    }
    snapshot.diff_transforms = transforms;

    let rendered: String = snapshot
        .text_for_range(MultiBufferOffset(0)..snapshot.len())
        .collect();
    assert_eq!(rendered, CONTENTS);
}
#[gpui::test] #[gpui::test]
async fn test_repeatedly_expand_a_diff_hunk(cx: &mut TestAppContext) { async fn test_repeatedly_expand_a_diff_hunk(cx: &mut TestAppContext) {
let text = indoc!( let text = indoc!(