zed/crates/editor/src/rewrap.rs
Mikhail Pertsev b470b50d52
editor: Extract rewrap and config out of editor.rs (#55855)
cc @SomeoneToIgnore

## Summary

Follow-up to https://github.com/zed-industries/zed/discussions/55352,
where the conclusion was to split `editor.rs` incrementally by topic
instead of all at once.

This mechanically extracts editor config and reflow-related code into
`crates/editor/src/config.rs` and `crates/editor/src/rewrap.rs`, while
preserving existing behavior and keeping externally-used APIs public
where needed.

Self-Review Checklist:

- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

Release Notes:

- N/A
2026-05-06 11:57:36 +00:00

782 lines
31 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use super::*;
impl Editor {
/// Re-flows the text under each selection so that lines fit within a wrap column.
///
/// Does nothing when the editor is read-only or in single-line mode. For every
/// selection the affected rows are split into ranges that share indentation and
/// comment prefix, each range is re-wrapped with `wrap_with_prefix`, and the
/// difference against the existing text is applied to the buffer as one batch of
/// edits.
///
/// The wrap column is `options.line_length`, falling back to `self.hard_wrap`,
/// falling back to the language's `preferred_line_length`.
pub fn rewrap(&mut self, options: RewrapOptions, cx: &mut Context<Self>) {
if self.read_only(cx) || self.mode.is_single_line() {
return;
}
let buffer = self.buffer.read(cx).snapshot(cx);
let selections = self.selections.all::<Point>(&self.display_snapshot(cx));
/// How a given row participates in a comment, as detected from its text.
#[derive(Clone, Debug, PartialEq)]
enum CommentFormat {
/// single line comment, with prefix for line
Line(String),
/// single line within a block comment, with prefix for line
BlockLine(String),
/// a single line of a block comment that includes the initial delimiter
BlockCommentWithStart(BlockCommentConfig),
/// a single line of a block comment that includes the ending delimiter
BlockCommentWithEnd(BlockCommentConfig),
}
// Split selections to respect paragraph, indent, and comment prefix boundaries.
let wrap_ranges = selections.into_iter().flat_map(|selection| {
let language_settings = buffer.language_settings_at(selection.head(), cx);
let language_scope = buffer.language_scope_at(selection.head());
// For a row, returns its indent, the comment format detected right after the
// indent (if any), and the text matched by one of the language's rewrap
// prefixes when that match begins right after the indent (if any).
// Without a language scope both optional parts are `None`.
let indent_and_prefix_for_row =
|row: u32| -> (IndentSize, Option<CommentFormat>, Option<String>) {
let indent = buffer.indent_size_for_line(MultiBufferRow(row));
let (comment_prefix, rewrap_prefix) = if let Some(language_scope) =
&language_scope
{
let indent_end = Point::new(row, indent.len);
let line_end = Point::new(row, buffer.line_len(MultiBufferRow(row)));
let line_text_after_indent = buffer
.text_for_range(indent_end..line_end)
.collect::<String>();
let is_within_comment_override = buffer
.language_scope_at(indent_end)
.is_some_and(|scope| scope.override_name() == Some("comment"));
let comment_delimiters = if is_within_comment_override {
// we are within a comment syntax node, but we don't
// yet know what kind of comment: block, doc or line
//
// The arms below are tried in order: line starts with the block
// start delimiter, line ends with the block end delimiter, line
// starts with the (non-empty) block line prefix, and finally a
// plain line-comment prefix.
match (
language_scope.documentation_comment(),
language_scope.block_comment(),
) {
(Some(config), _) | (_, Some(config))
if buffer.contains_str_at(indent_end, &config.start) =>
{
Some(CommentFormat::BlockCommentWithStart(config.clone()))
}
(Some(config), _) | (_, Some(config))
if line_text_after_indent.ends_with(config.end.as_ref()) =>
{
Some(CommentFormat::BlockCommentWithEnd(config.clone()))
}
(Some(config), _) | (_, Some(config))
if !config.prefix.is_empty()
&& buffer.contains_str_at(indent_end, &config.prefix) =>
{
Some(CommentFormat::BlockLine(config.prefix.to_string()))
}
(_, _) => language_scope
.line_comment_prefixes()
.iter()
.find(|prefix| buffer.contains_str_at(indent_end, prefix))
.map(|prefix| CommentFormat::Line(prefix.to_string())),
}
} else {
// we are not in an overridden comment node, but we may
// be within a non-overridden line comment node
language_scope
.line_comment_prefixes()
.iter()
.find(|prefix| buffer.contains_str_at(indent_end, prefix))
.map(|prefix| CommentFormat::Line(prefix.to_string()))
};
// Only the first regex that matches anywhere in the line is considered
// (`find_map` stops there); its match is kept only if it starts at
// offset 0 of the text after the indent.
let rewrap_prefix = language_scope
.rewrap_prefixes()
.iter()
.find_map(|prefix_regex| {
prefix_regex.find(&line_text_after_indent).map(|mat| {
if mat.start() == 0 {
Some(mat.as_str().to_string())
} else {
None
}
})
})
.flatten();
(comment_delimiters, rewrap_prefix)
} else {
(None, None)
};
(indent, comment_prefix, rewrap_prefix)
};
let mut start_row = selection.start.row;
let mut end_row = selection.end.row;
// An empty selection is just a cursor: grow the row range up and down over
// neighbouring rows that start with the cursor row's indent + comment prefix,
// are longer than that prefix, and are not blank.
if selection.is_empty() {
let cursor_row = selection.start.row;
let (mut indent_size, comment_prefix, _) = indent_and_prefix_for_row(cursor_row);
let line_prefix = match &comment_prefix {
Some(CommentFormat::Line(prefix) | CommentFormat::BlockLine(prefix)) => {
Some(prefix.as_str())
}
Some(CommentFormat::BlockCommentWithEnd(BlockCommentConfig {
prefix, ..
})) => Some(prefix.as_ref()),
Some(CommentFormat::BlockCommentWithStart(BlockCommentConfig {
start: _,
end: _,
prefix,
tab_size,
})) => {
// The row holding the start delimiter is matched against neighbours
// using an indent widened by the block comment's `tab_size`.
indent_size.len += tab_size;
Some(prefix.as_ref())
}
None => None,
};
let indent_prefix = indent_size.chars().collect::<String>();
let line_prefix = format!("{indent_prefix}{}", line_prefix.unwrap_or(""));
'expand_upwards: while start_row > 0 {
let prev_row = start_row - 1;
if buffer.contains_str_at(Point::new(prev_row, 0), &line_prefix)
&& buffer.line_len(MultiBufferRow(prev_row)) as usize > line_prefix.len()
&& !buffer.is_line_blank(MultiBufferRow(prev_row))
{
start_row = prev_row;
} else {
break 'expand_upwards;
}
}
'expand_downwards: while end_row < buffer.max_point().row {
let next_row = end_row + 1;
if buffer.contains_str_at(Point::new(next_row, 0), &line_prefix)
&& buffer.line_len(MultiBufferRow(next_row)) as usize > line_prefix.len()
&& !buffer.is_line_blank(MultiBufferRow(next_row))
{
end_row = next_row;
} else {
break 'expand_downwards;
}
}
}
// Blank rows never belong to a wrap range; a gap in the remaining row numbers
// is treated as a paragraph break below.
let mut non_blank_rows_iter = (start_row..=end_row)
.filter(|row| !buffer.is_line_blank(MultiBufferRow(*row)))
.peekable();
let first_row = if let Some(&row) = non_blank_rows_iter.peek() {
row
} else {
return Vec::new();
};
let mut ranges = Vec::new();
let mut current_range_start = first_row;
let mut prev_row = first_row;
let (
mut current_range_indent,
mut current_range_comment_delimiters,
mut current_range_rewrap_prefix,
) = indent_and_prefix_for_row(first_row);
// `first_row` was only peeked above, so `skip(1)` drops it here.
for row in non_blank_rows_iter.skip(1) {
let has_paragraph_break = row > prev_row + 1;
let (row_indent, row_comment_delimiters, row_rewrap_prefix) =
indent_and_prefix_for_row(row);
let has_indent_change = row_indent != current_range_indent;
let has_comment_change = row_comment_delimiters != current_range_comment_delimiters;
// A new range starts when the comment format changes, when the row carries
// its own rewrap prefix, or when the indent changes inside a comment.
let has_boundary_change = has_comment_change
|| row_rewrap_prefix.is_some()
|| (has_indent_change && current_range_comment_delimiters.is_some());
if has_paragraph_break || has_boundary_change {
ranges.push((
language_settings.clone(),
Point::new(current_range_start, 0)
..Point::new(prev_row, buffer.line_len(MultiBufferRow(prev_row))),
current_range_indent,
current_range_comment_delimiters.clone(),
current_range_rewrap_prefix.clone(),
));
current_range_start = row;
current_range_indent = row_indent;
current_range_comment_delimiters = row_comment_delimiters;
current_range_rewrap_prefix = row_rewrap_prefix;
}
prev_row = row;
}
// Close the range that was still open when the rows ran out.
ranges.push((
language_settings.clone(),
Point::new(current_range_start, 0)
..Point::new(prev_row, buffer.line_len(MultiBufferRow(prev_row))),
current_range_indent,
current_range_comment_delimiters,
current_range_rewrap_prefix,
));
ranges
});
let mut edits = Vec::new();
let mut rewrapped_row_ranges = Vec::<RangeInclusive<u32>>::new();
for (language_settings, wrap_range, mut indent_size, comment_prefix, rewrap_prefix) in
wrap_ranges
{
let start_row = wrap_range.start.row;
let end_row = wrap_range.end.row;
// Skip selections that overlap with a range that has already been rewrapped.
let selection_range = start_row..end_row;
if rewrapped_row_ranges
.iter()
.any(|range| range.overlaps(&selection_range))
{
continue;
}
let tab_size = language_settings.tab_size;
// Same prefix derivation as in the cursor-expansion step above, additionally
// recording whether the range is inside a comment.
let (line_prefix, inside_comment) = match &comment_prefix {
Some(CommentFormat::Line(prefix) | CommentFormat::BlockLine(prefix)) => {
(Some(prefix.as_str()), true)
}
Some(CommentFormat::BlockCommentWithEnd(BlockCommentConfig { prefix, .. })) => {
(Some(prefix.as_ref()), true)
}
Some(CommentFormat::BlockCommentWithStart(BlockCommentConfig {
start: _,
end: _,
prefix,
tab_size,
})) => {
indent_size.len += tab_size;
(Some(prefix.as_ref()), true)
}
None => (None, false),
};
let indent_prefix = indent_size.chars().collect::<String>();
let line_prefix = format!("{indent_prefix}{}", line_prefix.unwrap_or(""));
let allow_rewrap_based_on_language = match language_settings.allow_rewrap {
RewrapBehavior::InComments => inside_comment,
RewrapBehavior::InSelections => !wrap_range.is_empty(),
RewrapBehavior::Anywhere => true,
};
// The language setting can be bypassed either explicitly through `options` or
// implicitly when the editor has a hard-wrap column set.
let should_rewrap = options.override_language_settings
|| allow_rewrap_based_on_language
|| self.hard_wrap.is_some();
if !should_rewrap {
continue;
}
let start = Point::new(start_row, 0);
let start_offset = ToOffset::to_offset(&start, &buffer);
let end = Point::new(end_row, buffer.line_len(MultiBufferRow(end_row)));
let selection_text = buffer.text_for_range(start..end).collect::<String>();
// Set by the closure below when a line carried the block comment's start or
// end delimiter; those delimiters are re-attached on their own lines after
// wrapping.
let mut first_line_delimiter = None;
let mut last_line_delimiter = None;
// Strip indentation and comment prefixes from every line. If a line does not
// start with the expected prefix the error goes through `log_err` and the
// whole range is skipped.
let Some(lines_without_prefixes) = selection_text
.lines()
.enumerate()
.map(|(ix, line)| {
let line_trimmed = line.trim_start();
if rewrap_prefix.is_some() && ix > 0 {
// Continuation lines of a rewrap-prefixed range have nothing further
// stripped beyond the leading whitespace.
Ok(line_trimmed)
} else if let Some(
CommentFormat::BlockCommentWithStart(BlockCommentConfig {
start,
prefix,
end,
tab_size,
})
| CommentFormat::BlockCommentWithEnd(BlockCommentConfig {
start,
prefix,
end,
tab_size,
}),
) = &comment_prefix
{
let line_trimmed = line_trimmed
.strip_prefix(start.as_ref())
.map(|s| {
// The start delimiter is emitted at the indent reduced by the
// block comment's `tab_size`. `indent_size` is shadowed by a
// local copy here, so the outer value is left untouched.
let mut indent_size = indent_size;
indent_size.len -= tab_size;
let indent_prefix: String = indent_size.chars().collect();
first_line_delimiter = Some((indent_prefix, start));
s.trim_start()
})
.unwrap_or(line_trimmed);
let line_trimmed = line_trimmed
.strip_suffix(end.as_ref())
.map(|s| {
last_line_delimiter = Some(end);
s.trim_end()
})
.unwrap_or(line_trimmed);
let line_trimmed = line_trimmed
.strip_prefix(prefix.as_ref())
.unwrap_or(line_trimmed);
Ok(line_trimmed)
} else if let Some(CommentFormat::BlockLine(prefix)) = &comment_prefix {
line_trimmed.strip_prefix(prefix).with_context(|| {
format!("line did not start with prefix {prefix:?}: {line:?}")
})
} else {
line_trimmed
.strip_prefix(&line_prefix.trim_start())
.with_context(|| {
format!("line did not start with prefix {line_prefix:?}: {line:?}")
})
}
})
.collect::<Result<Vec<_>, _>>()
.log_err()
else {
continue;
};
let wrap_column = options.line_length.or(self.hard_wrap).unwrap_or_else(|| {
buffer
.language_settings_at(Point::new(start_row, 0), cx)
.preferred_line_length as usize
});
// With a rewrap prefix, continuation lines are padded with one space per byte
// of the matched prefix (`str::len`) so the text lines up under the first line.
let subsequent_lines_prefix = if let Some(rewrap_prefix_str) = &rewrap_prefix {
format!("{}{}", indent_prefix, " ".repeat(rewrap_prefix_str.len()))
} else {
line_prefix.clone()
};
let wrapped_text = {
let mut wrapped_text = wrap_with_prefix(
line_prefix,
subsequent_lines_prefix,
lines_without_prefixes.join("\n"),
wrap_column,
tab_size,
options.preserve_existing_whitespace,
);
if let Some((indent, delimiter)) = first_line_delimiter {
wrapped_text = format!("{indent}{delimiter}\n{wrapped_text}");
}
if let Some(last_line) = last_line_delimiter {
wrapped_text = format!("{wrapped_text}\n{indent_prefix}{last_line}");
}
wrapped_text
};
// TODO: should always use char-based diff while still supporting cursor behavior that
// matches vim.
let mut diff_options = DiffOptions::default();
if options.override_language_settings {
diff_options.max_word_diff_len = 0;
diff_options.max_word_diff_line_count = 0;
} else {
diff_options.max_word_diff_len = usize::MAX;
diff_options.max_word_diff_line_count = usize::MAX;
}
// Diff offsets are relative to `selection_text`, so shift them by the range's
// starting offset before anchoring them in the buffer.
for (old_range, new_text) in
text_diff_with_options(&selection_text, &wrapped_text, diff_options)
{
let edit_start = buffer.anchor_after(start_offset + old_range.start);
let edit_end = buffer.anchor_after(start_offset + old_range.end);
edits.push((edit_start..edit_end, new_text));
}
rewrapped_row_ranges.push(start_row..=end_row);
}
// Apply every collected edit in a single buffer update.
self.buffer
.update(cx, |buffer, cx| buffer.edit(edits, None, cx));
}
}
/// Returns the display width of `text` when it begins at column `offset`.
///
/// Every character counts as one column, except a tab, which advances to the
/// next multiple of `tab_size`. The returned width excludes `offset` itself.
fn char_len_with_expanded_tabs(offset: usize, text: &str, tab_size: NonZeroU32) -> usize {
    let tab_width = tab_size.get() as usize;
    let end_column = text.chars().fold(offset, |column, ch| match ch {
        // A tab fills the remainder of the current tab stop.
        '\t' => column + (tab_width - column % tab_width),
        _ => column + 1,
    });
    end_column - offset
}
/// Splits a string into tokens that are either runs of text which should stay
/// together or runs of whitespace.
struct WordBreakingTokenizer<'a> {
    /// The text this tokenizer was created over.
    input: &'a str,
}

impl<'a> WordBreakingTokenizer<'a> {
    /// Creates a tokenizer over `input`.
    fn new(input: &'a str) -> Self {
        WordBreakingTokenizer { input }
    }
}
fn is_char_ideographic(ch: char) -> bool {
use unicode_script::Script::*;
use unicode_script::UnicodeScript;
matches!(ch.script(), Han | Tangut | Yi)
}
fn is_grapheme_ideographic(text: &str) -> bool {
text.chars().any(is_char_ideographic)
}
/// Returns `true` when at least one character of `text` is Unicode whitespace.
fn is_grapheme_whitespace(text: &str) -> bool {
    text.chars().any(char::is_whitespace)
}
/// Returns `true` when `text` begins with a punctuation mark that must not be
/// separated from the ideograph before it (so a line break never lands between
/// an ideograph and its trailing punctuation).
///
/// Only the first character of `text` is inspected; an empty string yields
/// `false`.
///
/// The code points are spelled as `\u{…}` escapes on purpose: the fullwidth
/// forms are visually confusable with their ASCII counterparts and are easily
/// lost or altered by tools that sanitize "ambiguous" Unicode.
fn should_stay_with_preceding_ideograph(text: &str) -> bool {
    text.chars().next().is_some_and(|ch| {
        matches!(
            ch,
            '\u{3002}' // IDEOGRAPHIC FULL STOP
                | '\u{3001}' // IDEOGRAPHIC COMMA
                | '\u{FF0C}' // FULLWIDTH COMMA
                | '\u{FF1F}' // FULLWIDTH QUESTION MARK
                | '\u{FF01}' // FULLWIDTH EXCLAMATION MARK
                | '\u{FF1A}' // FULLWIDTH COLON
                | '\u{FF1B}' // FULLWIDTH SEMICOLON
                | '\u{2026}' // HORIZONTAL ELLIPSIS
        )
    })
}
/// A token produced by `WordBreakingTokenizer`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum WordBreakToken<'a> {
/// A non-whitespace token; `grapheme_len` is the number of graphemes in `token`.
Word { token: &'a str, grapheme_len: usize },
/// A whitespace token that is not exactly `"\n"`; `grapheme_len` is the number
/// of graphemes in `token`.
InlineWhitespace { token: &'a str, grapheme_len: usize },
/// A token consisting of exactly `"\n"`.
Newline,
}
impl<'a> Iterator for WordBreakingTokenizer<'a> {
/// Yields a span, the count of graphemes in the token, and whether it was
/// whitespace. Note that it also breaks at word boundaries.
type Item = WordBreakToken<'a>;
/// Cuts the next token off the front of `self.input` and returns it, or
/// `None` once the input is exhausted.
fn next(&mut self) -> Option<Self::Item> {
use unicode_segmentation::UnicodeSegmentation;
if self.input.is_empty() {
return None;
}
let mut iter = self.input.graphemes(true).peekable();
// `offset` is the byte length of the token so far; `grapheme_len` is its
// length in graphemes.
let mut offset = 0;
let mut grapheme_len = 0;
if let Some(first_grapheme) = iter.next() {
// The first grapheme decides the class of the whole token.
let is_newline = first_grapheme == "\n";
let is_whitespace = is_grapheme_whitespace(first_grapheme);
offset += first_grapheme.len();
grapheme_len += 1;
if is_grapheme_ideographic(first_grapheme) && !is_whitespace {
// An ideograph forms a token on its own, optionally taking along one
// following punctuation grapheme that must stay attached to it.
if let Some(grapheme) = iter.peek().copied()
&& should_stay_with_preceding_ideograph(grapheme)
{
offset += grapheme.len();
grapheme_len += 1;
}
} else {
// NOTE(review): the indices produced here are relative to
// `self.input[offset..]`, while the comparison in the loop below is
// against `offset`, which is relative to `self.input`. Since
// `next_word_bound` is never advanced past the first entry, this check
// looks like it can never fire — confirm the intent before relying on it.
let mut words = self.input[offset..].split_word_bound_indices().peekable();
let mut next_word_bound = words.peek().copied();
if next_word_bound.is_some_and(|(i, _)| i == 0) {
next_word_bound = words.next();
}
// Extend the token while the following graphemes have the same
// whitespace-ness and newline-ness as the first one.
while let Some(grapheme) = iter.peek().copied() {
if next_word_bound.is_some_and(|(i, _)| i == offset) {
break;
};
if is_grapheme_whitespace(grapheme) != is_whitespace
|| (grapheme == "\n") != is_newline
{
break;
};
offset += grapheme.len();
grapheme_len += 1;
iter.next();
}
}
// Split the token off and keep the remainder for the next call.
let token = &self.input[..offset];
self.input = &self.input[offset..];
if token == "\n" {
Some(WordBreakToken::Newline)
} else if is_whitespace {
Some(WordBreakToken::InlineWhitespace {
token,
grapheme_len,
})
} else {
Some(WordBreakToken::Word {
token,
grapheme_len,
})
}
} else {
None
}
}
}
/// Re-flows `unwrapped_text` into lines of at most `wrap_column` columns.
///
/// The first output line starts with `first_line_prefix` and every later line
/// with `subsequent_lines_prefix`; prefix widths are measured with tabs
/// expanded to `tab_size`. A word that does not fit on a line that already has
/// content is moved to a new line; a word that is alone on its line is never
/// split, so such a line may exceed `wrap_column`.
///
/// When `preserve_existing_whitespace` is `false`, consecutive whitespace
/// (including newlines) collapses to a single whitespace grapheme and
/// whitespace at the start of a line is dropped. When it is `true`, whitespace
/// runs are kept as they are and every newline in the input forces a line
/// break.
fn wrap_with_prefix(
    first_line_prefix: String,
    subsequent_lines_prefix: String,
    unwrapped_text: String,
    wrap_column: usize,
    tab_size: NonZeroU32,
    preserve_existing_whitespace: bool,
) -> String {
    let first_prefix_width = char_len_with_expanded_tabs(0, &first_line_prefix, tab_size);
    let later_prefix_width = char_len_with_expanded_tabs(0, &subsequent_lines_prefix, tab_size);

    let mut output = String::new();
    let mut line = first_line_prefix;
    let mut line_width = first_prefix_width;
    // Width of the prefix that opened the line currently being built.
    let mut prefix_width = first_prefix_width;
    let mut previous_was_whitespace = false;

    // Emits the current line (without trailing whitespace) and starts a fresh
    // one that holds only the subsequent-lines prefix.
    macro_rules! start_new_line {
        () => {{
            output.push_str(line.trim_end());
            output.push('\n');
            line = subsequent_lines_prefix.clone();
            line_width = later_prefix_width;
            prefix_width = later_prefix_width;
        }};
    }

    for token in WordBreakingTokenizer::new(&unwrapped_text) {
        let follows_whitespace = previous_was_whitespace;
        let line_has_content = line_width != prefix_width;
        match token {
            WordBreakToken::Word {
                token,
                grapheme_len,
            } => {
                previous_was_whitespace = false;
                // Only break before the word if the line already holds something;
                // otherwise the word would not fit on any line anyway.
                if line_has_content && line_width + grapheme_len > wrap_column {
                    start_new_line!();
                }
                line.push_str(token);
                line_width += grapheme_len;
            }
            WordBreakToken::InlineWhitespace {
                token,
                grapheme_len,
            } => {
                previous_was_whitespace = true;
                let (token, grapheme_len) = if preserve_existing_whitespace {
                    (token, grapheme_len)
                } else if follows_whitespace {
                    // Part of a whitespace run that has already been handled.
                    continue;
                } else {
                    // Keep a single whitespace grapheme as-is
                    let first =
                        unicode_segmentation::UnicodeSegmentation::graphemes(token, true)
                            .next()
                            .unwrap_or(" ");
                    (first, 1)
                };
                if line_width + grapheme_len > wrap_column {
                    start_new_line!();
                } else if line_has_content || preserve_existing_whitespace {
                    line.push_str(token);
                    line_width += grapheme_len;
                }
            }
            WordBreakToken::Newline => {
                previous_was_whitespace = true;
                if preserve_existing_whitespace {
                    start_new_line!();
                } else if !follows_whitespace && line_has_content {
                    // A newline acts like a single space unless that space would
                    // overflow the line.
                    if line_width + 1 > wrap_column {
                        start_new_line!();
                    } else {
                        line.push(' ');
                        line_width += 1;
                    }
                }
            }
        }
    }

    if !line.is_empty() {
        output.push_str(&line);
    }
    output
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Tab expansion depends on both the tab size and the starting column.
    #[test]
    fn test_string_size_with_expanded_tabs() {
        let nz = |val| NonZeroU32::new(val).unwrap();
        assert_eq!(char_len_with_expanded_tabs(0, "", nz(4)), 0);
        assert_eq!(char_len_with_expanded_tabs(0, "hello", nz(4)), 5);
        assert_eq!(char_len_with_expanded_tabs(0, "\thello", nz(4)), 9);
        assert_eq!(char_len_with_expanded_tabs(0, "abc\tab", nz(4)), 6);
        assert_eq!(char_len_with_expanded_tabs(0, "hello\t", nz(4)), 8);
        assert_eq!(char_len_with_expanded_tabs(0, "\t\t", nz(8)), 16);
        assert_eq!(char_len_with_expanded_tabs(0, "x\t", nz(8)), 8);
        assert_eq!(char_len_with_expanded_tabs(7, "x\t", nz(8)), 9);
    }

    /// Checks the token stream for ASCII, non-ASCII and ideographic input.
    #[test]
    fn test_word_breaking_tokenizer() {
        let tests: &[(&str, &[WordBreakToken<'static>])] = &[
            ("", &[]),
            // Two spaces form a single whitespace token of two graphemes.
            ("  ", &[whitespace("  ", 2)]),
            ("Ʒ", &[word("Ʒ", 1)]),
            ("Ǽ", &[word("Ǽ", 1)]),
            ("⋑", &[word("⋑", 1)]),
            ("⋑⋑", &[word("⋑⋑", 2)]),
            (
                // Each ideograph is its own token; the fullwidth comma stays
                // attached to the ideograph before it.
                "原理,进而",
                &[word("原", 1), word("理,", 2), word("进", 1), word("而", 1)],
            ),
            (
                "hello world",
                &[word("hello", 5), whitespace(" ", 1), word("world", 5)],
            ),
            (
                "hello, world",
                &[word("hello,", 6), whitespace(" ", 1), word("world", 5)],
            ),
            (
                "  hello world",
                &[
                    whitespace("  ", 2),
                    word("hello", 5),
                    whitespace(" ", 1),
                    word("world", 5),
                ],
            ),
            (
                "这是什么 \n 钢笔",
                &[
                    word("这", 1),
                    word("是", 1),
                    word("什", 1),
                    word("么", 1),
                    whitespace(" ", 1),
                    newline(),
                    whitespace(" ", 1),
                    word("钢", 1),
                    word("笔", 1),
                ],
            ),
            // Non-ASCII whitespace (EM SPACE), written as an escape so it stays
            // visible and survives tools that strip invisible characters.
            (
                "\u{2003}mutton",
                &[whitespace("\u{2003}", 1), word("mutton", 6)],
            ),
        ];

        fn word(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
            WordBreakToken::Word {
                token,
                grapheme_len,
            }
        }

        fn whitespace(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
            WordBreakToken::InlineWhitespace {
                token,
                grapheme_len,
            }
        }

        fn newline() -> WordBreakToken<'static> {
            WordBreakToken::Newline
        }

        for (input, result) in tests {
            assert_eq!(
                WordBreakingTokenizer::new(input)
                    .collect::<Vec<_>>()
                    .as_slice(),
                *result,
            );
        }
    }

    /// Checks wrapping with prefixes, whitespace collapsing and ideographs.
    #[test]
    fn test_wrap_with_prefix() {
        // A word longer than the wrap column is kept whole on its line.
        assert_eq!(
            wrap_with_prefix(
                "# ".to_string(),
                "# ".to_string(),
                "abcdefg".to_string(),
                4,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "# abcdefg"
        );
        // Leading whitespace is dropped when whitespace is not preserved.
        assert_eq!(
            wrap_with_prefix(
                "".to_string(),
                "".to_string(),
                "\thello world".to_string(),
                8,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "hello\nworld"
        );
        // A newline inside a whitespace run collapses with it.
        assert_eq!(
            wrap_with_prefix(
                "// ".to_string(),
                "// ".to_string(),
                "xx \nyy zz aa bb cc".to_string(),
                12,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "// xx yy zz\n// aa bb cc"
        );
        // Ideographs may be broken between any two characters.
        assert_eq!(
            wrap_with_prefix(
                String::new(),
                String::new(),
                "这是什么 \n 钢笔".to_string(),
                3,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "这是什\n么 钢\n笔"
        );
        // A single non-ASCII whitespace grapheme is kept as-is.
        assert_eq!(
            wrap_with_prefix(
                String::new(),
                String::new(),
                format!("foo{}bar", '\u{2009}'), // thin space
                80,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            format!("foo{}bar", '\u{2009}')
        );
    }
}