mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
zeta: Allow the server to select the editable and context ranges more flexibly (#50975)
Release Notes: - N/A --------- Co-authored-by: Ben Kunkle <ben@zed.dev>
This commit is contained in:
parent
cb8088049e
commit
2bd5c21855
16 changed files with 756 additions and 522 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -3202,6 +3202,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"text",
|
||||
"zeta_prompt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -22,5 +22,6 @@ log.workspace = true
|
|||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
text.workspace = true
|
||||
zeta_prompt.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use gpui::{App, AppContext as _, Context, Entity, Global, SharedString, Task};
|
|||
use http_client::HttpClient;
|
||||
use icons::IconName;
|
||||
use language::{
|
||||
Anchor, Buffer, BufferSnapshot, EditPreview, ToPoint, language_settings::all_language_settings,
|
||||
Anchor, Buffer, BufferSnapshot, EditPreview, language_settings::all_language_settings,
|
||||
};
|
||||
use language_model::{ApiKeyState, AuthenticateError, EnvVar, env_var};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -18,7 +18,7 @@ use std::{
|
|||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use text::{OffsetRangeExt as _, ToOffset};
|
||||
use text::ToOffset;
|
||||
|
||||
pub const CODESTRAL_API_URL: &str = "https://codestral.mistral.ai";
|
||||
pub const DEBOUNCE_TIMEOUT: Duration = Duration::from_millis(150);
|
||||
|
|
@ -259,28 +259,31 @@ impl EditPredictionDelegate for CodestralEditPredictionDelegate {
|
|||
}
|
||||
|
||||
let cursor_offset = cursor_position.to_offset(&snapshot);
|
||||
let cursor_point = cursor_offset.to_point(&snapshot);
|
||||
|
||||
const MAX_EDITABLE_TOKENS: usize = 350;
|
||||
const MAX_CONTEXT_TOKENS: usize = 150;
|
||||
const MAX_REWRITE_TOKENS: usize = 350;
|
||||
|
||||
let (_, context_range) =
|
||||
cursor_excerpt::editable_and_context_ranges_for_cursor_position(
|
||||
cursor_point,
|
||||
&snapshot,
|
||||
MAX_REWRITE_TOKENS,
|
||||
MAX_CONTEXT_TOKENS,
|
||||
);
|
||||
|
||||
let context_range = context_range.to_offset(&snapshot);
|
||||
let excerpt_text = snapshot
|
||||
.text_for_range(context_range.clone())
|
||||
.collect::<String>();
|
||||
let cursor_within_excerpt = cursor_offset
|
||||
let (excerpt_point_range, excerpt_offset_range, cursor_offset_in_excerpt) =
|
||||
cursor_excerpt::compute_cursor_excerpt(&snapshot, cursor_offset);
|
||||
let syntax_ranges = cursor_excerpt::compute_syntax_ranges(
|
||||
&snapshot,
|
||||
cursor_offset,
|
||||
&excerpt_offset_range,
|
||||
);
|
||||
let excerpt_text: String = snapshot.text_for_range(excerpt_point_range).collect();
|
||||
let (_, context_range) = zeta_prompt::compute_editable_and_context_ranges(
|
||||
&excerpt_text,
|
||||
cursor_offset_in_excerpt,
|
||||
&syntax_ranges,
|
||||
MAX_EDITABLE_TOKENS,
|
||||
MAX_CONTEXT_TOKENS,
|
||||
);
|
||||
let context_text = &excerpt_text[context_range.clone()];
|
||||
let cursor_within_excerpt = cursor_offset_in_excerpt
|
||||
.saturating_sub(context_range.start)
|
||||
.min(excerpt_text.len());
|
||||
let prompt = excerpt_text[..cursor_within_excerpt].to_string();
|
||||
let suffix = excerpt_text[cursor_within_excerpt..].to_string();
|
||||
.min(context_text.len());
|
||||
let prompt = context_text[..cursor_within_excerpt].to_string();
|
||||
let suffix = context_text[cursor_within_excerpt..].to_string();
|
||||
|
||||
let completion_text = match Self::fetch_completion(
|
||||
http_client,
|
||||
|
|
|
|||
|
|
@ -1,12 +1,9 @@
|
|||
use crate::{
|
||||
StoredEvent, cursor_excerpt::editable_and_context_ranges_for_cursor_position,
|
||||
example_spec::ExampleSpec,
|
||||
};
|
||||
use crate::{StoredEvent, example_spec::ExampleSpec};
|
||||
use anyhow::Result;
|
||||
use buffer_diff::BufferDiffSnapshot;
|
||||
use collections::HashMap;
|
||||
use gpui::{App, Entity, Task};
|
||||
use language::{Buffer, ToPoint as _};
|
||||
use language::Buffer;
|
||||
use project::{Project, WorktreeId};
|
||||
use std::{collections::hash_map, fmt::Write as _, ops::Range, path::Path, sync::Arc};
|
||||
use text::{BufferSnapshot as TextBufferSnapshot, Point};
|
||||
|
|
@ -157,17 +154,34 @@ fn compute_cursor_excerpt(
|
|||
cursor_anchor: language::Anchor,
|
||||
) -> (String, usize, Range<Point>) {
|
||||
use text::ToOffset as _;
|
||||
use text::ToPoint as _;
|
||||
|
||||
let cursor_point = cursor_anchor.to_point(snapshot);
|
||||
let (_editable_range, context_range) =
|
||||
editable_and_context_ranges_for_cursor_position(cursor_point, snapshot, 100, 50);
|
||||
let context_start_offset = context_range.start.to_offset(snapshot);
|
||||
let cursor_offset = cursor_anchor.to_offset(snapshot);
|
||||
let cursor_offset_in_excerpt = cursor_offset.saturating_sub(context_start_offset);
|
||||
let excerpt = snapshot
|
||||
.text_for_range(context_range.clone())
|
||||
.collect::<String>();
|
||||
(excerpt, cursor_offset_in_excerpt, context_range)
|
||||
let (excerpt_point_range, excerpt_offset_range, cursor_offset_in_excerpt) =
|
||||
crate::cursor_excerpt::compute_cursor_excerpt(snapshot, cursor_offset);
|
||||
let syntax_ranges = crate::cursor_excerpt::compute_syntax_ranges(
|
||||
snapshot,
|
||||
cursor_offset,
|
||||
&excerpt_offset_range,
|
||||
);
|
||||
let excerpt_text: String = snapshot.text_for_range(excerpt_point_range).collect();
|
||||
let (_, context_range) = zeta_prompt::compute_editable_and_context_ranges(
|
||||
&excerpt_text,
|
||||
cursor_offset_in_excerpt,
|
||||
&syntax_ranges,
|
||||
100,
|
||||
50,
|
||||
);
|
||||
let context_text = excerpt_text[context_range.clone()].to_string();
|
||||
let cursor_in_context = cursor_offset_in_excerpt.saturating_sub(context_range.start);
|
||||
let context_buffer_start =
|
||||
(excerpt_offset_range.start + context_range.start).to_point(snapshot);
|
||||
let context_buffer_end = (excerpt_offset_range.start + context_range.end).to_point(snapshot);
|
||||
(
|
||||
context_text,
|
||||
cursor_in_context,
|
||||
context_buffer_start..context_buffer_end,
|
||||
)
|
||||
}
|
||||
|
||||
async fn collect_snapshots(
|
||||
|
|
|
|||
|
|
@ -1,150 +1,30 @@
|
|||
use language::{BufferSnapshot, Point};
|
||||
use language::{BufferSnapshot, Point, ToPoint as _};
|
||||
use std::ops::Range;
|
||||
use text::OffsetRangeExt as _;
|
||||
use zeta_prompt::ExcerptRanges;
|
||||
|
||||
/// Computes all range variants for a cursor position: editable ranges at 150, 180, and 350
|
||||
/// token budgets, plus their corresponding context expansions. Returns the full excerpt range
|
||||
/// (union of all context ranges) and the individual sub-ranges as Points.
|
||||
pub fn compute_excerpt_ranges(
|
||||
position: Point,
|
||||
const CURSOR_EXCERPT_TOKEN_BUDGET: usize = 8192;
|
||||
|
||||
/// Computes a cursor excerpt as the largest linewise symmetric region around
|
||||
/// the cursor that fits within an 8192-token budget. Returns the point range,
|
||||
/// byte offset range, and the cursor offset relative to the excerpt start.
|
||||
pub fn compute_cursor_excerpt(
|
||||
snapshot: &BufferSnapshot,
|
||||
) -> (Range<Point>, Range<usize>, ExcerptRanges) {
|
||||
let editable_150 = compute_editable_range(snapshot, position, 150);
|
||||
let editable_180 = compute_editable_range(snapshot, position, 180);
|
||||
let editable_350 = compute_editable_range(snapshot, position, 350);
|
||||
let editable_512 = compute_editable_range(snapshot, position, 512);
|
||||
cursor_offset: usize,
|
||||
) -> (Range<Point>, Range<usize>, usize) {
|
||||
let cursor_point = cursor_offset.to_point(snapshot);
|
||||
let cursor_row = cursor_point.row;
|
||||
let (start_row, end_row, _) =
|
||||
expand_symmetric_from_cursor(snapshot, cursor_row, CURSOR_EXCERPT_TOKEN_BUDGET);
|
||||
|
||||
let editable_150_context_350 =
|
||||
expand_context_syntactically_then_linewise(snapshot, editable_150.clone(), 350);
|
||||
let editable_180_context_350 =
|
||||
expand_context_syntactically_then_linewise(snapshot, editable_180.clone(), 350);
|
||||
let editable_350_context_150 =
|
||||
expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 150);
|
||||
let editable_350_context_512 =
|
||||
expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 512);
|
||||
let editable_350_context_1024 =
|
||||
expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 1024);
|
||||
let context_4096 = expand_context_syntactically_then_linewise(
|
||||
snapshot,
|
||||
editable_350_context_1024.clone(),
|
||||
4096 - 1024,
|
||||
);
|
||||
let context_8192 =
|
||||
expand_context_syntactically_then_linewise(snapshot, context_4096.clone(), 8192 - 4096);
|
||||
let excerpt_range = Point::new(start_row, 0)..Point::new(end_row, snapshot.line_len(end_row));
|
||||
let excerpt_offset_range = excerpt_range.to_offset(snapshot);
|
||||
let cursor_offset_in_excerpt = cursor_offset - excerpt_offset_range.start;
|
||||
|
||||
let full_start_row = context_8192.start.row;
|
||||
let full_end_row = context_8192.end.row;
|
||||
|
||||
let full_context =
|
||||
Point::new(full_start_row, 0)..Point::new(full_end_row, snapshot.line_len(full_end_row));
|
||||
|
||||
let full_context_offset_range = full_context.to_offset(snapshot);
|
||||
|
||||
let to_offset = |range: &Range<Point>| -> Range<usize> {
|
||||
let start = range.start.to_offset(snapshot);
|
||||
let end = range.end.to_offset(snapshot);
|
||||
(start - full_context_offset_range.start)..(end - full_context_offset_range.start)
|
||||
};
|
||||
|
||||
let ranges = ExcerptRanges {
|
||||
editable_150: to_offset(&editable_150),
|
||||
editable_180: to_offset(&editable_180),
|
||||
editable_350: to_offset(&editable_350),
|
||||
editable_512: Some(to_offset(&editable_512)),
|
||||
editable_150_context_350: to_offset(&editable_150_context_350),
|
||||
editable_180_context_350: to_offset(&editable_180_context_350),
|
||||
editable_350_context_150: to_offset(&editable_350_context_150),
|
||||
editable_350_context_512: Some(to_offset(&editable_350_context_512)),
|
||||
editable_350_context_1024: Some(to_offset(&editable_350_context_1024)),
|
||||
context_4096: Some(to_offset(&context_4096)),
|
||||
context_8192: Some(to_offset(&context_8192)),
|
||||
};
|
||||
|
||||
(full_context, full_context_offset_range, ranges)
|
||||
}
|
||||
|
||||
pub fn editable_and_context_ranges_for_cursor_position(
|
||||
position: Point,
|
||||
snapshot: &BufferSnapshot,
|
||||
editable_region_token_limit: usize,
|
||||
context_token_limit: usize,
|
||||
) -> (Range<Point>, Range<Point>) {
|
||||
let editable_range = compute_editable_range(snapshot, position, editable_region_token_limit);
|
||||
|
||||
let context_range = expand_context_syntactically_then_linewise(
|
||||
snapshot,
|
||||
editable_range.clone(),
|
||||
context_token_limit,
|
||||
);
|
||||
|
||||
(editable_range, context_range)
|
||||
}
|
||||
|
||||
/// Computes the editable range using a three-phase approach:
|
||||
/// 1. Expand symmetrically from cursor (75% of budget)
|
||||
/// 2. Expand to syntax boundaries
|
||||
/// 3. Continue line-wise in the least-expanded direction
|
||||
fn compute_editable_range(
|
||||
snapshot: &BufferSnapshot,
|
||||
cursor: Point,
|
||||
token_limit: usize,
|
||||
) -> Range<Point> {
|
||||
// Phase 1: Expand symmetrically from cursor using 75% of budget.
|
||||
let initial_budget = (token_limit * 3) / 4;
|
||||
let (mut start_row, mut end_row, mut remaining_tokens) =
|
||||
expand_symmetric_from_cursor(snapshot, cursor.row, initial_budget);
|
||||
|
||||
// Add remaining budget from phase 1.
|
||||
remaining_tokens += token_limit.saturating_sub(initial_budget);
|
||||
|
||||
let original_start = start_row;
|
||||
let original_end = end_row;
|
||||
|
||||
// Phase 2: Expand to syntax boundaries that fit within budget.
|
||||
for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
|
||||
{
|
||||
let tokens_for_start = if boundary_start < start_row {
|
||||
estimate_tokens_for_rows(snapshot, boundary_start, start_row)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let tokens_for_end = if boundary_end > end_row {
|
||||
estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let total_needed = tokens_for_start + tokens_for_end;
|
||||
|
||||
if total_needed <= remaining_tokens {
|
||||
if boundary_start < start_row {
|
||||
start_row = boundary_start;
|
||||
}
|
||||
if boundary_end > end_row {
|
||||
end_row = boundary_end;
|
||||
}
|
||||
remaining_tokens = remaining_tokens.saturating_sub(total_needed);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: Continue line-wise in the direction we expanded least during syntax phase.
|
||||
let expanded_up = original_start.saturating_sub(start_row);
|
||||
let expanded_down = end_row.saturating_sub(original_end);
|
||||
|
||||
(start_row, end_row, _) = expand_linewise_biased(
|
||||
snapshot,
|
||||
start_row,
|
||||
end_row,
|
||||
remaining_tokens,
|
||||
expanded_up <= expanded_down, // prefer_up if we expanded less upward
|
||||
);
|
||||
|
||||
let start = Point::new(start_row, 0);
|
||||
let end = Point::new(end_row, snapshot.line_len(end_row));
|
||||
start..end
|
||||
(
|
||||
excerpt_range,
|
||||
excerpt_offset_range,
|
||||
cursor_offset_in_excerpt,
|
||||
)
|
||||
}
|
||||
|
||||
/// Expands symmetrically from cursor, one line at a time, alternating down then up.
|
||||
|
|
@ -157,7 +37,6 @@ fn expand_symmetric_from_cursor(
|
|||
let mut start_row = cursor_row;
|
||||
let mut end_row = cursor_row;
|
||||
|
||||
// Account for the cursor's line.
|
||||
let cursor_line_tokens = line_token_count(snapshot, cursor_row);
|
||||
token_budget = token_budget.saturating_sub(cursor_line_tokens);
|
||||
|
||||
|
|
@ -169,7 +48,6 @@ fn expand_symmetric_from_cursor(
|
|||
break;
|
||||
}
|
||||
|
||||
// Expand down first (slight forward bias for edit prediction).
|
||||
if can_expand_down {
|
||||
let next_row = end_row + 1;
|
||||
let line_tokens = line_token_count(snapshot, next_row);
|
||||
|
|
@ -181,7 +59,6 @@ fn expand_symmetric_from_cursor(
|
|||
}
|
||||
}
|
||||
|
||||
// Then expand up.
|
||||
if can_expand_up && token_budget > 0 {
|
||||
let next_row = start_row - 1;
|
||||
let line_tokens = line_token_count(snapshot, next_row);
|
||||
|
|
@ -197,74 +74,6 @@ fn expand_symmetric_from_cursor(
|
|||
(start_row, end_row, token_budget)
|
||||
}
|
||||
|
||||
/// Expands line-wise with a bias toward one direction.
|
||||
/// Returns (start_row, end_row, remaining_tokens).
|
||||
fn expand_linewise_biased(
|
||||
snapshot: &BufferSnapshot,
|
||||
mut start_row: u32,
|
||||
mut end_row: u32,
|
||||
mut remaining_tokens: usize,
|
||||
prefer_up: bool,
|
||||
) -> (u32, u32, usize) {
|
||||
loop {
|
||||
let can_expand_up = start_row > 0;
|
||||
let can_expand_down = end_row < snapshot.max_point().row;
|
||||
|
||||
if remaining_tokens == 0 || (!can_expand_up && !can_expand_down) {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut expanded = false;
|
||||
|
||||
// Try preferred direction first.
|
||||
if prefer_up {
|
||||
if can_expand_up {
|
||||
let next_row = start_row - 1;
|
||||
let line_tokens = line_token_count(snapshot, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
start_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
if can_expand_down && remaining_tokens > 0 {
|
||||
let next_row = end_row + 1;
|
||||
let line_tokens = line_token_count(snapshot, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
end_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if can_expand_down {
|
||||
let next_row = end_row + 1;
|
||||
let line_tokens = line_token_count(snapshot, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
end_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
if can_expand_up && remaining_tokens > 0 {
|
||||
let next_row = start_row - 1;
|
||||
let line_tokens = line_token_count(snapshot, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
start_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !expanded {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
(start_row, end_row, remaining_tokens)
|
||||
}
|
||||
|
||||
/// Typical number of string bytes per token for the purposes of limiting model input. This is
|
||||
/// intentionally low to err on the side of underestimating limits.
|
||||
pub(crate) const BYTES_PER_TOKEN_GUESS: usize = 3;
|
||||
|
|
@ -277,113 +86,50 @@ fn line_token_count(snapshot: &BufferSnapshot, row: u32) -> usize {
|
|||
guess_token_count(snapshot.line_len(row) as usize).max(1)
|
||||
}
|
||||
|
||||
/// Estimates token count for rows in range [start_row, end_row).
|
||||
fn estimate_tokens_for_rows(snapshot: &BufferSnapshot, start_row: u32, end_row: u32) -> usize {
|
||||
let mut tokens = 0;
|
||||
for row in start_row..end_row {
|
||||
tokens += line_token_count(snapshot, row);
|
||||
}
|
||||
tokens
|
||||
}
|
||||
|
||||
/// Returns an iterator of (start_row, end_row) for successively larger syntax nodes
|
||||
/// containing the given row range. Smallest containing node first.
|
||||
fn containing_syntax_boundaries(
|
||||
/// Computes the byte offset ranges of all syntax nodes containing the cursor,
|
||||
/// ordered from innermost to outermost. The offsets are relative to
|
||||
/// `excerpt_offset_range.start`.
|
||||
pub fn compute_syntax_ranges(
|
||||
snapshot: &BufferSnapshot,
|
||||
start_row: u32,
|
||||
end_row: u32,
|
||||
) -> impl Iterator<Item = (u32, u32)> {
|
||||
let range = Point::new(start_row, 0)..Point::new(end_row, snapshot.line_len(end_row));
|
||||
cursor_offset: usize,
|
||||
excerpt_offset_range: &Range<usize>,
|
||||
) -> Vec<Range<usize>> {
|
||||
let cursor_point = cursor_offset.to_point(snapshot);
|
||||
let range = cursor_point..cursor_point;
|
||||
let mut current = snapshot.syntax_ancestor(range);
|
||||
let mut last_rows: Option<(u32, u32)> = None;
|
||||
let mut ranges = Vec::new();
|
||||
let mut last_range: Option<(usize, usize)> = None;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
while let Some(node) = current.take() {
|
||||
let node_start_row = node.start_position().row as u32;
|
||||
let node_end_row = node.end_position().row as u32;
|
||||
let rows = (node_start_row, node_end_row);
|
||||
while let Some(node) = current.take() {
|
||||
let node_start = node.start_byte();
|
||||
let node_end = node.end_byte();
|
||||
let key = (node_start, node_end);
|
||||
|
||||
current = node.parent();
|
||||
current = node.parent();
|
||||
|
||||
// Skip nodes that don't extend beyond our range.
|
||||
if node_start_row >= start_row && node_end_row <= end_row {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip if same as last returned (some nodes have same span).
|
||||
if last_rows == Some(rows) {
|
||||
continue;
|
||||
}
|
||||
|
||||
last_rows = Some(rows);
|
||||
return Some(rows);
|
||||
if last_range == Some(key) {
|
||||
continue;
|
||||
}
|
||||
None
|
||||
})
|
||||
}
|
||||
last_range = Some(key);
|
||||
|
||||
/// Expands context by first trying to reach syntax boundaries,
|
||||
/// then expanding line-wise only if no syntax expansion occurred.
|
||||
fn expand_context_syntactically_then_linewise(
|
||||
snapshot: &BufferSnapshot,
|
||||
editable_range: Range<Point>,
|
||||
context_token_limit: usize,
|
||||
) -> Range<Point> {
|
||||
let mut start_row = editable_range.start.row;
|
||||
let mut end_row = editable_range.end.row;
|
||||
let mut remaining_tokens = context_token_limit;
|
||||
let mut did_syntax_expand = false;
|
||||
|
||||
// Phase 1: Try to expand to containing syntax boundaries, picking the largest that fits.
|
||||
for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
|
||||
{
|
||||
let tokens_for_start = if boundary_start < start_row {
|
||||
estimate_tokens_for_rows(snapshot, boundary_start, start_row)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let tokens_for_end = if boundary_end > end_row {
|
||||
estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let total_needed = tokens_for_start + tokens_for_end;
|
||||
|
||||
if total_needed <= remaining_tokens {
|
||||
if boundary_start < start_row {
|
||||
start_row = boundary_start;
|
||||
}
|
||||
if boundary_end > end_row {
|
||||
end_row = boundary_end;
|
||||
}
|
||||
remaining_tokens = remaining_tokens.saturating_sub(total_needed);
|
||||
did_syntax_expand = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
let start = node_start.saturating_sub(excerpt_offset_range.start);
|
||||
let end = node_end
|
||||
.min(excerpt_offset_range.end)
|
||||
.saturating_sub(excerpt_offset_range.start);
|
||||
ranges.push(start..end);
|
||||
}
|
||||
|
||||
// Phase 2: Only expand line-wise if no syntax expansion occurred.
|
||||
if !did_syntax_expand {
|
||||
(start_row, end_row, _) =
|
||||
expand_linewise_biased(snapshot, start_row, end_row, remaining_tokens, true);
|
||||
}
|
||||
|
||||
let start = Point::new(start_row, 0);
|
||||
let end = Point::new(end_row, snapshot.line_len(end_row));
|
||||
start..end
|
||||
ranges
|
||||
}
|
||||
|
||||
use language::ToOffset as _;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use gpui::{App, AppContext};
|
||||
use gpui::{App, AppContext as _};
|
||||
use indoc::indoc;
|
||||
use language::{Buffer, rust_lang};
|
||||
use util::test::{TextRangeMarker, marked_text_ranges_by};
|
||||
use zeta_prompt::compute_editable_and_context_ranges;
|
||||
|
||||
struct TestCase {
|
||||
name: &'static str,
|
||||
|
|
@ -400,7 +146,18 @@ mod tests {
|
|||
// [ ] = expected context range
|
||||
let test_cases = vec![
|
||||
TestCase {
|
||||
name: "cursor near end of function - expands to syntax boundaries",
|
||||
name: "small function fits entirely in editable and context",
|
||||
marked_text: indoc! {r#"
|
||||
[«fn foo() {
|
||||
let x = 1;ˇ
|
||||
let y = 2;
|
||||
}»]
|
||||
"#},
|
||||
editable_token_limit: 30,
|
||||
context_token_limit: 60,
|
||||
},
|
||||
TestCase {
|
||||
name: "cursor near end of function - editable expands to syntax boundaries",
|
||||
marked_text: indoc! {r#"
|
||||
[fn first() {
|
||||
let a = 1;
|
||||
|
|
@ -413,12 +170,11 @@ mod tests {
|
|||
println!("{}", x + y);ˇ
|
||||
}»]
|
||||
"#},
|
||||
// 18 tokens - expands symmetrically then to syntax boundaries
|
||||
editable_token_limit: 18,
|
||||
context_token_limit: 35,
|
||||
},
|
||||
TestCase {
|
||||
name: "cursor at function start - expands to syntax boundaries",
|
||||
name: "cursor at function start - editable expands to syntax boundaries",
|
||||
marked_text: indoc! {r#"
|
||||
[fn before() {
|
||||
« let a = 1;
|
||||
|
|
@ -434,12 +190,11 @@ mod tests {
|
|||
let b = 2;
|
||||
}]
|
||||
"#},
|
||||
// 25 tokens - expands symmetrically then to syntax boundaries
|
||||
editable_token_limit: 25,
|
||||
context_token_limit: 50,
|
||||
},
|
||||
TestCase {
|
||||
name: "tiny budget - just lines around cursor",
|
||||
name: "tiny budget - just lines around cursor, no syntax expansion",
|
||||
marked_text: indoc! {r#"
|
||||
fn outer() {
|
||||
[ let line1 = 1;
|
||||
|
|
@ -451,22 +206,9 @@ mod tests {
|
|||
let line7 = 7;
|
||||
}
|
||||
"#},
|
||||
// 12 tokens (~36 bytes) = just the cursor line with tiny budget
|
||||
editable_token_limit: 12,
|
||||
context_token_limit: 24,
|
||||
},
|
||||
TestCase {
|
||||
name: "small function fits entirely",
|
||||
marked_text: indoc! {r#"
|
||||
[«fn foo() {
|
||||
let x = 1;ˇ
|
||||
let y = 2;
|
||||
}»]
|
||||
"#},
|
||||
// Plenty of budget for this small function
|
||||
editable_token_limit: 30,
|
||||
context_token_limit: 60,
|
||||
},
|
||||
TestCase {
|
||||
name: "context extends beyond editable",
|
||||
marked_text: indoc! {r#"
|
||||
|
|
@ -476,13 +218,11 @@ mod tests {
|
|||
fn fourth() { let d = 4; }»
|
||||
fn fifth() { let e = 5; }]
|
||||
"#},
|
||||
// Small editable, larger context
|
||||
editable_token_limit: 25,
|
||||
context_token_limit: 45,
|
||||
},
|
||||
// Tests for syntax-aware editable and context expansion
|
||||
TestCase {
|
||||
name: "cursor in first if-statement - expands to syntax boundaries",
|
||||
name: "cursor in first if-block - editable expands to syntax boundaries",
|
||||
marked_text: indoc! {r#"
|
||||
[«fn before() { }
|
||||
|
||||
|
|
@ -503,13 +243,11 @@ mod tests {
|
|||
|
||||
fn after() { }]
|
||||
"#},
|
||||
// 35 tokens allows expansion to include function header and first two if blocks
|
||||
editable_token_limit: 35,
|
||||
// 60 tokens allows context to include the whole file
|
||||
context_token_limit: 60,
|
||||
},
|
||||
TestCase {
|
||||
name: "cursor in middle if-statement - expands to syntax boundaries",
|
||||
name: "cursor in middle if-block - editable spans surrounding blocks",
|
||||
marked_text: indoc! {r#"
|
||||
[fn before() { }
|
||||
|
||||
|
|
@ -530,13 +268,11 @@ mod tests {
|
|||
|
||||
fn after() { }]
|
||||
"#},
|
||||
// 40 tokens allows expansion to surrounding if blocks
|
||||
editable_token_limit: 40,
|
||||
// 60 tokens allows context to include the whole file
|
||||
context_token_limit: 60,
|
||||
},
|
||||
TestCase {
|
||||
name: "cursor near bottom of long function - editable expands toward syntax, context reaches function",
|
||||
name: "cursor near bottom of long function - context reaches function boundary",
|
||||
marked_text: indoc! {r#"
|
||||
[fn other() { }
|
||||
|
||||
|
|
@ -556,11 +292,30 @@ mod tests {
|
|||
|
||||
fn another() { }»]
|
||||
"#},
|
||||
// 40 tokens for editable - allows several lines plus syntax expansion
|
||||
editable_token_limit: 40,
|
||||
// 55 tokens - enough for function but not whole file
|
||||
context_token_limit: 55,
|
||||
},
|
||||
TestCase {
|
||||
name: "zero context budget - context equals editable",
|
||||
marked_text: indoc! {r#"
|
||||
fn before() {
|
||||
let p = 1;
|
||||
let q = 2;
|
||||
[«}
|
||||
|
||||
fn foo() {
|
||||
let x = 1;ˇ
|
||||
let y = 2;
|
||||
}
|
||||
»]
|
||||
fn after() {
|
||||
let r = 3;
|
||||
let s = 4;
|
||||
}
|
||||
"#},
|
||||
editable_token_limit: 15,
|
||||
context_token_limit: 0,
|
||||
},
|
||||
];
|
||||
|
||||
for test_case in test_cases {
|
||||
|
|
@ -580,75 +335,63 @@ mod tests {
|
|||
let cursor_ranges = ranges.remove(&cursor_marker).unwrap_or_default();
|
||||
let expected_editable = ranges.remove(&editable_marker).unwrap_or_default();
|
||||
let expected_context = ranges.remove(&context_marker).unwrap_or_default();
|
||||
assert_eq!(expected_editable.len(), 1);
|
||||
assert_eq!(expected_context.len(), 1);
|
||||
assert_eq!(expected_editable.len(), 1, "{}", test_case.name);
|
||||
assert_eq!(expected_context.len(), 1, "{}", test_case.name);
|
||||
|
||||
cx.new(|cx| {
|
||||
cx.new(|cx: &mut gpui::Context<Buffer>| {
|
||||
let text = text.trim_end_matches('\n');
|
||||
let buffer = Buffer::local(text, cx).with_language(rust_lang(), cx);
|
||||
let snapshot = buffer.snapshot();
|
||||
|
||||
let cursor_offset = cursor_ranges[0].start;
|
||||
let cursor_point = snapshot.offset_to_point(cursor_offset);
|
||||
let expected_editable_start = snapshot.offset_to_point(expected_editable[0].start);
|
||||
let expected_editable_end = snapshot.offset_to_point(expected_editable[0].end);
|
||||
let expected_context_start = snapshot.offset_to_point(expected_context[0].start);
|
||||
let expected_context_end = snapshot.offset_to_point(expected_context[0].end);
|
||||
|
||||
let (actual_editable, actual_context) =
|
||||
editable_and_context_ranges_for_cursor_position(
|
||||
cursor_point,
|
||||
&snapshot,
|
||||
test_case.editable_token_limit,
|
||||
test_case.context_token_limit,
|
||||
);
|
||||
let (_, excerpt_offset_range, cursor_offset_in_excerpt) =
|
||||
compute_cursor_excerpt(&snapshot, cursor_offset);
|
||||
let excerpt_text: String = snapshot
|
||||
.text_for_range(excerpt_offset_range.clone())
|
||||
.collect();
|
||||
let syntax_ranges =
|
||||
compute_syntax_ranges(&snapshot, cursor_offset, &excerpt_offset_range);
|
||||
|
||||
let range_text = |start: Point, end: Point| -> String {
|
||||
snapshot.text_for_range(start..end).collect()
|
||||
let (actual_editable, actual_context) = compute_editable_and_context_ranges(
|
||||
&excerpt_text,
|
||||
cursor_offset_in_excerpt,
|
||||
&syntax_ranges,
|
||||
test_case.editable_token_limit,
|
||||
test_case.context_token_limit,
|
||||
);
|
||||
|
||||
let to_buffer_range = |range: Range<usize>| -> Range<usize> {
|
||||
(excerpt_offset_range.start + range.start)
|
||||
..(excerpt_offset_range.start + range.end)
|
||||
};
|
||||
|
||||
let editable_match = actual_editable.start == expected_editable_start
|
||||
&& actual_editable.end == expected_editable_end;
|
||||
let context_match = actual_context.start == expected_context_start
|
||||
&& actual_context.end == expected_context_end;
|
||||
let actual_editable = to_buffer_range(actual_editable);
|
||||
let actual_context = to_buffer_range(actual_context);
|
||||
|
||||
let expected_editable_range = expected_editable[0].clone();
|
||||
let expected_context_range = expected_context[0].clone();
|
||||
|
||||
let editable_match = actual_editable == expected_editable_range;
|
||||
let context_match = actual_context == expected_context_range;
|
||||
|
||||
if !editable_match || !context_match {
|
||||
let range_text = |range: &Range<usize>| {
|
||||
snapshot.text_for_range(range.clone()).collect::<String>()
|
||||
};
|
||||
|
||||
println!("\n=== FAILED: {} ===", test_case.name);
|
||||
if !editable_match {
|
||||
println!(
|
||||
"\nExpected editable ({:?}..{:?}):",
|
||||
expected_editable_start, expected_editable_end
|
||||
);
|
||||
println!(
|
||||
"---\n{}---",
|
||||
range_text(expected_editable_start, expected_editable_end)
|
||||
);
|
||||
println!(
|
||||
"\nActual editable ({:?}..{:?}):",
|
||||
actual_editable.start, actual_editable.end
|
||||
);
|
||||
println!(
|
||||
"---\n{}---",
|
||||
range_text(actual_editable.start, actual_editable.end)
|
||||
);
|
||||
println!("\nExpected editable ({:?}):", expected_editable_range);
|
||||
println!("---\n{}---", range_text(&expected_editable_range));
|
||||
println!("\nActual editable ({:?}):", actual_editable);
|
||||
println!("---\n{}---", range_text(&actual_editable));
|
||||
}
|
||||
if !context_match {
|
||||
println!(
|
||||
"\nExpected context ({:?}..{:?}):",
|
||||
expected_context_start, expected_context_end
|
||||
);
|
||||
println!(
|
||||
"---\n{}---",
|
||||
range_text(expected_context_start, expected_context_end)
|
||||
);
|
||||
println!(
|
||||
"\nActual context ({:?}..{:?}):",
|
||||
actual_context.start, actual_context.end
|
||||
);
|
||||
println!(
|
||||
"---\n{}---",
|
||||
range_text(actual_context.start, actual_context.end)
|
||||
);
|
||||
println!("\nExpected context ({:?}):", expected_context_range);
|
||||
println!("---\n{}---", range_text(&expected_context_range));
|
||||
println!("\nActual context ({:?}):", actual_context);
|
||||
println!("---\n{}---", range_text(&actual_context));
|
||||
}
|
||||
panic!("Test '{}' failed - see output above", test_case.name);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1890,6 +1890,7 @@ async fn test_edit_prediction_basic_interpolation(cx: &mut TestAppContext) {
|
|||
cursor_offset_in_excerpt: 0,
|
||||
excerpt_start_row: None,
|
||||
excerpt_ranges: Default::default(),
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@ use crate::{
|
|||
use anyhow::{Context as _, Result, anyhow};
|
||||
use gpui::{App, AppContext as _, Entity, Task};
|
||||
use language::{
|
||||
Anchor, Buffer, BufferSnapshot, OffsetRangeExt as _, ToOffset, ToPoint as _,
|
||||
Anchor, Buffer, BufferSnapshot, ToOffset, ToPoint as _,
|
||||
language_settings::all_language_settings,
|
||||
};
|
||||
use settings::EditPredictionPromptFormat;
|
||||
use std::{path::Path, sync::Arc, time::Instant};
|
||||
use zeta_prompt::ZetaPromptInput;
|
||||
use zeta_prompt::{ZetaPromptInput, compute_editable_and_context_ranges};
|
||||
|
||||
const FIM_CONTEXT_TOKENS: usize = 512;
|
||||
|
||||
|
|
@ -62,34 +62,42 @@ pub fn request_prediction(
|
|||
let api_key = load_open_ai_compatible_api_key_if_needed(provider, cx);
|
||||
|
||||
let result = cx.background_spawn(async move {
|
||||
let (excerpt_range, _) = cursor_excerpt::editable_and_context_ranges_for_cursor_position(
|
||||
cursor_point,
|
||||
&snapshot,
|
||||
let cursor_offset = cursor_point.to_offset(&snapshot);
|
||||
let (excerpt_point_range, excerpt_offset_range, cursor_offset_in_excerpt) =
|
||||
cursor_excerpt::compute_cursor_excerpt(&snapshot, cursor_offset);
|
||||
let cursor_excerpt: Arc<str> = snapshot
|
||||
.text_for_range(excerpt_point_range.clone())
|
||||
.collect::<String>()
|
||||
.into();
|
||||
let syntax_ranges =
|
||||
cursor_excerpt::compute_syntax_ranges(&snapshot, cursor_offset, &excerpt_offset_range);
|
||||
let (editable_range, _) = compute_editable_and_context_ranges(
|
||||
&cursor_excerpt,
|
||||
cursor_offset_in_excerpt,
|
||||
&syntax_ranges,
|
||||
FIM_CONTEXT_TOKENS,
|
||||
0,
|
||||
);
|
||||
let excerpt_offset_range = excerpt_range.to_offset(&snapshot);
|
||||
let cursor_offset = cursor_point.to_offset(&snapshot);
|
||||
|
||||
let inputs = ZetaPromptInput {
|
||||
events,
|
||||
related_files: Some(Vec::new()),
|
||||
cursor_offset_in_excerpt: cursor_offset - excerpt_offset_range.start,
|
||||
cursor_path: full_path.clone(),
|
||||
excerpt_start_row: Some(excerpt_range.start.row),
|
||||
cursor_excerpt: snapshot
|
||||
.text_for_range(excerpt_range)
|
||||
.collect::<String>()
|
||||
.into(),
|
||||
excerpt_start_row: Some(excerpt_point_range.start.row),
|
||||
cursor_excerpt,
|
||||
excerpt_ranges: Default::default(),
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
repo_url: None,
|
||||
};
|
||||
|
||||
let prefix = inputs.cursor_excerpt[..inputs.cursor_offset_in_excerpt].to_string();
|
||||
let suffix = inputs.cursor_excerpt[inputs.cursor_offset_in_excerpt..].to_string();
|
||||
let editable_text = &inputs.cursor_excerpt[editable_range.clone()];
|
||||
let cursor_in_editable = cursor_offset_in_excerpt.saturating_sub(editable_range.start);
|
||||
let prefix = editable_text[..cursor_in_editable].to_string();
|
||||
let suffix = editable_text[cursor_in_editable..].to_string();
|
||||
let prompt = format_fim_prompt(prompt_format, &prefix, &suffix);
|
||||
let stop_tokens = get_fim_stop_tokens();
|
||||
|
||||
|
|
|
|||
|
|
@ -10,17 +10,14 @@ use gpui::{
|
|||
App, AppContext as _, Entity, Global, SharedString, Task,
|
||||
http_client::{self, AsyncBody, HttpClient, Method},
|
||||
};
|
||||
use language::{OffsetRangeExt as _, ToOffset, ToPoint as _};
|
||||
use language::{ToOffset, ToPoint as _};
|
||||
use language_model::{ApiKeyState, EnvVar, env_var};
|
||||
use release_channel::AppVersion;
|
||||
use serde::Serialize;
|
||||
use std::{mem, ops::Range, path::Path, sync::Arc, time::Instant};
|
||||
|
||||
use zeta_prompt::{ExcerptRanges, ZetaPromptInput};
|
||||
use zeta_prompt::ZetaPromptInput;
|
||||
|
||||
const MERCURY_API_URL: &str = "https://api.inceptionlabs.ai/v1/edit/completions";
|
||||
const MAX_REWRITE_TOKENS: usize = 150;
|
||||
const MAX_CONTEXT_TOKENS: usize = 350;
|
||||
|
||||
pub struct Mercury {
|
||||
pub api_token: Entity<ApiKeyState>,
|
||||
|
|
@ -64,52 +61,46 @@ impl Mercury {
|
|||
let active_buffer = buffer.clone();
|
||||
|
||||
let result = cx.background_spawn(async move {
|
||||
let (editable_range, context_range) =
|
||||
crate::cursor_excerpt::editable_and_context_ranges_for_cursor_position(
|
||||
cursor_point,
|
||||
&snapshot,
|
||||
MAX_CONTEXT_TOKENS,
|
||||
MAX_REWRITE_TOKENS,
|
||||
);
|
||||
let cursor_offset = cursor_point.to_offset(&snapshot);
|
||||
let (excerpt_point_range, excerpt_offset_range, cursor_offset_in_excerpt) =
|
||||
crate::cursor_excerpt::compute_cursor_excerpt(&snapshot, cursor_offset);
|
||||
|
||||
let related_files = zeta_prompt::filter_redundant_excerpts(
|
||||
related_files,
|
||||
full_path.as_ref(),
|
||||
context_range.start.row..context_range.end.row,
|
||||
excerpt_point_range.start.row..excerpt_point_range.end.row,
|
||||
);
|
||||
|
||||
let context_offset_range = context_range.to_offset(&snapshot);
|
||||
let context_start_row = context_range.start.row;
|
||||
let cursor_excerpt: Arc<str> = snapshot
|
||||
.text_for_range(excerpt_point_range.clone())
|
||||
.collect::<String>()
|
||||
.into();
|
||||
let syntax_ranges = crate::cursor_excerpt::compute_syntax_ranges(
|
||||
&snapshot,
|
||||
cursor_offset,
|
||||
&excerpt_offset_range,
|
||||
);
|
||||
let excerpt_ranges = zeta_prompt::compute_legacy_excerpt_ranges(
|
||||
&cursor_excerpt,
|
||||
cursor_offset_in_excerpt,
|
||||
&syntax_ranges,
|
||||
);
|
||||
|
||||
let editable_offset_range = editable_range.to_offset(&snapshot);
|
||||
|
||||
let editable_range_in_excerpt = (editable_offset_range.start
|
||||
- context_offset_range.start)
|
||||
..(editable_offset_range.end - context_offset_range.start);
|
||||
let context_range_in_excerpt =
|
||||
0..(context_offset_range.end - context_offset_range.start);
|
||||
let editable_offset_range = (excerpt_offset_range.start
|
||||
+ excerpt_ranges.editable_350.start)
|
||||
..(excerpt_offset_range.start + excerpt_ranges.editable_350.end);
|
||||
|
||||
let inputs = zeta_prompt::ZetaPromptInput {
|
||||
events,
|
||||
related_files: Some(related_files),
|
||||
cursor_offset_in_excerpt: cursor_point.to_offset(&snapshot)
|
||||
- context_offset_range.start,
|
||||
- excerpt_offset_range.start,
|
||||
cursor_path: full_path.clone(),
|
||||
cursor_excerpt: snapshot
|
||||
.text_for_range(context_range)
|
||||
.collect::<String>()
|
||||
.into(),
|
||||
cursor_excerpt,
|
||||
experiment: None,
|
||||
excerpt_start_row: Some(context_start_row),
|
||||
excerpt_ranges: ExcerptRanges {
|
||||
editable_150: editable_range_in_excerpt.clone(),
|
||||
editable_180: editable_range_in_excerpt.clone(),
|
||||
editable_350: editable_range_in_excerpt.clone(),
|
||||
editable_150_context_350: context_range_in_excerpt.clone(),
|
||||
editable_180_context_350: context_range_in_excerpt.clone(),
|
||||
editable_350_context_150: context_range_in_excerpt.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
excerpt_start_row: Some(excerpt_point_range.start.row),
|
||||
excerpt_ranges,
|
||||
syntax_ranges: Some(syntax_ranges),
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
repo_url: None,
|
||||
|
|
|
|||
|
|
@ -162,6 +162,7 @@ mod tests {
|
|||
cursor_excerpt: "".into(),
|
||||
excerpt_start_row: None,
|
||||
excerpt_ranges: Default::default(),
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
|
|||
|
|
@ -226,6 +226,7 @@ impl SweepAi {
|
|||
editable_350_context_150: 0..inputs.snapshot.len(),
|
||||
..Default::default()
|
||||
},
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use crate::{
|
||||
CurrentEditPrediction, DebugEvent, EditPredictionFinishedDebugEvent, EditPredictionId,
|
||||
EditPredictionModelInput, EditPredictionStartedDebugEvent, EditPredictionStore, StoredEvent,
|
||||
ZedUpdateRequiredError, cursor_excerpt::compute_excerpt_ranges,
|
||||
ZedUpdateRequiredError,
|
||||
cursor_excerpt::{compute_cursor_excerpt, compute_syntax_ranges},
|
||||
prediction::EditPredictionResult,
|
||||
};
|
||||
use anyhow::Result;
|
||||
|
|
@ -11,8 +12,7 @@ use cloud_llm_client::{
|
|||
use edit_prediction_types::PredictedCursorPosition;
|
||||
use gpui::{App, AppContext as _, Entity, Task, WeakEntity, prelude::*};
|
||||
use language::{
|
||||
Buffer, BufferSnapshot, ToOffset as _, ToPoint, language_settings::all_language_settings,
|
||||
text_diff,
|
||||
Buffer, BufferSnapshot, ToOffset as _, language_settings::all_language_settings, text_diff,
|
||||
};
|
||||
use release_channel::AppVersion;
|
||||
use settings::EditPredictionPromptFormat;
|
||||
|
|
@ -490,33 +490,35 @@ pub fn zeta2_prompt_input(
|
|||
can_collect_data: bool,
|
||||
repo_url: Option<String>,
|
||||
) -> (Range<usize>, zeta_prompt::ZetaPromptInput) {
|
||||
let cursor_point = cursor_offset.to_point(snapshot);
|
||||
let (excerpt_point_range, excerpt_offset_range, cursor_offset_in_excerpt) =
|
||||
compute_cursor_excerpt(snapshot, cursor_offset);
|
||||
|
||||
let (full_context, full_context_offset_range, excerpt_ranges) =
|
||||
compute_excerpt_ranges(cursor_point, snapshot);
|
||||
|
||||
let full_context_start_offset = full_context_offset_range.start;
|
||||
let full_context_start_row = full_context.start.row;
|
||||
|
||||
let cursor_offset_in_excerpt = cursor_offset - full_context_start_offset;
|
||||
let cursor_excerpt: Arc<str> = snapshot
|
||||
.text_for_range(excerpt_point_range.clone())
|
||||
.collect::<String>()
|
||||
.into();
|
||||
let syntax_ranges = compute_syntax_ranges(snapshot, cursor_offset, &excerpt_offset_range);
|
||||
let excerpt_ranges = zeta_prompt::compute_legacy_excerpt_ranges(
|
||||
&cursor_excerpt,
|
||||
cursor_offset_in_excerpt,
|
||||
&syntax_ranges,
|
||||
);
|
||||
|
||||
let prompt_input = zeta_prompt::ZetaPromptInput {
|
||||
cursor_path: excerpt_path,
|
||||
cursor_excerpt: snapshot
|
||||
.text_for_range(full_context)
|
||||
.collect::<String>()
|
||||
.into(),
|
||||
cursor_excerpt,
|
||||
cursor_offset_in_excerpt,
|
||||
excerpt_start_row: Some(full_context_start_row),
|
||||
excerpt_start_row: Some(excerpt_point_range.start.row),
|
||||
events,
|
||||
related_files: Some(related_files),
|
||||
excerpt_ranges,
|
||||
syntax_ranges: Some(syntax_ranges),
|
||||
experiment: preferred_experiment,
|
||||
in_open_source_repo: is_open_source,
|
||||
can_collect_data,
|
||||
repo_url,
|
||||
};
|
||||
(full_context_offset_range, prompt_input)
|
||||
(excerpt_offset_range, prompt_input)
|
||||
}
|
||||
|
||||
pub(crate) fn edit_prediction_accepted(
|
||||
|
|
|
|||
|
|
@ -7,12 +7,12 @@ use crate::{
|
|||
use anyhow::{Context as _, Result};
|
||||
use edit_prediction::{
|
||||
EditPredictionStore,
|
||||
cursor_excerpt::compute_excerpt_ranges,
|
||||
cursor_excerpt::{compute_cursor_excerpt, compute_syntax_ranges},
|
||||
udiff::{OpenedBuffers, refresh_worktree_entries, strip_diff_path_prefix},
|
||||
};
|
||||
use futures::AsyncWriteExt as _;
|
||||
use gpui::{AsyncApp, Entity};
|
||||
use language::{Anchor, Buffer, LanguageNotFound, ToOffset, ToPoint};
|
||||
use language::{Anchor, Buffer, LanguageNotFound, ToOffset};
|
||||
use project::{Project, ProjectPath, buffer_store::BufferStoreEvent};
|
||||
use std::{fs, path::PathBuf, sync::Arc};
|
||||
use zeta_prompt::ZetaPromptInput;
|
||||
|
|
@ -75,32 +75,36 @@ pub async fn run_load_project(
|
|||
|
||||
let (prompt_inputs, language_name) = buffer.read_with(&cx, |buffer, _cx| {
|
||||
let snapshot = buffer.snapshot();
|
||||
let cursor_point = cursor_position.to_point(&snapshot);
|
||||
let cursor_offset = cursor_position.to_offset(&snapshot);
|
||||
let language_name = buffer
|
||||
.language()
|
||||
.map(|l| l.name().to_string())
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
|
||||
let (full_context_point_range, full_context_offset_range, excerpt_ranges) =
|
||||
compute_excerpt_ranges(cursor_point, &snapshot);
|
||||
let (excerpt_point_range, excerpt_offset_range, cursor_offset_in_excerpt) =
|
||||
compute_cursor_excerpt(&snapshot, cursor_offset);
|
||||
|
||||
let cursor_excerpt: Arc<str> = buffer
|
||||
.text_for_range(full_context_offset_range.clone())
|
||||
.text_for_range(excerpt_offset_range.clone())
|
||||
.collect::<String>()
|
||||
.into();
|
||||
let cursor_offset_in_excerpt = cursor_offset - full_context_offset_range.start;
|
||||
let excerpt_start_row = Some(full_context_point_range.start.row);
|
||||
let syntax_ranges = compute_syntax_ranges(&snapshot, cursor_offset, &excerpt_offset_range);
|
||||
let excerpt_ranges = zeta_prompt::compute_legacy_excerpt_ranges(
|
||||
&cursor_excerpt,
|
||||
cursor_offset_in_excerpt,
|
||||
&syntax_ranges,
|
||||
);
|
||||
|
||||
(
|
||||
ZetaPromptInput {
|
||||
cursor_path: example.spec.cursor_path.clone(),
|
||||
cursor_excerpt,
|
||||
cursor_offset_in_excerpt,
|
||||
excerpt_start_row,
|
||||
excerpt_start_row: Some(excerpt_point_range.start.row),
|
||||
events,
|
||||
related_files: existing_related_files,
|
||||
excerpt_ranges,
|
||||
syntax_ranges: Some(syntax_ranges),
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
experiment: None,
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ pub async fn run_context_retrieval(
|
|||
.prompt_inputs
|
||||
.as_ref()
|
||||
.is_some_and(|inputs| inputs.related_files.is_some())
|
||||
|| example.spec.repository_url.is_empty()
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -678,6 +678,7 @@ mod tests {
|
|||
editable_350_context_150: 0..content.len(),
|
||||
..Default::default()
|
||||
},
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
|
|||
443
crates/zeta_prompt/src/excerpt_ranges.rs
Normal file
443
crates/zeta_prompt/src/excerpt_ranges.rs
Normal file
|
|
@ -0,0 +1,443 @@
|
|||
use std::ops::Range;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::estimate_tokens;
|
||||
|
||||
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
|
||||
/// editable and context token budgets. Allows the server to select the
|
||||
/// appropriate ranges for whichever model it uses.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
|
||||
pub struct ExcerptRanges {
|
||||
/// Editable region computed with a 150-token budget.
|
||||
pub editable_150: Range<usize>,
|
||||
/// Editable region computed with a 180-token budget.
|
||||
pub editable_180: Range<usize>,
|
||||
/// Editable region computed with a 350-token budget.
|
||||
pub editable_350: Range<usize>,
|
||||
/// Editable region computed with a 350-token budget.
|
||||
pub editable_512: Option<Range<usize>>,
|
||||
/// Context boundary when using editable_150 with 350 tokens of additional context.
|
||||
pub editable_150_context_350: Range<usize>,
|
||||
/// Context boundary when using editable_180 with 350 tokens of additional context.
|
||||
pub editable_180_context_350: Range<usize>,
|
||||
/// Context boundary when using editable_350 with 150 tokens of additional context.
|
||||
pub editable_350_context_150: Range<usize>,
|
||||
pub editable_350_context_512: Option<Range<usize>>,
|
||||
pub editable_350_context_1024: Option<Range<usize>>,
|
||||
pub context_4096: Option<Range<usize>>,
|
||||
pub context_8192: Option<Range<usize>>,
|
||||
}
|
||||
|
||||
/// Builds an `ExcerptRanges` by computing editable and context ranges for each
|
||||
/// budget combination, using the syntax-aware logic in
|
||||
/// `compute_editable_and_context_ranges`.
|
||||
pub fn compute_legacy_excerpt_ranges(
|
||||
cursor_excerpt: &str,
|
||||
cursor_offset: usize,
|
||||
syntax_ranges: &[Range<usize>],
|
||||
) -> ExcerptRanges {
|
||||
let compute = |editable_tokens, context_tokens| {
|
||||
compute_editable_and_context_ranges(
|
||||
cursor_excerpt,
|
||||
cursor_offset,
|
||||
syntax_ranges,
|
||||
editable_tokens,
|
||||
context_tokens,
|
||||
)
|
||||
};
|
||||
|
||||
let (editable_150, editable_150_context_350) = compute(150, 350);
|
||||
let (editable_180, editable_180_context_350) = compute(180, 350);
|
||||
let (editable_350, editable_350_context_150) = compute(350, 150);
|
||||
let (editable_512, _) = compute(512, 0);
|
||||
let (_, editable_350_context_512) = compute(350, 512);
|
||||
let (_, editable_350_context_1024) = compute(350, 1024);
|
||||
let (_, context_4096) = compute(350, 4096);
|
||||
let (_, context_8192) = compute(350, 8192);
|
||||
|
||||
ExcerptRanges {
|
||||
editable_150,
|
||||
editable_180,
|
||||
editable_350,
|
||||
editable_512: Some(editable_512),
|
||||
editable_150_context_350,
|
||||
editable_180_context_350,
|
||||
editable_350_context_150,
|
||||
editable_350_context_512: Some(editable_350_context_512),
|
||||
editable_350_context_1024: Some(editable_350_context_1024),
|
||||
context_4096: Some(context_4096),
|
||||
context_8192: Some(context_8192),
|
||||
}
|
||||
}
|
||||
|
||||
/// Given the cursor excerpt text, cursor offset, and the syntax node ranges
|
||||
/// containing the cursor (innermost to outermost), compute the editable range
|
||||
/// and context range as byte offset ranges within `cursor_excerpt`.
|
||||
///
|
||||
/// This is the server-side equivalent of `compute_excerpt_ranges` in
|
||||
/// `edit_prediction::cursor_excerpt`, but operates on plain text with
|
||||
/// pre-computed syntax boundaries instead of a `BufferSnapshot`.
|
||||
pub fn compute_editable_and_context_ranges(
|
||||
cursor_excerpt: &str,
|
||||
cursor_offset: usize,
|
||||
syntax_ranges: &[Range<usize>],
|
||||
editable_token_limit: usize,
|
||||
context_token_limit: usize,
|
||||
) -> (Range<usize>, Range<usize>) {
|
||||
let line_starts = compute_line_starts(cursor_excerpt);
|
||||
let cursor_row = offset_to_row(&line_starts, cursor_offset);
|
||||
let max_row = line_starts.len().saturating_sub(1) as u32;
|
||||
|
||||
let editable_range = compute_editable_range_from_text(
|
||||
cursor_excerpt,
|
||||
&line_starts,
|
||||
cursor_row,
|
||||
max_row,
|
||||
syntax_ranges,
|
||||
editable_token_limit,
|
||||
);
|
||||
|
||||
let context_range = expand_context_from_text(
|
||||
cursor_excerpt,
|
||||
&line_starts,
|
||||
max_row,
|
||||
&editable_range,
|
||||
syntax_ranges,
|
||||
context_token_limit,
|
||||
);
|
||||
|
||||
(editable_range, context_range)
|
||||
}
|
||||
|
||||
/// Returns the byte offset at which each line of `text` begins.
///
/// The first entry is always `0`; every `\n` contributes the offset just
/// past it, so a trailing newline yields a final (empty) line start.
fn compute_line_starts(text: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(text.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
        .collect()
}
|
||||
|
||||
/// Maps a byte `offset` to the index of the line containing it.
///
/// An offset that is exactly a line start maps to that line; any other
/// offset maps to the line whose start precedes it (row 0 at the lowest).
fn offset_to_row(line_starts: &[usize], offset: usize) -> u32 {
    let row = line_starts
        .binary_search(&offset)
        .unwrap_or_else(|insertion_point| insertion_point.saturating_sub(1));
    row as u32
}
|
||||
|
||||
/// Returns the byte offset at which `row` begins, or `0` when `row` has no
/// entry in `line_starts`.
fn row_start_offset(line_starts: &[usize], row: u32) -> usize {
    match line_starts.get(row as usize) {
        Some(&start) => start,
        None => 0,
    }
}
|
||||
|
||||
/// Returns the byte offset at which `row` ends, excluding its newline.
///
/// When `row` has no following line in `line_starts`, the end of `text` is
/// returned.
fn row_end_offset(text: &str, line_starts: &[usize], row: u32) -> usize {
    let text_len = text.len();
    match line_starts.get(row as usize + 1) {
        // Step back over the newline that terminates this row.
        Some(&following_start) => std::cmp::min(following_start.saturating_sub(1), text_len),
        None => text_len,
    }
}
|
||||
|
||||
fn row_range_to_byte_range(
|
||||
text: &str,
|
||||
line_starts: &[usize],
|
||||
start_row: u32,
|
||||
end_row: u32,
|
||||
) -> Range<usize> {
|
||||
let start = row_start_offset(line_starts, start_row);
|
||||
let end = row_end_offset(text, line_starts, end_row);
|
||||
start..end
|
||||
}
|
||||
|
||||
fn estimate_tokens_for_row_range(
|
||||
text: &str,
|
||||
line_starts: &[usize],
|
||||
start_row: u32,
|
||||
end_row: u32,
|
||||
) -> usize {
|
||||
let mut tokens = 0;
|
||||
for row in start_row..end_row {
|
||||
let row_len = row_end_offset(text, line_starts, row)
|
||||
.saturating_sub(row_start_offset(line_starts, row));
|
||||
tokens += estimate_tokens(row_len).max(1);
|
||||
}
|
||||
tokens
|
||||
}
|
||||
|
||||
fn line_token_count_from_text(text: &str, line_starts: &[usize], row: u32) -> usize {
|
||||
let row_len =
|
||||
row_end_offset(text, line_starts, row).saturating_sub(row_start_offset(line_starts, row));
|
||||
estimate_tokens(row_len).max(1)
|
||||
}
|
||||
|
||||
/// Returns syntax boundaries (as row ranges) that contain the given row range
|
||||
/// and extend beyond it, ordered from smallest to largest.
|
||||
fn containing_syntax_boundaries_from_ranges(
|
||||
line_starts: &[usize],
|
||||
syntax_ranges: &[Range<usize>],
|
||||
start_row: u32,
|
||||
end_row: u32,
|
||||
) -> Vec<(u32, u32)> {
|
||||
let mut boundaries = Vec::new();
|
||||
let mut last: Option<(u32, u32)> = None;
|
||||
|
||||
// syntax_ranges is innermost to outermost, so iterate in order.
|
||||
for range in syntax_ranges {
|
||||
let node_start_row = offset_to_row(line_starts, range.start);
|
||||
let node_end_row = offset_to_row(line_starts, range.end);
|
||||
|
||||
// Skip nodes that don't extend beyond the current range.
|
||||
if node_start_row >= start_row && node_end_row <= end_row {
|
||||
continue;
|
||||
}
|
||||
|
||||
let rows = (node_start_row, node_end_row);
|
||||
if last == Some(rows) {
|
||||
continue;
|
||||
}
|
||||
|
||||
last = Some(rows);
|
||||
boundaries.push(rows);
|
||||
}
|
||||
|
||||
boundaries
|
||||
}
|
||||
|
||||
fn compute_editable_range_from_text(
|
||||
text: &str,
|
||||
line_starts: &[usize],
|
||||
cursor_row: u32,
|
||||
max_row: u32,
|
||||
syntax_ranges: &[Range<usize>],
|
||||
token_limit: usize,
|
||||
) -> Range<usize> {
|
||||
// Phase 1: Expand symmetrically from cursor using 75% of budget.
|
||||
let initial_budget = (token_limit * 3) / 4;
|
||||
let (mut start_row, mut end_row, mut remaining_tokens) =
|
||||
expand_symmetric(text, line_starts, cursor_row, max_row, initial_budget);
|
||||
|
||||
remaining_tokens += token_limit.saturating_sub(initial_budget);
|
||||
|
||||
let original_start = start_row;
|
||||
let original_end = end_row;
|
||||
|
||||
// Phase 2: Expand to syntax boundaries that fit within budget.
|
||||
let boundaries =
|
||||
containing_syntax_boundaries_from_ranges(line_starts, syntax_ranges, start_row, end_row);
|
||||
for (boundary_start, boundary_end) in &boundaries {
|
||||
let tokens_for_start = if *boundary_start < start_row {
|
||||
estimate_tokens_for_row_range(text, line_starts, *boundary_start, start_row)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let tokens_for_end = if *boundary_end > end_row {
|
||||
estimate_tokens_for_row_range(text, line_starts, end_row + 1, *boundary_end + 1)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let total_needed = tokens_for_start + tokens_for_end;
|
||||
if total_needed <= remaining_tokens {
|
||||
if *boundary_start < start_row {
|
||||
start_row = *boundary_start;
|
||||
}
|
||||
if *boundary_end > end_row {
|
||||
end_row = *boundary_end;
|
||||
}
|
||||
remaining_tokens = remaining_tokens.saturating_sub(total_needed);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: Continue line-wise in the direction we expanded least.
|
||||
let expanded_up = original_start.saturating_sub(start_row);
|
||||
let expanded_down = end_row.saturating_sub(original_end);
|
||||
let prefer_up = expanded_up <= expanded_down;
|
||||
|
||||
(start_row, end_row, _) = expand_linewise(
|
||||
text,
|
||||
line_starts,
|
||||
start_row,
|
||||
end_row,
|
||||
max_row,
|
||||
remaining_tokens,
|
||||
prefer_up,
|
||||
);
|
||||
|
||||
row_range_to_byte_range(text, line_starts, start_row, end_row)
|
||||
}
|
||||
|
||||
fn expand_context_from_text(
|
||||
text: &str,
|
||||
line_starts: &[usize],
|
||||
max_row: u32,
|
||||
editable_range: &Range<usize>,
|
||||
syntax_ranges: &[Range<usize>],
|
||||
context_token_limit: usize,
|
||||
) -> Range<usize> {
|
||||
let mut start_row = offset_to_row(line_starts, editable_range.start);
|
||||
let mut end_row = offset_to_row(line_starts, editable_range.end);
|
||||
let mut remaining_tokens = context_token_limit;
|
||||
let mut did_syntax_expand = false;
|
||||
|
||||
let boundaries =
|
||||
containing_syntax_boundaries_from_ranges(line_starts, syntax_ranges, start_row, end_row);
|
||||
for (boundary_start, boundary_end) in &boundaries {
|
||||
let tokens_for_start = if *boundary_start < start_row {
|
||||
estimate_tokens_for_row_range(text, line_starts, *boundary_start, start_row)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let tokens_for_end = if *boundary_end > end_row {
|
||||
estimate_tokens_for_row_range(text, line_starts, end_row + 1, *boundary_end + 1)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let total_needed = tokens_for_start + tokens_for_end;
|
||||
if total_needed <= remaining_tokens {
|
||||
if *boundary_start < start_row {
|
||||
start_row = *boundary_start;
|
||||
}
|
||||
if *boundary_end > end_row {
|
||||
end_row = *boundary_end;
|
||||
}
|
||||
remaining_tokens = remaining_tokens.saturating_sub(total_needed);
|
||||
did_syntax_expand = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Only expand line-wise if no syntax expansion occurred.
|
||||
if !did_syntax_expand {
|
||||
(start_row, end_row, _) = expand_linewise(
|
||||
text,
|
||||
line_starts,
|
||||
start_row,
|
||||
end_row,
|
||||
max_row,
|
||||
remaining_tokens,
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
row_range_to_byte_range(text, line_starts, start_row, end_row)
|
||||
}
|
||||
|
||||
fn expand_symmetric(
|
||||
text: &str,
|
||||
line_starts: &[usize],
|
||||
cursor_row: u32,
|
||||
max_row: u32,
|
||||
mut token_budget: usize,
|
||||
) -> (u32, u32, usize) {
|
||||
let mut start_row = cursor_row;
|
||||
let mut end_row = cursor_row;
|
||||
|
||||
let cursor_line_tokens = line_token_count_from_text(text, line_starts, cursor_row);
|
||||
token_budget = token_budget.saturating_sub(cursor_line_tokens);
|
||||
|
||||
loop {
|
||||
let can_expand_up = start_row > 0;
|
||||
let can_expand_down = end_row < max_row;
|
||||
|
||||
if token_budget == 0 || (!can_expand_up && !can_expand_down) {
|
||||
break;
|
||||
}
|
||||
|
||||
if can_expand_down {
|
||||
let next_row = end_row + 1;
|
||||
let line_tokens = line_token_count_from_text(text, line_starts, next_row);
|
||||
if line_tokens <= token_budget {
|
||||
end_row = next_row;
|
||||
token_budget = token_budget.saturating_sub(line_tokens);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if can_expand_up && token_budget > 0 {
|
||||
let next_row = start_row - 1;
|
||||
let line_tokens = line_token_count_from_text(text, line_starts, next_row);
|
||||
if line_tokens <= token_budget {
|
||||
start_row = next_row;
|
||||
token_budget = token_budget.saturating_sub(line_tokens);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(start_row, end_row, token_budget)
|
||||
}
|
||||
|
||||
fn expand_linewise(
|
||||
text: &str,
|
||||
line_starts: &[usize],
|
||||
mut start_row: u32,
|
||||
mut end_row: u32,
|
||||
max_row: u32,
|
||||
mut remaining_tokens: usize,
|
||||
prefer_up: bool,
|
||||
) -> (u32, u32, usize) {
|
||||
loop {
|
||||
let can_expand_up = start_row > 0;
|
||||
let can_expand_down = end_row < max_row;
|
||||
|
||||
if remaining_tokens == 0 || (!can_expand_up && !can_expand_down) {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut expanded = false;
|
||||
|
||||
if prefer_up {
|
||||
if can_expand_up {
|
||||
let next_row = start_row - 1;
|
||||
let line_tokens = line_token_count_from_text(text, line_starts, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
start_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
if can_expand_down && remaining_tokens > 0 {
|
||||
let next_row = end_row + 1;
|
||||
let line_tokens = line_token_count_from_text(text, line_starts, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
end_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if can_expand_down {
|
||||
let next_row = end_row + 1;
|
||||
let line_tokens = line_token_count_from_text(text, line_starts, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
end_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
if can_expand_up && remaining_tokens > 0 {
|
||||
let next_row = start_row - 1;
|
||||
let line_tokens = line_token_count_from_text(text, line_starts, next_row);
|
||||
if line_tokens <= remaining_tokens {
|
||||
start_row = next_row;
|
||||
remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
|
||||
expanded = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !expanded {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
(start_row, end_row, remaining_tokens)
|
||||
}
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
pub mod excerpt_ranges;
|
||||
|
||||
use anyhow::{Result, anyhow};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Write;
|
||||
|
|
@ -6,6 +8,10 @@ use std::path::Path;
|
|||
use std::sync::Arc;
|
||||
use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
|
||||
|
||||
pub use crate::excerpt_ranges::{
|
||||
ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
|
||||
};
|
||||
|
||||
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
|
||||
pub const MAX_PROMPT_TOKENS: usize = 4096;
|
||||
|
||||
|
|
@ -18,31 +24,6 @@ fn estimate_tokens(bytes: usize) -> usize {
|
|||
bytes / 3
|
||||
}
|
||||
|
||||
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
|
||||
/// editable and context token budgets. Allows the server to select the
|
||||
/// appropriate ranges for whichever model it uses.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
|
||||
pub struct ExcerptRanges {
|
||||
/// Editable region computed with a 150-token budget.
|
||||
pub editable_150: Range<usize>,
|
||||
/// Editable region computed with a 180-token budget.
|
||||
pub editable_180: Range<usize>,
|
||||
/// Editable region computed with a 350-token budget.
|
||||
pub editable_350: Range<usize>,
|
||||
/// Editable region computed with a 350-token budget.
|
||||
pub editable_512: Option<Range<usize>>,
|
||||
/// Context boundary when using editable_150 with 350 tokens of additional context.
|
||||
pub editable_150_context_350: Range<usize>,
|
||||
/// Context boundary when using editable_180 with 350 tokens of additional context.
|
||||
pub editable_180_context_350: Range<usize>,
|
||||
/// Context boundary when using editable_350 with 150 tokens of additional context.
|
||||
pub editable_350_context_150: Range<usize>,
|
||||
pub editable_350_context_512: Option<Range<usize>>,
|
||||
pub editable_350_context_1024: Option<Range<usize>>,
|
||||
pub context_4096: Option<Range<usize>>,
|
||||
pub context_8192: Option<Range<usize>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
|
||||
pub struct ZetaPromptInput {
|
||||
pub cursor_path: Arc<Path>,
|
||||
|
|
@ -55,6 +36,12 @@ pub struct ZetaPromptInput {
|
|||
pub related_files: Option<Vec<RelatedFile>>,
|
||||
/// These ranges let the server select model-appropriate subsets.
|
||||
pub excerpt_ranges: ExcerptRanges,
|
||||
/// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
|
||||
/// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
|
||||
/// When present, the server uses these to compute editable/context ranges
|
||||
/// instead of `excerpt_ranges`.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub syntax_ranges: Option<Vec<Range<usize>>>,
|
||||
/// The name of the edit prediction model experiment to use.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub experiment: Option<String>,
|
||||
|
|
@ -223,6 +210,21 @@ pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str]
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the (editable_token_limit, context_token_limit) for a given format.
///
/// The first element of the tuple is the token budget for the editable
/// region and the second is the token budget for the surrounding context.
/// `resolve_cursor_region` passes both values to
/// `compute_editable_and_context_ranges` when the input carries
/// `syntax_ranges`.
///
/// `ZetaFormat::V0304VariableEdit` returns a context budget of `0`.
///
/// NOTE(review): the pairs for the other formats look like they mirror the
/// budgets named in the `ExcerptRanges` fields (e.g. `editable_150_context_350`)
/// — confirm they are meant to stay in sync.
|
||||
pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
|
||||
match format {
|
||||
ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
|
||||
ZetaFormat::V0114180EditableRegion => (180, 350),
|
||||
ZetaFormat::V0120GitMergeMarkers
|
||||
| ZetaFormat::V0131GitMergeMarkersPrefix
|
||||
| ZetaFormat::V0211Prefill
|
||||
| ZetaFormat::V0211SeedCoder
|
||||
| ZetaFormat::v0226Hashline
|
||||
| ZetaFormat::V0304SeedNoEdits => (350, 150),
|
||||
ZetaFormat::V0304VariableEdit => (1024, 0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
|
||||
match format {
|
||||
ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
|
||||
|
|
@ -262,8 +264,9 @@ pub fn excerpt_ranges_for_format(
|
|||
),
|
||||
ZetaFormat::V0304VariableEdit => {
|
||||
let context = ranges
|
||||
.context_8192
|
||||
.editable_350_context_1024
|
||||
.clone()
|
||||
.or(ranges.editable_350_context_512.clone())
|
||||
.unwrap_or_else(|| ranges.editable_350_context_150.clone());
|
||||
(context.clone(), context)
|
||||
}
|
||||
|
|
@ -552,7 +555,18 @@ pub fn resolve_cursor_region(
|
|||
input: &ZetaPromptInput,
|
||||
format: ZetaFormat,
|
||||
) -> (&str, Range<usize>, Range<usize>, usize) {
|
||||
let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
|
||||
let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
|
||||
let (editable_tokens, context_tokens) = token_limits_for_format(format);
|
||||
compute_editable_and_context_ranges(
|
||||
&input.cursor_excerpt,
|
||||
input.cursor_offset_in_excerpt,
|
||||
syntax_ranges,
|
||||
editable_tokens,
|
||||
context_tokens,
|
||||
)
|
||||
} else {
|
||||
excerpt_range_for_format(format, &input.excerpt_ranges)
|
||||
};
|
||||
let context_start = context_range.start;
|
||||
let context_text = &input.cursor_excerpt[context_range.clone()];
|
||||
let adjusted_editable =
|
||||
|
|
@ -3876,6 +3890,7 @@ mod tests {
|
|||
editable_350_context_150: context_range,
|
||||
..Default::default()
|
||||
},
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
@ -3905,6 +3920,7 @@ mod tests {
|
|||
editable_350_context_150: context_range,
|
||||
..Default::default()
|
||||
},
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
@ -4488,6 +4504,7 @@ mod tests {
|
|||
editable_350_context_150: 0..excerpt.len(),
|
||||
..Default::default()
|
||||
},
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
@ -4551,6 +4568,7 @@ mod tests {
|
|||
editable_350_context_150: 0..28,
|
||||
..Default::default()
|
||||
},
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
@ -4609,6 +4627,7 @@ mod tests {
|
|||
editable_350_context_150: context_range.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
syntax_ranges: None,
|
||||
experiment: None,
|
||||
in_open_source_repo: false,
|
||||
can_collect_data: false,
|
||||
|
|
|
|||
Loading…
Reference in a new issue