mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
ep: Parse "user accepted prediction" markers in evals (#49598)
Also: add two evals and remove the duplicated Example 6 from the teacher prompt. Release Notes: N/A
This commit is contained in:
parent
b6cd147b9f
commit
3129d7e6b5
5 changed files with 271 additions and 59 deletions
|
|
@ -181,6 +181,7 @@ const EDIT_HISTORY_HEADING: &str = "Edit History";
|
|||
const CURSOR_POSITION_HEADING: &str = "Cursor Position";
|
||||
const EXPECTED_PATCH_HEADING: &str = "Expected Patch";
|
||||
const REJECTED_PATCH_HEADING: &str = "Rejected Patch";
|
||||
/// Marker line for an accepted prediction. While parsing the Edit History
/// section, the text preceding a code block is trimmed and compared against
/// this string; on a match, the marker followed by a newline is written into
/// `edit_history` ahead of that block's contents.
const ACCEPTED_PREDICTION_MARKER: &str = "// User accepted prediction:";
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct FrontMatter<'a> {
|
||||
|
|
@ -352,6 +353,7 @@ impl ExampleSpec {
|
|||
}
|
||||
|
||||
let mut current_section = Section::Start;
|
||||
let mut next_edit_predicted = false;
|
||||
|
||||
for event in parser {
|
||||
match event {
|
||||
|
|
@ -387,6 +389,12 @@ impl ExampleSpec {
|
|||
anyhow::bail!("Unexpected heading level: {level}");
|
||||
}
|
||||
Event::Start(Tag::CodeBlock(kind)) => {
|
||||
if current_section == Section::EditHistory
|
||||
&& text.trim() == ACCEPTED_PREDICTION_MARKER
|
||||
{
|
||||
next_edit_predicted = true;
|
||||
}
|
||||
text.clear();
|
||||
match kind {
|
||||
CodeBlockKind::Fenced(info) => {
|
||||
block_info = info;
|
||||
|
|
@ -407,6 +415,11 @@ impl ExampleSpec {
|
|||
spec.uncommitted_diff = mem::take(&mut text);
|
||||
}
|
||||
Section::EditHistory => {
|
||||
if next_edit_predicted {
|
||||
spec.edit_history
|
||||
.push_str(&format!("{}\n", ACCEPTED_PREDICTION_MARKER));
|
||||
next_edit_predicted = false;
|
||||
}
|
||||
spec.edit_history.push_str(&mem::take(&mut text));
|
||||
}
|
||||
Section::CursorPosition => {
|
||||
|
|
@ -908,4 +921,81 @@ mod tests {
|
|||
let results = spec.expected_patches_with_cursor_positions();
|
||||
assert_eq!(results, vec![(clean_patch, None)]);
|
||||
}
|
||||
|
||||
// Parses a markdown example whose Edit History holds three diff blocks, the
// second of which is preceded by a `// User accepted prediction:` line, and
// checks the resulting `spec.edit_history`: it starts with a diff header (no
// leading marker), contains the marker immediately followed by a diff header,
// contains the marker exactly once, and contains three diff headers in total.
#[test]
|
||||
fn test_from_markdown_accepted_prediction_marker() {
|
||||
let markdown = indoc! {r#"
|
||||
+++
|
||||
repository_url = "https://github.com/example/repo"
|
||||
revision = "abc123"
|
||||
+++
|
||||
|
||||
## Edit History
|
||||
|
||||
```diff
|
||||
--- a/src/main.rs
|
||||
+++ b/src/main.rs
|
||||
@@ -1,3 +1,3 @@
|
||||
-fn hello() {}
|
||||
+fn hello_world() {}
|
||||
```
|
||||
|
||||
// User accepted prediction:
|
||||
```diff
|
||||
--- a/src/main.rs
|
||||
+++ b/src/main.rs
|
||||
@@ -1,3 +1,3 @@
|
||||
-fn hello_world() {}
|
||||
+fn hello_world() { println!("hi"); }
|
||||
```
|
||||
|
||||
```diff
|
||||
--- a/src/main.rs
|
||||
+++ b/src/main.rs
|
||||
@@ -1,3 +1,3 @@
|
||||
-fn hello_world() { println!("hi"); }
|
||||
+fn hello_world() { println!("hello"); }
|
||||
```
|
||||
|
||||
## Cursor Position
|
||||
|
||||
```src/main.rs
|
||||
fn hello_world() { println!("hello"); }
|
||||
# ^[CURSOR_POSITION]
|
||||
```
|
||||
|
||||
## Expected Patch
|
||||
|
||||
```diff
|
||||
--- a/src/main.rs
|
||||
+++ b/src/main.rs
|
||||
@@ -1,3 +1,3 @@
|
||||
-fn hello_world() { println!("hello"); }
|
||||
+fn hello_world() { println!("hello, world!"); }
|
||||
```
|
||||
"#};
|
||||
|
||||
// Parse the fixture; a parse error fails the test through `unwrap`.
let spec = ExampleSpec::from_markdown(markdown).unwrap();
|
||||
|
||||
// The first diff should NOT have the marker
|
||||
assert!(spec.edit_history.starts_with("--- a/src/main.rs"));
|
||||
|
||||
// The second diff should be preceded by the accepted prediction marker
|
||||
assert!(
|
||||
spec.edit_history
|
||||
.contains("// User accepted prediction:\n--- a/src/main.rs")
|
||||
);
|
||||
|
||||
// Count occurrences of the marker - should be exactly one
|
||||
let marker_count = spec
|
||||
.edit_history
|
||||
.matches("// User accepted prediction:")
|
||||
.count();
|
||||
assert_eq!(marker_count, 1);
|
||||
|
||||
// The third diff should NOT have the marker. NOTE(review): there is no
// dedicated assertion for this; only the single-marker count above guards it.
|
||||
// Verify all three diffs are present
|
||||
let diff_count = spec.edit_history.matches("--- a/src/main.rs").count();
|
||||
assert_eq!(diff_count, 3);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,98 @@
|
|||
+++
|
||||
repository_url = "https://github.com/pallets/flask"
|
||||
revision = "2fec0b206c6e83ea813ab26597e15c96fab08be7"
|
||||
+++
|
||||
|
||||
## Edit History
|
||||
|
||||
```diff
|
||||
--- a/tests/test_basic.py
|
||||
+++ b/tests/test_basic.py
|
||||
@@ -356,3 +356,6 @@
|
||||
cookie = rv.headers["set-cookie"].lower()
|
||||
assert "samesite=lax" in cookie
|
||||
|
||||
|
||||
+de
|
||||
+
|
||||
+
|
||||
def test_missing_session(app):
|
||||
```
|
||||
|
||||
// User accepted prediction:
|
||||
```diff
|
||||
--- a/tests/test_basic.py
|
||||
+++ b/tests/test_basic.py
|
||||
@@ -358,6 +358,14 @@
|
||||
|
||||
|
||||
-de
|
||||
+def test_session_cookie_httponly(app, client):
|
||||
+ app.config["SESSION_COOKIE_HTTPONLY"] = True
|
||||
+
|
||||
+ @app.route("/")
|
||||
+ def index():
|
||||
+ flask.session["testing"] = 42
|
||||
+ return "Hello World"
|
||||
+
|
||||
+ rv = client.get("/")
|
||||
+ assert "httponly" in rv.headers["set-cookie"].lower()
|
||||
|
||||
|
||||
def test_missing_session(app):
|
||||
```
|
||||
|
||||
```diff
|
||||
--- a/tests/test_basic.py
|
||||
+++ b/tests/test_basic.py
|
||||
@@ -358,14 +358,14 @@
|
||||
|
||||
|
||||
-def test_session_cookie_httponly(app, client):
|
||||
+def test_session_cookie_secur(app, client):
|
||||
app.config["SESSION_COOKIE_HTTPONLY"] = True
|
||||
```
|
||||
|
||||
## Cursor Position
|
||||
|
||||
```tests/test_basic.py
|
||||
cookie = rv.headers["set-cookie"].lower()
|
||||
assert "samesite=lax" in cookie
|
||||
|
||||
|
||||
def test_session_cookie_secur(app, client):
|
||||
# ^[CURSOR_POSITION]
|
||||
app.config["SESSION_COOKIE_HTTPONLY"] = True
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
flask.session["testing"] = 42
|
||||
return "Hello World"
|
||||
|
||||
rv = client.get("/")
|
||||
assert "httponly" in rv.headers["set-cookie"].lower()
|
||||
|
||||
|
||||
def test_missing_session(app):
|
||||
```
|
||||
|
||||
## Expected Patch
|
||||
|
||||
```diff
|
||||
--- a/tests/test_basic.py
|
||||
+++ b/tests/test_basic.py
|
||||
@@ -358,14 +358,14 @@
|
||||
-def test_session_cookie_secur(app, client):
|
||||
- app.config["SESSION_COOKIE_HTTPONLY"] = True
|
||||
+def test_session_cookie_secure(app, client):
|
||||
+ app.config["SESSION_COOKIE_SECURE"] = True
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
flask.session["testing"] = 42
|
||||
return "Hello World"
|
||||
|
||||
rv = client.get("/")
|
||||
- assert "httponly" in rv.headers["set-cookie"].lower()
|
||||
+ assert "secure" in rv.headers["set-cookie"].lower()
|
||||
```
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
+++
|
||||
repository_url = "https://github.com/octocat/hello-world"
|
||||
revision = "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d"
|
||||
+++
|
||||
|
||||
## Edit History
|
||||
|
||||
```diff
|
||||
--- a/README
|
||||
+++ b/README
|
||||
@@ -1,1 +1,6 @@
|
||||
-Hello World!
|
||||
+function filterByStatus(items, status) {
|
||||
+ return items.filter(item => item.status === status);
|
||||
+}
|
||||
+
|
||||
+function groupBy
|
||||
+
|
||||
```
|
||||
|
||||
// User accepted prediction:
|
||||
```diff
|
||||
--- a/README
|
||||
+++ b/README
|
||||
@@ -4,3 +4,9 @@
|
||||
|
||||
-function groupBy
|
||||
+function groupByStatus(items) {
|
||||
+ return items.reduce((groups, item) => {
|
||||
+ const key = item.status;
|
||||
+ (groups[key] = groups[key] || []).push(item);
|
||||
+ return groups;
|
||||
+ }, {});
|
||||
+}
|
||||
|
||||
```
|
||||
|
||||
```diff
|
||||
--- a/README
|
||||
+++ b/README
|
||||
@@ -4,4 +4,4 @@
|
||||
|
||||
-function groupByStatus(items) {
|
||||
+function groupByCat(items) {
|
||||
return items.reduce((groups, item) => {
|
||||
```
|
||||
|
||||
## Cursor Position
|
||||
|
||||
```README
|
||||
function filterByStatus(items, status) {
|
||||
return items.filter(item => item.status === status);
|
||||
}
|
||||
|
||||
function groupByCat(items) {
|
||||
# ^[CURSOR_POSITION]
|
||||
return items.reduce((groups, item) => {
|
||||
const key = item.status;
|
||||
(groups[key] = groups[key] || []).push(item);
|
||||
return groups;
|
||||
}, {});
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Expected Patch
|
||||
|
||||
```diff
|
||||
--- a/README
|
||||
+++ b/README
|
||||
@@ -5,7 +5,7 @@
|
||||
-function groupByCat(items) {
|
||||
+function groupByCategory(items) {
|
||||
# ^[CURSOR_POSITION]
|
||||
return items.reduce((groups, item) => {
|
||||
- const key = item.status;
|
||||
+ const key = item.category;
|
||||
(groups[key] = groups[key] || []).push(item);
|
||||
return groups;
|
||||
}, {});
|
||||
```
|
||||
|
|
@ -238,65 +238,6 @@ The user just fixed a bug in the `add` function, changing subtraction to additio
|
|||
NO_EDITS
|
||||
`````
|
||||
|
||||
## Example 6
|
||||
|
||||
The user accepted a prediction for a function, then started renaming it. The original arguments were auto-generated (marked with `// User accepted prediction:`), so they CAN be updated to match the new function name. This is NOT reverting user input—it's improving auto-generated scaffolding.
|
||||
|
||||
### User Edit History
|
||||
|
||||
`````
|
||||
--- a/math_utils.py
|
||||
+++ b/math_utils.py
|
||||
@@ -3,3 +3,5 @@
|
||||
def calculate_rectangle_area(width, height):
|
||||
return width * height
|
||||
|
||||
+de
|
||||
|
||||
// User accepted prediction:
|
||||
--- a/math_utils.py
|
||||
+++ b/math_utils.py
|
||||
@@ -3,5 +3,7 @@
|
||||
def calculate_rectangle_area(width, height):
|
||||
return width * height
|
||||
|
||||
-de
|
||||
+def calculate_rectangle_perimeter(width, height):
|
||||
+
|
||||
|
||||
--- a/math_utils.py
|
||||
+++ b/math_utils.py
|
||||
@@ -5,5 +5,5 @@
|
||||
return width * height
|
||||
|
||||
-def calculate_rectangle_perimeter(width, height):
|
||||
+def calculate_sq_perimeter(width, height):
|
||||
|
||||
`````
|
||||
|
||||
### Current File
|
||||
|
||||
`````math_utils.py
|
||||
def calculate_rectangle_area(width, height):
|
||||
return width * height
|
||||
|
||||
<|editable_region_start|>
|
||||
def calculate_sq<|user_cursor|>_perimeter(width, height):
|
||||
|
||||
<|editable_region_end|>
|
||||
`````
|
||||
|
||||
### Output
|
||||
|
||||
The user accepted a prediction for `calculate_rectangle_perimeter(width, height)`, then started renaming `rectangle` to `square`. Since squares have equal sides, the arguments should change from `(width, height)` to `(side)`. The arguments were auto-generated (from an accepted prediction), so modifying them is appropriate.
|
||||
|
||||
`````
|
||||
<|editable_region_start|>
|
||||
def calculate_square_perimeter(side):
|
||||
<|user_cursor|>
|
||||
<|editable_region_end|>
|
||||
`````
|
||||
|
||||
## Example 5
|
||||
|
||||
The user just deleted code, leaving behind what looks incomplete. You must NOT "complete" it by restoring deleted content—that would undo their edit. Output NO_EDITS. **This is the correct response even though the code appears broken.**
|
||||
|
|
|
|||
|
|
@ -60,6 +60,8 @@ extend-exclude = [
|
|||
"crates/gpui/src/platform/mac/dispatcher.rs",
|
||||
# Tests contain partially incomplete words (by design)
|
||||
"crates/edit_prediction_cli/src/split_commit.rs",
|
||||
# Eval examples contain intentionally partial words (e.g. "secur" for "secure")
|
||||
"crates/edit_prediction_cli/evals/",
|
||||
# Tests contain `baˇr`, which causes `"ba" should be "by" or "be".`-like false positives
|
||||
"crates/editor/src/document_symbols.rs",
|
||||
]
|
||||
|
|
|
|||
Loading…
Reference in a new issue