Duplicate Bot: Add more context for triagers (V3) (#57647)

In addition to the user-facing suggestions of potential duplicates, the bot now adds a separate collapsible block to the same comment, aimed at triagers. This block lists the duplicates we're less sure about, as well as recently closed issues that seem related to the issue at hand. This commit also bumps the bot to V3 for effectiveness-tracking purposes and fixes some leftovers from the previous dupe bot change (activating the bot for issues with no type). Finally, it makes the script a bit more reliable by retrying GitHub API GET requests on transient failures. Release Notes: - N/A
2026-06-01 03:14:56 +07:00 · 2026-05-25 16:29:13 +02:00 · 2026-05-25 16:29:13 +02:00 · bb460d5f26
commit bb460d5f26
parent 453b020866
3 changed files with 241 additions and 112 deletions
--- a/.github/workflows/track_duplicate_bot_effectiveness.yml
+++ b/.github/workflows/track_duplicate_bot_effectiveness.yml
@ -16,8 +16,9 @@ jobs:
      github.event_name == 'issues' &&
      github.repository == 'zed-industries/zed' &&
      github.event.issue.pull_request == null &&
-      github.event.issue.type != null &&
-      (github.event.issue.type.name == 'Bug' || github.event.issue.type.name == 'Crash')
+      (github.event.issue.type == null ||
+       github.event.issue.type.name == 'Bug' ||
+       github.event.issue.type.name == 'Crash')
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
--- a/script/github-check-new-issue-for-duplicates.py
+++ b/script/github-check-new-issue-for-duplicates.py
@ -1,15 +1,17 @@
 #!/usr/bin/env python3
 """
-Comment on newly opened issues that might be duplicates of an existing issue.
+Comment on newly opened issues with possible duplicates and triage hints.

-This script is run by a GitHub Actions workflow when a new bug or crash report
-is opened. It:
-1. Checks eligibility (must be bug/crash type, non-staff author)
+This script is run by a GitHub Actions workflow when a new issue is opened. It:
+1. Checks eligibility (bug/crash type or untyped, non-staff author)
 2. Detects relevant areas using Claude + the area label taxonomy
 3. Parses known "duplicate magnets" from tracking issue #46355
-4. Searches for similar recent issues by title keywords, area labels, and error patterns
-5. Asks Claude to analyze potential duplicates (magnets + search results)
-6. Posts a comment on the issue if high-confidence duplicates are found
+4. Searches for similar open issues (area-label queries: last 60 days) and recently closed ones (last 30 days)
+5. Asks Claude to sort open candidates into likely and possible duplicates, and
+   surface recently closed issues that may be useful triage context
+6. Posts a comment if anything is found: a user-facing duplicate alert for likely
+   duplicates, and/or a collapsed triager-facing section for possible duplicates
+   and recently closed related issues

 Requires:
    requests (pip install requests)
@ -28,6 +30,7 @@ import json
 import os
 import re
 import sys
+import time
 from datetime import datetime, timedelta

 import requests
@ -48,6 +51,9 @@ STOPWORDS = {
    "the", "this", "when", "while", "with", "won't", "work", "working", "zed",
 }

+# HTTP statuses we'll retry on for GET requests
+TRANSIENT_HTTP_STATUSES = {429, 500, 502, 503, 504}
+

 def log(message):
    """Print to stderr so it doesn't interfere with JSON output on stdout."""
@ -55,11 +61,22 @@ def log(message):


 def github_api_get(path, params=None):
-    """Fetch JSON from the GitHub API. Raises on non-2xx status."""
+    """Fetch JSON from the GitHub API, retrying transient failures. Raises on non-2xx status."""
    url = f"{GITHUB_API}/{path.lstrip('/')}"
-    response = requests.get(url, headers=GITHUB_HEADERS, params=params)
-    response.raise_for_status()
-    return response.json()
+    for attempt in range(3):
+        try:
+            response = requests.get(url, headers=GITHUB_HEADERS, params=params)
+            response.raise_for_status()
+            return response.json()
+        except requests.RequestException as e:
+            transient = isinstance(e, (requests.ConnectionError, requests.Timeout)) or (
+                isinstance(e, requests.HTTPError) and e.response.status_code in TRANSIENT_HTTP_STATUSES
+            )
+            if not transient or attempt == 2:
+                raise
+            wait = 2 ** attempt
+            log(f"  Transient GitHub API error ({e}); retrying in {wait}s")
+            time.sleep(wait)


 def github_search_issues(query, per_page=15):
@ -86,17 +103,23 @@ def post_comment(issue_number: int, body):
    log(f"  Posted comment on #{issue_number}")


-def build_duplicate_comment(matches):
-    """Build the comment body for potential duplicates."""
-    match_list = "\n".join(f"- #{m['number']}" for m in matches)
-    explanations = "\n\n".join(
-        f"**#{m['number']}:** {m['explanation']}\n\n**Shared root cause:** {m['shared_root_cause']}"
-        if m.get('shared_root_cause')
-        else f"**#{m['number']}:** {m['explanation']}"
-        for m in matches
-    )
+def build_comment(likely_duplicates, possible_duplicates, related_closed_issues):
+    """Compose the full comment body. Returns empty string if there's nothing to post.

-    return f"""This issue appears to be a duplicate of:
+    The comment has two sections, each optional:
+    - User-facing duplicate alert, rendered when likely_duplicates is non-empty.
+    - Collapsed triage context, rendered when there are possible duplicates or
+      related closed issues to surface for triagers.
+    """
+    sections = []
+
+    if likely_duplicates:
+        match_list = "\n".join(f"- #{m['number']}" for m in likely_duplicates)
+        explanations = "\n\n".join(
+            f"**#{m['number']}:** {m['explanation']}\n\n**Shared root cause:** {m['shared_root_cause']}"
+            for m in likely_duplicates
+        )
+        sections.append(f"""This issue appears to be a duplicate of:

 {match_list}

@ -111,10 +134,36 @@ No action needed. A maintainer will review this shortly.

 {explanations}

-</details>
+</details>""")

---
-<sub>This is an automated analysis and might be incorrect.</sub>"""
+    if possible_duplicates or related_closed_issues:
+        parts = []
+        if possible_duplicates:
+            lines = [
+                f"- #{m['number']} — {m['explanation']}\n"
+                f"  - Possible shared root cause: {m['shared_root_cause']}"
+                for m in possible_duplicates
+            ]
+            parts.append("**Possibly related open issues:**\n\n" + "\n".join(lines))
+        if related_closed_issues:
+            lines = [
+                f"- #{m['number']} (closed as {m['state_reason']}) — {m['explanation']}"
+                for m in related_closed_issues
+            ]
+            parts.append("**Recently closed, possibly related:**\n\n" + "\n".join(lines))
+        body = "\n\n".join(parts)
+        sections.append(f"""<details>
+<summary>Additional recent context for triagers</summary>
+
+{body}
+
+</details>""")
+
+    if not sections:
+        return ""
+
+    sections.append("---\n<sub>This is an automated analysis and might be incorrect.</sub>")
+    return "\n\n".join(sections)


 def call_claude(api_key, system, user_content, max_tokens=1024):
@ -344,53 +393,76 @@ def filter_magnets_by_areas(magnets, detected_areas):
    return list(filter(matches, magnets))


-def search_for_similar_issues(issue, detected_areas, max_searches=6):
-    """Search for similar issues that might be duplicates.
+def search_for_similar_issues(issue, detected_areas, max_searches_per_state=6):
+    """Search for similar issues — both open and recently closed.

-    Searches by title keywords, area labels (last 60 days), and error patterns.
-    max_searches caps the total number of queries to keep token usage and context size under control.
+    Runs two passes:
+    - Open issues: title keywords / error pattern unrestricted, area searches last 60 days.
+    - Closed issues: closed within the last 30 days (across all query types).
+
+    max_searches_per_state caps queries per state to keep token usage and context size bounded.
    """
    log("Searching for similar issues")

    sixty_days_ago = (datetime.now() - timedelta(days=60)).strftime("%Y-%m-%d")
-    base_query = f"repo:{REPO_OWNER}/{REPO_NAME} is:issue is:open"
-    seen_issues = {}
-    queries = []
+    thirty_days_ago = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d")

    title_keywords = [word for word in issue["title"].split() if word.lower() not in STOPWORDS and len(word) > 2]
-
-    if title_keywords:
-        keywords_query = " ".join(title_keywords)
-        queries.append(("title_keywords", f"{base_query} {keywords_query}"))
-
-    for area in detected_areas:
-        queries.append(("area_label", f'{base_query} label:"area:{area}" created:>{sixty_days_ago}'))
+    keywords_query = " ".join(title_keywords) if title_keywords else None

    # error pattern search: capture 5–90 chars after keyword, colon optional
    error_pattern = r"(?i:\b(?:error|panicked|panic|failed)\b)\s*([^\n]{5,90})"
-    match = re.search(error_pattern, issue["body"])
-    if match:
-        error_snippet = match.group(1).strip()
-        queries.append(("error_pattern", f'{base_query} in:body "{error_snippet}"'))
+    error_match = re.search(error_pattern, issue["body"])
+    error_snippet = error_match.group(1).strip() if error_match else None

-    for search_type, query in queries[:max_searches]:
-        log(f"  Search ({search_type}): {query}")
-        try:
-            results = github_search_issues(query, per_page=15)
-            for item in results:
-                number = item["number"]
-                if number != issue["number"] and number not in seen_issues:
-                    body = item.get("body") or ""
-                    seen_issues[number] = {
-                        "number": number,
-                        "title": item["title"],
-                        "state": item.get("state", ""),
-                        "created_at": item.get("created_at", ""),
-                        "body_preview": body[:1000],
-                        "source": search_type,
-                    }
-        except requests.RequestException as e:
-            log(f"  Search failed: {e}")
+    def build_queries(base, area_window=None):
+        queries = []
+        if keywords_query:
+            queries.append(("title_keywords", f"{base} {keywords_query}"))
+        for area in detected_areas:
+            area_q = f'{base} label:"area:{area}"'
+            if area_window:
+                area_q += f" created:>{area_window}"
+            queries.append(("area_label", area_q))
+        if error_snippet:
+            queries.append(("error_pattern", f'{base} in:body "{error_snippet}"'))
+        return queries
+
+    open_queries = build_queries(
+        f"repo:{REPO_OWNER}/{REPO_NAME} is:issue is:open",
+        area_window=sixty_days_ago,
+    )
+    # closed pass: filter by close date so we catch issues closed recently regardless of
+    # when they were opened. closed:> already restricts the result set, so the per-query
+    # area window is unnecessary.
+    closed_queries = build_queries(
+        f"repo:{REPO_OWNER}/{REPO_NAME} is:issue is:closed closed:>{thirty_days_ago}",
+    )
+
+    seen_issues = {}
+    for state_label, queries in (
+        ("open", open_queries[:max_searches_per_state]),
+        ("closed", closed_queries[:max_searches_per_state]),
+    ):
+        for search_type, query in queries:
+            log(f"  Search ({state_label} / {search_type}): {query}")
+            try:
+                results = github_search_issues(query, per_page=15)
+                for item in results:
+                    number = item["number"]
+                    if number != issue["number"] and number not in seen_issues:
+                        body = item.get("body") or ""
+                        seen_issues[number] = {
+                            "number": number,
+                            "title": item["title"],
+                            "state": item.get("state", ""),
+                            "state_reason": item.get("state_reason"),
+                            "created_at": item.get("created_at", ""),
+                            "body_preview": body[:1000],
+                            "source": search_type,
+                        }
+            except requests.RequestException as e:
+                log(f"  Search failed: {e}")

    similar_issues = list(seen_issues.values())
    log(f"  Found {len(similar_issues)} similar issues")
@ -398,29 +470,41 @@ def search_for_similar_issues(issue, detected_areas, max_searches=6):


 def analyze_duplicates(anthropic_key, issue, magnets, search_results):
-    """Use Claude to analyze potential duplicates."""
-    log("Analyzing duplicates with Claude")
+    """Use Claude to identify duplicates (open) and surface related closed issues.

+    Returns (likely_duplicates, possible_duplicates, related_closed_issues).
+    """
    top_magnets = magnets[:10]
-    enrich_magnets(top_magnets)
    magnet_numbers = {m["number"] for m in top_magnets}

+    open_results = [r for r in search_results if r["state"] == "open" and r["number"] not in magnet_numbers]
+    closed_results = [r for r in search_results if r["state"] == "closed" and r["number"] not in magnet_numbers]
+
+    if not top_magnets and not open_results and not closed_results:
+        return [], [], []
+
+    log("Analyzing candidates with Claude")
+    enrich_magnets(top_magnets)
+
    candidates = [
-        {"number": m["number"], "title": m["title"], "body_preview": m["body_preview"], "source": "known_duplicate_magnet"}
+        {"number": m["number"], "title": m["title"], "body_preview": m["body_preview"],
+         "state": "open", "state_reason": None, "source": "known_duplicate_magnet"}
        for m in top_magnets
    ] + [
-        {"number": r["number"], "title": r["title"], "body_preview": r["body_preview"], "source": "search_result"}
-        for r in search_results[:10]
-        if r["number"] not in magnet_numbers
+        {"number": r["number"], "title": r["title"], "body_preview": r["body_preview"],
+         "state": r["state"], "state_reason": r["state_reason"], "source": "search_result"}
+        for r in open_results[:10] + closed_results[:5]
    ]

-    if not candidates:
-        return [], "No candidates to analyze"
+    system_prompt = """You analyze GitHub issues to (a) identify duplicates among OPEN candidates
+and (b) surface recently CLOSED candidates that are useful triage context.

-    system_prompt = """You analyze GitHub issues to identify potential duplicates.
+Each candidate has a "state" field ("open" or "closed"), and closed candidates carry a
+"state_reason" ("completed", "not_planned", or "duplicate").

-Given a new issue and a list of existing issues, identify which existing issues are duplicates — meaning
-they are caused by the SAME BUG in the code, not just similar symptoms.
+# (a) Duplicates — OPEN candidates only
+
+A duplicate means: caused by the SAME BUG in the code, not just similar symptoms.

 CRITICAL DISTINCTION — shared symptoms vs shared root cause:
 - "models missing", "can't sign in", "editor hangs", "venv not detected" are SYMPTOMS that many
@ -428,13 +512,14 @@ CRITICAL DISTINCTION — shared symptoms vs shared root cause:
  identify a specific shared root cause.
 - A duplicate means: if a developer fixed the existing issue, the new issue would also be fixed.
 - If the issues just happen to be in the same feature area, or describe similar-sounding problems
-  with different specifics (different error messages, different triggers, different platforms, different
-  configurations), they are NOT duplicates.
+  with different specifics (different error messages, different triggers, different platforms,
+  different configurations), they are NOT duplicates.

-For each potential duplicate, assess confidence:
- "high": Almost certainly the same bug. You can name a specific shared root cause, and the
-  reproduction steps / error messages / triggers are consistent.
- "medium": Likely the same bug based on specific technical details, but some uncertainty remains.
+Sort duplicates into two buckets:
+- "likely_duplicates": Almost certainly the same bug. You can name a specific shared root cause, and
+  the reproduction steps / error messages / triggers are consistent.
+- "possible_duplicates": Likely the same bug based on specific technical details, but some
+  uncertainty remains.
 - Do NOT include issues that merely share symptoms, affect the same feature area, or sound similar
  at a surface level.

@ -444,24 +529,48 @@ Examples of things that are NOT duplicates:
 - Two issues about "Zed hangs" — one triggered by network drives, the other by large projects.
 - Two issues about "can't sign in" — one caused by a missing system package, the other by a server-side error.

-Output only valid JSON (no markdown code blocks) with this structure:
+For OPEN duplicates (either bucket), false positives are MUCH worse than false negatives — they
+waste the time of both the issue author and the maintainers. When in doubt, omit.
+
+# (b) Related closed issues — CLOSED candidates only
+
+The goal is to give triagers extra context, NOT to claim a duplicate. The bar is lower than for
+duplicates: include a closed candidate if a triager would plausibly want to see it when reviewing
+the new issue. Examples worth surfacing:
+- A recently fixed (state_reason "completed") issue describing the same symptom — triager may ask
+  the reporter to retest on the latest build.
+- A cluster of similar issues closed as "not_planned" — signals a known limitation or design choice.
+- A previously triaged duplicate (state_reason "duplicate") in the same code area.
+
+Include at most 5 closed candidates, prioritized by relevance.
+
+# Output format
+
+Output only valid JSON (no markdown code blocks):
 {
-  "matches": [
+  "likely_duplicates": [
    {
      "number": 12345,
-      "confidence": "high|medium",
      "shared_root_cause": "The specific bug/root cause shared by both issues",
      "explanation": "Brief explanation with concrete evidence from both issues"
    }
  ],
-  "summary": "One sentence summary of findings"
+  "possible_duplicates": [
+    {
+      "number": 12345,
+      "shared_root_cause": "The specific bug/root cause shared by both issues",
+      "explanation": "Brief explanation with concrete evidence from both issues"
+    }
+  ],
+  "related_closed_issues": [
+    {
+      "number": 12345,
+      "explanation": "Brief explanation of why this is useful triage context"
+    }
+  ]
 }

-When in doubt, return an empty matches array. A false positive (flagging a non-duplicate) is much
-worse than a false negative (missing a real duplicate), because it wastes the time of both the
-issue author and the maintainers.
-
-Return empty matches array if none found or if you can only identify shared symptoms."""
+Return empty arrays where nothing relevant is found."""

    user_content = f"""## New Issue #{issue['number']}
 **Title:** {issue['title']}
@ -479,12 +588,13 @@ Return empty matches array if none found or if you can only identify shared symp
    except json.JSONDecodeError as e:
        log(f"  Failed to parse response: {e}")
        log(f"  Raw response: {response}")
-        return [], "Failed to parse analysis"
+        return [], [], []

-    matches = data.get("matches", [])
-    summary = data.get("summary", "Analysis complete")
-    log(f"  Found {len(matches)} potential matches")
-    return matches, summary
+    likely = data.get("likely_duplicates", [])
+    possible = data.get("possible_duplicates", [])
+    closed = data.get("related_closed_issues", [])
+    log(f"  Found {len(likely) + len(possible) + len(closed)} potential matches")
+    return likely, possible, closed


 if __name__ == "__main__":
@ -518,27 +628,30 @@ if __name__ == "__main__":
    taxonomy = format_taxonomy_for_claude(fetch_area_labels())
    detected_areas = detect_areas(anthropic_key, issue, taxonomy)

-    # search for potential duplicates
+    # search for potential duplicates and related closed issues
    all_magnets = parse_duplicate_magnets()
    relevant_magnets = filter_magnets_by_areas(all_magnets, detected_areas)
    search_results = search_for_similar_issues(issue, detected_areas)

-    # analyze potential duplicates
-    if relevant_magnets or search_results:
-        matches, summary = analyze_duplicates(anthropic_key, issue, relevant_magnets, search_results)
-    else:
-        matches, summary = [], "No potential duplicates to analyze"
+    # analyze candidates
+    likely_duplicates, possible_duplicates, related_closed_issues = analyze_duplicates(
+        anthropic_key, issue, relevant_magnets, search_results
+    )

-    # post comment if high-confidence matches found
-    high_confidence_matches = [m for m in matches if m["confidence"] == "high"]
+    # resolve close reason from our search results (the source of truth) so we don't depend
+    # on Claude to faithfully echo it back
+    results_by_number = {r["number"]: r for r in search_results}
+    for m in related_closed_issues:
+        m["state_reason"] = results_by_number[m["number"]]["state_reason"]
+
+    comment_body = build_comment(likely_duplicates, possible_duplicates, related_closed_issues)
    commented = False

-    if high_confidence_matches:
-        comment_body = build_duplicate_comment(high_confidence_matches)
+    if comment_body:
        if args.dry_run:
            log("Dry run - would post comment:\n" + "-" * 40 + "\n" + comment_body + "\n" + "-" * 40)
        else:
-            log("Posting comment for high-confidence match(es)")
+            log("Posting comment")
            try:
                post_comment(issue["number"], comment_body)
                commented = True
@ -556,7 +669,8 @@ if __name__ == "__main__":
        "detected_areas": detected_areas,
        "magnets_count": len(relevant_magnets),
        "search_results_count": len(search_results),
-        "matches": matches,
-        "summary": summary,
+        "likely_duplicates": likely_duplicates,
+        "possible_duplicates": possible_duplicates,
+        "related_closed_issues": related_closed_issues,
        "commented": commented,
    }))
--- a/script/github-track-duplicate-bot-effectiveness.py
+++ b/script/github-track-duplicate-bot-effectiveness.py
@ -35,15 +35,23 @@ REPO_NAME = "zed"
 STAFF_TEAM_SLUG = "staff"
 BOT_LOGIN = "zed-community-bot[bot]"
 BOT_APP_SLUG = "zed-community-bot"
-BOT_COMMENT_PREFIX = "This issue appears to be a duplicate of"
+# Strings that identify a comment posted by the duplicate-detection bot. Any
+# match counts as a bot comment for classification purposes. A single comment
+# can contain both markers (v3+ produces this when there are both confident
+# duplicates and lower-confidence triage context).
+BOT_COMMENT_MARKERS = (
+    "This issue appears to be a duplicate of",  # user-facing duplicate alert
+    "Additional recent context for triagers",  # v3+ collapsed triage section
+)
 BOT_START_DATE = "2026-02-18"
 NEEDS_TRIAGE_LABEL = "state:needs triage"
 DEFAULT_PROJECT_NUMBER = 76
 VALID_CLOSED_AS_VALUES = {"duplicate", "not_planned", "completed"}
 # Add a new tuple when you deploy a new version of the bot that you want to
 # keep track of (e.g. the prompt gets a rewrite or the model gets swapped).
-# Newest first, please. The datetime is for the deployment time (merge to maain).
+# Newest first, please. The datetime is for the deployment time (merge to main).
 BOT_VERSION_TIMELINE = [
+    ("v3", datetime(2026, 5, 25, 14, 30, tzinfo=timezone.utc)),
    ("v2", datetime(2026, 2, 26, 14, 9, tzinfo=timezone.utc)),
    ("v1", datetime(2026, 2, 18, tzinfo=timezone.utc)),
 ]
@ -96,10 +104,16 @@ def fetch_issue(issue_number):
    }


+def is_bot_dupe_comment(body):
+    """True if the comment body looks like one posted by the duplicate-detection bot."""
+    return any(marker in body for marker in BOT_COMMENT_MARKERS)
+
+
 def get_bot_comment_with_time(issue_number):
    """Get the bot's duplicate-detection comment and its timestamp from an issue.

-    Returns {"body": str, "created_at": str} if found, else None.
+    Recognizes both the user-facing duplicate alert and the v3+ triage-only
+    comment formats. Returns {"body": str, "created_at": str} if found, else None.
    """
    comments_path = f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{issue_number}/comments"
    page = 1
@ -107,7 +121,7 @@ def get_bot_comment_with_time(issue_number):
        for comment in comments:
            author = (comment.get("user") or {}).get("login", "")
            body = comment.get("body", "")
-            if author == BOT_LOGIN and body.startswith(BOT_COMMENT_PREFIX):
+            if author == BOT_LOGIN and is_bot_dupe_comment(body):
                return {"body": body, "created_at": comment.get("created_at", "")}
        page += 1
    return None
@ -448,7 +462,7 @@ def classify_open():
            node_id = item["node_id"]

            skip_reason = (
-                f"type is {type_name}" if type_name not in ("Bug", "Crash")
+                f"type is {type_name}" if type_name and type_name not in ("Bug", "Crash")
                else f"author {author} is staff" if is_staff_member(author)
                else "already on the board" if find_project_item(node_id)
                else "no bot duplicate comment found" if not (bot_comment := get_bot_comment_with_time(number))