mirror of
https://github.com/zed-industries/zed.git
synced 2026-06-01 03:14:56 +07:00
parent
652aff52ba
commit
16aeee6361
5 changed files with 854 additions and 692 deletions
70
.github/workflows/comment_on_potential_duplicate_issues.yml
vendored
Normal file
70
.github/workflows/comment_on_potential_duplicate_issues.yml
vendored
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
name: Comment on potential duplicate bug/crash reports
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
issue_number:
|
||||
description: "Issue number to analyze"
|
||||
required: true
|
||||
type: number
|
||||
|
||||
concurrency:
|
||||
group: potential-duplicate-check-${{ github.event.issue.number || inputs.issue_number }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
identify-duplicates:
|
||||
# Manual (workflow_dispatch) runs are allowed in any repository, e.g. a fork used for testing; automatic runs happen only in zed-industries/zed
|
||||
if: github.event_name == 'workflow_dispatch' || github.repository == 'zed-industries/zed'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
sparse-checkout: script/github-check-new-issue-for-duplicates.py
|
||||
sparse-checkout-cone-mode: false
|
||||
|
||||
- name: Get github app token
|
||||
id: get-app-token
|
||||
uses: actions/create-github-app-token@bef1eaf1c0ac2b148ee2a0a74c65fbe6db0631f1 # v1.11.7
|
||||
with:
|
||||
app-id: ${{ secrets.ZED_COMMUNITY_BOT_APP_ID }}
|
||||
private-key: ${{ secrets.ZED_COMMUNITY_BOT_PRIVATE_KEY }}
|
||||
owner: zed-industries
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: "3.12"
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install requests
|
||||
|
||||
- name: Run duplicate detection
|
||||
id: detect
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ steps.get-app-token.outputs.token }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY_ISSUE_DEDUP }}
|
||||
ISSUE_NUMBER: ${{ github.event.issue.number || inputs.issue_number }}
|
||||
run: |
|
||||
python script/github-check-new-issue-for-duplicates.py "$ISSUE_NUMBER" > result.json
|
||||
cat result.json
|
||||
|
||||
- name: Write job summary
|
||||
if: always()
|
||||
run: |
|
||||
echo '```json' >> "$GITHUB_STEP_SUMMARY"
|
||||
if [[ -f result.json ]] && jq empty result.json 2>/dev/null; then
|
||||
jq . result.json >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo '{"error": "No valid result.json generated. Check logs for details."}' >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
echo '```' >> "$GITHUB_STEP_SUMMARY"
|
||||
|
|
@ -1,692 +0,0 @@
|
|||
name: Identify potential duplicates among new bug/crash reports
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
issue_number:
|
||||
description: "Issue number to analyze (for testing)"
|
||||
required: true
|
||||
type: number
|
||||
|
||||
concurrency:
|
||||
group: potential-duplicate-check-${{ github.event.issue.number || inputs.issue_number }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
identify-duplicates:
|
||||
# For manual testing, allow running on any branch; for automatic runs, only on main repo
|
||||
if: github.event_name == 'workflow_dispatch' || github.repository == 'zed-industries/zed'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: read
|
||||
|
||||
steps:
|
||||
- name: Get github app token
|
||||
id: get-app-token
|
||||
uses: actions/create-github-app-token@bef1eaf1c0ac2b148ee2a0a74c65fbe6db0631f1 # v2.1.4
|
||||
with:
|
||||
app-id: ${{ secrets.ZED_COMMUNITY_BOT_APP_ID }}
|
||||
private-key: ${{ secrets.ZED_COMMUNITY_BOT_PRIVATE_KEY }}
|
||||
owner: zed-industries
|
||||
|
||||
- name: Fetch issue and check eligibility
|
||||
id: fetch-issue
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
|
||||
with:
|
||||
github-token: ${{ steps.get-app-token.outputs.token }}
|
||||
script: |
|
||||
const issueNumber = context.payload.issue?.number || ${{ inputs.issue_number || 0 }};
|
||||
if (!issueNumber) {
|
||||
core.setFailed('No issue number provided');
|
||||
return;
|
||||
}
|
||||
|
||||
const { data: issue } = await github.rest.issues.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: issueNumber
|
||||
});
|
||||
|
||||
const typeName = issue.type?.name;
|
||||
const isTargetType = typeName === 'Bug' || typeName === 'Crash';
|
||||
|
||||
console.log(`Issue #${issueNumber}: "${issue.title}"`);
|
||||
console.log(`Issue type: ${typeName || '(none)'}`);
|
||||
console.log(`Is target type (Bug/Crash): ${isTargetType}`);
|
||||
|
||||
// Set default outputs for all paths
|
||||
core.setOutput('issue_number', issueNumber);
|
||||
core.setOutput('issue_title', issue.title);
|
||||
core.setOutput('issue_body', (issue.body || '').slice(0, 6000));
|
||||
core.setOutput('is_target_type', String(isTargetType));
|
||||
core.setOutput('is_staff', 'false');
|
||||
core.setOutput('should_continue', 'false');
|
||||
|
||||
if (!isTargetType) {
|
||||
console.log('::notice::Skipping - issue type is not Bug or Crash');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if author is staff (skip if so - they know what they're doing)
|
||||
const author = issue.user?.login || '';
|
||||
let isStaff = false;
|
||||
if (author) {
|
||||
try {
|
||||
const response = await github.rest.teams.getMembershipForUserInOrg({
|
||||
org: 'zed-industries',
|
||||
team_slug: 'staff',
|
||||
username: author
|
||||
});
|
||||
isStaff = response.data.state === 'active';
|
||||
} catch (error) {
|
||||
if (error.status !== 404) throw error;
|
||||
}
|
||||
}
|
||||
|
||||
core.setOutput('is_staff', String(isStaff));
|
||||
if (isStaff) {
|
||||
console.log(`::notice::Skipping - author @${author} is a staff member`);
|
||||
return;
|
||||
}
|
||||
|
||||
core.setOutput('should_continue', 'true');
|
||||
|
||||
# ========================================================================
|
||||
# PASS 1: Detect areas using Claude with the full area taxonomy
|
||||
# ========================================================================
|
||||
- name: "Pass 1: Detect areas with Claude"
|
||||
if: steps.fetch-issue.outputs.should_continue == 'true'
|
||||
id: detect-areas
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY_ISSUE_DEDUP }}
|
||||
ISSUE_TITLE: ${{ steps.fetch-issue.outputs.issue_title }}
|
||||
ISSUE_BODY: ${{ steps.fetch-issue.outputs.issue_body }}
|
||||
run: |
|
||||
# shellcheck disable=SC2016
|
||||
cat > /tmp/area_prompt.txt << 'PROMPT_EOF'
|
||||
You are classifying a GitHub issue for the Zed code editor into area categories.
|
||||
|
||||
## Issue Title
|
||||
ISSUE_TITLE_PLACEHOLDER
|
||||
|
||||
## Issue Body
|
||||
ISSUE_BODY_PLACEHOLDER
|
||||
|
||||
## Available Area Labels
|
||||
(descriptions provided only where the label name isn't self-explanatory)
|
||||
|
||||
accessibility
|
||||
ai, ai/acp (Agent Communication Protocol), ai/agent thread, ai/anthropic, ai/assistant, ai/bedrock, ai/codex, ai/copilot, ai/deepseek, ai/edit prediction, ai/gemini, ai/inline assistant, ai/lmstudio, ai/mcp (Model Context Protocol), ai/mistral, ai/ollama, ai/openai, ai/openai compatible, ai/openrouter, ai/qwen, ai/supermaven, ai/text thread, ai/zeta
|
||||
auth
|
||||
autocompletions
|
||||
billing
|
||||
cli
|
||||
code actions
|
||||
code folding
|
||||
collab - real-time collaboration with other Zed users (screen sharing, shared editing). NOT for remote development over SSH.
|
||||
collab/audio, collab/chat
|
||||
command palette
|
||||
controls/ime, controls/keybinds, controls/mouse
|
||||
debugger, debugger/dap/CodeLLDB, debugger/dap/debugpy, debugger/dap/gdb, debugger/dap/javascript
|
||||
design papercut - small UI/UX polish issues
|
||||
dev containers - Docker-based development environments
|
||||
diagnostics - LSP errors/warnings display
|
||||
discoverability
|
||||
editor, editor/brackets, editor/linked edits
|
||||
extensions/infrastructure
|
||||
file finder - fuzzy file search (Cmd/Ctrl+P)
|
||||
gpui - Zed's internal UI rendering framework
|
||||
inlay hints - inline hints from LSP (type annotations, parameter names)
|
||||
installer-updater
|
||||
integrations/environment - shell environment, PATH, env vars
|
||||
integrations/git, integrations/git/blame, integrations/terminal
|
||||
internationalization, internationalization/rtl support
|
||||
keymap editor
|
||||
language server, language server/server failure
|
||||
languages/* - language-specific syntax, grammar, or LSP issues (e.g., languages/python, languages/rust, languages/typescript)
|
||||
legal
|
||||
logging
|
||||
multi-buffer - viewing multiple files or search results in a single editor pane
|
||||
multi-cursor
|
||||
navigation - go to definition, find references, symbol search
|
||||
network - proxy settings, connectivity, SSL certificates. NOT for collab.
|
||||
onboarding
|
||||
outline - document symbols/structure sidebar
|
||||
parity/* - feature parity requests comparing to other editors (parity/vscode, parity/vim, parity/emacs, parity/jetbrains, parity/helix)
|
||||
performance, performance/memory leak
|
||||
permissions
|
||||
popovers - hover cards, tooltips, autocomplete dropdowns
|
||||
preview/images, preview/markdown
|
||||
project panel - file tree sidebar
|
||||
release notes
|
||||
repl
|
||||
search - project-wide search, find/replace
|
||||
security & privacy, security & privacy/workspace trust
|
||||
serialization - saving/restoring workspace state, undo history, folding state across restarts
|
||||
settings, settings/ui
|
||||
snippets
|
||||
status bar
|
||||
tasks - task runner integration
|
||||
telemetry
|
||||
tooling/* - external tool integrations (tooling/emmet, tooling/eslint, tooling/prettier, tooling/flatpak, tooling/nix)
|
||||
tree-sitter - syntax parsing and highlighting engine
|
||||
ui/animations, ui/dock, ui/file icons, ui/font, ui/menus, ui/minimap, ui/panel, ui/scaling, ui/scrolling, ui/tabs, ui/themes
|
||||
workspace - window management, pane layout, project handling
|
||||
zed account
|
||||
zed.dev
|
||||
|
||||
## Your Task
|
||||
|
||||
Based on the issue title and body, identify which areas this issue relates to.
|
||||
- Select 1-5 areas that best match the issue
|
||||
- Prefer more specific sub-areas when applicable (e.g., "ai/gemini" over just "ai")
|
||||
- Only select areas that are clearly relevant
|
||||
|
||||
## Response Format
|
||||
|
||||
Return ONLY a JSON object (no markdown fences, no explanation):
|
||||
{
|
||||
"areas": ["area1", "area2"],
|
||||
"reasoning": "Brief explanation of why these areas were selected"
|
||||
}
|
||||
PROMPT_EOF
|
||||
|
||||
# Single quotes are intentional to prevent bash expansion; node reads env vars via process.env
|
||||
# shellcheck disable=SC2016
|
||||
node << 'SCRIPT_EOF'
|
||||
// Fill the area-classification prompt template with the issue title and body
// (taken from the ISSUE_TITLE / ISSUE_BODY environment variables) and write the
// result to /tmp/area_prompt_final.txt for the API call that follows.
const fs = require('fs');

// Placeholder token -> text to insert. Unset variables become an empty string.
const substitutions = {
  ISSUE_TITLE_PLACEHOLDER: process.env.ISSUE_TITLE || '',
  ISSUE_BODY_PLACEHOLDER: process.env.ISSUE_BODY || '',
};

const template = fs.readFileSync('/tmp/area_prompt.txt', 'utf8');

// The issue text is untrusted, so two things matter here:
//  * a function replacer inserts the value literally; a string replacement
//    would expand sequences such as "$&" or "$'" found in the issue text;
//  * one pass over the template means text already inserted for one
//    placeholder is never rescanned, so a title that contains
//    "ISSUE_BODY_PLACEHOLDER" cannot capture the body.
const prompt = template.replace(
  /ISSUE_TITLE_PLACEHOLDER|ISSUE_BODY_PLACEHOLDER/g,
  (token) => substitutions[token],
);

fs.writeFileSync('/tmp/area_prompt_final.txt', prompt);
|
||||
SCRIPT_EOF
|
||||
|
||||
HTTP_CODE=$(curl -s -w "%{http_code}" -o /tmp/area_response.json -X POST "https://api.anthropic.com/v1/messages" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "x-api-key: $ANTHROPIC_API_KEY" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
--data-binary @- << EOF
|
||||
{
|
||||
"model": "claude-sonnet-4-5-20250929",
|
||||
"max_tokens": 256,
|
||||
"messages": [{"role": "user", "content": $(jq -Rs . < /tmp/area_prompt_final.txt)}]
|
||||
}
|
||||
EOF
|
||||
)
|
||||
|
||||
RESPONSE=$(< /tmp/area_response.json)
|
||||
|
||||
if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
|
||||
echo "HTTP Error: $HTTP_CODE"
|
||||
echo "$RESPONSE" | jq . 2>/dev/null || echo "$RESPONSE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if echo "$RESPONSE" | jq -e '.error' > /dev/null 2>&1; then
|
||||
echo "API Error:"
|
||||
echo "$RESPONSE" | jq .
|
||||
exit 1
|
||||
fi
|
||||
|
||||
AREA_RESULT=$(echo "$RESPONSE" | jq -r '.content[0].text // empty')
|
||||
|
||||
if [ -z "$AREA_RESULT" ]; then
|
||||
echo "Error: No response from Claude for area detection"
|
||||
echo "$RESPONSE" | jq .
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Area detection result: $AREA_RESULT"
|
||||
|
||||
# Extract just the areas array, handling potential markdown fences
|
||||
# shellcheck disable=SC2016
|
||||
CLEAN_JSON=$(echo "$AREA_RESULT" | sed 's/^```json//; s/^```//; s/```$//' | tr -d '\n')
|
||||
AREAS=$(echo "$CLEAN_JSON" | jq -r '.areas // [] | join(",")')
|
||||
echo "Detected areas: $AREAS"
|
||||
|
||||
echo "detected_areas=$AREAS" >> "$GITHUB_OUTPUT"
|
||||
|
||||
INPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.input_tokens')
|
||||
OUTPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.output_tokens')
|
||||
echo "Pass 1 token usage - Input: $INPUT_TOKENS, Output: $OUTPUT_TOKENS"
|
||||
|
||||
# ========================================================================
|
||||
# Use detected areas to filter magnets and search for candidates
|
||||
# ========================================================================
|
||||
- name: Filter magnets and search for candidates
|
||||
if: steps.fetch-issue.outputs.should_continue == 'true'
|
||||
id: gather-candidates
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
|
||||
with:
|
||||
github-token: ${{ steps.get-app-token.outputs.token }}
|
||||
script: |
|
||||
// ============================================================
|
||||
// KNOWN DUPLICATE MAGNETS (from #46355)
|
||||
// ============================================================
|
||||
const DUPLICATE_MAGNETS = [
|
||||
{ number: 37074, title: "Support history with external ACP agents", areas: ["ai", "ai/gemini", "ai/acp"] },
|
||||
{ number: 35780, title: "Zed consumes a lot of memory and CPU when opening ~/ or other large file trees", areas: ["workspace", "performance", "performance/memory leak", "integrations/git"] },
|
||||
{ number: 16965, title: "Support for non UTF-8 text encodings", areas: ["editor", "internationalization"] },
|
||||
{ number: 38109, title: "Zed out of sync with changes made outside of editor", areas: ["workspace"] },
|
||||
{ number: 16727, title: "Select text in markdown preview", areas: ["preview/markdown", "languages/markdown"] },
|
||||
{ number: 31102, title: "RTL Right-to-Left Text Input/Rendering Support", areas: ["internationalization"] },
|
||||
{ number: 7371, title: "Restarts should be non-destructive on workspace restore/reload", areas: ["workspace", "serialization"] },
|
||||
{ number: 7992, title: "Font rendering on LoDPI displays", areas: ["ui/font"] },
|
||||
{ number: 40018, title: "Windows Beta: Terminal overwrites text when resized and window overflow", areas: ["integrations/terminal"] },
|
||||
{ number: 29962, title: "Agent Panel: Cannot access zed hosted models (via Cloudflare HKG)", areas: ["ai", "network"] },
|
||||
{ number: 15097, title: "Serialize undo history (local and remote projects)", areas: ["workspace", "serialization"] },
|
||||
{ number: 29846, title: "Collapsed code blocks are not restored properly", areas: ["editor", "serialization", "code folding"] },
|
||||
{ number: 38799, title: "Poor search performance in large repositories", areas: ["performance", "search"] },
|
||||
{ number: 27283, title: "Inefficient memory use when opening large file in Zed", areas: ["performance"] },
|
||||
{ number: 39806, title: "Raspberry Pi OS (Trixie) Zed 0.207.3 Video Memory Corruption on Start", areas: ["gpui"] },
|
||||
{ number: 29970, title: "Unable to download any extensions (due to potential DigitalOcean IP block or ISP block)", areas: ["network"] },
|
||||
{ number: 29026, title: "Ability to copy/paste files from the system file manager", areas: ["workspace"] },
|
||||
{ number: 7940, title: "Zed is sometimes unresponsive when the OS awakes from sleep", areas: ["workspace"] },
|
||||
{ number: 37025, title: "Failed to generate thread summary", areas: ["ai"] },
|
||||
{ number: 16156, title: "Support for project settings to enable/disable/control AI features", areas: ["ai", "settings"] },
|
||||
{ number: 24752, title: "Extra horizontal scrolling when inline blame is enabled with soft wrapping", areas: ["editor"] },
|
||||
{ number: 20970, title: "Excessive memory consumption on project search with large files present", areas: ["performance/memory leak", "search", "multi-buffer"] },
|
||||
{ number: 12176, title: "Only some ligatures are being applied", areas: ["ui/font", "settings"] },
|
||||
{ number: 13564, title: "blade: Text is rendered either too thick or too thin", areas: ["ui/font"] },
|
||||
{ number: 38901, title: "Terminal freezes in Linux session when Ctrl+C is pressed before exit", areas: ["controls/keybinds", "integrations/terminal"] },
|
||||
{ number: 20167, title: "Support unsetting default keybindings", areas: ["controls/keybinds"] },
|
||||
{ number: 25469, title: "Tracking - Linux non-QWERTY keyboard support", areas: ["controls/keybinds"] },
|
||||
{ number: 29598, title: "Manual refresh on unsupported filesystems (nfs, fuse, exfat) without inotify/fsevents", areas: ["project panel"] },
|
||||
{ number: 14428, title: "Ordering of search tokens in file finder fuzzy match", areas: ["file finder"] },
|
||||
{ number: 20771, title: "Workspace: Reload to respect the desktop/workspace Zed windows were in after reload", areas: ["workspace", "serialization"] },
|
||||
{ number: 7465, title: "Lines with RTL text aren't rendered correctly", areas: ["editor", "internationalization/rtl support", "parity/vscode"] },
|
||||
{ number: 16120, title: "Large files without newlines (all on one line) cause Zed to hang/crash", areas: ["editor"] },
|
||||
{ number: 22703, title: "Syntax aware folding (folds.scm support)", areas: ["editor", "tree-sitter"] },
|
||||
{ number: 38927, title: "Find & Replace memory leak on large files", areas: ["performance", "performance/memory leak"] },
|
||||
{ number: 4560, title: "Improve streaming search speed", areas: ["performance", "search"] },
|
||||
{ number: 14053, title: "Linux Shortcuts don't work with non-latin / international keyboard layouts", areas: ["internationalization", "controls/keybinds"] },
|
||||
{ number: 31637, title: "High memory consumption in Project Search with large codebases", areas: ["performance/memory leak", "search"] },
|
||||
{ number: 11744, title: "Incorrect spacing of terminal font", areas: ["ui/font", "integrations/terminal"] },
|
||||
{ number: 4746, title: "Terminal Nerd Font rendering incorrect line height", areas: ["ui/font", "integrations/terminal"] },
|
||||
{ number: 10647, title: "User configurable mouse bindings (like keymap for key+mouse)", areas: ["controls/keybinds", "controls/mouse", "accessibility"] },
|
||||
{ number: 34865, title: "ctrl-w with pane::CloseActiveItem binding closes the project panel instead of the active pane", areas: ["controls/keybinds", "ui/panel"] },
|
||||
{ number: 12163, title: "Cannot see list of installed extensions when offline / disconnected", areas: ["network"] },
|
||||
{ number: 44630, title: "Tables do not render all columns in markdown preview", areas: ["preview/markdown"] },
|
||||
{ number: 39435, title: "Windows: Low fps in many cases", areas: ["gpui"] },
|
||||
{ number: 36227, title: "Zed becomes unresponsive when closing", areas: ["workspace"] },
|
||||
{ number: 44962, title: "Can not open file in zed if filename includes (1)", areas: ["workspace"] },
|
||||
{ number: 32318, title: "Zed hangs after exiting sleep mode in Linux", areas: ["workspace"] },
|
||||
{ number: 5120, title: "Add options to hide title and status bar", areas: ["settings", "status bar"] },
|
||||
{ number: 29323, title: "uv: Failed to detect Python venv correctly", areas: ["language server", "languages/python", "integrations/environment"] },
|
||||
{ number: 7450, title: "Support LSP Semantic Tokens", areas: ["language server", "languages", "ui/themes"] },
|
||||
{ number: 31846, title: "LSP: triggerCharacters for signature help declared by servers do not seem to be respected", areas: ["language server"] },
|
||||
{ number: 32792, title: "[SWAY] Zed window flashes rapidly on Sway/wlroots", areas: ["gpui"] },
|
||||
{ number: 28398, title: "Stale buffers should be removed from search multibuffer", areas: ["search", "multi-buffer"] },
|
||||
{ number: 35011, title: "Delete Key against remote Hosts Doesn't Delete Folders", areas: ["project panel"] },
|
||||
{ number: 8626, title: "Palette File Navigation - Preview File Content", areas: ["file finder"] },
|
||||
{ number: 31468, title: "Certain LSP features are not activated till you trigger them manually when working with a remote project", areas: ["language server/server failure", "autocompletions"] },
|
||||
{ number: 9789, title: "Zed checks for LSP updates when offline and disables LSPs irreversibly in the process", areas: ["language server/server failure"] },
|
||||
{ number: 21403, title: "Completions and code actions should not use uniform lists", areas: ["autocompletions", "popovers", "diagnostics"] },
|
||||
{ number: 15196, title: "Remote Project REPL support", areas: ["repl"] },
|
||||
];
|
||||
|
||||
const MAX_SEARCHES = 5;
|
||||
|
||||
const issueNumber = parseInt('${{ steps.fetch-issue.outputs.issue_number }}', 10);
|
||||
const title = process.env.ISSUE_TITLE || '';
|
||||
const body = process.env.ISSUE_BODY || '';
|
||||
const detectedAreasStr = '${{ steps.detect-areas.outputs.detected_areas }}';
|
||||
const detectedAreas = new Set(detectedAreasStr.split(',').filter(a => a.trim()));
|
||||
|
||||
console.log(`Detected areas from Claude: ${[...detectedAreas].join(', ') || '(none)'}`);
|
||||
|
||||
/**
 * Tell whether a detected area and a magnet's area lie on the same branch of
 * the slash-separated label hierarchy.
 *
 * Two areas match when they are identical or when either one is a
 * "/"-delimited prefix of the other (e.g. "ai" and "ai/gemini"). Siblings such
 * as "ui/font" and "ui/tabs" do not match.
 *
 * @param {string} detected - Area name reported for the new issue.
 * @param {string} magnetArea - Area name attached to a duplicate magnet.
 * @returns {boolean} True when the areas are equal or ancestor/descendant.
 */
function areasMatch(detected, magnetArea) {
  // The trailing "/" keeps "ai" from matching an unrelated label like "aix".
  const isAncestorOf = (ancestor, descendant) => descendant.startsWith(`${ancestor}/`);

  return (
    detected === magnetArea ||
    isAncestorOf(detected, magnetArea) ||
    isAncestorOf(magnetArea, detected)
  );
}
|
||||
|
||||
// Filter magnets based on detected areas
|
||||
const relevantMagnets = DUPLICATE_MAGNETS.filter(magnet => {
|
||||
if (detectedAreas.size === 0) return true;
|
||||
return magnet.areas.some(magnetArea =>
|
||||
[...detectedAreas].some(detected => areasMatch(detected, magnetArea))
|
||||
);
|
||||
}).slice(0, 20);
|
||||
|
||||
console.log(`Relevant duplicate magnets: ${relevantMagnets.length}`);
|
||||
|
||||
// Build search queries
|
||||
const searchQueries = [];
|
||||
const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000).toISOString().split('T')[0];
|
||||
|
||||
// 1. Keyword search built from the issue title.
// Words that carry no search signal: common English filler plus bug-report
// boilerplate ("issue", "error", "zed", ...). "doesn" is what remains of
// "doesn't" once punctuation has been turned into spaces.
const stopwords = [
  'with', 'that', 'this', 'from', 'have', 'been', 'were', 'what', 'when',
  'where', 'which', 'while', 'does', 'doesn', 'should', 'would', 'could',
  'about', 'after', 'before', 'between', 'into', 'through', 'during',
  'above', 'below', 'under', 'again', 'further', 'then', 'once', 'here',
  'there', 'some', 'such', 'only', 'same', 'than', 'very', 'just', 'also',
  'work', 'working', 'works', 'issue', 'problem', 'error', 'bug', 'zed',
];

// Lower-case the title, turn every non-word character into a space, then keep
// the first five words that are at least three characters long and are not
// stopwords.
const normalizedTitle = title.toLowerCase().replace(/[^\w\s]/g, ' ');
const titleKeywords = [];
for (const word of normalizedTitle.split(/\s+/)) {
  if (titleKeywords.length === 5) break;
  if (word.length < 3 || stopwords.includes(word)) continue;
  titleKeywords.push(word);
}

// The keyword query is only issued when at least two keywords survive.
if (titleKeywords.length > 1) {
  const keywordQuery = `repo:zed-industries/zed is:issue created:>${thirtyDaysAgo} ${titleKeywords.join(' ')}`;
  searchQueries.push({ type: 'keyword', query: keywordQuery });
}
|
||||
|
||||
// 2. Area-based searches (using Claude-detected areas)
|
||||
for (const area of [...detectedAreas].slice(0, 3)) {
|
||||
searchQueries.push({
|
||||
type: 'area',
|
||||
query: `repo:zed-industries/zed is:issue is:open label:"area:${area}" created:>${thirtyDaysAgo}`
|
||||
});
|
||||
}
|
||||
|
||||
// 3. Exact-phrase search built from the first error-like fragment in the body.
// A fragment is one of the words error/panic/crash/failed/exception followed by
// ":" or whitespace and then 10-100 characters on the same line.
const errorPatterns = body.match(/(?:error|panic|crash|failed|exception)[:\s]+[^\n]{10,100}/gi) || [];
const [firstErrorFragment] = errorPatterns;

if (firstErrorFragment !== undefined) {
  // Cut to 60 characters, replace punctuation with spaces and collapse
  // whitespace runs so the snippet can be embedded in a quoted search phrase.
  const head = firstErrorFragment.slice(0, 60);
  const wordsOnly = head.replace(/[^\w\s]/g, ' ');
  const errorSnippet = wordsOnly.replace(/\s+/g, ' ').trim();

  // Snippets of 15 characters or fewer are skipped.
  if (errorSnippet.length >= 16) {
    const phrase = errorSnippet.slice(0, 40);
    searchQueries.push({
      type: 'error',
      query: `repo:zed-industries/zed is:issue "${phrase}"`,
    });
  }
}
|
||||
|
||||
// Execute searches and collect candidates
|
||||
const candidates = [];
|
||||
const seenIssues = new Set([issueNumber]);
|
||||
|
||||
for (const { type, query } of searchQueries.slice(0, MAX_SEARCHES)) {
|
||||
try {
|
||||
console.log(`Search (${type}): ${query}`);
|
||||
const { data: results } = await github.rest.search.issuesAndPullRequests({
|
||||
q: query,
|
||||
sort: 'created',
|
||||
order: 'desc',
|
||||
per_page: 10
|
||||
});
|
||||
|
||||
for (const item of results.items) {
|
||||
if (!seenIssues.has(item.number) && !item.pull_request) {
|
||||
seenIssues.add(item.number);
|
||||
candidates.push({
|
||||
number: item.number,
|
||||
title: item.title,
|
||||
state: item.state,
|
||||
created_at: item.created_at,
|
||||
body_preview: (item.body || '').slice(0, 800),
|
||||
source: type
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(`Search failed (${type}): ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Found ${candidates.length} candidates from searches`);
|
||||
|
||||
// Prepare issue data for Claude
|
||||
const issueData = {
|
||||
number: issueNumber,
|
||||
title: title,
|
||||
body: body.slice(0, 4000),
|
||||
};
|
||||
|
||||
// Prepare output
|
||||
core.setOutput('issue_data', JSON.stringify(issueData));
|
||||
core.setOutput('duplicate_magnets', JSON.stringify(relevantMagnets));
|
||||
core.setOutput('candidates', JSON.stringify(candidates.slice(0, 12)));
|
||||
core.setOutput('detected_areas', [...detectedAreas].join(', '));
|
||||
core.setOutput('should_analyze', (relevantMagnets.length > 0 || candidates.length > 0) ? 'true' : 'false');
|
||||
env:
|
||||
ISSUE_TITLE: ${{ steps.fetch-issue.outputs.issue_title }}
|
||||
ISSUE_BODY: ${{ steps.fetch-issue.outputs.issue_body }}
|
||||
|
||||
# ========================================================================
|
||||
# PASS 2: Analyze duplicates with Claude
|
||||
# ========================================================================
|
||||
- name: "Pass 2: Analyze duplicates with Claude"
|
||||
if: |
|
||||
steps.fetch-issue.outputs.should_continue == 'true' &&
|
||||
steps.gather-candidates.outputs.should_analyze == 'true'
|
||||
id: analyze
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY_ISSUE_DEDUP }}
|
||||
ISSUE_DATA: ${{ steps.gather-candidates.outputs.issue_data }}
|
||||
DUPLICATE_MAGNETS: ${{ steps.gather-candidates.outputs.duplicate_magnets }}
|
||||
CANDIDATES: ${{ steps.gather-candidates.outputs.candidates }}
|
||||
run: |
|
||||
# shellcheck disable=SC2016
|
||||
cat > /tmp/prompt.txt << 'PROMPT_EOF'
|
||||
You are analyzing a GitHub issue to determine if it might be a duplicate of an existing issue.
|
||||
|
||||
## New Issue Being Analyzed
|
||||
ISSUE_DATA_PLACEHOLDER
|
||||
|
||||
## Known Frequently-Duplicated Issues (High Priority)
|
||||
These issues have historically received many duplicate reports. Check these first.
|
||||
DUPLICATE_MAGNETS_PLACEHOLDER
|
||||
|
||||
## Recent Similar Issues Found by Search
|
||||
CANDIDATES_PLACEHOLDER
|
||||
|
||||
## Your Task
|
||||
|
||||
1. First, understand what the new issue is about:
|
||||
- What specific bug or problem is being reported?
|
||||
- What error messages, stack traces, or specific behaviors are mentioned?
|
||||
- What component/feature is affected?
|
||||
|
||||
2. Check against the frequently-duplicated issues first (high priority):
|
||||
- These are known "duplicate magnets" that often get re-reported
|
||||
- If the new issue describes the same problem, it's likely a duplicate
|
||||
|
||||
3. Then check the recent similar issues:
|
||||
- Look for issues describing the SAME bug, not just related topics
|
||||
|
||||
## Duplicate Criteria (be strict!)
|
||||
|
||||
An issue IS a duplicate if:
|
||||
- It describes the EXACT same bug with the same root cause
|
||||
- It has the same error message or stack trace
|
||||
- It has the same reproduction steps leading to the same outcome
|
||||
|
||||
An issue is NOT a duplicate if:
|
||||
- It's merely related to the same feature/area
|
||||
- It has similar symptoms but potentially different causes
|
||||
- It mentions similar things but describes a different problem
|
||||
|
||||
Be VERY conservative. It's better to miss a duplicate than to incorrectly flag a unique issue.
|
||||
|
||||
## Response Format
|
||||
|
||||
Return ONLY a JSON object (no markdown fences, no explanation before or after):
|
||||
{
|
||||
"is_potential_duplicate": boolean,
|
||||
"confidence": "high" | "medium" | "low" | "none",
|
||||
"potential_duplicates": [
|
||||
{"number": integer, "title": "string", "similarity_reason": "string explaining why this might be the same bug"}
|
||||
],
|
||||
"analysis_summary": "Brief explanation of what the new issue is about and your conclusion",
|
||||
"recommendation": "flag_as_duplicate" | "needs_human_review" | "not_a_duplicate"
|
||||
}
|
||||
PROMPT_EOF
|
||||
|
||||
# Single quotes are intentional to prevent bash expansion; node reads env vars via process.env
|
||||
# shellcheck disable=SC2016
|
||||
node << 'SCRIPT_EOF'
|
||||
// Fill the duplicate-analysis prompt template with the JSON payloads passed in
// the ISSUE_DATA / DUPLICATE_MAGNETS / CANDIDATES environment variables and
// write the result to /tmp/prompt_final.txt for the API call that follows.
const fs = require('fs');

// Placeholder token -> text to insert. Unset variables become an empty string
// rather than the literal text "undefined".
const substitutions = {
  ISSUE_DATA_PLACEHOLDER: process.env.ISSUE_DATA || '',
  DUPLICATE_MAGNETS_PLACEHOLDER: process.env.DUPLICATE_MAGNETS || '',
  CANDIDATES_PLACEHOLDER: process.env.CANDIDATES || '',
};

const template = fs.readFileSync('/tmp/prompt.txt', 'utf8');

// The payloads embed untrusted issue titles and bodies, so two things matter:
//  * a function replacer inserts the value literally; a string replacement
//    would expand sequences such as "$&" or "$'" found in the issue text;
//  * one pass over the template means text already inserted for one
//    placeholder is never rescanned, so an issue body that mentions
//    "CANDIDATES_PLACEHOLDER" cannot capture a later substitution.
const prompt = template.replace(
  /ISSUE_DATA_PLACEHOLDER|DUPLICATE_MAGNETS_PLACEHOLDER|CANDIDATES_PLACEHOLDER/g,
  (token) => substitutions[token],
);

fs.writeFileSync('/tmp/prompt_final.txt', prompt);
|
||||
SCRIPT_EOF
|
||||
|
||||
HTTP_CODE=$(curl -s -w "%{http_code}" -o /tmp/response.json -X POST "https://api.anthropic.com/v1/messages" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "x-api-key: $ANTHROPIC_API_KEY" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
--data-binary @- << EOF
|
||||
{
|
||||
"model": "claude-sonnet-4-5-20250929",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": $(jq -Rs . < /tmp/prompt_final.txt)}]
|
||||
}
|
||||
EOF
|
||||
)
|
||||
|
||||
RESPONSE=$(< /tmp/response.json)
|
||||
|
||||
if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
|
||||
echo "HTTP Error: $HTTP_CODE"
|
||||
echo "$RESPONSE" | jq . 2>/dev/null || echo "$RESPONSE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if echo "$RESPONSE" | jq -e '.error' > /dev/null 2>&1; then
|
||||
echo "API Error:"
|
||||
echo "$RESPONSE" | jq .
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ANALYSIS=$(echo "$RESPONSE" | jq -r '.content[0].text // empty')
|
||||
|
||||
if [ -z "$ANALYSIS" ]; then
|
||||
echo "Error: No response from Claude"
|
||||
echo "$RESPONSE" | jq .
|
||||
exit 1
|
||||
fi
|
||||
|
||||
{
|
||||
echo "analysis<<ANALYSIS_EOF"
|
||||
echo "$ANALYSIS"
|
||||
echo "ANALYSIS_EOF"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
INPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.input_tokens')
|
||||
OUTPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.output_tokens')
|
||||
echo "Pass 2 token usage - Input: $INPUT_TOKENS, Output: $OUTPUT_TOKENS"
|
||||
|
||||
# ========================================================================
|
||||
# Log results
|
||||
# ========================================================================
|
||||
- name: Log analysis results
  if: |
    steps.fetch-issue.outputs.should_continue == 'true' &&
    !cancelled()
  uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
  with:
    script: |
      // Writes the outcome of the duplicate analysis to the job log and to the
      // workflow run summary.
      //
      // All step outputs are read from the environment (see `env` below) rather
      // than being spliced into this script by workflow expressions. Several of
      // them are derived from model output over user-supplied issue text, and
      // splicing them into source code would let a crafted value break out of
      // the string literal and run arbitrary JavaScript.
      const issueNumber = Number.parseInt(process.env.ISSUE_NUMBER ?? '', 10) || 0;
      const issueTitle = process.env.ISSUE_TITLE || '';
      const detectedAreas = process.env.DETECTED_AREAS || '(none)';
      const shouldAnalyze = process.env.SHOULD_ANALYZE === 'true';
      const analysisRaw = process.env.ANALYSIS_OUTPUT || '';

      console.log('='.repeat(60));
      console.log('DUPLICATE DETECTION RESULTS (TWO-PASS)');
      console.log('='.repeat(60));
      console.log(`Issue: #${issueNumber} - ${issueTitle}`);
      console.log(`URL: https://github.com/zed-industries/zed/issues/${issueNumber}`);
      console.log(`Detected Areas: ${detectedAreas}`);

      // Nothing was handed to the analysis step, so there is nothing to report.
      if (!shouldAnalyze) {
        console.log('\nNo duplicate magnets or candidates found - skipping analysis');
        core.summary.addHeading(`✅ Issue #${issueNumber}: No similar issues found`, 2);
        core.summary.addRaw(`\n**Title:** ${issueTitle}\n\n`);
        core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
        core.summary.addRaw('No potential duplicates were found by search or in the known duplicate magnets list.\n');
        await core.summary.write();
        return;
      }

      // The analysis step was expected to run but produced no output.
      if (!analysisRaw) {
        console.log('\nNo analysis output received');
        core.summary.addHeading(`⚠️ Issue #${issueNumber}: Analysis incomplete`, 2);
        core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
        core.summary.addRaw('The Claude analysis step did not produce output. Check workflow logs.\n');
        await core.summary.write();
        return;
      }

      try {
        // The model sometimes wraps its JSON in a markdown code fence; unwrap it.
        let cleanJson = analysisRaw.trim();
        if (cleanJson.startsWith('```')) {
          cleanJson = cleanJson.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '');
        }

        const analysis = JSON.parse(cleanJson);
        const duplicates = Array.isArray(analysis.potential_duplicates)
          ? analysis.potential_duplicates
          : [];

        console.log(`\nIs Potential Duplicate: ${analysis.is_potential_duplicate}`);
        console.log(`Confidence: ${analysis.confidence}`);
        console.log(`Recommendation: ${analysis.recommendation}`);
        console.log(`\nAnalysis Summary:\n${analysis.analysis_summary}`);

        if (duplicates.length > 0) {
          console.log(`\nPotential Duplicates Found: ${duplicates.length}`);
          for (const dup of duplicates) {
            console.log(` - #${dup.number}: ${dup.title}`);
            console.log(` Reason: ${dup.similarity_reason}`);
          }
        } else {
          console.log('\nNo potential duplicates identified by analysis.');
        }

        console.log('\n' + '='.repeat(60));

        const summaryIcon = analysis.is_potential_duplicate ? '⚠️' : '✅';
        const summaryText = analysis.is_potential_duplicate
          ? `Potential duplicate detected (${analysis.confidence} confidence)`
          : 'No likely duplicates found';

        core.summary.addHeading(`${summaryIcon} Issue #${issueNumber}: ${summaryText}`, 2);
        core.summary.addRaw(`\n**Title:** ${issueTitle}\n\n`);
        core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
        core.summary.addRaw(`**Recommendation:** \`${analysis.recommendation}\`\n\n`);
        core.summary.addRaw(`**Summary:** ${analysis.analysis_summary}\n\n`);

        if (duplicates.length > 0) {
          core.summary.addHeading('Potential Duplicates', 3);
          const rows = duplicates.map((d) => {
            // Coerce to strings: the model may omit a field, and a missing
            // title would otherwise throw on .slice().
            const title = String(d.title ?? '');
            return [
              `[#${d.number}](https://github.com/zed-industries/zed/issues/${d.number})`,
              title.slice(0, 60) + (title.length > 60 ? '...' : ''),
              String(d.similarity_reason ?? '')
            ];
          });
          core.summary.addTable([
            [{data: 'Issue', header: true}, {data: 'Title', header: true}, {data: 'Similarity Reason', header: true}],
            ...rows
          ]);
        }

        await core.summary.write();

      } catch (e) {
        // Report the failure in the summary instead of failing the step; the
        // raw output is truncated to keep the summary readable.
        console.log('Failed to parse analysis output:', e.message);
        console.log('Raw output:', analysisRaw);
        core.summary.addHeading(`⚠️ Issue #${issueNumber}: Failed to parse analysis`, 2);
        core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
        core.summary.addRaw(`Error: ${e.message}\n\nRaw output:\n\`\`\`\n${analysisRaw.slice(0, 1000)}\n\`\`\``);
        await core.summary.write();
      }
  env:
    ISSUE_NUMBER: ${{ steps.fetch-issue.outputs.issue_number }}
    ISSUE_TITLE: ${{ steps.fetch-issue.outputs.issue_title }}
    DETECTED_AREAS: ${{ steps.gather-candidates.outputs.detected_areas }}
    SHOULD_ANALYZE: ${{ steps.gather-candidates.outputs.should_analyze }}
    ANALYSIS_OUTPUT: ${{ steps.analyze.outputs.analysis }}
|
||||
27
.github/workflows/update_duplicate_magnets.yml
vendored
Normal file
27
.github/workflows/update_duplicate_magnets.yml
vendored
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
name: Update Duplicate Magnets Issue

# Regenerates the "duplicate magnets" tracking issue (#46355) on a schedule.

on:
  schedule:
    - cron: "0 6 * * 1,4" # Mondays and Thursdays at 6 AM UTC
  workflow_dispatch:

jobs:
  update-duplicate-magnets:
    runs-on: ubuntu-latest
    if: github.repository == 'zed-industries/zed'
    # Least privilege: read the repo for checkout, write issues to update the
    # tracking issue body.
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: pip install requests

      - name: Update duplicate magnets issue
        # The script falls back to the GITHUB_TOKEN environment variable when
        # --github-token is not given, so the secret never appears on the
        # command line.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python script/github-find-top-duplicated-bugs.py \
            --issue-number 46355
|
||||
534
script/github-check-new-issue-for-duplicates.py
Normal file
534
script/github-check-new-issue-for-duplicates.py
Normal file
|
|
@ -0,0 +1,534 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comment on newly opened issues that might be duplicates of an existing issue.
|
||||
|
||||
This script is run by a GitHub Actions workflow when a new bug or crash report
|
||||
is opened. It:
|
||||
1. Checks eligibility (must be bug/crash type, non-staff author)
|
||||
2. Detects relevant areas using Claude + the area label taxonomy
|
||||
3. Parses known "duplicate magnets" from tracking issue #46355
|
||||
4. Searches for similar recent issues by title keywords, area labels, and error patterns
|
||||
5. Asks Claude to analyze potential duplicates (magnets + search results)
|
||||
6. Posts a comment on the issue if high-confidence duplicates are found
|
||||
|
||||
Requires:
|
||||
requests (pip install requests)
|
||||
|
||||
Usage:
|
||||
python github-check-new-issue-for-duplicates.py <issue_number>
|
||||
|
||||
Environment variables:
|
||||
GITHUB_TOKEN - GitHub token (org members: read, issues: read & write)
|
||||
ANTHROPIC_API_KEY - Anthropic API key for Claude
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import requests
|
||||
|
||||
GITHUB_API = "https://api.github.com"
|
||||
REPO_OWNER = "zed-industries"
|
||||
REPO_NAME = "zed"
|
||||
TRACKING_ISSUE_NUMBER = 46355
|
||||
STAFF_TEAM_SLUG = "staff"
|
||||
|
||||
# area prefixes to collapse in taxonomy (show summary instead of all sub-labels)
|
||||
PREFIXES_TO_COLLAPSE = ["languages", "parity", "tooling"]
|
||||
|
||||
# stopwords to filter from title keyword searches (short words handled by len > 2 filter)
|
||||
STOPWORDS = {
|
||||
"after", "all", "also", "and", "any", "but", "can't", "does", "doesn't",
|
||||
"don't", "for", "from", "have", "just", "not", "only", "some", "that",
|
||||
"the", "this", "when", "while", "with", "won't", "work", "working", "zed",
|
||||
}
|
||||
|
||||
|
||||
def log(message):
    """Write a diagnostic line to stderr, keeping stdout free for the JSON result."""
    sys.stderr.write(f"{message}\n")
|
||||
|
||||
|
||||
def github_api_get(path, params=None):
    """Fetch JSON from the GitHub REST API.

    Args:
        path: API path relative to GITHUB_API; a leading slash is optional.
        params: Optional dict of query-string parameters.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if GitHub does not respond within 30 seconds.
    """
    url = f"{GITHUB_API}/{path.lstrip('/')}"
    # requests has no default timeout; without one a stalled connection would
    # block until the workflow itself is killed.
    response = requests.get(url, headers=GITHUB_HEADERS, params=params, timeout=30)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def github_search_issues(query, per_page=15):
    """Run an issue search and return the matching items, newest first.

    Returns an empty list when the response carries no "items" key.
    """
    search_params = dict(q=query, sort="created", order="desc", per_page=per_page)
    payload = github_api_get("/search/issues", search_params)
    return payload.get("items", [])
|
||||
|
||||
|
||||
def check_team_membership(org, team_slug, username):
    """Return True when `username` has an active membership in the given team.

    A 404 from the memberships endpoint is treated as "not a member"; any
    other HTTP error is re-raised.
    """
    endpoint = f"/orgs/{org}/teams/{team_slug}/memberships/{username}"
    try:
        membership = github_api_get(endpoint)
    except requests.HTTPError as error:
        if error.response.status_code != 404:
            raise
        return False
    return membership.get("state") == "active"
|
||||
|
||||
|
||||
def post_comment(issue_number: int, body):
    """Post `body` as a new comment on the given issue.

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if GitHub does not respond within 30 seconds.
    """
    url = f"{GITHUB_API.rstrip('/')}/repos/{REPO_OWNER}/{REPO_NAME}/issues/{issue_number}/comments"
    # requests has no default timeout; bound the wait so a stalled connection
    # cannot hang the job.
    response = requests.post(url, headers=GITHUB_HEADERS, json={"body": body}, timeout=30)
    response.raise_for_status()
    log(f" Posted comment on #{issue_number}")
|
||||
|
||||
|
||||
def build_duplicate_comment(matches):
    """Render the markdown comment announcing potential duplicates.

    Each match must provide "number" and "explanation". The issue numbers are
    listed first; the per-issue explanations go into a collapsed <details>
    section.
    """
    issue_refs = [f"- #{match['number']}" for match in matches]
    reasons = [f"**#{match['number']}:** {match['explanation']}" for match in matches]

    sections = [
        "This issue appears to be a duplicate of:",
        "",
        "\n".join(issue_refs),
        "",
        "**If this is indeed a duplicate:**",
        'Please close this issue and subscribe to the linked issue for updates (select "Close as not planned" → "Duplicate")',
        "",
        "**If this is a different issue:**",
        "No action needed. A maintainer will review this shortly.",
        "",
        "<details>",
        "<summary>Why were these issues selected?</summary>",
        "",
        "\n\n".join(reasons),
        "",
        "</details>",
        "",
        "---",
        "<sub>This is an automated analysis and might be incorrect.</sub>",
    ]
    return "\n".join(sections)
|
||||
|
||||
|
||||
def call_claude(api_key, system, user_content, max_tokens=1024):
    """Send a single-turn message to Claude and return the text of its reply.

    Args:
        api_key: Anthropic API key.
        system: System prompt.
        user_content: Content of the single user message.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The text of the first content block, or "" when the reply has no
        content or its first block is not of type "text".

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if the API does not respond within 120 seconds.
    """
    response = requests.post(
        "https://api.anthropic.com/v1/messages",
        headers={
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        },
        json={
            "model": "claude-sonnet-4-20250514",
            "max_tokens": max_tokens,
            "temperature": 0.0,
            "system": system,
            "messages": [{"role": "user", "content": user_content}],
        },
        # requests has no default timeout; bound the wait so a stalled
        # connection cannot hang the job.
        timeout=120,
    )
    response.raise_for_status()
    data = response.json()

    usage = data.get("usage", {})
    log(f" Token usage - Input: {usage.get('input_tokens', 'N/A')}, Output: {usage.get('output_tokens', 'N/A')}")

    content = data.get("content", [])
    if content and content[0].get("type") == "text":
        return content[0].get("text") or ""
    return ""
|
||||
|
||||
|
||||
def fetch_issue(issue_number: int):
    """Load an issue from GitHub and reduce it to the fields this script uses.

    Returns a dict with "number", "title", "body", "author" and "type". Body
    and author default to "" when absent; type is None when the issue has no
    issue type.
    """
    log(f"Fetching issue #{issue_number}")

    raw = github_api_get(f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{issue_number}")
    # "user" and "type" can be null in the API response, hence the `or {}`.
    author_info = raw.get("user") or {}
    type_info = raw.get("type") or {}
    issue = dict(
        number=issue_number,
        title=raw["title"],
        body=raw.get("body") or "",
        author=author_info.get("login") or "",
        type=type_info.get("name"),
    )

    log(f" Title: {issue['title']}\n Type: {issue['type']}\n Author: {issue['author']}")
    return issue
|
||||
|
||||
|
||||
def should_skip(issue):
    """Decide whether the issue is excluded from duplicate detection.

    Skips anything that is not a Bug or Crash report, and anything whose
    author is an active member of the staff team.
    """
    issue_type = issue["type"]
    if issue_type not in ("Bug", "Crash"):
        log(f" Skipping: issue type '{issue_type}' is not a bug/crash report")
        return True

    author = issue["author"]
    if not author:
        # No author to look up, so the staff check cannot apply.
        return False

    if check_team_membership(REPO_OWNER, STAFF_TEAM_SLUG, author):
        log(f" Skipping: author '{author}' is a {STAFF_TEAM_SLUG} member")
        return True

    return False
|
||||
|
||||
|
||||
def fetch_area_labels():
    """Collect every `area:*` label defined on the repository.

    Pages through the labels endpoint until an empty page is returned.
    Returns a list of {"name", "description"} dicts where the "area:" prefix
    has been removed from the name and a missing description becomes "".
    """
    log("Fetching area labels")

    endpoint = f"/repos/{REPO_OWNER}/{REPO_NAME}/labels"
    all_labels = []
    page_number = 1
    while True:
        batch = github_api_get(endpoint, params={"per_page": 100, "page": page_number})
        if not batch:
            break
        all_labels.extend(batch)
        page_number += 1

    area_labels = []
    for label in all_labels:
        label_name = label["name"]
        if label_name.startswith("area:"):
            # [5:] drops the "area:" prefix
            area_labels.append({"name": label_name[5:], "description": label.get("description") or ""})

    log(f" Found {len(area_labels)} area labels")
    return area_labels
|
||||
|
||||
|
||||
def format_taxonomy_for_claude(area_labels):
    """Render the area labels as a sorted, de-duplicated bullet list.

    Labels under one of PREFIXES_TO_COLLAPSE are replaced by a single
    "prefix/*" summary line; every other label is listed by name, followed by
    its description when it has one.
    """
    entries = set()

    for area in area_labels:
        name = area["name"]
        for prefix in PREFIXES_TO_COLLAPSE:
            if name.startswith(f"{prefix}/"):
                entries.add(f"- {prefix}/* (multiple specific sub-labels exist)")
                break
        else:
            # No collapsible prefix matched: list the label itself.
            description = area["description"]
            entries.add(f"- {name}: {description}" if description else f"- {name}")

    return "\n".join(sorted(entries))
|
||||
|
||||
|
||||
def detect_areas(anthropic_key, issue, taxonomy):
    """Use Claude to detect relevant areas for the issue.

    Args:
        anthropic_key: Anthropic API key.
        issue: Dict with at least "title" and "body".
        taxonomy: Area taxonomy text shown to the model.

    Returns:
        A list of area names in the order the model ranked them, without
        duplicates. Empty when the model answers "none" or returns nothing
        usable.
    """
    log("Detecting areas with Claude")

    system_prompt = """You analyze GitHub issues to identify which area labels apply.

Given an issue and a taxonomy of areas, output ONLY a comma-separated list of matching area names.
- Output at most 3 areas, ranked by relevance
- Use exact area names from the taxonomy
- If no areas clearly match, output: none
- For languages/*, tooling/*, or parity/*, use the specific sub-label (e.g., "languages/rust",
tooling/eslint, parity/vscode)

Example outputs:
- "editor, parity/vim"
- "ai, ai/agent panel"
- "none"
"""

    user_content = f"""## Area Taxonomy
{taxonomy}

# Issue Title
{issue['title']}

# Issue Body
{issue['body'][:4000]}"""

    response = call_claude(anthropic_key, system_prompt, user_content, max_tokens=100).strip()
    log(f" Detected areas: {response}")

    # The example outputs in the prompt are quoted, so the model may quote its
    # answer as well. Strip quoting from each fragment, and drop empty
    # fragments and the "none" sentinel so that neither an empty reply nor a
    # quoted "none" turns into a bogus area name.
    areas = []
    for fragment in response.split(","):
        name = fragment.strip().strip("\"'`").strip()
        if not name or name.lower() == "none":
            continue
        if name not in areas:
            areas.append(name)
    return areas
|
||||
|
||||
|
||||
def parse_duplicate_magnets():
    """Read the known duplicate magnets out of the tracking issue body.

    The body is expected to look like:

        ## area_name
        - [N dupes] https://github.com/zed-industries/zed/issues/NUMBER

    Returns a list of {"number", "areas", "dupe_count"} dicts ordered by
    dupe_count, highest first. An issue listed under several areas appears
    once, with all of those areas. Issues listed under "(unlabeled)" get no
    area from that section. Titles and bodies are not fetched here; see
    enrich_magnets().
    """
    log(f"Parsing duplicate magnets from #{TRACKING_ISSUE_NUMBER}")

    tracking_issue = github_api_get(f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{TRACKING_ISSUE_NUMBER}")
    text = tracking_issue.get("body") or ""

    by_number = {}  # issue number -> magnet dict
    section = None

    for raw_line in text.split("\n"):
        if raw_line.startswith("## "):
            section = raw_line[3:].strip()
            continue

        # Only bullet lines that link to an issue, and only once a section
        # header has been seen.
        is_entry = bool(section) and raw_line.startswith("-") and "/issues/" in raw_line
        if not is_entry:
            continue

        try:
            count_text = raw_line.split("[")[1].split()[0]
            number_text = raw_line.split("/issues/")[1].split()[0].rstrip(")")
            dupe_count = int(count_text)
            number = int(number_text)
        except (ValueError, IndexError):
            # Not in the expected "- [N dupes] URL" shape.
            continue

        # The dupe count of the first occurrence is the one that is kept.
        magnet = by_number.setdefault(number, {"number": number, "areas": [], "dupe_count": dupe_count})
        # "(unlabeled)" is not a real area: such magnets keep an empty area
        # list when that is the only section they appear in.
        if section != "(unlabeled)":
            magnet["areas"].append(section)

    magnet_list = sorted(by_number.values(), key=lambda entry: entry["dupe_count"], reverse=True)
    log(f" Parsed {len(magnet_list)} duplicate magnets")
    return magnet_list
|
||||
|
||||
|
||||
def enrich_magnets(magnets):
    """Add "title" and "body_preview" to each magnet, in place.

    Issues one API request per magnet; the preview is the first 500
    characters of the issue body.
    """
    log(f" Fetching details for {len(magnets)} magnets")
    for magnet in magnets:
        details = github_api_get(f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{magnet['number']}")
        preview = (details.get("body") or "")[:500]
        magnet["title"] = details["title"]
        magnet["body_preview"] = preview
|
||||
|
||||
|
||||
def areas_match(detected, magnet_area):
    """Tell whether two area names are equal or one is an ancestor of the other.

    Hierarchy levels are separated by "/", so "ai" matches "ai/agent panel"
    in either direction, while "ai" does not match "aim".
    """
    if detected == magnet_area:
        return True
    if magnet_area.startswith(detected + "/"):
        return True
    return detected.startswith(magnet_area + "/")
|
||||
|
||||
|
||||
def filter_magnets_by_areas(magnets, detected_areas):
    """Keep the magnets relevant to the detected areas.

    With no detected areas the input list is returned unchanged. Otherwise a
    magnet is kept when it has no areas at all (unlabeled magnets match
    everything) or when any of its areas matches any detected area.
    """
    if not detected_areas:
        return magnets

    wanted = set(detected_areas)
    kept = []
    for magnet in magnets:
        magnet_areas = magnet["areas"]
        if not magnet_areas or any(
            areas_match(candidate, magnet_area)
            for candidate in wanted
            for magnet_area in magnet_areas
        ):
            kept.append(magnet)
    return kept
|
||||
|
||||
|
||||
def search_for_similar_issues(issue, detected_areas, max_searches=6):
    """Search for similar open issues that might be duplicates.

    Builds up to three kinds of queries: one from the title keywords, one per
    detected area label (restricted to the last 60 days), and one from the
    first error-like phrase found in the issue body.

    Args:
        issue: Dict with at least "number", "title" and "body".
        detected_areas: Area names (without the "area:" prefix).
        max_searches: Cap on the number of queries actually run, to keep
            token usage and context size under control.

    Returns:
        A list of dicts (number, title, state, created_at, body_preview,
        source), de-duplicated by issue number and excluding the issue
        itself. "source" names the first query type that returned the issue.
        A failed search is logged and skipped.
    """
    log("Searching for similar issues")

    sixty_days_ago = (datetime.now() - timedelta(days=60)).strftime("%Y-%m-%d")
    base_query = f"repo:{REPO_OWNER}/{REPO_NAME} is:issue is:open"
    seen_issues = {}
    queries = []

    title_keywords = [word for word in issue["title"].split() if word.lower() not in STOPWORDS and len(word) > 2]

    if title_keywords:
        keywords_query = " ".join(title_keywords)
        queries.append(("title_keywords", f"{base_query} {keywords_query}"))

    for area in detected_areas:
        queries.append(("area_label", f'{base_query} label:"area:{area}" created:>{sixty_days_ago}'))

    # error pattern search: capture 5–90 chars after keyword, colon optional
    error_pattern = r"(?i:\b(?:error|panicked|panic|failed)\b)\s*([^\n]{5,90})"
    match = re.search(error_pattern, issue["body"])
    if match:
        # The snippet is embedded in a quoted phrase, so a double quote inside
        # it (very common in error messages) would end the phrase early and
        # corrupt the query. Replace quotes with spaces and collapse runs of
        # whitespace.
        error_snippet = " ".join(match.group(1).replace('"', " ").split())
        if error_snippet:
            queries.append(("error_pattern", f'{base_query} in:body "{error_snippet}"'))

    for search_type, query in queries[:max_searches]:
        log(f" Search ({search_type}): {query}")
        try:
            results = github_search_issues(query, per_page=15)
            for item in results:
                number = item["number"]
                if number != issue["number"] and number not in seen_issues:
                    body = item.get("body") or ""
                    seen_issues[number] = {
                        "number": number,
                        "title": item["title"],
                        "state": item.get("state", ""),
                        "created_at": item.get("created_at", ""),
                        "body_preview": body[:500],
                        "source": search_type,
                    }
        except requests.RequestException as e:
            # Best effort: one failed search must not abort the others.
            log(f" Search failed: {e}")

    similar_issues = list(seen_issues.values())
    log(f" Found {len(similar_issues)} similar issues")
    return similar_issues
|
||||
|
||||
|
||||
def analyze_duplicates(anthropic_key, issue, magnets, search_results):
    """Use Claude to analyze potential duplicates.

    The top 10 magnets (enriched in place with title and body preview) and up
    to the first 10 search results that are not already among those magnets
    are sent to the model as candidates.

    Args:
        anthropic_key: Anthropic API key.
        issue: Dict with "number", "title" and "body" of the new issue.
        magnets: Magnet dicts, most duplicated first.
        search_results: Dicts as returned by search_for_similar_issues().

    Returns:
        A (matches, summary) tuple. `matches` is a list of dicts as produced
        by the model (number, confidence, explanation). When there are no
        candidates, or the reply cannot be parsed, `matches` is empty and
        `summary` explains why.
    """
    log("Analyzing duplicates with Claude")

    top_magnets = magnets[:10]
    enrich_magnets(top_magnets)
    magnet_numbers = {m["number"] for m in top_magnets}

    candidates = [
        {"number": m["number"], "title": m["title"], "body_preview": m["body_preview"], "source": "known_duplicate_magnet"}
        for m in top_magnets
    ] + [
        {"number": r["number"], "title": r["title"], "body_preview": r["body_preview"], "source": "search_result"}
        for r in search_results[:10]
        if r["number"] not in magnet_numbers
    ]

    if not candidates:
        return [], "No candidates to analyze"

    system_prompt = """You analyze GitHub issues to identify potential duplicates.

Given a new issue and a list of existing issues, identify which existing issues might be duplicates.

For each potential duplicate, assess confidence:
- "high": Very likely the same issue (same root cause, same symptoms)
- "medium": Possibly related (likely to be the same root cause)
- Do NOT include tangentially related issues (same general area but probably different issues)

Output only valid JSON (no markdown code blocks) with this structure:
{
  "matches": [
    {
      "number": 12345,
      "confidence": "high|medium",
      "explanation": "Brief explanation of why this might be a duplicate"
    }
  ],
  "summary": "One sentence summary of findings"
}

Only include matches with "high" or "medium" confidence. Return empty matches array if none found."""

    user_content = f"""## New Issue #{issue['number']}
**Title:** {issue['title']}

**Body:**
{issue['body'][:3000]}

## Existing Issues to Compare
{json.dumps(candidates, indent=2)}"""

    response = call_claude(anthropic_key, system_prompt, user_content, max_tokens=2048)

    # Despite the instructions the model sometimes wraps its JSON in a
    # markdown code fence; unwrap it before parsing.
    payload = response.strip()
    if payload.startswith("```"):
        payload = re.sub(r"^```(?:json)?\s*", "", payload)
        payload = re.sub(r"\s*```$", "", payload)

    try:
        data = json.loads(payload)
    except json.JSONDecodeError as e:
        log(f" Failed to parse response: {e}")
        log(f" Raw response: {response}")
        return [], "Failed to parse analysis"

    if not isinstance(data, dict):
        log(" Failed to parse response: top-level JSON value is not an object")
        log(f" Raw response: {response}")
        return [], "Failed to parse analysis"

    # Keep only well-formed entries so callers can index them safely.
    raw_matches = data.get("matches", [])
    if not isinstance(raw_matches, list):
        raw_matches = []
    matches = [m for m in raw_matches if isinstance(m, dict) and "number" in m]

    summary = data.get("summary", "Analysis complete")
    log(f" Found {len(matches)} potential matches")
    return matches, summary
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: fetch the issue, decide whether it is eligible, gather
    # candidate duplicates, ask Claude to judge them, optionally comment, and
    # print a JSON report on stdout (all diagnostics go to stderr via log()).
    parser = argparse.ArgumentParser(description="Identify potential duplicate issues")
    parser.add_argument("issue_number", type=int, help="Issue number to analyze")
    parser.add_argument("--dry-run", action="store_true", help="Skip posting comment, just log what would be posted")
    args = parser.parse_args()

    github_token = os.environ.get("GITHUB_TOKEN")
    anthropic_key = os.environ.get("ANTHROPIC_API_KEY")

    if not github_token:
        log("Error: GITHUB_TOKEN not set")
        sys.exit(1)
    if not anthropic_key:
        log("Error: ANTHROPIC_API_KEY not set")
        sys.exit(1)

    # Module-level on purpose: the GitHub helper functions read this global.
    GITHUB_HEADERS = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }

    issue = fetch_issue(args.issue_number)
    if should_skip(issue):
        print(json.dumps({"skipped": True}))
        sys.exit(0)

    # detect areas
    taxonomy = format_taxonomy_for_claude(fetch_area_labels())
    detected_areas = detect_areas(anthropic_key, issue, taxonomy)

    # search for potential duplicates
    all_magnets = parse_duplicate_magnets()
    relevant_magnets = filter_magnets_by_areas(all_magnets, detected_areas)
    search_results = search_for_similar_issues(issue, detected_areas)

    # analyze potential duplicates
    if relevant_magnets or search_results:
        matches, summary = analyze_duplicates(anthropic_key, issue, relevant_magnets, search_results)
    else:
        matches, summary = [], "No potential duplicates to analyze"

    # post comment if high-confidence matches found
    # The matches come straight from model output, so a "confidence" key is
    # not guaranteed; treat a malformed or unrated match as not
    # high-confidence instead of crashing.
    high_confidence_matches = [
        m for m in matches
        if isinstance(m, dict) and m.get("confidence") == "high"
    ]
    commented = False

    if high_confidence_matches:
        comment_body = build_duplicate_comment(high_confidence_matches)
        if args.dry_run:
            log("Dry run - would post comment:\n" + "-" * 40 + "\n" + comment_body + "\n" + "-" * 40)
        else:
            log("Posting comment for high-confidence match(es)")
            try:
                post_comment(issue["number"], comment_body)
                commented = True
            except requests.RequestException as e:
                # A failed comment is reported through "commented": false
                # rather than failing the run.
                log(f" Failed to post comment: {e}")

    print(json.dumps({
        "skipped": False,
        "issue": {
            "number": issue["number"],
            "title": issue["title"],
            "author": issue["author"],
            "type": issue["type"],
        },
        "detected_areas": detected_areas,
        "magnets_count": len(relevant_magnets),
        "search_results_count": len(search_results),
        "matches": matches,
        "summary": summary,
        "commented": commented,
    }))
|
||||
223
script/github-find-top-duplicated-bugs.py
Normal file
223
script/github-find-top-duplicated-bugs.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Find open issues that have the most duplicates filed against them and update
|
||||
a GitHub issue with the results.
|
||||
|
||||
Queries open issues and looks for MarkedAsDuplicateEvent in their timelines.
|
||||
Only includes issues that have been re-reported at least twice (2+ duplicates
|
||||
closed against them). Groups results by area: label. The output is formatted
|
||||
as markdown with issue URLs (GitHub renders the titles automatically).
|
||||
|
||||
This script is run regularly by the update_duplicate_magnets.yml workflow.
|
||||
|
||||
Requires: requests (pip install requests)
|
||||
GitHub token permissions: issues:write
|
||||
|
||||
Usage:
|
||||
# Print to stdout only for testing:
|
||||
python github-find-top-duplicated-bugs.py --github-token ghp_xxx
|
||||
|
||||
# Update a GitHub issue:
|
||||
python github-find-top-duplicated-bugs.py --github-token ghp_xxx --issue-number 46355
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
import requests
|
||||
|
||||
OWNER = "zed-industries"
|
||||
REPO = "zed"
|
||||
|
||||
GRAPHQL_URL = "https://api.github.com/graphql"
|
||||
REST_API_URL = "https://api.github.com"
|
||||
|
||||
headers = None
|
||||
|
||||
ISSUES_WITH_DUPLICATES_QUERY = """
|
||||
query($owner: String!, $repo: String!, $cursor: String) {
|
||||
repository(owner: $owner, name: $repo) {
|
||||
issues(
|
||||
first: 100
|
||||
after: $cursor
|
||||
states: [OPEN]
|
||||
orderBy: {field: UPDATED_AT, direction: DESC}
|
||||
) {
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
endCursor
|
||||
}
|
||||
nodes {
|
||||
number
|
||||
url
|
||||
labels(first: 20) {
|
||||
nodes {
|
||||
name
|
||||
}
|
||||
}
|
||||
timelineItems(first: 100, itemTypes: [MARKED_AS_DUPLICATE_EVENT]) {
|
||||
nodes {
|
||||
... on MarkedAsDuplicateEvent {
|
||||
duplicate {
|
||||
... on Issue {
|
||||
number
|
||||
state
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
def extract_duplicate_info(issue):
    """Extract duplicate count and area info from a GraphQL issue node.

    Only duplicates that are CLOSED are counted, and each duplicate issue is
    counted once even if it was marked several times.

    Args:
        issue: Issue node as returned by ISSUES_WITH_DUPLICATES_QUERY.

    Returns:
        A dict with "number", "url", "areas" and "duplicate_count", or None
        when fewer than 2 closed duplicates point at the issue. "areas" is
        ["(unlabeled)"] when the issue has no area: label.
    """
    seen_duplicates = set()
    for event in issue["timelineItems"]["nodes"]:
        try:
            if event["duplicate"]["state"] == "CLOSED":
                seen_duplicates.add(event["duplicate"]["number"])
        except (KeyError, TypeError):
            # Events without a usable issue payload (e.g. a null duplicate)
            # are ignored.
            continue

    if len(seen_duplicates) < 2:
        return None

    area_prefix = "area:"
    label_names = [label["name"] for label in issue["labels"]["nodes"]]
    # Slice off only the leading prefix; str.replace would also remove any
    # later occurrence of "area:" inside the label name.
    areas = [name[len(area_prefix):] for name in label_names if name.startswith(area_prefix)]

    return {
        "number": issue["number"],
        "url": issue["url"],
        "areas": areas if areas else ["(unlabeled)"],
        "duplicate_count": len(seen_duplicates),
    }
|
||||
|
||||
|
||||
def fetch_canonical_issues_with_duplicates(max_pages=100):
    """Fetch open issues and count how many duplicates point to each.

    Pages through the repository's open issues (100 per page) and keeps those
    for which extract_duplicate_info() returns a result.

    Args:
        max_pages: Upper bound on the number of pages requested.

    Returns:
        A list of dicts as produced by extract_duplicate_info().

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if GitHub does not respond within 60 seconds.
        RuntimeError: if the GraphQL response reports errors. Returning the
            pages collected so far would let the caller overwrite the
            tracking issue with a truncated list.
    """
    print(f"Finding open issues with the most duplicates in {OWNER}/{REPO}")

    cursor = None
    duplicate_magnets = []
    total_issues_scanned = 0

    for page in range(max_pages):
        response = requests.post(
            GRAPHQL_URL,
            headers=headers,
            json={
                "query": ISSUES_WITH_DUPLICATES_QUERY,
                "variables": {"owner": OWNER, "repo": REPO, "cursor": cursor},
            },
            # requests has no default timeout; bound the wait per page.
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()

        if "errors" in data:
            raise RuntimeError(f"GraphQL errors: {data['errors']}")

        issues = data["data"]["repository"]["issues"]
        total_issues_scanned += len(issues["nodes"])

        for issue in issues["nodes"]:
            if info := extract_duplicate_info(issue):
                duplicate_magnets.append(info)

        page_info = issues["pageInfo"]
        if not page_info["hasNextPage"]:
            print(f"Done: scanned {total_issues_scanned} open issues")
            break
        cursor = page_info["endCursor"]

        print(
            f"Page {page + 1}: scanned {total_issues_scanned} open issues, "
            f"{len(duplicate_magnets)} have duplicates"
        )
    else:
        # The loop ran out of pages before reaching the last one.
        print(
            f"Warning: stopped after {max_pages} pages; results may be incomplete",
            file=sys.stderr,
        )

    return duplicate_magnets
|
||||
|
||||
|
||||
def build_markdown_body(duplicate_magnets):
    """Group results by area and build the markdown body for the GitHub issue.

    Areas are ordered by their total duplicate count (highest first), and the
    issues within an area by their own duplicate count. An issue with several
    areas is listed under each of them.

    NOTE: the output format is parsed by parse_duplicate_magnets() in
    github-check-new-issue-for-duplicates.py — update that if you change this.

    Args:
        duplicate_magnets: Dicts with "url", "areas" and "duplicate_count".

    Returns:
        The markdown text for the tracking issue body.
    """
    by_area = defaultdict(list)
    area_totals = Counter()
    for info in duplicate_magnets:
        for area in info["areas"]:
            by_area[area].append(info)
            area_totals[area] += info["duplicate_count"]

    lines = [
        "These are the issues that are frequently re-reported. "
        "The list is generated regularly by running a script."
    ]

    for area, _ in area_totals.most_common():
        ranked = sorted(by_area[area], key=lambda entry: entry["duplicate_count"], reverse=True)

        lines.extend(["", f"## {area}", ""])
        # The count is right-aligned to width 2 so single- and double-digit
        # entries line up.
        lines.extend(f"- [{entry['duplicate_count']:2d} dupes] {entry['url']}" for entry in ranked)

    return "\n".join(lines)
|
||||
|
||||
|
||||
def update_github_issue(issue_number, body):
    """Replace the body of a GitHub issue.

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if GitHub does not respond within 30 seconds.
    """
    url = f"{REST_API_URL}/repos/{OWNER}/{REPO}/issues/{issue_number}"
    # requests has no default timeout; bound the wait so a stalled connection
    # cannot hang the job.
    response = requests.patch(url, headers=headers, json={"body": body}, timeout=30)
    response.raise_for_status()
    print(f"Updated issue #{issue_number}")
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line.

    --github-token defaults to the GITHUB_TOKEN environment variable;
    --issue-number is optional and selects the issue to update.
    """
    cli = argparse.ArgumentParser(
        description="Find open issues with the most duplicates filed against them."
    )
    token_from_env = os.environ.get("GITHUB_TOKEN")
    cli.add_argument(
        "--github-token",
        default=token_from_env,
        help="GitHub token (or set GITHUB_TOKEN env var)",
    )
    cli.add_argument(
        "--issue-number",
        type=int,
        help="GitHub issue number to update (if not provided, prints to stdout)",
    )
    parsed = cli.parse_args()
    return parsed
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: collect the duplicate magnets, then either update the
    # given issue or print the markdown to stdout.
    args = parse_args()

    if not args.github_token:
        print("Error: --github-token is required (or set GITHUB_TOKEN env var)")
        sys.exit(1)

    # Module-level on purpose: the request helpers read this global.
    headers = {
        "Authorization": f"Bearer {args.github_token}",
        "Content-Type": "application/json",
    }

    duplicate_magnets = fetch_canonical_issues_with_duplicates()
    # With no magnets found there is nothing to write or print.
    if duplicate_magnets:
        body = build_markdown_body(duplicate_magnets)
        if not args.issue_number:
            print(body)
        else:
            update_github_issue(args.issue_number, body)
|
||||
Loading…
Reference in a new issue