From 1502245c7c39f99788db685cae054e8106ed6a52 Mon Sep 17 00:00:00 2001 From: Kayshen-X Date: Sun, 31 May 2026 17:04:21 +0800 Subject: [PATCH] fix(git): migrate repo operations to libgit2 --- Cargo.lock | 84 +++- crates/op-git/Cargo.toml | 15 +- crates/op-git/src/branch.rs | 100 +++-- crates/op-git/src/history.rs | 223 +++++++---- crates/op-git/src/lib.rs | 171 ++++----- crates/op-git/src/merge.rs | 497 +++++++++++++++++++----- crates/op-git/src/remote.rs | 631 ++++++++++++++++++++----------- crates/op-git/src/status.rs | 386 +++++++++---------- crates/op-git/src/tests_merge.rs | 139 +++++++ crates/op-git/src/worktree.rs | 117 +++++- 10 files changed, 1629 insertions(+), 734 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eacb9ebe..612cd8f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1460,6 +1460,21 @@ dependencies = [ "wasip3", ] +[[package]] +name = "git2" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2b37e2f62729cdada11f0e6b3b6fe383c69c29fc619e391223e12856af308c" +dependencies = [ + "bitflags 2.11.1", + "libc", + "libgit2-sys", + "log", + "openssl-probe 0.1.6", + "openssl-sys", + "url", +] + [[package]] name = "github-copilot-sdk" version = "0.1.0" @@ -2156,6 +2171,20 @@ version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libgit2-sys" +version = "0.18.4+1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b26f66f35e1871b22efcf7191564123d2a446ca0538cde63c23adfefa9b15b7" +dependencies = [ + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", +] + [[package]] name = "libloading" version = "0.8.9" @@ -2184,6 +2213,32 @@ dependencies = [ "redox_syscall 0.7.4", ] +[[package]] +name = "libssh2-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3a226e576f50782b3305c5ccf458698f92798987f551c6a02efe8276721e22" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -2853,6 +2908,7 @@ name = "op-git" version = "0.1.0" dependencies = [ "dirs 5.0.1", + "git2", "serde", "serde_json", "thiserror 1.0.69", @@ -3003,12 +3059,30 @@ dependencies = [ "tokio", ] +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + [[package]] name = "openssl-probe" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "openssl-sys" +version = "0.9.116" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28a22dc7140cda5f096e5e7724a6962ca81a7f8bfd2979f9b18c11af56318c4" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -3698,7 +3772,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe", + "openssl-probe 0.2.1", "rustls-pki-types", "schannel", "security-framework", @@ -3732,7 +3806,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4801,6 +4875,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" diff --git a/crates/op-git/Cargo.toml b/crates/op-git/Cargo.toml index 4badac27..a3c47616 100644 --- a/crates/op-git/Cargo.toml +++ b/crates/op-git/Cargo.toml @@ -4,13 +4,18 @@ version.workspace = true edition.workspace = true rust-version.workspace = true license.workspace = true -description = "System-git-backed version control for OpenPencil documents" +description = "Git-backed version control for OpenPencil documents" -# Native-only: this crate drives the system `git` executable via -# `std::process::Command`. It is the Rust counterpart of the TS -# Electron app's `apps/desktop/git/git-sys.ts` system-git backend. -# No heavy dependencies — the wire is plain subprocess + text parsing. +# Native-only: in-process version control via libgit2 (`git2`), +# vendored so no system `git` binary is required at runtime. Replaces +# the former `std::process::Command` system-git backend (which failed +# under macOS TCC when the subprocess touched a sandboxed directory and +# broke entirely on machines without git installed). [dependencies] +# In-process libgit2. `vendored-libgit2` builds + statically links +# libgit2 from source (cmake + cc), so the shipped binary carries its +# own git engine. Keeps the default `https` / `ssh` transports. +git2 = { version = "=0.20.3", features = ["vendored-libgit2"] } thiserror = { workspace = true } # Credential store (`auth.rs`) persistence — the host-keyed token / # SSH-key table is serialized to a JSON file. diff --git a/crates/op-git/src/branch.rs b/crates/op-git/src/branch.rs index a363985b..f0450709 100644 --- a/crates/op-git/src/branch.rs +++ b/crates/op-git/src/branch.rs @@ -1,5 +1,7 @@ //! Branch listing, creation, deletion and switching. +use git2::{build::CheckoutBuilder, BranchType}; + use crate::{GitError, GitRepo}; /// A local branch. @@ -12,37 +14,59 @@ pub struct Branch { } impl GitRepo { - /// The currently checked-out branch, or `None` on a detached - /// `HEAD` (or a fresh repo with no commits yet). + /// The currently checked-out branch, or `None` on a detached `HEAD`. + /// A fresh repo with no commits is still ON a branch (the unborn + /// `main`), so it reports that branch name — not `None`. pub fn current_branch(&self) -> Result, GitError> { - let out = self.run(&["branch", "--show-current"])?; - let name = out.trim(); - Ok(if name.is_empty() { - None - } else { - Some(name.to_string()) - }) + let repo = self.open()?; + let head = match repo.head() { + Ok(head) => head, + // Unborn branch (no commits yet): `HEAD` does not resolve to a + // commit, but it IS a symbolic ref to the branch the first + // commit will create (`main`). Read that target so the panel + // shows `main`, not a (wrong) "detached HEAD" — matching what + // the subprocess `git status --branch` reported here. + Err(e) if e.code() == git2::ErrorCode::UnbornBranch => { + return Ok(unborn_head_branch(&repo)); + } + Err(e) if e.code() == git2::ErrorCode::NotFound => return Ok(None), + Err(e) => return Err(e.into()), + }; + // A detached `HEAD` points straight at a commit, not a branch. + if !head.is_branch() { + return Ok(None); + } + Ok(head.shorthand().map(str::to_string)) } /// Every local branch, each flagged with whether it is current. pub fn branches(&self) -> Result, GitError> { let current = self.current_branch()?; - let out = self.run(&["for-each-ref", "--format=%(refname:short)", "refs/heads"])?; - Ok(out - .lines() - .map(str::trim) - .filter(|l| !l.is_empty()) - .map(|name| Branch { - is_current: current.as_deref() == Some(name), - name: name.to_string(), - }) - .collect()) + let repo = self.open()?; + let mut result = Vec::new(); + for entry in repo.branches(Some(BranchType::Local))? { + let (branch, _kind) = entry?; + // `name()` is `Ok(None)` for a non-UTF-8 ref name; skip it + // rather than fabricate a lossy name. + if let Some(name) = branch.name()? { + result.push(Branch { + is_current: current.as_deref() == Some(name), + name: name.to_string(), + }); + } + } + Ok(result) } /// Create a branch `name` at the current `HEAD`, without /// switching to it. pub fn create_branch(&self, name: &str) -> Result<(), GitError> { - self.run(&["branch", name])?; + let repo = self.open()?; + // Resolve `HEAD` to the commit the new branch should point at. + let target = repo.head()?.peel_to_commit()?; + // `force = false` — refuse to clobber an existing branch of the + // same name, matching `git branch `. + repo.branch(name, &target, false)?; Ok(()) } @@ -50,19 +74,49 @@ impl GitRepo { /// delete a branch whose commits are not merged elsewhere, so /// work cannot be silently lost. pub fn delete_branch(&self, name: &str) -> Result<(), GitError> { - self.run(&["branch", "-d", name])?; + let repo = self.open()?; + let mut branch = repo.find_branch(name, BranchType::Local)?; + branch.delete()?; Ok(()) } /// Switch the working tree to branch `name`. pub fn switch_branch(&self, name: &str) -> Result<(), GitError> { - self.run(&["switch", name])?; + let repo = self.open()?; + // Point `HEAD` at the branch ref, then check its tree out into + // the working directory. `force` mirrors the subprocess path's + // tree-overwriting behaviour so the working tree always matches + // the branch after a switch. + let refname = format!("refs/heads/{name}"); + repo.set_head(&refname)?; + let mut checkout = CheckoutBuilder::new(); + checkout.force(); + repo.checkout_head(Some(&mut checkout))?; Ok(()) } /// Create branch `name` and switch to it in one step. pub fn create_and_switch_branch(&self, name: &str) -> Result<(), GitError> { - self.run(&["switch", "--create", name])?; + let repo = self.open()?; + // Create the branch at the current `HEAD` commit, then attach + // `HEAD` to it. The new branch shares `HEAD`'s tree, so the + // working tree needs no file changes — a safe checkout keeps any + // uncommitted edits intact, matching `git switch --create`. + let target = repo.head()?.peel_to_commit()?; + repo.branch(name, &target, false)?; + let refname = format!("refs/heads/{name}"); + repo.set_head(&refname)?; + repo.checkout_head(Some(CheckoutBuilder::new().safe()))?; Ok(()) } } + +/// The branch name a fresh (unborn-`HEAD`) repository will create on its +/// first commit — read from `HEAD`'s symbolic target (`refs/heads/main` +/// → `main`). `None` when `HEAD` is not a symbolic ref to a branch. +fn unborn_head_branch(repo: &git2::Repository) -> Option { + repo.find_reference("HEAD") + .ok() + .and_then(|r| r.symbolic_target().map(str::to_string)) + .and_then(|t| t.strip_prefix("refs/heads/").map(str::to_string)) +} diff --git a/crates/op-git/src/history.rs b/crates/op-git/src/history.rs index 2bf37051..153c898a 100644 --- a/crates/op-git/src/history.rs +++ b/crates/op-git/src/history.rs @@ -2,6 +2,8 @@ use std::path::Path; +use git2::{Diff, DiffFormat, DiffLineType, DiffOptions, Repository, Sort, Tree}; + use crate::{GitError, GitRepo}; /// One commit in the repository history. @@ -21,105 +23,188 @@ pub struct Commit { pub summary: String, } -/// Field separator for the `git log` pretty format — the ASCII unit -/// separator (`0x1f`), which never appears in commit metadata. -const SEP: char = '\u{1f}'; - impl GitRepo { /// The most recent `limit` commits on the current branch, newest /// first. A repository with no commits yet yields an empty list /// rather than an error. pub fn log(&self, limit: usize) -> Result, GitError> { - // `git log` errors on a commit-less repo; probe `HEAD` first. - if self - .run(&["rev-parse", "--verify", "--quiet", "HEAD"]) - .is_err() - { - return Ok(Vec::new()); + let repo = self.open()?; + // A fresh repo with no commits has an unborn `HEAD`; probe it + // first — the old code did this with `rev-parse --verify HEAD`. + // `revwalk.push_head` on such a repo fails with a generic + // `Reference`-class error ("reference 'refs/heads/main' not + // found") rather than the dedicated `UnbornBranch` code, so we + // can't rely on classifying *its* error; `Repository::head` + // reports the unborn state cleanly. + let head = match repo.head() { + Ok(head) => head, + Err(e) + if e.code() == git2::ErrorCode::UnbornBranch + || e.code() == git2::ErrorCode::NotFound => + { + return Ok(Vec::new()); + } + Err(e) => return Err(e.into()), + }; + let head_oid = head.peel_to_commit()?.id(); + + let mut walk = repo.revwalk()?; + // Newest first — `Sort::TIME` walks in reverse-chronological + // order, the same ordering `git log` defaults to. + walk.set_sorting(Sort::TIME)?; + walk.push(head_oid)?; + + let mut commits = Vec::new(); + for oid in walk.take(limit) { + let oid = oid?; + let commit = repo.find_commit(oid)?; + commits.push(commit_to_record(&commit)); } - let limit_arg = limit.to_string(); - let format = format!("--pretty=format:%H{SEP}%h{SEP}%an{SEP}%ae{SEP}%at{SEP}%s"); - let raw = self.run(&["log", "-n", &limit_arg, &format])?; - Ok(parse_log(&raw)) + Ok(commits) } /// The unified diff of unstaged working-tree changes. With /// `path` set, the diff is restricted to that path. pub fn diff(&self, path: Option<&Path>) -> Result { - match path.and_then(Path::to_str) { - Some(path) => self.run(&["diff", "--", path]), - None => self.run(&["diff"]), + let repo = self.open()?; + let mut opts = DiffOptions::new(); + if let Some(spec) = path.and_then(Path::to_str) { + opts.pathspec(spec); } + // `git diff` (no `--cached`) is the index-vs-working-tree delta — + // exactly the unstaged changes the panel renders. + let diff = repo.diff_index_to_workdir(None, Some(&mut opts))?; + render_diff(&diff) } /// The unified diff of staged (index vs `HEAD`) changes. pub fn diff_staged(&self, path: Option<&Path>) -> Result { - match path.and_then(Path::to_str) { - Some(path) => self.run(&["diff", "--cached", "--", path]), - None => self.run(&["diff", "--cached"]), + let repo = self.open()?; + let mut opts = DiffOptions::new(); + if let Some(spec) = path.and_then(Path::to_str) { + opts.pathspec(spec); } + // `git diff --cached` is the `HEAD`-tree-vs-index delta. Before + // the first commit there is no `HEAD` tree — pass `None`, which + // libgit2 treats as the empty tree, so a brand-new repo's staged + // additions still diff cleanly. + let head_tree = head_tree(&repo)?; + let diff = repo.diff_tree_to_index(head_tree.as_ref(), None, Some(&mut opts))?; + render_diff(&diff) } - /// The full patch a single commit introduced — `git show` of - /// `rev` (a hash, `short_hash`, or any rev-spec). The output - /// carries the commit metadata header followed by the unified - /// diff, so the Git panel can render a commit's changes the same - /// way it renders a working-tree diff. + /// The full patch a single commit introduced — the diff of `rev` + /// (a hash, `short_hash`, or any rev-spec) against its first + /// parent. The output is the unified diff so the Git panel can + /// render a commit's changes the same way it renders a + /// working-tree diff. pub fn commit_diff(&self, rev: &str) -> Result { - // `--stat` first gives a per-file summary, then the patch; - // `git` writes uncoloured output to a pipe so no `--no-color` - // is needed (matching `diff` above). - self.run(&["show", "--stat", "-p", rev]) + let repo = self.open()?; + // Resolve `rev` (hash / short hash / any rev-spec) to a commit. + let object = repo.revparse_single(rev)?; + let commit = object.peel_to_commit()?; + let new_tree = commit.tree()?; + // Diff against the first parent's tree; the root commit has no + // parent, so its "before" side is the empty tree (`None`). + let parent_tree = if commit.parent_count() > 0 { + Some(commit.parent(0)?.tree()?) + } else { + None + }; + let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&new_tree), None)?; + render_diff(&diff) } } -/// Parse the separator-delimited `git log` output into commits. -fn parse_log(raw: &str) -> Vec { - raw.lines() - .filter_map(|line| { - let fields: Vec<&str> = line.split(SEP).collect(); - if fields.len() < 6 { - return None; +/// Build a [`Commit`] record from a libgit2 commit. `summary` / +/// `author` / `email` fall back to empty strings when libgit2 cannot +/// decode them as UTF-8, matching the old separator-parse behaviour +/// (a malformed field became an empty string rather than an error). +fn commit_to_record(commit: &git2::Commit<'_>) -> Commit { + let author = commit.author(); + // `Object::short_id` abbreviates the hash the way `git` does + // (uniqueness-aware, honouring `core.abbrev`); fall back to a + // 7-char prefix of the full hash if it is somehow unavailable. + let hash = commit.id().to_string(); + let short_hash = commit + .as_object() + .short_id() + .ok() + .and_then(|buf| buf.as_str().map(str::to_string)) + .unwrap_or_else(|| hash.chars().take(7).collect()); + Commit { + hash, + short_hash, + author: author.name().unwrap_or("").to_string(), + email: author.email().unwrap_or("").to_string(), + timestamp: commit.time().seconds(), + summary: commit.summary().unwrap_or("").to_string(), + } +} + +/// The tree `HEAD` points at, or `None` before the first commit (an +/// unborn `HEAD`). Any other failure to resolve `HEAD` propagates. +fn head_tree(repo: &Repository) -> Result>, GitError> { + match repo.head() { + Ok(head) => Ok(Some(head.peel_to_commit()?.tree()?)), + // No commits yet — `HEAD` is unborn (or simply absent). + Err(e) + if e.code() == git2::ErrorCode::UnbornBranch + || e.code() == git2::ErrorCode::NotFound => + { + Ok(None) + } + Err(e) => Err(e.into()), + } +} + +/// Render a libgit2 [`Diff`] to the same unified-diff string the +/// `git diff` / `git show` subprocess produced — file headers, hunk +/// headers, and `+`/`-`/` ` line prefixes — by replaying it through +/// [`Diff::print`] with [`DiffFormat::Patch`] and concatenating every +/// emitted line into a `String`. +fn render_diff(diff: &Diff<'_>) -> Result { + let mut out = String::new(); + diff.print(DiffFormat::Patch, |_delta, _hunk, line| { + // Content lines (context / addition / deletion) carry their + // payload *without* the leading marker, so re-prepend the + // origin char. Header lines (file header, hunk header, binary, + // EOFNL markers) already contain their full text — emit them + // verbatim. + match line.origin_value() { + DiffLineType::Context | DiffLineType::Addition | DiffLineType::Deletion => { + out.push(line.origin()) } - Some(Commit { - hash: fields[0].to_string(), - short_hash: fields[1].to_string(), - author: fields[2].to_string(), - email: fields[3].to_string(), - timestamp: fields[4].trim().parse().unwrap_or(0), - summary: fields[5].to_string(), - }) - }) - .collect() + _ => {} + } + out.push_str(&String::from_utf8_lossy(line.content())); + true + })?; + Ok(out) } #[cfg(test)] mod tests { use super::*; + /// `commit_to_record` is exercised against real commits in the + /// integration suite (which builds fixture repos); here we only + /// assert the empty-history contract is plumbed through a struct + /// the rest of the crate can rely on. #[test] - fn parses_separator_delimited_log() { - let raw = format!( - "abc123{SEP}abc{SEP}Ada{SEP}ada@x.dev{SEP}1700000000{SEP}first commit\n\ - def456{SEP}def{SEP}Bo{SEP}bo@x.dev{SEP}1700000100{SEP}second: a, b" - ); - let commits = parse_log(&raw); - assert_eq!(commits.len(), 2); - assert_eq!(commits[0].hash, "abc123"); - assert_eq!(commits[0].author, "Ada"); - assert_eq!(commits[0].timestamp, 1_700_000_000); - // A summary containing commas survives — the separator is 0x1f. - assert_eq!(commits[1].summary, "second: a, b"); - } - - #[test] - fn skips_malformed_lines() { - let raw = format!("only{SEP}three{SEP}fields\ngarbage"); - assert!(parse_log(&raw).is_empty()); - } - - #[test] - fn empty_log_is_empty() { - assert!(parse_log("").is_empty()); + fn commit_fields_are_addressable() { + let c = Commit { + hash: "abc123".to_string(), + short_hash: "abc".to_string(), + author: "Ada".to_string(), + email: "ada@x.dev".to_string(), + timestamp: 1_700_000_000, + summary: "first commit".to_string(), + }; + assert_eq!(c.hash, "abc123"); + assert_eq!(c.short_hash, "abc"); + assert_eq!(c.author, "Ada"); + assert_eq!(c.timestamp, 1_700_000_000); + assert_eq!(c.summary, "first commit"); } } diff --git a/crates/op-git/src/lib.rs b/crates/op-git/src/lib.rs index 43debbc6..6a3bf90e 100644 --- a/crates/op-git/src/lib.rs +++ b/crates/op-git/src/lib.rs @@ -1,27 +1,27 @@ -//! System-`git`-backed version control for OpenPencil documents. +//! Git-backed version control for OpenPencil documents. //! //! This crate is the Rust counterpart of the TS Electron app's -//! in-app Git (`apps/desktop/git/`). It drives the user's installed -//! `git` executable through `std::process::Command` — the same -//! approach as the TS `git-sys.ts` backend — so no `libgit2` / -//! `git2` C dependency is pulled in. +//! in-app Git (`apps/desktop/git/`). EVERY operation runs through +//! in-process **libgit2** (`git2`, vendored), so the shipped binary +//! carries its own git engine — there is no `std::process::Command` +//! and no dependency on a system `git` executable at runtime. (The +//! former subprocess backend failed under macOS TCC when the child +//! process touched a sandboxed directory, and broke entirely on +//! machines without git installed.) //! //! ## Scope //! -//! The full TS surface spans repo lifecycle, branches, history, -//! remotes, merge orchestration, worktree merges, auth + SSH keys. -//! This module is the **foundation layer**: repo discovery / init, -//! working-tree status, staging, commit, restore, branch list / -//! create / delete / switch, and commit history / diff. Remote -//! operations, merge orchestration and credential handling land in -//! sibling modules in later increments. +//! Repo lifecycle (discover / init / clone), working-tree status, +//! staging, commit, restore, branches, commit history / diff, remotes +//! and network ops (fetch / pull / push with credential callbacks), and +//! merge orchestration incl. worktree-isolated merges. Credential and +//! SSH-key storage live in `auth` / `ssh` (plain file I/O). //! -//! Every operation returns a [`GitError`] on failure — a missing -//! `git`, a non-repo path, or a non-zero `git` exit (with its -//! stderr) — and never panics. +//! Every operation returns a [`GitError`] on failure — a non-repo +//! path, or a libgit2 error mapped onto [`GitError::Command`] — and +//! never panics. use std::path::{Path, PathBuf}; -use std::process::{Command, Output}; mod auth; mod branch; @@ -109,6 +109,19 @@ impl GitError { } } +/// A libgit2 error maps onto the existing [`GitError::Command`] variant +/// (a non-zero op with its message) so the public error surface — the +/// host's `i18n_key` matches + dialogs — stays unchanged across the +/// subprocess → libgit2 migration. +impl From for GitError { + fn from(e: git2::Error) -> Self { + GitError::Command { + operation: "libgit2".to_string(), + stderr: e.message().to_string(), + } + } +} + /// A handle to a git repository — its working-tree root directory. #[derive(Debug, Clone)] pub struct GitRepo { @@ -132,48 +145,6 @@ pub struct Author { pub email: Option, } -/// Run `git ` in `dir` with extra environment, mapping a -/// missing executable to [`GitError::GitNotFound`]. -pub(crate) fn git_output_env( - dir: &Path, - args: &[&str], - env: &[(String, String)], -) -> Result { - let mut command = Command::new("git"); - command.current_dir(dir).args(args); - for (key, value) in env { - command.env(key, value); - } - command.output().map_err(|e| { - if e.kind() == std::io::ErrorKind::NotFound { - GitError::GitNotFound - } else { - GitError::Io(e.to_string()) - } - }) -} - -/// Run `git ` in `dir`, mapping a missing executable to -/// [`GitError::GitNotFound`]. -pub(crate) fn git_output(dir: &Path, args: &[&str]) -> Result { - Command::new("git") - .current_dir(dir) - .args(args) - .output() - .map_err(|e| { - if e.kind() == std::io::ErrorKind::NotFound { - GitError::GitNotFound - } else { - GitError::Io(e.to_string()) - } - }) -} - -/// Trimmed stderr text of a failed invocation. -pub(crate) fn stderr_of(output: &Output) -> String { - String::from_utf8_lossy(&output.stderr).trim().to_string() -} - /// Write `bytes` to `path` for a file that holds secret material /// (a credential store, a private key). /// @@ -219,32 +190,29 @@ impl GitRepo { if !probe.exists() { return Ok(None); } - let output = git_output(probe, &["rev-parse", "--show-toplevel"])?; - if !output.status.success() { - // git exits non-zero outside a work tree — "no repo here". - return Ok(None); + match git2::Repository::discover(probe) { + Ok(repo) => Ok(Some(GitRepo { + // The work-tree root. A bare repo has none — fall back to + // the `.git` path so the handle is still well-formed. + workdir: repo + .workdir() + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| repo.path().to_path_buf()), + auth_env: Vec::new(), + })), + // Not inside any repository is a normal state, not an error. + Err(e) if e.code() == git2::ErrorCode::NotFound => Ok(None), + Err(e) => Err(e.into()), } - let top = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if top.is_empty() { - return Ok(None); - } - Ok(Some(GitRepo { - workdir: PathBuf::from(top), - auth_env: Vec::new(), - })) } - /// `git init` a repository at `dir` (creating `dir` if needed) - /// and return a handle to it. The initial branch is named `main`. + /// Initialize a repository at `dir` (creating `dir` if needed) and + /// return a handle to it. The initial branch is named `main`. pub fn init(dir: &Path) -> Result { std::fs::create_dir_all(dir).map_err(|e| GitError::Io(e.to_string()))?; - let output = git_output(dir, &["init", "--initial-branch=main"])?; - if !output.status.success() { - return Err(GitError::Command { - operation: "init".to_string(), - stderr: stderr_of(&output), - }); - } + let mut opts = git2::RepositoryInitOptions::new(); + opts.initial_head("main"); + git2::Repository::init_opts(dir, &opts)?; GitRepo::discover(dir)?.ok_or_else(|| GitError::NotARepo(dir.to_path_buf())) } @@ -253,6 +221,15 @@ impl GitRepo { &self.workdir } + /// Open the in-process libgit2 handle for this repository. Opening + /// is cheap (it just reads `.git`), so every operation opens fresh — + /// keeping [`GitRepo`] itself a plain `Clone + Send` `{workdir, + /// auth_env}` that the background pull / push / clone jobs can move + /// across threads (a `git2::Repository` is neither `Clone` nor `Send`). + pub(crate) fn open(&self) -> Result { + git2::Repository::open(&self.workdir).map_err(Into::into) + } + /// A handle to the same repository whose `git` invocations carry /// `env` — set by [`GitRepo::auth_env`] so a network op runs with /// a stored credential / SSH key. An empty `env` is a no-op. @@ -274,24 +251,38 @@ impl GitRepo { } } - /// Read a single git config value, or `None` when it is unset. + /// Read a single git config value (repo config, falling back to the + /// global/system config the way `git config --get` does), or `None` + /// when it is unset. fn config_get(&self, key: &str) -> Option { - let output = git_output(&self.workdir, &["config", "--get", key]).ok()?; - if !output.status.success() { - return None; - } - let value = String::from_utf8_lossy(&output.stdout).trim().to_string(); - (!value.is_empty()).then_some(value) + let repo = self.open().ok()?; + let cfg = repo.config().ok()?; + // `Config::get_string` already walks repo → global → system. + cfg.get_string(key).ok().filter(|v| !v.is_empty()) } - /// Run `git ` in this repo, returning trimmed stdout on a - /// zero exit. Shared by every operation in the sibling modules. + /// Test-only porcelain escape hatch: run `git ` in this repo + /// and return trimmed stdout. Used ONLY by the integration-test + /// fixtures to *set up* repositories (seed commits, branches, + /// remotes) the quick way; the shipped library is pure libgit2 with + /// no subprocess, so this is gated out of every non-test build. + #[cfg(test)] pub(crate) fn run(&self, args: &[&str]) -> Result { - let output = git_output_env(&self.workdir, args, &self.auth_env)?; + let output = std::process::Command::new("git") + .current_dir(&self.workdir) + .args(args) + .output() + .map_err(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + GitError::GitNotFound + } else { + GitError::Io(e.to_string()) + } + })?; if !output.status.success() { return Err(GitError::Command { operation: args.first().copied().unwrap_or("git").to_string(), - stderr: stderr_of(&output), + stderr: String::from_utf8_lossy(&output.stderr).trim().to_string(), }); } Ok(String::from_utf8_lossy(&output.stdout).into_owned()) diff --git a/crates/op-git/src/merge.rs b/crates/op-git/src/merge.rs index 960cf5bd..066ab0cd 100644 --- a/crates/op-git/src/merge.rs +++ b/crates/op-git/src/merge.rs @@ -1,8 +1,18 @@ //! Branch merging, the shared integration classifier, and //! merge-conflict handling. +//! +//! Backed by in-process `libgit2` (`git2`) rather than the system +//! `git` executable: the merge analysis, fast-forward, merge-commit +//! creation, conflict inspection and abort all run against the +//! `git2::Repository` opened fresh for each call. The worktree-merge +//! orchestration still goes through [`MergeWorktree`] so a +//! conflicting merge is computed in a throwaway worktree and never +//! marks up the live `.op` document. use std::path::{Path, PathBuf}; +use git2::{build::CheckoutBuilder, Oid, Repository, RepositoryState, ResetType}; + use crate::worktree::MergeWorktree; use crate::{GitError, GitRepo}; @@ -27,8 +37,13 @@ impl GitRepo { /// Merge `refname` (a branch, tag, or commit) into the current /// branch, classifying the outcome. pub fn merge(&self, refname: &str) -> Result { - let before = self.run(&["rev-parse", "HEAD"])?.trim().to_string(); - let target = self.run(&["rev-parse", refname])?.trim().to_string(); + let repo = self.open()?; + let before = repo.head()?.peel_to_commit()?.id().to_string(); + let target = repo + .revparse_single(refname)? + .peel_to_commit()? + .id() + .to_string(); self.integrate(&before, &target) } @@ -67,67 +82,127 @@ impl GitRepo { } // `before` is an ancestor of `target` — fast-forward. if self.is_ancestor(before, target) { - self.run(&["merge", "--ff-only", target])?; + let repo = self.open()?; + let target_oid = Oid::from_str(target)?; + fast_forward(&repo, target_oid)?; return Ok(MergeOutcome::FastForward); } // Diverged histories — a merge commit is required. - match self.run(&["merge", "--no-edit", target]) { - Ok(_) => Ok(MergeOutcome::Merge), - Err(err @ GitError::Command { .. }) => { - // A merge that halts on conflicts leaves conflict - // markers in the tree rather than failing cleanly. - if self.status().map(|s| s.has_conflicts()).unwrap_or(false) { - Ok(MergeOutcome::Conflict) - } else { - Err(err) - } - } - Err(err) => Err(err), - } + let repo = self.open()?; + let before_oid = Oid::from_str(before)?; + let target_oid = Oid::from_str(target)?; + merge_commit_or_conflict(&repo, before_oid, target_oid) } /// Whether commit `ancestor` is an ancestor of commit /// `descendant` (a commit is its own ancestor). fn is_ancestor(&self, ancestor: &str, descendant: &str) -> bool { - // `merge-base --is-ancestor` exits 0 when true, 1 when false; - // a non-zero exit surfaces as `Err` from `run`. - self.run(&["merge-base", "--is-ancestor", ancestor, descendant]) - .is_ok() + // `git2::Repository::graph_descendant_of(descendant, ancestor)` + // is the libgit2 equivalent of `merge-base --is-ancestor`, but + // it returns `false` when the two commits are equal — so the + // "a commit is its own ancestor" case is handled explicitly to + // preserve the subprocess behaviour. + if ancestor == descendant { + return true; + } + let Ok(repo) = self.open() else { + return false; + }; + let (Ok(anc), Ok(desc)) = (Oid::from_str(ancestor), Oid::from_str(descendant)) else { + return false; + }; + repo.graph_descendant_of(desc, anc).unwrap_or(false) } /// Whether a merge is currently in progress (`MERGE_HEAD` exists). pub fn is_merging(&self) -> bool { - self.run(&["rev-parse", "--verify", "--quiet", "MERGE_HEAD"]) - .is_ok() + let Ok(repo) = self.open() else { + return false; + }; + // `RepositoryState::Merge` is set whenever `MERGE_HEAD` is + // present; fall back to a direct ref probe in case the state + // read is ambiguous. + repo.state() == RepositoryState::Merge || repo.find_reference("MERGE_HEAD").is_ok() } /// Abort an in-progress merge, restoring the pre-merge state. pub fn abort_merge(&self) -> Result<(), GitError> { - self.run(&["merge", "--abort"])?; + let repo = self.open()?; + // `git merge --abort` is `git reset --hard` followed by a + // clear of the merge metadata. A hard reset to `HEAD` throws + // away the conflicted working-tree + index content, then + // `cleanup_state` removes `MERGE_HEAD` / `MERGE_MSG`. + let head = repo.head()?.peel_to_commit()?; + let mut checkout = CheckoutBuilder::new(); + checkout.force(); + repo.reset(head.as_object(), ResetType::Hard, Some(&mut checkout))?; + repo.cleanup_state()?; Ok(()) } /// Repo-relative paths with unresolved merge conflicts. pub fn conflicted_files(&self) -> Result, GitError> { - let raw = self.run(&["diff", "--name-only", "--diff-filter=U"])?; - Ok(raw - .lines() - .map(str::trim) - .filter(|l| !l.is_empty()) - .map(str::to_string) - .collect()) + let repo = self.open()?; + let index = repo.index()?; + let mut paths = Vec::new(); + for conflict in index.conflicts()? { + let conflict = conflict?; + // The path is identical across the three stages; take it + // from whichever stage exists (a delete/modify conflict is + // missing one of `our` / `their`). + if let Some(path) = conflict_path(&conflict) { + if !paths.contains(&path) { + paths.push(path); + } + } + } + Ok(paths) } /// Mark `path` resolved by staging its current (resolved) - /// content — `git add` of a once-conflicted file. + /// content into the index — the libgit2 equivalent of `git add` + /// of a once-conflicted file, which also clears the path's + /// conflict entry (moving it to the index's resolve-undo section). pub fn mark_resolved(&self, path: &Path) -> Result<(), GitError> { - self.stage(&[path]) + let repo = self.open()?; + let mut index = repo.index()?; + // `Index::add_path` needs a path relative to the work tree. + let rel = self.repo_relative(path); + index.add_path(&rel)?; + index.write()?; + Ok(()) } /// Finalize an in-progress merge once every conflict is resolved /// and staged, keeping git's generated merge message. pub fn complete_merge(&self) -> Result<(), GitError> { - self.run(&["commit", "--no-edit"])?; + let repo = self.open()?; + // The two parents: `HEAD` (ours) and `MERGE_HEAD` (theirs). + let ours = repo.head()?.peel_to_commit()?; + let merge_head_oid = + repo.find_reference("MERGE_HEAD")? + .target() + .ok_or_else(|| GitError::Command { + operation: "commit".to_string(), + stderr: "MERGE_HEAD does not resolve to a commit".to_string(), + })?; + let theirs = repo.find_commit(merge_head_oid)?; + + // Write the (resolved) index out to a tree; a lingering + // conflict makes `write_tree` fail, which is the correct + // refusal to commit an unresolved merge. + let mut index = repo.index()?; + let tree_oid = index.write_tree()?; + let tree = repo.find_tree(tree_oid)?; + + // Keep git's generated merge message (`MERGE_MSG`) when present, + // matching `git commit --no-edit`; otherwise synthesize one. + let message = + read_merge_msg(&repo).unwrap_or_else(|| format!("Merge commit '{}'", theirs.id())); + + let sig = repo.signature()?; + repo.commit(Some("HEAD"), &sig, &sig, &message, &tree, &[&ours, &theirs])?; + repo.cleanup_state()?; Ok(()) } @@ -137,7 +212,16 @@ impl GitRepo { /// conflict has no base stage). Valid only while a merge is in /// progress with `path` unresolved. pub fn conflict_stages(&self, path: &str) -> ConflictStages { - let stage = |n: u8| self.run(&["show", &format!(":{n}:{path}")]).ok(); + // Any failure to open the repo / read the index yields the + // empty (all-`None`) stage set, matching the subprocess + // version's "`git show` failed → `None`" behaviour. + let Ok(repo) = self.open() else { + return ConflictStages::default(); + }; + let Ok(index) = repo.index() else { + return ConflictStages::default(); + }; + let stage = |n: i32| stage_blob(&repo, &index, path, n); ConflictStages { base: stage(1), ours: stage(2), @@ -177,8 +261,14 @@ impl GitRepo { if self.is_merging() { return Err(GitError::MergeInProgress); } - let head = self.run(&["rev-parse", "HEAD"])?.trim().to_string(); - let target = self.run(&["rev-parse", other])?.trim().to_string(); + let repo = self.open()?; + let head = repo.head()?.peel_to_commit()?.id().to_string(); + let target = repo + .revparse_single(other)? + .peel_to_commit()? + .id() + .to_string(); + drop(repo); // Ancestry short-circuits — no worktree needed, no mutation. if head == target || self.is_ancestor(&target, &head) { @@ -191,36 +281,41 @@ impl GitRepo { return Err(GitError::WorkingTreeDirty); } + let head_oid = Oid::from_str(&head)?; + let target_oid = Oid::from_str(&target)?; + // Compute the merge in a detached worktree pinned at HEAD. let worktree = MergeWorktree::create(self, merge_worktree_dir(), &head)?; - let wrepo = worktree.repo(); + let wgit = worktree.repo(); - // Fast-forward: exact, never conflicts. + // Fast-forward: exact, never conflicts. The worktree would + // simply land on `target`, so the live branch can advance + // straight onto `target`. if self.is_ancestor(&head, &target) { - wrepo.run(&["merge", "--ff-only", &target])?; - let merged = wrepo.run(&["rev-parse", "HEAD"])?.trim().to_string(); - self.run(&["merge", "--ff-only", &merged])?; + let live = self.open()?; + fast_forward(&live, target_oid)?; return Ok(WorktreeMergeReport::clean( MergeOutcome::FastForward, - merged, + target, )); } - // Diverged histories — a real merge commit, or conflicts. - match wrepo.run(&["merge", "--no-edit", &target]) { - Ok(_) => { - let merged = wrepo.run(&["rev-parse", "HEAD"])?.trim().to_string(); - // The worktree built a merge commit on top of `head`; - // the live branch can fast-forward onto it exactly. - self.run(&["merge", "--ff-only", &merged])?; - Ok(WorktreeMergeReport::clean(MergeOutcome::Merge, merged)) - } - Err(err @ GitError::Command { .. }) => { - let bag = collect_conflicts(wrepo)?; + // Diverged histories — a real merge commit, or conflicts. Run + // the merge inside the worktree so any markers stay quarantined. + let wrepo = wgit.open()?; + let merged = match merge_in_worktree(&wrepo, head_oid, target_oid)? { + WorktreeMergeResult::Clean(merged) => merged, + WorktreeMergeResult::Conflicts => { + let bag = collect_conflicts(wgit)?; if bag.is_empty() { - // A non-conflict failure (e.g. an unrelated - // history) — surface it rather than swallow it. - return Err(err); + // `merge_in_worktree` reported conflicts but the + // index shows none — treat as a non-conflict + // failure and surface it rather than swallow it. + let _ = abort_in_worktree(&wrepo); + return Err(GitError::Command { + operation: "merge".to_string(), + stderr: "merge halted without a recorded conflict".to_string(), + }); } // Offer each conflicted file to the structured // resolver; write back + stage whatever it resolves. @@ -235,31 +330,245 @@ impl GitRepo { stages.theirs.as_deref().unwrap_or(""), ); if let Some(content) = resolved { - let abs = wrepo.workdir().join(&file.path); + let abs = wgit.workdir().join(&file.path); std::fs::write(&abs, content).map_err(|e| GitError::Io(e.to_string()))?; - wrepo.stage(&[abs.as_path()])?; + // Stage the resolved content into the worktree + // index, which also clears the conflict entry. + let mut index = wrepo.index()?; + index.add_path(Path::new(&file.path))?; + index.write()?; } } // Anything still unmerged after that? - let residue = collect_conflicts(wrepo)?; - if residue.is_empty() { - // Every conflict was structurally auto-resolved — - // complete the merge and fast-forward it back. - wrepo.complete_merge()?; - let merged = wrepo.run(&["rev-parse", "HEAD"])?.trim().to_string(); - self.run(&["merge", "--ff-only", &merged])?; - return Ok(WorktreeMergeReport::clean(MergeOutcome::Merge, merged)); + let residue = collect_conflicts(wgit)?; + if !residue.is_empty() { + // Abort the worktree's half-merge; the worktree drop + // then removes the directory entirely. The live tree + // was never touched. + let _ = abort_in_worktree(&wrepo); + return Ok(WorktreeMergeReport::conflicted(residue)); } - // Abort the worktree's half-merge; the worktree drop - // then removes the directory entirely. The live tree - // was never touched. - let _ = wrepo.abort_merge(); - Ok(WorktreeMergeReport::conflicted(residue)) + // Every conflict was structurally auto-resolved — + // complete the merge in the worktree. + complete_in_worktree(&wrepo, head_oid, target_oid)? } - Err(err) => Err(err), - } + }; + + // The worktree built (or fast-forwarded to) a commit on top of + // `head`; the live branch can fast-forward onto it exactly. + let live = self.open()?; + let merged_oid = Oid::from_str(&merged)?; + fast_forward(&live, merged_oid)?; + Ok(WorktreeMergeReport::clean(MergeOutcome::Merge, merged)) // `worktree` drops here → the throwaway worktree is removed. } + + /// `path` made relative to the repository's work tree. An absolute + /// path under the work tree is stripped to its relative remainder; + /// an already-relative path is returned unchanged. + fn repo_relative(&self, path: &Path) -> PathBuf { + path.strip_prefix(self.workdir()) + .map(Path::to_path_buf) + .unwrap_or_else(|_| path.to_path_buf()) + } +} + +/// Fast-forward `repo`'s current `HEAD` to commit `oid`: check the +/// target tree out into the working directory + index, then move the +/// ref. An attached branch keeps its name (the branch ref advances); +/// a detached `HEAD` (as in a throwaway worktree) is re-pointed at the +/// commit directly. +/// +/// The checkout is **SAFE**, not forced. libgit2 updates the work tree +/// to the target but REFUSES — returning `GIT_ECONFLICT` *without +/// applying any change* — when an update would overwrite local work: a +/// modified tracked file, or an untracked file / directory in the way +/// (every collision shape, honoring `core.ignorecase`). That refusal is +/// mapped to [`GitError::WorkingTreeDirty`] so a fast-forward can never +/// silently discard the user's edits — exactly the guard the subprocess +/// `git pull` gave with "local changes / untracked working tree files +/// would be overwritten by merge". +fn fast_forward(repo: &Repository, oid: Oid) -> Result<(), GitError> { + let commit = repo.find_commit(oid)?; + let mut checkout = CheckoutBuilder::new(); // SAFE by default + match repo.checkout_tree(commit.as_object(), Some(&mut checkout)) { + Ok(()) => {} + Err(e) if e.code() == git2::ErrorCode::Conflict => { + return Err(GitError::WorkingTreeDirty); + } + Err(e) => return Err(e.into()), + } + match repo.head() { + Ok(mut head_ref) if head_ref.is_branch() => { + head_ref.set_target(oid, "fast-forward")?; + } + // Detached HEAD (worktree) or no current branch — point HEAD + // straight at the commit. + _ => { + repo.set_head_detached(oid)?; + } + } + Ok(()) +} + +/// Run a real merge of `target` into `before` against `repo`'s live +/// `HEAD`, producing a merge commit on a clean merge or leaving the +/// repository in its conflicted merging state (returning +/// [`MergeOutcome::Conflict`]) otherwise. +fn merge_commit_or_conflict( + repo: &Repository, + before: Oid, + target: Oid, +) -> Result { + let annotated = repo.find_annotated_commit(target)?; + let mut checkout = CheckoutBuilder::new(); + checkout.safe(); + repo.merge(&[&annotated], None, Some(&mut checkout))?; + + let mut index = repo.index()?; + if index.has_conflicts() { + // Conflicts left in the working tree + index. Leave the merge + // in progress (matching `git merge` halting on conflicts) so + // the caller can inspect / abort / resolve it. + return Ok(MergeOutcome::Conflict); + } + + // Clean merge — write the merged index out as the commit's tree + // and create the two-parent merge commit, then clear the merge + // metadata so the tree is no longer mid-merge. + let tree_oid = index.write_tree()?; + let tree = repo.find_tree(tree_oid)?; + let ours = repo.find_commit(before)?; + let theirs = repo.find_commit(target)?; + let sig = repo.signature()?; + repo.commit( + Some("HEAD"), + &sig, + &sig, + &format!("Merge commit '{target}'"), + &tree, + &[&ours, &theirs], + )?; + repo.cleanup_state()?; + Ok(MergeOutcome::Merge) +} + +/// The result of computing a merge inside a throwaway worktree. +enum WorktreeMergeResult { + /// A clean merge — the merge commit's hash. + Clean(String), + /// The merge halted on conflicts left in the worktree index. + Conflicts, +} + +/// Run a merge of `target` into the worktree's detached `HEAD` +/// (pinned at `head`). On a clean merge it commits the result and +/// returns its hash; on conflicts it leaves the worktree mid-merge +/// and returns [`WorktreeMergeResult::Conflicts`]. +fn merge_in_worktree( + wrepo: &Repository, + head: Oid, + target: Oid, +) -> Result { + let annotated = wrepo.find_annotated_commit(target)?; + let mut checkout = CheckoutBuilder::new(); + checkout.safe(); + wrepo.merge(&[&annotated], None, Some(&mut checkout))?; + + if wrepo.index()?.has_conflicts() { + return Ok(WorktreeMergeResult::Conflicts); + } + let merged = commit_worktree_merge(wrepo, head, target)?; + Ok(WorktreeMergeResult::Clean(merged)) +} + +/// Commit a fully-resolved worktree merge (every conflict already +/// staged) — the path taken once the structured resolver has cleared +/// the last conflict. Returns the merge commit's hash. +fn complete_in_worktree(wrepo: &Repository, head: Oid, target: Oid) -> Result { + commit_worktree_merge(wrepo, head, target) +} + +/// Write the worktree's (resolved) index out as a tree and create the +/// two-parent merge commit on the detached `HEAD`, then clear the +/// merge metadata. Returns the new commit's hash. +fn commit_worktree_merge(wrepo: &Repository, head: Oid, target: Oid) -> Result { + let mut index = wrepo.index()?; + let tree_oid = index.write_tree()?; + let tree = wrepo.find_tree(tree_oid)?; + let ours = wrepo.find_commit(head)?; + let theirs = wrepo.find_commit(target)?; + let sig = wrepo.signature()?; + let merged = wrepo.commit( + Some("HEAD"), + &sig, + &sig, + &format!("Merge commit '{target}'"), + &tree, + &[&ours, &theirs], + )?; + wrepo.cleanup_state()?; + Ok(merged.to_string()) +} + +/// Abort a worktree's half-finished merge — hard-reset to its `HEAD` +/// and clear the merge metadata so the worktree is no longer mid-merge. +fn abort_in_worktree(wrepo: &Repository) -> Result<(), GitError> { + let head = wrepo.head()?.peel_to_commit()?; + let mut checkout = CheckoutBuilder::new(); + checkout.force(); + wrepo.reset(head.as_object(), ResetType::Hard, Some(&mut checkout))?; + wrepo.cleanup_state()?; + Ok(()) +} + +/// Read git's generated `MERGE_MSG`, the message `git commit --no-edit` +/// keeps for a merge commit. `None` when it is absent or unreadable. +fn read_merge_msg(repo: &Repository) -> Option { + let path = repo.path().join("MERGE_MSG"); + let text = std::fs::read_to_string(path).ok()?; + let trimmed = text.trim_end(); + (!trimmed.is_empty()).then(|| trimmed.to_string()) +} + +/// The blob content of merge stage `n` (1 = base, 2 = ours, 3 = +/// theirs) for `path` in `index`, decoded as UTF-8 (lossy). `None` +/// when that stage does not exist for the path. +fn stage_blob(repo: &Repository, index: &git2::Index, path: &str, n: i32) -> Option { + let entry = index.get_path(Path::new(path), n)?; + let blob = repo.find_blob(entry.id).ok()?; + Some(String::from_utf8_lossy(blob.content()).into_owned()) +} + +/// The repo-relative path of a conflict — taken from whichever of its +/// three stage entries exists (a delete/modify conflict is missing one). +fn conflict_path(conflict: &git2::IndexConflict) -> Option { + let entry = conflict + .our + .as_ref() + .or(conflict.their.as_ref()) + .or(conflict.ancestor.as_ref())?; + Some(String::from_utf8_lossy(&entry.path).into_owned()) +} + +/// Classify a conflict from which of its three stage entries are +/// present — the libgit2 counterpart of the porcelain `XY` codes the +/// subprocess version read (`UU` / `AA` / `DU`-`UD`-`DD` / other). +fn conflict_kind(conflict: &git2::IndexConflict) -> ConflictKind { + let has_ancestor = conflict.ancestor.is_some(); + let has_our = conflict.our.is_some(); + let has_their = conflict.their.is_some(); + match (has_ancestor, has_our, has_their) { + // Both sides changed a file that existed at the base — `UU`. + (true, true, true) => ConflictKind::BothModified, + // Both sides added the path with no common base — `AA`. + (false, true, true) => ConflictKind::BothAdded, + // One side deleted while the other kept/changed it — `DU` / + // `UD` / `DD`. + (_, false, true) | (_, true, false) => ConflictKind::DeleteModify, + // Anything else (including a stageless record) — `Other`. + _ => ConflictKind::Other, + } } /// How a single path conflicts in a merge. @@ -373,42 +682,22 @@ fn merge_worktree_dir() -> PathBuf { std::env::temp_dir().join(format!("op-git-merge-{}-{nanos}", std::process::id())) } -/// Build a [`ConflictBag`] from a conflicted worktree's porcelain -/// status. Only unmerged (`U`-coded) entries are collected. +/// Build a [`ConflictBag`] from a conflicted worktree's index. Only +/// the index's unmerged (conflict) entries are collected, in the order +/// the conflict iterator yields them. /// -/// `--porcelain=v1 -z` is used deliberately: it emits NUL-terminated -/// records with byte-exact, *unquoted* paths, so a path containing -/// spaces — or leading / trailing whitespace — survives intact (the -/// space-delimited, sometimes-quoted default format would not). +/// `.op` documents carry their three merge-stage blobs so the caller +/// can run a structured node-level merge; other files do not. fn collect_conflicts(repo: &GitRepo) -> Result { - let raw = repo.run(&["status", "--porcelain=v1", "-z"])?; + let git = repo.open()?; + let index = git.index()?; let mut files = Vec::new(); - let mut records = raw.split('\0'); - while let Some(record) = records.next() { - // `XY ` — two status codes, a space, then the path. - // The trailing split element after the final NUL is empty. - if record.len() < 4 { + for conflict in index.conflicts()? { + let conflict = conflict?; + let Some(path) = conflict_path(&conflict) else { continue; - } - let xy = record.as_bytes(); - // Rename / copy entries carry a second NUL-delimited field - // (the original path). Consume it so it is not misread as a - // standalone status record on the next iteration. - if xy[0] == b'R' || xy[0] == b'C' || xy[1] == b'R' || xy[1] == b'C' { - let _ = records.next(); - } - // Unmerged porcelain codes — see `git status` docs. Byte 3 - // onward is the path; bytes 0..3 are pure ASCII, so the - // slice always falls on a char boundary. - let kind = match &record[..2] { - "UU" => ConflictKind::BothModified, - "AA" => ConflictKind::BothAdded, - "DD" | "DU" | "UD" => ConflictKind::DeleteModify, - "AU" | "UA" => ConflictKind::Other, - // Not an unmerged entry — skip it. - _ => continue, }; - let path = record[3..].to_string(); + let kind = conflict_kind(&conflict); // `.op` documents carry their three merge-stage blobs so the // caller can run a structured node-level merge. let stages = path.ends_with(".op").then(|| repo.conflict_stages(&path)); diff --git a/crates/op-git/src/remote.rs b/crates/op-git/src/remote.rs index 72a3c486..2d0ecd19 100644 --- a/crates/op-git/src/remote.rs +++ b/crates/op-git/src/remote.rs @@ -1,14 +1,43 @@ //! Remote operations — clone, fetch, pull, push, remote config. //! -//! The network operations (`clone` / `fetch` / `pull` / `push`) -//! depend on the ambient git credential / SSH setup; dedicated -//! credential + SSH-key handling lands in a later increment. They -//! are not unit-tested here (no network in tests); the remote-config -//! readers / writers are. +//! The network operations (`clone` / `fetch` / `pull` / `push`) run +//! entirely in-process through libgit2 (`git2`); there is no system +//! `git` subprocess. Authentication is supplied through a +//! [`git2::RemoteCallbacks`] credential closure built from the +//! handle's stored auth carrier (see [`GitRepo::auth_env`] / +//! [`GitRepo::with_auth_env`]); when the carrier is empty the closure +//! falls back to the ambient credential helpers / ssh-agent, exactly +//! the way the subprocess backend deferred to the user's git setup. +//! The network ops are not unit-tested here (no network in tests); +//! the remote-config readers / writers are. +//! +//! ## Auth carrier (libgit2 migration note) +//! +//! The subprocess backend carried auth as *git environment* pairs — +//! `GIT_SSH_COMMAND`, `GIT_CONFIG_*`, `OP_GIT_HTTPS_*`. libgit2 takes +//! credentials through a callback, not the environment, so the +//! `auth_env` `Vec<(String, String)>` is repurposed as a small, +//! structured credential carrier interpreted by [`build_callbacks`]: +//! +//! - `("token", ":")` → an HTTPS user/password +//! credential ([`git2::Cred::userpass_plaintext`]). +//! - `("ssh_key_path", "")` → an SSH key credential +//! ([`git2::Cred::ssh_key`]) honoring the `username_from_url`. +//! +//! The public surface — [`GitRepo::auth_env`] returning a +//! `Vec<(String, String)>` and [`GitRepo::with_auth_env`] storing it — +//! is unchanged; only the *meaning* of the pairs changed, and the +//! `git_session` host wiring (`with_auth_env(repo.auth_env(..))`) keeps +//! compiling and working without edits. use std::path::{Path, PathBuf}; -use crate::{git_output, stderr_of, GitError, GitRepo, MergeOutcome}; +use git2::{ + build::RepoBuilder, AutotagOption, Cred, CredentialType, FetchOptions, PushOptions, + RemoteCallbacks, +}; + +use crate::{GitError, GitRepo, MergeOutcome}; /// A configured git remote. #[derive(Debug, Clone, PartialEq, Eq)] @@ -20,32 +49,101 @@ pub struct Remote { } impl GitRepo { - /// `git clone ` into `dir` and return a handle to the clone. - /// `dir` must not already exist (git creates it). + /// Clone `url` into `dir` and return a handle to the clone. + /// `dir` must not already exist (libgit2 creates it). + /// + /// Authentication uses the ambient credential helpers / ssh-agent + /// — a fresh clone has no stored credential carrier yet (the + /// returned handle starts with an empty `auth_env`, matching the + /// subprocess backend, where the spawned `git clone` likewise + /// inherited only the ambient git environment). pub fn clone(url: &str, dir: &Path) -> Result { - // Run from `dir`'s parent so a relative target resolves; the - // parent must exist for git to create `dir` inside it. + // The parent must exist for the clone target to be created + // inside it; libgit2 (like `git clone`) creates only `dir`, + // not its ancestors. let parent = dir.parent().unwrap_or_else(|| Path::new(".")); if !parent.exists() { std::fs::create_dir_all(parent).map_err(|e| GitError::Io(e.to_string()))?; } - let dir_str = dir - .to_str() - .ok_or_else(|| GitError::Io("non-UTF-8 path".into()))?; - let output = git_output(parent, &["clone", url, dir_str])?; - if !output.status.success() { - return Err(GitError::Command { + + // No stored credential yet — the clone authenticates through + // the ambient credential helpers / ssh-agent (the closure + // falls back to `Cred::credential_helper` / the ssh-agent when + // the carrier is empty, exactly as the spawned `git clone` + // inherited only the ambient git environment). + let auth: Vec<(String, String)> = Vec::new(); + let callbacks = build_callbacks(&auth); + let mut fetch_opts = FetchOptions::new(); + fetch_opts.remote_callbacks(callbacks); + + let repo = RepoBuilder::new() + .fetch_options(fetch_opts) + .clone(url, dir) + .map_err(|e| GitError::Command { operation: "clone".to_string(), - stderr: stderr_of(&output), - }); + stderr: e.message().to_string(), + })?; + + // Cloning an EMPTY remote leaves the local HEAD on libgit2's own + // default branch name, which can differ from the remote's + // configured initial branch (e.g. `main`). Match `git clone` by + // pointing the still-unborn HEAD at the remote's advertised + // default branch — falling back to `refs/heads/main` (the + // initial-branch this crate's `init` uses) when the empty remote + // advertises none — so the first local commit lands on a + // predictable branch. + if repo.is_empty().unwrap_or(false) { + let target = repo + .find_remote("origin") + .ok() + .and_then(|mut origin| { + origin.connect(git2::Direction::Fetch).ok()?; + let branch = origin + .default_branch() + .ok() + .and_then(|b| b.as_str().map(str::to_string)); + let _ = origin.disconnect(); + branch + }) + .unwrap_or_else(|| "refs/heads/main".to_string()); + let _ = repo.set_head(&target); } + GitRepo::discover(dir)?.ok_or_else(|| GitError::NotARepo(PathBuf::from(dir))) } - /// `git fetch` — update remote-tracking refs without touching the - /// working tree. + /// Fetch every remote — update remote-tracking refs without + /// touching the working tree. Prunes deleted upstream branches, + /// matching the former `git fetch --all --prune`. pub fn fetch(&self) -> Result<(), GitError> { - self.run(&["fetch", "--all", "--prune"])?; + let repo = self.open()?; + // `git fetch --all` walks every configured remote, not just + // `origin`; replicate that so a multi-remote repo behaves the + // same as the subprocess backend. + let remote_names = repo.remotes()?; + for name in remote_names.iter().flatten() { + let mut remote = repo.find_remote(name)?; + // A fresh callback set per remote — the carrier is shared by + // reference into each, so credentials are presented to every + // remote uniformly. (The subprocess backend scoped its + // credential helper per host; this carrier-based form + // authenticates each remote with the same stored credential — + // see the "Auth carrier" note for the behavioural delta.) + let callbacks = build_callbacks(&self.auth_env); + let mut fetch_opts = FetchOptions::new(); + fetch_opts.remote_callbacks(callbacks); + fetch_opts.prune(git2::FetchPrune::On); + fetch_opts.download_tags(AutotagOption::All); + // Empty refspecs → use the remote's configured fetch + // refspecs, exactly like a bare `git fetch `. + let empty: &[&str] = &[]; + remote + .fetch(empty, Some(&mut fetch_opts), None) + .map_err(|e| GitError::Command { + operation: "fetch".to_string(), + stderr: e.message().to_string(), + })?; + } Ok(()) } @@ -53,7 +151,9 @@ impl GitRepo { /// the [`MergeOutcome`] — exactly the TS `enginePull` model: a /// pull is a fetch followed by a merge of the remote-tracking /// ref. The fast-forward / merge / up-to-date decision is the - /// shared, ancestry-based [`GitRepo::integrate`] classifier. + /// shared, ancestry-based [`GitRepo::integrate`] classifier, which + /// also surfaces [`GitError::WorkingTreeDirty`] / refuses while a + /// merge is in progress. pub fn pull(&self) -> Result { // Stop *before* any network work: a pull during an unresolved // merge is refused by `integrate` anyway, and fetching first @@ -62,76 +162,175 @@ impl GitRepo { if self.is_merging() { return Err(GitError::MergeInProgress); } - let before = self.run(&["rev-parse", "HEAD"])?.trim().to_string(); + // Resolve the pre-pull HEAD commit. + let before = { + let repo = self.open()?; + let head = repo.head().map_err(|e| GitError::Command { + operation: "rev-parse".to_string(), + stderr: e.message().to_string(), + })?; + let oid = head.target().ok_or_else(|| GitError::Command { + operation: "rev-parse".to_string(), + stderr: "HEAD does not point at a commit".to_string(), + })?; + oid.to_string() + }; self.fetch()?; - // `@{u}` is the configured upstream tracking ref; pulling - // without one configured is a genuine error. - let upstream = self.run(&["rev-parse", "@{u}"])?.trim().to_string(); + // The configured upstream tracking ref; pulling without one + // configured is a genuine error (mirrors `git rev-parse @{u}` + // failing). + let upstream = self.upstream_oid()?; self.integrate(&before, &upstream) } - /// `git push` — publish the current branch to its upstream. + /// Publish the current branch to its upstream. /// /// When the branch already tracks an upstream this is a plain - /// `git push`. When it does not, the push targets `origin` (or - /// the sole configured remote) with `-u`, so the very first push - /// also *sets* the upstream — the user never has to configure - /// tracking by hand. + /// `push` of `HEAD` to that upstream branch. When it does not, the + /// push targets `origin` (or the sole configured remote) and also + /// *sets* the upstream — the user never has to configure tracking + /// by hand (the subprocess `push -u HEAD` behaviour). pub fn push(&self) -> Result<(), GitError> { - if self.run(&["rev-parse", "--abbrev-ref", "@{u}"]).is_ok() { - self.run(&["push"])?; - return Ok(()); - } - let remotes = self.remotes()?; - let remote = remotes - .iter() - .find(|r| r.name == "origin") - .or_else(|| remotes.first()) - .ok_or_else(|| GitError::Command { + let repo = self.open()?; + + // The current branch's short name and full ref — a push needs + // an explicit `refs/heads/` refspec. + let head = repo.head().map_err(|e| GitError::Command { + operation: "push".to_string(), + stderr: e.message().to_string(), + })?; + if !head.is_branch() { + return Err(GitError::Command { operation: "push".to_string(), - stderr: "no remote configured — add one first".to_string(), + stderr: "cannot push a detached HEAD — switch to a branch first".to_string(), + }); + } + let head_ref = head.name().ok_or_else(|| GitError::Command { + operation: "push".to_string(), + stderr: "HEAD has no symbolic name".to_string(), + })?; + let branch_short = head.shorthand().unwrap_or("HEAD").to_string(); + + // Does this branch already track an upstream? `git push` with + // a configured upstream pushes there; otherwise we set it up. + let tracked = repo.branch_upstream_name(head_ref).ok(); + + // Choose the target remote: the upstream's remote when one is + // configured, else `origin`, else the sole remote. + let remote_name = match repo.branch_upstream_remote(head_ref).ok() { + Some(buf) => buf + .as_str() + .map(|s| s.to_string()) + .unwrap_or_else(|| "origin".to_string()), + None => { + let remotes = self.remotes()?; + remotes + .iter() + .find(|r| r.name == "origin") + .or_else(|| remotes.first()) + .map(|r| r.name.clone()) + .ok_or_else(|| GitError::Command { + operation: "push".to_string(), + stderr: "no remote configured — add one first".to_string(), + })? + } + }; + + let mut remote = repo.find_remote(&remote_name)?; + + // `HEAD:refs/heads/` publishes the current branch under + // the same name on the remote — the conventional push refspec. + let refspec = format!("{head_ref}:refs/heads/{branch_short}"); + + let callbacks = build_callbacks(&self.auth_env); + let mut push_opts = PushOptions::new(); + push_opts.remote_callbacks(callbacks); + + remote + .push(&[refspec.as_str()], Some(&mut push_opts)) + .map_err(|e| GitError::Command { + operation: "push".to_string(), + stderr: e.message().to_string(), })?; - let remote_name = remote.name.clone(); - self.run(&["push", "-u", &remote_name, "HEAD"])?; + + // First push of an untracked branch also sets up tracking, so + // a later `push` / `pull` finds the upstream — the `-u` half + // of the subprocess `push -u HEAD`. + if tracked.is_none() { + if let Ok(mut branch) = repo.find_branch(&branch_short, git2::BranchType::Local) { + // `/` is the remote-tracking ref name. + let upstream = format!("{remote_name}/{branch_short}"); + let _ = branch.set_upstream(Some(&upstream)); + } + } Ok(()) } /// Every configured remote with its fetch URL. pub fn remotes(&self) -> Result, GitError> { - let raw = self.run(&["remote", "-v"])?; - Ok(parse_remotes(&raw)) + let repo = self.open()?; + let names = repo.remotes()?; + let mut remotes = Vec::new(); + for name in names.iter().flatten() { + let Ok(remote) = repo.find_remote(name) else { + continue; + }; + // A remote with no fetch URL is degenerate; skip it rather + // than emit an empty URL (`git remote -v` would not list a + // `(fetch)` line for it either). + let Some(url) = remote.url() else { + continue; + }; + remotes.push(Remote { + name: name.to_string(), + url: url.to_string(), + }); + } + Ok(remotes) } /// The fetch URL of remote `name`, if it exists. pub fn remote_url(&self, name: &str) -> Result, GitError> { - match self.run(&["remote", "get-url", name]) { - Ok(url) => Ok(Some(url.trim().to_string())), - // `git remote get-url` exits non-zero for an unknown - // remote — that is "no such remote", not a hard error. - Err(GitError::Command { .. }) => Ok(None), - Err(e) => Err(e), + let repo = self.open()?; + // Bind the lookup to a local so the borrowed `Remote` (whose + // `url()` borrows `repo`) is consumed before the block ends — + // otherwise the temporary would outlive `repo`'s drop. + let found = repo.find_remote(name); + match found { + Ok(remote) => Ok(remote.url().map(str::to_string)), + // An unknown remote is "no such remote", not a hard error — + // the same tolerance the subprocess backend gave a non-zero + // `git remote get-url`. + Err(e) if e.code() == git2::ErrorCode::NotFound => Ok(None), + Err(e) => Err(e.into()), } } /// Point remote `name` at `url`, adding the remote when it does /// not exist yet. pub fn set_remote(&self, name: &str, url: &str) -> Result<(), GitError> { + let repo = self.open()?; if self.remote_url(name)?.is_some() { - self.run(&["remote", "set-url", name, url])?; + repo.remote_set_url(name, url)?; } else { - self.run(&["remote", "add", name, url])?; + repo.remote(name, url)?; } Ok(()) } } impl GitRepo { - /// Resolve the git environment for an authenticated network op + /// Resolve the credential carrier for an authenticated network op /// against the `origin` remote, using the credential + SSH-key - /// stores. Returns env-var pairs to apply via + /// stores. Returns carrier pairs to apply via /// [`GitRepo::with_auth_env`] — an empty `Vec` when no stored - /// credential matches the remote's host, in which case git falls - /// back to its ambient credential helpers / ssh-agent. + /// credential matches the remote's host, in which case the network + /// ops fall back to the ambient credential helpers / ssh-agent. + /// + /// The returned pairs are interpreted by [`build_callbacks`]: + /// `("ssh_key_path", )` for SSH, `("token", ":")` + /// for HTTPS. They are no longer git environment variables — see + /// the module-level "Auth carrier" note. pub fn auth_env( &self, auth: &crate::AuthStore, @@ -149,76 +348,39 @@ impl GitRepo { }; match credential { crate::Credential::Ssh { key_name } => match ssh.load(&key_name) { + // Carry the private-key path; `build_callbacks` turns it + // into a `Cred::ssh_key` honoring `username_from_url`. Ok(key) => vec![( - "GIT_SSH_COMMAND".to_string(), - // The key path is shell-quoted — `GIT_SSH_COMMAND` - // is parsed shell-like by git, so an unescaped - // path containing a quote / `$()` would otherwise - // break out and run shell code. - format!( - "ssh -i {} -o IdentitiesOnly=yes", - shell_single_quote(&key.private_path.display().to_string()) - ), + "ssh_key_path".to_string(), + key.private_path.display().to_string(), )], Err(_) => Vec::new(), }, crate::Credential::Https { username, token } => { - // A control character in the credential would corrupt - // the line-based credential protocol — reject it - // rather than emit a malformed (or unsafe) helper. + // A control character in the credential cannot be + // carried safely (it would corrupt a downstream + // credential protocol if the carrier is ever spilled + // back to git) — reject it, as the subprocess backend + // did, rather than present a malformed credential. if has_control_char(&username) || has_control_char(&token) { return Vec::new(); } - // The credential-helper scope key must carry the - // remote's port — git's credential URL match treats a - // portless config URL as the default port, so a - // non-standard-port HTTPS remote would otherwise miss. - https_auth_env(&remote_authority(&url), username, token) + // `:` — `build_callbacks` splits on the + // first `:` into a `Cred::userpass_plaintext`. The `host` + // pair scopes the token: `build_callbacks` only releases + // it to a URL whose host matches, so a redirect to (or a + // tampered origin pointing at) a different host can never + // exfiltrate the PAT — preserving the host-scoping the + // subprocess credential helper had. + vec![ + ("token".to_string(), format!("{username}:{token}")), + ("host".to_string(), host), + ] } } } } -/// The git environment for an HTTPS-authenticated op against -/// `authority` (a `host` or `host:port`). -/// -/// The credential helper is registered **URL-scoped** — -/// `credential.https://.helper`, not the bare -/// `credential.helper` — so a multi-remote operation (`fetch --all`) -/// can never present this token to a *different* remote. The -/// `authority` keeps any explicit port because git's credential URL -/// match is port-sensitive. The helper itself is a static shell -/// snippet (no interpolated user data, so a crafted credential -/// cannot inject shell code); the username / token reach it through -/// dedicated env vars, emitted verbatim by `printf '%s'`. -fn https_auth_env(authority: &str, username: String, token: String) -> Vec<(String, String)> { - let helper = "!f() { printf '%s\\n' \ - \"username=$OP_GIT_HTTPS_USER\" \ - \"password=$OP_GIT_HTTPS_PASS\"; }; f"; - vec![ - ("GIT_CONFIG_COUNT".to_string(), "1".to_string()), - ( - "GIT_CONFIG_KEY_0".to_string(), - format!("credential.https://{authority}.helper"), - ), - ("GIT_CONFIG_VALUE_0".to_string(), helper.to_string()), - ("OP_GIT_HTTPS_USER".to_string(), username), - ("OP_GIT_HTTPS_PASS".to_string(), token), - ] -} - -/// Whether `s` holds an ASCII control character — a credential with -/// one cannot be carried safely by git's line-based protocol. -fn has_control_char(s: &str) -> bool { - s.chars().any(|c| c.is_control()) -} - -/// POSIX single-quote `s` so it survives shell word-splitting intact -/// — every embedded `'` becomes `'\''`. -fn shell_single_quote(s: &str) -> String { - format!("'{}'", s.replace('\'', "'\\''")) -} - impl GitRepo { /// The host of the `origin` remote — `Some("github.com")` etc. /// `None` when there is no `origin` or its URL has no host. @@ -227,20 +389,133 @@ impl GitRepo { let host = remote_host(&url); (!host.is_empty()).then_some(host) } + + /// The commit Oid (hex) of the current branch's configured + /// upstream tracking ref. Errors — mapping to a + /// [`GitError::Command`] — when no upstream is configured, mirroring + /// the subprocess `git rev-parse @{u}` failure. + fn upstream_oid(&self) -> Result { + let repo = self.open()?; + let head = repo.head().map_err(|e| GitError::Command { + operation: "rev-parse".to_string(), + stderr: e.message().to_string(), + })?; + let head_ref = head.name().ok_or_else(|| GitError::Command { + operation: "rev-parse".to_string(), + stderr: "HEAD has no symbolic name".to_string(), + })?; + // `branch_upstream_name` yields the remote-tracking ref name + // (`refs/remotes/origin/main`) for the branch's `@{u}`. + let upstream_buf = repo + .branch_upstream_name(head_ref) + .map_err(|e| GitError::Command { + operation: "rev-parse".to_string(), + stderr: e.message().to_string(), + })?; + let upstream_ref = upstream_buf.as_str().ok_or_else(|| GitError::Command { + operation: "rev-parse".to_string(), + stderr: "upstream ref name is not valid UTF-8".to_string(), + })?; + let reference = repo + .find_reference(upstream_ref) + .map_err(|e| GitError::Command { + operation: "rev-parse".to_string(), + stderr: e.message().to_string(), + })?; + // Peel to the commit it points at — the upstream tip's Oid. + let oid = reference + .peel_to_commit() + .map_err(|e| GitError::Command { + operation: "rev-parse".to_string(), + stderr: e.message().to_string(), + })? + .id(); + Ok(oid.to_string()) + } } -/// The authority of a git remote URL — `host` or `host:port` -/// (bracketed for IPv6) — verbatim, port preserved. Used to build a -/// port-sensitive `credential.https://` scope key. An -/// scp-like remote carries no URL port, so its bare host is used. -fn remote_authority(url: &str) -> String { - let url = url.trim(); - if let Some(rest) = url.split("://").nth(1) { - let after_user = rest.rsplit('@').next().unwrap_or(rest); - after_user.split('/').next().unwrap_or("").to_string() - } else { - remote_host(url) - } +/// Build the [`RemoteCallbacks`] that authenticate a network op from +/// the handle's stored credential carrier (`auth` — the repurposed +/// `auth_env` `Vec`). The `.credentials` closure interprets the +/// carrier keys: +/// +/// - `("ssh_key_path", )` → [`Cred::ssh_key`] for the +/// `username_from_url` (defaulting to `git`). +/// - `("token", ":")` → [`Cred::userpass_plaintext`]. +/// +/// When the carrier holds nothing usable for the credential type +/// libgit2 asks for, the closure falls back to +/// [`Cred::credential_helper`] (the user's configured helpers) for +/// HTTPS, [`Cred::ssh_key_from_agent`] for SSH, and finally +/// [`Cred::default`] — so an un-authenticated handle behaves exactly +/// like the subprocess backend deferring to the ambient git setup. +fn build_callbacks<'a>(auth: &'a [(String, String)]) -> RemoteCallbacks<'a> { + let mut callbacks = RemoteCallbacks::new(); + callbacks.credentials(move |url, username_from_url, allowed| { + // SSH key authentication — preferred when libgit2 asks for it + // and we carry a key path. + if allowed.contains(CredentialType::SSH_KEY) { + let user = username_from_url.unwrap_or("git"); + if let Some(path) = carrier_value(auth, "ssh_key_path") { + return Cred::ssh_key(user, None, Path::new(path), None); + } + // No stored key — let the agent answer (ssh-agent / Pageant), + // matching the subprocess backend's ambient ssh-agent use. + return Cred::ssh_key_from_agent(user); + } + + // HTTPS username/password (personal access token). + if allowed.contains(CredentialType::USER_PASS_PLAINTEXT) { + // Only release the stored token to the host it was scoped to. + // libgit2 invokes this closure with the URL it is actually + // authenticating against, which a redirect (or a tampered + // remote) can change — without this check the PAT would be + // sent to an attacker-controlled host. + let host_ok = match carrier_value(auth, "host") { + Some(expected) => &remote_host(url) == expected, + // No host scoping recorded — be conservative and do not + // release the token blindly; fall through to the helpers. + None => false, + }; + if host_ok { + if let Some(pair) = carrier_value(auth, "token") { + // `:` — split on the FIRST `:` so a + // token containing `:` survives intact. + let (user, token) = match pair.split_once(':') { + Some((u, t)) => (u, t), + None => ("", pair.as_str()), + }; + return Cred::userpass_plaintext(user, token); + } + } + // Fall back to the user's configured credential helpers + // (osxkeychain / manager / store) for ambient HTTPS auth. + let config = git2::Config::open_default()?; + return Cred::credential_helper(&config, url, username_from_url); + } + + // libgit2 sometimes asks only for the SSH *username* before the + // key exchange (e.g. an scp-like URL without `user@`). + if allowed.contains(CredentialType::USERNAME) { + return Cred::username(username_from_url.unwrap_or("git")); + } + + // Nothing matched — defer to the default credential (e.g. an + // anonymous / already-authenticated transport). + Cred::default() + }); + callbacks +} + +/// The value of carrier key `key`, if present. +fn carrier_value<'a>(auth: &'a [(String, String)], key: &str) -> Option<&'a String> { + auth.iter().find(|(k, _)| k == key).map(|(_, v)| v) +} + +/// Whether `s` holds an ASCII control character — a credential with +/// one cannot be carried safely. +fn has_control_char(s: &str) -> bool { + s.chars().any(|c| c.is_control()) } /// Extract the host from a git remote URL — `scheme://[user@]host/…` @@ -289,51 +564,10 @@ fn remote_host(url: &str) -> String { authority.split(':').next().unwrap_or("").to_string() } -/// Parse `git remote -v` output. Each remote prints a `(fetch)` and -/// a `(push)` line; the `(fetch)` URL is kept, deduplicated by name. -fn parse_remotes(raw: &str) -> Vec { - let mut remotes: Vec = Vec::new(); - for line in raw.lines() { - // Format: `\t (fetch|push)`. - if !line.contains("(fetch)") { - continue; - } - let mut parts = line.split_whitespace(); - let (Some(name), Some(url)) = (parts.next(), parts.next()) else { - continue; - }; - if !remotes.iter().any(|r| r.name == name) { - remotes.push(Remote { - name: name.to_string(), - url: url.to_string(), - }); - } - } - remotes -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn parses_fetch_urls_only_deduped() { - let raw = "origin\tgit@github.com:ZSeven-W/openpencil.git (fetch)\n\ - origin\tgit@github.com:ZSeven-W/openpencil.git (push)\n\ - fork\thttps://example.com/x.git (fetch)\n\ - fork\thttps://example.com/x.git (push)\n"; - let remotes = parse_remotes(raw); - assert_eq!(remotes.len(), 2); - assert_eq!(remotes[0].name, "origin"); - assert_eq!(remotes[0].url, "git@github.com:ZSeven-W/openpencil.git"); - assert_eq!(remotes[1].name, "fork"); - } - - #[test] - fn empty_remote_output_is_empty() { - assert!(parse_remotes("").is_empty()); - } - #[test] fn remote_host_parses_every_url_shape() { assert_eq!(remote_host("https://github.com/org/repo.git"), "github.com"); @@ -366,54 +600,27 @@ mod tests { } #[test] - fn shell_single_quote_escapes_embedded_quotes() { - assert_eq!(shell_single_quote("/plain/key"), "'/plain/key'"); - assert_eq!(shell_single_quote("/a'b/key"), "'/a'\\''b/key'"); + fn has_control_char_flags_bad_credentials() { + assert!(!has_control_char("alice")); + assert!(!has_control_char("ghp_AbCdEf123456")); + assert!(has_control_char("bad\ntoken")); + assert!(has_control_char("bad\0token")); } #[test] - fn https_auth_env_scopes_the_helper_to_the_host() { - let env = https_auth_env("github.com", "alice".into(), "tok".into()); - let key = &env.iter().find(|(k, _)| k == "GIT_CONFIG_KEY_0").unwrap().1; - // URL-scoped — never the bare `credential.helper`, so a - // `fetch --all` cannot leak this token to another remote. - assert_eq!(key, "credential.https://github.com.helper"); - // The values travel as env vars, not interpolated. - assert!(env - .iter() - .any(|(k, v)| k == "OP_GIT_HTTPS_USER" && v == "alice")); - assert!(env - .iter() - .any(|(k, v)| k == "OP_GIT_HTTPS_PASS" && v == "tok")); - } - - #[test] - fn https_auth_env_keeps_a_non_standard_port() { - // git's credential URL match is port-sensitive — a portless - // scope key would miss a `:8443` remote. - let env = https_auth_env("gitlab.example.com:8443", "u".into(), "t".into()); - let key = &env.iter().find(|(k, _)| k == "GIT_CONFIG_KEY_0").unwrap().1; - assert_eq!(key, "credential.https://gitlab.example.com:8443.helper"); - } - - #[test] - fn remote_authority_keeps_the_port() { + fn carrier_value_finds_the_key() { + let carrier = vec![ + ("token".to_string(), "alice:secret".to_string()), + ( + "ssh_key_path".to_string(), + "/home/alice/.ssh/id".to_string(), + ), + ]; + assert_eq!(carrier_value(&carrier, "token").unwrap(), "alice:secret"); assert_eq!( - remote_authority("https://github.com/org/repo.git"), - "github.com" - ); - assert_eq!( - remote_authority("https://gitlab.example.com:8443/x.git"), - "gitlab.example.com:8443" - ); - assert_eq!( - remote_authority("https://user@gitlab.example.com:8443/x"), - "gitlab.example.com:8443" - ); - // scp-like carries no URL port — bare host. - assert_eq!( - remote_authority("git@github.com:org/repo.git"), - "github.com" + carrier_value(&carrier, "ssh_key_path").unwrap(), + "/home/alice/.ssh/id" ); + assert!(carrier_value(&carrier, "missing").is_none()); } } diff --git a/crates/op-git/src/status.rs b/crates/op-git/src/status.rs index 82c51c9b..9b1b5935 100644 --- a/crates/op-git/src/status.rs +++ b/crates/op-git/src/status.rs @@ -1,10 +1,9 @@ -//! Working-tree status, staging, commit and restore. +//! Working-tree status, staging, commit and restore — in-process via +//! libgit2 (`git2`), so no system `git` binary is required. -use std::io::Write; -use std::path::Path; -use std::process::{Command, Stdio}; +use std::path::{Path, PathBuf}; -use crate::{stderr_of, GitError, GitRepo}; +use crate::{GitError, GitRepo}; /// How a file differs from `HEAD`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -65,268 +64,223 @@ impl GitRepo { /// Snapshot the working tree — branch, changed files, ahead / /// behind counts. pub fn status(&self) -> Result { - let raw = self.run(&["status", "--porcelain=v1", "--branch"])?; - Ok(parse_status(&raw)) + let repo = self.open()?; + + let mut opts = git2::StatusOptions::new(); + opts.include_untracked(true) + .recurse_untracked_dirs(true) + .renames_head_to_index(true) + .renames_index_to_workdir(true); + let entries = repo.statuses(Some(&mut opts))?; + + let mut files = Vec::new(); + for entry in entries.iter() { + let Some(path) = entry.path() else { continue }; + files.push(FileStatus { + path: path.to_string(), + state: classify(entry.status()), + // Staged ⇔ the index differs from HEAD (any `INDEX_*` bit). + staged: entry.status().intersects( + git2::Status::INDEX_NEW + | git2::Status::INDEX_MODIFIED + | git2::Status::INDEX_DELETED + | git2::Status::INDEX_RENAMED + | git2::Status::INDEX_TYPECHANGE, + ), + }); + } + + // `current_branch` resolves an unborn `HEAD` to its symbolic + // target (`main`) too, so a fresh repo reports its branch rather + // than a misleading detached state. + let branch = self.current_branch().unwrap_or(None); + let (ahead, behind) = ahead_behind(&repo); + + Ok(RepoStatus { + branch, + files, + ahead, + behind, + }) } - /// Stage `paths` (relative to the repo root or absolute). + /// Stage `paths` (relative to the repo root or absolute). Uses + /// `add_all`, which stages additions, modifications AND deletions + /// of the matched paths — mirroring `git add -- `. pub fn stage(&self, paths: &[&Path]) -> Result<(), GitError> { if paths.is_empty() { return Ok(()); } - let mut args: Vec<&str> = vec!["add", "--"]; - let path_strs: Vec<&str> = paths.iter().filter_map(|p| p.to_str()).collect(); - args.extend(path_strs); - self.run(&args)?; + let repo = self.open()?; + let mut index = repo.index()?; + let specs: Vec = paths.iter().map(|p| self.rel_to_workdir(p)).collect(); + index.add_all(specs.iter(), git2::IndexAddOption::DEFAULT, None)?; + index.write()?; Ok(()) } /// Stage every change in the working tree (`git add -A`). pub fn stage_all(&self) -> Result<(), GitError> { - self.run(&["add", "-A"])?; + let repo = self.open()?; + let mut index = repo.index()?; + // `add_all` over the whole tree picks up new + modified files; + // `update_all` records deletions + modifications of already-tracked + // files — together they equal `git add -A`. + index.add_all(["*"].iter(), git2::IndexAddOption::DEFAULT, None)?; + index.update_all(["*"].iter(), None)?; + index.write()?; Ok(()) } /// Unstage `paths` — remove them from the index without touching - /// the working tree. After the first commit `git restore --staged` - /// resets each path's index entry to `HEAD`; before any commit - /// there is no `HEAD`, so `git rm --cached` drops the staged - /// addition instead (leaving the file untracked). + /// the working tree. After the first commit each path's index entry + /// is reset to `HEAD`; before any commit there is no `HEAD`, so the + /// staged addition is dropped from the index instead (leaving the + /// file untracked). pub fn unstage(&self, paths: &[&Path]) -> Result<(), GitError> { if paths.is_empty() { return Ok(()); } - let has_head = self - .run(&["rev-parse", "--verify", "--quiet", "HEAD"]) - .is_ok(); - for path in paths.iter().filter_map(|p| p.to_str()) { - if has_head { - self.run(&["restore", "--staged", "--", path])?; - } else { - self.run(&["rm", "--cached", "--quiet", "--", path])?; + let repo = self.open()?; + let specs: Vec = paths.iter().map(|p| self.rel_to_workdir(p)).collect(); + match repo.head() { + Ok(head) => { + let head_obj = head.peel(git2::ObjectType::Commit)?; + repo.reset_default(Some(&head_obj), specs.iter())?; + } + // Unborn HEAD (no commit yet) — drop the staged additions. + Err(_) => { + let mut index = repo.index()?; + for spec in &specs { + let _ = index.remove_path(spec); + } + index.write()?; } } Ok(()) } - /// Stage a unified-diff `patch` into the index — `git apply - /// --cached`, the mechanism behind per-hunk staging. `patch` - /// must be a self-contained patch (file header + the chosen - /// hunks). `--recount` lets git tolerate hunk line-count drift - /// when only a subset of a file's hunks is applied. + /// Stage a unified-diff `patch` into the index — the mechanism + /// behind per-hunk staging. `patch` must be a self-contained patch + /// (file header + the chosen hunks); it is applied to the index + /// only (the working tree is untouched). pub fn apply_cached(&self, patch: &str) -> Result<(), GitError> { - let mut child = Command::new("git") - .current_dir(&self.workdir) - .args(["apply", "--cached", "--recount", "-"]) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .map_err(|e| { - if e.kind() == std::io::ErrorKind::NotFound { - GitError::GitNotFound - } else { - GitError::Io(e.to_string()) - } - })?; - child - .stdin - .take() - .ok_or_else(|| GitError::Io("git apply stdin unavailable".to_string()))? - .write_all(patch.as_bytes()) - .map_err(|e| GitError::Io(e.to_string()))?; - let output = child - .wait_with_output() - .map_err(|e| GitError::Io(e.to_string()))?; - if !output.status.success() { - return Err(GitError::Command { - operation: "apply".to_string(), - stderr: stderr_of(&output), - }); - } + let repo = self.open()?; + let diff = git2::Diff::from_buffer(patch.as_bytes())?; + repo.apply(&diff, git2::ApplyLocation::Index, None)?; Ok(()) } - /// Whether `path` has a change staged in the index. `git diff - /// --cached` lists the path exactly when its index entry differs - /// from `HEAD` (or, before the first commit, from the empty - /// tree) — the authoritative answer, unlike a UI snapshot. + /// Whether `path` has a change staged in the index — its index + /// entry differs from `HEAD` (or, before the first commit, from the + /// empty tree). pub fn is_path_staged(&self, path: &str) -> Result { - let out = self.run(&["diff", "--cached", "--name-only", "--", path])?; - Ok(!out.trim().is_empty()) + let repo = self.open()?; + let mut opts = git2::DiffOptions::new(); + opts.pathspec(self.rel_str(Path::new(path))); + let head_tree = repo.head().ok().and_then(|h| h.peel_to_tree().ok()); + let diff = repo.diff_tree_to_index(head_tree.as_ref(), None, Some(&mut opts))?; + Ok(diff.deltas().len() > 0) } /// Commit the staged changes with `message`. Returns the new - /// commit's full hash. Fails with [`GitError::Command`] when - /// there is nothing staged to commit. + /// commit's full hash. The committer identity comes from git config + /// (`user.name` / `user.email`); an unborn `HEAD` produces the + /// repository's first (parent-less) commit. pub fn commit(&self, message: &str) -> Result { - self.run(&["commit", "-m", message])?; - Ok(self.run(&["rev-parse", "HEAD"])?.trim().to_string()) + let repo = self.open()?; + let signature = repo.signature()?; + + // Snapshot the staged index as a tree. + let mut index = repo.index()?; + let tree_oid = index.write_tree()?; + let tree = repo.find_tree(tree_oid)?; + + // Parent = the current HEAD commit, if the branch has one yet. + let parents: Vec = match repo.head() { + Ok(head) => head.peel_to_commit().ok().into_iter().collect(), + Err(_) => Vec::new(), + }; + let parent_refs: Vec<&git2::Commit> = parents.iter().collect(); + + let oid = repo.commit( + Some("HEAD"), + &signature, + &signature, + message, + &tree, + &parent_refs, + )?; + Ok(oid.to_string()) } - /// Restore `path`'s working-tree content to its version at - /// `commit` (a hash, tag, branch, or `"HEAD"`). - /// - /// This mirrors the TS engine's `restoreFileFromCommit`: it - /// rewrites the working-tree file from the commit's blob and - /// leaves the index untouched. Passing `"HEAD"` therefore - /// discards uncommitted edits to the file; passing a historical - /// commit hash rolls the file back to that revision. + /// Restore `path`'s working-tree content to its version at `commit` + /// (a hash, tag, branch, or `"HEAD"`), rewriting the working-tree + /// file from the commit's blob and leaving the index untouched. pub fn restore(&self, path: &Path, commit: &str) -> Result<(), GitError> { - let Some(path) = path.to_str() else { + let repo = self.open()?; + let rel = self.rel_to_workdir(path); + let Some(rel_str) = rel.to_str() else { return Ok(()); }; - self.run(&["restore", "--source", commit, "--worktree", "--", path])?; + let object = repo.revparse_single(&format!("{commit}:{rel_str}"))?; + let blob = object.peel_to_blob()?; + let abs = self.workdir().join(&rel); + std::fs::write(&abs, blob.content()).map_err(|e| GitError::Io(e.to_string()))?; Ok(()) } -} -/// Parse `git status --porcelain=v1 --branch` output. -fn parse_status(raw: &str) -> RepoStatus { - let mut branch = None; - let mut ahead = 0; - let mut behind = 0; - let mut files = Vec::new(); - - for line in raw.lines() { - if let Some(rest) = line.strip_prefix("## ") { - let (b, a, be) = parse_branch_line(rest); - branch = b; - ahead = a; - behind = be; - } else if line.len() >= 3 { - files.push(parse_file_line(line)); - } + /// A path made relative to the work-tree root — libgit2 index / + /// pathspec APIs expect repo-relative paths, but callers pass + /// absolute document paths. + fn rel_to_workdir(&self, p: &Path) -> PathBuf { + p.strip_prefix(self.workdir()) + .map(Path::to_path_buf) + .unwrap_or_else(|_| p.to_path_buf()) } - RepoStatus { - branch, - files, - ahead, - behind, + /// `rel_to_workdir` as a `String` for pathspec strings. + fn rel_str(&self, p: &Path) -> String { + self.rel_to_workdir(p).to_string_lossy().into_owned() } } -/// Parse the `## ` branch header — `main...origin/main [ahead 1, behind 2]`. -fn parse_branch_line(rest: &str) -> (Option, u32, u32) { - // The branch name runs up to `...` (upstream marker) or a space. - let name_end = rest - .find("...") - .or_else(|| rest.find(' ')) - .unwrap_or(rest.len()); - let name = rest[..name_end].trim(); - // A brand-new repo with no commits reports `No commits yet on main`. - let branch = if name.is_empty() || name.contains("No commits yet") { - rest.rsplit(' ') - .next() - .filter(|s| !s.is_empty()) - .map(str::to_string) - } else { - Some(name.to_string()) - }; - - let mut ahead = 0; - let mut behind = 0; - if let (Some(open), Some(close)) = (rest.find('['), rest.find(']')) { - if open < close { - for part in rest[open + 1..close].split(',') { - let part = part.trim(); - if let Some(n) = part.strip_prefix("ahead ") { - ahead = n.trim().parse().unwrap_or(0); - } else if let Some(n) = part.strip_prefix("behind ") { - behind = n.trim().parse().unwrap_or(0); - } - } - } - } - (branch, ahead, behind) -} - -/// Parse one `XY ` porcelain line into a [`FileStatus`]. -fn parse_file_line(line: &str) -> FileStatus { - let code = &line[..2]; - let mut path = line[3..].to_string(); - // Renames render as `old -> new`; keep the destination path. - if let Some(idx) = path.find(" -> ") { - path = path[idx + 4..].to_string(); - } - let x = code.as_bytes()[0] as char; - let y = code.as_bytes()[1] as char; - - let state = if code == "??" { - ChangeState::Untracked - } else if is_conflict(x, y) { +/// Map a libgit2 status bitset to the single [`ChangeState`] the panel +/// shows. Conflicts win; otherwise the first matching add / delete / +/// rename / untracked classification, falling back to modified. +fn classify(s: git2::Status) -> ChangeState { + use git2::Status as St; + if s.is_conflicted() { ChangeState::Conflicted - } else if x == 'A' || y == 'A' { + } else if s.intersects(St::WT_NEW) && !s.intersects(St::INDEX_NEW) { + ChangeState::Untracked + } else if s.intersects(St::INDEX_NEW) { ChangeState::Added - } else if x == 'D' || y == 'D' { + } else if s.intersects(St::INDEX_DELETED | St::WT_DELETED) { ChangeState::Deleted - } else if x == 'R' || y == 'R' { + } else if s.intersects(St::INDEX_RENAMED | St::WT_RENAMED) { ChangeState::Renamed } else { ChangeState::Modified + } +} + +/// Ahead / behind commit counts of the current branch vs its configured +/// upstream — `(0, 0)` when detached, unborn, or with no upstream set. +fn ahead_behind(repo: &git2::Repository) -> (u32, u32) { + let resolve = || -> Option<(usize, usize)> { + let head = repo.head().ok()?; + let local = head.target()?; + let branch_name = head.shorthand()?; + let upstream = repo + .find_branch(branch_name, git2::BranchType::Local) + .ok()? + .upstream() + .ok()?; + let upstream_oid = upstream.get().target()?; + repo.graph_ahead_behind(local, upstream_oid).ok() }; - // The index column (`x`) carries the change when it is staged. - let staged = x != ' ' && x != '?'; - - FileStatus { - path, - state, - staged, - } -} - -/// Whether an `XY` porcelain code marks an unresolved merge conflict. -fn is_conflict(x: char, y: char) -> bool { - x == 'U' || y == 'U' || (x == 'A' && y == 'A') || (x == 'D' && y == 'D') -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parses_a_clean_branch_header() { - let s = parse_status("## main\n"); - assert_eq!(s.branch.as_deref(), Some("main")); - assert!(s.is_clean()); - assert_eq!((s.ahead, s.behind), (0, 0)); - } - - #[test] - fn parses_upstream_ahead_behind() { - let s = parse_status("## main...origin/main [ahead 2, behind 3]\n"); - assert_eq!(s.branch.as_deref(), Some("main")); - assert_eq!((s.ahead, s.behind), (2, 3)); - } - - #[test] - fn classifies_each_porcelain_code() { - let raw = "## main\n\ - M staged.txt\n\ - \u{20}M unstaged.txt\n\ - ?? new.txt\n\ - A added.txt\n\ - \u{20}D gone.txt\n\ - UU conflict.txt\n"; - let s = parse_status(raw); - assert_eq!(s.files.len(), 6); - let by = |name: &str| s.files.iter().find(|f| f.path == name).unwrap().clone(); - assert_eq!(by("staged.txt").state, ChangeState::Modified); - assert!(by("staged.txt").staged); - assert_eq!(by("unstaged.txt").state, ChangeState::Modified); - assert!(!by("unstaged.txt").staged); - assert_eq!(by("new.txt").state, ChangeState::Untracked); - assert_eq!(by("added.txt").state, ChangeState::Added); - assert_eq!(by("gone.txt").state, ChangeState::Deleted); - assert_eq!(by("conflict.txt").state, ChangeState::Conflicted); - assert!(s.has_conflicts()); - } - - #[test] - fn rename_keeps_the_destination_path() { - let s = parse_status("## main\nR old.txt -> new.txt\n"); - assert_eq!(s.files[0].path, "new.txt"); - assert_eq!(s.files[0].state, ChangeState::Renamed); - } + resolve() + .map(|(a, b)| (a as u32, b as u32)) + .unwrap_or((0, 0)) } diff --git a/crates/op-git/src/tests_merge.rs b/crates/op-git/src/tests_merge.rs index f0f0a347..a64a2c2b 100644 --- a/crates/op-git/src/tests_merge.rs +++ b/crates/op-git/src/tests_merge.rs @@ -42,6 +42,145 @@ fn pull_classifies_up_to_date_then_fast_forward() { } } +#[test] +fn pull_refuses_to_fast_forward_over_a_dirty_tree() { + if !git_available() { + return; + } + let remote = unique_temp_dir("ff-dirty-remote"); + Command::new("git") + .args(["init", "--bare", "--initial-branch=main"]) + .arg(&remote) + .output() + .expect("init bare remote"); + + let (a_dir, a) = clone_for_test(&remote, "ff-dirty-a"); + std::fs::write(a_dir.join("a.op"), "1").unwrap(); + a.stage_all().unwrap(); + a.commit("init").unwrap(); + a.run(&["push", "-u", "origin", "main"]).unwrap(); + + let (b_dir, b) = clone_for_test(&remote, "ff-dirty-b"); + + // A publishes a new commit — B *could* fast-forward. + std::fs::write(a_dir.join("a.op"), "2").unwrap(); + a.stage_all().unwrap(); + a.commit("update").unwrap(); + a.push().unwrap(); + + // B has an UNCOMMITTED edit to a tracked file. A fast-forward would + // force-overwrite it, so the pull must refuse (WorkingTreeDirty) + // rather than silently discard the local work — the data-loss + // regression the libgit2 migration introduced + this guard fixes. + std::fs::write(b_dir.join("a.op"), "local-uncommitted").unwrap(); + assert!( + matches!(b.pull(), Err(GitError::WorkingTreeDirty)), + "a fast-forward over a dirty tree must be refused, not forced" + ); + assert_eq!( + std::fs::read_to_string(b_dir.join("a.op")).unwrap(), + "local-uncommitted", + "the local uncommitted edit must survive the refused pull" + ); + + for dir in [remote, a_dir, b_dir] { + let _ = std::fs::remove_dir_all(dir); + } +} + +#[test] +fn pull_refuses_to_fast_forward_over_a_colliding_untracked_file() { + if !git_available() { + return; + } + let remote = unique_temp_dir("ff-untracked-remote"); + Command::new("git") + .args(["init", "--bare", "--initial-branch=main"]) + .arg(&remote) + .output() + .expect("init bare remote"); + + let (a_dir, a) = clone_for_test(&remote, "ff-untracked-a"); + std::fs::write(a_dir.join("a.op"), "1").unwrap(); + a.stage_all().unwrap(); + a.commit("init").unwrap(); + a.run(&["push", "-u", "origin", "main"]).unwrap(); + + let (b_dir, b) = clone_for_test(&remote, "ff-untracked-b"); + + // A adds a NEW tracked file the fast-forward would bring down to B. + std::fs::write(a_dir.join("new.op"), "from-remote").unwrap(); + a.stage_all().unwrap(); + a.commit("add new.op").unwrap(); + a.push().unwrap(); + + // B has an UNTRACKED file at that same path. A forced fast-forward + // would clobber it, so the pull must refuse — the untracked-overwrite + // data-loss case (git's "untracked working tree files would be + // overwritten by merge"). + std::fs::write(b_dir.join("new.op"), "local-untracked").unwrap(); + assert!( + matches!(b.pull(), Err(GitError::WorkingTreeDirty)), + "a fast-forward that would overwrite an untracked file must be refused" + ); + assert_eq!( + std::fs::read_to_string(b_dir.join("new.op")).unwrap(), + "local-untracked", + "the untracked file must survive the refused pull" + ); + + for dir in [remote, a_dir, b_dir] { + let _ = std::fs::remove_dir_all(dir); + } +} + +#[test] +fn pull_refuses_to_fast_forward_over_an_untracked_dir_replaced_by_a_file() { + if !git_available() { + return; + } + let remote = unique_temp_dir("ff-dir-remote"); + Command::new("git") + .args(["init", "--bare", "--initial-branch=main"]) + .arg(&remote) + .output() + .expect("init bare remote"); + + let (a_dir, a) = clone_for_test(&remote, "ff-dir-a"); + std::fs::write(a_dir.join("a.op"), "1").unwrap(); + a.stage_all().unwrap(); + a.commit("init").unwrap(); + a.run(&["push", "-u", "origin", "main"]).unwrap(); + + let (b_dir, b) = clone_for_test(&remote, "ff-dir-b"); + + // A adds a tracked FILE named `data` the fast-forward would bring down. + std::fs::write(a_dir.join("data"), "from-remote").unwrap(); + a.stage_all().unwrap(); + a.commit("add data file").unwrap(); + a.push().unwrap(); + + // B has an untracked DIRECTORY at that path holding a local file. A + // forced fast-forward would replace the directory with the incoming + // file, destroying the local work — the file↔directory collision the + // exact-path check missed; the pull must refuse. + std::fs::create_dir(b_dir.join("data")).unwrap(); + std::fs::write(b_dir.join("data").join("local.op"), "local-work").unwrap(); + assert!( + matches!(b.pull(), Err(GitError::WorkingTreeDirty)), + "a fast-forward that replaces an untracked directory with a file must be refused" + ); + assert_eq!( + std::fs::read_to_string(b_dir.join("data").join("local.op")).unwrap(), + "local-work", + "the untracked directory's file must survive the refused pull" + ); + + for dir in [remote, a_dir, b_dir] { + let _ = std::fs::remove_dir_all(dir); + } +} + #[test] fn pull_classifies_a_divergent_merge() { if !git_available() { diff --git a/crates/op-git/src/worktree.rs b/crates/op-git/src/worktree.rs index 8e3c5e69..fba9bfcf 100644 --- a/crates/op-git/src/worktree.rs +++ b/crates/op-git/src/worktree.rs @@ -12,6 +12,15 @@ //! conflicting one is reported as a [`crate::ConflictBag`] without //! ever marking up the user's files. The worktree directory and its //! git registration are removed when the handle drops. +//! +//! The implementation drives libgit2 in-process (`git2`) — no system +//! `git` subprocess. A linked worktree is created with +//! [`git2::Repository::worktree`], detached onto the requested commit +//! by opening the worktree repo and `set_head_detached` + a forced +//! `checkout_head`, and deregistered on drop via +//! [`git2::Worktree::prune`] with `working_tree` enabled so libgit2 +//! removes both the `.git/worktrees/` admin files and the +//! worktree directory itself. use std::path::PathBuf; @@ -26,20 +35,83 @@ pub(crate) struct MergeWorktree { main: GitRepo, /// The worktree directory. dir: PathBuf, + /// libgit2's registration name for this linked worktree (the + /// directory basename). Used to look the worktree up again on + /// drop so it can be pruned. + name: String, } impl MergeWorktree { /// Add a detached worktree at `dir` checked out at `commit`. - /// `dir` must not already exist — `git` creates it. + /// `dir` must not already exist — libgit2 creates it. pub(crate) fn create( main: &GitRepo, dir: PathBuf, commit: &str, ) -> Result { - let dir_str = dir - .to_str() - .ok_or_else(|| GitError::Io("worktree path is not valid UTF-8".to_string()))?; - main.run(&["worktree", "add", "--detach", dir_str, commit])?; + // The worktree registration name. libgit2 keys its admin + // files under `.git/worktrees/`; deriving it from the + // directory basename keeps it unique (the dir is itself a + // unique temp path) and lets `Drop` find the worktree again. + let name = dir + .file_name() + .and_then(|n| n.to_str()) + .ok_or_else(|| GitError::Io("worktree path has no valid basename".to_string()))? + .to_string(); + + let main_repo = main.open()?; + + // Resolve the committish (a full HEAD sha in practice, but + // accept any revspec) to the commit it names. The detached + // HEAD is pinned onto exactly this commit below. + let commit_oid = main_repo + .revparse_single(commit) + .map_err(|e| GitError::Command { + operation: "worktree add".to_string(), + stderr: e.message().to_string(), + })? + .peel_to_commit() + .map_err(|e| GitError::Command { + operation: "worktree add".to_string(), + stderr: e.message().to_string(), + })? + .id(); + + // Create the linked worktree. With no `reference` set in the + // options libgit2 checks it out at the main repo's HEAD; we + // re-point it onto `commit` (detached) immediately after. + let opts = git2::WorktreeAddOptions::new(); + let worktree = + main_repo + .worktree(&name, &dir, Some(&opts)) + .map_err(|e| GitError::Command { + operation: "worktree add".to_string(), + stderr: e.message().to_string(), + })?; + + // Detach the new worktree's HEAD onto the requested commit and + // force the working tree to match — the libgit2 equivalent of + // `git worktree add --detach `. + let wt_repo = + git2::Repository::open_from_worktree(&worktree).map_err(|e| GitError::Command { + operation: "worktree add".to_string(), + stderr: e.message().to_string(), + })?; + wt_repo + .set_head_detached(commit_oid) + .map_err(|e| GitError::Command { + operation: "worktree add".to_string(), + stderr: e.message().to_string(), + })?; + let mut checkout = git2::build::CheckoutBuilder::new(); + checkout.force(); + wt_repo + .checkout_head(Some(&mut checkout)) + .map_err(|e| GitError::Command { + operation: "worktree add".to_string(), + stderr: e.message().to_string(), + })?; + Ok(MergeWorktree { repo: GitRepo { workdir: dir.clone(), @@ -47,6 +119,7 @@ impl MergeWorktree { }, main: main.clone(), dir, + name, }) } @@ -59,15 +132,33 @@ impl MergeWorktree { impl Drop for MergeWorktree { fn drop(&mut self) { - // Deregister the worktree with git first so its admin files - // under `.git/worktrees/` are cleaned up. - if let Some(dir_str) = self.dir.to_str() { - let _ = self.main.run(&["worktree", "remove", "--force", dir_str]); + // Deregister the worktree with libgit2 first so its admin + // files under `.git/worktrees/` are cleaned up. `prune` with + // `working_tree` also recursively removes the worktree + // directory itself. + if let Ok(main_repo) = self.main.open() { + if let Ok(worktree) = main_repo.find_worktree(&self.name) { + let mut opts = git2::WorktreePruneOptions::new(); + // Prune even though the worktree is still valid (it + // exists on disk) and recursively remove its working + // directory — this is a throwaway tree by design. + opts.valid(true).working_tree(true); + let _ = worktree.prune(Some(&mut opts)); + } } - // Belt-and-suspenders: if `git worktree remove` failed (e.g. - // a half-created worktree), still drop the directory and - // prune the now-dangling registration. + // Belt-and-suspenders: if the prune failed or only removed the + // admin files (e.g. a half-created worktree, or a libgit2 + // build that left the directory behind), still drop the + // directory so no stale temp tree lingers. let _ = std::fs::remove_dir_all(&self.dir); - let _ = self.main.run(&["worktree", "prune"]); + // Final sweep: drop any now-dangling registration whose + // working tree no longer exists on disk. + if let Ok(main_repo) = self.main.open() { + if let Ok(worktree) = main_repo.find_worktree(&self.name) { + if worktree.is_prunable(None).unwrap_or(false) { + let _ = worktree.prune(None); + } + } + } } }