From cd607431e155d5d1267604dfdb4a798093c3a6d8 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 1 Jan 2026 00:19:23 +0000 Subject: [PATCH 001/784] t7527: fix flaky fsmonitor event tests with retry logic The fsmonitor event tests (edit, create, delete, rename, etc.) were flaky because there can be a race between the daemon writing events to the trace file and the test's grep commands checking for them. Add a retry_grep() helper function (similar to retry_until_success in lib-git-p4.sh) that retries grep with a timeout, and use it in all event-checking tests to wait for one expected event before checking the rest. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- t/t7527-builtin-fsmonitor.sh | 79 +++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh index 409cd0cd121695..e7b40654694c9e 100755 --- a/t/t7527-builtin-fsmonitor.sh +++ b/t/t7527-builtin-fsmonitor.sh @@ -408,9 +408,8 @@ move_directory() { # ensure we are getting the OS notifications and do not try to confirm what # is reported by `git status`. # -# We run a simple query after modifying the filesystem just to introduce -# a bit of a delay so that the trace logging from the daemon has time to -# get flushed to disk. +# We use retry_grep to handle races between the daemon writing events +# to the trace file and our check. # # We `reset` and `clean` at the bottom of each test (and before stopping the # daemon) because these commands might implicitly restart the daemon. @@ -422,6 +421,24 @@ clean_up_repo_and_stop_daemon () { rm -f .git/trace } +# Retry a grep up to RETRY_TIMEOUT times until it succeeds. 
+# +RETRY_TIMEOUT=5 + +retry_grep () { + nr_tries_left=$RETRY_TIMEOUT + until grep "$1" "$2" 2>/dev/null + do + if test $nr_tries_left -eq 0 + then + grep "$1" "$2" + return + fi + nr_tries_left=$(($nr_tries_left - 1)) + sleep 1 + done +} + test_expect_success 'edit some files' ' test_when_finished clean_up_repo_and_stop_daemon && @@ -429,12 +446,10 @@ test_expect_success 'edit some files' ' edit_files && - test-tool fsmonitor-client query --token 0 && - - grep "^event: dir1/modified$" .git/trace && - grep "^event: dir2/modified$" .git/trace && - grep "^event: modified$" .git/trace && - grep "^event: dir1/untracked$" .git/trace + retry_grep "^event: dir1/modified$" .git/trace && + retry_grep "^event: dir2/modified$" .git/trace && + retry_grep "^event: modified$" .git/trace && + retry_grep "^event: dir1/untracked$" .git/trace ' test_expect_success 'create some files' ' @@ -444,11 +459,9 @@ test_expect_success 'create some files' ' create_files && - test-tool fsmonitor-client query --token 0 && - - grep "^event: dir1/new$" .git/trace && - grep "^event: dir2/new$" .git/trace && - grep "^event: new$" .git/trace + retry_grep "^event: dir1/new$" .git/trace && + retry_grep "^event: dir2/new$" .git/trace && + retry_grep "^event: new$" .git/trace ' test_expect_success 'delete some files' ' @@ -458,11 +471,9 @@ test_expect_success 'delete some files' ' delete_files && - test-tool fsmonitor-client query --token 0 && - - grep "^event: dir1/delete$" .git/trace && - grep "^event: dir2/delete$" .git/trace && - grep "^event: delete$" .git/trace + retry_grep "^event: dir1/delete$" .git/trace && + retry_grep "^event: dir2/delete$" .git/trace && + retry_grep "^event: delete$" .git/trace ' test_expect_success 'rename some files' ' @@ -472,14 +483,12 @@ test_expect_success 'rename some files' ' rename_files && - test-tool fsmonitor-client query --token 0 && - - grep "^event: dir1/rename$" .git/trace && - grep "^event: dir2/rename$" .git/trace && - grep "^event: rename$" .git/trace && - 
grep "^event: dir1/renamed$" .git/trace && - grep "^event: dir2/renamed$" .git/trace && - grep "^event: renamed$" .git/trace + retry_grep "^event: dir1/rename$" .git/trace && + retry_grep "^event: dir2/rename$" .git/trace && + retry_grep "^event: rename$" .git/trace && + retry_grep "^event: dir1/renamed$" .git/trace && + retry_grep "^event: dir2/renamed$" .git/trace && + retry_grep "^event: renamed$" .git/trace ' test_expect_success 'rename directory' ' @@ -489,10 +498,8 @@ test_expect_success 'rename directory' ' mv dirtorename dirrenamed && - test-tool fsmonitor-client query --token 0 && - - grep "^event: dirtorename/*$" .git/trace && - grep "^event: dirrenamed/*$" .git/trace + retry_grep "^event: dirtorename/*$" .git/trace && + retry_grep "^event: dirrenamed/*$" .git/trace ' test_expect_success 'file changes to directory' ' @@ -502,10 +509,8 @@ test_expect_success 'file changes to directory' ' file_to_directory && - test-tool fsmonitor-client query --token 0 && - - grep "^event: delete$" .git/trace && - grep "^event: delete/new$" .git/trace + retry_grep "^event: delete$" .git/trace && + retry_grep "^event: delete/new$" .git/trace ' test_expect_success 'directory changes to a file' ' @@ -515,9 +520,7 @@ test_expect_success 'directory changes to a file' ' directory_to_file && - test-tool fsmonitor-client query --token 0 && - - grep "^event: dir1$" .git/trace + retry_grep "^event: dir1$" .git/trace ' # The next few test cases exercise the token-resync code. When filesystem From 5913fd26aad32bd028a8fe4e5b80fccc28e118af Mon Sep 17 00:00:00 2001 From: Ashlesh Gawande Date: Wed, 7 Jan 2026 13:17:24 +0530 Subject: [PATCH 002/784] t5550: add netrc tests for http 401/403 git allows using .netrc file to supply credentials for HTTP auth. 
Three test cases are added in this patch to provide missing coverage when cloning over HTTP using a .netrc file: - First test case checks that the git clone is successful when credentials are provided via a .netrc file. - Second test case checks that the git clone fails when the .netrc file provides invalid credentials. The HTTP server is expected to return 401 Unauthorized in such a case. The test checks that the user is provided with a prompt for username/password on 401 to provide the valid ones. - Third test case checks that the git clone fails when the .netrc file provides credentials that are valid but do not have permission for this user. For example, one may have multiple tokens in GitHub and use one which was not authorized for cloning this repo. In such a case the HTTP server returns 403 Forbidden. For this test, the apache.conf is modified to return a 403 on finding a forbidden-user. No prompt for username/password is expected after the 403 (unlike 401). This is because prompting may wipe out existing credentials or conflict with custom credential helpers. 
Signed-off-by: Ashlesh Gawande Signed-off-by: Junio C Hamano --- t/lib-httpd.sh | 13 +++++++++++-- t/lib-httpd/apache.conf | 4 ++++ t/lib-httpd/passwd | 1 + t/t5550-http-fetch-dumb.sh | 25 +++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh index 5091db949b7f99..5f42c311c2f2f5 100644 --- a/t/lib-httpd.sh +++ b/t/lib-httpd.sh @@ -319,13 +319,22 @@ setup_askpass_helper() { ' } -set_askpass() { +set_askpass () { >"$TRASH_DIRECTORY/askpass-query" && echo "$1" >"$TRASH_DIRECTORY/askpass-user" && echo "$2" >"$TRASH_DIRECTORY/askpass-pass" } -expect_askpass() { +set_netrc () { + # $HOME=$TRASH_DIRECTORY + echo "machine $1 login $2 password $3" >"$TRASH_DIRECTORY/.netrc" +} + +clear_netrc () { + rm -f "$TRASH_DIRECTORY/.netrc" +} + +expect_askpass () { dest=$HTTPD_DEST${3+/$3} { diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf index e631ab0eb5ef05..6b8c50a51a3b72 100644 --- a/t/lib-httpd/apache.conf +++ b/t/lib-httpd/apache.conf @@ -238,6 +238,10 @@ SSLEngine On AuthName "git-auth" AuthUserFile passwd Require valid-user + + # return 403 for authenticated user: forbidden-user@host + RewriteCond "%{REMOTE_USER}" "^forbidden-user@host" + RewriteRule ^ - [F] diff --git a/t/lib-httpd/passwd b/t/lib-httpd/passwd index d9c122f3482891..3bab7b64236b11 100644 --- a/t/lib-httpd/passwd +++ b/t/lib-httpd/passwd @@ -1 +1,2 @@ user@host:$apr1$LGPmCZWj$9vxEwj5Z5GzQLBMxp3mCx1 +forbidden-user@host:$apr1$LGPmCZWj$9vxEwj5Z5GzQLBMxp3mCx1 diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index ed0ad66fade32b..9530f01b9e3d13 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -102,6 +102,31 @@ test_expect_success 'cloning password-protected repository can fail' ' expect_askpass both wrong ' +test_expect_success 'using credentials from netrc to clone successfully' ' + test_when_finished clear_netrc && + set_askpass wrong && + set_netrc 127.0.0.1 user@host pass@host && + 
git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-netrc && + expect_askpass none +' + +test_expect_success 'netrc unauthorized credentials (prompt after 401)' ' + test_when_finished clear_netrc && + set_askpass wrong && + set_netrc 127.0.0.1 user@host pass@wrong && + test_must_fail git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-netrc-401 && + expect_askpass both wrong +' + +test_expect_success 'netrc authorized but forbidden credentials (fail on 403)' ' + test_when_finished clear_netrc && + set_askpass wrong && + set_netrc 127.0.0.1 forbidden-user@host pass@host && + test_must_fail git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-netrc-403 2>err && + expect_askpass none && + grep "The requested URL returned error: 403" err +' + test_expect_success 'http auth can use user/pass in URL' ' set_askpass wrong && git clone "$HTTPD_URL_USER_PASS/auth/dumb/repo.git" clone-auth-none && From 8cafc305e22a59efb92472d4132616e24d3184c6 Mon Sep 17 00:00:00 2001 From: Abraham Samuel Adekunle Date: Thu, 8 Jan 2026 16:07:04 +0100 Subject: [PATCH 003/784] add -p: show user's hunk decision when selecting hunks When a user is interactively deciding which hunks to use or skip for staging, unstaging, stashing, etc., there is no way to know the decision previously chosen for a hunk when navigating through the previous and next hunks using K/J respectively. Improve the UI to explicitly show if a user has previously decided to use a hunk (by pressing 'y') or skip the hunk (by pressing 'n'). This will improve clarity and aid the navigation process for the user. 
Reported-by: Junio C Hamano Signed-off-by: Abraham Samuel Adekunle Signed-off-by: Junio C Hamano --- add-patch.c | 81 +++++++++++++++++++++----------------- t/t3701-add-interactive.sh | 18 ++++----- 2 files changed, 53 insertions(+), 46 deletions(-) diff --git a/add-patch.c b/add-patch.c index 173a53241ebf07..df8f2e6d74e135 100644 --- a/add-patch.c +++ b/add-patch.c @@ -42,10 +42,10 @@ static struct patch_mode patch_mode_add = { .apply_args = { "--cached", NULL }, .apply_check_args = { "--cached", NULL }, .prompt_mode = { - N_("Stage mode change [y,n,q,a,d%s,?]? "), - N_("Stage deletion [y,n,q,a,d%s,?]? "), - N_("Stage addition [y,n,q,a,d%s,?]? "), - N_("Stage this hunk [y,n,q,a,d%s,?]? ") + N_("Stage mode change%s [y,n,q,a,d%s,?]? "), + N_("Stage deletion%s [y,n,q,a,d%s,?]? "), + N_("Stage addition%s [y,n,q,a,d%s,?]? "), + N_("Stage this hunk%s [y,n,q,a,d%s,?]? ") }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for staging."), @@ -64,10 +64,10 @@ static struct patch_mode patch_mode_stash = { .apply_args = { "--cached", NULL }, .apply_check_args = { "--cached", NULL }, .prompt_mode = { - N_("Stash mode change [y,n,q,a,d%s,?]? "), - N_("Stash deletion [y,n,q,a,d%s,?]? "), - N_("Stash addition [y,n,q,a,d%s,?]? "), - N_("Stash this hunk [y,n,q,a,d%s,?]? "), + N_("Stash mode change%s [y,n,q,a,d%s,?]? "), + N_("Stash deletion%s [y,n,q,a,d%s,?]? "), + N_("Stash addition%s [y,n,q,a,d%s,?]? "), + N_("Stash this hunk%s [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for stashing."), @@ -88,10 +88,10 @@ static struct patch_mode patch_mode_reset_head = { .is_reverse = 1, .index_only = 1, .prompt_mode = { - N_("Unstage mode change [y,n,q,a,d%s,?]? "), - N_("Unstage deletion [y,n,q,a,d%s,?]? "), - N_("Unstage addition [y,n,q,a,d%s,?]? "), - N_("Unstage this hunk [y,n,q,a,d%s,?]? "), + N_("Unstage mode change%s [y,n,q,a,d%s,?]? 
"), + N_("Unstage deletion%s [y,n,q,a,d%s,?]? "), + N_("Unstage addition%s [y,n,q,a,d%s,?]? "), + N_("Unstage this hunk%s [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for unstaging."), @@ -111,10 +111,10 @@ static struct patch_mode patch_mode_reset_nothead = { .apply_check_args = { "--cached", NULL }, .index_only = 1, .prompt_mode = { - N_("Apply mode change to index [y,n,q,a,d%s,?]? "), - N_("Apply deletion to index [y,n,q,a,d%s,?]? "), - N_("Apply addition to index [y,n,q,a,d%s,?]? "), - N_("Apply this hunk to index [y,n,q,a,d%s,?]? "), + N_("Apply mode change to index%s [y,n,q,a,d%s,?]? "), + N_("Apply deletion to index%s [y,n,q,a,d%s,?]? "), + N_("Apply addition to index%s [y,n,q,a,d%s,?]? "), + N_("Apply this hunk to index%s [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for applying."), @@ -134,10 +134,10 @@ static struct patch_mode patch_mode_checkout_index = { .apply_check_args = { "-R", NULL }, .is_reverse = 1, .prompt_mode = { - N_("Discard mode change from worktree [y,n,q,a,d%s,?]? "), - N_("Discard deletion from worktree [y,n,q,a,d%s,?]? "), - N_("Discard addition from worktree [y,n,q,a,d%s,?]? "), - N_("Discard this hunk from worktree [y,n,q,a,d%s,?]? "), + N_("Discard mode change from worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard deletion from worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard addition from worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard this hunk from worktree%s [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for discarding."), @@ -157,10 +157,10 @@ static struct patch_mode patch_mode_checkout_head = { .apply_check_args = { "-R", NULL }, .is_reverse = 1, .prompt_mode = { - N_("Discard mode change from index and worktree [y,n,q,a,d%s,?]? "), - N_("Discard deletion from index and worktree [y,n,q,a,d%s,?]? 
"), - N_("Discard addition from index and worktree [y,n,q,a,d%s,?]? "), - N_("Discard this hunk from index and worktree [y,n,q,a,d%s,?]? "), + N_("Discard mode change from index and worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard deletion from index and worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard addition from index and worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard this hunk from index and worktree%s [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for discarding."), @@ -179,10 +179,10 @@ static struct patch_mode patch_mode_checkout_nothead = { .apply_for_checkout = 1, .apply_check_args = { NULL }, .prompt_mode = { - N_("Apply mode change to index and worktree [y,n,q,a,d%s,?]? "), - N_("Apply deletion to index and worktree [y,n,q,a,d%s,?]? "), - N_("Apply addition to index and worktree [y,n,q,a,d%s,?]? "), - N_("Apply this hunk to index and worktree [y,n,q,a,d%s,?]? "), + N_("Apply mode change to index and worktree%s [y,n,q,a,d%s,?]? "), + N_("Apply deletion to index and worktree%s [y,n,q,a,d%s,?]? "), + N_("Apply addition to index and worktree%s [y,n,q,a,d%s,?]? "), + N_("Apply this hunk to index and worktree%s [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for applying."), @@ -202,10 +202,10 @@ static struct patch_mode patch_mode_worktree_head = { .apply_check_args = { "-R", NULL }, .is_reverse = 1, .prompt_mode = { - N_("Discard mode change from worktree [y,n,q,a,d%s,?]? "), - N_("Discard deletion from worktree [y,n,q,a,d%s,?]? "), - N_("Discard addition from worktree [y,n,q,a,d%s,?]? "), - N_("Discard this hunk from worktree [y,n,q,a,d%s,?]? "), + N_("Discard mode change from worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard deletion from worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard addition from worktree%s [y,n,q,a,d%s,?]? "), + N_("Discard this hunk from worktree%s [y,n,q,a,d%s,?]? 
"), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for discarding."), @@ -224,10 +224,10 @@ static struct patch_mode patch_mode_worktree_nothead = { .apply_args = { NULL }, .apply_check_args = { NULL }, .prompt_mode = { - N_("Apply mode change to worktree [y,n,q,a,d%s,?]? "), - N_("Apply deletion to worktree [y,n,q,a,d%s,?]? "), - N_("Apply addition to worktree [y,n,q,a,d%s,?]? "), - N_("Apply this hunk to worktree [y,n,q,a,d%s,?]? "), + N_("Apply mode change to worktree%s [y,n,q,a,d%s,?]? "), + N_("Apply deletion to worktree%s [y,n,q,a,d%s,?]? "), + N_("Apply addition to worktree%s [y,n,q,a,d%s,?]? "), + N_("Apply this hunk to worktree%s [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " "will immediately be marked for applying."), @@ -1460,6 +1460,7 @@ static int patch_update_file(struct add_p_state *s, render_diff_header(s, file_diff, colored, &s->buf); fputs(s->buf.buf, stdout); for (;;) { + const char *hunk_use_decision = ""; enum { ALLOW_GOTO_PREVIOUS_HUNK = 1 << 0, ALLOW_GOTO_PREVIOUS_UNDECIDED_HUNK = 1 << 1, @@ -1564,8 +1565,14 @@ static int patch_update_file(struct add_p_state *s, (uintmax_t)(file_diff->hunk_nr ? 
file_diff->hunk_nr : 1)); + if (hunk->use != UNDECIDED_HUNK) { + if (hunk->use == USE_HUNK) + hunk_use_decision = _(" (was: y)"); + else + hunk_use_decision = _(" (was: n)"); + } printf(_(s->mode->prompt_mode[prompt_mode_type]), - s->buf.buf); + hunk_use_decision, s->buf.buf); if (*s->s.reset_color_interactive) fputs(s->s.reset_color_interactive, stdout); fflush(stdout); diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index 4285314f35f8f2..5ce9c6dd60e9b0 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -527,7 +527,7 @@ test_expect_success 'goto hunk 1 with "g 1"' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ EOF test_write_lines s y g 1 | git add -p >actual && tail -n 7 actual.trimmed && @@ -540,7 +540,7 @@ test_expect_success 'goto hunk 1 with "g1"' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ EOF test_write_lines s y g1 | git add -p >actual && tail -n 4 actual.trimmed && @@ -554,7 +554,7 @@ test_expect_success 'navigate to hunk via regex /pattern' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ EOF test_write_lines s y /1,2 | git add -p >actual && tail -n 5 actual.trimmed && @@ -567,7 +567,7 @@ test_expect_success 'navigate to hunk via regex / pattern' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ EOF test_write_lines s y / 1,2 | git add -p >actual && tail -n 4 actual.trimmed && @@ -579,11 +579,11 @@ test_expect_success 'print again the hunk' ' tr _ " " >expect <<-EOF && +15 20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? @@ -1,2 +1,3 @@ + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? 
@@ -1,2 +1,3 @@ 10 +15 20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]?_ EOF test_write_lines s y g 1 p | git add -p >actual && tail -n 7 actual.trimmed && @@ -595,11 +595,11 @@ test_expect_success TTY 'print again the hunk (PAGER)' ' cat >expect <<-EOF && +15 20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? PAGER @@ -1,2 +1,3 @@ + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? PAGER @@ -1,2 +1,3 @@ PAGER 10 PAGER +15 PAGER 20 - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? EOF test_write_lines s y g 1 P | ( @@ -810,7 +810,7 @@ test_expect_success 'colors can be overridden' ' -old +new more-context - (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? + (1/2) Stage this hunk (was: y) [y,n,q,a,d,k,K,j,J,g,/,e,p,P,?]? EOF test_cmp expect actual ' From b60f7d890dee571069f9c3c6d44ecaed5c34fa39 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:22 +0200 Subject: [PATCH 004/784] submodule--helper: use submodule_name_to_gitdir in add_submodule While testing submodule gitdir path encoding, I noticed submodule--helper is still using a hardcoded modules gitdir path leading to test failures. Call the submodule_name_to_gitdir() helper instead, which was invented exactly for this purpose and is already used by all the other locations which work on gitdirs. Also narrow the scope of the submod_gitdir_path variable which is not used anymore in the updated "else" branch. 
Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fcd73abe5336a9..2873b2780ef941 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -3187,13 +3187,13 @@ static void append_fetch_remotes(struct strbuf *msg, const char *git_dir_path) static int add_submodule(const struct add_data *add_data) { - char *submod_gitdir_path; struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT; struct string_list reference = STRING_LIST_INIT_NODUP; int ret = -1; /* perhaps the path already exists and is already a git repo, else clone it */ if (is_directory(add_data->sm_path)) { + char *submod_gitdir_path; struct strbuf sm_path = STRBUF_INIT; strbuf_addstr(&sm_path, add_data->sm_path); submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path); @@ -3207,10 +3207,11 @@ static int add_submodule(const struct add_data *add_data) free(submod_gitdir_path); } else { struct child_process cp = CHILD_PROCESS_INIT; + struct strbuf submod_gitdir = STRBUF_INIT; - submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name); + submodule_name_to_gitdir(&submod_gitdir, the_repository, add_data->sm_name); - if (is_directory(submod_gitdir_path)) { + if (is_directory(submod_gitdir.buf)) { if (!add_data->force) { struct strbuf msg = STRBUF_INIT; char *die_msg; @@ -3219,8 +3220,8 @@ static int add_submodule(const struct add_data *add_data) "locally with remote(s):\n"), add_data->sm_name); - append_fetch_remotes(&msg, submod_gitdir_path); - free(submod_gitdir_path); + append_fetch_remotes(&msg, submod_gitdir.buf); + strbuf_release(&submod_gitdir); strbuf_addf(&msg, _("If you want to reuse this local git " "directory instead of cloning again from\n" @@ -3238,7 +3239,7 @@ static int add_submodule(const struct add_data *add_data) "submodule '%s'\n"), add_data->sm_name); } } - 
free(submod_gitdir_path); + strbuf_release(&submod_gitdir); clone_data.prefix = add_data->prefix; clone_data.path = add_data->sm_path; From 05a1cdb5255b2b438e66bdaa91b7b2ce75fbe71b Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:23 +0200 Subject: [PATCH 005/784] submodule: always validate gitdirs inside submodule_name_to_gitdir Move the ad-hoc validation checks sprinkled across the source tree after calls to submodule_name_to_gitdir() into the function proper, which now always validates the gitdir before returning it. This simplifies the API and helps to: 1. Avoid redundant validation calls after submodule_name_to_gitdir(). 2. Avoid the risk of callers forgetting to validate. 3. Ensure gitdir paths provided by users via configs are always valid (config gitdir paths are added in a subsequent commit). The validation function can still be called as many times as needed outside submodule_name_to_gitdir(); for example, we keep two calls which are still required to avoid parallel clone races, re-running the validation in builtin/submodule--helper.c. 
Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 4 ---- submodule.c | 12 ++++-------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 2873b2780ef941..fc10ace5a847f1 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1703,10 +1703,6 @@ static int clone_submodule(const struct module_clone_data *clone_data, clone_data_path = to_free = xstrfmt("%s/%s", repo_get_work_tree(the_repository), clone_data->path); - if (validate_submodule_git_dir(sm_gitdir, clone_data->name) < 0) - die(_("refusing to create/use '%s' in another submodule's " - "git dir"), sm_gitdir); - if (!file_exists(sm_gitdir)) { if (clone_data->require_init && !stat(clone_data_path, &st) && !is_empty_dir(clone_data_path)) diff --git a/submodule.c b/submodule.c index 35c55155f7bf83..d937911fbcbd47 100644 --- a/submodule.c +++ b/submodule.c @@ -2172,11 +2172,6 @@ int submodule_move_head(const char *path, const char *super_prefix, struct strbuf gitdir = STRBUF_INIT; submodule_name_to_gitdir(&gitdir, the_repository, sub->name); - if (validate_submodule_git_dir(gitdir.buf, - sub->name) < 0) - die(_("refusing to create/use '%s' in another " - "submodule's git dir"), - gitdir.buf); connect_work_tree_and_git_dir(path, gitdir.buf, 0); strbuf_release(&gitdir); @@ -2355,9 +2350,6 @@ static void relocate_single_git_dir_into_superproject(const char *path, die(_("could not lookup name for submodule '%s'"), path); submodule_name_to_gitdir(&new_gitdir, the_repository, sub->name); - if (validate_submodule_git_dir(new_gitdir.buf, sub->name) < 0) - die(_("refusing to move '%s' into an existing git dir"), - real_old_git_dir); if (safe_create_leading_directories_const(the_repository, new_gitdir.buf) < 0) die(_("could not create directory '%s'"), new_gitdir.buf); real_new_git_dir = real_pathdup(new_gitdir.buf, 1); @@ -2606,4 +2598,8 @@ void submodule_name_to_gitdir(struct strbuf 
*buf, struct repository *r, */ repo_git_path_append(r, buf, "modules/"); strbuf_addstr(buf, submodule_name); + + if (validate_submodule_git_dir(buf->buf, submodule_name) < 0) + die(_("refusing to create/use '%s' in another submodule's " + "git dir"), buf->buf); } From 34206caaf7c5059ac8480587e31cfc40473002b4 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:24 +0200 Subject: [PATCH 006/784] builtin/submodule--helper: add gitdir command This exposes the gitdir name computed by submodule_name_to_gitdir() internally, to make it easier for users and tests to interact with it. The next commit will add a gitdir configuration, so this helper can also be used to easily query that config or validate any gitdir path the user sets (submodule_name_to_gitdir() runs the validation logic as of the previous commit). Based-on-patch-by: Brandon Williams Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fc10ace5a847f1..7ea82d7fa243e4 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1208,6 +1208,22 @@ static int module_summary(int argc, const char **argv, const char *prefix, return ret; } +static int module_gitdir(int argc, const char **argv, const char *prefix UNUSED, + struct repository *repo) +{ + struct strbuf gitdir = STRBUF_INIT; + + if (argc != 2) + usage(_("git submodule--helper gitdir ")); + + submodule_name_to_gitdir(&gitdir, repo, argv[1]); + + printf("%s\n", gitdir.buf); + + strbuf_release(&gitdir); + return 0; +} + struct sync_cb { const char *prefix; const char *super_prefix; @@ -3587,6 +3603,7 @@ int cmd_submodule__helper(int argc, NULL }; struct option options[] = { + OPT_SUBCOMMAND("gitdir", &fn, module_gitdir), OPT_SUBCOMMAND("clone", &fn, module_clone), OPT_SUBCOMMAND("add", &fn, module_add), OPT_SUBCOMMAND("update", &fn, module_update), From 
4173df5187c8ba8bc2cc1a215f25b284d70631da Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:25 +0200 Subject: [PATCH 007/784] submodule: introduce extensions.submodulePathConfig The idea of this extension is to abstract away the submodule gitdir path implementation: everyone is expected to use the config and not worry about how the path is computed internally, either in git or other implementations. With this extension enabled, the submodule.<name>.gitdir repo config becomes the single source of truth for all submodule gitdir paths. The submodule.<name>.gitdir config is added automatically for all new submodules when this extension is enabled. Git will throw an error if the extension is enabled and a config is missing, advising users how to migrate. Migration is manual for now. E.g. to add a missing config entry for an existing "foo" module: git config submodule.foo.gitdir .git/modules/foo Suggested-by: Junio C Hamano Suggested-by: Phillip Wood Suggested-by: Patrick Steinhardt Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/extensions.adoc | 23 +++ Documentation/config/submodule.adoc | 7 + builtin/submodule--helper.c | 54 ++++++- repository.c | 1 + repository.h | 1 + setup.c | 7 + setup.h | 1 + submodule.c | 61 ++++---- t/lib-verify-submodule-gitdir-path.sh | 24 ++++ t/meson.build | 1 + t/t7425-submodule-gitdir-path-extension.sh | 160 +++++++++++++++++++++ t/t9902-completion.sh | 1 + 12 files changed, 313 insertions(+), 28 deletions(-) create mode 100644 t/lib-verify-submodule-gitdir-path.sh create mode 100755 t/t7425-submodule-gitdir-path-extension.sh diff --git a/Documentation/config/extensions.adoc b/Documentation/config/extensions.adoc index 532456644b770e..f4f57c91141ccf 100644 --- a/Documentation/config/extensions.adoc +++ b/Documentation/config/extensions.adoc @@ -73,6 +73,29 @@ relativeWorktrees::: repaired with either the `--relative-paths` option or with the `worktree.useRelativePaths` config set to `true`. 
+submodulePathConfig::: + This extension is for the minority of users who: ++ +-- +* Encounter errors like `refusing to create ... in another submodule's git dir` + due to a number of reasons, like case-insensitive filesystem conflicts when + creating modules named `foo` and `Foo`. +* Require more flexible submodule layouts, for example due to nested names like + `foo`, `foo/bar` and `foo/baz` not supported by the default gitdir mechanism + which uses `.git/modules/<name>` locations, causing further conflicts. +-- ++ +When `extensions.submodulePathConfig` is enabled, the `submodule.<name>.gitdir` +config becomes the single source of truth for all submodule gitdir paths and is +automatically set for all new submodules both during clone and init operations. ++ +Git will error out if a module does not have a corresponding +`submodule.<name>.gitdir` set. ++ +Existing (pre-extension) submodules need to be migrated by adding the missing +config entries. This is done manually for now, e.g. for each submodule: +`git config submodule.<name>.gitdir .git/modules/<name>`. + worktreeConfig::: If enabled, then worktrees will load config settings from the `$GIT_DIR/config.worktree` file in addition to the diff --git a/Documentation/config/submodule.adoc b/Documentation/config/submodule.adoc index 0672d9911724d1..74f1659a91cfb8 100644 --- a/Documentation/config/submodule.adoc +++ b/Documentation/config/submodule.adoc @@ -52,6 +52,13 @@ submodule.<name>.active:: submodule.active config option. See linkgit:gitsubmodules[7] for details. +submodule.<name>.gitdir:: + This sets the gitdir path for submodule <name>. This configuration is + respected when `extensions.submodulePathConfig` is enabled, otherwise it + has no effect. When enabled, this config becomes the single source of + truth for submodule gitdir paths and Git will error if it is missing. + See linkgit:git-config[1] for details. 
+ submodule.active:: A repeated field which contains a pathspec used to match against a submodule's path to determine if the submodule is of interest to git diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 7ea82d7fa243e4..ef373525349c2a 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -435,6 +435,48 @@ struct init_cb { }; #define INIT_CB_INIT { 0 } +static int validate_and_set_submodule_gitdir(struct strbuf *gitdir_path, + const char *submodule_name) +{ + const char *value; + char *key; + + if (validate_submodule_git_dir(gitdir_path->buf, submodule_name)) + return -1; + + key = xstrfmt("submodule.%s.gitdir", submodule_name); + + /* Nothing to do if the config already exists. */ + if (!repo_config_get_string_tmp(the_repository, key, &value)) { + free(key); + return 0; + } + + if (repo_config_set_gently(the_repository, key, gitdir_path->buf)) { + free(key); + return -1; + } + + free(key); + return 0; +} + +static void create_default_gitdir_config(const char *submodule_name) +{ + struct strbuf gitdir_path = STRBUF_INIT; + + repo_git_path_append(the_repository, &gitdir_path, "modules/%s", submodule_name); + if (!validate_and_set_submodule_gitdir(&gitdir_path, submodule_name)) { + strbuf_release(&gitdir_path); + return; + } + + die(_("failed to set a valid default config for 'submodule.%s.gitdir'. 
" + "Please ensure it is set, for example by running something like: " + "'git config submodule.%s.gitdir .git/modules/%s'"), + submodule_name, submodule_name, submodule_name); +} + static void init_submodule(const char *path, const char *prefix, const char *super_prefix, unsigned int flags) @@ -511,6 +553,10 @@ static void init_submodule(const char *path, const char *prefix, if (repo_config_set_gently(the_repository, sb.buf, upd)) die(_("Failed to register update mode for submodule path '%s'"), displaypath); } + + if (the_repository->repository_format_submodule_path_cfg) + create_default_gitdir_config(sub->name); + strbuf_release(&sb); free(displaypath); free(url); @@ -1805,8 +1851,9 @@ static int clone_submodule(const struct module_clone_data *clone_data, char *head = xstrfmt("%s/HEAD", sm_gitdir); unlink(head); free(head); - die(_("refusing to create/use '%s' in another submodule's " - "git dir"), sm_gitdir); + die(_("refusing to create/use '%s' in another submodule's git dir. " + "Enabling extensions.submodulePathConfig should fix this."), + sm_gitdir); } connect_work_tree_and_git_dir(clone_data_path, sm_gitdir, 0); @@ -3578,6 +3625,9 @@ static int module_add(int argc, const char **argv, const char *prefix, add_data.progress = !!progress; add_data.dissociate = !!dissociate; + if (the_repository->repository_format_submodule_path_cfg) + create_default_gitdir_config(add_data.sm_name); + if (add_submodule(&add_data)) goto cleanup; configure_added_submodule(&add_data); diff --git a/repository.c b/repository.c index 6faf5c73981ebf..35a06e6719a7b5 100644 --- a/repository.c +++ b/repository.c @@ -288,6 +288,7 @@ int repo_init(struct repository *repo, repo->repository_format_worktree_config = format.worktree_config; repo->repository_format_relative_worktrees = format.relative_worktrees; repo->repository_format_precious_objects = format.precious_objects; + repo->repository_format_submodule_path_cfg = format.submodule_path_cfg; /* take ownership of format.partial_clone */ 
repo->repository_format_partial_clone = format.partial_clone; diff --git a/repository.h b/repository.h index 5808a5d610846a..aa907bd1e4d18c 100644 --- a/repository.h +++ b/repository.h @@ -158,6 +158,7 @@ struct repository { int repository_format_worktree_config; int repository_format_relative_worktrees; int repository_format_precious_objects; + int repository_format_submodule_path_cfg; /* Indicate if a repository has a different 'commondir' from 'gitdir' */ unsigned different_commondir:1; diff --git a/setup.c b/setup.c index 7086741e6c2d1f..207fa36e100b61 100644 --- a/setup.c +++ b/setup.c @@ -687,6 +687,9 @@ static enum extension_result handle_extension(const char *var, } else if (!strcmp(ext, "relativeworktrees")) { data->relative_worktrees = git_config_bool(var, value); return EXTENSION_OK; + } else if (!strcmp(ext, "submodulepathconfig")) { + data->submodule_path_cfg = git_config_bool(var, value); + return EXTENSION_OK; } return EXTENSION_UNKNOWN; } @@ -1865,6 +1868,8 @@ const char *setup_git_directory_gently(int *nongit_ok) repo_fmt.worktree_config; the_repository->repository_format_relative_worktrees = repo_fmt.relative_worktrees; + the_repository->repository_format_submodule_path_cfg = + repo_fmt.submodule_path_cfg; /* take ownership of repo_fmt.partial_clone */ the_repository->repository_format_partial_clone = repo_fmt.partial_clone; @@ -1963,6 +1968,8 @@ void check_repository_format(struct repository_format *fmt) fmt->ref_storage_format); the_repository->repository_format_worktree_config = fmt->worktree_config; + the_repository->repository_format_submodule_path_cfg = + fmt->submodule_path_cfg; the_repository->repository_format_relative_worktrees = fmt->relative_worktrees; the_repository->repository_format_partial_clone = diff --git a/setup.h b/setup.h index 8522fa8575da71..568bb9f1d12c39 100644 --- a/setup.h +++ b/setup.h @@ -130,6 +130,7 @@ struct repository_format { char *partial_clone; /* value of extensions.partialclone */ int worktree_config; int 
relative_worktrees; + int submodule_path_cfg; int is_bare; int hash_algo; int compat_hash_algo; diff --git a/submodule.c b/submodule.c index d937911fbcbd47..f7af389c79099e 100644 --- a/submodule.c +++ b/submodule.c @@ -31,6 +31,7 @@ #include "commit-reach.h" #include "read-cache-ll.h" #include "setup.h" +#include "advice.h" static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF; static int initialized_fetch_ref_tips; @@ -2164,8 +2165,9 @@ int submodule_move_head(const char *path, const char *super_prefix, if (validate_submodule_git_dir(git_dir, sub->name) < 0) die(_("refusing to create/use '%s' in " - "another submodule's git dir"), - git_dir); + "another submodule's git dir. " + "Enabling extensions.submodulePathConfig " + "should fix this."), git_dir); free(git_dir); } } else { @@ -2576,30 +2578,37 @@ int submodule_to_gitdir(struct repository *repo, void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r, const char *submodule_name) { - /* - * NEEDSWORK: The current way of mapping a submodule's name to - * its location in .git/modules/ has problems with some naming - * schemes. For example, if a submodule is named "foo" and - * another is named "foo/bar" (whether present in the same - * superproject commit or not - the problem will arise if both - * superproject commits have been checked out at any point in - * time), or if two submodule names only have different cases in - * a case-insensitive filesystem. - * - * There are several solutions, including encoding the path in - * some way, introducing a submodule..gitdir config in - * .git/config (not .gitmodules) that allows overriding what the - * gitdir of a submodule would be (and teach Git, upon noticing - * a clash, to automatically determine a non-clashing name and - * to write such a config), or introducing a - * submodule..gitdir config in .gitmodules that repo - * administrators can explicitly set. 
Nothing has been decided, - * so for now, just append the name at the end of the path. - */ - repo_git_path_append(r, buf, "modules/"); - strbuf_addstr(buf, submodule_name); + if (!r->repository_format_submodule_path_cfg) { + /* + * If extensions.submodulePathConfig is disabled, + * continue to use the plain path. + */ + repo_git_path_append(r, buf, "modules/%s", submodule_name); + } else { + const char *gitdir; + char *key; + int ret; - if (validate_submodule_git_dir(buf->buf, submodule_name) < 0) + /* Otherwise the extension is enabled, so use the gitdir config. */ + key = xstrfmt("submodule.%s.gitdir", submodule_name); + ret = repo_config_get_string_tmp(r, key, &gitdir); + FREE_AND_NULL(key); + + if (ret) + die(_("the 'submodule.%s.gitdir' config does not exist for module '%s'. " + "Please ensure it is set, for example by running something like: " + "'git config submodule.%s.gitdir .git/modules/%s'. For details " + "see the extensions.submodulePathConfig documentation."), + submodule_name, submodule_name, submodule_name, submodule_name); + + strbuf_addstr(buf, gitdir); + } + + /* validate because users might have modified the config */ + if (validate_submodule_git_dir(buf->buf, submodule_name)) { + advise(_("enabling extensions.submodulePathConfig might fix the " + "following error, if it's not already enabled.")); die(_("refusing to create/use '%s' in another submodule's " - "git dir"), buf->buf); + " git dir."), buf->buf); + } } diff --git a/t/lib-verify-submodule-gitdir-path.sh b/t/lib-verify-submodule-gitdir-path.sh new file mode 100644 index 00000000000000..4e0cfdc605bd56 --- /dev/null +++ b/t/lib-verify-submodule-gitdir-path.sh @@ -0,0 +1,24 @@ +# Helper to verify if repo $1 contains a submodule named $2 with gitdir path $3 + +# This does not check filesystem existence. That is done in submodule.c via the +# submodule_name_to_gitdir() API which this helper ends up calling. The gitdirs +# might or might not exist (e.g. 
when adding a new submodule), so this only +# checks the expected configuration path, which might be overridden by the user. + +verify_submodule_gitdir_path () { + repo="$1" && + name="$2" && + path="$3" && + ( + cd "$repo" && + # Compute expected absolute path + expected="$(git rev-parse --git-common-dir)/$path" && + expected="$(test-tool path-utils real_path "$expected")" && + # Compute actual absolute path + actual="$(git submodule--helper gitdir "$name")" && + actual="$(test-tool path-utils real_path "$actual")" && + echo "$expected" >expect && + echo "$actual" >actual && + test_cmp expect actual + ) +} diff --git a/t/meson.build b/t/meson.build index a5531df415ffe2..2c565beb8d22e8 100644 --- a/t/meson.build +++ b/t/meson.build @@ -884,6 +884,7 @@ integration_tests = [ 't7422-submodule-output.sh', 't7423-submodule-symlinks.sh', 't7424-submodule-mixed-ref-formats.sh', + 't7425-submodule-gitdir-path-extension.sh', 't7450-bad-git-dotfiles.sh', 't7500-commit-template-squash-signoff.sh', 't7501-commit-basic-functionality.sh', diff --git a/t/t7425-submodule-gitdir-path-extension.sh b/t/t7425-submodule-gitdir-path-extension.sh new file mode 100755 index 00000000000000..453183e27c95a2 --- /dev/null +++ b/t/t7425-submodule-gitdir-path-extension.sh @@ -0,0 +1,160 @@ +#!/bin/sh + +test_description='submodulePathConfig extension works as expected' + +. ./test-lib.sh +. 
"$TEST_DIRECTORY"/lib-verify-submodule-gitdir-path.sh + +test_expect_success 'setup: allow file protocol' ' + git config --global protocol.file.allow always +' + +test_expect_success 'create repo with mixed extension submodules' ' + git init -b main legacy-sub && + test_commit -C legacy-sub legacy-initial && + legacy_rev=$(git -C legacy-sub rev-parse HEAD) && + + git init -b main new-sub && + test_commit -C new-sub new-initial && + new_rev=$(git -C new-sub rev-parse HEAD) && + + git init -b main main && + ( + cd main && + git submodule add ../legacy-sub legacy && + test_commit legacy-sub && + + # trigger the "die_path_inside_submodule" check + test_must_fail git submodule add ../new-sub "legacy/nested" && + + git config core.repositoryformatversion 1 && + git config extensions.submodulePathConfig true && + + git submodule add ../new-sub "New Sub" && + test_commit new && + + # retrigger the "die_path_inside_submodule" check with encoding + test_must_fail git submodule add ../new-sub "New Sub/nested2" + ) +' + +test_expect_success 'verify new submodule gitdir config' ' + git -C main config submodule."New Sub".gitdir >actual && + echo ".git/modules/New Sub" >expect && + test_cmp expect actual && + verify_submodule_gitdir_path main "New Sub" "modules/New Sub" +' + +test_expect_success 'manual add and verify legacy submodule gitdir config' ' + # the legacy module should not contain a gitdir config, because it + # was added before the extension was enabled. Add and test it. 
+ test_must_fail git -C main config submodule.legacy.gitdir && + git -C main config submodule.legacy.gitdir .git/modules/legacy && + git -C main config submodule.legacy.gitdir >actual && + echo ".git/modules/legacy" >expect && + test_cmp expect actual && + verify_submodule_gitdir_path main "legacy" "modules/legacy" +' + +test_expect_success 'gitdir config path is relative for both absolute and relative urls' ' + test_when_finished "rm -rf relative-cfg-path-test" && + git init -b main relative-cfg-path-test && + ( + cd relative-cfg-path-test && + git config core.repositoryformatversion 1 && + git config extensions.submodulePathConfig true && + + # Test with absolute URL + git submodule add "$TRASH_DIRECTORY/new-sub" sub-abs && + git config submodule.sub-abs.gitdir >actual && + echo ".git/modules/sub-abs" >expect && + test_cmp expect actual && + + # Test with relative URL + git submodule add ../new-sub sub-rel && + git config submodule.sub-rel.gitdir >actual && + echo ".git/modules/sub-rel" >expect && + test_cmp expect actual + ) +' + +test_expect_success 'clone from repo with both legacy and new-style submodules' ' + git clone --recurse-submodules main cloned-non-extension && + ( + cd cloned-non-extension && + + test_path_is_dir .git/modules/legacy && + test_path_is_dir .git/modules/"New Sub" && + + test_must_fail git config submodule.legacy.gitdir && + test_must_fail git config submodule."New Sub".gitdir && + + git submodule status >list && + test_grep "$legacy_rev legacy" list && + test_grep "$new_rev New Sub" list + ) && + + git clone -c extensions.submodulePathConfig=true --recurse-submodules main cloned-extension && + ( + cd cloned-extension && + + test_path_is_dir .git/modules/legacy && + test_path_is_dir ".git/modules/New Sub" && + + git config submodule.legacy.gitdir && + git config submodule."New Sub".gitdir && + + git submodule status >list && + test_grep "$legacy_rev legacy" list && + test_grep "$new_rev New Sub" list + ) +' + +test_expect_success 'commit 
and push changes to encoded submodules' ' + git -C legacy-sub config receive.denyCurrentBranch updateInstead && + git -C new-sub config receive.denyCurrentBranch updateInstead && + git -C main config receive.denyCurrentBranch updateInstead && + ( + cd cloned-extension && + + git -C legacy switch --track -C main origin/main && + test_commit -C legacy second-commit && + git -C legacy push && + + git -C "New Sub" switch --track -C main origin/main && + test_commit -C "New Sub" second-commit && + git -C "New Sub" push && + + # Stage and commit submodule changes in superproject + git switch --track -C main origin/main && + git add legacy "New Sub" && + git commit -m "update submodules" && + + # push superproject commit to main repo + git push + ) && + + # update expected legacy & new submodule checksums + legacy_rev=$(git -C legacy-sub rev-parse HEAD) && + new_rev=$(git -C new-sub rev-parse HEAD) +' + +test_expect_success 'fetch mixed submodule changes and verify updates' ' + ( + cd main && + + # only update submodules because superproject was + # pushed into at the end of last test + git submodule update --init --recursive && + + test_path_is_dir .git/modules/legacy && + test_path_is_dir ".git/modules/New Sub" && + + # Verify both submodules are at the expected commits + git submodule status >list && + test_grep "$legacy_rev legacy" list && + test_grep "$new_rev New Sub" list + ) +' + +test_done diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 964e1f156932c6..ffb9c8b522e269 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -3053,6 +3053,7 @@ test_expect_success 'git config set - variable name - __git_compute_second_level submodule.sub.fetchRecurseSubmodules Z submodule.sub.ignore Z submodule.sub.active Z + submodule.sub.gitdir Z EOF ' From c349bad72969d59758e1294b4e9964dccd967fa0 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:26 +0200 Subject: [PATCH 008/784] submodule: allow runtime enabling 
extensions.submodulePathConfig Add a new config `init.defaultSubmodulePathConfig` which allows enabling `extensions.submodulePathConfig` for new submodules by default (those created via git init or clone). Important: setting init.defaultSubmodulePathConfig = true does not globally enable `extensions.submodulePathConfig`. Existing repositories will still have the extension disabled and will require migration (for example via git submodule--helper command added in the next commit). Suggested-by: Patrick Steinhardt Suggested-by: Junio C Hamano Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/extensions.adoc | 4 + Documentation/config/init.adoc | 6 + setup.c | 10 ++ t/t7425-submodule-gitdir-path-extension.sh | 122 +++++++++++++++++++++ 4 files changed, 142 insertions(+) diff --git a/Documentation/config/extensions.adoc b/Documentation/config/extensions.adoc index f4f57c91141ccf..e8d9d9a19a5fb6 100644 --- a/Documentation/config/extensions.adoc +++ b/Documentation/config/extensions.adoc @@ -95,6 +95,10 @@ Git will error out if a module does not have a corresponding Existing (pre-extension) submodules need to be migrated by adding the missing config entries. This is done manually for now, e.g. for each submodule: `git config submodule.<name>.gitdir .git/modules/<name>`. ++ +The extension can be enabled automatically for new repositories by setting +`init.defaultSubmodulePathConfig` to `true`, for example by running +`git config --global init.defaultSubmodulePathConfig true`. worktreeConfig::: If enabled, then worktrees will load config settings from the diff --git a/Documentation/config/init.adoc b/Documentation/config/init.adoc index e45b2a812151dc..7b4abdaf8ba29b 100644 --- a/Documentation/config/init.adoc +++ b/Documentation/config/init.adoc @@ -18,3 +18,9 @@ endif::[] See `--ref-format=` in linkgit:git-init[1]. Both the command line option and the `GIT_DEFAULT_REF_FORMAT` environment variable take precedence over this config.
+ +init.defaultSubmodulePathConfig:: + A boolean that specifies if `git init` and `git clone` should + automatically set `extensions.submodulePathConfig` to `true`. This + allows all new repositories to automatically use the submodule path + extension. Defaults to `false` when unset. diff --git a/setup.c b/setup.c index 207fa36e100b61..3f91a4aaec174b 100644 --- a/setup.c +++ b/setup.c @@ -2228,6 +2228,7 @@ void initialize_repository_version(int hash_algo, { struct strbuf repo_version = STRBUF_INIT; int target_version = GIT_REPO_VERSION; + int default_submodule_path_config = 0; /* * Note that we initialize the repository version to 1 when the ref @@ -2266,6 +2267,15 @@ void initialize_repository_version(int hash_algo, clear_repository_format(&repo_fmt); } + repo_config_get_bool(the_repository, "init.defaultSubmodulePathConfig", + &default_submodule_path_config); + if (default_submodule_path_config) { + /* extensions.submodulepathconfig requires at least version 1 */ + if (target_version == 0) + target_version = 1; + repo_config_set(the_repository, "extensions.submodulepathconfig", "true"); + } + strbuf_addf(&repo_version, "%d", target_version); repo_config_set(the_repository, "core.repositoryformatversion", repo_version.buf); diff --git a/t/t7425-submodule-gitdir-path-extension.sh b/t/t7425-submodule-gitdir-path-extension.sh index 453183e27c95a2..03ac165de96477 100755 --- a/t/t7425-submodule-gitdir-path-extension.sh +++ b/t/t7425-submodule-gitdir-path-extension.sh @@ -157,4 +157,126 @@ test_expect_success 'fetch mixed submodule changes and verify updates' ' ) ' +test_expect_success '`git init` respects init.defaultSubmodulePathConfig' ' + test_config_global init.defaultSubmodulePathConfig true && + git init repo-init && + git -C repo-init config extensions.submodulePathConfig >actual && + echo true >expect && + test_cmp expect actual && + # create a submodule and check gitdir + ( + cd repo-init && + git init -b main sub && + test_commit -C sub sub-initial && + git 
submodule add ./sub sub && + git config submodule.sub.gitdir >actual && + echo ".git/modules/sub" >expect && + test_cmp expect actual + ) +' + +test_expect_success '`git init` does not set extension by default' ' + git init upstream && + test_commit -C upstream initial && + test_must_fail git -C upstream config extensions.submodulePathConfig && + # create a pair of submodules and check gitdir is not created + git init -b main sub && + test_commit -C sub sub-initial && + ( + cd upstream && + git submodule add ../sub sub1 && + test_path_is_dir .git/modules/sub1 && + test_must_fail git config submodule.sub1.gitdir && + git submodule add ../sub sub2 && + test_path_is_dir .git/modules/sub2 && + test_must_fail git config submodule.sub2.gitdir && + git commit -m "Add submodules" + ) +' + +test_expect_success '`git clone` does not set extension by default' ' + test_when_finished "rm -rf repo-clone-no-ext" && + git clone upstream repo-clone-no-ext && + ( + cd repo-clone-no-ext && + + test_must_fail git config extensions.submodulePathConfig && + test_path_is_missing .git/modules/sub1 && + test_path_is_missing .git/modules/sub2 && + + # create a submodule and check gitdir is not created + git submodule add ../sub sub3 && + test_must_fail git config submodule.sub3.gitdir + ) +' + +test_expect_success '`git clone --recurse-submodules` does not set extension by default' ' + test_when_finished "rm -rf repo-clone-no-ext" && + git clone --recurse-submodules upstream repo-clone-no-ext && + ( + cd repo-clone-no-ext && + + # verify that submodules do not have gitdir set + test_must_fail git config extensions.submodulePathConfig && + test_path_is_dir .git/modules/sub1 && + test_must_fail git config submodule.sub1.gitdir && + test_path_is_dir .git/modules/sub2 && + test_must_fail git config submodule.sub2.gitdir && + + # create another submodule and check that gitdir is not created + git submodule add ../sub sub3 && + test_path_is_dir .git/modules/sub3 && + test_must_fail git
config submodule.sub3.gitdir + ) + +' + +test_expect_success '`git clone` respects init.defaultSubmodulePathConfig' ' + test_when_finished "rm -rf repo-clone" && + test_config_global init.defaultSubmodulePathConfig true && + git clone upstream repo-clone && + ( + cd repo-clone && + + # verify new repo extension is inherited from global config + git config extensions.submodulePathConfig >actual && + echo true >expect && + test_cmp expect actual && + + # new submodule has a gitdir config + git submodule add ../sub sub && + test_path_is_dir .git/modules/sub && + git config submodule.sub.gitdir >actual && + echo ".git/modules/sub" >expect && + test_cmp expect actual + ) +' + +test_expect_success '`git clone --recurse-submodules` respects init.defaultSubmodulePathConfig' ' + test_when_finished "rm -rf repo-clone-recursive" && + test_config_global init.defaultSubmodulePathConfig true && + git clone --recurse-submodules upstream repo-clone-recursive && + ( + cd repo-clone-recursive && + + # verify new repo extension is inherited from global config + git config extensions.submodulePathConfig >actual && + echo true >expect && + test_cmp expect actual && + + # previous submodules should exist + git config submodule.sub1.gitdir && + git config submodule.sub2.gitdir && + test_path_is_dir .git/modules/sub1 && + test_path_is_dir .git/modules/sub2 && + + # create another submodule and check that gitdir is created + git submodule add ../sub new-sub && + test_path_is_dir .git/modules/new-sub && + git config submodule.new-sub.gitdir >actual && + echo ".git/modules/new-sub" >expect && + test_cmp expect actual + ) +' + test_done From e14349d58eeae0eac23bf7f740d22f51fc90a49d Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:27 +0200 Subject: [PATCH 009/784] submodule--helper: add gitdir migration command Manually running "git config submodule..gitdir .git/modules/" for each submodule can be impractical, so add a migration command to submodule--helper to 
automatically create configs for all submodules as required by extensions.submodulePathConfig. The command calls create_default_gitdir_config() which validates the gitdir paths before adding the configs. Suggested-by: Junio C Hamano Suggested-by: Patrick Steinhardt Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/extensions.adoc | 6 +- builtin/submodule--helper.c | 61 ++++++++++++++++++++ t/t7425-submodule-gitdir-path-extension.sh | 67 ++++++++++++++++++++++ 3 files changed, 132 insertions(+), 2 deletions(-) diff --git a/Documentation/config/extensions.adoc b/Documentation/config/extensions.adoc index e8d9d9a19a5fb6..2aef3315b1d275 100644 --- a/Documentation/config/extensions.adoc +++ b/Documentation/config/extensions.adoc @@ -93,8 +93,10 @@ Git will error out if a module does not have a corresponding `submodule.<name>.gitdir` set. + Existing (pre-extension) submodules need to be migrated by adding the missing -config entries. This is done manually for now, e.g. for each submodule: -`git config submodule.<name>.gitdir .git/modules/<name>`. +config entries. This can be done manually, e.g. for each submodule: +`git config submodule.<name>.gitdir .git/modules/<name>`, or via the +`git submodule--helper migrate-gitdir-configs` command which iterates over all +submodules and attempts to migrate them.
+ The extension can be enabled automatically for new repositories by setting `init.defaultSubmodulePathConfig` to `true`, for example by running diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index ef373525349c2a..fa4f5cc15919f3 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1270,6 +1270,66 @@ static int module_gitdir(int argc, const char **argv, const char *prefix UNUSED, return 0; } +static int module_migrate(int argc UNUSED, const char **argv UNUSED, + const char *prefix UNUSED, struct repository *repo) +{ + struct strbuf module_dir = STRBUF_INIT; + DIR *dir; + struct dirent *de; + int repo_version = 0; + + repo_git_path_append(repo, &module_dir, "modules/"); + + dir = opendir(module_dir.buf); + if (!dir) + die(_("could not open '%s'"), module_dir.buf); + + while ((de = readdir(dir))) { + struct strbuf gitdir_path = STRBUF_INIT; + char *key; + const char *value; + + if (is_dot_or_dotdot(de->d_name)) + continue; + + strbuf_addf(&gitdir_path, "%s/%s", module_dir.buf, de->d_name); + if (!is_git_directory(gitdir_path.buf)) { + strbuf_release(&gitdir_path); + continue; + } + strbuf_release(&gitdir_path); + + key = xstrfmt("submodule.%s.gitdir", de->d_name); + if (!repo_config_get_string_tmp(repo, key, &value)) { + /* Already has a gitdir config, nothing to do. */ + free(key); + continue; + } + free(key); + + create_default_gitdir_config(de->d_name); + } + + closedir(dir); + strbuf_release(&module_dir); + + repo_config_get_int(the_repository, "core.repositoryformatversion", &repo_version); + if (repo_version == 0 && + repo_config_set_gently(repo, "core.repositoryformatversion", "1")) + die(_("could not set core.repositoryformatversion to 1.\n" + "Please set it for migration to work, for example:\n" + "git config core.repositoryformatversion 1")); + + if (repo_config_set_gently(repo, "extensions.submodulePathConfig", "true")) + die(_("could not enable submodulePathConfig extension. 
It is required\n" + "for migration to work. Please enable it in the root repo:\n" + "git config extensions.submodulePathConfig true")); + + repo->repository_format_submodule_path_cfg = 1; + + return 0; +} + struct sync_cb { const char *prefix; const char *super_prefix; @@ -3653,6 +3713,7 @@ int cmd_submodule__helper(int argc, NULL }; struct option options[] = { + OPT_SUBCOMMAND("migrate-gitdir-configs", &fn, module_migrate), OPT_SUBCOMMAND("gitdir", &fn, module_gitdir), OPT_SUBCOMMAND("clone", &fn, module_clone), OPT_SUBCOMMAND("add", &fn, module_add), diff --git a/t/t7425-submodule-gitdir-path-extension.sh b/t/t7425-submodule-gitdir-path-extension.sh index 03ac165de96477..89e2feab8b7c4b 100755 --- a/t/t7425-submodule-gitdir-path-extension.sh +++ b/t/t7425-submodule-gitdir-path-extension.sh @@ -279,4 +279,71 @@ test_expect_success '`git clone --recurse-submodules` respects init.defaultSubmo ) ' +test_expect_success 'submodule--helper migrates legacy modules' ' + ( + cd upstream && + + # previous submodules exist and were not migrated yet + test_must_fail git config submodule.sub1.gitdir && + test_must_fail git config submodule.sub2.gitdir && + test_path_is_dir .git/modules/sub1 && + test_path_is_dir .git/modules/sub2 && + + # run migration + git submodule--helper migrate-gitdir-configs && + + # test that migration worked + git config submodule.sub1.gitdir >actual && + echo ".git/modules/sub1" >expect && + test_cmp expect actual && + git config submodule.sub2.gitdir >actual && + echo ".git/modules/sub2" >expect && + test_cmp expect actual && + + # repository extension is enabled after migration + git config extensions.submodulePathConfig >actual && + echo "true" >expect && + test_cmp expect actual + ) +' + +test_expect_success '`git clone --recurse-submodules` works after migration' ' + test_when_finished "rm -rf repo-clone-recursive" && + + # test with extension disabled after the upstream repo was migrated + git clone --recurse-submodules upstream 
repo-clone-recursive && + ( + cd repo-clone-recursive && + + # init.defaultSubmodulePathConfig was disabled before clone, so + # the repo extension config should also be off, the migration ignored + test_must_fail git config extensions.submodulePathConfig && + + # modules should look like there was no migration done + test_must_fail git config submodule.sub1.gitdir && + test_must_fail git config submodule.sub2.gitdir && + test_path_is_dir .git/modules/sub1 && + test_path_is_dir .git/modules/sub2 + ) && + rm -rf repo-clone-recursive && + + # enable the extension, then retry the clone + test_config_global init.defaultSubmodulePathConfig true && + git clone --recurse-submodules upstream repo-clone-recursive && + ( + cd repo-clone-recursive && + + # repository extension is enabled + git config extensions.submodulePathConfig >actual && + echo "true" >expect && + test_cmp expect actual && + + # gitdir configs exist for submodules + git config submodule.sub1.gitdir && + git config submodule.sub2.gitdir && + test_path_is_dir .git/modules/sub1 && + test_path_is_dir .git/modules/sub2 + ) +' + test_done From 226694bdf4aaecd18f6cd4df12656cc61b213d16 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:28 +0200 Subject: [PATCH 010/784] builtin/credential-store: move is_rfc3986_unreserved to url.[ch] is_rfc3986_unreserved() was moved to credential-store.c and was made static by f89854362c (credential-store: move related functions to credential-store file, 2023-06-06) under a correct assumption, at the time, that it was the only place using it. However now we need it to apply URL-encoding to submodule names when constructing gitdir paths, to avoid conflicts, so bring it back as a public function exposed via url.h, instead of the old helper path (strbuf), which has nothing to do with 3986 encoding/decoding anymore. This function will be used in subsequent commits which do the encoding. 
Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- builtin/credential-store.c | 7 +------ url.c | 6 ++++++ url.h | 7 +++++++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/builtin/credential-store.c b/builtin/credential-store.c index b74e06cc93d9cd..bc1453c6b2b1b2 100644 --- a/builtin/credential-store.c +++ b/builtin/credential-store.c @@ -7,6 +7,7 @@ #include "path.h" #include "string-list.h" #include "parse-options.h" +#include "url.h" #include "write-or-die.h" static struct lock_file credential_lock; @@ -76,12 +77,6 @@ static void rewrite_credential_file(const char *fn, struct credential *c, die_errno("unable to write credential store"); } -static int is_rfc3986_unreserved(char ch) -{ - return isalnum(ch) || - ch == '-' || ch == '_' || ch == '.' || ch == '~'; -} - static int is_rfc3986_reserved_or_unreserved(char ch) { if (is_rfc3986_unreserved(ch)) diff --git a/url.c b/url.c index 282b12495ae7d4..adc289229c6491 100644 --- a/url.c +++ b/url.c @@ -3,6 +3,12 @@ #include "strbuf.h" #include "url.h" +int is_rfc3986_unreserved(char ch) +{ + return isalnum(ch) || + ch == '-' || ch == '_' || ch == '.' || ch == '~'; +} + int is_urlschemechar(int first_flag, int ch) { /* diff --git a/url.h b/url.h index 2a27c3427763b2..e644c3c8096028 100644 --- a/url.h +++ b/url.h @@ -21,4 +21,11 @@ char *url_decode_parameter_value(const char **query); void end_url_with_slash(struct strbuf *buf, const char *url); void str_end_url_with_slash(const char *url, char **dest); +/* + * The set of unreserved characters as per STD66 (RFC3986) is + * '[A-Za-z0-9-._~]'. These characters are safe to appear in URI + * components without percent-encoding. 
+ */ +int is_rfc3986_unreserved(char ch); + #endif /* URL_H */ From 920fbe4d4ee8d4e191d33dde05a16ee0e74bdd44 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:29 +0200 Subject: [PATCH 011/784] submodule--helper: fix filesystem collisions by encoding gitdir paths Fix nested filesystem collisions by url-encoding gitdir paths stored in submodule.%s.gitdir, when extensions.submodulePathConfig is enabled. Credit goes to Junio and Patrick for coming up with this design: the encoding is only applied when necessary, to newly added submodules. Existing modules don't need the encoding because git already errors out when detecting nested gitdirs before this patch. This commit adds the basic url-encoding and some tests. Next commits extend the encode -> validate -> retry loop to fix more conflicts. Suggested-by: Junio C Hamano Suggested-by: Patrick Steinhardt Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 12 +++++ submodule.c | 42 +++++++++++++++- t/t7425-submodule-gitdir-path-extension.sh | 57 ++++++++++++++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fa4f5cc15919f3..361542d67805b8 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -34,6 +34,7 @@ #include "list-objects-filter-options.h" #include "wildmatch.h" #include "strbuf.h" +#include "url.h" #define OPT_QUIET (1 << 0) #define OPT_CACHED (1 << 1) @@ -465,12 +466,23 @@ static void create_default_gitdir_config(const char *submodule_name) { struct strbuf gitdir_path = STRBUF_INIT; + /* Case 1: try the plain module name */ repo_git_path_append(the_repository, &gitdir_path, "modules/%s", submodule_name); if (!validate_and_set_submodule_gitdir(&gitdir_path, submodule_name)) { strbuf_release(&gitdir_path); return; } + /* Case 2: Try URI-safe (RFC3986) encoding first, this fixes nested gitdirs */ + strbuf_reset(&gitdir_path); + 
repo_git_path_append(the_repository, &gitdir_path, "modules/"); + strbuf_addstr_urlencode(&gitdir_path, submodule_name, is_rfc3986_unreserved); + if (!validate_and_set_submodule_gitdir(&gitdir_path, submodule_name)) { + strbuf_release(&gitdir_path); + return; + } + + /* Case 3: nothing worked, error out */ die(_("failed to set a valid default config for 'submodule.%s.gitdir'. " "Please ensure it is set, for example by running something like: " "'git config submodule.%s.gitdir .git/modules/%s'"), diff --git a/submodule.c b/submodule.c index f7af389c79099e..609d9073772ef3 100644 --- a/submodule.c +++ b/submodule.c @@ -32,6 +32,7 @@ #include "read-cache-ll.h" #include "setup.h" #include "advice.h" +#include "url.h" static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF; static int initialized_fetch_ref_tips; @@ -2253,12 +2254,43 @@ int submodule_move_head(const char *path, const char *super_prefix, return ret; } -int validate_submodule_git_dir(char *git_dir, const char *submodule_name) +/* + * Encoded gitdir validation, only used when extensions.submodulePathConfig is enabled. + * This does not print errors like the non-encoded version, because encoding is supposed + * to mitigate / fix all these. + */ +static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodule_name UNUSED) +{ + const char *modules_marker = "/modules/"; + char *p = git_dir, *last_submodule_name = NULL; + + if (!the_repository->repository_format_submodule_path_cfg) + BUG("validate_submodule_encoded_git_dir() must be called with " + "extensions.submodulePathConfig enabled."); + + /* Find the last submodule name in the gitdir path (modules can be nested). 
*/ + while ((p = strstr(p, modules_marker))) { + last_submodule_name = p + strlen(modules_marker); + p++; + } + + /* Prevent the use of '/' in encoded names */ + if (!last_submodule_name || strchr(last_submodule_name, '/')) + return -1; + + return 0; +} + +static int validate_submodule_legacy_git_dir(char *git_dir, const char *submodule_name) { size_t len = strlen(git_dir), suffix_len = strlen(submodule_name); char *p; int ret = 0; + if (the_repository->repository_format_submodule_path_cfg) + BUG("validate_submodule_git_dir() must be called with " + "extensions.submodulePathConfig disabled."); + if (len <= suffix_len || (p = git_dir + len - suffix_len)[-1] != '/' || strcmp(p, submodule_name)) BUG("submodule name '%s' not a suffix of git dir '%s'", @@ -2294,6 +2326,14 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name) return 0; } +int validate_submodule_git_dir(char *git_dir, const char *submodule_name) +{ + if (!the_repository->repository_format_submodule_path_cfg) + return validate_submodule_legacy_git_dir(git_dir, submodule_name); + + return validate_submodule_encoded_git_dir(git_dir, submodule_name); +} + int validate_submodule_path(const char *path) { char *p = xstrdup(path); diff --git a/t/t7425-submodule-gitdir-path-extension.sh b/t/t7425-submodule-gitdir-path-extension.sh index 89e2feab8b7c4b..ce1428a2ffa4b9 100755 --- a/t/t7425-submodule-gitdir-path-extension.sh +++ b/t/t7425-submodule-gitdir-path-extension.sh @@ -346,4 +346,61 @@ test_expect_success '`git clone --recurse-submodules` works after migration' ' ) ' +test_expect_success 'setup submodules with nested git dirs' ' + git init nested && + test_commit -C nested nested && + ( + cd nested && + cat >.gitmodules <<-EOF && + [submodule "hippo"] + url = . + path = thing1 + [submodule "hippo/hooks"] + url = . + path = thing2 + EOF + git clone . thing1 && + git clone . 
thing2 && + git add .gitmodules thing1 thing2 && + test_tick && + git commit -m nested + ) +' + +test_expect_success 'git dirs of encoded sibling submodules must not be nested' ' + git clone -c extensions.submodulePathConfig=true --recurse-submodules nested clone_nested && + + verify_submodule_gitdir_path clone_nested hippo modules/hippo && + git -C clone_nested config submodule.hippo.gitdir >actual && + test_grep "\.git/modules/hippo$" actual && + + verify_submodule_gitdir_path clone_nested hippo/hooks modules/hippo%2fhooks && + git -C clone_nested config submodule.hippo/hooks.gitdir >actual && + test_grep "\.git/modules/hippo%2fhooks$" actual +' + +test_expect_success 'submodule git dir nesting detection must work with parallel cloning' ' + git clone -c extensions.submodulePathConfig=true --recurse-submodules --jobs=2 nested clone_parallel && + + verify_submodule_gitdir_path clone_parallel hippo modules/hippo && + git -C clone_parallel config submodule.hippo.gitdir >actual && + test_grep "\.git/modules/hippo$" actual && + + verify_submodule_gitdir_path clone_parallel hippo/hooks modules/hippo%2fhooks && + git -C clone_parallel config submodule.hippo/hooks.gitdir >actual && + test_grep "\.git/modules/hippo%2fhooks$" actual +' + +test_expect_success 'disabling extensions.submodulePathConfig prevents nested submodules' ' + ( + cd clone_nested && + # disable extension and verify failure + git config --replace-all extensions.submodulePathConfig false && + test_must_fail git submodule add ./thing2 hippo/foobar && + # re-enable extension and verify it works + git config --replace-all extensions.submodulePathConfig true && + git submodule add ./thing2 hippo/foobar + ) +' + test_done From 1685bba838ace8b4e325616ab914a6b01f18547f Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:30 +0200 Subject: [PATCH 012/784] submodule: fix case-folding gitdir filesystem collisions Add a new check when extensions.submodulePathConfig is enabled, to detect and prevent
case-folding filesystem collisions. When this new check is triggered, a stricter casefolding aware URI encoding is used to percent-encode uppercase characters. By using this check/retry mechanism the uppercase encoding is only applied when necessary, so case-sensitive filesystems are not affected. Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 26 ++++++++++- submodule.c | 53 +++++++++++++++++++++- t/t7425-submodule-gitdir-path-extension.sh | 35 ++++++++++++++ url.c | 7 +++ url.h | 7 +++ 5 files changed, 126 insertions(+), 2 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 361542d67805b8..746f9fa63c3be9 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -473,7 +473,7 @@ static void create_default_gitdir_config(const char *submodule_name) return; } - /* Case 2: Try URI-safe (RFC3986) encoding first, this fixes nested gitdirs */ + /* Case 2.1: Try URI-safe (RFC3986) encoding first, this fixes nested gitdirs */ strbuf_reset(&gitdir_path); repo_git_path_append(the_repository, &gitdir_path, "modules/"); strbuf_addstr_urlencode(&gitdir_path, submodule_name, is_rfc3986_unreserved); @@ -482,6 +482,30 @@ static void create_default_gitdir_config(const char *submodule_name) return; } + /* Case 2.2: Try extended uppercase URI (RFC3986) encoding, to fix case-folding */ + strbuf_reset(&gitdir_path); + repo_git_path_append(the_repository, &gitdir_path, "modules/"); + strbuf_addstr_urlencode(&gitdir_path, submodule_name, is_casefolding_rfc3986_unreserved); + if (!validate_and_set_submodule_gitdir(&gitdir_path, submodule_name)) + return; + + /* Case 2.3: Try some derived gitdir names, see if one sticks */ + for (char c = '0'; c <= '9'; c++) { + strbuf_reset(&gitdir_path); + repo_git_path_append(the_repository, &gitdir_path, "modules/"); + strbuf_addstr_urlencode(&gitdir_path, submodule_name, is_rfc3986_unreserved); + strbuf_addch(&gitdir_path, c); + if
(!validate_and_set_submodule_gitdir(&gitdir_path, submodule_name)) + return; + + strbuf_reset(&gitdir_path); + repo_git_path_append(the_repository, &gitdir_path, "modules/"); + strbuf_addstr_urlencode(&gitdir_path, submodule_name, is_casefolding_rfc3986_unreserved); + strbuf_addch(&gitdir_path, c); + if (!validate_and_set_submodule_gitdir(&gitdir_path, submodule_name)) + return; + } + /* Case 3: nothing worked, error out */ die(_("failed to set a valid default config for 'submodule.%s.gitdir'. " "Please ensure it is set, for example by running something like: " diff --git a/submodule.c b/submodule.c index 609d9073772ef3..a5a47359c71386 100644 --- a/submodule.c +++ b/submodule.c @@ -2254,15 +2254,58 @@ int submodule_move_head(const char *path, const char *super_prefix, return ret; } +static int check_casefolding_conflict(const char *git_dir, + const char *submodule_name, + const bool suffixes_match) +{ + char *p, *modules_dir = xstrdup(git_dir); + struct dirent *de; + DIR *dir = NULL; + int ret = 0; + + if ((p = find_last_dir_sep(modules_dir))) + *p = '\0'; + + /* No conflict is possible if modules_dir doesn't exist (first clone) */ + if (!is_directory(modules_dir)) + goto cleanup; + + dir = opendir(modules_dir); + if (!dir) { + ret = -1; + goto cleanup; + } + + /* Check for another directory under .git/modules that differs only in case. */ + while ((de = readdir(dir))) { + if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) + continue; + + if ((suffixes_match || is_git_directory(git_dir)) && + !strcasecmp(de->d_name, submodule_name) && + strcmp(de->d_name, submodule_name)) { + ret = -1; /* collision found */ + break; + } + } + +cleanup: + if (dir) + closedir(dir); + free(modules_dir); + return ret; +} + /* * Encoded gitdir validation, only used when extensions.submodulePathConfig is enabled. * This does not print errors like the non-encoded version, because encoding is supposed * to mitigate / fix all these. 
*/ -static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodule_name UNUSED) +static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodule_name) { const char *modules_marker = "/modules/"; char *p = git_dir, *last_submodule_name = NULL; + int config_ignorecase = 0; if (!the_repository->repository_format_submodule_path_cfg) BUG("validate_submodule_encoded_git_dir() must be called with " @@ -2278,6 +2321,14 @@ static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodu if (!last_submodule_name || strchr(last_submodule_name, '/')) return -1; + /* Prevent conflicts on case-folding filesystems */ + repo_config_get_bool(the_repository, "core.ignorecase", &config_ignorecase); + if (ignore_case || config_ignorecase) { + bool suffixes_match = !strcmp(last_submodule_name, submodule_name); + return check_casefolding_conflict(git_dir, submodule_name, + suffixes_match); + } + return 0; } diff --git a/t/t7425-submodule-gitdir-path-extension.sh b/t/t7425-submodule-gitdir-path-extension.sh index ce1428a2ffa4b9..3cca93c8972b01 100755 --- a/t/t7425-submodule-gitdir-path-extension.sh +++ b/t/t7425-submodule-gitdir-path-extension.sh @@ -403,4 +403,39 @@ test_expect_success 'disabling extensions.submodulePathConfig prevents nested su ) ' +test_expect_success CASE_INSENSITIVE_FS 'verify case-folding conflicts are correctly encoded' ' + git clone -c extensions.submodulePathConfig=true main cloned-folding && + ( + cd cloned-folding && + + # conflict: the "folding" gitdir will already be taken + git submodule add ../new-sub "folding" && + test_commit lowercase && + git submodule add ../new-sub "FoldinG" && + test_commit uppercase && + + # conflict: the "foo" gitdir will already be taken + git submodule add ../new-sub "FOO" && + test_commit uppercase-foo && + git submodule add ../new-sub "foo" && + test_commit lowercase-foo && + + # create a multi conflict between foobar, fooBar and foo%42ar + # the "foo" gitdir will already 
be taken + git submodule add ../new-sub "foobar" && + test_commit lowercase-foobar && + git submodule add ../new-sub "foo%42ar" && + test_commit encoded-foo%42ar && + git submodule add ../new-sub "fooBar" && + test_commit mixed-fooBar + ) && + verify_submodule_gitdir_path cloned-folding "folding" "modules/folding" && + verify_submodule_gitdir_path cloned-folding "FoldinG" "modules/%46oldin%47" && + verify_submodule_gitdir_path cloned-folding "FOO" "modules/FOO" && + verify_submodule_gitdir_path cloned-folding "foo" "modules/foo0" && + verify_submodule_gitdir_path cloned-folding "foobar" "modules/foobar" && + verify_submodule_gitdir_path cloned-folding "foo%42ar" "modules/foo%42ar" && + verify_submodule_gitdir_path cloned-folding "fooBar" "modules/fooBar0" +' + test_done diff --git a/url.c b/url.c index adc289229c6491..3ca5987e905d59 100644 --- a/url.c +++ b/url.c @@ -9,6 +9,13 @@ int is_rfc3986_unreserved(char ch) ch == '-' || ch == '_' || ch == '.' || ch == '~'; } +int is_casefolding_rfc3986_unreserved(char c) +{ + return (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + c == '-' || c == '.' || c == '_' || c == '~'; +} + int is_urlschemechar(int first_flag, int ch) { /* diff --git a/url.h b/url.h index e644c3c8096028..cd9140e9946b16 100644 --- a/url.h +++ b/url.h @@ -28,4 +28,11 @@ void str_end_url_with_slash(const char *url, char **dest); */ int is_rfc3986_unreserved(char ch); +/* + * This is a variant of is_rfc3986_unreserved() that treats uppercase + * letters as "reserved". This forces them to be percent-encoded, allowing + * 'Foo' (%46oo) and 'foo' (foo) to be distinct on case-folding filesystems. 
+ */ +int is_casefolding_rfc3986_unreserved(char c); + #endif /* URL_H */ From 82c36fa0a987c9c8617f5ded41834f7487e616e2 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:31 +0200 Subject: [PATCH 013/784] submodule: hash the submodule name for the gitdir path If none of the previous plain-text / encoding / derivation steps work and case 2.4 is reached, then try a hash of the submodule name to see if that can be a valid gitdir before giving up and throwing an error. This is a "last resort" type of measure to avoid conflicts since it loses the human readability of the gitdir path. This logic will be reached in rare cases, as can be seen in the test we added. Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 19 +++++++ t/t7425-submodule-gitdir-path-extension.sh | 59 ++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 746f9fa63c3be9..2fa71c814cb6df 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -465,6 +465,10 @@ static int validate_and_set_submodule_gitdir(struct strbuf *gitdir_path, static void create_default_gitdir_config(const char *submodule_name) { struct strbuf gitdir_path = STRBUF_INIT; + struct git_hash_ctx ctx; + char hex_name_hash[GIT_MAX_HEXSZ + 1], header[128]; + unsigned char raw_name_hash[GIT_MAX_RAWSZ]; + int header_len; /* Case 1: try the plain module name */ repo_git_path_append(the_repository, &gitdir_path, "modules/%s", submodule_name); @@ -506,6 +510,21 @@ static void create_default_gitdir_config(const char *submodule_name) return; } + /* Case 2.4: If all the above failed, try a hash of the name as a last resort */ + header_len = snprintf(header, sizeof(header), "blob %zu", strlen(submodule_name)); + the_hash_algo->init_fn(&ctx); + the_hash_algo->update_fn(&ctx, header, header_len); + the_hash_algo->update_fn(&ctx, "\0", 1); + the_hash_algo->update_fn(&ctx, submodule_name, 
strlen(submodule_name)); + the_hash_algo->final_fn(raw_name_hash, &ctx); + hash_to_hex_algop_r(hex_name_hash, raw_name_hash, the_hash_algo); + strbuf_reset(&gitdir_path); + repo_git_path_append(the_repository, &gitdir_path, "modules/%s", hex_name_hash); + if (!validate_and_set_submodule_gitdir(&gitdir_path, submodule_name)) { + strbuf_release(&gitdir_path); + return; + } + /* Case 3: nothing worked, error out */ die(_("failed to set a valid default config for 'submodule.%s.gitdir'. " "Please ensure it is set, for example by running something like: " diff --git a/t/t7425-submodule-gitdir-path-extension.sh b/t/t7425-submodule-gitdir-path-extension.sh index 3cca93c8972b01..a76e64a9f7d7d3 100755 --- a/t/t7425-submodule-gitdir-path-extension.sh +++ b/t/t7425-submodule-gitdir-path-extension.sh @@ -438,4 +438,63 @@ test_expect_success CASE_INSENSITIVE_FS 'verify case-folding conflicts are corre verify_submodule_gitdir_path cloned-folding "fooBar" "modules/fooBar0" ' +test_expect_success CASE_INSENSITIVE_FS 'verify hashing conflict resolution as a last resort' ' + git clone -c extensions.submodulePathConfig=true main cloned-hash && + ( + cd cloned-hash && + + # conflict: add all submodule conflicting variants until we reach the + # final hashing conflict resolution for submodule "foo" + git submodule add ../new-sub "foo" && + git submodule add ../new-sub "foo0" && + git submodule add ../new-sub "foo1" && + git submodule add ../new-sub "foo2" && + git submodule add ../new-sub "foo3" && + git submodule add ../new-sub "foo4" && + git submodule add ../new-sub "foo5" && + git submodule add ../new-sub "foo6" && + git submodule add ../new-sub "foo7" && + git submodule add ../new-sub "foo8" && + git submodule add ../new-sub "foo9" && + git submodule add ../new-sub "%46oo" && + git submodule add ../new-sub "%46oo0" && + git submodule add ../new-sub "%46oo1" && + git submodule add ../new-sub "%46oo2" && + git submodule add ../new-sub "%46oo3" && + git submodule add ../new-sub 
"%46oo4" && + git submodule add ../new-sub "%46oo5" && + git submodule add ../new-sub "%46oo6" && + git submodule add ../new-sub "%46oo7" && + git submodule add ../new-sub "%46oo8" && + git submodule add ../new-sub "%46oo9" && + test_commit add-foo-variants && + git submodule add ../new-sub "Foo" && + test_commit add-uppercase-foo + ) && + verify_submodule_gitdir_path cloned-hash "foo" "modules/foo" && + verify_submodule_gitdir_path cloned-hash "foo0" "modules/foo0" && + verify_submodule_gitdir_path cloned-hash "foo1" "modules/foo1" && + verify_submodule_gitdir_path cloned-hash "foo2" "modules/foo2" && + verify_submodule_gitdir_path cloned-hash "foo3" "modules/foo3" && + verify_submodule_gitdir_path cloned-hash "foo4" "modules/foo4" && + verify_submodule_gitdir_path cloned-hash "foo5" "modules/foo5" && + verify_submodule_gitdir_path cloned-hash "foo6" "modules/foo6" && + verify_submodule_gitdir_path cloned-hash "foo7" "modules/foo7" && + verify_submodule_gitdir_path cloned-hash "foo8" "modules/foo8" && + verify_submodule_gitdir_path cloned-hash "foo9" "modules/foo9" && + verify_submodule_gitdir_path cloned-hash "%46oo" "modules/%46oo" && + verify_submodule_gitdir_path cloned-hash "%46oo0" "modules/%46oo0" && + verify_submodule_gitdir_path cloned-hash "%46oo1" "modules/%46oo1" && + verify_submodule_gitdir_path cloned-hash "%46oo2" "modules/%46oo2" && + verify_submodule_gitdir_path cloned-hash "%46oo3" "modules/%46oo3" && + verify_submodule_gitdir_path cloned-hash "%46oo4" "modules/%46oo4" && + verify_submodule_gitdir_path cloned-hash "%46oo5" "modules/%46oo5" && + verify_submodule_gitdir_path cloned-hash "%46oo6" "modules/%46oo6" && + verify_submodule_gitdir_path cloned-hash "%46oo7" "modules/%46oo7" && + verify_submodule_gitdir_path cloned-hash "%46oo8" "modules/%46oo8" && + verify_submodule_gitdir_path cloned-hash "%46oo9" "modules/%46oo9" && + hash=$(printf "Foo" | git hash-object --stdin) && + verify_submodule_gitdir_path cloned-hash "Foo" "modules/${hash}" +' + 
test_done From e897c9b7f31cf83e93cfefe1f82eb4a18337c9b1 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 12 Jan 2026 20:46:32 +0200 Subject: [PATCH 014/784] submodule: detect conflicts with existing gitdir configs Credit goes to Emily and Josh for testing and noticing a corner-case which caused conflicts with existing gitdir configs to silently pass validation, then fail later in add_submodule() with a cryptic error: fatal: A git directory for 'nested%2fsub' is found locally with remote(s): origin /.../trash directory.t7425-submodule-gitdir-path-extension/sub This change ensures the validation step checks existing gitdirs for conflicts. We only have to do this for submodules having gitdirs, because those without submodule.%s.gitdir need to be migrated and will throw an error earlier in the submodule codepath. Quoting Josh: My testing setup has been as follows: * Using our locally-built Git with our downstream patch of [1] included: * create a repo "sub" * create a repo "super" * In "super": * mkdir nested * git submodule add ../sub nested/sub * Verify that the submodule's gitdir is .git/modules/nested%2fsub * Using a build of git from upstream `next` plus this series: * git config set --global extensions.submodulepathconfig true * git clone --recurse-submodules super super2 * create a repo "nested%2fsub" * In "super2": * git submodule add ../nested%2fsub At this point I'd expect the collision detection / encoding to take effect, but instead I get the error listed above. 
End quote Suggested-by: Josh Steadmon Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- submodule.c | 61 ++++++++++++++++++++++ t/t7425-submodule-gitdir-path-extension.sh | 28 ++++++++++ 2 files changed, 89 insertions(+) diff --git a/submodule.c b/submodule.c index a5a47359c71386..4ab3fcb5981c41 100644 --- a/submodule.c +++ b/submodule.c @@ -2296,6 +2296,62 @@ static int check_casefolding_conflict(const char *git_dir, return ret; } +struct submodule_from_gitdir_cb { + const char *gitdir; + const char *submodule_name; + bool conflict_found; +}; + +static int find_conflict_by_gitdir_cb(const char *var, const char *value, + const struct config_context *ctx UNUSED, void *data) +{ + struct submodule_from_gitdir_cb *cb = data; + const char *submodule_name_start; + size_t submodule_name_len; + const char *suffix = ".gitdir"; + size_t suffix_len = strlen(suffix); + + if (!skip_prefix(var, "submodule.", &submodule_name_start)) + return 0; + + /* Check if submodule_name_start ends with ".gitdir" */ + submodule_name_len = strlen(submodule_name_start); + if (submodule_name_len < suffix_len || + strcmp(submodule_name_start + submodule_name_len - suffix_len, suffix) != 0) + return 0; /* Does not end with ".gitdir" */ + + submodule_name_len -= suffix_len; + + /* + * A conflict happens if: + * 1. The submodule names are different and + * 2. 
The gitdir paths resolve to the same absolute path + */ + if (value && (strlen(cb->submodule_name) != submodule_name_len || strncmp(cb->submodule_name, submodule_name_start, submodule_name_len))) { + char *abs_path_cb = absolute_pathdup(cb->gitdir); + char *abs_path_value = absolute_pathdup(value); + + cb->conflict_found = !strcmp(abs_path_cb, abs_path_value); + + free(abs_path_cb); + free(abs_path_value); + } + + return cb->conflict_found; +} + +static bool submodule_conflicts_with_existing(const char *gitdir, const char *submodule_name) +{ + struct submodule_from_gitdir_cb cb = { 0 }; + cb.submodule_name = submodule_name; + cb.gitdir = gitdir; + + /* Find conflicts with existing repo gitdir configs */ + repo_config(the_repository, find_conflict_by_gitdir_cb, &cb); + + return cb.conflict_found; +} + /* * Encoded gitdir validation, only used when extensions.submodulePathConfig is enabled. * This does not print errors like the non-encoded version, because encoding is supposed @@ -2321,6 +2377,11 @@ static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodu if (!last_submodule_name || strchr(last_submodule_name, '/')) return -1; + /* Prevent conflicts with existing submodule gitdirs */ + if (is_git_directory(git_dir) && + submodule_conflicts_with_existing(git_dir, submodule_name)) + return -1; + /* Prevent conflicts on case-folding filesystems */ repo_config_get_bool(the_repository, "core.ignorecase", &config_ignorecase); if (ignore_case || config_ignorecase) { diff --git a/t/t7425-submodule-gitdir-path-extension.sh b/t/t7425-submodule-gitdir-path-extension.sh index a76e64a9f7d7d3..ea86ecf7ee5b48 100755 --- a/t/t7425-submodule-gitdir-path-extension.sh +++ b/t/t7425-submodule-gitdir-path-extension.sh @@ -497,4 +497,32 @@ test_expect_success CASE_INSENSITIVE_FS 'verify hashing conflict resolution as a verify_submodule_gitdir_path cloned-hash "Foo" "modules/${hash}" ' +test_expect_success 'submodule gitdir conflicts with previously encoded name (local config)' ' + git init -b main super_with_encoded
&& + ( + cd super_with_encoded && + + git config core.repositoryformatversion 1 && + git config extensions.submodulePathConfig true && + + # Add a submodule with a nested path + git submodule add --name "nested/sub" ../sub nested/sub && + test_commit add-encoded-gitdir && + + verify_submodule_gitdir_path . "nested/sub" "modules/nested%2fsub" && + test_path_is_dir ".git/modules/nested%2fsub" + ) && + + # create a submodule that will conflict with the encoded gitdir name: + # the existing gitdir is ".git/modules/nested%2fsub", which is used + # by "nested/sub", so the new submod will get another (non-conflicting) + # name: "nested%252fsub". + ( + cd super_with_encoded && + git submodule add ../sub "nested%2fsub" && + verify_submodule_gitdir_path . "nested%2fsub" "modules/nested%252fsub" && + test_path_is_dir ".git/modules/nested%252fsub" + ) +' + test_done From 1954b943227f2455d97e3b35ce106c203471b0ba Mon Sep 17 00:00:00 2001 From: Deveshi Dwivedi Date: Mon, 12 Jan 2026 16:36:42 +0000 Subject: [PATCH 015/784] t5403: introduce check_post_checkout helper function The test file repeatedly uses the same four-line pattern to validate post-checkout hook arguments: read the args file, then test each of the three values individually. Introduce a check_post_checkout helper function that encapsulates this pattern. This patch does not change test behavior; it prepares the code for improvement in the next step. Additionally, the 'post-checkout hook is triggered by clone' test is improved to validate the hook arguments (old ref, new ref, and flag) rather than just checking that the hook file was created. 
Signed-off-by: Deveshi Dwivedi Signed-off-by: Junio C Hamano --- t/t5403-post-checkout-hook.sh | 49 ++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/t/t5403-post-checkout-hook.sh b/t/t5403-post-checkout-hook.sh index ade9e5087f9f30..d9de4e75297af9 100755 --- a/t/t5403-post-checkout-hook.sh +++ b/t/t5403-post-checkout-hook.sh @@ -10,6 +10,17 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh +# Usage: check_post_checkout +# +# Verify that the post-checkout hook arguments in match the expected +# values: for the previous HEAD, for the new HEAD, and +# indicating whether this was a branch checkout (1) or file checkout (0). +check_post_checkout () { + test "$#" = 4 || BUG "check_post_checkout takes 4 args" + read old new flag <"$1" && + test "$old" = "$2" && test "$new" = "$3" && test "$flag" = "$4" +} + test_expect_success setup ' test_hook --setup post-checkout <<-\EOF && echo "$@" >.git/post-checkout.args @@ -24,29 +35,30 @@ test_expect_success setup ' test_expect_success 'post-checkout receives the right arguments with HEAD unchanged ' ' test_when_finished "rm -f .git/post-checkout.args" && git checkout main && - read old new flag <.git/post-checkout.args && - test $old = $new && test $flag = 1 + check_post_checkout .git/post-checkout.args \ + "$(git rev-parse HEAD)" "$(git rev-parse HEAD)" 1 ' test_expect_success 'post-checkout args are correct with git checkout -b ' ' test_when_finished "rm -f .git/post-checkout.args" && git checkout -b new1 && - read old new flag <.git/post-checkout.args && - test $old = $new && test $flag = 1 + check_post_checkout .git/post-checkout.args \ + "$(git rev-parse HEAD)" "$(git rev-parse HEAD)" 1 ' test_expect_success 'post-checkout receives the right args with HEAD changed ' ' test_when_finished "rm -f .git/post-checkout.args" && + old=$(git rev-parse HEAD) && git checkout two && - read old new flag <.git/post-checkout.args && - test $old != 
$new && test $flag = 1 + check_post_checkout .git/post-checkout.args \ + "$old" "$(git rev-parse HEAD)" 1 ' test_expect_success 'post-checkout receives the right args when not switching branches ' ' test_when_finished "rm -f .git/post-checkout.args" && git checkout main -- three.t && - read old new flag <.git/post-checkout.args && - test $old = $new && test $flag = 0 + check_post_checkout .git/post-checkout.args \ + "$(git rev-parse HEAD)" "$(git rev-parse HEAD)" 0 ' test_rebase () { @@ -56,10 +68,8 @@ test_rebase () { git checkout -B rebase-test main && rm -f .git/post-checkout.args && git rebase $args rebase-on-me && - read old new flag <.git/post-checkout.args && - test_cmp_rev main $old && - test_cmp_rev rebase-on-me $new && - test $flag = 1 + check_post_checkout .git/post-checkout.args \ + "$(git rev-parse main)" "$(git rev-parse rebase-on-me)" 1 ' test_expect_success "post-checkout is triggered on rebase $args with fast-forward" ' @@ -67,10 +77,8 @@ test_rebase () { git checkout -B ff-rebase-test rebase-on-me^ && rm -f .git/post-checkout.args && git rebase $args rebase-on-me && - read old new flag <.git/post-checkout.args && - test_cmp_rev rebase-on-me^ $old && - test_cmp_rev rebase-on-me $new && - test $flag = 1 + check_post_checkout .git/post-checkout.args \ + "$(git rev-parse rebase-on-me^)" "$(git rev-parse rebase-on-me)" 1 ' test_expect_success "rebase $args fast-forward branch checkout runs post-checkout hook" ' @@ -80,10 +88,8 @@ test_rebase () { git checkout two && rm -f .git/post-checkout.args && git rebase $args HEAD rebase-fast-forward && - read old new flag <.git/post-checkout.args && - test_cmp_rev two $old && - test_cmp_rev three $new && - test $flag = 1 + check_post_checkout .git/post-checkout.args \ + "$(git rev-parse two)" "$(git rev-parse three)" 1 ' test_expect_success "rebase $args checkout does not remove untracked files" ' @@ -110,7 +116,8 @@ test_expect_success 'post-checkout hook is triggered by clone' ' echo "$@" 
>"$GIT_DIR/post-checkout.args" EOF git clone --template=templates . clone3 && - test_path_is_file clone3/.git/post-checkout.args + check_post_checkout clone3/.git/post-checkout.args \ + "$(test_oid zero)" "$(git -C clone3 rev-parse HEAD)" 1 ' test_done From 7a747f972d73d9419603d8127514cf188ed7a9ab Mon Sep 17 00:00:00 2001 From: Deveshi Dwivedi Date: Mon, 12 Jan 2026 16:36:43 +0000 Subject: [PATCH 016/784] t5403: use test_cmp for post-checkout argument checks Update check_post_checkout and the post-checkout hook implementation to use test_cmp instead of individual test commands. This provides better error messages when tests fail, making it easier to debug which specific argument (old ref, new ref, or flag) was incorrect. The hook now outputs in key=value format which test_cmp can display clearly when there's a mismatch. Signed-off-by: Deveshi Dwivedi Signed-off-by: Junio C Hamano --- t/t5403-post-checkout-hook.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/t/t5403-post-checkout-hook.sh b/t/t5403-post-checkout-hook.sh index d9de4e75297af9..53d97df070ffc1 100755 --- a/t/t5403-post-checkout-hook.sh +++ b/t/t5403-post-checkout-hook.sh @@ -17,13 +17,13 @@ TEST_PASSES_SANITIZE_LEAK=true # indicating whether this was a branch checkout (1) or file checkout (0). 
check_post_checkout () { test "$#" = 4 || BUG "check_post_checkout takes 4 args" - read old new flag <"$1" && - test "$old" = "$2" && test "$new" = "$3" && test "$flag" = "$4" + echo "old=$2 new=$3 flag=$4" >expect && + test_cmp expect "$1" } test_expect_success setup ' test_hook --setup post-checkout <<-\EOF && - echo "$@" >.git/post-checkout.args + echo "old=$1 new=$2 flag=$3" >.git/post-checkout.args EOF test_commit one && test_commit two && @@ -113,7 +113,7 @@ test_rebase --merge test_expect_success 'post-checkout hook is triggered by clone' ' mkdir -p templates/hooks && write_script templates/hooks/post-checkout <<-\EOF && - echo "$@" >"$GIT_DIR/post-checkout.args" + echo "old=$1 new=$2 flag=$3" >"$GIT_DIR/post-checkout.args" EOF git clone --template=templates . clone3 && check_post_checkout clone3/.git/post-checkout.args \ From 81021871eaa8b16a892b9c8791a0c905ab26e342 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Tue, 13 Jan 2026 01:23:43 +0530 Subject: [PATCH 017/784] doc: MyFirstContribution: fix missing dependencies and clarify build steps Fix issues in the MyFirstContribution guide that can lead to confusion or test failures when following the documented steps. * Add missing header includes in code examples (environment.h and strbuf.h). * Correct manpage synopsis formatting to prevent failing documentation tests. * Specify the use of parallel test execution with -j$(nproc), noting that it runs tests using all available CPUs and may be adjusted. These updates improve documentation accuracy and make the first-time contributor journey smoother. 
Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- Documentation/MyFirstContribution.adoc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/MyFirstContribution.adoc b/Documentation/MyFirstContribution.adoc index f186dfbc898fd4..7306edab0ff58e 100644 --- a/Documentation/MyFirstContribution.adoc +++ b/Documentation/MyFirstContribution.adoc @@ -331,7 +331,8 @@ on the command line, including the name of our command. (If `prefix` is empty for you, try `cd Documentation/ && ../bin-wrappers/git psuh`). That's not so helpful. So what other context can we get? -Add a line to `#include "config.h"` and `#include "repository.h"`. +Add a line to `#include "config.h"`, `#include "repository.h"` and +`#include "environment.h"`. Then, add the following bits to the function body: function body: @@ -429,6 +430,7 @@ Add the following includes: ---- #include "commit.h" #include "pretty.h" +#include "strbuf.h" ---- Then, add the following lines within your implementation of `cmd_psuh()` near @@ -503,8 +505,8 @@ git-psuh - Delight users' typo with a shy horse SYNOPSIS -------- -[verse] -'git-psuh [...]' +[synopsis] +git psuh [...] DESCRIPTION ----------- @@ -726,9 +728,10 @@ $ prove -j$(nproc) --shuffle t[0-9]*.sh ---- NOTE: You can also do this with `make test` or use any testing harness which can -speak TAP. `prove` can run concurrently. `shuffle` randomizes the order the -tests are run in, which makes them resilient against unwanted inter-test -dependencies. `prove` also makes the output nicer. +speak TAP. `prove` can run concurrently. `-j$(nproc)` runs tests using all +available CPUs in parallel, but the job count can be adjusted as needed. +`shuffle` randomizes the order the tests are run in, which makes them resilient +against unwanted inter-test dependencies. `prove` also makes the output nicer. Go ahead and commit this change, as well. 
From 1454743eb84eea1f7e4ea23e0556ed5ef9888894 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:32 +0100 Subject: [PATCH 018/784] builtin/replay: extract core logic to replay revisions We're about to move the core logic used to replay revisions onto a new base into the "libgit.a" library. Prepare for this by pulling out the logic into a new function `replay_revisions()` that: 1. Takes a set of revisions to replay and some options that tell it how it ought to replay the revisions. 2. Replays the commits. 3. Records any reference updates that would be caused by replaying the commits in a structure that is owned by the caller. The logic itself will be moved into a separate file in the next commit. This change is not expected to cause user-visible change in behaviour. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/replay.c | 269 ++++++++++++++++++++++++++++------------------- 1 file changed, 162 insertions(+), 107 deletions(-) diff --git a/builtin/replay.c b/builtin/replay.c index 1960bbbee8685d..daf907668c109a 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -177,8 +177,9 @@ static void set_up_replay_mode(struct repository *repo, if (!rinfo.positive_refexprs) die(_("need some commits to replay")); - die_for_incompatible_opt2(!!onto_name, "--onto", - !!*advance_name, "--advance"); + if (!onto_name == !*advance_name) + BUG("one and only one of onto_name and *advance_name must be given"); + if (onto_name) { *onto = peel_committish(repo, onto_name, "--onto"); if (rinfo.positive_refexprs < @@ -253,6 +254,134 @@ static struct commit *pick_regular_commit(struct repository *repo, return create_commit(repo, result->tree, pickme, replayed_base); } +struct replay_revisions_options { + const char *advance; + const char *onto; + int contained; +}; + +struct replay_result { + struct replay_ref_update { + char *refname; + struct object_id old_oid; + struct object_id new_oid; + } *updates; + size_t updates_nr, 
updates_alloc; +}; + +static void replay_result_release(struct replay_result *result) +{ + for (size_t i = 0; i < result->updates_nr; i++) + free(result->updates[i].refname); + free(result->updates); +} + +static void replay_result_queue_update(struct replay_result *result, + const char *refname, + const struct object_id *old_oid, + const struct object_id *new_oid) +{ + ALLOC_GROW(result->updates, result->updates_nr + 1, result->updates_alloc); + result->updates[result->updates_nr].refname = xstrdup(refname); + result->updates[result->updates_nr].old_oid = *old_oid; + result->updates[result->updates_nr].new_oid = *new_oid; + result->updates_nr++; +} + +static int replay_revisions(struct rev_info *revs, + struct replay_revisions_options *opts, + struct replay_result *out) +{ + kh_oid_map_t *replayed_commits = NULL; + struct strset *update_refs = NULL; + struct commit *last_commit = NULL; + struct commit *commit; + struct commit *onto = NULL; + struct merge_options merge_opt; + struct merge_result result; + char *advance; + int ret; + + advance = xstrdup_or_null(opts->advance); + set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, &advance, + &onto, &update_refs); + + /* FIXME: Should allow replaying commits with the first as a root commit */ + + if (prepare_revision_walk(revs) < 0) { + ret = error(_("error preparing revisions")); + goto out; + } + + init_basic_merge_options(&merge_opt, revs->repo); + memset(&result, 0, sizeof(result)); + merge_opt.show_rename_progress = 0; + last_commit = onto; + replayed_commits = kh_init_oid_map(); + while ((commit = get_revision(revs))) { + const struct name_decoration *decoration; + khint_t pos; + int hr; + + if (!commit->parents) + die(_("replaying down from root commit is not supported yet!")); + if (commit->parents->next) + die(_("replaying merge commits is not supported yet!")); + + last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, + onto, &merge_opt, &result); + if (!last_commit) + break; + + /* 
Record commit -> last_commit mapping */ + pos = kh_put_oid_map(replayed_commits, commit->object.oid, &hr); + if (hr == 0) + BUG("Duplicate rewritten commit: %s\n", + oid_to_hex(&commit->object.oid)); + kh_value(replayed_commits, pos) = last_commit; + + /* Update any necessary branches */ + if (advance) + continue; + decoration = get_name_decoration(&commit->object); + if (!decoration) + continue; + while (decoration) { + if (decoration->type == DECORATION_REF_LOCAL && + (opts->contained || strset_contains(update_refs, + decoration->name))) { + replay_result_queue_update(out, decoration->name, + &commit->object.oid, + &last_commit->object.oid); + } + decoration = decoration->next; + } + } + + if (!result.clean) { + ret = 1; + goto out; + } + + /* In --advance mode, advance the target ref */ + if (advance) + replay_result_queue_update(out, advance, + &onto->object.oid, + &last_commit->object.oid); + + ret = 0; + +out: + if (update_refs) { + strset_clear(update_refs); + free(update_refs); + } + kh_destroy_oid_map(replayed_commits); + merge_finalize(&merge_opt, &result); + free(advance); + return ret; +} + static enum ref_action_mode parse_ref_action_mode(const char *ref_action, const char *source) { if (!ref_action || !strcmp(ref_action, "update")) @@ -306,21 +435,11 @@ int cmd_replay(int argc, const char *prefix, struct repository *repo) { - const char *advance_name_opt = NULL; - char *advance_name = NULL; - struct commit *onto = NULL; - const char *onto_name = NULL; - int contained = 0; + struct replay_revisions_options opts = { 0 }; + struct replay_result result = { 0 }; const char *ref_action = NULL; enum ref_action_mode ref_mode; - struct rev_info revs; - struct commit *last_commit = NULL; - struct commit *commit; - struct merge_options merge_opt; - struct merge_result result; - struct strset *update_refs = NULL; - kh_oid_map_t *replayed_commits; struct ref_transaction *transaction = NULL; struct strbuf transaction_err = STRBUF_INIT; struct strbuf reflog_msg = 
STRBUF_INIT; @@ -333,13 +452,13 @@ int cmd_replay(int argc, NULL }; struct option replay_options[] = { - OPT_STRING(0, "advance", &advance_name_opt, + OPT_STRING(0, "advance", &opts.advance, N_("branch"), N_("make replay advance given branch")), - OPT_STRING(0, "onto", &onto_name, + OPT_STRING(0, "onto", &opts.onto, N_("revision"), N_("replay onto given commit")), - OPT_BOOL(0, "contained", &contained, + OPT_BOOL(0, "contained", &opts.contained, N_("update all branches that point at commits in ")), OPT_STRING(0, "ref-action", &ref_action, N_("mode"), @@ -350,19 +469,19 @@ int cmd_replay(int argc, argc = parse_options(argc, argv, prefix, replay_options, replay_usage, PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); - if (!onto_name && !advance_name_opt) { + if (!opts.onto && !opts.advance) { error(_("option --onto or --advance is mandatory")); usage_with_options(replay_usage, replay_options); } - die_for_incompatible_opt2(!!advance_name_opt, "--advance", - contained, "--contained"); + die_for_incompatible_opt2(!!opts.advance, "--advance", + opts.contained, "--contained"); + die_for_incompatible_opt2(!!opts.advance, "--advance", + !!opts.onto, "--onto"); /* Parse ref action mode from command line or config */ ref_mode = get_ref_action_mode(repo, ref_action); - advance_name = xstrdup_or_null(advance_name_opt); - repo_init_revisions(repo, &revs, prefix); /* @@ -414,18 +533,19 @@ int cmd_replay(int argc, revs.simplify_history = 0; } - set_up_replay_mode(repo, &revs.cmdline, - onto_name, &advance_name, - &onto, &update_refs); - - /* FIXME: Should allow replaying commits with the first as a root commit */ + ret = replay_revisions(&revs, &opts, &result); + if (ret) + goto cleanup; /* Build reflog message */ - if (advance_name_opt) - strbuf_addf(&reflog_msg, "replay --advance %s", advance_name_opt); - else - strbuf_addf(&reflog_msg, "replay --onto %s", - oid_to_hex(&onto->object.oid)); + if (opts.advance) { + strbuf_addf(&reflog_msg, "replay --advance %s", opts.advance); 
+ } else { + struct object_id oid; + if (repo_get_oid_committish(repo, opts.onto, &oid)) + BUG("--onto commit should have been resolved beforehand already"); + strbuf_addf(&reflog_msg, "replay --onto %s", oid_to_hex(&oid)); + } /* Initialize ref transaction if using update mode */ if (ref_mode == REF_ACTION_UPDATE) { @@ -438,78 +558,19 @@ int cmd_replay(int argc, } } - if (prepare_revision_walk(&revs) < 0) { - ret = error(_("error preparing revisions")); - goto cleanup; - } - - init_basic_merge_options(&merge_opt, repo); - memset(&result, 0, sizeof(result)); - merge_opt.show_rename_progress = 0; - last_commit = onto; - replayed_commits = kh_init_oid_map(); - while ((commit = get_revision(&revs))) { - const struct name_decoration *decoration; - khint_t pos; - int hr; - - if (!commit->parents) - die(_("replaying down from root commit is not supported yet!")); - if (commit->parents->next) - die(_("replaying merge commits is not supported yet!")); - - last_commit = pick_regular_commit(repo, commit, replayed_commits, - onto, &merge_opt, &result); - if (!last_commit) - break; - - /* Record commit -> last_commit mapping */ - pos = kh_put_oid_map(replayed_commits, commit->object.oid, &hr); - if (hr == 0) - BUG("Duplicate rewritten commit: %s\n", - oid_to_hex(&commit->object.oid)); - kh_value(replayed_commits, pos) = last_commit; - - /* Update any necessary branches */ - if (advance_name) - continue; - decoration = get_name_decoration(&commit->object); - if (!decoration) - continue; - while (decoration) { - if (decoration->type == DECORATION_REF_LOCAL && - (contained || strset_contains(update_refs, - decoration->name))) { - if (handle_ref_update(ref_mode, transaction, - decoration->name, - &last_commit->object.oid, - &commit->object.oid, - reflog_msg.buf, - &transaction_err) < 0) { - ret = error(_("failed to update ref '%s': %s"), - decoration->name, transaction_err.buf); - goto cleanup; - } - } - decoration = decoration->next; - } - } - - /* In --advance mode, advance the 
target ref */ - if (result.clean == 1 && advance_name) { - if (handle_ref_update(ref_mode, transaction, advance_name, - &last_commit->object.oid, - &onto->object.oid, - reflog_msg.buf, - &transaction_err) < 0) { + for (size_t i = 0; i < result.updates_nr; i++) { + ret = handle_ref_update(ref_mode, transaction, result.updates[i].refname, + &result.updates[i].new_oid, &result.updates[i].old_oid, + reflog_msg.buf, &transaction_err); + if (ret) { ret = error(_("failed to update ref '%s': %s"), - advance_name, transaction_err.buf); + result.updates[i].refname, transaction_err.buf); goto cleanup; } } /* Commit the ref transaction if we have one */ - if (transaction && result.clean == 1) { + if (transaction) { if (ref_transaction_commit(transaction, &transaction_err)) { ret = error(_("failed to commit ref transaction: %s"), transaction_err.buf); @@ -517,24 +578,18 @@ int cmd_replay(int argc, } } - merge_finalize(&merge_opt, &result); - kh_destroy_oid_map(replayed_commits); - if (update_refs) { - strset_clear(update_refs); - free(update_refs); - } - ret = result.clean; + ret = 0; cleanup: if (transaction) ref_transaction_free(transaction); + replay_result_release(&result); strbuf_release(&transaction_err); strbuf_release(&reflog_msg); release_revisions(&revs); - free(advance_name); /* Return */ if (ret < 0) exit(128); - return ret ? 0 : 1; + return ret; } From 6aeda3cf5b6d5c38e9b51f1f39133e23d0981d55 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:33 +0100 Subject: [PATCH 019/784] builtin/replay: move core logic into "libgit.a" Move the core logic used to replay commits into "libgit.a" so that it can be easily reused by other commands. It will be used in a subsequent commit where we're about to introduce a new git-history(1) command. Note that with this change we have no sign-comparison warnings anymore, and neither do we depend on `the_repository`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/replay.c | 366 +---------------------------------------------- meson.build | 1 + replay.c | 354 +++++++++++++++++++++++++++++++++++++++++++++ replay.h | 61 ++++++++ 5 files changed, 418 insertions(+), 365 deletions(-) create mode 100644 replay.c create mode 100644 replay.h diff --git a/Makefile b/Makefile index b7eba509c6a0ca..1c64a5d2aea3e3 100644 --- a/Makefile +++ b/Makefile @@ -1285,6 +1285,7 @@ LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o +LIB_OBJS += replay.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o LIB_OBJS += rerere.o diff --git a/builtin/replay.c b/builtin/replay.c index daf907668c109a..2cdde830a8a0f4 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -2,386 +2,22 @@ * "git replay" builtin command */ -#define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS - #include "git-compat-util.h" #include "builtin.h" #include "config.h" -#include "environment.h" #include "hex.h" -#include "lockfile.h" -#include "merge-ort.h" #include "object-name.h" #include "parse-options.h" #include "refs.h" +#include "replay.h" #include "revision.h" -#include "strmap.h" -#include -#include enum ref_action_mode { REF_ACTION_UPDATE, REF_ACTION_PRINT, }; -static const char *short_commit_name(struct repository *repo, - struct commit *commit) -{ - return repo_find_unique_abbrev(repo, &commit->object.oid, - DEFAULT_ABBREV); -} - -static struct commit *peel_committish(struct repository *repo, - const char *name, - const char *mode) -{ - struct object *obj; - struct object_id oid; - - if (repo_get_oid(repo, name, &oid)) - die(_("'%s' is not a valid commit-ish for %s"), name, mode); - obj = parse_object_or_die(repo, &oid, name); - return (struct commit *)repo_peel_to_type(repo, name, 0, obj, - OBJ_COMMIT); -} - -static char *get_author(const char *message) -{ - size_t len; - const char *a; - - a 
= find_commit_header(message, "author", &len); - if (a) - return xmemdupz(a, len); - - return NULL; -} - -static struct commit *create_commit(struct repository *repo, - struct tree *tree, - struct commit *based_on, - struct commit *parent) -{ - struct object_id ret; - struct object *obj = NULL; - struct commit_list *parents = NULL; - char *author; - char *sign_commit = NULL; /* FIXME: cli users might want to sign again */ - struct commit_extra_header *extra = NULL; - struct strbuf msg = STRBUF_INIT; - const char *out_enc = get_commit_output_encoding(); - const char *message = repo_logmsg_reencode(repo, based_on, - NULL, out_enc); - const char *orig_message = NULL; - const char *exclude_gpgsig[] = { "gpgsig", "gpgsig-sha256", NULL }; - - commit_list_insert(parent, &parents); - extra = read_commit_extra_headers(based_on, exclude_gpgsig); - find_commit_subject(message, &orig_message); - strbuf_addstr(&msg, orig_message); - author = get_author(message); - reset_ident_date(); - if (commit_tree_extended(msg.buf, msg.len, &tree->object.oid, parents, - &ret, author, NULL, sign_commit, extra)) { - error(_("failed to write commit object")); - goto out; - } - - obj = parse_object(repo, &ret); - -out: - repo_unuse_commit_buffer(the_repository, based_on, message); - free_commit_extra_headers(extra); - free_commit_list(parents); - strbuf_release(&msg); - free(author); - return (struct commit *)obj; -} - -struct ref_info { - struct commit *onto; - struct strset positive_refs; - struct strset negative_refs; - int positive_refexprs; - int negative_refexprs; -}; - -static void get_ref_information(struct repository *repo, - struct rev_cmdline_info *cmd_info, - struct ref_info *ref_info) -{ - int i; - - ref_info->onto = NULL; - strset_init(&ref_info->positive_refs); - strset_init(&ref_info->negative_refs); - ref_info->positive_refexprs = 0; - ref_info->negative_refexprs = 0; - - /* - * When the user specifies e.g. 
- * git replay origin/main..mybranch - * git replay ^origin/next mybranch1 mybranch2 - * we want to be able to determine where to replay the commits. In - * these examples, the branches are probably based on an old version - * of either origin/main or origin/next, so we want to replay on the - * newest version of that branch. In contrast we would want to error - * out if they ran - * git replay ^origin/master ^origin/next mybranch - * git replay mybranch~2..mybranch - * the first of those because there's no unique base to choose, and - * the second because they'd likely just be replaying commits on top - * of the same commit and not making any difference. - */ - for (i = 0; i < cmd_info->nr; i++) { - struct rev_cmdline_entry *e = cmd_info->rev + i; - struct object_id oid; - const char *refexpr = e->name; - char *fullname = NULL; - int can_uniquely_dwim = 1; - - if (*refexpr == '^') - refexpr++; - if (repo_dwim_ref(repo, refexpr, strlen(refexpr), &oid, &fullname, 0) != 1) - can_uniquely_dwim = 0; - - if (e->flags & BOTTOM) { - if (can_uniquely_dwim) - strset_add(&ref_info->negative_refs, fullname); - if (!ref_info->negative_refexprs) - ref_info->onto = lookup_commit_reference_gently(repo, - &e->item->oid, 1); - ref_info->negative_refexprs++; - } else { - if (can_uniquely_dwim) - strset_add(&ref_info->positive_refs, fullname); - ref_info->positive_refexprs++; - } - - free(fullname); - } -} - -static void set_up_replay_mode(struct repository *repo, - struct rev_cmdline_info *cmd_info, - const char *onto_name, - char **advance_name, - struct commit **onto, - struct strset **update_refs) -{ - struct ref_info rinfo; - - get_ref_information(repo, cmd_info, &rinfo); - if (!rinfo.positive_refexprs) - die(_("need some commits to replay")); - - if (!onto_name == !*advance_name) - BUG("one and only one of onto_name and *advance_name must be given"); - - if (onto_name) { - *onto = peel_committish(repo, onto_name, "--onto"); - if (rinfo.positive_refexprs < - 
strset_get_size(&rinfo.positive_refs)) - die(_("all positive revisions given must be references")); - *update_refs = xcalloc(1, sizeof(**update_refs)); - **update_refs = rinfo.positive_refs; - memset(&rinfo.positive_refs, 0, sizeof(**update_refs)); - } else { - struct object_id oid; - char *fullname = NULL; - - if (!*advance_name) - BUG("expected either onto_name or *advance_name in this function"); - - if (repo_dwim_ref(repo, *advance_name, strlen(*advance_name), - &oid, &fullname, 0) == 1) { - free(*advance_name); - *advance_name = fullname; - } else { - die(_("argument to --advance must be a reference")); - } - *onto = peel_committish(repo, *advance_name, "--advance"); - if (rinfo.positive_refexprs > 1) - die(_("cannot advance target with multiple sources because ordering would be ill-defined")); - } - strset_clear(&rinfo.negative_refs); - strset_clear(&rinfo.positive_refs); -} - -static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, - struct commit *commit, - struct commit *fallback) -{ - khint_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); - if (pos == kh_end(replayed_commits)) - return fallback; - return kh_value(replayed_commits, pos); -} - -static struct commit *pick_regular_commit(struct repository *repo, - struct commit *pickme, - kh_oid_map_t *replayed_commits, - struct commit *onto, - struct merge_options *merge_opt, - struct merge_result *result) -{ - struct commit *base, *replayed_base; - struct tree *pickme_tree, *base_tree; - - base = pickme->parents->item; - replayed_base = mapped_commit(replayed_commits, base, onto); - - result->tree = repo_get_commit_tree(repo, replayed_base); - pickme_tree = repo_get_commit_tree(repo, pickme); - base_tree = repo_get_commit_tree(repo, base); - - merge_opt->branch1 = short_commit_name(repo, replayed_base); - merge_opt->branch2 = short_commit_name(repo, pickme); - merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); - - merge_incore_nonrecursive(merge_opt, - base_tree, - 
result->tree, - pickme_tree, - result); - - free((char*)merge_opt->ancestor); - merge_opt->ancestor = NULL; - if (!result->clean) - return NULL; - return create_commit(repo, result->tree, pickme, replayed_base); -} - -struct replay_revisions_options { - const char *advance; - const char *onto; - int contained; -}; - -struct replay_result { - struct replay_ref_update { - char *refname; - struct object_id old_oid; - struct object_id new_oid; - } *updates; - size_t updates_nr, updates_alloc; -}; - -static void replay_result_release(struct replay_result *result) -{ - for (size_t i = 0; i < result->updates_nr; i++) - free(result->updates[i].refname); - free(result->updates); -} - -static void replay_result_queue_update(struct replay_result *result, - const char *refname, - const struct object_id *old_oid, - const struct object_id *new_oid) -{ - ALLOC_GROW(result->updates, result->updates_nr + 1, result->updates_alloc); - result->updates[result->updates_nr].refname = xstrdup(refname); - result->updates[result->updates_nr].old_oid = *old_oid; - result->updates[result->updates_nr].new_oid = *new_oid; - result->updates_nr++; -} - -static int replay_revisions(struct rev_info *revs, - struct replay_revisions_options *opts, - struct replay_result *out) -{ - kh_oid_map_t *replayed_commits = NULL; - struct strset *update_refs = NULL; - struct commit *last_commit = NULL; - struct commit *commit; - struct commit *onto = NULL; - struct merge_options merge_opt; - struct merge_result result; - char *advance; - int ret; - - advance = xstrdup_or_null(opts->advance); - set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, &advance, - &onto, &update_refs); - - /* FIXME: Should allow replaying commits with the first as a root commit */ - - if (prepare_revision_walk(revs) < 0) { - ret = error(_("error preparing revisions")); - goto out; - } - - init_basic_merge_options(&merge_opt, revs->repo); - memset(&result, 0, sizeof(result)); - merge_opt.show_rename_progress = 0; - last_commit = 
onto; - replayed_commits = kh_init_oid_map(); - while ((commit = get_revision(revs))) { - const struct name_decoration *decoration; - khint_t pos; - int hr; - - if (!commit->parents) - die(_("replaying down from root commit is not supported yet!")); - if (commit->parents->next) - die(_("replaying merge commits is not supported yet!")); - - last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, - onto, &merge_opt, &result); - if (!last_commit) - break; - - /* Record commit -> last_commit mapping */ - pos = kh_put_oid_map(replayed_commits, commit->object.oid, &hr); - if (hr == 0) - BUG("Duplicate rewritten commit: %s\n", - oid_to_hex(&commit->object.oid)); - kh_value(replayed_commits, pos) = last_commit; - - /* Update any necessary branches */ - if (advance) - continue; - decoration = get_name_decoration(&commit->object); - if (!decoration) - continue; - while (decoration) { - if (decoration->type == DECORATION_REF_LOCAL && - (opts->contained || strset_contains(update_refs, - decoration->name))) { - replay_result_queue_update(out, decoration->name, - &commit->object.oid, - &last_commit->object.oid); - } - decoration = decoration->next; - } - } - - if (!result.clean) { - ret = 1; - goto out; - } - - /* In --advance mode, advance the target ref */ - if (advance) - replay_result_queue_update(out, advance, - &onto->object.oid, - &last_commit->object.oid); - - ret = 0; - -out: - if (update_refs) { - strset_clear(update_refs); - free(update_refs); - } - kh_destroy_oid_map(replayed_commits); - merge_finalize(&merge_opt, &result); - free(advance); - return ret; -} - static enum ref_action_mode parse_ref_action_mode(const char *ref_action, const char *source) { if (!ref_action || !strcmp(ref_action, "update")) diff --git a/meson.build b/meson.build index dd52efd1c87574..a5a4e99b259cf9 100644 --- a/meson.build +++ b/meson.build @@ -471,6 +471,7 @@ libgit_sources = [ 'repack-midx.c', 'repack-promisor.c', 'replace-object.c', + 'replay.c', 'repo-settings.c', 
'repository.c', 'rerere.c', diff --git a/replay.c b/replay.c new file mode 100644 index 00000000000000..c580a9caa2a43e --- /dev/null +++ b/replay.c @@ -0,0 +1,354 @@ +#define USE_THE_REPOSITORY_VARIABLE +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "environment.h" +#include "hex.h" +#include "merge-ort.h" +#include "object-name.h" +#include "refs.h" +#include "replay.h" +#include "revision.h" +#include "strmap.h" +#include "tree.h" + +static const char *short_commit_name(struct repository *repo, + struct commit *commit) +{ + return repo_find_unique_abbrev(repo, &commit->object.oid, + DEFAULT_ABBREV); +} + +static struct commit *peel_committish(struct repository *repo, + const char *name, + const char *mode) +{ + struct object *obj; + struct object_id oid; + + if (repo_get_oid(repo, name, &oid)) + die(_("'%s' is not a valid commit-ish for %s"), name, mode); + obj = parse_object_or_die(repo, &oid, name); + return (struct commit *)repo_peel_to_type(repo, name, 0, obj, + OBJ_COMMIT); +} + +static char *get_author(const char *message) +{ + size_t len; + const char *a; + + a = find_commit_header(message, "author", &len); + if (a) + return xmemdupz(a, len); + + return NULL; +} + +static struct commit *create_commit(struct repository *repo, + struct tree *tree, + struct commit *based_on, + struct commit *parent) +{ + struct object_id ret; + struct object *obj = NULL; + struct commit_list *parents = NULL; + char *author; + char *sign_commit = NULL; /* FIXME: cli users might want to sign again */ + struct commit_extra_header *extra = NULL; + struct strbuf msg = STRBUF_INIT; + const char *out_enc = get_commit_output_encoding(); + const char *message = repo_logmsg_reencode(repo, based_on, + NULL, out_enc); + const char *orig_message = NULL; + const char *exclude_gpgsig[] = { "gpgsig", "gpgsig-sha256", NULL }; + + commit_list_insert(parent, &parents); + extra = read_commit_extra_headers(based_on, exclude_gpgsig); + find_commit_subject(message, 
&orig_message); + strbuf_addstr(&msg, orig_message); + author = get_author(message); + reset_ident_date(); + if (commit_tree_extended(msg.buf, msg.len, &tree->object.oid, parents, + &ret, author, NULL, sign_commit, extra)) { + error(_("failed to write commit object")); + goto out; + } + + obj = parse_object(repo, &ret); + +out: + repo_unuse_commit_buffer(the_repository, based_on, message); + free_commit_extra_headers(extra); + free_commit_list(parents); + strbuf_release(&msg); + free(author); + return (struct commit *)obj; +} + +struct ref_info { + struct commit *onto; + struct strset positive_refs; + struct strset negative_refs; + int positive_refexprs; + int negative_refexprs; +}; + +static void get_ref_information(struct repository *repo, + struct rev_cmdline_info *cmd_info, + struct ref_info *ref_info) +{ + int i; + + ref_info->onto = NULL; + strset_init(&ref_info->positive_refs); + strset_init(&ref_info->negative_refs); + ref_info->positive_refexprs = 0; + ref_info->negative_refexprs = 0; + + /* + * When the user specifies e.g. + * git replay origin/main..mybranch + * git replay ^origin/next mybranch1 mybranch2 + * we want to be able to determine where to replay the commits. In + * these examples, the branches are probably based on an old version + * of either origin/main or origin/next, so we want to replay on the + * newest version of that branch. In contrast we would want to error + * out if they ran + * git replay ^origin/master ^origin/next mybranch + * git replay mybranch~2..mybranch + * the first of those because there's no unique base to choose, and + * the second because they'd likely just be replaying commits on top + * of the same commit and not making any difference. 
+ */ + for (i = 0; i < cmd_info->nr; i++) { + struct rev_cmdline_entry *e = cmd_info->rev + i; + struct object_id oid; + const char *refexpr = e->name; + char *fullname = NULL; + int can_uniquely_dwim = 1; + + if (*refexpr == '^') + refexpr++; + if (repo_dwim_ref(repo, refexpr, strlen(refexpr), &oid, &fullname, 0) != 1) + can_uniquely_dwim = 0; + + if (e->flags & BOTTOM) { + if (can_uniquely_dwim) + strset_add(&ref_info->negative_refs, fullname); + if (!ref_info->negative_refexprs) + ref_info->onto = lookup_commit_reference_gently(repo, + &e->item->oid, 1); + ref_info->negative_refexprs++; + } else { + if (can_uniquely_dwim) + strset_add(&ref_info->positive_refs, fullname); + ref_info->positive_refexprs++; + } + + free(fullname); + } +} + +static void set_up_replay_mode(struct repository *repo, + struct rev_cmdline_info *cmd_info, + const char *onto_name, + char **advance_name, + struct commit **onto, + struct strset **update_refs) +{ + struct ref_info rinfo; + + get_ref_information(repo, cmd_info, &rinfo); + if (!rinfo.positive_refexprs) + die(_("need some commits to replay")); + + if (!onto_name == !*advance_name) + BUG("one and only one of onto_name and *advance_name must be given"); + + if (onto_name) { + *onto = peel_committish(repo, onto_name, "--onto"); + if (rinfo.positive_refexprs < + strset_get_size(&rinfo.positive_refs)) + die(_("all positive revisions given must be references")); + *update_refs = xcalloc(1, sizeof(**update_refs)); + **update_refs = rinfo.positive_refs; + memset(&rinfo.positive_refs, 0, sizeof(**update_refs)); + } else { + struct object_id oid; + char *fullname = NULL; + + if (!*advance_name) + BUG("expected either onto_name or *advance_name in this function"); + + if (repo_dwim_ref(repo, *advance_name, strlen(*advance_name), + &oid, &fullname, 0) == 1) { + free(*advance_name); + *advance_name = fullname; + } else { + die(_("argument to --advance must be a reference")); + } + *onto = peel_committish(repo, *advance_name, "--advance"); + 
if (rinfo.positive_refexprs > 1) + die(_("cannot advance target with multiple sources because ordering would be ill-defined")); + } + strset_clear(&rinfo.negative_refs); + strset_clear(&rinfo.positive_refs); +} + +static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, + struct commit *commit, + struct commit *fallback) +{ + khint_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); + if (pos == kh_end(replayed_commits)) + return fallback; + return kh_value(replayed_commits, pos); +} + +static struct commit *pick_regular_commit(struct repository *repo, + struct commit *pickme, + kh_oid_map_t *replayed_commits, + struct commit *onto, + struct merge_options *merge_opt, + struct merge_result *result) +{ + struct commit *base, *replayed_base; + struct tree *pickme_tree, *base_tree; + + base = pickme->parents->item; + replayed_base = mapped_commit(replayed_commits, base, onto); + + result->tree = repo_get_commit_tree(repo, replayed_base); + pickme_tree = repo_get_commit_tree(repo, pickme); + base_tree = repo_get_commit_tree(repo, base); + + merge_opt->branch1 = short_commit_name(repo, replayed_base); + merge_opt->branch2 = short_commit_name(repo, pickme); + merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); + + merge_incore_nonrecursive(merge_opt, + base_tree, + result->tree, + pickme_tree, + result); + + free((char*)merge_opt->ancestor); + merge_opt->ancestor = NULL; + if (!result->clean) + return NULL; + return create_commit(repo, result->tree, pickme, replayed_base); +} + +void replay_result_release(struct replay_result *result) +{ + for (size_t i = 0; i < result->updates_nr; i++) + free(result->updates[i].refname); + free(result->updates); +} + +static void replay_result_queue_update(struct replay_result *result, + const char *refname, + const struct object_id *old_oid, + const struct object_id *new_oid) +{ + ALLOC_GROW(result->updates, result->updates_nr + 1, result->updates_alloc); + result->updates[result->updates_nr].refname 
= xstrdup(refname); + result->updates[result->updates_nr].old_oid = *old_oid; + result->updates[result->updates_nr].new_oid = *new_oid; + result->updates_nr++; +} + +int replay_revisions(struct rev_info *revs, + struct replay_revisions_options *opts, + struct replay_result *out) +{ + kh_oid_map_t *replayed_commits = NULL; + struct strset *update_refs = NULL; + struct commit *last_commit = NULL; + struct commit *commit; + struct commit *onto = NULL; + struct merge_options merge_opt; + struct merge_result result; + char *advance; + int ret; + + advance = xstrdup_or_null(opts->advance); + set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, &advance, + &onto, &update_refs); + + /* FIXME: Should allow replaying commits with the first as a root commit */ + + if (prepare_revision_walk(revs) < 0) { + ret = error(_("error preparing revisions")); + goto out; + } + + init_basic_merge_options(&merge_opt, revs->repo); + memset(&result, 0, sizeof(result)); + merge_opt.show_rename_progress = 0; + last_commit = onto; + replayed_commits = kh_init_oid_map(); + while ((commit = get_revision(revs))) { + const struct name_decoration *decoration; + khint_t pos; + int hr; + + if (!commit->parents) + die(_("replaying down from root commit is not supported yet!")); + if (commit->parents->next) + die(_("replaying merge commits is not supported yet!")); + + last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, + onto, &merge_opt, &result); + if (!last_commit) + break; + + /* Record commit -> last_commit mapping */ + pos = kh_put_oid_map(replayed_commits, commit->object.oid, &hr); + if (hr == 0) + BUG("Duplicate rewritten commit: %s\n", + oid_to_hex(&commit->object.oid)); + kh_value(replayed_commits, pos) = last_commit; + + /* Update any necessary branches */ + if (advance) + continue; + decoration = get_name_decoration(&commit->object); + if (!decoration) + continue; + while (decoration) { + if (decoration->type == DECORATION_REF_LOCAL && + (opts->contained || 
strset_contains(update_refs, + decoration->name))) { + replay_result_queue_update(out, decoration->name, + &commit->object.oid, + &last_commit->object.oid); + } + decoration = decoration->next; + } + } + + if (!result.clean) { + ret = 1; + goto out; + } + + /* In --advance mode, advance the target ref */ + if (advance) + replay_result_queue_update(out, advance, + &onto->object.oid, + &last_commit->object.oid); + + ret = 0; + +out: + if (update_refs) { + strset_clear(update_refs); + free(update_refs); + } + kh_destroy_oid_map(replayed_commits); + merge_finalize(&merge_opt, &result); + free(advance); + return ret; +} diff --git a/replay.h b/replay.h new file mode 100644 index 00000000000000..d8407dc7f710fc --- /dev/null +++ b/replay.h @@ -0,0 +1,61 @@ +#ifndef REPLAY_H +#define REPLAY_H + +#include "hash.h" + +struct repository; +struct rev_info; + +/* + * A set of options that can be passed to `replay_revisions()`. + */ +struct replay_revisions_options { + /* + * Starting point at which to create the new commits; must be a branch + * name. The branch will be updated to point to the rewritten commits. + * This option is mutually exclusive with `onto`. + */ + const char *advance; + + /* + * Starting point at which to create the new commits; must be a + * committish. References pointing at descendants of `onto` will be + * updated to point to the new commits. + */ + const char *onto; + + /* + * Update branches that point at commits in the given revision range. + * Requires `onto` to be set. + */ + int contained; +}; + +/* This struct is used as an out-parameter by `replay_revisions()`. */ +struct replay_result { + /* + * The set of reference updates that are caused by replaying the + * commits. + */ + struct replay_ref_update { + char *refname; + struct object_id old_oid; + struct object_id new_oid; + } *updates; + size_t updates_nr, updates_alloc; +}; + +void replay_result_release(struct replay_result *result); + +/* + * Replay a set of commits onto a new location.
Leaves both the working tree, + * index and references untouched. Reference updates caused by the replay will + * be recorded in the `updates` out pointer. + * + * Returns 0 on success, 1 on conflict and a negative error code otherwise. + */ +int replay_revisions(struct rev_info *revs, + struct replay_revisions_options *opts, + struct replay_result *out); + +#endif From 410e3780602a5da36a91d7456daf8b1427d80318 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:34 +0100 Subject: [PATCH 020/784] replay: small set of cleanups Perform a small set of cleanups so that the "replay" logic compiles with "-Wsign-compare" and doesn't use `the_repository` anymore. Note that there are still some implicit dependencies on `the_repository`, e.g. because we use `get_commit_output_encoding()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- replay.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/replay.c b/replay.c index c580a9caa2a43e..ae13b59abced70 100644 --- a/replay.c +++ b/replay.c @@ -1,5 +1,4 @@ #define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "environment.h" @@ -78,7 +77,7 @@ static struct commit *create_commit(struct repository *repo, obj = parse_object(repo, &ret); out: - repo_unuse_commit_buffer(the_repository, based_on, message); + repo_unuse_commit_buffer(repo, based_on, message); free_commit_extra_headers(extra); free_commit_list(parents); strbuf_release(&msg); @@ -90,16 +89,14 @@ struct ref_info { struct commit *onto; struct strset positive_refs; struct strset negative_refs; - int positive_refexprs; - int negative_refexprs; + size_t positive_refexprs; + size_t negative_refexprs; }; static void get_ref_information(struct repository *repo, struct rev_cmdline_info *cmd_info, struct ref_info *ref_info) { - int i; - ref_info->onto = NULL; strset_init(&ref_info->positive_refs); strset_init(&ref_info->negative_refs); @@ -121,7 +118,7 
@@ static void get_ref_information(struct repository *repo, * the second because they'd likely just be replaying commits on top * of the same commit and not making any difference. */ - for (i = 0; i < cmd_info->nr; i++) { + for (size_t i = 0; i < cmd_info->nr; i++) { struct rev_cmdline_entry *e = cmd_info->rev + i; struct object_id oid; const char *refexpr = e->name; From 5425771568ee286ed7ee848b8886cfdc98806b7a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:35 +0100 Subject: [PATCH 021/784] replay: support empty commit ranges In a subsequent commit we're about to introduce a new user of the replay subsystem. With that new user, the range of commits that we'll want to replay will be identified implicitly via a single commit, and will include all descendants of that commit to any branch. If that commit has no descendants (because it's the tip of some branch), then the range of revisions that we're asked to replay becomes empty. This case does not make sense with git-replay(1), but with the new command it will. This case is not currently supported by `replay_revisions()` though because we zero-initialize `struct merge_result`. This includes its `.clean` member, which indicates whether the merge ran into a conflict or not. But given that we don't have any revision to replay, we won't ever perform any merge at all, and consequently that member will never be set to `1`. We thus later think that there's been a merge conflict and return an error from `replay_revisions()`. Address this issue by initializing the `.clean` member to `1`.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- replay.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/replay.c b/replay.c index ae13b59abced70..6680d50bd7cfcd 100644 --- a/replay.c +++ b/replay.c @@ -266,7 +266,9 @@ int replay_revisions(struct rev_info *revs, struct commit *commit; struct commit *onto = NULL; struct merge_options merge_opt; - struct merge_result result; + struct merge_result result = { + .clean = 1, + }; char *advance; int ret; @@ -282,7 +284,6 @@ int replay_revisions(struct rev_info *revs, } init_basic_merge_options(&merge_opt, revs->repo); - memset(&result, 0, sizeof(result)); merge_opt.show_rename_progress = 0; last_commit = onto; replayed_commits = kh_init_oid_map(); From 48a72f61f04cb2357544f373677acd5b4149237e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:36 +0100 Subject: [PATCH 022/784] replay: support updating detached HEAD In a subsequent commit we're about to introduce a new git-history(1) command, which will by default work on all local branches and HEAD. This is already well-supported by the replay machinery for the most part: updating branches is one of its prime use cases, and the HEAD ref is also updated in case it points to any of the branches. However, what's not supported yet is to update HEAD in case it is not a symbolic ref. We determine the refs that need to be updated by iterating through the decorations of the original commit, but we only update those refs that are `DECORATION_REF_LOCAL`, which covers local branches. Address this gap by also handling `DECORATION_REF_HEAD`. Note though that this only needs to happen in case we're working on a detached HEAD. If HEAD is pointing to a branch, then we'd already update that branch via `DECORATION_REF_LOCAL`. Refactor the loop that iterates through the decorations a bit to make the individual conditions easier to understand.
Based-on-patch-by: Elijah Newren Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- replay.c | 47 ++++++++++++++++++++++++++++------------ t/t3650-replay-basics.sh | 9 ++++++++ 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/replay.c b/replay.c index 6680d50bd7cfcd..94fb76384b49d1 100644 --- a/replay.c +++ b/replay.c @@ -150,11 +150,17 @@ static void get_ref_information(struct repository *repo, static void set_up_replay_mode(struct repository *repo, struct rev_cmdline_info *cmd_info, const char *onto_name, + bool *detached_head, char **advance_name, struct commit **onto, struct strset **update_refs) { struct ref_info rinfo; + int head_flags = 0; + + refs_read_ref_full(get_main_ref_store(repo), "HEAD", + RESOLVE_REF_NO_RECURSE, NULL, &head_flags); + *detached_head = !(head_flags & REF_ISSYMREF); get_ref_information(repo, cmd_info, &rinfo); if (!rinfo.positive_refexprs) @@ -269,12 +275,13 @@ int replay_revisions(struct rev_info *revs, struct merge_result result = { .clean = 1, }; + bool detached_head; char *advance; int ret; advance = xstrdup_or_null(opts->advance); - set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, &advance, - &onto, &update_refs); + set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, + &detached_head, &advance, &onto, &update_refs); /* FIXME: Should allow replaying commits with the first as a root commit */ @@ -312,18 +319,30 @@ int replay_revisions(struct rev_info *revs, /* Update any necessary branches */ if (advance) continue; - decoration = get_name_decoration(&commit->object); - if (!decoration) - continue; - while (decoration) { - if (decoration->type == DECORATION_REF_LOCAL && - (opts->contained || strset_contains(update_refs, - decoration->name))) { - replay_result_queue_update(out, decoration->name, - &commit->object.oid, - &last_commit->object.oid); - } - decoration = decoration->next; + + for (decoration = get_name_decoration(&commit->object); + decoration; + decoration = 
decoration->next) + { + if (decoration->type != DECORATION_REF_LOCAL && + decoration->type != DECORATION_REF_HEAD) + continue; + + /* + * We only need to update HEAD separately in case it's + * detached. If it's not we'd already update the branch + * it is pointing to. + */ + if (decoration->type == DECORATION_REF_HEAD && !detached_head) + continue; + + if (!opts->contained && + !strset_contains(update_refs, decoration->name)) + continue; + + replay_result_queue_update(out, decoration->name, + &commit->object.oid, + &last_commit->object.oid); } } diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index 307101eeb911f7..c862aa39f31e08 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -249,6 +249,15 @@ test_expect_success 'using replay on bare repo to rebase multiple divergent bran done ' +test_expect_success 'using replay to update detached HEAD' ' + current_head=$(git branch --show-current) && + test_when_finished git switch "$current_head" && + git switch --detach && + test_commit something && + git replay --ref-action=print --onto HEAD~2 --ref-action=print HEAD~..HEAD >updates && + test_grep "update HEAD " updates +' + test_expect_success 'merge.directoryRenames=false' ' # create a test case that stress-tests the rename caching git switch -c rename-onto && From 475ade1cd6e8ff07495b4b5871a69f7b385259f7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:37 +0100 Subject: [PATCH 023/784] wt-status: provide function to expose status for trees The "wt-status" subsystem is responsible for printing status information around the current state of the working tree. This most importantly includes information around whether the working tree or the index have any changes. We're about to introduce a new command where the changes in neither of them are actually relevant to us. Instead, what we want is to format the changes between two different trees. 
While it is a little bit of a stretch to add this as functionality to _working tree_ status, it doesn't make any sense to open-code this functionality, either. Implement a new function `wt_status_collect_changes_trees()` that diffs two trees and formats the status accordingly. This function is not yet used, but will be in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- wt-status.c | 24 ++++++++++++++++++++++++ wt-status.h | 9 +++++++++ 2 files changed, 33 insertions(+) diff --git a/wt-status.c b/wt-status.c index e12adb26b9f8eb..95942399f8cec6 100644 --- a/wt-status.c +++ b/wt-status.c @@ -612,6 +612,30 @@ static void wt_status_collect_updated_cb(struct diff_queue_struct *q, } } +void wt_status_collect_changes_trees(struct wt_status *s, + const struct object_id *old_treeish, + const struct object_id *new_treeish) +{ + struct diff_options opts = { 0 }; + + repo_diff_setup(s->repo, &opts); + opts.output_format = DIFF_FORMAT_CALLBACK; + opts.format_callback = wt_status_collect_updated_cb; + opts.format_callback_data = s; + opts.detect_rename = s->detect_rename >= 0 ? s->detect_rename : opts.detect_rename; + opts.rename_limit = s->rename_limit >= 0 ? s->rename_limit : opts.rename_limit; + opts.rename_score = s->rename_score >= 0 ? 
s->rename_score : opts.rename_score; + opts.flags.recursive = 1; + diff_setup_done(&opts); + + diff_tree_oid(old_treeish, new_treeish, "", &opts); + diffcore_std(&opts); + diff_flush(&opts); + wt_status_get_state(s->repo, &s->state, 0); + + diff_free(&opts); +} + static void wt_status_collect_changes_worktree(struct wt_status *s) { struct rev_info rev; diff --git a/wt-status.h b/wt-status.h index e40a27214a700d..e9fe32e98cc18c 100644 --- a/wt-status.h +++ b/wt-status.h @@ -153,6 +153,15 @@ void wt_status_add_cut_line(struct wt_status *s); void wt_status_prepare(struct repository *r, struct wt_status *s); void wt_status_print(struct wt_status *s); void wt_status_collect(struct wt_status *s); + +/* + * Collect all changes between the two trees. Changes will be displayed as if + * they were staged into the index. + */ +void wt_status_collect_changes_trees(struct wt_status *s, + const struct object_id *old_treeish, + const struct object_id *new_treeish); + /* * Frees the buffers allocated by wt_status_collect. */ From a675183d483cb106c076395936cd9e602ae94404 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:38 +0100 Subject: [PATCH 024/784] builtin: add new "history" command When rewriting history via git-rebase(1) there are a few very common use cases: - The ordering of two commits should be reversed. - A commit should be split up into two commits. - A commit should be dropped from the history completely. - Multiple commits should be squashed into one. - Editing an existing commit that is not the tip of the current branch. While these operations are all doable, it often feels needlessly kludgey to do so by doing an interactive rebase, using the editor to say what one wants, and then perform the actions. Also, some operations like splitting up a commit into two are way more involved than that and require a whole series of commands. Rebases also do not update dependent branches. 
The use of stacked branches has grown quite common with competing version control systems like Jujutsu though, so it clearly is a need that users have. While rebases _can_ serve this use case if one always works on the latest stacked branch, it is somewhat awkward and very easy to get wrong. Add a new "history" command to plug these gaps. This command will have several different subcommands to imperatively rewrite history for common use cases like the above. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- .gitignore | 1 + Documentation/git-history.adoc | 57 ++++++++++++++++++++++++++++++++++ Documentation/meson.build | 1 + Makefile | 1 + builtin.h | 1 + builtin/history.c | 22 +++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + t/meson.build | 1 + t/t3450-history.sh | 17 ++++++++++ 11 files changed, 104 insertions(+) create mode 100644 Documentation/git-history.adoc create mode 100644 builtin/history.c create mode 100755 t/t3450-history.sh diff --git a/.gitignore b/.gitignore index 78a45cb5bec991..24635cf2d6f4a3 100644 --- a/.gitignore +++ b/.gitignore @@ -79,6 +79,7 @@ /git-grep /git-hash-object /git-help +/git-history /git-hook /git-http-backend /git-http-fetch diff --git a/Documentation/git-history.adoc b/Documentation/git-history.adoc new file mode 100644 index 00000000000000..68c35f36b95a98 --- /dev/null +++ b/Documentation/git-history.adoc @@ -0,0 +1,57 @@ +git-history(1) +============== + +NAME +---- +git-history - EXPERIMENTAL: Rewrite history + +SYNOPSIS +-------- +[synopsis] +git history [] + +DESCRIPTION +----------- + +Rewrite history by rearranging or modifying specific commits in the +history. + +THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. + +This command is related to linkgit:git-rebase[1] in that both commands can be +used to rewrite history. 
There are a couple of major differences though: + +* linkgit:git-history[1] can work in a bare repository as it does not need to + touch either the index or the worktree. +* linkgit:git-history[1] does not execute any linkgit:githooks[5] at the + current point in time. This may change in the future. +* linkgit:git-history[1] by default updates all branches that are descendants + of the original commit to point to the rewritten commit. + +Overall, linkgit:git-history[1] aims to provide a more opinionated way to modify +your commit history that is simpler to use compared to linkgit:git-rebase[1] in +general. + +Use linkgit:git-rebase[1] if you want to reapply a range of commits onto a +different base, or interactive rebases if you want to edit a range of commits +at once. + +LIMITATIONS +----------- + +This command does not (yet) work with histories that contain merges. You +should use linkgit:git-rebase[1] with the `--rebase-merges` flag instead. + +Furthermore, the command does not support operations that can result in merge +conflicts. This limitation is by design as history rewrites are not intended to +be stateful operations. The limitation can be lifted once (if) Git learns about +first-class conflicts. + +COMMANDS +-------- + +No commands are supported yet. 
+ +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index f02dbc20cbcb86..fd2e8cc02d689f 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -64,6 +64,7 @@ manpages = { 'git-gui.adoc' : 1, 'git-hash-object.adoc' : 1, 'git-help.adoc' : 1, + 'git-history.adoc' : 1, 'git-hook.adoc' : 1, 'git-http-backend.adoc' : 1, 'git-http-fetch.adoc' : 1, diff --git a/Makefile b/Makefile index 1c64a5d2aea3e3..c0569ed8e476d7 100644 --- a/Makefile +++ b/Makefile @@ -1418,6 +1418,7 @@ BUILTIN_OBJS += builtin/get-tar-commit-id.o BUILTIN_OBJS += builtin/grep.o BUILTIN_OBJS += builtin/hash-object.o BUILTIN_OBJS += builtin/help.o +BUILTIN_OBJS += builtin/history.o BUILTIN_OBJS += builtin/hook.o BUILTIN_OBJS += builtin/index-pack.o BUILTIN_OBJS += builtin/init-db.o diff --git a/builtin.h b/builtin.h index 1b35565fbd9a3c..93c91d07d4bfdc 100644 --- a/builtin.h +++ b/builtin.h @@ -172,6 +172,7 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix, struc int cmd_grep(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_hash_object(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_help(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_history(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_hook(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_index_pack(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_init_db(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/history.c b/builtin/history.c new file mode 100644 index 00000000000000..f6fe32610b0645 --- /dev/null +++ b/builtin/history.c @@ -0,0 +1,22 @@ +#include "builtin.h" +#include "gettext.h" +#include "parse-options.h" + +int cmd_history(int argc, + const char **argv, + const char *prefix, + struct 
repository *repo UNUSED) +{ + const char * const usage[] = { + N_("git history []"), + NULL, + }; + struct option options[] = { + OPT_END(), + }; + + argc = parse_options(argc, argv, prefix, options, usage, 0); + if (argc) + usagef("unrecognized argument: %s", argv[0]); + return 0; +} diff --git a/command-list.txt b/command-list.txt index accd3d0c4b5524..f9005cf45979f1 100644 --- a/command-list.txt +++ b/command-list.txt @@ -115,6 +115,7 @@ git-grep mainporcelain info git-gui mainporcelain git-hash-object plumbingmanipulators git-help ancillaryinterrogators complete +git-history mainporcelain history git-hook purehelpers git-http-backend synchingrepositories git-http-fetch synchelpers diff --git a/git.c b/git.c index c5fad56813f437..744cb6527e065e 100644 --- a/git.c +++ b/git.c @@ -586,6 +586,7 @@ static struct cmd_struct commands[] = { { "grep", cmd_grep, RUN_SETUP_GENTLY }, { "hash-object", cmd_hash_object }, { "help", cmd_help }, + { "history", cmd_history, RUN_SETUP }, { "hook", cmd_hook, RUN_SETUP }, { "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "init", cmd_init_db }, diff --git a/meson.build b/meson.build index a5a4e99b259cf9..3a1d12caa4b94f 100644 --- a/meson.build +++ b/meson.build @@ -610,6 +610,7 @@ builtin_sources = [ 'builtin/grep.c', 'builtin/hash-object.c', 'builtin/help.c', + 'builtin/history.c', 'builtin/hook.c', 'builtin/index-pack.c', 'builtin/init-db.c', diff --git a/t/meson.build b/t/meson.build index 459c52a48972e4..73006b095afc9f 100644 --- a/t/meson.build +++ b/t/meson.build @@ -387,6 +387,7 @@ integration_tests = [ 't3436-rebase-more-options.sh', 't3437-rebase-fixup-options.sh', 't3438-rebase-broken-files.sh', + 't3450-history.sh', 't3500-cherry.sh', 't3501-revert-cherry-pick.sh', 't3502-cherry-pick-merge.sh', diff --git a/t/t3450-history.sh b/t/t3450-history.sh new file mode 100755 index 00000000000000..417c343d43b8d3 --- /dev/null +++ b/t/t3450-history.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +test_description='tests for 
git-history command' + +. ./test-lib.sh + +test_expect_success 'does nothing without any arguments' ' + git history >out 2>&1 && + test_must_be_empty out +' + +test_expect_success 'raises an error with unknown argument' ' + test_must_fail git history garbage 2>err && + test_grep "unrecognized argument: garbage" err +' + +test_done From d205234cb05a5e330c0f7f5b3ea764533a74d69e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 13 Jan 2026 10:54:39 +0100 Subject: [PATCH 025/784] builtin/history: implement "reword" subcommand Implement a new "reword" subcommand for git-history(1). This subcommand is similar to the user performing an interactive rebase with a single commit changed to use the "reword" instruction. The "reword" subcommand is built on top of the replay subsystem instead of the sequencer. This leads to some major differences compared to git-rebase(1): - We do not check out the commit that is to be reworded and instead perform the operation in-memory. This has the obvious benefit of being significantly faster compared to git-rebase(1), but even more importantly it allows the user to rewrite history even if there are local changes in the working tree or in the index. - We do not execute any hooks, even though we leave some room for changing this in the future. - By default, all local branches that contain the commit will be rewritten. This especially helps with workflows that use stacked branches. 
Helped-by: Elijah Newren Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-history.adoc | 20 +- builtin/history.c | 415 ++++++++++++++++++++++++++++++++- t/meson.build | 1 + t/t3450-history.sh | 6 +- t/t3451-history-reword.sh | 391 +++++++++++++++++++++++++++++++ 5 files changed, 823 insertions(+), 10 deletions(-) create mode 100755 t/t3451-history-reword.sh diff --git a/Documentation/git-history.adoc b/Documentation/git-history.adoc index 68c35f36b95a98..154e262b76a698 100644 --- a/Documentation/git-history.adoc +++ b/Documentation/git-history.adoc @@ -8,7 +8,7 @@ git-history - EXPERIMENTAL: Rewrite history SYNOPSIS -------- [synopsis] -git history [] +git history reword [--ref-action=(branches|head|print)] DESCRIPTION ----------- @@ -50,7 +50,23 @@ first-class conflicts. COMMANDS -------- -No commands are supported yet. +The following commands are available to rewrite history in different ways: + +`reword `:: + Rewrite the commit message of the specified commit. All the other + details of this commit remain unchanged. This command will spawn an + editor with the current message of that commit. + +OPTIONS +------- + +`--ref-action=(branches|head|print)`:: + Control which references will be updated by the command, if any. With + `branches`, all local branches that point to commits which are + descendants of the original commit will be rewritten. With `head`, only + the current `HEAD` reference will be rewritten. With `print`, all + updates as they would be performed with `branches` are printed in a + format that can be consumed by linkgit:git-update-ref[1]. 
GIT --- diff --git a/builtin/history.c b/builtin/history.c index f6fe32610b0645..8dcb9a604665f8 100644 --- a/builtin/history.c +++ b/builtin/history.c @@ -1,22 +1,427 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "builtin.h" +#include "commit.h" +#include "commit-reach.h" +#include "config.h" +#include "editor.h" +#include "environment.h" #include "gettext.h" +#include "hex.h" #include "parse-options.h" +#include "refs.h" +#include "replay.h" +#include "revision.h" +#include "sequencer.h" +#include "strvec.h" +#include "tree.h" +#include "wt-status.h" + +#define GIT_HISTORY_REWORD_USAGE \ + N_("git history reword [--ref-action=(branches|head|print)]") + +static void change_data_free(void *util, const char *str UNUSED) +{ + struct wt_status_change_data *d = util; + free(d->rename_source); + free(d); +} + +static int fill_commit_message(struct repository *repo, + const struct object_id *old_tree, + const struct object_id *new_tree, + const char *default_message, + const char *action, + struct strbuf *out) +{ + const char *path = git_path_commit_editmsg(); + const char *hint = + _("Please enter the commit message for the %s changes." 
+ " Lines starting\nwith '%s' will be ignored, and an" + " empty message aborts the commit.\n"); + struct wt_status s; + + strbuf_addstr(out, default_message); + strbuf_addch(out, '\n'); + strbuf_commented_addf(out, comment_line_str, hint, action, comment_line_str); + write_file_buf(path, out->buf, out->len); + + wt_status_prepare(repo, &s); + FREE_AND_NULL(s.branch); + s.ahead_behind_flags = AHEAD_BEHIND_QUICK; + s.commit_template = 1; + s.colopts = 0; + s.display_comment_prefix = 1; + s.hints = 0; + s.use_color = 0; + s.whence = FROM_COMMIT; + s.committable = 1; + + s.fp = fopen(git_path_commit_editmsg(), "a"); + if (!s.fp) + return error_errno(_("could not open '%s'"), git_path_commit_editmsg()); + + wt_status_collect_changes_trees(&s, old_tree, new_tree); + wt_status_print(&s); + wt_status_collect_free_buffers(&s); + string_list_clear_func(&s.change, change_data_free); + + strbuf_reset(out); + if (launch_editor(path, out, NULL)) { + fprintf(stderr, _("Aborting commit as launching the editor failed.\n")); + return -1; + } + strbuf_stripspace(out, comment_line_str); + + cleanup_message(out, COMMIT_MSG_CLEANUP_ALL, 0); + + if (!out->len) { + fprintf(stderr, _("Aborting commit due to empty commit message.\n")); + return -1; + } + + return 0; +} + +static int commit_tree_with_edited_message(struct repository *repo, + const char *action, + struct commit *original, + struct commit **out) +{ + const char *exclude_gpgsig[] = { + /* We reencode the message, so the encoding needs to be stripped. */ + "encoding", + /* We need to strip signatures as those will become invalid. 
*/ + "gpgsig", + "gpgsig-sha256", + NULL, + }; + const char *original_message, *original_body, *ptr; + struct commit_extra_header *original_extra_headers = NULL; + struct strbuf commit_message = STRBUF_INIT; + struct object_id rewritten_commit_oid; + struct object_id original_tree_oid; + struct object_id parent_tree_oid; + char *original_author = NULL; + struct commit *parent; + size_t len; + int ret; + + original_tree_oid = repo_get_commit_tree(repo, original)->object.oid; + + parent = original->parents ? original->parents->item : NULL; + if (parent) { + if (repo_parse_commit(repo, parent)) { + ret = error(_("unable to parse parent commit %s"), + oid_to_hex(&parent->object.oid)); + goto out; + } + + parent_tree_oid = repo_get_commit_tree(repo, parent)->object.oid; + } else { + oidcpy(&parent_tree_oid, repo->hash_algo->empty_tree); + } + + /* We retain authorship of the original commit. */ + original_message = repo_logmsg_reencode(repo, original, NULL, NULL); + ptr = find_commit_header(original_message, "author", &len); + if (ptr) + original_author = xmemdupz(ptr, len); + find_commit_subject(original_message, &original_body); + + ret = fill_commit_message(repo, &parent_tree_oid, &original_tree_oid, + original_body, action, &commit_message); + if (ret < 0) + goto out; + + original_extra_headers = read_commit_extra_headers(original, exclude_gpgsig); + + ret = commit_tree_extended(commit_message.buf, commit_message.len, &original_tree_oid, + original->parents, &rewritten_commit_oid, original_author, + NULL, NULL, original_extra_headers); + if (ret < 0) + goto out; + + *out = lookup_commit_or_die(&rewritten_commit_oid, "rewritten commit"); + +out: + free_commit_extra_headers(original_extra_headers); + strbuf_release(&commit_message); + free(original_author); + return ret; +} + +enum ref_action { + REF_ACTION_DEFAULT, + REF_ACTION_BRANCHES, + REF_ACTION_HEAD, + REF_ACTION_PRINT, +}; + +static int parse_ref_action(const struct option *opt, const char *value, int unset) 
+{ + enum ref_action *action = opt->value; + + BUG_ON_OPT_NEG_NOARG(unset, value); + if (!strcmp(value, "branches")) { + *action = REF_ACTION_BRANCHES; + } else if (!strcmp(value, "head")) { + *action = REF_ACTION_HEAD; + } else if (!strcmp(value, "print")) { + *action = REF_ACTION_PRINT; + } else { + return error(_("%s expects one of 'branches', 'head' or 'print'"), + opt->long_name); + } + + return 0; +} + +static int handle_reference_updates(enum ref_action action, + struct repository *repo, + struct commit *original, + struct commit *rewritten, + const char *reflog_msg) +{ + const struct name_decoration *decoration; + struct replay_revisions_options opts = { 0 }; + struct replay_result result = { 0 }; + struct ref_transaction *transaction = NULL; + struct strvec args = STRVEC_INIT; + struct strbuf err = STRBUF_INIT; + struct commit *head = NULL; + struct rev_info revs; + char hex[GIT_MAX_HEXSZ + 1]; + bool detached_head; + int head_flags = 0; + int ret; + + refs_read_ref_full(get_main_ref_store(repo), "HEAD", + RESOLVE_REF_NO_RECURSE, NULL, &head_flags); + detached_head = !(head_flags & REF_ISSYMREF); + + repo_init_revisions(repo, &revs, NULL); + strvec_push(&args, "ignored"); + strvec_push(&args, "--reverse"); + strvec_push(&args, "--topo-order"); + strvec_push(&args, "--full-history"); + + /* We only want to see commits that are descendants of the old commit. */ + strvec_pushf(&args, "--ancestry-path=%s", + oid_to_hex(&original->object.oid)); + + /* + * Ancestry path may also show ancestors of the old commit, but we + * don't want to see those, either. + */ + strvec_pushf(&args, "^%s", oid_to_hex(&original->object.oid)); + + /* + * When we're asked to update HEAD we need to verify that the commit + * that we want to rewrite is actually an ancestor of it and, if so, + * update it. Otherwise we'll update (or print) all descendant + * branches. 
+ */ + if (action == REF_ACTION_HEAD) { + struct commit_list *from_list = NULL; + + head = lookup_commit_reference_by_name("HEAD"); + if (!head) { + ret = error(_("cannot look up HEAD")); + goto out; + } + + commit_list_insert(original, &from_list); + ret = repo_is_descendant_of(repo, head, from_list); + free_commit_list(from_list); + + if (ret < 0) { + ret = error(_("cannot determine descendance")); + goto out; + } else if (!ret) { + ret = error(_("rewritten commit must be an ancestor " + "of HEAD when using --ref-action=head")); + goto out; + } + + strvec_push(&args, "HEAD"); + } else { + strvec_push(&args, "--branches"); + strvec_push(&args, "HEAD"); + } + + setup_revisions_from_strvec(&args, &revs, NULL); + if (args.nr != 1) + BUG("revisions were set up with invalid argument"); + + opts.onto = oid_to_hex_r(hex, &rewritten->object.oid); + + ret = replay_revisions(&revs, &opts, &result); + if (ret) + goto out; + + switch (action) { + case REF_ACTION_BRANCHES: + case REF_ACTION_HEAD: + transaction = ref_store_transaction_begin(get_main_ref_store(repo), 0, &err); + if (!transaction) { + ret = error(_("failed to begin ref transaction: %s"), err.buf); + goto out; + } + + for (size_t i = 0; i < result.updates_nr; i++) { + ret = ref_transaction_update(transaction, + result.updates[i].refname, + &result.updates[i].new_oid, + &result.updates[i].old_oid, + NULL, NULL, 0, reflog_msg, &err); + if (ret) { + ret = error(_("failed to update ref '%s': %s"), + result.updates[i].refname, err.buf); + goto out; + } + } + + /* + * `replay_revisions()` only updates references that are + * ancestors of `rewritten`, so we need to manually + * handle updating references that point to `original`. 
+ */ + for (decoration = get_name_decoration(&original->object); + decoration; + decoration = decoration->next) + { + if (decoration->type != DECORATION_REF_LOCAL && + decoration->type != DECORATION_REF_HEAD) + continue; + + if (action == REF_ACTION_HEAD && + decoration->type != DECORATION_REF_HEAD) + continue; + + /* + * We only need to update HEAD separately in case it's + * detached. If it's not we'd already update the branch + * it is pointing to. + */ + if (action == REF_ACTION_BRANCHES && + decoration->type == DECORATION_REF_HEAD && + !detached_head) + continue; + + ret = ref_transaction_update(transaction, + decoration->name, + &rewritten->object.oid, + &original->object.oid, + NULL, NULL, 0, reflog_msg, &err); + if (ret) { + ret = error(_("failed to update ref '%s': %s"), + decoration->name, err.buf); + goto out; + } + } + + if (ref_transaction_commit(transaction, &err)) { + ret = error(_("failed to commit ref transaction: %s"), err.buf); + goto out; + } + + break; + case REF_ACTION_PRINT: + for (size_t i = 0; i < result.updates_nr; i++) + printf("update %s %s %s\n", + result.updates[i].refname, + oid_to_hex(&result.updates[i].new_oid), + oid_to_hex(&result.updates[i].old_oid)); + break; + default: + BUG("unsupported ref action %d", action); + } + + ret = 0; + +out: + ref_transaction_free(transaction); + replay_result_release(&result); + release_revisions(&revs); + strbuf_release(&err); + strvec_clear(&args); + return ret; +} + +static int cmd_history_reword(int argc, + const char **argv, + const char *prefix, + struct repository *repo) +{ + const char * const usage[] = { + GIT_HISTORY_REWORD_USAGE, + NULL, + }; + enum ref_action action = REF_ACTION_DEFAULT; + struct option options[] = { + OPT_CALLBACK_F(0, "ref-action", &action, N_(""), + N_("control ref update behavior (branches|head|print)"), + PARSE_OPT_NONEG, parse_ref_action), + OPT_END(), + }; + struct strbuf reflog_msg = STRBUF_INIT; + struct commit *original, *rewritten; + int ret; + + argc = 
parse_options(argc, argv, prefix, options, usage, 0); + if (argc != 1) { + ret = error(_("command expects a single revision")); + goto out; + } + repo_config(repo, git_default_config, NULL); + + if (action == REF_ACTION_DEFAULT) + action = REF_ACTION_BRANCHES; + + original = lookup_commit_reference_by_name(argv[0]); + if (!original) { + ret = error(_("commit cannot be found: %s"), argv[0]); + goto out; + } + + ret = commit_tree_with_edited_message(repo, "reworded", original, &rewritten); + if (ret < 0) { + ret = error(_("failed writing reworded commit")); + goto out; + } + + strbuf_addf(&reflog_msg, "reword: updating %s", argv[0]); + + ret = handle_reference_updates(action, repo, original, rewritten, + reflog_msg.buf); + if (ret < 0) { + ret = error(_("failed replaying descendants")); + goto out; + } + + ret = 0; + +out: + strbuf_release(&reflog_msg); + return ret; +} int cmd_history(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { const char * const usage[] = { - N_("git history []"), + GIT_HISTORY_REWORD_USAGE, NULL, }; + parse_opt_subcommand_fn *fn = NULL; struct option options[] = { + OPT_SUBCOMMAND("reword", &fn, cmd_history_reword), OPT_END(), }; argc = parse_options(argc, argv, prefix, options, usage, 0); - if (argc) - usagef("unrecognized argument: %s", argv[0]); - return 0; + return fn(argc, argv, prefix, repo); } diff --git a/t/meson.build b/t/meson.build index 73006b095afc9f..c9f92450dc53b5 100644 --- a/t/meson.build +++ b/t/meson.build @@ -388,6 +388,7 @@ integration_tests = [ 't3437-rebase-fixup-options.sh', 't3438-rebase-broken-files.sh', 't3450-history.sh', + 't3451-history-reword.sh', 't3500-cherry.sh', 't3501-revert-cherry-pick.sh', 't3502-cherry-pick-merge.sh', diff --git a/t/t3450-history.sh b/t/t3450-history.sh index 417c343d43b8d3..f513463b92bf43 100755 --- a/t/t3450-history.sh +++ b/t/t3450-history.sh @@ -5,13 +5,13 @@ test_description='tests for git-history command' . 
./test-lib.sh test_expect_success 'does nothing without any arguments' ' - git history >out 2>&1 && - test_must_be_empty out + test_must_fail git history 2>err && + test_grep "need a subcommand" err ' test_expect_success 'raises an error with unknown argument' ' test_must_fail git history garbage 2>err && - test_grep "unrecognized argument: garbage" err + test_grep "unknown subcommand: .garbage." err ' test_done diff --git a/t/t3451-history-reword.sh b/t/t3451-history-reword.sh new file mode 100755 index 00000000000000..3594421b681c40 --- /dev/null +++ b/t/t3451-history-reword.sh @@ -0,0 +1,391 @@ +#!/bin/sh + +test_description='tests for git-history reword subcommand' + +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-log-graph.sh" + +reword_with_message () { + cat >message && + write_script fake-editor.sh <<-\EOF && + cp message "$1" + EOF + test_set_editor "$(pwd)"/fake-editor.sh && + git history reword "$@" && + rm fake-editor.sh message +} + +expect_graph () { + cat >expect && + lib_test_cmp_graph --graph --format=%s "$@" +} + +expect_log () { + git log --format="%s" "$@" >actual && + cat >expect && + test_cmp expect actual +} + +test_expect_success 'can reword tip of a branch' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + test_commit second && + test_commit third && + + git symbolic-ref HEAD >expect && + reword_with_message HEAD <<-EOF && + third reworded + EOF + git symbolic-ref HEAD >actual && + test_cmp expect actual && + + expect_log <<-\EOF && + third reworded + second + first + EOF + + git reflog >reflog && + test_grep "reword: updating HEAD" reflog + ) +' + +test_expect_success 'can reword commit in the middle' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + test_commit second && + test_commit third && + + git symbolic-ref HEAD >expect && + reword_with_message HEAD~ <<-EOF && + second reworded + EOF + git symbolic-ref HEAD >actual && + test_cmp expect 
actual && + + expect_log <<-\EOF + third + second reworded + first + EOF + ) +' + +test_expect_success 'can reword commit in the middle even on detached head' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + test_commit second && + test_commit third_on_main && + git checkout --detach HEAD^ && + test_commit third_on_head && + + reword_with_message HEAD~ <<-EOF && + second reworded + EOF + + expect_graph HEAD --branches <<-\EOF + * third_on_head + | * third_on_main + |/ + * second reworded + * first + EOF + ) +' + +test_expect_success 'can reword the detached head' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + test_commit second && + git checkout --detach HEAD && + test_commit third && + + reword_with_message HEAD <<-EOF && + third reworded + EOF + + expect_log <<-\EOF + third reworded + second + first + EOF + ) +' + +test_expect_success 'can reword root commit' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + test_commit second && + test_commit third && + reword_with_message HEAD~2 <<-EOF && + first reworded + EOF + + expect_log <<-\EOF + third + second + first reworded + EOF + ) +' + +test_expect_success 'can reword in a bare repo' ' + test_when_finished "rm -rf repo repo.git" && + git init repo && + test_commit -C repo first && + git clone --bare repo repo.git && + ( + cd repo.git && + reword_with_message HEAD <<-EOF && + reworded + EOF + + expect_log <<-\EOF + reworded + EOF + ) +' + +test_expect_success 'can reword a commit on a different branch' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit base && + git branch theirs && + test_commit ours && + git switch theirs && + test_commit theirs && + + git rev-parse ours >ours-before && + reword_with_message theirs <<-EOF && + Reworded theirs + EOF + git rev-parse ours >ours-after && + test_cmp ours-before ours-after && + 
+ expect_graph --branches <<-\EOF + * Reworded theirs + | * ours + |/ + * base + EOF + ) +' + +test_expect_success 'can reword a merge commit' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit base && + git branch branch && + test_commit ours && + git switch branch && + test_commit theirs && + git switch - && + git merge theirs && + + # It is not possible to replay merge commits embedded in the + # history (yet). + test_must_fail git history reword HEAD~ 2>err && + test_grep "replaying merge commits is not supported yet" err && + + # But it is possible to reword a merge commit directly. + reword_with_message HEAD <<-EOF && + Reworded merge commit + EOF + expect_graph <<-\EOF + * Reworded merge commit + |\ + | * theirs + * | ours + |/ + * base + EOF + ) +' + +test_expect_success '--ref-action=print prints ref updates without modifying repo' ' + test_when_finished "rm -rf repo" && + git init repo --initial-branch=main && + ( + cd repo && + test_commit base && + git branch branch && + test_commit ours && + git switch branch && + test_commit theirs && + + git refs list >refs-expect && + reword_with_message --ref-action=print base >updates <<-\EOF && + reworded commit + EOF + git refs list >refs-actual && + test_cmp refs-expect refs-actual && + + test_grep "update refs/heads/branch" updates && + test_grep "update refs/heads/main" updates && + git update-ref --stdin err && + test_grep "rewritten commit must be an ancestor of HEAD" err && + + reword_with_message --ref-action=head base >updates <<-\EOF && + reworded base + EOF + expect_log HEAD <<-\EOF && + ours + reworded base + EOF + expect_log main <<-\EOF + theirs + base + EOF + ) +' + +test_expect_success 'editor shows proper status' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + + write_script fake-editor.sh <<-\EOF && + cp "$1" . 
&& + printf "\namend a comment\n" >>"$1" + EOF + test_set_editor "$(pwd)"/fake-editor.sh && + git history reword HEAD && + + cat >expect <<-EOF && + first + + # Please enter the commit message for the reworded changes. Lines starting + # with ${SQ}#${SQ} will be ignored, and an empty message aborts the commit. + # Changes to be committed: + # new file: first.t + # + EOF + test_cmp expect COMMIT_EDITMSG && + + test_commit_message HEAD <<-\EOF + first + + amend a comment + EOF + ) +' + +# For now, git-history(1) does not yet execute any hooks. This is subject to +# change in the future, and if it does this test here is expected to start +# failing. In other words, this test is not an endorsement of the current +# status quo. +test_expect_success 'hooks are not executed for rewritten commits' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + test_commit second && + test_commit third && + + ORIG_PATH="$(pwd)" && + export ORIG_PATH && + for hook in prepare-commit-msg pre-commit post-commit post-rewrite commit-msg + do + write_script .git/hooks/$hook <<-\EOF || exit 1 + touch "$ORIG_PATH/hooks.log" + EOF + done && + + reword_with_message HEAD~ <<-EOF && + second reworded + EOF + + cat >expect <<-EOF && + third + second reworded + first + EOF + git log --format=%s >actual && + test_cmp expect actual && + + test_path_is_missing hooks.log + ) +' + +test_expect_success 'aborts with empty commit message' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit first && + + ! reword_with_message HEAD 2>err a && + echo bar >b && + git add b && + reword_with_message HEAD <<-EOF && + message + EOF + cat >expect <<-\EOF && + M a + M b + ?? actual + ?? 
expect + EOF + git status --porcelain >actual && + test_cmp expect actual + ) +' + +test_done From 0ee71f4bd035db61342c2c5a25984e4545347c11 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 18 Dec 2025 16:50:26 +0000 Subject: [PATCH 026/784] replay: drop commits that become empty If the changes in a commit being replayed are already in the branch that the commits are being replayed onto, then "git replay" creates an empty commit. This is confusing because the commit message no longer matches the contents of the commit. Drop the commit instead. Commits that start off empty are not dropped. This matches the behavior of "git rebase --reapply-cherry-picks --empty=drop" and "git cherry-pick --empty=drop". If a branch points to a commit that is dropped it will be updated to point to the last commit that was not dropped. This can be seen in the new test where "topic1" is updated to point to the rebased "C" as "F" is dropped because it is already upstream. While this is a breaking change, "git replay" is marked as experimental to allow improvements like this that change the behavior. Helped-by: Elijah Newren Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- Documentation/git-replay.adoc | 4 +++- replay.c | 10 +++++++--- t/t3650-replay-basics.sh | 21 +++++++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/Documentation/git-replay.adoc b/Documentation/git-replay.adoc index 4c61f3aa1f1c70..dc966486ca6e7d 100644 --- a/Documentation/git-replay.adoc +++ b/Documentation/git-replay.adoc @@ -62,7 +62,9 @@ The default mode can be configured via the `replay.refAction` configuration vari Range of commits to replay; see "Specifying Ranges" in linkgit:git-rev-parse[1]. In `--advance <branch>` mode, the range should have a single tip, so that it's clear to which tip the - advanced <branch> should point. + advanced <branch> should point. Any commits in the range whose + changes are already present in the branch the commits are being + replayed onto will be dropped. 
include::rev-list-options.adoc[] diff --git a/replay.c b/replay.c index 94fb76384b49d1..f97d652f338f1d 100644 --- a/replay.c +++ b/replay.c @@ -217,12 +217,12 @@ static struct commit *pick_regular_commit(struct repository *repo, struct merge_result *result) { struct commit *base, *replayed_base; - struct tree *pickme_tree, *base_tree; + struct tree *pickme_tree, *base_tree, *replayed_base_tree; base = pickme->parents->item; replayed_base = mapped_commit(replayed_commits, base, onto); - result->tree = repo_get_commit_tree(repo, replayed_base); + replayed_base_tree = repo_get_commit_tree(repo, replayed_base); pickme_tree = repo_get_commit_tree(repo, pickme); base_tree = repo_get_commit_tree(repo, base); @@ -232,7 +232,7 @@ static struct commit *pick_regular_commit(struct repository *repo, merge_incore_nonrecursive(merge_opt, base_tree, - result->tree, + replayed_base_tree, pickme_tree, result); @@ -240,6 +240,10 @@ static struct commit *pick_regular_commit(struct repository *repo, merge_opt->ancestor = NULL; if (!result->clean) return NULL; + /* Drop commits that become empty */ + if (oideq(&replayed_base_tree->object.oid, &result->tree->object.oid) && + !oideq(&pickme_tree->object.oid, &base_tree->object.oid)) + return replayed_base; return create_commit(repo, result->tree, pickme, replayed_base); } diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index c862aa39f31e08..a03f8f9293eb12 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -25,6 +25,8 @@ test_expect_success 'setup' ' git switch -c topic3 && test_commit G && test_commit H && + git switch -c empty && + git commit --allow-empty -m empty && git switch -c topic4 main && test_commit I && test_commit J && @@ -160,6 +162,25 @@ test_expect_success 'using replay on bare repo to perform basic cherry-pick' ' test_cmp expect result-bare ' +test_expect_success 'commits that become empty are dropped' ' + # Save original branches + git for-each-ref --format="update %(refname) 
%(objectname)" \ + refs/heads/ >original-branches && + test_when_finished "git update-ref --stdin result && + git log --format="%s%d" L..empty >actual && + test_write_lines >expect \ + "empty (empty)" "H (topic3)" G "C (topic1)" "F (main)" "M (tag: M)" && + test_cmp expect actual +' + test_expect_success 'replay on bare repo fails with both --advance and --onto' ' test_must_fail git -C bare replay --advance main --onto main topic1..topic2 >result-bare ' From bcb4fd1799485afbb2391aa312f4d947274bd7d4 Mon Sep 17 00:00:00 2001 From: Shannon Barber Date: Tue, 13 Jan 2026 06:28:41 +0000 Subject: [PATCH 027/784] gitk: use config settings for head/tag colors The drawtags procedure currently uses headfgcolor for all label text, ignoring the tagfgcolor setting. The call to create the outline polygon for (non-tag) heads currently has the color for headoutlinecolor hardcoded to black. This patch maintains the variables for the non-tag refs so that heads are colored differently from non-head (non-tag) refs. The outline and fill colors for the non-head refs remain hardcoded to the prior values, black & #ddddff. 
Signed-off-by: Shannon Barber Signed-off-by: Johannes Sixt --- gitk | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/gitk b/gitk index 7f62c8041d1c77..0415abd873633e 100755 --- a/gitk +++ b/gitk @@ -6831,16 +6831,18 @@ proc drawtags {id x xt y1} { } else { # draw a head or other ref if {[incr nheads -1] >= 0} { - set col $headbgcolor + set refoutlinecol $headoutlinecolor + set reffillcol $headbgcolor if {$tag eq $mainhead} { set font mainfontbold } } else { - set col "#ddddff" + set refoutlinecol black + set reffillcol "#ddddff" } set xl [expr {$xl - $delta/2}] $canv create polygon $x $yt $xr $yt $xr $yb $x $yb \ - -width 1 -outline black -fill $col -tags tag.$id + -width 1 -outline $refoutlinecol -fill $reffillcol -tags tag.$id if {[regexp {^(remotes/.*/|remotes/)} $tag match remoteprefix]} { set rwid [font measure mainfont $remoteprefix] set xi [expr {$x + 1}] @@ -6850,7 +6852,8 @@ proc drawtags {id x xt y1} { -width 0 -fill $remotebgcolor -tags tag.$id } } - set t [$canv create text $xl $y1 -anchor w -text $tag -fill $headfgcolor \ + set textfgcolor [expr {$ntags >= 0 ? $tagfgcolor : $headfgcolor}] + set t [$canv create text $xl $y1 -anchor w -text $tag -fill $textfgcolor \ -font $font -tags [list tag.$id text]] if {$ntags >= 0} { $canv bind $t <1> $tagclick From ed0f7a62f75232896eccb622bfaf9fe56903a261 Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Wed, 14 Jan 2026 08:36:19 -0800 Subject: [PATCH 028/784] remote-curl: use auth for probe_rpc() requests too If a large request requires post_rpc() to call probe_rpc(), the latter does not use the authorization credentials used for other requests. If this fails with an HTTP 401 error and http_auth.multistage isn't set, then the whole request just fails. 
For example, using git-credential-msal [1], the following attempt to clone a large repository fails partway through because the initial request to download the commit history and promisor packs succeeds, but the subsequent request to download the blobs needed to construct the working tree fails with a 401 error and the checkout fails. (lines removed for brevity) git clone --filter=blob:none https://secure-server.example/repo 11:03:26.855369 git.c:502 trace: built-in: git clone --filter=blob:none https://secure-server.example/repo Cloning into 'sw'... warning: templates not found in /home/aaron/share/git-core/templates 11:03:26.857169 run-command.c:673 trace: run_command: git remote-https origin https://secure-server.example/repo 11:03:27.012104 http.c:849 => Send header: GET repo/info/refs?service=git-upload-pack HTTP/1.1 11:03:27.049243 http.c:849 <= Recv header: HTTP/1.1 401 Unauthorized 11:03:27.049270 http.c:849 <= Recv header: WWW-Authenticate: Bearer error="invalid_request", error_description="No bearer token found in the request", msal-tenant-id="", msal-client-id="" 11:03:27.053786 run-command.c:673 trace: run_command: 'git credential-msal get' 11:03:27.952830 http.c:849 => Send header: GET repo/info/refs?service=git-upload-pack HTTP/1.1 11:03:27.952849 http.c:849 => Send header: Authorization: Bearer 11:03:27.995419 http.c:849 <= Recv header: HTTP/1.1 200 OK 11:03:28.230039 http.c:890 == Info: Reusing existing https: connection with host secure-server.example 11:03:28.230208 http.c:849 => Send header: POST repo/git-upload-pack HTTP/1.1 11:03:28.230216 http.c:849 => Send header: Content-Type: application/x-git-upload-pack-request 11:03:28.230221 http.c:849 => Send header: Authorization: Bearer 11:03:28.269085 http.c:849 <= Recv header: HTTP/1.1 200 OK 11:03:28.684163 http.c:890 == Info: Reusing existing https: connection with host secure-server.example 11:03:28.684379 http.c:849 => Send header: POST repo/git-upload-pack HTTP/1.1 11:03:28.684391 http.c:849 
=> Send header: Accept: application/x-git-upload-pack-result 11:03:28.684393 http.c:849 => Send header: Authorization: Bearer 11:03:28.869546 run-command.c:673 trace: run_command: git index-pack --stdin --fix-thin '--keep=fetch-pack 43856 on dgx-spark' --promisor 11:06:39.861237 run-command.c:673 trace: run_command: git -c fetch.negotiationAlgorithm=noop fetch origin --no-tags --no-write-fetch-head --recurse-submodules=no --filter=blob:none --stdin 11:06:39.865981 run-command.c:673 trace: run_command: git remote-https origin https://secure-server.example/repo 11:06:39.868039 run-command.c:673 trace: run_command: git-remote-https origin https://secure-server.example/repo 11:07:30.412575 http.c:849 => Send header: GET repo/info/refs?service=git-upload-pack HTTP/1.1 11:07:30.456285 http.c:849 <= Recv header: HTTP/1.1 401 Unauthorized 11:07:30.456318 http.c:849 <= Recv header: WWW-Authenticate: Bearer error="invalid_request", error_description="No bearer token found in the request", msal-tenant-id="", msal-client-id="" 11:07:30.456439 run-command.c:673 trace: run_command: 'git credential-cache get' 11:07:30.461266 http.c:849 => Send header: GET repo/info/refs?service=git-upload-pack HTTP/1.1 11:07:30.461282 http.c:849 => Send header: Authorization: Bearer 11:07:30.501628 http.c:849 <= Recv header: HTTP/1.1 200 OK 11:07:34.725262 http.c:849 => Send header: POST repo/git-upload-pack HTTP/1.1 11:07:34.725279 http.c:849 => Send header: Content-Type: application/x-git-upload-pack-request 11:07:34.761407 http.c:849 <= Recv header: HTTP/1.1 401 Unauthorized 11:07:34.761443 http.c:890 == Info: Bearer authentication problem, ignoring. 
11:07:34.761453 http.c:849 <= Recv header: WWW-Authenticate: Bearer error="invalid_request", error_description="No bearer token found in the request", msal-tenant-id="", msal-client-id="" 11:07:34.761509 http.c:890 == Info: The requested URL returned error: 401 11:07:34.761530 http.c:890 == Info: closing connection #0 11:07:34.761913 run-command.c:673 trace: run_command: 'git credential-cache erase' 11:07:34.761927 run-command.c:765 trace: start_command: /bin/sh -c 'git credential-cache erase' 'git credential-cache erase' 11:07:34.768069 git.c:502 trace: built-in: git credential-cache erase 11:07:34.768690 run-command.c:673 trace: run_command: 'git credential-msal erase' 11:07:34.768713 run-command.c:765 trace: start_command: /bin/sh -c 'git credential-msal erase' 'git credential-msal erase' 11:07:34.772742 git.c:808 trace: exec: git-credential-msal erase 11:07:34.772783 run-command.c:673 trace: run_command: git-credential-msal erase 11:07:34.772819 run-command.c:765 trace: start_command: /usr/bin/git-credential-msal erase error: RPC failed; HTTP 401 curl 22 The requested URL returned error: 401 fatal: unable to write request to remote: Broken pipe fatal: could not fetch c4fff0229c9be06ecf576356a4d39a8a755b8d81 from promisor remote warning: Clone succeeded, but checkout failed. You can inspect what was checked out with 'git status' and retry with 'git restore --source=HEAD :/' In this case, the HTTP_REAUTH retry logic is not used because the credential helper didn't set the 'continue' flag, so http_auth.multistage is false and handle_curl_result() fails with HTTP_NOAUTH instead. Fix the immediate problem by including the authorization headers in the probe_rpc() request as well. Add a test for this scenario: 1. Create a repository with two thousand refs. 2. Clone that into the web root used by t5563-simple-http-auth.sh. 3. Configure http.postBuffer to be very small in order to trigger the probe_rpc() path that fails. 4. Clone using a valid Bearer token. 
[1] https://github.com/Binary-Eater/git-credential-msal Tested-by: Lucas De Marchi Signed-off-by: Aaron Plattner Signed-off-by: Junio C Hamano --- remote-curl.c | 1 + t/t5563-simple-http-auth.sh | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/remote-curl.c b/remote-curl.c index 69f919454a4565..92e40bb682d34d 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -876,6 +876,7 @@ static int probe_rpc(struct rpc_state *rpc, struct slot_results *results) headers = curl_slist_append(headers, rpc->hdr_content_type); headers = curl_slist_append(headers, rpc->hdr_accept); + headers = http_append_auth_header(&http_auth, headers); curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0L); curl_easy_setopt(slot->curl, CURLOPT_POST, 1L); diff --git a/t/t5563-simple-http-auth.sh b/t/t5563-simple-http-auth.sh index 317f33af5a7e60..0e1465c15755de 100755 --- a/t/t5563-simple-http-auth.sh +++ b/t/t5563-simple-http-auth.sh @@ -605,6 +605,51 @@ test_expect_success 'access using bearer auth with invalid credentials' ' EOF ' +test_expect_success 'clone with bearer auth and probe_rpc' ' + test_when_finished "per_test_cleanup" && + test_when_finished "rm -rf large.git" && + + # Set up a repository large enough to trigger probe_rpc + git init large.git && + ( + cd large.git && + git config set maintenance.auto false && + git commit --allow-empty --message "initial" && + # Create many refs to trigger probe_rpc, which is called when + # the request body is larger than http.postBuffer. + # + # In the test later, http.postBuffer is set to 70000. 
Each + # "want" line is ~45 bytes, so we need at least 70000/45 = ~1600 + # refs + test_seq -f "create refs/heads/branch-%d @" 2000 | + git update-ref --stdin + ) && + git clone --bare large.git "$HTTPD_DOCUMENT_ROOT_PATH/large.git" && + + # Clone it through HTTP with a Bearer token + set_credential_reply get <<-EOF && + capability[]=authtype + authtype=Bearer + credential=YS1naXQtdG9rZW4= + EOF + + # Bearer token + cat >"$HTTPD_ROOT_PATH/custom-auth.valid" <<-EOF && + id=1 creds=Bearer YS1naXQtdG9rZW4= + EOF + + cat >"$HTTPD_ROOT_PATH/custom-auth.challenge" <<-EOF && + id=1 status=200 + id=default response=WWW-Authenticate: Bearer authorize_uri="id.example.com" + EOF + + # Set a small buffer to force probe_rpc to be called + # Must be > LARGE_PACKET_MAX (65520) + test_config_global http.postBuffer 70000 && + test_config_global credential.helper test-helper && + git clone "$HTTPD_URL/custom_auth/large.git" partial-auth-clone 2>clone-error +' + test_expect_success 'access using three-legged auth' ' test_when_finished "per_test_cleanup" && From ff9fb2cfe6efb26b9d25dc5c114ab56126f9003e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 10:35:32 +0100 Subject: [PATCH 029/784] commit: rename `copy_commit_list()` to conform to coding guidelines Our coding guidelines say that: Functions that operate on `struct S` are named `S_()` and should generally receive a pointer to `struct S` as first parameter. While most of the functions related to `struct commit_list` already follow that naming schema, `copy_commit_list()` doesn't. Rename the function to address this and adjust all of its callers. Add a compatibility wrapper for the old function name to ease the transition and avoid any semantic conflicts with in-flight patch series. This wrapper will be removed once Git 2.53 has been released. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/commit.c | 2 +- commit.c | 2 +- commit.h | 11 ++++++++++- merge-ort.c | 2 +- revision.c | 4 ++-- sequencer.c | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 0243f17d53c97c..0aa3690b04b955 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1849,7 +1849,7 @@ int cmd_commit(int argc, } else if (amend) { if (!reflog_msg) reflog_msg = "commit (amend)"; - parents = copy_commit_list(current_head->parents); + parents = commit_list_copy(current_head->parents); } else if (whence == FROM_MERGE) { struct strbuf m = STRBUF_INIT; FILE *fp; diff --git a/commit.c b/commit.c index efd0c026831e6b..c5c66d3a6b2b78 100644 --- a/commit.c +++ b/commit.c @@ -680,7 +680,7 @@ unsigned commit_list_count(const struct commit_list *l) return c; } -struct commit_list *copy_commit_list(const struct commit_list *list) +struct commit_list *commit_list_copy(const struct commit_list *list) { struct commit_list *head = NULL; struct commit_list **pp = &head; diff --git a/commit.h b/commit.h index 79a761c37df023..2faf08cd1863d5 100644 --- a/commit.h +++ b/commit.h @@ -186,13 +186,22 @@ struct commit_list *commit_list_insert_by_date(struct commit *item, void commit_list_sort_by_date(struct commit_list **list); /* Shallow copy of the input list */ -struct commit_list *copy_commit_list(const struct commit_list *list); +struct commit_list *commit_list_copy(const struct commit_list *list); /* Modify list in-place to reverse it, returning new head; list will be tail */ struct commit_list *reverse_commit_list(struct commit_list *list); void free_commit_list(struct commit_list *list); +/* + * Deprecated compatibility functions for `struct commit_list`, to be removed + * once Git 2.53 is released. 
+ */ +static inline struct commit_list *copy_commit_list(const struct commit_list *l) +{ + return commit_list_copy(l); +} + struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ const char *repo_logmsg_reencode(struct repository *r, diff --git a/merge-ort.c b/merge-ort.c index 2b837a58c3a6f8..f31754c3611c43 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -5301,7 +5301,7 @@ static void merge_ort_internal(struct merge_options *opt, struct commit *h2, struct merge_result *result) { - struct commit_list *merge_bases = copy_commit_list(_merge_bases); + struct commit_list *merge_bases = commit_list_copy(_merge_bases); struct commit *next; struct commit *merged_merge_bases; const char *ancestor_name; diff --git a/revision.c b/revision.c index 1858e093eeeb89..9f5baceb85f2b3 100644 --- a/revision.c +++ b/revision.c @@ -4224,7 +4224,7 @@ static void save_parents(struct rev_info *revs, struct commit *commit) if (*pp) return; if (commit->parents) - *pp = copy_commit_list(commit->parents); + *pp = commit_list_copy(commit->parents); else *pp = EMPTY_PARENT_LIST; } @@ -4294,7 +4294,7 @@ static void track_linear(struct rev_info *revs, struct commit *commit) commit->object.flags |= TRACK_LINEAR; } free_commit_list(revs->previous_parents); - revs->previous_parents = copy_commit_list(commit->parents); + revs->previous_parents = commit_list_copy(commit->parents); } static struct commit *get_revision_1(struct rev_info *revs) diff --git a/sequencer.c b/sequencer.c index 71ed31c7740688..f38d247b1099d1 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1566,7 +1566,7 @@ static int try_to_commit(struct repository *r, res = error(_("unable to parse commit author")); goto out; } - parents = copy_commit_list(current_head->parents); + parents = commit_list_copy(current_head->parents); extra = read_commit_extra_headers(current_head, exclude_gpgsig); } else if (current_head && (!(flags & CREATE_ROOT_COMMIT) || (flags & AMEND_MSG))) { From a468f3cefab32eed7d9a12bd6b93719d38ec67a6 Mon Sep 17
00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 10:35:33 +0100 Subject: [PATCH 030/784] commit: rename `reverse_commit_list()` to conform to coding guidelines Our coding guidelines say that: Functions that operate on `struct S` are named `S_<verb>()` and should generally receive a pointer to `struct S` as first parameter. While most of the functions related to `struct commit_list` already follow that naming schema, `reverse_commit_list()` doesn't. Rename the function to address this and adjust all of its callers. Add a compatibility wrapper for the old function name to ease the transition and avoid any semantic conflicts with in-flight patch series. This wrapper will be removed once Git 2.53 has been released. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/merge-tree.c | 2 +- builtin/stash.c | 2 +- commit.c | 2 +- commit.h | 7 ++++++- merge-ort.c | 2 +- sequencer.c | 2 +- 6 files changed, 11 insertions(+), 6 deletions(-) diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 1c063d9a41a695..979a55d3b2983f 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -483,7 +483,7 @@ static int real_merge(struct merge_tree_options *o, exit(128); if (!merge_bases && !o->allow_unrelated_histories) die(_("refusing to merge unrelated histories")); - merge_bases = reverse_commit_list(merge_bases); + merge_bases = commit_list_reverse(merge_bases); merge_incore_recursive(&opt, merge_bases, parent1, parent2, &result); free_commit_list(merge_bases); } diff --git a/builtin/stash.c b/builtin/stash.c index 948eba06fbccb3..4cb2351787c33d 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -2308,7 +2308,7 @@ static int do_export_stash(struct repository *r, * but where their first parents form a chain to our original empty * base commit.
*/ - items = reverse_commit_list(items); + items = commit_list_reverse(items); for (cur = items; cur; cur = cur->next) { struct commit_list *parents = NULL; struct commit_list **next = &parents; diff --git a/commit.c b/commit.c index c5c66d3a6b2b78..36f02c96aabb9d 100644 --- a/commit.c +++ b/commit.c @@ -691,7 +691,7 @@ struct commit_list *commit_list_copy(const struct commit_list *list) return head; } -struct commit_list *reverse_commit_list(struct commit_list *list) +struct commit_list *commit_list_reverse(struct commit_list *list) { struct commit_list *next = NULL, *current, *backup; for (current = list; current; current = backup) { diff --git a/commit.h b/commit.h index 2faf08cd1863d5..f50d9e5a4abe91 100644 --- a/commit.h +++ b/commit.h @@ -189,7 +189,7 @@ void commit_list_sort_by_date(struct commit_list **list); struct commit_list *commit_list_copy(const struct commit_list *list); /* Modify list in-place to reverse it, returning new head; list will be tail */ -struct commit_list *reverse_commit_list(struct commit_list *list); +struct commit_list *commit_list_reverse(struct commit_list *list); void free_commit_list(struct commit_list *list); @@ -202,6 +202,11 @@ static inline struct commit_list *copy_commit_list(struct commit_list *l) return commit_list_copy(l); } +static inline struct commit_list *reverse_commit_list(struct commit_list *l) +{ + return commit_list_reverse(l); +} + struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ const char *repo_logmsg_reencode(struct repository *r, diff --git a/merge-ort.c b/merge-ort.c index f31754c3611c43..2ddaaffc263d46 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -5314,7 +5314,7 @@ static void merge_ort_internal(struct merge_options *opt, goto out; } /* See merge-ort.h:merge_incore_recursive() declaration NOTE */ - merge_bases = reverse_commit_list(merge_bases); + merge_bases = commit_list_reverse(merge_bases); } merged_merge_bases = pop_commit(&merge_bases); diff --git a/sequencer.c b/sequencer.c 
index f38d247b1099d1..e09f8eed551425 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4317,7 +4317,7 @@ static int do_merge(struct repository *r, git_path_merge_head(r), 0); write_message("no-ff", 5, git_path_merge_mode(r), 0); - bases = reverse_commit_list(bases); + bases = commit_list_reverse(bases); repo_read_index(r); init_ui_merge_options(&o, r); From 9f18d089c51fba2776fe1fece877a359c47417f7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 10:35:34 +0100 Subject: [PATCH 031/784] commit: rename `free_commit_list()` to conform to coding guidelines Our coding guidelines say that: Functions that operate on `struct S` are named `S_<verb>()` and should generally receive a pointer to `struct S` as first parameter. While most of the functions related to `struct commit_list` already follow that naming schema, `free_commit_list()` doesn't. Rename the function to address this and adjust all of its callers. Add a compatibility wrapper for the old function name to ease the transition and avoid any semantic conflicts with in-flight patch series. This wrapper will be removed once Git 2.53 has been released.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- bisect.c | 12 ++++++------ blame.c | 2 +- builtin/am.c | 2 +- builtin/commit-tree.c | 2 +- builtin/commit.c | 2 +- builtin/describe.c | 2 +- builtin/diff-tree.c | 2 +- builtin/gc.c | 2 +- builtin/log.c | 12 ++++++------ builtin/merge-base.c | 14 +++++++------- builtin/merge-tree.c | 2 +- builtin/merge.c | 16 ++++++++-------- builtin/pull.c | 8 ++++---- builtin/rebase.c | 4 ++-- builtin/rev-list.c | 2 +- builtin/show-branch.c | 2 +- builtin/stash.c | 10 +++++----- commit-graph.c | 2 +- commit-reach.c | 30 +++++++++++++++--------------- commit.c | 12 ++++++------ commit.h | 7 ++++++- contrib/coccinelle/free.cocci | 8 ++++---- diff-lib.c | 2 +- fmt-merge-msg.c | 2 +- line-log.c | 2 +- log-tree.c | 2 +- merge-ort-wrappers.c | 2 +- merge-ort.c | 2 +- notes-merge.c | 4 ++-- notes-utils.c | 2 +- object-name.c | 8 ++++---- pack-bitmap-write.c | 6 +++--- ref-filter.c | 8 ++++---- reflog.c | 4 ++-- remote.c | 2 +- revision.c | 32 ++++++++++++++++---------------- sequencer.c | 16 ++++++++-------- shallow.c | 4 ++-- submodule.c | 4 ++-- t/helper/test-reach.c | 10 +++++----- 40 files changed, 136 insertions(+), 131 deletions(-) diff --git a/bisect.c b/bisect.c index 326b59c0dc70e7..b313f1324009b6 100644 --- a/bisect.c +++ b/bisect.c @@ -257,7 +257,7 @@ static struct commit_list *best_bisection_sorted(struct commit_list *list, int n p = p->next; } if (p) { - free_commit_list(p->next); + commit_list_free(p->next); p->next = NULL; } strbuf_release(&buf); @@ -438,7 +438,7 @@ void find_bisection(struct commit_list **commit_list, int *reaches, if (best) { if (!(bisect_flags & FIND_BISECTION_ALL)) { list->item = best->item; - free_commit_list(list->next); + commit_list_free(list->next); best = list; best->next = NULL; } @@ -559,8 +559,8 @@ struct commit_list *filter_skipped(struct commit_list *list, } else { if (!show_all) { if (!skipped_first || !*skipped_first) { - free_commit_list(next); - 
free_commit_list(filtered); + commit_list_free(next); + commit_list_free(filtered); return list; } } else if (skipped_first && !*skipped_first) { @@ -879,7 +879,7 @@ static enum bisect_error check_merge_bases(size_t rev_nr, struct commit **rev, i } } - free_commit_list(result); + commit_list_free(result); return res; } @@ -1142,7 +1142,7 @@ enum bisect_error bisect_next_all(struct repository *r, const char *prefix) res = bisect_checkout(bisect_rev, no_checkout); cleanup: - free_commit_list(tried); + commit_list_free(tried); release_revisions(&revs); strvec_clear(&rev_argv); return res; diff --git a/blame.c b/blame.c index cb0b08342308ef..a3c49d132e4ae1 100644 --- a/blame.c +++ b/blame.c @@ -2368,7 +2368,7 @@ static struct commit_list *first_scapegoat(struct rev_info *revs, struct commit if (revs->first_parent_only && commit->parents && commit->parents->next) { - free_commit_list(commit->parents->next); + commit_list_free(commit->parents->next); commit->parents->next = NULL; } return commit->parents; diff --git a/builtin/am.c b/builtin/am.c index 277c2e7937dcc1..97a7b1d46a2892 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1726,7 +1726,7 @@ static void do_commit(const struct am_state *state) run_hooks(the_repository, "post-applypatch"); - free_commit_list(parents); + commit_list_free(parents); strbuf_release(&sb); } diff --git a/builtin/commit-tree.c b/builtin/commit-tree.c index 5189e685a7eccd..30535db131eaa6 100644 --- a/builtin/commit-tree.c +++ b/builtin/commit-tree.c @@ -154,7 +154,7 @@ int cmd_commit_tree(int argc, ret = 0; out: - free_commit_list(parents); + commit_list_free(parents); strbuf_release(&buffer); return ret; } diff --git a/builtin/commit.c b/builtin/commit.c index 0aa3690b04b955..b1315b512bd12d 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1978,7 +1978,7 @@ int cmd_commit(int argc, cleanup: free_commit_extra_headers(extra); - free_commit_list(parents); + commit_list_free(parents); strbuf_release(&author_ident); strbuf_release(&err); 
strbuf_release(&sb); diff --git a/builtin/describe.c b/builtin/describe.c index 989a78d715d525..abfe3525a5385b 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -558,7 +558,7 @@ static void process_object(struct object *obj, const char *path, void *data) describe_commit(pcd->current_commit, pcd->dst); strbuf_addf(pcd->dst, ":%s", path); } - free_commit_list(pcd->revs->commits); + commit_list_free(pcd->revs->commits); pcd->revs->commits = NULL; } } diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c index 49dd4d00ebf1bc..cd35d1c91575b2 100644 --- a/builtin/diff-tree.c +++ b/builtin/diff-tree.c @@ -33,7 +33,7 @@ static int stdin_diff_commit(struct commit *commit, const char *p) struct commit *parent = lookup_commit(the_repository, &oid); if (!pptr) { /* Free the real parent list */ - free_commit_list(commit->parents); + commit_list_free(commit->parents); commit->parents = NULL; pptr = &(commit->parents); } diff --git a/builtin/gc.c b/builtin/gc.c index 92c6e7b954faff..6c529c429e7660 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1166,7 +1166,7 @@ static int dfs_on_ref(const struct reference *ref, void *cb_data) } } - free_commit_list(stack); + commit_list_free(stack); return result; } diff --git a/builtin/log.c b/builtin/log.c index 5c9a8ef3632906..d43ca693bf7a8a 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -424,7 +424,7 @@ static int cmd_log_walk_no_free(struct rev_info *rev) */ free_commit_buffer(the_repository->parsed_objects, commit); - free_commit_list(commit->parents); + commit_list_free(commit->parents); commit->parents = NULL; } if (saved_nrl < rev->diffopt.needed_rename_limit) @@ -1697,12 +1697,12 @@ static struct commit *get_base_commit(const struct format_config *cfg, if (die_on_failure) { die(_("could not find exact merge base")); } else { - free_commit_list(base_list); + commit_list_free(base_list); return NULL; } } base = base_list->item; - free_commit_list(base_list); + commit_list_free(base_list); } else { if (die_on_failure) 
die(_("failed to get upstream, if you want to record base commit automatically,\n" @@ -1732,14 +1732,14 @@ static struct commit *get_base_commit(const struct format_config *cfg, if (die_on_failure) { die(_("failed to find exact merge base")); } else { - free_commit_list(merge_base); + commit_list_free(merge_base); free(rev); return NULL; } } rev[i] = merge_base->item; - free_commit_list(merge_base); + commit_list_free(merge_base); } if (rev_nr % 2) @@ -2610,7 +2610,7 @@ int cmd_cherry(int argc, print_commit(sign, commit, verbose, abbrev, revs.diffopt.file); } - free_commit_list(list); + commit_list_free(list); free_patch_ids(&ids); return 0; } diff --git a/builtin/merge-base.c b/builtin/merge-base.c index 3f82781245bd27..c7ee97fa6ac62a 100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -15,7 +15,7 @@ static int show_merge_base(struct commit **rev, size_t rev_nr, int show_all) if (repo_get_merge_bases_many_dirty(the_repository, rev[0], rev_nr - 1, rev + 1, &result) < 0) { - free_commit_list(result); + commit_list_free(result); return -1; } @@ -28,7 +28,7 @@ static int show_merge_base(struct commit **rev, size_t rev_nr, int show_all) break; } - free_commit_list(result); + commit_list_free(result); return 0; } @@ -71,7 +71,7 @@ static int handle_independent(int count, const char **args) for (rev = revs; rev; rev = rev->next) printf("%s\n", oid_to_hex(&rev->item->object.oid)); - free_commit_list(revs); + commit_list_free(revs); return 0; } @@ -85,11 +85,11 @@ static int handle_octopus(int count, const char **args, int show_all) commit_list_insert(get_commit_reference(args[i]), &revs); if (get_octopus_merge_bases(revs, &result) < 0) { - free_commit_list(revs); - free_commit_list(result); + commit_list_free(revs); + commit_list_free(result); return 128; } - free_commit_list(revs); + commit_list_free(revs); reduce_heads_replace(&result); if (!result) @@ -101,7 +101,7 @@ static int handle_octopus(int count, const char **args, int show_all) break; } - 
free_commit_list(result); + commit_list_free(result); return 0; } diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 979a55d3b2983f..e141fef3ce6148 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -485,7 +485,7 @@ static int real_merge(struct merge_tree_options *o, die(_("refusing to merge unrelated histories")); merge_bases = commit_list_reverse(merge_bases); merge_incore_recursive(&opt, merge_bases, parent1, parent2, &result); - free_commit_list(merge_bases); + commit_list_free(merge_bases); } if (result.clean < 0) diff --git a/builtin/merge.c b/builtin/merge.c index c421a11b0b69df..6a0831a6588701 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -831,7 +831,7 @@ static int try_merge_strategy(const char *strategy, struct commit_list *common, LOCK_DIE_ON_ERROR); clean = merge_ort_recursive(&o, head, remoteheads->item, reversed, &result); - free_commit_list(reversed); + commit_list_free(reversed); strbuf_release(&o.obuf); if (clean < 0) { @@ -1006,7 +1006,7 @@ static int merge_trivial(struct commit *head, struct commit_list *remoteheads) finish(head, remoteheads, &result_commit, "In-index merge"); remove_merge_branch_state(the_repository); - free_commit_list(parents); + commit_list_free(parents); return 0; } @@ -1022,7 +1022,7 @@ static int finish_automerge(struct commit *head, struct object_id result_commit; write_tree_trivial(result_tree); - free_commit_list(common); + commit_list_free(common); parents = remoteheads; if (!head_subsumed || fast_forward == FF_NO) commit_list_insert(head, &parents); @@ -1035,7 +1035,7 @@ static int finish_automerge(struct commit *head, strbuf_release(&buf); remove_merge_branch_state(the_repository); - free_commit_list(parents); + commit_list_free(parents); return 0; } @@ -1197,7 +1197,7 @@ static struct commit_list *reduce_parents(struct commit *head_commit, /* Find what parents to record by checking independent ones. 
*/ parents = reduce_heads(remoteheads); - free_commit_list(remoteheads); + commit_list_free(remoteheads); remoteheads = NULL; remotes = &remoteheads; @@ -1748,7 +1748,7 @@ int cmd_merge(int argc, exit(128); common_item = common_one->item; - free_commit_list(common_one); + commit_list_free(common_one); if (!oideq(&common_item->object.oid, &j->item->object.oid)) { up_to_date = 0; break; @@ -1880,8 +1880,8 @@ int cmd_merge(int argc, done: if (!automerge_was_ok) { - free_commit_list(common); - free_commit_list(remoteheads); + commit_list_free(common); + commit_list_free(remoteheads); } strbuf_release(&buf); free(branch_to_free); diff --git a/builtin/pull.c b/builtin/pull.c index 3ff748e0b3ea60..6ad420ce6f9b41 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -704,14 +704,14 @@ static int get_octopus_merge_base(struct object_id *merge_base, if (get_octopus_merge_bases(revs, &result) < 0) exit(128); - free_commit_list(revs); + commit_list_free(revs); reduce_heads_replace(&result); if (!result) return 1; oidcpy(merge_base, &result->item->object.oid); - free_commit_list(result); + commit_list_free(result); return 0; } @@ -803,7 +803,7 @@ static int get_can_ff(struct object_id *orig_head, commit_list_insert(head, &list); merge_head = lookup_commit_reference(the_repository, orig_merge_head); ret = repo_is_descendant_of(the_repository, merge_head, list); - free_commit_list(list); + commit_list_free(list); if (ret < 0) exit(128); return ret; @@ -828,7 +828,7 @@ static int already_up_to_date(struct object_id *orig_head, theirs = lookup_commit_reference(the_repository, &merge_heads->oid[i]); commit_list_insert(theirs, &list); ok = repo_is_descendant_of(the_repository, ours, list); - free_commit_list(list); + commit_list_free(list); if (ok < 0) exit(128); if (!ok) diff --git a/builtin/rebase.c b/builtin/rebase.c index c46882818982aa..c487e1090779c2 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -912,7 +912,7 @@ static int can_fast_forward(struct commit *onto, struct 
commit *upstream, res = 1; done: - free_commit_list(merge_bases); + commit_list_free(merge_bases); return res && is_linear_history(onto, head); } @@ -929,7 +929,7 @@ static void fill_branch_base(struct rebase_options *options, else oidcpy(branch_base, &merge_bases->item->object.oid); - free_commit_list(merge_bases); + commit_list_free(merge_bases); } static int parse_opt_am(const struct option *opt, const char *arg, int unset) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 99f876ba857579..ddea8aa251a361 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -216,7 +216,7 @@ static inline void finish_object__ma(struct object *obj, const char *name) static void finish_commit(struct commit *commit) { - free_commit_list(commit->parents); + commit_list_free(commit->parents); commit->parents = NULL; free_commit_buffer(the_repository->parsed_objects, commit); diff --git a/builtin/show-branch.c b/builtin/show-branch.c index f3ebc1d4eaf14b..f02831b08500c4 100644 --- a/builtin/show-branch.c +++ b/builtin/show-branch.c @@ -1008,7 +1008,7 @@ int cmd_show_branch(int ac, out: for (size_t i = 0; i < ARRAY_SIZE(reflog_msg); i++) free(reflog_msg[i]); - free_commit_list(seen); + commit_list_free(seen); clear_prio_queue(&queue); free(args_copy); free(head); diff --git a/builtin/stash.c b/builtin/stash.c index 4cb2351787c33d..aea68a16aab27e 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -1495,7 +1495,7 @@ static int do_create_stash(const struct pathspec *ps, struct strbuf *stash_msg_b goto done; } - free_commit_list(parents); + commit_list_free(parents); parents = NULL; if (include_untracked) { @@ -1564,7 +1564,7 @@ static int do_create_stash(const struct pathspec *ps, struct strbuf *stash_msg_b strbuf_release(&commit_tree_label); strbuf_release(&msg); strbuf_release(&untracked_files); - free_commit_list(parents); + commit_list_free(parents); free(branch_name_buf); return ret; } @@ -2184,7 +2184,7 @@ static int do_import_stash(struct repository *r, const char 
*rev) out: if (this && buffer) repo_unuse_commit_buffer(r, this, buffer); - free_commit_list(items); + commit_list_free(items); free(msg); return res; @@ -2318,7 +2318,7 @@ static int do_export_stash(struct repository *r, next = commit_list_append(prev, next); next = commit_list_append(stash, next); res = write_commit_with_parents(r, &out, &stash->object.oid, parents); - free_commit_list(parents); + commit_list_free(parents); if (res) goto out; prev = lookup_commit_reference(r, &out); @@ -2330,7 +2330,7 @@ static int do_export_stash(struct repository *r, puts(oid_to_hex(&prev->object.oid)); out: strbuf_release(&revision); - free_commit_list(items); + commit_list_free(items); return res; } diff --git a/commit-graph.c b/commit-graph.c index 00e8193adcab81..ed480c05379154 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -965,7 +965,7 @@ static int fill_commit_in_graph(struct commit *item, do { if (g->chunk_extra_edges_size / sizeof(uint32_t) <= parent_data_pos) { error(_("commit-graph extra-edges pointer out of bounds")); - free_commit_list(item->parents); + commit_list_free(item->parents); item->parents = NULL; item->object.parsed = 0; return 0; diff --git a/commit-reach.c b/commit-reach.c index e7d9b3208fabc4..9604bbdcce2f35 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -109,7 +109,7 @@ static int paint_down_to_common(struct repository *r, continue; if (repo_parse_commit(r, p)) { clear_prio_queue(&queue); - free_commit_list(*result); + commit_list_free(*result); *result = NULL; /* * At this stage, we know that the commit is @@ -166,7 +166,7 @@ static int merge_bases_many(struct repository *r, } if (paint_down_to_common(r, one, n, twos, 0, 0, &list)) { - free_commit_list(list); + commit_list_free(list); return -1; } @@ -195,8 +195,8 @@ int get_octopus_merge_bases(struct commit_list *in, struct commit_list **result) struct commit_list *bases = NULL; if (repo_get_merge_bases(the_repository, i->item, j->item, &bases) < 0) { - free_commit_list(bases); - 
free_commit_list(*result); + commit_list_free(bases); + commit_list_free(*result); *result = NULL; return -1; } @@ -207,7 +207,7 @@ int get_octopus_merge_bases(struct commit_list *in, struct commit_list **result) for (k = bases; k; k = k->next) end = k; } - free_commit_list(*result); + commit_list_free(*result); *result = new_commits; } return 0; @@ -249,7 +249,7 @@ static int remove_redundant_no_gen(struct repository *r, work, min_generation, 0, &common)) { clear_commit_marks(array[i], all_flags); clear_commit_marks_many(filled, work, all_flags); - free_commit_list(common); + commit_list_free(common); free(work); free(redundant); free(filled_index); @@ -262,7 +262,7 @@ static int remove_redundant_no_gen(struct repository *r, redundant[filled_index[j]] = 1; clear_commit_marks(array[i], all_flags); clear_commit_marks_many(filled, work, all_flags); - free_commit_list(common); + commit_list_free(common); } /* Now collect the result */ @@ -374,7 +374,7 @@ static int remove_redundant_with_gen(struct repository *r, if (!parents) pop_commit(&stack); } - free_commit_list(stack); + commit_list_free(stack); } free(sorted); @@ -451,7 +451,7 @@ static int get_merge_bases_many_0(struct repository *r, CALLOC_ARRAY(rslt, cnt); for (list = *result, i = 0; list; list = list->next) rslt[i++] = list->item; - free_commit_list(*result); + commit_list_free(*result); *result = NULL; clear_commit_marks(one, all_flags); @@ -510,7 +510,7 @@ int repo_is_descendant_of(struct repository *r, int result; commit_list_insert(commit, &from_list); result = can_all_from_reach(from_list, with_commit, 0); - free_commit_list(from_list); + commit_list_free(from_list); return result; } else { while (with_commit) { @@ -561,7 +561,7 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, ret = 1; clear_commit_marks(commit, all_flags); clear_commit_marks_many(nr_reference, reference, all_flags); - free_commit_list(bases); + commit_list_free(bases); return ret; } @@ -578,7 +578,7 @@ int 
repo_in_merge_bases(struct repository *r, next = commit_list_append(commit, next); res = repo_is_descendant_of(r, reference, list); - free_commit_list(list); + commit_list_free(list); return res; } @@ -626,7 +626,7 @@ struct commit_list *reduce_heads(struct commit_list *heads) void reduce_heads_replace(struct commit_list **heads) { struct commit_list *result = reduce_heads(*heads); - free_commit_list(*heads); + commit_list_free(*heads); *heads = result; } @@ -661,7 +661,7 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) new_commit, old_commit_list); if (ret < 0) exit(128); - free_commit_list(old_commit_list); + commit_list_free(old_commit_list); return ret; } @@ -1236,7 +1236,7 @@ void tips_reachable_from_bases(struct repository *r, done: free(commits); repo_clear_commit_marks(r, SEEN); - free_commit_list(stack); + commit_list_free(stack); } /* diff --git a/commit.c b/commit.c index 36f02c96aabb9d..ddda9ee19d6538 100644 --- a/commit.c +++ b/commit.c @@ -191,7 +191,7 @@ void unparse_commit(struct repository *r, const struct object_id *oid) if (!c->object.parsed) return; - free_commit_list(c->parents); + commit_list_free(c->parents); c->parents = NULL; c->object.parsed = 0; } @@ -436,7 +436,7 @@ void release_commit_memory(struct parsed_object_pool *pool, struct commit *c) set_commit_tree(c, NULL); free_commit_buffer(pool, c); c->index = 0; - free_commit_list(c->parents); + commit_list_free(c->parents); c->object.parsed = 0; } @@ -480,7 +480,7 @@ int parse_commit_buffer(struct repository *r, struct commit *item, const void *b * same error, but that's good, since it lets our caller know * the result cannot be trusted. 
*/ - free_commit_list(item->parents); + commit_list_free(item->parents); item->parents = NULL; tail += size; @@ -702,7 +702,7 @@ struct commit_list *commit_list_reverse(struct commit_list *list) return next; } -void free_commit_list(struct commit_list *list) +void commit_list_free(struct commit_list *list) { while (list) pop_commit(&list); @@ -977,7 +977,7 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so prio_queue_reverse(&queue); /* We no longer need the commit list */ - free_commit_list(orig); + commit_list_free(orig); pptr = list; *list = NULL; @@ -1107,7 +1107,7 @@ struct commit *get_fork_point(const char *refname, struct commit *commit) cleanup_return: free(revs.commit); - free_commit_list(bases); + commit_list_free(bases); free(full_refname); return ret; } diff --git a/commit.h b/commit.h index f50d9e5a4abe91..1635de418b59e0 100644 --- a/commit.h +++ b/commit.h @@ -191,7 +191,7 @@ struct commit_list *commit_list_copy(const struct commit_list *list); /* Modify list in-place to reverse it, returning new head; list will be tail */ struct commit_list *commit_list_reverse(struct commit_list *list); -void free_commit_list(struct commit_list *list); +void commit_list_free(struct commit_list *list); /* * Deprecated compatibility functions for `struct commit_list`, to be removed @@ -207,6 +207,11 @@ static inline struct commit_list *reverse_commit_list(struct commit_list *l) return commit_list_reverse(l); } +static inline void free_commit_list(struct commit_list *l) +{ + commit_list_free(l); +} + struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ const char *repo_logmsg_reencode(struct repository *r, diff --git a/contrib/coccinelle/free.cocci b/contrib/coccinelle/free.cocci index 6fb9eb6e88379a..03799e190828fa 100644 --- a/contrib/coccinelle/free.cocci +++ b/contrib/coccinelle/free.cocci @@ -5,7 +5,7 @@ expression E; ( free(E); | - free_commit_list(E); + commit_list_free(E); ) @@ @@ -15,7 +15,7 @@ expression E; ( 
free(E); | - free_commit_list(E); + commit_list_free(E); ) @@ @@ -30,7 +30,7 @@ expression E; @@ - if (E) - { - free_commit_list(E); + commit_list_free(E); E = NULL; - } @@ -41,5 +41,5 @@ statement S; - if (E) { + if (E) S - free_commit_list(E); + commit_list_free(E); - } diff --git a/diff-lib.c b/diff-lib.c index 5307390ff3db7b..4772e5a561717a 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -615,7 +615,7 @@ void diff_get_merge_base(const struct rev_info *revs, struct object_id *mb) oidcpy(mb, &merge_bases->item->object.oid); - free_commit_list(merge_bases); + commit_list_free(merge_bases); } void run_diff_index(struct rev_info *revs, unsigned int option) diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c index c9085edc40e934..877a7daed5c268 100644 --- a/fmt-merge-msg.c +++ b/fmt-merge-msg.c @@ -421,7 +421,7 @@ static void shortlog(const char *name, clear_commit_marks((struct commit *)branch, flags); clear_commit_marks(head, flags); - free_commit_list(rev->commits); + commit_list_free(rev->commits); rev->commits = NULL; rev->pending.nr = 0; diff --git a/line-log.c b/line-log.c index 8bd422148dd492..eeaf68454e2246 100644 --- a/line-log.c +++ b/line-log.c @@ -1239,7 +1239,7 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm * don't follow any other path in history */ add_line_range(rev, parent, cand[i]); - free_commit_list(commit->parents); + commit_list_free(commit->parents); commit_list_append(parent, &commit->parents); ret = 0; diff --git a/log-tree.c b/log-tree.c index 1729b0c201271b..7e048701d0c5b4 100644 --- a/log-tree.c +++ b/log-tree.c @@ -1077,7 +1077,7 @@ static int do_remerge_diff(struct rev_info *opt, log_tree_diff_flush(opt); /* Cleanup */ - free_commit_list(bases); + commit_list_free(bases); cleanup_additional_headers(&opt->diffopt); strbuf_release(&parent1_desc); strbuf_release(&parent2_desc); diff --git a/merge-ort-wrappers.c b/merge-ort-wrappers.c index c54d56b34465bf..2110844f5331c1 100644 --- a/merge-ort-wrappers.c +++ 
b/merge-ort-wrappers.c @@ -120,7 +120,7 @@ int merge_ort_generic(struct merge_options *opt, repo_hold_locked_index(opt->repo, &lock, LOCK_DIE_ON_ERROR); clean = merge_ort_recursive(opt, head_commit, next_commit, ca, result); - free_commit_list(ca); + commit_list_free(ca); if (clean < 0) { rollback_lock_file(&lock); return clean; diff --git a/merge-ort.c b/merge-ort.c index 2ddaaffc263d46..0c755361cdb9ee 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -5382,7 +5382,7 @@ static void merge_ort_internal(struct merge_options *opt, opt->ancestor = NULL; /* avoid accidental re-use of opt->ancestor */ out: - free_commit_list(merge_bases); + commit_list_free(merge_bases); } void merge_incore_nonrecursive(struct merge_options *opt, diff --git a/notes-merge.c b/notes-merge.c index 586939939f2451..49d0dadd32e427 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -668,11 +668,11 @@ int notes_merge(struct notes_merge_options *o, commit_list_insert(local, &parents); create_notes_commit(o->repo, local_tree, parents, o->commit_msg.buf, o->commit_msg.len, result_oid); - free_commit_list(parents); + commit_list_free(parents); } found_result: - free_commit_list(bases); + commit_list_free(bases); strbuf_release(&(o->commit_msg)); trace_printf("notes_merge(): result = %i, result_oid = %.7s\n", result, oid_to_hex(result_oid)); diff --git a/notes-utils.c b/notes-utils.c index 6a50c6d56466d5..5c1c75d5b8099a 100644 --- a/notes-utils.c +++ b/notes-utils.c @@ -40,7 +40,7 @@ void create_notes_commit(struct repository *r, NULL)) die("Failed to commit notes tree to database"); - free_commit_list(parents_to_free); + commit_list_free(parents_to_free); } void commit_notes(struct repository *r, struct notes_tree *t, const char *msg) diff --git a/object-name.c b/object-name.c index 8b862c124e05a9..e697566423a075 100644 --- a/object-name.c +++ b/object-name.c @@ -1281,7 +1281,7 @@ static int peel_onion(struct repository *r, const char *name, int len, commit_list_insert((struct commit *)o, &list); ret = 
get_oid_oneline(r, prefix, oid, list); - free_commit_list(list); + commit_list_free(list); free(prefix); return ret; } @@ -1623,7 +1623,7 @@ int repo_get_oid_mb(struct repository *r, if (!two) return -1; if (repo_get_merge_bases(r, one, two, &mbs) < 0) { - free_commit_list(mbs); + commit_list_free(mbs); return -1; } if (!mbs || mbs->next) @@ -1632,7 +1632,7 @@ int repo_get_oid_mb(struct repository *r, st = 0; oidcpy(oid, &mbs->item->object.oid); } - free_commit_list(mbs); + commit_list_free(mbs); return st; } @@ -2052,7 +2052,7 @@ static enum get_oid_result get_oid_with_context_1(struct repository *repo, refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb); ret = get_oid_oneline(repo, name + 2, oid, list); - free_commit_list(list); + commit_list_free(list); return ret; } if (namelen < 3 || diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index bf73ce5710abcc..2e3f1c1530bad5 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -306,7 +306,7 @@ struct bb_commit { static void clear_bb_commit(struct bb_commit *commit) { - free_commit_list(commit->reverse_edges); + commit_list_free(commit->reverse_edges); bitmap_free(commit->commit_mask); bitmap_free(commit->bitmap); } @@ -414,7 +414,7 @@ static void bitmap_builder_init(struct bitmap_builder *bb, p_ent->maximal = 1; else { p_ent->maximal = 0; - free_commit_list(p_ent->reverse_edges); + commit_list_free(p_ent->reverse_edges); p_ent->reverse_edges = NULL; } @@ -445,7 +445,7 @@ static void bitmap_builder_init(struct bitmap_builder *bb, "num_maximal_commits", num_maximal); release_revisions(&revs); - free_commit_list(reusable); + commit_list_free(reusable); } static void bitmap_builder_clear(struct bitmap_builder *bb) diff --git a/ref-filter.c b/ref-filter.c index c318f9ca0ec8dd..3917c4ccd9f73a 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -3782,9 +3782,9 @@ void ref_filter_clear(struct ref_filter *filter) { strvec_clear(&filter->exclude); oid_array_clear(&filter->points_at); - 
free_commit_list(filter->with_commit); - free_commit_list(filter->no_commit); - free_commit_list(filter->reachable_from); - free_commit_list(filter->unreachable_from); + commit_list_free(filter->with_commit); + commit_list_free(filter->no_commit); + commit_list_free(filter->reachable_from); + commit_list_free(filter->unreachable_from); ref_filter_init(filter); } diff --git a/reflog.c b/reflog.c index ac87e20c4f97ff..1460ae9d0dd5f7 100644 --- a/reflog.c +++ b/reflog.c @@ -493,7 +493,7 @@ void reflog_expiry_cleanup(void *cb_data) case UE_HEAD: for (elem = cb->tips; elem; elem = elem->next) clear_commit_marks(elem->item, REACHABLE); - free_commit_list(cb->tips); + commit_list_free(cb->tips); break; case UE_NORMAL: clear_commit_marks(cb->tip_commit, REACHABLE); @@ -501,7 +501,7 @@ void reflog_expiry_cleanup(void *cb_data) } for (elem = cb->mark_list; elem; elem = elem->next) clear_commit_marks(elem->item, REACHABLE); - free_commit_list(cb->mark_list); + commit_list_free(cb->mark_list); } int count_reflog_ent(const char *refname UNUSED, diff --git a/remote.c b/remote.c index b756ff6f1594d9..1c8a9f1a888dd2 100644 --- a/remote.c +++ b/remote.c @@ -1497,7 +1497,7 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds clear_commit_marks_many(src_commits.nr, src_commits.items, reachable_flag); commit_stack_clear(&src_commits); - free_commit_list(found_commits); + commit_list_free(found_commits); } string_list_clear(&src_tag, 0); diff --git a/revision.c b/revision.c index 9f5baceb85f2b3..6d207c2f23ba62 100644 --- a/revision.c +++ b/revision.c @@ -1048,7 +1048,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) continue; } - free_commit_list(parent->next); + commit_list_free(parent->next); parent->next = NULL; while (commit->parents != parent) pop_commit(&commit->parents); @@ -1083,7 +1083,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) die("cannot simplify commit %s (invalid 
%s)", oid_to_hex(&commit->object.oid), oid_to_hex(&p->object.oid)); - free_commit_list(p->parents); + commit_list_free(p->parents); p->parents = NULL; } /* fallthrough */ @@ -1405,7 +1405,7 @@ static void limit_to_ancestry(struct commit_list *bottoms, struct commit_list *l p->item->object.flags &= ~(TMP_MARK | ANCESTRY_PATH); for (p = bottoms; p; p = p->next) p->item->object.flags &= ~(TMP_MARK | ANCESTRY_PATH); - free_commit_list(rlist); + commit_list_free(rlist); } /* @@ -1508,7 +1508,7 @@ static int limit_list(struct rev_info *revs) } } - free_commit_list(original_list); + commit_list_free(original_list); revs->commits = newlist; return 0; } @@ -2011,7 +2011,7 @@ static void prepare_show_merge(struct rev_info *revs) exit(128); add_rev_cmdline_list(revs, bases, REV_CMD_MERGE_BASE, UNINTERESTING | BOTTOM); add_pending_commit_list(revs, bases, UNINTERESTING | BOTTOM); - free_commit_list(bases); + commit_list_free(bases); head->object.flags |= SYMMETRIC_LEFT; if (!istate->cache_nr) @@ -2105,13 +2105,13 @@ static int handle_dotdot_1(const char *arg, char *dotdot, return dotdot_missing(arg, dotdot, revs, symmetric); if (repo_get_merge_bases(the_repository, a, b, &exclude) < 0) { - free_commit_list(exclude); + commit_list_free(exclude); return -1; } add_rev_cmdline_list(revs, exclude, REV_CMD_MERGE_BASE, flags_exclude); add_pending_commit_list(revs, exclude, flags_exclude); - free_commit_list(exclude); + commit_list_free(exclude); b_flags = flags; a_flags = flags | SYMMETRIC_LEFT; @@ -3221,13 +3221,13 @@ static void release_revisions_bloom_keyvecs(struct rev_info *revs) static void free_void_commit_list(void *list) { - free_commit_list(list); + commit_list_free(list); } void release_revisions(struct rev_info *revs) { - free_commit_list(revs->commits); - free_commit_list(revs->ancestry_path_bottoms); + commit_list_free(revs->commits); + commit_list_free(revs->ancestry_path_bottoms); release_display_notes(&revs->notes_opt); object_array_clear(&revs->pending); 
object_array_clear(&revs->boundary_commits); @@ -3335,7 +3335,7 @@ static int mark_redundant_parents(struct commit *commit) if (i != cnt || cnt+marked != orig_cnt) die("mark_redundant_parents %d %d %d %d", orig_cnt, cnt, i, marked); - free_commit_list(h); + commit_list_free(h); return marked; } @@ -4232,7 +4232,7 @@ static void save_parents(struct rev_info *revs, struct commit *commit) static void free_saved_parent(struct commit_list **parents) { if (*parents != EMPTY_PARENT_LIST) - free_commit_list(*parents); + commit_list_free(*parents); } static void free_saved_parents(struct rev_info *revs) @@ -4293,7 +4293,7 @@ static void track_linear(struct rev_info *revs, struct commit *commit) if (revs->linear) commit->object.flags |= TRACK_LINEAR; } - free_commit_list(revs->previous_parents); + commit_list_free(revs->previous_parents); revs->previous_parents = commit_list_copy(commit->parents); } @@ -4382,7 +4382,7 @@ static void create_boundary_commit_list(struct rev_info *revs) * boundary commits anyway. (This is what the code has always * done.) 
*/ - free_commit_list(revs->commits); + commit_list_free(revs->commits); revs->commits = NULL; /* @@ -4504,7 +4504,7 @@ struct commit *get_revision(struct rev_info *revs) reversed = NULL; while ((c = get_revision_internal(revs))) commit_list_insert(c, &reversed); - free_commit_list(revs->commits); + commit_list_free(revs->commits); revs->commits = reversed; revs->reverse = 0; revs->reverse_output_stage = 1; @@ -4522,7 +4522,7 @@ struct commit *get_revision(struct rev_info *revs) graph_update(revs->graph, c); if (!c) { free_saved_parents(revs); - free_commit_list(revs->previous_parents); + commit_list_free(revs->previous_parents); revs->previous_parents = NULL; } return c; diff --git a/sequencer.c b/sequencer.c index e09f8eed551425..f5a6496937a893 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1698,7 +1698,7 @@ static int try_to_commit(struct repository *r, out: free_commit_extra_headers(extra); - free_commit_list(parents); + commit_list_free(parents); strbuf_release(&err); strbuf_release(&commit_msg); free(amend_author); @@ -2476,8 +2476,8 @@ static int do_pick_commit(struct repository *r, res |= try_merge_command(r, opts->strategy, opts->xopts.nr, opts->xopts.v, common, oid_to_hex(&head), remotes); - free_commit_list(common); - free_commit_list(remotes); + commit_list_free(common); + commit_list_free(remotes); } /* @@ -4381,8 +4381,8 @@ static int do_merge(struct repository *r, leave_merge: strbuf_release(&ref_name); rollback_lock_file(&lock); - free_commit_list(to_merge); - free_commit_list(bases); + commit_list_free(to_merge); + commit_list_free(bases); return ret; } @@ -6039,11 +6039,11 @@ static int make_script_with_merges(struct pretty_print_context *pp, oidset_insert(&shown, oid); } - free_commit_list(list); + commit_list_free(list); } - free_commit_list(commits); - free_commit_list(tips); + commit_list_free(commits); + commit_list_free(tips); strbuf_release(&label_from_message); strbuf_release(&oneline); diff --git a/shallow.c b/shallow.c index 
c870efcefcac4a..0409b1354cb5f8 100644 --- a/shallow.c +++ b/shallow.c @@ -40,7 +40,7 @@ int register_shallow(struct repository *r, const struct object_id *oid) oidcpy(&graft->oid, oid); graft->nr_parent = -1; if (commit && commit->object.parsed) { - free_commit_list(commit->parents); + commit_list_free(commit->parents); commit->parents = NULL; } return register_commit_graft(r, graft, 0); @@ -267,7 +267,7 @@ struct commit_list *get_shallow_commits_by_rev_list(struct strvec *argv, break; } } - free_commit_list(not_shallow_list); + commit_list_free(not_shallow_list); /* * Now we can clean up NOT_SHALLOW on border commits. Having diff --git a/submodule.c b/submodule.c index 40a5c6fb9d1545..85e9586e669257 100644 --- a/submodule.c +++ b/submodule.c @@ -639,7 +639,7 @@ void show_submodule_diff_summary(struct diff_options *o, const char *path, print_submodule_diff_summary(sub, &rev, o); out: - free_commit_list(merge_bases); + commit_list_free(merge_bases); release_revisions(&rev); clear_commit_marks(left, ~0); clear_commit_marks(right, ~0); @@ -729,7 +729,7 @@ void show_submodule_inline_diff(struct diff_options *o, const char *path, done: strbuf_release(&sb); - free_commit_list(merge_bases); + commit_list_free(merge_bases); if (left) clear_commit_marks(left, ~0); if (right) diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index feabeb29c25d89..3131b54a871c1b 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -120,12 +120,12 @@ int cmd__reach(int ac, const char **av) exit(128); printf("%s(A,X):\n", av[1]); print_sorted_commit_ids(list); - free_commit_list(list); + commit_list_free(list); } else if (!strcmp(av[1], "reduce_heads")) { struct commit_list *list = reduce_heads(X); printf("%s(X):\n", av[1]); print_sorted_commit_ids(list); - free_commit_list(list); + commit_list_free(list); } else if (!strcmp(av[1], "can_all_from_reach")) { printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1)); } else if (!strcmp(av[1], "can_all_from_reach_with_flag")) 
{ @@ -172,13 +172,13 @@ int cmd__reach(int ac, const char **av) die(_("too many commits marked reachable")); print_sorted_commit_ids(list); - free_commit_list(list); + commit_list_free(list); } object_array_clear(&X_obj); strbuf_release(&buf); - free_commit_list(X); - free_commit_list(Y); + commit_list_free(X); + commit_list_free(Y); commit_stack_clear(&X_stack); commit_stack_clear(&Y_stack); return 0; From f85b49f3d4af5ee0b428285799ac711d6abe1cfb Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Fri, 16 Jan 2026 01:05:03 +0100 Subject: [PATCH 032/784] diff: improve scaling of filenames in diffstat to handle UTF-8 chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `show_stats()` function tries to scale the filenames in the diffstat to ensure they don't exceed the given `name-width`. It does so by calculating the "display width" of the characters to be dropped, but then advances the filename pointer by that number of bytes. However, the "display width" of a character is not always equal to its byte count. The result is that sometimes, when displaying UTF-8 characters, filenames exceed the given `name-width`, and frequently the bytes of the UTF-8 characters are truncated. The following is an example of the issue, where the 2 files are "HelloHi" and "Hello你好", and `name-width=6`: ...oHi | 0 ...好 | 0 Make the filename pointer move by the actual number of bytes of the characters to drop from the filename, rather than their display width, using the `utf8_width()` function. Force `len` to not be less than 0 (this happens if the given `name-width` is 2 or less), otherwise an infinite loop is entered. 
Signed-off-by: LorenzoPegorari Signed-off-by: Junio C Hamano --- diff.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/diff.c b/diff.c index a1961526c0dab1..86fdf4d8d738fd 100644 --- a/diff.c +++ b/diff.c @@ -2823,17 +2823,12 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options) char *slash; prefix = "..."; len -= 3; - /* - * NEEDSWORK: (name_len - len) counts the display - * width, which would be shorter than the byte - * length of the corresponding substring. - * Advancing "name" by that number of bytes does - * *NOT* skip over that many columns, so it is - * very likely that chomping the pathname at the - * slash we will find starting from "name" will - * leave the resulting string still too long. - */ - name += name_len - len; + if (len < 0) + len = 0; + + while (name_len > len) + name_len -= utf8_width((const char**)&name, NULL); + slash = strchr(name, '/'); if (slash) name = slash; From 04f5d95ef7715e952c93f078e2973c44bb6f3396 Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Fri, 16 Jan 2026 01:05:38 +0100 Subject: [PATCH 033/784] t4073: add test for diffstat paths length when containing UTF-8 chars Add test checking the length of filepaths containing UTF-8 chars when generating a diffstat with various `name-width`s. 
Signed-off-by: LorenzoPegorari [jc: fixed up t/meson.build to spell the name of the new test file correctly] Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t4073-diff-stat-name-width.sh | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100755 t/t4073-diff-stat-name-width.sh diff --git a/t/meson.build b/t/meson.build index a5531df415ffe2..73edae4e3d8a64 100644 --- a/t/meson.build +++ b/t/meson.build @@ -496,6 +496,7 @@ integration_tests = [ 't4070-diff-pairs.sh', 't4071-diff-minimal.sh', 't4072-diff-max-depth.sh', + 't4073-diff-stat-name-width.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git a/t/t4073-diff-stat-name-width.sh b/t/t4073-diff-stat-name-width.sh new file mode 100755 index 00000000000000..ec5d3c3c1ffc9d --- /dev/null +++ b/t/t4073-diff-stat-name-width.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +test_description='git-diff check diffstat filepaths length when containing UTF-8 chars' + +. ./test-lib.sh + + +create_files () { + mkdir -p "d你好" && + touch "d你好/f再见" +} + +test_expect_success 'setup' ' + git init && + git config core.quotepath off && + git commit -m "Initial commit" --allow-empty && + create_files && + git add . 
&& + git commit -m "Added files" +' + +test_expect_success 'test name-width long enough for filepath' ' + git diff HEAD~1 HEAD --stat --stat-name-width=12 >out && + grep "d你好/f再见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=11 >out && + grep "d你好/f再见 |" out +' + +test_expect_success 'test name-width not long enough for dir name' ' + git diff HEAD~1 HEAD --stat --stat-name-width=10 >out && + grep ".../f再见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=9 >out && + grep ".../f再见 |" out +' + +test_expect_success 'test name-width not long enough for slash' ' + git diff HEAD~1 HEAD --stat --stat-name-width=8 >out && + grep "...f再见 |" out +' + +test_expect_success 'test name-width not long enough for file name' ' + git diff HEAD~1 HEAD --stat --stat-name-width=7 >out && + grep "...再见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=6 >out && + grep "...见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=5 >out && + grep "...见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=4 >out && + grep "... |" out +' + +test_expect_success 'test name-width minimum length' ' + git diff HEAD~1 HEAD --stat --stat-name-width=3 >out && + grep "... |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=2 >out && + grep "... |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=1 >out && + grep "... |" out +' + +test_done From 9500b2131d29960d3fbd35559c063f7a74568875 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 19 Jan 2026 00:19:45 -0500 Subject: [PATCH 034/784] remote: return non-const pointer from error_buf() We have an error_buf() helper that functions a bit like our error() helper, but returns NULL instead of -1. Its return type is "const char *", but this is overly restrictive. If we use the helper in a function that returns non-const "char *", the compiler will complain about the implicit cast from const to non-const. Meanwhile, the const in the helper is doing nothing useful, as it only ever returns NULL. 
Let's drop the const, which will let us use it in both types of function. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remote.c b/remote.c index df9675cd330ed1..246c8b92e2584c 100644 --- a/remote.c +++ b/remote.c @@ -1838,7 +1838,7 @@ int branch_merge_matches(struct branch *branch, } __attribute__((format (printf,2,3))) -static const char *error_buf(struct strbuf *err, const char *fmt, ...) +static char *error_buf(struct strbuf *err, const char *fmt, ...) { if (err) { va_list ap; From 782a719e99b8a66ef7e05481a481ddfc329985b5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 19 Jan 2026 00:20:26 -0500 Subject: [PATCH 035/784] remote: drop const return of tracking_for_push_dest() The string returned from tracking_for_push_dest() comes from apply_refspec(), and thus is always an allocated string (or NULL). We should return a non-const pointer so that the caller knows that ownership of the string is being transferred. This goes back to the function's origin in e291c75a95 (remote.c: add branch_get_push, 2015-05-21). It never really mattered because our return is just forwarded through branch_get_push_1(), which returns a const string as part of an intentionally hacky memory management scheme (see that commit for details). As the first step of untangling that hackery, let's drop the extra const from this helper function (and from the variables that store its result). There should be no functional change (yet). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/remote.c b/remote.c index 246c8b92e2584c..fc5894e9496c9d 100644 --- a/remote.c +++ b/remote.c @@ -1876,9 +1876,9 @@ const char *branch_get_upstream(struct branch *branch, struct strbuf *err) return branch->merge[0]->dst; } -static const char *tracking_for_push_dest(struct remote *remote, - const char *refname, - struct strbuf *err) +static char *tracking_for_push_dest(struct remote *remote, + const char *refname, + struct strbuf *err) { char *ret; @@ -1906,7 +1906,7 @@ static const char *branch_get_push_1(struct repository *repo, if (remote->push.nr) { char *dst; - const char *ret; + char *ret; dst = apply_refspecs(&remote->push, branch->refname); if (!dst) @@ -1936,7 +1936,8 @@ static const char *branch_get_push_1(struct repository *repo, case PUSH_DEFAULT_UNSPECIFIED: case PUSH_DEFAULT_SIMPLE: { - const char *up, *cur; + const char *up; + char *cur; up = branch_get_upstream(branch, err); if (!up) From 9bf9eed093561f50091014faa7164c0325ea9ced Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 19 Jan 2026 00:22:08 -0500 Subject: [PATCH 036/784] remote: fix leak in branch_get_push_1() with invalid "simple" config Most of the code paths in branch_get_push_1() allocate a string for the @{push} value. We then return the result, which is stored in a "struct branch", so the value is not leaked. But there's one path that does leak: when we are in the "simple" push mode, we have to check that the @{push} value matches what we'd get for @{upstream}. If it doesn't, we return an error, but forget to free the @{push} value we computed. Curiously, the existing tests don't trigger this with LSan, even though they do exercise the code path. 
As far as I can tell, it should be triggered via: git -c push.default=simple \ -c branch.foo.remote=origin \ -c branch.foo.merge=refs/heads/not-foo \ rev-parse foo@{push} which will complain that the upstream ("not-foo") does not match the push destination ("foo"). We do die() shortly after this, but not until after returning from branch_get_push_1(), which is where the leak happens. So it seems like a false negative in LSan. However, I can trigger it reliably by printing the @{push} value using for-each-ref. This takes a little more setup (because we need "foo" to actually exist to iterate over it with for-each-ref), but we can piggy-back on the existing repo config in t6300. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 4 +++- t/for-each-ref-tests.sh | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/remote.c b/remote.c index fc5894e9496c9d..041f9ceb527b0e 100644 --- a/remote.c +++ b/remote.c @@ -1945,9 +1945,11 @@ static const char *branch_get_push_1(struct repository *repo, cur = tracking_for_push_dest(remote, branch->refname, err); if (!cur) return NULL; - if (strcmp(cur, up)) + if (strcmp(cur, up)) { + free(cur); return error_buf(err, _("cannot resolve 'simple' push to a single destination")); + } return cur; } } diff --git a/t/for-each-ref-tests.sh b/t/for-each-ref-tests.sh index e3ad19298accde..02fb92e99e5475 100644 --- a/t/for-each-ref-tests.sh +++ b/t/for-each-ref-tests.sh @@ -1744,6 +1744,15 @@ test_expect_success ':remotename and :remoteref' ' ) ' +test_expect_success '%(push) with an invalid push-simple config' ' + echo "refs/heads/main " >expect && + git -c push.default=simple \ + -c remote.pushdefault=myfork \ + for-each-ref \ + --format="%(refname) %(push)" refs/heads/main >actual && + test_cmp expect actual +' + test_expect_success "${git_for_each_ref} --ignore-case ignores case" ' ${git_for_each_ref} --format="%(refname)" refs/heads/MAIN >actual && test_must_be_empty actual && From 
d79fff4a11a527f57516c62fe00777852bab719a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 19 Jan 2026 00:23:20 -0500 Subject: [PATCH 037/784] remote: always allocate branch.push_tracking_ref In branch_get_push(), we usually allocate a new string for the @{push} ref, but will not do so in push.default=upstream mode, where we just pass back the result of branch_get_upstream() directly. This led to a hacky memory management scheme in e291c75a95 (remote.c: add branch_get_push, 2015-05-21): we store the result in the push_tracking_ref field of a "struct branch", under the assumption that the branch struct will last until the end of the program. So even though the struct doesn't know if it has an allocated string or not, it doesn't matter because we hold on to it either way. But that assumption was violated by f5ccb535cc (remote: fix leaking config strings, 2024-08-22), which added a function to free branch structs. Any struct which is fed to branch_release() is at risk of leaking its push_tracking_ref member. I don't think this can actually be triggered in practice. We rarely actually free the branch structs, and we only fill in the push_tracking_ref string lazily when it is needed. So triggering the leak would require a code path that does both, and I couldn't find one. Still, this is an ugly trap that may eventually spring on us. Since there is only one code path in branch_get_push() that doesn't allocate, let's just have it copy the string. And then we know that push_tracking_ref is always allocated, and we can free it in branch_release(). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 7 ++++--- remote.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/remote.c b/remote.c index 041f9ceb527b0e..c61bcc905f900d 100644 --- a/remote.c +++ b/remote.c @@ -272,6 +272,7 @@ static void branch_release(struct branch *branch) free((char *)branch->refname); free(branch->remote_name); free(branch->pushremote_name); + free(branch->push_tracking_ref); merge_clear(branch); } @@ -1890,8 +1891,8 @@ static char *tracking_for_push_dest(struct remote *remote, return ret; } -static const char *branch_get_push_1(struct repository *repo, - struct branch *branch, struct strbuf *err) +static char *branch_get_push_1(struct repository *repo, + struct branch *branch, struct strbuf *err) { struct remote_state *remote_state = repo->remote_state; struct remote *remote; @@ -1931,7 +1932,7 @@ static const char *branch_get_push_1(struct repository *repo, return tracking_for_push_dest(remote, branch->refname, err); case PUSH_DEFAULT_UPSTREAM: - return branch_get_upstream(branch, err); + return xstrdup_or_null(branch_get_upstream(branch, err)); case PUSH_DEFAULT_UNSPECIFIED: case PUSH_DEFAULT_SIMPLE: diff --git a/remote.h b/remote.h index 0ca399e1835bf1..fc052945ee451d 100644 --- a/remote.h +++ b/remote.h @@ -331,7 +331,7 @@ struct branch { int merge_alloc; - const char *push_tracking_ref; + char *push_tracking_ref; }; struct branch *branch_get(const char *name); From 28f96e0173b4de1543ce45978837ddc49b532a83 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 14 Jan 2026 19:27:53 +0100 Subject: [PATCH 038/784] gitk: fix highlighted remote prefix of branches with directories The decoration of a remote ref is colored in two parts: (1) the prefix that mentions the remote (including "remote/"); and (2) the branch name. To extract the prefix from the ref name, a regular expression is used.
However, the expression is not restrictive enough: it picks everything before the last slash character as prefix, so that, for example, the ref name "remotes/origin/ml/themes" is split into "remotes/origin/ml" and "themes". Tighten the regular expression so that only the name of the remote is pulled into the prefix, but no part of the branch name. This gives the desired result in the example: "remotes/origin" and "ml/themes". Signed-off-by: Johannes Sixt --- gitk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gitk b/gitk index 7f62c8041d1c77..cbaaee994e0aa8 100755 --- a/gitk +++ b/gitk @@ -6841,7 +6841,7 @@ proc drawtags {id x xt y1} { set xl [expr {$xl - $delta/2}] $canv create polygon $x $yt $xr $yt $xr $yb $x $yb \ -width 1 -outline black -fill $col -tags tag.$id - if {[regexp {^(remotes/.*/|remotes/)} $tag match remoteprefix]} { + if {[regexp {^(remotes/[^/]*/|remotes/)} $tag match remoteprefix]} { set rwid [font measure mainfont $remoteprefix] set xi [expr {$x + 1}] set yti [expr {$yt + 1}] From b143f0f60816bbb2095eadc15d81b49c131f6a19 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 20 Jan 2026 22:47:08 +0100 Subject: [PATCH 039/784] last-modified: clarify in the docs the command takes a pathspec The documentation mentions git-last-modified(1) takes `...`, but that argument actually accepts a pathspec. Reword the documentation to reflect that. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- Documentation/git-last-modified.adoc | 11 ++++++----- builtin/last-modified.c | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc index 602843e09598a5..7c3fd844b8eca5 100644 --- a/Documentation/git-last-modified.adoc +++ b/Documentation/git-last-modified.adoc @@ -9,7 +9,8 @@ git-last-modified - EXPERIMENTAL: Show when files were last modified SYNOPSIS -------- [synopsis] -git last-modified [--recursive] [--show-trees] [] [[--] ...]
+git last-modified [--recursive] [--show-trees] + [] [[--] ...] DESCRIPTION ----------- @@ -39,10 +40,10 @@ OPTIONS spell ``, see the 'Specifying Ranges' section of linkgit:gitrevisions[7]. -`[--] ...`:: - For each __ given, the commit which last modified it is returned. - Without an optional path parameter, all files and subdirectories - in path traversal the are included in the output. +`[--] ...`:: + Show the commit that last modified each path matching __. + If no __ is given, all files and subdirectories are included. + See linkgit:gitglossary[7] for details on pathspec syntax. SEE ALSO -------- diff --git a/builtin/last-modified.c b/builtin/last-modified.c index b0ecbdc5400d13..781495f597652e 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -510,8 +510,8 @@ int cmd_last_modified(int argc, const char **argv, const char *prefix, struct last_modified lm = { 0 }; const char * const last_modified_usage[] = { - N_("git last-modified [--recursive] [--show-trees] " - "[] [[--] ...]"), + N_("git last-modified [--recursive] [--show-trees]\n" + " [] [[--] ...]"), NULL }; From 209574de2d2ab0a264522c8c44c3eebb6d03ec43 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 20 Jan 2026 22:47:09 +0100 Subject: [PATCH 040/784] last-modified: document option '-z' The command git-last-modified(1) already recognizes the option '-z', and similar to many other commands this will make the output NUL-terminated instead of using newlines. Although, this option is missing from the documentation, so add it. In addition to that, to have '-z' also appear in the help output of `git last-modified -h`, move the handling of '-z' to parse_options() in builtin/last-modified.c itself. Before, the parsing of option '-z' was done by diff_opt_parse(), which is called by setup_revisions(). That would fill in `struct diff_options::line_termination`, but that field was not used by the diff machinery itself. 
Thus it makes more sense to have the handling of that option completely in builtin/last-modified.c. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- Documentation/git-last-modified.adoc | 21 ++++++++++++++++++++- builtin/last-modified.c | 11 +++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc index 7c3fd844b8eca5..3760fd33a1826f 100644 --- a/Documentation/git-last-modified.adoc +++ b/Documentation/git-last-modified.adoc @@ -9,7 +9,7 @@ git-last-modified - EXPERIMENTAL: Show when files were last modified SYNOPSIS -------- [synopsis] -git last-modified [--recursive] [--show-trees] +git last-modified [--recursive] [--show-trees] [-z] [] [[--] ...] DESCRIPTION @@ -33,6 +33,9 @@ OPTIONS Show tree entries even when recursing into them. It has no effect without `--recursive`. +`-z`:: + Terminate each line with a _NUL_ character rather than a newline. + ``:: Only traverse commits in the specified revision range. When no `` is specified, it defaults to `HEAD` (i.e. the whole @@ -45,6 +48,22 @@ OPTIONS If no __ is given, all files and subdirectories are included. See linkgit:gitglossary[7] for details on pathspec syntax. +OUTPUT +------ + +The output is in the format: + +------------ + TAB LF +------------ + +If a path contains any special characters, the path is C-style quoted. To +avoid quoting, pass option `-z` to terminate each line with a NUL. 
+ +------------ + TAB NUL +------------ + SEE ALSO -------- linkgit:git-blame[1], diff --git a/builtin/last-modified.c b/builtin/last-modified.c index 781495f597652e..46423b527e44a1 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -55,6 +55,7 @@ struct last_modified { struct rev_info rev; bool recursive; bool show_trees; + bool nul_termination; const char **all_paths; size_t all_paths_nr; @@ -165,10 +166,10 @@ static void last_modified_emit(struct last_modified *lm, putchar('^'); printf("%s\t", oid_to_hex(&commit->object.oid)); - if (lm->rev.diffopt.line_termination) - write_name_quoted(path, stdout, '\n'); - else + if (lm->nul_termination) printf("%s%c", path, '\0'); + else + write_name_quoted(path, stdout, '\n'); } static void mark_path(const char *path, const struct object_id *oid, @@ -510,7 +511,7 @@ int cmd_last_modified(int argc, const char **argv, const char *prefix, struct last_modified lm = { 0 }; const char * const last_modified_usage[] = { - N_("git last-modified [--recursive] [--show-trees]\n" + N_("git last-modified [--recursive] [--show-trees] [-z]\n" " [] [[--] ...]"), NULL }; @@ -520,6 +521,8 @@ int cmd_last_modified(int argc, const char **argv, const char *prefix, N_("recurse into subtrees")), OPT_BOOL('t', "show-trees", &lm.show_trees, N_("show tree entries when recursing into subtrees")), + OPT_BOOL('z', NULL, &lm.nul_termination, + N_("lines are separated with NUL character")), OPT_END() }; From 9bfaf78cb28b26ab0538e2edd2229547d6be962f Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 20 Jan 2026 22:47:10 +0100 Subject: [PATCH 041/784] last-modified: document option '--max-depth' Option --max-depth is supported by git-last-modified(1), because it was added to the diff machinery in a1dfa5448d (diff: teach tree-diff a max-depth parameter, 2025-08-07). This option is useful for everyday use of the git-last-modified(1) command, so document its existence in the man page.
To have it also appear in the help output of `git last-modified -h`, move the handling of '--max-depth' to parse_options() in builtin/last-modified.c itself. This prepares for the change in default behavior in the next commit. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- Documentation/git-last-modified.adoc | 8 +++++++- builtin/last-modified.c | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc index 3760fd33a1826f..6f9b119bb655d0 100644 --- a/Documentation/git-last-modified.adoc +++ b/Documentation/git-last-modified.adoc @@ -9,7 +9,7 @@ git-last-modified - EXPERIMENTAL: Show when files were last modified SYNOPSIS -------- [synopsis] -git last-modified [--recursive] [--show-trees] [-z] +git last-modified [--recursive] [--show-trees] [--max-depth=] [-z] [] [[--] ...] DESCRIPTION @@ -33,6 +33,12 @@ OPTIONS Show tree entries even when recursing into them. It has no effect without `--recursive`. +`--max-depth=`:: + For each pathspec given on the command line, traverse at most `` + levels into subtrees. A negative value means no limit. + The default is 0, which shows all paths matching the pathspec + without descending into subtrees. + `-z`:: Terminate each line with a _NUL_ character rather than a newline. 
diff --git a/builtin/last-modified.c b/builtin/last-modified.c index 46423b527e44a1..797c1bb88b5a8d 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -56,6 +56,7 @@ struct last_modified { bool recursive; bool show_trees; bool nul_termination; + int max_depth; const char **all_paths; size_t all_paths_nr; @@ -483,6 +484,12 @@ static int last_modified_init(struct last_modified *lm, struct repository *r, lm->rev.diffopt.flags.recursive = lm->recursive; lm->rev.diffopt.flags.tree_in_recursive = lm->show_trees; + if (lm->max_depth >= 0) { + lm->rev.diffopt.flags.recursive = 1; + lm->rev.diffopt.max_depth = lm->max_depth; + lm->rev.diffopt.max_depth_valid = 1; + } + argc = setup_revisions(argc, argv, &lm->rev, NULL); if (argc > 1) { error(_("unknown last-modified argument: %s"), argv[1]); @@ -511,7 +518,7 @@ int cmd_last_modified(int argc, const char **argv, const char *prefix, struct last_modified lm = { 0 }; const char * const last_modified_usage[] = { - N_("git last-modified [--recursive] [--show-trees] [-z]\n" + N_("git last-modified [--recursive] [--show-trees] [--max-depth=] [-z]\n" " [] [[--] ...]"), NULL }; @@ -521,11 +528,19 @@ int cmd_last_modified(int argc, const char **argv, const char *prefix, N_("recurse into subtrees")), OPT_BOOL('t', "show-trees", &lm.show_trees, N_("show tree entries when recursing into subtrees")), + OPT_INTEGER_F(0, "max-depth", &lm.max_depth, + N_("maximum tree depth to recurse"), PARSE_OPT_NONEG), OPT_BOOL('z', NULL, &lm.nul_termination, N_("lines are separated with NUL character")), OPT_END() }; + /* + * Set the default of a max-depth to "unset". This will change in a + * subsequent commit. 
+ */ + lm.max_depth = -1; + argc = parse_options(argc, argv, prefix, last_modified_options, last_modified_usage, PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); From 9dcc09bed13aba0dc93d253f18ee2c7da5970c0c Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 20 Jan 2026 22:47:11 +0100 Subject: [PATCH 042/784] last-modified: change default max-depth to 0 By default git-last-modified(1) doesn't recurse into subtrees. So when the pathspec contained a path in a subtree, the command would only print the commit information about the parent tree of the path, like: $ git last-modified -- path/file aaa0aab1bbb2bcc3ccc4ddd5dde6eee7eff8fff9 path Change the default behavior to give commit information about the exact path instead: $ git last-modified -- path/file aaa0aab1bbb2bcc3ccc4ddd5dde6eee7eff8fff9 path/file To achieve this, the default max-depth is changed to 0 and recursive is always enabled. The handling of option '-r' is modified to disable a max-depth, resulting in the behavior of this option to remain unchanged. No existing tests were modified, because there didn't exist any tests covering the example above. But more tests are added to cover this now. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- Documentation/git-last-modified.adoc | 9 +++---- builtin/last-modified.c | 21 ++++------------- t/t8020-last-modified.sh | 35 ++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc index 6f9b119bb655d0..d7d16fc4f73e66 100644 --- a/Documentation/git-last-modified.adoc +++ b/Documentation/git-last-modified.adoc @@ -25,13 +25,14 @@ OPTIONS `-r`:: `--recursive`:: - Instead of showing tree entries, step into subtrees and show all entries - inside them recursively. + Recursively traverse into all subtrees. By default, the command only + shows tree entries matching the ``. 
With this option, it + descends into subtrees and displays all entries within them. + Equivalent to `--max-depth=-1`. `-t`:: `--show-trees`:: - Show tree entries even when recursing into them. It has no effect - without `--recursive`. + Show tree entries even when recursing into them. `--max-depth=`:: For each pathspec given on the command line, traverse at most `` diff --git a/builtin/last-modified.c b/builtin/last-modified.c index 797c1bb88b5a8d..e27f36b624c60d 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -53,7 +53,6 @@ define_commit_slab(active_paths_for_commit, struct bitmap *); struct last_modified { struct hashmap paths; struct rev_info rev; - bool recursive; bool show_trees; bool nul_termination; int max_depth; @@ -481,14 +480,10 @@ static int last_modified_init(struct last_modified *lm, struct repository *r, lm->rev.no_commit_id = 1; lm->rev.diff = 1; lm->rev.diffopt.flags.no_recursive_diff_tree_combined = 1; - lm->rev.diffopt.flags.recursive = lm->recursive; + lm->rev.diffopt.flags.recursive = 1; lm->rev.diffopt.flags.tree_in_recursive = lm->show_trees; - - if (lm->max_depth >= 0) { - lm->rev.diffopt.flags.recursive = 1; - lm->rev.diffopt.max_depth = lm->max_depth; - lm->rev.diffopt.max_depth_valid = 1; - } + lm->rev.diffopt.max_depth = lm->max_depth; + lm->rev.diffopt.max_depth_valid = lm->max_depth >= 0; argc = setup_revisions(argc, argv, &lm->rev, NULL); if (argc > 1) { @@ -524,8 +519,8 @@ int cmd_last_modified(int argc, const char **argv, const char *prefix, }; struct option last_modified_options[] = { - OPT_BOOL('r', "recursive", &lm.recursive, - N_("recurse into subtrees")), + OPT_SET_INT('r', "recursive", &lm.max_depth, + N_("recurse into subtrees"), -1), OPT_BOOL('t', "show-trees", &lm.show_trees, N_("show tree entries when recursing into subtrees")), OPT_INTEGER_F(0, "max-depth", &lm.max_depth, @@ -535,12 +530,6 @@ int cmd_last_modified(int argc, const char **argv, const char *prefix, OPT_END() }; - /* - * Set the default 
of a max-depth to "unset". This will change in a - * subsequent commit. - */ - lm.max_depth = -1; - argc = parse_options(argc, argv, prefix, last_modified_options, last_modified_usage, PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh index a4c1114ee28f7f..43f38937baf634 100755 --- a/t/t8020-last-modified.sh +++ b/t/t8020-last-modified.sh @@ -85,6 +85,41 @@ test_expect_success 'last-modified subdir recursive' ' EOF ' +test_expect_success 'last-modified subdir non-recursive' ' + check_last_modified a <<-\EOF + 3 a + EOF +' + +test_expect_success 'last-modified path in subdir non-recursive' ' + check_last_modified a/file <<-\EOF + 2 a/file + EOF +' + +test_expect_success 'last-modified subdir with wildcard non-recursive' ' + check_last_modified a/* <<-\EOF + 3 a/b + 2 a/file + EOF +' + +test_expect_success 'last-modified with negative max-depth' ' + check_last_modified --max-depth=-1 <<-\EOF + 3 a/b/file + 2 a/file + 1 file + EOF +' + +test_expect_success 'last-modified with max-depth of 1' ' + check_last_modified --max-depth=1 <<-\EOF + 3 a/b + 2 a/file + 1 file + EOF +' + test_expect_success 'last-modified from non-HEAD commit' ' check_last_modified HEAD^ <<-\EOF 2 a From 49223593fd743998d13fcd27fceaf1e0095bb08e Mon Sep 17 00:00:00 2001 From: Amisha Chhajed Date: Wed, 21 Jan 2026 18:30:05 +0530 Subject: [PATCH 043/784] sparse-checkout: optimize string_list construction and add tests to verify deduplication. Improve O(n^2) complexity to O(n log n) while building a sorted 'string_list' by constructing it unsorted, then sorting it and removing duplicates. sparse-checkout deduplicates repeated cone-mode patterns, but this behaviour was previously untested. Add tests that verify that the sparse-checkout file contains each cone pattern only once and that 'sparse-checkout list' reports each pattern only once.
Signed-off-by: Amisha Chhajed Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 7 +++-- t/t1091-sparse-checkout-builtin.sh | 48 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 15d51e60a86533..7dfb276bf01431 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -91,10 +91,11 @@ static int sparse_checkout_list(int argc, const char **argv, const char *prefix, hashmap_for_each_entry(&pl.recursive_hashmap, &iter, pe, ent) { /* pe->pattern starts with "/", skip it */ - string_list_insert(&sl, pe->pattern + 1); + string_list_append(&sl, pe->pattern + 1); } string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); for (i = 0; i < sl.nr; i++) { quote_c_style(sl.items[i].string, NULL, stdout, 0); @@ -289,7 +290,7 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) if (!hashmap_contains_parent(&pl->recursive_hashmap, pe->pattern, &parent_pattern)) - string_list_insert(&sl, pe->pattern); + string_list_append(&sl, pe->pattern); } string_list_sort(&sl); @@ -311,7 +312,7 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) if (!hashmap_contains_parent(&pl->recursive_hashmap, pe->pattern, &parent_pattern)) - string_list_insert(&sl, pe->pattern); + string_list_append(&sl, pe->pattern); } strbuf_release(&parent_pattern); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index b2da4feaeff9ec..cd0aed9975fe24 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -817,6 +817,54 @@ test_expect_success 'cone mode clears ignored subdirectories' ' test_cmp expect out ' +test_expect_success 'sparse-checkout deduplicates repeated cone patterns' ' + rm -f repo/.git/info/sparse-checkout && + git -C repo sparse-checkout init --cone && + git -C repo sparse-checkout add --stdin <<-\EOF && + foo/bar/baz + a/b/c + foo/bar/baz + 
a/b + EOF + cat >expect <<-\EOF && + /* + !/*/ + /a/ + !/a/*/ + /foo/ + !/foo/*/ + /foo/bar/ + !/foo/bar/*/ + /a/b/ + /foo/bar/baz/ + EOF + test_cmp expect repo/.git/info/sparse-checkout +' + +test_expect_success 'sparse-checkout list deduplicates repeated cone patterns' ' + rm -f repo/.git/info/sparse-checkout && + git -C repo sparse-checkout init --cone && + cat <<-\EOF >repo/.git/info/sparse-checkout && + /* + !/*/ + /a/ + !/a/*/ + /foo/ + !/foo/*/ + /foo/bar/ + !/foo/bar/*/ + /a/b/ + /foo/bar/baz/ + /foo/bar/baz/ + EOF + git -C repo sparse-checkout list >actual && + cat <<-\EOF >expect && + a/b + foo/bar/baz + EOF + test_cmp expect actual +' + test_expect_success 'malformed cone-mode patterns' ' git -C repo sparse-checkout init --cone && mkdir -p repo/foo/bar && From a824421d3644f39bfa8dfc75876db8ed1c7bcdbf Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Wed, 21 Jan 2026 18:24:11 +0530 Subject: [PATCH 044/784] t5500: simplify test implementation and fix git exit code suppression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'shallow since with commit graph and already-seen commit' test uses a convoluted here-doc that combines manual input construction with packetize, echo and embedded Git commands. This structure hides failures from the git commands, as their exit codes are suppressed inside echo command substitutions and on the upstream side of pipes. Instead of using a here-doc to construct the pack protocol input that is sent directly to the 'git upload-pack' command being tested, capture the outputs of the git commands upfront and use the 'test-tool pkt-line pack' tool to construct the input in a temporary file, and then feed it to the command. This has a few advantages: * Executing the git commands outside the here-doc avoids suppressing their exit codes and makes debugging easier.
* It removes the need to manually count and manage pkt-line lengths to keep in line with the v2 protocol, as the tool handles this internally. Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- t/t5500-fetch-pack.sh | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh index 2677cd5faa8253..4bb56c167a52ec 100755 --- a/t/t5500-fetch-pack.sh +++ b/t/t5500-fetch-pack.sh @@ -892,15 +892,20 @@ test_expect_success 'shallow since with commit graph and already-seen commit' ' test_commit other && git commit-graph write --reachable && git config core.commitGraph true && - - GIT_PROTOCOL=version=2 git upload-pack . <<-EOF >/dev/null - 0012command=fetch - $(echo "object-format=$(test_oid algo)" | packetize) - 00010013deepen-since 1 - $(echo "want $(git rev-parse other)" | packetize) - $(echo "have $(git rev-parse main)" | packetize) + oid_algo=$(test_oid algo) && + oid_other=$(git rev-parse other) && + oid_main=$(git rev-parse main) && + + test-tool pkt-line pack >input <<-EOF && + command=fetch + object-format=$oid_algo + 0001 + deepen-since 1 + want $oid_other + have $oid_main 0000 EOF + GIT_PROTOCOL=version=2 git upload-pack . /dev/null ) ' From b4e8f60a3c78477e1f28b052cd740ac4a43741d5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 22 Jan 2026 16:05:58 +0000 Subject: [PATCH 045/784] revision: add --maximal-only option When inspecting a range of commits from some set of starting references, it is sometimes useful to learn which commits are not reachable from any other commits in the selected range. One such application is in the creation of a sequence of bundles for the bundle URI feature. Creating a stack of bundles representing different slices of time includes defining which references to include. If all references are used, then this may be overwhelming or redundant. 
Instead, selecting commits that are maximal to the range could help defining a smaller reference set to use in the bundle header. Add a new '--maximal-only' option to restrict the output of a revision range to be only the commits that are not reachable from any other commit in the range, based on the reachability definition of the walk. This is accomplished by adding a new 28th bit flag, CHILD_VISITED, that is set as we walk. This does extend the bit range in object.h, but using an earlier bit may collide with another feature. The tests demonstrate the behavior of the feature with a positive-only range, ranges with negative references, and walk-modifying flags like --first-parent and --exclude-first-parent-only. Since the --boundary option would not increase any results when used with the --maximal-only option, mark them as incompatible. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.adoc | 4 ++ object.h | 4 +- revision.c | 12 ++++- revision.h | 5 +- t/t6000-rev-list-misc.sh | 15 ++++++ t/t6600-test-reach.sh | 75 +++++++++++++++++++++++++++++ 6 files changed, 110 insertions(+), 5 deletions(-) diff --git a/Documentation/rev-list-options.adoc b/Documentation/rev-list-options.adoc index 453ec590571ffc..a39cf88bbcfaaa 100644 --- a/Documentation/rev-list-options.adoc +++ b/Documentation/rev-list-options.adoc @@ -148,6 +148,10 @@ endif::git-log[] from the point where it diverged from the remote branch, given that arbitrary merges can be valid topic branch changes. +`--maximal-only`:: + Restrict the output commits to be those that are not reachable + from any other commits in the revision range. + `--not`:: Reverses the meaning of the '{caret}' prefix (or lack thereof) for all following revision specifiers, up to the next `--not`. 
diff --git a/object.h b/object.h index 4bca957b8dcbd6..dfe7a1f0ea29da 100644 --- a/object.h +++ b/object.h @@ -64,7 +64,7 @@ void object_array_init(struct object_array *array); /* * object flag allocation: - * revision.h: 0---------10 15 23------27 + * revision.h: 0---------10 15 23--------28 * fetch-pack.c: 01 67 * negotiator/default.c: 2--5 * walker.c: 0-2 @@ -86,7 +86,7 @@ void object_array_init(struct object_array *array); * builtin/unpack-objects.c: 2021 * pack-bitmap.h: 2122 */ -#define FLAG_BITS 28 +#define FLAG_BITS 29 #define TYPE_BITS 3 diff --git a/revision.c b/revision.c index 9b131670f79b96..6ca4b9dfcd54b9 100644 --- a/revision.c +++ b/revision.c @@ -1150,7 +1150,8 @@ static int process_parents(struct rev_info *revs, struct commit *commit, struct commit *p = parent->item; parent = parent->next; if (p) - p->object.flags |= UNINTERESTING; + p->object.flags |= UNINTERESTING | + CHILD_VISITED; if (repo_parse_commit_gently(revs->repo, p, 1) < 0) continue; if (p->parents) @@ -1204,7 +1205,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit, if (!*slot) *slot = *revision_sources_at(revs->sources, commit); } - p->object.flags |= pass_flags; + p->object.flags |= pass_flags | CHILD_VISITED; if (!(p->object.flags & SEEN)) { p->object.flags |= (SEEN | NOT_USER_GIVEN); if (list) @@ -2377,6 +2378,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if ((argcount = parse_long_opt("until", argv, &optarg))) { revs->min_age = approxidate(optarg); return argcount; + } else if (!strcmp(arg, "--maximal-only")) { + revs->maximal_only = 1; } else if (!strcmp(arg, "--first-parent")) { revs->first_parent_only = 1; } else if (!strcmp(arg, "--exclude-first-parent-only")) { @@ -3147,6 +3150,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s !!revs->reverse, "--reverse", !!revs->reflog_info, "--walk-reflogs"); + die_for_incompatible_opt2(!!revs->boundary, "--boundary", + 
!!revs->maximal_only, "--maximal-only"); + if (revs->no_walk && revs->graph) die(_("options '%s' and '%s' cannot be used together"), "--no-walk", "--graph"); if (!revs->reflog_info && revs->grep_filter.use_reflog_filter) @@ -4125,6 +4131,8 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi { if (commit->object.flags & SHOWN) return commit_ignore; + if (revs->maximal_only && (commit->object.flags & CHILD_VISITED)) + return commit_ignore; if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid)) return commit_ignore; if (revs->no_kept_objects) { diff --git a/revision.h b/revision.h index b36acfc2d9f61d..69242ecb189a52 100644 --- a/revision.h +++ b/revision.h @@ -52,7 +52,9 @@ #define NOT_USER_GIVEN (1u<<25) #define TRACK_LINEAR (1u<<26) #define ANCESTRY_PATH (1u<<27) -#define ALL_REV_FLAGS (((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR | PULL_MERGE) +#define CHILD_VISITED (1u<<28) +#define ALL_REV_FLAGS (((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR \ + | PULL_MERGE | CHILD_VISITED) #define DECORATE_SHORT_REFS 1 #define DECORATE_FULL_REFS 2 @@ -189,6 +191,7 @@ struct rev_info { left_right:1, left_only:1, right_only:1, + maximal_only:1, rewrite_parents:1, print_parents:1, show_decorations:1, diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh index fec16448cfddb8..d0a2a866100d56 100755 --- a/t/t6000-rev-list-misc.sh +++ b/t/t6000-rev-list-misc.sh @@ -248,4 +248,19 @@ test_expect_success 'rev-list -z --boundary' ' test_cmp expect actual ' +test_expect_success 'rev-list --boundary incompatible with --maximal-only' ' + test_when_finished rm -rf repo && + + git init repo && + test_commit -C repo 1 && + test_commit -C repo 2 && + + oid1=$(git -C repo rev-parse HEAD~) && + oid2=$(git -C repo rev-parse HEAD) && + + test_must_fail git -C repo rev-list --boundary --maximal-only \ + HEAD~1..HEAD 2>err && + test_grep "cannot be used together" err +' + test_done diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh 
index 6638d1aa1dcebe..2613075894282d 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -762,4 +762,79 @@ test_expect_success 'for-each-ref is-base: --sort' ' --sort=refname --sort=-is-base:commit-2-3 ' +test_expect_success 'rev-list --maximal-only (all positive)' ' + # Only one maximal. + cat >input <<-\EOF && + refs/heads/commit-1-1 + refs/heads/commit-4-2 + refs/heads/commit-4-4 + refs/heads/commit-8-4 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-8-4) + EOF + run_all_modes git rev-list --maximal-only --stdin && + + # All maximal. + cat >input <<-\EOF && + refs/heads/commit-5-2 + refs/heads/commit-4-3 + refs/heads/commit-3-4 + refs/heads/commit-2-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-5-2) + $(git rev-parse refs/heads/commit-4-3) + $(git rev-parse refs/heads/commit-3-4) + $(git rev-parse refs/heads/commit-2-5) + EOF + run_all_modes git rev-list --maximal-only --stdin && + + # Mix of both. + cat >input <<-\EOF && + refs/heads/commit-5-2 + refs/heads/commit-3-2 + refs/heads/commit-2-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-5-2) + $(git rev-parse refs/heads/commit-2-5) + EOF + run_all_modes git rev-list --maximal-only --stdin +' + +test_expect_success 'rev-list --maximal-only (range)' ' + cat >input <<-\EOF && + refs/heads/commit-1-1 + refs/heads/commit-2-5 + refs/heads/commit-6-4 + ^refs/heads/commit-4-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-6-4) + EOF + run_all_modes git rev-list --maximal-only --stdin && + + # first-parent changes reachability: the first parent + # reduces the second coordinate to 1 before reducing the + # first coordinate. 
+ cat >input <<-\EOF && + refs/heads/commit-1-1 + refs/heads/commit-2-5 + refs/heads/commit-6-4 + ^refs/heads/commit-4-5 + EOF + + cat >expect <<-EOF && + $(git rev-parse refs/heads/commit-6-4) + $(git rev-parse refs/heads/commit-2-5) + EOF + run_all_modes git rev-list --maximal-only --stdin \ + --first-parent --exclude-first-parent-only +' + test_done From dbdcab6b89ea86fe58ece01bbb7be297ff23b2c4 Mon Sep 17 00:00:00 2001 From: Paulo Casaretto Date: Thu, 22 Jan 2026 19:23:35 +0000 Subject: [PATCH 046/784] lockfile: add PID file for debugging stale locks When a lock file is held, it can be helpful to know which process owns it, especially when debugging stale locks left behind by crashed processes. Add an optional feature that creates a companion PID file alongside each lock file, containing the PID of the lock holder. For a lock file "foo.lock", the PID file is named "foo~pid.lock". The tilde character is forbidden in refnames and allowed in Windows filenames, which guarantees no collision with the refs namespace (e.g., refs "foo" and "foo~pid" cannot both exist). The file contains a single line in the format "pid <value>" followed by a newline. The PID file is created when a lock is acquired (if enabled), and automatically cleaned up when the lock is released (via commit or rollback). The file is registered as a tempfile so it gets cleaned up by signal and atexit handlers if the process terminates abnormally. When a lock conflict occurs, the code checks for an existing PID file and, if found, uses kill(pid, 0) to determine if the process is still running. This allows providing context-aware error messages: Lock may be held by process 12345; if no git process is running, the lock file may be stale (PIDs can be reused) Or for a stale lock: Lock was held by process 12345, which is no longer running; the lock file appears to be stale The feature is controlled via core.lockfilePid configuration (boolean). Defaults to false. When enabled, PID files are created for all lock operations.
Existing PID files are always read when displaying lock errors, regardless of the core.lockfilePid setting. This ensures helpful diagnostics even when the feature was previously enabled and later disabled. Signed-off-by: Paulo Casaretto Signed-off-by: Junio C Hamano --- Documentation/config/core.adoc | 11 +++ compat/mingw.c | 10 ++ environment.c | 6 ++ lockfile.c | 168 ++++++++++++++++++++++++++++++--- lockfile.h | 43 ++++++--- t/meson.build | 1 + t/t0031-lockfile-pid.sh | 105 +++++++++++++++++++++ 7 files changed, 315 insertions(+), 29 deletions(-) create mode 100755 t/t0031-lockfile-pid.sh diff --git a/Documentation/config/core.adoc b/Documentation/config/core.adoc index 01202da7cd4a36..5c4bc9206fb0fb 100644 --- a/Documentation/config/core.adoc +++ b/Documentation/config/core.adoc @@ -348,6 +348,17 @@ confusion unless you know what you are doing (e.g. you are creating a read-only snapshot of the same index to a location different from the repository's usual working tree). +core.lockfilePid:: + If true, Git will create a PID file alongside lock files. When a + lock acquisition fails and a PID file exists, Git can provide + additional diagnostic information about the process holding the + lock, including whether it is still running. Defaults to `false`. ++ +The PID file is named by inserting `~pid` before the `.lock` suffix. +For example, if the lock file is `index.lock`, the PID file will be +`index~pid.lock`. The file contains a single line in the format +`pid ` followed by a newline. + core.logAllRefUpdates:: Enable the reflog. Updates to a ref is logged to the file "`$GIT_DIR/logs/`", by appending the new and old diff --git a/compat/mingw.c b/compat/mingw.c index 939f938fe27fe5..146b2585ce7ebf 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1972,6 +1972,16 @@ int mingw_kill(pid_t pid, int sig) CloseHandle(h); return 0; } + /* + * OpenProcess returns ERROR_INVALID_PARAMETER for + * non-existent PIDs. 
Map this to ESRCH for POSIX + * compatibility with kill(pid, 0). + */ + if (GetLastError() == ERROR_INVALID_PARAMETER) + errno = ESRCH; + else + errno = err_win_to_posix(GetLastError()); + return -1; } errno = EINVAL; diff --git a/environment.c b/environment.c index a770b5921d9546..4adcce8606a34c 100644 --- a/environment.c +++ b/environment.c @@ -21,6 +21,7 @@ #include "gettext.h" #include "git-zlib.h" #include "ident.h" +#include "lockfile.h" #include "mailmap.h" #include "object-name.h" #include "repository.h" @@ -532,6 +533,11 @@ static int git_default_core_config(const char *var, const char *value, return 0; } + if (!strcmp(var, "core.lockfilepid")) { + lockfile_pid_enabled = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.createobject")) { if (!value) return config_error_nonbool(var); diff --git a/lockfile.c b/lockfile.c index 1d5ed016828746..13e2ad1307a186 100644 --- a/lockfile.c +++ b/lockfile.c @@ -6,6 +6,9 @@ #include "abspath.h" #include "gettext.h" #include "lockfile.h" +#include "parse.h" +#include "strbuf.h" +#include "wrapper.h" /* * path = absolute or relative path name @@ -71,19 +74,115 @@ static void resolve_symlink(struct strbuf *path) strbuf_reset(&link); } +/* + * Lock PID file functions - write PID to a foo~pid.lock file alongside + * the lock file for debugging stale locks. The PID file is registered + * as a tempfile so it gets cleaned up by signal/atexit handlers. + * + * Naming: For "foo.lock", the PID file is "foo~pid.lock". The tilde is + * forbidden in refnames and allowed in Windows filenames, guaranteeing + * no collision with the refs namespace. + */ + +/* Global config variable, initialized from core.lockfilePid */ +int lockfile_pid_enabled; + +/* + * Path generation helpers. 
+ * Given base path "foo", generate: + * - lock path: "foo.lock" + * - pid path: "foo~pid.lock" + */ +static void get_lock_path(struct strbuf *out, const char *path) +{ + strbuf_addstr(out, path); + strbuf_addstr(out, LOCK_SUFFIX); +} + +static void get_pid_path(struct strbuf *out, const char *path) +{ + strbuf_addstr(out, path); + strbuf_addstr(out, LOCK_PID_INFIX); + strbuf_addstr(out, LOCK_SUFFIX); +} + +static struct tempfile *create_lock_pid_file(const char *pid_path, int mode) +{ + struct strbuf content = STRBUF_INIT; + struct tempfile *pid_tempfile = NULL; + int fd; + + if (!lockfile_pid_enabled) + goto out; + + fd = open(pid_path, O_WRONLY | O_CREAT | O_EXCL, mode); + if (fd < 0) + goto out; + + strbuf_addf(&content, "pid %" PRIuMAX "\n", (uintmax_t)getpid()); + if (write_in_full(fd, content.buf, content.len) < 0) { + warning_errno(_("could not write lock pid file '%s'"), pid_path); + close(fd); + unlink(pid_path); + goto out; + } + + close(fd); + pid_tempfile = register_tempfile(pid_path); + +out: + strbuf_release(&content); + return pid_tempfile; +} + +static int read_lock_pid(const char *pid_path, uintmax_t *pid_out) +{ + struct strbuf content = STRBUF_INIT; + const char *val; + int ret = -1; + + if (strbuf_read_file(&content, pid_path, LOCK_PID_MAXLEN) <= 0) + goto out; + + strbuf_rtrim(&content); + + if (skip_prefix(content.buf, "pid ", &val)) { + char *endptr; + *pid_out = strtoumax(val, &endptr, 10); + if (*pid_out > 0 && !*endptr) + ret = 0; + } + + if (ret) + warning(_("malformed lock pid file '%s'"), pid_path); + +out: + strbuf_release(&content); + return ret; +} + /* Make sure errno contains a meaningful value on error */ static int lock_file(struct lock_file *lk, const char *path, int flags, int mode) { - struct strbuf filename = STRBUF_INIT; + struct strbuf base_path = STRBUF_INIT; + struct strbuf lock_path = STRBUF_INIT; + struct strbuf pid_path = STRBUF_INIT; - strbuf_addstr(&filename, path); + strbuf_addstr(&base_path, path); if (!(flags &
LOCK_NO_DEREF)) - resolve_symlink(&filename); + resolve_symlink(&base_path); + + get_lock_path(&lock_path, base_path.buf); + get_pid_path(&pid_path, base_path.buf); + + lk->tempfile = create_tempfile_mode(lock_path.buf, mode); + if (lk->tempfile) + lk->pid_tempfile = create_lock_pid_file(pid_path.buf, mode); - strbuf_addstr(&filename, LOCK_SUFFIX); - lk->tempfile = create_tempfile_mode(filename.buf, mode); - strbuf_release(&filename); + strbuf_release(&base_path); + strbuf_release(&lock_path); + strbuf_release(&pid_path); return lk->tempfile ? lk->tempfile->fd : -1; } @@ -151,16 +250,49 @@ static int lock_file_timeout(struct lock_file *lk, const char *path, void unable_to_lock_message(const char *path, int err, struct strbuf *buf) { if (err == EEXIST) { - strbuf_addf(buf, _("Unable to create '%s.lock': %s.\n\n" - "Another git process seems to be running in this repository, e.g.\n" - "an editor opened by 'git commit'. Please make sure all processes\n" - "are terminated then try again. If it still fails, a git process\n" - "may have crashed in this repository earlier:\n" - "remove the file manually to continue."), - absolute_path(path), strerror(err)); - } else + const char *abs_path = absolute_path(path); + struct strbuf lock_path = STRBUF_INIT; + struct strbuf pid_path = STRBUF_INIT; + uintmax_t pid; + int pid_status = 0; /* 0 = unknown, 1 = running, -1 = stale */ + + get_lock_path(&lock_path, abs_path); + get_pid_path(&pid_path, abs_path); + + strbuf_addf(buf, _("Unable to create '%s': %s.\n\n"), + lock_path.buf, strerror(err)); + + /* + * Try to read PID file unconditionally - it may exist if + * core.lockfilePid was enabled. 
+ */ + if (!read_lock_pid(pid_path.buf, &pid)) { + if (kill((pid_t)pid, 0) == 0 || errno == EPERM) + pid_status = 1; /* running (or no permission to signal) */ + else if (errno == ESRCH) + pid_status = -1; /* no such process - stale lock */ + } + + if (pid_status == 1) + strbuf_addf(buf, _("Lock may be held by process %" PRIuMAX "; " + "if no git process is running, the lock file " + "may be stale (PIDs can be reused)"), + pid); + else if (pid_status == -1) + strbuf_addf(buf, _("Lock was held by process %" PRIuMAX ", " + "which is no longer running; the lock file " + "appears to be stale"), + pid); + else + strbuf_addstr(buf, _("Another git process seems to be running in this repository, " + "or the lock file may be stale")); + + strbuf_release(&lock_path); + strbuf_release(&pid_path); + } else { strbuf_addf(buf, _("Unable to create '%s.lock': %s"), absolute_path(path), strerror(err)); + } } NORETURN void unable_to_lock_die(const char *path, int err) @@ -207,6 +339,8 @@ int commit_lock_file(struct lock_file *lk) { char *result_path = get_locked_file_path(lk); + delete_tempfile(&lk->pid_tempfile); + if (commit_lock_file_to(lk, result_path)) { int save_errno = errno; free(result_path); @@ -216,3 +350,9 @@ int commit_lock_file(struct lock_file *lk) free(result_path); return 0; } + +int rollback_lock_file(struct lock_file *lk) +{ + delete_tempfile(&lk->pid_tempfile); + return delete_tempfile(&lk->tempfile); +} diff --git a/lockfile.h b/lockfile.h index 1bb99264976d27..e7233f28dea5c7 100644 --- a/lockfile.h +++ b/lockfile.h @@ -119,6 +119,7 @@ struct lock_file { struct tempfile *tempfile; + struct tempfile *pid_tempfile; }; #define LOCK_INIT { 0 } @@ -127,6 +128,22 @@ struct lock_file { #define LOCK_SUFFIX ".lock" #define LOCK_SUFFIX_LEN 5 +/* + * PID file naming: for a lock file "foo.lock", the PID file is "foo~pid.lock". 
+ * The tilde is forbidden in refnames and allowed in Windows filenames, avoiding + * namespace collisions (e.g., refs "foo" and "foo~pid" cannot both exist). + */ +#define LOCK_PID_INFIX "~pid" +#define LOCK_PID_INFIX_LEN 4 + +/* Maximum length for PID file content */ +#define LOCK_PID_MAXLEN 32 + +/* + * Whether to create PID files alongside lock files. + * Configured via core.lockfilePid (boolean). + */ +extern int lockfile_pid_enabled; /* * Flags @@ -169,12 +186,12 @@ struct lock_file { * handling, and mode are described above. */ int hold_lock_file_for_update_timeout_mode( - struct lock_file *lk, const char *path, - int flags, long timeout_ms, int mode); + struct lock_file *lk, const char *path, + int flags, long timeout_ms, int mode); static inline int hold_lock_file_for_update_timeout( - struct lock_file *lk, const char *path, - int flags, long timeout_ms) + struct lock_file *lk, const char *path, + int flags, long timeout_ms) { return hold_lock_file_for_update_timeout_mode(lk, path, flags, timeout_ms, 0666); @@ -186,15 +203,14 @@ static inline int hold_lock_file_for_update_timeout( * argument and error handling are described above. */ static inline int hold_lock_file_for_update( - struct lock_file *lk, const char *path, - int flags) + struct lock_file *lk, const char *path, int flags) { return hold_lock_file_for_update_timeout(lk, path, flags, 0); } static inline int hold_lock_file_for_update_mode( - struct lock_file *lk, const char *path, - int flags, int mode) + struct lock_file *lk, const char *path, + int flags, int mode) { return hold_lock_file_for_update_timeout_mode(lk, path, flags, 0, mode); } @@ -319,13 +335,10 @@ static inline int commit_lock_file_to(struct lock_file *lk, const char *path) /* * Roll back `lk`: close the file descriptor and/or file pointer and - * remove the lockfile. It is a NOOP to call `rollback_lock_file()` - * for a `lock_file` object that has already been committed or rolled - * back. No error will be returned in this case. 
+ * remove the lockfile and any associated PID file. It is a NOOP to + * call `rollback_lock_file()` for a `lock_file` object that has already + * been committed or rolled back. No error will be returned in this case. */ -static inline int rollback_lock_file(struct lock_file *lk) -{ - return delete_tempfile(&lk->tempfile); -} +int rollback_lock_file(struct lock_file *lk); #endif /* LOCKFILE_H */ diff --git a/t/meson.build b/t/meson.build index 459c52a48972e4..2aec2c011e6706 100644 --- a/t/meson.build +++ b/t/meson.build @@ -98,6 +98,7 @@ integration_tests = [ 't0028-working-tree-encoding.sh', 't0029-core-unsetenvvars.sh', 't0030-stripspace.sh', + 't0031-lockfile-pid.sh', 't0033-safe-directory.sh', 't0034-root-safe-directory.sh', 't0035-safe-bare-repository.sh', diff --git a/t/t0031-lockfile-pid.sh b/t/t0031-lockfile-pid.sh new file mode 100755 index 00000000000000..8ef87addf56f1e --- /dev/null +++ b/t/t0031-lockfile-pid.sh @@ -0,0 +1,105 @@ +#!/bin/sh + +test_description='lock file PID info tests + +Tests for PID info file alongside lock files. +The feature is opt-in via core.lockfilePid config setting (boolean). +' + +. ./test-lib.sh + +test_expect_success 'stale lock detected when PID is not running' ' + git init repo && + ( + cd repo && + touch .git/index.lock && + printf "pid 99999" >.git/index~pid.lock && + test_must_fail git -c core.lockfilePid=true add . 2>err && + test_grep "process 99999, which is no longer running" err && + test_grep "appears to be stale" err + ) +' + +test_expect_success 'PID info not shown by default' ' + git init repo2 && + ( + cd repo2 && + touch .git/index.lock && + printf "pid 99999" >.git/index~pid.lock && + test_must_fail git add . 2>err && + # Should not crash, just show normal error without PID + test_grep "Unable to create" err && + ! 
test_grep "is held by process" err + ) +' + +test_expect_success 'running process detected when PID is alive' ' + git init repo3 && + ( + cd repo3 && + echo content >file && + # Get the correct PID for this platform + shell_pid=$$ && + if test_have_prereq MINGW && test -f /proc/$shell_pid/winpid + then + # In Git for Windows, Bash uses MSYS2 PIDs but git.exe + # uses Windows PIDs. Use the Windows PID. + shell_pid=$(cat /proc/$shell_pid/winpid) + fi && + # Create a lock and PID file with current shell PID (which is running) + touch .git/index.lock && + printf "pid %d" "$shell_pid" >.git/index~pid.lock && + # Verify our PID is shown in the error message + test_must_fail git -c core.lockfilePid=true add file 2>err && + test_grep "held by process $shell_pid" err + ) +' + +test_expect_success 'PID info file cleaned up on successful operation when enabled' ' + git init repo4 && + ( + cd repo4 && + echo content >file && + git -c core.lockfilePid=true add file && + # After successful add, no lock or PID files should exist + test_path_is_missing .git/index.lock && + test_path_is_missing .git/index~pid.lock + ) +' + +test_expect_success 'no PID file created by default' ' + git init repo5 && + ( + cd repo5 && + echo content >file && + git add file && + # PID file should not be created when feature is disabled + test_path_is_missing .git/index~pid.lock + ) +' + +test_expect_success 'core.lockfilePid=false does not create PID file' ' + git init repo6 && + ( + cd repo6 && + echo content >file && + git -c core.lockfilePid=false add file && + # PID file should not be created when feature is disabled + test_path_is_missing .git/index~pid.lock + ) +' + +test_expect_success 'existing PID files are read even when feature disabled' ' + git init repo7 && + ( + cd repo7 && + touch .git/index.lock && + printf "pid 99999" >.git/index~pid.lock && + # Even with lockfilePid disabled, existing PID files are read + # to help diagnose stale locks + test_must_fail git add . 
2>err && + test_grep "process 99999" err + ) +' + +test_done From b52a28b03ec99f2cfe4ef921b0d47250c665b0c6 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 25 Jan 2026 23:52:36 +0100 Subject: [PATCH 047/784] refs: skip to next ref when current ref is rejected In `refs_verify_refnames_available()` we have two nested loops: the outer loop iterates over all references to check, while the inner loop checks for filesystem conflicts for a given ref by breaking down its path. With batched updates, when we detect a filesystem conflict, we mark the update as rejected and execute 'continue'. However, this only skips to the next iteration of the inner loop, not the outer loop as intended. This causes the same reference to be repeatedly rejected. Fix this by using a goto statement to skip to the next reference in the outer loop. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.c | 44 ++++++++++++++++++++++++----------------- refs/files-backend.c | 5 ++--- refs/packed-backend.c | 12 +++++------ refs/refs-internal.h | 4 +++- refs/reftable-backend.c | 5 ++--- 5 files changed, 39 insertions(+), 31 deletions(-) diff --git a/refs.c b/refs.c index e06e0cb07283d3..53919c3d22fc42 100644 --- a/refs.c +++ b/refs.c @@ -1224,6 +1224,7 @@ void ref_transaction_free(struct ref_transaction *transaction) free(transaction->updates[i]->committer_info); free((char *)transaction->updates[i]->new_target); free((char *)transaction->updates[i]->old_target); + free((char *)transaction->updates[i]->rejection_details); free(transaction->updates[i]); } @@ -1238,7 +1239,8 @@ void ref_transaction_free(struct ref_transaction *transaction) int ref_transaction_maybe_set_rejected(struct ref_transaction *transaction, size_t update_idx, - enum ref_transaction_error err) + enum ref_transaction_error err, + struct strbuf *details) { if (update_idx >= transaction->nr) BUG("trying to set rejection on invalid update index"); @@ -1264,6 +1266,7 @@ int ref_transaction_maybe_set_rejected(struct 
ref_transaction *transaction, transaction->updates[update_idx]->refname, 0); transaction->updates[update_idx]->rejection_err = err; + transaction->updates[update_idx]->rejection_details = strbuf_detach(details, NULL); ALLOC_GROW(transaction->rejections->update_indices, transaction->rejections->nr + 1, transaction->rejections->alloc); @@ -2659,30 +2662,33 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs if (!initial_transaction && (strset_contains(&conflicting_dirnames, dirname.buf) || !refs_read_raw_ref(refs, dirname.buf, &oid, &referent, - &type, &ignore_errno))) { + &type, &ignore_errno))) { + + strbuf_addf(err, _("'%s' exists; cannot create '%s'"), + dirname.buf, refname); + if (transaction && ref_transaction_maybe_set_rejected( transaction, *update_idx, - REF_TRANSACTION_ERROR_NAME_CONFLICT)) { + REF_TRANSACTION_ERROR_NAME_CONFLICT, err)) { strset_remove(&dirnames, dirname.buf); strset_add(&conflicting_dirnames, dirname.buf); - continue; + goto next_ref; } - strbuf_addf(err, _("'%s' exists; cannot create '%s'"), - dirname.buf, refname); goto cleanup; } if (extras && string_list_has_string(extras, dirname.buf)) { + strbuf_addf(err, _("cannot process '%s' and '%s' at the same time"), + refname, dirname.buf); + if (transaction && ref_transaction_maybe_set_rejected( transaction, *update_idx, - REF_TRANSACTION_ERROR_NAME_CONFLICT)) { + REF_TRANSACTION_ERROR_NAME_CONFLICT, err)) { strset_remove(&dirnames, dirname.buf); - continue; + goto next_ref; } - strbuf_addf(err, _("cannot process '%s' and '%s' at the same time"), - refname, dirname.buf); goto cleanup; } } @@ -2712,14 +2718,14 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs if (skip && string_list_has_string(skip, iter->ref.name)) continue; + strbuf_addf(err, _("'%s' exists; cannot create '%s'"), + iter->ref.name, refname); if (transaction && ref_transaction_maybe_set_rejected( transaction, *update_idx, - REF_TRANSACTION_ERROR_NAME_CONFLICT)) 
- continue; + REF_TRANSACTION_ERROR_NAME_CONFLICT, err)) + goto next_ref; - strbuf_addf(err, _("'%s' exists; cannot create '%s'"), - iter->ref.name, refname); goto cleanup; } @@ -2729,15 +2735,17 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs extra_refname = find_descendant_ref(dirname.buf, extras, skip); if (extra_refname) { + strbuf_addf(err, _("cannot process '%s' and '%s' at the same time"), + refname, extra_refname); + if (transaction && ref_transaction_maybe_set_rejected( transaction, *update_idx, - REF_TRANSACTION_ERROR_NAME_CONFLICT)) - continue; + REF_TRANSACTION_ERROR_NAME_CONFLICT, err)) + goto next_ref; - strbuf_addf(err, _("cannot process '%s' and '%s' at the same time"), - refname, extra_refname); goto cleanup; } +next_ref:; } ret = 0; diff --git a/refs/files-backend.c b/refs/files-backend.c index 6f6f76a8d86dc4..6790d8bf535e5a 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -2983,10 +2983,9 @@ static int files_transaction_prepare(struct ref_store *ref_store, head_ref, &refnames_to_check, err); if (ret) { - if (ref_transaction_maybe_set_rejected(transaction, i, ret)) { - strbuf_reset(err); + if (ref_transaction_maybe_set_rejected(transaction, i, + ret, err)) { ret = 0; - continue; } goto cleanup; diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 4ea0c1229946bd..59b3ecb9d64716 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1437,8 +1437,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re update->refname); ret = REF_TRANSACTION_ERROR_CREATE_EXISTS; - if (ref_transaction_maybe_set_rejected(transaction, i, ret)) { - strbuf_reset(err); + if (ref_transaction_maybe_set_rejected(transaction, i, + ret, err)) { ret = 0; continue; } @@ -1452,8 +1452,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re oid_to_hex(&update->old_oid)); ret = REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE; - if 
(ref_transaction_maybe_set_rejected(transaction, i, ret)) { - strbuf_reset(err); + if (ref_transaction_maybe_set_rejected(transaction, i, + ret, err)) { ret = 0; continue; } @@ -1496,8 +1496,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re oid_to_hex(&update->old_oid)); ret = REF_TRANSACTION_ERROR_NONEXISTENT_REF; - if (ref_transaction_maybe_set_rejected(transaction, i, ret)) { - strbuf_reset(err); + if (ref_transaction_maybe_set_rejected(transaction, i, + ret, err)) { ret = 0; continue; } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index c7d2a6e50b7696..191a25683fdf31 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -128,6 +128,7 @@ struct ref_update { * was rejected. */ enum ref_transaction_error rejection_err; + const char *rejection_details; /* * If this ref_update was split off of a symref update via @@ -153,7 +154,8 @@ int refs_read_raw_ref(struct ref_store *ref_store, const char *refname, */ int ref_transaction_maybe_set_rejected(struct ref_transaction *transaction, size_t update_idx, - enum ref_transaction_error err); + enum ref_transaction_error err, + struct strbuf *details); /* * Add a ref_update with the specified properties to transaction, and diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 4319a4eacbafc4..0e2648e36cc8bd 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1401,10 +1401,9 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store, &refnames_to_check, head_type, &head_referent, &referent, err); if (ret) { - if (ref_transaction_maybe_set_rejected(transaction, i, ret)) { - strbuf_reset(err); + if (ref_transaction_maybe_set_rejected(transaction, i, + ret, err)) { ret = 0; - continue; } goto done; From be54b10fd7f313b107c00061349a6a46e9eb926e Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 25 Jan 2026 23:52:37 +0100 Subject: [PATCH 048/784] refs: add rejection detail to the callback function The previous commit 
started storing the rejection details alongside the error code for rejected updates. Pass this along to the callback function `ref_transaction_for_each_rejected_update()`. Currently the field is unused, but will be integrated in the upcoming commits. Co-authored-by: Jeff King Signed-off-by: Jeff King Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 1 + builtin/receive-pack.c | 1 + builtin/update-ref.c | 1 + refs.c | 2 +- refs.h | 1 + 5 files changed, 5 insertions(+), 1 deletion(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 288d3772eaeabe..d427adea614eed 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1649,6 +1649,7 @@ static void ref_transaction_rejection_handler(const char *refname, const char *old_target UNUSED, const char *new_target UNUSED, enum ref_transaction_error err, + const char *details UNUSED, void *cb_data) { struct ref_rejection_data *data = cb_data; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index ef1f77be8c9db6..94d3e73cee3ec2 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1813,6 +1813,7 @@ static void ref_transaction_rejection_handler(const char *refname, const char *old_target UNUSED, const char *new_target UNUSED, enum ref_transaction_error err, + const char *details UNUSED, void *cb_data) { struct strmap *failed_refs = cb_data; diff --git a/builtin/update-ref.c b/builtin/update-ref.c index 195437e7c656ff..0046a87c579122 100644 --- a/builtin/update-ref.c +++ b/builtin/update-ref.c @@ -573,6 +573,7 @@ static void print_rejected_refs(const char *refname, const char *old_target, const char *new_target, enum ref_transaction_error err, + const char *details UNUSED, void *cb_data UNUSED) { struct strbuf sb = STRBUF_INIT; diff --git a/refs.c b/refs.c index 53919c3d22fc42..c85c3d2c8bf278 100644 --- a/refs.c +++ b/refs.c @@ -2874,7 +2874,7 @@ void ref_transaction_for_each_rejected_update(struct ref_transaction *transactio (update->flags & REF_HAVE_OLD) ? 
&update->old_oid : NULL, (update->flags & REF_HAVE_NEW) ? &update->new_oid : NULL, update->old_target, update->new_target, - update->rejection_err, cb_data); + update->rejection_err, update->rejection_details, cb_data); } } diff --git a/refs.h b/refs.h index d9051bbb0414c2..4fbe3da9245755 100644 --- a/refs.h +++ b/refs.h @@ -975,6 +975,7 @@ typedef void ref_transaction_for_each_rejected_update_fn(const char *refname, const char *old_target, const char *new_target, enum ref_transaction_error err, + const char *details, void *cb_data); void ref_transaction_for_each_rejected_update(struct ref_transaction *transaction, ref_transaction_for_each_rejected_update_fn cb, From a366bdec0fb06a61d5c42e4047aab0658cec912e Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 25 Jan 2026 23:52:38 +0100 Subject: [PATCH 049/784] update-ref: utilize rejected error details if available When git-update-ref(1) received the '--batch-updates' flag, the error details generated in the refs namespace weren't propagated with failed updates. Instead only an error code pertaining to the type of rejection was noted. This dropped the detailed error message which the user can act upon. The previous commits added the required code to propagate these detailed error messages from the refs namespace. Now that additional details are available, let's output these additional details to stderr. This allows users to have additional information over the already present machine parsable output. While we're here, improve the existing tests for the machine parsable output by checking for the entire output string and not just the rejection reason.
Reported-by: Elijah Newren Co-authored-by: Jeff King Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/update-ref.c | 8 +++-- t/t1400-update-ref.sh | 71 +++++++++++++++++++++++++------------------ 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/builtin/update-ref.c b/builtin/update-ref.c index 0046a87c579122..2d68c40ecb7010 100644 --- a/builtin/update-ref.c +++ b/builtin/update-ref.c @@ -573,16 +573,18 @@ static void print_rejected_refs(const char *refname, const char *old_target, const char *new_target, enum ref_transaction_error err, - const char *details UNUSED, + const char *details, void *cb_data UNUSED) { struct strbuf sb = STRBUF_INIT; - const char *reason = ref_transaction_error_msg(err); + + if (details && *details) + error("%s", details); strbuf_addf(&sb, "rejected %s %s %s %s\n", refname, new_oid ? oid_to_hex(new_oid) : new_target, old_oid ? oid_to_hex(old_oid) : old_target, - reason); + ref_transaction_error_msg(err)); fwrite(sb.buf, sb.len, 1, stdout); strbuf_release(&sb); diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index db7f5444da2162..db6585b8d828a5 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -2093,14 +2093,15 @@ do format_command $type "update refs/heads/ref1" "$old_head" "$head" >stdin && format_command $type "update refs/heads/ref2" "$(test_oid 001)" "$head" >>stdin && - git update-ref $type --stdin --batch-updates stdout && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/ref1 >actual && test_cmp expect actual && echo $head >expect && git rev-parse refs/heads/ref2 >actual && test_cmp expect actual && - test_grep -q "invalid new value provided" stdout + test_grep "rejected refs/heads/ref2 $(test_oid 001) $head invalid new value provided" stdout && + test_grep "trying to write ref ${SQ}refs/heads/ref2${SQ} with nonexistent object" err ) ' @@ -2119,14 +2120,15 @@ do format_command $type "update refs/heads/ref1" 
"$old_head" "$head" >stdin && format_command $type "update refs/heads/ref2" "$head_tree" "$head" >>stdin && - git update-ref $type --stdin --batch-updates stdout && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/ref1 >actual && test_cmp expect actual && echo $head >expect && git rev-parse refs/heads/ref2 >actual && test_cmp expect actual && - test_grep -q "invalid new value provided" stdout + test_grep "rejected refs/heads/ref2 $head_tree $head invalid new value provided" stdout && + test_grep "trying to write non-commit object $head_tree to branch ${SQ}refs/heads/ref2${SQ}" err ) ' @@ -2143,12 +2145,13 @@ do format_command $type "update refs/heads/ref1" "$old_head" "$head" >stdin && format_command $type "update refs/heads/ref2" "$old_head" "$head" >>stdin && - git update-ref $type --stdin --batch-updates stdout && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/ref1 >actual && test_cmp expect actual && test_must_fail git rev-parse refs/heads/ref2 && - test_grep -q "reference does not exist" stdout + test_grep "rejected refs/heads/ref2 $old_head $head reference does not exist" stdout && + test_grep "cannot lock ref ${SQ}refs/heads/ref2${SQ}: unable to resolve reference ${SQ}refs/heads/ref2${SQ}" err ) ' @@ -2166,13 +2169,14 @@ do format_command $type "update refs/heads/ref1" "$old_head" "$head" >stdin && format_command $type "update refs/heads/ref2" "$old_head" "$head" >>stdin && - git update-ref $type --no-deref --stdin --batch-updates stdout && + git update-ref $type --no-deref --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/ref1 >actual && test_cmp expect actual && echo $head >expect && test_must_fail git rev-parse refs/heads/ref2 && - test_grep -q "reference does not exist" stdout + test_grep "rejected refs/heads/ref2 $old_head $head reference does not exist" stdout && + test_grep "cannot 
lock ref ${SQ}refs/heads/ref2${SQ}: reference is missing but expected $head" err ) ' @@ -2190,7 +2194,7 @@ do format_command $type "update refs/heads/ref1" "$old_head" "$head" >stdin && format_command $type "symref-update refs/heads/ref2" "$old_head" "ref" "refs/heads/nonexistent" >>stdin && - git update-ref $type --no-deref --stdin --batch-updates stdout && + git update-ref $type --no-deref --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/ref1 >actual && test_cmp expect actual && @@ -2198,7 +2202,8 @@ do echo $head >expect && git rev-parse refs/heads/ref2 >actual && test_cmp expect actual && - test_grep -q "expected symref but found regular ref" stdout + test_grep "rejected refs/heads/ref2 $ZERO_OID $ZERO_OID expected symref but found regular ref" stdout && + test_grep "cannot lock ref ${SQ}refs/heads/ref2${SQ}: expected symref with target ${SQ}refs/heads/nonexistent${SQ}: but is a regular ref" err ) ' @@ -2216,14 +2221,15 @@ do format_command $type "update refs/heads/ref1" "$old_head" "$head" >stdin && format_command $type "update refs/heads/ref2" "$old_head" "$Z" >>stdin && - git update-ref $type --stdin --batch-updates stdout && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/ref1 >actual && test_cmp expect actual && echo $head >expect && git rev-parse refs/heads/ref2 >actual && test_cmp expect actual && - test_grep -q "reference already exists" stdout + test_grep "rejected refs/heads/ref2 $old_head $ZERO_OID reference already exists" stdout && + test_grep "cannot lock ref ${SQ}refs/heads/ref2${SQ}: reference already exists" err ) ' @@ -2241,14 +2247,15 @@ do format_command $type "update refs/heads/ref1" "$old_head" "$head" >stdin && format_command $type "update refs/heads/ref2" "$head" "$old_head" >>stdin && - git update-ref $type --stdin --batch-updates stdout && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $old_head >expect && 
git rev-parse refs/heads/ref1 >actual && test_cmp expect actual && echo $head >expect && git rev-parse refs/heads/ref2 >actual && test_cmp expect actual && - test_grep -q "incorrect old value provided" stdout + test_grep "rejected refs/heads/ref2 $head $old_head incorrect old value provided" stdout && + test_grep "cannot lock ref ${SQ}refs/heads/ref2${SQ}: is at $head but expected $old_head" err ) ' @@ -2264,12 +2271,13 @@ do git update-ref refs/heads/ref/foo $head && format_command $type "update refs/heads/ref/foo" "$old_head" "$head" >stdin && - format_command $type "update refs/heads/ref" "$old_head" "" >>stdin && - git update-ref $type --stdin --batch-updates stdout && + format_command $type "update refs/heads/ref" "$old_head" "$ZERO_OID" >>stdin && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/ref/foo >actual && test_cmp expect actual && - test_grep -q "refname conflict" stdout + test_grep "rejected refs/heads/ref $old_head $ZERO_OID refname conflict" stdout && + test_grep "${SQ}refs/heads/ref/foo${SQ} exists; cannot create ${SQ}refs/heads/ref${SQ}" err ) ' @@ -2284,13 +2292,14 @@ do head=$(git rev-parse HEAD) && git update-ref refs/heads/ref/foo $head && - format_command $type "update refs/heads/foo" "$old_head" "" >stdin && - format_command $type "update refs/heads/ref" "$old_head" "" >>stdin && - git update-ref $type --stdin --batch-updates stdout && + format_command $type "update refs/heads/foo" "$old_head" "$ZERO_OID" >stdin && + format_command $type "update refs/heads/ref" "$old_head" "$ZERO_OID" >>stdin && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $old_head >expect && git rev-parse refs/heads/foo >actual && test_cmp expect actual && - test_grep -q "refname conflict" stdout + test_grep "rejected refs/heads/ref $old_head $ZERO_OID refname conflict" stdout && + test_grep "${SQ}refs/heads/ref/foo${SQ} exists; cannot create ${SQ}refs/heads/ref${SQ}" err ) ' @@ 
-2309,14 +2318,15 @@ do format_command $type "create refs/heads/ref" "$old_head" && format_command $type "create refs/heads/Foo" "$old_head" } >stdin && - git update-ref $type --stdin --batch-updates stdout && + git update-ref $type --stdin --batch-updates stdout 2>err && echo $head >expect && git rev-parse refs/heads/foo >actual && echo $old_head >expect && git rev-parse refs/heads/ref >actual && test_cmp expect actual && - test_grep -q "reference conflict due to case-insensitive filesystem" stdout + test_grep "rejected refs/heads/Foo $old_head $ZERO_OID reference conflict due to case-insensitive filesystem" stdout && + test_grep -e "cannot lock ref ${SQ}refs/heads/Foo${SQ}: Unable to create" -e "Foo.lock" err ) ' @@ -2357,8 +2367,9 @@ do git symbolic-ref refs/heads/symbolic refs/heads/non-existent && format_command $type "delete refs/heads/symbolic" "$head" >stdin && - git update-ref $type --stdin --batch-updates stdout && - test_grep "reference does not exist" stdout + git update-ref $type --stdin --batch-updates stdout 2>err && + test_grep "rejected refs/heads/non-existent $ZERO_OID $head reference does not exist" stdout && + test_grep "cannot lock ref ${SQ}refs/heads/symbolic${SQ}: unable to resolve reference ${SQ}refs/heads/non-existent${SQ}" err ) ' @@ -2373,8 +2384,9 @@ do head=$(git rev-parse HEAD) && format_command $type "delete refs/heads/new-branch" "$head" >stdin && - git update-ref $type --stdin --batch-updates stdout && - test_grep "incorrect old value provided" stdout + git update-ref $type --stdin --batch-updates stdout 2>err && + test_grep "rejected refs/heads/new-branch $ZERO_OID $head incorrect old value provided" stdout && + test_grep "cannot lock ref ${SQ}refs/heads/new-branch${SQ}: is at $(git rev-parse new-branch) but expected $head" err ) ' @@ -2387,8 +2399,9 @@ do head=$(git rev-parse HEAD) && format_command $type "delete refs/heads/non-existent" "$head" >stdin && - git update-ref $type --stdin --batch-updates stdout && - test_grep 
"reference does not exist" stdout + git update-ref $type --stdin --batch-updates stdout 2>err && + test_grep "rejected refs/heads/non-existent $ZERO_OID $head reference does not exist" stdout && + test_grep "cannot lock ref ${SQ}refs/heads/non-existent${SQ}: unable to resolve reference ${SQ}refs/heads/non-existent${SQ}" err ) ' done From 274f4355527275515a2c7c5f5a1214cf57b06338 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 25 Jan 2026 23:52:39 +0100 Subject: [PATCH 050/784] fetch: utilize rejected ref error details In 0e358de64a (fetch: use batched reference updates, 2025-05-19), git-fetch(1) switched to using batched reference updates. This also introduced a regression wherein instead of providing detailed error messages for failed referenced updates, the users were provided generic error messages based on the error type. Similar to the previous commit, switch to using detailed error messages if present for failed reference updates to fix this regression. Reported-by: Elijah Newren Co-authored-by: Jeff King Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 10 ++++++---- t/t5510-fetch.sh | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index d427adea614eed..49495be0b693fb 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1649,7 +1649,7 @@ static void ref_transaction_rejection_handler(const char *refname, const char *old_target UNUSED, const char *new_target UNUSED, enum ref_transaction_error err, - const char *details UNUSED, + const char *details, void *cb_data) { struct ref_rejection_data *data = cb_data; @@ -1674,9 +1674,11 @@ static void ref_transaction_rejection_handler(const char *refname, "branches"), data->remote_name); data->conflict_msg_shown = true; } else { - const char *reason = ref_transaction_error_msg(err); - - error(_("fetching ref %s failed: %s"), refname, reason); + if (details) + error("%s", details); + else + error(_("fetching ref %s 
failed: %s"), + refname, ref_transaction_error_msg(err)); } *data->retcode = 1; diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index ce1c23684ece38..c69afb5a609343 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1516,7 +1516,7 @@ test_expect_success REFFILES 'existing reference lock in repo' ' git remote add origin ../base && touch refs/heads/foo.lock && test_must_fail git fetch -f origin "refs/heads/*:refs/heads/*" 2>err && - test_grep "error: fetching ref refs/heads/foo failed: reference already exists" err && + test_grep -e "error: cannot lock ref ${SQ}refs/heads/foo${SQ}: Unable to create" -e "refs/heads/foo.lock${SQ}: File exists." err && git rev-parse refs/heads/main >expect && git rev-parse refs/heads/branch >actual && test_cmp expect actual @@ -1530,7 +1530,7 @@ test_expect_success CASE_INSENSITIVE_FS,REFFILES 'F/D conflict on case insensiti cd case_insensitive && git remote add origin -- ../case_sensitive_fd && test_must_fail git fetch -f origin "refs/heads/*:refs/heads/*" 2>err && - test_grep "failed: refname conflict" err && + test_grep "cannot process ${SQ}refs/remotes/origin/foo${SQ} and ${SQ}refs/remotes/origin/foo/bar${SQ} at the same time" err && git rev-parse refs/heads/main >expect && git rev-parse refs/heads/foo/bar >actual && test_cmp expect actual @@ -1544,7 +1544,7 @@ test_expect_success CASE_INSENSITIVE_FS,REFFILES 'D/F conflict on case insensiti cd case_insensitive && git remote add origin -- ../case_sensitive_df && test_must_fail git fetch -f origin "refs/heads/*:refs/heads/*" 2>err && - test_grep "failed: refname conflict" err && + test_grep "cannot lock ref ${SQ}refs/remotes/origin/foo${SQ}: there is a non-empty directory ${SQ}./refs/remotes/origin/foo${SQ} blocking reference ${SQ}refs/remotes/origin/foo${SQ}" err && git rev-parse refs/heads/main >expect && git rev-parse refs/heads/Foo/bar >actual && test_cmp expect actual @@ -1658,7 +1658,7 @@ test_expect_success REFFILES "FETCH_HEAD is updated even if ref updates fail" ' git 
remote add origin ../base && >refs/heads/foo.lock && test_must_fail git fetch -f origin "refs/heads/*:refs/heads/*" 2>err && - test_grep "error: fetching ref refs/heads/foo failed: reference already exists" err && + test_grep -e "error: cannot lock ref ${SQ}refs/heads/foo${SQ}: Unable to create" -e "refs/heads/foo.lock${SQ}: File exists." err && test_grep "branch ${SQ}branch${SQ} of ../base" FETCH_HEAD && test_grep "branch ${SQ}foo${SQ} of ../base" FETCH_HEAD ) From 2ea49f21e39de63481a6faf93e82a4b35f0e0ca2 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 25 Jan 2026 23:52:40 +0100 Subject: [PATCH 051/784] receive-pack: utilize rejected ref error details In 9d2962a7c4 (receive-pack: use batched reference updates, 2025-05-19), git-receive-pack(1) switched to using batched reference updates. This also introduced a regression wherein instead of providing detailed error messages for failed reference updates, the users were provided generic error messages based on the error type. Now that the updates also contain detailed error messages, propagate those to the client via 'rp_error'. The detailed error messages can be very verbose, e.g. in the files backend, when trying to write a non-commit object to a branch, you would see: ! [remote rejected] 3eaec9ccf3a53f168362a6b3fdeb73426fb9813d -> branch (cannot update ref 'refs/heads/branch': trying to write non-commit object 3eaec9ccf3a53f168362a6b3fdeb73426fb9813d to branch 'refs/heads/branch') Here the refname is repeated multiple times due to how error messages are propagated and filled over the code stack. This potentially can be cleaned up in a future commit.
Reported-by: Elijah Newren Co-authored-by: Jeff King Signed-off-by: Jeff King Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 8 ++++++-- t/t5516-fetch-push.sh | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 94d3e73cee3ec2..70e04b3efb3189 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1813,11 +1813,14 @@ static void ref_transaction_rejection_handler(const char *refname, const char *old_target UNUSED, const char *new_target UNUSED, enum ref_transaction_error err, - const char *details UNUSED, + const char *details, void *cb_data) { struct strmap *failed_refs = cb_data; + if (details) + rp_error("%s", details); + strmap_put(failed_refs, refname, (char *)ref_transaction_error_msg(err)); } @@ -1884,6 +1887,7 @@ static void execute_commands_non_atomic(struct command *commands, } ref_transaction_for_each_rejected_update(transaction, + ref_transaction_rejection_handler, &failed_refs); @@ -1895,7 +1899,7 @@ static void execute_commands_non_atomic(struct command *commands, if (reported_error) cmd->error_string = reported_error; else if (strmap_contains(&failed_refs, cmd->ref_name)) - cmd->error_string = strmap_get(&failed_refs, cmd->ref_name); + cmd->error_string = cmd->error_string_owned = xstrdup(strmap_get(&failed_refs, cmd->ref_name)); } cleanup: diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 46926e7bbd3a9a..45595991c8d5fe 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1882,4 +1882,19 @@ test_expect_success 'push with F/D conflict with deletion and creation' ' git push testrepo :refs/heads/branch/conflict refs/heads/branch ' +test_expect_success 'pushing non-commit objects should report error' ' + test_when_finished "rm -rf dest repo" && + git init dest && + git init repo && + + ( + cd repo && + test_commit --annotate test && + + tagsha=$(git rev-parse test^{tag}) && + test_must_fail 
git push ../dest "$tagsha:refs/heads/branch" 2>err && + test_grep "trying to write non-commit object $tagsha to branch ${SQ}refs/heads/branch${SQ}" err + ) +' + test_done From eff9299eacb9d88ded6efdc2a78024dc5fc20eea Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 25 Jan 2026 23:52:41 +0100 Subject: [PATCH 052/784] fetch: delay user information post committing of transaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Git 2.50 and earlier, we would display failure codes and error message as part of the status display: $ git fetch . v1.0.0:refs/heads/foo error: cannot update ref 'refs/heads/foo': trying to write non-commit object f665776185ad074b236c00751d666da7d1977dbe to branch 'refs/heads/foo' From . ! [new tag] v1.0.0 -> foo (unable to update local ref) With the addition of batched updates, this information is no longer shown to the user: $ git fetch . v1.0.0:refs/heads/foo From . * [new tag] v1.0.0 -> foo error: cannot update ref 'refs/heads/foo': trying to write non-commit object f665776185ad074b236c00751d666da7d1977dbe to branch 'refs/heads/foo' Since reference updates are batched and processed together at the end, information around the outcome is not available during individual reference parsing. To overcome this, collate and delay the output to the end. Introduce `ref_update_display_info` which will hold individual update's information and also whether the update failed or succeeded. This finally allows us to iterate over all such updates and print them to the user. 
Using a dynamic array and strmap does add some overhead to 'git-fetch(1)', but from benchmarking this seems to be not too bad: Benchmark 1: fetch: many refs (refformat = files, refcount = 1000, revision = master) Time (mean ± σ): 42.6 ms ± 1.2 ms [User: 13.1 ms, System: 29.8 ms] Range (min … max): 40.1 ms … 45.8 ms 47 runs Benchmark 2: fetch: many refs (refformat = files, refcount = 1000, revision = HEAD) Time (mean ± σ): 43.1 ms ± 1.2 ms [User: 12.7 ms, System: 30.7 ms] Range (min … max): 40.5 ms … 45.8 ms 48 runs Summary fetch: many refs (refformat = files, refcount = 1000, revision = master) ran 1.01 ± 0.04 times faster than fetch: many refs (refformat = files, refcount = 1000, revision = HEAD) Another approach would be to move the status printing logic so that it is handled after the transaction has been committed. That, however, would require adding an iterator to the ref transaction that tracks both the outcome (success/failure) and the original refspec information for each update, which is more involved infrastructure work compared to the strmap approach here. 
Helped-by: Phillip Wood Reported-by: Jeff King Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 246 ++++++++++++++++++++++++++++++++---------- t/t5516-fetch-push.sh | 1 + 2 files changed, 193 insertions(+), 54 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 49495be0b693fb..a3bc7e9380b9b6 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -861,12 +861,87 @@ static void display_ref_update(struct display_state *display_state, char code, fputs(display_state->buf.buf, f); } +struct ref_update_display_info { + bool failed; + char success_code; + char fail_code; + char *summary; + char *fail_detail; + char *success_detail; + char *ref; + char *remote; + struct object_id old_oid; + struct object_id new_oid; +}; + +struct ref_update_display_info_array { + struct ref_update_display_info *info; + size_t alloc, nr; +}; + +static struct ref_update_display_info *ref_update_display_info_append( + struct ref_update_display_info_array *array, + char success_code, + char fail_code, + const char *summary, + const char *success_detail, + const char *fail_detail, + const char *ref, + const char *remote, + const struct object_id *old_oid, + const struct object_id *new_oid) +{ + struct ref_update_display_info *info; + + ALLOC_GROW(array->info, array->nr + 1, array->alloc); + info = &array->info[array->nr++]; + + info->failed = false; + info->success_code = success_code; + info->fail_code = fail_code; + info->summary = xstrdup(summary); + info->success_detail = xstrdup_or_null(success_detail); + info->fail_detail = xstrdup_or_null(fail_detail); + info->remote = xstrdup(remote); + info->ref = xstrdup(ref); + + oidcpy(&info->old_oid, old_oid); + oidcpy(&info->new_oid, new_oid); + + return info; +} + +static void ref_update_display_info_set_failed(struct ref_update_display_info *info) +{ + info->failed = true; +} + +static void ref_update_display_info_free(struct ref_update_display_info *info) +{ + free(info->summary); + 
free(info->success_detail); + free(info->fail_detail); + free(info->remote); + free(info->ref); +} + +static void ref_update_display_info_display(struct ref_update_display_info *info, + struct display_state *display_state, + int summary_width) +{ + display_ref_update(display_state, + info->failed ? info->fail_code : info->success_code, + info->summary, + info->failed ? info->fail_detail : info->success_detail, + info->remote, info->ref, &info->old_oid, + &info->new_oid, summary_width); +} + static int update_local_ref(struct ref *ref, struct ref_transaction *transaction, - struct display_state *display_state, const struct ref *remote_ref, - int summary_width, - const struct fetch_config *config) + const struct fetch_config *config, + struct ref_update_display_info_array *display_array) { struct commit *current = NULL, *updated; int fast_forward = 0; @@ -877,41 +952,56 @@ static int update_local_ref(struct ref *ref, if (oideq(&ref->old_oid, &ref->new_oid)) { if (verbosity > 0) - display_ref_update(display_state, '=', _("[up to date]"), NULL, - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + ref_update_display_info_append(display_array, '=', '=', + _("[up to date]"), NULL, + NULL, ref->name, + remote_ref->name, &ref->old_oid, + &ref->new_oid); return 0; } if (!update_head_ok && !is_null_oid(&ref->old_oid) && branch_checked_out(ref->name)) { + struct ref_update_display_info *info; /* * If this is the head, and it's not okay to update * the head, and the old value of the head isn't empty... 
*/ - display_ref_update(display_state, '!', _("[rejected]"), - _("can't fetch into checked-out branch"), - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + info = ref_update_display_info_append(display_array, '!', '!', + _("[rejected]"), NULL, + _("can't fetch into checked-out branch"), + ref->name, remote_ref->name, + &ref->old_oid, &ref->new_oid); + ref_update_display_info_set_failed(info); return 1; } if (!is_null_oid(&ref->old_oid) && starts_with(ref->name, "refs/tags/")) { + struct ref_update_display_info *info; + if (force || ref->force) { int r; + r = s_update_ref("updating tag", ref, transaction, 0); - display_ref_update(display_state, r ? '!' : 't', _("[tag update]"), - r ? _("unable to update local ref") : NULL, - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + + info = ref_update_display_info_append(display_array, 't', '!', + _("[tag update]"), NULL, + _("unable to update local ref"), + ref->name, remote_ref->name, + &ref->old_oid, &ref->new_oid); + if (r) + ref_update_display_info_set_failed(info); + return r; } else { - display_ref_update(display_state, '!', _("[rejected]"), - _("would clobber existing tag"), - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + info = ref_update_display_info_append(display_array, '!', '!', + _("[rejected]"), NULL, + _("would clobber existing tag"), + ref->name, remote_ref->name, + &ref->old_oid, &ref->new_oid); + ref_update_display_info_set_failed(info); return 1; } } @@ -921,6 +1011,7 @@ static int update_local_ref(struct ref *ref, updated = lookup_commit_reference_gently(the_repository, &ref->new_oid, 1); if (!current || !updated) { + struct ref_update_display_info *info; const char *msg; const char *what; int r; @@ -941,10 +1032,15 @@ static int update_local_ref(struct ref *ref, } r = s_update_ref(msg, ref, transaction, 0); - display_ref_update(display_state, r ? '!' : '*', what, - r ? 
_("unable to update local ref") : NULL, - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + + info = ref_update_display_info_append(display_array, '*', '!', + what, NULL, + _("unable to update local ref"), + ref->name, remote_ref->name, + &ref->old_oid, &ref->new_oid); + if (r) + ref_update_display_info_set_failed(info); + return r; } @@ -960,6 +1056,7 @@ static int update_local_ref(struct ref *ref, } if (fast_forward) { + struct ref_update_display_info *info; struct strbuf quickref = STRBUF_INIT; int r; @@ -967,29 +1064,46 @@ static int update_local_ref(struct ref *ref, strbuf_addstr(&quickref, ".."); strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV); r = s_update_ref("fast-forward", ref, transaction, 1); - display_ref_update(display_state, r ? '!' : ' ', quickref.buf, - r ? _("unable to update local ref") : NULL, - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + + info = ref_update_display_info_append(display_array, ' ', '!', + quickref.buf, NULL, + _("unable to update local ref"), + ref->name, remote_ref->name, + &ref->old_oid, &ref->new_oid); + if (r) + ref_update_display_info_set_failed(info); + strbuf_release(&quickref); return r; } else if (force || ref->force) { + struct ref_update_display_info *info; struct strbuf quickref = STRBUF_INIT; int r; + strbuf_add_unique_abbrev(&quickref, ¤t->object.oid, DEFAULT_ABBREV); strbuf_addstr(&quickref, "..."); strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV); r = s_update_ref("forced-update", ref, transaction, 1); - display_ref_update(display_state, r ? '!' : '+', quickref.buf, - r ? 
_("unable to update local ref") : _("forced update"), - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + + info = ref_update_display_info_append(display_array, '+', '!', + quickref.buf, _("forced update"), + _("unable to update local ref"), + ref->name, remote_ref->name, + &ref->old_oid, &ref->new_oid); + + if (r) + ref_update_display_info_set_failed(info); + strbuf_release(&quickref); return r; } else { - display_ref_update(display_state, '!', _("[rejected]"), _("non-fast-forward"), - remote_ref->name, ref->name, - &ref->old_oid, &ref->new_oid, summary_width); + struct ref_update_display_info *info; + info = ref_update_display_info_append(display_array, '!', '!', + _("[rejected]"), NULL, + _("non-fast-forward"), + ref->name, remote_ref->name, + &ref->old_oid, &ref->new_oid); + ref_update_display_info_set_failed(info); return 1; } } @@ -1103,17 +1217,14 @@ static int store_updated_refs(struct display_state *display_state, int connectivity_checked, struct ref_transaction *transaction, struct ref *ref_map, struct fetch_head *fetch_head, - const struct fetch_config *config) + const struct fetch_config *config, + struct ref_update_display_info_array *display_array) { int rc = 0; struct strbuf note = STRBUF_INIT; const char *what, *kind; struct ref *rm; int want_status; - int summary_width = 0; - - if (verbosity >= 0) - summary_width = transport_summary_width(ref_map); if (!connectivity_checked) { struct check_connected_options opt = CHECK_CONNECTED_INIT; @@ -1218,8 +1329,8 @@ static int store_updated_refs(struct display_state *display_state, display_state->url_len); if (ref) { - rc |= update_local_ref(ref, transaction, display_state, - rm, summary_width, config); + rc |= update_local_ref(ref, transaction, rm, + config, display_array); free(ref); } else if (write_fetch_head || dry_run) { /* @@ -1227,12 +1338,12 @@ static int store_updated_refs(struct display_state *display_state, * would be written to FETCH_HEAD, if --dry-run * is set). 
*/ - display_ref_update(display_state, '*', - *kind ? kind : "branch", NULL, - rm->name, - "FETCH_HEAD", - &rm->new_oid, &rm->old_oid, - summary_width); + + ref_update_display_info_append(display_array, '*', '*', + *kind ? kind : "branch", + NULL, NULL, "FETCH_HEAD", + rm->name, &rm->new_oid, + &rm->old_oid); } } } @@ -1300,7 +1411,8 @@ static int fetch_and_consume_refs(struct display_state *display_state, struct ref_transaction *transaction, struct ref *ref_map, struct fetch_head *fetch_head, - const struct fetch_config *config) + const struct fetch_config *config, + struct ref_update_display_info_array *display_array) { int connectivity_checked = 1; int ret; @@ -1322,7 +1434,8 @@ static int fetch_and_consume_refs(struct display_state *display_state, trace2_region_enter("fetch", "consume_refs", the_repository); ret = store_updated_refs(display_state, connectivity_checked, - transaction, ref_map, fetch_head, config); + transaction, ref_map, fetch_head, config, + display_array); trace2_region_leave("fetch", "consume_refs", the_repository); out: @@ -1493,7 +1606,8 @@ static int backfill_tags(struct display_state *display_state, struct ref_transaction *transaction, struct ref *ref_map, struct fetch_head *fetch_head, - const struct fetch_config *config) + const struct fetch_config *config, + struct ref_update_display_info_array *display_array) { int retcode, cannot_reuse; @@ -1515,7 +1629,7 @@ static int backfill_tags(struct display_state *display_state, transport_set_option(transport, TRANS_OPT_DEPTH, "0"); transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL); retcode = fetch_and_consume_refs(display_state, transport, transaction, ref_map, - fetch_head, config); + fetch_head, config, display_array); if (gsecondary) { transport_disconnect(gsecondary); @@ -1641,6 +1755,7 @@ struct ref_rejection_data { bool conflict_msg_shown; bool case_sensitive_msg_shown; const char *remote_name; + struct strmap *rejected_refs; }; static void 
ref_transaction_rejection_handler(const char *refname, @@ -1681,6 +1796,7 @@ static void ref_transaction_rejection_handler(const char *refname, refname, ref_transaction_error_msg(err)); } + strmap_put(data->rejected_refs, refname, NULL); *data->retcode = 1; } @@ -1690,6 +1806,7 @@ static void ref_transaction_rejection_handler(const char *refname, */ static int commit_ref_transaction(struct ref_transaction **transaction, bool is_atomic, const char *remote_name, + struct strmap *rejected_refs, struct strbuf *err) { int retcode = ref_transaction_commit(*transaction, err); @@ -1701,6 +1818,7 @@ static int commit_ref_transaction(struct ref_transaction **transaction, .conflict_msg_shown = 0, .remote_name = remote_name, .retcode = &retcode, + .rejected_refs = rejected_refs, }; ref_transaction_for_each_rejected_update(*transaction, @@ -1729,6 +1847,9 @@ static int do_fetch(struct transport *transport, struct fetch_head fetch_head = { 0 }; struct strbuf err = STRBUF_INIT; int do_set_head = 0; + struct ref_update_display_info_array display_array = { 0 }; + struct strmap rejected_refs = STRMAP_INIT; + int summary_width = 0; if (tags == TAGS_DEFAULT) { if (transport->remote->fetch_tags == 2) @@ -1853,7 +1974,7 @@ static int do_fetch(struct transport *transport, } if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map, - &fetch_head, config)) { + &fetch_head, config, &display_array)) { retcode = 1; goto cleanup; } @@ -1876,7 +1997,7 @@ static int do_fetch(struct transport *transport, * the transaction and don't commit anything. 
*/ if (backfill_tags(&display_state, transport, transaction, tags_ref_map, - &fetch_head, config)) + &fetch_head, config, &display_array)) retcode = 1; } @@ -1886,8 +2007,12 @@ static int do_fetch(struct transport *transport, if (retcode) goto cleanup; + if (verbosity >= 0) + summary_width = transport_summary_width(ref_map); + retcode = commit_ref_transaction(&transaction, atomic_fetch, - transport->remote->name, &err); + transport->remote->name, + &rejected_refs, &err); /* * With '--atomic', bail out if the transaction fails. Without '--atomic', * continue to fetch head and perform other post-fetch operations. @@ -1965,7 +2090,17 @@ static int do_fetch(struct transport *transport, */ if (retcode && !atomic_fetch && transaction) commit_ref_transaction(&transaction, false, - transport->remote->name, &err); + transport->remote->name, + &rejected_refs, &err); + + for (size_t i = 0; i < display_array.nr; i++) { + struct ref_update_display_info *info = &display_array.info[i]; + + if (!info->failed && strmap_contains(&rejected_refs, info->ref)) + ref_update_display_info_set_failed(info); + ref_update_display_info_display(info, &display_state, summary_width); + ref_update_display_info_free(info); + } if (retcode) { if (err.len) { @@ -1980,6 +2115,9 @@ static int do_fetch(struct transport *transport, if (transaction) ref_transaction_free(transaction); + + free(display_array.info); + strmap_clear(&rejected_refs, 0); display_state_release(&display_state); close_fetch_head(&fetch_head); strbuf_release(&err); diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 45595991c8d5fe..29e2f176081561 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1893,6 +1893,7 @@ test_expect_success 'pushing non-commit objects should report error' ' tagsha=$(git rev-parse test^{tag}) && test_must_fail git push ../dest "$tagsha:refs/heads/branch" 2>err && + test_grep "! 
\[remote rejected\] $tagsha -> branch (invalid new value provided)" err && test_grep "trying to write non-commit object $tagsha to branch ${SQ}refs/heads/branch${SQ}" err ) ' From bd1855b89760cc0f9a185010a0d92d2e11a73132 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:17 +0100 Subject: [PATCH 053/784] odb: rename `FOR_EACH_OBJECT_*` flags Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This prepares us for a new upcoming `odb_for_each_object()` function and ensures that both the function and its flags have the same prefix. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/pack-objects.c | 10 +++++----- commit-graph.c | 4 ++-- object-file.c | 4 ++-- object-file.h | 2 +- odb.h | 13 +++++++------ packfile.c | 20 ++++++++++---------- packfile.h | 4 ++-- reachable.c | 8 ++++---- repack-promisor.c | 2 +- revision.c | 2 +- 11 files changed, 36 insertions(+), 35 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 2ad712e9f8f55c..6964a5a52c1646 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -922,7 +922,7 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; batch_each_object(opt, batch_unordered_object, - FOR_EACH_OBJECT_PACK_ORDER, &cb); + ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6ee31d48c94748..74317051fdf7f6 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) for_each_object_in_pack(p, add_object_entry_from_pack, revs, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); } strbuf_release(&buf); @@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void) if (for_each_packed_object(to_pack.repo, add_object_in_unpacked_pack, NULL, - FOR_EACH_OBJECT_PACK_ORDER | - FOR_EACH_OBJECT_LOCAL_ONLY | - 
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) die(_("cannot open pack index")); } diff --git a/commit-graph.c b/commit-graph.c index 6b1f02e1792b64..7f1145a0821cbb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1927,7 +1927,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, goto cleanup; } for_each_object_in_pack(p, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); free(p); } @@ -1965,7 +1965,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); for_each_packed_object(ctx->r, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); diff --git a/object-file.c b/object-file.c index e7e4c3348f9c1b..64e9e239dc341a 100644 --- a/object-file.c +++ b/object-file.c @@ -1789,7 +1789,7 @@ int for_each_loose_file_in_source(struct odb_source *source, int for_each_loose_object(struct object_database *odb, each_loose_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { struct odb_source *source; @@ -1800,7 +1800,7 @@ int for_each_loose_object(struct object_database *odb, if (r) return r; - if (flags & FOR_EACH_OBJECT_LOCAL_ONLY) + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) break; } diff --git a/object-file.h b/object-file.h index 1229d5f675b44a..42bb50e10cf296 100644 --- a/object-file.h +++ b/object-file.h @@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source, */ int for_each_loose_object(struct object_database *odb, 
each_loose_object_fn, void *, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); /** diff --git a/odb.h b/odb.h index bab07755f4ec95..74503addf1462c 100644 --- a/odb.h +++ b/odb.h @@ -442,24 +442,25 @@ static inline void obj_read_unlock(void) if(obj_read_use_lock) pthread_mutex_unlock(&obj_read_mutex); } + /* Flags for for_each_*_object(). */ -enum for_each_object_flags { +enum odb_for_each_object_flags { /* Iterate only over local objects, not alternates. */ - FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), + ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), /* Only iterate over packs obtained from the promisor remote. */ - FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), /* * Visit objects within a pack in packfile order rather than .idx order */ - FOR_EACH_OBJECT_PACK_ORDER = (1<<2), + ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2), /* Only iterate over packs that are not marked as kept in-core. */ - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), /* Only iterate over packs that do not have .keep files. */ - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; enum { diff --git a/packfile.c b/packfile.c index 402c3b5dc73131..b65f0b43f16eb3 100644 --- a/packfile.c +++ b/packfile.c @@ -2259,12 +2259,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { uint32_t i; int r = 0; - if (flags & FOR_EACH_OBJECT_PACK_ORDER) { + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) { if (load_pack_revindex(p->repo, p)) return -1; } @@ -2285,7 +2285,7 @@ int for_each_object_in_pack(struct packed_git *p, * - in pack-order, it is pack position, which we must * convert to an index position in order to get the oid. 
*/ - if (flags & FOR_EACH_OBJECT_PACK_ORDER) + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) index_pos = pack_pos_to_index(p, i); else index_pos = i; @@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags) + void *data, enum odb_for_each_object_flags flags) { struct odb_source *source; int r = 0; @@ -2318,15 +2318,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2413,8 +2413,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (repo_has_promisor_remote(r)) { for_each_packed_object(r, add_promisor_object, &promisor_objects, - FOR_EACH_OBJECT_PROMISOR_ONLY | - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | + ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } diff --git a/packfile.h b/packfile.h index acc5c55ad57754..15551258bde519 100644 --- a/packfile.h +++ b/packfile.h @@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); int 
for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags); + void *data, enum odb_for_each_object_flags flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 diff --git a/reachable.c b/reachable.c index 4b532039d5f84f..82676b2668090d 100644 --- a/reachable.c +++ b/reachable.c @@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum for_each_object_flags flags; + enum odb_for_each_object_flags flags; int r; data.revs = revs; @@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, data.extra_recent_oids_loaded = 0; r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - FOR_EACH_OBJECT_LOCAL_ONLY); + ODB_FOR_EACH_OBJECT_LOCAL_ONLY); if (r) goto done; - flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER; + flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) - flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; + flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); diff --git a/repack-promisor.c b/repack-promisor.c index ee6e0669f65602..45c330b9a53ae6 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo, ctx.cmd = &cmd; ctx.algop = repo->hash_algo; for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/revision.c b/revision.c index b65a76377062cd..5aadf46dac2b9d 100644 --- a/revision.c +++ b/revision.c @@ -3938,7 +3938,7 @@ int prepare_revision_walk(struct rev_info *revs) if (revs->exclude_promisor_objects) { for_each_packed_object(revs->repo, mark_uninteresting, revs, - 
FOR_EACH_OBJECT_PROMISOR_ONLY); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); } if (!revs->reflog_info) From 6358da200fffc7f010f079c3f64ed77f10cd751d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:18 +0100 Subject: [PATCH 054/784] odb: fix flags parameter to be unsigned The `flags` parameter accepted by various `for_each_object()` functions is a bitfield of multiple flags. Such parameters are typically unsigned in the Git codebase, but we use `enum odb_for_each_object_flags` in some places. Adapt these function signatures to use the correct type. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 3 ++- object-file.h | 3 ++- packfile.c | 4 ++-- packfile.h | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index 64e9e239dc341a..8fa461dd596615 100644 --- a/object-file.c +++ b/object-file.c @@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags) + struct object_info *oi, + unsigned flags) { int ret; int fd; diff --git a/object-file.h b/object-file.h index 42bb50e10cf296..2acf19fb91ab41 100644 --- a/object-file.h +++ b/object-file.h @@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source); int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags); + struct object_info *oi, + unsigned flags); int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct odb_source *source, diff --git a/packfile.c b/packfile.c index b65f0b43f16eb3..79fe64a25b2f5e 100644 --- a/packfile.c +++ b/packfile.c @@ -2259,7 +2259,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum 
odb_for_each_object_flags flags) + unsigned flags) { uint32_t i; int r = 0; @@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags) + void *data, unsigned flags) { struct odb_source *source; int r = 0; diff --git a/packfile.h b/packfile.h index 15551258bde519..447c44c4a7517d 100644 --- a/packfile.h +++ b/packfile.h @@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum odb_for_each_object_flags flags); + unsigned flags); int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags); + void *data, unsigned flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 From 6ecab3cdf67012592734ed9493db634d39326d43 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:19 +0100 Subject: [PATCH 055/784] object-file: extract function to read object info from path Extract a new function that allows us to read object info for a specific loose object via a user-supplied path. This function will be used in a subsequent commit. Note that this also allows us to drop `stat_loose_object()`, which is a simple wrapper around `odb_loose_path()` plus lstat(3p). Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/object-file.c b/object-file.c index 8fa461dd596615..a651129426992c 100644 --- a/object-file.c +++ b/object-file.c @@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) } /* - * Find "oid" as a loose object in given source. - * Returns 0 on success, negative on failure. 
+ * Find "oid" as a loose object in given source, open the object and return its + * file descriptor. Returns the file descriptor on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct odb_source_loose *loose, - const struct object_id *oid, - struct stat *st, const char **path) -{ - static struct strbuf buf = STRBUF_INIT; - - *path = odb_loose_path(loose->source, &buf, oid); - if (!lstat(*path, st)) - return 0; - - return -1; -} - -/* - * Like stat_loose_object(), but actually open the object and return the - * descriptor. See the caveats on the "path" parameter above. - */ static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { @@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int odb_source_loose_read_object_info(struct odb_source *source, +static int read_object_info_from_path(struct odb_source *source, + const char *path, const struct object_id *oid, struct object_info *oi, unsigned flags) @@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source, int ret; int fd; unsigned long mapsize; - const char *path; void *map = NULL; git_zstream stream, *stream_to_end = NULL; char hdr[MAX_HEADER_LEN]; @@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source, goto out; } - if (stat_loose_object(source->loose, oid, &st, &path) < 0) { + if (lstat(path, &st) < 0) { ret = -1; goto out; } @@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source, goto out; } - fd = open_loose_object(source->loose, oid, &path); + fd = git_open(path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); @@ -534,6 +517,16 @@ int 
odb_source_loose_read_object_info(struct odb_source *source, return ret; } +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + static struct strbuf buf = STRBUF_INIT; + odb_loose_path(source, &buf, oid); + return read_object_info_from_path(source, buf.buf, oid, oi, flags); +} + static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, From cde615b6f05228cd7cf125de6bf5757381f65381 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:20 +0100 Subject: [PATCH 056/784] object-file: introduce function to iterate through objects We have multiple divergent interfaces to iterate through objects of a specific backend: - `for_each_loose_object()` yields all loose objects. - `for_each_packed_object()` (somewhat obviously) yields all packed objects. These functions have different function signatures, which makes it hard to create a common abstraction layer that covers both of these. Introduce a new function `odb_source_loose_for_each_object()` to plug this gap. This function doesn't take any data specific to loose objects, but instead it accepts a `struct object_info` that will be populated the exact same as if `odb_source_loose_read_object_info()` was called. The benefit of this new interface is that we can continue to pass backend-specific data, as `struct object_info` contains a union for these exact use cases. This will allow us to unify how we iterate through objects across both loose and packed objects in a subsequent commit. The `for_each_loose_object()` function continues to exist for now, but it will be removed at the end of this patch series. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ object-file.h | 11 +++++++++++ odb.h | 12 ++++++++++++ 3 files changed, 71 insertions(+) diff --git a/object-file.c b/object-file.c index a651129426992c..ef2c7618c17a64 100644 --- a/object-file.c +++ b/object-file.c @@ -1801,6 +1801,54 @@ int for_each_loose_object(struct object_database *odb, return 0; } +struct for_each_object_wrapper_data { + struct odb_source *source; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int for_each_object_wrapper_cb(const struct object_id *oid, + const char *path, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + + if (data->request) { + struct object_info oi = *data->request; + + if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +int odb_source_loose_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct for_each_object_wrapper_data data = { + .source = source, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + + /* There are no loose promisor objects, so we can return immediately. 
*/ + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + return 0; + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + return 0; + + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, + NULL, NULL, &data); +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index 2acf19fb91ab41..5b9641cd890175 100644 --- a/object-file.h +++ b/object-file.h @@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb, each_loose_object_fn, void *, enum odb_for_each_object_flags flags); +/* + * Iterate through all loose objects in the given object database source and + * invoke the callback function for each of them. If given, the object info + * will be populated with the object's data as if you had called + * `odb_source_loose_read_object_info()` on the object. + */ +int odb_source_loose_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); /** * format_object_header() is a thin wrapper around s xsnprintf() that diff --git a/odb.h b/odb.h index 74503addf1462c..f97f249580e08a 100644 --- a/odb.h +++ b/odb.h @@ -463,6 +463,18 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. 
+ */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + enum { /* * By default, `odb_write_object()` does not actually write anything From 37353119046414b2dccb26b32cb5224e0c9258e1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:21 +0100 Subject: [PATCH 057/784] packfile: extract function to iterate through objects of a store In the next commit we're about to introduce a new function that knows to iterate through objects of a given packfile store. Same as with the equivalent function for loose objects, this new function will also be agnostic of backends by using a `struct object_info`. Prepare for this by extracting a new shared function to iterate through a single packfile store. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 78 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/packfile.c b/packfile.c index 79fe64a25b2f5e..d15a2ce12b1ce5 100644 --- a/packfile.c +++ b/packfile.c @@ -2301,51 +2301,63 @@ int for_each_object_in_pack(struct packed_git *p, return r; } -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags) +static int packfile_store_for_each_object_internal(struct packfile_store *store, + each_packed_object_fn cb, + void *data, + unsigned flags, + int *pack_errors) { - struct odb_source *source; - int r = 0; - int pack_errors = 0; + struct packfile_list_entry *e; + int ret = 0; - odb_prepare_alternates(repo->objects); + store->skip_mru_updates = true; - for (source = repo->objects->sources; source; source = source->next) { - struct packfile_list_entry *e; + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; - source->packfiles->skip_mru_updates = true; + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & 
ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + *pack_errors = 1; + continue; + } - for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { - struct packed_git *p = e->pack; + ret = for_each_object_in_pack(p, cb, data, flags); + if (ret) + break; + } - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; - } + store->skip_mru_updates = false; - r = for_each_object_in_pack(p, cb, data, flags); - if (r) - break; - } + return ret; +} - source->packfiles->skip_mru_updates = false; +int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, + void *data, unsigned flags) +{ + struct odb_source *source; + int pack_errors = 0; + int ret = 0; - if (r) + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, + flags, &pack_errors); + if (ret) break; } - return r ? r : pack_errors; + return ret ? ret : pack_errors; } static int add_promisor_object(const struct object_id *oid, From 736464b84f4439361ec10e9ef49bff674fea952d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:22 +0100 Subject: [PATCH 058/784] packfile: introduce function to iterate through objects Introduce a new function `packfile_store_for_each_object()`. 
This function is equivalent to `odb_source_loose_for_each_object()`, except that it: - Works on a single packfile store instead of working on the object database level. Consequently, it will only yield packed objects of a single object database source. - Passes a `struct object_info` to the callback function. As such, it provides the same callback interface as we already provide for loose objects now. These functions will be used in a subsequent step to implement `odb_for_each_object()`. The `for_each_packed_object()` function continues to exist for now, but it will be removed at the end of this patch series. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 15 +++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/packfile.c b/packfile.c index d15a2ce12b1ce5..c35d5ea6552ec3 100644 --- a/packfile.c +++ b/packfile.c @@ -2360,6 +2360,57 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, return ret ? 
ret : pack_errors; } +struct packfile_store_for_each_object_wrapper_data { + struct packfile_store *store; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int packfile_store_for_each_object_wrapper(const struct object_id *oid, + struct packed_git *pack, + uint32_t index_pos, + void *cb_data) +{ + struct packfile_store_for_each_object_wrapper_data *data = cb_data; + + if (data->request) { + off_t offset = nth_packed_object_offset(pack, index_pos); + struct object_info oi = *data->request; + + if (packed_object_info(pack, offset, &oi) < 0) { + mark_bad_packed_object(pack, oid); + return -1; + } + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +int packfile_store_for_each_object(struct packfile_store *store, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct packfile_store_for_each_object_wrapper_data data = { + .store = store, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + int pack_errors = 0, ret; + + ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, + &data, flags, &pack_errors); + if (ret) + return ret; + + return pack_errors ? -1 : 0; +} + static int add_promisor_object(const struct object_id *oid, struct packed_git *pack, uint32_t pos UNUSED, diff --git a/packfile.h b/packfile.h index 447c44c4a7517d..b7964f0289705c 100644 --- a/packfile.h +++ b/packfile.h @@ -343,6 +343,21 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, unsigned flags); +/* + * Iterate through all packed objects in the given packfile store and invoke + * the callback function for each of them. 
If an object info request is given, + * then the object info will be read for every individual object and passed to + * the callback as if `packfile_store_read_object_info()` was called for the + * object. + * + * The flags parameter is a combination of `odb_for_each_object_flags`. + */ +int packfile_store_for_each_object(struct packfile_store *store, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 From df2fbdfa553526062e5234286f60bd643941298a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:23 +0100 Subject: [PATCH 059/784] odb: introduce `odb_for_each_object()` Introduce a new function `odb_for_each_object()` that knows to iterate through all objects part of a given object database. This function is essentially a simple wrapper around the object database sources. Subsequent commits will adapt callers to use this new function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.h | 7 ++++--- odb.c | 29 +++++++++++++++++++++++++++++ odb.h | 20 ++++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/object-file.h b/object-file.h index 5b9641cd890175..b5eac0349ebc98 100644 --- a/object-file.h +++ b/object-file.h @@ -139,9 +139,10 @@ int for_each_loose_object(struct object_database *odb, /* * Iterate through all loose objects in the given object database source and - * invoke the callback function for each of them. If given, the object info - * will be populated with the object's data as if you had called - * `odb_source_loose_read_object_info()` on the object. + * invoke the callback function for each of them. If an object info request is + * given, then the object info will be read for every individual object and + * passed to the callback as if `odb_source_loose_read_object_info()` was + * called for the object. 
*/ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, diff --git a/odb.c b/odb.c index ac70b6a099f588..13a415c2c3e415 100644 --- a/odb.c +++ b/odb.c @@ -995,6 +995,35 @@ int odb_freshen_object(struct object_database *odb, return 0; } +int odb_for_each_object(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + continue; + + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, request, + cb, cb_data, flags); + if (ret) + return ret; + } + + ret = packfile_store_for_each_object(source->packfiles, request, + cb, cb_data, flags); + if (ret) + return ret; + } + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index f97f249580e08a..b5d28bc188f957 100644 --- a/odb.h +++ b/odb.h @@ -475,6 +475,26 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Iterate through all objects contained in the object database. Note that + * objects may be iterated over multiple times in case they are either stored + * in different backends or in case they are stored in multiple sources. + * If an object info request is given, then the object info will be read and + * passed to the callback as if `odb_read_object_info()` was called for the + * object. + * + * Returning a non-zero error code from the callback function will cause + * iteration to abort. The error code will be propagated. + * + * Returns 0 on success, a negative error code in case a failure occurred, or + * an arbitrary non-zero error code returned by the callback itself. 
+ */ +int odb_for_each_object(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + enum { /* * By default, `odb_write_object()` does not actually write anything From cc47e3d38c5be2969df3dba6814ee0e685a07de2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:24 +0100 Subject: [PATCH 060/784] builtin/fsck: refactor to use `odb_for_each_object()` In git-fsck(1) we have two callsites where we iterate over all objects via `for_each_loose_object()` and `for_each_packed_object()`. Both of these are trivially convertible with `odb_for_each_object()`. Refactor these callsites accordingly. Note that `odb_for_each_object()` may iterate over the same object multiple times, for example when it exists both in packed and loose format. But this has already been the case beforehand, so this does not result in a change in behaviour. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 57 +++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 45 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 4979bc795e5d61..2ebe77d58e5c0e 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED, return 0; } -static void mark_unreachable_referents(const struct object_id *oid) +static int mark_unreachable_referents(const struct object_id *oid, + struct object_info *oi UNUSED, + void *data UNUSED) { struct fsck_options options = FSCK_OPTIONS_DEFAULT; struct object *obj = lookup_object(the_repository, oid); if (!obj || !(obj->flags & HAS_OBJ)) - return; /* not part of our original set */ + return 0; /* not part of our original set */ if (obj->flags & REACHABLE) - return; /* reachable objects already traversed */ + return 0; /* reachable objects already traversed */ /* * Avoid passing OBJ_NONE to fsck_walk, which will parse the object @@ 
-243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid) fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) free_tree_buffer((struct tree *)obj); -} -static int mark_loose_unreachable_referents(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); - return 0; -} - -static int mark_packed_unreachable_referents(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); return 0; } @@ -394,12 +381,8 @@ static void check_connectivity(void) * and ignore any that weren't present in our earlier * traversal. */ - for_each_loose_object(the_repository->objects, - mark_loose_unreachable_referents, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_unreachable_referents, - NULL, - 0); + odb_for_each_object(the_repository->objects, NULL, + mark_unreachable_referents, NULL, 0); } /* Look up all the requirements, warn about missing objects.. 
*/ @@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path, fsck_resolve_undo(istate, index_path); } -static void mark_object_for_connectivity(const struct object_id *oid) +static int mark_object_for_connectivity(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data UNUSED) { struct object *obj = lookup_unknown_object(the_repository, oid); obj->flags |= HAS_OBJ; -} - -static int mark_loose_for_connectivity(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); - return 0; -} - -static int mark_packed_for_connectivity(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); return 0; } @@ -1001,10 +970,8 @@ int cmd_fsck(int argc, fsck_refs(the_repository); if (connectivity_only) { - for_each_loose_object(the_repository->objects, - mark_loose_for_connectivity, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_for_connectivity, NULL, 0); + odb_for_each_object(the_repository->objects, NULL, + mark_object_for_connectivity, NULL, 0); } else { odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) From 2813c97310a998510ad4bcbbf38a774fd6bb5386 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:25 +0100 Subject: [PATCH 061/784] treewide: enumerate promisor objects via `odb_for_each_object()` We have multiple callsites where we enumerate all promisor objects in the object database via `for_each_packed_object()`. This is done by passing the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY` flag, which causes us to skip over all non-promisor objects. These callsites can be trivially converted to `odb_for_each_object()` as we know to skip enumeration of loose objects in case the `PROMISOR_ONLY` flag was passed by the caller. Refactor the sites accordingly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 37 ++++++++++++++++++++++--------------- repack-promisor.c | 8 ++++---- revision.c | 10 ++++------ 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/packfile.c b/packfile.c index c35d5ea6552ec3..c54deabd645075 100644 --- a/packfile.c +++ b/packfile.c @@ -2411,28 +2411,32 @@ int packfile_store_for_each_object(struct packfile_store *store, return pack_errors ? -1 : 0; } +struct add_promisor_object_data { + struct repository *repo; + struct oidset *set; +}; + static int add_promisor_object(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos UNUSED, - void *set_) + struct object_info *oi UNUSED, + void *cb_data) { - struct oidset *set = set_; + struct add_promisor_object_data *data = cb_data; struct object *obj; int we_parsed_object; - obj = lookup_object(pack->repo, oid); + obj = lookup_object(data->repo, oid); if (obj && obj->parsed) { we_parsed_object = 0; } else { we_parsed_object = 1; - obj = parse_object_with_flags(pack->repo, oid, + obj = parse_object_with_flags(data->repo, oid, PARSE_OBJECT_SKIP_HASH_CHECK); } if (!obj) return 1; - oidset_insert(set, oid); + oidset_insert(data->set, oid); /* * If this is a tree, commit, or tag, the objects it refers @@ -2450,19 +2454,19 @@ static int add_promisor_object(const struct object_id *oid, */ return 0; while (tree_entry_gently(&desc, &entry)) - oidset_insert(set, &entry.oid); + oidset_insert(data->set, &entry.oid); if (we_parsed_object) free_tree_buffer(tree); } else if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; struct commit_list *parents = commit->parents; - oidset_insert(set, get_commit_tree_oid(commit)); + oidset_insert(data->set, get_commit_tree_oid(commit)); for (; parents; parents = parents->next) - oidset_insert(set, &parents->item->object.oid); + oidset_insert(data->set, &parents->item->object.oid); } else if (obj->type == OBJ_TAG) { struct tag *tag = (struct tag *) 
obj; - oidset_insert(set, get_tagged_oid(tag)); + oidset_insert(data->set, get_tagged_oid(tag)); } return 0; } @@ -2474,10 +2478,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (!promisor_objects_prepared) { if (repo_has_promisor_remote(r)) { - for_each_packed_object(r, add_promisor_object, - &promisor_objects, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | - ODB_FOR_EACH_OBJECT_PACK_ORDER); + struct add_promisor_object_data data = { + .repo = r, + .set = &promisor_objects, + }; + + odb_for_each_object(r->objects, NULL, add_promisor_object, &data, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } diff --git a/repack-promisor.c b/repack-promisor.c index 45c330b9a53ae6..35c4073632b1b4 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -17,8 +17,8 @@ struct write_oid_context { * necessary. */ static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) + struct object_info *oi UNUSED, + void *data) { struct write_oid_context *ctx = data; struct child_process *cmd = ctx->cmd; @@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo, */ ctx.cmd = &cmd; ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); + odb_for_each_object(repo->objects, NULL, write_oid, &ctx, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/revision.c b/revision.c index 5aadf46dac2b9d..e34bcd8e888127 100644 --- a/revision.c +++ b/revision.c @@ -3626,8 +3626,7 @@ void reset_revision_walk(void) } static int mark_uninteresting(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, + struct object_info *oi UNUSED, void *cb) { struct rev_info *revs = cb; @@ -3936,10 +3935,9 @@ int prepare_revision_walk(struct rev_info *revs) (revs->limited && limiting_can_increase_treesame(revs))) 
revs->treesame.name = "treesame"; - if (revs->exclude_promisor_objects) { - for_each_packed_object(revs->repo, mark_uninteresting, revs, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); - } + if (revs->exclude_promisor_objects) + odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting, + revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (!revs->reflog_info) prepare_to_use_bloom_filter(revs); From 317ea9a6c3c134a1bcdee49bbbbf1731c17b967a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:26 +0100 Subject: [PATCH 062/784] treewide: drop uses of `for_each_{loose,packed}_object()` We're using `for_each_loose_object()` and `for_each_packed_object()` at a couple of callsites to enumerate all loose and packed objects, respectively. These functions will be removed in a subsequent commit in favor of the newly introduced `odb_source_loose_for_each_object()` and `packfile_store_for_each_object()` replacements. Prepare for this by refactoring the sites accordingly. Note that ideally, we'd convert all callsites to use the generic `odb_for_each_object()` function already. But for some callers this is not possible (yet), and it would require some significant refactorings to make this work. Converting these sites will thus be deferred to a later patch series. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 34 ++++++++++++++++++++++++++++------ commit-graph.c | 44 +++++++++++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 6964a5a52c1646..e2c63dbedf48fc 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -806,11 +806,14 @@ struct for_each_object_payload { void *payload; }; -static int batch_one_object_loose(const struct object_id *oid, - const char *path UNUSED, - void *_payload) +static int batch_one_object_oi(const struct object_id *oid, + struct object_info *oi, + void *_payload) { struct for_each_object_payload *payload = _payload; + if (oi && oi->whence == OI_PACKED) + return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset, + payload->payload); return payload->callback(oid, NULL, 0, payload->payload); } @@ -846,8 +849,21 @@ static void batch_each_object(struct batch_options *opt, .payload = _payload, }; struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + struct odb_source *source; - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0); + /* + * TODO: we still need to tap into implementation details of the object + * database sources. Ideally, we should extend `odb_for_each_object()` + * to handle object filters itself so that we can move the filtering + * logic into the individual sources. 
+ */ + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, + &payload, flags); + if (ret) + break; + } if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { @@ -861,8 +877,14 @@ static void batch_each_object(struct batch_options *opt, &payload, flags); } } else { - for_each_packed_object(the_repository, batch_one_object_packed, - &payload, flags); + struct object_info oi = { 0 }; + + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = packfile_store_for_each_object(source->packfiles, &oi, + batch_one_object_oi, &payload, flags); + if (ret) + break; + } } free_bitmap_index(bitmap); diff --git a/commit-graph.c b/commit-graph.c index 7f1145a0821cbb..a3087d78835677 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1479,30 +1479,38 @@ static int write_graph_chunk_bloom_data(struct hashfile *f, return 0; } +static int add_packed_commits_oi(const struct object_id *oid, + struct object_info *oi, + void *data) +{ + struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; + + if (ctx->progress) + display_progress(ctx->progress, ++ctx->progress_done); + + if (*oi->typep != OBJ_COMMIT) + return 0; + + oid_array_append(&ctx->oids, oid); + set_commit_pos(ctx->r, oid); + + return 0; +} + static int add_packed_commits(const struct object_id *oid, struct packed_git *pack, uint32_t pos, void *data) { - struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; enum object_type type; off_t offset = nth_packed_object_offset(pack, pos); struct object_info oi = OBJECT_INFO_INIT; - if (ctx->progress) - display_progress(ctx->progress, ++ctx->progress_done); - oi.typep = &type; if (packed_object_info(pack, offset, &oi) < 0) die(_("unable to get type of object %s"), oid_to_hex(oid)); 
- if (type != OBJ_COMMIT) - return 0; - - oid_array_append(&ctx->oids, oid); - set_commit_pos(ctx->r, oid); - - return 0; + return add_packed_commits_oi(oid, &oi, data); } static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit) @@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx, static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { + struct odb_source *source; + enum object_type type; + struct object_info oi = { + .typep = &type, + }; + if (ctx->report_progress) ctx->progress = start_delayed_progress( ctx->r, _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); - for_each_packed_object(ctx->r, add_packed_commits, ctx, - ODB_FOR_EACH_OBJECT_PACK_ORDER); + + odb_prepare_alternates(ctx->r->objects); + for (source = ctx->r->objects->sources; source; source = source->next) + packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi, + ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); From 7b7cbaef2781cf755bc900e871964ae62ad532c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:27 +0100 Subject: [PATCH 063/784] odb: introduce mtime fields for object info requests There are some use cases where we need to figure out the mtime for objects. Most importantly, this is the case when we want to prune unreachable objects. But getting at that data requires users to manually derive the info either via the loose object's mtime, the packfiles' mtime or via the ".mtimes" file. Introduce a new `struct object_info::mtimep` pointer that allows callers to request an object's mtime. This new field will be used in a subsequent commit. 
Note that the concept of "mtime" is ambiguous: given an object, it may be stored multiple times in the object database, and each of these instances may have a different mtime. Disambiguating these mtimes is nothing that can happen on the generic ODB layer: the caller may search for the oldest object, the newest object, or even the relation of object mtimes depending on the specific source they are located in. As such, it is the responsibility of the caller to disambiguate mtimes. A consequence of this is that it's most likely incorrect to look up the mtime via `odb_read_object_info()`, as this interface does not give us enough information to disambiguate the mtime. Document this accordingly and tell users to use `odb_for_each_object()` instead. Even with this gotcha though it's sensible to have this request as part of the object info, as the mtime is a property of the object storage format. If we for example had a "black-box" storage backend, we'd still need to be able to query it for the mtime info in a generic way. We could introduce a safety mechanism that for example calls `BUG()` in case we look up the mtime outside of `odb_for_each_object()`. But that feels somewhat heavy-handed. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 29 +++++++++++++++++++++++++---- odb.c | 2 ++ odb.h | 13 +++++++++++++ packfile.c | 41 ++++++++++++++++++++++++++++++++++------- 4 files changed, 74 insertions(+), 11 deletions(-) diff --git a/object-file.c b/object-file.c index ef2c7618c17a64..5537ab2c370992 100644 --- a/object-file.c +++ b/object-file.c @@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source, char hdr[MAX_HEADER_LEN]; unsigned long size_scratch; enum object_type type_scratch; + struct stat st; /* * If we don't care about type or size, then we don't @@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source, if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { struct stat st; - if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) { + if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { ret = quick_has_loose(source->loose, oid) ? 0 : -1; goto out; } @@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source, goto out; } - if (oi && oi->disk_sizep) - *oi->disk_sizep = st.st_size; + if (oi) { + if (oi->disk_sizep) + *oi->disk_sizep = st.st_size; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + } ret = 0; goto out; @@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source, goto out; } - map = map_fd(fd, path, &mapsize); + if (fstat(fd, &st)) { + close(fd); + ret = -1; + goto out; + } + + mapsize = xsize_t(st.st_size); + if (!mapsize) { + close(fd); + ret = error(_("object file %s is empty"), path); + goto out; + } + + map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); if (!map) { ret = -1; goto out; @@ -454,6 +473,8 @@ static int read_object_info_from_path(struct odb_source *source, if (oi->disk_sizep) *oi->disk_sizep = mapsize; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; stream_to_end = &stream; diff --git a/odb.c b/odb.c index 
13a415c2c3e415..9d9a3fad627369 100644 --- a/odb.c +++ b/odb.c @@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb, oidclr(oi->delta_base_oid, odb->repo->hash_algo); if (oi->contentp) *oi->contentp = xmemdupz(co->buf, co->size); + if (oi->mtimep) + *oi->mtimep = 0; oi->whence = OI_CACHED; } return 0; diff --git a/odb.h b/odb.h index b5d28bc188f957..8ad0fcc02f148d 100644 --- a/odb.h +++ b/odb.h @@ -318,6 +318,19 @@ struct object_info { struct object_id *delta_base_oid; void **contentp; + /* + * The time the given looked-up object has been last modified. + * + * Note: the mtime may be ambiguous in case the object exists multiple + * times in the object database. It is thus _not_ recommended to use + * this field outside of contexts where you would read every instance + * of the object, like for example with `odb_for_each_object()`. As it + * is impossible to say at the ODB level what the intent of the caller + * is (e.g. whether to find the oldest or newest object), it is the + * responsibility of the caller to disambiguate the mtimes. 
+ */ + time_t *mtimep; + /* Response */ enum { OI_CACHED, diff --git a/packfile.c b/packfile.c index c54deabd645075..845633139f9942 100644 --- a/packfile.c +++ b/packfile.c @@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, &ent->ent); } -int packed_object_info(struct packed_git *p, - off_t obj_offset, struct object_info *oi) +static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset, + uint32_t *maybe_index_pos, struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; off_t curpos = obj_offset; enum object_type type = OBJ_NONE; + uint32_t pack_pos; int ret; /* @@ -1619,16 +1620,35 @@ int packed_object_info(struct packed_git *p, } } - if (oi->disk_sizep) { - uint32_t pos; - if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { + if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) { + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { error("could not find object at offset %"PRIuMAX" " "in pack %s", (uintmax_t)obj_offset, p->pack_name); ret = -1; goto out; } + } + + if (oi->disk_sizep) + *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; + + if (oi->mtimep) { + if (p->is_cruft) { + uint32_t index_pos; + + if (load_pack_mtimes(p) < 0) + die(_("could not load .mtimes for cruft pack '%s'"), + pack_basename(p)); + + if (maybe_index_pos) + index_pos = *maybe_index_pos; + else + index_pos = pack_pos_to_index(p, pack_pos); - *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; + *oi->mtimep = nth_packed_mtime(p, index_pos); + } else { + *oi->mtimep = p->mtime; + } } if (oi->typep) { @@ -1681,6 +1701,12 @@ int packed_object_info(struct packed_git *p, return ret; } +int packed_object_info(struct packed_git *p, off_t obj_offset, + struct object_info *oi) +{ + return packed_object_info_with_index_pos(p, obj_offset, NULL, oi); +} + static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, off_t 
curpos, @@ -2378,7 +2404,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, off_t offset = nth_packed_object_offset(pack, index_pos); struct object_info oi = *data->request; - if (packed_object_info(pack, offset, &oi) < 0) { + if (packed_object_info_with_index_pos(pack, offset, + &index_pos, &oi) < 0) { mark_bad_packed_object(pack, oid); return -1; } From dd097bbe295d58fa698708d3754426f664fdfe02 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:28 +0100 Subject: [PATCH 064/784] builtin/pack-objects: use `packfile_store_for_each_object()` When enumerating objects that are supposed to be stored in a new cruft pack we use `for_each_packed_object()` and then derive each object's mtime individually. Refactor this logic to instead use the new `packfile_store_for_each_object()` function with an object info request that asks for the respective mtimes. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 46 ++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 74317051fdf7f6..a6d37366ffa36f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit) } static int add_object_in_unpacked_pack(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, + struct object_info *oi, void *data UNUSED) { if (cruft) { - off_t offset; - time_t mtime; - - if (pack->is_cruft) { - if (load_pack_mtimes(pack) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(pack, pos); - } else { - mtime = pack->mtime; - } - offset = nth_packed_object_offset(pack, pos); - - add_cruft_object_entry(oid, OBJ_NONE, pack, offset, - NULL, mtime); + add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack, + oi->u.packed.offset, NULL, *oi->mtimep); } else { add_object_entry(oid, OBJ_NONE, "", 0); 
} @@ -4341,14 +4328,25 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, static void add_objects_in_unpacked_packs(void) { - if (for_each_packed_object(to_pack.repo, - add_object_in_unpacked_pack, - NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) - die(_("cannot open pack index")); + struct odb_source *source; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + }; + + odb_prepare_alternates(to_pack.repo->objects); + for (source = to_pack.repo->objects->sources; source; source = source->next) { + if (!source->local) + continue; + + if (packfile_store_for_each_object(source->packfiles, &oi, + add_object_in_unpacked_pack, NULL, + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + die(_("cannot open pack index")); + } } static int add_loose_object(const struct object_id *oid, const char *path, From 7a8582c82ce896d89bbcc1d91d8b5bdc31902416 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:29 +0100 Subject: [PATCH 065/784] reachable: convert to use `odb_for_each_object()` To figure out which objects have expired we enumerate all loose and packed objects individually so that we can figure out their respective mtimes. Refactor the code to instead use `odb_for_each_object()` with a request that asks for the object mtime instead.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reachable.c | 125 +++++++++++++++------------------------------------- 1 file changed, 35 insertions(+), 90 deletions(-) diff --git a/reachable.c b/reachable.c index 82676b2668090d..101cfc272715fb 100644 --- a/reachable.c +++ b/reachable.c @@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime, return oidset_contains(&data->extra_recent_oids, oid); } -static void add_recent_object(const struct object_id *oid, - struct packed_git *pack, - off_t offset, - timestamp_t mtime, - struct recent_data *data) +static int want_recent_object(struct recent_data *data, + const struct object_id *oid) { - struct object *obj; - enum object_type type; + if (data->ignore_in_core_kept_packs && + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + return 0; + return 1; +} - if (!obj_is_recent(oid, mtime, data)) - return; +static int add_recent_object(const struct object_id *oid, + struct object_info *oi, + void *cb_data) +{ + struct recent_data *data = cb_data; + struct object *obj; - /* - * We do not want to call parse_object here, because - * inflating blobs and trees could be very expensive. - * However, we do need to know the correct type for - * later processing, and the revision machinery expects - * commits and tags to have been parsed. 
- */ - type = odb_read_object_info(the_repository->objects, oid, NULL); - if (type < 0) - die("unable to get object info for %s", oid_to_hex(oid)); + if (!want_recent_object(data, oid) || + !obj_is_recent(oid, *oi->mtimep, data)) + return 0; - switch (type) { + switch (*oi->typep) { case OBJ_TAG: case OBJ_COMMIT: obj = parse_object_or_die(the_repository, oid, NULL); @@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid, break; default: die("unknown object type for %s: %s", - oid_to_hex(oid), type_name(type)); + oid_to_hex(oid), type_name(*oi->typep)); } if (!obj) die("unable to lookup %s", oid_to_hex(oid)); - - add_pending_object(data->revs, obj, ""); - if (data->cb) - data->cb(obj, pack, offset, mtime); -} - -static int want_recent_object(struct recent_data *data, - const struct object_id *oid) -{ - if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + if (obj->flags & SEEN) return 0; - return 1; -} -static int add_recent_loose(const struct object_id *oid, - const char *path, void *data) -{ - struct stat st; - struct object *obj; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - - if (stat(path, &st) < 0) { - /* - * It's OK if an object went away during our iteration; this - * could be due to a simultaneous repack. But anything else - * we should abort, since we might then fail to mark objects - * which should not be pruned. 
- */ - if (errno == ENOENT) - return 0; - return error_errno("unable to stat %s", oid_to_hex(oid)); + add_pending_object(data->revs, obj, ""); + if (data->cb) { + if (oi->whence == OI_PACKED) + data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep); + else + data->cb(obj, NULL, 0, *oi->mtimep); } - add_recent_object(oid, NULL, 0, st.st_mtime, data); - return 0; -} - -static int add_recent_packed(const struct object_id *oid, - struct packed_git *p, - uint32_t pos, - void *data) -{ - struct object *obj; - timestamp_t mtime = p->mtime; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - if (p->is_cruft) { - if (load_pack_mtimes(p) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(p, pos); - } - add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data); return 0; } @@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum odb_for_each_object_flags flags; + unsigned flags; + enum object_type type; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + .typep = &type, + }; int r; data.revs = revs; @@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, oidset_init(&data.extra_recent_oids, 0); data.extra_recent_oids_loaded = 0; - r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - ODB_FOR_EACH_OBJECT_LOCAL_ONLY); - if (r) - goto done; - flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; - r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); + r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags); + if (r) + goto done; done: oidset_clear(&data.extra_recent_oids); From 3565faf28c2059c6260d53ac71a303b1c04b0a7b Mon 
Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 26 Jan 2026 10:51:30 +0100 Subject: [PATCH 066/784] odb: drop unused `for_each_{loose,packed}_object()` functions We have converted all callers of `for_each_loose_object()` and `for_each_packed_object()` to use their new replacement functions instead. We can thus remove them now. Do so and inline `packfile_store_for_each_object_internal()` now that it only has a single callsite again. This makes it a bit easier to follow the callback indirection that is happening there. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 20 ----------- object-file.h | 11 ------ packfile.c | 99 ++++++++++++++++++--------------------------------- packfile.h | 2 -- 4 files changed, 35 insertions(+), 97 deletions(-) diff --git a/object-file.c b/object-file.c index 5537ab2c370992..6785821c8c9fc0 100644 --- a/object-file.c +++ b/object-file.c @@ -1802,26 +1802,6 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn cb, void *data, - enum odb_for_each_object_flags flags) -{ - struct odb_source *source; - - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) { - int r = for_each_loose_file_in_source(source, cb, NULL, - NULL, data); - if (r) - return r; - - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) - break; - } - - return 0; -} - struct for_each_object_wrapper_data { struct odb_source *source; const struct object_info *request; diff --git a/object-file.h b/object-file.h index b5eac0349ebc98..d9979baea8e39c 100644 --- a/object-file.h +++ b/object-file.h @@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source, each_loose_subdir_fn subdir_cb, void *data); -/* - * Iterate over all accessible loose objects without respect to - * reachability. By default, this includes both local and alternate objects. 
- * The order in which objects are visited is unspecified. - * - * Any flags specific to packs are ignored. - */ -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn, void *, - enum odb_for_each_object_flags flags); - /* * Iterate through all loose objects in the given object database source and * invoke the callback function for each of them. If an object info request is diff --git a/packfile.c b/packfile.c index 845633139f9942..57fbf518762f2f 100644 --- a/packfile.c +++ b/packfile.c @@ -2327,65 +2327,6 @@ int for_each_object_in_pack(struct packed_git *p, return r; } -static int packfile_store_for_each_object_internal(struct packfile_store *store, - each_packed_object_fn cb, - void *data, - unsigned flags, - int *pack_errors) -{ - struct packfile_list_entry *e; - int ret = 0; - - store->skip_mru_updates = true; - - for (e = packfile_store_get_packs(store); e; e = e->next) { - struct packed_git *p = e->pack; - - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - *pack_errors = 1; - continue; - } - - ret = for_each_object_in_pack(p, cb, data, flags); - if (ret) - break; - } - - store->skip_mru_updates = false; - - return ret; -} - -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags) -{ - struct odb_source *source; - int pack_errors = 0; - int ret = 0; - - odb_prepare_alternates(repo->objects); - - for (source = repo->objects->sources; source; source = source->next) { - ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, - flags, &pack_errors); - if (ret) - break; - } - - return ret ? 
ret : pack_errors; -} - struct packfile_store_for_each_object_wrapper_data { struct packfile_store *store; const struct object_info *request; @@ -2428,14 +2369,44 @@ int packfile_store_for_each_object(struct packfile_store *store, .cb = cb, .cb_data = cb_data, }; + struct packfile_list_entry *e; int pack_errors = 0, ret; - ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, - &data, flags, &pack_errors); - if (ret) - return ret; + store->skip_mru_updates = true; + + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper, + &data, flags); + if (ret) + goto out; + } + + ret = 0; - return pack_errors ? 
-1 : 0; +out: + store->skip_mru_updates = false; + + if (!ret && pack_errors) + ret = -1; + return ret; } struct add_promisor_object_data { diff --git a/packfile.h b/packfile.h index b7964f0289705c..1a1b72076457f1 100644 --- a/packfile.h +++ b/packfile.h @@ -340,8 +340,6 @@ typedef int each_packed_object_fn(const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, unsigned flags); -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags); /* * Iterate through all packed objects in the given packfile store and invoke From c27afcbfd0f440f410758432e2fe11a16fb2b360 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 26 Jan 2026 10:48:51 +0000 Subject: [PATCH 067/784] xdiff: remove "line_hash" field from xrecord_t Prior to commit 6a26019c81 (xdiff: split xrecord_t.ha into line_hash and minimal_perfect_hash, 2025-11-18) the "ha" field of xrecord_t initially held the "line_hash" value and once the line had been interned that field was updated to hold the "minimal_perfect_hash". The "line_hash" is only used to intern the line so there is no point in storing it after all the input lines have been interned. Removing the "line_hash" field from xrecord_t and storing it in xdlclass_t where it is actually used makes it clearer that it is a temporary value and it should not be used once we've calculated the "minimal_perfect_hash". This also reduces the size of xrecord_t by 25% on 64-bit platforms and 40% on 32-bit platforms. While the struct is small we create one instance per input line so any saving is welcome.
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 12 +++++++----- xdiff/xtypes.h | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 34c82e4f8e1626..08e5d3f4dfafdc 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -34,6 +34,7 @@ #define INVESTIGATE 2 typedef struct s_xdlclass { + uint64_t line_hash; struct s_xdlclass *next; xrecord_t rec; long idx; @@ -92,13 +93,14 @@ static void xdl_free_classifier(xdlclassifier_t *cf) { } -static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) { +static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec, + uint64_t line_hash) { size_t hi; xdlclass_t *rcrec; - hi = XDL_HASHLONG(rec->line_hash, cf->hbits); + hi = XDL_HASHLONG(line_hash, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) - if (rcrec->rec.line_hash == rec->line_hash && + if (rcrec->line_hash == line_hash && xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size, (const char *)rec->ptr, (long)rec->size, cf->flags)) break; @@ -112,6 +114,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t if (XDL_ALLOC_GROW(cf->rcrecs, cf->count, cf->alloc)) return -1; cf->rcrecs[rcrec->idx] = rcrec; + rcrec->line_hash = line_hash; rcrec->rec = *rec; rcrec->len1 = rcrec->len2 = 0; rcrec->next = cf->rchash[hi]; @@ -158,8 +161,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ crec = &xdf->recs[xdf->nrec++]; crec->ptr = prev; crec->size = cur - prev; - crec->line_hash = hav; - if (xdl_classify_record(pass, cf, crec) < 0) + if (xdl_classify_record(pass, cf, crec, hav) < 0) goto abort; } } diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 979586f20a6028..50aee779be3583 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -41,7 +41,6 @@ typedef struct s_chastore { typedef struct s_xrecord { uint8_t const *ptr; size_t size; - uint64_t 
line_hash; size_t minimal_perfect_hash; } xrecord_t; From 5086213bd2f44fdc793fd8a081fd1c40a3267c44 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 26 Jan 2026 10:48:52 +0000 Subject: [PATCH 068/784] xdiff: remove unused data from xdlclass_t Prior to commit 6d507bd41a (xdiff: delete fields ha, line, size in xdlclass_t in favor of an xrecord_t, 2025-09-26) xdlclass_t carried a copy of all the fields in xrecord_t. That commit embedded xrecord_t in xdlclass_t to make it easier to change the types of the fields in xrecord_t. However commit 6a26019c81 (xdiff: split xrecord_t.ha into line_hash and minimal_perfect_hash, 2025-11-18) added the "minimal_perfect_hash" field to xrecord_t which is not used by xdlclass_t. To avoid wasting space stop copying the whole of xrecord_t and just copy the pointer and length that we need to intern the line. Together with the previous commit this effectively reverts 6d507bd41a. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 08e5d3f4dfafdc..cd4fc405eb18fe 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -36,7 +36,8 @@ typedef struct s_xdlclass { uint64_t line_hash; struct s_xdlclass *next; - xrecord_t rec; + const uint8_t *ptr; + size_t size; long idx; long len1, len2; } xdlclass_t; @@ -101,7 +102,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t hi = XDL_HASHLONG(line_hash, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) if (rcrec->line_hash == line_hash && - xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size, + xdl_recmatch((const char *)rcrec->ptr, (long)rcrec->size, (const char *)rec->ptr, (long)rec->size, cf->flags)) break; @@ -115,7 +116,8 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t return -1; cf->rcrecs[rcrec->idx] = rcrec; rcrec->line_hash = line_hash; - rcrec->rec 
= *rec; + rcrec->ptr = rec->ptr; + rcrec->size = rec->size; rcrec->len1 = rcrec->len2 = 0; rcrec->next = cf->rchash[hi]; cf->rchash[hi] = rcrec; From 6f5ca70580444e61231ad33de0aac81d2420d523 Mon Sep 17 00:00:00 2001 From: Sam Bostock Date: Wed, 28 Jan 2026 05:39:45 +0000 Subject: [PATCH 069/784] worktree: clarify that --expire only affects missing worktrees The --expire option for "git worktree list" and "git worktree prune" only affects worktrees whose working directory path no longer exists. The help text did not make this clear, and the documentation inconsistently used "unused" for prune but "missing" for list. Update the help text and documentation to consistently describe these as "missing worktrees", and use "prune" instead of "expire" when describing the effect on missing worktrees since the terminology is clearer. While at it, expand the description of the "prune" subcommand itself to better explain what it does and when to use it, as suggested by Junio. Helped-by: Eric Sunshine Helped-by: Junio C Hamano Signed-off-by: Sam Bostock Signed-off-by: Junio C Hamano --- Documentation/git-worktree.adoc | 10 ++++++++-- builtin/worktree.c | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/git-worktree.adoc b/Documentation/git-worktree.adoc index f272f797837f45..22ef37ec0283c0 100644 --- a/Documentation/git-worktree.adoc +++ b/Documentation/git-worktree.adoc @@ -131,7 +131,13 @@ with linked worktrees if you move the main worktree manually.) `prune`:: -Prune worktree information in `$GIT_DIR/worktrees`. +Remove worktree information in `$GIT_DIR/worktrees` for worktrees +whose working trees are missing. Useful after manually removing +a working tree that is no longer needed (but use "git worktree +remove" next time you want to do so). Also, if you _moved_ a +working tree elsewhere causing the worktree information to become +dangling, see "git worktree repair" to reconnect the worktree to +the new working tree location. 
`remove`:: @@ -271,7 +277,7 @@ mismatch, even if the links are correct. With `list`, output additional information about worktrees (see below). `--expire