From 3f203359fb4a269c581594dfd0673d32603181c7 Mon Sep 17 00:00:00 2001 From: Zaki Shaikh Date: Fri, 19 Dec 2025 12:57:49 +0530 Subject: [PATCH] feat: introduce deduplicate-pipelineruns setting Introduce a new 'deduplicate-pipelineruns' setting to prevent duplicate PipelineRuns for the same commit across different event types (e.g., push and pull_request). - Implement fingerprint generation using MD5 hash of commit SHA, PipelineRun name, head branch, and repository name. - Store the fingerprint as a label on the PipelineRun resource. - Check for existing non-completed PipelineRuns with the same fingerprint before creating a new one. - Deprecate the 'skip-push-event-for-pr-commits' setting and add a warning log when it is used. - Update the default ConfigMap and settings documentation. - Add comprehensive unit tests for deduplication logic and deprecation warnings. Signed-off-by: Zaki Shaikh Assisted-by: Gemini (via Cursor) --- config/302-pac-configmap.yaml | 9 +- docs/content/docs/install/settings.md | 20 ++++- pkg/apis/pipelinesascode/keys/keys.go | 1 + pkg/params/settings/config.go | 4 + pkg/params/settings/config_test.go | 3 + pkg/pipelineascode/pipelineascode.go | 42 +++++++++ .../pipelineascode_startpr_test.go | 60 +++++++++++++ pkg/provider/github/parse_payload.go | 4 + pkg/provider/github/parse_payload_test.go | 7 +- test/github_deduplicate_pipelineruns_test.go | 86 +++++++++++++++++++ 10 files changed, 231 insertions(+), 5 deletions(-) create mode 100644 test/github_deduplicate_pipelineruns_test.go diff --git a/config/302-pac-configmap.yaml b/config/302-pac-configmap.yaml index 6174affe3b..2145227511 100644 --- a/config/302-pac-configmap.yaml +++ b/config/302-pac-configmap.yaml @@ -166,12 +166,17 @@ data: # Default: false require-ok-to-test-sha: "false" + # Deduplicate PipelineRuns using fingerprinting to prevent duplicate runs for the same commit + # Default: true + deduplicate-pipelineruns: "true" + + # DEPRECATED: use deduplicate-pipelineruns instead. # When enabled, this option prevents duplicate pipeline runs when a commit appears in # both a push event and a pull request. If a push event comes from a commit that is # part of an open pull request, the push event will be skipped as it would create # a duplicate pipeline run. - # Default: true - skip-push-event-for-pr-commits: "true" + # Default: false + skip-push-event-for-pr-commits: "false" # Configure a custom console here, the driver support custom parameters from # Repo CR along a few other template variable, see documentation for more diff --git a/docs/content/docs/install/settings.md b/docs/content/docs/install/settings.md index a85fd3bcd8..74f6726f30 100644 --- a/docs/content/docs/install/settings.md +++ b/docs/content/docs/install/settings.md @@ -152,7 +152,23 @@ There are a few things you can configure through the ConfigMap risk and should be aware of the potential security vulnerabilities. (only GitHub and Gitea is supported at the moment). -* `skip-push-event-for-pr-commits` +* `deduplicate-pipelineruns` + + When enabled, this option uses fingerprinting to prevent duplicate PipelineRuns + when the same commit would trigger multiple events (e.g., both a push event + and a pull request event). Whichever event arrives first will trigger the + PipelineRun, and subsequent events for the same commit and pipeline will be + deduplicated. + + The fingerprint is generated from the commit SHA, repository name, head branch, and + pipeline name, ensuring that unique pipelines for the same commit are still + allowed to run. + + Default: `true` + +* `skip-push-event-for-pr-commits` (DEPRECATED) + + **DEPRECATED: Use `deduplicate-pipelineruns` instead.** When enabled, this option prevents duplicate PipelineRuns when a commit appears in both a push event and a pull request. If a push event comes from a commit that is @@ -162,7 +178,7 @@ There are a few things you can configure through the ConfigMap This feature works by checking if a pushed commit SHA exists in any open pull request, and if so, skipping the push event processing. - Default: `true` + Default: `false` **Note:** This setting does not apply to git tag push events. Tag push events will always trigger pipeline runs regardless of whether the tagged commit is part of an open pull request. diff --git a/pkg/apis/pipelinesascode/keys/keys.go b/pkg/apis/pipelinesascode/keys/keys.go index 10febd501f..5708ae47d0 100644 --- a/pkg/apis/pipelinesascode/keys/keys.go +++ b/pkg/apis/pipelinesascode/keys/keys.go @@ -60,6 +60,7 @@ const ( CancelInProgress = pipelinesascode.GroupName + "/cancel-in-progress" LogURL = pipelinesascode.GroupName + "/log-url" ExecutionOrder = pipelinesascode.GroupName + "/execution-order" + PipelineRunFingerprint = pipelinesascode.GroupName + "/fingerprint" SCMReportingPLRStarted = pipelinesascode.GroupName + "/scm-reporting-plr-started" // PublicGithubAPIURL default is "https://api.github.com" but it can be overridden by X-GitHub-Enterprise-Host header. PublicGithubAPIURL = "https://api.github.com" diff --git a/pkg/params/settings/config.go b/pkg/params/settings/config.go index 30a4cc0cc7..363ad22ff0 100644 --- a/pkg/params/settings/config.go +++ b/pkg/params/settings/config.go @@ -71,6 +71,10 @@ type Settings struct { EnableCancelInProgressOnPullRequests bool `json:"enable-cancel-in-progress-on-pull-requests"` EnableCancelInProgressOnPush bool `json:"enable-cancel-in-progress-on-push"` + // DeduplicatePipelineRuns uses fingerprinting to prevent duplicate PipelineRuns for the same commit + DeduplicatePipelineRuns bool `json:"deduplicate-pipelineruns" default:"true"` //nolint:tagalign + + // SkipPushEventForPRCommits is deprecated, use DeduplicatePipelineRuns instead SkipPushEventForPRCommits bool `json:"skip-push-event-for-pr-commits" default:"true"` // nolint:tagalign CustomConsoleName string `json:"custom-console-name"` diff --git a/pkg/params/settings/config_test.go b/pkg/params/settings/config_test.go index e8e4cdaa09..1a008203eb 100644 --- a/pkg/params/settings/config_test.go +++ b/pkg/params/settings/config_test.go @@ -26,6 +26,7 @@ func TestSyncConfig(t *testing.T) { RemoteTasks: true, MaxKeepRunsUpperLimit: 0, DefaultMaxKeepRuns: 0, + DeduplicatePipelineRuns: true, BitbucketCloudCheckSourceIP: true, BitbucketCloudAdditionalSourceIP: "", TektonDashboardURL: "", @@ -59,6 +60,7 @@ func TestSyncConfig(t *testing.T) { "default-max-keep-runs": "5", "bitbucket-cloud-check-source-ip": "false", "bitbucket-cloud-additional-source-ip": "some-ip", + "deduplicate-pipelineruns": "true", "tekton-dashboard-url": "https://tekton-dashboard", "auto-configure-new-github-repo": "true", "auto-configure-repo-namespace-template": "template", @@ -85,6 +87,7 @@ func TestSyncConfig(t *testing.T) { RemoteTasks: false, MaxKeepRunsUpperLimit: 10, DefaultMaxKeepRuns: 5, + DeduplicatePipelineRuns: true, BitbucketCloudCheckSourceIP: false, BitbucketCloudAdditionalSourceIP: "some-ip", TektonDashboardURL: "https://tekton-dashboard", diff --git a/pkg/pipelineascode/pipelineascode.go b/pkg/pipelineascode/pipelineascode.go index fd91b49580..249c6972fa 100644 --- a/pkg/pipelineascode/pipelineascode.go +++ b/pkg/pipelineascode/pipelineascode.go @@ -2,7 +2,10 @@ package pipelineascode import ( "context" + "crypto/md5" //nolint:gosec + "encoding/hex" "fmt" + "strings" "sync" "github.com/openshift-pipelines/pipelines-as-code/pkg/action" @@ -172,9 +175,48 @@ func (p *PacRun) Run(ctx context.Context) error { return nil } +func (p *PacRun) generateFingerprint(pr *tektonv1.PipelineRun, repoName string) string { + prName := pr.GetName() + if prName == "" { + prName = pr.GetGenerateName() + } + + headBranch := strings.TrimPrefix(p.event.HeadBranch, "refs/heads/") + headBranch = strings.TrimPrefix(headBranch, "refs/tags/") + + data := fmt.Sprintf("%s-%s-%s-%s", p.event.SHA, prName, headBranch, repoName) + hash := md5.Sum([]byte(data)) //nolint:gosec + return hex.EncodeToString(hash[:])[:16] +} + func (p *PacRun) startPR(ctx context.Context, match matcher.Match) (*tektonv1.PipelineRun, error) { var gitAuthSecretName string + if p.pacInfo.DeduplicatePipelineRuns { + fingerprint := p.generateFingerprint(match.PipelineRun, match.Repo.GetName()) + + // Check for existing run with same fingerprint + labelSelector := fmt.Sprintf("%s=%s", keys.PipelineRunFingerprint, fingerprint) + existing, err := p.run.Clients.Tekton.TektonV1().PipelineRuns(match.Repo.GetNamespace()).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err == nil { + for i := range existing.Items { + pr := &existing.Items[i] + if !pr.IsDone() { + p.logger.Infof("Skipping duplicate PipelineRun - fingerprint %s already exists: %s", + fingerprint, pr.GetName()) + return pr, nil + } + } + } + // Add fingerprint to labels so it gets created with it + if match.PipelineRun.Labels == nil { + match.PipelineRun.Labels = map[string]string{} + } + match.PipelineRun.Labels[keys.PipelineRunFingerprint] = fingerprint + } + // Automatically create a secret with the token to be reused by git-clone task if p.pacInfo.SecretAutoCreation { if annotation, ok := match.PipelineRun.GetAnnotations()[keys.GitAuthSecret]; ok { diff --git a/pkg/pipelineascode/pipelineascode_startpr_test.go b/pkg/pipelineascode/pipelineascode_startpr_test.go index d80718f50f..542829b944 100644 --- a/pkg/pipelineascode/pipelineascode_startpr_test.go +++ b/pkg/pipelineascode/pipelineascode_startpr_test.go @@ -1069,3 +1069,63 @@ func TestStartPR_ConcurrentWithSameSecret(t *testing.T) { assert.Assert(t, attempts >= 1, "Secret creation should have been attempted at least once, got %d attempts", attempts) t.Logf("Successfully created %d/%d concurrent PipelineRuns with shared secret (%d creation attempts)", successCount, numConcurrent, attempts) } + +func TestStartPR_Deduplication(t *testing.T) { + fixture := setupStartPRTestDefault(t) + defer fixture.teardown() + + fixture.pacInfo.DeduplicatePipelineRuns = true + match := createTestMatch(true, nil) + match.PipelineRun.Annotations[keys.OriginalPRName] = "test-pipeline" + + // Create an existing PipelineRun with the same fingerprint + fingerprint := fixture.pac.generateFingerprint(match.PipelineRun, match.Repo.GetName()) + existingPR := &pipelinev1.PipelineRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "existing-pr", + Namespace: match.Repo.GetNamespace(), + Labels: map[string]string{ + keys.PipelineRunFingerprint: fingerprint, + }, + }, + } + _, err := fixture.cs.Clients.Tekton.TektonV1().PipelineRuns(match.Repo.GetNamespace()).Create(fixture.ctx, existingPR, metav1.CreateOptions{}) + assert.NilError(t, err) + + // Attempt to start a new PR with the same fingerprint + pr, err := fixture.pac.startPR(fixture.ctx, match) + + assert.NilError(t, err) + assert.Assert(t, pr != nil) + assert.Equal(t, pr.GetName(), "existing-pr") +} + +func TestStartPR_NoDeduplicationIfDisabled(t *testing.T) { + fixture := setupStartPRTestDefault(t) + defer fixture.teardown() + + fixture.pacInfo.DeduplicatePipelineRuns = false + match := createTestMatch(true, nil) + match.PipelineRun.Annotations[keys.OriginalPRName] = "test-pipeline" + + // Create an existing PipelineRun with the same fingerprint + fingerprint := fixture.pac.generateFingerprint(match.PipelineRun, match.Repo.GetName()) + existingPR := &pipelinev1.PipelineRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "existing-pr", + Namespace: match.Repo.GetNamespace(), + Labels: map[string]string{ + keys.PipelineRunFingerprint: fingerprint, + }, + }, + } + _, err := fixture.cs.Clients.Tekton.TektonV1().PipelineRuns(match.Repo.GetNamespace()).Create(fixture.ctx, existingPR, metav1.CreateOptions{}) + assert.NilError(t, err) + + // Attempt to start a new PR with the same fingerprint + pr, err := fixture.pac.startPR(fixture.ctx, match) + + assert.NilError(t, err) + assert.Assert(t, pr != nil) + assert.Assert(t, pr.GetName() != "existing-pr") +} diff --git a/pkg/provider/github/parse_payload.go b/pkg/provider/github/parse_payload.go index a2b98a96b8..967923b48e 100644 --- a/pkg/provider/github/parse_payload.go +++ b/pkg/provider/github/parse_payload.go @@ -378,6 +378,10 @@ func (v *Provider) processEvent(ctx context.Context, event *info.Event, eventInt v.Logger.Infof("Processing tag push event for commit %s despite skip-push-events-for-pr-commits being enabled (tag events are excluded from this setting)", sha) } + if v.pacInfo.SkipPushEventForPRCommits { + v.Logger.Warn("The 'skip-push-event-for-pr-commits' setting is deprecated and will be removed in a future version. Please use 'deduplicate-pipelineruns' instead.") + } + // Only check if the flag is enabled, and there are pull requests associated with this commit, and it's not a tag push event. if v.pacInfo.SkipPushEventForPRCommits && len(prs) > 0 && !isGitTagEvent { isPartOfPR, prNumber := v.isCommitPartOfPullRequest(sha, org, repoName, prs) diff --git a/pkg/provider/github/parse_payload_test.go b/pkg/provider/github/parse_payload_test.go index ae24893de1..56b07796a8 100644 --- a/pkg/provider/github/parse_payload_test.go +++ b/pkg/provider/github/parse_payload_test.go @@ -1037,7 +1037,7 @@ func TestParsePayLoad(t *testing.T) { }) } - logger, _ := logger.GetLogger() + logger, observer := logger.GetLogger() gprovider := Provider{ ghClient: ghClient, Logger: logger, @@ -1060,6 +1060,11 @@ func TestParsePayLoad(t *testing.T) { return } assert.NilError(t, err) + + if tt.skipPushEventForPRCommits { + logMsg := observer.FilterMessageSnippet("The 'skip-push-event-for-pr-commits' setting is deprecated").TakeAll() + assert.Assert(t, len(logMsg) > 0, "Deprecation warning not found in logs") + } // If shaRet is empty, this is a skip case (push event for PR commit) // In this case, ret should be nil if tt.shaRet == "" { diff --git a/test/github_deduplicate_pipelineruns_test.go b/test/github_deduplicate_pipelineruns_test.go new file mode 100644 index 0000000000..5655edefb6 --- /dev/null +++ b/test/github_deduplicate_pipelineruns_test.go @@ -0,0 +1,86 @@ +//go:build e2e + +package test + +import ( + "context" + "fmt" + "net/http" + "regexp" + "testing" + + tgithub "github.com/openshift-pipelines/pipelines-as-code/test/pkg/github" + "github.com/openshift-pipelines/pipelines-as-code/test/pkg/payload" + twait "github.com/openshift-pipelines/pipelines-as-code/test/pkg/wait" + "github.com/tektoncd/pipeline/pkg/names" + "gotest.tools/v3/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGithubDeduplicatePipelineRuns(t *testing.T) { + targetNS := names.SimpleNameGenerator.RestrictLengthWithRandomSuffix("pac-e2e-ns") + + ctx := context.TODO() + ctx, runcnx, opts, ghcnx, err := tgithub.Setup(ctx, false, false) + assert.NilError(t, err) + + yamlEntries := map[string]string{} + yamlEntries[".tekton/pipelinerun-match-all-branch.yaml"] = "testdata/pipelinerun.yaml" + + repoinfo, resp, err := ghcnx.Client().Repositories.Get(ctx, opts.Organization, opts.Repo) + assert.NilError(t, err) + if resp != nil && resp.StatusCode == http.StatusNotFound { + t.Errorf("Repository %s not found in %s", opts.Organization, opts.Repo) + } + + err = tgithub.CreateCRD(ctx, t, repoinfo, runcnx, opts, targetNS) + assert.NilError(t, err) + + entries, err := payload.GetEntries(yamlEntries, targetNS, "*", "pull_request, push", map[string]string{}) + assert.NilError(t, err) + + targetRefName := fmt.Sprintf("refs/heads/%s", + names.SimpleNameGenerator.RestrictLengthWithRandomSuffix("pac-e2e-test")) + + sha, vref, err := tgithub.PushFilesToRef(ctx, ghcnx.Client(), "Test Github Deduplicate PipelineRuns", repoinfo.GetDefaultBranch(), targetRefName, + opts.Organization, opts.Repo, entries) + assert.NilError(t, err) + + runcnx.Clients.Log.Infof("Commit %s has been created and pushed to %s", sha, vref.GetURL()) + number, err := tgithub.PRCreate(ctx, runcnx, ghcnx, opts.Organization, + opts.Repo, targetRefName, repoinfo.GetDefaultBranch(), "Test Github Deduplicate PipelineRuns") + assert.NilError(t, err) + + g := &tgithub.PRTest{ + PRNumber: number, + SHA: sha, + TargetNamespace: targetNS, + Logger: runcnx.Clients.Log, + Provider: ghcnx, + Options: opts, + TargetRefName: targetRefName, + Cnx: runcnx, + } + + defer g.TearDown(ctx, t) + + runcnx.Clients.Log.Infof("Pull request %d has been created", number) + err = twait.UntilPipelineRunCreated(ctx, runcnx.Clients, twait.Opts{ + RepoName: targetNS, + Namespace: targetNS, + MinNumberStatus: 1, + PollTimeout: twait.DefaultTimeout, + TargetSHA: sha, + }) + assert.NilError(t, err) + + // but here we can't guarantee that for which event PipelineRun is created because we don't which + // event is received first on controller either push or pull_request. + prs, err := runcnx.Clients.Tekton.TektonV1().PipelineRuns(targetNS).List(ctx, metav1.ListOptions{}) + assert.NilError(t, err) + assert.Assert(t, len(prs.Items) == 1) + + maxLines := int64(50) + err = twait.RegexpMatchingInControllerLog(ctx, runcnx, *regexp.MustCompile("Skipping duplicate PipelineRun"), 10, "controller", &maxLines) + assert.NilError(t, err) +}