77# See https://aboutcode.org for more information about nexB OSS projects.
88#
99
10+ from collections import defaultdict
11+
1012from aboutcode .pipeline import LoopProgress
1113from django .db .models import Prefetch
1214from packageurl .contrib .purl2url import purl2url
@@ -65,7 +67,7 @@ def collect_and_store_fix_commits(self):
6567
6668 commit_batch = []
6769 updated_pkg_patch_commit_count = 0
68- batch_size = 1000
70+ batch_size = 10000
6971 for adv in progress .iter (advisories .paginated (per_page = batch_size )):
7072 urls = {r .url for r in adv .references .all ()} | {p .patch_url for p in adv .patches .all ()}
7173
@@ -90,14 +92,22 @@ def bulk_commit_batch_update(self, vcs_data_table):
9092 impact_data = {(row [0 ], row [3 ]) for row in vcs_data_table } # base_purl, adv_id
9193 commit_data = {(row [1 ], row [2 ]) for row in vcs_data_table } # vcs_url, commit_hash
9294
93- adv_ids = {aid for _ , aid in impact_data }
94- existing_impacts = ImpactedPackage .objects .filter (advisory_id__in = adv_ids )
95- existing_impact_pairs = {(ip .base_purl , ip .advisory_id ) for ip in existing_impacts }
95+ adv_ids = {adv_id for _ , adv_id in impact_data }
96+ commit_hashes = {commit_hash for _ , commit_hash in commit_data }
9697
97- new_impacts = impact_data - existing_impact_pairs
98- if new_impacts :
98+ existing_impacts = ImpactedPackage .objects .filter (advisory_id__in = adv_ids ).only (
99+ "base_purl" , "advisory_id"
100+ )
101+ existing_impact_pairs = {
102+ (impact_pkg .base_purl , impact_pkg .advisory_id ) for impact_pkg in existing_impacts
103+ }
104+
105+ if new_impacts := impact_data - existing_impact_pairs :
99106 ImpactedPackage .objects .bulk_create (
100- [ImpactedPackage (base_purl = bp , advisory_id = aid ) for bp , aid in new_impacts ]
107+ [
108+ ImpactedPackage (base_purl = base_purl , advisory_id = adv_id )
109+ for base_purl , adv_id in new_impacts
110+ ]
101111 )
102112
103113 PackageCommitPatch .objects .bulk_create (
@@ -108,23 +118,28 @@ def bulk_commit_batch_update(self, vcs_data_table):
108118 ignore_conflicts = True ,
109119 )
110120
111- adv_ids = {adv_id for _ , adv_id in impact_data }
112121 fetched_impacts = {
113122 (impacted_pkg .base_purl , impacted_pkg .advisory_id ): impacted_pkg
114- for impacted_pkg in ImpactedPackage .objects .filter (advisory_id__in = adv_ids )
123+ for impacted_pkg in ImpactedPackage .objects .filter (advisory_id__in = adv_ids ).only (
124+ "base_purl" , "advisory_id"
125+ )
115126 }
116127
117- commit_hashes = {commit_hash for _ , commit_hash in commit_data }
118- fetched_commits = {
128+ fetched_pkg_commits = {
119129 (pkg_commit_patch .vcs_url , pkg_commit_patch .commit_hash ): pkg_commit_patch
120- for pkg_commit_patch in PackageCommitPatch .objects .filter (commit_hash__in = commit_hashes )
130+ for pkg_commit_patch in PackageCommitPatch .objects .filter (
131+ commit_hash__in = commit_hashes
132+ ).only ("vcs_url" , "commit_hash" )
121133 }
122134
135+ pkg_commit_add_impact_pkg = defaultdict (list )
123136 for base_purl , vcs_url , commit_hash , adv_id in vcs_data_table :
124- impacted_package = fetched_impacts .get ((base_purl , adv_id ))
125- package_commit_obj = fetched_commits .get ((vcs_url , commit_hash ))
137+ impacted_pkg_obj = fetched_impacts .get ((base_purl , adv_id ))
138+ pkg_commit_obj = fetched_pkg_commits .get ((vcs_url , commit_hash ))
139+ if impacted_pkg_obj and pkg_commit_obj :
140+ pkg_commit_add_impact_pkg [pkg_commit_obj ].append (impacted_pkg_obj )
126141
127- if impacted_package and package_commit_obj :
128- package_commit_obj .fixed_in_impacts .add (impacted_package )
142+ for pkg_commit_obj , impact_pkgs in pkg_commit_add_impact_pkg . items () :
143+ pkg_commit_obj .fixed_in_impacts .add (* impact_pkgs )
129144
130145 return len (vcs_data_table )
0 commit comments