Skip to content

Commit 63242d9

Browse files
cow-langsschuberth
andcommitted
fix(spdx-utils): Make license choice matching order-dependent
ORT had an architectural mismatch where `SpdxCompoundExpression` used order-invariant equality ("MIT OR Apache-2.0" == "Apache-2.0 OR MIT"), but the license choice matching used order-dependent matching based on strings. That is, if the expression contained "MIT OR Apache-2.0" but the user gave "Apache-2.0 OR MIT", the string match failed, triggering a fallback that rebuilt the expression by OR-ing all valid choices. With multiple license choices, this caused exponential expression growth (> 1000 characters in length). This solution replaces the string-based algorithm with set-based operations in `replaceSubexpressionWithChoice()`: 1. Decompose expressions into a set using `validChoices()`. 2. Remove dismissed choices from the set (no string operations). 3. Rebuild from remaining choices with controlled reduction. 4. Only recurse to AND-children if the sub-expression is actually contained in a child to prevent inappropriate recursion for the entire expression or derived sub-expression choices. Since sets are order-invariant ({A,B} == {B,A}), both orderings execute identical code paths, guaranteeing deterministic behavior. Fixes #10888. Signed-off-by: Antoni <168914426+cow-lang@users.noreply.github.com> Co-authored-by: Sebastian Schuberth <sebastian@doubleopen.org>
1 parent be610e3 commit 63242d9

File tree

2 files changed

+75
-13
lines changed

2 files changed

+75
-13
lines changed

utils/spdx/src/main/kotlin/SpdxExpression.kt

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -343,20 +343,44 @@ class SpdxCompoundExpression(
343343
}
344344

345345
private fun replaceSubexpressionWithChoice(subExpression: SpdxExpression, choice: SpdxExpression): SpdxExpression {
346-
val expressionString = toString()
347-
val subExpressionString = subExpression.toString()
348-
val choiceString = choice.toString()
349-
350-
return if (subExpressionString in expressionString) {
351-
expressionString.replace(subExpressionString, choiceString).toSpdx()
352-
} else {
353-
val dismissedLicense = subExpression.validChoices().first { it != choice }
354-
val unchosenLicenses = validChoices().filter { it != dismissedLicense }
355-
356-
requireNotNull(unchosenLicenses.toExpression(SpdxOperator.OR)) {
357-
"No licenses left after applying choice $choice to $subExpression."
358-
}
346+
// If the operator is AND, then there is no choice at this level, but try with the children if they contain the
347+
// sub-expression. Also, if the sub-expression is the whole expression, no recursive application of the choice
348+
// is needed, as it will be handled by the logic below.
349+
if (
350+
operator == SpdxOperator.AND && subExpression != this && children.any { it.isSubExpression(subExpression) }
351+
) {
352+
// Reconstruct this AND-expression with all children having the subexpression replaced with the choice.
353+
return SpdxCompoundExpression(
354+
SpdxOperator.AND,
355+
children.map {
356+
when {
357+
it is SpdxCompoundExpression -> it.replaceSubexpressionWithChoice(subExpression, choice)
358+
else -> it
359+
}
360+
}
361+
)
359362
}
363+
364+
// The basic idea of the following algorithm is that any expression can be represented as a disjunction of the
365+
// choices it offers, and that any expressions that offer exactly the same choices are equal. So an expression
366+
// is first "decomposed" into its choices, then dismissed choices (unchosen expressions) are removed, and the
367+
// remaining expressions are recomped with the OR-operand.
368+
369+
val choices = validChoices().toMutableSet()
370+
val subExpressionChoices = subExpression.validChoices()
371+
val dismissedChoices = subExpressionChoices - choice.validChoices()
372+
373+
// While the sub-expression is still contained in this expression...
374+
while (choices.containsAll(subExpressionChoices)) {
375+
// ... remove all unchosen expressions.
376+
choices.removeAll(dismissedChoices)
377+
}
378+
379+
require(choices.isNotEmpty()) {
380+
"No licenses left after applying choice $choice to $subExpression."
381+
}
382+
383+
return choices.reduce(SpdxExpression::or)
360384
}
361385

362386
override fun isSubExpression(subExpression: SpdxExpression?): Boolean =

utils/spdx/src/test/kotlin/SpdxExpressionChoiceTest.kt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,17 @@ class SpdxExpressionChoiceTest : WordSpec({
123123

124124
shouldThrow<InvalidSubExpressionException> { expression.applyChoice(choice, subExpression) }
125125
}
126+
127+
"resolve the choice for a sub-expression in a compound expression" {
128+
val expression = "(CDDL-1.1 OR GPL-2.0-only) AND (CDDL-1.1 OR GPL-2.0-only WITH Classpath-exception-2.0)"
129+
.toSpdx()
130+
val choice = "CDDL-1.1".toSpdx()
131+
val subExpression = "CDDL-1.1 OR GPL-2.0-only".toSpdx()
132+
133+
val result = expression.applyChoice(choice, subExpression)
134+
135+
result shouldBe "CDDL-1.1 AND (CDDL-1.1 OR GPL-2.0-only WITH Classpath-exception-2.0)".toSpdx()
136+
}
126137
}
127138

128139
"applyChoices()" should {
@@ -199,6 +210,33 @@ class SpdxExpressionChoiceTest : WordSpec({
199210

200211
result shouldBe "a".toSpdx()
201212
}
213+
214+
"apply choices regardless of license order in an OR-expression" {
215+
val expression = "(GPL-2.0-only OR MIT) AND Apache-2.0".toSpdx()
216+
val choicesGivenGplOrMit = listOf(SpdxLicenseChoice("GPL-2.0-only OR MIT".toSpdx(), "MIT".toSpdx()))
217+
val choicesGivenMitOrGpl = listOf(SpdxLicenseChoice("MIT OR GPL-2.0-only".toSpdx(), "MIT".toSpdx()))
218+
219+
val resultGivenGplOrMit = expression.applyChoices(choicesGivenGplOrMit)
220+
val resultGivenMitOrGpl = expression.applyChoices(choicesGivenMitOrGpl)
221+
222+
resultGivenGplOrMit shouldBe "MIT AND Apache-2.0".toSpdx()
223+
resultGivenMitOrGpl shouldBe "MIT AND Apache-2.0".toSpdx()
224+
}
225+
226+
"resolve the choice for a sub-expression in a compound expression" {
227+
// This test ensures that when applying a choice to a sub-expression that appears multiple times,
228+
// only the matching occurrence is replaced, not similar-looking expressions with WITH-operands.
229+
// This prevents the over-simplification bug where string replacement would affect too much.
230+
val expression = "(CDDL-1.1 OR GPL-2.0-only) AND (CDDL-1.1 OR GPL-2.0-only WITH Classpath-exception-2.0)"
231+
.toSpdx()
232+
val choices = listOf(SpdxLicenseChoice("CDDL-1.1 OR GPL-2.0-only".toSpdx(), "CDDL-1.1".toSpdx()))
233+
234+
val result = expression.applyChoices(choices)
235+
236+
// The second OR-expression should remain unchanged because it has a WITH-operand and thus does not exactly
237+
// match the sub-expression despite containing a sub-string of it.
238+
result shouldBe "CDDL-1.1 AND (CDDL-1.1 OR GPL-2.0-only WITH Classpath-exception-2.0)".toSpdx()
239+
}
202240
}
203241

204242
"isSubExpression()" should {

0 commit comments

Comments
 (0)