From 3423526c136f432a519f282bce82416cf36807ab Mon Sep 17 00:00:00 2001 From: Parth Arora Date: Mon, 13 Oct 2025 09:02:22 +0530 Subject: [PATCH] Add source assignment in symbol nodes This commit changes how symbol expression nodes get evaluated for absolute symbols. The key goal is to improve layout convergence when forward references are involved. This design change is a step toward improving layout convergence. When the linker script contains forward references, we may need to re-evaluate the linker script and the layout multiple times to reach convergence. For example: ``` SECTIONS { u = bar_end; // A1 .foo (u) : { *(.text.foo) } v = w; // A2 .bar (v) : { *(.text.bar) } bar_end = .; // A3 w = 0x2000; // A4 } ``` The above linker script contains 4 assignments: A1, A2, A3, and A4, and it needs more than one pass of layout evaluation to reach convergence. In the 1st pass, the assignment A1 is evaluated incorrectly because `bar_end` has not been assigned a value yet. We also need to stop the layout pass in between and recompute the layout from the beginning whenever some initial assumption changes, such as a new segment needing to be inserted. There is an important difference between these two types of layout recomputation. In the first type, that is, layout recomputation to achieve layout convergence, we must use the symbol values from the previous pass. In the second type, that is, layout recomputation due to an initial assumption change such as a new segment needing to be inserted, we must reset the symbol values before recomputing the layout. Let's see why: ``` SECTIONS { u = v; // A1 .foo : { *(.text.foo) } v = 0x1000; // A2 .data : { *(.text.data) } v = 0x2000; // A3 } ``` In the first evaluation of A1, u is assigned the value `0`. On encountering the '.data' output section, the layout is recomputed, and this time when A1 is recomputed, u is assigned the value `0x1000` because A2 was evaluated in the last pass. 
The value `0x1000` is incorrect for `u` because the value of `v` that is to be used in A1 must come from the last evaluation of `v`, that is, A3. Reusing values from the last pass in this case is error-prone and can lead to incorrect layouts. **It is difficult to add logic for when to reuse symbol values and when to reset them because these two types of layout recomputations can be intermixed.** Adding a source assignment node to the symbol expression node makes the recomputation simpler by obviating the need to reset the symbol values in both cases. *The key idea is that the value of a symbol node of an absolute symbol is not the value of the corresponding symbol, but instead is the result of the last assignment node for that symbol.* Adding the source assignment to a symbol node has additional benefits as well: 1) It makes it easier to determine the culprit / closest assignment to use in diagnostics when a symbol value is not converging. 2) It makes it easier to add heuristics such as constant expression evaluation to speed up the layout convergence. For example: ``` u = v; .foo (u) : { *(.text.foo) } v = 0x2000; ``` In this case, if the symbol v encodes the source assignment, then we can easily add a heuristic to determine whether `v`'s source assignment can be evaluated early. 3) It makes it possible to selectively recompute only those assignment nodes which need to be recomputed. If we reset symbol values, then all the assignment nodes always need to be recomputed in each layout pass. 
Resolves #468 Signed-off-by: Parth Arora --- include/eld/Script/Expression.h | 1 + include/eld/Target/GNULDBackend.h | 14 ++++++++++++++ lib/Object/ObjectLinker.cpp | 1 + lib/Script/Assignment.cpp | 3 +++ lib/Script/Expression.cpp | 14 +++++++++++++- lib/Target/GNULDBackend.cpp | 1 + 6 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/eld/Script/Expression.h b/include/eld/Script/Expression.h index af6acd30a..faad33c38 100644 --- a/include/eld/Script/Expression.h +++ b/include/eld/Script/Expression.h @@ -331,6 +331,7 @@ class Symbol : public Expression { Expression *getRightExpression() const override { return nullptr; } mutable LDSymbol *ThisSymbol = nullptr; + const Assignment *SourceAssignment = nullptr; }; //===----------------------------------------------------------------------===// diff --git a/include/eld/Target/GNULDBackend.h b/include/eld/Target/GNULDBackend.h index ab46d82de..e18929d8a 100644 --- a/include/eld/Target/GNULDBackend.h +++ b/include/eld/Target/GNULDBackend.h @@ -20,6 +20,7 @@ #include "eld/Readers/ELFSection.h" #include "eld/Readers/SymDefReader.h" #include "eld/Script/Assignment.h" +#include "eld/Script/Expression.h" #include "eld/Script/VersionScript.h" #include "eld/SymbolResolver/ResolveInfo.h" #include "eld/Target/ELFSegment.h" @@ -824,6 +825,17 @@ class GNULDBackend { const ResolveInfo *findAbsolutePLT(ResolveInfo *I) const; + const Assignment *getLatestAssignment(llvm::StringRef SymName) { + auto it = SymbolNameToLatestAssignment.find(SymName); + if (it != SymbolNameToLatestAssignment.end()) + return it->getValue(); + return nullptr; + } + + void updateLatestAssignment(llvm::StringRef SymName, const Assignment *A) { + SymbolNameToLatestAssignment[SymName] = A; + } + protected: virtual int numReservedSegments() const { return m_NumReservedSegments; } @@ -1132,6 +1144,8 @@ class GNULDBackend { bool m_NeedEhdr = false; bool m_NeedPhdr = false; + + llvm::StringMap SymbolNameToLatestAssignment; }; } // namespace eld diff 
--git a/lib/Object/ObjectLinker.cpp b/lib/Object/ObjectLinker.cpp index dc1077ed5..5fc794d13 100644 --- a/lib/Object/ObjectLinker.cpp +++ b/lib/Object/ObjectLinker.cpp @@ -1716,6 +1716,7 @@ bool ObjectLinker::addScriptSymbols() { // If there is a relocation to this symbol, the symbols contained in the // assignment also need to be considered as part of the list of symbols // that will be live. + // FIXME: Duplicate redundant addAssignment! if (Symbol) ThisModule->addAssignment(Symbol->resolveInfo()->name(), AssignCmd); } diff --git a/lib/Script/Assignment.cpp b/lib/Script/Assignment.cpp index a03b762e6..808b15f7c 100644 --- a/lib/Script/Assignment.cpp +++ b/lib/Script/Assignment.cpp @@ -222,6 +222,9 @@ bool Assignment::assign(Module &CurModule, const ELFSection *Section) { ThisSymbol->setScriptValueDefined(); } + auto &Backend = CurModule.getBackend(); + Backend.updateLatestAssignment(Name, this); + if (CurModule.getPrinter()->traceAssignments()) trace(llvm::outs()); return true; diff --git a/lib/Script/Expression.cpp b/lib/Script/Expression.cpp index 1628f8e44..cfe645250 100644 --- a/lib/Script/Expression.cpp +++ b/lib/Script/Expression.cpp @@ -9,6 +9,7 @@ #include "eld/Core/LinkerScript.h" #include "eld/Core/Module.h" #include "eld/Readers/ELFSection.h" +#include "eld/Script/Assignment.h" #include "eld/Script/ScriptFile.h" #include "eld/Support/MsgHandling.h" #include "eld/Support/Utils.h" @@ -133,10 +134,17 @@ bool Symbol::hasDot() const { } eld::Expected Symbol::evalImpl() { - if (!ThisSymbol) ThisSymbol = ThisModule.getNamePool().findSymbol(Name); + if (!SourceAssignment && ThisSymbol->resolveInfo()->isAbsolute()) { + auto &Backend = ThisModule.getBackend(); + const auto *A = Backend.getLatestAssignment(Name); + if (!A) + A = ThisModule.getAssignmentForSymbol(Name); + SourceAssignment = A; + } + if (!ThisSymbol || ThisSymbol->resolveInfo()->isUndef() || ThisSymbol->resolveInfo()->isBitCode()) return std::make_unique( @@ -152,6 +160,10 @@ eld::Expected 
Symbol::evalImpl() { "using a symbol that points to a non allocatable section!"); return Section->addr() + FragRef->getOutputOffset(ThisModule); } + if (hasDot()) + return ThisSymbol->value(); + if (SourceAssignment) + return SourceAssignment->value(); return ThisSymbol->value(); } diff --git a/lib/Target/GNULDBackend.cpp b/lib/Target/GNULDBackend.cpp index a68890486..2aea5b14b 100644 --- a/lib/Target/GNULDBackend.cpp +++ b/lib/Target/GNULDBackend.cpp @@ -3100,6 +3100,7 @@ bool GNULDBackend::layout() { return false; } + // FIXME: Adding more symbols this late can cause layout issues. { eld::RegisterTimer T("Define Magic Symbols", "Establish Layout", m_Module.getConfig().options().printTimingStats());