Skip to content

Commit ec41c7d

Browse files
committed
[SYCL] Rewrite aspect propagation function lookup using toposort
Signed-off-by: Dmitry Sidorov <dmitrii.s.sidorov@gmail.com>
1 parent 3ccc8ec commit ec41c7d

File tree

1 file changed

+146
-13
lines changed

1 file changed

+146
-13
lines changed

llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp

Lines changed: 146 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,50 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511511
}
512512
}
513513

514+
/// Computes topological order of functions in the call graph.
515+
/// Returns functions in reverse topological order.
516+
/// This allows single-pass bottom-up propagation.
517+
std::vector<Function *> getTopologicalOrder(const CallGraphTy &CG,
518+
const std::vector<Function *> &EntryPoints) {
519+
std::vector<Function *> Result;
520+
DenseMap<const Function *, unsigned> InDegree;
521+
522+
// Build reverse call graph and compute in-degrees.
523+
DenseMap<Function *, SmallVector<Function *, 4>> ReverseCG;
524+
for (const auto &[Caller, Callees] : CG) {
525+
for (Function *Callee : Callees) {
526+
ReverseCG[Callee].push_back(Caller);
527+
InDegree[Caller]++;
528+
}
529+
}
530+
531+
// Start with functions that have no callees.
532+
std::queue<Function *> Worklist;
533+
for (const auto &[F, Callees] : CG) {
534+
if (InDegree[F] == 0)
535+
Worklist.push(F);
536+
}
537+
538+
// Kahn's algorithm for topological sort.
539+
while (!Worklist.empty()) {
540+
Function *F = Worklist.front();
541+
Worklist.pop();
542+
Result.push_back(F);
543+
544+
auto It = ReverseCG.find(F);
545+
if (It != ReverseCG.end()) {
546+
for (Function *Caller : It->second) {
547+
if (--InDegree[Caller] == 0)
548+
Worklist.push(Caller);
549+
}
550+
}
551+
}
552+
553+
return Result;
554+
}
555+
514556
/// Propagates aspects from leaves up to the top of call graph.
557+
/// This is the recursive, per-entry-point variant; the single-pass variant driven by a topological order is propagateAspectsThroughCGOptimized.
515558
/// NB! Call graph corresponds to call graph of SYCL code which
516559
/// can't contain recursive calls. So there can't be loops in
517560
/// a call graph. But there can be path's intersections.
@@ -534,6 +577,91 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534577
AspectsMap[F].insert(LocalAspects.begin(), LocalAspects.end());
535578
}
536579

580+
/// Processes each function exactly once in bottom-up order.
581+
void propagateAspectsThroughCGOptimized(const std::vector<Function *> &TopoOrder,
582+
const CallGraphTy &CG,
583+
FunctionToAspectsMapTy &AspectsMap) {
584+
// Process in topological order.
585+
for (Function *F : TopoOrder) {
586+
auto It = CG.find(F);
587+
if (It == CG.end())
588+
continue;
589+
590+
// Merge aspects from all callees.
591+
for (Function *Callee : It->second) {
592+
const auto &CalleeAspects = AspectsMap[Callee];
593+
AspectsMap[F].insert(CalleeAspects.begin(), CalleeAspects.end());
594+
}
595+
}
596+
}
597+
598+
/// This reduces redundant type lookups when multiple instructions use the same type.
599+
AspectsSetTy getAspectsFromInstructions(Function &F,
600+
TypeToAspectsMapTy &TypesWithAspects,
601+
int FP64Aspect, bool FP64ConvEmu) {
602+
AspectsSetTy Result;
603+
604+
// Collect unique types used across all instructions.
605+
SmallDenseMap<const Type *, bool, 32> SeenTypes;
606+
607+
for (Instruction &I : instructions(F)) {
608+
// Check instruction return type.
609+
const Type *ReturnType = I.getType();
610+
if (auto *AI = dyn_cast<AllocaInst>(&I))
611+
ReturnType = AI->getAllocatedType();
612+
613+
bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction(I);
614+
615+
// Only analyze this type once.
616+
if (SeenTypes.try_emplace(ReturnType, IsFP64Conversion && hasDoubleType(ReturnType)).second) {
617+
const AspectsSetTy &Aspects = getAspectsFromType(ReturnType, TypesWithAspects);
618+
for (int Aspect : Aspects) {
619+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[ReturnType])
620+
Result.insert(Aspect);
621+
}
622+
}
623+
624+
// Check operand types.
625+
for (const auto &OperandIt : I.operands()) {
626+
const Type *OpType = nullptr;
627+
if (const auto *GV = dyn_cast<GlobalValue>(OperandIt->stripPointerCasts()))
628+
OpType = GV->getValueType();
629+
else
630+
OpType = OperandIt->getType();
631+
632+
if (OpType && SeenTypes.try_emplace(OpType, IsFP64Conversion && hasDoubleType(OpType)).second) {
633+
const AspectsSetTy &Aspects = getAspectsFromType(OpType, TypesWithAspects);
634+
for (int Aspect : Aspects) {
635+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[OpType])
636+
Result.insert(Aspect);
637+
}
638+
}
639+
}
640+
641+
// Check GEP source type.
642+
if (auto *GEPI = dyn_cast<GetElementPtrInst>(&I)) {
643+
const Type *SourceType = GEPI->getSourceElementType();
644+
if (SeenTypes.try_emplace(SourceType, IsFP64Conversion && hasDoubleType(SourceType)).second) {
645+
const AspectsSetTy &Aspects = getAspectsFromType(SourceType, TypesWithAspects);
646+
for (int Aspect : Aspects) {
647+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[SourceType])
648+
Result.insert(Aspect);
649+
}
650+
}
651+
}
652+
653+
// Check instruction-level metadata.
654+
if (const MDNode *InstAspects = I.getMetadata("sycl_used_aspects")) {
655+
for (const MDOperand &MDOp : InstAspects->operands()) {
656+
const Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue();
657+
Result.insert(cast<ConstantInt>(C)->getSExtValue());
658+
}
659+
}
660+
}
661+
662+
return Result;
663+
}
664+
537665
/// Processes a function:
538666
/// - checks if return and argument types are using any aspects
539667
/// - checks if instructions are using any aspects
@@ -549,12 +677,14 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
549677
assert(FP64AspectIt != AspectValues.end() &&
550678
"fp64 aspect was not found in the aspect values.");
551679
auto FP64Aspect = FP64AspectIt->second;
680+
552681
const AspectsSetTy RetTyAspects =
553682
getAspectsFromType(F.getReturnType(), TypesWithAspects);
554683
for (const auto &Aspect : RetTyAspects)
555684
if (!FP64ConvEmu || (Aspect != FP64Aspect) ||
556685
!hasDoubleType(F.getReturnType()))
557686
FunctionToUsedAspects[&F].insert(Aspect);
687+
558688
for (Argument &Arg : F.args()) {
559689
const AspectsSetTy ArgAspects =
560690
getAspectsFromType(Arg.getType(), TypesWithAspects);
@@ -564,19 +694,20 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564694
FunctionToUsedAspects[&F].insert(Aspect);
565695
}
566696

697+
// Optimized instruction analysis with type deduplication.
698+
const AspectsSetTy InstrAspects =
699+
getAspectsFromInstructions(F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
700+
FunctionToUsedAspects[&F].insert(InstrAspects.begin(), InstrAspects.end());
701+
702+
// Build call graph.
567703
for (Instruction &I : instructions(F)) {
568-
const AspectsSetTy Aspects =
569-
getAspectsUsedByInstruction(I, TypesWithAspects);
570-
for (const auto &Aspect : Aspects)
571-
if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType(I) ||
572-
!isFP64ConversionInstruction(I))
573-
FunctionToUsedAspects[&F].insert(Aspect);
574704
if (const auto *CI = dyn_cast<CallInst>(&I)) {
575705
if (!CI->isIndirectCall() && CI->getCalledFunction())
576706
CG[&F].insert(CI->getCalledFunction());
577707
}
578708
}
579709

710+
// Collect aspects from metadata (combined to reduce lookups).
580711
auto CollectAspectsFromMD = [&F](const char* MDName, FunctionToAspectsMapTy &Map) {
581712
if (const MDNode *MD = F.getMetadata(MDName)) {
582713
AspectsSetTy Aspects;
@@ -696,23 +827,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696827
collectVirtualFunctionSetInfo(F, VirtualFunctionSets);
697828
}
698829

830+
// Compute topological order once for both propagation passes.
831+
std::vector<Function *> TopoOrder = getTopologicalOrder(CG, EntryPoints);
832+
833+
// Handle virtual function sets (still needs old recursive propagation)
699834
SmallPtrSet<const Function *, 16> Visited;
700835
for (Function *F : EntryPoints) {
701-
propagateAspectsThroughCG(F, CG, FunctionToUsedAspects, Visited);
702836
processDeclaredVirtualFunctionSets(F, CG, FunctionToUsedAspects, Visited,
703837
VirtualFunctionSets);
704838
}
705839

840+
// Optimized single-pass propagation for used aspects.
841+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToUsedAspects);
842+
706843
if (ValidateAspects)
707844
validateUsedAspectsForFunctions(FunctionToUsedAspects, AspectValues,
708845
EntryPoints, CG);
709846

710-
// The set of aspects from FunctionToDeclaredAspects should be merged to the
711-
// set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712-
// avoid errors during validation.
713-
Visited.clear();
714-
for (Function *F : EntryPoints)
715-
propagateAspectsThroughCG(F, CG, FunctionToDeclaredAspects, Visited);
847+
// Optimized single-pass propagation for declared aspects.
848+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToDeclaredAspects);
716849

717850
return {std::move(FunctionToUsedAspects),
718851
std::move(FunctionToDeclaredAspects)};

0 commit comments

Comments
 (0)