@@ -348,6 +348,28 @@ AspectsSetTy getAspectsUsedByInstruction(const Instruction &I,
348348 return Result;
349349}
350350
351+ // / Collects aspects from all instructions in a function.
352+ // / Applies FP64 conversion emulation filtering per-instruction.
353+ AspectsSetTy getAspectsFromInstructions (Function &F,
354+ TypeToAspectsMapTy &TypesWithAspects,
355+ int FP64Aspect, bool FP64ConvEmu) {
356+ AspectsSetTy Result;
357+
358+ for (Instruction &I : instructions (F)) {
359+ bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction (I);
360+ bool HasDouble = hasDoubleType (I);
361+
362+ const AspectsSetTy Aspects = getAspectsUsedByInstruction (I, TypesWithAspects);
363+
364+ for (int Aspect : Aspects) {
365+ if (!FP64ConvEmu || Aspect != FP64Aspect || !HasDouble || !IsFP64Conversion)
366+ Result.insert (Aspect);
367+ }
368+ }
369+
370+ return Result;
371+ }
372+
351373using FunctionToAspectsMapTy = DenseMap<Function *, AspectsSetTy>;
352374using CallGraphTy = DenseMap<Function *, SmallPtrSet<Function *, 8 >>;
353375
@@ -511,7 +533,54 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511533 }
512534}
513535
536+ // / Computes topological order of functions in the call graph.
537+ // / Returns functions in reverse topological order.
538+ // / This allows single-pass bottom-up propagation.
539+ std::vector<Function *>
540+ getTopologicalOrder (const CallGraphTy &CG,
541+ const std::vector<Function *> &EntryPoints) {
542+ std::vector<Function *> Result;
543+ DenseMap<const Function *, unsigned > InDegree;
544+
545+ // Build reverse call graph and compute in-degrees.
546+ DenseMap<Function *, SmallVector<Function *, 4 >> ReverseCG;
547+ SmallPtrSet<Function *, 32 > AllFunctions;
548+ for (const auto &[Caller, Callees] : CG) {
549+ AllFunctions.insert (Caller);
550+ for (Function *Callee : Callees) {
551+ AllFunctions.insert (Callee);
552+ ReverseCG[Callee].push_back (Caller);
553+ InDegree[Caller]++;
554+ }
555+ }
556+
557+ // Start with leaf functions.
558+ std::queue<Function *> Worklist;
559+ for (Function *F : AllFunctions) {
560+ if (InDegree[F] == 0 )
561+ Worklist.push (F);
562+ }
563+
564+ // Kahn's algorithm for topological sort.
565+ while (!Worklist.empty ()) {
566+ Function *F = Worklist.front ();
567+ Worklist.pop ();
568+ Result.push_back (F);
569+
570+ auto It = ReverseCG.find (F);
571+ if (It != ReverseCG.end ()) {
572+ for (Function *Caller : It->second ) {
573+ if (--InDegree[Caller] == 0 )
574+ Worklist.push (Caller);
575+ }
576+ }
577+ }
578+
579+ return Result;
580+ }
581+
514582// / Propagates aspects from leaves up to the top of call graph.
583+ // / Uses topological sort for efficient single-pass propagation.
515584// / NB! Call graph corresponds to call graph of SYCL code which
516585// / can't contain recursive calls. So there can't be loops in
517586// / a call graph. But there can be intersecting paths.
@@ -534,6 +603,24 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534603 AspectsMap[F].insert (LocalAspects.begin (), LocalAspects.end ());
535604}
536605
606+ // / Processes each function exactly once in bottom-up order.
607+ void propagateAspectsThroughCGOptimized (
608+ const std::vector<Function *> &TopoOrder, const CallGraphTy &CG,
609+ FunctionToAspectsMapTy &AspectsMap) {
610+ // Process in topological order.
611+ for (Function *F : TopoOrder) {
612+ auto It = CG.find (F);
613+ if (It == CG.end ())
614+ continue ;
615+
616+ // Merge aspects from all callees.
617+ for (Function *Callee : It->second ) {
618+ const auto &CalleeAspects = AspectsMap[Callee];
619+ AspectsMap[F].insert (CalleeAspects.begin (), CalleeAspects.end ());
620+ }
621+ }
622+ }
623+
537624// / Processes a function:
538625// / - checks if return and argument types are using any aspects
539626// / - checks if instructions are using any aspects
@@ -564,19 +651,19 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564651 FunctionToUsedAspects[&F].insert (Aspect);
565652 }
566653
654+ const AspectsSetTy InstrAspects =
655+ getAspectsFromInstructions (F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
656+ FunctionToUsedAspects[&F].insert (InstrAspects.begin (), InstrAspects.end ());
657+
658+ // Build call graph.
567659 for (Instruction &I : instructions (F)) {
568- const AspectsSetTy Aspects =
569- getAspectsUsedByInstruction (I, TypesWithAspects);
570- for (const auto &Aspect : Aspects)
571- if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType (I) ||
572- !isFP64ConversionInstruction (I))
573- FunctionToUsedAspects[&F].insert (Aspect);
574660 if (const auto *CI = dyn_cast<CallInst>(&I)) {
575661 if (!CI->isIndirectCall () && CI->getCalledFunction ())
576662 CG[&F].insert (CI->getCalledFunction ());
577663 }
578664 }
579665
666+ // Collect aspects from metadata (combined to reduce lookups).
580667 auto CollectAspectsFromMD = [&F](const char * MDName, FunctionToAspectsMapTy &Map) {
581668 if (const MDNode *MD = F.getMetadata (MDName)) {
582669 AspectsSetTy Aspects;
@@ -696,23 +783,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696783 collectVirtualFunctionSetInfo (F, VirtualFunctionSets);
697784 }
698785
786+ // Compute topological order once for both propagation passes.
787+ std::vector<Function *> TopoOrder = getTopologicalOrder (CG, EntryPoints);
788+
789+ // Handle virtual function sets (still needs old recursive propagation)
699790 SmallPtrSet<const Function *, 16 > Visited;
700791 for (Function *F : EntryPoints) {
701- propagateAspectsThroughCG (F, CG, FunctionToUsedAspects, Visited);
702792 processDeclaredVirtualFunctionSets (F, CG, FunctionToUsedAspects, Visited,
703793 VirtualFunctionSets);
704794 }
705795
796+ // Optimized single-pass propagation for used aspects.
797+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToUsedAspects);
798+
706799 if (ValidateAspects)
707800 validateUsedAspectsForFunctions (FunctionToUsedAspects, AspectValues,
708801 EntryPoints, CG);
709802
710- // The set of aspects from FunctionToDeclaredAspects should be merged to the
711- // set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712- // avoid errors during validation.
713- Visited.clear ();
714- for (Function *F : EntryPoints)
715- propagateAspectsThroughCG (F, CG, FunctionToDeclaredAspects, Visited);
803+ // Optimized single-pass propagation for declared aspects.
804+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToDeclaredAspects);
716805
717806 return {std::move (FunctionToUsedAspects),
718807 std::move (FunctionToDeclaredAspects)};
0 commit comments