@@ -511,7 +511,50 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511511  }
512512}
513513
514+ // / Computes topological order of functions in the call graph.
515+ // / Returns functions in reverse topological order.
516+ // / This allows single-pass bottom-up propagation.
517+ std::vector<Function *> getTopologicalOrder (const  CallGraphTy &CG,
518+                                             const  std::vector<Function *> &EntryPoints) {
519+   std::vector<Function *> Result;
520+   DenseMap<const  Function *, unsigned > InDegree;
521+ 
522+   //  Build reverse call graph and compute in-degrees.
523+   DenseMap<Function *, SmallVector<Function *, 4 >> ReverseCG;
524+   for  (const  auto  &[Caller, Callees] : CG) {
525+     for  (Function *Callee : Callees) {
526+       ReverseCG[Callee].push_back (Caller);
527+       InDegree[Caller]++;
528+     }
529+   }
530+ 
531+   //  Start with functions that have no callees.
532+   std::queue<Function *> Worklist;
533+   for  (const  auto  &[F, Callees] : CG) {
534+     if  (InDegree[F] == 0 )
535+       Worklist.push (F);
536+   }
537+ 
538+   //  Kahn's algorithm for topological sort.
539+   while  (!Worklist.empty ()) {
540+     Function *F = Worklist.front ();
541+     Worklist.pop ();
542+     Result.push_back (F);
543+ 
544+     auto  It = ReverseCG.find (F);
545+     if  (It != ReverseCG.end ()) {
546+       for  (Function *Caller : It->second ) {
547+         if  (--InDegree[Caller] == 0 )
548+           Worklist.push (Caller);
549+       }
550+     }
551+   }
552+ 
553+   return  Result;
554+ }
555+ 
514556// / Propagates aspects from leaves up to the top of call graph.
557+ // / Uses topological sort for efficient single-pass propagation.
515558// / NB! Call graph corresponds to call graph of SYCL code which
516559// / can't contain recursive calls. So there can't be loops in
517560// / a call graph. But paths through the call graph can intersect.
@@ -534,6 +577,91 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534577  AspectsMap[F].insert (LocalAspects.begin (), LocalAspects.end ());
535578}
536579
580+ // / Processes each function exactly once in bottom-up order.
581+ void  propagateAspectsThroughCGOptimized (const  std::vector<Function *> &TopoOrder,
582+                                         const  CallGraphTy &CG,
583+                                         FunctionToAspectsMapTy &AspectsMap) {
584+   //  Process in topological order.
585+   for  (Function *F : TopoOrder) {
586+     auto  It = CG.find (F);
587+     if  (It == CG.end ())
588+       continue ;
589+ 
590+     //  Merge aspects from all callees.
591+     for  (Function *Callee : It->second ) {
592+       const  auto  &CalleeAspects = AspectsMap[Callee];
593+       AspectsMap[F].insert (CalleeAspects.begin (), CalleeAspects.end ());
594+     }
595+   }
596+ }
597+ 
598+ // / This reduces redundant type lookups when multiple instructions use the same type.
599+ AspectsSetTy getAspectsFromInstructions (Function &F,
600+                                         TypeToAspectsMapTy &TypesWithAspects,
601+                                         int  FP64Aspect, bool  FP64ConvEmu) {
602+   AspectsSetTy Result;
603+ 
604+   //  Collect unique types used across all instructions.
605+   SmallDenseMap<const  Type *, bool , 32 > SeenTypes;
606+ 
607+   for  (Instruction &I : instructions (F)) {
608+     //  Check instruction return type.
609+     const  Type *ReturnType = I.getType ();
610+     if  (auto  *AI = dyn_cast<AllocaInst>(&I))
611+       ReturnType = AI->getAllocatedType ();
612+ 
613+     bool  IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction (I);
614+ 
615+     //  Only analyze this type once.
616+     if  (SeenTypes.try_emplace (ReturnType, IsFP64Conversion && hasDoubleType (ReturnType)).second ) {
617+       const  AspectsSetTy &Aspects = getAspectsFromType (ReturnType, TypesWithAspects);
618+       for  (int  Aspect : Aspects) {
619+         if  (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[ReturnType])
620+           Result.insert (Aspect);
621+       }
622+     }
623+ 
624+     //  Check operand types.
625+     for  (const  auto  &OperandIt : I.operands ()) {
626+       const  Type *OpType = nullptr ;
627+       if  (const  auto  *GV = dyn_cast<GlobalValue>(OperandIt->stripPointerCasts ()))
628+         OpType = GV->getValueType ();
629+       else 
630+         OpType = OperandIt->getType ();
631+ 
632+       if  (OpType && SeenTypes.try_emplace (OpType, IsFP64Conversion && hasDoubleType (OpType)).second ) {
633+         const  AspectsSetTy &Aspects = getAspectsFromType (OpType, TypesWithAspects);
634+         for  (int  Aspect : Aspects) {
635+           if  (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[OpType])
636+             Result.insert (Aspect);
637+         }
638+       }
639+     }
640+ 
641+     //  Check GEP source type.
642+     if  (auto  *GEPI = dyn_cast<GetElementPtrInst>(&I)) {
643+       const  Type *SourceType = GEPI->getSourceElementType ();
644+       if  (SeenTypes.try_emplace (SourceType, IsFP64Conversion && hasDoubleType (SourceType)).second ) {
645+         const  AspectsSetTy &Aspects = getAspectsFromType (SourceType, TypesWithAspects);
646+         for  (int  Aspect : Aspects) {
647+           if  (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[SourceType])
648+             Result.insert (Aspect);
649+         }
650+       }
651+     }
652+ 
653+     //  Check instruction-level metadata.
654+     if  (const  MDNode *InstAspects = I.getMetadata (" sycl_used_aspects" 
655+       for  (const  MDOperand &MDOp : InstAspects->operands ()) {
656+         const  Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue ();
657+         Result.insert (cast<ConstantInt>(C)->getSExtValue ());
658+       }
659+     }
660+   }
661+ 
662+   return  Result;
663+ }
664+ 
537665// / Processes a function:
538666// /  - checks if return and argument types are using any aspects
539667// /  - checks if instructions are using any aspects
@@ -549,12 +677,14 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
549677  assert (FP64AspectIt != AspectValues.end () &&
550678         " fp64 aspect was not found in the aspect values." 
551679  auto  FP64Aspect = FP64AspectIt->second ;
680+ 
552681  const  AspectsSetTy RetTyAspects =
553682      getAspectsFromType (F.getReturnType (), TypesWithAspects);
554683  for  (const  auto  &Aspect : RetTyAspects)
555684    if  (!FP64ConvEmu || (Aspect != FP64Aspect) ||
556685        !hasDoubleType (F.getReturnType ()))
557686      FunctionToUsedAspects[&F].insert (Aspect);
687+ 
558688  for  (Argument &Arg : F.args ()) {
559689    const  AspectsSetTy ArgAspects =
560690        getAspectsFromType (Arg.getType (), TypesWithAspects);
@@ -564,19 +694,20 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564694        FunctionToUsedAspects[&F].insert (Aspect);
565695  }
566696
697+   //  Optimized instruction analysis with type deduplication.
698+   const  AspectsSetTy InstrAspects =
699+       getAspectsFromInstructions (F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
700+   FunctionToUsedAspects[&F].insert (InstrAspects.begin (), InstrAspects.end ());
701+ 
702+   //  Build call graph.
567703  for  (Instruction &I : instructions (F)) {
568-     const  AspectsSetTy Aspects =
569-         getAspectsUsedByInstruction (I, TypesWithAspects);
570-     for  (const  auto  &Aspect : Aspects)
571-       if  (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType (I) ||
572-           !isFP64ConversionInstruction (I))
573-         FunctionToUsedAspects[&F].insert (Aspect);
574704    if  (const  auto  *CI = dyn_cast<CallInst>(&I)) {
575705      if  (!CI->isIndirectCall () && CI->getCalledFunction ())
576706        CG[&F].insert (CI->getCalledFunction ());
577707    }
578708  }
579709
710+   //  Collect aspects from metadata (combined to reduce lookups).
580711  auto  CollectAspectsFromMD = [&F](const  char * MDName, FunctionToAspectsMapTy &Map) {
581712    if  (const  MDNode *MD = F.getMetadata (MDName)) {
582713      AspectsSetTy Aspects;
@@ -696,23 +827,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696827    collectVirtualFunctionSetInfo (F, VirtualFunctionSets);
697828  }
698829
830+   //  Compute topological order once for both propagation passes.
831+   std::vector<Function *> TopoOrder = getTopologicalOrder (CG, EntryPoints);
832+ 
833+   //  Handle virtual function sets (still needs old recursive propagation)
699834  SmallPtrSet<const  Function *, 16 > Visited;
700835  for  (Function *F : EntryPoints) {
701-     propagateAspectsThroughCG (F, CG, FunctionToUsedAspects, Visited);
702836    processDeclaredVirtualFunctionSets (F, CG, FunctionToUsedAspects, Visited,
703837                                       VirtualFunctionSets);
704838  }
705839
840+   //  Optimized single-pass propagation for used aspects.
841+   propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToUsedAspects);
842+ 
706843  if  (ValidateAspects)
707844    validateUsedAspectsForFunctions (FunctionToUsedAspects, AspectValues,
708845                                    EntryPoints, CG);
709846
710-   //  The set of aspects from FunctionToDeclaredAspects should be merged to the
711-   //  set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712-   //  avoid errors during validation.
713-   Visited.clear ();
714-   for  (Function *F : EntryPoints)
715-     propagateAspectsThroughCG (F, CG, FunctionToDeclaredAspects, Visited);
847+   //  Optimized single-pass propagation for declared aspects.
848+   propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToDeclaredAspects);
716849
717850  return  {std::move (FunctionToUsedAspects),
718851          std::move (FunctionToDeclaredAspects)};
0 commit comments