@@ -385,6 +385,11 @@ static cl::opt<bool> UseWiderVFIfCallVariantsPresent(
385385 cl::Hidden,
386386 cl::desc(" Try wider VFs if they enable the use of vector variants" ));
387387
388+ static cl::opt<bool > EnableEarlyExitVectorization (
389+ " enable-early-exit-vectorization" , cl::init(false ), cl::Hidden,
390+ cl::desc(
391+ " Enable vectorization of early exit loops with uncountable exits." ));
392+
388393// Likelihood of bypassing the vectorized loop because assumptions about SCEV
389394// variables not overflowing do not hold. See `emitSCEVChecks`.
390395static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1382,9 +1387,10 @@ class LoopVectorizationCostModel {
13821387 LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
13831388 return false ;
13841389 }
1385- // If we might exit from anywhere but the latch, must run the exiting
1386- // iteration in scalar form.
1387- if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1390+ // If we might exit from anywhere but the latch and early exit vectorization
1391+ // is disabled, we must run the exiting iteration in scalar form.
1392+ if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch () &&
1393+ !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit ())) {
13881394 LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: not exiting "
13891395 " from latch block\n " );
13901396 return true ;
@@ -3656,10 +3662,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
36563662
36573663 // Start with the conditional branches exiting the loop. If the branch
36583664 // condition is an instruction contained in the loop that is only used by the
3659- // branch, it is uniform.
3665+ // branch, it is uniform. Note conditions from uncountable early exits are not
3666+ // uniform.
36603667 SmallVector<BasicBlock *> Exiting;
36613668 TheLoop->getExitingBlocks (Exiting);
36623669 for (BasicBlock *E : Exiting) {
3670+ if (Legal->hasUncountableEarlyExit () && TheLoop->getLoopLatch () != E)
3671+ continue ;
36633672 auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
36643673 if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
36653674 AddToWorklistIfAllowed (Cmp);
@@ -8239,8 +8248,11 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
82398248
82408249 // If source is an exiting block, we know the exit edge is dynamically dead
82418250 // in the vector loop, and thus we don't need to restrict the mask. Avoid
8242- // adding uses of an otherwise potentially dead instruction.
8243- if (OrigLoop->isLoopExiting (Src))
8251+ // adding uses of an otherwise potentially dead instruction unless we are
8252+ // vectorizing a loop with uncountable exits. In that case, we always
8253+ // materialize the mask.
8254+ if (OrigLoop->isLoopExiting (Src) &&
8255+ Src != Legal->getUncountableEarlyExitingBlock ())
82448256 return EdgeMaskCache[Edge] = SrcMask;
82458257
82468258 VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8931,50 +8943,58 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89318943static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
89328944 Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
89338945 const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8946+ auto *MiddleVPBB = Plan.getMiddleBlock ();
89348947 SetVector<VPIRInstruction *> ExitUsersToFix;
89358948 for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
8936- BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock ();
8937- BasicBlock *ExitingBB = find_singleton<BasicBlock>(
8938- to_vector (predecessors (ExitBB)),
8939- [OrigLoop](BasicBlock *Pred, bool AllowRepeats) {
8940- return OrigLoop->contains (Pred) ? Pred : nullptr ;
8941- });
89428949 for (VPRecipeBase &R : *ExitVPBB) {
89438950 auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
89448951 if (!ExitIRI)
89458952 continue ;
89468953 auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction ());
89478954 if (!ExitPhi)
89488955 break ;
8949- Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8950- VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8951- // Exit values for inductions are computed and updated outside of VPlan
8952- // and independent of induction recipes.
8953- // TODO: Compute induction exit values in VPlan.
8954- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8955- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8956- isa<VPWidenPointerInductionRecipe>(V) ||
8957- (isa<Instruction>(IncomingValue) &&
8958- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8959- any_of (IncomingValue->users (), [&Inductions](User *U) {
8960- auto *P = dyn_cast<PHINode>(U);
8961- return P && Inductions.contains (P);
8962- })))
8963- continue ;
8964- ExitUsersToFix.insert (ExitIRI);
8965- ExitIRI->addOperand (V);
8956+ for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors ()) {
8957+ BasicBlock *ExitingBB = OrigLoop->getLoopLatch ();
8958+ if (PredVPBB != MiddleVPBB) {
8959+ SmallVector<BasicBlock *> ExitingBlocks;
8960+ OrigLoop->getExitingBlocks (ExitingBlocks);
8961+ assert (ExitingBlocks.size () == 2 && " only support 2 exiting blocks" );
8962+ ExitingBB = ExitingBB == ExitingBlocks[0 ] ? ExitingBlocks[1 ]
8963+ : ExitingBlocks[0 ];
8964+ }
8965+ Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8966+ VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8967+ // Exit values for inductions are computed and updated outside of VPlan
8968+ // and independent of induction recipes.
8969+ // TODO: Compute induction exit values in VPlan.
8970+ if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8971+ !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8972+ isa<VPWidenPointerInductionRecipe>(V) ||
8973+ (isa<Instruction>(IncomingValue) &&
8974+ OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8975+ any_of (IncomingValue->users (), [&Inductions](User *U) {
8976+ auto *P = dyn_cast<PHINode>(U);
8977+ return P && Inductions.contains (P);
8978+ }))) {
8979+ if (ExitVPBB->getSinglePredecessor () == MiddleVPBB)
8980+ continue ;
8981+ }
8982+ ExitUsersToFix.insert (ExitIRI);
8983+ ExitIRI->addOperand (V);
8984+ }
89668985 }
89678986 }
89688987 return ExitUsersToFix;
89698988}
89708989
89718990// Add exit values to \p Plan. Extracts are added for each entry in \p
8972- // ExitUsersToFix if needed and their operands are updated.
8973- static void
8991+ // ExitUsersToFix if needed and their operands are updated. Returns true if all
8992+ // exit users can be handled, otherwise returns false.
8993+ static bool
89748994addUsersInExitBlocks (VPlan &Plan,
89758995 const SetVector<VPIRInstruction *> &ExitUsersToFix) {
89768996 if (ExitUsersToFix.empty ())
8977- return ;
8997+ return true ;
89788998
89798999 auto *MiddleVPBB = Plan.getMiddleBlock ();
89809000 VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
@@ -8988,14 +9008,18 @@ addUsersInExitBlocks(VPlan &Plan,
89889008 if (V->isLiveIn ())
89899009 continue ;
89909010
8991- assert (ExitIRI->getParent ()->getSinglePredecessor () == MiddleVPBB &&
8992- " Exit value not handled yet for this edge." );
9011+ // Currently only live-ins can be used by exit values from blocks not
9012+ // exiting via the vector latch through to the middle block.
9013+ if (ExitIRI->getParent ()->getSinglePredecessor () != MiddleVPBB)
9014+ return false ;
9015+
89939016 LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
89949017 VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
89959018 {V, Plan.getOrAddLiveIn (ConstantInt::get (
89969019 IntegerType::get (Ctx, 32 ), 1 ))});
89979020 ExitIRI->setOperand (0 , Ext);
89989021 }
9022+ return true ;
89999023}
90009024
90019025// / Handle users in the exit block for first order reductions in the original
@@ -9268,11 +9292,23 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92689292 " VPBasicBlock" );
92699293 RecipeBuilder.fixHeaderPhis ();
92709294
9295+ if (auto *UncountableExitingBlock =
9296+ Legal->getUncountableEarlyExitingBlock ()) {
9297+ VPlanTransforms::handleUncountableEarlyExit (
9298+ *Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9299+ }
92719300 addScalarResumePhis (RecipeBuilder, *Plan);
92729301 SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
92739302 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
92749303 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9275- addUsersInExitBlocks (*Plan, ExitUsersToFix);
9304+ if (!addUsersInExitBlocks (*Plan, ExitUsersToFix)) {
9305+ reportVectorizationFailure (
9306+ " Some exit values in loop with uncountable exit not supported yet" ,
9307+ " Some exit values in loop with uncountable exit not supported yet" ,
9308+ " UncountableEarlyExitLoopsUnsupportedExitValue" , ORE, OrigLoop);
9309+ return nullptr ;
9310+ }
9311+
92769312 // ---------------------------------------------------------------------------
92779313 // Transform initial VPlan: Apply previously taken decisions, in order, to
92789314 // bring the VPlan to its final state.
@@ -10138,12 +10174,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1013810174 return false ;
1013910175 }
1014010176
10141- if (LVL.hasUncountableEarlyExit ()) {
10177+ if (LVL.hasUncountableEarlyExit () && !EnableEarlyExitVectorization ) {
1014210178 reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10143- " early exit is not yet supported " ,
10179+ " early exit is not enabled " ,
1014410180 " Auto-vectorization of loops with uncountable "
10145- " early exit is not yet supported " ,
10146- " UncountableEarlyExitLoopsUnsupported " , ORE, L);
10181+ " early exit is not enabled " ,
10182+ " UncountableEarlyExitLoopsDisabled " , ORE, L);
1014710183 return false ;
1014810184 }
1014910185
0 commit comments