From 941bcf2f3b498b39177baaf0dc70239b22b80c27 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Thu, 6 Jul 2023 09:33:38 +0200 Subject: [PATCH 01/21] Code refactoring and test added for the count cttz pass! --- llvm/lib/Transforms/Utils/PetarCountCttz.cpp | 8 ++-- llvm/test/Transforms/Util/petar-count-cttz.ll | 48 +++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/Util/petar-count-cttz.ll diff --git a/llvm/lib/Transforms/Utils/PetarCountCttz.cpp b/llvm/lib/Transforms/Utils/PetarCountCttz.cpp index 33a7af390d32e..93ad2457cac3b 100644 --- a/llvm/lib/Transforms/Utils/PetarCountCttz.cpp +++ b/llvm/lib/Transforms/Utils/PetarCountCttz.cpp @@ -8,18 +8,18 @@ int countCttzIntrinsicAppearance=0; PreservedAnalyses PetarCountCttzPass::run(Function &F, FunctionAnalysisManager &AM) { errs() << "Function name: " << F.getName() << "\n"; - errs() << "\n-------------------------------\n"; - errs() << "Instructions within this function!\n"; + //errs() << "\n-------------------------------\n"; + //errs() << "Instructions within this function!\n"; for(BasicBlock& BB: F){ for(Instruction& I: BB){ - errs()<< I.getName() << "\n"; + //errs()<< I.getName() << "\n"; IntrinsicInst *II = dyn_cast(&I); if(II && (II->getIntrinsicID() == Intrinsic::cttz)) countCttzIntrinsicAppearance++; } } - errs() << "\n-------------------------------\n"; + //errs() << "\n-------------------------------\n"; errs() << "Total number of cttz intrinsic appearances is equal to: " << countCttzIntrinsicAppearance << ".\n"; return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/Util/petar-count-cttz.ll b/llvm/test/Transforms/Util/petar-count-cttz.ll new file mode 100644 index 0000000000000..941cffca4587a --- /dev/null +++ b/llvm/test/Transforms/Util/petar-count-cttz.ll @@ -0,0 +1,48 @@ +; RUN: opt -disable-output -passes=petarcountcttz %s 2>&1 | FileCheck %s + +; CHECK: Function name: cttz_64_eq_select +; CHECK-NEXT: Total number of cttz intrinsic 
appearances is equal to: 1. +declare i64 @llvm.cttz.i64(i64, i1) +define i64 @cttz_64_eq_select(i64 %v) nounwind { + + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 %v, 0 + %.op = add nuw nsw i64 %cnt, 6 + %add = select i1 %tobool, i64 5, i64 %.op + ret i64 %add +} + +; CHECK-NEXT: Function name: cttz_64_ne_select +; CHECK-NEXT: Total number of cttz intrinsic appearances is equal to: 2. +define i64 @cttz_64_ne_select(i64 %v) nounwind { + + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp ne i64 %v, 0 + %.op = add nuw nsw i64 %cnt, 6 + %add = select i1 %tobool, i64 %.op, i64 5 + ret i64 %add +} + +; CHECK-NEXT: Function name: cttz_32_eq_select +; CHECK-NEXT: Total number of cttz intrinsic appearances is equal to: 3. +declare i32 @llvm.cttz.i32(i32, i1) +define i32 @cttz_32_eq_select(i32 %v) nounwind { + + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 %v, 0 + %.op = add nuw nsw i32 %cnt, 6 + %add = select i1 %tobool, i32 5, i32 %.op + ret i32 %add +} + +; CHECK-NEXT: Function name: cttz_32_ne_select +; CHECK-NEXT: Total number of cttz intrinsic appearances is equal to: 4. +define i32 @cttz_32_ne_select(i32 %v) nounwind { + + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp ne i32 %v, 0 + %.op = add nuw nsw i32 %cnt, 6 + %add = select i1 %tobool, i32 %.op, i32 5 + ret i32 %add +} + From 5d92bb865d765bcb6d87c477b311c50707460335 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Thu, 13 Jul 2023 10:55:04 +0200 Subject: [PATCH 02/21] Added some notes at the development branch! --- petar_notes.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 petar_notes.txt diff --git a/petar_notes.txt b/petar_notes.txt new file mode 100644 index 0000000000000..e4afaf2f2e203 --- /dev/null +++ b/petar_notes.txt @@ -0,0 +1,6 @@ +LLVM project +Author: Chris Lattner +Master thesis title: Semantic detection of a CRC function for RISCV in LLVM + +What have I learned so far? 
+ From 86be97fe8972fd7c6200b28d9531911e5a5404a4 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Fri, 21 Jul 2023 11:15:14 +0200 Subject: [PATCH 03/21] Added CRC IR implementation to my notes! --- petar_notes.txt | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/petar_notes.txt b/petar_notes.txt index e4afaf2f2e203..eef73843c632b 100644 --- a/petar_notes.txt +++ b/petar_notes.txt @@ -4,3 +4,51 @@ Master thesis title: Semantic detection of a CRC function for RISCV in LLVM What have I learned so far? + + + + + +---------------------------------------------------------------------------------------- +Unoptimized form of CRC algorithm implementation: + +ee_u16 crcu8(ee_u8 data, ee_u16 crc) { + ee_u8 i = 0, x16 = 0, carry = 0; + for (i = 0; i < 8; i++) { + x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); + data >>= 1; + if (x16 == 1) { + crc ^= 0x4002; + carry = 1; + } else { + carry = 0; + } + + crc >>= 1; + + if (carry) + crc |= 0x8000; + else + crc &= 0x7fff; + } + + return crc; +} + +Optimized implementation of a CRC algorithm: + +ee_u16 crcu8(ee_u8 data, ee_u16 _crc) { + ee_u8 i = 0, x16 = 0, carry = 0; + long crc = _crc; + crc ^= data; + + for (i = 0; i < 8; i++) { + x16 = (ee_u8)crc & 1; + data >>= 1; + crc >>= 1; + crc ^= (x16 & 1) ? 0xa001 : 0; // Conditional XOR + } + + return crc; +} + From a13d576b15f9615208ee1cc8fb1d831e87747682 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Thu, 3 Aug 2023 14:34:24 +0200 Subject: [PATCH 04/21] Adding llvm.crc intrinsic started! 
--- llvm/include/llvm/IR/Intrinsics.td | 1 + llvm/include/llvm/IR/IntrinsicsRISCV.td | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 86f81124b8464..870acfc22d8a9 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1293,6 +1293,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] in { def int_ctlz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_cttz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + def int_crc : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; } //===------------------------ Debugger Intrinsics -------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 243cba5c62bb2..70c8dfed97317 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -106,6 +106,12 @@ let TargetPrefix = "riscv" in { // Zbkx def int_riscv_xperm4 : BitManipGPRGPRIntrinsics; def int_riscv_xperm8 : BitManipGPRGPRIntrinsics; + + //CRC - we will check this later! + //def int_riscv_crc: BitManipGPRIntrinsics; + //def int_ctlz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + //def int_cttz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + } // TargetPrefix = "riscv" //===----------------------------------------------------------------------===// From 033d6cee0a7000d89a8311939631d8c116191d96 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Sat, 5 Aug 2023 15:14:59 +0200 Subject: [PATCH 05/21] Just to try multiple options! 
--- .../AggressiveInstCombine.cpp | 168 +++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index b2925b1a9f2f3..728edcd6504fa 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -267,6 +267,70 @@ static bool foldAnyOrAllBitsSet(Instruction &I) { ++NumAnyOrAllBitsSet; return true; } +//--------------------------------------------------------------------------------------------------------------------------------------- +// Petar's code! +//unsigned reverse(unsigned x) { +// x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555); +// x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333); +// x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F); +// x = (x << 24) | ((x & 0xFF00) << 8) | +// ((x >> 8) & 0xFF00) | (x >> 24); +// return x; +//} + +static bool tryToRecognizeReverseFunction(Instruction &I){ + if(I.getOpcode!=Instruction::Or) + return false; + Type *Ty = I.getType(); + if (!Ty->isIntOrIntVectorTy()) + return false; + + APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55)); + APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33)); + APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F)); + APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01)); + APInt MaskShift = APInt(Len, Len - 8); + + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *MulOp0; + + // I need to change this part! + // Matching "(i * 0x01010101...) >> 24". + + // (x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24)); + if ((match(Op0, m_Shl(m_Value(MulOp0), m_SpecificInt(Mask01)))) && + match(Op1, m_SpecificInt(MaskShift))) { + Value *ShiftOp0; + // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)". 
+ if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)), + m_Deferred(ShiftOp0)), + m_SpecificInt(Mask0F)))) { + Value *AndOp0; + // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)". + if (match(ShiftOp0, + m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)), + m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)), + m_SpecificInt(Mask33))))) { + Value *Root, *SubOp1; + // Matching "i - ((i >> 1) & 0x55555555...)". + if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) && + match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)), + m_SpecificInt(Mask55)))) { + LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n"); + IRBuilder<> Builder(&I); + Function *Func = Intrinsic::getDeclaration( + I.getModule(), Intrinsic::ctpop, I.getType()); + I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); + ++NumPopCountRecognized; + return true; + } + } + } +} + +//--------------------------------------------------------------------------------------------------------------------------------------- + // Try to recognize below function as popcount intrinsic. // This is the "best" algorithm from @@ -442,6 +506,108 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, return false; } +//--------------------------------------------------------------------------------------------------------------------------------------- +// Petar's code! + +//LLVM IR code for the naive crc algorithm implementation! 
+/* +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @crc32a(i8* %0, i32 %1) #0 { + %3 = alloca i8*, align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + store i8* %0, i8** %3, align 8 + store i32 %1, i32* %4, align 4 + store i32 0, i32* %5, align 4 + store i32 -1, i32* %8, align 4 + br label %9 + +9: ; preds = %43, %2 + %10 = load i32, i32* %5, align 4 + %11 = load i32, i32* %4, align 4 + %12 = icmp ult i32 %10, %11 + br i1 %12, label %13, label %46 + +13: ; preds = %9 + %14 = load i8*, i8** %3, align 8 + %15 = load i32, i32* %5, align 4 + %16 = sext i32 %15 to i64 + %17 = getelementptr inbounds i8, i8* %14, i64 %16 + %18 = load i8, i8* %17, align 1 + %19 = zext i8 %18 to i32 + store i32 %19, i32* %7, align 4 + %20 = load i32, i32* %7, align 4 + %21 = call i32 @reverse(i32 %20) + store i32 %21, i32* %7, align 4 + store i32 0, i32* %6, align 4 + br label %22 + +22: ; preds = %40, %13 + %23 = load i32, i32* %6, align 4 + %24 = icmp sle i32 %23, 7 + br i1 %24, label %25, label %43 + +25: ; preds = %22 + %26 = load i32, i32* %8, align 4 + %27 = load i32, i32* %7, align 4 + %28 = xor i32 %26, %27 + %29 = icmp slt i32 %28, 0 + br i1 %29, label %30, label %34 + +30: ; preds = %25 + %31 = load i32, i32* %8, align 4 + %32 = shl i32 %31, 1 + %33 = xor i32 %32, 79764919 + store i32 %33, i32* %8, align 4 + br label %37 + +34: ; preds = %25 + %35 = load i32, i32* %8, align 4 + %36 = shl i32 %35, 1 + store i32 %36, i32* %8, align 4 + br label %37 + +37: ; preds = %34, %30 + %38 = load i32, i32* %7, align 4 + %39 = shl i32 %38, 1 + store i32 %39, i32* %7, align 4 + br label %40 + +40: ; preds = %37 + %41 = load i32, i32* %6, align 4 + %42 = add nsw i32 %41, 1 + store i32 %42, i32* %6, align 4 + br label %22 + +43: ; preds = %22 + %44 = load i32, i32* %5, align 4 + %45 = add nsw i32 %44, 1 + store i32 %45, i32* %5, align 4 + br label %9 + +46: ; preds = %9 + %47 = 
load i32, i32* %8, align 4 + %48 = xor i32 %47, -1 + %49 = call i32 @reverse(i32 %48) + ret i32 %49 +} +*/ +static bool tryToRecognizeCrc(Instruction &I){ + LoadInst *LI = dyn_cast(&I); + if(!LI) + return false; + + Type *AccessType = LI->getType(); + if(!AccessType->isIntegerTy()) + return false; + //llvm::PatternMatch::m_Cmp +} + +//--------------------------------------------------------------------------------------------------------------------------------------- + // Check if this array of constants represents a cttz table. // Iterate over the elements from \p Table by trying to find/match all // the numbers from 0 to \p InputBits that should represent cttz results. @@ -475,7 +641,7 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul, // int f(unsigned x) { // static const char table[32] = // {0, 1, 28, 2, 29, 14, 24, 3, 30, -// 22, 20, 15, 25, 17, 4, 8, 31, 27, +// 22, 20, 15, 25, 17, 4, 8, 31, 27, // 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; // return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; // } From 8683a53619c724fa257f222b322c98ecd0eb5a9d Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Wed, 9 Aug 2023 17:22:18 +0200 Subject: [PATCH 06/21] Finishing reverse function pattern matcher! --- .../AggressiveInstCombine.cpp | 75 ++++++++++++------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 728edcd6504fa..6adfdd0632a82 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -270,14 +270,19 @@ static bool foldAnyOrAllBitsSet(Instruction &I) { //--------------------------------------------------------------------------------------------------------------------------------------- // Petar's code! 
//unsigned reverse(unsigned x) { -// x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555); -// x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333); -// x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F); -// x = (x << 24) | ((x & 0xFF00) << 8) | -// ((x >> 8) & 0xFF00) | (x >> 24); +// x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555); ? +// x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333); . +// x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F); . +// x = (x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24); . // return x; //} +// int popcount(unsigned int i) { +// i = i - ((i >> 1) & 0x55555555); +// i = (i & 0x33333333) + ((i >> 2) & 0x33333333); +// i = ((i + (i >> 4)) & 0x0F0F0F0F); +// return (i * 0x01010101) >> 24; + static bool tryToRecognizeReverseFunction(Instruction &I){ if(I.getOpcode!=Instruction::Or) return false; @@ -294,30 +299,41 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); Value *MulOp0; - + //------------ + //Petar's insertion! Aditional variables! + Value *value1; + // I need to change this part! - // Matching "(i * 0x01010101...) >> 24". - - // (x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24)); - if ((match(Op0, m_Shl(m_Value(MulOp0), m_SpecificInt(Mask01)))) && - match(Op1, m_SpecificInt(MaskShift))) { + // Matching "(i * 0x01010101...) >> 24" <- popCount function instruction! + // Matching "(x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24))" <- reverse function instruction! (Petar) + if ((match(MulOp0, m_Or(m_Shl(m_Value(MulOp0), m_SpecificInt(MaskShift)), + m_Or(m_Shl(m_And(m_Deferred(MulOp0), m_SpecificInt(65280)), m_SpecificInt(8)), + m_Or(m_And(m_LShr(m_Deferred(MulOp0), m_SpecificInt(8)), m_SpecificInt(65280)), m_LShr(m_Deferred(MulOp0), m_SpecificInt(24)))))))) { + + // I hope we recognised the previous instruction! (Petar) + Value *ShiftOp0; - // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)". 
- if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)), - m_Deferred(ShiftOp0)), - m_SpecificInt(Mask0F)))) { - Value *AndOp0; - // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)". - if (match(ShiftOp0, - m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)), - m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)), - m_SpecificInt(Mask33))))) { - Value *Root, *SubOp1; - // Matching "i - ((i >> 1) & 0x55555555...)". - if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) && - match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)), - m_SpecificInt(Mask55)))) { - LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n"); + // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)" <- popCount function instruction! + // matching ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F) <- reverse function instruction! + if(match(MulOp0, m_Or(m_Shl(m_And(m_Value(MulOp0), m_SpecificInt(Mask0F)), m_SpecificInt(4)), + m_and(m_LShr(m_Deferred(MulOp0), m_SpecificInt(4)), m_SpecificInt(Mask0F))))){ + + // I hope we recognised the previous instruction! (Petar) + //Value *AndOp0; + + // Matching ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333) <- reverse function instruction! (Petar) + if (match(MulOp0, m_Or(m_Shl(m_And(m_Value(MulOp0), m_SpecificInt(Mask33)), m_SpecificInt(2)), + m_And(m_LShr(m_Deferred(MulOp0), m_SpecificInt(2)), m_SpecificInt(Mask33))))) { + + // I hope we recognised the previous instruction! (Petar) + //Value *Root, *SubOp1; + + // Matching "((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555))" <- reverse function instruction! (Petar) + if (match(MulOp0, m_Or(m_Shl(m_And(m_Value(MulOp0), m_SpecificInt(Mask55)), m_SpecificInt(1)), + m_And(m_LShr(m_Deferred(MulOp0), m_SpecificInt(1)), m_SpecificInt(Mask55))))) { + + // I hope we recognised the previous instruction! 
(Petar) + LLVM_DEBUG(dbgs() << "Recognized reverse function!\n"); IRBuilder<> Builder(&I); Function *Func = Intrinsic::getDeclaration( I.getModule(), Intrinsic::ctpop, I.getType()); @@ -326,8 +342,9 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ return true; } } - } -} + } + } +} //--------------------------------------------------------------------------------------------------------------------------------------- From f4814b6bd16adff2d89a36e3594aa9a9b474aea6 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Wed, 9 Aug 2023 17:24:51 +0200 Subject: [PATCH 07/21] Refactoring tryToRecognizeReverseFunction function! --- .../AggressiveInstCombine/AggressiveInstCombine.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 6adfdd0632a82..f299876a35199 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -301,19 +301,17 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ Value *MulOp0; //------------ //Petar's insertion! Aditional variables! - Value *value1; + //Value *value1; // I need to change this part! - // Matching "(i * 0x01010101...) >> 24" <- popCount function instruction! // Matching "(x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24))" <- reverse function instruction! (Petar) if ((match(MulOp0, m_Or(m_Shl(m_Value(MulOp0), m_SpecificInt(MaskShift)), m_Or(m_Shl(m_And(m_Deferred(MulOp0), m_SpecificInt(65280)), m_SpecificInt(8)), m_Or(m_And(m_LShr(m_Deferred(MulOp0), m_SpecificInt(8)), m_SpecificInt(65280)), m_LShr(m_Deferred(MulOp0), m_SpecificInt(24)))))))) { // I hope we recognised the previous instruction! (Petar) + //Value *ShiftOp0; - Value *ShiftOp0; - // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)" <- popCount function instruction! 
// matching ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F) <- reverse function instruction! if(match(MulOp0, m_Or(m_Shl(m_And(m_Value(MulOp0), m_SpecificInt(Mask0F)), m_SpecificInt(4)), m_and(m_LShr(m_Deferred(MulOp0), m_SpecificInt(4)), m_SpecificInt(Mask0F))))){ From 87c43f51fffc3fbd723e8fef6b9ef974900b3c6e Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Wed, 9 Aug 2023 17:31:23 +0200 Subject: [PATCH 08/21] Added a couple of comments on reverse function pattern matcher! --- .../AggressiveInstCombine/AggressiveInstCombine.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index f299876a35199..843f024af0f87 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -43,6 +43,11 @@ STATISTIC(NumGuardedFunnelShifts, "Number of guarded funnel shifts transformed into funnel shifts"); STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized"); +/* +Petar's potential insertion! +STATISTIC(NumReverseRecognized, "Number of reverse function recognized"); +*/ + static cl::opt MaxInstrsToScan( "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine.")); @@ -333,8 +338,10 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ // I hope we recognised the previous instruction! (Petar) LLVM_DEBUG(dbgs() << "Recognized reverse function!\n"); IRBuilder<> Builder(&I); + /* Function *Func = Intrinsic::getDeclaration( I.getModule(), Intrinsic::ctpop, I.getType()); + */ I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); ++NumPopCountRecognized; return true; From 507573ae8bc9a173e3c80a0ed10d0603a72910b1 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Wed, 9 Aug 2023 17:39:13 +0200 Subject: [PATCH 09/21] Code refactoring! 
--- .../AggressiveInstCombine/AggressiveInstCombine.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 843f024af0f87..dcc6fdf05bf6f 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -338,12 +338,15 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ // I hope we recognised the previous instruction! (Petar) LLVM_DEBUG(dbgs() << "Recognized reverse function!\n"); IRBuilder<> Builder(&I); + /* Function *Func = Intrinsic::getDeclaration( I.getModule(), Intrinsic::ctpop, I.getType()); */ - I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); - ++NumPopCountRecognized; + + //I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); + //++NumReverseRecognized; + return true; } } From 204e00268269f86fcfec58381382902f7befcafb Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Fri, 11 Aug 2023 13:28:44 +0200 Subject: [PATCH 10/21] Refactoring HelloWorld pass just to remind myself of some things and added pattern matcher for table-based crc32 algorithm! 
--- .../AggressiveInstCombine.cpp | 213 ++++++++++++++++-- llvm/lib/Transforms/Utils/PetarHelloWorld.cpp | 4 +- 2 files changed, 201 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index dcc6fdf05bf6f..07f2baf993240 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -289,11 +289,16 @@ static bool foldAnyOrAllBitsSet(Instruction &I) { // return (i * 0x01010101) >> 24; static bool tryToRecognizeReverseFunction(Instruction &I){ - if(I.getOpcode!=Instruction::Or) + if(I.getOpcode()!=Instruction::Or) return false; Type *Ty = I.getType(); if (!Ty->isIntOrIntVectorTy()) return false; + + unsigned Len = Ty->getScalarSizeInBits(); + // FIXME: fix Len == 8 and other irregular type lengths. + if (!(Len <= 128 && Len > 8 && Len % 8 == 0)) + return false; APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55)); APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33)); @@ -304,7 +309,7 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); Value *MulOp0; - //------------ + //Petar's insertion! Aditional variables! //Value *value1; @@ -319,7 +324,7 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ // matching ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F) <- reverse function instruction! if(match(MulOp0, m_Or(m_Shl(m_And(m_Value(MulOp0), m_SpecificInt(Mask0F)), m_SpecificInt(4)), - m_and(m_LShr(m_Deferred(MulOp0), m_SpecificInt(4)), m_SpecificInt(Mask0F))))){ + m_And(m_LShr(m_Deferred(MulOp0), m_SpecificInt(4)), m_SpecificInt(Mask0F))))){ // I hope we recognised the previous instruction! 
(Petar) //Value *AndOp0; @@ -336,13 +341,11 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ m_And(m_LShr(m_Deferred(MulOp0), m_SpecificInt(1)), m_SpecificInt(Mask55))))) { // I hope we recognised the previous instruction! (Petar) - LLVM_DEBUG(dbgs() << "Recognized reverse function!\n"); - IRBuilder<> Builder(&I); - /* - Function *Func = Intrinsic::getDeclaration( - I.getModule(), Intrinsic::ctpop, I.getType()); - */ + //LLVM_DEBUG(dbgs() << "Recognized reverse function!\n"); + //IRBuilder<> Builder(&I); + + //Function *Func = Intrinsic::getDeclaration(I.getModule(), Intrinsic::ctpop, I.getType()); //I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); //++NumReverseRecognized; @@ -621,14 +624,194 @@ define dso_local i32 @crc32a(i8* %0, i32 %1) #0 { } */ static bool tryToRecognizeCrc(Instruction &I){ - LoadInst *LI = dyn_cast(&I); - if(!LI) - return false; + + //To do! - Type *AccessType = LI->getType(); - if(!AccessType->isIntegerTy()) + return false; +} + +// Check if this array of constants represents a crc32 table. +static bool isCRC32Table(const ConstantDataArray &Table){ + unsigned Length=Table.getNumElements(); + if(Length!=256) + return false; + + for(int i=0;i4294967295) return false; - //llvm::PatternMatch::m_Cmp + } + + return true; +} + +// Try to recognize table-based crc32 algorithm implementation. 
+/* +define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + %7 = alloca i32*, align 8 + store i32 %0, i32* %4, align 4 + store i8* %1, i8** %5, align 8 + store i64 %2, i64* %6, align 8 + %8 = load i8*, i8** %5, align 8 + %9 = bitcast i8* %8 to i32* + store i32* %9, i32** %7, align 8 + br label %10 + +10: ; preds = %14, %3 + %11 = load i64, i64* %6, align 8 + %12 = add i64 %11, -1 + store i64 %12, i64* %6, align 8 + %13 = icmp ne i64 %11, 0 + br i1 %13, label %14, label %27 + +14: ; preds = %10 + %15 = load i32, i32* %4, align 4 + %16 = load i32*, i32** %7, align 8 + %17 = getelementptr inbounds i32, i32* %16, i32 1 + store i32* %17, i32** %7, align 8 + %18 = load i32, i32* %16, align 4 + %19 = xor i32 %15, %18 + %20 = and i32 %19, 255 + %21 = zext i32 %20 to i64 + %22 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32Table, i64 0, i64 %21 + %23 = load i32, i32* %22, align 4 + %24 = load i32, i32* %4, align 4 + %25 = lshr i32 %24, 8 + %26 = xor i32 %23, %25 + store i32 %26, i32* %4, align 4 + br label %10 + +27: ; preds = %10 + %28 = load i32, i32* %4, align 4 + ret i32 %28 +} +*/ +static bool tryToRecognizeTableBasedCRC32(Function &F){ + int step=1; + + for(BasicBlock& BB: F){ + if(step==1){ + //auto it=BB.getInstList().begin(); + int count=1; + for(Instruction& I: BB){ + if(count<=4 && !dyn_castI) + return false; + + if(count>=5 && count<=7 && !dyn_castI) + return false; + + if(count==8 && !dyn_castI) + return false; + + if(count==9 && !dyn_castI) + return false; + + if(count==10 && !dyn_castI) + return false; + + if(count==11 && !dyn_castI) + return false; + count++; + } + } else if(step==2){ + /* + if(BB.getName() != "while.cond") + return name; + */ + int count=1; + for(Instruction& I: BB){ + Value *help1; + Value *help2; + if(count==1 && !dyn_castI) + return false; + + if(count==2 && !match(help1, m_Add(m_Value(help2), m_SpecificInt(-1)))) + return false; + 
+ if(count==3 && !dyn_castI) + return false; + + if(count==4 && !dyn_castI) + return false; + + if(count==5 && !dyn_castI) + return false; + + count++; + } + } else if(step==3){ + int count=1; + /* + if(BB.getName() != "while.body") + return name; + */ + + for(Instruction& I: BB){ + Value *help1; + Value *help2; + Value *help3; + + if(count<=2 && !dyn_castI) + return false; + + if(count==3 && !dyn_castI) + return false; + + if(count==4 && !dyn_castI) + return false; + + if(count==5 && !dyn_castI) + return false; + + if(count==6 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) + return false; + + if(count==7 && !match(help1, m_And(m_Value(help2), m_Value(help3)))) + return false; + + if(count==8 && !dyn_castI) + return false; + + if(count==9 && !dyn_castI) + return false; + + if(count>=10 && count<=11 && !dyn_castI) + return false; + + if(count==12 && !match(help1, m_LShr(m_Value(help2), m_Value(help3)))) + return false; + + if(count==13 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) + return false; + + if(count==14 && !dyn_castI) + return false; + + if(count==15 && !dyn_castI) + return false; + + count++; + } + } else { + int count=1; + + if(count==1 && !dyn_castI) + return false; + + if(count==2 && !dyn_castI) + return false; + + count++; + } + step++; + } + + errs() << "Table-based crc32 algortihm is recognized!" 
<< "\n"; + + return true; } //--------------------------------------------------------------------------------------------------------------------------------------- diff --git a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp index 6cb44222cdf25..a7837166f5e8a 100644 --- a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp +++ b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp @@ -12,8 +12,10 @@ PreservedAnalyses PetarHelloWorldPass::run(Function &F, FunctionAnalysisManager errs() << "\n-------------------------------\n"; errs() << "Instructions within this function!\n"; for(BasicBlock& BB: F){ + errs() << "Here?!\n"; + errs() << "Basic block: " << BB.getName() << "...\n"; for(Instruction& I: BB){ - errs()<< I.getName() << "\n"; + errs()<< "Instruction: " << I.getName() << "\n"; if(CallBase::classof(&I)){ CallBase* callBase = (CallBase*) &I; From 413dfbdf95bd029c2beb1de439d921bb0b7a71d0 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Fri, 11 Aug 2023 13:31:10 +0200 Subject: [PATCH 11/21] Pattern matcher for table-based algorithm is added! --- .../Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 07f2baf993240..b24c154eb0e3c 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -690,6 +690,7 @@ define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { } */ static bool tryToRecognizeTableBasedCRC32(Function &F){ + //Brute force pattern matching! int step=1; for(BasicBlock& BB: F){ From 4bc052e571962af652056f624dad58acf1d0b70b Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Fri, 11 Aug 2023 13:57:32 +0200 Subject: [PATCH 12/21] Correct some mistakes at crc32 pattern matcher! 
--- .../AggressiveInstCombine/AggressiveInstCombine.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index b24c154eb0e3c..8a0602954e349 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -720,7 +720,7 @@ static bool tryToRecognizeTableBasedCRC32(Function &F){ } else if(step==2){ /* if(BB.getName() != "while.cond") - return name; + return false; */ int count=1; for(Instruction& I: BB){ @@ -747,7 +747,7 @@ static bool tryToRecognizeTableBasedCRC32(Function &F){ int count=1; /* if(BB.getName() != "while.body") - return name; + return false; */ for(Instruction& I: BB){ From fe84df14fc02df7a06bc08ca9b75521f090d4e9e Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Fri, 11 Aug 2023 15:00:57 +0200 Subject: [PATCH 13/21] Refactoring crc32 pattern matcher! --- .../AggressiveInstCombine.cpp | 202 ++++++++++++------ 1 file changed, 133 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 8a0602954e349..10c5c67a1f64f 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -305,9 +305,9 @@ static bool tryToRecognizeReverseFunction(Instruction &I){ APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F)); APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01)); APInt MaskShift = APInt(Len, Len - 8); - - Value *Op0 = I.getOperand(0); - Value *Op1 = I.getOperand(1); + + //Value *Op0 = I.getOperand(0); + //Value *Op1 = I.getOperand(1); Value *MulOp0; //Petar's insertion! Aditional variables! 
@@ -698,23 +698,41 @@ static bool tryToRecognizeTableBasedCRC32(Function &F){ //auto it=BB.getInstList().begin(); int count=1; for(Instruction& I: BB){ - if(count<=4 && !dyn_castI) - return false; + if(count<=4 ){ + AllocaInst *II= dyn_cast(&I); + if(!II) + return false; + } - if(count>=5 && count<=7 && !dyn_castI) - return false; - - if(count==8 && !dyn_castI) - return false; - - if(count==9 && !dyn_castI) - return false; - - if(count==10 && !dyn_castI) - return false; - - if(count==11 && !dyn_castI) - return false; + if(count>=5 && count<=7){ + StoreInst* II=dyn_cast(&I); + if(!II) + return false; + } + + if(count==8){ + LoadInst* II=dyn_cast(&I); + if(!II) + return false; + } + + if(count==9){ + BitCastInst *II=dyn_cast(&I); + if(!II) + return false; + } + + if(count==10){ + StoreInst *II=dyn_cast(&I); + if(!II) + return false; + } + + if(count==11){ + BranchInst *II=dyn_cast(&I); + if(!II) + return false; + } count++; } } else if(step==2){ @@ -724,24 +742,36 @@ static bool tryToRecognizeTableBasedCRC32(Function &F){ */ int count=1; for(Instruction& I: BB){ - Value *help1; - Value *help2; - if(count==1 && !dyn_castI) - return false; + Value *help1; + Value *help2; + if(count==1){ + LoadInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==2 && !match(help1, m_Add(m_Value(help2), m_SpecificInt(-1)))) - return false; + if(count==2 && !match(help1, m_Add(m_Value(help2), m_SpecificInt(-1)))) + return false; - if(count==3 && !dyn_castI) - return false; + if(count==3){ + StoreInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==4 && !dyn_castI) - return false; + if(count==4){ + ICmpInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==5 && !dyn_castI) - return false; + if(count==5){ + BranchInst *II=dyn_cast(&I); + if(!II) + return false; + } - count++; + count++; } } else if(step==3){ int count=1; @@ -751,61 +781,95 @@ static bool tryToRecognizeTableBasedCRC32(Function &F){ */ for(Instruction& I: BB){ - Value *help1; - Value *help2; 
- Value *help3; + Value *help1; + Value *help2; + Value *help3; - if(count<=2 && !dyn_castI) - return false; + if(count<=2){ + LoadInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==3 && !dyn_castI) - return false; + if(count==3){ + GetElementPtrInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==4 && !dyn_castI) - return false; + if(count==4){ + StoreInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==5 && !dyn_castI) - return false; + if(count==5){ + LoadInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==6 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) - return false; + if(count==6 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) + return false; - if(count==7 && !match(help1, m_And(m_Value(help2), m_Value(help3)))) - return false; + if(count==7 && !match(help1, m_And(m_Value(help2), m_Value(help3)))) + return false; - if(count==8 && !dyn_castI) - return false; + if(count==8){ + ZExtInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==9 && !dyn_castI) - return false; + if(count==9){ + GetElementPtrInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count>=10 && count<=11 && !dyn_castI) - return false; + if(count>=10 && count<=11){ + LoadInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==12 && !match(help1, m_LShr(m_Value(help2), m_Value(help3)))) - return false; + if(count==12 && !match(help1, m_LShr(m_Value(help2), m_Value(help3)))) + return false; - if(count==13 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) - return false; + if(count==13 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) + return false; - if(count==14 && !dyn_castI) - return false; + if(count==14){ + StoreInst *II=dyn_cast(&I); + if(!II) + return false; + } - if(count==15 && !dyn_castI) - return false; + if(count==15){ + BranchInst *II=dyn_cast(&I); + if(!II) + return false; + } - count++; + count++; } } else { int count=1; - if(count==1 && !dyn_castI) - return false; - - 
if(count==2 && !dyn_castI) - return false; - - count++; + for(Instruction& I: BB){ + if(count==1){ + LoadInst *II=dyn_cast(&I); + if(!II) + return false; + } + + if(count==2){ + ReturnInst *II=dyn_cast(&I); + if(!II) + return false; + } + count++; + } } step++; } From 4ac15368ee3042ea65889cd605a1b141503199db Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Mon, 14 Aug 2023 14:24:07 +0200 Subject: [PATCH 14/21] Added table-based crc32 pattern matcher (better than previous version)! --- .../AggressiveInstCombine.cpp | 142 +++++++++++++++++- llvm/lib/Transforms/Utils/PetarHelloWorld.cpp | 23 +-- 2 files changed, 154 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 10c5c67a1f64f..843a185cbbe01 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -689,7 +689,147 @@ define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { ret i32 %28 } */ -static bool tryToRecognizeTableBasedCRC32(Function &F){ +static bool tryToRecognizeTableBasedCRC32(Instruction &I){ + LoadInst *LI = dyn_cast(&I); + if (!LI) + return false; + + Type *AccessType = LI->getType(); + if (!AccessType->isIntegerTy()) + return false; + + BranchInst *BI=dyn_cast(LI->getPrevNode()); + if(!BI) + return false; + + StoreInst *SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + Instruction *II=dyn_cast(SI->getPrevNode()); + Value *help1; + Value *help2; + if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) + return false; + + II=dyn_cast(II->getPrevNode()); + + if(!match(II, m_LShr(m_Value(help1), m_SpecificInt(8)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + LI=dyn_cast(LI->getPrevNode()); + if(!LI) + return false; + + GetElementPtrInst *GEPI=dyn_cast(LI->getPrevNode()); + if(!GEPI) + return false; + + ZExtInst 
*ZI=dyn_cast(GEPI->getPrevNode()); + if(!ZI) + return false; + + II=dyn_cast(ZI->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_Value(help2)))) + return false; + + II=dyn_cast(II->getPrevNode()); + if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); + if(!SI) + return false; + + GEPI=dyn_cast(SI->getPrevNode()); + if(!GEPI) + return false; + + LI=dyn_cast(GEPI->getPrevNode()); + if(!LI) + return false; + + LI=dyn_cast(LI->getPrevNode()); + if(!LI) + return false; + + BI=dyn_cast(LI->getPrevNode()); + if(!BI) + return false; + + ICmpInst *ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) + return false; + + SI=dyn_cast(ICMPI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BI=dyn_cast(LI->getPrevNode()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + BitCastInst *BCI=dyn_cast(SI->getPrevNode()); + if(!BCI) + return false; + + LI=dyn_cast(BCI->getPrevNode()); + if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + AllocaInst *AI=dyn_cast(SI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + errs() << "Table-based CRC32 algorithm is finally revognized!" << "\n"; + + return true; +} +static bool tryToRecognizeTableBasedCRC32BruteForce(Function &F){ //Brute force pattern matching! 
int step=1; diff --git a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp index a7837166f5e8a..703e274dc2d4e 100644 --- a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp +++ b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp @@ -15,16 +15,19 @@ PreservedAnalyses PetarHelloWorldPass::run(Function &F, FunctionAnalysisManager errs() << "Here?!\n"; errs() << "Basic block: " << BB.getName() << "...\n"; for(Instruction& I: BB){ - errs()<< "Instruction: " << I.getName() << "\n"; - - if(CallBase::classof(&I)){ - CallBase* callBase = (CallBase*) &I; - StringRef functionName = callBase->getCalledFunction()->getName(); - if(llvmcttzRegex.match(functionName)){ - errs() << "Look what we have found -> " << functionName << "\n"; - count_cttz_intrinsic_appearance++; - } - } + //errs()<< "Instruction: " << I.getName() << "\n"; + errs() << "Instruction value name: " << I.getValueName() << "\n"; + errs() << "Instruction type: " << I.getType() << "\n"; + //errs() << CallBase::classof(&I) << "\n"; + if(CallBase::classof(&I)){ + CallBase* callBase = (CallBase*) &I; + StringRef functionName = callBase->getCalledFunction()->getName(); + //errs() << "Instruction name: " << functionName << "\n"; + if(llvmcttzRegex.match(functionName)){ + errs() << "Look what we have found -> " << functionName << "\n"; + count_cttz_intrinsic_appearance++; + } + } } } errs() << "\n-------------------------------\n"; From e312265ab66cd5f78205efc72b8939bbc8928a3e Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Mon, 14 Aug 2023 15:57:38 +0200 Subject: [PATCH 15/21] Code refactoring! 
--- .../AggressiveInstCombine.cpp | 4 +++ llvm/lib/Transforms/Utils/PetarHelloWorld.cpp | 26 ++++++------------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 843a185cbbe01..627b098915ce3 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1516,6 +1516,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedFunnelShift(I, DT); MadeChange |= tryToRecognizePopCount(I); + bool flag=tryToRecognizeTableBasedCRC32(I); + MadeChange |= flag; + if(flag) + errs() << "Function we have created seems to work properly!\n"; MadeChange |= tryToFPToSat(I, TTI); MadeChange |= tryToRecognizeTableBasedCttz(I); MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); diff --git a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp index b46c63a68e980..098bd52d86bb3 100644 --- a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp +++ b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp @@ -1,42 +1,32 @@ -#include "llvm/Transforms/Utils/PetarCountCttz.h" +#include "llvm/Transforms/Utils/PetarHelloWorld.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Regex.h" using namespace llvm; -int countCttzIntrinsicAppearance=0; +int count_Cttz_Intrinsic_Appearance=0; -PreservedAnalyses PetarCountCttzPass::run(Function &F, FunctionAnalysisManager &AM) { +PreservedAnalyses PetarHelloWorldPass::run(Function &F, FunctionAnalysisManager &AM) { + + Regex llvmcttzRegex("^llvm.cttz.*"); errs() << "Function name: " << F.getName() << "\n"; - //errs() << "\n-------------------------------\n"; - //errs() << "Instructions within this function!\n"; for(BasicBlock& BB: F){ - errs() << "Here?!\n"; - 
errs() << "Basic block: " << BB.getName() << "...\n"; for(Instruction& I: BB){ -<<<<<<< HEAD //errs()<< "Instruction: " << I.getName() << "\n"; - errs() << "Instruction value name: " << I.getValueName() << "\n"; - errs() << "Instruction type: " << I.getType() << "\n"; - //errs() << CallBase::classof(&I) << "\n"; if(CallBase::classof(&I)){ CallBase* callBase = (CallBase*) &I; StringRef functionName = callBase->getCalledFunction()->getName(); //errs() << "Instruction name: " << functionName << "\n"; if(llvmcttzRegex.match(functionName)){ errs() << "Look what we have found -> " << functionName << "\n"; - count_cttz_intrinsic_appearance++; + count_Cttz_Intrinsic_Appearance++; } } -======= - errs()<< "Instruction: " << I.getName() << "\n"; - if(II && (II->getIntrinsicID() == Intrinsic::cttz)) - countCttzIntrinsicAppearance++; ->>>>>>> 086aa55cc9827a9e7d8bafaf9a3311eb3700250c } } //errs() << "\n-------------------------------\n"; - errs() << "Total number of cttz intrinsic appearances is equal to: " << countCttzIntrinsicAppearance << ".\n"; + errs() << "Total number of cttz intrinsic appearances is equal to: " << count_Cttz_Intrinsic_Appearance << ".\n"; return PreservedAnalyses::all(); } \ No newline at end of file From 813793d8f4b68135f9f34a7d4aa632575902a10c Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Mon, 14 Aug 2023 16:32:48 +0200 Subject: [PATCH 16/21] Code refactoring! 
--- .../Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 627b098915ce3..1a4e218c762dc 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1520,6 +1520,8 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, MadeChange |= flag; if(flag) errs() << "Function we have created seems to work properly!\n"; + else + errs() << "Table-based crc32 algorithm wasn't recognized!\n"; MadeChange |= tryToFPToSat(I, TTI); MadeChange |= tryToRecognizeTableBasedCttz(I); MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); From 3c1aed850c583ffee8d83601a11c1fbf020c26b5 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Wed, 16 Aug 2023 13:46:50 +0200 Subject: [PATCH 17/21] Table-based crc32 pattern matcher has been corrected and tested! --- .../AggressiveInstCombine.cpp | 68 ++++++++++++++----- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 1a4e218c762dc..5efe075edd5c2 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -690,33 +690,55 @@ define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { } */ static bool tryToRecognizeTableBasedCRC32(Instruction &I){ - LoadInst *LI = dyn_cast(&I); - if (!LI) + ReturnInst *RI=dyn_cast(&I); + if(!RI) return false; - Type *AccessType = LI->getType(); - if (!AccessType->isIntegerTy()) + LoadInst *LI = dyn_cast(RI->getPrevNode()); + if (!LI) return false; - - BranchInst *BI=dyn_cast(LI->getPrevNode()); + + //return true; + //errs() << "Why here, sir?!" 
<< "\n"; + //errs() << "Why here, sir2?!" << "\n"; + //errs() << "Why here, sir3?!" << "\n"; + BasicBlock *BB=dyn_cast(LI->getParent()->getPrevNode()); + + /* + Instruction* III=dyn_cast(&BB->back()); + //errs() << "Why here, miss?!" << "\n"; + if(!III){ + errs() << "Why here?!" << "\n"; + return false; + } + */ + + BranchInst *BI=dyn_cast(&BB->back()); + //errs() << "Why here, miss?!" << "\n"; if(!BI) return false; + + //StoreInst *SI=dyn_cast(III->getPrevNode()); StoreInst *SI=dyn_cast(BI->getPrevNode()); if(!SI) return false; + + //------------------------------------------------------------------------------------------------------------------------------------------------- + //return true; + Instruction *II=dyn_cast(SI->getPrevNode()); Value *help1; Value *help2; if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) return false; - + II=dyn_cast(II->getPrevNode()); if(!match(II, m_LShr(m_Value(help1), m_SpecificInt(8)))) - return false; - + return false; + LI=dyn_cast(II->getPrevNode()); if(!LI) return false; @@ -760,10 +782,16 @@ static bool tryToRecognizeTableBasedCRC32(Instruction &I){ LI=dyn_cast(LI->getPrevNode()); if(!LI) return false; + + + //------------------------------------------------------------------------------------------------------------------------------------------------- + //return true; + + BB=dyn_cast(LI->getParent()->getPrevNode()); - BI=dyn_cast(LI->getPrevNode()); + BI=dyn_cast(&BB->back()); if(!BI) - return false; + return false; ICmpInst *ICMPI=dyn_cast(BI->getPrevNode()); if(!ICMPI) @@ -780,8 +808,13 @@ static bool tryToRecognizeTableBasedCRC32(Instruction &I){ LI=dyn_cast(II->getPrevNode()); if(!LI) return false; + + //------------------------------------------------------------------------------------------------------------------------------------------------- + //return true; + + BB=dyn_cast(LI->getParent()->getPrevNode()); - BI=dyn_cast(LI->getPrevNode()); + BI=dyn_cast(&BB->back()); if(!BI) return false; @@ -824,8 +857,10 @@ 
static bool tryToRecognizeTableBasedCRC32(Instruction &I){ AI=dyn_cast(AI->getPrevNode()); if(!AI) return false; + - errs() << "Table-based CRC32 algorithm is finally revognized!" << "\n"; + errs() << "!!!Table-based CRC32 algorithm is finally recognized!!!" << "\n"; + errs() << "It will be nice if we can check the value of the operands in this algorithm implementation!" << "\n"; return true; } @@ -1504,7 +1539,8 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, // Ignore unreachable basic blocks. if (!DT.isReachableFromEntry(&BB)) continue; - + + //errs() << "Hello from here!" << "\n"; const DataLayout &DL = F.getParent()->getDataLayout(); // Walk the block backwards for efficiency. We're matching a chain of @@ -1520,8 +1556,8 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, MadeChange |= flag; if(flag) errs() << "Function we have created seems to work properly!\n"; - else - errs() << "Table-based crc32 algorithm wasn't recognized!\n"; + //else + //errs() << "Table-based crc32 algorithm wasn't recognized!\n"; MadeChange |= tryToFPToSat(I, TTI); MadeChange |= tryToRecognizeTableBasedCttz(I); MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); From e196e2d11531024a44c91b2bb053b229f0cee907 Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Wed, 16 Aug 2023 14:52:41 +0200 Subject: [PATCH 18/21] Added llvm-lit test for table-based crc32 pattern matcher and did some code refactoring! 
--- .../AggressiveInstCombine.cpp | 7 ++- .../Transforms/Util/petar-table-based-crc.ll | 52 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Util/petar-table-based-crc.ll diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 5efe075edd5c2..a9f9b93f4c4e9 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1049,7 +1049,7 @@ static bool tryToRecognizeTableBasedCRC32BruteForce(Function &F){ step++; } - errs() << "Table-based crc32 algortihm is recognized!" << "\n"; + errs() << "Table-based crc32 algortihm is recognized using brute force algorithm!" << "\n"; return true; } @@ -1535,6 +1535,11 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA) { bool MadeChange = false; + + //bool globalflag=tryToRecognizeTableBasedCRC32BruteForce(F); + //if(globalflag) + // errs() << "I am supprised!" << "\n"; + for (BasicBlock &BB : F) { // Ignore unreachable basic blocks. 
if (!DT.isReachableFromEntry(&BB)) diff --git a/llvm/test/Transforms/Util/petar-table-based-crc.ll b/llvm/test/Transforms/Util/petar-table-based-crc.ll new file mode 100644 index 0000000000000..e59c362d19be6 --- /dev/null +++ b/llvm/test/Transforms/Util/petar-table-based-crc.ll @@ -0,0 +1,52 @@ +; RUN: opt -disable-output -passes=aggressive-instcombine %s 2>&1 | FileCheck %s + +@__const.main.buffer = private unnamed_addr constant [38 x i32] [i32 -1290756417, i32 -1606390453, i32 1378610760, i32 -2032039261, i32 1955203488, i32 1742404180, i32 -1783531177, i32 -878557837, i32 969524848, i32 714683780, i32 -655182201, i32 205050476, i32 -28094097, i32 -318528869, i32 526918040, i32 1361435347, i32 -1555146288, i32 -1340167644, i32 1114974503, i32 -1765847604, i32 1691668175, i32 2005155131, i32 -2047885768, i32 -604208612, i32 697762079, i32 986182379, i32 -928222744, i32 476452099, i32 -301099520, i32 -44210700, i32 255256311, i32 1640403810, i32 -1817374623, i32 -2130844779, i32 1922457750, i32 -1503918979, i32 1412925310, i32 1197962378], align 16 +@.str = private unnamed_addr constant [13 x i8] c"result = %u\0A\00", align 1 +@crc32Table = internal constant [256 x i32] [i32 0, i32 -227835133, i32 -516198153, i32 324072436, i32 -946170081, i32 904991772, i32 648144872, i32 -724933397, i32 -1965467441, i32 2024987596, i32 1809983544, i32 -1719030981, i32 1296289744, i32 -1087877933, i32 -1401372889, i32 1578318884, i32 274646895, i32 -499825556, i32 -244992104, i32 51262619, i32 -675000208, i32 632279923, i32 922689671, i32 -996891772, i32 -1702387808, i32 1760304291, i32 2075979607, i32 -1982370732, i32 1562183871, i32 -1351185476, i32 -1138329528, i32 1313733451, i32 549293790, i32 -757723683, i32 -1048117719, i32 871202090, i32 -416867903, i32 357341890, i32 102525238, i32 -193467851, i32 -1436232175, i32 1477399826, i32 1264559846, i32 -1187764763, i32 1845379342, i32 -1617575411, i32 -1933233671, i32 2125378298, i32 820201905, i32 -1031222606, i32 -774358714, 
i32 598981189, i32 -143008082, i32 85089709, i32 373468761, i32 -467063462, i32 -1170599554, i32 1213305469, i32 1526817161, i32 -1452612982, i32 2107672161, i32 -1882520222, i32 -1667500394, i32 1861252501, i32 1098587580, i32 -1290756417, i32 -1606390453, i32 1378610760, i32 -2032039261, i32 1955203488, i32 1742404180, i32 -1783531177, i32 -878557837, i32 969524848, i32 714683780, i32 -655182201, i32 205050476, i32 -28094097, i32 -318528869, i32 526918040, i32 1361435347, i32 -1555146288, i32 -1340167644, i32 1114974503, i32 -1765847604, i32 1691668175, i32 2005155131, i32 -2047885768, i32 -604208612, i32 697762079, i32 986182379, i32 -928222744, i32 476452099, i32 -301099520, i32 -44210700, i32 255256311, i32 1640403810, i32 -1817374623, i32 -2130844779, i32 1922457750, i32 -1503918979, i32 1412925310, i32 1197962378, i32 -1257441399, i32 -350237779, i32 427051182, i32 170179418, i32 -129025959, i32 746937522, i32 -554770511, i32 -843174843, i32 1070968646, i32 1905808397, i32 -2081171698, i32 -1868356358, i32 1657317369, i32 -1241332974, i32 1147748369, i32 1463399397, i32 -1521340186, i32 -79622974, i32 153784257, i32 444234805, i32 -401473738, i32 1021025245, i32 -827320098, i32 -572462294, i32 797665321, i32 -2097792136, i32 1889384571, i32 1674398607, i32 -1851340660, i32 1164749927, i32 -1224265884, i32 -1537745776, i32 1446797203, i32 137323447, i32 -96149324, i32 -384560320, i32 461344835, i32 -810158936, i32 1037989803, i32 781091935, i32 -588970148, i32 -1834419177, i32 1623424788, i32 1939049696, i32 -2114449437, i32 1429367560, i32 -1487280117, i32 -1274471425, i32 1180866812, i32 410100952, i32 -367384613, i32 -112536529, i32 186734380, i32 -538233913, i32 763408580, i32 1053836080, i32 -860110797, i32 -1572096602, i32 1344288421, i32 1131464017, i32 -1323612590, i32 1708204729, i32 -1749376582, i32 -2065018290, i32 1988219213, i32 680717673, i32 -621187478, i32 -911630946, i32 1002577565, i32 -284657034, i32 493091189, i32 238226049, i32 -61306494, 
i32 -1307217207, i32 1082061258, i32 1395524158, i32 -1589280451, i32 1972364758, i32 -2015074603, i32 -1800104671, i32 1725896226, i32 952904198, i32 -894981883, i32 -638100751, i32 731699698, i32 -11092711, i32 222117402, i32 510512622, i32 -335130899, i32 -1014159676, i32 837199303, i32 582374963, i32 -790768336, i32 68661723, i32 -159632680, i32 -450051796, i32 390545967, i32 1230274059, i32 -1153434360, i32 -1469116676, i32 1510247935, i32 -1899042540, i32 2091215383, i32 1878366691, i32 -1650582816, i32 -741088853, i32 565732008, i32 854102364, i32 -1065151905, i32 340358836, i32 -433916489, i32 -177076669, i32 119113024, i32 1493875044, i32 -1419691417, i32 -1204696685, i32 1247431312, i32 -1634718085, i32 1828433272, i32 2141937292, i32 -1916740209, i32 -483350502, i32 291187481, i32 34330861, i32 -262120466, i32 615137029, i32 -691946490, i32 -980332558, i32 939183345, i32 1776939221, i32 -1685949482, i32 -1999470558, i32 2058945313, i32 -1368168502, i32 1545135305, i32 1330124605, i32 -1121741762, i32 -210866315, i32 17165430, i32 307568514, i32 -532767615, i32 888469610, i32 -962626711, i32 -707819363, i32 665062302, i32 2042050490, i32 -1948470087, i32 -1735637171, i32 1793573966, i32 -1104306011, i32 1279665062, i32 1595330642, i32 -1384295599], align 16 + +; Function Attrs: noinline nounwind uwtable +define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + %7 = alloca i32*, align 8 + store i32 %0, i32* %4, align 4 + store i8* %1, i8** %5, align 8 + store i64 %2, i64* %6, align 8 + %8 = load i8*, i8** %5, align 8 + %9 = bitcast i8* %8 to i32* + store i32* %9, i32** %7, align 8 + br label %10 + +10: ; preds = %14, %3 + %11 = load i64, i64* %6, align 8 + %12 = add i64 %11, -1 + store i64 %12, i64* %6, align 8 + %13 = icmp ne i64 %11, 0 + br i1 %13, label %14, label %27 + +14: ; preds = %10 + %15 = load i32, i32* %4, align 4 + %16 = load i32*, i32** %7, align 8 
+ %17 = getelementptr inbounds i32, i32* %16, i32 1 + store i32* %17, i32** %7, align 8 + %18 = load i32, i32* %16, align 4 + %19 = xor i32 %15, %18 + %20 = and i32 %19, 255 + %21 = zext i32 %20 to i64 + %22 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32Table, i64 0, i64 %21 + %23 = load i32, i32* %22, align 4 + %24 = load i32, i32* %4, align 4 + %25 = lshr i32 %24, 8 + %26 = xor i32 %23, %25 + store i32 %26, i32* %4, align 4 + br label %10 + +27: ; preds = %10 + %28 = load i32, i32* %4, align 4 + ret i32 %28 +} + +; CHECK: !!!Table-based CRC32 algorithm is finally recognized!!! +; CHECK-NEXT: It will be nice if we can check the value of the operands in this algorithm implementation! +; CHECK-NEXT: Function we have created seems to work properly! From e167ec67f849eafd09542fc060721ecf526648db Mon Sep 17 00:00:00 2001 From: Petar Tesic Date: Thu, 17 Aug 2023 11:28:23 +0200 Subject: [PATCH 19/21] Added pattern matcher for CRC32 algorithm (not natble-based version)! --- .../AggressiveInstCombine.cpp | 398 ++++++++++++++++-- 1 file changed, 366 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index a9f9b93f4c4e9..25c25e47e678d 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" +#include using namespace llvm; using namespace PatternMatch; @@ -1054,6 +1055,328 @@ static bool tryToRecognizeTableBasedCRC32BruteForce(Function &F){ return true; } +static bool tryToRecognizeCRC32(Instruction &I){ + ReturnInst *RI=dyn_cast(&I); + if(!RI) + return false; + + CallInst *CI=dyn_cast(RI->getPrevNode()); + if(!CI) + return false; + + Instruction *II=dyn_cast(CI->getPrevNode()); + if(!II) + return 
false; + + Value *help1; + Value *help2; + if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) + return false; + + LoadInst *LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + + BasicBlock *BB=dyn_cast(LI->getParent()->getPrevNode()); + BranchInst *BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + StoreInst *SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!II) + return false; + + if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!II) + return false; + + if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!II) + return false; + + if(!match(II, m_Shl(m_Value(help1), m_SpecificInt(1)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + //---------------------------------------------------------------------------------------------------------------------- + //107. 
if.else: + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!II) + return false; + + if(!match(II, m_Shl(m_Value(help1), m_SpecificInt(1)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + //---------------------------------------------------------------------------------------------------------------------- + //100. if.then: + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!II) + return false; + + if(!match(II, m_Xor(m_Value(help1), m_SpecificInt(79764919)))) + return false; + + II=dyn_cast(II->getPrevNode()); + if(!II) + return false; + + if(!match(II, m_Shl(m_Value(help1), m_SpecificInt(1)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + //---------------------------------------------------------------------------------------------------------------------- + //93. for.body: + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + ICmpInst *ICI=dyn_cast(BI->getPrevNode()); + if(!ICI) + return false; + + II=dyn_cast(ICI->getPrevNode()); + if(!II) + return false; + + if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))); + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + LI=dyn_cast(LI->getPrevNode()); + if(!LI) + return false; + + //---------------------------------------------------------------------------------------------------------------------- + //88. 
for.cond: + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + ICI=dyn_cast(BI->getPrevNode()); + if(!ICI) + return false; + + LI=dyn_cast(ICI->getPrevNode()); + if(!LI) + return false; + + //---------------------------------------------------------------------------------------------------------------------- + //74. while.body: + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + CI=dyn_cast(SI->getPrevNode()); + if(!CI) + return false; + + LI=dyn_cast(CI->getPrevNode()); + if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); + if(!SI) + return false; + + ZExtInst *ZI=dyn_cast(SI->getPrevNode()); + if(!ZI) + return false; + + LI=dyn_cast(ZI->getPrevNode()); + if(!LI) + return false; + + GetElementPtrInst *GEPI=dyn_cast(LI->getPrevNode()); + if(!GEPI) + return false; + + SExtInst *SEI=dyn_cast(GEPI->getPrevNode()); + if(!SEI) + return false; + + LI=dyn_cast(SEI->getPrevNode()); + if(!LI) + return false; + + LI=dyn_cast(LI->getPrevNode()); + if(!LI) + return false; + + //---------------------------------------------------------------------------------------------------------------------- + //68. while.cond: + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + ICI=dyn_cast(BI->getPrevNode()); + if(!ICI) + return false; + + LI=dyn_cast(ICI->getPrevNode()); + if(!LI) + return false; + + LI=dyn_cast(LI->getPrevNode()); + if(!LI) + return false; + + //---------------------------------------------------------------------------------------------------------------------- + //55. 
entry: + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + AllocaInst *AI=dyn_cast(SI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + return true; +} //--------------------------------------------------------------------------------------------------------------------------------------- // Check if this array of constants represents a cttz table. @@ -1540,40 +1863,51 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, //if(globalflag) // errs() << "I am supprised!" << "\n"; - for (BasicBlock &BB : F) { - // Ignore unreachable basic blocks. - if (!DT.isReachableFromEntry(&BB)) - continue; - - //errs() << "Hello from here!" << "\n"; - const DataLayout &DL = F.getParent()->getDataLayout(); - - // Walk the block backwards for efficiency. We're matching a chain of - // use->defs, so we're more likely to succeed by starting from the bottom. - // Also, we want to avoid matching partial patterns. - // TODO: It would be more efficient if we removed dead instructions - // iteratively in this loop rather than waiting until the end. 
- for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) { - MadeChange |= foldAnyOrAllBitsSet(I); - MadeChange |= foldGuardedFunnelShift(I, DT); - MadeChange |= tryToRecognizePopCount(I); - bool flag=tryToRecognizeTableBasedCRC32(I); - MadeChange |= flag; - if(flag) - errs() << "Function we have created seems to work properly!\n"; - //else - //errs() << "Table-based crc32 algorithm wasn't recognized!\n"; - MadeChange |= tryToFPToSat(I, TTI); - MadeChange |= tryToRecognizeTableBasedCttz(I); - MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); - MadeChange |= foldPatternedLoads(I, DL); - // NOTE: This function introduces erasing of the instruction `I`, so it - // needs to be called at the end of this sequence, otherwise we may make - // bugs. - MadeChange |= foldSqrt(I, TTI, TLI); + if(F.getName().str()=="reverse"){ + errs() << "We won't check this function!" << "\n"; + return false; + } + + Module *M=F.getParent(); + for(Function &F: *M){ + for (BasicBlock &BB : F) { + // Ignore unreachable basic blocks. + if (!DT.isReachableFromEntry(&BB)) + continue; + + //errs() << "Hello from here!" << "\n"; + const DataLayout &DL = F.getParent()->getDataLayout(); + + // Walk the block backwards for efficiency. We're matching a chain of + // use->defs, so we're more likely to succeed by starting from the bottom. + // Also, we want to avoid matching partial patterns. + // TODO: It would be more efficient if we removed dead instructions + // iteratively in this loop rather than waiting until the end. 
+ for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) { + MadeChange |= foldAnyOrAllBitsSet(I); + MadeChange |= foldGuardedFunnelShift(I, DT); + MadeChange |= tryToRecognizePopCount(I); + bool flag1=tryToRecognizeTableBasedCRC32(I); + bool flag2=tryToRecognizeCrc(I); + MadeChange |= flag1; + if(flag1) + errs() << "Function we have created seems to work properly!\n"; + //else + //errs() << "Table-based crc32 algorithm wasn't recognized!\n"; + if(flag2) + errs() << "CRC32 algorithm has been recognised!" << "\n"; + + MadeChange |= tryToFPToSat(I, TTI); + MadeChange |= tryToRecognizeTableBasedCttz(I); + MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); + MadeChange |= foldPatternedLoads(I, DL); + // NOTE: This function introduces erasing of the instruction `I`, so it + // needs to be called at the end of this sequence, otherwise we may make + // bugs. + MadeChange |= foldSqrt(I, TTI, TLI); + } } } - // We're done with transforms, so remove dead instructions. if (MadeChange) for (BasicBlock &BB : F) From b7953040a1f110d8f9393130a865dfa1de839181 Mon Sep 17 00:00:00 2001 From: PosteruOle Date: Mon, 16 Oct 2023 17:07:21 +0200 Subject: [PATCH 20/21] Add a pattern matcher for unoptimized CRC algorithm!
--- clang/include/clang/Basic/BuiltinsPPC.def | 1 + llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 + llvm/include/llvm/CodeGen/TargetLowering.h | 4 +- llvm/include/llvm/IR/Intrinsics.td | 17 + .../include/llvm/Target/TargetSelectionDAG.td | 4 +- llvm/lib/Analysis/ConstantFolding.cpp | 5 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 31 + llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 12 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 21 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 27 + llvm/lib/IR/Instructions.cpp | 4 +- llvm/lib/Target/PowerPC/P10InstrResources.td | 5 + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +- llvm/lib/Target/PowerPC/PPCISelLowering.h | 6 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 4 + llvm/lib/Target/PowerPC/PPCInstrP10.td | 4 + .../AggressiveInstCombine.cpp | 1096 ++++++++--------- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 2 + 18 files changed, 645 insertions(+), 615 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 1e52d351780c1..ef00e4652097c 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -924,6 +924,7 @@ TARGET_BUILTIN(__builtin_pextd, "ULLiULLiULLi", "", "isa-v31-instructions") TARGET_BUILTIN(__builtin_cfuged, "ULLiULLiULLi", "", "isa-v31-instructions") TARGET_BUILTIN(__builtin_cntlzdm, "ULLiULLiULLi", "", "isa-v31-instructions") TARGET_BUILTIN(__builtin_cnttzdm, "ULLiULLiULLi", "", "isa-v31-instructions") +//TARGET_BUILTIN(__builtin_cnrcdm, "ULLiULLiULLi", "", "isa-v31-instructions") // Double-double (un)pack BUILTIN(__builtin_unpack_longdouble, "dLdIi", "") diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 0b1d1d75151cb..6ffa091b33840 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -713,6 +713,9 @@ enum NodeType { /// Byte Swap and Counting operators. 
BSWAP, CTTZ, + CRC, + CRC8, + CRC32, CTLZ, CTPOP, BITREVERSE, @@ -720,6 +723,7 @@ enum NodeType { /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, + CRC_ZERO_UNDEF, CTLZ_ZERO_UNDEF, /// Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 0aa4fa2123369..04955defc8140 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5060,11 +5060,13 @@ class TargetLowering : public TargetLoweringBase { /// \returns The expansion result or SDValue() if it fails. SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; + SDValue expandCRC(SDNode *N, SelectionDAG &DAG) const; /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes. /// \param N Node to expand /// \returns The expansion result or SDValue() if it fails. SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const; - + + SDValue expandVPCRC(SDNode *N, SelectionDAG &DAG) const; /// Expand ABS nodes. Expands vector/scalar ABS nodes, /// vector nodes can only succeed if all operations are legal/custom. 
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 23c392ef1e633..837183c5c1dae 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1306,6 +1306,11 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, def int_ctlz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_cttz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_crc : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + + def int_crc32 : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; +} +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { + def int_crc8 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i8_ty, llvm_i16_ty]>; } //===------------------------ Debugger Intrinsics -------------------------===// @@ -1898,6 +1903,7 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { [ LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; + def int_vp_fshl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -2155,6 +2161,17 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg> llvm_i1_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; + def int_vp_crc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + llvm_i1_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_crc32 : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; } def int_get_active_lane_mask: diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 74fa24b66fc2a..dfce3877d3183 100644 --- 
a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -143,7 +143,7 @@ def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; -def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz +def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz, crc SDTCisInt<0>, SDTCisInt<1> ]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext @@ -453,9 +453,11 @@ def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>; def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>; +def crc : SDNode<"ISD::CRC" , SDTIntBitCountUnaryOp>; def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>; def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; +def crc_zero_undef : SDNode<"ISD::CRC_ZERO_UNDEF" , SDTIntBitCountUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 0f5b7270c5364..5e326e7ba32d1 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1527,6 +1527,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::crc: + case Intrinsic::crc32: case Intrinsic::fshl: case Intrinsic::fshr: case Intrinsic::launder_invariant_group: @@ -2828,6 +2830,7 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, else return ConstantInt::get(Ty, C0->ssub_sat(*C1)); case Intrinsic::cttz: + case Intrinsic::crc: case Intrinsic::ctlz: assert(C1 && 
"Must be constant int"); @@ -2836,6 +2839,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return PoisonValue::get(Ty); if (!C0) return Constant::getNullValue(Ty); + if (IntrinsicID == Intrinsic::crc) + return ConstantInt::get(Ty, C0->countr_zero()); if (IntrinsicID == Intrinsic::cttz) return ConstantInt::get(Ty, C0->countr_zero()); else diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 54eb29342b93c..f33bc94691bd0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -468,6 +468,8 @@ namespace { SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); + //SDValue visitCRC(SDNode *N); + //SDValue visitCRC_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitVSELECT(SDNode *N); @@ -1938,6 +1940,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); + //case ISD::CRC: return visitCTTZ(N); //return visitCRC(N); + //case ISD::CRC_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); //return visitCRC_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::VSELECT: return visitVSELECT(N); @@ -10910,7 +10914,24 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { return SDValue(); } +/* +SDValue DAGCombiner::visitCRC(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + // If the value is known never to be zero, switch to the undef version. 
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + + return SDValue(); +} +*/ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -10920,7 +10941,17 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } +/* +SDValue DAGCombiner::visitCRC_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + // fold (cttz_zero_undef c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + return SDValue(); +} +*/ SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d902b358526bd..432facc1da77a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2833,6 +2833,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if ((Tmp1 = TLI.expandCTTZ(Node, DAG))) Results.push_back(Tmp1); break; + case ISD::CRC32: + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: + if ((Tmp1 = TLI.expandCRC(Node, DAG))) + Results.push_back(Tmp1); + break; case ISD::BITREVERSE: if ((Tmp1 = TLI.expandBITREVERSE(Node, DAG))) Results.push_back(Tmp1); @@ -4668,17 +4674,19 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { switch (Node->getOpcode()) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: + //case ISD::CRC: + //case ISD::CRC_ZERO_UNDEF: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument unless its cttz, then use any_extend. 
- if (Node->getOpcode() == ISD::CTTZ || + if (Node->getOpcode() == ISD::CRC || Node->getOpcode() == ISD::CRC_ZERO_UNDEF || Node->getOpcode() == ISD::CTTZ || Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0)); else Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - if (Node->getOpcode() == ISD::CTTZ) { + if (Node->getOpcode() == ISD::CRC || Node->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d83aaf31950d5..354026db02366 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3481,6 +3481,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(LowBits); break; } + /* + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.countMaxTrailingZeros(); + unsigned LowBits = llvm::bit_width(PossibleTZ); + Known.Zero.setBitsFrom(LowBits); + //Known. 
+ break; + } + */ case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); @@ -5404,6 +5416,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(), C->isOpaque()); + case ISD::CRC32: + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: + return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::FP16_TO_FP: case ISD::BF16_TO_FP: { bool Ignored; @@ -5522,6 +5539,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: + case ISD::CRC32: + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: case ISD::CTPOP: { SDValue Ops = {Operand}; if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops)) @@ -5756,6 +5776,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return Operand; break; case ISD::CTLZ: + //case ISD::CRC: case ISD::CTTZ: if (Operand.getValueType().getScalarType() == MVT::i1) return getNOT(DL, Operand, Operand.getValueType()); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 32be369d08cf7..06cebbf265d99 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8593,6 +8593,17 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::CTPOP, dl, VT, Tmp); } +SDValue TargetLowering::expandCRC(SDNode *Node, SelectionDAG &DAG) const { + //We have to change this function! 
+ SDLoc dl(Node); + EVT VT = Node->getValueType(0); + SDValue Op = Node->getOperand(0); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + return DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT))); +} + SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const { SDValue Op = Node->getOperand(0); SDValue Mask = Node->getOperand(1); @@ -8609,6 +8620,22 @@ SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL); } +SDValue TargetLowering::expandVPCRC(SDNode *Node, SelectionDAG &DAG) const { + SDValue Op = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue VL = Node->getOperand(2); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1)) + SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op, + DAG.getConstant(-1, dl, VT), Mask, VL); + SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op, + DAG.getConstant(1, dl, VT), Mask, VL); + SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL); + return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL); +} + SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 3983fd8ed7ff8..b05be8646c39c 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -642,7 +642,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, this->FTy = FTy; assert(getNumOperands() == Args.size() + CountBundleInputs(Bundles) + 1 && "NumOperands not set up?"); - +/* #ifndef NDEBUG assert((Args.size() == FTy->getNumParams() || (FTy->isVarArg() && Args.size() > FTy->getNumParams())) && @@ -653,7 +653,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, FTy->getParamType(i) == Args[i]->getType()) && "Calling a 
function with a bad signature!"); #endif - +*/ // Set operands in order of their index to match use-list-order // prediction. llvm::copy(Args, op_begin()); diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index b96fdb00306d1..5468a981c35e8 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -351,6 +351,7 @@ def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read], CFUGED, CNTLZDM, CNTTZDM, + //CNRCDM, PDEPD, PEXTD, VCFUGED, @@ -660,6 +661,10 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read], CNTTZD_rec, CNTTZW, CNTTZW8, CNTTZW8_rec, CNTTZW_rec, + //CNRCD, + //CNRCD_rec, + //CNRCW, CNRCW8, + //CNRCW8_rec, CNRCW_rec, FTSQRT, MTVSRBM, MTVSRBMI, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 012052a1be1d3..87d5b28b4dcac 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -464,13 +464,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? 
Custom : Expand); } - // CTPOP or CTTZ were introduced in P8/P9 respectively + // CTPOP or CTTZ (or CRC) were introduced in P8/P9 respectively if (Subtarget.isISA3_0()) { setOperationAction(ISD::CTTZ , MVT::i32 , Legal); setOperationAction(ISD::CTTZ , MVT::i64 , Legal); + setOperationAction(ISD::CRC , MVT::i32 , Legal); + setOperationAction(ISD::CRC , MVT::i64 , Legal); } else { setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::CRC , MVT::i32 , Expand); + setOperationAction(ISD::CRC , MVT::i64 , Expand); } if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { @@ -801,10 +805,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } // Vector instructions introduced in P9 - if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) + if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)){ setOperationAction(ISD::CTTZ, VT, Legal); - else + setOperationAction(ISD::CRC, VT, Legal); + } else { setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CRC, VT, Expand); + } // We promote all shuffles to v16i8. 
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 02f7147dfd6bb..1875b50dbec4c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -782,6 +782,12 @@ namespace llvm { return true; } + /* + bool isCheapToSpeculateCrc(Type *Ty) const override { + return true; + } + */ + bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index a8c27d0cf6a5a..e952c4c7eb7c6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2320,6 +2320,10 @@ defm CNTTZW : XForm_11r<31, 538, (outs gprc:$RA), (ins gprc:$RST), "cnttzw", "$RA, $RST", IIC_IntGeneral, [(set i32:$RA, (cttz i32:$RST))]>, Requires<[IsISA3_0]>, ZExt32To64; +//defm CNRCW : XForm_11r<31, 548, (outs gprc:$RA), (ins gprc:$RST), +// "cnrcw", "$RA, $RST", IIC_IntGeneral, +// [(set i32:$RA, (crc i32:$RST))]>, Requires<[IsISA3_0]>, +// ZExt32To64; defm EXTSB : XForm_11r<31, 954, (outs gprc:$RA), (ins gprc:$RST), "extsb", "$RA, $RST", IIC_IntSimple, [(set i32:$RA, (sext_inreg i32:$RST, i8))]>, SExt32To64; diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 8cb8e4d91db21..70e9bd487311a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -1662,6 +1662,10 @@ let Predicates = [IsISA3_1] in { "cnttzdm $RA, $RST, $RB", IIC_IntGeneral, [(set i64:$RA, (int_ppc_cnttzdm i64:$RST, i64:$RB))]>; + //def CNRCDM : XForm_6<31, 571, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB), + // "cnrcdm $RA, $RST, $RB", IIC_IntGeneral, + // [(set i64:$RA, + // (int_ppc_cnrcdm i64:$RST, i64:$RB))]>; def XXGENPCVBM : XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), "xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>; diff --git 
a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 25c25e47e678d..3d6853027fa9d 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -31,6 +31,17 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/BasicBlock.h" +#include + +#include "llvm/Pass.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace PatternMatch; @@ -537,565 +548,250 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, //--------------------------------------------------------------------------------------------------------------------------------------- // Petar's code! - //LLVM IR code for the naive crc algorithm implementation! 
/* -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i32 @crc32a(i8* %0, i32 %1) #0 { - %3 = alloca i8*, align 8 - %4 = alloca i32, align 4 - %5 = alloca i32, align 4 - %6 = alloca i32, align 4 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - store i8* %0, i8** %3, align 8 - store i32 %1, i32* %4, align 4 - store i32 0, i32* %5, align 4 - store i32 -1, i32* %8, align 4 - br label %9 - -9: ; preds = %43, %2 - %10 = load i32, i32* %5, align 4 - %11 = load i32, i32* %4, align 4 - %12 = icmp ult i32 %10, %11 - br i1 %12, label %13, label %46 - -13: ; preds = %9 - %14 = load i8*, i8** %3, align 8 - %15 = load i32, i32* %5, align 4 - %16 = sext i32 %15 to i64 - %17 = getelementptr inbounds i8, i8* %14, i64 %16 - %18 = load i8, i8* %17, align 1 - %19 = zext i8 %18 to i32 - store i32 %19, i32* %7, align 4 - %20 = load i32, i32* %7, align 4 - %21 = call i32 @reverse(i32 %20) - store i32 %21, i32* %7, align 4 - store i32 0, i32* %6, align 4 - br label %22 - -22: ; preds = %40, %13 - %23 = load i32, i32* %6, align 4 - %24 = icmp sle i32 %23, 7 - br i1 %24, label %25, label %43 - -25: ; preds = %22 - %26 = load i32, i32* %8, align 4 - %27 = load i32, i32* %7, align 4 - %28 = xor i32 %26, %27 - %29 = icmp slt i32 %28, 0 - br i1 %29, label %30, label %34 - -30: ; preds = %25 - %31 = load i32, i32* %8, align 4 - %32 = shl i32 %31, 1 - %33 = xor i32 %32, 79764919 - store i32 %33, i32* %8, align 4 - br label %37 - -34: ; preds = %25 - %35 = load i32, i32* %8, align 4 - %36 = shl i32 %35, 1 - store i32 %36, i32* %8, align 4 - br label %37 - -37: ; preds = %34, %30 - %38 = load i32, i32* %7, align 4 - %39 = shl i32 %38, 1 - store i32 %39, i32* %7, align 4 - br label %40 - -40: ; preds = %37 - %41 = load i32, i32* %6, align 4 - %42 = add nsw i32 %41, 1 - store i32 %42, i32* %6, align 4 - br label %22 - -43: ; preds = %22 - %44 = load i32, i32* %5, align 4 - %45 = add nsw i32 %44, 1 - store i32 %45, i32* %5, align 4 - br label %9 - -46: ; preds = %9 - %47 = 
load i32, i32* %8, align 4 - %48 = xor i32 %47, -1 - %49 = call i32 @reverse(i32 %48) - ret i32 %49 +; Function Attrs: noinline nounwind uwtable +define dso_local zeroext i16 @crcu8(i8 zeroext %0, i16 zeroext %1) #0 { + %3 = alloca i8, align 1 + %4 = alloca i16, align 2 + %5 = alloca i8, align 1 + %6 = alloca i8, align 1 + %7 = alloca i8, align 1 + store i8 %0, i8* %3, align 1 + store i16 %1, i16* %4, align 2 + store i8 0, i8* %5, align 1 + store i8 0, i8* %6, align 1 + store i8 0, i8* %7, align 1 + store i8 0, i8* %5, align 1 + br label %8 + +8: ; preds = %53, %2 + %9 = load i8, i8* %5, align 1 + %10 = zext i8 %9 to i32 + %11 = icmp slt i32 %10, 8 + br i1 %11, label %12, label %56 + +12: ; preds = %8 + %13 = load i8, i8* %3, align 1 + %14 = zext i8 %13 to i32 + %15 = and i32 %14, 1 + %16 = load i16, i16* %4, align 2 + %17 = trunc i16 %16 to i8 + %18 = zext i8 %17 to i32 + %19 = and i32 %18, 1 + %20 = xor i32 %15, %19 + %21 = trunc i32 %20 to i8 + store i8 %21, i8* %6, align 1 + %22 = load i8, i8* %3, align 1 + %23 = zext i8 %22 to i32 + %24 = ashr i32 %23, 1 + %25 = trunc i32 %24 to i8 + store i8 %25, i8* %3, align 1 + %26 = load i8, i8* %6, align 1 + %27 = zext i8 %26 to i32 + %28 = icmp eq i32 %27, 1 + br i1 %28, label %29, label %34 + +29: ; preds = %12 + %30 = load i16, i16* %4, align 2 + %31 = zext i16 %30 to i32 + %32 = xor i32 %31, 16386 + %33 = trunc i32 %32 to i16 + store i16 %33, i16* %4, align 2 + store i8 1, i8* %7, align 1 + br label %35 + +34: ; preds = %12 + store i8 0, i8* %7, align 1 + br label %35 + +35: ; preds = %34, %29 + %36 = load i16, i16* %4, align 2 + %37 = zext i16 %36 to i32 + %38 = ashr i32 %37, 1 + %39 = trunc i32 %38 to i16 + store i16 %39, i16* %4, align 2 + %40 = load i8, i8* %7, align 1 + %41 = icmp ne i8 %40, 0 + br i1 %41, label %42, label %47 + +42: ; preds = %35 + %43 = load i16, i16* %4, align 2 + %44 = zext i16 %43 to i32 + %45 = or i32 %44, 32768 + %46 = trunc i32 %45 to i16 + store i16 %46, i16* %4, align 2 + br label %52 
+ +47: ; preds = %35 + %48 = load i16, i16* %4, align 2 + %49 = zext i16 %48 to i32 + %50 = and i32 %49, 32767 + %51 = trunc i32 %50 to i16 + store i16 %51, i16* %4, align 2 + br label %52 + +52: ; preds = %47, %42 + br label %53 + +53: ; preds = %52 + %54 = load i8, i8* %5, align 1 + %55 = add i8 %54, 1 + store i8 %55, i8* %5, align 1 + br label %8 + +56: ; preds = %8 + %57 = load i16, i16* %4, align 2 + ret i16 %57 } */ -static bool tryToRecognizeCrc(Instruction &I){ - - //To do! - - return false; -} - -// Check if this array of constants represents a crc32 table. -static bool isCRC32Table(const ConstantDataArray &Table){ - unsigned Length=Table.getNumElements(); - if(Length!=256) - return false; - - for(int i=0;i4294967295) - return false; - } - - return true; -} - -// Try to recognize table-based crc32 algorithm implementation. -/* -define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { - %4 = alloca i32, align 4 - %5 = alloca i8*, align 8 - %6 = alloca i64, align 8 - %7 = alloca i32*, align 8 - store i32 %0, i32* %4, align 4 - store i8* %1, i8** %5, align 8 - store i64 %2, i64* %6, align 8 - %8 = load i8*, i8** %5, align 8 - %9 = bitcast i8* %8 to i32* - store i32* %9, i32** %7, align 8 - br label %10 - -10: ; preds = %14, %3 - %11 = load i64, i64* %6, align 8 - %12 = add i64 %11, -1 - store i64 %12, i64* %6, align 8 - %13 = icmp ne i64 %11, 0 - br i1 %13, label %14, label %27 - -14: ; preds = %10 - %15 = load i32, i32* %4, align 4 - %16 = load i32*, i32** %7, align 8 - %17 = getelementptr inbounds i32, i32* %16, i32 1 - store i32* %17, i32** %7, align 8 - %18 = load i32, i32* %16, align 4 - %19 = xor i32 %15, %18 - %20 = and i32 %19, 255 - %21 = zext i32 %20 to i64 - %22 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32Table, i64 0, i64 %21 - %23 = load i32, i32* %22, align 4 - %24 = load i32, i32* %4, align 4 - %25 = lshr i32 %24, 8 - %26 = xor i32 %23, %25 - store i32 %26, i32* %4, align 4 - br label %10 - -27: ; preds = %10 - %28 = 
load i32, i32* %4, align 4 - ret i32 %28 -} -*/ -static bool tryToRecognizeTableBasedCRC32(Instruction &I){ +static bool tryToRecognizeCRC32(Instruction &I){ ReturnInst *RI=dyn_cast(&I); - if(!RI) - return false; - - LoadInst *LI = dyn_cast(RI->getPrevNode()); - if (!LI) + ReturnInst *RIfinal=dyn_cast(&I); + if(!RI || !RIfinal) return false; - //return true; - //errs() << "Why here, sir?!" << "\n"; - //errs() << "Why here, sir2?!" << "\n"; - //errs() << "Why here, sir3?!" << "\n"; - BasicBlock *BB=dyn_cast(LI->getParent()->getPrevNode()); - - /* - Instruction* III=dyn_cast(&BB->back()); - //errs() << "Why here, miss?!" << "\n"; - if(!III){ - errs() << "Why here?!" << "\n"; + LoadInst *LI=dyn_cast(RI->getPrevNode()); + LoadInst *LIfinal=dyn_cast(RI->getPrevNode()); + if(!LI || !LIfinal) return false; - } - */ - + + BasicBlock *BB=dyn_cast(LI->getParent()->getPrevNode()); BranchInst *BI=dyn_cast(&BB->back()); - //errs() << "Why here, miss?!" << "\n"; if(!BI) return false; - - //StoreInst *SI=dyn_cast(III->getPrevNode()); StoreInst *SI=dyn_cast(BI->getPrevNode()); if(!SI) return false; - - //------------------------------------------------------------------------------------------------------------------------------------------------- - //return true; - Instruction *II=dyn_cast(SI->getPrevNode()); + if(!II) + return false; + Value *help1; Value *help2; - if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) + if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) return false; - II=dyn_cast(II->getPrevNode()); - - if(!match(II, m_LShr(m_Value(help1), m_SpecificInt(8)))) - return false; - LI=dyn_cast(II->getPrevNode()); if(!LI) return false; - LI=dyn_cast(LI->getPrevNode()); - if(!LI) - return false; - - GetElementPtrInst *GEPI=dyn_cast(LI->getPrevNode()); - if(!GEPI) - return false; - - ZExtInst *ZI=dyn_cast(GEPI->getPrevNode()); - if(!ZI) - return false; - - II=dyn_cast(ZI->getPrevNode()); - if(!match(II, m_And(m_Value(help1), m_Value(help2)))) - return false; - - 
II=dyn_cast(II->getPrevNode()); - if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) - return false; + BB=dyn_cast(LI->getParent()->getPrevNode()); + BI=dyn_cast(&BB->back()); + if(!BI) + return false; - LI=dyn_cast(II->getPrevNode()); - if(!LI) + BB=dyn_cast(BI->getParent()->getPrevNode()); + BI=dyn_cast(&BB->back()); + if(!BI) return false; - SI=dyn_cast(LI->getPrevNode()); + SI=dyn_cast(BI->getPrevNode()); if(!SI) return false; - GEPI=dyn_cast(SI->getPrevNode()); - if(!GEPI) - return false; - - LI=dyn_cast(GEPI->getPrevNode()); - if(!LI) + // For some reason we could not see trunc instruction! + II=dyn_cast(SI->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_SpecificInt(32767)))) return false; - LI=dyn_cast(LI->getPrevNode()); + LI=dyn_cast(II->getPrevNode()); if(!LI) return false; - - - //------------------------------------------------------------------------------------------------------------------------------------------------- - //return true; BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; BI=dyn_cast(&BB->back()); if(!BI) - return false; - - ICmpInst *ICMPI=dyn_cast(BI->getPrevNode()); - if(!ICMPI) return false; - - SI=dyn_cast(ICMPI->getPrevNode()); + + SI=dyn_cast(BI->getPrevNode()); if(!SI) return false; II=dyn_cast(SI->getPrevNode()); - if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) + // We should somehow recognize -32768 here! 
+ if(!match(II, m_Or(m_Value(help1), m_Value(help2)))) return false; LI=dyn_cast(II->getPrevNode()); if(!LI) return false; - - //------------------------------------------------------------------------------------------------------------------------------------------------- - //return true; BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; BI=dyn_cast(&BB->back()); if(!BI) return false; - SI=dyn_cast(BI->getPrevNode()); - if(!SI) - return false; - - BitCastInst *BCI=dyn_cast(SI->getPrevNode()); - if(!BCI) + ICmpInst *ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) return false; - LI=dyn_cast(BCI->getPrevNode()); + LI=dyn_cast(ICMPI->getPrevNode()); if(!LI) return false; - + SI=dyn_cast(LI->getPrevNode()); if(!SI) return false; - SI=dyn_cast(SI->getPrevNode()); - if(!SI) - return false; - - SI=dyn_cast(SI->getPrevNode()); - if(!SI) - return false; - - AllocaInst *AI=dyn_cast(SI->getPrevNode()); - if(!AI) + TruncInst *TI=dyn_cast(SI->getPrevNode()); + if(!TI) return false; - AI=dyn_cast(AI->getPrevNode()); - if(!AI) + II=dyn_cast(TI->getPrevNode()); + if(!match(II, m_AShr(m_Value(help1), m_SpecificInt(1)))) return false; - AI=dyn_cast(AI->getPrevNode()); - if(!AI) + ZExtInst *ZI=dyn_cast(II->getPrevNode()); + if(!ZI) return false; - AI=dyn_cast(AI->getPrevNode()); - if(!AI) + LI=dyn_cast(ZI->getPrevNode()); + if(!LI) return false; - - errs() << "!!!Table-based CRC32 algorithm is finally recognized!!!" << "\n"; - errs() << "It will be nice if we can check the value of the operands in this algorithm implementation!" << "\n"; - - return true; -} -static bool tryToRecognizeTableBasedCRC32BruteForce(Function &F){ - //Brute force pattern matching! 
- int step=1; - - for(BasicBlock& BB: F){ - if(step==1){ - //auto it=BB.getInstList().begin(); - int count=1; - for(Instruction& I: BB){ - if(count<=4 ){ - AllocaInst *II= dyn_cast(&I); - if(!II) - return false; - } - - if(count>=5 && count<=7){ - StoreInst* II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==8){ - LoadInst* II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==9){ - BitCastInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==10){ - StoreInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==11){ - BranchInst *II=dyn_cast(&I); - if(!II) - return false; - } - count++; - } - } else if(step==2){ - /* - if(BB.getName() != "while.cond") - return false; - */ - int count=1; - for(Instruction& I: BB){ - Value *help1; - Value *help2; - if(count==1){ - LoadInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==2 && !match(help1, m_Add(m_Value(help2), m_SpecificInt(-1)))) - return false; - - if(count==3){ - StoreInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==4){ - ICmpInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==5){ - BranchInst *II=dyn_cast(&I); - if(!II) - return false; - } - - count++; - } - } else if(step==3){ - int count=1; - /* - if(BB.getName() != "while.body") - return false; - */ - - for(Instruction& I: BB){ - Value *help1; - Value *help2; - Value *help3; - - if(count<=2){ - LoadInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==3){ - GetElementPtrInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==4){ - StoreInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==5){ - LoadInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==6 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) - return false; - - if(count==7 && !match(help1, m_And(m_Value(help2), m_Value(help3)))) - return false; - - if(count==8){ - ZExtInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==9){ - GetElementPtrInst 
*II=dyn_cast(&I); - if(!II) - return false; - } - - if(count>=10 && count<=11){ - LoadInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==12 && !match(help1, m_LShr(m_Value(help2), m_Value(help3)))) - return false; - - if(count==13 && !match(help1, m_Xor(m_Value(help2), m_Value(help3)))) - return false; - - if(count==14){ - StoreInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==15){ - BranchInst *II=dyn_cast(&I); - if(!II) - return false; - } - - count++; - } - } else { - int count=1; - - for(Instruction& I: BB){ - if(count==1){ - LoadInst *II=dyn_cast(&I); - if(!II) - return false; - } - - if(count==2){ - ReturnInst *II=dyn_cast(&I); - if(!II) - return false; - } - count++; - } - } - step++; - } - - errs() << "Table-based crc32 algortihm is recognized using brute force algorithm!" << "\n"; - - return true; -} - -static bool tryToRecognizeCRC32(Instruction &I){ - ReturnInst *RI=dyn_cast(&I); - if(!RI) + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) return false; - CallInst *CI=dyn_cast(RI->getPrevNode()); - if(!CI) + BI=dyn_cast(&BB->back()); + if(!BI) return false; - Instruction *II=dyn_cast(CI->getPrevNode()); - if(!II) - return false; - - Value *help1; - Value *help2; - if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) + SI=dyn_cast(BI->getPrevNode()); + if(!SI) return false; - LoadInst *LI=dyn_cast(II->getPrevNode()); - if(!LI) + BB=dyn_cast(SI->getParent()->getPrevNode()); + if(!BB) return false; - - - BasicBlock *BB=dyn_cast(LI->getParent()->getPrevNode()); - BranchInst *BI=dyn_cast(&BB->back()); + + BI=dyn_cast(&BB->back()); if(!BI) return false; - StoreInst *SI=dyn_cast(BI->getPrevNode()); + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); if(!SI) return false; II=dyn_cast(SI->getPrevNode()); - if(!II) - return false; - - if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) + if(!match(II, m_Xor(m_Value(help1), m_SpecificInt(16386)))) return false; 
LI=dyn_cast(II->getPrevNode()); - if(!LI) + if(!LI) return false; BB=dyn_cast(LI->getParent()->getPrevNode()); @@ -1106,103 +802,88 @@ static bool tryToRecognizeCRC32(Instruction &I){ if(!BI) return false; - SI=dyn_cast(BI->getPrevNode()); - if(!SI) - return false; - - II=dyn_cast(SI->getPrevNode()); - if(!II) + ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) return false; - if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) + ZI=dyn_cast(ICMPI->getPrevNode()); + if(!ZI) return false; - LI=dyn_cast(II->getPrevNode()); + //Just to check something! + //SI=dyn_cast(ZI->getPrevNode()); <- as we already assumed it doesn't work! + LI=dyn_cast(ZI->getPrevNode()); if(!LI) return false; - BB=dyn_cast(LI->getParent()->getPrevNode()); - if(!BB) + SI=dyn_cast(LI->getPrevNode()); + if(!SI) return false; - BI=dyn_cast(&BB->back()); - if(!BI) - return false; + TI=dyn_cast(SI->getPrevNode()); + if(!TI) + return false; - SI=dyn_cast(BI->getPrevNode()); - if(!SI) - return false; - - II=dyn_cast(SI->getPrevNode()); - if(!II) + II=dyn_cast(TI->getPrevNode()); + if(!match(II, m_AShr(m_Value(help1), m_SpecificInt(1)))) return false; - if(!match(II, m_Shl(m_Value(help1), m_SpecificInt(1)))) + ZI=dyn_cast(II->getPrevNode()); + if(!ZI) return false; - LI=dyn_cast(II->getPrevNode()); + LI=dyn_cast(ZI->getPrevNode()); if(!LI) return false; - //---------------------------------------------------------------------------------------------------------------------- - //107. 
if.else: - BB=dyn_cast(LI->getParent()->getPrevNode()); - if(!BB) - return false; - - BI=dyn_cast(&BB->back()); - if(!BI) - return false; - - SI=dyn_cast(BI->getPrevNode()); + SI=dyn_cast(LI->getPrevNode()); if(!SI) return false; - + II=dyn_cast(SI->getPrevNode()); - if(!II) + if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) + return false; + + II=dyn_cast(II->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_SpecificInt(1)))) return false; - if(!match(II, m_Shl(m_Value(help1), m_SpecificInt(1)))) + TI=dyn_cast(II->getPrevNode()); + if(!TI) + return false; + + LI=dyn_cast(TI->getPrevNode()); + if(!LI) + return false; + + II=dyn_cast(LI->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_SpecificInt(1)))) return false; LI=dyn_cast(II->getPrevNode()); if(!LI) return false; - //---------------------------------------------------------------------------------------------------------------------- - //100. if.then: BB=dyn_cast(LI->getParent()->getPrevNode()); if(!BB) return false; BI=dyn_cast(&BB->back()); if(!BI) - return false; - - SI=dyn_cast(BI->getPrevNode()); - if(!SI) - return false; - - II=dyn_cast(SI->getPrevNode()); - if(!II) return false; - if(!match(II, m_Xor(m_Value(help1), m_SpecificInt(79764919)))) + ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) return false; - II=dyn_cast(II->getPrevNode()); - if(!II) - return false; - - if(!match(II, m_Shl(m_Value(help1), m_SpecificInt(1)))) + ZI=dyn_cast(ICMPI->getPrevNode()); + if(!ZI) return false; - LI=dyn_cast(II->getPrevNode()); + LI=dyn_cast(ZI->getPrevNode()); if(!LI) return false; - - //---------------------------------------------------------------------------------------------------------------------- - //93. 
for.body: + BB=dyn_cast(LI->getParent()->getPrevNode()); if(!BB) return false; @@ -1210,146 +891,301 @@ static bool tryToRecognizeCRC32(Instruction &I){ BI=dyn_cast(&BB->back()); if(!BI) return false; + + // Here we have to match 6 more consecutive store instructions and 5 consecutive alloca instructions! + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; - ICmpInst *ICI=dyn_cast(BI->getPrevNode()); - if(!ICI) + SI=dyn_cast(SI->getPrevNode()); + if(!SI) return false; - II=dyn_cast(ICI->getPrevNode()); - if(!II) + SI=dyn_cast(SI->getPrevNode()); + if(!SI) return false; - if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))); + SI=dyn_cast(SI->getPrevNode()); + if(!SI) return false; - LI=dyn_cast(II->getPrevNode()); - if(!LI) + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + // Last thing we have to match are 5 alloca instructions! + AllocaInst *AI=dyn_cast(SI->getPrevNode()); + if(!AI) return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; - LI=dyn_cast(LI->getPrevNode()); - if(!LI) + AI=dyn_cast(AI->getPrevNode()); + if(!AI) return false; - //---------------------------------------------------------------------------------------------------------------------- - //88. 
for.cond: - BB=dyn_cast(LI->getParent()->getPrevNode()); - if(!BB) + AI=dyn_cast(AI->getPrevNode()); + if(!AI) return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + errs() << "Original unoptimized form of CRC32 algorithm has been recognized!\n"; + Value *argument1=LIfinal->getFunction()->getArg(0); + Value *argument2=LIfinal->getFunction()->getArg(1); + //errs() << argument1->getType()->isIntOrIntVectorTy() << "\n"; + //errs() << argument2->getType()->isIntOrIntVectorTy() << "\n"; + Type* ArgType1=argument1->getType(); + Type* ArgType2=argument2->getType(); + //argument1->dump(); + //argument2->dump(); + //ArgType1->dump(); + //ArgType2->dump(); - BI=dyn_cast(&BB->back()); - if(!BI) - return false; + IRBuilder<> B(LIfinal); + //Function *function = Intrinsic::getDeclaration(LIfinal->getModule(), Intrinsic::crc8, {ArgType1, ArgType2}); + auto CRC8 = B.CreateIntrinsic(Intrinsic::crc8, {}, {argument1, argument2}); + // New insertion! + + LIfinal->replaceAllUsesWith(CRC8); + BasicBlock *bb_help10=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help9=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help8=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help7=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help6=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help5=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock 
*bb_help4=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help3=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help2=RIfinal->getParent()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help1=RIfinal->getParent()->getPrevNode(); + DeleteDeadBlocks({bb_help1, bb_help2, bb_help3, bb_help4, bb_help5, bb_help6, bb_help7, bb_help8, bb_help9, bb_help10}); + //Instruction *help=dyn_cast(&RIfinal->getParent()->back()); + //Instruction *one_to_delete=dyn_cast(help->getPrevNode()); + //one_to_delete->removeFromParent(); + //RIfinal->getPrevNode()->eraseFromParent(); - ICI=dyn_cast(BI->getPrevNode()); - if(!ICI) - return false; + return true; +} - LI=dyn_cast(ICI->getPrevNode()); - if(!LI) +// Check if this array of constants represents a crc32 table. +static bool isCRC32Table(const ConstantDataArray &Table){ + unsigned Length=Table.getNumElements(); + if(Length!=256) return false; + + for(int i=0;i4294967295) + return false; + } - //---------------------------------------------------------------------------------------------------------------------- - //74. while.body: + return true; +} - BB=dyn_cast(LI->getParent()->getPrevNode()); - if(!BB) +// Try to recognize table-based crc32 algorithm implementation. 
+/* +define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + %7 = alloca i32*, align 8 + store i32 %0, i32* %4, align 4 + store i8* %1, i8** %5, align 8 + store i64 %2, i64* %6, align 8 + %8 = load i8*, i8** %5, align 8 + %9 = bitcast i8* %8 to i32* + store i32* %9, i32** %7, align 8 + br label %10 + +10: ; preds = %14, %3 + %11 = load i64, i64* %6, align 8 + %12 = add i64 %11, -1 + store i64 %12, i64* %6, align 8 + %13 = icmp ne i64 %11, 0 + br i1 %13, label %14, label %27 + +14: ; preds = %10 + %15 = load i32, i32* %4, align 4 + %16 = load i32*, i32** %7, align 8 + %17 = getelementptr inbounds i32, i32* %16, i32 1 + store i32* %17, i32** %7, align 8 + %18 = load i32, i32* %16, align 4 + %19 = xor i32 %15, %18 + %20 = and i32 %19, 255 + %21 = zext i32 %20 to i64 + %22 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32Table, i64 0, i64 %21 + %23 = load i32, i32* %22, align 4 + %24 = load i32, i32* %4, align 4 + %25 = lshr i32 %24, 8 + %26 = xor i32 %23, %25 + store i32 %26, i32* %4, align 4 + br label %10 + +27: ; preds = %10 + %28 = load i32, i32* %4, align 4 + ret i32 %28 +} +*/ +static bool tryToRecognizeTableBasedCRC32(Instruction &I){ + ReturnInst *RI=dyn_cast(&I); + ReturnInst *RIfinal=dyn_cast(&I); + if(!RI) return false; - BI=dyn_cast(&BB->back()); + LoadInst *LI = dyn_cast(RI->getPrevNode()); + if (!LI) + return false; + + LoadInst *LIfinal=dyn_cast(LI); + Type *AccessType = LI->getType(); + if (!AccessType->isIntegerTy()) + return false; + + BasicBlock *BB=dyn_cast(LI->getParent()->getPrevNode()); + + BranchInst *BI=dyn_cast(&BB->back()); if(!BI) return false; - SI=dyn_cast(BI->getPrevNode()); + StoreInst *SI=dyn_cast(BI->getPrevNode()); if(!SI) return false; - SI=dyn_cast(SI->getPrevNode()); - if(!SI) + Instruction *II=dyn_cast(SI->getPrevNode()); + Value *help1; + Value *help2; + if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) return false; - 
CI=dyn_cast(SI->getPrevNode()); - if(!CI) + II=dyn_cast(II->getPrevNode()); + + if(!match(II, m_LShr(m_Value(help1), m_SpecificInt(8)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) return false; - LI=dyn_cast(CI->getPrevNode()); + LI=dyn_cast(LI->getPrevNode()); if(!LI) return false; - SI=dyn_cast(LI->getPrevNode()); - if(!SI) + // Temporary insertion!!!!!!!!!!!!! + GetElementPtrInst *GEP = dyn_cast(LI->getPointerOperand()); + if (!GEP || !GEP->isInBounds() || GEP->getNumIndices() != 2) + return false; + + GlobalVariable *GVTable = dyn_cast(GEP->getPointerOperand()); + if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant()) + return false; + + ConstantDataArray *ConstData=dyn_cast(GVTable->getInitializer()); + if (!ConstData) + return false; + + if (!isCRC32Table(*ConstData)) + return false; + // End of temporary insertion!!!!!!!!!!!!! + + + GetElementPtrInst *GEPI=dyn_cast(LI->getPrevNode()); + if(!GEPI) return false; - ZExtInst *ZI=dyn_cast(SI->getPrevNode()); + ZExtInst *ZI=dyn_cast(GEPI->getPrevNode()); if(!ZI) return false; - LI=dyn_cast(ZI->getPrevNode()); + II=dyn_cast(ZI->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_Value(help2)))) + return false; + + Value *X1; + + II=dyn_cast(II->getPrevNode()); + if(!match(II, m_Xor(m_Value(X1), m_Value(help2)))) + return false; + + LI=dyn_cast(II->getPrevNode()); if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); + if(!SI) return false; - GetElementPtrInst *GEPI=dyn_cast(LI->getPrevNode()); + GEPI=dyn_cast(SI->getPrevNode()); if(!GEPI) return false; - SExtInst *SEI=dyn_cast(GEPI->getPrevNode()); - if(!SEI) - return false; - - LI=dyn_cast(SEI->getPrevNode()); + LI=dyn_cast(GEPI->getPrevNode()); if(!LI) return false; + LI=dyn_cast(LI->getPrevNode()); if(!LI) return false; - //---------------------------------------------------------------------------------------------------------------------- - //68. 
while.cond: BB=dyn_cast(LI->getParent()->getPrevNode()); - if(!BB) - return false; - + BI=dyn_cast(&BB->back()); if(!BI) - return false; + return false; - ICI=dyn_cast(BI->getPrevNode()); - if(!ICI) + ICmpInst *ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) return false; - LI=dyn_cast(ICI->getPrevNode()); - if(!LI) + SI=dyn_cast(ICMPI->getPrevNode()); + if(!SI) return false; - LI=dyn_cast(LI->getPrevNode()); + II=dyn_cast(SI->getPrevNode()); + if(!match(II, m_Add(m_Value(help1), m_SpecificInt(-1)))) + return false; + + LI=dyn_cast(II->getPrevNode()); if(!LI) - return false; - - //---------------------------------------------------------------------------------------------------------------------- - //55. entry: - BB=dyn_cast(LI->getParent()->getPrevNode()); - if(!BB) return false; - + + BB=dyn_cast(LI->getParent()->getPrevNode()); + BI=dyn_cast(&BB->back()); if(!BI) return false; SI=dyn_cast(BI->getPrevNode()); if(!SI) - return false; + return false; + + BitCastInst *BCI=dyn_cast(SI->getPrevNode()); + if(!BCI) + return false; - SI=dyn_cast(SI->getPrevNode()); + LI=dyn_cast(BCI->getPrevNode()); + if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); if(!SI) return false; SI=dyn_cast(SI->getPrevNode()); if(!SI) - return false; + return false; SI=dyn_cast(SI->getPrevNode()); if(!SI) - return false; + return false; AllocaInst *AI=dyn_cast(SI->getPrevNode()); if(!AI) @@ -1367,17 +1203,57 @@ static bool tryToRecognizeCRC32(Instruction &I){ if(!AI) return false; - AI=dyn_cast(AI->getPrevNode()); - if(!AI) - return false; + + errs() << "!!!Table-based CRC32 algorithm is finally recognized!!!" << "\n"; + errs() << "It will be nice if we can check the value of the operands in this algorithm implementation!" << "\n"; - AI=dyn_cast(AI->getPrevNode()); - if(!AI) - return false; + //We land this from tryToRecognizeTableBasedCTTZ function! 
+ auto ZeroTableElem = ConstData->getElementAsInteger(0); + unsigned InputBits = X1->getType()->getScalarSizeInBits(); + bool DefinedForZero = ZeroTableElem == InputBits; + + IRBuilder<> B(LIfinal); + ConstantInt *BoolConst = B.getInt1(!DefinedForZero); + Type *XType = X1->getType(); + Value *final_arg=LIfinal->getFunction()->getArg(0); + auto CRC = B.CreateIntrinsic(Intrinsic::crc, {XType}, {final_arg, BoolConst}); + Value *ZExtOrTrunc = nullptr; + //New insertion for crc32 intrinsic! + Value *argument1=LIfinal->getFunction()->getArg(0); + Value *argument2=LIfinal->getFunction()->getArg(1); + Value *argument3=LIfinal->getFunction()->getArg(2); + //auto CRC32 = B.CreateIntrinsic(Intrinsic::crc32, {XType}, {argument1, argument2, argument3}); + //End of new insertion! + + errs() << final_arg->getType()->isIntOrIntVectorTy() << "\n"; + errs() << XType->isIntOrIntVectorTy() << "\n"; + errs()<< CRC->getType()->isIntOrIntVectorTy() << "\n"; + errs() << AccessType->isIntOrIntVectorTy() << "\n"; + + RIfinal->dump(); + LIfinal->dump(); + X1->dump(); + + LIfinal->replaceAllUsesWith(CRC); + //LIfinal->replaceAllUsesWith(CRC32); + + BasicBlock *bb_help1=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help2=RIfinal->getParent()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help3=RIfinal->getParent()->getPrevNode(); + DeleteDeadBlocks({bb_help3, bb_help2, bb_help1}); + errs() << "We did it?!" << "\n"; + + Function *f=dyn_cast(LIfinal->getParent()->getParent()); + Module *m=f->getParent(); + auto call_function=m->getFunction("llvm.crc.i32"); + if(call_function!=NULL){ + errs() << "Wow!" << "\n"; + } else { + errs() << "Failed!" << "\n"; + } - return true; + return true; } -//--------------------------------------------------------------------------------------------------------------------------------------- // Check if this array of constants represents a cttz table. 
// Iterate over the elements from \p Table by trying to find/match all @@ -1519,6 +1395,10 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) { ConstantInt *BoolConst = B.getInt1(!DefinedForZero); Type *XType = X1->getType(); auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst}); + + errs() << "Table-based cttz algorithm is recognized!" << "\n"; + errs()<< Cttz->getType()->isIntOrIntVectorTy() << "\n"; + Value *ZExtOrTrunc = nullptr; if (DefinedForZero) { @@ -1859,10 +1739,6 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetLibraryInfo &TLI, AliasAnalysis &AA) { bool MadeChange = false; - //bool globalflag=tryToRecognizeTableBasedCRC32BruteForce(F); - //if(globalflag) - // errs() << "I am supprised!" << "\n"; - if(F.getName().str()=="reverse"){ errs() << "We won't check this function!" << "\n"; return false; @@ -1888,17 +1764,24 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, MadeChange |= foldGuardedFunnelShift(I, DT); MadeChange |= tryToRecognizePopCount(I); bool flag1=tryToRecognizeTableBasedCRC32(I); - bool flag2=tryToRecognizeCrc(I); + bool flag2=tryToRecognizeCRC32(I); + //bool flag2=false; MadeChange |= flag1; if(flag1) errs() << "Function we have created seems to work properly!\n"; - //else - //errs() << "Table-based crc32 algorithm wasn't recognized!\n"; + if(flag2) errs() << "CRC32 algorithm has been recognised!" << "\n"; - MadeChange |= tryToFPToSat(I, TTI); - MadeChange |= tryToRecognizeTableBasedCttz(I); + //MadeChange |= tryToRecognizeTableBasedCttz(I); + bool recognised=tryToRecognizeTableBasedCttz(I); + if(recognised){ + MadeChange |=recognised; + //errs() << "Mission completed!" << "\n"; + } else { + MadeChange |=recognised; + //errs() << "Mission is still not completed!" 
<< "\n"; + } MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); MadeChange |= foldPatternedLoads(I, DL); // NOTE: This function introduces erasing of the instruction `I`, so it @@ -1943,5 +1826,6 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F, // Mark all the analyses that instcombine updates as preserved. PreservedAnalyses PA; PA.preserveSet(); - return PA; + return PreservedAnalyses::none(); + //return PA; } diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 4b93b624c2eb0..afe889bd692d0 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -99,7 +99,9 @@ void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU, } void llvm::DeleteDeadBlocks(ArrayRef BBs, DomTreeUpdater *DTU, + bool KeepOneInputPHIs) { + #ifndef NDEBUG // Make sure that all predecessors of each dead block is also dead. SmallPtrSet Dead(BBs.begin(), BBs.end()); From 07726fea547bc6b849473eb0b47ce6091096f320 Mon Sep 17 00:00:00 2001 From: PosteruOle Date: Wed, 18 Oct 2023 11:53:34 +0200 Subject: [PATCH 21/21] Code refactoring! 
--- .../AggressiveInstCombine.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 3d6853027fa9d..0c313254de80c 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -941,17 +941,10 @@ static bool tryToRecognizeCRC32(Instruction &I){ errs() << "Original unoptimized form of CRC32 algorithm has been recognized!\n"; Value *argument1=LIfinal->getFunction()->getArg(0); Value *argument2=LIfinal->getFunction()->getArg(1); - //errs() << argument1->getType()->isIntOrIntVectorTy() << "\n"; - //errs() << argument2->getType()->isIntOrIntVectorTy() << "\n"; Type* ArgType1=argument1->getType(); Type* ArgType2=argument2->getType(); - //argument1->dump(); - //argument2->dump(); - //ArgType1->dump(); - //ArgType2->dump(); IRBuilder<> B(LIfinal); - //Function *function = Intrinsic::getDeclaration(LIfinal->getModule(), Intrinsic::crc8, {ArgType1, ArgType2}); auto CRC8 = B.CreateIntrinsic(Intrinsic::crc8, {}, {argument1, argument2}); // New insertion! 
@@ -967,10 +960,6 @@ static bool tryToRecognizeCRC32(Instruction &I){ BasicBlock *bb_help2=RIfinal->getParent()->getPrevNode()->getPrevNode(); BasicBlock *bb_help1=RIfinal->getParent()->getPrevNode(); DeleteDeadBlocks({bb_help1, bb_help2, bb_help3, bb_help4, bb_help5, bb_help6, bb_help7, bb_help8, bb_help9, bb_help10}); - //Instruction *help=dyn_cast(&RIfinal->getParent()->back()); - //Instruction *one_to_delete=dyn_cast(help->getPrevNode()); - //one_to_delete->removeFromParent(); - //RIfinal->getPrevNode()->eraseFromParent(); return true; } @@ -1230,9 +1219,9 @@ static bool tryToRecognizeTableBasedCRC32(Instruction &I){ errs()<< CRC->getType()->isIntOrIntVectorTy() << "\n"; errs() << AccessType->isIntOrIntVectorTy() << "\n"; - RIfinal->dump(); - LIfinal->dump(); - X1->dump(); + //RIfinal->dump(); + //LIfinal->dump(); + //X1->dump(); LIfinal->replaceAllUsesWith(CRC); //LIfinal->replaceAllUsesWith(CRC32);