diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 1e52d351780c1..ef00e4652097c 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -924,6 +924,7 @@ TARGET_BUILTIN(__builtin_pextd, "ULLiULLiULLi", "", "isa-v31-instructions") TARGET_BUILTIN(__builtin_cfuged, "ULLiULLiULLi", "", "isa-v31-instructions") TARGET_BUILTIN(__builtin_cntlzdm, "ULLiULLiULLi", "", "isa-v31-instructions") TARGET_BUILTIN(__builtin_cnttzdm, "ULLiULLiULLi", "", "isa-v31-instructions") +//TARGET_BUILTIN(__builtin_cnrcdm, "ULLiULLiULLi", "", "isa-v31-instructions") // Double-double (un)pack BUILTIN(__builtin_unpack_longdouble, "dLdIi", "") diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 0b1d1d75151cb..6ffa091b33840 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -713,6 +713,9 @@ enum NodeType { /// Byte Swap and Counting operators. BSWAP, CTTZ, + CRC, + CRC8, + CRC32, CTLZ, CTPOP, BITREVERSE, @@ -720,6 +723,7 @@ enum NodeType { /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, + CRC_ZERO_UNDEF, CTLZ_ZERO_UNDEF, /// Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 0aa4fa2123369..04955defc8140 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5060,11 +5060,13 @@ class TargetLowering : public TargetLoweringBase { /// \returns The expansion result or SDValue() if it fails. SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; + SDValue expandCRC(SDNode *N, SelectionDAG &DAG) const; /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes. /// \param N Node to expand /// \returns The expansion result or SDValue() if it fails. 
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const; - + + SDValue expandVPCRC(SDNode *N, SelectionDAG &DAG) const; /// Expand ABS nodes. Expands vector/scalar ABS nodes, /// vector nodes can only succeed if all operations are legal/custom. /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index ebbd2af8efc7e..837183c5c1dae 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1305,6 +1305,12 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] in { def int_ctlz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_cttz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + def int_crc : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + + def int_crc32 : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; +} +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { + def int_crc8 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i8_ty, llvm_i16_ty]>; } //===------------------------ Debugger Intrinsics -------------------------===// @@ -1897,6 +1903,7 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { [ LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; + def int_vp_fshl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -2154,6 +2161,17 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg> llvm_i1_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; + def int_vp_crc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + llvm_i1_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_crc32 : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; } def int_get_active_lane_mask: diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 243cba5c62bb2..70c8dfed97317 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -106,6 +106,12 @@ let TargetPrefix = "riscv" in { // Zbkx def int_riscv_xperm4 : BitManipGPRGPRIntrinsics; def int_riscv_xperm8 : BitManipGPRGPRIntrinsics; + + //CRC - we will check this later! + //def int_riscv_crc: BitManipGPRIntrinsics; + //def int_ctlz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + //def int_cttz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + } // TargetPrefix = "riscv" //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 74fa24b66fc2a..dfce3877d3183 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -143,7 +143,7 @@ def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. 
def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; -def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz +def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz, crc SDTCisInt<0>, SDTCisInt<1> ]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext @@ -453,9 +453,11 @@ def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>; def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>; +def crc : SDNode<"ISD::CRC" , SDTIntBitCountUnaryOp>; def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>; def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; +def crc_zero_undef : SDNode<"ISD::CRC_ZERO_UNDEF" , SDTIntBitCountUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 0f5b7270c5364..5e326e7ba32d1 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1527,6 +1527,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::crc: + case Intrinsic::crc32: case Intrinsic::fshl: case Intrinsic::fshr: case Intrinsic::launder_invariant_group: @@ -2828,6 +2830,7 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, else return ConstantInt::get(Ty, C0->ssub_sat(*C1)); case Intrinsic::cttz: + case Intrinsic::crc: case Intrinsic::ctlz: assert(C1 && "Must be constant int"); @@ -2836,6 +2839,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return PoisonValue::get(Ty); if (!C0) return Constant::getNullValue(Ty); + if 
(IntrinsicID == Intrinsic::crc) + return ConstantInt::get(Ty, C0->countr_zero()); if (IntrinsicID == Intrinsic::cttz) return ConstantInt::get(Ty, C0->countr_zero()); else diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 54eb29342b93c..f33bc94691bd0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -468,6 +468,8 @@ namespace { SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); + //SDValue visitCRC(SDNode *N); + //SDValue visitCRC_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitVSELECT(SDNode *N); @@ -1938,6 +1940,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); + //case ISD::CRC: return visitCTTZ(N); //return visitCRC(N); + //case ISD::CRC_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); //return visitCRC_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::VSELECT: return visitVSELECT(N); @@ -10910,7 +10914,24 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { return SDValue(); } +/* +SDValue DAGCombiner::visitCRC(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + // If the value is known never to be zero, switch to the undef version. 
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + + return SDValue(); +} +*/ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -10920,7 +10941,17 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } +/* +SDValue DAGCombiner::visitCRC_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + // fold (cttz_zero_undef c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + return SDValue(); +} +*/ SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d902b358526bd..432facc1da77a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2833,6 +2833,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if ((Tmp1 = TLI.expandCTTZ(Node, DAG))) Results.push_back(Tmp1); break; + case ISD::CRC32: + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: + if ((Tmp1 = TLI.expandCRC(Node, DAG))) + Results.push_back(Tmp1); + break; case ISD::BITREVERSE: if ((Tmp1 = TLI.expandBITREVERSE(Node, DAG))) Results.push_back(Tmp1); @@ -4668,17 +4674,19 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { switch (Node->getOpcode()) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: + //case ISD::CRC: + //case ISD::CRC_ZERO_UNDEF: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument unless its cttz, then use any_extend. 
- if (Node->getOpcode() == ISD::CTTZ || + if (Node->getOpcode() == ISD::CRC || Node->getOpcode() == ISD::CRC_ZERO_UNDEF || Node->getOpcode() == ISD::CTTZ || Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0)); else Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - if (Node->getOpcode() == ISD::CTTZ) { + if (Node->getOpcode() == ISD::CRC || Node->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d83aaf31950d5..354026db02366 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3481,6 +3481,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(LowBits); break; } + /* + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.countMaxTrailingZeros(); + unsigned LowBits = llvm::bit_width(PossibleTZ); + Known.Zero.setBitsFrom(LowBits); + //Known. 
+ break; + } + */ case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); @@ -5404,6 +5416,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(), C->isOpaque()); + case ISD::CRC32: + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: + return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::FP16_TO_FP: case ISD::BF16_TO_FP: { bool Ignored; @@ -5522,6 +5539,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: + case ISD::CRC32: + case ISD::CRC: + case ISD::CRC_ZERO_UNDEF: case ISD::CTPOP: { SDValue Ops = {Operand}; if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops)) @@ -5756,6 +5776,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return Operand; break; case ISD::CTLZ: + //case ISD::CRC: case ISD::CTTZ: if (Operand.getValueType().getScalarType() == MVT::i1) return getNOT(DL, Operand, Operand.getValueType()); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 32be369d08cf7..06cebbf265d99 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8593,6 +8593,17 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::CTPOP, dl, VT, Tmp); } +SDValue TargetLowering::expandCRC(SDNode *Node, SelectionDAG &DAG) const { + //We have to change this function! 
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0);
+  SDValue Op = Node->getOperand(0);
+  // Placeholder, not a CRC: yields ~Op & (Op - 1) from operand 0 only; any further operands are ignored.
+
+  return DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
+                     DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
+}
+
 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
   SDValue Op = Node->getOperand(0);
   SDValue Mask = Node->getOperand(1);
@@ -8609,6 +8620,22 @@ SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
 }
 
+SDValue TargetLowering::expandVPCRC(SDNode *Node, SelectionDAG &DAG) const {
+  SDValue Op = Node->getOperand(0);
+  SDValue Mask = Node->getOperand(1);
+  SDValue VL = Node->getOperand(2);
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0);
+
+  // Placeholder with the same body as expandVPCTTZ: popcount(~x & (x - 1)), not a CRC.
+  SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
+                            DAG.getConstant(-1, dl, VT), Mask, VL);
+  SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
+                                 DAG.getConstant(1, dl, VT), Mask, VL);
+  SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
+  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
+}
+
 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                   bool IsNegative) const {
   SDLoc dl(N);
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index b96fdb00306d1..5468a981c35e8 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -351,6 +351,7 @@ def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
     CFUGED,
     CNTLZDM,
     CNTTZDM,
+    //CNRCDM,
     PDEPD,
     PEXTD,
     VCFUGED,
@@ -660,6 +661,10 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
     CNTTZD_rec,
     CNTTZW, CNTTZW8,
     CNTTZW8_rec, CNTTZW_rec,
+    //CNRCD,
+    //CNRCD_rec,
+    //CNRCW, CNRCW8,
+    //CNRCW8_rec, CNRCW_rec,
     FTSQRT,
     MTVSRBM,
     MTVSRBMI,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 012052a1be1d3..87d5b28b4dcac 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -464,13 +464,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                        (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ?
Custom : Expand);
   }
 
-  // CTPOP or CTTZ were introduced in P8/P9 respectively
+  // CTPOP or CTTZ were introduced in P8/P9 respectively. CRC always expands: no PPC pattern selects it.
   if (Subtarget.isISA3_0()) {
     setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
     setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
+    setOperationAction(ISD::CRC , MVT::i32 , Expand);
+    setOperationAction(ISD::CRC , MVT::i64 , Expand);
   } else {
     setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
     setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+    setOperationAction(ISD::CRC , MVT::i32 , Expand);
+    setOperationAction(ISD::CRC , MVT::i64 , Expand);
   }
 
   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
@@ -801,10 +805,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       }
 
       // Vector instructions introduced in P9
-      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
+      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)){
         setOperationAction(ISD::CTTZ, VT, Legal);
-      else
+        setOperationAction(ISD::CRC, VT, Expand); // no vector CRC pattern exists either
+      } else {
         setOperationAction(ISD::CTTZ, VT, Expand);
+        setOperationAction(ISD::CRC, VT, Expand);
+      }
 
       // We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 02f7147dfd6bb..1875b50dbec4c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -782,6 +782,12 @@ namespace llvm { return true; } + /* + bool isCheapToSpeculateCrc(Type *Ty) const override { + return true; + } + */ + bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index a8c27d0cf6a5a..e952c4c7eb7c6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2320,6 +2320,10 @@ defm CNTTZW : XForm_11r<31, 538, (outs gprc:$RA), (ins gprc:$RST), "cnttzw", "$RA, $RST", IIC_IntGeneral, [(set i32:$RA, (cttz i32:$RST))]>, Requires<[IsISA3_0]>, ZExt32To64; +//defm CNRCW : XForm_11r<31, 548, (outs gprc:$RA), (ins gprc:$RST), +// "cnrcw", "$RA, $RST", IIC_IntGeneral, +// [(set i32:$RA, (crc i32:$RST))]>, Requires<[IsISA3_0]>, +// ZExt32To64; defm EXTSB : XForm_11r<31, 954, (outs gprc:$RA), (ins gprc:$RST), "extsb", "$RA, $RST", IIC_IntSimple, [(set i32:$RA, (sext_inreg i32:$RST, i8))]>, SExt32To64; diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 8cb8e4d91db21..70e9bd487311a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -1662,6 +1662,10 @@ let Predicates = [IsISA3_1] in { "cnttzdm $RA, $RST, $RB", IIC_IntGeneral, [(set i64:$RA, (int_ppc_cnttzdm i64:$RST, i64:$RB))]>; + //def CNRCDM : XForm_6<31, 571, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB), + // "cnrcdm $RA, $RST, $RB", IIC_IntGeneral, + // [(set i64:$RA, + // (int_ppc_cnrcdm i64:$RST, i64:$RB))]>; def XXGENPCVBM : XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM), "xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>; diff --git 
a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index b2925b1a9f2f3..0c313254de80c 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -30,6 +30,18 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" +#include +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/BasicBlock.h" +#include + +#include "llvm/Pass.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace PatternMatch; @@ -43,6 +55,11 @@ STATISTIC(NumGuardedFunnelShifts, "Number of guarded funnel shifts transformed into funnel shifts"); STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized"); +/* +Petar's potential insertion! +STATISTIC(NumReverseRecognized, "Number of reverse function recognized"); +*/ + static cl::opt MaxInstrsToScan( "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine.")); @@ -267,6 +284,93 @@ static bool foldAnyOrAllBitsSet(Instruction &I) { ++NumAnyOrAllBitsSet; return true; } +//--------------------------------------------------------------------------------------------------------------------------------------- +// Petar's code! +//unsigned reverse(unsigned x) { +// x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555); ? +// x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333); . +// x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F); . +// x = (x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24); . 
+//   return x;
+//}
+
+// Returns true if \p I is the final 'or' of the 32-bit bit-reversal idiom
+// shown above: adjacent bits, bit pairs and nibbles are swapped, then the
+// four bytes are swapped. This only recognizes the idiom; it does not rewrite
+// the IR. NOTE(review): the byte-swap step is matched only as a right-nested
+// chain or(a, or(b, or(c, d))) with the operands in source order; confirm
+// that this is the shape that actually reaches this pass.
+static bool tryToRecognizeReverseFunction(Instruction &I){
+  if (I.getOpcode() != Instruction::Or)
+    return false;
+
+  Type *Ty = I.getType();
+  if (!Ty->isIntOrIntVectorTy())
+    return false;
+
+  // The byte-swap step below hard-codes the 0xFF00 mask and shifts by 8 and
+  // 24, so the sequence is a bit reversal only for 32-bit elements. Wider or
+  // narrower types used to be let through and could be reported as a match.
+  unsigned Len = Ty->getScalarSizeInBits();
+  if (Len != 32)
+    return false;
+
+  APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
+  APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
+  APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
+  APInt ShiftTop = APInt(Len, Len - 8);
+
+  // Cursor for the walk. It starts at the root 'or' and is rebound to the
+  // operand x of every step that matches, so the steps are peeled off from
+  // the last source statement to the first. It must be initialized: the first
+  // match() reads it.
+  Value *Cur = &I;
+
+  // Step 4: (x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24)
+  if (!match(Cur,
+             m_Or(m_Shl(m_Value(Cur), m_SpecificInt(ShiftTop)),
+                  m_Or(m_Shl(m_And(m_Deferred(Cur), m_SpecificInt(0xFF00)),
+                             m_SpecificInt(8)),
+                       m_Or(m_And(m_LShr(m_Deferred(Cur), m_SpecificInt(8)),
+                                  m_SpecificInt(0xFF00)),
+                            m_LShr(m_Deferred(Cur), m_SpecificInt(ShiftTop)))))))
+    return false;
+
+  // Step 3: ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F)
+  if (!match(Cur,
+             m_Or(m_Shl(m_And(m_Value(Cur), m_SpecificInt(Mask0F)),
+                        m_SpecificInt(4)),
+                  m_And(m_LShr(m_Deferred(Cur), m_SpecificInt(4)),
+                        m_SpecificInt(Mask0F)))))
+    return false;
+
+  // Step 2: ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333)
+  if (!match(Cur,
+             m_Or(m_Shl(m_And(m_Value(Cur), m_SpecificInt(Mask33)),
+                        m_SpecificInt(2)),
+                  m_And(m_LShr(m_Deferred(Cur), m_SpecificInt(2)),
+                        m_SpecificInt(Mask33)))))
+    return false;
+
+  // Step 1: ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555)
+  if (!match(Cur,
+             m_Or(m_Shl(m_And(m_Value(Cur), m_SpecificInt(Mask55)),
+                        m_SpecificInt(1)),
+                  m_And(m_LShr(m_Deferred(Cur), m_SpecificInt(1)),
+                        m_SpecificInt(Mask55)))))
+    return false;
+
+  // All four steps matched; Cur is now the value being reversed.
+  // TODO: this only detects the idiom, nothing is rewritten yet. A follow-up
+  // would replace the root 'or' with one intrinsic call on Cur (presumably
+  // llvm.bitreverse -- confirm) and count it in a NumReverseRecognized
+  // statistic.
+
+  return true;
+}
+
+//---------------------------------------------------------------------------------------------------------------------------------------
+
 
 // Try to recognize below function as popcount intrinsic.
 // This is the "best" algorithm from
@@ -442,6 +546,704 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
   return false;
 }
 
+//---------------------------------------------------------------------------------------------------------------------------------------
+// Petar's code!
+//LLVM IR code for the naive crc algorithm implementation!
+/* +; Function Attrs: noinline nounwind uwtable +define dso_local zeroext i16 @crcu8(i8 zeroext %0, i16 zeroext %1) #0 { + %3 = alloca i8, align 1 + %4 = alloca i16, align 2 + %5 = alloca i8, align 1 + %6 = alloca i8, align 1 + %7 = alloca i8, align 1 + store i8 %0, i8* %3, align 1 + store i16 %1, i16* %4, align 2 + store i8 0, i8* %5, align 1 + store i8 0, i8* %6, align 1 + store i8 0, i8* %7, align 1 + store i8 0, i8* %5, align 1 + br label %8 + +8: ; preds = %53, %2 + %9 = load i8, i8* %5, align 1 + %10 = zext i8 %9 to i32 + %11 = icmp slt i32 %10, 8 + br i1 %11, label %12, label %56 + +12: ; preds = %8 + %13 = load i8, i8* %3, align 1 + %14 = zext i8 %13 to i32 + %15 = and i32 %14, 1 + %16 = load i16, i16* %4, align 2 + %17 = trunc i16 %16 to i8 + %18 = zext i8 %17 to i32 + %19 = and i32 %18, 1 + %20 = xor i32 %15, %19 + %21 = trunc i32 %20 to i8 + store i8 %21, i8* %6, align 1 + %22 = load i8, i8* %3, align 1 + %23 = zext i8 %22 to i32 + %24 = ashr i32 %23, 1 + %25 = trunc i32 %24 to i8 + store i8 %25, i8* %3, align 1 + %26 = load i8, i8* %6, align 1 + %27 = zext i8 %26 to i32 + %28 = icmp eq i32 %27, 1 + br i1 %28, label %29, label %34 + +29: ; preds = %12 + %30 = load i16, i16* %4, align 2 + %31 = zext i16 %30 to i32 + %32 = xor i32 %31, 16386 + %33 = trunc i32 %32 to i16 + store i16 %33, i16* %4, align 2 + store i8 1, i8* %7, align 1 + br label %35 + +34: ; preds = %12 + store i8 0, i8* %7, align 1 + br label %35 + +35: ; preds = %34, %29 + %36 = load i16, i16* %4, align 2 + %37 = zext i16 %36 to i32 + %38 = ashr i32 %37, 1 + %39 = trunc i32 %38 to i16 + store i16 %39, i16* %4, align 2 + %40 = load i8, i8* %7, align 1 + %41 = icmp ne i8 %40, 0 + br i1 %41, label %42, label %47 + +42: ; preds = %35 + %43 = load i16, i16* %4, align 2 + %44 = zext i16 %43 to i32 + %45 = or i32 %44, 32768 + %46 = trunc i32 %45 to i16 + store i16 %46, i16* %4, align 2 + br label %52 + +47: ; preds = %35 + %48 = load i16, i16* %4, align 2 + %49 = zext i16 %48 to i32 + %50 = and 
i32 %49, 32767 + %51 = trunc i32 %50 to i16 + store i16 %51, i16* %4, align 2 + br label %52 + +52: ; preds = %47, %42 + br label %53 + +53: ; preds = %52 + %54 = load i8, i8* %5, align 1 + %55 = add i8 %54, 1 + store i8 %55, i8* %5, align 1 + br label %8 + +56: ; preds = %8 + %57 = load i16, i16* %4, align 2 + ret i16 %57 +} +*/ +static bool tryToRecognizeCRC32(Instruction &I){ + ReturnInst *RI=dyn_cast(&I); + ReturnInst *RIfinal=dyn_cast(&I); + if(!RI || !RIfinal) + return false; + + LoadInst *LI=dyn_cast(RI->getPrevNode()); + LoadInst *LIfinal=dyn_cast(RI->getPrevNode()); + if(!LI || !LIfinal) + return false; + + BasicBlock *BB=dyn_cast(LI->getParent()->getPrevNode()); + BranchInst *BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + StoreInst *SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + Instruction *II=dyn_cast(SI->getPrevNode()); + if(!II) + return false; + + Value *help1; + Value *help2; + if(!match(II, m_Add(m_Value(help1), m_Value(help2)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + BB=dyn_cast(BI->getParent()->getPrevNode()); + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + // For some reason we could not see trunc instruction! + II=dyn_cast(SI->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_SpecificInt(32767)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + // We should somehow recognize -32768 here! 
+ if(!match(II, m_Or(m_Value(help1), m_Value(help2)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + ICmpInst *ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) + return false; + + LI=dyn_cast(ICMPI->getPrevNode()); + if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); + if(!SI) + return false; + + TruncInst *TI=dyn_cast(SI->getPrevNode()); + if(!TI) + return false; + + II=dyn_cast(TI->getPrevNode()); + if(!match(II, m_AShr(m_Value(help1), m_SpecificInt(1)))) + return false; + + ZExtInst *ZI=dyn_cast(II->getPrevNode()); + if(!ZI) + return false; + + LI=dyn_cast(ZI->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + BB=dyn_cast(SI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!match(II, m_Xor(m_Value(help1), m_SpecificInt(16386)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) + return false; + + ZI=dyn_cast(ICMPI->getPrevNode()); + if(!ZI) + return false; + + //Just to check something! + //SI=dyn_cast(ZI->getPrevNode()); <- as we already assumed it doesn't work! 
+ LI=dyn_cast(ZI->getPrevNode()); + if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); + if(!SI) + return false; + + TI=dyn_cast(SI->getPrevNode()); + if(!TI) + return false; + + II=dyn_cast(TI->getPrevNode()); + if(!match(II, m_AShr(m_Value(help1), m_SpecificInt(1)))) + return false; + + ZI=dyn_cast(II->getPrevNode()); + if(!ZI) + return false; + + LI=dyn_cast(ZI->getPrevNode()); + if(!LI) + return false; + + SI=dyn_cast(LI->getPrevNode()); + if(!SI) + return false; + + II=dyn_cast(SI->getPrevNode()); + if(!match(II, m_Xor(m_Value(help1), m_Value(help2)))) + return false; + + II=dyn_cast(II->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_SpecificInt(1)))) + return false; + + TI=dyn_cast(II->getPrevNode()); + if(!TI) + return false; + + LI=dyn_cast(TI->getPrevNode()); + if(!LI) + return false; + + II=dyn_cast(LI->getPrevNode()); + if(!match(II, m_And(m_Value(help1), m_SpecificInt(1)))) + return false; + + LI=dyn_cast(II->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + ICMPI=dyn_cast(BI->getPrevNode()); + if(!ICMPI) + return false; + + ZI=dyn_cast(ICMPI->getPrevNode()); + if(!ZI) + return false; + + LI=dyn_cast(ZI->getPrevNode()); + if(!LI) + return false; + + BB=dyn_cast(LI->getParent()->getPrevNode()); + if(!BB) + return false; + + BI=dyn_cast(&BB->back()); + if(!BI) + return false; + + // Here we have to match 6 more consecutive store instructions and 5 consecutive alloca instructions! + SI=dyn_cast(BI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + SI=dyn_cast(SI->getPrevNode()); + if(!SI) + return false; + + // Last thing we have to match are 5 alloca instructions! 
+ AllocaInst *AI=dyn_cast(SI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + AI=dyn_cast(AI->getPrevNode()); + if(!AI) + return false; + + errs() << "Original unoptimized form of CRC32 algorithm has been recognized!\n"; + Value *argument1=LIfinal->getFunction()->getArg(0); + Value *argument2=LIfinal->getFunction()->getArg(1); + Type* ArgType1=argument1->getType(); + Type* ArgType2=argument2->getType(); + + IRBuilder<> B(LIfinal); + auto CRC8 = B.CreateIntrinsic(Intrinsic::crc8, {}, {argument1, argument2}); + // New insertion! + + LIfinal->replaceAllUsesWith(CRC8); + BasicBlock *bb_help10=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help9=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help8=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help7=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help6=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help5=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help4=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help3=RIfinal->getParent()->getPrevNode()->getPrevNode()->getPrevNode(); + BasicBlock *bb_help2=RIfinal->getParent()->getPrevNode()->getPrevNode(); + BasicBlock 
*bb_help1=RIfinal->getParent()->getPrevNode();
+  DeleteDeadBlocks({bb_help1, bb_help2, bb_help3, bb_help4, bb_help5, bb_help6, bb_help7, bb_help8, bb_help9, bb_help10});
+
+  return true;
+}
+
+// Check if this array of constants represents a crc32 table.
+//
+// The table is accepted only when
+//   * it has exactly 256 integer entries,
+//   * entry 128 is a 32-bit value with bit 31 set (for a reflected, i.e.
+//     LSB-first, table entry 128 is the generator polynomial itself), and
+//   * every entry equals the result of running the bit-at-a-time reflected
+//     CRC recurrence on its own index with that polynomial.
+// Only reflected tables are recognised because the matcher below looks for
+// the `crc >> 8` form of the table-driven update.
+static bool isCRC32Table(const ConstantDataArray &Table){
+  constexpr unsigned NumEntries = 256;
+  if (Table.getNumElements() != NumEntries)
+    return false;
+
+  // getElementAsInteger() may only be used on integer element types.
+  if (!Table.getElementType()->isIntegerTy())
+    return false;
+
+  // Requiring bit 31 (and nothing above it) rejects all-zero tables, narrower
+  // CRC-8/CRC-16 tables and values that do not fit in 32 bits.
+  const uint64_t Poly = Table.getElementAsInteger(NumEntries / 2);
+  if ((Poly >> 31) != 1)
+    return false;
+
+  for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
+    uint64_t Expected = Idx;
+    for (unsigned Bit = 0; Bit != 8; ++Bit)
+      Expected = (Expected >> 1) ^ ((Expected & 1) ? Poly : 0);
+    if (Table.getElementAsInteger(Idx) != Expected)
+      return false;
+  }
+
+  return true;
+}
+
+// Returns the instruction immediately preceding \p I in its basic block if it
+// is a \p T, otherwise nullptr. A null \p I yields nullptr, so calls can be
+// chained and checked once at the end of a run.
+template <typename T> static T *crcPrevAs(Instruction *I) {
+  return I ? dyn_cast_or_null<T>(I->getPrevNode()) : nullptr;
+}
+
+// Returns the instruction immediately preceding \p I if it matches the
+// PatternMatch pattern \p P, otherwise nullptr. Null-tolerant like crcPrevAs.
+template <typename PatternT>
+static Instruction *crcPrevMatching(Instruction *I, const PatternT &P) {
+  Instruction *Prev = I ? I->getPrevNode() : nullptr;
+  return (Prev && match(Prev, P)) ? Prev : nullptr;
+}
+
+// Returns the branch that ends the basic block laid out directly before the
+// block containing \p I, or nullptr if there is no such block or it does not
+// end in a branch.
+static BranchInst *crcBranchEndingPrevBlock(Instruction *I) {
+  if (!I)
+    return nullptr;
+  BasicBlock *Prev = I->getParent()->getPrevNode();
+  if (!Prev || Prev->empty())
+    return nullptr;
+  return dyn_cast<BranchInst>(&Prev->back());
+}
+
+// Try to recognize table-based crc32 algorithm implementation.
+/*
+define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 {
+  %4 = alloca i32, align 4
+  %5 = alloca i8*, align 8
+  %6 = alloca i64, align 8
+  %7 = alloca i32*, align 8
+  store i32 %0, i32* %4, align 4
+  store i8* %1, i8** %5, align 8
+  store i64 %2, i64* %6, align 8
+  %8 = load i8*, i8** %5, align 8
+  %9 = bitcast i8* %8 to i32*
+  store i32* %9, i32** %7, align 8
+  br label %10
+
+10:                                               ; preds = %14, %3
+  %11 = load i64, i64* %6, align 8
+  %12 = add i64 %11, -1
+  store i64 %12, i64* %6, align 8
+  %13 = icmp ne i64 %11, 0
+  br i1 %13, label %14, label %27
+
+14:                                               ; preds = %10
+  %15 = load i32, i32* %4, align 4
+  %16 = load i32*, i32** %7, align 8
+  %17 = getelementptr inbounds i32, i32* %16, i32 1
+  store i32* %17, i32** %7, align 8
+  %18 = load i32, i32* %16, align 4
+  %19 = xor i32 %15, %18
+  %20 = and i32 %19, 255
+  %21 = zext i32 %20 to i64
+  %22 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32Table, i64 0, i64 %21
+  %23 = load i32, i32* %22, align 4
+  %24 = load i32, i32* %4, align 4
+  %25 = lshr i32 %24, 8
+  %26 = xor i32 %23, %25
+  store i32 %26, i32* %4, align 4
+  br label %10
+
+27:                                               ; preds = %10
+  %28 = load i32, i32* %4, align 4
+  ret i32 %28
+}
+*/
+// \p I must be the `ret` of the function. The matcher walks backwards from it,
+// instruction by instruction and block by block in layout order, and requires
+// exactly the unoptimized shape shown above (exit block, loop body, loop
+// header, entry block). On success the returned load is replaced by a call to
+// the crc intrinsic on the function's first argument, the three preceding
+// blocks are deleted and true is returned. On any mismatch nothing is changed
+// and false is returned.
+//
+// NOTE(review): the emitted intrinsic only receives the first argument (the
+// initial crc value); the data pointer and length are not passed on. That is
+// the existing behaviour and is kept as is - confirm it is intended.
+static bool tryToRecognizeTableBasedCRC32(Instruction &I){
+  auto *Ret = dyn_cast<ReturnInst>(&I);
+  if (!Ret)
+    return false;
+
+  // Exit block: the value loaded right before the `ret` must be an integer.
+  auto *ResultLoad = crcPrevAs<LoadInst>(Ret);
+  if (!ResultLoad)
+    return false;
+
+  Type *AccessType = ResultLoad->getType();
+  if (!AccessType->isIntegerTy())
+    return false;
+
+  // Loop body, bottom-up: br; store; xor; lshr ..., 8; load; load <table>.
+  BranchInst *Br = crcBranchEndingPrevBlock(ResultLoad);
+  Instruction *Cur = crcPrevAs<StoreInst>(Br);
+  Cur = crcPrevMatching(Cur, m_Xor(m_Value(), m_Value()));
+  Cur = crcPrevMatching(Cur, m_LShr(m_Value(), m_SpecificInt(8)));
+  Cur = crcPrevAs<LoadInst>(Cur);
+  auto *TableLoad = crcPrevAs<LoadInst>(Cur);
+  if (!TableLoad)
+    return false;
+  BasicBlock *BodyBB = TableLoad->getParent();
+
+  // The table entry must be read through an inbounds two-index GEP into a
+  // constant global whose initializer is a crc32 table.
+  auto *TableGEP = dyn_cast<GetElementPtrInst>(TableLoad->getPointerOperand());
+  if (!TableGEP || !TableGEP->isInBounds() || TableGEP->getNumIndices() != 2)
+    return false;
+
+  auto *GVTable = dyn_cast<GlobalVariable>(TableGEP->getPointerOperand());
+  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
+    return false;
+
+  auto *ConstData = dyn_cast<ConstantDataArray>(GVTable->getInitializer());
+  if (!ConstData || !isCRC32Table(*ConstData))
+    return false;
+
+  // Rest of the loop body, still bottom-up:
+  // gep; zext; and; xor X1, ...; load; store; gep; load; load.
+  Value *X1 = nullptr;
+  Cur = crcPrevAs<GetElementPtrInst>(TableLoad);
+  Cur = crcPrevAs<ZExtInst>(Cur);
+  Cur = crcPrevMatching(Cur, m_And(m_Value(), m_Value()));
+  Cur = crcPrevMatching(Cur, m_Xor(m_Value(X1), m_Value()));
+  Cur = crcPrevAs<LoadInst>(Cur);
+  Cur = crcPrevAs<StoreInst>(Cur);
+  Cur = crcPrevAs<GetElementPtrInst>(Cur);
+  Cur = crcPrevAs<LoadInst>(Cur);
+  Cur = crcPrevAs<LoadInst>(Cur);
+  if (!Cur)
+    return false;
+
+  // Loop header: br; icmp; store; add ..., -1; load.
+  Br = crcBranchEndingPrevBlock(Cur);
+  Cur = crcPrevAs<ICmpInst>(Br);
+  Cur = crcPrevAs<StoreInst>(Cur);
+  Cur = crcPrevMatching(Cur, m_Add(m_Value(), m_SpecificInt(-1)));
+  Cur = crcPrevAs<LoadInst>(Cur);
+  if (!Cur)
+    return false;
+  BasicBlock *HeaderBB = Cur->getParent();
+
+  // Entry block: br; store; bitcast; load; 3 x store; 4 x alloca.
+  Br = crcBranchEndingPrevBlock(Cur);
+  Cur = crcPrevAs<StoreInst>(Br);
+  Cur = crcPrevAs<BitCastInst>(Cur);
+  Cur = crcPrevAs<LoadInst>(Cur);
+  for (unsigned N = 0; N != 3; ++N)
+    Cur = crcPrevAs<StoreInst>(Cur);
+  for (unsigned N = 0; N != 4; ++N)
+    Cur = crcPrevAs<AllocaInst>(Cur);
+  if (!Cur)
+    return false;
+  BasicBlock *EntryBB = Cur->getParent();
+
+  // The intrinsic is overloaded on the type of X1, takes the first function
+  // argument and replaces the returned load, so all three types must agree.
+  // Otherwise the call would be ill-typed or replaceAllUsesWith would assert.
+  Function *Fn = ResultLoad->getFunction();
+  Type *XType = X1->getType();
+  if (Fn->arg_empty() || Fn->getArg(0)->getType() != XType ||
+      AccessType != XType)
+    return false;
+
+  // These two lines are checked by petar-table-based-crc.ll, which expects the
+  // caller's message to follow them directly; nothing else may be printed
+  // here.
+  errs() << "!!!Table-based CRC32 algorithm is finally recognized!!!" << "\n";
+  errs() << "It will be nice if we can check the value of the operands in this algorithm implementation!" << "\n";
+
+  // NOTE(review): the i1 flag is derived the same way as in
+  // tryToRecognizeTableBasedCttz (table[0] == bit width); its meaning for the
+  // crc intrinsic is not visible here - confirm.
+  auto ZeroTableElem = ConstData->getElementAsInteger(0);
+  unsigned InputBits = XType->getScalarSizeInBits();
+  bool DefinedForZero = ZeroTableElem == InputBits;
+
+  IRBuilder<> B(ResultLoad);
+  ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
+  auto CRC = B.CreateIntrinsic(Intrinsic::crc, {XType}, {Fn->getArg(0), BoolConst});
+
+  ResultLoad->replaceAllUsesWith(CRC);
+
+  // The matched loop is no longer needed; drop the loop body, the loop header
+  // and the entry block (the three blocks laid out before the exit block).
+  DeleteDeadBlocks({BodyBB, HeaderBB, EntryBB});
+
+  return true;
+}
+
 // Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all // the numbers from 0 to \p InputBits that should represent cttz results. @@ -475,7 +1277,7 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul, // int f(unsigned x) { // static const char table[32] = // {0, 1, 28, 2, 29, 14, 24, 3, 30, -// 22, 20, 15, 25, 17, 4, 8, 31, 27, +// 22, 20, 15, 25, 17, 4, 8, 31, 27, // 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; // return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; // } @@ -582,6 +1384,10 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) { ConstantInt *BoolConst = B.getInt1(!DefinedForZero); Type *XType = X1->getType(); auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst}); + + errs() << "Table-based cttz algorithm is recognized!" << "\n"; + errs()<< Cttz->getType()->isIntOrIntVectorTy() << "\n"; + Value *ZExtOrTrunc = nullptr; if (DefinedForZero) { @@ -921,33 +1727,59 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA) { bool MadeChange = false; - for (BasicBlock &BB : F) { - // Ignore unreachable basic blocks. - if (!DT.isReachableFromEntry(&BB)) - continue; - - const DataLayout &DL = F.getParent()->getDataLayout(); - - // Walk the block backwards for efficiency. We're matching a chain of - // use->defs, so we're more likely to succeed by starting from the bottom. - // Also, we want to avoid matching partial patterns. - // TODO: It would be more efficient if we removed dead instructions - // iteratively in this loop rather than waiting until the end. 
- for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) { - MadeChange |= foldAnyOrAllBitsSet(I); - MadeChange |= foldGuardedFunnelShift(I, DT); - MadeChange |= tryToRecognizePopCount(I); - MadeChange |= tryToFPToSat(I, TTI); - MadeChange |= tryToRecognizeTableBasedCttz(I); - MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); - MadeChange |= foldPatternedLoads(I, DL); - // NOTE: This function introduces erasing of the instruction `I`, so it - // needs to be called at the end of this sequence, otherwise we may make - // bugs. - MadeChange |= foldSqrt(I, TTI, TLI); + + if(F.getName().str()=="reverse"){ + errs() << "We won't check this function!" << "\n"; + return false; + } + + Module *M=F.getParent(); + for(Function &F: *M){ + for (BasicBlock &BB : F) { + // Ignore unreachable basic blocks. + if (!DT.isReachableFromEntry(&BB)) + continue; + + //errs() << "Hello from here!" << "\n"; + const DataLayout &DL = F.getParent()->getDataLayout(); + + // Walk the block backwards for efficiency. We're matching a chain of + // use->defs, so we're more likely to succeed by starting from the bottom. + // Also, we want to avoid matching partial patterns. + // TODO: It would be more efficient if we removed dead instructions + // iteratively in this loop rather than waiting until the end. + for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) { + MadeChange |= foldAnyOrAllBitsSet(I); + MadeChange |= foldGuardedFunnelShift(I, DT); + MadeChange |= tryToRecognizePopCount(I); + bool flag1=tryToRecognizeTableBasedCRC32(I); + bool flag2=tryToRecognizeCRC32(I); + //bool flag2=false; + MadeChange |= flag1; + if(flag1) + errs() << "Function we have created seems to work properly!\n"; + + if(flag2) + errs() << "CRC32 algorithm has been recognised!" 
<< "\n"; + MadeChange |= tryToFPToSat(I, TTI); + //MadeChange |= tryToRecognizeTableBasedCttz(I); + bool recognised=tryToRecognizeTableBasedCttz(I); + if(recognised){ + MadeChange |=recognised; + //errs() << "Mission completed!" << "\n"; + } else { + MadeChange |=recognised; + //errs() << "Mission is still not completed!" << "\n"; + } + MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); + MadeChange |= foldPatternedLoads(I, DL); + // NOTE: This function introduces erasing of the instruction `I`, so it + // needs to be called at the end of this sequence, otherwise we may make + // bugs. + MadeChange |= foldSqrt(I, TTI, TLI); + } } } - // We're done with transforms, so remove dead instructions. if (MadeChange) for (BasicBlock &BB : F) @@ -983,5 +1815,6 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F, // Mark all the analyses that instcombine updates as preserved. PreservedAnalyses PA; PA.preserveSet(); - return PA; + return PreservedAnalyses::none(); + //return PA; } diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 4b93b624c2eb0..afe889bd692d0 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -99,7 +99,9 @@ void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU, } void llvm::DeleteDeadBlocks(ArrayRef BBs, DomTreeUpdater *DTU, + bool KeepOneInputPHIs) { + #ifndef NDEBUG // Make sure that all predecessors of each dead block is also dead. 
SmallPtrSet Dead(BBs.begin(), BBs.end()); diff --git a/llvm/lib/Transforms/Utils/PetarCountCttz.cpp b/llvm/lib/Transforms/Utils/PetarCountCttz.cpp index 33a7af390d32e..93ad2457cac3b 100644 --- a/llvm/lib/Transforms/Utils/PetarCountCttz.cpp +++ b/llvm/lib/Transforms/Utils/PetarCountCttz.cpp @@ -8,18 +8,18 @@ int countCttzIntrinsicAppearance=0; PreservedAnalyses PetarCountCttzPass::run(Function &F, FunctionAnalysisManager &AM) { errs() << "Function name: " << F.getName() << "\n"; - errs() << "\n-------------------------------\n"; - errs() << "Instructions within this function!\n"; + //errs() << "\n-------------------------------\n"; + //errs() << "Instructions within this function!\n"; for(BasicBlock& BB: F){ for(Instruction& I: BB){ - errs()<< I.getName() << "\n"; + //errs()<< I.getName() << "\n"; IntrinsicInst *II = dyn_cast(&I); if(II && (II->getIntrinsicID() == Intrinsic::cttz)) countCttzIntrinsicAppearance++; } } - errs() << "\n-------------------------------\n"; + //errs() << "\n-------------------------------\n"; errs() << "Total number of cttz intrinsic appearances is equal to: " << countCttzIntrinsicAppearance << ".\n"; return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp index 7359f58a19f6d..098bd52d86bb3 100644 --- a/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp +++ b/llvm/lib/Transforms/Utils/PetarHelloWorld.cpp @@ -1,26 +1,35 @@ -#include "llvm/Transforms/Utils/PetarCountCttz.h" +#include "llvm/Transforms/Utils/PetarHelloWorld.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Regex.h" using namespace llvm; -int countCttzIntrinsicAppearance=0; +int count_Cttz_Intrinsic_Appearance=0; -PreservedAnalyses PetarCountCttzPass::run(Function &F, FunctionAnalysisManager &AM) { +PreservedAnalyses PetarHelloWorldPass::run(Function &F, FunctionAnalysisManager &AM) { + + Regex llvmcttzRegex("^llvm\\.cttz\\."); errs() << "Function name: " <<
F.getName() << "\n"; - //errs() << "\n-------------------------------\n"; - //errs() << "Instructions within this function!\n"; for(BasicBlock& BB: F){ for(Instruction& I: BB){ - //errs()<< I.getName() << "\n"; - IntrinsicInst *II = dyn_cast(&I); - - if(II && (II->getIntrinsicID() == Intrinsic::cttz)) - countCttzIntrinsicAppearance++; + //errs()<< "Instruction: " << I.getName() << "\n"; + if(auto *callBase = dyn_cast<CallBase>(&I)){ + // Indirect calls have no statically known callee, so getCalledFunction() returns null for them; skip those instead of dereferencing null. + Function *callee = callBase->getCalledFunction(); + if(!callee) + continue; + StringRef functionName = callee->getName(); + //errs() << "Instruction name: " << functionName << "\n"; + if(llvmcttzRegex.match(functionName)){ + errs() << "Look what we have found -> " << functionName << "\n"; + count_Cttz_Intrinsic_Appearance++; + } + } } } //errs() << "\n-------------------------------\n"; - errs() << "Total number of cttz intrinsic appearances is equal to: " << countCttzIntrinsicAppearance << ".\n"; + errs() << "Total number of cttz intrinsic appearances is equal to: " << count_Cttz_Intrinsic_Appearance << ".\n"; return PreservedAnalyses::all(); } \ No newline at end of file diff --git a/llvm/test/Transforms/Util/petar-count-cttz.ll b/llvm/test/Transforms/Util/petar-count-cttz.ll new file mode 100644 index 0000000000000..941cffca4587a --- /dev/null +++ b/llvm/test/Transforms/Util/petar-count-cttz.ll @@ -0,0 +1,48 @@ +; RUN: opt -disable-output -passes=petarcountcttz %s 2>&1 | FileCheck %s + +; CHECK: Function name: cttz_64_eq_select +; CHECK-NEXT: Total number of cttz intrinsic appearances is equal to: 1. +declare i64 @llvm.cttz.i64(i64, i1) +define i64 @cttz_64_eq_select(i64 %v) nounwind { + + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 %v, 0 + %.op = add nuw nsw i64 %cnt, 6 + %add = select i1 %tobool, i64 5, i64 %.op + ret i64 %add +} + +; CHECK-NEXT: Function name: cttz_64_ne_select +; CHECK-NEXT: Total number of cttz intrinsic appearances is equal to: 2.
+define i64 @cttz_64_ne_select(i64 %v) nounwind { + + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp ne i64 %v, 0 + %.op = add nuw nsw i64 %cnt, 6 + %add = select i1 %tobool, i64 %.op, i64 5 + ret i64 %add +} + +; CHECK-NEXT: Function name: cttz_32_eq_select +; CHECK-NEXT: Total number of cttz intrinsic appearances is equal to: 3. +declare i32 @llvm.cttz.i32(i32, i1) +define i32 @cttz_32_eq_select(i32 %v) nounwind { + + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 %v, 0 + %.op = add nuw nsw i32 %cnt, 6 + %add = select i1 %tobool, i32 5, i32 %.op + ret i32 %add +} + +; CHECK-NEXT: Function name: cttz_32_ne_select +; CHECK-NEXT: Total number of cttz intrinsic appearances is equal to: 4. +define i32 @cttz_32_ne_select(i32 %v) nounwind { + + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp ne i32 %v, 0 + %.op = add nuw nsw i32 %cnt, 6 + %add = select i1 %tobool, i32 %.op, i32 5 + ret i32 %add +} + diff --git a/llvm/test/Transforms/Util/petar-table-based-crc.ll b/llvm/test/Transforms/Util/petar-table-based-crc.ll new file mode 100644 index 0000000000000..e59c362d19be6 --- /dev/null +++ b/llvm/test/Transforms/Util/petar-table-based-crc.ll @@ -0,0 +1,52 @@ +; RUN: opt -disable-output -passes=aggressive-instcombine %s 2>&1 | FileCheck %s + +@__const.main.buffer = private unnamed_addr constant [38 x i32] [i32 -1290756417, i32 -1606390453, i32 1378610760, i32 -2032039261, i32 1955203488, i32 1742404180, i32 -1783531177, i32 -878557837, i32 969524848, i32 714683780, i32 -655182201, i32 205050476, i32 -28094097, i32 -318528869, i32 526918040, i32 1361435347, i32 -1555146288, i32 -1340167644, i32 1114974503, i32 -1765847604, i32 1691668175, i32 2005155131, i32 -2047885768, i32 -604208612, i32 697762079, i32 986182379, i32 -928222744, i32 476452099, i32 -301099520, i32 -44210700, i32 255256311, i32 1640403810, i32 -1817374623, i32 -2130844779, i32 1922457750, i32 -1503918979, i32 1412925310, i32 
1197962378], align 16 +@.str = private unnamed_addr constant [13 x i8] c"result = %u\0A\00", align 1 +@crc32Table = internal constant [256 x i32] [i32 0, i32 -227835133, i32 -516198153, i32 324072436, i32 -946170081, i32 904991772, i32 648144872, i32 -724933397, i32 -1965467441, i32 2024987596, i32 1809983544, i32 -1719030981, i32 1296289744, i32 -1087877933, i32 -1401372889, i32 1578318884, i32 274646895, i32 -499825556, i32 -244992104, i32 51262619, i32 -675000208, i32 632279923, i32 922689671, i32 -996891772, i32 -1702387808, i32 1760304291, i32 2075979607, i32 -1982370732, i32 1562183871, i32 -1351185476, i32 -1138329528, i32 1313733451, i32 549293790, i32 -757723683, i32 -1048117719, i32 871202090, i32 -416867903, i32 357341890, i32 102525238, i32 -193467851, i32 -1436232175, i32 1477399826, i32 1264559846, i32 -1187764763, i32 1845379342, i32 -1617575411, i32 -1933233671, i32 2125378298, i32 820201905, i32 -1031222606, i32 -774358714, i32 598981189, i32 -143008082, i32 85089709, i32 373468761, i32 -467063462, i32 -1170599554, i32 1213305469, i32 1526817161, i32 -1452612982, i32 2107672161, i32 -1882520222, i32 -1667500394, i32 1861252501, i32 1098587580, i32 -1290756417, i32 -1606390453, i32 1378610760, i32 -2032039261, i32 1955203488, i32 1742404180, i32 -1783531177, i32 -878557837, i32 969524848, i32 714683780, i32 -655182201, i32 205050476, i32 -28094097, i32 -318528869, i32 526918040, i32 1361435347, i32 -1555146288, i32 -1340167644, i32 1114974503, i32 -1765847604, i32 1691668175, i32 2005155131, i32 -2047885768, i32 -604208612, i32 697762079, i32 986182379, i32 -928222744, i32 476452099, i32 -301099520, i32 -44210700, i32 255256311, i32 1640403810, i32 -1817374623, i32 -2130844779, i32 1922457750, i32 -1503918979, i32 1412925310, i32 1197962378, i32 -1257441399, i32 -350237779, i32 427051182, i32 170179418, i32 -129025959, i32 746937522, i32 -554770511, i32 -843174843, i32 1070968646, i32 1905808397, i32 -2081171698, i32 -1868356358, i32 1657317369, i32 
-1241332974, i32 1147748369, i32 1463399397, i32 -1521340186, i32 -79622974, i32 153784257, i32 444234805, i32 -401473738, i32 1021025245, i32 -827320098, i32 -572462294, i32 797665321, i32 -2097792136, i32 1889384571, i32 1674398607, i32 -1851340660, i32 1164749927, i32 -1224265884, i32 -1537745776, i32 1446797203, i32 137323447, i32 -96149324, i32 -384560320, i32 461344835, i32 -810158936, i32 1037989803, i32 781091935, i32 -588970148, i32 -1834419177, i32 1623424788, i32 1939049696, i32 -2114449437, i32 1429367560, i32 -1487280117, i32 -1274471425, i32 1180866812, i32 410100952, i32 -367384613, i32 -112536529, i32 186734380, i32 -538233913, i32 763408580, i32 1053836080, i32 -860110797, i32 -1572096602, i32 1344288421, i32 1131464017, i32 -1323612590, i32 1708204729, i32 -1749376582, i32 -2065018290, i32 1988219213, i32 680717673, i32 -621187478, i32 -911630946, i32 1002577565, i32 -284657034, i32 493091189, i32 238226049, i32 -61306494, i32 -1307217207, i32 1082061258, i32 1395524158, i32 -1589280451, i32 1972364758, i32 -2015074603, i32 -1800104671, i32 1725896226, i32 952904198, i32 -894981883, i32 -638100751, i32 731699698, i32 -11092711, i32 222117402, i32 510512622, i32 -335130899, i32 -1014159676, i32 837199303, i32 582374963, i32 -790768336, i32 68661723, i32 -159632680, i32 -450051796, i32 390545967, i32 1230274059, i32 -1153434360, i32 -1469116676, i32 1510247935, i32 -1899042540, i32 2091215383, i32 1878366691, i32 -1650582816, i32 -741088853, i32 565732008, i32 854102364, i32 -1065151905, i32 340358836, i32 -433916489, i32 -177076669, i32 119113024, i32 1493875044, i32 -1419691417, i32 -1204696685, i32 1247431312, i32 -1634718085, i32 1828433272, i32 2141937292, i32 -1916740209, i32 -483350502, i32 291187481, i32 34330861, i32 -262120466, i32 615137029, i32 -691946490, i32 -980332558, i32 939183345, i32 1776939221, i32 -1685949482, i32 -1999470558, i32 2058945313, i32 -1368168502, i32 1545135305, i32 1330124605, i32 -1121741762, i32 -210866315, i32 
17165430, i32 307568514, i32 -532767615, i32 888469610, i32 -962626711, i32 -707819363, i32 665062302, i32 2042050490, i32 -1948470087, i32 -1735637171, i32 1793573966, i32 -1104306011, i32 1279665062, i32 1595330642, i32 -1384295599], align 16 + +; Function Attrs: noinline nounwind uwtable +define internal i32 @singletable_crc32c(i32 %0, i8* %1, i64 %2) #0 { + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + %7 = alloca i32*, align 8 + store i32 %0, i32* %4, align 4 + store i8* %1, i8** %5, align 8 + store i64 %2, i64* %6, align 8 + %8 = load i8*, i8** %5, align 8 + %9 = bitcast i8* %8 to i32* + store i32* %9, i32** %7, align 8 + br label %10 + +10: ; preds = %14, %3 + %11 = load i64, i64* %6, align 8 + %12 = add i64 %11, -1 + store i64 %12, i64* %6, align 8 + %13 = icmp ne i64 %11, 0 + br i1 %13, label %14, label %27 + +14: ; preds = %10 + %15 = load i32, i32* %4, align 4 + %16 = load i32*, i32** %7, align 8 + %17 = getelementptr inbounds i32, i32* %16, i32 1 + store i32* %17, i32** %7, align 8 + %18 = load i32, i32* %16, align 4 + %19 = xor i32 %15, %18 + %20 = and i32 %19, 255 + %21 = zext i32 %20 to i64 + %22 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32Table, i64 0, i64 %21 + %23 = load i32, i32* %22, align 4 + %24 = load i32, i32* %4, align 4 + %25 = lshr i32 %24, 8 + %26 = xor i32 %23, %25 + store i32 %26, i32* %4, align 4 + br label %10 + +27: ; preds = %10 + %28 = load i32, i32* %4, align 4 + ret i32 %28 +} + +; CHECK: !!!Table-based CRC32 algorithm is finally recognized!!! +; CHECK-NEXT: It will be nice if we can check the value of the operands in this algorithm implementation! +; CHECK-NEXT: Function we have created seems to work properly! 
diff --git a/petar_notes.txt b/petar_notes.txt new file mode 100644 index 0000000000000..eef73843c632b --- /dev/null +++ b/petar_notes.txt @@ -0,0 +1,54 @@ +LLVM project +Author: Chris Lattner +Master thesis title: Semantic detection of a CRC function for RISCV in LLVM + +What have I learned so far? + + + + + + +---------------------------------------------------------------------------------------- +Unoptimized form of CRC algorithm implementation: + +ee_u16 crcu8(ee_u8 data, ee_u16 crc) { + ee_u8 i = 0, x16 = 0, carry = 0; + for (i = 0; i < 8; i++) { + x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); + data >>= 1; + if (x16 == 1) { + crc ^= 0x4002; + carry = 1; + } else { + carry = 0; + } + + crc >>= 1; + + if (carry) + crc |= 0x8000; + else + crc &= 0x7fff; + } + + return crc; +} + +Optimized implementation of a CRC algorithm: + +ee_u16 crcu8(ee_u8 data, ee_u16 _crc) { + ee_u8 i = 0, x16 = 0, carry = 0; + long crc = _crc; + crc ^= data; + + for (i = 0; i < 8; i++) { + x16 = (ee_u8)crc & 1; + data >>= 1; + crc >>= 1; + crc ^= (x16 & 1) ? 0xa001 : 0; // Conditional XOR + } + + return crc; +} +