Skip to content

Panic on empty string #12

@nyw0102

Description

@nyw0102

platform: Linux
version: Latest

Overview:
main() panicked because of a reachable assertion in the MatchAndEmitATTInstruction() function.

Error Message
```
asm-cli-rust: /home/nyw0102/.cargo/git/checkouts/keystone6a7a70f3378d0b72/1856935/bindings/rust/keystone-sys/keystone/llvm/include/llvm/ADT/StringRef.h:210: char llvm_ks::StringRef::operator[](size_t) const: Assertion `Index < Length && "Invalid index!"' failed.
Aborted
```


**Description**

After running “asm-cli-rust” with the flag “--syntax att” and entering “” (an empty string) as input in the REPL, the input is passed to the ks_asm function in “rust/keystone-sys/keystone/llvm/keystone/ks.cpp” as a pointer to the input string.

```  1. int ks_asm(ks_engine *ks,
  2.         const char *assembly,
  3.         uint64_t address,
  4.         unsigned char **insn, size_t *insn_size,
  5.         size_t *stat_count)
  6. {
  7.     MCCodeEmitter *CE;
  8.     MCStreamer *Streamer;
  9.     unsigned char *encoding;
 10.     SmallString<1024> Msg;
 11.     raw_svector_ostream OS(Msg);
 12.  
 13.     if (ks->arch == KS_ARCH_EVM) {
 14.         // handle EVM differently
 15.         unsigned short opcode = EVM_opcode(assembly);
 16.         if (opcode == (unsigned short)-1) {
 17.             // invalid instruction
 18.             return -1;
 19.         }
 20.  
 21.         *insn_size = 1;
 22.         *stat_count = 1;
 23.         encoding = (unsigned char *)malloc(*insn_size);
 24.         encoding[0] = opcode;
 25.         *insn = encoding;
 26.         return 0;
 27.     }
 28.  
 29.     *insn = NULL;
 30.     *insn_size = 0;
 31.  
 32.     MCContext Ctx(ks->MAI, ks->MRI, &ks->MOFI, &ks->SrcMgr, true, address);
 33.     ks->MOFI.InitMCObjectFileInfo(Triple(ks->TripleName), Ctx);
 34.     CE = ks->TheTarget->createMCCodeEmitter(*ks->MCII, *ks->MRI, Ctx);
 35.     if (!CE) {
 36.         // memory insufficient
 37.         return KS_ERR_NOMEM;
 38.     }
 39.     Streamer = ks->TheTarget->createMCObjectStreamer(
 40.             Triple(ks->TripleName), Ctx, *ks->MAB, OS, CE, *ks->STI, ks->MCOptions.MCRelaxAll,
 41.             /*DWARFMustBeAtTheEnd*/ false);
 42.             
 43.     if (!Streamer) {
 44.         // memory insufficient
 45.         delete CE;
 46.         return KS_ERR_NOMEM;
 47.     }
 48.  
 49.     // Tell SrcMgr about this buffer, which is what the parser will pick up.
 50.     ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr = MemoryBuffer::getMemBuffer(assembly);
 51.     if (BufferPtr.getError()) {
 52.         delete Streamer;
 53.         delete CE;
 54.         return KS_ERR_NOMEM;
 55.     }
 56.  
 57.     ks->SrcMgr.clearBuffers();
 58.     ks->SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc());
 59.  
 60.     Streamer->setSymResolver((void *)(ks->sym_resolver));
 61.  
 62.     MCAsmParser *Parser = createMCAsmParser(ks->SrcMgr, Ctx, *Streamer, *ks->MAI);
 63.     if (!Parser) {
 64.         delete Streamer;
 65.         delete CE;
 66.         // memory insufficient
 67.         return KS_ERR_NOMEM;
 68.     }
 69.     MCTargetAsmParser *TAP = ks->TheTarget->createMCAsmParser(*ks->STI, *Parser, *ks->MCII, ks->MCOptions);
 70.     if (!TAP) { 
 71.         // memory insufficient
 72.         delete Parser;
 73.         delete Streamer;
 74.         delete CE;
 75.         return KS_ERR_NOMEM;
 76.     }
 77.     TAP->KsSyntax = ks->syntax;
 78.  
 79.     Parser->setTargetParser(*TAP);
 80.  
 81.     // TODO: optimize this to avoid setting up NASM every time we call ks_asm()
 82.     if (ks->arch == KS_ARCH_X86 && ks->syntax == KS_OPT_SYNTAX_NASM) {
 83.         Parser->initializeDirectiveKindMap(KS_OPT_SYNTAX_NASM);
 84.         ks->MAI->setCommentString(";");
 85.     }
 86.  
 87.     *stat_count = Parser->Run(false, address);
 88.  
 89.     // PPC counts empty statement
 90.     if (ks->arch == KS_ARCH_PPC)
 91.         *stat_count = *stat_count / 2;
 92.  
 93.     ks->errnum = Parser->KsError;
 94.  
 95.     delete TAP;
 96.     delete Parser;
 97.     delete CE;
 98.     delete Streamer;
 99.  
100.     if (ks->errnum >= KS_ERR_ASM)
101.         return -1;
102.     else {
103.         *insn_size = Msg.size();
104.         encoding = (unsigned char *)malloc(*insn_size);
105.         if (!encoding) {
106.             return KS_ERR_NOMEM;
107.         }
108.         memcpy(encoding, Msg.data(), *insn_size);
109.         *insn = encoding;
110.         return 0;
111.     }
112. }
113.  

In this function, SrcMgr, which is a member of Parser, receives the address of the crashing input through the “getMemBuffer” function. Parser is then initialized by the following constructor.

 2.                      const MCAsmInfo &MAI)
 3.     : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
 4.       PlatformParser(nullptr), CurBuffer(SM.getMainFileID()),
 5.       MacrosEnabledFlag(true), HadError(false), CppHashLineNumber(0),
 6.       AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false),
 7.       NasmDefaultRel(false) {
 8.   // Save the old handler.
 9.   SavedDiagHandler = SrcMgr.getDiagHandler();
10.   SavedDiagContext = SrcMgr.getDiagContext();
11.   // Set our own handler which calls the saved handler.
12.   SrcMgr.setDiagHandler(DiagHandler, this);
13.   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
14.   .
15.   .
16.   .

In this constructor, Parser passes the address of the input to ‘Lexer’.
After ‘Parser’ has been initialized, the function “ks_asm” runs “Parser::Run”.

2.  while (Lexer.isNot(AsmToken::Eof)) {
3.     ParseStatementInfo Info;
4.     if (!parseStatement(Info, nullptr, Address)) {
5.       count++;
6.       continue;
7.     }

This function calls “parseStatement”, which in turn calls “X86AsmParser::MatchAndEmitATTInstruction”, initializing that function’s parameter ‘Operands’.

 2.                                               OperandVector &Operands,
 3.                                               MCStreamer &Out,
 4.                                               uint64_t &ErrorInfo,
 5.                                               bool MatchingInlineAsm, unsigned int &ErrorCode, uint64_t &Address)
 6. {
 7.   assert(!Operands.empty() && "Unexpect empty operand list!");
 8.   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
 9.   assert(Op.isToken() && "Leading operand should always be a mnemonic!");
10.   //ArrayRef<SMRange> EmptyRanges = None;
11.  
12.   // First, handle aliases that expand to multiple instructions.
13.   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
14.  
15.   bool WasOriginallyInvalidOperand = false;
16.   MCInst Inst;
17.  
18.   // First, try a direct match.
19.   switch (MatchInstructionImpl(Operands, Inst,
20.                                ErrorInfo, MatchingInlineAsm,
21.                                isParsingIntelSyntax())) {
22.   default: llvm_unreachable("Unexpected match result!");
23.   case Match_Success:
24.     // Some instructions need post-processing to, for example, tweak which
25.     // encoding is selected. Loop on it while changes happen so the
26.     // individual transformations can chain off each other.
27.     if (!MatchingInlineAsm)
28.       while (processInstruction(Inst, Operands))
29.         ;
30.  
31.     Inst.setLoc(IDLoc);
32.     if (!MatchingInlineAsm) {
33.       EmitInstruction(Inst, Operands, Out, ErrorCode);
34.       if (ErrorCode)
35.           return true;
36.     }
37.     Opcode = Inst.getOpcode();
38.     return false;
39.   case Match_MissingFeature:
40.     return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
41.   case Match_InvalidOperand:
42.     WasOriginallyInvalidOperand = true;
43.     break;
44.   case Match_MnemonicFail:
45.     break;
46.   }
47.  
48.   // FIXME: Ideally, we would only attempt suffix matches for things which are
49.   // valid prefixes, and we could just infer the right unambiguous
50.   // type. However, that requires substantially more matcher support than the
51.   // following hack.
52.  
53.   // Change the operand to point to a temporary token.
54.   StringRef Base = Op.getToken();
55.   SmallString<16> Tmp;
56.   Tmp += Base;
57.   Tmp += ' ';
58.   Op.setTokenValue(Tmp);
59.  
60.   // If this instruction starts with an 'f', then it is a floating point stack
61.   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
62.   // 80-bit floating point, which use the suffixes s,l,t respectively.
63.   //
64.   // Otherwise, we assume that this may be an integer instruction, which comes
65.   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
66.   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";

In this function, Operands passes the check assert(!Operands.empty() && "Unexpect empty operand list!"); because it is not empty, but the value “Base” is initialized to “” (an empty string). In the last line shown, the function reads the character at index 0 of Base. For an empty Base, the overloaded operator “[]” fails its assertion.

 2.  
 3. StringRef(const char *Str)
 4.       : Data(Str) {
 5.         //assert(Str && "StringRef cannot be built from a NULL argument");
 6.         if (!Str)
 7.             Length = 0;
 8.         else 
 9.             Length = ::strlen(Str); // invoking strlen(NULL) is undefined behavior
10.       }
11.  
12. char operator[](size_t Index) const {
13.       assert(Index < Length && "Invalid index!");
14.       return Data[Index];
15.     }
16.  

In this file, the value “Length” is initialized to 0 because the input is an empty string. As a result, the overloaded operator “[]” triggers the assertion, because Index (0) is equal to Length (0) and the condition Index < Length is false.

How to Reproduce

  1. Run “asm-cli-rust” with the flag “--syntax att”
  2. Give input “” (empty string)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions