diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc index dccf15f3f3..bc1658bed7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc @@ -19,6 +19,7 @@ namespace ghidra { const int4 ArraySequence::MINIMUM_SEQUENCE_LENGTH = 4; +const int4 ArraySequence::MAXIMUM_SEQUENCE_LENGTH = 0x20000; /// Initialize the sequence with the \b root operation which writes the earliest character in the memory region. /// \param fdata is the function containing the sequence @@ -112,7 +113,7 @@ int4 ArraySequence::formByteArray(int4 sz,int4 slot,uint8 rootOff,bool bigEndian int4 elSize = charType->getSize(); for(int4 i=0;i sz) continue; + if (bytePos < 0 || bytePos + elSize > sz) continue; uint8 val = moveOps[i].op->getIn(slot)->getOffset(); used[bytePos] = (val == 0) ? 2 : 1; // Mark byte as used, a 2 indicates a null terminator if (bigEndian) { @@ -470,7 +471,7 @@ void HeapSequence::findBasePointer(Varnode *initPtr) OpCode opc = op->code(); if (opc == CPUI_PTRADD) { int8 sz = op->getIn(2)->getOffset(); - if (sz != charType->getAlignSize()) break; + if (sz != ptrAddMult) break; } else if (opc != CPUI_COPY) break; @@ -546,7 +547,6 @@ void HeapSequence::findInitialStores(vector &stores) vector ptradds; findDuplicateBases(ptradds); int4 pos = 0; - int4 alignSize = charType->getAlignSize(); while(pos < ptradds.size()) { Varnode *vn = ptradds[pos]; pos += 1; @@ -558,7 +558,7 @@ void HeapSequence::findInitialStores(vector &stores) if (op->getIn(0) != vn) continue; // We only check array element size here, if we checked the data-type, we would // need to take into account different pointer styles to the same element data-type - if (op->getIn(2)->getOffset() != alignSize) continue; + if (op->getIn(2)->getOffset() != ptrAddMult) continue; ptradds.push_back(op->getOut()); } else if (opc == CPUI_COPY) { @@ -594,13 +594,13 @@ uint8 
HeapSequence::calcAddElements(Varnode *vn,vector &nonConst,int4 return res; } -/// \brief Calculate the offset and any non-constant additive elements between the given Varnode and the \b basePointer +/// \brief Calculate the byte offset and any non-constant additive elements between the given Varnode and the \b basePointer /// /// Walk backward from the given Varnode thru PTRADDs and COPYs, summing any offsets encountered. /// Any non-constant Varnodes encountered in the path, that are not themselves a pointer, are passed back in a list. /// \param vn is the given Varnode to trace back to the \b basePointer /// \param nonConst will hold the list of non-constant Varnodes being passed back -/// \return the sum off constant offsets on the path +/// \return the sum of constant offsets on the path in byte units uint8 HeapSequence::calcPtraddOffset(Varnode *vn,vector &nonConst) { @@ -610,7 +610,7 @@ uint8 HeapSequence::calcPtraddOffset(Varnode *vn,vector &nonConst) OpCode opc = op->code(); if (opc == CPUI_PTRADD) { uint8 mult = op->getIn(2)->getOffset(); - if (mult != charType->getAlignSize()) + if (mult != ptrAddMult) break; uint8 off = calcAddElements(op->getIn(1),nonConst,3); off *= mult; @@ -623,7 +623,7 @@ uint8 HeapSequence::calcPtraddOffset(Varnode *vn,vector &nonConst) else break; } - return res; + return AddrSpace::addressToByteInt(res, storeSpace->getWordSize()); } /// \brief Determine if two sets of Varnodes are equal @@ -667,18 +667,21 @@ bool HeapSequence::collectStoreOps(void) findInitialStores(initStores); if (initStores.size() + 1 < MINIMUM_SEQUENCE_LENGTH) return false; + uint8 maxSize = MAXIMUM_SEQUENCE_LENGTH * charType->getAlignSize(); // Maximum bytes + uint8 wrapMask = calc_mask(storeSpace->getAddrSize()); baseOffset = calcPtraddOffset(rootOp->getIn(1), nonConstAdds); vector nonConstComp; for(int4 i=0;igetIn(1), nonConstComp); + uint8 diff = (curOffset - baseOffset) & wrapMask; // Allow wrapping relative to base pointer if (setsEqual(nonConstAdds, 
nonConstComp)) { - if (curOffset < baseOffset) - return false; // Root is not the earliest STORE + if (diff >= maxSize) + return false; // Root is not the earliest STORE, or offsets span range larger than maxSize if (!testValue(op)) return false; - moveOps.emplace_back(curOffset - baseOffset,op,-1); + moveOps.emplace_back(diff,op,-1); } } moveOps.emplace_back(0,rootOp,-1); @@ -721,6 +724,7 @@ PcodeOp *HeapSequence::buildStringCopy(void) if (baseOffset != 0) { // Add in any non-zero constant uint8 numEl = baseOffset / charType->getAlignSize(); Varnode *cvn = data.newConstant(basePointer->getSize(), numEl); + cvn->updateType(intType, false, false); if (indexVn == (Varnode *)0) indexVn = cvn; else { @@ -875,13 +879,15 @@ HeapSequence::HeapSequence(Funcdata &fdata,Datatype *ct,PcodeOp *root) : ArraySequence(fdata,ct,root) { baseOffset = 0; + storeSpace = root->getIn(0)->getSpaceFromConst(); + ptrAddMult = AddrSpace::byteToAddressInt(charType->getAlignSize(), storeSpace->getWordSize()); findBasePointer(rootOp->getIn(1)); if (!collectStoreOps()) return; if (!checkInterference()) return; int4 arrSize = moveOps.size() * charType->getAlignSize(); - bool bigEndian = moveOps[0].op->getIn(0)->getSpaceFromConst()->isBigEndian(); + bool bigEndian = storeSpace->isBigEndian(); numElements = formByteArray(arrSize, 2, 0, bigEndian); } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh index 8c17117ddf..64f1140bce 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh @@ -29,6 +29,7 @@ namespace ghidra { class ArraySequence { public: static const int4 MINIMUM_SEQUENCE_LENGTH; ///< Minimum number of sequential characters to trigger replacement with CALLOTHER + static const int4 MAXIMUM_SEQUENCE_LENGTH; ///< Maximum number of characters in replacement string /// \brief Helper class holding a data-flow edge and optionally a memory 
offset being COPYed into or from class WriteNode { public: @@ -38,8 +39,6 @@ public: WriteNode(uint8 off,PcodeOp *o,int4 sl) { offset = off; op = o; slot = sl; } ///< Constructor /// \brief Compare two nodes by their order within a basic block bool operator<(const WriteNode &node2) const { return op->getSeqNum().getOrder() < node2.op->getSeqNum().getOrder(); } - /// \brief Compare two PcodeOps based on the position of the element they copy within the sequence - static bool compareOffset(const WriteNode &a,const WriteNode &b) { return a.offset < b.offset; } }; protected: Funcdata &data; ///< The function containing the sequence @@ -87,6 +86,8 @@ public: class HeapSequence : public ArraySequence { Varnode *basePointer; ///< Pointer that sequence is stored to uint8 baseOffset; ///< Offset relative to pointer to root STORE + AddrSpace *storeSpace; ///< Address space being STOREed to + int4 ptrAddMult; ///< Required multiplier for PTRADD ops vector nonConstAdds; ///< non-constant Varnodes being added into pointer calculation void findBasePointer(Varnode *initPtr); ///< Find the base pointer for the sequence void findDuplicateBases(vector &duplist); ///< Find any duplicates of \b basePointer diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/heapstring.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/heapstring.xml index 778d5fe627..45d8736cb8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/datatests/heapstring.xml +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/heapstring.xml @@ -22,7 +22,8 @@ f30f1efa488b0748ba454c4556454e54 488b0748ba770061007200 6e0048891048ba69006e006700210048 -895008c3 +895008c348b84e454741544956454889 +47f9c3 546865206e756d626572206973205858 @@ -31,6 +32,7 @@ f30f1efa488b0748ba454c4556454e54 + builtin_strncpy\(ptr-\>val,"Message: ",9\); @@ -60,4 +66,5 @@ f30f1efa488b0748ba454c4556454e54 builtin_strncpy\(ptr-\>val \+ pos \+ 0xf,"FOUR",4\); builtin_strncpy\(ptr-\>val,"ELEVENTWELVE",0xc\); 
builtin_memcpy\(ptr-\>wval,L"warning!",0x10\); +builtin_strncpy\(negptr \+ -7,"NEGATIVE",8\);