From 23d43b81675bde697c7753e65db3ce00429b00ca Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Mon, 30 Sep 2024 22:36:53 +0000 Subject: [PATCH] GP-4715 SSA revisit MemRange --- .../Decompiler/certification.manifest | 1 + .../Decompiler/src/decompile/cpp/double.cc | 6 +- .../Decompiler/src/decompile/cpp/funcdata.hh | 1 + .../src/decompile/cpp/funcdata_varnode.cc | 25 ++- .../Decompiler/src/decompile/cpp/heritage.cc | 201 +++++++++++------- .../Decompiler/src/decompile/cpp/heritage.hh | 62 ++++-- .../Decompiler/src/decompile/cpp/op.cc | 2 +- .../Decompiler/src/decompile/cpp/op.hh | 9 +- .../src/decompile/cpp/ruleaction.cc | 2 +- .../Decompiler/src/decompile/cpp/typeop.cc | 2 +- .../src/decompile/datatests/revisit.xml | 31 +++ 11 files changed, 229 insertions(+), 113 deletions(-) create mode 100644 Ghidra/Features/Decompiler/src/decompile/datatests/revisit.xml diff --git a/Ghidra/Features/Decompiler/certification.manifest b/Ghidra/Features/Decompiler/certification.manifest index 48191e20b2..ce36ce2ddb 100644 --- a/Ghidra/Features/Decompiler/certification.manifest +++ b/Ghidra/Features/Decompiler/certification.manifest @@ -66,6 +66,7 @@ src/decompile/datatests/ptrtoarray.xml||GHIDRA||||END| src/decompile/datatests/readvolatile.xml||GHIDRA||||END| src/decompile/datatests/retspecial.xml||GHIDRA||||END| src/decompile/datatests/retstruct.xml||GHIDRA||||END| +src/decompile/datatests/revisit.xml||GHIDRA||||END| src/decompile/datatests/sbyte.xml||GHIDRA||||END| src/decompile/datatests/skipnext2.xml||GHIDRA||||END| src/decompile/datatests/stackreturn.xml||GHIDRA||||END| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/double.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/double.cc index 8fcd94aa9e..b56f08658c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/double.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/double.cc @@ -1403,7 +1403,7 @@ void SplitVarnode::replaceCopyForce(Funcdata &data,const Address &addr,SplitVarn { Varnode *inVn = in.getWhole(); - bool returnForm = copyhi->stopsCopyPropagation(); + bool returnForm = copyhi->isReturnCopy(); if (returnForm && inVn->getAddr() != addr) { // Placeholder for global propagation past a RETURN needs an additional COPY PcodeOp *otherPoint1 = copyhi->getIn(0)->getDef(); @@ -1423,7 +1423,7 @@ void SplitVarnode::replaceCopyForce(Funcdata &data,const Address &addr,SplitVarn Varnode *outVn = data.newVarnodeOut(in.getSize(), addr, wholeCopy); outVn->setAddrForce(); if (returnForm) - wholeCopy->setStopCopyPropagation(); + data.markReturnCopy(wholeCopy); data.opSetInput(wholeCopy,inVn,0); data.opInsertBefore(wholeCopy, copyhi); data.opDestroy(copyhi); // Destroy the original COPYs. Outputs have no descendants. @@ -3156,7 +3156,7 @@ bool CopyForceForm::verify(Varnode *h,Varnode *l,Varnode *w,PcodeOp *cpy) continue; if (!SplitVarnode::isAddrTiedContiguous(reslo, reshi, addrOut)) // Output MUST be contiguous addresses continue; - if (copyhi->stopsCopyPropagation()) { // Special form has additional requirements + if (copyhi->isReturnCopy()) { // Special form has additional requirements if (h->loneDescend() == (PcodeOp *)0) continue; if (l->loneDescend() == (PcodeOp *)0) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index 1a04351d5d..c1cb07ab00 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -445,6 +445,7 @@ public: PcodeOp *newIndirectOp(PcodeOp *indeffect,const Address &addr,int4 sz,uint4 extraFlags); PcodeOp *newIndirectCreation(PcodeOp *indeffect,const Address &addr,int4 sz,bool possibleout); void markIndirectCreation(PcodeOp *indop,bool possibleOutput); ///< Convert CPUI_INDIRECT into an \e indirect \e creation + void markReturnCopy(PcodeOp *op) { op->flags |= PcodeOp::return_copy; } ///< Mark COPY as returning a global value PcodeOp *findOp(const SeqNum &sq) { return obank.findOp(sq); } ///< Find PcodeOp with given sequence number void opInsertBefore(PcodeOp *op,PcodeOp *follow); ///< Insert given PcodeOp before a specific op void opInsertAfter(PcodeOp *op,PcodeOp *prev); ///< Insert given PcodeOp after a specific op diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index a7b1bc1097..16ce0ae711 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -397,36 +397,45 @@ void Funcdata::combineInputVarnodes(Varnode *vnHi,Varnode *vnLo) if (!isContiguous) throw LowlevelError("Input varnodes being combined are not contiguous"); vector pieceList; - bool otherOps = false; + bool otherOpsHi = false; + bool otherOpsLo = false; list::const_iterator iter; for(iter=vnHi->beginDescend();iter!=vnHi->endDescend();++iter) { PcodeOp *op = *iter; if (op->code() == CPUI_PIECE && op->getIn(0) == vnHi && op->getIn(1) == vnLo) pieceList.push_back(op); else - otherOps = true; + otherOpsHi = true; + } + for(iter=vnLo->beginDescend();iter!=vnLo->endDescend();++iter) { + PcodeOp *op = *iter; + if (op->code() != CPUI_PIECE || op->getIn(0) != vnHi || op->getIn(1) != vnLo) + otherOpsLo = true; } for(int4 i=0;igetStart()); opSetOpcode(subHi, CPUI_SUBPIECE); opSetInput(subHi,newConstant(4, vnLo->getSize()),1); Varnode *newHi = newVarnodeOut(vnHi->getSize(),vnHi->getAddr(),subHi); opInsertBegin(subHi, bb); + totalReplace(vnHi, newHi); + } + if (otherOpsLo) { + BlockBasic *bb = (BlockBasic *)bblocks.getBlock(0); subLo = newOp(2,bb->getStart()); opSetOpcode(subLo, CPUI_SUBPIECE); opSetInput(subLo,newConstant(4, 0),1); Varnode *newLo = newVarnodeOut(vnLo->getSize(),vnLo->getAddr(),subLo); opInsertBegin(subLo, bb); - totalReplace(vnHi, newHi); totalReplace(vnLo, newLo); } int4 outSize = vnHi->getSize() + vnLo->getSize(); @@ -438,10 +447,10 @@ void Funcdata::combineInputVarnodes(Varnode *vnHi,Varnode *vnLo) opSetInput(pieceList[i],inVn,0); opSetOpcode(pieceList[i], CPUI_COPY); } - if (otherOps) { + if (otherOpsHi) opSetInput(subHi,inVn,0); + if (otherOpsLo) opSetInput(subLo,inVn,0); - } } /// Construct a constant Varnode up to 128 bits, using INT_ZEXT and PIECE if necessary. diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc index 20df2465d6..3d05b1c221 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -98,6 +98,43 @@ int4 LocationMap::findPass(const Address &addr) const return -1; } +/// Addresses fed to this method must already be sorted. If the given range intersects the last range in +/// the list, the last range is extended to cover it. Otherwise the range is added as a new element to the +/// end of the list. The given boolean properties are associated with any new element. If an old element is +/// extended, any new boolean properties are ORed into the old element's properties. +/// \param addr is the starting address of the given range +/// \param size is the number of bytes in the given range +/// \param fl are the given boolean properties to associate with the range +void TaskList::add(Address addr,int4 size,uint4 fl) + +{ + if (!tasklist.empty()) { + MemRange &entry(tasklist.back()); + int4 over = addr.overlap(0,entry.addr,entry.size); + if (over >= 0) { + int4 relsize = size + over; + if (relsize > entry.size) + entry.size = relsize; + entry.flags |= fl; + return; + } + } + tasklist.emplace_back(addr,size,fl); +} + +/// This can be used to add a range anywhere in the list, but the new range must already be disjoint +/// from ranges in the list. +/// \param pos is the position in the list before which the new range will be inserted +/// \param addr is the starting address of the new range +/// \param size is the number of bytes in the new range +/// \param fl is the boolean properties to be associated with the new range +/// \return an iterator to the new range +TaskList::iterator TaskList::insert(iterator pos,Address addr,int4 size,uint4 fl) + +{ + return tasklist.emplace(pos,addr,size,fl); +} + /// Any basic blocks currently in \b this queue are removed. Space is /// reserved for a new set of prioritized stacks. /// \param maxdepth is the number of stacks to allocate @@ -194,13 +231,13 @@ void Heritage::clearInfoList(void) (*iter).reset(); } -/// \brief Remove deprecated CPUI_MULTIEQUAL or CPUI_INDIRECT ops, preparing to re-heritage +/// \brief Remove deprecated CPUI_MULTIEQUAL, CPUI_INDIRECT, or CPUI_COPY ops, preparing to re-heritage /// /// If a previous Varnode was heritaged through a MULTIEQUAL or INDIRECT op, but now /// a larger range containing the Varnode is being heritaged, we throw away the op, /// letting the data-flow for the new larger range determine the data-flow for the /// old Varnode. The original Varnode is redefined as the output of a SUBPIECE -/// of a larger free Varnode. +/// of a larger free Varnode. Return form COPYs are simply removed, in preparation for a larger COPY. /// \param remove is the list of Varnodes written by MULTIEQUAL or INDIRECT /// \param addr is the start of the larger range /// \param size is the size of the range @@ -233,13 +270,18 @@ void Heritage::removeRevisitedMarkers(const vector &remove,const Addr else pos = targetOp->getBasicIter(); ++pos; // Insert SUBPIECE after target of INDIRECT + vn->clearAddrForce(); // Replacement INDIRECT will hold the address } - else { + else if (op->code() == CPUI_MULTIEQUAL) { pos = op->getBasicIter(); // Insert SUBPIECE after all MULTIEQUALs in block ++pos; while(pos != bl->endOp() && (*pos)->code() == CPUI_MULTIEQUAL) ++pos; } + else { // Remove return form COPY + fd->opUnlink(op); + continue; + } int4 offset = vn->overlap(addr,size); fd->opUninsert(op); newInputs.clear(); @@ -256,48 +298,50 @@ void Heritage::removeRevisitedMarkers(const vector &remove,const Addr /// \brief Collect free reads, writes, and inputs in the given address range /// -/// \param addr is the starting address of the range -/// \param size is the number of bytes in the range +/// \param memrange is the given address range /// \param read will hold any read Varnodes in the range /// \param write will hold any written Varnodes /// \param input will hold any input Varnodes /// \param remove will hold any PcodeOps that need to be removed /// \return the maximum size of a write -int4 Heritage::collect(Address addr,int4 size, - vector &read,vector &write, - vector &input,vector &remove) const - +int4 Heritage::collect(MemRange &memrange,vector &read,vector &write, + vector &input,vector &remove) const { - Varnode *vn; - VarnodeLocSet::const_iterator viter = fd->beginLoc(addr); + read.clear(); + write.clear(); + input.clear(); + remove.clear(); VarnodeLocSet::const_iterator enditer; - uintb start = addr.getOffset(); - addr = addr + size; - if (addr.getOffset() < start) { // Wraparound - Address tmp(addr.getSpace(),addr.getSpace()->getHighest()); + uintb start = memrange.addr.getOffset(); + Address endaddr = memrange.addr + memrange.size; + if (endaddr.getOffset() < start) { // Wraparound + Address tmp(endaddr.getSpace(),endaddr.getSpace()->getHighest()); enditer = fd->endLoc(tmp); } else - enditer = fd->beginLoc(addr); + enditer = fd->beginLoc(endaddr); int4 maxsize = 0; - while( viter != enditer ) { - vn = *viter; + for(VarnodeLocSet::const_iterator viter = fd->beginLoc(memrange.addr); viter != enditer; ++viter) { + Varnode *vn = *viter; if (!vn->isWriteMask()) { if (vn->isWritten()) { - if (vn->getSize() < size && vn->getDef()->isMarker()) - remove.push_back(vn); - else { - if (vn->getSize() > maxsize) // Look for maximum write size - maxsize = vn->getSize(); - write.push_back(vn); + PcodeOp *op = vn->getDef(); + if (op->isMarker() || op->isReturnCopy()) { // Evidence of previous heritage in this range + if (vn->getSize() < memrange.size) { + remove.push_back(vn); + continue; + } + memrange.clearProperty(MemRange::new_addresses); // Previous pass covered everything } + if (vn->getSize() > maxsize) // Look for maximum write size + maxsize = vn->getSize(); + write.push_back(vn); } else if ((!vn->isHeritageKnown())&&(!vn->hasNoDescend())) read.push_back(vn); else if (vn->isInput()) input.push_back(vn); } - ++viter; } return maxsize; } @@ -1101,15 +1145,15 @@ void Heritage::reprocessFreeStores(AddrSpace *spc,vector &freeStores) /// The traditional phi-node placement and renaming algorithms don't expect /// variable pairs where there is partial overlap. For the given address range, /// we make all the free Varnode sizes look uniform by adding PIECE and SUBPIECE -/// ops. We also add INDIRECT ops, so that we can ignore indirect effects -/// of LOAD/STORE/CALL ops. +/// ops. If enabled, we also add INDIRECT ops, so that renaming takes into account +/// indirect effects of LOAD/STORE/CALL ops. /// \param addr is the starting address of the given range /// \param size is the number of bytes in the given range -/// \param guardPerformed is true if a guard has been previously performed on the range +/// \param addIndirects is true if a guard has been previously performed on the range /// \param read is the set of Varnode values reading from the range /// \param write is the set of written Varnodes in the range /// \param inputvars is the set of Varnodes in the range already marked as input -void Heritage::guard(const Address &addr,int4 size,bool guardPerformed, +void Heritage::guard(const Address &addr,int4 size,bool addIndirects, vector &read,vector &write,vector &inputvars) { @@ -1141,7 +1185,7 @@ void Heritage::guard(const Address &addr,int4 size,bool guardPerformed, // free for an address that has already been guarded before. // Because INDIRECTs for a single CALL or STORE really issue simultaneously, having multiple INDIRECT guards // for the same address confuses the renaming algorithm, so we don't add guards if we've added them before. - if (!guardPerformed) { + if (addIndirects) { fl = 0; // Query for generic properties of address (use empty usepoint) fd->getScopeLocal()->queryProperties(addr,size,Address(),fl); @@ -1639,7 +1683,7 @@ void Heritage::guardReturns(uint4 fl,const Address &addr,int4 size,vectorsetAddrForce(); vn->setActiveHeritage(); fd->opSetOpcode(copyop,CPUI_COPY); - copyop->setStopCopyPropagation(); + fd->markReturnCopy(copyop); Varnode *invn = fd->newVarnode(size,addr); invn->setActiveHeritage(); fd->opSetInput(copyop,invn,0); @@ -1656,9 +1700,8 @@ void Heritage::guardReturns(uint4 fl,const Address &addr,int4 size,vector &refine,const Address &addr,int4 size,const vector &vnlist) +void Heritage::buildRefinement(vector &refine,const Address &addr,const vector &vnlist) { for(uint4 i=0;i &refine) /// \brief Find the common refinement of all reads and writes in the address range /// /// Split the reads and writes so they match the refinement. -/// \param addr is the first address in the range -/// \param size is the number of bytes in the range +/// \param memiter points to the address range to be refined /// \param readvars is all \e free Varnodes overlapping the address range /// \param writevars is all written Varnodes overlapping the address range /// \param inputvars is all known input Varnodes overlapping the address range /// \return \b true if there is a non-trivial refinement -bool Heritage::refinement(const Address &addr,int4 size,const vector &readvars,const vector &writevars,const vector &inputvars) - +TaskList::iterator Heritage::refinement(TaskList::iterator memiter,const vector &readvars, + const vector &writevars,const vector &inputvars) { - if (size > 1024) return false; + int4 size = (*memiter).size; + if (size > 1024) return disjoint.end(); + Address addr = (*memiter).addr; vector refine(size+1,0); - buildRefinement(refine,addr,size,readvars); - buildRefinement(refine,addr,size,writevars); - buildRefinement(refine,addr,size,inputvars); + buildRefinement(refine,addr,readvars); + buildRefinement(refine,addr,writevars); + buildRefinement(refine,addr,inputvars); int4 lastpos = 0; for(int4 curpos=1;curpos < size;++curpos) { // Convert boundary points to partition sizes if (refine[curpos] != 0) { @@ -1859,7 +1903,7 @@ bool Heritage::refinement(const Address &addr,int4 size,const vector lastpos = curpos; } } - if (lastpos == 0) return false; // No non-trivial refinements + if (lastpos == 0) return disjoint.end(); // No non-trivial refinements refine[lastpos] = size-lastpos; remove13Refinement(refine); vector newvn; @@ -1871,22 +1915,26 @@ bool Heritage::refinement(const Address &addr,int4 size,const vector refineInput(inputvars[i],addr,refine,newvn); // Alter the disjoint cover (both locally and globally) to reflect our refinement - LocationMap::iterator iter = disjoint.find(addr); - int4 addrPass = (*iter).second.pass; - disjoint.erase(iter); - iter = globaldisjoint.find(addr); + uint4 flags = (*memiter).flags; + memiter = disjoint.erase(memiter); + LocationMap::iterator iter = globaldisjoint.find(addr); + int4 curPass = (*iter).second.pass; globaldisjoint.erase(iter); - Address curaddr = addr; int4 cut = 0; + int4 sz = refine[cut]; int4 intersect; + TaskList::iterator resiter = disjoint.insert(memiter,addr,sz,flags); + globaldisjoint.add(addr,sz,curPass,intersect); + cut += sz; + addr = addr + sz; while(cut < size) { - int4 sz = refine[cut]; - disjoint.add(curaddr,sz,addrPass,intersect); - globaldisjoint.add(curaddr,sz,addrPass,intersect); + sz = refine[cut]; + disjoint.insert(memiter,addr,sz,flags); + globaldisjoint.add(addr,sz,curPass,intersect); cut += sz; - curaddr = curaddr + sz; + addr = addr + sz; } - return true; + return resiter; } /// \brief Make sure existing inputs for the given range fill it entirely @@ -2549,52 +2597,43 @@ void Heritage::rename(void) void Heritage::placeMultiequals(void) { - LocationMap::iterator iter; + TaskList::iterator iter; vector readvars; vector writevars; vector inputvars; vector removevars; for(iter=disjoint.begin();iter!=disjoint.end();++iter) { - Address addr = (*iter).first; - int4 size = (*iter).second.size; - bool guardPerformed = (*iter).second.pass < pass; - readvars.clear(); - writevars.clear(); - inputvars.clear(); - removevars.clear(); - int4 max = collect(addr,size,readvars,writevars,inputvars,removevars); // Collect reads/writes - if ((size > 4)&&(max < size)) { - if (refinement(addr,size,readvars,writevars,inputvars)) { - iter = disjoint.find(addr); - size =(*iter).second.size; - readvars.clear(); - writevars.clear(); - inputvars.clear(); - removevars.clear(); - collect(addr,size,readvars,writevars,inputvars,removevars); + int4 max = collect(*iter,readvars,writevars,inputvars,removevars); // Collect reads/writes + if ((*iter).size > 4 && max < (*iter).size) { + TaskList::iterator refiter = refinement(iter,readvars,writevars,inputvars); + if (refiter != disjoint.end()) { + iter = refiter; + collect(*iter,readvars,writevars,inputvars,removevars); } } + const MemRange &memrange(*iter); + int4 size = memrange.size; if (readvars.empty()) { if (writevars.empty() && inputvars.empty()) { continue; } - if (addr.getSpace()->getType() == IPTR_INTERNAL || guardPerformed) + if (memrange.addr.getSpace()->getType() == IPTR_INTERNAL || memrange.oldAddresses()) continue; } if (!removevars.empty()) - removeRevisitedMarkers(removevars, addr, size); - guardInput(addr,size,inputvars); - guard(addr,size,guardPerformed,readvars,writevars,inputvars); + removeRevisitedMarkers(removevars, memrange.addr, size); + guardInput(memrange.addr,size,inputvars); + guard(memrange.addr,size,memrange.newAddresses(),readvars,writevars,inputvars); calcMultiequals(writevars); // Calculate where MULTIEQUALs go for(int4 i=0;inewOp(bl->sizeIn(),bl->getStart()); - Varnode *vnout = fd->newVarnodeOut(size,addr,multiop); + Varnode *vnout = fd->newVarnodeOut(size,memrange.addr,multiop); vnout->setActiveHeritage(); fd->opSetOpcode(multiop,CPUI_MULTIEQUAL); // Create each MULTIEQUAL for(int4 j=0;jsizeIn();++j) { - Varnode *vnin = fd->newVarnode(size,addr); + Varnode *vnin = fd->newVarnode(size,memrange.addr); fd->opSetInput(multiop,vnin,j); } fd->opInsertBegin(multiop,bl); // Insert at beginning of block @@ -2666,7 +2705,7 @@ void Heritage::heritage(void) int4 prev = 0; LocationMap::iterator liter = globaldisjoint.add(vn->getAddr(),vn->getSize(),pass,prev); if (prev == 0) // All new location being heritaged, or intersecting with something new - disjoint.add((*liter).first,(*liter).second.size,pass,prev); + disjoint.add((*liter).first,(*liter).second.size,MemRange::new_addresses); else if (prev==2) { // If completely contained in range from previous pass if (vn->isHeritageKnown()) continue; // Don't heritage if we don't have to if (vn->hasNoDescend()) continue; @@ -2675,10 +2714,10 @@ void Heritage::heritage(void) bumpDeadcodeDelay(vn->getSpace()); warnvn = vn; } - disjoint.add((*liter).first,(*liter).second.size,(*liter).second.pass,prev); + disjoint.add((*liter).first,(*liter).second.size,MemRange::old_addresses); } else { // Partially contained in old range, but may contain new stuff - disjoint.add((*liter).first,(*liter).second.size,(*liter).second.pass,prev); + disjoint.add((*liter).first,(*liter).second.size,MemRange::old_addresses | MemRange::new_addresses); if ((!needwarning)&&(info->deadremoved>0)&&!fd->isJumptableRecoveryOn()) { // TODO: We should check if this varnode is tiled by previously heritaged ranges if (vn->isHeritageKnown()) continue; // Assume that it is tiled and produced by merging diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh index a309fb27aa..804c96a525 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh @@ -5,9 +5,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,12 +28,6 @@ namespace ghidra { /// range (indexed by its initial address) maps to its own Varnode stack. typedef map > VariableStack; -/// \brief Label for describing extent of address range that has been heritaged -struct SizePass { - int4 size; ///< Size of the range (in bytes) - int4 pass; ///< Pass when the range was heritaged -}; - /// \brief Map object for keeping track of which address ranges have been heritaged /// /// We keep track of a fairly fine grained description of when each address range @@ -43,6 +37,11 @@ struct SizePass { /// that informs the caller whether the address has been heritaged and if so in which pass. class LocationMap { public: + /// \brief Label for describing extent of address range that has been heritaged + struct SizePass { + int4 size; ///< Size of the range (in bytes) + int4 pass; ///< Pass when the range was heritaged + }; /// Iterator into the main map typedef map::iterator iterator; private: @@ -51,12 +50,48 @@ public: iterator add(Address addr,int4 size,int4 pass,int4 &intersect); ///< Mark new address as \b heritaged iterator find(const Address &addr); ///< Look up if/how given address was heritaged int4 findPass(const Address &addr) const; ///< Look up if/how given address was heritaged - void erase(iterator iter) { themap.erase(iter); } ///< Remove a particular entry from the map + iterator erase(iterator iter) { return themap.erase(iter); } ///< Remove a particular entry from the map iterator begin(void) { return themap.begin(); } ///< Get starting iterator over heritaged ranges iterator end(void) { return themap.end(); } ///< Get ending iterator over heritaged ranges void clear(void) { themap.clear(); } ///< Clear the map of heritaged ranges }; +/// \brief An address range to be processed +struct MemRange { + /// Properties of a single address range + enum { + new_addresses = 1, ///< The range covers addresses that have not been seen in previous passes + old_addresses = 2 ///< The range covers addresses that were seen in previous passes + }; + Address addr; ///< Starting address of the range + int4 size; ///< Number of bytes in the range + uint4 flags; ///< Property flags + MemRange(const Address &ad,int4 sz,uint4 fl) : addr(ad) { size = sz; flags = fl; } ///< Constructor + bool newAddresses(void) const { return ((flags & new_addresses)!=0); } ///< Does \b this range cover new addresses? + bool oldAddresses(void) const { return ((flags & old_addresses)!=0); } ///< Does \b this range cover old addresses? + void clearProperty(uint4 val) { flags &= ~val; } ///< Clear specific properties from the memory range +}; + +/// \brief A list of address ranges that need to be converted to SSA form +/// +/// The disjoint list of ranges are built up and processed in a single pass. The container is fed a list +/// of ranges that may be overlapping but are already in address order. It constructs a disjoint list by taking +/// the union of overlapping ranges. +class TaskList { +public: + /// Iterator in the list + typedef list::iterator iterator; +private: + list tasklist; ///< List of address ranges that needs to be processed +public: + void add(Address addr,int4 size,uint4 fl); ///< Add a range to the list + iterator insert(iterator pos,Address addr,int4 size,uint4 fl); ///< Insert a disjoint range in the list + iterator erase(iterator iter) { return tasklist.erase(iter); } ///< Remove a particular range + iterator begin(void) { return tasklist.begin(); } ///< Get iterator to beginning of \b this list + iterator end(void) { return tasklist.end(); } ///< Get iterator to end of \b this list + void clear(void) { tasklist.clear(); } ///< Clear all ranges in the list +}; + /// \brief Priority queue for the phi-node (MULTIEQUAL) placement algorithm /// /// A \e work-list for basic blocks used during phi-node placement. Implemented as @@ -202,7 +237,7 @@ class Heritage { Funcdata *fd; ///< The function \b this is controlling SSA construction LocationMap globaldisjoint; ///< Disjoint cover of every heritaged memory location - LocationMap disjoint; ///< Disjoint cover of memory locations currently being heritaged + TaskList disjoint; ///< Disjoint cover of memory locations currently being heritaged vector > domchild; ///< Parent->child edges in dominator tree vector > augment; ///< Augmented edges vector flags; ///< Block properties for phi-node placement algorithm @@ -233,7 +268,7 @@ class Heritage { void processJoins(void); void buildADT(void); ///< Build the augmented dominator tree void removeRevisitedMarkers(const vector &remove,const Address &addr,int4 size); - int4 collect(Address addr,int4 size,vector &read,vector &write,vector &input,vector &remove) const; + int4 collect(MemRange &memrange,vector &read,vector &write,vector &input,vector &remove) const; bool callOpIndirectEffect(const Address &addr,int4 size,PcodeOp *op) const; Varnode *normalizeReadSize(Varnode *vn,PcodeOp *op,const Address &addr,int4 size); Varnode *normalizeWriteSize(Varnode *vn,const Address &addr,int4 size); @@ -264,13 +299,14 @@ class Heritage { void guardLoads(uint4 fl,const Address &addr,int4 size,vector &write); void guardReturnsOverlapping(const Address &addr,int4 size); void guardReturns(uint4 fl,const Address &addr,int4 size,vector &write); - static void buildRefinement(vector &refine,const Address &addr,int4 size,const vector &vnlist); + static void buildRefinement(vector &refine,const Address &addr,const vector &vnlist); void splitByRefinement(Varnode *vn,const Address &addr,const vector &refine,vector &split); void refineRead(Varnode *vn,const Address &addr,const vector &refine,vector &newvn); void refineWrite(Varnode *vn,const Address &addr,const vector &refine,vector &newvn); void refineInput(Varnode *vn,const Address &addr,const vector &refine,vector &newvn); void remove13Refinement(vector &refine); - bool refinement(const Address &addr,int4 size,const vector &readvars,const vector &writevars,const vector &inputvars); + TaskList::iterator refinement(TaskList::iterator memiter,const vector &readvars, + const vector &writevars,const vector &inputvars); void visitIncr(FlowBlock *qnode,FlowBlock *vnode); void calcMultiequals(const vector &write); void renameRecurse(BlockBasic *bl,VariableStack &varstack); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc index d76a799960..d51460be84 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc @@ -279,7 +279,7 @@ void PcodeOp::setOpcode(TypeOp *t_op) flags &= ~(PcodeOp::branch | PcodeOp::call | PcodeOp::coderef | PcodeOp::commutative | PcodeOp::returns | PcodeOp::nocollapse | PcodeOp::marker | PcodeOp::booloutput | PcodeOp::unary | PcodeOp::binary | PcodeOp::ternary | PcodeOp::special | - PcodeOp::has_callspec | PcodeOp::no_copy_propagation); + PcodeOp::has_callspec | PcodeOp::return_copy); opcode = t_op; flags |= t_op->getFlags(); } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh index 204060986b..72081ad726 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -90,7 +90,7 @@ public: binary = 0x10000, ///< Evaluate as binary expression special = 0x20000, ///< Cannot be evaluated (without special processing) ternary = 0x40000, ///< Evaluate as ternary operator (or higher) - no_copy_propagation = 0x80000, ///< Op does not allow COPY propagation through its inputs + return_copy = 0x80000, ///< Special form of COPY op for holding global values to (past) the end of the function nonprinting = 0x100000, ///< Op should not be directly printed as source halt = 0x200000, ///< instruction causes processor or process to halt badinstruction = 0x400000, ///< placeholder for bad instruction data @@ -218,8 +218,7 @@ public: void setHoldOutput(void) { addlflags |= hold_output; } ///< Prevent output from being removed as dead code bool isPartialRoot(void) const { return ((addlflags&concat_root)!=0); } ///< Output is root of CONCAT tree void setPartialRoot(void) { addlflags |= concat_root; } ///< Mark \b this as root of CONCAT tree - bool stopsCopyPropagation(void) const { return ((flags&no_copy_propagation)!=0); } ///< Does \b this allow COPY propagation - void setStopCopyPropagation(void) { flags |= no_copy_propagation; } ///< Stop COPY propagation through inputs + bool isReturnCopy(void) const { return ((flags&return_copy)!=0); } ///< Is \b this a \e return form COPY bool noIndirectCollapse(void) const { return ((addlflags & no_indirect_collapse)!=0); } ///< Check if INDIRECT collapse is possible void setNoIndirectCollapse(void) { addlflags |= no_indirect_collapse; } ///< Prevent collapse of INDIRECT bool isStoreUnmapped(void) const { return ((addlflags & store_unmapped)!=0); } ///< Is STORE location supposed to be unmapped diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index f667c6ca95..3c2c0adc26 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -3606,7 +3606,7 @@ int4 RulePropagateCopy::applyOp(PcodeOp *op,Funcdata &data) PcodeOp *copyop; Varnode *vn,*invn; - if (op->stopsCopyPropagation()) return 0; + if (op->isReturnCopy()) return 0; for(i=0;inumInput();++i) { vn = op->getIn(i); if (!vn->isWritten()) continue; // Varnode must be written to diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc index 3d29d36222..b9682339a0 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc @@ -816,7 +816,7 @@ Datatype *TypeOpCallother::getOutputLocal(const PcodeOp *op) const TypeOpReturn::TypeOpReturn(TypeFactory *t) : TypeOp(t,CPUI_RETURN,"return") { - opflags = PcodeOp::special|PcodeOp::returns|PcodeOp::nocollapse|PcodeOp::no_copy_propagation; + opflags = PcodeOp::special|PcodeOp::returns|PcodeOp::nocollapse|PcodeOp::return_copy; behave = new OpBehavior(CPUI_RETURN,false,true); // Dummy behavior } diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/revisit.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/revisit.xml new file mode 100644 index 0000000000..641d4dd646 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/revisit.xml @@ -0,0 +1,31 @@ + + + + +488d1d6d00000048891d52000000488b +0d4b0000008b0183c00a890166e84800 +668b054d000000660564006689054200 +0000c3 + + + + +EAX\(0x00100017:.*\) = r0x00100074:4\(i\) \+ #0xa +r0x00100074:4\(0x0010001c:.*\) = EAX\(0x00100017:.*\) \[\] i0x0010001c: +r0x00100074:2\(0x0010001c:.*\) = SUB42\(r0x00100074:4\(0x0010001c:.*\),#0x0:4\) +AX\(0x00100027:.*\) = r0x00100074:2\(0x0010001c:.*\) \+ #0x64:2 +r0x00100076:2\(0x0010002b:.*\) = SUB42\(r0x00100074:4\(0x0010001c:.*\),#0x2\) +r0x00100074:4\(0x0010002b:.*\) = CONCAT22\(r0x00100076:2\(0x0010002b:.*\),AX\(0x00100027:.*\)\) +r0x00100074:4\(0x00100032:.*\) = r0x00100074:4\(0x0010002b:.*\) +r0x00100074:2\(.*\) = .* \[\] i0x +r0x00100074:2\(0x00100032:.*\) = r0x00100074:2.* +