From 62fe9ad75e58edfa2b51706f4c35ba795d267415 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Thu, 2 May 2019 13:54:36 -0400 Subject: [PATCH] fine tuning for dominant COPY model --- .../src/decompile/cpp/coreaction.cc | 103 +------------ .../src/decompile/cpp/coreaction.hh | 14 +- .../Decompiler/src/decompile/cpp/cover.cc | 4 +- .../Decompiler/src/decompile/cpp/cover.hh | 2 +- .../Decompiler/src/decompile/cpp/merge.cc | 141 +++++++++++++++++- .../Decompiler/src/decompile/cpp/merge.hh | 7 +- .../Decompiler/src/decompile/cpp/variable.cc | 41 ++--- .../Decompiler/src/decompile/cpp/variable.hh | 13 +- 8 files changed, 198 insertions(+), 127 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 47613d697c..f6ac48691e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -3539,118 +3539,24 @@ int4 ActionUnjustifiedParams::apply(Funcdata &data) int4 ActionHideShadow::apply(Funcdata &data) { - VarnodeDefSet::const_iterator iter; + VarnodeDefSet::const_iterator iter,enditer; HighVariable *high; - for(iter=data.beginDef();iter!=data.endDef(Varnode::written);++iter) { + enditer = data.endDef(Varnode::written); + for(iter=data.beginDef();iter!=enditer;++iter) { high = (*iter)->getHigh(); if (high->isMark()) continue; if (data.getMerge().hideShadows(high)) count += 1; high->setMark(); } - for(iter=data.beginDef();iter!=data.endDef(Varnode::written);++iter) { + for(iter=data.beginDef();iter!=enditer;++iter) { high = (*iter)->getHigh(); high->clearMark(); } return 0; } -/// \brief Determine if given Varnode is shadowed by another Varnode in the same HighVariable -/// -/// \param vn is the Varnode to check for shadowing -/// \return \b true if \b vn is shadowed by another Varnode in its high-level variable -bool ActionCopyMarker::shadowedVarnode(const Varnode *vn) - -{ - const Varnode *othervn; - const HighVariable *high = vn->getHigh(); - int4 num,i; - - num = high->numInstances(); - for(i=0;igetInstance(i); - if (othervn == vn) continue; - if (vn->getCover()->intersect(*othervn->getCover()) == 2) return true; - } - return false; -} - -int4 ActionCopyMarker::apply(Funcdata &data) - -{ - vector multiCopy; - list::const_iterator iter; - PcodeOp *op; - HighVariable *h1,*h2,*h3; - Varnode *v1,*v2,*v3; - int4 val; - - for(iter=data.beginOpAlive();iter!=data.endOpAlive();++iter) { - op = *iter; - switch(op->code()) { - case CPUI_COPY: - v1 = op->getOut(); - h1 = v1->getHigh(); - if (h1 == op->getIn(0)->getHigh()) { - data.opSetFlag(op, PcodeOp::nonprinting); - count += 1; - } - else { // COPY between different HighVariables - if (!h1->hasCopyIn1()) { // If this is the first COPY we've seen for this high - h1->setCopyIn1(); // Mark it - multiCopy.push_back(h1); - } - else - h1->setCopyIn2(); // This is at least the second COPY we've seen - if (v1->hasNoDescend()) { // Don't print shadow assignments - if (shadowedVarnode(v1)) { - data.opSetFlag(op, PcodeOp::nonprinting); - count += 1; - } - } - } - break; - case CPUI_PIECE: // Check if output is built out of pieces of itself - h1 = op->getOut()->getHigh(); - h2 = op->getIn(0)->getHigh(); - h3 = op->getIn(1)->getHigh(); - if (!h1->isAddrTied()) break; - if (!h2->isAddrTied()) break; - if (!h3->isAddrTied()) break; - v1 = h1->getTiedVarnode(); - v2 = h2->getTiedVarnode(); - v3 = h3->getTiedVarnode(); - if (v3->overlap(*v1) != 0) break; - if (v2->overlap(*v1) != v3->getSize()) break; - data.opSetFlag(op,PcodeOp::nonprinting); - count += 1; - break; - case CPUI_SUBPIECE: - h1 = op->getOut()->getHigh(); - h2 = op->getIn(0)->getHigh(); - if (!h1->isAddrTied()) break; - if (!h2->isAddrTied()) break; - v1 = h1->getTiedVarnode(); - v2 = h2->getTiedVarnode(); - val = op->getIn(1)->getOffset(); - if (v1->overlap(*v2) != val) break; - data.opSetFlag(op,PcodeOp::nonprinting); - count += 1; - break; - default: - break; - } - } - for(int4 i=0;ihasCopyIn2()) - data.getMerge().processHighRedundantCopy(high); - high->clearCopyIns(); - } - return 0; -} - int4 ActionDynamicMapping::apply(Funcdata &data) { @@ -4706,6 +4612,7 @@ void universal_action(Architecture *conf) act->addAction( new ActionMarkExplicit("merge") ); act->addAction( new ActionMarkImplied("merge") ); // This must come BEFORE general merging act->addAction( new ActionMergeCopy("merge") ); + act->addAction( new ActionDominantCopy("merge") ); act->addAction( new ActionMarkIndirectOnly("merge") ); // Must come after required merges but before speculative act->addAction( new ActionMergeAdjacent("merge") ); act->addAction( new ActionMergeType("merge") ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh index 471667591f..5c86fc30e2 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh @@ -916,16 +916,26 @@ public: virtual int4 apply(Funcdata &data); }; +/// \brief Replace COPYs from the same source with a single dominant COPY +class ActionDominantCopy : public Action { +public: + ActionDominantCopy(const string &g) : Action(rule_onceperfunc,"dominantcopy",g) {} ///< Constructor + virtual Action *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Action *)0; + return new ActionDominantCopy(getGroup()); + } + virtual int4 apply(Funcdata &data) { data.getMerge().processCopyTrims(); return 0; } +}; + /// \brief Mark COPY operations between Varnodes representing the object as \e non-printing class ActionCopyMarker : public Action { - static bool shadowedVarnode(const Varnode *vn); public: ActionCopyMarker(const string &g) : Action(rule_onceperfunc,"copymarker",g) {} ///< Constructor virtual Action *clone(const ActionGroupList &grouplist) const { if (!grouplist.contains(getGroup())) return (Action *)0; return new ActionCopyMarker(getGroup()); } - virtual int4 apply(Funcdata &data); + virtual int4 apply(Funcdata &data) { data.getMerge().markInternalCopies(); return 0; } }; /// \brief Attach \e dynamically mapped symbols to Varnodes in time for data-type propagation diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/cover.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/cover.cc index 30be4969ab..6e2b88a063 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/cover.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/cover.cc @@ -16,6 +16,8 @@ #include "cover.hh" #include "block.hh" +const CoverBlock Cover::emptyBlock; + /// PcodeOp objects and a CoverBlock start/stop boundaries have /// a natural ordering that can be used to tell if a PcodeOp falls /// between boundary points and if CoverBlock objects intersect. @@ -251,7 +253,7 @@ const CoverBlock &Cover::getCoverBlock(int4 i) const { map::const_iterator iter = cover.find(i); if (iter == cover.end()) - return emptyblock; + return emptyBlock; return (*iter).second; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/cover.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/cover.hh index f3a4ffe740..4d874ff9fd 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/cover.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/cover.hh @@ -67,7 +67,7 @@ public: /// Internally this is implemented as a map from basic block to their non-empty CoverBlock class Cover { map cover; ///< block index -> CoverBlock - CoverBlock emptyblock; ///< Template CoverBlock for blocks not covered by \b this + static const CoverBlock emptyBlock; ///< Global empty CoverBlock for blocks not covered by \b this void addRefRecurse(const FlowBlock *bl); ///< Fill-in \b this recursively from the given block public: void clear(void) { cover.clear(); } ///< Clear \b this to an empty Cover diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc index 93a07147ff..f9e409c54e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc @@ -272,7 +272,6 @@ void Merge::mergeOpcode(OpCode opc) } } } - processCopyTrims(); // TODO: move this into its own action } /// \brief Try to merge all HighVariables in the given range that have the same data-type @@ -1080,6 +1079,26 @@ void Merge::markRedundantCopies(HighVariable *high,vector ©,int4 } } +/// \brief Determine if given Varnode is shadowed by another Varnode in the same HighVariable +/// +/// \param vn is the Varnode to check for shadowing +/// \return \b true if \b vn is shadowed by another Varnode in its high-level variable +bool Merge::shadowedVarnode(const Varnode *vn) + +{ + const Varnode *othervn; + const HighVariable *high = vn->getHigh(); + int4 num,i; + + num = high->numInstances(); + for(i=0;igetInstance(i); + if (othervn == vn) continue; + if (vn->getCover()->intersect(*othervn->getCover()) == 2) return true; + } + return false; +} + /// \brief Find all the COPY ops into the given HighVariable /// /// Collect all the COPYs whose output is the given HighVariable but @@ -1104,6 +1123,11 @@ void Merge::findAllIntoCopies(HighVariable *high,vector ©Ins,bool sort(copyIns.begin(),copyIns.end(),compareCopyByInVarnode); } +/// \brief Try to replace COPYs into the given HighVariable with a single dominant COPY +/// +/// Find groups of COPYs into the given HighVariable that come from a single source Varnode, +/// then try to replace them with a COPY. +/// \param high is the given HighVariable void Merge::processHighDominantCopy(HighVariable *high) { @@ -1127,6 +1151,12 @@ void Merge::processHighDominantCopy(HighVariable *high) } } +/// \brief Mark COPY ops into the given HighVariable that are redundant +/// +/// A COPY is redundant if another COPY performs the same action and has +/// dominant control flow. The redundant COPY is not removed but is marked so that +/// it doesn't print in the final source output. +/// \param high is the given HighVariable void Merge::processHighRedundantCopy(HighVariable *high) { @@ -1151,6 +1181,12 @@ void Merge::processHighRedundantCopy(HighVariable *high) } } +/// \brief Try to reduce/eliminate COPYs produced by the merge trimming process +/// +/// In order to force merging of certain Varnodes, extra COPY operations may be inserted +/// to reduce their Cover ranges, and multiple COPYs from the same Varnode can be created this way. +/// This method collects sets of COPYs generated in this way that have the same input Varnode +/// and then tries to replace the COPYs with fewer or a single COPY. void Merge::processCopyTrims(void) { @@ -1174,6 +1210,85 @@ void Merge::processCopyTrims(void) } } +/// \brief Mark redundant/internal COPY PcodeOps +/// +/// Run through all COPY, SUBPIECE, and PIECE operations (PcodeOps that copy data) and +/// characterize those that are \e internal (copy data between storage locations representing +/// the same variable) or \e redundant (perform the same copy as an earlier operation). +/// These, as a result, are not printed in the final source code representation. +void Merge::markInternalCopies(void) + +{ + vector multiCopy; + list::const_iterator iter; + PcodeOp *op; + HighVariable *h1,*h2,*h3; + Varnode *v1,*v2,*v3; + int4 val; + + for(iter=data.beginOpAlive();iter!=data.endOpAlive();++iter) { + op = *iter; + switch(op->code()) { + case CPUI_COPY: + v1 = op->getOut(); + h1 = v1->getHigh(); + if (h1 == op->getIn(0)->getHigh()) { + data.opSetFlag(op, PcodeOp::nonprinting); + } + else { // COPY between different HighVariables + if (!h1->hasCopyIn1()) { // If this is the first COPY we've seen for this high + h1->setCopyIn1(); // Mark it + multiCopy.push_back(h1); + } + else + h1->setCopyIn2(); // This is at least the second COPY we've seen + if (v1->hasNoDescend()) { // Don't print shadow assignments + if (shadowedVarnode(v1)) { + data.opSetFlag(op, PcodeOp::nonprinting); + } + } + } + break; + case CPUI_PIECE: // Check if output is built out of pieces of itself + h1 = op->getOut()->getHigh(); + h2 = op->getIn(0)->getHigh(); + h3 = op->getIn(1)->getHigh(); + if (!h1->isAddrTied()) break; + if (!h2->isAddrTied()) break; + if (!h3->isAddrTied()) break; + v1 = h1->getTiedVarnode(); + v2 = h2->getTiedVarnode(); + v3 = h3->getTiedVarnode(); + if (v3->overlap(*v1) != 0) break; + if (v2->overlap(*v1) != v3->getSize()) break; + data.opSetFlag(op,PcodeOp::nonprinting); + break; + case CPUI_SUBPIECE: + h1 = op->getOut()->getHigh(); + h2 = op->getIn(0)->getHigh(); + if (!h1->isAddrTied()) break; + if (!h2->isAddrTied()) break; + v1 = h1->getTiedVarnode(); + v2 = h2->getTiedVarnode(); + val = op->getIn(1)->getOffset(); + if (v1->overlap(*v2) != val) break; + data.opSetFlag(op,PcodeOp::nonprinting); + break; + default: + break; + } + } + for(int4 i=0;ihasCopyIn2()) + data.getMerge().processHighRedundantCopy(high); + high->clearCopyIns(); + } +#ifdef MERGEMULTI_DEBUG + verifyHighCovers(); +#endif +} + /// \brief Perform low-level details of merging two HighVariables if possible /// /// This routine only fails (returning \b false) if there is a Cover intersection between @@ -1414,3 +1529,27 @@ bool Merge::mergeTest(HighVariable *high,vector &tmplist) tmplist.push_back(high); return true; } + +#ifdef MERGEMULTI_DEBUG +/// \brief Check that all HighVariable covers are consistent +/// +/// For each HighVariable make sure there are no internal intersections between +/// its instance Varnodes (unless one is a COPY shadow of the other). +void Merge::verifyHighCovers(void) + +{ + VarnodeLocSet::const_iterator iter,enditer; + + enditer = data.endLoc(); + for(iter=data.beginLoc();iter!=enditer;++iter) { + Varnode *vn = *iter; + if (vn->hasCover()) { + HighVariable *high = vn->getHigh(); + if (!high->hasCopyIn1()) { + high->setCopyIn1(); + high->verifyCover(); + } + } + } +} +#endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh index cc00166acb..eede994e1a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh @@ -91,6 +91,7 @@ class Merge { static void findSingleCopy(HighVariable *high,vector &singlelist); static bool compareHighByBlock(const HighVariable *a,const HighVariable *b); static bool compareCopyByInVarnode(PcodeOp *op1,PcodeOp *op2); + static bool shadowedVarnode(const Varnode *vn); static void findAllIntoCopies(HighVariable *high,vector ©Ins,bool filterTemps); void collectCovering(vector &vlist,HighVariable *high,PcodeOp *op); bool collectCorrectable(const vector &vlist,list &oplist,vector &slotlist, @@ -111,6 +112,7 @@ class Merge { void buildDominantCopy(HighVariable *high,vector ©,int4 pos,int4 size); void markRedundantCopies(HighVariable *high,vector ©,int4 pos,int4 size); void processHighDominantCopy(HighVariable *high); + void processHighRedundantCopy(HighVariable *high); public: Merge(Funcdata &fd) : data(fd) {} ///< Construct given a specific function bool intersection(HighVariable *a,HighVariable *b); @@ -125,7 +127,10 @@ public: void mergeAdjacent(void); bool hideShadows(HighVariable *high); void processCopyTrims(void); - void processHighRedundantCopy(HighVariable *high); + void markInternalCopies(void); +#ifdef MERGEMULTI_DEBUG + void verifyHighCovers(void); +#endif }; /// \brief Compare HighVariables by the blocks they cover diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc index 860dbd343d..434b88cca1 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc @@ -348,23 +348,28 @@ int4 HighVariable::instanceIndex(const Varnode *vn) const return -1; } -// Varnode *HighVariable::findGlobalRep(void) const +#ifdef MERGEMULTI_DEBUG +/// \brief Check that there are no internal Cover intersections within \b this +/// +/// Look for any pair of Varnodes whose covers intersect, but they are not +/// COPY shadows. Throw an exception in this case. +void HighVariable::verifyCover(void) const -// { -// vector::const_iterator iter; -// Varnode *vn = (Varnode *)0; -// Varnode *vn2; +{ + Cover accumCover; -// for(iter=inst.begin();iter!=inst.end();++iter) { -// vn2 = *iter; -// if (!vn2->getSpace()->globalDiscovery()) -// continue; -// if (vn==(Varnode *)0) -// vn = vn2; -// else { -// if (vn->getAddr()!=vn2->getAddr()) // Not a consistent address -// return (Varnode *)0; -// } -// } -// return vn; -// } + for(int4 i=0;igetCover()) == 2) { + for(int4 j=0;jgetCover()->intersect(*vn->getCover())==2) { + if (!otherVn->copyShadow(vn)) + throw LowlevelError("HighVariable has internal intersection"); + } + } + } + accumCover.merge(*vn->getCover()); + } +} +#endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh index a999666a72..edb3a23508 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh @@ -63,6 +63,11 @@ private: void updateFlags(void) const; ///< (Re)derive boolean properties of \b this from the member Varnodes void updateCover(void) const; ///< (Re)derive the cover of \b this from the member Varnodes void updateType(void) const; ///< (Re)derive the data-type for \b this from the member Varnodes + void setCopyIn1(void) const { highflags |= copy_in1; } ///< Mark the existence of one COPY into \b this + void setCopyIn2(void) const { highflags |= copy_in2; } ///< Mark the existence of two COPYs into \b this + void clearCopyIns(void) const { highflags &= ~(copy_in1 | copy_in2); } ///< Clear marks indicating COPYs into \b this + bool hasCopyIn1(void) const { return ((highflags©_in1)!=0); } ///< Is there at least one COPY into \b this + bool hasCopyIn2(void) const { return ((highflags©_in2)!=0); } ///< Is there at least two COPYs into \b this public: HighVariable(Varnode *vn); ///< Construct a HighVariable with a single member Varnode Datatype *getType(void) const { updateType(); return type; } ///< Get the data-type @@ -110,11 +115,6 @@ public: void setMark(void) const { flags |= Varnode::mark; } ///< Set the mark on this variable void clearMark(void) const { flags &= ~Varnode::mark; } ///< Clear the mark on this variable bool isMark(void) const { return ((flags&Varnode::mark)!=0); } ///< Return \b true if \b this is marked - void setCopyIn1(void) const { highflags |= copy_in1; } ///< Mark the existence of one COPY into \b this - void setCopyIn2(void) const { highflags |= copy_in2; } ///< Mark the existence of two COPYs into \b this - void clearCopyIns(void) const { highflags &= ~(copy_in1 | copy_in2); } ///< Clear marks indicating COPYs into \b this - bool hasCopyIn1(void) const { return ((highflags©_in1)!=0); } ///< Is there at least one COPY into \b this - bool hasCopyIn2(void) const { return ((highflags©_in2)!=0); } ///< Is there at least two COPYs into \b this /// \brief Determine if \b this HighVariable has an associated cover. /// @@ -127,6 +127,9 @@ public: bool isUnattached(void) const { return inst.empty(); } ///< Return \b true if \b this has no member Varnode bool isTypeLock(void) const { updateType(); return ((flags & Varnode::typelock)!=0); } ///< Return \b true if \b this is \e typelocked bool isNameLock(void) const { updateFlags(); return ((flags & Varnode::namelock)!=0); } ///< Return \b true if \b this is \e namelocked +#ifdef MERGEMULTI_DEBUG + void verifyCover(void) const; +#endif // Varnode *findGlobalRep(void) const; static bool compareName(Varnode *vn1,Varnode *vn2); ///< Determine which given Varnode is most nameable static bool compareJustLoc(const Varnode *a,const Varnode *b); ///< Compare based on storage location