diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc index 924e071225..f7b8afb132 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc @@ -17,6 +17,7 @@ #include "emulate.hh" #include "flow.hh" +/// \param s is the XML stream to write to void LoadTable::saveXml(ostream &s) const { @@ -28,6 +29,8 @@ void LoadTable::saveXml(ostream &s) const s << "\n"; } +/// \param el is the root \ tag +/// \param glb is the architecture for resolving address space tags void LoadTable::restoreXml(const Element *el,Architecture *glb) { @@ -42,9 +45,12 @@ void LoadTable::restoreXml(const Element *el,Architecture *glb) addr = Address::restoreXml( *iter, glb); } +/// We assume the list of LoadTable entries is sorted and perform an in-place +/// collapse of any sequences into a single LoadTable entry. +/// \param table is the list of entries to collapse void LoadTable::collapseTable(vector &table) -{ // Assuming -table- is sorted, collapse sequential LoadTable entries into single LoadTable entries +{ if (table.empty()) return; vector::iterator iter,lastiter; int4 count = 1; @@ -114,6 +120,7 @@ void EmulateFunction::executeCallother(void) fallthruOp(); } +/// \param f is the function to emulate within EmulateFunction::EmulateFunction(Funcdata *f) : EmulatePcodeOp(f->getArch()) { @@ -161,6 +168,15 @@ void EmulateFunction::fallthruOp(void) // Otherwise do nothing: outer loop is controlling execution flow } +/// \brief Execute from a given starting point and value to the common end-point of the path set +/// +/// Flow the given value through all paths in the path container to produce the +/// single output value. 
+/// \param val is the starting value +/// \param pathMeld is the set of paths to execute +/// \param startop is the starting PcodeOp within the path set +/// \param startvn is the Varnode holding the starting value +/// \return the calculated value at the common end-point uintb EmulateFunction::emulatePath(uintb val,const PathMeld &pathMeld, PcodeOp *startop,Varnode *startvn) { @@ -201,6 +217,9 @@ uintb EmulateFunction::emulatePath(uintb val,const PathMeld &pathMeld, return getVarnodeValue(invn); } +/// Pass back any LOAD records collected during emulation. The individual records +/// are sorted and collapsed into concise \e table descriptions. +/// \param res will hold any resulting table descriptions void EmulateFunction::collectLoadPoints(vector &res) const { @@ -392,6 +411,8 @@ JumpModel *JumpModelTrivial::clone(JumpTable *jt) const return res; } +/// \param vn is the Varnode we are testing for pruning +/// \return \b true if the search should be pruned here bool JumpBasic::isprune(Varnode *vn) { @@ -402,9 +423,11 @@ bool JumpBasic::isprune(Varnode *vn) return false; } +/// \param vn is the given Varnode to test +/// \return \b false if it is impossible for the Varnode to be the switch variable bool JumpBasic::ispoint(Varnode *vn) -{ // Is this a possible switch variable +{ if (vn->isConstant()) return false; if (vn->isAnnotation()) return false; if (vn->isReadOnly()) return false; @@ -429,9 +452,18 @@ int4 JumpBasic::getStride(Varnode *vn) return stride; } +/// \brief Back up the constant value in the output Varnode to the value in the input Varnode +/// +/// This does the work of going from a normalized switch value to the unnormalized value. +/// PcodeOps between the output and input Varnodes must be reversible or an exception is thrown. 
+/// \param fd is the function containing the switch +/// \param output is the constant value to back up +/// \param outvn is the output Varnode of the data-flow +/// \param invn is the input Varnode to back up to +/// \return the recovered value associated with the input Varnode uintb JumpBasic::backup2Switch(Funcdata *fd,uintb output,Varnode *outvn,Varnode *invn) -{ // Back up constant normalized value -outvn- to unnormalized +{ Varnode *curvn = outvn; PcodeOp *op; TypeOp *top; @@ -465,6 +497,12 @@ uintb JumpBasic::backup2Switch(Funcdata *fd,uintb output,Varnode *outvn,Varnode return output; } +/// \brief Calculate the initial set of Varnodes that might be switch variables +/// +/// Paths that terminate at the given PcodeOp are calculated and organized +/// in a PathMeld object that determines Varnodes that are common to all the paths. +/// \param op is the given PcodeOp +/// \param slot is input slot to the PcodeOp all paths must terminate at void JumpBasic::findDeterminingVarnodes(PcodeOp *op,int4 slot) { @@ -510,6 +548,11 @@ void JumpBasic::findDeterminingVarnodes(PcodeOp *op,int4 slot) } } +/// \brief Check if the two given Varnodes are matching constants +/// +/// \param vn1 is the first given Varnode +/// \param vn2 is the second given Varnode +/// \return \b true if the Varnodes are both constants with the same value static bool matching_constants(Varnode *vn1,Varnode *vn2) { @@ -519,6 +562,10 @@ static bool matching_constants(Varnode *vn1,Varnode *vn2) return true; } +/// \param op is the CBRANCH \e guarding the switch +/// \param path is the specific branch to take from the CBRANCH to reach the switch +/// \param rng is the range of values causing the switch path to be taken +/// \param v is the Varnode holding the value controlling the CBRANCH GuardRecord::GuardRecord(PcodeOp *op,int4 path,const CircleRange &rng,Varnode *v) { @@ -529,11 +576,21 @@ GuardRecord::GuardRecord(PcodeOp *op,int4 path,const CircleRange &rng,Varnode *v baseVn = 
quasiCopy(v,bitsPreserved,false); // Look for varnode whose bits are copied } +/// \brief Determine if \b this guard applies to the given Varnode +/// +/// The guard applies if we know the given Varnode holds the same value as the Varnode +/// attached to the guard. So we return: +/// - 0, if the two Varnodes do not clearly hold the same value. +/// - 1, if the two Varnodes clearly hold the same value. +/// - 2, if the two Varnodes clearly hold the same value, pending no writes between their defining ops. +/// +/// \param vn2 is the given Varnode being tested against \b this guard +/// \param baseVn2 is the earliest Varnode from which the given Varnode is quasi-copied. +/// \param bitsPreserved2 is the number of potentially non-zero bits in the given Varnode +/// \return the matching code 0, 1, or 2 int4 GuardRecord::valueMatch(Varnode *vn2,Varnode *baseVn2,int4 bitsPreserved2) const -{ // Return 0, if -vn- and -vn2- are not clearly the same value - // Return 1, if -vn- and -vn2- are clearly the same value - // Return 2, if -vn- and -vn2- are clearly the same value, pending no writes beteen the def of -vn- and -vn2- +{ if (vn == vn2) return 1; // Same varnode, same value PcodeOp *loadOp,*loadOp2; if (bitsPreserved == bitsPreserved2) { // Are the same number of bits being copied @@ -571,10 +628,16 @@ int4 GuardRecord::valueMatch(Varnode *vn2,Varnode *baseVn2,int4 bitsPreserved2) return 2; } +/// \brief Return 1 if the two given PcodeOps produce exactly the same value, 0 if otherwise +/// +/// We look up through only one level of PcodeOp calculation and only for certain binary ops +/// where the second parameter is a constant. 
+/// \param op1 is the first given PcodeOp to test +/// \param op2 is the second given PcodeOp +/// \return 1 if the same value is produced, 0 otherwise int4 GuardRecord::oneOffMatch(PcodeOp *op1,PcodeOp *op2) -{ // Return 1 if -op1- and -op2- produce exactly the same value, 0 if otherwise - // (one value is allowed to be the zero extension of the other) +{ if (op1->code() != op2->code()) return 0; switch(op1->code()) { @@ -597,6 +660,16 @@ int4 GuardRecord::oneOffMatch(PcodeOp *op1,PcodeOp *op2) return 0; } +/// \brief Compute the source of a quasi-COPY chain for the given Varnode +/// +/// A value is a \b quasi-copy if a sequence of PcodeOps producing it always hold +/// the value as the least significant bits of their output Varnode, but the sequence +/// may put other non-zero values in the upper bits. +/// This method computes the earliest ancestor Varnode for which the given Varnode +/// can be viewed as a quasi-copy. +/// \param vn is the given Varnode +/// \param bitsPreserved will hold the number of least significant bits preserved by the sequence +/// \return the earliest source of the quasi-copy, which may just be the given Varnode Varnode *GuardRecord::quasiCopy(Varnode *vn,int4 &bitsPreserved,bool noWholeValue) { @@ -671,11 +744,16 @@ Varnode *GuardRecord::quasiCopy(Varnode *vn,int4 &bitsPreserved,bool noWholeValu return vn; } +/// \brief Calculate intersection of a new Varnode path with the old path +/// +/// The new path of Varnodes must all be \e marked. The old path, commonVn, +/// is replaced with the intersection. A map is created from the index of each +/// Varnode in the old path with its index in the new path. If the Varnode is +/// not in the intersection, its index is mapped to -1. 
+/// \param parentMap will hold the new index map void PathMeld::internalIntersect(vector &parentMap) -{ // Calculate intersection of new path (marked vn's) with old path (commonVn) - // Put intersection back into commonVn - // Calculate parentMap : from old commonVn index to new commonVn index +{ vector newVn; int4 lastIntersect = -1; for(int4 i=0;i &parentMap) } } +/// \brief Meld in PcodeOps from a new path into \b this container +/// +/// Execution order of the PcodeOps in the container is maintained. Each PcodeOp, old or new, +/// has its split point from the common path recalculated. +/// PcodeOps that split (use a vn not in intersection) and do not rejoin +/// (have a predecessor Varnode in the intersection) get removed. +/// If splitting PcodeOps can't be ordered with the existing meld, we get a new cut point. +/// \param path is the new path of PcodeOps in sequence +/// \param cutOff is the number of PcodeOps with an input in the common path +/// \param parentMap is the map from old common Varnodes to the new common Varnodes +/// \return the index of the last (earliest) Varnode in the common path or -1 int4 PathMeld::meldOps(const vector &path,int4 cutOff,const vector &parentMap) -{ // Meld old ops (opMeld) with new ops (path), updating rootVn with new commonVn order - // Ops should remain in (reverse) execution order - // Ops that split (use a vn not in intersection) and do not rejoin (have a predecessor vn in intersection) - // get cut - // If splitting ops arent can't be ordered with the existing meld, we get a new cut point - +{ // First update opMeld.rootVn with new intersection information for(int4 i=0;i &path,int4 cutOff,const vector 1) { if (opMeld.back().rootVn < cutPoint) // If we see op using varnode earlier than cut point break; // Keep that and all subsequent ops @@ -781,6 +869,7 @@ void PathMeld::truncatePaths(int4 cutPoint) commonVn.resize(cutPoint); // Since intersection is ordered, just resize to cutPoint } +/// \param op2 is the path 
container to copy from void PathMeld::set(const PathMeld &op2) { @@ -788,6 +877,9 @@ void PathMeld::set(const PathMeld &op2) opMeld = op2.opMeld; } +/// This container is initialized to hold a single data-flow path. +/// \param path is the list of PcodeOps in the path (in reverse execution order) +/// \param slot is the list of each Varnode presented as an input slot in the corresponding PcodeOp void PathMeld::set(const vector &path,const vector &slot) { @@ -799,13 +891,20 @@ void PathMeld::set(const vector &path,const vector &slot) } } +/// \param op is the one PcodeOp in the path +/// \param vn is the one Varnode (input to the PcodeOp) in the path void PathMeld::set(PcodeOp *op,Varnode *vn) -{ // Set a single varnode and op as the path +{ commonVn.push_back(vn); opMeld.push_back(RootedOp(op,0)); } +/// The new paths must all start at the common end-point of the paths in +/// \b this container. The new set of melded paths start at the original common start +/// point for \b this container, flow through this old common end-point, and end at +/// the new common end-point. +/// \param op2 is the set of paths to be appended void PathMeld::append(const PathMeld &op2) { @@ -823,10 +922,14 @@ void PathMeld::clear(void) opMeld.clear(); } +/// Add the new path, recalculating the set of Varnodes common to all paths. +/// Paths are trimmed to ensure that any path that splits from the common intersection +/// must eventually rejoin. 
+/// \param path is the new path of PcodeOps to meld, in reverse execution order +/// \param slot is the set of Varnodes in the new path presented as input slots to the corresponding PcodeOp void PathMeld::meld(vector &path,vector &slot) -{ // Meld the new -path- into our collection of paths - // making sure all ops that split from the main path intersection eventually rejoin +{ vector parentMap; for(int4 i=0;i &path,vector &slot) slot.resize(cutOff); } +/// The Varnode is specified by an index into sequence of Varnodes common to all paths in \b this PathMeld. +/// We find the earliest (as in executed first) PcodeOp, within \b this PathMeld that uses the Varnode as input. +/// \param pos is the index of the Varnode +/// \return the earliest PcodeOp using the Varnode PcodeOp *PathMeld::getEarliestOp(int4 pos) const -{ // Find "earliest" op that has commonVn[i] as input +{ for(int4 i=opMeld.size()-1;i>=0;--i) { if (opMeld[i].rootVn == pos) return opMeld[i].op; @@ -862,14 +969,22 @@ PcodeOp *PathMeld::getEarliestOp(int4 pos) const return (PcodeOp *)0; } +/// \brief Analyze CBRANCHs leading up to the given basic-block as a potential switch \e guard. +/// +/// In general there is only one path to the switch, and the given basic-block will +/// hold the BRANCHIND. In some models, there is more than one path to the switch block, +/// and a path must be specified. In this case, the given basic-block will be a block that +/// flows into the switch block, and the \e pathout parameter describes which path leads +/// to the switch block. +/// +/// For each CBRANCH, range restrictions on the various variables which allow +/// control flow to pass through the CBRANCH to the switch are analyzed. +/// A GuardRecord is created for each of these restrictions. 
+/// \param bl is the given basic-block +/// \param pathout is an optional path from the basic-block to the switch or -1 void JumpBasic::analyzeGuards(BlockBasic *bl,int4 pathout) -{ // Analyze each CBRANCH leading up to -bl- switch. - // (if pathout>=0, also analyze the CBRANCH in -bl- that chooses this path) - // Analyze the range restrictions on the various variables which allow - // control flow to pass through the CBRANCHs to the switch. - // Make note of all these restrictions in the guard list - // For later determination of the correct switch variable. +{ int4 i,j,indpath; int4 maxbranch = 2; // Maximum number of CBRANCHs to consider int4 maxpullback = 2; @@ -920,12 +1035,15 @@ void JumpBasic::analyzeGuards(BlockBasic *bl,int4 pathout) } } +/// \brief Calculate the range of values in the given Varnode that direct control-flow to the switch +/// +/// The Varnode is evaluated against each GuardRecord to determine if its range of values +/// can be restricted. Multiple guards may provide different restrictions. +/// \param vn is the given Varnode +/// \param rng will hold resulting range of values the Varnode can hold at the switch void JumpBasic::calcRange(Varnode *vn,CircleRange &rng) const -{ // For a putative switch variable, calculate the range of - // possible values that variable can have AT the switch - // by using the precalculated guard ranges. - +{ // Get an initial range, based on the size/type of -vn- int4 stride = 1; if (vn->isConstant()) @@ -971,9 +1089,16 @@ void JumpBasic::calcRange(Varnode *vn,CircleRange &rng) const } } +/// \brief Find the putative switch variable with the smallest range of values reaching the switch +/// +/// The Varnode with the smallest range and closest to the BRANCHIND is assumed to be the normalized +/// switch variable. If an expected range size is provided, it is used to \e prefer a particular +/// Varnode as the switch variable. 
Whatever Varnode is selected, +/// the JumpValue object is set up to iterate over its range. +/// \param matchsize optionally gives an expected size of the range, or it can be 0 void JumpBasic::findSmallestNormal(uint4 matchsize) -{ // Find normalized switch variable with smallest range of values +{ CircleRange rng; uintb sz,maxsize; @@ -1001,9 +1126,18 @@ void JumpBasic::findSmallestNormal(uint4 matchsize) } } +/// \brief Do all the work necessary to recover the normalized switch variable +/// +/// The switch can be specified as the basic-block containing the BRANCHIND, or +/// as a block that flows to the BRANCHIND block by following the specified path out. +/// \param fd is the function containing the switch +/// \param rootbl is the basic-block +/// \param pathout is the (optional) path to the BRANCHIND or -1 +/// \param matchsize is an (optional) size to expect for the normalized switch variable range +/// \param maxtablesize is the maximum size expected for the normalized switch variable range void JumpBasic::findNormalized(Funcdata *fd,BlockBasic *rootbl,int4 pathout,uint4 matchsize,uint4 maxtablesize) -{ // Find the normalized switch variable +{ uintb sz; analyzeGuards(rootbl,pathout); @@ -1032,9 +1166,13 @@ void JumpBasic::findNormalized(Funcdata *fd,BlockBasic *rootbl,int4 pathout,uint } } +/// \brief Mark the guard CBRANCHs that are truly part of the model. +/// +/// These CBRANCHs will be removed from the active control-flow graph, their +/// function \e folded into the action of the model, as represented by BRANCHIND. 
void JumpBasic::markFoldableGuards(void) -{ // Indicate which are the true guards (that need to be folded) by leaving their cbranch non-null +{ Varnode *vn = pathMeld.getVarnode(varnodeIndex); int4 bitsPreserved; Varnode *baseVn = GuardRecord::quasiCopy(vn, bitsPreserved, true); @@ -1096,7 +1234,7 @@ JumpBasic::~JumpBasic(void) bool JumpBasic::recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint4 maxtablesize) -{ // Try to recover a jumptable using the basic model +{ // Basically there needs to be a straight line calculation from a switch variable to the final // address used for the BRANCHIND. The switch variable is restricted to a small range by one // or more "guard" instructions that, if the switch variable is not in range, branch to a default @@ -1135,8 +1273,7 @@ void JumpBasic::buildAddresses(Funcdata *fd,PcodeOp *indop,vector
&addr void JumpBasic::findUnnormalized(uint4 maxaddsub,uint4 maxleftright,uint4 maxext) -{ // Assuming normalized is recovered, try to work - // back to the unnormalized varnode +{ int4 i,j; Varnode *testvn; PcodeOp *normop; @@ -1180,8 +1317,7 @@ void JumpBasic::findUnnormalized(uint4 maxaddsub,uint4 maxleftright,uint4 maxext void JumpBasic::buildLabels(Funcdata *fd,vector
&addresstable,vector &label,const JumpModel *orig) const -{ // Trace back each normal value to - // the unnormalized value, this is the "case" label +{ uintb val,switchval; const JumpValuesRange *origrange = (( const JumpBasic *)orig)->getValueRange(); @@ -1247,9 +1383,9 @@ bool JumpBasic::foldInGuards(Funcdata *fd,JumpTable *jump) bool JumpBasic::sanityCheck(Funcdata *fd,PcodeOp *indop,vector
&addresstable) -{ // Test all the addresses in the addresstable checking - // that they are reasonable. We cut off at first - // unreasonable +{ + // Test all the addresses in \b this address table checking + // that they are reasonable. We cut off at the first unreasonable address. int4 i; uintb diff; if (addresstable.empty()) return true; @@ -1285,9 +1421,9 @@ bool JumpBasic::sanityCheck(Funcdata *fd,PcodeOp *indop,vector
&address JumpModel *JumpBasic::clone(JumpTable *jt) const -{ // We only need to clone the JumpValues +{ JumpBasic *res = new JumpBasic(jt); - res->jrange = (JumpValuesRange *)jrange->clone(); + res->jrange = (JumpValuesRange *)jrange->clone(); // We only need to clone the JumpValues return res; } @@ -1323,11 +1459,12 @@ bool JumpBasic2::foldInOneGuard(Funcdata *fd,GuardRecord &guard,JumpTable *jump) void JumpBasic2::initializeStart(const PathMeld &pathMeld) -{ // Initialize with the point at which model 1 failed +{ if (pathMeld.empty()) { extravn = (Varnode *)0; return; } + // Initialize at point where the JumpBasic model failed extravn = pathMeld.getVarnode(pathMeld.numCommonVarnode()-1); origPathMeld.set(pathMeld); } @@ -1384,9 +1521,12 @@ bool JumpBasic2::recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint4 return true; } +/// \brief Check if the block that defines the normalized switch variable dominates the block containing the switch +/// +/// \return \b true if the switch block is dominated bool JumpBasic2::checkNormalDominance(void) const -{ // Check if the block that defines the normalized switch variable dominates the block containing the switch +{ if (normalvn->isInput()) return true; FlowBlock *defblock = normalvn->getDef()->getParent(); @@ -1421,9 +1561,9 @@ void JumpBasic2::findUnnormalized(uint4 maxaddsub,uint4 maxleftright,uint4 maxex JumpModel *JumpBasic2::clone(JumpTable *jt) const -{ // We only need to clone the JumpValues +{ JumpBasic2 *res = new JumpBasic2(jt); - res->jrange = (JumpValuesRange *)jrange->clone(); + res->jrange = (JumpValuesRange *)jrange->clone(); // We only need to clone the JumpValues return res; } @@ -1435,6 +1575,7 @@ void JumpBasic2::clear(void) JumpBasic::clear(); } +/// \param jt is the parent JumpTable JumpBasicOverride::JumpBasicOverride(JumpTable *jt) : JumpBasic(jt) { @@ -1443,6 +1584,7 @@ JumpBasicOverride::JumpBasicOverride(JumpTable *jt) istrivial = false; } +/// \param adtable is the list of externally 
provided addresses, which will be deduped void JumpBasicOverride::setAddresses(const vector
&adtable) { @@ -1450,9 +1592,14 @@ void JumpBasicOverride::setAddresses(const vector
&adtable) adset.insert(adtable[i]); } +/// \brief Return the index of the PcodeOp (within the PathMeld set) that takes the given Varnode as input +/// +/// If there is no PcodeOp in the set reading the Varnode, a negative index is returned +/// \param vn is the given Varnode +/// \return the index of the PcodeOp within the PathMeld, or a negative value if none is found int4 JumpBasicOverride::findStartOp(Varnode *vn) -{ // Return the op (within determop) that takes -vn- as input, otherwise return null +{ list::const_iterator iter,enditer; iter = vn->beginDescend(); enditer = vn->endDescend(); @@ -1470,12 +1617,21 @@ int4 JumpBasicOverride::findStartOp(Varnode *vn) return res; } +/// \brief Test a given Varnode as a potential normalized switch variable +/// +/// This method tries to figure out the set of values for the Varnode that +/// produce the manually provided set of addresses. Starting with \e startingvalue +/// and simply incrementing by one to obtain new values, the path from the potential variable +/// to the BRANCHIND is emulated to produce addresses in the manual set. Duplicates and +/// misses are allowed. Once we see all addresses in the manual set, +/// the method returns the index of the starting op, otherwise -1 is returned. +/// \param fd is the function containing the switch +/// \param trialvn is the given trial normalized switch variable +/// \param tolerance is the number of misses that will be tolerated +/// \return the index of the starting PcodeOp within the PathMeld or -1 int4 JumpBasicOverride::trialNorm(Funcdata *fd,Varnode *trialvn,uint4 tolerance) -{ // Given a potential normalized switch variable, try to figure out the set of values that - // produce the addresses in the -adset-. Basically we start with value -startingvalue- - // and increment from there, allowing for duplicates and misses. 
Once we see all addresses - // in -adset- we returning the index of the starting op, otherwise return -1 +{ int4 opi = findStartOp(trialvn); if (opi < 0) return -1; PcodeOp *startop = pathMeld.getOp(opi); @@ -1528,10 +1684,13 @@ int4 JumpBasicOverride::trialNorm(Funcdata *fd,Varnode *trialvn,uint4 tolerance) return -1; } +/// \brief Convert \b this to a trivial model +/// +/// Since we have an absolute set of addresses, if all else fails we can use the indirect variable +/// as the normalized switch and the addresses as the values, similar to JumpModelTrivial void JumpBasicOverride::setupTrivial(void) -{ // Since we have an absolute set of addresses, if all else fails we can use the indirect variable - // as the normalized switch and the addresses as the values, similar to the trivial model +{ set
::const_iterator iter; if (addrtable.empty()) { for(iter=adset.begin();iter!=adset.end();++iter) { @@ -1547,10 +1706,15 @@ void JumpBasicOverride::setupTrivial(void) istrivial = true; } +/// \brief Find a potential normalized switch variable +/// +/// This method is called if the normalized switch variable is not explicitly provided. +/// It looks for the normalized Varnode in the most common jump-table constructions, +/// otherwise it returns null. +/// \return the potential normalized switch variable or null Varnode *JumpBasicOverride::findLikelyNorm(void) -{ // If the normalized switch variable is explicitly provided, look for the norm varnode in the - // most common jumptable constructions, otherwise return null +{ Varnode *res = (Varnode *)0; PcodeOp *op; uint4 i; @@ -1584,9 +1748,10 @@ Varnode *JumpBasicOverride::findLikelyNorm(void) return res; } +/// \brief Clear varnodes and ops that are specific to one instance of a function void JumpBasicOverride::clearCopySpecific(void) -{ // Clear varnodes and ops that are specific to one instance of a Funcdata +{ selectguards.clear(); pathMeld.clear(); normalvn = (Varnode *)0; @@ -1650,7 +1815,7 @@ void JumpBasicOverride::buildLabels(Funcdata *fd,vector
&addresstable,v JumpModel *JumpBasicOverride::clone(JumpTable *jt) const -{ // We only need to clone the values and addresses +{ JumpBasicOverride *res = new JumpBasicOverride(jt); res->adset = adset; res->values = values; @@ -1728,7 +1893,7 @@ void JumpBasicOverride::restoreXml(const Element *el,Architecture *glb) bool JumpAssisted::recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint4 maxtablesize) -{ // Try to recover a jumptable using the assisted model model +{ // Look for the special "jumpassist" pseudo-op Varnode *addrVn = indop->getIn(0); if (!addrVn->isWritten()) return false; @@ -1852,9 +2017,11 @@ JumpModel *JumpAssisted::clone(JumpTable *jt) const return clone; } +/// Try to recover each model in turn, until we find one that matches the specific BRANCHIND. +/// \param fd is the function containing the switch void JumpTable::recoverModel(Funcdata *fd) -{ // Try to recover each model in turn, until we find one that matches +{ if (jmodel != (JumpModel *)0) { if (jmodel->isOverride()) { // If preexisting model is override jmodel->recoverModel(fd,indirect,0,maxtablesize); @@ -1885,6 +2052,11 @@ void JumpTable::recoverModel(Funcdata *fd) jmodel = (JumpModel *)0; } +/// Check that the BRANCHIND is still reachable, if not throw JumptableNotReachableError. +/// Check pathological cases when there is only one address in the table, if we find +/// this, throw the JumptableThunkError. Let the model run its sanity check. +/// Print a warning if the sanity check truncates the original address table. +/// \param fd is the function containing the switch void JumpTable::sanityCheck(Funcdata *fd) { @@ -1919,6 +2091,12 @@ void JumpTable::sanityCheck(Funcdata *fd) fd->warning("Sanity check requires truncation of jumptable",opaddress); } +/// Given a specific basic-block, figure out which edge out of the switch block +/// hits it. 
This \e position is different from the index into the address table, +/// the out edges are deduped and may include additional guard destinations. +/// If no edge hits it, throw an exception. +/// \param bl is the specific basic-block +/// \return the position of the basic-block uint4 JumpTable::block2Position(const FlowBlock *bl) const { @@ -1936,10 +2114,12 @@ uint4 JumpTable::block2Position(const FlowBlock *bl) const return position; } +/// We are not doing a complete check, we are looking for a guard that has collapsed to "if (false)" +/// \param op is the given PcodeOp to check +/// \return \b true if the PcodeOp is reachable bool JumpTable::isReachable(PcodeOp *op) -{ // Check if -op- seems reachable in current flow - // We are not doing a complete check, we are looking for a guard that has collapsed to "if (false)" +{ BlockBasic *parent = op->getParent(); for(int4 i=0;i<2;++i) { // Only check two levels @@ -1961,6 +2141,8 @@ bool JumpTable::isReachable(PcodeOp *op) return true; } +/// \param g is the Architecture the table exists within +/// \param ad is the Address of the BRANCHIND \b this models JumpTable::JumpTable(Architecture *g,Address ad) : opaddress(ad) { @@ -1978,9 +2160,12 @@ JumpTable::JumpTable(Architecture *g,Address ad) collectloads = false; } +/// This is a partial clone of another jump-table. Objects that are specific +/// to the particular Funcdata instance must be recalculated. 
+/// \param op2 is the jump-table to clone JumpTable::JumpTable(const JumpTable *op2) -{ // Partial clone of the jumptable +{ glb = op2->glb; jmodel = (JumpModel *)0; origmodel = (JumpModel *)0; @@ -2010,9 +2195,13 @@ JumpTable::~JumpTable(void) delete origmodel; } +/// \brief Return the number of address table entries that target the given basic-block +/// +/// \param bl is the given basic-block +/// \return the count of entries int4 JumpTable::numIndicesByBlock(const FlowBlock *bl) const -{ // Number of jumptable entries for this block +{ uint4 position,count; int4 i; @@ -2032,9 +2221,20 @@ bool JumpTable::isOverride(void) const return jmodel->isOverride(); } +/// \brief Force manual override information on \b this jump-table. +/// +/// The model is switched over to JumpBasicOverride, which is initialized with an externally +/// provided list of addresses. The addresses are forced as the output addresses of the BRANCHIND +/// for \b this jump-table. If a non-zero hash and an address are provided, this identifies a +/// specific Varnode to use as the normalized switch variable. A potential starting value for +/// the normalized switch variable range is provided. +/// \param addrtable is the manually provided list of addresses to put in the address table +/// \param naddr is the address where the normalized switch variable is defined +/// \param h is a hash identifying the normalized switch variable (or 0) +/// \param sv is the starting value for the range of possible normalized switch variable values (usually 0) void JumpTable::setOverride(const vector
&addrtable,const Address &naddr,uintb h,uintb sv) -{ // Force an override on a jumptable +{ if (jmodel != (JumpModel *)0) delete jmodel; @@ -2045,6 +2245,12 @@ void JumpTable::setOverride(const vector
&addrtable,const Address &nadd override->setStartingValue(sv); } +/// \brief Get the index of the i-th address table entry that corresponds to the given basic-block +/// +/// An exception is thrown if no address table entry targets the block. +/// \param bl is the given basic-block +/// \param i requests a specific position within the duplicate entries +/// \return the address table index int4 JumpTable::getIndexByBlock(const FlowBlock *bl,int4 i) const { @@ -2062,21 +2268,34 @@ int4 JumpTable::getIndexByBlock(const FlowBlock *bl,int4 i) const throw LowlevelError("Could not get jumptable index for block"); } +/// Set the most common address destination by supplying an index into the address table +/// \param tableind is the supplied address table index void JumpTable::setMostCommonIndex(uint4 tableind) -{ // Set the most common address jump destination by supplying the (an) index for its address +{ mostcommon = blocktable[tableind]; // Translate addresstable index to switch block out index } +/// This is used to add address targets from guard branches if they are +/// not already in the address table. A specific case label for the block +/// can also be provided. The new target is appended directly to the end of the table. +/// \param bl is the given basic-block +/// \param lab is the case label for the block void JumpTable::addBlockToSwitch(BlockBasic *bl,uintb lab) -{ // Force a block to be possible switch destination +{ addresstable.push_back(bl->getStart()); uint4 pos = indirect->getParent()->sizeOut(); blocktable.push_back(pos); label.push_back(lab); } +/// Convert addresses in \b this table to actual targeted basic-blocks. +/// +/// This constructs a map from each address table entry to the corresponding +/// out-edge from the basic-block containing the BRANCHIND. The most common +/// address table entry is also calculated here. 
+/// \param flow is used to resolve address targets void JumpTable::switchOver(const FlowInfo &flow) { @@ -2136,6 +2355,7 @@ void JumpTable::foldInNormalization(Funcdata *fd) } } +/// Make exactly one case for each output edge of the switch block. void JumpTable::trivialSwitchOver(void) { @@ -2152,10 +2372,21 @@ void JumpTable::trivialSwitchOver(void) mostcommon = ~((uint4)0); // There is no "mostcommon" } +/// The addresses that the raw BRANCHIND op might branch to itself are recovered, +/// not including other targets of the final model, like guard addresses. The normalized switch +/// variable and the guards are identified in the process however. +/// +/// Generally this method is run during flow analysis when we only have partial information about +/// the function (and possibly the switch itself). The Funcdata instance is a partial clone of the +/// function and is different from the final instance that will hold the fully recovered jump-table. +/// The final instance inherits the addresses recovered here, but recoverModel() will need to be +/// run on it separately. +/// +/// A sanity check is also run, which might truncate the original set of addresses. +/// \param fd is the function containing the switch void JumpTable::recoverAddresses(Funcdata *fd) -{ // Assuming we only have a partial function - // recover just the jumptable addresses +{ recoverModel(fd); if (jmodel == (JumpModel *)0) { ostringstream err; @@ -2176,9 +2407,11 @@ void JumpTable::recoverAddresses(Funcdata *fd) sanityCheck(fd); } +/// Do a normal recoverAddresses, but save off the old JumpModel, and if we fail recovery, put back the old model. 
+/// \param fd is the function containing the switch void JumpTable::recoverMultistage(Funcdata *fd) -{ // Do a normal recoverAddresses, but save off old model, and if we fail recovery, put back the old model +{ if (origmodel != (JumpModel *)0) delete origmodel; origmodel = jmodel; @@ -2213,9 +2446,16 @@ void JumpTable::recoverMultistage(Funcdata *fd) } } +/// This is run assuming the address table has already been recovered, via recoverAddresses() in another +/// Funcdata instance. So recoverModel() needs to be rerun on the instance passed in here. +/// +/// The unnormalized switch variable is recovered, and for each possible address table entry, the variable +/// value that produces it is calculated and stored as the formal \e case label for the associated code block. +/// \param fd is the (final instance of the) function containing the switch +/// \return \b true if it looks like a multi-stage restart is needed. bool JumpTable::recoverLabels(Funcdata *fd) -{ // Assuming we have entire function, recover labels. Return -true- if it looks like a multistage restart is needed. +{ if (!isRecovered()) throw LowlevelError("Trying to recover jumptable labels without addresses"); @@ -2262,9 +2502,12 @@ bool JumpTable::recoverLabels(Funcdata *fd) return multistagerestart; } +/// Clear out any data that is specific to a Funcdata instance. The address table is not cleared +/// if it was recovered, and override information is left intact. +/// Right now this is only getting called, when the jumptable is an override in order to clear out derived data. 
void JumpTable::clear(void) -{ // Right now this is only getting called, when the jumptable is an override in order to clear out derived data +{ if (origmodel != (JumpModel *)0) { delete origmodel; origmodel = (JumpModel *)0; @@ -2284,9 +2527,12 @@ void JumpTable::clear(void) // -opaddress- -maxtablesize- -maxaddsub- -maxleftright- -maxext- -collectloads- are permanent } +/// The recovered addresses and case labels are saved to the XML stream. +/// If override information is present, this is also incorporated into the tag. +/// \param s is the stream to write to void JumpTable::saveXml(ostream &s) const -{ // Save addresses in a jump table in XML format +{ if (!isRecovered()) throw LowlevelError("Trying to save unrecovered jumptable"); @@ -2314,6 +2560,9 @@ void JumpTable::saveXml(ostream &s) const s << "\n"; } +/// Restore the addresses, \e case labels, and any override information from the tag. +/// Other parts of the model and jump-table will still need to be recovered. +/// \param el is the root \ tag to restore from void JumpTable::restoreXml(const Element *el) { @@ -2362,9 +2611,13 @@ void JumpTable::restoreXml(const Element *el) } } +/// Look for the override directive that indicates we need an additional recovery stage for +/// \b this jump-table. +/// \param fd is the function containing the switch +/// \return \b true if an additional recovery stage is required. 
bool JumpTable::checkForMultistage(Funcdata *fd) -{ // Look for a change in control that indicates we need an additional of jump recovery +{ if (addresstable.size()!=1) return false; if (recoverystage != 0) return false; if (indirect == (PcodeOp *)0) return false; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh index 134dae5400..70ac658039 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -// Abstract jump table, we do not specify how addresses are encoded in table +/// \file jumptable.hh +/// \brief Classes to support jump-tables and their recovery #ifndef __CPUI_JUMPTABLE__ #define __CPUI_JUMPTABLE__ @@ -23,62 +24,83 @@ class EmulateFunction; -struct JumptableThunkError : public LowlevelError { // Thunk that looks like a jumptable - /// Initialize the error with an explanatory string - JumptableThunkError(const string &s) : LowlevelError(s) {} +/// \brief Exception thrown for a thunk mechanism that looks like a jump-table +struct JumptableThunkError : public LowlevelError { + JumptableThunkError(const string &s) : LowlevelError(s) {} ///< Construct with an explanatory string }; -struct JumptableNotReachableError : public LowlevelError { // There are no legal flows to the switch - JumptableNotReachableError(const string &s) : LowlevelError(s) {} +/// \brief Exception thrown if there are no legal flows to a switch +struct JumptableNotReachableError : public LowlevelError { + JumptableNotReachableError(const string &s) : LowlevelError(s) {} ///< Constructor }; +/// \brief A description of where and how data was loaded from memory +/// +/// This is a generic table description, giving the starting address +/// of the table, the size of an entry, and number of entries.
class LoadTable { friend class EmulateFunction; - Address addr; // Starting address of table - int4 size; // Size of table entry - int4 num; // Number of entries in table; + Address addr; ///< Starting address of table + int4 size; ///< Size of table entry + int4 num; ///< Number of entries in table public: - LoadTable(void) {} // For use with restoreXml - LoadTable(const Address &ad,int4 sz) { addr = ad, size = sz; num = 1; } - LoadTable(const Address &ad,int4 sz,int4 nm) { addr = ad; size = sz; num = nm; } - bool operator<(const LoadTable &op2) const { return (addr < op2.addr); } - void saveXml(ostream &s) const; - void restoreXml(const Element *el,Architecture *glb); - static void collapseTable(vector &table); + LoadTable(void) {} ///< Constructor for use with restoreXml + LoadTable(const Address &ad,int4 sz) { addr = ad, size = sz; num = 1; } ///< Constructor for a single entry table + LoadTable(const Address &ad,int4 sz,int4 nm) { addr = ad; size = sz; num = nm; } ///< Construct a full table + bool operator<(const LoadTable &op2) const { return (addr < op2.addr); } ///< Compare \b this with another table by address + void saveXml(ostream &s) const; ///< Save a description of \b this as an \ XML tag + void restoreXml(const Element *el,Architecture *glb); ///< Read in \b this table from a \ XML description + static void collapseTable(vector &table); ///< Collapse a sequence of table descriptions }; +/// \brief All paths from a (putative) switch variable to the CPUI_BRANCHIND +/// +/// This is a container for intersecting paths during the construction of a +/// JumpModel. It contains every PcodeOp from some starting Varnode through +/// all paths to a specific BRANCHIND. The paths can split and rejoin. This also +/// keeps track of Varnodes that are present on \e all paths, as these are the +/// potential switch variables for the model.
class PathMeld { + + /// \brief A PcodeOp in the path set associated with the last Varnode in the intersection + /// + /// This links a PcodeOp to the point where the flow path to it split from common path struct RootedOp { - PcodeOp *op; - int4 rootVn; - RootedOp(PcodeOp *o,int4 root) { op = o; rootVn = root; } + PcodeOp *op; ///< An op in the container + int4 rootVn; ///< The index, within commonVn, of the Varnode at the split point + RootedOp(PcodeOp *o,int4 root) { op = o; rootVn = root; } ///< Constructor }; - vector commonVn; // Varnodes in common with all paths - vector opMeld; // All the ops for the melded paths + vector commonVn; ///< Varnodes in common with all paths + vector opMeld; ///< All the ops for the melded paths void internalIntersect(vector &parentMap); int4 meldOps(const vector &path,int4 cutOff,const vector &parentMap); void truncatePaths(int4 cutPoint); public: - void set(const PathMeld &op2); - void set(const vector &path,const vector &slot); - void set(PcodeOp *op,Varnode *vn); - void append(const PathMeld &op2); - void clear(void); - void meld(vector &path,vector &slot); - int4 numCommonVarnode(void) const { return commonVn.size(); } - int4 numOps(void) const { return opMeld.size(); } - Varnode *getVarnode(int4 i) const { return commonVn[i]; } - Varnode *getOpParent(int4 i) const { return commonVn[ opMeld[i].rootVn ]; } - PcodeOp *getOp(int4 i) const { return opMeld[i].op; } - PcodeOp *getEarliestOp(int4 pos) const; - bool empty(void) const { return commonVn.empty(); } + void set(const PathMeld &op2); ///< Copy paths from another container + void set(const vector &path,const vector &slot); ///< Initialize \b this to be a single path + void set(PcodeOp *op,Varnode *vn); ///< Initialize \b this container to a single node "path" + void append(const PathMeld &op2); ///< Append a new set of paths to \b this set of paths + void clear(void); ///< Clear \b this to be an empty container + void meld(vector &path,vector &slot); ///< Meld a new path 
into \b this container + int4 numCommonVarnode(void) const { return commonVn.size(); } ///< Return the number of Varnodes common to all paths + int4 numOps(void) const { return opMeld.size(); } ///< Return the number of PcodeOps across all paths + Varnode *getVarnode(int4 i) const { return commonVn[i]; } ///< Get the i-th common Varnode + Varnode *getOpParent(int4 i) const { return commonVn[ opMeld[i].rootVn ]; } ///< Get the split-point for the i-th PcodeOp + PcodeOp *getOp(int4 i) const { return opMeld[i].op; } ///< Get the i-th PcodeOp + PcodeOp *getEarliestOp(int4 pos) const; ///< Find \e earliest PcodeOp that has a specific common Varnode as input + bool empty(void) const { return commonVn.empty(); } ///< Return \b true if \b this container holds no paths }; +/// \brief A light-weight emulator to calculate switch targets from switch variables +/// +/// We assume we only have to store memory state for individual Varnodes and that dynamic +/// LOADs are resolved from the LoadImage. BRANCH and CBRANCH emulation will fail, there can +/// only be one execution path, although there can be multiple data-flow paths. 
class EmulateFunction : public EmulatePcodeOp { - Funcdata *fd; - map varnodeMap; // Lightweight memory state based on Varnodes - bool collectloads; - vector loadpoints; + Funcdata *fd; ///< The function being emulated + map varnodeMap; ///< Light-weight memory state based on Varnodes + bool collectloads; ///< Set to \b true if the emulator collects individual LOAD addresses + vector loadpoints; ///< The set of collected LOAD records virtual void executeLoad(void); virtual void executeBranch(void); virtual void executeBranchind(void); @@ -87,65 +109,79 @@ class EmulateFunction : public EmulatePcodeOp { virtual void executeCallother(void); virtual void fallthruOp(void); public: - EmulateFunction(Funcdata *f); - void setLoadCollect(bool val) { collectloads = val; } + EmulateFunction(Funcdata *f); ///< Constructor + void setLoadCollect(bool val) { collectloads = val; } ///< Set whether we collect LOAD information virtual void setExecuteAddress(const Address &addr); virtual uintb getVarnodeValue(Varnode *vn) const; virtual void setVarnodeValue(Varnode *vn,uintb val); uintb emulatePath(uintb val,const PathMeld &pathMeld,PcodeOp *startop,Varnode *startvn); - void collectLoadPoints(vector &res) const; + void collectLoadPoints(vector &res) const; ///< Recover any LOAD table descriptions }; class FlowInfo; class JumpTable; +/// \brief A (putative) switch variable Varnode and a constraint imposed by a CBRANCH +/// +/// The record constrains a specific Varnode. If the associated CBRANCH is followed +/// along the path that reaches the switch's BRANCHIND, then we have an explicit +/// description of the possible values the Varnode can hold. 
class GuardRecord { - PcodeOp *cbranch; // instruction branching around switch - int4 indpath; // branch going to switch - CircleRange range; // range of values which goto switch - Varnode *vn; // Varnode being restricted - Varnode *baseVn; // Value being (quasi)copied to vn - int4 bitsPreserved; // Number of bits copied (all other bits are zero) + PcodeOp *cbranch; ///< PcodeOp CBRANCH that branches around the switch + int4 indpath; ///< Specific CBRANCH path going to the switch + CircleRange range; ///< Range of values causing the CBRANCH to take the path to the switch + Varnode *vn; ///< The Varnode being restricted + Varnode *baseVn; ///< Value being (quasi)copied to the Varnode + int4 bitsPreserved; ///< Number of bits copied (all other bits are zero) public: - GuardRecord(PcodeOp *op,int4 path,const CircleRange &rng,Varnode *v); - PcodeOp *getBranch(void) const { return cbranch; } - int4 getPath(void) const { return indpath; } - const CircleRange &getRange(void) const { return range; } - bool isClear(void) const { return (cbranch == (PcodeOp *)0); } - void clear(void) { cbranch = (PcodeOp *)0; } + GuardRecord(PcodeOp *op,int4 path,const CircleRange &rng,Varnode *v); ///< Constructor + PcodeOp *getBranch(void) const { return cbranch; } ///< Get the CBRANCH associated with \b this guard + int4 getPath(void) const { return indpath; } ///< Get the specific path index going towards the switch + const CircleRange &getRange(void) const { return range; } ///< Get the range of values causing the switch path to be taken + void clear(void) { cbranch = (PcodeOp *)0; } ///< Mark \b this guard as unused int4 valueMatch(Varnode *vn2,Varnode *baseVn2,int4 bitsPreserved2) const; static int4 oneOffMatch(PcodeOp *op1,PcodeOp *op2); static Varnode *quasiCopy(Varnode *vn,int4 &bitsPreserved,bool noWholeValue); }; -// This class represents a set of switch variables, and the values that they can take +/// \brief An iterator over values a switch variable can take +/// +/// This
iterator is intended to provide the start value for emulation +/// of a jump-table model to obtain the associated jump-table destination. +/// Each value can be associated with a starting Varnode and PcodeOp in +/// the function being emulated, via getStartVarnode() and getStartOp(). class JumpValues { public: virtual ~JumpValues(void) {} - virtual void truncate(int4 nm)=0; - virtual uintb getSize(void) const=0; - virtual bool contains(uintb val) const=0; + virtual void truncate(int4 nm)=0; ///< Truncate the number of values to the given number + virtual uintb getSize(void) const=0; ///< Return the number of values the variables can take + virtual bool contains(uintb val) const=0; ///< Return \b true if the given value is in the set of possible values + + /// \brief Initialize \b this for iterating over the set of possible values + /// + /// \return \b true if there are any values to iterate over virtual bool initializeForReading(void) const=0; - virtual bool next(void) const=0; - virtual uintb getValue(void) const=0; - virtual Varnode *getStartVarnode(void) const=0; - virtual PcodeOp *getStartOp(void) const=0; - virtual bool isReversible(void) const=0; // Can the current value be reversed to get a label - virtual JumpValues *clone(void) const=0; + + virtual bool next(void) const=0; ///< Advance the iterator, return \b true if there is another value + virtual uintb getValue(void) const=0; ///< Get the current value + virtual Varnode *getStartVarnode(void) const=0; ///< Get the Varnode associated with the current value + virtual PcodeOp *getStartOp(void) const=0; ///< Get the PcodeOp associated with the current value + virtual bool isReversible(void) const=0; ///< Return \b true if the current value can be reversed to get a label + virtual JumpValues *clone(void) const=0; ///< Clone \b this iterator }; -// This class implements a single entry switch variable that can take a range of values +/// \brief single entry switch variable that can take a range of values 
class JumpValuesRange : public JumpValues { protected: - CircleRange range; // Acceptable range of values for normalvn - Varnode *normqvn; - PcodeOp *startop; - mutable uintb curval; + CircleRange range; ///< Acceptable range of values for the normalized switch variable + Varnode *normqvn; ///< Varnode representing the normalized switch variable + PcodeOp *startop; ///< First PcodeOp in the jump-table calculation + mutable uintb curval; ///< The current value pointed to by the iterator public: - void setRange(const CircleRange &rng) { range = rng; } - void setStartVn(Varnode *vn) { normqvn = vn; } - void setStartOp(PcodeOp *op) { startop = op; } - virtual void truncate(int4 nm); ///< Truncate the number of values to the given number + void setRange(const CircleRange &rng) { range = rng; } ///< Set the range of values explicitly + void setStartVn(Varnode *vn) { normqvn = vn; } ///< Set the normalized switch Varnode explicitly + void setStartOp(PcodeOp *op) { startop = op; } ///< Set the starting PcodeOp explicitly + virtual void truncate(int4 nm); virtual uintb getSize(void) const; virtual bool contains(uintb val) const; virtual bool initializeForReading(void) const; @@ -157,17 +193,19 @@ public: virtual JumpValues *clone(void) const; }; -// This class extends having a single entry switch variable with range and -// adds a second entry point that takes only a single value -class JumpValuesRangeDefault : public JumpValuesRange { // Range like model1, but with extra default value - uintb extravalue; - Varnode *extravn; - PcodeOp *extraop; - mutable bool lastvalue; +/// \brief A jump-table starting range with two possible execution paths +/// +/// This extends the basic JumpValuesRange having a single entry switch variable and +/// adds a second entry point that takes only a single value. This value comes last in the iteration.
+class JumpValuesRangeDefault : public JumpValuesRange { + uintb extravalue; ///< The extra value + Varnode *extravn; ///< The starting Varnode associated with the extra value + PcodeOp *extraop; ///< The starting PcodeOp associated with the extra value + mutable bool lastvalue; ///< \b true if the extra value has been visited by the iterator public: - void setExtraValue(uintb val) { extravalue = val; } - void setDefaultVn(Varnode *vn) { extravn = vn; } - void setDefaultOp(PcodeOp *op) { extraop = op; } + void setExtraValue(uintb val) { extravalue = val; } ///< Set the extra value explicitly + void setDefaultVn(Varnode *vn) { extravn = vn; } ///< Set the associated start Varnode + void setDefaultOp(PcodeOp *op) { extraop = op; } ///< Set the associated start PcodeOp virtual uintb getSize(void) const; virtual bool contains(uintb val) const; virtual bool initializeForReading(void) const; @@ -178,19 +216,64 @@ public: virtual JumpValues *clone(void) const; }; -// This class represents the entire recovery process, recognizing the model, tracing -// from the switch entry to the address, and folding in guards +/// \brief A jump-table execution model +/// +/// This class holds details of the model and recovers these details in various stages. +/// The model concepts include: +/// - Address Table, the set of destination addresses the jump-table can produce. +/// - Normalized Switch Variable, the Varnode with the most restricted set of values used +/// by the model to produce the destination addresses. +/// - Unnormalized Switch Variable, the Varnode being switched on, as seen in the decompiler output. +/// - Case labels, switch variable values associated with specific destination addresses. +/// - Guards, CBRANCH ops that enforce the normalized switch variable's value range.
class JumpModel { protected: - JumpTable *jumptable; // The jumptable that is building this model + JumpTable *jumptable; ///< The jump-table that is building \b this model public: - JumpModel(JumpTable *jt) { jumptable = jt; } - virtual ~JumpModel(void) {} - virtual bool isOverride(void) const=0; - virtual int4 getTableSize(void) const=0; + JumpModel(JumpTable *jt) { jumptable = jt; } ///< Construct given a parent jump-table + virtual ~JumpModel(void) {} ///< Destructor + virtual bool isOverride(void) const=0; ///< Return \b true if \b this model was manually overridden + virtual int4 getTableSize(void) const=0; ///< Return the number of entries in the address table + + /// \brief Attempt to recover details of the model, given a specific BRANCHIND + /// + /// This generally recovers the normalized switch variable and any guards. + /// \param fd is the function containing the switch + /// \param indop is the given BRANCHIND + /// \param matchsize is the expected number of address table entries to recover, or 0 for no expectation + /// \param maxtablesize is maximum number of address table entries to allow in the model + /// \return \b true if details of the model were successfully recovered virtual bool recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint4 maxtablesize)=0; + + /// \brief Construct the explicit list of target addresses (the Address Table) from \b this model + /// + /// The addresses produced all come from the BRANCHIND and may not be deduped. Alternate guard + /// destinations are not yet included. + /// \param fd is the function containing the switch + /// \param indop is the root BRANCHIND of the switch + /// \param addresstable will hold the list of Addresses + /// \param loadpoints if non-null will hold LOAD table information used by the model virtual void buildAddresses(Funcdata *fd,PcodeOp *indop,vector
&addresstable,vector *loadpoints) const=0; + + /// \brief Recover the unnormalized switch variable + /// + /// The normalized switch variable must already be recovered. The amount of normalization between + /// the two switch variables can be restricted. + /// \param maxaddsub is a restriction on arithmetic operations + /// \param maxleftright is a restriction on shift operations + /// \param maxext is a restriction on extension operations virtual void findUnnormalized(uint4 maxaddsub,uint4 maxleftright,uint4 maxext)=0; + + /// \brief Recover \e case labels associated with the Address table + /// + /// The unnormalized switch variable must already be recovered. Values that the normalized + /// switch value can hold are walked back to obtain the value that the unnormalized switch + /// variable would hold. Labels are returned in the order provided by the normalized switch + /// variable iterator JumpValues. + /// \param fd is the function containing the switch + /// \param addresstable is the address table (used to label code blocks with bad or missing labels) + /// \param label will hold recovered labels in JumpValues order + /// \param orig is the JumpModel to use for the JumpValues iterator virtual void buildLabels(Funcdata *fd,vector
&addresstable,vector &label,const JumpModel *orig) const=0; /// \brief Do normalization of the given switch specific to \b this model. @@ -209,19 +292,34 @@ public: /// \param fd is the function containing the switch /// \param jump is the JumpTable owning \b this model. virtual bool foldInGuards(Funcdata *fd,JumpTable *jump)=0; + + /// \brief Perform a sanity check on recovered addresses + /// + /// Individual addresses are checked against the function or its program to determine + /// if they are reasonable. This method can optionally remove addresses from the table. + /// If it does so, the underlying model is changed to reflect the removal. + /// \param fd is the function containing the switch + /// \param indop is the root BRANCHIND of the switch + /// \param addresstable is the list of recovered Addresses, which may be modified + /// \return \b true if there are (at least some) reasonable addresses in the table virtual bool sanityCheck(Funcdata *fd,PcodeOp *indop,vector
&addresstable)=0; - virtual JumpModel *clone(JumpTable *jt) const=0; - virtual void clear(void) {}; // Clear any non-permanent aspects of the model - virtual void saveXml(ostream &s) const {} // For use with override models - virtual void restoreXml(const Element *el,Architecture *glb) {} // For use with override models + + virtual JumpModel *clone(JumpTable *jt) const=0; ///< Clone \b this model + virtual void clear(void) {} ///< Clear any non-permanent aspects of the model + virtual void saveXml(ostream &s) const {} ///< Save this model as an XML tag + virtual void restoreXml(const Element *el,Architecture *glb) {} ///< Restore \b this model from an XML tag }; -// This class treats the branch indirection variable as the switch variable, and recovers -// its possible values from the existing block structure +/// \brief A trivial jump-table model, where the BRANCHIND input Varnode is the switch variable +/// +/// This class treats the input Varnode to the BRANCHIND as the switch variable, and recovers +/// its possible values from the existing block structure. This is used when the flow following +/// fork recovers destination addresses, but the switch normalization action is unable to recover +/// the model. class JumpModelTrivial : public JumpModel { - uint4 size; + uint4 size; ///< Number of addresses in the table as reported by the JumpTable public: - JumpModelTrivial(JumpTable *jt) : JumpModel(jt) { size = 0; } + JumpModelTrivial(JumpTable *jt) : JumpModel(jt) { size = 0; } ///< Construct given a parent JumpTable virtual bool isOverride(void) const { return false; } virtual int4 getTableSize(void) const { return size; } virtual bool recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint4 maxtablesize); @@ -234,20 +332,22 @@ public: virtual JumpModel *clone(JumpTable *jt) const; }; -// This is the basic switch model. 
In brief -// 1) Straight-line calculation from switch variable to BRANCHIND -// 2) Switch variable is bounded by one or more "guards" that branch around the BRANCHIND -// 3) Recover unnormalized switch from bounded switch, through some basic transforms +/// \brief The basic switch model +/// +/// This is the most common model: +/// - A straight-line calculation from switch variable to BRANCHIND +/// - The switch variable is bounded by one or more \e guards that branch around the BRANCHIND +/// - The unnormalized switch variable is recovered from the normalized variable through some basic transforms class JumpBasic : public JumpModel { protected: - JumpValuesRange *jrange; - PathMeld pathMeld; // Set of PcodeOps and Varnodes producing the final switch addresses - vector selectguards; - int4 varnodeIndex; // Position of the normalized switch varnode within PathMeld - Varnode *normalvn; // The normalized switch varnode - Varnode *switchvn; // The unnormalized switch varnode - static bool isprune(Varnode *vn); - static bool ispoint(Varnode *vn); + JumpValuesRange *jrange; ///< Range of values for the (normalized) switch variable + PathMeld pathMeld; ///< Set of PcodeOps and Varnodes producing the final target addresses + vector selectguards; ///< Any guards associated with \b this model + int4 varnodeIndex; ///< Position of the normalized switch Varnode within PathMeld + Varnode *normalvn; ///< Normalized switch Varnode + Varnode *switchvn; ///< Unnormalized switch Varnode + static bool isprune(Varnode *vn); ///< Do we prune here in our depth-first search for the normalized switch variable + static bool ispoint(Varnode *vn); ///< Is it possible for the given Varnode to be a switch variable?
static int4 getStride(Varnode *vn); ///< Get the step/stride associated with the Varnode static uintb backup2Switch(Funcdata *fd,uintb output,Varnode *outvn,Varnode *invn); void findDeterminingVarnodes(PcodeOp *op,int4 slot); @@ -268,9 +368,9 @@ protected: /// \return \b true if a change was made to data-flow virtual bool foldInOneGuard(Funcdata *fd,GuardRecord &guard,JumpTable *jump); public: - JumpBasic(JumpTable *jt) : JumpModel(jt) { jrange = (JumpValuesRange *)0; } - const PathMeld &getPathMeld(void) const { return pathMeld; } - const JumpValuesRange *getValueRange(void) const { return jrange; } + JumpBasic(JumpTable *jt) : JumpModel(jt) { jrange = (JumpValuesRange *)0; } ///< Construct given a parent JumpTable + const PathMeld &getPathMeld(void) const { return pathMeld; } ///< Get the possible paths to the switch + const JumpValuesRange *getValueRange(void) const { return jrange; } ///< Get the normalized value iterator virtual ~JumpBasic(void); virtual bool isOverride(void) const { return false; } virtual int4 getTableSize(void) const { return jrange->getSize(); } @@ -285,47 +385,56 @@ public: virtual void clear(void); }; -// This model expects two paths to the switch, 1 from a default value, 1 from the other values that hit the switch -// If A is the guarding control-flow block, C is the block setting the default value, and S the switch block itself, -// We expect one of the following situations: -// A -> C or S and C -> S -// A -> C or D and C -> S D -> S -// C -> S and S -> A A -> S or "out of loop", i.e.
S is in a loop, and the guard block doubles as the loop condition +/// \brief A basic jump-table model with an added default address path +/// +/// This model expects two paths to the switch, 1 from a default value, 1 from the other values that hit the switch +/// If A is the guarding control-flow block, C is the block setting the default value, and S the switch block itself, +/// We expect one of the following situations: +/// - A -> C or S and C -> S +/// - A -> C or D and C -> S D -> S +/// - C -> S and S -> A A -> S or "out of loop", i.e. S is in a loop, and the guard block doubles as the loop condition +/// +/// This builds on the analysis performed for JumpBasic, which fails because there are too many paths +/// to the BRANCHIND, preventing the guards from being interpreted properly. This class expects to reuse +/// the PathMeld calculation from JumpBasic. class JumpBasic2 : public JumpBasic { - Varnode *extravn; - PathMeld origPathMeld; + Varnode *extravn; ///< The extra Varnode holding the default value + PathMeld origPathMeld; ///< The set of paths that produce non-default addresses bool checkNormalDominance(void) const; virtual bool foldInOneGuard(Funcdata *fd,GuardRecord &guard,JumpTable *jump); public: - JumpBasic2(JumpTable *jt) : JumpBasic(jt) {} - void initializeStart(const PathMeld &pathMeld); + JumpBasic2(JumpTable *jt) : JumpBasic(jt) {} ///< Constructor + void initializeStart(const PathMeld &pathMeld); ///< Pass in the prior PathMeld calculation virtual bool recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint4 maxtablesize); virtual void findUnnormalized(uint4 maxaddsub,uint4 maxleftright,uint4 maxext); virtual JumpModel *clone(JumpTable *jt) const; virtual void clear(void); }; -// This is the basic model for manually specifying the list of addresses the switch goes to -// It tries to repurpose some of the analysis that JumpBasic does to recover what the switch variable -// is, but will revert to the trivial model if it can't find a 
suitable switch variable +/// \brief A basic jump-table model incorporating manual override information +/// +/// The list of potential target addresses produced by the BRANCHIND is not recovered by \b this +/// model, but must be provided explicitly via setAddresses(). +/// The model tries to repurpose some of the analysis that JumpBasic does to recover the switch variable. +/// But it will revert to the trivial model if it can't find a suitable switch variable. class JumpBasicOverride : public JumpBasic { - set
adset; // Absolute address table (manually specified) - vector values; // Normalized switch variable values associated with addresses - vector
addrtable; // Address associated with each value - uintb startingvalue; // Possible start for guessing values that match addresses - Address normaddress; // Dynamic info for recovering normalized switch variable - uint8 hash; // if (hash==0) there is no normalized switch (use trivial model) - bool istrivial; // true if we use a trivial value model + set
adset; ///< Absolute address table (manually specified) + vector values; ///< Normalized switch variable values associated with addresses + vector
addrtable; ///< Address associated with each value + uintb startingvalue; ///< Possible start for guessing values that match addresses + Address normaddress; ///< Dynamic info for recovering normalized switch variable + uint8 hash; ///< if (hash==0) there is no normalized switch (use trivial model) + bool istrivial; ///< \b true if we use a trivial value model int4 findStartOp(Varnode *vn); int4 trialNorm(Funcdata *fd,Varnode *trialvn,uint4 tolerance); void setupTrivial(void); Varnode *findLikelyNorm(void); void clearCopySpecific(void); public: - JumpBasicOverride(JumpTable *jt); - void setAddresses(const vector
&adtable); - void setNorm(const Address &addr,uintb h) { normaddress = addr; hash = h; } - void setStartingValue(uintb val) { startingvalue = val; } + JumpBasicOverride(JumpTable *jt); ///< Constructor + void setAddresses(const vector
&adtable); ///< Manually set the address table for \b this model + void setNorm(const Address &addr,uintb h) { normaddress = addr; hash = h; } ///< Set the normalized switch variable + void setStartingValue(uintb val) { startingvalue = val; } ///< Set the starting value for the normalized range virtual bool isOverride(void) const { return true; } virtual int4 getTableSize(void) const { return addrtable.size(); } virtual bool recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint4 maxtablesize); @@ -343,22 +452,25 @@ public: class JumpAssistOp; -// This model looks for a special "jumpassist" pseudo-op near the branch site, which contains -// p-code models describing how to parse a jump-table for case labels and addresses. -// It views the switch table calculation as a two-stage process: -// case2index: convert the switchvar to an index into a table -// index2address: convert the index to an address -// The pseudo-op holds: -// the table address, size (number of indices) -// exemplar p-code for inverting the case2index part of the calculation -// exemplar p-code for calculating index2address +/// \brief A jump-table model assisted by pseudo-op directives in the code +/// +/// This model looks for a special \e jumpassist pseudo-op near the branch site, which contains +/// p-code models describing how to parse a jump-table for case labels and addresses. 
+/// It views the switch table calculation as a two-stage process: +/// - case2index: convert the switchvar to an index into a table +/// - index2address: convert the index to an address +/// +/// The pseudo-op holds: +/// - the table address, size (number of indices) +/// - exemplar p-code for inverting the case2index part of the calculation +/// - exemplar p-code for calculating index2address class JumpAssisted : public JumpModel { - PcodeOp *assistOp; - JumpAssistOp *userop; - int4 sizeIndices; // Total number of indices in the table (not including the defaultaddress) - Varnode *switchvn; // The switch variable + PcodeOp *assistOp; ///< The \e jumpassist PcodeOp + JumpAssistOp *userop; ///< The \e jumpassist p-code models + int4 sizeIndices; ///< Total number of indices in the table (not including the defaultaddress) + Varnode *switchvn; ///< The switch variable public: - JumpAssisted(JumpTable *jt) : JumpModel(jt) { assistOp = (PcodeOp *)0; switchvn = (Varnode *)0; sizeIndices=0; } + JumpAssisted(JumpTable *jt) : JumpModel(jt) { assistOp = (PcodeOp *)0; switchvn = (Varnode *)0; sizeIndices=0; } ///< Constructor // virtual ~JumpAssisted(void); virtual bool isOverride(void) const { return false; } virtual int4 getTableSize(void) const { return sizeIndices+1; } @@ -387,21 +499,21 @@ class JumpTable { vector blocktable; ///< Addresses converted to basic blocks vector label; ///< The case label for each explicit target vector loadpoints; ///< Any recovered in-memory data for the jump-table - Address opaddress; ///< Absolute address of the INDIRECT jump - PcodeOp *indirect; ///< CPUI_INDIRECT op referring linked to \b this jump-table + Address opaddress; ///< Absolute address of the BRANCHIND jump + PcodeOp *indirect; ///< CPUI_BRANCHIND linked to \b this jump-table uintb switchVarConsume; ///< Bits of the switch variable being consumed - uint4 mostcommon; ///< Index of the most common position in table, prior to deduping + uint4 mostcommon; ///< The out-edge 
corresponding to the most common address in the address table uint4 maxtablesize; ///< Maximum table size we allow to be built (sanity check) uint4 maxaddsub; ///< Maximum ADDs or SUBs to normalize uint4 maxleftright; ///< Maximum shifts to normalize uint4 maxext; ///< Maximum extensions to normalize - int4 recoverystage; ///< 0=no stages, 1=needs additional stage, 2=complete + int4 recoverystage; ///< 0=no stages recovered, 1=additional stage needed, 2=complete bool collectloads; ///< Set to \b true if information about in-memory model data is/should be collected void recoverModel(Funcdata *fd); ///< Attempt recovery of the jump-table model - void trivialSwitchOver(void); + void trivialSwitchOver(void); ///< Switch \b this table over to a trivial model void sanityCheck(Funcdata *fd); ///< Perform sanity check on recovered address targets - uint4 block2Position(const FlowBlock *bl) const; - static bool isReachable(PcodeOp *op); + uint4 block2Position(const FlowBlock *bl) const; ///< Convert a basic-block to an out-edge index from the switch. 
+ static bool isReachable(PcodeOp *op); ///< Check if the given PcodeOp still seems reachable in its function public: JumpTable(Architecture *g,Address ad=Address()); ///< Constructor JumpTable(const JumpTable *op2); ///< Copy constructor @@ -410,36 +522,36 @@ public: bool isRecovered(void) const { return !addresstable.empty(); } ///< Return \b true if a model has been recovered bool isLabelled(void) const { return !label.empty(); } ///< Return \b true if \e case labels are computed bool isOverride(void) const; ///< Return \b true if \b this table was manually overridden - bool isPossibleMultistage(void) const { return (addresstable.size()==1); } - int4 getStage(void) const { return recoverystage; } - int4 numEntries(void) const { return addresstable.size(); } + bool isPossibleMultistage(void) const { return (addresstable.size()==1); } ///< Return \b true if this could be multi-staged + int4 getStage(void) const { return recoverystage; } ///< Return what stage of recovery this jump-table is in. 
+ int4 numEntries(void) const { return addresstable.size(); } ///< Return the size of the address table for \b this jump-table uintb getSwitchVarConsume(void) const { return switchVarConsume; } ///< Get bits of switch variable consumed by \b this table - int4 getMostCommon(void) const { return mostcommon; } - const Address &getOpAddress(void) const { return opaddress; } - PcodeOp *getIndirectOp(void) const { return indirect; } - void setIndirectOp(PcodeOp *ind) { opaddress = ind->getAddr(); indirect = ind; } - void setMaxTableSize(uint4 val) { maxtablesize = val; } + int4 getMostCommon(void) const { return mostcommon; } ///< Get the out-edge corresponding to the most common address table entry + const Address &getOpAddress(void) const { return opaddress; } ///< Get the address of the BRANCHIND for the switch + PcodeOp *getIndirectOp(void) const { return indirect; } ///< Get the BRANCHIND PcodeOp + void setIndirectOp(PcodeOp *ind) { opaddress = ind->getAddr(); indirect = ind; } ///< Set the BRANCHIND PcodeOp + void setMaxTableSize(uint4 val) { maxtablesize = val; } ///< Set the maximum entries allowed in the address table void setNormMax(uint4 maddsub,uint4 mleftright,uint4 mext) { - maxaddsub = maddsub; maxleftright = mleftright; maxext = mext; } + maxaddsub = maddsub; maxleftright = mleftright; maxext = mext; } ///< Set the switch variable normalization model restrictions void setOverride(const vector
&addrtable,const Address &naddr,uintb h,uintb sv); int4 numIndicesByBlock(const FlowBlock *bl) const; int4 getIndexByBlock(const FlowBlock *bl,int4 i) const; - Address getAddressByIndex(int4 index) const { return addresstable[index]; } - void setMostCommonIndex(uint4 tableind); - void setMostCommonBlock(uint4 bl) { mostcommon = bl; } - void setLoadCollect(bool val) { collectloads = val; } - void addBlockToSwitch(BlockBasic *bl,uintb lab); + Address getAddressByIndex(int4 i) const { return addresstable[i]; } ///< Get the i-th address table entry + void setMostCommonIndex(uint4 tableind); ///< Set the most common jump-table target by index + void setMostCommonBlock(uint4 bl) { mostcommon = bl; } ///< Set the most common jump-table target by out-edge + void setLoadCollect(bool val) { collectloads = val; } ///< Set whether LOAD records should be collected + void addBlockToSwitch(BlockBasic *bl,uintb lab); ///< Force a given basic-block to be a switch destination void switchOver(const FlowInfo &flow); ///< Convert absolute addresses to block indices uintb getLabelByIndex(int4 index) const { return label[index]; } ///< Given a \e case index, get its label void foldInNormalization(Funcdata *fd); ///< Hide the normalization code for the switch bool foldInGuards(Funcdata *fd) { return jmodel->foldInGuards(fd,this); } ///< Hide any guard code for \b this switch - void recoverAddresses(Funcdata *fd); - void recoverMultistage(Funcdata *fd); - bool recoverLabels(Funcdata *fd); - bool checkForMultistage(Funcdata *fd); - void clear(void); - void saveXml(ostream &s) const; - void restoreXml(const Element *el); + void recoverAddresses(Funcdata *fd); ///< Recover the raw jump-table addresses (the address table) + void recoverMultistage(Funcdata *fd); ///< Recover jump-table addresses keeping track of a possible previous stage + bool recoverLabels(Funcdata *fd); ///< Recover the case labels for \b this jump-table + bool checkForMultistage(Funcdata *fd); ///< Check if this jump-table 
requires an additional recovery stage + void clear(void); ///< Clear instance specific data for \b this jump-table + void saveXml(ostream &s) const; ///< Save \b this jump-table as a \<jumptable> XML tag + void restoreXml(const Element *el); ///< Recover \b this jump-table from a \<jumptable> XML tag }; #endif