GP-4979 Better support for partial array optimizations

This commit is contained in:
caheckman 2024-10-28 19:34:30 +00:00
parent 43655ab76c
commit 784540f1c0
14 changed files with 317 additions and 134 deletions

View File

@ -2345,7 +2345,7 @@ int4 BlockBasic::flipInPlaceTest(vector<PcodeOp *> &fliplist) const
PcodeOp *lastop = op.back();
if (lastop->code() != CPUI_CBRANCH)
return 2;
return opFlipInPlaceTest(lastop,fliplist);
return Funcdata::opFlipInPlaceTest(lastop,fliplist);
}
void BlockBasic::flipInPlaceExecute(void)
@ -2726,6 +2726,29 @@ PcodeOp *BlockBasic::findMultiequal(const vector<Varnode *> &varArray)
return op;
}
/// \brief Get the earliest use/read of a Varnode in \b this basic block
///
/// Every PcodeOp reading the Varnode is examined. Among the readers whose parent block is \b this,
/// the one with the smallest sequence order is selected.
/// \param vn is the Varnode to search for
/// \return the earliest PcodeOp reading the Varnode or NULL
PcodeOp *BlockBasic::earliestUse(Varnode *vn)
{
  PcodeOp *earliest = (PcodeOp *)0;
  list<PcodeOp *>::const_iterator iter = vn->beginDescend();
  while(iter != vn->endDescend()) {
    PcodeOp *reader = *iter;
    ++iter;
    if (reader->getParent() != this)
      continue;			// Reader lives in a different block
    if (earliest == (PcodeOp *)0 || reader->getSeqNum().getOrder() < earliest->getSeqNum().getOrder())
      earliest = reader;
  }
  return earliest;
}
/// Each Varnode must be defined by a PcodeOp with the same OpCode. The Varnode, within the array, is replaced
/// with the input Varnode in the indicated slot.
/// \param varArray is the given array of Varnodes
@ -3035,7 +3058,7 @@ bool BlockIf::preferComplement(Funcdata &data)
if (0 != split->flipInPlaceTest(fliplist))
return false;
split->flipInPlaceExecute();
opFlipInPlaceExecute(data,fliplist);
data.opFlipInPlaceExecute(fliplist);
swapBlocks(1,2);
return true;
}

View File

@ -492,6 +492,7 @@ public:
bool emptyOp(void) const { return op.empty(); } ///< Return \b true if \b block contains no operations
bool noInterveningStatement(void) const;
PcodeOp *findMultiequal(const vector<Varnode *> &varArray); ///< Find MULTIEQUAL with given inputs
PcodeOp *earliestUse(Varnode *vn);
static bool liftVerifyUnroll(vector<Varnode *> &varArray,int4 slot); ///< Verify given Varnodes are defined with same PcodeOp
};

View File

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -2135,9 +2135,9 @@ int4 ActionNormalizeBranches::apply(Funcdata &data)
if (cbranch == (PcodeOp *)0) continue;
if (cbranch->code() != CPUI_CBRANCH) continue;
fliplist.clear();
if (opFlipInPlaceTest(cbranch,fliplist) != 0)
if (Funcdata::opFlipInPlaceTest(cbranch,fliplist) != 0)
continue;
opFlipInPlaceExecute(data,fliplist);
data.opFlipInPlaceExecute(fliplist);
bb->flipInPlaceExecute();
count += 1; // Indicate a change was made
}

View File

@ -459,7 +459,7 @@ bool StringSequence::transform(void)
return true;
}
/// From a starting pointer, backtrack through PTRADDs to a putative root Varnode pointer.
/// From a starting pointer, backtrack through PTRADDs and COPYs to a putative root Varnode pointer.
/// \param initPtr is pointer Varnode into the root STORE
void HeapSequence::findBasePointer(Varnode *initPtr)
@ -467,22 +467,84 @@ void HeapSequence::findBasePointer(Varnode *initPtr)
basePointer = initPtr;
while(basePointer->isWritten()) {
PcodeOp *op = basePointer->getDef();
if (op->code() != CPUI_PTRADD) break;
int8 sz = op->getIn(2)->getOffset();
if (sz != charType->getAlignSize()) break;
OpCode opc = op->code();
if (opc == CPUI_PTRADD) {
int8 sz = op->getIn(2)->getOffset();
if (sz != charType->getAlignSize()) break;
}
else if (opc != CPUI_COPY)
break;
basePointer = op->getIn(0);
}
}
/// Back-track from \b basePointer through PTRSUBs, PTRADDs, and INT_ADDs with a constant second input
/// to an earlier root, recording the offset applied at each step (for a PTRADD, the index multiplied
/// by the element size).  If an earlier root exists, trace forward from it through ops of the same
/// kinds, one level per recorded offset, keeping only the ops whose offset matches exactly.
/// The output Varnodes of the fully matching traces form the list of duplicates.
/// If no back-tracking is possible, \b basePointer is the only entry in the list.
/// \param duplist will hold the list of duplicate Varnodes (including \b basePointer)
void HeapSequence::findDuplicateBases(vector<Varnode *> &duplist)
{
  if (!basePointer->isWritten()) {
    duplist.push_back(basePointer);
    return;
  }
  PcodeOp *op = basePointer->getDef();
  OpCode opc = op->code();
  if ((opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) || !op->getIn(1)->isConstant()) {
    duplist.push_back(basePointer);
    return;
  }
  Varnode *copyRoot = basePointer;
  vector<uintb> offset;		// Offsets encountered walking backward, nearest to basePointer first
  do {
    uintb off = op->getIn(1)->getOffset();
    if (opc == CPUI_PTRADD)
      off *= op->getIn(2)->getOffset();		// Scale the index by the element size
    offset.push_back(off);
    copyRoot = op->getIn(0);
    if (!copyRoot->isWritten()) break;
    op = copyRoot->getDef();
    opc = op->code();
    // Accept the same three op-codes as the initial test above (PTRADD must be included)
    if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD)
      break;
  } while(op->getIn(1)->isConstant());

  duplist.push_back(copyRoot);
  vector<Varnode *> midlist;
  // Replay the offsets in reverse (root first), advancing every candidate one op per level
  for(int4 i=offset.size()-1;i>=0;--i) {
    duplist.swap(midlist);
    duplist.clear();
    for(int4 j=0;j<midlist.size();++j) {
      Varnode *vn = midlist[j];
      list<PcodeOp *>::const_iterator iter = vn->beginDescend();
      while(iter != vn->endDescend()) {
	op = *iter;
	++iter;
	opc = op->code();
	// Must mirror the op-codes accepted by the backward walk, otherwise a path through a
	// PTRADD (including the one producing basePointer) can never be matched
	if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD)
	  continue;
	if (op->getIn(0) != vn || !op->getIn(1)->isConstant())
	  continue;
	uintb off = op->getIn(1)->getOffset();
	if (opc == CPUI_PTRADD)
	  off *= op->getIn(2)->getOffset();
	if (off != offset[i])
	  continue;
	duplist.push_back(op->getOut());
      }
    }
  }
}
/// Find STOREs with pointers derived from the \b basePointer and that are in the same
/// basic block as the root STORE. The root STORE is \e not included in the resulting set.
/// \param stores holds the collected STOREs
void HeapSequence::findInitialStores(vector<PcodeOp *> &stores)
{
Datatype *ptrType = rootOp->getIn(1)->getTypeReadFacing(rootOp);
vector<Varnode *> ptradds;
ptradds.push_back(basePointer);
findDuplicateBases(ptradds);
int4 pos = 0;
int4 alignSize = charType->getAlignSize();
while(pos < ptradds.size()) {
@ -494,10 +556,14 @@ void HeapSequence::findInitialStores(vector<PcodeOp *> &stores)
OpCode opc = op->code();
if (opc == CPUI_PTRADD) {
if (op->getIn(0) != vn) continue;
if (op->getOut()->getTypeDefFacing() != ptrType) continue;
// We only check array element size here, if we checked the data-type, we would
// need to take into account different pointer styles to the same element data-type
if (op->getIn(2)->getOffset() != alignSize) continue;
ptradds.push_back(op->getOut());
}
else if (opc == CPUI_COPY) {
ptradds.push_back(op->getOut());
}
else if (opc == CPUI_STORE && op->getParent() == block && op != rootOp) {
if (op->getIn(1) != vn) continue;
stores.push_back(op);
@ -530,7 +596,7 @@ uint8 HeapSequence::calcAddElements(Varnode *vn,vector<Varnode *> &nonConst,int4
/// \brief Calculate the offset and any non-constant additive elements between the given Varnode and the \b basePointer
///
/// Walk backward from the given Varnode thru PTRADDs and ADDs, summing any offsets encountered.
/// Walk backward from the given Varnode thru PTRADDs and COPYs, summing any offsets encountered.
/// Any non-constant Varnodes encountered in the path, that are not themselves a pointer, are passed back in a list.
/// \param vn is the given Varnode to trace back to the \b basePointer
/// \param nonConst will hold the list of non-constant Varnodes being passed back
@ -539,12 +605,23 @@ uint8 HeapSequence::calcPtraddOffset(Varnode *vn,vector<Varnode *> &nonConst)
{
uint8 res = 0;
while(vn != basePointer) {
PcodeOp *ptradd = vn->getDef();
uint8 off = calcAddElements(ptradd->getIn(1),nonConst,3);
off *= (uint8)ptradd->getIn(2)->getOffset();
res += off;
vn = ptradd->getIn(0);
while(vn->isWritten()) {
PcodeOp *op = vn->getDef();
OpCode opc = op->code();
if (opc == CPUI_PTRADD) {
uint8 mult = op->getIn(2)->getOffset();
if (mult != charType->getAlignSize())
break;
uint8 off = calcAddElements(op->getIn(1),nonConst,3);
off *= mult;
res += off;
vn = op->getIn(0);
}
else if (opc == CPUI_COPY) {
vn = op->getIn(0);
}
else
break;
}
return res;
}

View File

@ -89,6 +89,7 @@ class HeapSequence : public ArraySequence {
uint8 baseOffset; ///< Offset relative to pointer to root STORE
vector<Varnode *> nonConstAdds; ///< non-constant Varnodes being added into pointer calculation
void findBasePointer(Varnode *initPtr); ///< Find the base pointer for the sequence
void findDuplicateBases(vector<Varnode *> &duplist); ///< Find any duplicates of \b basePointer
void findInitialStores(vector<PcodeOp *> &stores);
static uint8 calcAddElements(Varnode *vn,vector<Varnode *> &nonConst,int4 maxDepth);
uint8 calcPtraddOffset(Varnode *vn,vector<Varnode *> &nonConst);

View File

@ -488,6 +488,8 @@ public:
Varnode *opStackLoad(AddrSpace *spc,uintb off,uint4 sz,PcodeOp *op,Varnode *stackptr,bool insertafter);
PcodeOp *opStackStore(AddrSpace *spc,uintb off,PcodeOp *op,bool insertafter);
void opUndoPtradd(PcodeOp *op,bool finalize); ///< Convert a CPUI_PTRADD back into a CPUI_INT_ADD
static int4 opFlipInPlaceTest(PcodeOp *op,vector<PcodeOp *> &fliplist);
void opFlipInPlaceExecute(vector<PcodeOp *> &fliplist);
/// \brief Start of PcodeOp objects with the given op-code
list<PcodeOp *>::const_iterator beginOp(OpCode opc) const { return obank.begin(opc); }
@ -563,6 +565,11 @@ public:
bool replaceLessequal(PcodeOp *op); ///< Replace INT_LESSEQUAL and INT_SLESSEQUAL expressions
bool distributeIntMultAdd(PcodeOp *op); ///< Distribute constant coefficient to additive input
bool collapseIntMultMult(Varnode *vn); ///< Collapse constant coefficients for two chained CPUI_INT_MULT
Varnode *buildCopyTemp(Varnode *vn,PcodeOp *point); ///< Create a COPY of given Varnode in a temporary register
static PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest);
PcodeOp *cseElimination(PcodeOp *op1,PcodeOp *op2);
void cseEliminateList(vector< pair<uintm,PcodeOp *> > &list,vector<Varnode *> &outlist);
static bool compareCallspecs(const FuncCallSpecs *a,const FuncCallSpecs *b);
#ifdef OPACTION_DEBUG
@ -688,14 +695,5 @@ public:
bool execute(PcodeOp *op,int4 slot,ParamTrial *t,bool allowFail);
};
extern int4 opFlipInPlaceTest(PcodeOp *op,vector<PcodeOp *> &fliplist);
extern void opFlipInPlaceExecute(Funcdata &data,vector<PcodeOp *> &fliplist);
extern PcodeOp *earliestUseInBlock(Varnode *vn,BlockBasic *bl);
extern PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest);
extern PcodeOp *cseElimination(Funcdata &data,PcodeOp *op1,PcodeOp *op2);
extern void cseEliminateList(Funcdata &data,vector< pair<uintm,PcodeOp *> > &list,
vector<Varnode *> &outlist);
} // End namespace ghidra
#endif

View File

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -1100,6 +1100,66 @@ bool Funcdata::collapseIntMultMult(Varnode *vn)
return true;
}
/// Return a Varnode in the \e unique space that is the output of a COPY op reading the given Varnode.
/// A preexisting COPY into a \e unique (whose output is not type locked) is reused if it can be read at
/// the specified \b point: either it is in the same block and not ordered after \b point, or its block is
/// the common block of the two ops.  Otherwise a new COPY is built, directly before \b point or at the end
/// of the common block, and all reads of the preexisting COPY are redirected to it before that
/// COPY is destroyed.
/// \param vn is the given Varnode to COPY
/// \param point is the PcodeOp where the copy needs to be available
/// \return the \e unique Varnode COPY
Varnode *Funcdata::buildCopyTemp(Varnode *vn,PcodeOp *point)
{
  // Search the readers of -vn- for a COPY into an unlocked temporary
  PcodeOp *existing = (PcodeOp *)0;
  list<PcodeOp *>::const_iterator iter = vn->beginDescend();
  for(;iter!=vn->endDescend();++iter) {
    PcodeOp *candidate = *iter;
    if (candidate->code() != CPUI_COPY) continue;
    Varnode *candOut = candidate->getOut();
    if (candOut->getSpace()->getType() != IPTR_INTERNAL) continue;
    if (candOut->isTypeLock()) continue;
    existing = candidate;
    break;
  }

  // Decide whether the existing COPY can be read at -point-, or needs to be rebuilt elsewhere
  PcodeOp *result = (PcodeOp *)0;
  if (existing != (PcodeOp *)0) {
    if (point->getParent() == existing->getParent()) {
      if (point->getSeqNum().getOrder() >= existing->getSeqNum().getOrder())
	result = existing;		// Existing COPY is not after -point-
    }
    else {
      BlockBasic *common;
      common = (BlockBasic *)FlowBlock::findCommonBlock(point->getParent(),existing->getParent());
      if (common == existing->getParent())
	result = existing;
      else if (common != point->getParent()) {	// Neither op is ancestor of the other
	result = newOp(1,common->getStop());
	opSetOpcode(result,CPUI_COPY);
	newUniqueOut(vn->getSize(),result);
	opSetInput(result,vn,0);
	opInsertEnd(result,common);
      }
      // else the common block is the block of -point-, build a fresh COPY below
    }
  }

  if (result == (PcodeOp *)0) {		// Nothing usable, build the COPY right before -point-
    result = newOp(1,point->getAddr());
    opSetOpcode(result,CPUI_COPY);
    newUniqueOut(vn->getSize(),result);
    opSetInput(result,vn,0);
    opInsertBefore(result,point);
  }

  bool supersede = (existing != (PcodeOp *)0) && (existing != result);
  if (supersede) {			// Fold the old COPY into the one being returned
    totalReplace(existing->getOut(),result->getOut());
    opDestroy(existing);
  }
  return result->getOut();
}
/// \brief Trace a boolean value to a set of PcodeOps that can be changed to flip the boolean value
///
/// The boolean Varnode is either the output of the given PcodeOp or the
@ -1108,7 +1168,7 @@ bool Funcdata::collapseIntMultMult(Varnode *vn)
/// \param op is the given PcodeOp
/// \param fliplist is the array that will hold the ops to flip
/// \return 0 if the change normalizes, 1 if the change is ambivalent, 2 if the change does not normalize
int4 opFlipInPlaceTest(PcodeOp *op,vector<PcodeOp *> &fliplist)
int4 Funcdata::opFlipInPlaceTest(PcodeOp *op,vector<PcodeOp *> &fliplist)
{
Varnode *vn;
@ -1168,7 +1228,7 @@ int4 opFlipInPlaceTest(PcodeOp *op,vector<PcodeOp *> &fliplist)
/// facilitate the flip.
/// \param fliplist is the list of PcodeOps to modify
void opFlipInPlaceExecute(Funcdata &data,vector<PcodeOp *> &fliplist)
void Funcdata::opFlipInPlaceExecute(vector<PcodeOp *> &fliplist)
{
Varnode *vn;
@ -1180,53 +1240,29 @@ void opFlipInPlaceExecute(Funcdata &data,vector<PcodeOp *> &fliplist)
vn = op->getIn(0);
PcodeOp *otherop = op->getOut()->loneDescend(); // Must be a lone descendant
int4 slot = otherop->getSlot(op->getOut());
data.opSetInput(otherop,vn,slot); // Propagate -vn- into otherop
data.opDestroy(op);
opSetInput(otherop,vn,slot); // Propagate -vn- into otherop
opDestroy(op);
}
else if (opc == CPUI_MAX) {
if (op->code() == CPUI_BOOL_AND)
data.opSetOpcode(op,CPUI_BOOL_OR);
opSetOpcode(op,CPUI_BOOL_OR);
else if (op->code() == CPUI_BOOL_OR)
data.opSetOpcode(op,CPUI_BOOL_AND);
opSetOpcode(op,CPUI_BOOL_AND);
else
throw LowlevelError("Bad flipInPlace op");
}
else {
data.opSetOpcode(op,opc);
opSetOpcode(op,opc);
if (flipyes) {
data.opSwapInput(op,0,1);
opSwapInput(op,0,1);
if ((opc == CPUI_INT_LESSEQUAL)||(opc == CPUI_INT_SLESSEQUAL))
data.replaceLessequal(op);
replaceLessequal(op);
}
}
}
}
/// \brief Get the earliest use/read of a Varnode in a specified basic block
///
/// All PcodeOps reading the Varnode are scanned. Readers outside the specified block are ignored, and
/// of the remaining ones, the op with the lowest sequence order is kept.
/// \param vn is the Varnode to search for
/// \param bl is the specified basic block in which to search
/// \return the earliest PcodeOp reading the Varnode or NULL
PcodeOp *earliestUseInBlock(Varnode *vn,BlockBasic *bl)
{
  PcodeOp *best = (PcodeOp *)0;
  list<PcodeOp *>::const_iterator iter = vn->beginDescend();
  while(iter != vn->endDescend()) {
    PcodeOp *reader = *iter;
    ++iter;
    if (reader->getParent() != bl)
      continue;
    if (best == (PcodeOp *)0 || reader->getSeqNum().getOrder() < best->getSeqNum().getOrder())
      best = reader;
  }
  return best;
}
/// \brief Find a duplicate calculation of a given PcodeOp reading a specific Varnode
///
/// We only match 1 level of calculation. Additionally the duplicate must occur in the
@ -1236,7 +1272,7 @@ PcodeOp *earliestUseInBlock(Varnode *vn,BlockBasic *bl)
/// \param bl is the indicated basic block
/// \param earliest is the specified op to be earlier than
/// \return the discovered duplicate PcodeOp or NULL
PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest)
PcodeOp *Funcdata::cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest)
{
list<PcodeOp *>::const_iterator iter;
@ -1265,11 +1301,10 @@ PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest
/// (depth 1 functional equivalence) eliminate the redundancy. Return the remaining (dominating)
/// PcodeOp. If neither op dominates the other, both are eliminated, and a new PcodeOp
/// is built at a commonly accessible point.
/// \param data is the function being modified
/// \param op1 is the first of the given PcodeOps
/// \param op2 is the second given PcodeOp
/// \return the dominating PcodeOp
PcodeOp *cseElimination(Funcdata &data,PcodeOp *op1,PcodeOp *op2)
PcodeOp *Funcdata::cseElimination(PcodeOp *op1,PcodeOp *op2)
{
PcodeOp *replace;
@ -1288,25 +1323,25 @@ PcodeOp *cseElimination(Funcdata &data,PcodeOp *op1,PcodeOp *op2)
else if (common == op2->getParent())
replace = op2;
else { // Neither op is ancestor of the other
replace = data.newOp(op1->numInput(),common->getStop());
data.opSetOpcode(replace,op1->code());
data.newVarnodeOut(op1->getOut()->getSize(),op1->getOut()->getAddr(),replace);
replace = newOp(op1->numInput(),common->getStop());
opSetOpcode(replace,op1->code());
newVarnodeOut(op1->getOut()->getSize(),op1->getOut()->getAddr(),replace);
for(int4 i=0;i<op1->numInput();++i) {
if (op1->getIn(i)->isConstant())
data.opSetInput(replace,data.newConstant(op1->getIn(i)->getSize(),op1->getIn(i)->getOffset()),i);
opSetInput(replace,newConstant(op1->getIn(i)->getSize(),op1->getIn(i)->getOffset()),i);
else
data.opSetInput(replace,op1->getIn(i),i);
opSetInput(replace,op1->getIn(i),i);
}
data.opInsertEnd(replace,common);
opInsertEnd(replace,common);
}
}
if (replace != op1) {
data.totalReplace(op1->getOut(),replace->getOut());
data.opDestroy(op1);
totalReplace(op1->getOut(),replace->getOut());
opDestroy(op1);
}
if (replace != op2) {
data.totalReplace(op2->getOut(),replace->getOut());
data.opDestroy(op2);
totalReplace(op2->getOut(),replace->getOut());
opDestroy(op2);
}
return replace;
}
@ -1329,10 +1364,9 @@ static bool compareCseHash(const pair<uintm,PcodeOp *> &a,const pair<uintm,Pcode
/// The hash serves as a primary test for duplicate calculations; if it doesn't match
/// the PcodeOps aren't common subexpressions. This method searches for hash matches
/// then does secondary testing and eliminates any redundancy it finds.
/// \param data is the function being modified
/// \param list is the list of (hash, PcodeOp) pairs
/// \param outlist will hold Varnodes produced by duplicate calculations
void cseEliminateList(Funcdata &data,vector< pair<uintm,PcodeOp *> > &list,vector<Varnode *> &outlist)
void Funcdata::cseEliminateList(vector< pair<uintm,PcodeOp *> > &list,vector<Varnode *> &outlist)
{
PcodeOp *op1,*op2,*resop;
@ -1350,9 +1384,9 @@ void cseEliminateList(Funcdata &data,vector< pair<uintm,PcodeOp *> > &list,vecto
if ((!op1->isDead())&&(!op2->isDead())&&op1->isCseMatch(op2)) {
Varnode *outvn1 = op1->getOut();
Varnode *outvn2 = op2->getOut();
if ((outvn1 == (Varnode *)0)||data.isHeritaged(outvn1)) {
if ((outvn2 == (Varnode *)0)||data.isHeritaged(outvn2)) {
resop = cseElimination(data,op1,op2);
if ((outvn1 == (Varnode *)0)||isHeritaged(outvn1)) {
if ((outvn2 == (Varnode *)0)||isHeritaged(outvn2)) {
resop = cseElimination(op1,op2);
outlist.push_back(resop->getOut());
}
}

View File

@ -204,7 +204,7 @@ int4 RuleSelectCse::applyOp(PcodeOp *op,Funcdata &data)
list.push_back(pair<uintm,PcodeOp *>(hash,otherop));
}
if (list.size()<=1) return 0;
cseEliminateList(data,list,vlist);
data.cseEliminateList(list,vlist);
if (vlist.empty()) return 0;
return 1;
}
@ -1048,7 +1048,7 @@ PcodeOp *RulePushMulti::findSubstitute(Varnode *in1,Varnode *in2,BlockBasic *bb,
Varnode *vn = op1->getIn(i);
if (vn->isConstant()) continue;
if (vn == op2->getIn(i)) // Find matching inputs to op1 and op2,
return cseFindInBlock(op1,vn,bb,earliest); // search for cse of op1 in bb
return Funcdata::cseFindInBlock(op1,vn,bb,earliest); // search for cse of op1 in bb
}
return (PcodeOp *)0;
@ -1087,7 +1087,7 @@ int4 RulePushMulti::applyOp(PcodeOp *op,Funcdata &data)
if (op1->code() == CPUI_SUBPIECE) return 0; // SUBPIECE is pulled not pushed
BlockBasic *bl = op->getParent();
PcodeOp *earliest = earliestUseInBlock(op->getOut(),bl);
PcodeOp *earliest = bl->earliestUse(op->getOut());
if (op1->code() == CPUI_COPY) { // Special case of MERGE of 2 shadowing varnodes
if (res==0) return 0;
PcodeOp *substitute = findSubstitute(buf1[0],buf2[0],bl,earliest);
@ -3036,13 +3036,13 @@ int4 RuleMultiCollapse::applyOp(PcodeOp *op,Funcdata &data)
copyr->clearMark();
op = copyr->getDef();
if (func_eq) { // We have only functional equality
PcodeOp *earliest = earliestUseInBlock(op->getOut(),op->getParent());
PcodeOp *earliest = op->getParent()->earliestUse(op->getOut());
newop = defcopyr->getDef(); // We must copy newop (defcopyr)
PcodeOp *substitute = (PcodeOp *)0;
for(int4 i=0;i<newop->numInput();++i) {
Varnode *invn = newop->getIn(i);
if (!invn->isConstant()) {
substitute = cseFindInBlock(newop,invn,op->getParent(),earliest); // Has newop already been copied in this block
substitute = Funcdata::cseFindInBlock(newop,invn,op->getParent(),earliest); // Has newop already been copied in this block
break;
}
}

View File

@ -1797,21 +1797,33 @@ bool SplitFlow::doTrace(void)
return true;
}
/// If \b pointer Varnode is written by an INT_ADD, PTRSUB, or PTRADD from a another pointer
/// to a structure or array, update \b pointer Varnode, \b baseOffset, and \b ptrType to this.
/// If \b pointer Varnode is written by a COPY, INT_ADD, PTRSUB, or PTRADD from another pointer to a
/// - structure
/// - array OR
/// - to an implied array with the given base type
///
/// then update \b pointer Varnode, \b baseOffset, and \b ptrType to this.
/// \param impliedBase if non-null is the allowed element data-type for an implied array
/// \return \b true if \b pointer was successfully updated
bool SplitDatatype::RootPointer::backUpPointer(Datatype *impliedBase)
{
if (!pointer->isWritten())
return false;
int4 off;
PcodeOp *addOp = pointer->getDef();
OpCode opc = addOp->code();
if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD)
return false;
Varnode *cvn = addOp->getIn(1);
if (!cvn->isConstant())
if (opc == CPUI_PTRSUB || opc == CPUI_INT_ADD || opc == CPUI_PTRADD) {
Varnode *cvn = addOp->getIn(1);
if (!cvn->isConstant())
return false;
off = (int4)cvn->getOffset();
}
else if (opc == CPUI_COPY)
off = 0;
else {
return false;
}
Varnode *tmpPointer = addOp->getIn(0);
Datatype *ct = tmpPointer->getTypeReadFacing(addOp);
if (ct->getMetatype() != TYPE_PTR)
@ -1819,11 +1831,10 @@ bool SplitDatatype::RootPointer::backUpPointer(Datatype *impliedBase)
Datatype *parent = ((TypePointer *)ct)->getPtrTo();
type_metatype meta = parent->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_ARRAY) {
if (opc != CPUI_PTRADD || parent != impliedBase)
if ((opc != CPUI_PTRADD && opc != CPUI_COPY) || parent != impliedBase)
return false;
}
ptrType = (TypePointer *)ct;
int4 off = (int4)cvn->getOffset();
if (opc == CPUI_PTRADD)
off *= (int4)addOp->getIn(2)->getOffset();
off = AddrSpace::addressToByteInt(off, ptrType->getWordSize());
@ -1832,10 +1843,11 @@ bool SplitDatatype::RootPointer::backUpPointer(Datatype *impliedBase)
return true;
}
/// The LOAD or STORE pointer Varnode is examined. If it is a pointer to the given data-type, the
/// root \b pointer is returned. If not, we try to recursively walk back through either PTRSUB or INT_ADD instructions,
/// until a pointer Varnode matching the data-type is found. Any accumulated offset, relative to the original
/// LOAD or STORE pointer is recorded in the \b baseOffset. If a matching pointer is not found, \b false is returned.
/// We search for a pointer to the specified data-type starting with the LOAD/STORE. If we don't immediately
/// find it, we back up one level (through a PTRSUB, PTRADD, or INT_ADD). If it isn't found after 1 hop,
/// \b false is returned. Once this pointer is found, we back up through any single path of nested TYPE_STRUCT
/// and TYPE_ARRAY offsets to establish the final root \b pointer, and \b true is returned. Any accumulated offset,
/// relative to the original LOAD or STORE pointer is recorded in the \b baseOffset.
/// \param op is the LOAD or STORE
/// \param valueType is the specific data-type to match
/// \return \b true if the root pointer is found
@ -1843,11 +1855,11 @@ bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType)
{
Datatype *impliedBase = (Datatype *)0;
if (valueType->getMetatype() == TYPE_PARTIALSTRUCT)
if (valueType->getMetatype() == TYPE_PARTIALSTRUCT) // Strip off partial to get containing struct or array
valueType = ((TypePartialStruct *)valueType)->getParent();
else if (valueType->getMetatype() == TYPE_ARRAY) {
if (valueType->getMetatype() == TYPE_ARRAY) { // If the data-type is an array
valueType = ((TypeArray *)valueType)->getBase();
impliedBase = valueType;
impliedBase = valueType; // we allow an implied array (pointer to element) as a match
}
loadStore = op;
baseOffset = 0;
@ -1864,6 +1876,7 @@ bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType)
if (ptrType->getPtrTo() != valueType)
return false;
}
// The required pointer is found. We try to back up to pointers to containing structures or arrays
for(int4 i=0;i<3;++i) {
if (pointer->isAddrTied() || pointer->loneDescend() == (PcodeOp *)0) break;
if (!backUpPointer(impliedBase))
@ -1872,6 +1885,19 @@ bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType)
return true;
}
/// Replace the root \b pointer with a COPY of itself held in a temporary register.  The COPY is obtained
/// from Funcdata::buildCopyTemp and its data-type is updated to \b ptrType.
/// The intent is that the root \b pointer can no longer be modified by subsequent STOREs and
/// can be implicit in the expressions.
/// \param data is the containing function
/// \param followOp is the point where the COPY should be inserted
void SplitDatatype::RootPointer::duplicateToTemp(Funcdata &data,PcodeOp *followOp)
{
  pointer = data.buildCopyTemp(pointer,followOp);
  pointer->updateType(ptrType,false,false);
}
/// If the pointer Varnode is no longer used, recursively check and remove the op producing it,
/// which will be either an INT_ADD or PTRSUB, until the root \b pointer is reached or
/// a Varnode still being used is encountered.
@ -1920,8 +1946,9 @@ Datatype *SplitDatatype::getComponent(Datatype *ct,int4 offset,bool &isHole)
/// For the given data-type, taking into account configuration options, return:
/// - -1 for not splittable
/// - 0 for data-type that needs to be split
/// - 1 for data-type that can be split multiple ways
/// - 0 for struct based data-type that needs to be split
/// - 1 for array based data-type that needs to be split
/// - 2 for primitive data-type that can be split multiple ways
/// \param ct is the given data-type
/// \return the categorization
int4 SplitDatatype::categorizeDatatype(Datatype *ct)
@ -1933,18 +1960,18 @@ int4 SplitDatatype::categorizeDatatype(Datatype *ct)
if (!splitArrays) break;
subType = ((TypeArray *)ct)->getBase();
if (subType->getMetatype() != TYPE_UNKNOWN || subType->getSize() != 1)
return 0;
return 1;
else
return 1; // unknown1 array does not need splitting and acts as (large) primitive
return 2; // unknown1 array does not need splitting and acts as (large) primitive
case TYPE_PARTIALSTRUCT:
subType = ((TypePartialStruct *)ct)->getParent();
if (subType->getMetatype() == TYPE_ARRAY) {
if (!splitArrays) break;
subType = ((TypeArray *)subType)->getBase();
if (subType->getMetatype() != TYPE_UNKNOWN || subType->getSize() != 1)
return 0;
return 1;
else
return 1; // unknown1 array does not need splitting and acts as (large) primitive
return 2; // unknown1 array does not need splitting and acts as (large) primitive
}
else if (subType->getMetatype() == TYPE_STRUCT) {
if (!splitStructures) break;
@ -1959,7 +1986,7 @@ int4 SplitDatatype::categorizeDatatype(Datatype *ct)
case TYPE_INT:
case TYPE_UINT:
case TYPE_UNKNOWN:
return 1;
return 2;
default:
break;
}
@ -1985,22 +2012,21 @@ bool SplitDatatype::testDatatypeCompatibility(Datatype *inBase,Datatype *outBase
int4 outCategory = categorizeDatatype(outBase);
if (outCategory < 0)
return false;
if (outCategory != 0 && inCategory != 0)
if (outCategory == 2 && inCategory == 2)
return false;
if (!inConstant && inBase == outBase && inBase->getMetatype() == TYPE_STRUCT)
return false; // Don't split a whole structure unless it is getting initialized from a constant
if (isLoadStore && outCategory == 1 && inBase->getMetatype() == TYPE_ARRAY)
if (isLoadStore && outCategory == 2 && inCategory == 1)
return false; // Don't split array pointer writing into primitive
if (isLoadStore && inCategory == 1 && !inConstant && outBase->getMetatype() == TYPE_ARRAY)
if (isLoadStore && inCategory == 2 && !inConstant && outCategory == 1)
return false; // Don't split primitive into an array pointer, TODO: We could check if primitive is defined by PIECE
if (isLoadStore && inCategory == 0 && outCategory == 0 && !inConstant &&
inBase->getMetatype() == TYPE_ARRAY && outBase->getMetatype() == TYPE_ARRAY)
if (isLoadStore && inCategory == 1 && outCategory == 1 && !inConstant)
return false; // Don't split copies between arrays
bool inHole;
bool outHole;
int4 curOff = 0;
int4 sizeLeft = inBase->getSize();
if (inCategory == 1) {
if (inCategory == 2) { // If input is primitive
while(sizeLeft > 0) {
Datatype *curOut = getComponent(outBase,curOff,outHole);
if (curOut == (Datatype *)0) return false;
@ -2017,7 +2043,7 @@ bool SplitDatatype::testDatatypeCompatibility(Datatype *inBase,Datatype *outBase
}
}
}
else if (outCategory == 1) {
else if (outCategory == 2) { // If output is primitive
while(sizeLeft > 0) {
Datatype *curIn = getComponent(inBase,curOff,inHole);
if (curIn == (Datatype *)0) return false;
@ -2555,6 +2581,8 @@ bool SplitDatatype::splitStore(PcodeOp *storeOp,Datatype *outType)
buildInSubpieces(inVn,storeOp,inVarnodes);
vector<Varnode *> storePtrs;
if (storeRoot.pointer->isAddrTied())
storeRoot.duplicateToTemp(data, storeOp);
buildPointers(storeRoot.pointer, storeRoot.ptrType, storeRoot.baseOffset, storeOp, storePtrs, false);
// Preserve original STORE object, so that INDIRECT references are still valid
// but convert it into the first of the smaller STOREs

View File

@ -178,6 +178,7 @@ class SplitDatatype {
bool backUpPointer(Datatype *impliedBase);	///< Follow flow of \b pointer back thru COPY, INT_ADD, PTRSUB, or PTRADD
public:
bool find(PcodeOp *op,Datatype *valueType); ///< Locate root pointer for underlying LOAD or STORE
void duplicateToTemp(Funcdata &data,PcodeOp *followOp); ///< COPY the root varnode into a temp register
void freePointerChain(Funcdata &data); ///< Remove unused pointer calculations
};
Funcdata &data; ///< The containing function

View File

@ -2282,6 +2282,19 @@ TypePartialStruct::TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype
offset = off;
}
/// If the containing data-type is an array, its element data-type is not TYPE_UNKNOWN, and the offset of
/// \b this piece is a multiple of the element's aligned size, return the element data-type.
/// In all other cases return the \b stripped data-type.
/// \return the array element data-type or the \b stripped data-type.
Datatype *TypePartialStruct::getComponentForPtr(void) const
{
  if (container->getMetatype() != TYPE_ARRAY)
    return stripped;
  Datatype *element = ((TypeArray *)container)->getBase();
  if (element->getMetatype() == TYPE_UNKNOWN)
    return stripped;
  if ((offset % element->getAlignSize()) != 0)
    return stripped;		// Piece does not start on an element boundary
  return element;
}
void TypePartialStruct::printRaw(ostream &s) const
{
@ -3780,21 +3793,6 @@ TypePointer *TypeFactory::getTypePointer(int4 s,Datatype *pt,uint4 ws,const stri
return res;
}
/// Build a pointer data-type without creating more than a depth of 1, i.e. ptr->ptr.
/// If the pointed-to data-type is itself a pointer, it is swapped for a TYPE_UNKNOWN of the same size.
/// \param s is the size of the pointer
/// \param pt is the pointed-to data-type
/// \param ws is the wordsize associated with the pointer
/// \return the TypePointer object
TypePointer *TypeFactory::getTypePointerNoDepth(int4 s,Datatype *pt,uint4 ws)
{
  Datatype *target = pt;
  if (target->getMetatype() == TYPE_PTR)
    target = getBase(target->getSize(),TYPE_UNKNOWN);	// Pass back unknown * of matching size
  return getTypePointer(s,target,ws);
}
/// \param as is the number of elements in the desired array
/// \param ao is the data-type of the array element
/// \return the TypeArray object

View File

@ -564,6 +564,7 @@ public:
TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype *strip); ///< Constructor
int4 getOffset(void) const { return offset; } ///< Get the byte offset into the containing data-type
Datatype *getParent(void) const { return container; } ///< Get the data-type containing \b this piece
Datatype *getComponentForPtr(void) const; ///< Get (initial) component of array represented by \b this
virtual void printRaw(ostream &s) const;
virtual Datatype *getSubType(int8 off,int8 *newoff) const;
virtual int4 getHoleSize(int4 off) const;
@ -792,7 +793,6 @@ public:
TypePointer *getTypePointerStripArray(int4 s,Datatype *pt,uint4 ws); ///< Construct a pointer data-type, stripping an ARRAY level
TypePointer *getTypePointer(int4 s,Datatype *pt,uint4 ws); ///< Construct an absolute pointer data-type
TypePointer *getTypePointer(int4 s,Datatype *pt,uint4 ws,const string &n); ///< Construct a named pointer data-type
TypePointer *getTypePointerNoDepth(int4 s,Datatype *pt,uint4 ws); ///< Construct a depth limited pointer data-type
TypeArray *getTypeArray(int4 as,Datatype *ao); ///< Construct an array data-type
TypeStruct *getTypeStruct(const string &n); ///< Create an (empty) structure
TypePartialStruct *getTypePartialStruct(Datatype *contain,int4 off,int4 sz); ///< Create a partial structure

View File

@ -175,6 +175,27 @@ OpCode TypeOp::floatSignManipulation(PcodeOp *op)
return CPUI_MAX;
}
/// \brief Propagate a dereferenced data-type up to its pointer data-type
///
/// Don't create more than a depth of 1, i.e. ptr->ptr. A pointer data-type is replaced by
/// a TYPE_UNKNOWN data-type of the same size, and a partial structure is replaced by the
/// component it reports through getComponentForPtr(), before the pointer is constructed.
/// \param t is the TypeFactory used to construct the pointer
/// \param dt is the pointed-to data-type
/// \param sz is the size of the pointer
/// \param wordsz is the wordsize associated with the pointer
/// \return the TypePointer object
Datatype *TypeOp::propagateToPointer(TypeFactory *t,Datatype *dt,int4 sz,int4 wordsz)
{
  switch(dt->getMetatype()) {
    case TYPE_PTR:
      // Make sure that at least we return a pointer to something the size of -dt-
      dt = t->getBase(dt->getSize(),TYPE_UNKNOWN);	// Pass back unknown *
      break;
    case TYPE_PARTIALSTRUCT:
      dt = ((TypePartialStruct *)dt)->getComponentForPtr();
      break;
    default:
      break;		// Any other data-type is pointed to directly
  }
  return t->getTypePointer(sz,dt,wordsz);
}
/// \param t is the TypeFactory used to construct data-types
/// \param opc is the op-code value the new object will represent
/// \param n is the display name that will represent the op-code
@ -440,7 +461,7 @@ Datatype *TypeOpLoad::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,
Datatype *newtype;
if (inslot == -1) { // Propagating output to input (value to ptr)
AddrSpace *spc = op->getIn(0)->getSpaceFromConst();
newtype = tlst->getTypePointerNoDepth(outvn->getSize(),alttype,spc->getWordSize());
newtype = propagateToPointer(tlst,alttype,outvn->getSize(),spc->getWordSize());
}
else if (alttype->getMetatype()==TYPE_PTR) {
newtype = ((TypePointer *)alttype)->getPtrTo();
@ -515,7 +536,7 @@ Datatype *TypeOpStore::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn
Datatype *newtype;
if (inslot==2) { // Propagating value to ptr
AddrSpace *spc = op->getIn(0)->getSpaceFromConst();
newtype = tlst->getTypePointerNoDepth(outvn->getSize(),alttype,spc->getWordSize());
newtype = propagateToPointer(tlst,alttype,outvn->getSize(),spc->getWordSize());
}
else if (alttype->getMetatype()==TYPE_PTR) {
newtype = ((TypePointer *)alttype)->getPtrTo();

View File

@ -180,6 +180,7 @@ public:
/// \brief Return the floating-point operation associated with the \e sign bit manipulation by the given PcodeOp
static OpCode floatSignManipulation(PcodeOp *op);
static Datatype *propagateToPointer(TypeFactory *t,Datatype *dt,int4 sz,int4 wordsz);
};
// Major classes of operations