GP-2563 SplitDatatype transformer

This commit is contained in:
caheckman 2023-04-03 19:47:17 -04:00
parent 269ea1ae7a
commit bdf1314b4f
28 changed files with 1699 additions and 161 deletions

View File

@ -44,6 +44,7 @@ src/decompile/datatests/noforloop_iterused.xml||GHIDRA||||END|
src/decompile/datatests/offsetarray.xml||GHIDRA||||END|
src/decompile/datatests/packstructaccess.xml||GHIDRA||||END|
src/decompile/datatests/partialmerge.xml||GHIDRA||||END|
src/decompile/datatests/partialsplit.xml||GHIDRA||||END|
src/decompile/datatests/partialunion.xml||GHIDRA||||END|
src/decompile/datatests/piecestruct.xml||GHIDRA||||END|
src/decompile/datatests/pointercmp.xml||GHIDRA||||END|

View File

@ -1401,7 +1401,9 @@ void Architecture::resetDefaultsInternal(void)
infer_pointers = true;
analyze_for_loops = true;
readonlypropagate = false;
alias_block_level = 2; // Block structs and arrays by default
alias_block_level = 2; // Block structs and arrays by default, but not more primitive data-types
split_datatype_config = OptionSplitDatatypes::option_struct | OptionSplitDatatypes::option_array
| OptionSplitDatatypes::option_pointer;
}
/// Reset options that can be modified by the OptionDatabase. This includes

View File

@ -181,6 +181,7 @@ public:
uint4 flowoptions; ///< options passed to flow following engine
uint4 max_instructions; ///< Maximum instructions that can be processed in one function
int4 alias_block_level; ///< Aliases blocked by 0=none, 1=struct, 2=array, 3=all
uint4 split_datatype_config; ///< Toggle for data-types splitting: Bit 0=structs, 1=arrays, 2=pointers
vector<Rule *> extra_pool_rules; ///< Extra rules that go in the main pool (cpu specific, experimental)
Database *symboltab; ///< Memory map of global variables and functions

View File

@ -2868,8 +2868,7 @@ int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref)
else if (useOp->code() == CPUI_PIECE) {
Varnode *rootVn = PieceNode::findRoot(vn);
if (vn == rootVn) return -1;
Datatype *ct = rootVn->getStructuredType();
if (ct != (Datatype *)0) {
if (rootVn->getDef()->isPartialRoot()) {
// Getting PIECEd into a structured thing. Unless vn is a leaf, it should be implicit
if (def->code() != CPUI_PIECE) return -1;
if (vn->loneDescend() == (PcodeOp *)0) return -1;
@ -5205,7 +5204,7 @@ void ActionDatabase::buildDefaultGroups(void)
const char *members[] = { "base", "protorecovery", "protorecovery_a", "deindirect", "localrecovery",
"deadcode", "typerecovery", "stackptrflow",
"blockrecovery", "stackvars", "deadcontrolflow", "switchnorm",
"cleanup", "merge", "dynamic", "casts", "analysis",
"cleanup", "splitcopy", "splitpointer", "merge", "dynamic", "casts", "analysis",
"fixateglobals", "fixateproto",
"segment", "returnsplit", "nodejoin", "doubleload", "doubleprecis",
"unreachable", "subvar", "floatprecision",
@ -5472,6 +5471,9 @@ void ActionDatabase::universalAction(Architecture *conf)
actcleanup->addRule( new RulePtrsubCharConstant("cleanup") );
actcleanup->addRule( new RuleExtensionPush("cleanup") );
actcleanup->addRule( new RulePieceStructure("cleanup") );
actcleanup->addRule( new RuleSplitCopy("splitcopy") );
actcleanup->addRule( new RuleSplitLoad("splitpointer") );
actcleanup->addRule( new RuleSplitStore("splitpointer") );
}
act->addAction( actcleanup );

View File

@ -151,25 +151,14 @@ bool SymbolEntry::updateType(Varnode *vn) const
Datatype *SymbolEntry::getSizedType(const Address &inaddr,int4 sz) const
{
uintb off;
int4 off;
// A dynamic entry uses its stored offset directly; otherwise the offset is the
// distance of inaddr from this entry's starting address, plus the stored offset
if (isDynamic())
off = offset;
else
off = (inaddr.getOffset() - addr.getOffset()) + offset;
off = (int4)(inaddr.getOffset() - addr.getOffset()) + offset;
Datatype *cur = symbol->getType();
// Walk down through sub-types via getSubType(); return as soon as the stored
// offset is 0 and the current data-type is exactly sz bytes
do {
if (offset == 0 && cur->getSize() == sz)
return cur;
cur = cur->getSubType(off,&off);
} while(cur != (Datatype *)0);
// else {
// This case occurs if the varnode is a "partial type" of some sort
// This PROBABLY means the varnode shouldn't be considered addrtied
// I.e. it shouldn't be considered part of the same variable as symbol
// }
return (Datatype *)0;
// Pass the symbol's data-type, the byte offset and the requested size to
// getExactPiece() on the architecture's \b types object
return symbol->getScope()->getArch()->types->getExactPiece(cur, off, sz);
}
/// Give a contained one-line description of \b this storage, suitable for a debug console

View File

@ -888,6 +888,21 @@ bool Funcdata::setUnionField(const Datatype *parent,const PcodeOp *op,int4 slot,
}
(*res.first).second = resolve;
}
if (op->code() == CPUI_MULTIEQUAL && slot >= 0) {
// Data-type propagation doesn't happen between MULTIEQUAL input slots holding the same Varnode
// So if this is a MULTIEQUAL, copy resolution to any other input slots holding the same Varnode
const Varnode *vn = op->getIn(slot); // The Varnode being directly set
for(int4 i=0;i<op->numInput();++i) {
if (i == slot) continue;
if (op->getIn(i) != vn) continue; // Check that different input slot holds same Varnode
ResolveEdge dupedge(parent,op,i);
res = unionMap.emplace(dupedge,resolve);
if (!res.second) {
if (!(*res.first).second.isLocked())
(*res.first).second = resolve;
}
}
}
return true;
}

View File

@ -1160,6 +1160,6 @@ ElementId ELEM_VAL = ElementId("val",8);
ElementId ELEM_VALUE = ElementId("value",9);
ElementId ELEM_VOID = ElementId("void",10);
ElementId ELEM_UNKNOWN = ElementId("XMLunknown",270); // Number serves as next open index
ElementId ELEM_UNKNOWN = ElementId("XMLunknown",271); // Number serves as next open index
} // End namespace ghidra

View File

@ -639,6 +639,7 @@ void Merge::trimOpOutput(PcodeOp *op)
vn = op->getOut();
Datatype *ct = vn->getType();
copyop = data.newOp(1,op->getAddr());
data.opSetOpcode(copyop,CPUI_COPY);
if (ct->needsResolution()) {
int4 fieldNum = data.inheritResolution(ct, copyop, -1, op, -1);
data.forceFacingType(ct, fieldNum, copyop, 0);
@ -647,7 +648,6 @@ void Merge::trimOpOutput(PcodeOp *op)
}
uniq = data.newUnique(vn->getSize(),ct);
data.opSetOutput(op,uniq); // Output of op is now stubby uniq
data.opSetOpcode(copyop,CPUI_COPY);
data.opSetOutput(copyop,vn); // Original output is bumped forward slightly
data.opSetInput(copyop,uniq,0);
data.opInsertAfter(copyop,afterop);
@ -1385,15 +1385,24 @@ void Merge::groupPartialRoot(Varnode *vn)
}
PieceNode::gatherPieces(pieces, vn, vn->getDef(), baseOffset);
bool throwOut = false;
for(int4 i=0;i<pieces.size();++i) {
Varnode *nodeVn = pieces[i].getVarnode();
// Make sure each node is still marked and hasn't merged with anything else
if (!nodeVn->isProtoPartial()) return;
if (nodeVn->getHigh()->numInstances() != 1) return;
if (!nodeVn->isProtoPartial() || nodeVn->getHigh()->numInstances() != 1) {
throwOut = true;
break;
}
}
for(int4 i=0;i<pieces.size();++i) {
Varnode *nodeVn = pieces[i].getVarnode();
nodeVn->getHigh()->groupWith(pieces[i].getTypeOffset() - baseOffset,high);
if (throwOut) {
for(int4 i=0;i<pieces.size();++i)
pieces[i].getVarnode()->clearProtoPartial();
}
else {
for(int4 i=0;i<pieces.size();++i) {
Varnode *nodeVn = pieces[i].getVarnode();
nodeVn->getHigh()->groupWith(pieces[i].getTypeOffset() - baseOffset,high);
}
}
}

View File

@ -54,6 +54,7 @@ ElementId ELEM_PARAM3 = ElementId("param3",204);
ElementId ELEM_PROTOEVAL = ElementId("protoeval",205);
ElementId ELEM_SETACTION = ElementId("setaction",206);
ElementId ELEM_SETLANGUAGE = ElementId("setlanguage",207);
ElementId ELEM_SPLITDATATYPE = ElementId("splitdatatype",270);
ElementId ELEM_STRUCTALIGN = ElementId("structalign",208);
ElementId ELEM_TOGGLERULE = ElementId("togglerule",209);
ElementId ELEM_WARNING = ElementId("warning",210);
@ -124,6 +125,7 @@ OptionDatabase::OptionDatabase(Architecture *g)
registerOption(new OptionAliasBlock());
registerOption(new OptionMaxInstruction());
registerOption(new OptionNamespaceStrategy());
registerOption(new OptionSplitDatatypes());
}
OptionDatabase::~OptionDatabase(void)
@ -920,4 +922,45 @@ string OptionNamespaceStrategy::apply(Architecture *glb,const string &p1,const s
return "Namespace strategy set";
}
/// Possible values are:
///   - (empty string) = 0
///   - "struct"       = 1
///   - "array"        = 2
///   - "pointer"      = 4
///
/// Any other string causes a LowlevelError to be thrown.
/// \param val is the option string
/// \return the corresponding configuration bit
uint4 OptionSplitDatatypes::getOptionBit(const string &val)
{
  if (val.size() == 0)
    return 0;			// An omitted parameter contributes no bits
  uint4 bit;
  if (val == "struct")
    bit = option_struct;
  else if (val == "array")
    bit = option_array;
  else if (val == "pointer")
    bit = option_pointer;
  else
    throw LowlevelError("Unknown data-type split option: "+val);
  return bit;
}
/// \brief Set the data-type splitting configuration and toggle the matching rule groups
///
/// Each parameter is translated with getOptionBit() and the resulting bits are OR'ed
/// together to form the new configuration.  All three parameters are parsed \e before the
/// Architecture is modified, so an unrecognized keyword (which throws) leaves the
/// existing configuration and action toggles untouched.
///
/// If neither the struct nor the array bit is set, both the "splitcopy" and "splitpointer"
/// groups are disabled.  Otherwise "splitcopy" is enabled and "splitpointer" follows the
/// pointer bit.
/// \param glb is the Architecture being configured
/// \param p1 is the first option keyword (may be empty)
/// \param p2 is the second option keyword (may be empty)
/// \param p3 is the third option keyword (may be empty)
/// \return a message indicating whether the configuration changed
string OptionSplitDatatypes::apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const
{
  // Parse everything up front: getOptionBit() throws on an unknown keyword
  uint4 newConfig = getOptionBit(p1);
  newConfig |= getOptionBit(p2);
  newConfig |= getOptionBit(p3);

  uint4 oldConfig = glb->split_datatype_config;
  glb->split_datatype_config = newConfig;

  if ((newConfig & (option_struct | option_array)) == 0) {
    // Nothing to split: turn off both rule groups
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitcopy",false);
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitpointer",false);
  }
  else {
    bool pointers = (newConfig & option_pointer) != 0;
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitcopy",true);
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitpointer",pointers);
  }

  if (oldConfig == newConfig)
    return "Split data-type configuration unchanged";
  return "Split data-type configuration set";
}
} // End namespace ghidra

View File

@ -60,6 +60,7 @@ extern ElementId ELEM_PARAM3; ///< Marshaling element \<param3>
extern ElementId ELEM_PROTOEVAL; ///< Marshaling element \<protoeval>
extern ElementId ELEM_SETACTION; ///< Marshaling element \<setaction>
extern ElementId ELEM_SETLANGUAGE; ///< Marshaling element \<setlanguage>
extern ElementId ELEM_SPLITDATATYPE; ///< Marshaling element \<splitdatatype>
extern ElementId ELEM_STRUCTALIGN; ///< Marshaling element \<structalign>
extern ElementId ELEM_TOGGLERULE; ///< Marshaling element \<togglerule>
extern ElementId ELEM_WARNING; ///< Marshaling element \<warning>
@ -322,5 +323,18 @@ public:
virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
};
/// \brief Option controlling which combined data-type accesses get split into components
///
/// The option takes up to three keywords ("struct", "array", "pointer"), each of which
/// maps to one of the configuration bits below.
class OptionSplitDatatypes : public ArchOption {
public:
  /// Configuration bits stored in Architecture::split_datatype_config
  enum {
    option_struct = 1,		///< Split combined structure fields
    option_array = 2,		///< Split combined array elements
    option_pointer = 4		///< Split combined LOAD and STORE operations
  };
  OptionSplitDatatypes(void) { name = "splitdatatype"; }	///< Constructor
  static uint4 getOptionBit(const string &val);		///< Translate option string to a configuration bit
  virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
};
} // End namespace ghidra
#endif

View File

@ -7119,6 +7119,86 @@ int4 RulePieceStructure::applyOp(PcodeOp *op,Funcdata &data)
return 1;
}
/// \class RuleSplitCopy
/// \brief Split COPY ops based on TypePartialStruct
///
/// When a single COPY moves more than one logical component of a structure or array,
/// the COPY is rewritten as multiple COPYs, one per component.
void RuleSplitCopy::getOpList(vector<uint4> &oplist) const

{
  const uint4 opc = CPUI_COPY;	// The only op-code this rule is applied to
  oplist.push_back(opc);
}
/// \brief Attempt to split a COPY whose input or output is a structure, array, or partial structure
///
/// The data-types facing the COPY are examined.  If neither side is a TYPE_STRUCT,
/// TYPE_ARRAY, or TYPE_PARTIALSTRUCT, the rule does not apply.  Otherwise the decision
/// and the actual transformation are delegated to SplitDatatype::splitCopy().
/// \param op is the COPY operation
/// \param data is the function being analyzed
/// \return 1 if the COPY was split, 0 otherwise
int4 RuleSplitCopy::applyOp(PcodeOp *op,Funcdata &data)

{
  Datatype *inType = op->getIn(0)->getTypeReadFacing(op);
  Datatype *outType = op->getOut()->getTypeDefFacing();
  type_metatype metain = inType->getMetatype();
  type_metatype metaout = outType->getMetatype();
  bool inComposite = (metain == TYPE_PARTIALSTRUCT || metain == TYPE_ARRAY || metain == TYPE_STRUCT);
  bool outComposite = (metaout == TYPE_PARTIALSTRUCT || metaout == TYPE_ARRAY || metaout == TYPE_STRUCT);
  if (!inComposite && !outComposite)
    return 0;			// Rule result is an int4 change count, not a boolean
  SplitDatatype splitter(data);
  if (splitter.splitCopy(op, inType, outType))
    return 1;
  return 0;
}
/// \class RuleSplitLoad
/// \brief Split LOAD ops based on TypePartialStruct
///
/// When a single LOAD reads more than one logical component of a structure or array,
/// the LOAD is rewritten as multiple LOADs, one per component.
void RuleSplitLoad::getOpList(vector<uint4> &oplist) const

{
  const uint4 opc = CPUI_LOAD;	// The only op-code this rule is applied to
  oplist.push_back(opc);
}
/// \brief Attempt to split a LOAD of a structure, array, or partial structure
///
/// The data-type of the loaded value is obtained via SplitDatatype::getValueDatatype(),
/// using the size of the LOAD output.  If that yields a TYPE_STRUCT, TYPE_ARRAY, or
/// TYPE_PARTIALSTRUCT, the split is attempted with SplitDatatype::splitLoad().
/// \param op is the LOAD operation
/// \param data is the function being analyzed
/// \return 1 if the LOAD was split, 0 otherwise
int4 RuleSplitLoad::applyOp(PcodeOp *op,Funcdata &data)

{
  int4 valueSize = op->getOut()->getSize();
  Datatype *inType = SplitDatatype::getValueDatatype(op, valueSize, data.getArch()->types);
  if (inType == (Datatype *)0)
    return 0;
  switch(inType->getMetatype()) {
    case TYPE_STRUCT:
    case TYPE_ARRAY:
    case TYPE_PARTIALSTRUCT:
      break;			// Candidate for splitting
    default:
      return 0;
  }
  SplitDatatype splitter(data);
  return splitter.splitLoad(op, inType) ? 1 : 0;
}
/// \class RuleSplitStore
/// \brief Split STORE ops based on TypePartialStruct
///
/// When a single STORE writes more than one logical component of a structure or array,
/// the STORE is rewritten as multiple STOREs, one per component.
void RuleSplitStore::getOpList(vector<uint4> &oplist) const

{
  const uint4 opc = CPUI_STORE;	// The only op-code this rule is applied to
  oplist.push_back(opc);
}
/// \brief Attempt to split a STORE of a structure, array, or partial structure
///
/// The data-type of the stored value is obtained via SplitDatatype::getValueDatatype(),
/// using the size of the value being stored (input 2).  If that yields a TYPE_STRUCT,
/// TYPE_ARRAY, or TYPE_PARTIALSTRUCT, the split is attempted with SplitDatatype::splitStore().
/// \param op is the STORE operation
/// \param data is the function being analyzed
/// \return 1 if the STORE was split, 0 otherwise
int4 RuleSplitStore::applyOp(PcodeOp *op,Funcdata &data)

{
  int4 valueSize = op->getIn(2)->getSize();
  Datatype *outType = SplitDatatype::getValueDatatype(op, valueSize, data.getArch()->types);
  if (outType == (Datatype *)0)
    return 0;
  type_metatype metaout = outType->getMetatype();
  bool isComposite = (metaout == TYPE_STRUCT || metaout == TYPE_ARRAY || metaout == TYPE_PARTIALSTRUCT);
  if (!isComposite)
    return 0;
  SplitDatatype splitter(data);
  return splitter.splitStore(op, outType) ? 1 : 0;
}
/// \class RuleSubNormal
/// \brief Pull-back SUBPIECE through INT_RIGHT and INT_SRIGHT
///

View File

@ -1174,6 +1174,39 @@ public:
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
/// \brief Rule applied to COPY ops; registered under the rule name "splitcopy"
class RuleSplitCopy : public Rule {
public:
  RuleSplitCopy(const string &g) : Rule( g, 0, "splitcopy") {}	///< Constructor
  /// Produce a copy of \b this rule, but only if its group is in the given list
  virtual Rule *clone(const ActionGroupList &grouplist) const {
    if (grouplist.contains(getGroup()))
      return new RuleSplitCopy(getGroup());
    return (Rule *)0;
  }
  virtual void getOpList(vector<uint4> &oplist) const;
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
/// \brief Rule applied to LOAD ops; registered under the rule name "splitload"
class RuleSplitLoad : public Rule {
public:
  RuleSplitLoad(const string &g) : Rule( g, 0, "splitload") {}	///< Constructor
  /// Produce a copy of \b this rule, but only if its group is in the given list
  virtual Rule *clone(const ActionGroupList &grouplist) const {
    if (grouplist.contains(getGroup()))
      return new RuleSplitLoad(getGroup());
    return (Rule *)0;
  }
  virtual void getOpList(vector<uint4> &oplist) const;
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
/// \brief Rule applied to STORE ops; registered under the rule name "splitstore"
class RuleSplitStore : public Rule {
public:
  RuleSplitStore(const string &g) : Rule( g, 0, "splitstore") {}	///< Constructor
  /// Produce a copy of \b this rule, but only if its group is in the given list
  virtual Rule *clone(const ActionGroupList &grouplist) const {
    if (grouplist.contains(getGroup()))
      return new RuleSplitStore(getGroup());
    return (Rule *)0;
  }
  virtual void getOpList(vector<uint4> &oplist) const;
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
class RuleSubNormal : public Rule {
public:
RuleSubNormal(const string &g) : Rule( g, 0, "subnormal") {} ///< Constructor

View File

@ -99,7 +99,7 @@ SubvariableFlow::ReplaceVarnode *SubvariableFlow::setReplacement(Varnode *vn,uin
if ((!aggressive)&& vn->isInput()) return (ReplaceVarnode *)0; // Cannot assume input is sign extended
if (vn->isPersist()) return (ReplaceVarnode *)0;
}
if (vn->isTypeLock()) {
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
if (vn->getType()->getSize() != flowsize)
return (ReplaceVarnode *)0;
}
@ -110,7 +110,7 @@ SubvariableFlow::ReplaceVarnode *SubvariableFlow::setReplacement(Varnode *vn,uin
// are packed into a single location, i.e. always consider it a single variable
if ((!aggressive)&&((vn->getConsume()&~mask)!=0)) // If there is any use of value outside of the logical variable
return (ReplaceVarnode *)0; // This probably means the whole thing is a variable, i.e. quit
if (vn->isTypeLock()) {
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
int4 sz = vn->getType()->getSize();
if (sz != flowsize)
return (ReplaceVarnode *)0;
@ -1470,7 +1470,7 @@ TransformVar *SplitFlow::setReplacement(Varnode *vn)
return res;
}
if (vn->isTypeLock())
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT)
return (TransformVar *)0;
if (vn->isInput())
return (TransformVar *)0; // Right now we can't split inputs
@ -1744,6 +1744,805 @@ bool SplitFlow::doTrace(void)
return true;
}
/// If \b pointer Varnode is written by an INT_ADD, PTRSUB, or PTRADD from a another pointer
/// to a structure or array, update \b pointer Varnode, \b baseOffset, and \b ptrType to this.
/// \return \b true if \b pointer was successfully updated
bool SplitDatatype::RootPointer::backUpPointer(void)
{
if (!pointer->isWritten())
return false;
PcodeOp *addOp = pointer->getDef();
OpCode opc = addOp->code();
if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD)
return false;
Varnode *cvn = addOp->getIn(1);
if (!cvn->isConstant())
return false;
Varnode *tmpPointer = addOp->getIn(0);
Datatype *ct = tmpPointer->getTypeReadFacing(addOp);
if (ct->getMetatype() != TYPE_PTR)
return false;
Datatype *parent = ((TypePointer *)ct)->getPtrTo();
type_metatype meta = parent->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_ARRAY)
return false;
ptrType = (TypePointer *)ct;
int4 off = (int4)cvn->getOffset();
if (opc == CPUI_PTRADD)
off *= (int4)addOp->getIn(2)->getOffset();
off = AddrSpace::addressToByteInt(off, ptrType->getWordSize());
baseOffset += off;
pointer = tmpPointer;
return true;
}
/// The pointer Varnode of the LOAD or STORE (input 1) is examined.  If it points to the given
/// data-type, it is accepted as is.  Otherwise a single backUpPointer() step is tried and the
/// result must point to the given data-type.  Once a match is established, up to two further
/// backUpPointer() steps are attempted, stopping early if the current pointer is address tied
/// or does not have a lone descendant.  The accumulated offset relative to the original
/// pointer is recorded in \b baseOffset.
///
/// A TYPE_PARTIALSTRUCT value type is replaced by its parent before matching.
/// \param op is the LOAD or STORE
/// \param valueType is the specific data-type to match
/// \return \b true if the root pointer is found
bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType)

{
  if (valueType->getMetatype() == TYPE_PARTIALSTRUCT)
    valueType = ((TypePartialStruct *)valueType)->getParent();
  loadStore = op;
  baseOffset = 0;
  pointer = op->getIn(1);
  firstPointer = pointer;
  Datatype *facingType = pointer->getTypeReadFacing(op);
  if (facingType->getMetatype() != TYPE_PTR)
    return false;
  ptrType = (TypePointer *)facingType;
  if (ptrType->getPtrTo() != valueType) {
    // One step back is allowed to find the matching pointer
    bool matched = backUpPointer() && (ptrType->getPtrTo() == valueType);
    if (!matched)
      return false;
  }
  for(int4 step=0;step<2;++step) {
    bool mayBackUp = !pointer->isAddrTied() && pointer->loneDescend() != (PcodeOp *)0;
    if (!mayBackUp || !backUpPointer())
      break;
  }
  return true;
}
/// Starting at \b firstPointer, destroy the op defining it whenever the Varnode is not
/// address tied and has no remaining descendants, then move on to that op's first input.
/// The walk stops when the root \b pointer is reached or a Varnode that is still in use
/// (or address tied) is encountered.
/// \param data is the containing function
void SplitDatatype::RootPointer::freePointerChain(Funcdata &data)

{
  for(;;) {
    if (firstPointer == pointer) break;		// Reached the root
    if (firstPointer->isAddrTied()) break;
    if (!firstPointer->hasNoDescend()) break;	// Still being read somewhere
    PcodeOp *defOp = firstPointer->getDef();
    firstPointer = defOp->getIn(0);
    data.opDestroy(defOp);
  }
}
/// \brief Obtain the component of the given data-type at the specified offset
///
/// The data-type is descended with getSubType() until a component is reached that starts
/// exactly at the offset and is not itself an array (arrays are always descended into).
/// The component may therefore be nested more than one level deep.
/// If the descent runs out of sub-types, the hole size at the offset is queried on the
/// original data-type: a positive hole yields an undefined data-type of that size, capped
/// at 8 bytes, and \b isHole is set to \b true.  Otherwise null is returned.
/// \param ct is the given data-type
/// \param offset is the specified offset
/// \param isHole passes back whether a hole in the composite was encountered
/// \return the component data-type at the offset or null, if no such component exists
Datatype *SplitDatatype::getComponent(Datatype *ct,int4 offset,bool &isHole)

{
  isHole = false;
  uintb remaining = offset;
  Datatype *sub = ct;
  for(;;) {
    sub = sub->getSubType(remaining,&remaining);
    if (sub == (Datatype *)0)
      break;			// No component here, check for a hole below
    if (remaining == 0 && sub->getMetatype() != TYPE_ARRAY)
      return sub;		// Found a non-array component starting exactly at the offset
  }
  int4 holeSize = ct->getHoleSize(offset);
  if (holeSize <= 0)
    return (Datatype *)0;
  isHole = true;
  return types->getBase((holeSize > 8) ? 8 : holeSize, TYPE_UNKNOWN);
}
/// For the given data-type, taking into account the \b splitArrays and \b splitStructures
/// configuration, return:
///   - -1 for not splittable
///   - 0 for data-type that needs to be split
///   - 1 for data-type that can be split multiple ways
///
/// Integers and undefined data-types are category 1, as is an array (or partial array) of
/// 1-byte undefined elements.  Other arrays are category 0.  A structure is category 0
/// if it has more than one dependent data-type; a partial structure whose parent is a
/// structure is always category 0.
/// \param ct is the given data-type
/// \return the categorization
int4 SplitDatatype::categorizeDatatype(Datatype *ct)

{
  type_metatype meta = ct->getMetatype();
  if (meta == TYPE_INT || meta == TYPE_UINT || meta == TYPE_UNKNOWN)
    return 1;

  // For a partial, the decision is based on the parent container
  bool viaPartial = (meta == TYPE_PARTIALSTRUCT);
  Datatype *container = viaPartial ? ((TypePartialStruct *)ct)->getParent() : ct;
  type_metatype containerMeta = container->getMetatype();

  if (containerMeta == TYPE_ARRAY) {
    if (!splitArrays)
      return -1;
    Datatype *elem = ((TypeArray *)container)->getBase();
    // unknown1 array does not need splitting and acts as (large) primitive
    bool actsAsPrimitive = (elem->getMetatype() == TYPE_UNKNOWN && elem->getSize() == 1);
    return actsAsPrimitive ? 1 : 0;
  }
  if (containerMeta == TYPE_STRUCT) {
    if (!splitStructures)
      return -1;
    if (viaPartial || ct->numDepend() > 1)
      return 0;
  }
  return -1;
}
/// \brief Can the two given data-types be mutually split into matching logical components
///
/// Test if the data-types have components with matching size and offset. If so, the component
/// data-types and offsets are saved to the \b pieces array and \b true is returned.
/// At least one of the data-types must be a partial data-type, but the other may be a
/// TYPE_UNKNOWN, which this method assumes can be split into components of arbitrary size.
///
/// Note that entries may already have been appended to \b dataTypePieces by the time
/// \b false is returned.
/// \param inBase is the data-type coming into the operation
/// \param outBase is the data-type coming out of the operation
/// \param inConstant is \b true if the incoming data-type labels a constant
/// \return \b true if the data-types have compatible components, \b false otherwise
bool SplitDatatype::testDatatypeCompatibility(Datatype *inBase,Datatype *outBase,bool inConstant)
{
// Both sides must be splittable at all (category >= 0) ...
int4 inCategory = categorizeDatatype(inBase);
if (inCategory < 0)
return false;
int4 outCategory = categorizeDatatype(outBase);
if (outCategory < 0)
return false;
// ... and at least one side must be category 0, so its components can drive the split
if (outCategory != 0 && inCategory != 0)
return false;
if (!inConstant && inBase == outBase && inBase->getMetatype() == TYPE_STRUCT)
return false; // Don't split a whole structure unless it is getting initialized from a constant
bool inHole;
bool outHole;
int4 curOff = 0; // Byte offset of the next piece
int4 sizeLeft = inBase->getSize(); // Bytes not yet covered by a piece
if (inCategory == 1) {
// Input is primitive-like: the output's components dictate the pieces
while(sizeLeft > 0) {
Datatype *curOut = getComponent(outBase,curOff,outHole);
if (curOut == (Datatype *)0) return false;
// Throw away primitive data-type if it is a constant
Datatype *curIn = inConstant ? curOut : types->getBase(curOut->getSize(), TYPE_UNKNOWN);
dataTypePieces.emplace_back(curIn,curOut,curOff);
sizeLeft -= curOut->getSize();
curOff += curOut->getSize();
if (outHole) {
if (dataTypePieces.size() == 1)
return false; // Initial offset into structure is at a hole
if (sizeLeft == 0 && dataTypePieces.size() == 2)
return false; // Two pieces, one is a hole. Likely padding.
}
}
}
else if (outCategory == 1) {
// Output is primitive-like: the input's components dictate the pieces
while(sizeLeft > 0) {
Datatype *curIn = getComponent(inBase,curOff,inHole);
if (curIn == (Datatype *)0) return false;
Datatype *curOut = types->getBase(curIn->getSize(), TYPE_UNKNOWN);
dataTypePieces.emplace_back(curIn,curOut,curOff);
sizeLeft -= curIn->getSize();
curOff += curIn->getSize();
if (inHole) {
if (dataTypePieces.size() == 1)
return false; // Initial offset into structure is at a hole
if (sizeLeft == 0 && dataTypePieces.size() == 2)
return false; // Two pieces, one is a hole. Likely padding.
}
}
}
else { // Both in and out data-types have components
while(sizeLeft > 0) {
Datatype *curIn = getComponent(inBase,curOff,inHole);
if (curIn == (Datatype *)0) return false;
Datatype *curOut = getComponent(outBase,curOff,outHole);
if (curOut == (Datatype *)0) return false;
// Components differ in size: refine the larger one until the sizes agree.
// A hole is simply shrunk to the other side's size; a real component is
// replaced by its own component at offset 0.
while(curIn->getSize() != curOut->getSize()) {
if (curIn->getSize() > curOut->getSize()) {
if (inHole)
curIn = types->getBase(curOut->getSize(), TYPE_UNKNOWN);
else
curIn = getComponent(curIn,0,inHole);
if (curIn == (Datatype *)0) return false;
}
else {
if (outHole)
curOut = types->getBase(curIn->getSize(), TYPE_UNKNOWN);
else
curOut = getComponent(curOut,0,outHole);
if (curOut == (Datatype *)0) return false;
}
}
dataTypePieces.emplace_back(curIn,curOut,curOff);
sizeLeft -= curIn->getSize();
curOff += curIn->getSize();
}
}
// A single piece means there is nothing to split
return dataTypePieces.size() > 1;
}
/// \brief Test specific constraints for splitting the given COPY operation into pieces
///
/// The split is refused when:
///   - the COPY input is a function input,
///   - input and output are both address tied and share the same address, or
///   - the input is not address tied, is produced by a LOAD, and this COPY is its only reader.
///
/// \param copyOp is the given COPY operation
/// \return \b true if the split can proceed
bool SplitDatatype::testCopyConstraints(PcodeOp *copyOp)

{
  Varnode *srcVn = copyOp->getIn(0);
  if (srcVn->isInput())
    return false;
  if (srcVn->isAddrTied()) {
    Varnode *dstVn = copyOp->getOut();
    bool sameStorage = dstVn->isAddrTied() && dstVn->getAddr() == srcVn->getAddr();
    return !sameStorage;
  }
  bool fromLoad = srcVn->isWritten() && srcVn->getDef()->code() == CPUI_LOAD;
  if (fromLoad && srcVn->loneDescend() == copyOp) {
    // LOAD feeding only this COPY is not split here.
    // NOTE(review): the original comment says this situation is handled by splitCopy() - confirm
    return false;
  }
  return true;
}
/// \brief If the given Varnode is an extended precision constant, create split constants
///
/// Look for ZEXT(#c) and CONCAT(#c1,#c2) forms. Try to split into single precision Varnodes,
/// one per entry in \b dataTypePieces, each typed with the entry's input data-type.
/// The Varnode must be written and have a lone descendant.  On success the defining
/// ZEXT or PIECE op is destroyed.
///
/// All piece sizes are validated before any constant is created, so a failure does not
/// leave unused constant Varnodes behind.
/// \param vn is the given Varnode
/// \param inVarnodes will contain the split constant Varnodes
/// \return \b true if the Varnode is an extended precision constant and the split is successful
bool SplitDatatype::generateConstants(Varnode *vn,vector<Varnode *> &inVarnodes)

{
  if (vn->loneDescend() == (PcodeOp *)0) return false;
  if (!vn->isWritten()) return false;
  PcodeOp *op = vn->getDef();
  OpCode opc = op->code();
  uintb lo,hi;			// Least and most significant parts of the constant
  int4 losize;			// Size in bytes of the least significant part
  if (opc == CPUI_INT_ZEXT) {
    Varnode *loVn = op->getIn(0);
    if (!loVn->isConstant()) return false;
    hi = 0;
    lo = loVn->getOffset();
    losize = loVn->getSize();
  }
  else if (opc == CPUI_PIECE) {
    Varnode *hiVn = op->getIn(0);
    Varnode *loVn = op->getIn(1);
    if (!hiVn->isConstant() || !loVn->isConstant())
      return false;
    hi = hiVn->getOffset();
    lo = loVn->getOffset();
    losize = loVn->getSize();
  }
  else
    return false;

  // Check every piece fits in a uintb before creating anything
  for(int4 i=0;i<dataTypePieces.size();++i) {
    if (dataTypePieces[i].inType->getSize() > (int4)sizeof(uintb)) {
      inVarnodes.clear();	// Container is left empty on this failure, as before
      return false;
    }
  }

  const int4 maxShift = 8 * (int4)sizeof(uintb);	// Shifting by this many bits or more is undefined
  int4 fullsize = vn->getSize();
  bool isBigEndian = vn->getSpace()->isBigEndian();
  for(int4 i=0;i<dataTypePieces.size();++i) {
    Datatype *dt = dataTypePieces[i].inType;
    int4 sa;			// Number of bytes to shift away to reach this piece
    if (isBigEndian)
      sa = fullsize - (dataTypePieces[i].offset + dt->getSize());
    else
      sa = dataTypePieces[i].offset;
    uintb val;
    if (sa >= losize) {
      // Piece lies entirely in the high part; convert the byte distance to bits
      int4 bitShift = (sa - losize) * 8;
      val = (bitShift < maxShift) ? (hi >> bitShift) : 0;
    }
    else {
      val = lo >> sa * 8;
      if (sa + dt->getSize() > losize)		// Piece straddles the lo/hi boundary
	val |= hi << (losize - sa)*8;
    }
    val &= calc_mask(dt->getSize());
    Varnode *outVn = data.newConstant(dt->getSize(), val);
    inVarnodes.push_back(outVn);
    outVn->updateType(dt, false, false);
  }
  data.opDestroy(op);
  return true;
}
/// \brief Assuming the input is a constant, build split constants
///
/// For each entry in \b dataTypePieces, a new constant Varnode is created holding the
/// portion of the root constant's value at the entry's offset (counted from the most
/// significant end for a big endian space), sized and typed by the entry's input data-type.
/// \param rootVn is the given root constant
/// \param inVarnodes is the container for the new Varnodes
void SplitDatatype::buildInConstants(Varnode *rootVn,vector<Varnode *> &inVarnodes)

{
  const uintb rootVal = rootVn->getOffset();
  const bool isBigEndian = rootVn->getSpace()->isBigEndian();
  for(int4 i=0;i<dataTypePieces.size();++i) {
    Datatype *pieceType = dataTypePieces[i].inType;
    int4 pieceSize = pieceType->getSize();
    int4 byteShift = dataTypePieces[i].offset;
    if (isBigEndian)
      byteShift = rootVn->getSize() - byteShift - pieceSize;
    uintb pieceVal = rootVal >> (8*byteShift);
    pieceVal &= calc_mask(pieceSize);
    Varnode *constVn = data.newConstant(pieceSize, pieceVal);
    inVarnodes.push_back(constVn);
    constVn->updateType(pieceType, false, false);
  }
}
/// \brief Build input Varnodes by extracting SUBPIECEs from the root
///
/// If the root turns out to be an extended precision constant, generateConstants()
/// supplies the pieces directly.  Otherwise a SUBPIECE op is created for each entry in
/// \b dataTypePieces, truncating the root at the entry's offset and producing a Varnode
/// sized and typed by the entry's input data-type.
/// \param rootVn is the given root Varnode
/// \param followOp is the point at which the SUBPIECEs should be inserted (before)
/// \param inVarnodes is the container for the new Varnodes
void SplitDatatype::buildInSubpieces(Varnode *rootVn,PcodeOp *followOp,vector<Varnode *> &inVarnodes)

{
  if (generateConstants(rootVn, inVarnodes))
    return;
  Address rootAddr = rootVn->getAddr();
  for(int4 i=0;i<dataTypePieces.size();++i) {
    Datatype *pieceType = dataTypePieces[i].inType;
    int4 pieceSize = pieceType->getSize();
    int4 truncOff = dataTypePieces[i].offset;
    Address pieceAddr = rootAddr + truncOff;
    pieceAddr.renormalize(pieceSize);
    if (pieceAddr.isBigEndian())	// SUBPIECE truncation counts from the least significant byte
      truncOff = rootVn->getSize() - truncOff - pieceSize;
    PcodeOp *subOp = data.newOp(2, followOp->getAddr());
    data.opSetOpcode(subOp, CPUI_SUBPIECE);
    data.opSetInput(subOp,rootVn,0);
    data.opSetInput(subOp,data.newConstant(4, truncOff), 1);
    Varnode *pieceVn = data.newVarnodeOut(pieceSize, pieceAddr, subOp);
    inVarnodes.push_back(pieceVn);
    pieceVn->updateType(pieceType, false, false);
    data.opInsertBefore(subOp, followOp);
  }
}
/// \brief Build output Varnodes with storage based on the given root
///
/// For each entry in \b dataTypePieces, a new Varnode is created whose storage is the
/// root's address plus the entry's offset, sized and typed by the entry's output data-type.
/// \param rootVn is the given root Varnode
/// \param outVarnodes is the container for the new Varnodes
void SplitDatatype::buildOutVarnodes(Varnode *rootVn,vector<Varnode *> &outVarnodes)

{
  Address rootAddr = rootVn->getAddr();
  for(int4 i=0;i<dataTypePieces.size();++i) {
    Datatype *pieceType = dataTypePieces[i].outType;
    int4 pieceSize = pieceType->getSize();
    Address pieceAddr = rootAddr + dataTypePieces[i].offset;
    pieceAddr.renormalize(pieceSize);
    outVarnodes.push_back(data.newVarnode(pieceSize, pieceAddr, pieceType));
  }
}
/// \brief Concatenate output Varnodes into given root Varnode
///
/// Insert PIECE operators concatenating all output Varnodes from most significant to least significant
/// producing the root Varnode as the final result.
///
/// Nothing is built if the root has no descendants.  Unless the root is address tied, every
/// piece and intermediate result is marked as proto-partial and the root is registered
/// with the Merge object as a proto-partial root.
/// \param rootVn is the given root Varnode
/// \param previousOp is the point at which to insert (after)
/// \param outVarnodes is the list of output Varnodes
void SplitDatatype::buildOutConcats(Varnode *rootVn,PcodeOp *previousOp,vector<Varnode *> &outVarnodes)
{
if (rootVn->hasNoDescend())
return; // Don't need to produce concatenation if its unused
Address baseAddr = rootVn->getAddr();
Varnode *vn; // Running (most significant) operand of the next PIECE
PcodeOp *concatOp;
PcodeOp *preOp = previousOp; // Each new PIECE is inserted after the previous one
bool addressTied = rootVn->isAddrTied();
// We are creating a CONCAT stack, mark varnodes appropriately
for(int4 i=0;i<outVarnodes.size();++i) {
if (!addressTied)
outVarnodes[i]->setProtoPartial();
}
// NOTE(review): both loops below index outVarnodes assuming it holds at least two
// entries - confirm callers guarantee this
if (baseAddr.isBigEndian()) {
// Big endian: the first piece is the most significant, walk forward
vn = outVarnodes[0];
for(int4 i=1;;++i) { // Traverse most to least significant
concatOp = data.newOp(2,previousOp->getAddr());
data.opSetOpcode(concatOp,CPUI_PIECE);
data.opSetInput(concatOp,vn,0); // Most significant
data.opSetInput(concatOp,outVarnodes[i],1); // Least significant
data.opInsertAfter(concatOp, preOp);
// The final PIECE gets no intermediate output here; rootVn is attached after the loop
if (i + 1 >= outVarnodes.size()) break;
preOp = concatOp;
int4 sz = vn->getSize() + outVarnodes[i]->getSize();
Address addr = baseAddr;
addr.renormalize(sz);
vn = data.newVarnodeOut(sz,addr,concatOp);
if (!addressTied)
vn->setProtoPartial();
}
}
else {
// Little endian: the last piece is the most significant, walk backward
vn = outVarnodes[outVarnodes.size()-1];
for(int4 i=outVarnodes.size()-2;;--i) { // Traverse most to least significant
concatOp = data.newOp(2,previousOp->getAddr());
data.opSetOpcode(concatOp,CPUI_PIECE);
data.opSetInput(concatOp,vn,0); // Most significant
data.opSetInput(concatOp,outVarnodes[i],1); // Least significant
data.opInsertAfter(concatOp, preOp);
// The final PIECE gets no intermediate output here; rootVn is attached after the loop
if (i<=0) break;
preOp = concatOp;
int4 sz = vn->getSize() + outVarnodes[i]->getSize();
Address addr = outVarnodes[i]->getAddr();
addr.renormalize(sz);
vn = data.newVarnodeOut(sz,addr,concatOp);
if (!addressTied)
vn->setProtoPartial();
}
}
// The last PIECE created produces the root itself
concatOp->setPartialRoot();
data.opSetOutput(concatOp, rootVn);
if (!addressTied)
data.getMerge().registerProtoPartialRoot(rootVn);
}
/// \brief Build a series of PTRSUB ops at different offsets, given a root pointer
///
/// Offsets and data-types are based on \b dataTypePieces, taking input data-types if \b isInput is \b true,
/// output data-types otherwise. The data-types, relative to the root pointer, are assumed to start at
/// the given base offset.
/// \param rootVn is the root pointer
/// \param ptrType is the pointer data-type associated with the root
/// \param baseOffset is the given base offset
/// \param followOp is the point at which the new PTRSUB ops are inserted (before)
/// \param ptrVarnodes is the container for the new pointer Varnodes
/// \param isInput specifies either input (\b true) or output (\b false) data-types
void SplitDatatype::buildPointers(Varnode *rootVn,TypePointer *ptrType,int4 baseOffset,PcodeOp *followOp,
vector<Varnode *> &ptrVarnodes,bool isInput)
{
Datatype *baseType = ptrType->getPtrTo();
for(int4 i=0;i<dataTypePieces.size();++i) {
Datatype *matchType = isInput ? dataTypePieces[i].inType : dataTypePieces[i].outType;
int4 byteOffset = baseOffset + dataTypePieces[i].offset;
Datatype *tmpType = baseType;
uintb curOff = byteOffset;
Varnode *inPtr = rootVn;
do {
uintb newOff;
PcodeOp *newOp;
Datatype *newType;
if (curOff >= tmpType->getSize()) { // An offset bigger than current data-type indicates an array
newType = tmpType; // The new data-type will be the same as current data-type
intb sNewOff = (intb)curOff % tmpType->getSize(); // But new offset will be old offset modulo data-type size
newOff = (sNewOff < 0) ? (sNewOff + tmpType->getSize()) : sNewOff;
}
else {
newType = tmpType->getSubType(curOff, &newOff);
if (newType == (Datatype *)0) {
// Null should only be returned for a hole in a structure, in which case use precomputed data-type
newType = matchType;
newOff = 0;
}
}
if (tmpType == newType || tmpType->getMetatype() == TYPE_ARRAY) {
int4 finalOffset = (int4)curOff - (int4)newOff;
int4 sz = newType->getSize(); // Element size in bytes
finalOffset = finalOffset / sz; // Number of elements
sz = AddrSpace::byteToAddressInt(sz, ptrType->getWordSize());
newOp = data.newOp(3,followOp->getAddr());
data.opSetOpcode(newOp, CPUI_PTRADD);
data.opSetInput(newOp, inPtr, 0);
Varnode *indexVn = data.newConstant(inPtr->getSize(), finalOffset);
data.opSetInput(newOp, indexVn, 1);
data.opSetInput(newOp, data.newConstant(inPtr->getSize(), sz), 2);
Datatype *indexType = types->getBase(indexVn->getSize(),TYPE_INT);
indexVn->updateType(indexType, false, false);
}
else {
int4 finalOffset = AddrSpace::byteToAddressInt((int4)curOff - (int4)newOff,ptrType->getWordSize());
newOp = data.newOp(2,followOp->getAddr());
data.opSetOpcode(newOp, CPUI_PTRSUB);
data.opSetInput(newOp, inPtr, 0);
data.opSetInput(newOp, data.newConstant(inPtr->getSize(), finalOffset), 1);
}
inPtr = data.newUniqueOut(inPtr->getSize(), newOp);
Datatype *tmpPtr = types->getTypePointerStripArray(ptrType->getSize(), newType, ptrType->getWordSize());
inPtr->updateType(tmpPtr, false, false);
data.opInsertBefore(newOp, followOp);
tmpType = newType;
curOff = newOff;
} while(tmpType->getSize() > matchType->getSize());
ptrVarnodes.push_back(inPtr);
}
}
/// Walk the ops that read the given Varnode and stop at the first arithmetic one.
/// \param vn is the given Varnode
/// \return \b true if at least one op reading the Varnode is an arithmetic op
bool SplitDatatype::isArithmeticInput(Varnode *vn)
{
  for(list<PcodeOp *>::const_iterator it = vn->beginDescend(); it != vn->endDescend(); ++it) {
    if ((*it)->getOpcode()->isArithmeticOp())
      return true;		// A single arithmetic reader is enough
  }
  return false;
}
/// Look at the op writing the given Varnode, if there is one.
/// \param vn is the given Varnode
/// \return \b true if the Varnode is written and its defining op is arithmetic
bool SplitDatatype::isArithmeticOutput(Varnode *vn)
{
  // A Varnode that is not written has no defining op to examine
  return vn->isWritten() && vn->getDef()->getOpcode()->isArithmeticOp();
}
/// Bind to the function being transformed, cache the data-type factory of its Architecture and
/// read from the Architecture's split configuration whether structures and arrays may be split.
/// \param func is the function containing the ops to split
SplitDatatype::SplitDatatype(Funcdata &func)
  : data(func)
{
  Architecture *arch = func.getArch();
  const uint4 config = arch->split_datatype_config;
  types = arch->types;
  splitStructures = (config & OptionSplitDatatypes::option_struct) != 0;
  splitArrays = (config & OptionSplitDatatypes::option_array) != 0;
}
/// Decide, from the data-types on either side of the given COPY, whether the operation can be
/// broken into pieces, and if so replace it with one COPY per piece.
/// \param copyOp is the given COPY
/// \param inType is the data-type of the COPY input
/// \param outType is the data-type of the COPY output
/// \return \b true if the split was performed
bool SplitDatatype::splitCopy(PcodeOp *copyOp,Datatype *inType,Datatype *outType)
{
  if (!testCopyConstraints(copyOp))
    return false;
  Varnode *srcVn = copyOp->getIn(0);
  const bool srcIsConstant = srcVn->isConstant();
  if (!testDatatypeCompatibility(inType, outType, srcIsConstant))
    return false;
  // Sanity checks: the value must not come out of, or flow directly into, arithmetic
  if (isArithmeticOutput(srcVn))
    return false;
  Varnode *destVn = copyOp->getOut();
  if (isArithmeticInput(destVn))
    return false;

  // Produce the per-piece inputs, either as constants or as pieces taken from the input Varnode
  vector<Varnode *> srcPieces;
  if (srcIsConstant)
    buildInConstants(srcVn,srcPieces);
  else
    buildInSubpieces(srcVn,copyOp,srcPieces);

  // Produce the per-piece outputs and the concatenation that rebuilds the original output
  vector<Varnode *> destPieces;
  buildOutVarnodes(destVn,destPieces);
  buildOutConcats(destVn,copyOp,destPieces);

  // One small COPY per piece, placed ahead of the original op, which is then removed
  for(int4 k=0;k<srcPieces.size();++k) {
    PcodeOp *pieceCopy = data.newOp(1,copyOp->getAddr());
    data.opSetOpcode(pieceCopy,CPUI_COPY);
    data.opSetInput(pieceCopy,srcPieces[k],0);
    data.opSetOutput(pieceCopy,destPieces[k]);
    data.opInsertBefore(pieceCopy, copyOp);
  }
  data.opDestroy(copyOp);
  return true;
}
/// Decide, from the data-type of the value being loaded, whether the given LOAD can be replaced by
/// several smaller LOADs, and if so perform the replacement. The input data-type describes the size
/// and composition of the loaded value. As a special case, when the LOAD output is read only by a
/// single COPY, the output of that COPY is split instead and the COPY is removed along with the LOAD.
/// \param loadOp is the given LOAD to split
/// \param inType is the data-type associated with the value being loaded
/// \return \b true if the split was performed
bool SplitDatatype::splitLoad(PcodeOp *loadOp,Datatype *inType)
{
  Varnode *destVn = loadOp->getOut();
  PcodeOp *copyOp = (PcodeOp *)0;
  if (!destVn->isAddrTied()) {
    PcodeOp *reader = destVn->loneDescend();
    if (reader != (PcodeOp *)0) {
      OpCode opc = reader->code();
      if (opc == CPUI_STORE) return false;	// Handled by RuleSplitStore
      if (opc == CPUI_COPY) {
	copyOp = reader;			// Look through the COPY to its output
	destVn = reader->getOut();
      }
    }
  }
  Datatype *outType = destVn->getTypeDefFacing();
  if (!testDatatypeCompatibility(inType, outType, false))
    return false;
  if (isArithmeticInput(destVn))		// Sanity check on output
    return false;
  RootPointer root;
  if (!root.find(loadOp,inType))
    return false;

  const bool hasCopy = (copyOp != (PcodeOp *)0);
  PcodeOp *insertPoint = hasCopy ? copyOp : loadOp;
  vector<Varnode *> piecePtrs;
  vector<Varnode *> destPieces;
  // Pointer calculations go ahead of the original LOAD, the concatenation follows the insertion point
  buildPointers(root.pointer, root.ptrType, root.baseOffset, loadOp, piecePtrs, true);
  buildOutVarnodes(destVn, destPieces);
  buildOutConcats(destVn, insertPoint, destPieces);

  // One small LOAD per piece, each through the same address space as the original
  AddrSpace *loadSpace = loadOp->getIn(0)->getSpaceFromConst();
  for(int4 k=0;k<piecePtrs.size();++k) {
    PcodeOp *pieceLoad = data.newOp(2,insertPoint->getAddr());
    data.opSetOpcode(pieceLoad,CPUI_LOAD);
    data.opSetInput(pieceLoad,data.newVarnodeSpace(loadSpace),0);
    data.opSetInput(pieceLoad,piecePtrs[k],1);
    data.opSetOutput(pieceLoad,destPieces[k]);
    data.opInsertBefore(pieceLoad, insertPoint);
  }

  // Remove the ops that have been replaced, then any pointer calculations left unused
  if (hasCopy)
    data.opDestroy(copyOp);
  data.opDestroy(loadOp);
  root.freePointerChain(data);
  return true;
}
/// Based on the STORE data-type, determine if the given STORE can be split into smaller STOREs.
/// Then, if possible, perform the split. The output data-type describes the size and composition of
/// the value being stored. If the stored value is produced by a LOAD that is read only by this STORE,
/// the LOAD is split along with the STORE, provided a data-type for the loaded value can be recovered
/// and is compatible; otherwise the stored value is split on its own.
/// \param storeOp is the given STORE to split
/// \param outType is the data-type associated with the value being stored
/// \return \b true if the split was performed
bool SplitDatatype::splitStore(PcodeOp *storeOp,Datatype *outType)
{
Varnode *inVn = storeOp->getIn(2);
PcodeOp *loadOp = (PcodeOp *)0;
Datatype *inType = (Datatype *)0;
// Try to describe the stored value through the pointer of a LOAD feeding only this STORE
if (inVn->isWritten() && inVn->getDef()->code() == CPUI_LOAD && inVn->loneDescend() == storeOp) {
loadOp = inVn->getDef();
inType = getValueDatatype(loadOp, inVn->getSize(), data.getArch()->types);
if (inType == (Datatype *)0)
loadOp = (PcodeOp *)0;
}
// Otherwise fall back on the data-type of the value as read by the STORE
if (inType == (Datatype *)0) {
inType = inVn->getTypeReadFacing(storeOp);
}
if (!testDatatypeCompatibility(inType, outType, inVn->isConstant())) {
if (loadOp != (PcodeOp *)0) {
// If not compatible while considering the LOAD, check again, but without the LOAD
loadOp = (PcodeOp *)0;
inType = inVn->getTypeReadFacing(storeOp);
// Discard pieces collected by the failed attempt before testing again
dataTypePieces.clear();
if (!testDatatypeCompatibility(inType, outType, inVn->isConstant()))
return false;
}
else
return false;
}
if (isArithmeticOutput(inVn)) // Sanity check
return false;
// Root pointers must be found for the STORE, and for the LOAD if it takes part, before anything is modified
RootPointer storeRoot;
if (!storeRoot.find(storeOp,outType))
return false;
RootPointer loadRoot;
if (loadOp != (PcodeOp *)0) {
if (!loadRoot.find(loadOp,inType))
return false;
}
// Build the per-piece values to store: constants, smaller LOADs, or pieces of the input Varnode
vector<Varnode *> inVarnodes;
if (inVn->isConstant())
buildInConstants(inVn,inVarnodes);
else if (loadOp != (PcodeOp *)0) {
vector<Varnode *> loadPtrs;
buildPointers(loadRoot.pointer, loadRoot.ptrType, loadRoot.baseOffset, loadOp, loadPtrs, true);
AddrSpace *loadSpace = loadOp->getIn(0)->getSpaceFromConst();
for(int4 i=0;i<loadPtrs.size();++i) {
PcodeOp *newLoadOp = data.newOp(2,loadOp->getAddr());
data.opSetOpcode(newLoadOp,CPUI_LOAD);
data.opSetInput(newLoadOp,data.newVarnodeSpace(loadSpace),0);
data.opSetInput(newLoadOp,loadPtrs[i],1);
// Each small LOAD writes a new temporary sized and typed by the input side of the piece
Datatype *dt = dataTypePieces[i].inType;
Varnode *vn = data.newUniqueOut(dt->getSize(), newLoadOp);
vn->updateType(dt, false, false);
inVarnodes.push_back(vn);
data.opInsertBefore(newLoadOp, loadOp);
}
}
else
buildInSubpieces(inVn,storeOp,inVarnodes);
// Build the per-piece destination pointers, using the output side of each piece
vector<Varnode *> storePtrs;
buildPointers(storeRoot.pointer, storeRoot.ptrType, storeRoot.baseOffset, storeOp, storePtrs, false);
AddrSpace *storeSpace = storeOp->getIn(0)->getSpaceFromConst();
// Preserve original STORE object, so that INDIRECT references are still valid
// but convert it into the first of the smaller STOREs
data.opSetInput(storeOp,storePtrs[0],1);
data.opSetInput(storeOp,inVarnodes[0],2);
// The remaining STOREs are chained after the original, keeping the order of the pieces
PcodeOp *lastStore = storeOp;
for(int4 i=1;i<storePtrs.size();++i) {
PcodeOp *newStoreOp = data.newOp(3,storeOp->getAddr());
data.opSetOpcode(newStoreOp,CPUI_STORE);
data.opSetInput(newStoreOp,data.newVarnodeSpace(storeSpace),0);
data.opSetInput(newStoreOp,storePtrs[i],1);
data.opSetInput(newStoreOp,inVarnodes[i],2);
data.opInsertAfter(newStoreOp, lastStore);
lastStore = newStoreOp;
}
// Remove the replaced LOAD, then any pointer calculations that are no longer used
if (loadOp != (PcodeOp *)0) {
data.opDestroy(loadOp);
loadRoot.freePointerChain(data);
}
storeRoot.freePointerChain(data);
return true;
}
/// \brief Get a data-type description of the value being pointed at by the given LOAD or STORE
///
/// Take the data-type of the pointer and construct the data-type of the thing being pointed at
/// so that it matches a specific size. This takes into account TypePointerRel and can produce
/// TypePartialStruct in order to match the size. If no interpretation of the value as a
/// splittable data-type is possible, null is returned. In particular null is returned if the
/// pointer input does not have a pointer data-type, or if the data-type it refers to is neither
/// a structure nor an array.
/// \param loadStore is the given LOAD or STORE
/// \param size is the number of bytes in the value being pointed at
/// \param tlst is the TypeFactory for constructing partial data-types if necessary
/// \return the data-type description of the value or null
Datatype *SplitDatatype::getValueDatatype(PcodeOp *loadStore,int4 size,TypeFactory *tlst)
{
  Datatype *resType;
  Datatype *ptrType = loadStore->getIn(1)->getTypeReadFacing(loadStore);
  if (ptrType->getMetatype() != TYPE_PTR)
    return (Datatype *)0;
  int4 baseOffset;
  if (ptrType->isPointerRel()) {
    // A relative pointer refers into its parent container: start from the parent
    // and convert the pointer's offset from address units into bytes
    TypePointerRel *ptrRel = (TypePointerRel *)ptrType;
    resType = ptrRel->getParent();
    baseOffset = ptrRel->getPointerOffset();
    baseOffset = AddrSpace::addressToByteInt(baseOffset, ptrRel->getWordSize());
  }
  else {
    resType = ((TypePointer *)ptrType)->getPtrTo();
    baseOffset = 0;
  }
  // Only structures and arrays can be split: anything else has no splittable interpretation
  type_metatype metain = resType->getMetatype();
  if (metain != TYPE_STRUCT && metain != TYPE_ARRAY)
    return (Datatype *)0;
  return tlst->getExactPiece(resType, baseOffset, size);
}
/// \brief Create and return a placeholder associated with the given Varnode
///
/// Add the placeholder to the worklist if it hasn't been visited before
@ -1769,7 +2568,7 @@ TransformVar *SubfloatFlow::setReplacement(Varnode *vn)
if (vn->isAddrForce() && (vn->getSize() != precision))
return (TransformVar *)0;
if (vn->isTypeLock()) {
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
int4 sz = vn->getType()->getSize();
if (sz != precision)
return (TransformVar *)0;
@ -2047,8 +2846,9 @@ TransformVar *LaneDivide::setReplacement(Varnode *vn,int4 numLanes,int4 skipLane
// if (vn->isFree())
// return (TransformVar *)0;
if (vn->isTypeLock())
return (TransformVar *)0;
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
return (TransformVar *)0;
}
vn->setMark();
TransformVar *res = newSplit(vn, description, numLanes, skipLanes);

View File

@ -145,6 +145,65 @@ public:
bool doTrace(void); ///< Trace split through data-flow, constructing transform
};
/// \brief Split a p-code COPY, LOAD, or STORE op based on underlying composite data-type
///
/// During the cleanup phase, if a COPY, LOAD, or STORE occurs on a partial structure or array
/// (TypePartialStruct), try to break it up into multiple operations that each act on a logical component
/// of the structure or array.
class SplitDatatype {
/// \brief A helper class describing a pair of matching data-types for the split
///
/// Data-types being copied simultaneously are split up into these matching pairs.
class Component {
friend class SplitDatatype;
Datatype *inType; ///< Data-type coming into the logical COPY operation
Datatype *outType; ///< Data-type coming out of the logical COPY operation
int4 offset; ///< Offset of this logical piece within the whole
public:
Component(Datatype *in,Datatype *out,int4 off) { inType=in; outType=out; offset=off; } ///< Constructor
};
/// \brief A helper class describing the pointer being passed to a LOAD or STORE
///
/// It makes distinction between the immediate pointer to the LOAD or STORE and a \e root pointer
/// to the main structure or array, which the immediate pointer may be at an offset from.
class RootPointer {
friend class SplitDatatype;
PcodeOp *loadStore; ///< LOAD or STORE op
TypePointer *ptrType; ///< Base pointer data-type of LOAD or STORE
Varnode *firstPointer; ///< Direct pointer input for LOAD or STORE
Varnode *pointer; ///< The root pointer
int4 baseOffset; ///< Offset of the LOAD or STORE relative to root pointer
bool backUpPointer(void); ///< Follow flow of \b pointer back thru INT_ADD or PTRSUB
public:
bool find(PcodeOp *op,Datatype *valueType); ///< Locate root pointer for underlying LOAD or STORE
void freePointerChain(Funcdata &data); ///< Remove unused pointer calculations
};
Funcdata &data; ///< The containing function
TypeFactory *types; ///< The data-type container
vector<Component> dataTypePieces; ///< Sequence of all data-type pairs being copied
bool splitStructures; ///< Whether or not structures should be split
bool splitArrays; ///< Whether or not arrays should be split
// Tests deciding if and how a split is possible
// NOTE(review): testDatatypeCompatibility appears to fill dataTypePieces as a side effect, since
// splitStore clears that container before repeating the test -- confirm against the implementation
Datatype *getComponent(Datatype *ct,int4 offset,bool &isHole);
int4 categorizeDatatype(Datatype *ct); ///< Categorize if and how data-type should be split
bool testDatatypeCompatibility(Datatype *inBase,Datatype *outBase,bool inConstant);
bool testCopyConstraints(PcodeOp *copyOp);
// Builders producing the per-piece Varnodes and ops used by splitCopy, splitLoad and splitStore
bool generateConstants(Varnode *vn,vector<Varnode *> &inVarnodes);
void buildInConstants(Varnode *rootVn,vector<Varnode *> &inVarnodes);
void buildInSubpieces(Varnode *rootVn,PcodeOp *followOp,vector<Varnode *> &inVarnodes);
void buildOutVarnodes(Varnode *rootVn,vector<Varnode *> &outVarnodes);
void buildOutConcats(Varnode *rootVn,PcodeOp *previousOp,vector<Varnode *> &outVarnodes);
void buildPointers(Varnode *rootVn,TypePointer *ptrType,int4 baseOffset,PcodeOp *followOp,
vector<Varnode *> &ptrVarnodes,bool isInput);
static bool isArithmeticInput(Varnode *vn); ///< Is \b this the input to an arithmetic operation
static bool isArithmeticOutput(Varnode *vn); ///< Is \b this defined by an arithmetic operation
public:
SplitDatatype(Funcdata &func); ///< Constructor
// Each split method returns true only if the op was actually replaced by smaller ops
bool splitCopy(PcodeOp *copyOp,Datatype *inType,Datatype *outType); ///< Split a COPY operation
bool splitLoad(PcodeOp *loadOp,Datatype *inType); ///< Split a LOAD operation
bool splitStore(PcodeOp *storeOp,Datatype *outType); ///< Split a STORE operation
static Datatype *getValueDatatype(PcodeOp *loadStore,int4 size,TypeFactory *tlst);
};
/// \brief Class for tracing changes of precision in floating point variables
///
/// It follows the flow of a logical lower precision value stored in higher precision locations

View File

@ -1011,6 +1011,13 @@ Datatype *TypeArray::getSubType(uintb off,uintb *newoff) const
return arrayof;
}
int4 TypeArray::getHoleSize(int4 off) const
{
  // Reduce the offset to one inside a single element, then let the element data-type answer
  return arrayof->getHoleSize(off % arrayof->getSize());
}
/// Given some contiguous piece of the array, figure out which element overlaps
/// the piece, and pass back the element index and the renormalized offset
/// \param off is the offset into the array
@ -1403,6 +1410,23 @@ Datatype *TypeStruct::getSubType(uintb off,uintb *newoff) const
return curfield.type;
}
int4 TypeStruct::getHoleSize(int4 off) const
{
  int4 index = getLowerBoundField(off);
  if (index >= 0) {
    // The offset may fall inside this field, in which case the field's data-type decides
    const TypeField &fld( field[index] );
    int4 relOff = off - fld.offset;
    if (relOff < fld.type->getSize())
      return fld.type->getHoleSize(relOff);
  }
  // Otherwise the offset is in a hole, which runs up to the first field following it,
  // or up to the end of the structure if there is no following field
  const int4 following = index + 1;
  const int4 holeEnd = (following < field.size()) ? field[following].offset : getSize();
  return holeEnd - off;
}
Datatype *TypeStruct::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const
{
@ -1827,12 +1851,14 @@ const TypeField *TypeUnion::resolveTruncation(int4 offset,PcodeOp *op,int4 slot,
{
Funcdata *fd = op->getParent()->getFuncdata();
const ResolvedUnion *res = fd->getUnionField(this, op, slot);
if (res != (ResolvedUnion *)0 && res->getFieldNum() >= 0) {
const TypeField *field = getField(res->getFieldNum());
newoff = offset - field->offset;
return field;
if (res != (ResolvedUnion *)0) {
if (res->getFieldNum() >= 0) {
const TypeField *field = getField(res->getFieldNum());
newoff = offset - field->offset;
return field;
}
}
if (op->code() == CPUI_SUBPIECE && slot == 1) { // The slot is artificial in this case
else if (op->code() == CPUI_SUBPIECE && slot == 1) { // The slot is artificial in this case
ScoreUnionFields scoreFields(*fd->getArch()->types,this,offset,op);
fd->setUnionField(this, op, slot, scoreFields.getResult());
if (scoreFields.getResult().getFieldNum() >= 0) {
@ -1927,8 +1953,28 @@ void TypePartialStruct::printRaw(ostream &s) const
/// Recover the component at the given offset, where the offset is relative to the start of
/// \b this partial. The lookup is performed in the container, after adjusting the offset by the
/// position of the partial within it. A component of the container can extend beyond the bytes
/// remaining in the partial; in that case the search continues down into that component until
/// a component is found that fits, or until no further component exists.
/// \param off is the byte offset relative to the start of \b this partial
/// \param newoff is used to pass back the offset relative to the returned component
/// \return the component data-type, or null if there is no component at the offset
Datatype *TypePartialStruct::getSubType(uintb off,uintb *newoff) const
{
  int4 sizeLeft = (size - (int4)off);	// Bytes of the partial from the requested offset to its end
  off += offset;			// Convert to an offset relative to the container
  Datatype *ct = container;
  do {
    ct = ct->getSubType(off, newoff);
    if (ct == (Datatype *)0)
      break;
    off = *newoff;
    // Component can extend beyond range of this partial, in which case we go down another level
  } while(ct->getSize() - (int4)off > sizeLeft);
  return ct;
}
int4 TypePartialStruct::getHoleSize(int4 off) const
{
  const int4 bytesLeft = size - off;	// Bytes from the offset to the end of this partial
  // Query the container at the equivalent offset, but never report more than the partial covers
  const int4 holeSize = container->getHoleSize(off + offset);
  return (holeSize > bytesLeft) ? bytesLeft : holeSize;
}
int4 TypePartialStruct::compare(const Datatype &op,int4 level) const
@ -3566,7 +3612,10 @@ TypePointer *TypeFactory::getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc
Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size)
{
if (offset + size > ct->getSize())
return (Datatype *)0;
Datatype *lastType = (Datatype *)0;
uintb lastOff = 0;
uintb curOff = offset;
do {
if (ct->getSize() <= size) {
@ -3578,11 +3627,12 @@ Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size)
return getTypePartialUnion((TypeUnion *)ct, curOff, size);
}
lastType = ct;
lastOff = curOff;
ct = ct->getSubType(curOff,&curOff);
} while(ct != (Datatype *)0);
// If we reach here, lastType is bigger than size
if (lastType->getMetatype() == TYPE_STRUCT || lastType->getMetatype() == TYPE_ARRAY)
return getTypePartialStruct(lastType, curOff, size);
return getTypePartialStruct(lastType, lastOff, size);
return (Datatype *)0;
}

View File

@ -209,6 +209,7 @@ public:
virtual Datatype *getSubType(uintb off,uintb *newoff) const; ///< Recover component data-type one-level down
virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
virtual int4 getHoleSize(int4 off) const { return 0; } ///< Get number of bytes at the given offset that are padding
virtual int4 numDepend(void) const { return 0; } ///< Return number of component sub-types
virtual Datatype *getDepend(int4 index) const { return (Datatype *)0; } ///< Return the i-th component sub-type
virtual void printNameBase(ostream &s) const { if (!name.empty()) s<<name[0]; } ///< Print name as short prefix
@ -386,6 +387,7 @@ public:
Datatype *getSubEntry(int4 off,int4 sz,int4 *newoff,int4 *el) const; ///< Figure out what a byte range overlaps
virtual void printRaw(ostream &s) const;
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual int4 getHoleSize(int4 off) const;
virtual int4 numDepend(void) const { return 1; }
virtual Datatype *getDepend(int4 index) const { return arrayof; }
virtual void printNameBase(ostream &s) const { s << 'a'; arrayof->printNameBase(s); }
@ -446,6 +448,7 @@ public:
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
virtual int4 getHoleSize(int4 off) const;
virtual int4 numDepend(void) const { return field.size(); }
virtual Datatype *getDepend(int4 index) const { return field[index].type; }
virtual int4 compare(const Datatype &op,int4 level) const; // For tree structure
@ -498,6 +501,7 @@ public:
Datatype *getParent(void) const { return container; } ///< Get the data-type containing \b this piece
virtual void printRaw(ostream &s) const;
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual int4 getHoleSize(int4 off) const;
virtual int4 compare(const Datatype &op,int4 level) const;
virtual int4 compareDependency(const Datatype &op) const;
virtual Datatype *clone(void) const { return new TypePartialStruct(*this); }

View File

@ -1102,7 +1102,7 @@ TypeOpIntAdd::TypeOpIntAdd(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_ADD,"+",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntAdd();
}
@ -1253,7 +1253,7 @@ TypeOpIntSub::TypeOpIntSub(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_SUB,"-",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntSub();
}
@ -1267,6 +1267,7 @@ TypeOpIntCarry::TypeOpIntCarry(TypeFactory *t)
: TypeOpFunc(t,CPUI_INT_CARRY,"CARRY",TYPE_BOOL,TYPE_UINT)
{
opflags = PcodeOp::binary;
addlflags = arithmetic_op;
behave = new OpBehaviorIntCarry();
}
@ -1297,6 +1298,7 @@ TypeOpIntSborrow::TypeOpIntSborrow(TypeFactory *t)
: TypeOpFunc(t,CPUI_INT_SBORROW,"SBORROW",TYPE_BOOL,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = arithmetic_op;
behave = new OpBehaviorIntSborrow();
}
@ -1312,7 +1314,7 @@ TypeOpInt2Comp::TypeOpInt2Comp(TypeFactory *t)
: TypeOpUnary(t,CPUI_INT_2COMP,"-",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::unary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorInt2Comp();
}
@ -1326,7 +1328,7 @@ TypeOpIntNegate::TypeOpIntNegate(TypeFactory *t)
: TypeOpUnary(t,CPUI_INT_NEGATE,"~",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::unary;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntNegate();
}
@ -1340,7 +1342,7 @@ TypeOpIntXor::TypeOpIntXor(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_XOR,"^",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntXor();
}
@ -1368,7 +1370,7 @@ TypeOpIntAnd::TypeOpIntAnd(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_AND,"&",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntAnd();
}
@ -1396,7 +1398,7 @@ TypeOpIntOr::TypeOpIntOr(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_OR,"|",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntOr();
}
@ -1539,7 +1541,7 @@ TypeOpIntMult::TypeOpIntMult(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_MULT,"*",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntMult();
}
@ -1553,7 +1555,7 @@ TypeOpIntDiv::TypeOpIntDiv(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_DIV,"/",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntDiv();
}
@ -1573,7 +1575,7 @@ TypeOpIntSdiv::TypeOpIntSdiv(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_SDIV,"/",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntSdiv();
}
@ -1593,7 +1595,7 @@ TypeOpIntRem::TypeOpIntRem(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_REM,"%",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign | inherits_sign_zero;
addlflags = arithmetic_op | inherits_sign | inherits_sign_zero;
behave = new OpBehaviorIntRem();
}
@ -1613,7 +1615,7 @@ TypeOpIntSrem::TypeOpIntSrem(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_SREM,"%",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign | inherits_sign_zero;
addlflags = arithmetic_op | inherits_sign | inherits_sign_zero;
behave = new OpBehaviorIntSrem();
}
@ -1633,6 +1635,7 @@ TypeOpBoolNegate::TypeOpBoolNegate(TypeFactory *t)
: TypeOpUnary(t,CPUI_BOOL_NEGATE,"!",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::unary | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolNegate();
}
@ -1640,6 +1643,7 @@ TypeOpBoolXor::TypeOpBoolXor(TypeFactory *t)
: TypeOpBinary(t,CPUI_BOOL_XOR,"^^",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::binary | PcodeOp::commutative | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolXor();
}
@ -1647,6 +1651,7 @@ TypeOpBoolAnd::TypeOpBoolAnd(TypeFactory *t)
: TypeOpBinary(t,CPUI_BOOL_AND,"&&",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::binary | PcodeOp::commutative | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolAnd();
}
@ -1654,6 +1659,7 @@ TypeOpBoolOr::TypeOpBoolOr(TypeFactory *t)
: TypeOpBinary(t,CPUI_BOOL_OR,"||",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::binary | PcodeOp::commutative | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolOr();
}
@ -1661,6 +1667,7 @@ TypeOpFloatEqual::TypeOpFloatEqual(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_EQUAL,"==",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatEqual(trans);
}
@ -1668,6 +1675,7 @@ TypeOpFloatNotEqual::TypeOpFloatNotEqual(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_NOTEQUAL,"!=",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatNotEqual(trans);
}
@ -1675,6 +1683,7 @@ TypeOpFloatLess::TypeOpFloatLess(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_LESS,"<",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatLess(trans);
}
@ -1682,6 +1691,7 @@ TypeOpFloatLessEqual::TypeOpFloatLessEqual(TypeFactory *t,const Translate *trans
: TypeOpBinary(t,CPUI_FLOAT_LESSEQUAL,"<=",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatLessEqual(trans);
}
@ -1689,6 +1699,7 @@ TypeOpFloatNan::TypeOpFloatNan(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_NAN,"NAN",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::unary | PcodeOp::booloutput;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatNan(trans);
}
@ -1696,6 +1707,7 @@ TypeOpFloatAdd::TypeOpFloatAdd(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_ADD,"+",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatAdd(trans);
}
@ -1703,6 +1715,7 @@ TypeOpFloatDiv::TypeOpFloatDiv(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_DIV,"/",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatDiv(trans);
}
@ -1710,6 +1723,7 @@ TypeOpFloatMult::TypeOpFloatMult(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_MULT,"*",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatMult(trans);
}
@ -1717,6 +1731,7 @@ TypeOpFloatSub::TypeOpFloatSub(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_SUB,"-",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatSub(trans);
}
@ -1724,6 +1739,7 @@ TypeOpFloatNeg::TypeOpFloatNeg(TypeFactory *t,const Translate *trans)
: TypeOpUnary(t,CPUI_FLOAT_NEG,"-",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatNeg(trans);
}
@ -1731,6 +1747,7 @@ TypeOpFloatAbs::TypeOpFloatAbs(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_ABS,"ABS",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatAbs(trans);
}
@ -1738,6 +1755,7 @@ TypeOpFloatSqrt::TypeOpFloatSqrt(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_SQRT,"SQRT",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatSqrt(trans);
}
@ -1745,6 +1763,7 @@ TypeOpFloatInt2Float::TypeOpFloatInt2Float(TypeFactory *t,const Translate *trans
: TypeOpFunc(t,CPUI_FLOAT_INT2FLOAT,"INT2FLOAT",TYPE_FLOAT,TYPE_INT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatInt2Float(trans);
}
@ -1752,6 +1771,7 @@ TypeOpFloatFloat2Float::TypeOpFloatFloat2Float(TypeFactory *t,const Translate *t
: TypeOpFunc(t,CPUI_FLOAT_FLOAT2FLOAT,"FLOAT2FLOAT",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatFloat2Float(trans);
}
@ -1759,6 +1779,7 @@ TypeOpFloatTrunc::TypeOpFloatTrunc(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_TRUNC,"TRUNC",TYPE_INT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatTrunc(trans);
}
@ -1766,6 +1787,7 @@ TypeOpFloatCeil::TypeOpFloatCeil(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_CEIL,"CEIL",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatCeil(trans);
}
@ -1773,6 +1795,7 @@ TypeOpFloatFloor::TypeOpFloatFloor(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_FLOOR,"FLOOR",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatFloor(trans);
}
@ -1780,6 +1803,7 @@ TypeOpFloatRound::TypeOpFloatRound(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_ROUND,"ROUND",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatRound(trans);
}
@ -1794,15 +1818,6 @@ Datatype *TypeOpMulti::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn
int4 inslot,int4 outslot)
{
if ((inslot!=-1)&&(outslot!=-1)) {
if (invn == outvn && outvn->getTempType()->needsResolution()) {
// If same Varnode occupies two input slots of the MULTIEQUAL
// the second input slot should inherit the resolution of the first
Funcdata *fd = op->getParent()->getFuncdata();
Datatype *unionType = outvn->getTempType();
const ResolvedUnion *res = fd->getUnionField(unionType, op, inslot);
if (res != (const ResolvedUnion *)0)
fd->setUnionField(unionType, op, outslot, *res);
}
return (Datatype *)0; // Must propagate input <-> output
}
Datatype *newtype;
@ -2025,6 +2040,7 @@ TypeOpPtradd::TypeOpPtradd(TypeFactory *t) : TypeOp(t,CPUI_PTRADD,"+")
{
opflags = PcodeOp::ternary | PcodeOp::nocollapse;
addlflags = arithmetic_op;
behave = new OpBehavior(CPUI_PTRADD,false); // Dummy behavior
}
@ -2094,6 +2110,7 @@ TypeOpPtrsub::TypeOpPtrsub(TypeFactory *t) : TypeOp(t,CPUI_PTRSUB,"->")
// But the typing information doesn't really
// allow this to be commutative.
opflags = PcodeOp::binary|PcodeOp::nocollapse;
addlflags = arithmetic_op;
behave = new OpBehavior(CPUI_PTRSUB,false); // Dummy behavior
}

View File

@ -41,7 +41,10 @@ public:
enum {
inherits_sign = 1, ///< Operator token inherits signedness from its inputs
inherits_sign_zero = 2, ///< Only inherits sign from first operand, not the second
shift_op = 4 ///< Shift operation
shift_op = 4, ///< Shift operation
arithmetic_op = 8, ///< Operation involving addition, multiplication, or division
logical_op = 0x10, ///< Logical operation
floatingpoint_op = 0x20 ///< Floating-point operation
};
protected:
TypeFactory *tlst; ///< Pointer to data-type factory
@ -114,6 +117,15 @@ public:
/// \brief Test whether \b this op-code is flagged as a shift (INT_LEFT, INT_RIGHT, or INT_SRIGHT)
bool isShiftOp(void) const { return (addlflags & shift_op) != 0; }
/// \brief Test whether \b this op-code is flagged as arithmetic (INT_ADD, INT_MULT, INT_DIV, INT_REM, or other arithmetic op)
bool isArithmeticOp(void) const { return (addlflags & arithmetic_op) != 0; }
/// \brief Test whether \b this op-code is flagged as logical (INT_AND, INT_OR, INT_XOR, or other logical op)
bool isLogicalOp(void) const { return (addlflags & logical_op) != 0; }
/// \brief Test whether \b this op-code is flagged as floating-point (FLOAT_ADD, FLOAT_MULT, or other floating-point operation)
bool isFloatingPointOp(void) const { return (addlflags & floatingpoint_op) != 0; }
/// \brief Find the minimal (or suggested) data-type of an output to \b this op-code
virtual Datatype *getOutputLocal(const PcodeOp *op) const;

View File

@ -70,6 +70,23 @@ void VariableGroup::removePiece(VariablePiece *piece)
// We currently don't adjust size here as removePiece is currently only called during clean up
}
/// Move every VariablePiece held by the given group into \b this group; the given VariableGroup
/// object is deleted as a result.
/// The two groups must not contain matching VariablePieces with the same size and offset,
/// otherwise a LowlevelError exception is thrown.
/// \param op2 is the given VariableGroup whose pieces are transferred into \b this
void VariableGroup::combineGroups(VariableGroup *op2)
{
  typedef set<VariablePiece *,VariableGroup::PieceCompareByOffset>::iterator PieceIter;
  PieceIter stop = op2->pieceSet.end();
  for(PieceIter cur = op2->pieceSet.begin(); cur != stop; ) {
    VariablePiece *moving = *cur;
    // Advance first: transferGroup() presumably removes the piece from op2's set,
    // which would invalidate an iterator still pointing at it
    ++cur;
    moving->transferGroup(this);
  }
}
/// Construct piece given a HighVariable and its position within the whole.
/// If \b this is the first piece in the group, allocate a new VariableGroup object.
/// \param h is the given HighVariable to treat as a piece
@ -164,15 +181,15 @@ void VariablePiece::transferGroup(VariableGroup *newGroup)
newGroup->addPiece(this);
}
/// Combine the VariableGroup associated with the given other VariablePiece and the VariableGroup of \b this
/// into one group. Combining in this way requires pieces of the same size and offset to be merged. This
/// Combine the VariableGroup associated with \b this and the given other VariablePiece into one group.
/// Offsets are adjusted so that \b this and the other VariablePiece have the same offset.
/// Combining in this way requires pieces of the same size and offset to be merged. This
/// method does not do the merging but passes back a list of HighVariable pairs that need to be merged.
/// The first element in the pair will have its VariablePiece in the new group, and the second element
/// will have its VariablePiece freed in preparation for the merge.
/// Offsets are adjusted so that \b this and the given other piece have the same offset;
/// \param op2 is the given other VariablePiece
/// \param mergePairs passes back the collection of HighVariable pairs that must be merged
void VariablePiece::combineOtherGroup(VariablePiece *op2,vector<HighVariable *> &mergePairs)
void VariablePiece::mergeGroups(VariablePiece *op2,vector<HighVariable *> &mergePairs)
{
int4 diff = groupOffset - op2->groupOffset; // Add to op2, or subtract from this
@ -523,6 +540,17 @@ void HighVariable::finalizeDatatype(Datatype *tp)
{
type = tp;
if (type->hasStripped()) {
if (type->getMetatype() == TYPE_PARTIALUNION) {
if (symbol != (Symbol *)0 && symboloffset != -1) {
type_metatype meta = symbol->getType()->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_UNION) // If partial union does not have a bigger backing symbol
type = type->getStripped(); // strip the partial union
}
}
else
type = type->getStripped();
}
highflags |= type_finalized;
}
@ -557,7 +585,11 @@ void HighVariable::groupWith(int4 off,HighVariable *hi2)
hi2->piece = new VariablePiece(hi2,hi2Off,this);
}
else {
throw LowlevelError("Cannot group HighVariables that are already grouped");
int4 offDiff = hi2->piece->getOffset() + off - piece->getOffset();
if (offDiff != 0)
piece->getGroup()->adjustOffsets(offDiff);
hi2->piece->getGroup()->combineGroups(piece->getGroup());
hi2->piece->markIntersectionDirty();
}
}
@ -657,7 +689,7 @@ void HighVariable::merge(HighVariable *tv2,HighIntersectTest *testCache,bool iss
if (isspeculative)
throw LowlevelError("Trying speculatively merge variables in separate groups");
vector<HighVariable *> mergePairs;
piece->combineOtherGroup(tv2->piece, mergePairs);
piece->mergeGroups(tv2->piece, mergePairs);
for(int4 i=0;i<mergePairs.size();i+=2) {
HighVariable *high1 = mergePairs[i];
HighVariable *high2 = mergePairs[i+1];

View File

@ -61,6 +61,7 @@ public:
int4 getSize(void) const { return size; } ///< Get the number of bytes \b this group covers
void setSymbolOffset(int4 val) { symbolOffset = val; } ///< Cache the symbol offset for the group
int4 getSymbolOffset(void) const { return symbolOffset; } ///< Get offset of \b this group within its Symbol
void combineGroups(VariableGroup *op2); ///< Combine given VariableGroup into \b this
};
/// \brief Information about how a HighVariable fits into a larger group or Symbol
@ -91,7 +92,7 @@ public:
void updateCover(void) const; ///< Calculate extended cover based on intersections
void transferGroup(VariableGroup *newGroup); ///< Transfer \b this piece to another VariableGroup
void setHigh(HighVariable *newHigh) { high = newHigh; } ///< Move ownership of \b this to another HighVariable
void combineOtherGroup(VariablePiece *op2,vector<HighVariable *> &mergePairs); ///< Combine two VariableGroups
void mergeGroups(VariablePiece *op2,vector<HighVariable *> &mergePairs); ///< Combine two VariableGroups
};
class HighIntersectTest;

View File

@ -89,7 +89,7 @@ bool RangeHint::contain(const RangeHint *b) const
/// Otherwise data-type ordering is used.
/// \param b is the other given range
/// \param reconcile is \b true if the two ranges have \e reconciled data-types
/// \return \b true if the \b this ranges's data-type is preferred
/// \return \b true if \b this range's data-type is preferred
bool RangeHint::preferred(const RangeHint *b,bool reconcile) const
{
@ -104,16 +104,16 @@ bool RangeHint::preferred(const RangeHint *b,bool reconcile) const
return true;
if (!reconcile) { // If the ranges don't reconcile
if ((rangeType == RangeHint::open)&&(b->rangeType != RangeHint::open)) // Throw out the open range
if (rangeType == open && b->rangeType != open) // Throw out the open range
return false;
if ((b->rangeType == RangeHint::open)&&(rangeType != RangeHint::open))
if (b->rangeType == open && rangeType != open)
return true;
}
return (0>type->typeOrder(*b->type)); // Prefer the more specific
}
/// If \b this RangeHint is an array and the following details line up, adjust \b this
/// If \b this RangeHint is an array and the following RangeHint lines up, adjust \b this
/// so that it \e absorbs the other given RangeHint and return \b true.
/// The second RangeHint:
/// - must have the same element size
@ -124,12 +124,12 @@ bool RangeHint::preferred(const RangeHint *b,bool reconcile) const
///
/// \param b is the other RangeHint to absorb
/// \return \b true if the other RangeHint was successfully absorbed
bool RangeHint::absorb(RangeHint *b)
bool RangeHint::attemptJoin(RangeHint *b)
{
if (rangeType != RangeHint::open) return false;
if (rangeType != open) return false;
if (highind < 0) return false;
if (b->rangeType == RangeHint::endpoint) return false; // Don't merge with bounding range
if (b->rangeType == endpoint) return false; // Don't merge with bounding range
Datatype *settype = type; // Assume we will keep this data-type
if (settype->getSize() != b->type->getSize()) return false;
if (settype != b->type) {
@ -160,14 +160,29 @@ bool RangeHint::absorb(RangeHint *b)
diffsz /= settype->getSize();
if (diffsz > highind) return false;
type = settype;
if (b->rangeType == RangeHint::open && (0 <= b->highind)) { // If b has array indexing
int4 trialhi = b->highind + diffsz;
if (highind < trialhi)
highind = trialhi;
}
absorb(b);
return true;
}
/// Fold details of the other RangeHint into \b this, leaving the data-type of \b this untouched.
/// Nothing is inherited unless the other range is \e open and its data-type has the same size as
/// the data-type of \b this.  In that case \b this becomes \e open, and if the other range carries
/// array indexing, its high index (shifted by the element distance between the two starting
/// offsets) replaces the high index of \b this when it is larger.
/// The data-type for \b this is assumed to be compatible and preferred over the other data-type.
/// \param b is the other RangeHint to absorb
void RangeHint::absorb(RangeHint *b)
{
  if (b->rangeType != open) return;			// Only an open range has anything to pass on
  if (type->getSize() != b->type->getSize()) return;	// Element sizes must agree
  rangeType = open;
  if (b->highind < 0) return;				// b has no array indexing
  intb elemDist = b->sstart - sstart;			// Byte distance from start of this to start of b
  elemDist /= type->getSize();				// ... converted to a count of elements
  int4 candidate = b->highind + elemDist;
  if (highind < candidate)
    highind = candidate;
}
/// Given that \b this and the other RangeHint intersect, redefine \b this so that it
/// becomes the union of the two original ranges. The union must succeed in some form.
/// An attempt is made to preserve the data-type information of both the original ranges,
@ -180,84 +195,60 @@ bool RangeHint::absorb(RangeHint *b)
bool RangeHint::merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory)
{
uintb aend,bend;
uintb end;
Datatype *resType;
uint4 resFlags;
bool didReconcile;
int4 resHighIndex;
bool overlapProblems = false;
aend = space->wrapOffset(start+size);
bend = space->wrapOffset(b->start+b->size);
RangeHint::RangeType resRangeType = RangeHint::fixed;
resHighIndex = -1;
if ((aend==0)||(bend==0))
end = 0;
else
end = (aend > bend) ? aend : bend;
int4 resType; // 0=this, 1=b, 2=confuse
if (contain(b)) { // Does one range contain the other
didReconcile = reconcile(b); // Can the data-type layout be reconciled
if (preferred(b,didReconcile)) { // If a's data-type is preferred over b
resType = type;
resFlags = flags;
resRangeType = rangeType;
resHighIndex = highind;
}
else {
resType = b->type;
resFlags = b->flags;
resRangeType = b->rangeType;
resHighIndex = b->highind;
}
if ((start==b->start)&&(size==b->size)) {
resRangeType = (rangeType==RangeHint::open || b->rangeType==RangeHint::open) ? RangeHint::open : RangeHint::fixed;
if (resRangeType == RangeHint::open)
resHighIndex = (highind < b->highind) ? b->highind : highind;
}
if (!didReconcile) { // See if two types match up
if ((b->rangeType != RangeHint::open)&&(rangeType != RangeHint::open))
overlapProblems = true;
}
if (!didReconcile && start != b->start)
resType = 2;
else
resType = preferred(b,didReconcile) ? 0 : 1;
}
else {
didReconcile = false;
resType = (Datatype *)0; // Unable to resolve the type
resFlags = 0;
resType = ((flags & Varnode::typelock) != 0) ? 0 : 2;
}
// Check for really problematic cases
if (!didReconcile) {
if ((b->flags & Varnode::typelock)!=0) {
if ((flags & Varnode::typelock)!=0)
if ((flags & Varnode::typelock)!=0) {
if ((b->flags & Varnode::typelock)!=0)
throw LowlevelError("Overlapping forced variable types : " + type->getName() + " " + b->type->getName());
if (start != b->start)
return false; // Discard b entirely
}
}
if (resType == (Datatype *)0) // If all else fails
resType = typeFactory->getBase(1,TYPE_UNKNOWN); // Do unknown array (size 1)
type = resType;
flags = resFlags;
rangeType = resRangeType;
highind = resHighIndex;
if ((!didReconcile)&&(start != b->start)) { // Truncation is forced
if ((flags & Varnode::typelock)!=0) { // If a is locked
return overlapProblems; // Discard b entirely in favor of a
}
// Concede confusion about types, set unknown type rather than a or b's type
rangeType = RangeHint::fixed;
size = space->wrapOffset(end-start);
if (resType == 0) {
if (didReconcile)
absorb(b);
}
else if (resType == 1) {
RangeHint copyRange = *this;
type = b->type;
flags = b->flags;
rangeType = b->rangeType;
highind = b->highind;
size = b->size;
absorb(&copyRange);
}
else if (resType == 2) {
// Concede confusion about types, set unknown type rather than this or b's type
flags = 0;
rangeType = fixed;
int4 diff = (int4)(b->sstart - sstart);
if (diff + b->size > size)
size = diff + b->size;
if (size != 1 && size != 2 && size != 4 && size != 8) {
size = 1;
rangeType = RangeHint::open;
rangeType = open;
}
type = typeFactory->getBase(size,TYPE_UNKNOWN);
flags = 0;
highind = -1;
return overlapProblems;
return false;
}
size = resType->getSize();
return overlapProblems;
return false;
}
/// Compare (signed) offset, size, RangeType, type lock, and high index, in that order.
@ -1146,7 +1137,7 @@ bool ScopeLocal::restructure(MapState &state)
overlapProblems = true;
}
else {
if (!cur.absorb(next)) {
if (!cur.attemptJoin(next)) {
if (cur.rangeType == RangeHint::open)
cur.size = next->sstart-cur.sstart;
if (adjustFit(cur))

View File

@ -112,7 +112,8 @@ public:
bool reconcile(const RangeHint *b) const;
bool contain(const RangeHint *b) const;
bool preferred(const RangeHint *b,bool reconcile) const;
bool absorb(RangeHint *b); ///< Try to absorb the other RangeHint into \b this
bool attemptJoin(RangeHint *b); ///< Try to concatenate another RangeHint onto \b this
void absorb(RangeHint *b); ///< Absorb the other RangeHint into \b this
bool merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory); ///< Try to form the union of \b this with another RangeHint
int4 compare(const RangeHint &op2) const; ///< Order \b this with another RangeHint
static bool compareRanges(const RangeHint *a,const RangeHint *b) { return (a->compare(*b) < 0); } ///< Compare two RangeHint pointers

View File

@ -0,0 +1,97 @@
<decompilertest>
<binaryimage arch="x86:LE:64:default:gcc">
<!--
Examples of functions reading and writing more than one element (or field) simultaneously
-->
<bytechunk space="ram" offset="0x100000" readonly="true">
53be200000004889fb4883ec500fbf17
4889e764488b04252800000048894424
4831c0e8d80f00008b4304488b4c2448
6448330c252800000089442414486343
080fb7044475064883c4505bc3e8b60f
00000000000000000000000000000000
48b8000100000002ffffc74708000000
00488907c30000000000000000000000
488b0648894704c30000000000000000
55534889fd4883ec28488d5c240c6448
8b042528000000488944241831c04889
dfe85a0f0000488b45004889df488944
240ce8510f0000488b44241864483304
252800000075074883c4285b5dc3e825
0f000000000000000000000000000000
534883ec20488d5c240c64488b042528
000000488944241831c04889dfe8fe0e
0000488d7c2404e8040f0000488b4424
044889df4889442410e8ea0e0000488b
44241864483304252800000075064883
c4205bc3e8bf0e000000000000000000
534883ec20488d5c240c64488b042528
000000488944241831c04889dfe89e0e
00004889dfc744241007000700e8960e
0000488b442418644833042528000000
75064883c4205bc3e86b0e0000
</bytechunk>
<symbol space="ram" offset="0x100000" name="loadalone"/>
<symbol space="ram" offset="0x100060" name="storeconst"/>
<symbol space="ram" offset="0x100080" name="copyloadstore"/>
<symbol space="ram" offset="0x100090" name="storealone"/>
<symbol space="ram" offset="0x1000f0" name="copy"/>
<symbol space="ram" offset="0x100150" name="copyconst"/>
<symbol space="ram" offset="0x101008" name="__stack_chk_fail"/>
</binaryimage>
<script>
<com>option noreturn __stack_chk_fail</com>
<com>parse line struct myfoo { int4 a; int2 b; int2 c; int4 d; };</com>
<com>parse line struct otherfoo { int2 A; int2 B; uint4 C; };</com>
<com>parse line extern int2 loadalone(myfoo *ptralone);</com>
<com>lo fu loadalone</com>
<com>map addr s0xffffffffffffffa8 int2 arr[32]</com>
<com>decompile</com>
<com>print C</com>
<com>parse line extern void storeconst(myfoo *ptr);</com>
<com>lo fu storeconst</com>
<com>decompile</com>
<com>print C</com>
<com>parse line extern void copyloadstore(myfoo *ptrstore,otherfoo *ptrload);</com>
<com>lo fu copyloadstore</com>
<com>decompile</com>
<com>print C</com>
<com>parse line extern void storealone(myfoo *point);</com>
<com>lo fu storealone</com>
<com>map addr s0xffffffffffffffd4 myfoo stackfoo</com>
<com>decompile</com>
<com>print C</com>
<com>parse line extern void copy(void);</com>
<com>lo fu copy</com>
<com>map addr s0xffffffffffffffe4 myfoo stackmy</com>
<com>map addr s0xffffffffffffffdc otherfoo stackother</com>
<com>decompile</com>
<com>print C</com>
<com>parse line extern void copyconst(void);</com>
<com>lo fu copyconst</com>
<com>map addr s0xffffffffffffffe4 myfoo stackconst</com>
<com>decompile</com>
<com>print C</com>
<com>quit</com>
</script>
<stringmatch name="Partial splitting #1" min="1" max="1">arr\[10\] = ptralone-&gt;b;</stringmatch>
<stringmatch name="Partial splitting #2" min="1" max="1">arr\[11\] = ptralone-&gt;c;</stringmatch>
<stringmatch name="Partial splitting #3" min="1" max="1">return arr\[ptralone-&gt;d\];</stringmatch>
<stringmatch name="Partial splitting #4" min="1" max="1">ptr-&gt;d = 0;</stringmatch>
<stringmatch name="Partial splitting #5" min="1" max="1">ptr-&gt;a = 0x100;</stringmatch>
<stringmatch name="Partial splitting #6" min="1" max="1">ptr-&gt;b = 0x200;</stringmatch>
<stringmatch name="Partial splitting #7" min="1" max="1">ptr-&gt;c = -1;</stringmatch>
<stringmatch name="Partial splitting #8" min="1" max="1">iVar. = ptrload-&gt;B</stringmatch>
<stringmatch name="Partial splitting #9" min="1" max="1">uVar. = ptrload-&gt;C</stringmatch>
<stringmatch name="Partial splitting #10" min="1" max="1">ptrstore-&gt;b = ptrload-&gt;A</stringmatch>
<stringmatch name="Partial splitting #11" min="1" max="1">ptrstore-&gt;c = iVar.;</stringmatch>
<stringmatch name="Partial splitting #12" min="1" max="1">ptrstore-&gt;d = uVar.;</stringmatch>
<stringmatch name="Partial splitting #13" min="1" max="1">stackfoo\.a = point-&gt;a;</stringmatch>
<stringmatch name="Partial splitting #14" min="1" max="1">stackfoo\.b = point-&gt;b;</stringmatch>
<stringmatch name="Partial splitting #15" min="1" max="1">stackfoo\.c = point-&gt;c;</stringmatch>
<stringmatch name="Partial splitting #16" min="1" max="1">stackmy\.b = stackother\.A;</stringmatch>
<stringmatch name="Partial splitting #17" min="1" max="1">stackmy\.c = stackother\.B;</stringmatch>
<stringmatch name="Partial splitting #18" min="1" max="1">stackmy\.d = stackother\.C;</stringmatch>
<stringmatch name="Partial splitting #19" min="1" max="1">stackconst\.b = 7;</stringmatch>
<stringmatch name="Partial splitting #20" min="1" max="1">stackconst\.c = 7;</stringmatch>
</decompilertest>

View File

@ -1917,7 +1917,8 @@
<sect4 id="TypeBoolean">
<title>Boolean</title>
<para>
A 1-byte boolean data-type is supported.
A 1-byte boolean data-type is supported. Boolean constants are rendered as either the
token <emphasis role="bold">true</emphasis> or the token <emphasis role="bold">false</emphasis>.
</para>
</sect4>
<sect4 id="TypeFloat">
@ -1972,10 +1973,15 @@
<para>
Pointer data-types are fully supported. A pointer to any other supported data-type is
possible. The data-type being pointed to, whether it is a primitive, structure, or another pointer,
informs how the Decompiler renders a dereferenced pointer.
The Decompiler assumes that a pointer variable may refer to an array of
the underlying data-type and will use array notation if there is evidence of more than
one element.
informs how the Decompiler renders a dereferenced pointer or other pointer expression.
</para>
<para>
The Decompiler automatically assumes that a pointer may reference an array of the underlying data-type.
If an integer value is added to the pointer and the value is known to be a multiple of the data-type's size,
the expression is treated as either <emphasis>pointer arithmetic</emphasis> or as an array access, and the multiplication
factor is hidden. Adding smaller integers to a structure pointer typically results in a
<emphasis>field access</emphasis> expression using the '->' or other language specific token. See the discussion
on <emphasis>Structures</emphasis> below.
</para>
<para>
The default pointer size is set based on the processor and compiler selected when the Program is
@ -1985,9 +1991,11 @@
architectures can model different size pointers into the space (such as <emphasis>near</emphasis> pointers).
</para>
<para>
For processors with more than one memory address space, pointer data-types currently cannot be directly
annotated to indicate a preferred address space. Where there is ambiguity, the Decompiler attempts to
determine the correct address space from the context of its use within the function.
For processors with more than one memory address space, pointer data-types do not by default indicate
a preferred address space and can be used to reference data in any address space.
Where there is ambiguity, the Decompiler attempts to determine the correct address space from the context
of its use within the function. It is possible to create pointer data-types with an
explicitly preferred address space, see <xref linkend="AttributeAddressPointer"/>.
</para>
</sect4>
<sect4 id="TypeArray">
@ -1996,6 +2004,28 @@
Array data-types are fully supported. The array element can be any other supported data-type
with a fixed size.
</para>
<para>
For code that accesses arrays, the Decompiler keeps track of the array index and automatically
hides the underlying multiplication needed to account for the size of an element. The access is
displayed as an expression using standard square brace notation, '[' and ']'.
For an access that covers more than one array element simultaneously, the Decompiler will either
generate a pointer expression that casts to a data-type of the correct size or may generate a
special token representing the accessed portion of the array.
<informalexample>
<programlisting>
text[iVar1] = 'a'; // Assigning a character to a variable index of the array
cVar2 = text[7]; // Reading a fixed element from an array
text._8_2_ = 0x7677; // Auto-generated token indicating multiple elements are assigned at once
</programlisting>
</informalexample>
For an auto-generated token like <code>_8_2_</code>, the first integer indicates the offset in bytes
of the access from the start of the array, and the second integer indicates the number of bytes being accessed.
</para>
<para>
If more than one element is being accessed simultaneously, the Decompiler may try to split
the access into logical pieces. See the description of the analysis option for
<link linkend="AnalysisSplitArray">Splitting Array Accesses</link>.
</para>
</sect4>
<sect4 id="TypeStructure">
<title>Structure</title>
@ -2005,6 +2035,29 @@
annotated sources, like input parameters or global variables. Decompiler-directed creation of
structures can be triggered by the user (see <xref linkend="ActionAutoStructure"/>).
</para>
<para>
For variables that are known to be structures, or pointers to structures, the Decompiler keeps
track of offsets into the variable and will render the name of the specific field being
accessed, using language specific access operators such as '.' or '->'. If the part of the
structure being accessed does not have a normal field name, either because the structure data-type
does not list a name at that position or because more than one field is being accessed
simultaneously, the Decompiler will either cast a pointer to a data-type of the correct size or
may automatically generate a token name representing the accessed portion.
<informalexample>
<programlisting>
struct1.a = 1; // Assigning an integer to a field named "a"
fVar1 = ptr->b; // Reading field "b" through a pointer
struct1._20_4_ = 0xff00ff00; // Auto-generated name for assigning multiple fields at once
</programlisting>
</informalexample>
For an auto-generated token like <code>_20_4_</code>, the first integer indicates the offset in bytes
of the access from the start of the structure, and the second integer indicates the number of bytes being accessed.
</para>
<para>
If more than one field is being accessed simultaneously, the Decompiler may try to split
the access into logical pieces. See the description of the analysis option for
<link linkend="AnalysisSplitStruct">Splitting Structure Accesses</link>.
</para>
</sect4>
<sect4 id="TypeEnum">
<title>Enumeration</title>
@ -3089,6 +3142,62 @@
</para>
</listitem>
</varlistentry>
<varlistentry id="AnalysisSplitStruct">
<term><emphasis role="bold">Split combined structure fields</emphasis></term>
<listitem>
<para>
When this option is active, the Decompiler attempts to identify places in the code where multiple
fields of a structure data-type are being moved simultaneously with a single operation. Then it splits the
operation into multiple pieces so that the logical fields can be seen individually.
When this option isn't active, the Decompiler creates an artificial token to
represent the combined fields being written to or read from.
<informalexample>
<programlisting>
struct1._20_4_ = 0xff00ff00; // Auto-generated name for assigning multiple fields at once
...
struct1.a = 0xff00; // The same assignment, after splitting
struct1.b = 0xff00;
</programlisting>
</informalexample>
</para>
</listitem>
</varlistentry>
<varlistentry id="AnalysisSplitArray">
<term><emphasis role="bold">Split combined array elements</emphasis></term>
<listitem>
<para>
When this option is active, the Decompiler attempts to identify places in the code where multiple
elements of an array are being moved simultaneously with a single operation. Then it splits the
operation into multiple pieces so each element of the array can be seen individually.
When this option isn't active, the Decompiler creates an artificial token to
represent the combined elements being written to or read from.
<informalexample>
<programlisting>
text._20_2_ = 0x4241; // Auto-generated name for assigning multiple elements at once
...
text[20] = 'A'; // The same assignment, after splitting
text[21] = 'B';
</programlisting>
</informalexample>
</para>
</listitem>
</varlistentry>
<varlistentry id="AnalysisSplitPointers">
<term><emphasis role="bold">Split pointers to combined elements</emphasis></term>
<listitem>
<para>
This option affects when the Decompiler's splitting actions are applied. See
the discussion above about
<link linkend="AnalysisSplitStruct">Splitting Structure Accesses</link> and
<link linkend="AnalysisSplitArray">Splitting Array Accesses</link>.
If this option is on, a split is performed whenever combined elements or combined
fields are copied. In particular, either the read access, or the write access, or both,
can be through a pointer.
If this option is off, splitting is limited to copies between structures or arrays at fixed
locations, either at a global address, or on the local stack.
</para>
</listitem>
</varlistentry>
<varlistentry id="AnalysisInPlace">
<term><emphasis role="bold">Use in-place assignment operators</emphasis></term>
<listitem>

View File

@ -548,7 +548,8 @@
<a name="TypeBoolean"></a>Boolean</h5></div></div></div>
<p>
A 1-byte boolean data-type is supported.
A 1-byte boolean data-type is supported. Boolean constants are rendered as either the
token <span class="bold"><strong>true</strong></span> or the token <span class="bold"><strong>false</strong></span>.
</p>
</div>
<div class="sect4">
@ -613,10 +614,15 @@
<p>
Pointer data-types are fully supported. A pointer to any other supported data-type is
possible. The data-type being pointed to, whether it is a primitive, structure, or another pointer,
informs how the Decompiler renders a dereferenced pointer.
The Decompiler assumes that a pointer variable may refer to an array of
the underlying data-type and will use array notation if there is evidence of more than
one element.
informs how the Decompiler renders a dereferenced pointer or other pointer expression.
</p>
<p>
The Decompiler automatically assumes that a pointer may reference an array of the underlying data-type.
If an integer value is added to the pointer and the value is known to be a multiple of the data-type's size,
the expression is treated as either <span class="emphasis"><em>pointer arithmetic</em></span> or as an array access, and the multiplication
factor is hidden. Adding smaller integers to a structure pointer typically results in a
<span class="emphasis"><em>field access</em></span> expression using the '-&gt;' or other language specific token. See the discussion
on <span class="emphasis"><em>Structures</em></span> below.
</p>
<p>
The default pointer size is set based on the processor and compiler selected when the Program is
@ -626,9 +632,11 @@
architectures can model different size pointers into the space (such as <span class="emphasis"><em>near</em></span> pointers).
</p>
<p>
For processors with more than one memory address space, pointer data-types currently cannot be directly
annotated to indicate a preferred address space. Where there is ambiguity, the Decompiler attempts to
determine the correct address space from the context of its use within the function.
For processors with more than one memory address space, pointer data-types do not by default indicate
a preferred address space and can be used to reference data in any address space.
Where there is ambiguity, the Decompiler attempts to determine the correct address space from the context
of its use within the function. It is possible to create pointer data-types with an
explicitly preferred address space, see <a class="xref" href="DecompilerAnnotations.html#AttributeAddressPointer" title="Address Space Pointers">Address Space Pointers</a>.
</p>
</div>
<div class="sect4">
@ -639,6 +647,30 @@
Array data-types are fully supported. The array element can be any other supported data-type
with a fixed size.
</p>
<p>
For code that accesses arrays, the Decompiler keeps track of the array index and automatically
hides the underlying multiplication needed to account for the size of an element. The access is
displayed as an expression using standard square brace notation, '[' and ']'.
For an access that covers more than one array element simultaneously, the Decompiler will either
generate a pointer expression that casts to a data-type of the correct size or may generate a
special token representing the accessed portion of the array.
</p>
<div class="informalexample">
<pre class="programlisting">
text[iVar1] = 'a'; // Assigning a character to a variable index of the array
cVar2 = text[7]; // Reading a fixed element from an array
text._8_2_ = 0x7677; // Auto-generated token indicating multiple elements are assigned at once
</pre>
</div>
<p>
For an auto-generated token like <code class="code">_8_2_</code>, the first integer indicates the offset in bytes
of the access from the start of the array, and the second integer indicates the number of bytes being accessed.
</p>
<p>
If more than one element is being accessed simultaneously, the Decompiler may try to split
the access into logical pieces. See the description of the analysis option for
<a class="link" href="DecompilerOptions.html#AnalysisSplitArray">Splitting Array Accesses</a>.
</p>
</div>
<div class="sect4">
<div class="titlepage"><div><div><h5 class="title">
@ -650,6 +682,31 @@
annotated sources, like input parameters or global variables. Decompiler-directed creation of
structures can be triggered by the user (see <a class="xref" href="DecompilerWindow.html#ActionAutoStructure" title="Auto Create Structure">Auto Create Structure</a>).
</p>
<p>
For variables that are known to be structures, or pointers to structures, the Decompiler keeps
track of offsets into the variable and will render the name of the specific field being
accessed, using language specific access operators such as '.' or '-&gt;'. If the part of the
structure being accessed does not have a normal field name, either because the structure data-type
does not list a name at that position or because more than one field is being accessed
simultaneously, the Decompiler will either cast a pointer to a data-type of the correct size or
may automatically generate a token name representing the accessed portion.
</p>
<div class="informalexample">
<pre class="programlisting">
struct1.a = 1; // Assigning an integer to a field named "a"
fVar1 = ptr-&gt;b; // Reading field "b" through a pointer
struct1._20_4_ = 0xff00ff00; // Auto-generated name for assigning multiple fields at once
</pre>
</div>
<p>
For an auto-generated token like <code class="code">_20_4_</code>, the first integer indicates the offset in bytes
of the access from the start of the structure, and the second integer indicates the number of bytes being accessed.
</p>
<p>
If more than one field is being accessed simultaneously, the Decompiler may try to split
the access into logical pieces. See the description of the analysis option for
<a class="link" href="DecompilerOptions.html#AnalysisSplitStruct">Splitting Structure Accesses</a>.
</p>
</div>
<div class="sect4">
<div class="titlepage"><div><div><h5 class="title">

View File

@ -297,6 +297,66 @@
</p>
</dd>
<dt>
<a name="AnalysisSplitStruct"></a><span class="term"><span class="bold"><strong>Split combined structure fields</strong></span></span>
</dt>
<dd>
<p>
When this option is active, the Decompiler attempts to identify places in the code where multiple
fields of a structure data-type are being moved simultaneously with a single operation. Then it splits the
operation into multiple pieces so that the logical fields can be seen individually.
When this option isn't active, the Decompiler creates an artificial token to
represent the combined fields being written to or read from.
</p>
<div class="informalexample">
<pre class="programlisting">
struct1._20_4_ = 0xff00ff00; // Auto-generated name for assigning multiple fields at once
...
struct1.a = 0xff00; // The same assignment, after splitting
struct1.b = 0xff00;
</pre>
</div>
<p>
</p>
</dd>
<dt>
<a name="AnalysisSplitArray"></a><span class="term"><span class="bold"><strong>Split combined array elements</strong></span></span>
</dt>
<dd>
<p>
When this option is active, the Decompiler attempts to identify places in the code where multiple
elements of an array are being moved simultaneously with a single operation. Then it splits the
operation into multiple pieces so each element of the array can be seen individually.
When this option isn't active, the Decompiler creates an artificial token to
represent the combined elements being written to or read from.
</p>
<div class="informalexample">
<pre class="programlisting">
text._20_2_ = 0x4241; // Auto-generated name for assigning multiple elements at once
...
text[20] = 'A'; // The same assignment, after splitting
text[21] = 'B';
</pre>
</div>
<p>
</p>
</dd>
<dt>
<a name="AnalysisSplitPointers"></a><span class="term"><span class="bold"><strong>Split pointers to combined elements</strong></span></span>
</dt>
<dd>
<p>
This option affects when the Decompiler's splitting actions are applied. See
the discussion above about
<a class="link" href="DecompilerOptions.html#AnalysisSplitStruct">Splitting Structure Accesses</a> and
<a class="link" href="DecompilerOptions.html#AnalysisSplitArray">Splitting Array Accesses</a>.
If this option is on, a split is performed whenever combined elements or combined
fields are copied. In particular, either the read access, or the write access, or both,
can be through a pointer.
If this option is off, splitting is limited to copies between structures or arrays at fixed
locations, either at a global address, or on the local stack.
</p>
</dd>
<dt>
<a name="AnalysisInPlace"></a><span class="term"><span class="bold"><strong>Use in-place assignment operators</strong></span></span>
</dt>
<dd>

View File

@ -98,6 +98,32 @@ public class DecompileOptions {
private final static boolean ANALYZEFORLOOPS_OPTIONDEFAULT = true; // Must match Architecture::resetDefaultsInternal
private boolean analyzeForLoops;
private final static String SPLITSTRUCTURES_OPTIONSTRING =
"Analysis.Split combined structure fields";
private final static String SPLITSTRUCTURES_OPTIONDESCRIPTION =
"If set, the decompiler will split a copy operation to or from a structure that affects more than " +
"one field. The copy will be split into multiple operations so that each logical field is copied " +
"separately.";
private final static boolean SPLITSTRUCTURES_OPTIONDEFAULT = true; // Must match Architecture::resetDefaultsInternal
private boolean splitStructures;
private final static String SPLITARRAYS_OPTIONSTRING = "Analysis.Split combined array elements";
private final static String SPLITARRAYS_OPTIONDESCRIPTION =
"If set, the decompiler will split a copy operation to or from an array that affects more than " +
"one element. The copy will be split into multiple operations so that each logical element is copied " +
"separately.";
private final static boolean SPLITARRAYS_OPTIONDEFAULT = true; // Must match Architecture::resetDefaultsInternal
private boolean splitArrays;
private final static String SPLITPOINTERS_OPTIONSTRING =
"Analysis.Split pointers to combined elements";
private final static String SPLITPOINTERS_OPTIONDESCRIPTION =
"If set, a single copy, through a pointer, to either multiple array elements or multiple structure fields " +
"will be split. The copy, via LOAD or STORE, will be split into multiple operations so that each " +
"logical element is accessed separately.";
private final static boolean SPLITPOINTERS_OPTIONDEFAULT = true; // Must match Architecture::resetDefaultsInternal
private boolean splitPointers;
private final static String NULLTOKEN_OPTIONSTRING = "Display.Print 'NULL' for null pointers";
private final static String NULLTOKEN_OPTIONDESCRIPTION =
"If set, any zero valued pointer (null pointer) will " +
@ -380,6 +406,9 @@ public class DecompileOptions {
readOnly = READONLY_OPTIONDEFAULT; // This flipped values
eliminateUnreachable = ELIMINATE_UNREACHABLE_OPTIONDEFAULT;
simplifyDoublePrecision = SIMPLIFY_DOUBLEPRECISION_OPTIONDEFAULT;
splitStructures = SPLITSTRUCTURES_OPTIONDEFAULT;
splitArrays = SPLITARRAYS_OPTIONDEFAULT;
splitPointers = SPLITPOINTERS_OPTIONDEFAULT;
ignoreunimpl = IGNOREUNIMPL_OPTIONDEFAULT;
inferconstptr = INFERCONSTPTR_OPTIONDEFAULT;
analyzeForLoops = ANALYZEFORLOOPS_OPTIONDEFAULT;
@ -435,6 +464,11 @@ public class DecompileOptions {
inferconstptr = opt.getBoolean(INFERCONSTPTR_OPTIONSTRING, INFERCONSTPTR_OPTIONDEFAULT);
analyzeForLoops =
opt.getBoolean(ANALYZEFORLOOPS_OPTIONSTRING, ANALYZEFORLOOPS_OPTIONDEFAULT);
splitStructures =
opt.getBoolean(SPLITSTRUCTURES_OPTIONSTRING, SPLITSTRUCTURES_OPTIONDEFAULT);
splitArrays = opt.getBoolean(SPLITARRAYS_OPTIONSTRING, SPLITARRAYS_OPTIONDEFAULT);
splitPointers = opt.getBoolean(SPLITPOINTERS_OPTIONSTRING, SPLITPOINTERS_OPTIONDEFAULT);
nullToken = opt.getBoolean(NULLTOKEN_OPTIONSTRING, NULLTOKEN_OPTIONDEFAULT);
inplaceTokens = opt.getBoolean(INPLACEOP_OPTIONSTRING, INPLACEOP_OPTIONDEFAULT);
aliasBlock = opt.getEnum(ALIASBLOCK_OPTIONSTRING, ALIASBLOCK_OPTIONDEFAULT);
@ -537,6 +571,15 @@ public class DecompileOptions {
opt.registerOption(ANALYZEFORLOOPS_OPTIONSTRING, ANALYZEFORLOOPS_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "AnalysisForLoops"),
ANALYZEFORLOOPS_OPTIONDESCRIPTION);
opt.registerOption(SPLITSTRUCTURES_OPTIONSTRING, SPLITSTRUCTURES_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "AnalysisSplitStruct"),
SPLITSTRUCTURES_OPTIONDESCRIPTION);
opt.registerOption(SPLITARRAYS_OPTIONSTRING, SPLITARRAYS_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "AnalysisSplitArray"),
SPLITARRAYS_OPTIONDESCRIPTION);
opt.registerOption(SPLITPOINTERS_OPTIONSTRING, SPLITPOINTERS_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "AnalysisSplitPointers"),
SPLITPOINTERS_OPTIONDESCRIPTION);
opt.registerOption(NULLTOKEN_OPTIONSTRING, NULLTOKEN_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "DisplayNull"), NULLTOKEN_OPTIONDESCRIPTION);
opt.registerOption(INPLACEOP_OPTIONSTRING, INPLACEOP_OPTIONDEFAULT,
@ -680,13 +723,28 @@ public class DecompileOptions {
*/
public void encode(Encoder encoder, DecompInterface iface) throws IOException {
encoder.openElement(ELEM_OPTIONSLIST);
appendOption(encoder, ELEM_CURRENTACTION, "conditionalexe", predicate ? "on" : "off", "");
appendOption(encoder, ELEM_READONLY, readOnly ? "on" : "off", "", "");
appendOption(encoder, ELEM_CURRENTACTION, iface.getSimplificationStyle(), "unreachable",
eliminateUnreachable ? "on" : "off");
appendOption(encoder, ELEM_CURRENTACTION, iface.getSimplificationStyle(), "doubleprecis",
simplifyDoublePrecision ? "on" : "off");
if (predicate != PREDICATE_OPTIONDEFAULT) {
appendOption(encoder, ELEM_CURRENTACTION, "conditionalexe", predicate ? "on" : "off",
"");
}
if (eliminateUnreachable != ELIMINATE_UNREACHABLE_OPTIONDEFAULT) {
appendOption(encoder, ELEM_CURRENTACTION, iface.getSimplificationStyle(), "unreachable",
eliminateUnreachable ? "on" : "off");
}
if (simplifyDoublePrecision != SIMPLIFY_DOUBLEPRECISION_OPTIONDEFAULT) {
appendOption(encoder, ELEM_CURRENTACTION, iface.getSimplificationStyle(),
"doubleprecis", simplifyDoublePrecision ? "on" : "off");
}
if (splitStructures != SPLITSTRUCTURES_OPTIONDEFAULT ||
splitArrays != SPLITARRAYS_OPTIONDEFAULT ||
splitPointers != SPLITPOINTERS_OPTIONDEFAULT) {
String p1 = splitStructures ? "struct" : "";
String p2 = splitArrays ? "array" : "";
String p3 = splitPointers ? "pointer" : "";
appendOption(encoder, ELEM_SPLITDATATYPE, p1, p2, p3);
}
appendOption(encoder, ELEM_READONLY, readOnly ? "on" : "off", "", "");
// Must set language early so that the object is in place before other option changes
appendOption(encoder, ELEM_SETLANGUAGE, displayLanguage.toString(), "", "");

View File

@ -421,5 +421,6 @@ public record ElementId(String name, int id) {
public static final ElementId ELEM_COMMAND_GETUSEROPNAME =
new ElementId("command_getuseropname", COMMAND_GETUSEROPNAME);
public static final ElementId ELEM_UNKNOWN = new ElementId("XMLunknown", 270);
public static final ElementId ELEM_SPLITDATATYPE = new ElementId("splitdatatype", 270);
public static final ElementId ELEM_UNKNOWN = new ElementId("XMLunknown", 271);
}