Merge remote-tracking branch 'origin/GT-2857_caheckman_impliedcasts'

This commit is contained in:
ghidravore 2019-05-10 12:41:05 -04:00
commit 13b434adfe
6 changed files with 177 additions and 56 deletions

View File

@ -394,6 +394,21 @@ string OptionNoCastPrinting::apply(Architecture *glb,const string &p1,const stri
return "No cast printing turned "+prop;
}
/// \class OptionHideExtensions
/// \brief Toggle whether implied extensions (ZEXT or SEXT) are printed
string OptionHideExtensions::apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const
{
  // p1 carries the on/off request; p2 and p3 are not used by this option
  const bool hide = onOrOff(p1);
  // The toggle lives on PrintC, so the active printer must be (derived from) PrintC
  PrintC *printer = dynamic_cast<PrintC *>(glb->print);
  if (printer != (PrintC *)0) {
    printer->setHideImpliedExts(hide);
    string msg("Implied extension hiding turned ");
    msg += (hide ? "on" : "off");
    return msg;
  }
  return "Can only toggle extension hiding for C language";
}
/// \class OptionMaxLineWidth
/// \brief Set the maximum number of characters per decompiled line
///

View File

@ -144,6 +144,12 @@ public:
virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
};
// Architecture option "hideextensions": toggles whether implied extensions (ZEXT or SEXT) are printed
class OptionHideExtensions : public ArchOption {
public:
  /// Constructor: sets the name this option is registered under
  OptionHideExtensions(void)
  {
    name = "hideextensions";
  }
  // p1 is interpreted as the on/off value; the returned string reports the outcome
  virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
};
class OptionMaxLineWidth : public ArchOption {
public:
OptionMaxLineWidth(void) { name = "maxlinewidth"; } ///< Constructor

View File

@ -18,6 +18,7 @@
// Operator tokens for expressions
// token #in prec assoc optype space bump
OpToken PrintC::hidden = { "", 1, 70, false, OpToken::hiddenfunction, 0, 0, (OpToken *)0 };
OpToken PrintC::scope = { "::", 2, 70, true, OpToken::binary, 0, 0, (OpToken *)0 };
OpToken PrintC::object_member = { ".", 2, 66, true, OpToken::binary, 0, 0, (OpToken *)0 };
OpToken PrintC::pointer_member = { "->", 2, 66, true, OpToken::binary, 0, 0, (OpToken *)0 };
@ -98,6 +99,7 @@ PrintC::PrintC(Architecture *g,const string &nm) : PrintLanguage(g,nm)
option_convention = true;
option_nocasts = false;
option_unplaced = false;
option_hide_exts = true;
nullToken = "NULL";
// Set the flip tokens
@ -319,6 +321,21 @@ void PrintC::opTypeCast(const PcodeOp *op)
pushVnImplied(op->getIn(0),op,mods);
}
/// The syntax represents the given op using a function with one input,
/// where the function name is not printed. The input expression is simply printed
/// without adornment inside the larger expression, with one minor difference.
/// The hidden operator protects against confusing evaluation order between
/// the operators inside and outside the hidden function. If both the inside
/// and outside operators are the same associative token, the hidden token
/// makes sure the inner expression is surrounded with parentheses.
/// \param op is the given PcodeOp
void PrintC::opHiddenFunc(const PcodeOp *op)
{
pushOp(&hidden,op);
pushVnImplied(op->getIn(0),op,mods);
}
void PrintC::opCopy(const PcodeOp *op)
{
@ -577,8 +594,12 @@ void PrintC::opReturn(const PcodeOp *op)
// Push a zero-extension op to the RPN stack.
// The cast strategy is asked whether the extension, from the input's high-level type
// to the output's high-level type, can be viewed as a cast.
// NOTE(review): the first `if`/opTypeCast pair below looks like the pre-change form that the
// diff view shows next to its replacement (the braced `if` that follows) -- confirm against the real file.
void PrintC::opIntZext(const PcodeOp *op)
{
if (castStrategy->isZextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType()))
opTypeCast(op);
if (castStrategy->isZextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) {
// The extension is a cast: hide it if the surrounding expression already implies it
if (isExtensionCastImplied(op))
opHiddenFunc(op);
else
opTypeCast(op);
}
else
// Not viewable as a cast: fall back to functional syntax
opFunc(op);
}
@ -586,8 +607,12 @@ void PrintC::opIntZext(const PcodeOp *op)
// Push a sign-extension op to the RPN stack.
// The cast strategy is asked whether the extension, from the input's high-level type
// to the output's high-level type, can be viewed as a cast.
// NOTE(review): the first `if`/opTypeCast pair below looks like the pre-change form that the
// diff view shows next to its replacement (the braced `if` that follows) -- confirm against the real file.
void PrintC::opIntSext(const PcodeOp *op)
{
if (castStrategy->isSextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType()))
opTypeCast(op);
if (castStrategy->isSextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) {
// The extension is a cast: hide it if the surrounding expression already implies it
if (isExtensionCastImplied(op))
opHiddenFunc(op);
else
opTypeCast(op);
}
else
// Not viewable as a cast: fall back to functional syntax
opFunc(op);
}
@ -1247,6 +1272,60 @@ bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize
return res;
}
/// \brief Is the given ZEXT/SEXT cast implied by the expression its in
///
/// We know that the given ZEXT or SEXT op can be viewed as a natural \e cast operation.
/// Sometimes such a cast is implied by the expression its in, and the cast itself
/// doesn't need to be printed.
/// \param op is the given ZEXT or SEXT PcodeOp
/// \return \b true if the op as a cast does not need to be printed
bool PrintC::isExtensionCastImplied(const PcodeOp *op) const
{
if (!option_hide_exts)
return false; // If hiding extensions is not on, we must always print extension
const Varnode *outVn = op->getOut();
if (outVn->isExplicit()) {
}
else {
type_metatype metatype = outVn->getHigh()->getType()->getMetatype();
list<PcodeOp *>::const_iterator iter;
for(iter=outVn->beginDescend();iter!=outVn->endDescend();++iter) {
PcodeOp *expOp = *iter;
Varnode *otherVn;
int4 slot;
switch(expOp->code()) {
case CPUI_PTRADD:
break;
case CPUI_INT_ADD:
case CPUI_INT_SUB:
case CPUI_INT_MULT:
case CPUI_INT_DIV:
case CPUI_INT_AND:
case CPUI_INT_OR:
case CPUI_INT_XOR:
case CPUI_INT_LESS:
case CPUI_INT_LESSEQUAL:
case CPUI_INT_SLESS:
case CPUI_INT_SLESSEQUAL:
slot = expOp->getSlot(outVn);
otherVn = expOp->getIn(1-slot);
// Check if the expression involves an explicit variable of the right integer type
if (!otherVn->isExplicit())
return false;
if (otherVn->getHigh()->getType()->getMetatype() != metatype)
return false;
break;
default:
return false;
}
}
return true; // Everything is integer promotion
}
return false;
}
/// \brief Push a single character constant to the RPN stack
///
/// For C, a character constant is usually emitted as the character in single quotes.

View File

@ -62,6 +62,7 @@ struct PartialSymbolEntry {
/// - etc.
class PrintC : public PrintLanguage {
protected:
static OpToken hidden; ///< Hidden functional (that may force parentheses)
static OpToken scope; ///< The sub-scope/namespace operator
static OpToken object_member; ///< The \e member operator
static OpToken pointer_member; ///< The \e points \e to \e member operator
@ -116,6 +117,7 @@ protected:
bool option_convention; ///< Set to \b true if we should print calling convention
bool option_nocasts; ///< Don't print a cast if \b true
bool option_unplaced; ///< Set to \b true if we should display unplaced comments
bool option_hide_exts; ///< Set to \b true if we should hide implied extension operations
string nullToken; ///< Token to use for 'null'
CommentSorter commsorter; ///< Container/organizer for comments in the current function
@ -153,9 +155,11 @@ protected:
void emitCommentGroup(const PcodeOp *inst); ///< Emit comments associated with a given statement
void emitCommentFuncHeader(const Funcdata *fd); ///< Emit comments in the given function's header
void opFunc(const PcodeOp *op); ///< Push a \e functional expression based on the given p-code op to the RPN stack
void opTypeCast(const PcodeOp *op); ///< Pus the given p-code op using type-cast syntax to the RPN stack
void opTypeCast(const PcodeOp *op); ///< Push the given p-code op using type-cast syntax to the RPN stack
void opHiddenFunc(const PcodeOp *op); ///< Push the given p-code op as a hidden token
static bool hasCharTerminator(uint1 *buffer,int4 size,int4 charsize);
bool printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const;
bool isExtensionCastImplied(const PcodeOp *op) const;
virtual void pushConstant(uintb val,const Datatype *ct,
const Varnode *vn,const PcodeOp *op);
virtual bool pushEquate(uintb val,int4 sz,const EquateSymbol *sym,
@ -195,6 +199,7 @@ public:
void setCStyleComments(void) { setCommentDelimeter("/* "," */",false); } ///< Set c-style "/* */" comment delimiters
void setCPlusPlusStyleComments(void) { setCommentDelimeter("// ","",true); } ///< Set c++-style "//" comment delimiters
void setDisplayUnplaced(bool val) { option_unplaced = val; } ///< Toggle whether \e unplaced comments are displayed in the header
void setHideImpliedExts(bool val) { option_hide_exts = val; } ///< Toggle whether implied extensions are hidden
virtual ~PrintC(void) {}
virtual void adjustTypeOperators(void);
virtual void setCommentStyle(const string &nm);

View File

@ -52,55 +52,6 @@ PrintLanguageCapability *PrintLanguageCapability::findCapability(const string &n
return (PrintLanguageCapability *)0;
}
/// \brief Determine if the given token should be emitted in its own parenthetic expression
///
/// This token is being emitted. Check if its input expression, ending with the given
/// operator token, needs to be surrounded by parentheses to convey the proper meaning.
/// \param op2 is the input token to \b this operator
/// \param stage is the stage of \b this operator currently being printed
/// \return \b true if \b op2 (as input to \b this) should be parenthesized
bool OpToken::parentheses(const OpToken &op2,int4 stage) const
{
switch(type) {
case space:
case binary:
if (precedence > op2.precedence) return true;
if (precedence < op2.precedence) return false;
if (associative && (this == &op2)) return false;
// If operators are adjacent to each other, the
// operator printed first must be evaluated first
// In this case op2 must be evaluated first, so we
// check if it is printed first (in first stage of binary)
if ((op2.type==postsurround)&&(stage==0)) return false;
return true;
case unary_prefix:
if (precedence > op2.precedence) return true;
if (precedence < op2.precedence) return false;
// if (associative && (this == &op2)) return false;
if ((op2.type==unary_prefix)||(op2.type==presurround)) return false;
return true;
case postsurround:
if (stage==1) return false; // Inside the surround
if (precedence > op2.precedence) return true;
if (precedence < op2.precedence) return false;
// If the precedences are equal, we know this postsurround
// comes after, so op2 being first doesn't need parens
if ((op2.type==postsurround)||(op2.type==binary)) return false;
// if (associative && (this == &op2)) return false;
return true;
case presurround:
if (stage==0) return false; // Inside the surround
if (precedence > op2.precedence) return true;
if (precedence < op2.precedence) return false;
// if (associative && (this == &op2)) return false;
if ((op2.type==unary_prefix)||(op2.type==presurround)) return false;
return true;
}
return true;
}
/// \param g is the Architecture that owns and will use this PrintLanguage
/// \param nm is the formal name of the language
PrintLanguage::PrintLanguage(Architecture *g,const string &nm)
@ -178,7 +129,7 @@ void PrintLanguage::pushOp(const OpToken *tok,const PcodeOp *op)
}
else {
emitOp(revpol.back());
paren = revpol.back().tok->parentheses(*tok,revpol.back().visited);
paren = parentheses(tok);
if (paren)
id = emit->openParen('(');
else
@ -308,6 +259,68 @@ void PrintLanguage::pushVnLHS(const Varnode *vn,const PcodeOp *op)
}
}
/// The token at the top of the stack is being emitted. Check if its input expression,
/// ending with the given operator token, needs to be surrounded by parentheses to convey
/// the proper meaning.
/// \param op2 is the input token to \b this operator
/// \param stage is the stage of \b this operator currently being printed
/// \return \b true if \b op2 (as input to \b this) should be parenthesized
bool PrintLanguage::parentheses(const OpToken *op2)
{
ReversePolish &top( revpol.back() );
const OpToken *topToken = top.tok;
int4 stage = top.visited;
switch(topToken->type) {
case OpToken::space:
case OpToken::binary:
if (topToken->precedence > op2->precedence) return true;
if (topToken->precedence < op2->precedence) return false;
if (topToken->associative && (topToken == op2)) return false;
// If operators are adjacent to each other, the
// operator printed first must be evaluated first
// In this case op2 must be evaluated first, so we
// check if it is printed first (in first stage of binary)
if ((op2->type==OpToken::postsurround)&&(stage==0)) return false;
return true;
case OpToken::unary_prefix:
if (topToken->precedence > op2->precedence) return true;
if (topToken->precedence < op2->precedence) return false;
// if (associative && (this == &op2)) return false;
if ((op2->type==OpToken::unary_prefix)||(op2->type==OpToken::presurround)) return false;
return true;
case OpToken::postsurround:
if (stage==1) return false; // Inside the surround
if (topToken->precedence > op2->precedence) return true;
if (topToken->precedence < op2->precedence) return false;
// If the precedences are equal, we know this postsurround
// comes after, so op2 being first doesn't need parens
if ((op2->type==OpToken::postsurround)||(op2->type==OpToken::binary)) return false;
// if (associative && (this == &op2)) return false;
return true;
case OpToken::presurround:
if (stage==0) return false; // Inside the surround
if (topToken->precedence > op2->precedence) return true;
if (topToken->precedence < op2->precedence) return false;
// if (associative && (this == &op2)) return false;
if ((op2->type==OpToken::unary_prefix)||(op2->type==OpToken::presurround)) return false;
return true;
case OpToken::hiddenfunction:
if ((stage==0)&&(revpol.size() > 1)) { // If there is an unresolved previous token
// New token is printed next to the previous token.
const OpToken *prevToken = revpol[revpol.size()-2].tok;
if (prevToken->type != OpToken::binary && prevToken->type != OpToken::unary_prefix)
return false;
if (prevToken->precedence < op2->precedence) return false;
// If precedence is equal, make sure we don't treat two tokens as associative,
// i.e. we should have parentheses
}
return true;
}
return true;
}
/// An OpToken directly from the RPN is sent to the low-level emitter,
/// resolving any final spacing or parentheses.
/// \param entry is the RPN entry to be emitted
@ -351,6 +364,8 @@ void PrintLanguage::emitOp(const ReversePolish &entry)
if (entry.visited != 1) return;
emit->spaces(entry.tok->spacing,entry.tok->bump);
break;
case OpToken::hiddenfunction:
return; // Never directly prints anything
}
}

View File

@ -87,6 +87,7 @@ public:
postsurround, ///< Function or array operator form
presurround, ///< Modifier form (like a cast operation)
space, ///< No explicitly printed token
hiddenfunction ///< Operation that isn't explicitly printed
};
const char *print; ///< Printing characters for the token
int4 stage; ///< Additional elements consumed from the RPN stack when emitting this token
@ -96,7 +97,6 @@ public:
int4 spacing; ///< Spaces to print around operator
int4 bump; ///< Spaces to indent if we break here
OpToken *negate; ///< The token representing the negation of this token
bool parentheses(const OpToken &op2,int4 stage) const;
};
/// \brief The base class API for emitting a high-level language
@ -263,6 +263,7 @@ protected:
void pushVnExplicit(const Varnode *vn,const PcodeOp *op); ///< Push an explicit variable onto the RPN stack
void pushVnLHS(const Varnode *vn,const PcodeOp *op); ///< Push a variable as the left-hand side of an expression
bool parentheses(const OpToken *op2); ///< Determine if the given token should be emitted in its own parenthetic expression
void emitOp(const ReversePolish &entry); ///< Send an operator token from the RPN to the emitter
void emitAtom(const Atom &atom); ///< Send an variable token from the RPN to the emitter
static bool unicodeNeedsEscape(int4 codepoint); ///< Determine if the given codepoint needs to be escaped