diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc index f0b2a4f460..d998c5ec7f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc @@ -394,6 +394,21 @@ string OptionNoCastPrinting::apply(Architecture *glb,const string &p1,const stri return "No cast printing turned "+prop; } +/// \class OptionHideExtensions +/// \brief Toggle whether implied extensions (ZEXT or SEXT) are printed +string OptionHideExtensions::apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const + +{ + bool val = onOrOff(p1); + PrintC *lng = dynamic_cast<PrintC *>(glb->print); + if (lng == (PrintC *)0) + return "Can only toggle extension hiding for C language"; + lng->setHideImpliedExts(val); + string prop; + prop = val ? "on" : "off"; + return "Implied extension hiding turned "+prop; +} + /// \class OptionMaxLineWidth /// \brief Set the maximum number of characters per decompiled line /// diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh index 372d6eb83e..ad166b1289 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh @@ -144,6 +144,12 @@ public: virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const; }; +class OptionHideExtensions : public ArchOption { +public: + OptionHideExtensions(void) { name="hideextensions"; } ///< Constructor + virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const; +}; + class OptionMaxLineWidth : public ArchOption { public: OptionMaxLineWidth(void) { name = "maxlinewidth"; } ///< Constructor diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index 7c3fa0f073..3d6e4a6d9f 100644 --- 
a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -18,6 +18,7 @@ // Operator tokens for expressions // token #in prec assoc optype space bump +OpToken PrintC::hidden = { "", 1, 70, false, OpToken::hiddenfunction, 0, 0, (OpToken *)0 }; OpToken PrintC::scope = { "::", 2, 70, true, OpToken::binary, 0, 0, (OpToken *)0 }; OpToken PrintC::object_member = { ".", 2, 66, true, OpToken::binary, 0, 0, (OpToken *)0 }; OpToken PrintC::pointer_member = { "->", 2, 66, true, OpToken::binary, 0, 0, (OpToken *)0 }; @@ -98,6 +99,7 @@ PrintC::PrintC(Architecture *g,const string &nm) : PrintLanguage(g,nm) option_convention = true; option_nocasts = false; option_unplaced = false; + option_hide_exts = true; nullToken = "NULL"; // Set the flip tokens @@ -319,6 +321,21 @@ void PrintC::opTypeCast(const PcodeOp *op) pushVnImplied(op->getIn(0),op,mods); } +/// The syntax represents the given op using a function with one input, +/// where the function name is not printed. The input expression is simply printed +/// without adornment inside the larger expression, with one minor difference. +/// The hidden operator protects against confusing evaluation order between +/// the operators inside and outside the hidden function. If both the inside +/// and outside operators are the same associative token, the hidden token +/// makes sure the inner expression is surrounded with parentheses. 
+/// \param op is the given PcodeOp +void PrintC::opHiddenFunc(const PcodeOp *op) + +{ + pushOp(&hidden,op); + pushVnImplied(op->getIn(0),op,mods); +} + void PrintC::opCopy(const PcodeOp *op) { @@ -577,8 +594,12 @@ void PrintC::opReturn(const PcodeOp *op) void PrintC::opIntZext(const PcodeOp *op) { - if (castStrategy->isZextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) - opTypeCast(op); + if (castStrategy->isZextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) { + if (isExtensionCastImplied(op)) + opHiddenFunc(op); + else + opTypeCast(op); + } else opFunc(op); } @@ -586,8 +607,12 @@ void PrintC::opIntZext(const PcodeOp *op) void PrintC::opIntSext(const PcodeOp *op) { - if (castStrategy->isSextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) - opTypeCast(op); + if (castStrategy->isSextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) { + if (isExtensionCastImplied(op)) + opHiddenFunc(op); + else + opTypeCast(op); + } else opFunc(op); } @@ -1247,6 +1272,60 @@ bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize return res; } +/// \brief Is the given ZEXT/SEXT cast implied by the expression it is in +/// +/// We know that the given ZEXT or SEXT op can be viewed as a natural \e cast operation. +/// Sometimes such a cast is implied by the expression it is in, and the cast itself +/// doesn't need to be printed. 
+/// \param op is the given ZEXT or SEXT PcodeOp +/// \return \b true if the op as a cast does not need to be printed +bool PrintC::isExtensionCastImplied(const PcodeOp *op) const + +{ + if (!option_hide_exts) + return false; // If hiding extensions is not on, we must always print extension + const Varnode *outVn = op->getOut(); + if (outVn->isExplicit()) { + // An explicit output is never treated as implied: fall through and return false + } + else { + type_metatype metatype = outVn->getHigh()->getType()->getMetatype(); + list<PcodeOp *>::const_iterator iter; + for(iter=outVn->beginDescend();iter!=outVn->endDescend();++iter) { + PcodeOp *expOp = *iter; + Varnode *otherVn; + int4 slot; + switch(expOp->code()) { + case CPUI_PTRADD: + break; + case CPUI_INT_ADD: + case CPUI_INT_SUB: + case CPUI_INT_MULT: + case CPUI_INT_DIV: + case CPUI_INT_AND: + case CPUI_INT_OR: + case CPUI_INT_XOR: + case CPUI_INT_LESS: + case CPUI_INT_LESSEQUAL: + case CPUI_INT_SLESS: + case CPUI_INT_SLESSEQUAL: + slot = expOp->getSlot(outVn); + otherVn = expOp->getIn(1-slot); + // Check if the expression involves an explicit variable of the right integer type + if (!otherVn->isExplicit()) + return false; + if (otherVn->getHigh()->getType()->getMetatype() != metatype) + return false; + break; + default: + return false; + } + } + return true; // Everything is integer promotion + } + return false; +} + /// \brief Push a single character constant to the RPN stack /// /// For C, a character constant is usually emitted as the character in single quotes. diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh index d65e9be483..65f67031ce 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh @@ -62,6 +62,7 @@ struct PartialSymbolEntry { /// - etc. 
class PrintC : public PrintLanguage { protected: + static OpToken hidden; ///< Hidden functional (that may force parentheses) static OpToken scope; ///< The sub-scope/namespace operator static OpToken object_member; ///< The \e member operator static OpToken pointer_member; ///< The \e points \e to \e member operator @@ -116,6 +117,7 @@ protected: bool option_convention; ///< Set to \b true if we should print calling convention bool option_nocasts; ///< Don't print a cast if \b true bool option_unplaced; ///< Set to \b true if we should display unplaced comments + bool option_hide_exts; ///< Set to \b true if we should hide implied extension operations string nullToken; ///< Token to use for 'null' CommentSorter commsorter; ///< Container/organizer for comments in the current function @@ -153,9 +155,11 @@ protected: void emitCommentGroup(const PcodeOp *inst); ///< Emit comments associated with a given statement void emitCommentFuncHeader(const Funcdata *fd); ///< Emit comments in the given function's header void opFunc(const PcodeOp *op); ///< Push a \e functional expression based on the given p-code op to the RPN stack - void opTypeCast(const PcodeOp *op); ///< Pus the given p-code op using type-cast syntax to the RPN stack + void opTypeCast(const PcodeOp *op); ///< Push the given p-code op using type-cast syntax to the RPN stack + void opHiddenFunc(const PcodeOp *op); ///< Push the given p-code op as a hidden token static bool hasCharTerminator(uint1 *buffer,int4 size,int4 charsize); bool printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const; + bool isExtensionCastImplied(const PcodeOp *op) const; virtual void pushConstant(uintb val,const Datatype *ct, const Varnode *vn,const PcodeOp *op); virtual bool pushEquate(uintb val,int4 sz,const EquateSymbol *sym, @@ -195,6 +199,7 @@ public: void setCStyleComments(void) { setCommentDelimeter("/* "," */",false); } ///< Set c-style "/* */" comment delimiters void setCPlusPlusStyleComments(void) { 
setCommentDelimeter("// ","",true); } ///< Set c++-style "//" comment delimiters void setDisplayUnplaced(bool val) { option_unplaced = val; } ///< Toggle whether \e unplaced comments are displayed in the header + void setHideImpliedExts(bool val) { option_hide_exts = val; } ///< Toggle whether implied extensions are hidden virtual ~PrintC(void) {} virtual void adjustTypeOperators(void); virtual void setCommentStyle(const string &nm); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc index 94fe032e51..d91c59a0c9 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc @@ -52,55 +52,6 @@ PrintLanguageCapability *PrintLanguageCapability::findCapability(const string &n return (PrintLanguageCapability *)0; } -/// \brief Determine if the given token should be emitted in its own parenthetic expression -/// -/// This token is being emitted. Check if its input expression, ending with the given -/// operator token, needs to be surrounded by parentheses to convey the proper meaning. 
-/// \param op2 is the input token to \b this operator -/// \param stage is the stage of \b this operator currently being printed -/// \return \b true if \b op2 (as input to \b this) should be parenthesized -bool OpToken::parentheses(const OpToken &op2,int4 stage) const - -{ - switch(type) { - case space: - case binary: - if (precedence > op2.precedence) return true; - if (precedence < op2.precedence) return false; - if (associative && (this == &op2)) return false; - // If operators are adjacent to each other, the - // operator printed first must be evaluated first - // In this case op2 must be evaluated first, so we - // check if it is printed first (in first stage of binary) - if ((op2.type==postsurround)&&(stage==0)) return false; - return true; - case unary_prefix: - if (precedence > op2.precedence) return true; - if (precedence < op2.precedence) return false; - // if (associative && (this == &op2)) return false; - if ((op2.type==unary_prefix)||(op2.type==presurround)) return false; - return true; - case postsurround: - if (stage==1) return false; // Inside the surround - if (precedence > op2.precedence) return true; - if (precedence < op2.precedence) return false; - // If the precedences are equal, we know this postsurround - // comes after, so op2 being first doesn't need parens - if ((op2.type==postsurround)||(op2.type==binary)) return false; - // if (associative && (this == &op2)) return false; - return true; - case presurround: - if (stage==0) return false; // Inside the surround - if (precedence > op2.precedence) return true; - if (precedence < op2.precedence) return false; - // if (associative && (this == &op2)) return false; - if ((op2.type==unary_prefix)||(op2.type==presurround)) return false; - return true; - } - - return true; -} - /// \param g is the Architecture that owns and will use this PrintLanguage /// \param nm is the formal name of the language PrintLanguage::PrintLanguage(Architecture *g,const string &nm) @@ -178,7 +129,7 @@ void 
PrintLanguage::pushOp(const OpToken *tok,const PcodeOp *op) } else { emitOp(revpol.back()); - paren = revpol.back().tok->parentheses(*tok,revpol.back().visited); + paren = parentheses(tok); if (paren) id = emit->openParen('('); else @@ -308,6 +259,68 @@ void PrintLanguage::pushVnLHS(const Varnode *vn,const PcodeOp *op) } } +/// The token at the top of the stack is being emitted. Check if its input expression, +/// ending with the given operator token, needs to be surrounded by parentheses to convey +/// the proper meaning. +/// The printing stage of the top token is read from its own stack entry. +/// \param op2 is the input token to the operator at the top of the stack +/// \return \b true if \b op2 (as input to the top token) should be parenthesized +bool PrintLanguage::parentheses(const OpToken *op2) + +{ + ReversePolish &top( revpol.back() ); + const OpToken *topToken = top.tok; + int4 stage = top.visited; + switch(topToken->type) { + case OpToken::space: + case OpToken::binary: + if (topToken->precedence > op2->precedence) return true; + if (topToken->precedence < op2->precedence) return false; + if (topToken->associative && (topToken == op2)) return false; + // If operators are adjacent to each other, the + // operator printed first must be evaluated first + // In this case op2 must be evaluated first, so we + // check if it is printed first (in first stage of binary) + if ((op2->type==OpToken::postsurround)&&(stage==0)) return false; + return true; + case OpToken::unary_prefix: + if (topToken->precedence > op2->precedence) return true; + if (topToken->precedence < op2->precedence) return false; + // if (associative && (this == &op2)) return false; + if ((op2->type==OpToken::unary_prefix)||(op2->type==OpToken::presurround)) return false; + return true; + case OpToken::postsurround: + if (stage==1) return false; // Inside the surround + if (topToken->precedence > op2->precedence) return true; + if (topToken->precedence < op2->precedence) return false; + // If the precedences are equal, 
we know this postsurround + // comes after, so op2 being first doesn't need parens + if ((op2->type==OpToken::postsurround)||(op2->type==OpToken::binary)) return false; + // if (associative && (this == &op2)) return false; + return true; + case OpToken::presurround: + if (stage==0) return false; // Inside the surround + if (topToken->precedence > op2->precedence) return true; + if (topToken->precedence < op2->precedence) return false; + // if (associative && (this == &op2)) return false; + if ((op2->type==OpToken::unary_prefix)||(op2->type==OpToken::presurround)) return false; + return true; + case OpToken::hiddenfunction: + if ((stage==0)&&(revpol.size() > 1)) { // If there is an unresolved previous token + // New token is printed next to the previous token. + const OpToken *prevToken = revpol[revpol.size()-2].tok; + if (prevToken->type != OpToken::binary && prevToken->type != OpToken::unary_prefix) + return false; + if (prevToken->precedence < op2->precedence) return false; + // If precedence is equal, make sure we don't treat two tokens as associative, + // i.e. we should have parentheses + } + return true; + } + + return true; +} + /// An OpToken directly from the RPN is sent to the low-level emitter, /// resolving any final spacing or parentheses. 
/// \param entry is the RPN entry to be emitted @@ -351,6 +364,8 @@ void PrintLanguage::emitOp(const ReversePolish &entry) if (entry.visited != 1) return; emit->spaces(entry.tok->spacing,entry.tok->bump); break; + case OpToken::hiddenfunction: + return; // Never directly prints anything } } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh index 433ab7a75b..cf307ae772 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh @@ -87,6 +87,7 @@ public: postsurround, ///< Function or array operator form presurround, ///< Modifier form (like a cast operation) space, ///< No explicitly printed token + hiddenfunction ///< Operation that isn't explicitly printed }; const char *print; ///< Printing characters for the token int4 stage; ///< Additional elements consumed from the RPN stack when emitting this token @@ -96,7 +97,6 @@ public: int4 spacing; ///< Spaces to print around operator int4 bump; ///< Spaces to indent if we break here OpToken *negate; ///< The token representing the negation of this token - bool parentheses(const OpToken &op2,int4 stage) const; }; /// \brief The base class API for emitting a high-level language @@ -263,6 +263,7 @@ protected: void pushVnExplicit(const Varnode *vn,const PcodeOp *op); ///< Push an explicit variable onto the RPN stack void pushVnLHS(const Varnode *vn,const PcodeOp *op); ///< Push a variable as the left-hand side of an expression + bool parentheses(const OpToken *op2); ///< Determine if the given token should be emitted in its own parenthetic expression void emitOp(const ReversePolish &entry); ///< Send an operator token from the RPN to the emitter void emitAtom(const Atom &atom); ///< Send an variable token from the RPN to the emitter static bool unicodeNeedsEscape(int4 codepoint); ///< Determine if the given codepoint needs to be escaped