From 955838cd62af134fe698cc26173e0e4c2d98f22b Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Wed, 5 Jan 2022 19:32:44 -0500 Subject: [PATCH] GP-1642 Display formats for data-types --- .../Decompiler/certification.manifest | 1 + .../Decompiler/src/decompile/cpp/cast.hh | 11 +++ .../Decompiler/src/decompile/cpp/database.cc | 44 +++------ .../Decompiler/src/decompile/cpp/grammar.cc | 7 +- .../Decompiler/src/decompile/cpp/grammar.y | 7 +- .../src/decompile/cpp/ifacedecomp.cc | 98 +++++++++---------- .../src/decompile/cpp/ifacedecomp.hh | 5 +- .../Decompiler/src/decompile/cpp/printc.cc | 20 ++-- .../Decompiler/src/decompile/cpp/type.cc | 82 +++++++++++++++- .../Decompiler/src/decompile/cpp/type.hh | 41 ++++++-- .../src/decompile/datatests/displayformat.xml | 36 +++++++ .../model/pcode/PcodeDataTypeManager.java | 13 +++ 12 files changed, 266 insertions(+), 99 deletions(-) create mode 100644 Ghidra/Features/Decompiler/src/decompile/datatests/displayformat.xml diff --git a/Ghidra/Features/Decompiler/certification.manifest b/Ghidra/Features/Decompiler/certification.manifest index 37ce688c67..16b0d4fb30 100644 --- a/Ghidra/Features/Decompiler/certification.manifest +++ b/Ghidra/Features/Decompiler/certification.manifest @@ -13,6 +13,7 @@ src/decompile/cpp/Makefile||GHIDRA||||END| src/decompile/datatests/convert.xml||GHIDRA||||END| src/decompile/datatests/deadvolatile.xml||GHIDRA||||END| src/decompile/datatests/deindirect.xml||GHIDRA||||END| +src/decompile/datatests/displayformat.xml||GHIDRA||||END| src/decompile/datatests/dupptr.xml||GHIDRA||||END| src/decompile/datatests/elseif.xml||GHIDRA||||END| src/decompile/datatests/floatprint.xml||GHIDRA||||END| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh index 86d3bfd5c8..3db280331e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh @@ -159,6 +159,17 @@ public: /// \brief Check is a constant input should be explicitly labeled as a \e long integer token bool markExplicitLongSize(PcodeOp *op,int4 slot) const; + + /// \brief For the given PcodeOp, does it matter if a constant operand is presented as a character or integer + /// + /// In most languages, character constants are promoted to integers as a matter of course, so it + /// doesn't matter if the constant is represented as an integer (a string of digits) or a character + /// (surrounded by quotes). But its possible that a particular operator does care. If the operator + /// needs an explicit character representation for an operand with a character data-type, return \b true. + /// \param vn is the constant with character data-type + /// \param op is the given PcodeOp which reads the constant (may be null) + /// \return \b true if the constant must be represented as an explicit character + bool caresAboutCharRepresentation(const Varnode *vn,const PcodeOp *op) const { return false; } }; /// \brief Casting strategies that are specific to the C language diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc index 498d4aef1e..c71e554be6 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc @@ -375,18 +375,8 @@ void Symbol::saveXmlHeader(ostream &s) const int4 format = getDisplayFormat(); if (format != 0) { s << " format=\""; - if (format == force_hex) - s << "hex\""; - else if (format == force_dec) - s << "dec\""; - else if (format == force_char) - s << "char\""; - else if (format == force_oct) - s << "oct\""; - else if (format == force_bin) - s << "bin\""; - else - s << "hex\""; + s << Datatype::decodeIntegerFormat(format); + s << '\"'; } a_v_i(s,"cat",category); if (category >= 0) @@ -413,16 +403,7 @@ void Symbol::restoreXmlHeader(const Element *el) case 'f': if (attName == "format") { const string &formString(el->getAttributeValue(i)); - if (formString == "hex") - dispflags |= force_hex; - else if (formString == "dec") - dispflags |= force_dec; - else if (formString == "char") - dispflags |= force_char; - else if (formString == "oct") - dispflags |= force_oct; - else if (formString == "bin") - dispflags |= force_bin; + dispflags |= Datatype::encodeIntegerFormat(formString); } break; case 'h': @@ -3088,9 +3069,9 @@ Scope *Database::resolveScope(uint8 id) const /// /// The name is parsed using a \b delimiter that is passed in. The name can /// be only partially qualified by passing in a starting Scope, which the -/// name is assumed to be relative to. Otherwise the name is assumed to be -/// relative to the global Scope. The unqualified (base) name of the Symbol -/// is passed back to the caller. +/// name is assumed to be relative to. If the starting scope is \b null, or the name +/// starts with the delimiter, the name is assumed to be relative to the global Scope. +/// The unqualified (base) name of the Symbol is passed back to the caller. /// \param fullname is the qualified Symbol name /// \param delim is the delimiter separating names /// \param basename will hold the passed back base Symbol name @@ -3107,10 +3088,15 @@ Scope *Database::resolveScopeFromSymbolName(const string &fullname,const string for(;;) { endmark = fullname.find(delim,mark); if (endmark == string::npos) break; - string scopename = fullname.substr(mark,endmark-mark); - start = start->resolveScope(scopename,idByNameHash); - if (start == (Scope *)0) // Was the scope name bad - return start; + if (endmark == 0) { // Path is "absolute" + start = globalscope; // Start from the global scope + } + else { + string scopename = fullname.substr(mark,endmark-mark); + start = start->resolveScope(scopename,idByNameHash); + if (start == (Scope *)0) // Was the scope name bad + return start; + } mark = endmark + delim.size(); } basename = fullname.substr(mark,endmark); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc index 08f6e9429c..aab637f398 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc @@ -3191,7 +3191,12 @@ void parse_C(Architecture *glb,istream &s) Datatype *ct = decl->buildType(glb); if (decl->getIdentifier().size() == 0) throw ParseError("Missing identifier for typedef"); - glb->types->setName(ct,decl->getIdentifier()); + if (ct->getMetatype() == TYPE_STRUCT) { + glb->types->setName(ct,decl->getIdentifier()); + } + else { + glb->types->getTypedef(ct,decl->getIdentifier(),0,0); + } } else if (decl->getBaseType()->getMetatype()==TYPE_STRUCT) { // We parsed a struct, treat as a typedef diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y index 3de9192312..f210843fd4 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y @@ -1406,7 +1406,12 @@ void parse_C(Architecture *glb,istream &s) Datatype *ct = decl->buildType(glb); if (decl->getIdentifier().size() == 0) throw ParseError("Missing identifier for typedef"); - glb->types->setName(ct,decl->getIdentifier()); + if (ct->getMetatype() == TYPE_STRUCT) { + glb->types->setName(ct,decl->getIdentifier()); + } + else { + glb->types->getTypedef(ct,decl->getIdentifier(),0,0); + } } else if (decl->getBaseType()->getMetatype()==TYPE_STRUCT) { // We parsed a struct, treat as a typedef diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc index 7a264d0b9e..5b1d265771 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc @@ -37,7 +37,6 @@ void IfaceDecompCapability::registerCommands(IfaceStatus *status) status->registerCom(new IfcComment(),"%"); //Note: A space must follow this when used. status->registerCom(new IfcQuit(),"quit"); status->registerCom(new IfcHistory(),"history"); - status->registerCom(new IfcOpenfile(),"openfile"); status->registerCom(new IfcOpenfile(),"openfile", "write"); status->registerCom(new IfcOpenfileAppend(),"openfile","append"); status->registerCom(new IfcClosefile(),"closefile"); @@ -63,8 +62,8 @@ void IfaceDecompCapability::registerCommands(IfaceStatus *status) status->registerCom(new IfcDump(),"dump"); status->registerCom(new IfcDumpbinary(),"binary"); status->registerCom(new IfcForcegoto(),"force","goto"); - status->registerCom(new IfcForceHex(),"force","hex"); - status->registerCom(new IfcForceDec(),"force","dec"); + status->registerCom(new IfcForceFormat(),"force","varnode"); + status->registerCom(new IfcForceDatatypeFormat(),"force","datatype"); status->registerCom(new IfcProtooverride(),"override","prototype"); status->registerCom(new IfcJumpOverride(),"override","jumptable"); status->registerCom(new IfcFlowOverride(),"override","flow"); @@ -1254,10 +1253,7 @@ void IfcRename::execute(istream &s) Symbol *sym; vector symList; - if (dcp->fd != (Funcdata *)0) - dcp->fd->getScopeLocal()->queryByName(oldname,symList); - else - dcp->conf->symboltab->getGlobalScope()->queryByName(oldname,symList); + dcp->readSymbol(oldname,symList); if (symList.empty()) throw IfaceExecutionError("No symbol named: "+oldname); @@ -1273,7 +1269,7 @@ void IfcRename::execute(istream &s) } /// \class IfcRemove -/// \brief Remove a symbol by name: `remove ` +/// \brief Remove a symbol by name: `remove ` /// /// The symbol is searched for starting in the current function's scope. /// The resulting symbol is removed completely from the symbol table. @@ -1287,10 +1283,7 @@ void IfcRemove::execute(istream &s) throw IfaceParseError("Missing symbol name"); vector symList; - if (dcp->fd != (Funcdata *)0) - dcp->fd->getScopeLocal()->queryByName(name,symList); - else - dcp->conf->symboltab->getGlobalScope()->queryByName(name,symList); + dcp->readSymbol(name,symList); if (symList.empty()) throw IfaceExecutionError("No symbol named: "+name); @@ -1300,7 +1293,7 @@ void IfcRemove::execute(istream &s) } /// \class IfcRetype -/// \brief Change the data-type of a symbol: `retype ` +/// \brief Change the data-type of a symbol: `retype ` /// /// The symbol is searched for by name starting in the current function's scope. /// If the type declaration includes a new name for the variable, the @@ -1318,10 +1311,7 @@ void IfcRetype::execute(istream &s) Symbol *sym; vector symList; - if (dcp->fd != (Funcdata *)0) - dcp->fd->getScopeLocal()->queryByName(name,symList); - else - dcp->conf->symboltab->getGlobalScope()->queryByName(name,symList); + dcp->readSymbol(name,symList); if (symList.empty()) throw IfaceExecutionError("No symbol named: "+name); @@ -1414,6 +1404,21 @@ Varnode *IfaceDecompData::readVarnode(istream &s) return vn; } +/// Find any symbols matching the given name in the current scope. Scope is either the +/// current function scope if a function is active, otherwise the global scope. +/// \param name is the given name, either absolute or partial +/// \param res will hold any matching symbols +void IfaceDecompData::readSymbol(const string &name,vector &res) + +{ + Scope *scope = (fd == (Funcdata *)0) ? conf->symboltab->getGlobalScope() : fd->getScopeLocal(); + string basename; + scope = conf->symboltab->resolveScopeFromSymbolName(name, "::", basename, scope); + if (scope == (Scope *)0) + throw IfaceParseError("Bad namespace for symbol: " + name); + scope->queryByName(basename,res); +} + /// \class IfcPrintVarnode /// \brief Print information about a Varnode: `print varnode ` /// @@ -1644,56 +1649,51 @@ void IfcTypeVarnode::execute(istream &s) *status->fileoptr << " to scope " << scope->getFullName() << endl; } -/// \class IfcForceHex -/// \brief Mark a constant to be printed in hex format: `force hex ` +/// \class IfcForceFormat +/// \brief Mark a constant to be printed in a specific format: `force varnode [hex|dec|oct|bin|char]` /// -/// A selected constant Varnode in the \e current function is marked so -/// that it will be printed in hexadecimal format in decompiler output. -void IfcForceHex::execute(istream &s) +/// A constant Varnode in the \e current function is marked so that is forced +/// to print in one of the formats: \b hex, \b dec, \b oct, \b bin, \b char. +void IfcForceFormat::execute(istream &s) { - if (dcp->fd == (Funcdata *)0) - throw IfaceExecutionError("No function selected"); - Varnode *vn = dcp->readVarnode(s); if (!vn->isConstant()) - throw IfaceExecutionError("Can only force hex on a constant"); + throw IfaceExecutionError("Can only force format on a constant"); type_metatype mt = vn->getType()->getMetatype(); if ((mt!=TYPE_INT)&&(mt!=TYPE_UINT)&&(mt!=TYPE_UNKNOWN)) - throw IfaceExecutionError("Can only force hex on integer type constant"); + throw IfaceExecutionError("Can only force format on integer type constant"); dcp->fd->buildDynamicSymbol(vn); Symbol *sym = vn->getHigh()->getSymbol(); if (sym == (Symbol *)0) throw IfaceExecutionError("Unable to create symbol"); - sym->getScope()->setDisplayFormat(sym,Symbol::force_hex); + string formatString; + s >> ws >> formatString; + uint4 format = Datatype::encodeIntegerFormat(formatString); + sym->getScope()->setDisplayFormat(sym,format); sym->getScope()->setAttribute(sym,Varnode::typelock); - *status->optr << "Successfully forced hex display" << endl; + *status->optr << "Successfully forced format display" << endl; } -/// \class IfcForceDec -/// \brief Mark a constant to be printed in decimal format: `force dec ` +/// \class IfcForceDatatypeFormat +/// \brief Mark constants of a data-type to be printed in a specific format: `force datatype [hex|dec|oct|bin|char]` /// -/// A selected constant Varnode in the \e current function is marked so -/// that it will be printed in decimal format in decompiler output. -void IfcForceDec::execute(istream &s) +/// A display format attribute is set on the indicated data-type. +void IfcForceDatatypeFormat::execute(istream &s) { - if (dcp->fd == (Funcdata *)0) - throw IfaceExecutionError("No function selected"); + Datatype *dt; - Varnode *vn = dcp->readVarnode(s); - if (!vn->isConstant()) - throw IfaceExecutionError("Can only force hex on a constant"); - type_metatype mt = vn->getType()->getMetatype(); - if ((mt!=TYPE_INT)&&(mt!=TYPE_UINT)&&(mt!=TYPE_UNKNOWN)) - throw IfaceExecutionError("Can only force dec on integer type constant"); - dcp->fd->buildDynamicSymbol(vn); - Symbol *sym = vn->getHigh()->getSymbol(); - if (sym == (Symbol *)0) - throw IfaceExecutionError("Unable to create symbol"); - sym->getScope()->setDisplayFormat(sym,Symbol::force_dec); - sym->getScope()->setAttribute(sym,Varnode::typelock); - *status->optr << "Successfully forced dec display" << endl; + string typeName; + s >> ws >> typeName; + dt = dcp->conf->types->findByName(typeName); + if (dt == (Datatype *)0) + throw IfaceExecutionError("Unknown data-type: " + typeName); + string formatString; + s >> ws >> formatString; + uint4 format = Datatype::encodeIntegerFormat(formatString); + dcp->conf->types->setDisplayFormat(dt, format); + *status->optr << "Successfully forced data-type display" << endl; } /// \class IfcForcegoto diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh index 3509f332ed..37264d20c3 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh @@ -59,6 +59,7 @@ public: void clearArchitecture(void); ///< Free all resources for the current architecture/program void followFlow(ostream &s,int4 size); Varnode *readVarnode(istream &s); ///< Read a varnode from the given stream + void readSymbol(const string &name,vector &res); ///< Find a symbol by name }; /// \brief Disassembly emitter that prints to a console stream @@ -352,12 +353,12 @@ public: virtual void execute(istream &s); }; -class IfcForceHex : public IfaceDecompCommand { +class IfcForceFormat : public IfaceDecompCommand { public: virtual void execute(istream &s); }; -class IfcForceDec : public IfaceDecompCommand { +class IfcForceDatatypeFormat : public IfaceDecompCommand { public: virtual void execute(istream &s); }; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index be166a5708..76dada0082 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -1158,7 +1158,8 @@ void PrintC::push_integer(uintb val,int4 sz,bool sign, force_unsigned_token = false; force_sized_token = false; if ((vn != (const Varnode *)0)&&(!vn->isAnnotation())) { - Symbol *sym = vn->getHigh()->getSymbol(); + HighVariable *high = vn->getHigh(); + Symbol *sym = high->getSymbol(); if (sym != (Symbol *)0) { if (sym->isNameLocked() && (sym->getCategory() == Symbol::equate)) { if (pushEquate(val,sz,(EquateSymbol *)sym,vn,op)) @@ -1168,6 +1169,8 @@ void PrintC::push_integer(uintb val,int4 sz,bool sign, } force_unsigned_token = vn->isUnsignedPrint(); force_sized_token = vn->isLongPrint(); + if (displayFormat == 0) // The symbol's formatting overrides any formatting on the data-type + displayFormat = high->getType()->getDisplayFormat(); } if (sign && displayFormat != Symbol::force_char) { // Print the constant as signed uintb mask = calc_mask(sz); @@ -1476,17 +1479,22 @@ void PrintC::pushCharConstant(uintb val,const Datatype *ct,const Varnode *vn,con uint4 displayFormat = 0; bool isSigned = (ct->getMetatype() == TYPE_INT); if ((vn != (const Varnode *)0)&&(!vn->isAnnotation())) { - Symbol *sym = vn->getHigh()->getSymbol(); + HighVariable *high = vn->getHigh(); + Symbol *sym = high->getSymbol(); if (sym != (Symbol *)0) { if (sym->isNameLocked() && (sym->getCategory() == Symbol::equate)) { if (pushEquate(val,vn->getSize(),(EquateSymbol *)sym,vn,op)) return; } displayFormat = sym->getDisplayFormat(); - if (displayFormat == Symbol::force_bin || displayFormat == Symbol::force_dec || displayFormat == Symbol::force_oct) { - push_integer(val, ct->getSize(), isSigned, vn, op); - return; - } + } + if (displayFormat == 0) + displayFormat = high->getType()->getDisplayFormat(); + } + if (displayFormat != 0 && displayFormat != Symbol::force_char) { + if (!castStrategy->caresAboutCharRepresentation(vn, op)) { + push_integer(val, ct->getSize(), isSigned, vn, op); + return; } } if ((ct->getSize()==1)&&(val >= 0x80)) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index 4517d9fb1e..97134636cb 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -316,6 +316,9 @@ void Datatype::saveXmlBasic(type_metatype meta,ostream &s) const a_v_b(s,"varlength",true); if ((flags & opaque_string)!=0) a_v_b(s,"opaquestring",true); + uint4 format = getDisplayFormat(); + if (format != 0) + a_v(s,"format",decodeIntegerFormat(format)); } /// Write a simple reference to \b this data-type as an XML \ tag, @@ -350,6 +353,9 @@ void Datatype::saveXmlTypedef(ostream &s) const s << ""; typedefImm->saveXmlRef(s); s << ""; @@ -445,6 +451,10 @@ void Datatype::restoreXmlBasic(const Element *el) if (xml_readbool(el->getAttributeValue(i))) flags |= opaque_string; } + else if (attribName == "format") { + uint4 val = encodeIntegerFormat(el->getAttributeValue(i)); + setDisplayFormat(val); + } } if ((id==0)&&(name.size()>0)) // If there is a type name id = hashName(name); // There must be some kind of id @@ -488,6 +498,53 @@ uint8 Datatype::hashSize(uint8 id,int4 size) id ^= sizeHash; return id; } +/// \brief Encode the \b format attribute from an XML element +/// +/// Possible values are: +/// - 1 - \b hex +/// - 2 - \b dec +/// - 3 - \b oct +/// - 4 - \b bin +/// - 5 - \b char +/// +/// \param val is the string to encode +/// \return the encoded value +uint4 Datatype::encodeIntegerFormat(const string &val) + +{ + if (val == "hex") + return 1; + else if (val == "dec") + return 2; + else if (val == "oct") + return 3; + else if (val == "bin") + return 4; + else if (val == "char") + return 5; + throw LowlevelError("Unrecognized integer format: " + val); +} + +/// \brief Decode the given format value into an XML attribute string +/// +/// Possible encoded values are 1-5 corresponding to "hex", "dec", "oct", "bin", "char" +/// \param val is the value to decode +/// \return the decoded string +string Datatype::decodeIntegerFormat(uint4 val) + +{ + if (val == 1) + return "hex"; + else if (val == 2) + return "dec"; + else if (val == 3) + return "oct"; + else if (val == 4) + return "bin"; + else if (val == 5) + return "char"; + throw LowlevelError("Unrecognized integer format encoding"); +} /// Contruct from the given \ element. /// \param el is the element @@ -2600,6 +2657,17 @@ Datatype *TypeFactory::setName(Datatype *ct,const string &n) return ct; } +/// The display format for the data-type is changed based on the given format. A value of +/// zero clears any preexisting format. Otherwise the value can be one of: +/// 1=\b hex, 2=\b dec, 4=\b oct, 8=\b bin, 16=\b char +/// \param ct is the given data-type to change +/// \param format is the given format +void TypeFactory::setDisplayFormat(Datatype *ct,uint4 format) + +{ + ct->setDisplayFormat(format); +} + /// Make sure all the offsets are fully established then set fields of the structure /// If \b fixedsize is greater than 0, force the final structure to have that size. /// This method should only be used on an incomplete structure. It will mark the structure as complete. @@ -2947,8 +3015,9 @@ void TypeFactory::recalcPointerSubmeta(Datatype *base,sub_metatype sub) /// \param ct is the given data-type to clone /// \param name is the new name for the clone /// \param id is the new id for the clone (or 0) +/// \param format is a particular format to force when printing (or zero) /// \return the (found or created) \e typedef data-type -Datatype *TypeFactory::getTypedef(Datatype *ct,const string &name,uint8 id) +Datatype *TypeFactory::getTypedef(Datatype *ct,const string &name,uint8 id,uint4 format) { if (id == 0) @@ -2964,6 +3033,7 @@ Datatype *TypeFactory::getTypedef(Datatype *ct,const string &name,uint8 id) res->id = id; // and new id res->flags &= ~((uint4)Datatype::coretype); // Not a core type res->typedefImm = ct; + res->setDisplayFormat(format); insert(res); return res; } @@ -3328,15 +3398,19 @@ Datatype *TypeFactory::restoreTypedef(const Element *el) { uint8 id = 0; string nm; + uint4 format = 0; // No forced display format by default for(int4 i=0;igetNumAttributes();++i) { const string &attribName(el->getAttributeName(i)); if (attribName == "id") { - istringstream s1(el->getAttributeValue("id")); + istringstream s1(el->getAttributeValue(i)); s1.unsetf(ios::dec | ios::hex | ios::oct); s1 >> id; } else if (attribName == "name") { - nm = el->getAttributeValue("name"); + nm = el->getAttributeValue(i); + } + else if (attribName == "format") { + format = Datatype::encodeIntegerFormat(el->getAttributeValue(i)); } } if (id == 0) { // Its possible the typedef is a builtin @@ -3367,7 +3441,7 @@ Datatype *TypeFactory::restoreTypedef(const Element *el) return prev; } } - return getTypedef(defedType, nm, id); + return getTypedef(defedType, nm, id, format); } /// If necessary create a stub object before parsing the field descriptions, to deal with recursive definitions diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index dd75efe83e..bd4fc6a7e3 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -93,11 +93,6 @@ protected: /// Boolean properties of datatypes enum { coretype = 1, ///< This is a basic type which will never be redefined - // Bits above the first bit are considered a sub-metatype - // If the metatypes are equal, we compare on sub-metatype - // Currently this is only used to order int, char, and enum - // The order of the sub-metatype is reversed so that - // char comes before int1 chartype = 2, ///< ASCII character data enumtype = 4, ///< An enumeration type (as well as an integer) poweroftwo = 8, ///< An enumeration type where all values are of 2^^n form @@ -108,7 +103,8 @@ protected: has_stripped = 0x100, ///< Datatype has a stripped form for formal declarations is_ptrrel = 0x200, ///< Datatype is a TypePointerRel type_incomplete = 0x400, ///< Set if \b this (recursive) data-type has not been fully defined yet - needs_resolution = 0x800 ///< Datatype (union, pointer to union) needs resolution before propagation + needs_resolution = 0x800, ///< Datatype (union, pointer to union) needs resolution before propagation + force_format = 0x7000, ///< 3-bits encoding display format, 0=none, 1=hex, 2=dec, 3=oct, 4=bin, 5=char }; friend class TypeFactory; friend struct DatatypeCompare; @@ -123,6 +119,7 @@ protected: void saveXmlBasic(type_metatype meta,ostream &s) const; ///< Save basic data-type properties void saveXmlTypedef(ostream &s) const; ///< Write \b this as a \e typedef tag to stream void markComplete(void) { flags &= ~(uint4)type_incomplete; } ///< Mark \b this data-type as completely defined + void setDisplayFormat(uint4 format); ///< Set a specific display format virtual Datatype *clone(void) const=0; ///< Clone the data-type static uint8 hashName(const string &nm); ///< Produce a data-type id by hashing the type name static uint8 hashSize(uint8 id,int4 size); ///< Reversibly hash size into id @@ -149,6 +146,7 @@ public: bool isIncomplete(void) const { return (flags & type_incomplete)!=0; } ///< Is \b this an incompletely defined data-type bool needsResolution(void) const { return (flags & needs_resolution)!=0; } ///< Is \b this a union or a pointer to union uint4 getInheritable(void) const { return (flags & coretype); } ///< Get properties pointers inherit + uint4 getDisplayFormat(void) const; ///< Get the display format for constants with \b this data-type type_metatype getMetatype(void) const { return metatype; } ///< Get the type \b meta-type sub_metatype getSubMeta(void) const { return submeta; } ///< Get the \b sub-metatype uint8 getId(void) const { return id; } ///< Get the type id @@ -173,6 +171,8 @@ public: int4 typeOrder(const Datatype &op) const { if (this==&op) return 0; return compare(op,10); } ///< Order this with -op- datatype int4 typeOrderBool(const Datatype &op) const; ///< Order \b this with -op-, treating \e bool data-type as special void saveXmlRef(ostream &s) const; ///< Write an XML reference of \b this to stream + static uint4 encodeIntegerFormat(const string &val); + static string decodeIntegerFormat(uint4 val); }; /// \brief A field within a structure or union @@ -575,6 +575,7 @@ public: Architecture *getArch(void) const { return glb; } ///< Get the Architecture object Datatype *findByName(const string &n); ///< Return type of given name Datatype *setName(Datatype *ct,const string &n); ///< Set the given types name + void setDisplayFormat(Datatype *ct,uint4 format); ///< Set the display format associated with the given data-type bool setFields(vector &fd,TypeStruct *ot,int4 fixedsize,uint4 flags); ///< Set fields on a TypeStruct bool setFields(vector &fd,TypeUnion *ot,int4 fixedsize,uint4 flags); ///< Set fields on a TypeUnion void setPrototype(const FuncProto *fp,TypeCode *newCode,uint4 flags); ///< Set the prototype on a TypeCode @@ -601,7 +602,7 @@ public: TypeCode *getTypeCode(ProtoModel *model,Datatype *outtype, const vector &intypes, bool dotdotdot); ///< Create a "function" datatype - Datatype *getTypedef(Datatype *ct,const string &name,uint8 id); ///< Create a new \e typedef data-type + Datatype *getTypedef(Datatype *ct,const string &name,uint8 id,uint4 format); ///< Create a new \e typedef data-type TypePointerRel *getTypePointerRel(TypePointer *parentPtr,Datatype *ptrTo,int4 off); ///< Get pointer offset relative to a container TypePointerRel *getTypePointerRel(int4 sz,Datatype *parent,Datatype *ptrTo,int4 ws,int4 off,const string &nm); TypePointer *getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc,const string &nm); @@ -618,6 +619,32 @@ public: void cacheCoreTypes(void); ///< Cache common types }; +/// The display format for the data-type is changed based on the given format. A value of +/// zero clears any preexisting format. Otherwise the value can be one of: +/// 1=\b hex, 2=\b dec, 3=\b oct, 4=\b bin, 5=\b char +/// \param format is the given format +inline void Datatype::setDisplayFormat(uint4 format) + +{ + flags &= ~(uint4)force_format; // Clear preexisting + flags |= (format << 12); +} + +/// A non-zero result indicates the type of formatting that is forced on the constant. +/// One of the following values is returned. +/// - 1 for hexadecimal +/// - 2 for decimal +/// - 3 for octal +/// - 4 for binary +/// - 5 for char +/// +/// \return the forced encoding type or zero +inline uint4 Datatype::getDisplayFormat(void) const + +{ + return (flags & force_format) >> 12; +} + /// Order data-types, with special handling of the \e bool data-type. Data-types are compared /// using the normal ordering, but \e bool is ordered after all other data-types. A return value /// of 0 indicates the data-types are the same, -1 indicates that \b this is prefered (ordered earlier), diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/displayformat.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/displayformat.xml new file mode 100644 index 0000000000..b1dd5661d5 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/displayformat.xml @@ -0,0 +1,36 @@ + + + + + 554889e5c705 +0c0a200064000000c6050d0a200077c7 +05ff0920006d0b0000c705fd092000aa +000000905dc3 + + + + +globalfree = 100; +globalhex = 0x77; +globaloct = 05555; +globalbin = 0b10101010; + diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java index 1a4918b564..fb7543f62c 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Arrays; import ghidra.app.plugin.processors.sleigh.SleighLanguage; +import ghidra.docking.settings.FormatSettingsDefinition; import ghidra.program.database.data.PointerTypedefInspector; import ghidra.program.model.address.AddressSpace; import ghidra.program.model.data.*; @@ -784,6 +785,7 @@ public class PcodeDataTypeManager { */ private void buildTypeDef(StringBuilder resBuf, TypeDef type, int size) { DataType refType = type.getDataType(); + String format = null; int sz = refType.getLength(); if (sz <= 0) { sz = size; @@ -810,9 +812,20 @@ public class PcodeDataTypeManager { } } } + else { + if (FormatSettingsDefinition.DEF.hasValue(type.getDefaultSettings())) { + format = FormatSettingsDefinition.DEF.getValueString(type.getDefaultSettings()); + if (format.length() > 4) { + format = format.substring(0, 3); + } + } + } resBuf.append("'); buildTypeRef(resBuf, refType, sz);