From f00e2a6e8476b960a0c9b85c69b2a8afaca0d7e5 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Wed, 13 Nov 2019 09:39:21 -0500 Subject: [PATCH] Make sure code data-type doesn't become concrete --- .../Decompiler/src/decompile/cpp/database.cc | 48 ++++++++++++------- .../Decompiler/src/decompile/cpp/database.hh | 5 +- .../Decompiler/src/decompile/cpp/type.cc | 18 +++++++ .../Decompiler/src/decompile/cpp/type.hh | 1 + .../Decompiler/src/decompile/cpp/varmap.cc | 2 +- 5 files changed, 54 insertions(+), 20 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc index 9f1d4ad79a..333289b617 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc @@ -398,34 +398,44 @@ void Symbol::restoreXml(const Element *el) restoreXmlBody(list.begin()); } -void FunctionSymbol::buildType(int4 size) +/// Get the number of bytes consumed by a SymbolEntry representing \b this Symbol. +/// By default, this is the number of bytes consumed by the Symbol's data-type. +/// This gives the amount of leeway a search has when the address queried does not match +/// the exact address of the Symbol. With functions, the bytes consumed by a SymbolEntry +/// may not match the data-type size. +/// \return the number of bytes in a default SymbolEntry +int4 Symbol::getBytesConsumed(void) const + +{ + return type->getSize(); +} + +void FunctionSymbol::buildType(void) { TypeFactory *types = scope->getArch()->types; type = types->getTypeCode(); - // Entries for functions have small size starting at the entry address - // of the function in order to deal with non-contiguous functions - // The size used to always be 1, but now we need sizes (slightly) larger than 1 - // to accomodate pointer constants that encode extra information in the lower bit(s) - // of an otherwise aligned pointer. 
If the encoding is not aprior detected, it is interpreted - // initially as a straight address that comes up 1 (or more) bytes off of the start of the function - // In order to detect this, we need to lay down a slightly larger size than 1 - if (size > 1) - type = types->getTypeArray(size,type); - flags |= Varnode::namelock | Varnode::typelock; } /// Build a function \e shell, made up of just the name of the function and /// a placeholder data-type, without the underlying Funcdata object. +/// A SymbolEntry for a function has a small size starting at the entry address, +/// in order to deal with non-contiguous functions. +/// We need a size (slightly) larger than 1 to accommodate pointer constants that encode +/// extra information in the lower bit(s) of an otherwise aligned pointer. +/// If the encoding is not initially detected, it is interpreted +/// as a straight address that comes up 1 (or more) bytes off of the start of the function. +/// In order to detect this, we need to lay down a slightly larger size than 1. /// \param sc is the Scope that will contain the new Symbol /// \param nm is the name of the new Symbol -/// \param size is the number of bytes the Symbol should consume +/// \param size is the number of bytes a SymbolEntry should consume FunctionSymbol::FunctionSymbol(Scope *sc,const string &nm,int4 size) : Symbol(sc) { fd = (Funcdata *)0; - buildType(size); + consumeSize = size; + buildType(); name = nm; } @@ -433,7 +443,8 @@ FunctionSymbol::FunctionSymbol(Scope *sc,int4 size) : Symbol(sc) { fd = (Funcdata *)0; - buildType(size); + consumeSize = size; + buildType(); } FunctionSymbol::~FunctionSymbol(void) { @@ -469,9 +480,9 @@ void FunctionSymbol::restoreXml(const Element *el) fd = new Funcdata("",scope,Address()); fd->restoreXml(el); name = fd->getName(); - if (type->getSize() < fd->getSize()) { + if (consumeSize < fd->getSize()) { if ((fd->getSize()>1)&&(fd->getSize() <= 8)) - buildType(fd->getSize()); + consumeSize = fd->getSize(); } } else { // 
functionshell @@ -934,8 +945,9 @@ SymbolEntry *Scope::addMap(const SymbolEntry &entry) entry.symbol->flags |= Varnode::persist; SymbolEntry *res; + int4 consumeSize = entry.symbol->getBytesConsumed(); if (entry.addr.isInvalid()) - res = addDynamicMapInternal(entry.symbol,Varnode::mapped,entry.hash,0,entry.symbol->getType()->getSize(),entry.uselimit); + res = addDynamicMapInternal(entry.symbol,Varnode::mapped,entry.hash,0,consumeSize,entry.uselimit); else { if (entry.uselimit.empty()) { entry.symbol->flags |= Varnode::addrtied; @@ -943,7 +955,7 @@ SymbolEntry *Scope::addMap(const SymbolEntry &entry) // can only happen if use is not limited entry.symbol->flags |= glb->symboltab->getProperty(entry.addr); } - res = addMapInternal(entry.symbol,Varnode::mapped,entry.addr,0,entry.symbol->getType()->getSize(),entry.uselimit); + res = addMapInternal(entry.symbol,Varnode::mapped,entry.addr,0,consumeSize,entry.uselimit); if (entry.addr.isJoin()) { // The address is a join, we add extra SymbolEntry maps for each of the pieces JoinRecord *rec = glb->findJoin(entry.addr.getOffset()); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh index 430a374304..94eb3964b8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh @@ -208,6 +208,7 @@ public: void restoreXmlBody(List::const_iterator iter); ///< Restore details of the Symbol from XML virtual void saveXml(ostream &s) const; ///< Save \b this Symbol to an XML stream virtual void restoreXml(const Element *el); ///< Restore \b this Symbol from an XML stream + virtual int4 getBytesConsumed(void) const; ///< Get number of bytes consumed within the address->symbol map }; /// Force a specific display format for constant symbols @@ -235,14 +236,16 @@ inline bool SymbolEntry::isAddrTied(void) const { /// Symbol is thus associated with all the meta-data about the function. 
class FunctionSymbol : public Symbol { Funcdata *fd; ///< The underlying meta-data object for the function + int4 consumeSize; ///< Minimum number of bytes to consume with the start address virtual ~FunctionSymbol(void); - void buildType(int4 size); ///< Build the data-type associated with \b this Symbol + void buildType(void); ///< Build the data-type associated with \b this Symbol public: FunctionSymbol(Scope *sc,const string &nm,int4 size); ///< Construct given the name FunctionSymbol(Scope *sc,int4 size); ///< Constructor for use with restoreXml Funcdata *getFunction(void); ///< Get the underlying Funcdata object virtual void saveXml(ostream &s) const; virtual void restoreXml(const Element *el); + virtual int4 getBytesConsumed(void) const { return consumeSize; } }; /// \brief A Symbol that holds \b equate information for a constant diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index 4e511230a4..19a56d8060 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -1893,6 +1893,24 @@ Datatype *TypeFactory::downChain(Datatype *ptrtype,uintb &off) return getTypePointer(ptype->size,pt,ptype->getWordSize()); } +/// The data-type propagation system can push around data-types that are \e partial or are +/// otherwise unrepresentable in the source language. This method substitutes those data-types +/// with a concrete data-type that is representable, or returns the same data-type if it is already concrete. +/// It is important that the returned data-type have the same size as the original data-type regardless. 
+/// \param ct is the given data-type +/// \return the concrete data-type +Datatype *TypeFactory::concretize(Datatype *ct) + +{ + type_metatype metatype = ct->getMetatype(); + if (metatype == TYPE_CODE) { + if (ct->getSize() != 1) + throw LowlevelError("Primitive code data-type that is not size 1"); + ct = getBase(1, TYPE_UNKNOWN); + } + return ct; +} + /// Restore a Datatype object from an XML tag description: either \, \, or \ /// \param el is the XML element describing the data-type /// \return the restored Datatype object diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index 7033e23030..aaff1c0491 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -435,6 +435,7 @@ public: bool dotdotdot); ///< Create a "function" datatype void destroyType(Datatype *ct); ///< Remove a data-type from \b this Datatype *downChain(Datatype *ptrtype,uintb &off); ///< Find a sub-type matching a pointer and offset + Datatype *concretize(Datatype *ct); ///< Convert given data-type to concrete form void dependentOrder(vector &deporder) const; ///< Place all data-types in dependency order void saveXml(ostream &s) const; ///< Save \b this container to stream void saveXmlCoreTypes(ostream &s) const; ///< Save core types to stream diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc index 422197df71..1d895041aa 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc @@ -513,7 +513,7 @@ void ScopeLocal::createEntry(const RangeHint &a) { Address addr(space,a.start); Address usepoint; - Datatype *ct = a.type; + Datatype *ct = glb->types->concretize(a.type); int4 num = a.size/ct->getSize(); if (num>1) ct = glb->types->getTypeArray(num,ct);