Merge remote-tracking branch 'origin/GP-3019_NaNWithComparison'

(Closes #4588)
This commit is contained in:
Ryan Kurtz 2023-08-01 07:01:38 -04:00
commit e8440a0875
13 changed files with 320 additions and 13 deletions

View File

@ -37,6 +37,7 @@ src/decompile/datatests/modulo.xml||GHIDRA||||END|
src/decompile/datatests/modulo2.xml||GHIDRA||||END|
src/decompile/datatests/multiret.xml||GHIDRA||||END|
src/decompile/datatests/namespace.xml||GHIDRA||||END|
src/decompile/datatests/nan.xml||GHIDRA||||END|
src/decompile/datatests/nestedoffset.xml||GHIDRA||||END|
src/decompile/datatests/noforloop_alias.xml||GHIDRA||||END|
src/decompile/datatests/noforloop_globcall.xml||GHIDRA||||END|

View File

@ -1401,6 +1401,8 @@ void Architecture::resetDefaultsInternal(void)
infer_pointers = true;
analyze_for_loops = true;
readonlypropagate = false;
nan_ignore_all = false;
nan_ignore_compare = true; // Ignore only NaN operations associated with floating-point comparisons by default
alias_block_level = 2; // Block structs and arrays by default, but not more primitive data-types
split_datatype_config = OptionSplitDatatypes::option_struct | OptionSplitDatatypes::option_array
| OptionSplitDatatypes::option_pointer;

View File

@ -177,6 +177,8 @@ public:
bool readonlypropagate; ///< true if readonly values should be treated as constants
bool infer_pointers; ///< True if we should infer pointers from constants that are likely addresses
bool analyze_for_loops; ///< True if we should attempt conversion of \e whiledo loops to \e for loops
bool nan_ignore_all; ///< True if we should ignore NaN operations, i.e. nan() always returns false
bool nan_ignore_compare; ///< True if we should ignore NaN operations protecting floating-point comparisons
vector<AddrSpace *> inferPtrSpaces; ///< Set of address spaces in which a pointer constant is inferable
int4 funcptr_align; ///< How many bits of alignment a function ptr has
uint4 flowoptions; ///< options passed to flow following engine

View File

@ -1160,6 +1160,6 @@ ElementId ELEM_VAL = ElementId("val",8);
ElementId ELEM_VALUE = ElementId("value",9);
ElementId ELEM_VOID = ElementId("void",10);
// ELEM_UNKNOWN must be defined exactly once; its id doubles as the next unused element index.
ElementId ELEM_UNKNOWN = ElementId("XMLunknown",273); // Number serves as next open index
} // End namespace ghidra

View File

@ -59,6 +59,7 @@ ElementId ELEM_STRUCTALIGN = ElementId("structalign",208);
ElementId ELEM_TOGGLERULE = ElementId("togglerule",209);
ElementId ELEM_WARNING = ElementId("warning",210);
ElementId ELEM_JUMPTABLEMAX = ElementId("jumptablemax",271);
ElementId ELEM_NANIGNORE = ElementId("nanignore",272);
/// If the parameter is "on" return \b true, if "off" return \b false.
/// Any other value causes an exception.
@ -128,6 +129,7 @@ OptionDatabase::OptionDatabase(Architecture *g)
registerOption(new OptionMaxInstruction());
registerOption(new OptionNamespaceStrategy());
registerOption(new OptionSplitDatatypes());
registerOption(new OptionNanIgnore());
}
OptionDatabase::~OptionDatabase(void)
@ -985,4 +987,37 @@ string OptionSplitDatatypes::apply(Architecture *glb,const string &p1,const stri
return "Split data-type configuration set";
}
/// \brief Select which floating-point NaN operations the decompiler ignores
///
/// \param glb is the Architecture whose NaN configuration is updated
/// \param p1 is the requested mode: "none", "compare", or "all"
/// \param p2 is unused
/// \param p3 is unused
/// \return a message saying whether the configuration changed
string OptionNanIgnore::apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const
{
  // Remember the configuration in force before this call, so a no-op can be reported
  const bool priorAll = glb->nan_ignore_all;
  const bool priorCompare = glb->nan_ignore_compare;

  // Translate the keyword into the pair of flags it stands for.
  // Nothing is written to the Architecture until the keyword is known to be valid.
  bool wantAll;
  bool wantCompare;
  if (p1 == "all") {			// Ignore all NaN operations
    wantAll = true;
    wantCompare = true;
  }
  else if (p1 == "compare") {		// Ignore only NaN operations protecting floating-point comparisons
    wantAll = false;
    wantCompare = true;
  }
  else if (p1 == "none") {		// Don't ignore any NaN operation
    wantAll = false;
    wantCompare = false;
  }
  else
    throw LowlevelError("Unknown nanignore option: "+p1);
  glb->nan_ignore_all = wantAll;
  glb->nan_ignore_compare = wantCompare;

  // The "ignorenan" rule is only needed when at least one kind of NaN is being ignored
  Action *root = glb->allacts.getCurrent();
  const bool ruleNeeded = wantAll || wantCompare;
  if (ruleNeeded)
    root->enableRule("ignorenan");
  else
    root->disableRule("ignorenan");

  const bool changed = (priorAll != wantAll) || (priorCompare != wantCompare);
  if (!changed)
    return "NaN ignore configuration unchanged";
  return "Nan ignore configuration set to: " + p1;
}
} // End namespace ghidra

View File

@ -65,6 +65,7 @@ extern ElementId ELEM_STRUCTALIGN; ///< Marshaling element \<structalign>
extern ElementId ELEM_TOGGLERULE; ///< Marshaling element \<togglerule>
extern ElementId ELEM_WARNING; ///< Marshaling element \<warning>
extern ElementId ELEM_JUMPTABLEMAX; ///< Marshaling element \<jumptablemax>
extern ElementId ELEM_NANIGNORE; ///< Marshaling element \<nanignore>
/// \brief Base class for options classes that affect the configuration of the Architecture object
///
@ -343,5 +344,11 @@ public:
virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
};
/// \brief Option controlling which floating-point NaN operations are ignored
///
/// The option is registered under the name "nanignore".  The first parameter passed to apply()
/// selects the mode: "none", "compare", or "all".  Any other value causes an exception.
class OptionNanIgnore : public ArchOption {
public:
OptionNanIgnore(void) { name = "nanignore"; } ///< Constructor
virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
};
} // End namespace ghidra
#endif

View File

@ -9437,26 +9437,151 @@ int4 RuleFloatCast::applyOp(PcodeOp *op,Funcdata &data)
}
/// \class RuleIgnoreNan
/// \brief Remove certain NaN operations by assuming their result is always \b false
///
/// This rule can be configured to remove either all FLOAT_NAN operations or only those that
/// protect floating-point comparisons.  Removing a FLOAT_NAN makes the assumption that the
/// floating-point calculation feeding it gives a valid result (not NaN).
void RuleIgnoreNan::getOpList(vector<uint4> &oplist) const
{
  // FLOAT_NAN is the only opcode this rule is triggered on
  const uint4 triggerOpcode = CPUI_FLOAT_NAN;
  oplist.push_back(triggerOpcode);
}
/// \brief Check if a boolean Varnode incorporates a floating-point comparison with the given value
///
/// The Varnode can either be the direct output of a comparison, or it can be a BOOL_OR or BOOL_AND,
/// combining output from the comparison.  Only one level of BOOL_OR / BOOL_AND is searched.
/// \param floatVar is the given value the comparison must take as input
/// \param root is the boolean Varnode
/// \return \b true if the boolean Varnode incorporates the comparison
bool RuleIgnoreNan::checkBackForCompare(Varnode *floatVar,Varnode *root)
{
  if (!root->isWritten()) return false;
  PcodeOp *def1 = root->getDef();
  if (!def1->isBoolOutput()) return false;
  if (def1->getOpcode()->isFloatingPointOp()) {
    // root is produced directly by a floating-point op with boolean output:
    // it matches if it is binary and either operand is functionally equal to floatVar
    if (def1->numInput() != 2) return false;
    if (functionalEquality(floatVar, def1->getIn(0)))
      return true;
    if (functionalEquality(floatVar, def1->getIn(1)))
      return true;
    return false;
  }
  OpCode opc = def1->code();
  // Anything other than BOOL_AND or BOOL_OR cannot be combining a comparison clause.
  // (The test must be a conjunction: with "||" it is true for every opcode and the
  // search below would never run.)
  if (opc != CPUI_BOOL_AND && opc != CPUI_BOOL_OR)
    return false;
  // Look at each clause of the BOOL_AND / BOOL_OR for a binary floating-point op with
  // boolean output that takes floatVar as an operand
  for(int4 i=0;i<2;++i) {
    Varnode *vn = def1->getIn(i);
    if (!vn->isWritten()) continue;
    PcodeOp *def2 = vn->getDef();
    if (!def2->isBoolOutput()) continue;
    if (!def2->getOpcode()->isFloatingPointOp()) continue;
    if (def2->numInput() != 2) continue;
    if (functionalEquality(floatVar, def2->getIn(0)))
      return true;
    if (functionalEquality(floatVar, def2->getIn(1)))
      return true;
  }
  return false;
}
/// \brief Test if a boolean expression incorporates a floating-point comparison, and remove the NaN data-flow if it does
///
/// The given PcodeOp takes input from a NaN operation through a specific slot. We look for a floating-point comparison
/// PcodeOp (FLOAT_LESS, FLOAT_LESSEQUAL, FLOAT_EQUAL, or FLOAT_NOTEQUAL) that is combined with the given PcodeOp and
/// has the same input Varnode as the NaN. The data-flow must be combined either through a BOOL_OR or BOOL_AND
/// operation, or the given PcodeOp must be a CBRANCH that protects immediate control-flow to another CBRANCH
/// taking the result of the comparison as input. If a matching comparison is found, the NaN input to the given
/// PcodeOp is removed, assuming the output of the NaN operation is always \b false.
/// Input from an unmodified NaN result must be combined through a BOOL_OR, but a NaN result that has been negated
/// must combine through a BOOL_AND.
/// \param floatVar is the input Varnode to NaN operation
/// \param op is the given PcodeOp to test
/// \param slot is the input slot of the given PcodeOp through which the (possibly negated) NaN result is read
/// \param matchCode is BOOL_AND if the NaN result has been negated, BOOL_OR if not
/// \param count is incremented if a comparison is found and the NaN input is removed
/// \param data is the function
/// \return the output of the given PcodeOp if it has an opcode matching \b matchCode, otherwise null
Varnode *RuleIgnoreNan::testForComparison(Varnode *floatVar,PcodeOp *op,int4 slot,OpCode matchCode,int4 &count,Funcdata &data)
{
// Case 1: the NaN result is combined directly through the expected boolean operator
if (op->code() == matchCode) {
// The operand that is NOT the NaN result
Varnode *vn = op->getIn(1-slot);
if (checkBackForCompare(floatVar,vn)) {
// Collapse the boolean op into a COPY of the other operand, dropping the NaN clause.
// Input 1 is removed first, then input 0 is overwritten, so the result is correct for either slot.
data.opSetOpcode(op, CPUI_COPY);
data.opRemoveInput(op, 1);
data.opSetInput(op, vn, 0);
count += 1;
}
// Returned whether or not the op was changed, so the caller can keep following the data-flow
return op->getOut();
}
// Case 2: only a CBRANCH reading the NaN result is considered beyond this point
if (op->code() != CPUI_CBRANCH)
return (Varnode *)0;
BlockBasic *parent = op->getParent();
bool flowToFromCompare = false;
PcodeOp *lastOp;
// Pick which out-edge of the CBRANCH's block to follow, based on matchCode and the branch's boolean-flip flag
int4 outDir = (matchCode == CPUI_BOOL_OR) ? 0 : 1;
if (op->isBooleanFlip())
outDir = 1 - outDir;
FlowBlock *outBranch = parent->getOut(outDir);
lastOp = outBranch->lastOp();
// The block reached must itself end in a CBRANCH ...
if (lastOp != (PcodeOp *)0 && lastOp->code() == CPUI_CBRANCH) {
FlowBlock *otherBranch = parent->getOut(1-outDir);
// ... one of whose out-edges rejoins the other destination of the first CBRANCH ...
if (outBranch->getOut(0) == otherBranch || outBranch->getOut(1) == otherBranch) {
// ... and whose condition incorporates a comparison on the same floating-point value
if (checkBackForCompare(floatVar, lastOp->getIn(1)))
flowToFromCompare = true;
}
}
if (flowToFromCompare) {
// NOTE(review): the condition is replaced with constant 0 even when matchCode is BOOL_AND, i.e. when
// the value read by this CBRANCH is the negated NaN result -- confirm that 0 is intended in that case
data.opSetInput(op,data.newConstant(1, 0),1); // Treat result of NaN as false
count += 1;
}
// A CBRANCH has no output to follow further
return (Varnode *)0;
}
/// \brief Remove a FLOAT_NAN operation, or its uses, according to the configured NaN policy
///
/// If the Architecture is configured to ignore all NaN operations, the FLOAT_NAN is replaced with
/// a COPY of the constant 0 (\b false).  Otherwise the readers of the NaN result are searched, up
/// to three levels deep and optionally through one BOOL_NEGATE, for boolean expressions or
/// branches that also incorporate a floating-point comparison of the same value.  The NaN
/// data-flow is removed from each such expression.
/// \param op is the FLOAT_NAN operation
/// \param data is the function being analyzed
/// \return 1 if any change was made, 0 otherwise
int4 RuleIgnoreNan::applyOp(PcodeOp *op,Funcdata &data)
{
  if (data.getArch()->nan_ignore_all) {
    // Treat these NaN operation as always returning false (0)
    if (op->numInput()==2)		// Defensive: a COPY takes exactly one input
      data.opRemoveInput(op,1);
    data.opSetOpcode(op,CPUI_COPY);
    data.opSetInput(op,data.newConstant(1,0),0);
    return 1;
  }
  Varnode *floatVar = op->getIn(0);
  if (floatVar->isFree()) return 0;
  Varnode *out1 = op->getOut();
  int4 count = 0;
  list<PcodeOp *>::const_iterator iter1 = out1->beginDescend();
  while(iter1 != out1->endDescend()) {
    PcodeOp *boolRead1 = *iter1;
    ++iter1;				// out1 may be truncated from boolRead1 below, advance iterator now
    Varnode *out2;
    OpCode matchCode = CPUI_BOOL_OR;
    if (boolRead1->code() == CPUI_BOOL_NEGATE) {
      // A negated NaN result must be combined with the comparison through BOOL_AND
      matchCode = CPUI_BOOL_AND;
      out2 = boolRead1->getOut();
    }
    else {
      out2 = testForComparison(floatVar, boolRead1, boolRead1->getSlot(out1), matchCode, count, data);
    }
    if (out2 == (Varnode *)0) continue;
    // Follow the boolean expression one level further
    list<PcodeOp *>::const_iterator iter2 = out2->beginDescend();
    while(iter2 != out2->endDescend()) {
      PcodeOp *boolRead2 = *iter2;
      ++iter2;				// Advance before boolRead2 is potentially modified
      Varnode *out3 = testForComparison(floatVar,boolRead2, boolRead2->getSlot(out2), matchCode, count, data);
      if (out3 == (Varnode *)0) continue;
      // ... and one final level
      list<PcodeOp *>::const_iterator iter3 = out3->beginDescend();
      while(iter3 != out3->endDescend()) {
	PcodeOp *boolRead3 = *iter3;
	++iter3;			// Advance before boolRead3 is potentially modified
	testForComparison(floatVar, boolRead3, boolRead3->getSlot(out3), matchCode, count, data);
      }
    }
  }
  return (count > 0) ? 1 : 0;
}
/// \class RuleFuncPtrEncoding

View File

@ -1549,6 +1549,8 @@ public:
};
class RuleIgnoreNan : public Rule {
static bool checkBackForCompare(Varnode *floatVar,Varnode *root);
static Varnode *testForComparison(Varnode *floatVar,PcodeOp *op,int4 slot,OpCode matchCode,int4 &count,Funcdata &data);
public:
RuleIgnoreNan(const string &g) : Rule( g, 0, "ignorenan") {} ///< Constructor
virtual Rule *clone(const ActionGroupList &grouplist) const {

View File

@ -0,0 +1,30 @@
<decompilertest>
<!--
Function with floating-point NaN operations, some of which should be removed.
-->
<binaryimage arch="x86:LE:64:default:gcc">
<bytechunk space="ram" offset="0x1011b7" readonly="true">
f30f1efa4883ec18f2
0f114c2408bf01000000660f2ec07a08
b8000000000f44f8e88cfffffff20f10
05230e0000660f2f442408400f97c740
0fb6ffe898ffffff4883c418c3
</bytechunk>
<bytechunk space="ram" offset="0x102008" readonly="true">
000000000000e83f
</bytechunk>
<symbol space="ram" offset="0x1011b7" name="nanops"/>
<symbol space="ram" offset="0x101169" name="read_nan"/>
<symbol space="ram" offset="0x101190" name="read_compare"/>
</binaryimage>
<script>
<com>option readonly on</com>
<com>lo fu nanops</com>
<com>dec</com>
<com>print C</com>
<com>quit</com>
</script>
<stringmatch name="NaN operations #1" min="1" max="1">read_nan\(NAN\(param_1\)\);</stringmatch>
<stringmatch name="NaN operations #2" min="1" max="1">read_compare\(param_2 &lt; 0\.75\);</stringmatch>
<stringmatch name="NaN operations #3" min="0" max="0">NAN\(param_2\)</stringmatch>
</decompilertest>

View File

@ -3088,6 +3088,35 @@
</para>
</listitem>
</varlistentry>
<varlistentry id="AnalysisNanIgnore">
<term><emphasis role="bold">NaN operations</emphasis></term>
<listitem>
<para>
This option determines how the Decompiler treats floating-point <emphasis role="bold">NaN</emphasis>
(Not a Number) operations. Many processors automatically perform NaN checks on the operands of
floating-point instructions, and unless specifically configured, these show up in Decompiler output
as <code>NAN()</code> functional tokens. Common floating-point source code operations, like
<code>&lt;</code>, <code>&gt;</code>, and <code>==</code> can generate <code>NAN</code> tokens as a
side effect, even if the original source code was not designed to handle NaN values, and the
tokens can clutter the output.
</para>
<para>
The user can optionally configure some or all of the NaN operations to be ignored, meaning that
inputs to the NaN operation are <emphasis>assumed</emphasis> to be valid floating-point values and the
<code>NAN</code> function is removed, replacing it with the value: <emphasis role="bold">false</emphasis>.
The possible settings are:
<informalexample>
<itemizedlist mark='bullet'>
<listitem><emphasis role="bold">Ignore none</emphasis> - No NaN operations are removed</listitem>
<listitem><emphasis role="bold">Ignore with comparisons</emphasis> - NaN operations associated with comparisons are removed</listitem>
<listitem><emphasis role="bold">Ignore all</emphasis> - All NaN operations are removed</listitem>
</itemizedlist>
</informalexample>
The Decompiler considers a NaN operation to be associated with a floating-point comparison if they both
can be considered boolean clauses of the same <emphasis role="bold">if</emphasis> condition.
</para>
</listitem>
</varlistentry>
<varlistentry id="AnalysisForLoops">
<term><emphasis role="bold">Recover -for- loops</emphasis></term>
<listitem>

View File

@ -4,7 +4,7 @@
<title>Decompiler Options</title>
<link rel="stylesheet" type="text/css" href="help/shared/DefaultStyle.css">
<link rel="stylesheet" type="text/css" href="../../shared/languages.css">
<meta name="generator" content="DocBook XSL Stylesheets Vsnapshot">
<meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
<link rel="home" href="Decompiler.html" title="Decompiler">
<link rel="up" href="Decompiler.html" title="Decompiler">
<link rel="prev" href="DecompilerAnnotations.html" title="Program Annotations Affecting the Decompiler">
@ -241,6 +241,40 @@
</p>
</dd>
<dt>
<a name="AnalysisNanIgnore"></a><span class="term"><span class="bold"><strong>NaN operations</strong></span></span>
</dt>
<dd>
<p>
This option determines how the Decompiler treats floating-point <span class="bold"><strong>NaN</strong></span>
(Not a Number) operations. Many processors automatically perform NaN checks on the operands of
floating-point instructions, and unless specifically configured, these show up in Decompiler output
as <code class="code">NAN()</code> functional tokens. Common floating-point source code operations, like
<code class="code">&lt;</code>, <code class="code">&gt;</code>, and <code class="code">==</code> can generate <code class="code">NAN</code> tokens as a
side effect, even if the original source code was not designed to handle NaN values, and the
tokens can clutter the output.
</p>
<p>
The user can optionally configure some or all of the NaN operations to be ignored, meaning that
inputs to the NaN operation are <span class="emphasis"><em>assumed</em></span> to be valid floating-point values and the
<code class="code">NAN</code> function is removed, replacing it with the value: <span class="bold"><strong>false</strong></span>.
The possible settings are:
</p>
<div class="informalexample">
<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: bullet; ">
<li class="listitem" style="list-style-type: disc">
<span class="bold"><strong>Ignore none</strong></span> - No NaN operations are removed</li>
<li class="listitem" style="list-style-type: disc">
<span class="bold"><strong>Ignore with comparisons</strong></span> - NaN operations associated with comparisons are removed</li>
<li class="listitem" style="list-style-type: disc">
<span class="bold"><strong>Ignore all</strong></span> - All NaN operations are removed</li>
</ul></div>
</div>
<p>
The Decompiler considers a NaN operation to be associated with a floating-point comparison if they both
can be considered boolean clauses of the same <span class="bold"><strong>if</strong></span> condition.
</p>
</dd>
<dt>
<a name="AnalysisForLoops"></a><span class="term"><span class="bold"><strong>Recover -for- loops</strong></span></span>
</dt>
<dd>

View File

@ -124,6 +124,37 @@ public class DecompileOptions {
private final static boolean SPLITPOINTERS_OPTIONDEFAULT = true; // Must match Architecture::resetDefaultsInternal
private boolean splitPointers;
private final static String NANIGNORE_OPTIONSTRING = "Analysis.NaN operations";
private final static String NANIGNORE_OPTIONDESCRIPTION =
"Specify how much to ignore floating-point NaN operations in decompiler output";
/**
 * The available settings for ignoring floating-point NaN operations.
 * Each constant carries the keyword passed with the "nanignore" option and the
 * label returned by {@link #toString()}.
 */
public enum NanIgnoreEnum {

	None("none", "Ignore none"),
	Compare("compare", "Ignore with comparisons"),
	All("all", "Ignore all");

	private final String label;			// Returned by toString()
	private final String optionString;	// Keyword passed as the option's first parameter

	private NanIgnoreEnum(String optString, String label) {
		this.label = label;
		this.optionString = optString;
	}

	/**
	 * @return the keyword used when this setting is encoded as an option
	 */
	public String getOptionString() {
		return optionString;
	}

	@Override
	public String toString() {
		return label;
	}
}
private final static NanIgnoreEnum NANIGNORE_OPTIONDEFAULT = NanIgnoreEnum.Compare; // Must match Architecture::resetDefaultsInternal
private NanIgnoreEnum nanIgnore;
private final static String NULLTOKEN_OPTIONSTRING = "Display.Print 'NULL' for null pointers";
private final static String NULLTOKEN_OPTIONDESCRIPTION =
"If set, any zero valued pointer (null pointer) will " +
@ -412,6 +443,7 @@ public class DecompileOptions {
splitStructures = SPLITSTRUCTURES_OPTIONDEFAULT;
splitArrays = SPLITARRAYS_OPTIONDEFAULT;
splitPointers = SPLITPOINTERS_OPTIONDEFAULT;
nanIgnore = NANIGNORE_OPTIONDEFAULT;
ignoreunimpl = IGNOREUNIMPL_OPTIONDEFAULT;
inferconstptr = INFERCONSTPTR_OPTIONDEFAULT;
analyzeForLoops = ANALYZEFORLOOPS_OPTIONDEFAULT;
@ -473,6 +505,7 @@ public class DecompileOptions {
opt.getBoolean(SPLITSTRUCTURES_OPTIONSTRING, SPLITSTRUCTURES_OPTIONDEFAULT);
splitArrays = opt.getBoolean(SPLITARRAYS_OPTIONSTRING, SPLITARRAYS_OPTIONDEFAULT);
splitPointers = opt.getBoolean(SPLITPOINTERS_OPTIONSTRING, SPLITPOINTERS_OPTIONDEFAULT);
nanIgnore = opt.getEnum(NANIGNORE_OPTIONSTRING, NANIGNORE_OPTIONDEFAULT);
nullToken = opt.getBoolean(NULLTOKEN_OPTIONSTRING, NULLTOKEN_OPTIONDEFAULT);
inplaceTokens = opt.getBoolean(INPLACEOP_OPTIONSTRING, INPLACEOP_OPTIONDEFAULT);
@ -592,6 +625,9 @@ public class DecompileOptions {
opt.registerOption(SPLITPOINTERS_OPTIONSTRING, SPLITPOINTERS_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "AnalysisSplitPointers"),
SPLITPOINTERS_OPTIONDESCRIPTION);
opt.registerOption(NANIGNORE_OPTIONSTRING, NANIGNORE_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "AnalysisNanIgnore"),
NANIGNORE_OPTIONDESCRIPTION);
opt.registerOption(NULLTOKEN_OPTIONSTRING, NULLTOKEN_OPTIONDEFAULT,
new HelpLocation(HelpTopics.DECOMPILER, "DisplayNull"), NULLTOKEN_OPTIONDESCRIPTION);
opt.registerOption(INPLACEOP_OPTIONSTRING, INPLACEOP_OPTIONDEFAULT,
@ -758,6 +794,9 @@ public class DecompileOptions {
String p3 = splitPointers ? "pointer" : "";
appendOption(encoder, ELEM_SPLITDATATYPE, p1, p2, p3);
}
if (nanIgnore != NANIGNORE_OPTIONDEFAULT) {
appendOption(encoder, ELEM_NANIGNORE, nanIgnore.getOptionString(), "", "");
}
appendOption(encoder, ELEM_READONLY, readOnly ? "on" : "off", "", "");
// Must set language early so that the object is in place before other option changes

View File

@ -423,5 +423,6 @@ public record ElementId(String name, int id) {
public static final ElementId ELEM_SPLITDATATYPE = new ElementId("splitdatatype", 270);
public static final ElementId ELEM_JUMPTABLEMAX = new ElementId("jumptablemax", 271);
public static final ElementId ELEM_NANIGNORE = new ElementId("nanignore", 272);
// ELEM_UNKNOWN is declared exactly once and takes the id after the last real element
public static final ElementId ELEM_UNKNOWN = new ElementId("XMLunknown", 273);
}