Merge remote-tracking branch 'origin/Ghidra_9.0.3'

This commit is contained in:
ghidravore 2019-04-17 18:49:15 -04:00
commit 8f9a8dd1b1
15 changed files with 258 additions and 77 deletions

28
.gitignore vendored
View File

@ -1,11 +1,14 @@
*.class
excludedFiles.txt
ghidra.repos.config
# Misc files produced while executing application
repositories/
Ghidra/.ghidraSvrKeys
wrapper.log*
Thumbs.db
.DS_Store
.svn
excludedFiles.txt
.classpath
.project
ghidra.repos.config
/*/*/*/*/*/bin/
/*/*/*/*/*/build/
@ -21,7 +24,8 @@ ghidra.repos.config
/bin/
**/dist
repositories/
# Ignore Sleigh generated files
*.sla
**/data/build.xml
@ -30,22 +34,15 @@ repositories/
*.settings
*.directory
.gradle/
.settings/
# File locks
*.ulock
*.lock
# Gradle creates these per developer
**/vs/
# Misc files produced while executing application
Ghidra/.ghidraSvrKeys
wrapper.log*
# Ignore object files
*.o
*.obj
*.class
# Ignore MS Visual Studio artifacts
Release
@ -54,6 +51,8 @@ Release
*.suo
*.aps
*.vcproj.*
*.vcxproj.*
.vs/
# Ignore UNIX backup files
*~
@ -62,3 +61,4 @@ Release
# Ignore eclipse project files
.project
.classpath
.settings/

View File

@ -679,8 +679,11 @@ public class CreateThunkFunctionCmd extends BackgroundCommand {
flowType.equals(RefType.CALL_TERMINATOR)) && !flowType.isConditional()) {
// program counter should be assumed to be used
// assume PC is used when considering registers that have been set
Register PC = program.getLanguage().getProgramCounter();
usedRegisters.add(new Varnode(PC.getAddress(), PC.getMinimumByteSize()));
if (PC != null) {
usedRegisters.add(new Varnode(PC.getAddress(), PC.getMinimumByteSize()));
}
setRegisters.removeAll(usedRegisters);
// check that the setRegisters are all hidden, meaning don't care.

View File

@ -1250,7 +1250,7 @@ bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize
/// \brief Push a single character constant to the RPN stack
///
/// For C, a character constant is usually emitted as the character in single quotes.
/// Handle unicode, wide characters, etc.
/// Handle unicode, wide characters, etc. Characters come in with the compiler's raw encoding.
/// \param val is the constant value
/// \param ct is data-type attached to the value
/// \param vn is the Varnode holding the value
@ -1259,10 +1259,17 @@ void PrintC::pushCharConstant(uintb val,const TypeChar *ct,const Varnode *vn,con
{
ostringstream t;
if ((ct->getSize()==1)&&
((val<7)||(val>0x7e)||((val>13)&&(val<0x20)))) // not a good character constant
if ((ct->getSize()==1)&&(val >= 0x80)) {
// For byte characters, the encoding is assumed to be ASCII, UTF-8, or some other
// code-page that extends ASCII. At 0x80 and above, we cannot treat the value as a
// unicode code-point. It's either part of a multi-byte UTF-8 encoding or an unknown
// code-page value. In either case, we print it as an integer.
push_integer(val,1,true,vn,op);
}
else {
// From here on, we assume the constant value is a direct unicode code-point.
// The value could be an illegal code-point (surrogates or beyond the max code-point),
// but this will just be emitted as an escape sequence.
if (doEmitWideCharPrefix() && ct->getSize() > 1)
t << 'L'; // Print symbol indicating wide character
t << '\''; // char is surrounded with single quotes

View File

@ -446,6 +446,10 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint)
if (codepoint == 0x3000) {
return true; // ideographic space
}
if (codepoint >= 0xd7fc) { // D7FC - D7FF are currently unassigned.
// D800 - DFFF are high and low surrogates, technically illegal.
return true; // Treat as needing to be escaped
}
return false;
}
if (codepoint < 0xf900) {

View File

@ -0,0 +1,69 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.extension.datatype.finder;
import java.util.List;
import ghidra.app.decompiler.ClangFieldToken;
import ghidra.app.decompiler.ClangLine;
import ghidra.app.services.DataTypeReference;
import ghidra.program.model.data.Composite;
import ghidra.program.model.data.DataType;
/**
* This class represents the use of a field of a {@link Composite} data type <b>where there is
* no variable in the Decompiler</b> for that data type. A normal variable access in the
* Decompiler may look like so:
* <pre>
* Foo f;
* ...
* return f.some_field;
* </pre>
*
* Alternatively, an anonymous variable access would look like this:
* <pre>
* Bar b;
* ...
* return b-><b>foo_array[1].some_field</b>;
* </pre>
*
* In this case, <code><b>foo_array[1]</b></code> is a <code>Foo</code>, whose
* <code><b>some_field</b></code> is
* being accessed anonymously, since there is no variable of <code>Foo</code> declared
* in the current function.
*/
public class AnonymousVariableAccessDR extends DecompilerReference {

	/**
	 * Creates a reference for a field token that is used without a declared variable.
	 *
	 * @param line the Decompiler line passed on to the parent reference
	 * @param token the field token passed on to the parent reference
	 */
	protected AnonymousVariableAccessDR(ClangLine line, ClangFieldToken token) {
		super(line, token);
	}

	/**
	 * Adds a {@link DataTypeReference} to {@code results} when the data type reported by this
	 * reference's field token is equal to {@code dt} and the token's text equals
	 * {@code fieldName}. Nothing is added when either check fails; in particular, a
	 * {@code null} field name never matches.
	 *
	 * @param dt the data type being searched for
	 * @param fieldName the field name being searched for
	 * @param results the accumulator into which a match is placed
	 */
	@Override
	public void accumulateMatches(DataType dt, String fieldName, List<DataTypeReference> results) {

		ClangFieldToken fieldToken = (ClangFieldToken) sourceToken;
		DataType tokenType = fieldToken.getDataType();

		// the type check comes first; the name is only compared when the types agree
		boolean isMatch = isEqual(dt, tokenType) && fieldToken.getText().equals(fieldName);
		if (isMatch) {
			DataTypeReference reference = new DataTypeReference(tokenType, fieldName,
				getFunction(), getAddress(), getContext());
			results.add(reference);
		}
	}
}

View File

@ -20,6 +20,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.function.Predicate;
import org.apache.commons.collections4.IterableUtils;
import ghidra.app.decompiler.*;
import ghidra.app.decompiler.component.DecompilerUtils;
import ghidra.app.decompiler.parallel.*;
@ -225,13 +227,13 @@ public class DecompilerDataTypeReferenceFinder implements DataTypeReferenceFinde
private DataType dataType;
private String fieldName;
/** Search for Data Type access only--no field usage */
/* Search for Data Type access only--no field usage */
DecompilerDataTypeFinderQCallback(Program program, DataType dataType,
Consumer<DataTypeReference> callback) {
this(program, dataType, null, callback);
}
/** Search for composite field access */
/* Search for composite field access */
DecompilerDataTypeFinderQCallback(Program program, DataType dataType, String fieldName,
Consumer<DataTypeReference> callback) {
@ -336,14 +338,38 @@ public class DecompilerDataTypeReferenceFinder implements DataTypeReferenceFinde
return result;
}
/**
* Uses the given line to find variables (also parameters and return types) and any
* accesses to them in that line. A given variable may be used directly or, as in
* the case with Composite types, may have one of its fields accessed. Each result
* found by this method will be at least a variable access and may also itself have
* field accesses.
*
* <p>Sometimes a line is structured such that there are anonymous variable accesses. This
* is the case where a Composite is being accessed, but the Composite itself is
* not a variable in the current function. See {@link AnonymousVariableAccessDR} for
* more details.
*
* @param line the current line being processed from the Decompiler
* @param results the accumulator into which matches will be placed
*/
private void findVariablesInLine(ClangLine line, List<DecompilerReference> results) {
List<ClangToken> allTokens = line.getAllTokens();
Iterable<ClangToken> filteredTokens = IterableUtils.filteredIterable(allTokens,
token -> {
// Only include desirable tokens (this is really just for easier debugging).
// Update this filter if the loop below ever needs other types of tokens.
return (token instanceof ClangTypeToken) ||
(token instanceof ClangVariableToken) || (token instanceof ClangFieldToken);
});
// gather any casts until we can use them (the type they modify will follow)
List<DecompilerVariable> castsSoFar = new ArrayList<>();
VariableDR declaration = null;
VariableAccessDR access = null;
for (ClangToken token : line.getAllTokens()) {
for (ClangToken token : filteredTokens) {
if (token instanceof ClangTypeToken) {
@ -371,16 +397,15 @@ public class DecompilerDataTypeReferenceFinder implements DataTypeReferenceFinde
//
// Observations:
// 1) 'variableAccess' will be null if we are on a C statement that
// is a declaration (parameter or variable). In this case, 'ref' will
// be an instance of VariableDR.
// 2) 'variableAccess' will be null the first time a variable is used in
// 1) 'access' will be null if we are on a C statement that
// is a declaration (parameter or variable). In this case,
// 'declaration' will be an instance of VariableDR.
// 2) 'access' will be null the first time a variable is used in
// a statement.
// 3) if 'variableAccess' is non-null, but already has a variable assigned,
// 3) if 'access' is non-null, but already has a variable assigned,
// then this means the current ClangVariableToken represents a new
// variable access/usage.
//
if (declaration != null) {
declaration.setVariable((ClangVariableToken) token);
declaration = null;
@ -415,12 +440,32 @@ public class DecompilerDataTypeReferenceFinder implements DataTypeReferenceFinde
}
}
access.addField((ClangFieldToken) token, casts);
ClangFieldToken field = (ClangFieldToken) token;
if (typesDoNotMatch(access, field)) {
// this can happen when a field is used anonymously, such as directly
// after a nested array index operation
results.add(new AnonymousVariableAccessDR(line, field));
continue;
}
access.addField(field, casts);
castsSoFar.clear();
}
}
}
/**
 * Checks whether the data type of the variable in the given access differs from the
 * data type reported by the given field token.
 *
 * @param access the variable access whose variable is examined
 * @param field the field token whose data type is compared
 * @return true if the two data types are not equal; false if they are equal or if the
 *         access has no variable
 */
private boolean typesDoNotMatch(VariableAccessDR access, ClangFieldToken field) {

	DecompilerVariable accessedVariable = access.getVariable();
	if (accessedVariable != null) {
		DataType accessedType = accessedVariable.getDataType();
		DataType tokenType = field.getDataType();
		boolean sameType = DecompilerReference.isEqual(accessedType, tokenType);
		return !sameType;
	}

	// no variable to compare against (not expected to happen); report no mismatch
	return false;
}
private VariableAccessDR getLastAccess(List<DecompilerReference> variables) {
// for now, assume that the last access will be the last item we added
if (variables.isEmpty()) {

View File

@ -83,6 +83,10 @@ public abstract class DecompilerReference {
return var.getAddress();
}
/**
 * Returns the {@link ClangLine} held by this reference.
 *
 * @return the line
 */
public ClangLine getLine() {
	return this.line;
}
protected String getContext() {
String context = getContext(variable);
return context;

View File

@ -52,7 +52,7 @@ public abstract class DecompilerVariable {
}
// Note: this is the icky part of the API. How to know from where to get the data type?
HighVariable highVariable = variable.getHighVariable();
HighVariable highVariable = variable.getHighVariable();
if (highVariable != null) {
return highVariable.getDataType();
}
@ -62,7 +62,7 @@ public abstract class DecompilerVariable {
if (dataType != null) {
return dataType;
}
// Prefer the type of the first input varnode, unless that type is a 'void *'.
// Usually, in that special case, the output varnode has the correct type information.
PcodeOp op = variable.getPcodeOp();
@ -104,7 +104,19 @@ public abstract class DecompilerVariable {
}
Varnode output = op.getOutput();
return output.getHigh().getDataType();
if (output == null) {
// can happen when a variable in volatile memory is used in a write_volatile
// pseudo operation
return null;
}
HighVariable high = output.getHigh();
if (high == null) {
// not sure if this can happen; just in case
return null;
}
return high.getDataType();
}
private DataType getDataType(Varnode varnode) {

View File

@ -8,8 +8,8 @@ dependencies {
compile "cglib:cglib-nodep:2.2"
compile "com.google.guava:guava:19.0"
compile "org.jdom:jdom-legacy:1.1.3"
compile "org.apache.logging.log4j:log4j-api:2.8.1"
compile "org.apache.logging.log4j:log4j-core:2.8.1"
compile "org.apache.logging.log4j:log4j-api:2.8.2"
compile "org.apache.logging.log4j:log4j-core:2.8.2"
compile "org.apache.commons:commons-collections4:4.1"
compile "org.apache.commons:commons-lang3:3.5"
compile "org.lucee:commons-io:2.6.0"

View File

@ -2,13 +2,13 @@
##MODULE IP: Apache License 2.0
##MODULE IP: BSD
##MODULE IP: Christian Plattner
##MODULE IP: Crystal Clear Icons - LGPL 2.1
##MODULE IP: JDOM License
##MODULE IP: MIT
##MODULE IP: Nuvola Icons - LGPL 2.1
##MODULE IP: Modified Nuvola Icons - LGPL 2.1
##MODULE IP: Tango Icons - Public Domain
##MODULE IP: Nuvola Icons - LGPL 2.1
##MODULE IP: Oxygen Icons - LGPL 3.0
##MODULE IP: Crystal Clear Icons - LGPL 2.1
##MODULE IP: Tango Icons - Public Domain
.classpath||GHIDRA||||END|
.gitignore||GHIDRA||||END|
.project||GHIDRA||||END|

View File

@ -261,7 +261,16 @@ public class PcodeDataTypeManager {
if (type instanceof Array) {
return buildType(type, size);
}
if (!(type instanceof FunctionDefinition) && type.getLength() <= 0) {
if (type instanceof FunctionDefinition) {
long id = progDataTypes.getID(type);
if (id <= 0) {
// It's possible the FunctionDefinition was built on the fly and is not
// a permanent data-type of the program with an ID. In this case, we can't
// construct a <typeref> tag but must build a full <type> tag.
return buildType(type, size);
}
}
else if (type.getLength() <= 0) {
return buildType(type, size);
}
StringBuilder resBuf = new StringBuilder();

View File

@ -78,35 +78,46 @@ define pcodeop readIRQ;
################################################################
REL: reloc is rel [ reloc = inst_next + rel; ] { export *:2 reloc; }
# Immediate
OP1: "#"imm8 is bbb=2; imm8 { tmp:1 = imm8; export tmp; }
# Zero Page
OP1: imm8 is bbb=1; imm8 { export *:1 imm8; }
# Zero Page Indexed X
OP1: imm8,X is bbb=5 & X; imm8 { tmp:2 = zext(imm8 + X); export *:1 tmp; }
# Absolute
OP1: imm16 is bbb=3; imm16 { export *:1 imm16; }
# Absolute Indexed X
OP1: imm16,X is bbb=7 & X; imm16 { tmp:2 = imm16 + zext(X); export *:1 tmp; }
# Absolute Indexed Y
OP1: imm16,Y is bbb=6 & Y; imm16 { tmp:2 = imm16 + zext(Y); export *:1 tmp; }
# Indirect X
OP1: (imm8,X) is bbb=0 & X; imm8 { addr:2 = zext(imm8 + X); tmp:2 = *:2 addr; export *:1 tmp; }
# Indirect Y
OP1: (imm8),Y is bbb=4 & Y; imm8 { addr:2 = imm8; tmp:2 = *:2 addr; tmp = tmp + zext(Y); export *:1 tmp; }
OP1: (imm8,X) is bbb=0 & X; imm8 { addr:2 = imm8 + zext(X); tmp:2 = *:2 addr; export *:1 tmp; }
OP1: imm8 is bbb=1; imm8 { export *:1 imm8; }
OP1: "#"imm8 is bbb=2; imm8 { tmp:1 = imm8; export tmp; }
OP1: imm16 is bbb=3; imm16 { export *:1 imm16; }
OP1: (imm8),Y is bbb=4 & Y; imm8 { addr:2 = imm8; tmp:2 = *:2 addr; tmp = tmp + zext(Y); export *:1 tmp; }
OP1: imm8,X is bbb=5 & X; imm8 { tmp:2 = imm8 + zext(X); export *:1 tmp; }
OP1: imm16,Y is bbb=6 & Y; imm16 { tmp:2 = imm16 + zext(Y); export *:1 tmp; }
OP1: imm16,X is bbb=7 & X; imm16 { tmp:2 = imm16 + zext(X); export *:1 tmp; }
# Immediate
OP2: "#"imm8 is bbb=0; imm8 { tmp:1 = imm8; export tmp; }
# Zero Page
OP2: imm8 is bbb=1; imm8 { export *:1 imm8; }
OP2: A is bbb=2 & A { export A; }
# Absolute
OP2: imm16 is bbb=3; imm16 { export *:1 imm16; }
# Zero Page Indexed X
OP2: imm8,X is bbb=5 & X; imm8 { tmp:2 = zext(imm8 + X); export *:1 tmp; }
# Absolute Indexed X
OP2: imm16,X is bbb=7 & X; imm16 { tmp:2 = imm16 + zext(X); export *:1 tmp; }
OP2ST: OP2 is OP2 { export OP2; }
OP2ST: imm8,Y is bbb=5 & Y; imm8 { tmp:2 = zext(imm8 + Y); export *:1 tmp; }
OP2LD: OP2 is OP2 { export OP2; }
OP2LD: imm8,Y is bbb=5 & Y; imm8 { tmp:2 = zext(imm8 + Y); export *:1 tmp; }
OP2LD: imm16,Y is bbb=7 & Y; imm16 { tmp:2 = imm16 + zext(Y); export *:1 tmp; }
OP2: "#"imm8 is bbb=0; imm8 { tmp:1 = imm8; export tmp; }
OP2: imm8 is bbb=1; imm8 { export *:1 imm8; }
OP2: A is bbb=2 & A { export A; }
OP2: imm16 is bbb=3; imm16 { export *:1 imm16; }
OP2: imm8,X is bbb=5 & X; imm8 { tmp:2 = imm8 + zext(X); export *:1 tmp; }
OP2: imm16,X is bbb=7 & X; imm16 { tmp:2 = imm16 + zext(X); export *:1 tmp; }
OP2ST: OP2 is OP2 { export OP2; }
OP2ST: imm8,Y is bbb=5 & Y; imm8 { tmp:2 = imm8 + zext(Y); export *:1 tmp; }
OP2LD: OP2 is OP2 { export OP2; }
OP2LD: imm8,Y is bbb=5 & Y; imm8 { tmp:2 = imm8 + zext(Y); export *:1 tmp; }
OP2LD: imm16,Y is bbb=7 & Y; imm16 { tmp:2 = imm16 + zext(Y); export *:1 tmp; }
ADDR8: imm8 is imm8 { export *:1 imm8; }
ADDR16: imm16 is imm16 { export *:1 imm16; }
ADDRI: imm16 is imm16 { tmp:2 = imm16; export *:2 tmp; }
ADDR8: imm8 is imm8 { export *:1 imm8; }
ADDR16: imm16 is imm16 { export *:1 imm16; }
ADDRI: imm16 is imm16 { tmp:2 = imm16; export *:2 tmp; }
# Instructions

View File

@ -306,6 +306,8 @@ define pcodeop saveFPUStateFrame;
define pcodeop restoreFPUStateFrame;
define pcodeop pushInvalidateCaches;
define pcodeop bcdAdjust;
define pcodeop sin;
define pcodeop cos;
define pcodeop tan;
@ -638,12 +640,15 @@ macro clearflags_fp() {
$(NAN_FP) = 0;
}
# SCR 10997:
macro bcdflags(result) {
XF = CF;
ZF = (result == 0) * ZF + (result != 0);
}
macro getbit(res,in,bitnum) {
res = ((in >> bitnum) & 1) != 0;
}
# SCR 10997:
macro bitmask(res, width) {
res = (1 << width) - 1;
}
@ -656,7 +661,6 @@ macro getbitfield(res, off, width) {
res = (res << off) >> (32 - width);
}
# SCR 10997:
macro resbitflags(result, bitnum) {
NF = ((result >> bitnum) & 1) != 0;
ZF = result == 0;
@ -687,7 +691,7 @@ macro extendedResultFlags(result) {
with : extGUARD=1 {
:abcd Tyb,Txb is op=12 & op48=16 & Tyb & Txb unimpl
:abcd Tyb,Txb is op=12 & op48=16 & Tyb & Txb { CF = carry(Tyb,carry(Txb,XF)); Tyb = Tyb + Txb + XF; Tyb = bcdAdjust(Tyb); bcdflags(Tyb); }
:add.b eab,reg9dnb is (op=13 & reg9dnb & op68=0)... & eab { addflags(eab,reg9dnb); reg9dnb = reg9dnb + eab; resflags(reg9dnb); }
:add.w eaw,reg9dnw is (op=13 & reg9dnw & op68=1)... & eaw { addflags(eaw,reg9dnw); reg9dnw = reg9dnw + eaw; resflags(reg9dnw); }
@ -1391,7 +1395,8 @@ submul: regdr-regdq is regdq & divsgn=0 & divsz=1 & regdr { tmp1:8 = zext(glb
regdr=res(4); resflags(res); CF=0; VF=0; }
:mul^mulsize e2l,submul is opbig=0x4c & op67=0 & $(DAT_ALTER_ADDR_MODES); submul & mulsize; e2l [ savmod2=savmod1; regtsan=regtfan; ] { glbdenom=e2l; build submul; }
:nbcd eab is (opbig=0x48 & op67=0 & $(DAT_ALTER_ADDR_MODES))... & eab unimpl
:nbcd eab is (opbig=0x48 & op67=0 & $(DAT_ALTER_ADDR_MODES))... & eab
{ tmp:1 = eab; CF = (tmp != 0) || (XF == 1); tmp = 0 - tmp - XF; eab = bcdAdjust(tmp); bcdflags(tmp); }
# NB: For the neg insn the CF carry flag is not set like other insns, from the manual:
@ -1600,7 +1605,8 @@ ptestLevel: "#"^mregn is mregn { export *[const]:1 mregn; }
:rts is opbig=0x4e & op37=14 & op02=5 { PC = *SP; SP = SP+4; return [PC]; }
:sbcd Tyb,Txb is op=8 & op48=16 & Txb & Tyb unimpl
:sbcd Tyb,Txb is op=8 & op48=16 & Txb & Tyb
{ CF = (Tyb < Txb) || ( (XF == 1) && (Tyb == Txb) ); Tyb = Tyb - Txb - XF; Tyb = bcdAdjust(Tyb); bcdflags(Tyb); }
:s^cc eab is (op=5 & cc & op67=3 & $(DAT_ALTER_ADDR_MODES))... & eab { eab = -cc; }

View File

@ -56,8 +56,9 @@ public class ConstantPoolDex extends ConstantPool {
String classString =
DexUtil.convertTypeIndexToString(dexHeader, fieldIDItem.getClassIndex());
String[] pathArray = DexUtil.convertClassStringToPathArray("", classString);
if (pathArray != null)
if (pathArray != null) {
res.token = pathArray[pathArray.length - 1] + '.' + res.token;
}
}
DataType fieldDT = dexHeader.getDataType(program, fieldIDItem.getTypeIndex());
@ -72,13 +73,16 @@ public class ConstantPoolDex extends ConstantPool {
private String removeUniquifier(String name) {
int len = name.length();
if (len < 10 || name.charAt(len - 9) != '_')
if (len < 10 || name.charAt(len - 9) != '_') {
return name;
}
char matchChar = name.charAt(len - 8);
if (matchChar != '5' && matchChar != 'e')
if (matchChar != '5' && matchChar != 'e') {
return name;
if (name.charAt(len - 7) != '0')
}
if (name.charAt(len - 7) != '0') {
return name;
}
return name.substring(0, len - 9);
}
@ -103,14 +107,19 @@ public class ConstantPoolDex extends ConstantPool {
String classString =
DexUtil.convertTypeIndexToString(dexHeader, methodIDItem.getClassIndex());
String[] pathArray = DexUtil.convertClassStringToPathArray("", classString);
if (pathArray != null)
if (pathArray != null) {
namespaceString = pathArray[pathArray.length - 1];
}
}
if (namespaceString != null)
if (namespaceString != null) {
res.token = namespaceString + '.' + res.token;
}
}
res.tag = ConstantPool.POINTER_METHOD;
FunctionDefinitionDataType funcDef = new FunctionDefinitionDataType(res.token, dtManager);
// The FunctionDefinition is constructed on the fly, essentially as an anonymous type
// We use an internal naming scheme involving the methodID to avoid name collisions
String defName = res.token + '_' + Integer.toHexString(methodID);
FunctionDefinitionDataType funcDef = new FunctionDefinitionDataType(defName, dtManager);
res.type = new PointerDataType(funcDef);
res.hasThisPtr = !isStatic;

View File

@ -4,5 +4,7 @@ eclipse.project.name = '_JsonDoclet'
apply plugin: 'java'
dependencies {
compile 'com.googlecode.json-simple:json-simple:1.1.1'
compile('com.googlecode.json-simple:json-simple:1.1.1') {
exclude group: 'junit', module: 'junit'
}
}