Merge remote-tracking branch 'origin/GT-3414_dev747368_slow_stringtable'

Fixes #1259
This commit is contained in:
ghidorahrex 2020-01-15 13:29:25 -05:00
commit cf0c434d51
8 changed files with 371 additions and 65 deletions

View File

@ -21,6 +21,7 @@ import ghidra.program.model.data.StringDataInstance;
import ghidra.program.model.data.TranslationSettingsDefinition;
import ghidra.program.model.listing.Data;
import ghidra.program.util.DefinedDataIterator;
import util.CollectionUtils;
public class TranslateStringsScript extends GhidraScript {
@ -39,7 +40,8 @@ public class TranslateStringsScript extends GhidraScript {
int count = 0;
monitor.initialize(currentProgram.getListing().getNumDefinedData());
monitor.setMessage("Translating strings");
for (Data data : DefinedDataIterator.definedStrings(currentProgram, currentSelection)) {
for (Data data : CollectionUtils.asIterable(
DefinedDataIterator.definedStrings(currentProgram, currentSelection))) {
if (monitor.isCancelled()) {
break;
}

View File

@ -30,6 +30,7 @@ import ghidra.program.model.data.*;
import ghidra.program.model.listing.*;
import ghidra.program.util.*;
import ghidra.util.StringUtilities;
import ghidra.util.Swing;
import ghidra.util.datastruct.Accumulator;
import ghidra.util.exception.CancelledException;
import ghidra.util.table.AddressBasedTableModel;
@ -111,8 +112,8 @@ class ViewStringsTableModel extends AddressBasedTableModel<ProgramLocation> {
Listing listing = localProgram.getListing();
monitor.setCancelEnabled(true);
monitor.initialize((int) listing.getNumDefinedData());
monitor.initialize(listing.getNumDefinedData());
Swing.allowSwingToProcessEvents();
for (Data stringInstance : DefinedDataIterator.definedStrings(localProgram)) {
accumulator.add(createIndexedStringInstanceLocation(localProgram, stringInstance));
monitor.checkCanceled();

View File

@ -695,6 +695,14 @@ public class ProgramBuilder {
return c;
}
/**
 * Creates data of the given type and explicit length at the specified address,
 * requesting {@code ClearDataMode.CLEAR_ALL_CONFLICT_DATA} for anything already there.
 * <p>
 * The call is wrapped in a transaction that is always ended, even when data creation
 * fails, so a failed insertion does not leave the transaction open.
 *
 * @param addressString string form of the address, converted via {@code addr(String)}
 * @param dt data type to apply
 * @param length length value handed to {@code DataUtilities.createData}
 * @throws CodeUnitInsertionException if {@code DataUtilities.createData} rejects the request
 */
public void applyFixedLengthDataType(String addressString, DataType dt, int length)
		throws CodeUnitInsertionException {
	startTransaction();
	try {
		DataUtilities.createData(program, addr(addressString), dt, length, false,
			ClearDataMode.CLEAR_ALL_CONFLICT_DATA);
	}
	finally {
		// always close the transaction opened above, even if createData threw
		endTransaction();
	}
}
/**
 * Convenience overload that forwards to the three-argument {@code applyDataType}
 * with a third argument of 1.
 *
 * @param addressString string form of the address to apply the data type at
 * @param dt data type to apply
 */
public void applyDataType(String addressString, DataType dt) {
// NOTE(review): the meaning of the literal 1 is defined by the 3-arg overload
// (presumably a repeat count) - confirm against that method
applyDataType(addressString, dt, 1);
}
@ -874,7 +882,7 @@ public class ProgramBuilder {
}
public Data createString(String address, String string, Charset charset, boolean nullTerminate,
AbstractStringDataType dataType) throws Exception {
DataType dataType) throws Exception {
if (nullTerminate) {
string = string + "\0";
}
@ -883,7 +891,7 @@ public class ProgramBuilder {
}
public Data createString(String address, byte[] stringBytes, Charset charset,
AbstractStringDataType dataType) throws Exception {
DataType dataType) throws Exception {
Address addr = addr(address);
setBytes(address, stringBytes);
if (dataType != null) {

View File

@ -72,10 +72,17 @@ public class DefinedStringIteratorTest extends AbstractGhidraHeadlessIntegration
builder.createEncodedString("0x500", "This is the last string", StandardCharsets.US_ASCII,
false);
ArrayDataType charArray = new ArrayDataType(new CharDataType(), 50, 1);
builder.createString("0x600", "The 600 chararray", StandardCharsets.US_ASCII, true,
charArray);
// create an empty area for tests to do their own thing
builder.createUninitializedMemory("uninitialized", "0x3000", 100);
builder.createUninitializedMemory("uninitialized", "0x3000", 0x1000);
builder.applyDataType("0x3100", charArray);
builder.applyFixedLengthDataType("0x3200", new StringDataType(), 10);
program = builder.getProgram();
}
@Test
@ -117,6 +124,11 @@ public class DefinedStringIteratorTest extends AbstractGhidraHeadlessIntegration
assertEquals(addr(0x500), foundString.getAddress());
assertEquals("This is the last string", foundString.getString(program.getMemory()));
assertTrue(iterator.hasNext());
foundString = iterator.next();
assertEquals(addr(0x600), foundString.getAddress());
assertEquals("The 600 chararray", foundString.getString(program.getMemory()));
assertFalse(iterator.hasNext());
}

View File

@ -0,0 +1,151 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.program.util;
import static org.junit.Assert.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.junit.Before;
import org.junit.Test;
import ghidra.program.database.ProgramDB;
import ghidra.program.model.data.*;
import ghidra.program.model.listing.Data;
import ghidra.program.model.util.CodeUnitInsertionException;
import ghidra.test.AbstractGhidraHeadlessIntegrationTest;
import ghidra.test.ToyProgramBuilder;
import util.CollectionUtils;
/**
 * Tests for {@link DefinedDataIterator}: iteration filtered by data type, by data
 * instance, and the string-specific iterators, including data nested inside
 * structures and arrays of structures.
 */
public class DefinedDataIteratorTest extends AbstractGhidraHeadlessIntegrationTest {

	private ToyProgramBuilder builder;
	private ProgramDB program;
	private DataTypeManager dtm;
	private DataType intDT;
	private StringDataType stringDT;
	private CharDataType charDT;
	private DataType charArray;
	private StructureDataType struct1DT;
	private ArrayDataType structArray;
	private StructureDataType struct2DT;
	private TypeDef intTD;

	@Before
	public void setUp() throws Exception {
		builder = new ToyProgramBuilder("DefinedDataIteratorTests", false);
		program = builder.getProgram();
		dtm = program.getDataTypeManager();

		intDT = AbstractIntegerDataType.getSignedDataType(4, dtm);
		intTD = new TypedefDataType("int_typedef", intDT);
		stringDT = StringDataType.dataType;
		charDT = new CharDataType(dtm);
		charArray = new ArrayDataType(charDT, 20, charDT.getLength());

		// struct1: int at +0, char[20] at +10, 10 byte string at +50
		struct1DT = new StructureDataType("struct1", 100);
		struct1DT.replaceAtOffset(0, intDT, intDT.getLength(), "f1", null);
		struct1DT.replaceAtOffset(10, charArray, charArray.getLength(), "f2", null);
		struct1DT.replaceAtOffset(50, stringDT, 10, "f3", null);

		structArray = new ArrayDataType(struct1DT, 10, struct1DT.getLength());

		struct2DT = new StructureDataType("struct2", 200);
		struct2DT.replaceAtOffset(0, intDT, intDT.getLength(), "f1", null);
		// NOTE(review): the length passed here is the int's length, not struct1's;
		// struct2DT is not used by any test in this class - confirm intent
		struct2DT.replaceAtOffset(10, struct1DT, intDT.getLength(), "f2", null);

		builder.createMemory("test", "0x0", 0x2000);
		program = builder.getProgram();
	}

	/**
	 * Returns the address offset of the list element at the given index.
	 */
	private static long offsetOf(List<Data> list, int index) {
		return list.get(index).getAddress().getOffset();
	}

	@Test
	public void test_Ints() throws Exception {
		builder.applyFixedLengthDataType("0x0", intDT, intDT.getLength());
		builder.createString("0x10", "test1", StandardCharsets.UTF_8, true, stringDT);
		builder.applyFixedLengthDataType("0x100", struct1DT, struct1DT.getLength());

		List<Data> list = CollectionUtils.asList((Iterable<Data>)
			DefinedDataIterator.byDataType(program, dt -> dt instanceof IntegerDataType));

		// check the size first so a wrong count fails as an assertion and not
		// as an IndexOutOfBoundsException
		assertEquals(2, list.size());
		assertEquals(0x0, offsetOf(list, 0));
		assertEquals(0x100, offsetOf(list, 1));
	}

	@Test
	public void test_Strings() throws Exception {
		builder.applyFixedLengthDataType("0x0", intDT, intDT.getLength());
		builder.createString("0x10", "test1", StandardCharsets.UTF_8, true, stringDT);
		builder.applyFixedLengthDataType("0x100", struct1DT, struct1DT.getLength());

		List<Data> list =
			CollectionUtils.asList((Iterable<Data>) DefinedDataIterator.definedStrings(program));

		assertEquals(3, list.size());
		assertEquals(0x10, offsetOf(list, 0));
		assertEquals(0x100 + 10, offsetOf(list, 1));
		assertEquals(0x100 + 50, offsetOf(list, 2));
	}

	@Test
	public void test_ArrayOfStructs() throws Exception {
		builder.applyFixedLengthDataType("0x0", intDT, intDT.getLength());
		builder.createString("0x10", "test1", StandardCharsets.UTF_8, true, stringDT);
		builder.applyFixedLengthDataType("0x100", structArray, structArray.getLength());

		int numElements = structArray.getNumElements();
		int lastEle = numElements - 1;
		int elementSize = structArray.getElementLength();

		List<Data> list =
			CollectionUtils.asList((Iterable<Data>) DefinedDataIterator.definedStrings(program));

		// one stand-alone string plus two string-like fields per array element
		assertEquals(1 + (numElements * 2), list.size());

		assertEquals(0x10, offsetOf(list, 0));
		assertEquals(0x100 + 10, offsetOf(list, 1 + 0));
		assertEquals(0x100 + 50, offsetOf(list, 1 + 1));

		assertEquals(0x100 + (elementSize * lastEle) + 10,
			offsetOf(list, 1 + (lastEle * 2) + 0));
		assertEquals(0x100 + (elementSize * lastEle) + 50,
			offsetOf(list, 1 + (lastEle * 2) + 1));
	}

	@Test
	public void test_Typedefs() throws CodeUnitInsertionException {
		// 3 ints: 2 are typedefs, 1 is regular int
		builder.applyFixedLengthDataType("0x0", intTD, intTD.getLength());
		builder.applyFixedLengthDataType("0x10", intTD, intTD.getLength());
		builder.applyFixedLengthDataType("0x20", intDT, intDT.getLength());

		// iterating by data type ignores typedefs, so we should get all 3 ints
		List<Data> list = CollectionUtils.asList((Iterable<Data>)
			DefinedDataIterator.byDataType(program, dt -> dt instanceof IntegerDataType));

		assertEquals(3, list.size());

		// iterating by data instance, we can inspect the actual data type and get the
		// typedef
		list = CollectionUtils.asList((Iterable<Data>) DefinedDataIterator.byDataInstance(program,
			data -> data.getDataType() instanceof TypeDef));

		assertEquals(2, list.size());
	}
}

View File

@ -65,6 +65,25 @@ public class StringDataInstance {
return false;
}
/**
 * Tests whether a {@link DataType} is, or might be, a string type.
 * <p>
 * A typedef is first replaced by its base data type. The result is then true for any
 * {@link AbstractStringDataType}, and for any {@link Array} whose element type yields
 * a non-null {@link ArrayStringable}. For arrays this is only a type-level answer;
 * the data instance itself still has to be examined to know whether it really
 * holds a string.
 *
 * @param dt DataType to test
 * @return boolean true if data type is or could be a string
 */
public static boolean isStringDataType(DataType dt) {
	DataType baseDT = (dt instanceof TypeDef) ? ((TypeDef) dt).getBaseDataType() : dt;
	if (baseDT instanceof AbstractStringDataType) {
		return true;
	}
	if (!(baseDT instanceof Array)) {
		return false;
	}
	DataType elementDT = ((Array) baseDT).getDataType();
	return ArrayStringable.getArrayStringable(elementDT) != null;
}
/**
* Returns true if the {@link Data} instance is one of the many 'char' data types.
*

View File

@ -15,8 +15,8 @@
*/
package ghidra.program.model.listing;
import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import util.CollectionUtils;
@ -25,8 +25,18 @@ import util.CollectionUtils;
*
* @see CollectionUtils#asIterable
*/
public interface DataIterator extends Iterator<Data>, Iterable<Data> {
public static final DataIterator EMPTY = createEmptyIterator();
public interface DataIterator extends Iterator<Data>, Iterable<Data> {
public static final DataIterator EMPTY = of(/*nothing*/);
/**
 * Builds a DataIterator that hands back the given data instances one after another.
 *
 * @param dataInstances zero or more items for the new iterator to return
 * @return new DataIterator over the supplied items
 */
public static DataIterator of(Data... dataInstances) {
	Iterator<Data> source = Arrays.asList(dataInstances).iterator();
	return new IteratorWrapper(source);
}
@Override
public boolean hasNext();
@ -40,15 +50,25 @@ public interface DataIterator extends Iterator<Data>, Iterable<Data> {
}
// --------------------------------------------------------------------------------
// Helper static methods
// Helper static stuff
// --------------------------------------------------------------------------------
public static DataIterator createEmptyIterator() {
return new DataIterator() {
//@formatter:off
@Override public Data next() { throw new NoSuchElementException(); }
@Override public void remove() { throw new IllegalStateException(); }
@Override public boolean hasNext() { return false; }
//@formatter:on
};
/**
 * Adapts a plain {@code Iterator<Data>} to the DataIterator interface by
 * forwarding {@code hasNext()} and {@code next()} to it.
 */
static class IteratorWrapper implements DataIterator {

	private final Iterator<Data> delegate;

	IteratorWrapper(Iterator<Data> wrapped) {
		delegate = wrapped;
	}

	@Override
	public boolean hasNext() {
		return delegate.hasNext();
	}

	@Override
	public Data next() {
		return delegate.next();
	}
}
}

View File

@ -15,8 +15,7 @@
*/
package ghidra.program.util;
import java.util.LinkedList;
import java.util.Queue;
import java.util.*;
import java.util.function.Predicate;
import ghidra.program.model.address.AddressSetView;
@ -24,7 +23,7 @@ import ghidra.program.model.data.*;
import ghidra.program.model.listing.*;
/**
* Iterator that visits each defined data instance in a Program or in the footprint of
* Iterator that visits each defined data instance in the initialized memory of a Program or in the footprint of
* a specified data element.
* <p>
* Data elements that are nested inside of composites or arrays are visited, not just the
@ -33,7 +32,7 @@ import ghidra.program.model.listing.*;
public class DefinedDataIterator implements DataIterator {
/**
* Creates a new iterator that traverses the entire Program's address space, visiting
* Creates a new iterator that traverses the entire Program's address space, returning
* data instances that successfully match the predicate.
*
* @param program Program to search
@ -42,29 +41,47 @@ public class DefinedDataIterator implements DataIterator {
*/
public static DefinedDataIterator byDataType(Program program,
Predicate<DataType> dataTypePredicate) {
return new DefinedDataIterator(program, null,
data -> dataTypePredicate.test(data.getBaseDataType()));
return new DefinedDataIterator(program, null, dataTypePredicate, null);
}
/**
* Creates a new iterator that traverses the entire Program's address space.
* Creates a new iterator that traverses the entire Program's address space, returning
* data instances that successfully match the predicate.
*
* @param program Program to search
* @param dataInstancePredicate {@link Predicate} that tests each data instance's properties
* @return new iterator
*/
public static DefinedDataIterator byDataInstance(Program program,
		Predicate<Data> dataInstancePredicate) {
	// no address restriction and no data type filter; only the instance predicate applies
	AddressSetView noAddressLimit = null;
	Predicate<DataType> noDataTypeFilter = null;
	return new DefinedDataIterator(program, noAddressLimit, noDataTypeFilter,
		dataInstancePredicate);
}
/**
* Creates a new iterator that traverses the entire Program's address space returning
* data instances that are strings.
*
* @param program Ghidra {@link Program} to search
* @return new iterator
*/
public static DefinedDataIterator definedStrings(Program program) {
return new DefinedDataIterator(program, null, data -> StringDataInstance.isString(data));
return new DefinedDataIterator(program, null,
dataType -> StringDataInstance.isStringDataType(dataType),
data -> StringDataInstance.isString(data));
}
/**
* Creates a new iterator that traverses a portion of the Program's address space.
* Creates a new iterator that traverses a portion of the Program's address space returning
* data instances that are strings.
*
* @param program Ghidra {@link Program} to search
* @param addrs addresses to limit the iteration to
* @return new iterator
*/
public static DefinedDataIterator definedStrings(Program program, AddressSetView addrs) {
return new DefinedDataIterator(program, addrs, data -> StringDataInstance.isString(data));
return new DefinedDataIterator(program, addrs,
dataType -> StringDataInstance.isStringDataType(dataType),
data -> StringDataInstance.isString(data));
}
/**
@ -76,73 +93,149 @@ public class DefinedDataIterator implements DataIterator {
*/
public static DefinedDataIterator definedStrings(Data singleDataInstance) {
	// type-level filter first, then the per-instance string check
	Predicate<DataType> stringTypeFilter = StringDataInstance::isStringDataType;
	Predicate<Data> stringInstanceFilter = StringDataInstance::isString;
	return new DefinedDataIterator(singleDataInstance, stringTypeFilter, stringInstanceFilter);
}
private Queue<Data> resultsQueue = new LinkedList<>();
private Predicate<DataType> dataTypePredicate;
private Predicate<Data> dataInstancePredicate;
private DataIterator definedDataIterator;
/**
* LIFO stack of iterators. Newly found iterators of sub-components are
* pushed onto the end and become the current iterator. When an iterator is exhausted,
* it is popped off the end and the uncovered iterator is now the current.
*/
private Deque<DataIterator> itStack = new ArrayDeque<>();
private Data currentDataResult;
private DefinedDataIterator(Program program, AddressSetView addrs,
Predicate<Data> dataInstancePredicate) {
Predicate<DataType> dataTypePredicate, Predicate<Data> dataInstancePredicate) {
this.dataTypePredicate = dataTypePredicate;
this.dataInstancePredicate = dataInstancePredicate;
this.definedDataIterator = program.getListing().getDefinedData(
(addrs == null) ? program.getMemory().getAllInitializedAddressSet() : addrs, true);
itStack.addLast(program.getListing().getDefinedData(
(addrs == null) ? program.getMemory().getAllInitializedAddressSet() : addrs, true));
}
private DefinedDataIterator(Data singleDataInstance, Predicate<Data> dataInstancePredicate) {
private DefinedDataIterator(Data singleDataInstance, Predicate<DataType> dataTypePredicate,
Predicate<Data> dataInstancePredicate) {
this.dataTypePredicate = dataTypePredicate;
this.dataInstancePredicate = dataInstancePredicate;
this.definedDataIterator = DataIterator.EMPTY;
processDataInstance(singleDataInstance);
itStack.addLast(DataIterator.of(singleDataInstance));
}
@Override
public boolean hasNext() {
if (resultsQueue.isEmpty()) {
if (currentDataResult == null) {
findNext();
}
return !resultsQueue.isEmpty();
return currentDataResult != null;
}
@Override
public Data next() {
if (hasNext()) {
return resultsQueue.remove();
if (currentDataResult == null) {
throw new NoSuchElementException();
}
return null;
Data result = currentDataResult;
currentDataResult = null;
return result;
}
/**
 * Returns the iterator on top of the stack that still has elements, discarding
 * any exhausted iterators above it.
 *
 * @return the active iterator, or null once the stack has been emptied
 */
private DataIterator currentIt() {
	for (DataIterator top = itStack.peekLast(); top != null; top = itStack.peekLast()) {
		if (top.hasNext()) {
			return top;
		}
		// exhausted: drop it and look at whatever it was covering
		itStack.removeLast();
	}
	return null;
}
private void findNext() {
while (definedDataIterator.hasNext() && resultsQueue.isEmpty()) {
Data nextData = definedDataIterator.next();
processDataInstance(nextData);
}
}
private void processDataInstance(Data data) {
if (dataInstancePredicate.test(data)) {
resultsQueue.add(data);
return;
}
DataType dt = data.getBaseDataType();
if (dt instanceof Composite || isIterableArray(dt)) {
for (int compNum = 0, compCount =
data.getNumComponents(); compNum < compCount; compNum++) {
Data componentData = data.getComponent(compNum);
processDataInstance(componentData);
DataIterator it = null;
while ((it = currentIt()) != null) {
Data data = it.next();
DataType dt = data.getBaseDataType();
if (matchesDataTypePredicate(dt) && matchesDataInstancePredicate(data)) {
currentDataResult = data;
return;
}
if (dataTypePredicate != null && isContainerDT(dt) &&
recursiveMatchesDataTypePredicate(dt)) {
itStack.addLast(new DataComponentIterator(data));
}
}
}
private boolean isIterableArray(DataType dataType) {
if (dataType instanceof Array) {
DataType elementDT = ((Array) dataType).getDataType();
if (elementDT instanceof TypeDef) {
elementDT = ((TypeDef) elementDT).getBaseDataType();
/**
 * Returns true if the data type is a Composite or an Array.
 */
private boolean isContainerDT(DataType dt) {
	if (dt instanceof Composite) {
		return true;
	}
	return dt instanceof Array;
}
private boolean recursiveMatchesDataTypePredicate(DataType dt) {
if (matchesDataTypePredicate(dt)) {
return true;
}
if (dt instanceof Array) {
Array arrayDT = (Array) dt;
DataType elementDT = arrayDT.getDataType();
return recursiveMatchesDataTypePredicate(elementDT);
}
else if (dt instanceof Structure) {
// handle Structures and general Composite's separately so
// we can focus on just the defined elements of a structure
Structure comp = (Structure) dt;
for (DataTypeComponent dtc : comp.getDefinedComponents()) {
if (recursiveMatchesDataTypePredicate(dtc.getDataType())) {
return true;
}
}
return (elementDT instanceof Array) || (elementDT instanceof Composite) ||
(elementDT instanceof AbstractStringDataType);
return false;
}
else if (dt instanceof Composite) {
Composite comp = (Composite) dt;
for (DataTypeComponent dtc : comp.getComponents()) {
if (recursiveMatchesDataTypePredicate(dtc.getDataType())) {
return true;
}
}
return false;
}
else if (dt instanceof TypeDef) {
TypeDef tdDT = (TypeDef) dt;
return recursiveMatchesDataTypePredicate(tdDT.getBaseDataType());
}
return false;
}
/**
 * Applies the data type predicate; when no predicate was supplied every data type matches.
 */
private boolean matchesDataTypePredicate(DataType dt) {
	if (dataTypePredicate == null) {
		return true;
	}
	return dataTypePredicate.test(dt);
}
/**
 * Applies the data instance predicate; when no predicate was supplied every instance matches.
 */
private boolean matchesDataInstancePredicate(Data data) {
	if (dataInstancePredicate == null) {
		return true;
	}
	return dataInstancePredicate.test(data);
}
/**
 * Iterates over the components of a single {@link Data} instance by index, from
 * component 0 up to the component count captured at construction time.
 */
private static class DataComponentIterator implements DataIterator {

	private final Data data;
	private final int elementCount;
	private int currentIndex;

	/**
	 * @param data parent data whose components will be returned
	 */
	public DataComponentIterator(Data data) {
		this.data = data;
		this.elementCount = data.getNumComponents();
	}

	@Override
	public boolean hasNext() {
		return currentIndex < elementCount;
	}

	/**
	 * Returns the next component.
	 *
	 * @throws NoSuchElementException if all components have already been returned
	 */
	@Override
	public Data next() {
		// honor the Iterator contract instead of indexing past the last component
		if (!hasNext()) {
			throw new NoSuchElementException();
		}
		Data result = data.getComponent(currentIndex);
		currentIndex++;
		return result;
	}
}
}