mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2024-11-25 21:51:47 +00:00
Merge remote-tracking branch 'origin/GP-1426_Dan_asmWoW64--SQUASHED'
This commit is contained in:
commit
4f59e90b39
@ -38,6 +38,7 @@ import ghidra.program.model.lang.*;
|
||||
import ghidra.program.model.listing.Instruction;
|
||||
import ghidra.test.AbstractGhidraHeadlessIntegrationTest;
|
||||
import ghidra.trace.database.ToyDBTraceBuilder;
|
||||
import ghidra.trace.database.context.DBTraceRegisterContextManager;
|
||||
import ghidra.trace.model.memory.TraceMemoryFlag;
|
||||
import ghidra.trace.model.memory.TraceMemoryManager;
|
||||
import ghidra.trace.model.thread.TraceThread;
|
||||
@ -864,17 +865,13 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes
|
||||
try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "x86:LE:64:default")) {
|
||||
Language lang = tb.trace.getBaseLanguage();
|
||||
Register ctxReg = lang.getContextBaseRegister();
|
||||
Register opsizeReg = lang.getRegister("opsize");
|
||||
Register addrsizeReg = lang.getRegister("addrsize");
|
||||
Register longModeReg = lang.getRegister("longMode");
|
||||
RegisterValue ctxVal = new RegisterValue(ctxReg)
|
||||
.assign(opsizeReg, BigInteger.ONE)
|
||||
.assign(addrsizeReg, BigInteger.ONE)
|
||||
.assign(longModeReg, BigInteger.ZERO);
|
||||
DBTraceRegisterContextManager ctxManager = tb.trace.getRegisterContextManager();
|
||||
try (UndoableTransaction tid = tb.startTransaction()) {
|
||||
tb.trace.getRegisterContextManager()
|
||||
.setValue(lang, ctxVal, Range.atLeast(0L),
|
||||
tb.range(0x00400000, 0x00400002));
|
||||
ctxManager.setValue(lang, ctxVal, Range.atLeast(0L),
|
||||
tb.range(0x00400000, 0x00400002));
|
||||
}
|
||||
TraceThread thread = initTrace(tb,
|
||||
List.of(
|
||||
@ -891,6 +888,8 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes
|
||||
|
||||
TracePcodeEmulator emu = new TracePcodeEmulator(tb.trace, 0);
|
||||
PcodeThread<byte[]> emuThread = emu.newThread(thread.getPath());
|
||||
// TODO: Seems the Trace-bound thread ought to know to do this in reInitialize()
|
||||
ctxVal = ctxManager.getValueWithDefault(lang, ctxReg, 0, tb.addr(0x00400000));
|
||||
emuThread.overrideContext(ctxVal);
|
||||
emuThread.stepInstruction();
|
||||
emuThread.stepInstruction();
|
||||
|
@ -63,7 +63,7 @@ public class AssemblyThrasherDevScript extends GhidraScript {
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolvedConstructor select(AssemblyResolutionResults rr,
|
||||
public AssemblyResolvedPatterns select(AssemblyResolutionResults rr,
|
||||
AssemblyPatternBlock ctx) throws AssemblySemanticException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
boolean gotOne = false;
|
||||
@ -72,7 +72,7 @@ public class AssemblyThrasherDevScript extends GhidraScript {
|
||||
if (ar.isError()) {
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedConstructor can = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyResolvedPatterns can = (AssemblyResolvedPatterns) ar;
|
||||
if (can.getContext().combine(ctx) == null) {
|
||||
continue;
|
||||
}
|
||||
|
@ -704,7 +704,7 @@ public class AssemblyDualTextField {
|
||||
* @param existing the instruction, if any, currently under the user's cursor
|
||||
* @return a preference
|
||||
*/
|
||||
protected int computePreference(AssemblyResolvedConstructor rc, Instruction existing) {
|
||||
protected int computePreference(AssemblyResolvedPatterns rc, Instruction existing) {
|
||||
if (existing == null) {
|
||||
return 0;
|
||||
}
|
||||
@ -763,7 +763,7 @@ public class AssemblyDualTextField {
|
||||
//result.add(new AssemblyError("", ar.toString()));
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar;
|
||||
for (byte[] ins : rc.possibleInsVals(ctx)) {
|
||||
result.add(new AssemblyInstruction(text, Arrays.copyOf(ins, ins.length),
|
||||
computePreference(rc, existing)));
|
||||
|
@ -42,7 +42,8 @@ public interface Assembler {
|
||||
* refer to pseudo instructions.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: There must be an active transaction on the bound program for this method to succeed.
|
||||
* <b>NOTE:</b> There must be an active transaction on the bound program for this method to
|
||||
* succeed.
|
||||
*
|
||||
* @param at the location where the resulting instructions should be placed
|
||||
* @param listing a new-line separated or array sequence of instructions
|
||||
@ -119,8 +120,8 @@ public interface Assembler {
|
||||
* results.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: The resolved instructions are given as masks and values. Where the mask does not cover,
|
||||
* you can choose any value.
|
||||
* <b>NOTE:</b> The resolved instructions are given as masks and values. Where the mask does not
|
||||
* cover, you can choose any value.
|
||||
*
|
||||
* @param parse a parse result giving a valid tree
|
||||
* @param at the location of the start of the instruction
|
||||
@ -139,8 +140,8 @@ public interface Assembler {
|
||||
* results.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: The resolved instructions are given as masks and values. Where the mask does not cover,
|
||||
* you can choose any value.
|
||||
* <b>NOTE:</b> The resolved instructions are given as masks and values. Where the mask does not
|
||||
* cover, you can choose any value.
|
||||
*
|
||||
* @param parse a parse result giving a valid tree
|
||||
* @param at the location of the start of the instruction
|
||||
@ -192,7 +193,7 @@ public interface Assembler {
|
||||
* @return the new {@link Instruction} code unit
|
||||
* @throws MemoryAccessException there is an issue writing the result to program memory
|
||||
*/
|
||||
public Instruction patchProgram(AssemblyResolvedConstructor res, Address at)
|
||||
public Instruction patchProgram(AssemblyResolvedPatterns res, Address at)
|
||||
throws MemoryAccessException;
|
||||
|
||||
/**
|
||||
|
@ -25,18 +25,21 @@ import ghidra.program.model.listing.Program;
|
||||
public interface AssemblerBuilder {
|
||||
/**
|
||||
* Get the ID of the language for which this instance builds an assembler
|
||||
*
|
||||
* @return the language ID
|
||||
*/
|
||||
public LanguageID getLanguageID();
|
||||
|
||||
/**
|
||||
* Get the language for which this instance builds an assembler
|
||||
*
|
||||
* @return the language
|
||||
*/
|
||||
public Language getLanguage();
|
||||
|
||||
/**
|
||||
* Build an assembler with the given selector callback
|
||||
*
|
||||
* @param selector the selector callback
|
||||
* @return the built assembler
|
||||
*/
|
||||
@ -44,6 +47,7 @@ public interface AssemblerBuilder {
|
||||
|
||||
/**
|
||||
* Build an assembler with the given selector callback and program binding
|
||||
*
|
||||
* @param selector the selector callback
|
||||
* @param program the bound program
|
||||
* @return the built assembler
|
||||
|
@ -19,19 +19,20 @@ import java.util.*;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.AsmUtil;
|
||||
|
||||
/**
|
||||
* Provides a mechanism for pruning and selecting binary assembled instructions from the results
|
||||
* of parsing textual assembly instructions. There are two opportunities: After parsing, but before
|
||||
* semantic resolution, and after resolution. In the first opportunity, filtering is optional ---
|
||||
* the user may discard any or all parse trees. The second is required, since only one instruction
|
||||
* may be placed at the desired address --- the user must select one instruction among the many
|
||||
* results, and if a mask is present, decide on a value for the omitted bits.
|
||||
* Provides a mechanism for pruning and selecting binary assembled instructions from the results of
|
||||
* parsing textual assembly instructions. There are two opportunities: After parsing, but before
|
||||
* prototype generation, and after machine code generation. In the first opportunity, filtering is
|
||||
* optional --- the user may discard any or all parse trees. The second is required, since only one
|
||||
* instruction may be placed at the desired address --- the user must select one instruction among
|
||||
* the many results, and if a mask is present, decide on a value for the omitted bits.
|
||||
*
|
||||
* <p>
|
||||
* Extensions of this class are also suitable for collecting diagnostic information about attempted
|
||||
* assemblies. For example, an implementation may employ the syntax errors in order to produce
|
||||
* code completion suggestions in a GUI.
|
||||
* assemblies. For example, an implementation may employ the syntax errors in order to produce code
|
||||
* completion suggestions in a GUI.
|
||||
*/
|
||||
public class AssemblySelector {
|
||||
protected Set<AssemblyParseResult> syntaxErrors = new TreeSet<>();
|
||||
@ -40,7 +41,7 @@ public class AssemblySelector {
|
||||
/**
|
||||
* A comparator on instruction length (shortest first), then bits lexicographically
|
||||
*/
|
||||
protected Comparator<AssemblyResolvedConstructor> compareBySizeThenBits = (a, b) -> {
|
||||
protected Comparator<AssemblyResolvedPatterns> compareBySizeThenBits = (a, b) -> {
|
||||
int result;
|
||||
result = a.getInstructionLength() - b.getInstructionLength();
|
||||
if (result != 0) {
|
||||
@ -48,7 +49,7 @@ public class AssemblySelector {
|
||||
}
|
||||
|
||||
result =
|
||||
SleighUtil.compareArrays(a.getInstruction().getVals(), b.getInstruction().getVals());
|
||||
AsmUtil.compareArrays(a.getInstruction().getVals(), b.getInstruction().getVals());
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
@ -58,16 +59,20 @@ public class AssemblySelector {
|
||||
/**
|
||||
* Filter a collection of parse trees.
|
||||
*
|
||||
* Generally, the assembly resolver considers every possible parsing of an assembly
|
||||
* instruction. If, for some reason, the user wishes to ignore certain trees (perhaps for
|
||||
* efficiency, or perhaps because a certain form of instruction is desired), entire parse
|
||||
* trees may be pruned here.
|
||||
* <p>
|
||||
* Generally, the assembly resolver considers every possible parsing of an assembly instruction.
|
||||
* If, for some reason, the user wishes to ignore certain trees (perhaps for efficiency, or
|
||||
* perhaps because a certain form of instruction is desired), entire parse trees may be pruned
|
||||
* here.
|
||||
*
|
||||
* It's possible that no trees pass the filter. In this case, this method ought to throw an
|
||||
* {@link AssemblySyntaxException}. Another option is to pass the erroneous result on for semantic
|
||||
* analysis, in which case, the error is simply copied into an erroneous semantic result.
|
||||
* Depending on preferences, this may simplify the overall filtering and error-handling logic.
|
||||
* <p>
|
||||
* It is possible that no trees pass the filter. In this case, this method ought to throw an
|
||||
* {@link AssemblySyntaxException}. Another option is to pass the erroneous result on for
|
||||
* semantic analysis, in which case, the error is simply copied into an erroneous semantic
|
||||
* result. Depending on preferences, this may simplify the overall filtering and error-handling
|
||||
* logic.
|
||||
*
|
||||
* <p>
|
||||
* By default, no filtering is applied. If all the trees produce syntax errors, an exception is
|
||||
* thrown.
|
||||
*
|
||||
@ -95,10 +100,12 @@ public class AssemblySelector {
|
||||
/**
|
||||
* Select an instruction from the possible results.
|
||||
*
|
||||
* Must select precisely one resolved constructor from the results given back by the assembly
|
||||
* resolver. Precisely one. That means the mask of the returned result must consist of all 1s.
|
||||
* Also, if no selection is suitable, an exception must be thrown.
|
||||
* <p>
|
||||
* This must select precisely one resolved constructor from the results given back by the
|
||||
* assembly resolver. This further implies the mask of the returned result must consist of all
|
||||
* 1s. If no selection is suitable, this must throw an exception.
|
||||
*
|
||||
* <p>
|
||||
* By default, this method selects the shortest instruction that is compatible with the given
|
||||
* context and takes 0 for bits that fall outside the mask. If all possible resolutions produce
|
||||
* errors, an exception is thrown.
|
||||
@ -106,18 +113,18 @@ public class AssemblySelector {
|
||||
* @param rr the collection of resolved constructors
|
||||
* @param ctx the applicable context.
|
||||
* @return a single resolved constructor with a full instruction mask.
|
||||
* @throws AssemblySemanticException
|
||||
* @throws AssemblySemanticException if all possible resolutions produce errors
|
||||
*/
|
||||
public AssemblyResolvedConstructor select(AssemblyResolutionResults rr,
|
||||
public AssemblyResolvedPatterns select(AssemblyResolutionResults rr,
|
||||
AssemblyPatternBlock ctx) throws AssemblySemanticException {
|
||||
List<AssemblyResolvedConstructor> sorted = new ArrayList<>();
|
||||
List<AssemblyResolvedPatterns> sorted = new ArrayList<>();
|
||||
// Select only non-erroneous results whose contexts are compatible.
|
||||
for (AssemblyResolution ar : rr) {
|
||||
if (ar.isError()) {
|
||||
semanticErrors.add((AssemblyResolvedError) ar);
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar;
|
||||
sorted.add(rc);
|
||||
}
|
||||
if (sorted.isEmpty()) {
|
||||
@ -127,9 +134,9 @@ public class AssemblySelector {
|
||||
sorted.sort(compareBySizeThenBits);
|
||||
|
||||
// Pick just the first
|
||||
AssemblyResolvedConstructor res = sorted.get(0);
|
||||
AssemblyResolvedPatterns res = sorted.get(0);
|
||||
// Just set the mask to ffs (effectively choosing 0 for the omitted bits)
|
||||
return AssemblyResolution.resolved(res.getInstruction().fillMask(), res.getContext(),
|
||||
"Selected", null);
|
||||
"Selected", null, null, null);
|
||||
}
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedError;
|
||||
/**
|
||||
* Thrown when all resolutions of an assembly instruction result in semantic errors.
|
||||
*
|
||||
* <p>
|
||||
* For SLEIGH, semantic errors amount to incompatible contexts
|
||||
*/
|
||||
public class AssemblySemanticException extends AssemblyException {
|
||||
@ -37,6 +38,7 @@ public class AssemblySemanticException extends AssemblyException {
|
||||
|
||||
/**
|
||||
* Construct a semantic exception with the associated semantic errors
|
||||
*
|
||||
* @param errors the associated semantic errors
|
||||
*/
|
||||
public AssemblySemanticException(Set<AssemblyResolvedError> errors) {
|
||||
@ -46,6 +48,7 @@ public class AssemblySemanticException extends AssemblyException {
|
||||
|
||||
/**
|
||||
* Get the collection of associated semantic errors
|
||||
*
|
||||
* @return the collection
|
||||
*/
|
||||
public Collection<AssemblyResolvedError> getErrors() {
|
||||
|
@ -35,6 +35,7 @@ public class AssemblySyntaxException extends AssemblyException {
|
||||
|
||||
/**
|
||||
* Construct a syntax exception with the associated syntax errors
|
||||
*
|
||||
* @param errors the associated syntax errors
|
||||
*/
|
||||
public AssemblySyntaxException(Set<AssemblyParseResult> errors) {
|
||||
@ -44,6 +45,7 @@ public class AssemblySyntaxException extends AssemblyException {
|
||||
|
||||
/**
|
||||
* Get the collection of associated syntax errors
|
||||
*
|
||||
* @return the collection
|
||||
*/
|
||||
public Collection<AssemblyParseResult> getErrors() {
|
||||
|
@ -17,11 +17,12 @@ package ghidra.app.plugin.assembler.sleigh;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.Collection;
|
||||
|
||||
import ghidra.app.plugin.assembler.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.parse.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNumericSymbols;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
|
||||
import ghidra.program.disassemble.Disassembler;
|
||||
@ -32,17 +33,16 @@ import ghidra.program.model.lang.RegisterValue;
|
||||
import ghidra.program.model.listing.*;
|
||||
import ghidra.program.model.mem.Memory;
|
||||
import ghidra.program.model.mem.MemoryAccessException;
|
||||
import ghidra.program.model.symbol.*;
|
||||
import ghidra.util.task.TaskMonitor;
|
||||
|
||||
/**
|
||||
* An {@link Assembler} for a {@link SleighLanguage}.
|
||||
*
|
||||
* To obtain one of these, please use {@link SleighAssemblerBuilder}, or better yet, the static
|
||||
* methods of {@link Assemblers}.
|
||||
* <p>
|
||||
* For documentation on how the SLEIGH assembler works, see {@link SleighAssemblerBuilder}. To use
|
||||
* the assembler, please use {@link Assemblers#getAssembler(Program)} or similar.
|
||||
*/
|
||||
public class SleighAssembler implements Assembler {
|
||||
public static final int DEFAULT_MAX_RECURSION_DEPTH = 2; // TODO: Toss this
|
||||
protected static final DbgTimer dbg = DbgTimer.INACTIVE;
|
||||
|
||||
protected AssemblySelector selector;
|
||||
@ -75,7 +75,8 @@ public class SleighAssembler implements Assembler {
|
||||
/**
|
||||
* Construct a SleighAssembler.
|
||||
*
|
||||
* NOTE: This variant does not permit {@link #assemble(Address, String...)}.
|
||||
* <p>
|
||||
* <b>NOTE:</b> This variant does not permit {@link #assemble(Address, String...)}.
|
||||
*
|
||||
* @param selector a method of selecting one result from many
|
||||
* @param lang the SLEIGH language (must be the same as used to create the parser)
|
||||
@ -93,7 +94,7 @@ public class SleighAssembler implements Assembler {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Instruction patchProgram(AssemblyResolvedConstructor res, Address at)
|
||||
public Instruction patchProgram(AssemblyResolvedPatterns res, Address at)
|
||||
throws MemoryAccessException {
|
||||
if (!res.getInstruction().isFullMask()) {
|
||||
throw new AssemblySelectionError("Selected instruction must have a full mask.");
|
||||
@ -157,7 +158,7 @@ public class SleighAssembler implements Assembler {
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseResult> parseLine(String line) {
|
||||
return parser.parse(line, getProgramLabels());
|
||||
return parser.parse(line, getNumericSymbols());
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -173,13 +174,13 @@ public class SleighAssembler implements Assembler {
|
||||
if (parse.isError()) {
|
||||
AssemblyResolutionResults results = new AssemblyResolutionResults();
|
||||
AssemblyParseErrorResult err = (AssemblyParseErrorResult) parse;
|
||||
results.add(AssemblyResolution.error(err.describeError(), "Parsing", null));
|
||||
results.add(AssemblyResolution.error(err.describeError(), "Parsing"));
|
||||
return results;
|
||||
}
|
||||
|
||||
AssemblyParseAcceptResult acc = (AssemblyParseAcceptResult) parse;
|
||||
AssemblyTreeResolver tr =
|
||||
new AssemblyTreeResolver(lang, at.getOffset(), acc.getTree(), ctx, ctxGraph);
|
||||
new AssemblyTreeResolver(lang, at, acc.getTree(), ctx, ctxGraph);
|
||||
return tr.resolve();
|
||||
}
|
||||
|
||||
@ -219,7 +220,7 @@ public class SleighAssembler implements Assembler {
|
||||
public byte[] assembleLine(Address at, String line, AssemblyPatternBlock ctx)
|
||||
throws AssemblySemanticException, AssemblySyntaxException {
|
||||
AssemblyResolutionResults results = resolveLine(at, line, ctx);
|
||||
AssemblyResolvedConstructor res = selector.select(results, ctx);
|
||||
AssemblyResolvedPatterns res = selector.select(results, ctx);
|
||||
if (res == null) {
|
||||
throw new AssemblySelectionError(
|
||||
"Must select exactly one instruction. Report errors via AssemblySemanticError");
|
||||
@ -234,37 +235,15 @@ public class SleighAssembler implements Assembler {
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenience to obtain a map of program labels strings to long values
|
||||
* A convenience to obtain assembly symbols
|
||||
*
|
||||
* @return the map
|
||||
*
|
||||
* {@literal TODO Use a Map<String, Address> instead so that, if possible, symbol values can be checked}
|
||||
* lest they be an invalid substitution for a given operand.
|
||||
*/
|
||||
protected Map<String, Long> getProgramLabels() {
|
||||
Map<String, Long> labels = new HashMap<>();
|
||||
for (Register reg : lang.getRegisters()) {
|
||||
// TODO/HACK: There ought to be a better mechanism describing suitable symbolic
|
||||
// substitutions for a given operand.
|
||||
if (!"register".equals(reg.getAddressSpace().getName())) {
|
||||
labels.put(reg.getName(), (long) reg.getOffset());
|
||||
}
|
||||
}
|
||||
protected AssemblyNumericSymbols getNumericSymbols() {
|
||||
if (program != null) {
|
||||
final SymbolIterator it = program.getSymbolTable().getAllSymbols(false);
|
||||
while (it.hasNext()) {
|
||||
Symbol sym = it.next();
|
||||
if (sym.isExternal()) {
|
||||
continue; // skip externals - will generally be referenced indirectly not directly
|
||||
}
|
||||
SymbolType symbolType = sym.getSymbolType();
|
||||
if (symbolType != SymbolType.LABEL && symbolType != SymbolType.FUNCTION) {
|
||||
continue;
|
||||
}
|
||||
labels.put(sym.getName(), sym.getAddress().getOffset());
|
||||
}
|
||||
return AssemblyNumericSymbols.fromProgram(program);
|
||||
}
|
||||
return labels;
|
||||
return AssemblyNumericSymbols.fromLanguage(lang);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -24,8 +24,7 @@ import ghidra.app.plugin.assembler.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential;
|
||||
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParser;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyContextGraph;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyDefaultContext;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
|
||||
@ -43,57 +42,267 @@ import ghidra.util.SystemUtilities;
|
||||
/**
|
||||
* An {@link AssemblerBuilder} capable of supporting almost any {@link SleighLanguage}
|
||||
*
|
||||
* <p>
|
||||
* To build an assembler, please use a static method of the {@link Assemblers} class.
|
||||
*
|
||||
* SLEIGH-based assembly is a bit of an experimental feature at this time. Nevertheless, it seems to
|
||||
* have come along quite nicely. It's not quite as fast as disassembly, since after all, that's what
|
||||
* SLEIGH was designed to do.
|
||||
* <p>
|
||||
* SLEIGH-based assembly is a bit temperamental, since it essentially runs the disassembler
|
||||
* backwards. The process is tenuous, but works well enough for interactive single-instruction
|
||||
* assembly. It is not nearly as fast as disassembly, since after all, SLEIGH was not designed for
|
||||
* assembly. The assembler is great for interactive patching and for building small samples in unit
|
||||
* tests. For other cases, a real tool chain is likely more appropriate.
|
||||
*
|
||||
* Overall, the method is fairly simple, though its implementation is a bit more complex. First, we
|
||||
* gather every pair of pattern and constructor by traversing the decision tree used by disassembly.
|
||||
* We then use the "print pieces" to construct a context-free grammar. Each production is associated
|
||||
* with the one-or-more constructors with the same sequence of print pieces. We then build a LALR(1)
|
||||
* parser for the generated grammar. This now constitutes a generic parser for the given language.
|
||||
* Note that this step takes some time, and may be better suited as a build-time step. Because
|
||||
* SLEIGH specifications are not generally concerned with eliminating ambiguity of printed
|
||||
* instructions (rather, it only does so for instruction bytes), we must consider that the grammar
|
||||
* could be ambiguous. To handle this, the action/goto table is permitted multiple entries per cell,
|
||||
* and we allow backtracking. There are also cases where tokens are not actually separated by
|
||||
* spaces. For example, in the {@code ia.sinc} file, there is JMP ... and J^cc, meaning, the lexer
|
||||
* must consider J as a token as well as JMP, introducing another source of possible backtracking.
|
||||
* Despite that, parsing is completed fairly quickly.
|
||||
* <h2>A Review of Disassembly</h2>
|
||||
*
|
||||
* To assemble, we first parse the textual instruction, yielding zero or more parse trees. No parse
|
||||
* trees implies an error. For each parse tree, we attempt to resolve the instruction bytes,
|
||||
* starting at the leaves and working upwards while tracking and solving context changes. The
|
||||
* context changes must be considered in reverse. We <em>read</em> the context register of the
|
||||
* children (a disassembler would write). We then assume there is at most one variable in the
|
||||
* expression, solve for it, and <em>write</em> the solution to the appropriate field (a
|
||||
* disassembler would read). If no solution exists, a semantic error is logged. Since it's possible
|
||||
* a production in the parse tree is associated with multiple constructors, different combinations
|
||||
* of constructors are explored as we move upward in the tree. If all possible combinations yield
|
||||
* semantic errors, then the overall result is an error.
|
||||
* <p>
|
||||
* Before diving into assembly, it may be helpful to review SLEIGH and disassembly, at least as far
|
||||
* as I understand. SLEIGH is really a specification of three distinct things, all related by trees
|
||||
* of "constructors." 1) A mnemonic grammar, 2) A machine-code grammar, 3) Run-time semantics, i.e.,
|
||||
* p-code. The third is consumed primarily by the decompiler, the emulator, and other analysis, and
|
||||
* is of little concern to the (dis)assembler. All three are tightly bound. A single constructor
|
||||
* specifies a production in both grammars, constraints for selecting the production, as well as the
|
||||
* generated run-time semantics. Consider an example:
|
||||
*
|
||||
* Some productions are "purely recursive," e.g., {@code :^instruction} lines in the SLEIGH. These
|
||||
* are ignored during parser construction. Let such a production be given as I => I. When resolving
|
||||
* the parse tree to bytes, and we encounter a production with I on the left hand side, we then
|
||||
* consider the possible application of the production I => I and its consequential constructors.
|
||||
* Ideally, we could repeat this indefinitely, stopping when all further applications result in
|
||||
* semantic errors; however, there is no guarantee in the SLEIGH specification that such an
|
||||
* algorithm will actually halt, so a maximum number (default of 1) of applications are attempted.
|
||||
* <pre>
|
||||
* :ADD regD,imm8 is op=5 & regD & imm8 { regD = regD + imm8; }
|
||||
* </pre>
|
||||
*
|
||||
* After all the context changes and operands are resolved, we apply the constructor patterns and
|
||||
* proceed up the tree. Thus, each branch yields zero or more "resolved constructors," which each
|
||||
* specify two masked blocks of data: one for the instruction, and one for the context. These are
|
||||
* passed up to the parent production, which, having obtained results from all its children,
|
||||
* attempts to apply the corresponding constructors.
|
||||
* <p>
|
||||
* The colon indicates this constructor applies to the root "instruction" table. The mnemonic
|
||||
* production precedes the <code>is</code> keyword. The machine-code constraints and production
|
||||
* follow. Finally, the semantics appear within braces.
|
||||
*
|
||||
* Once we've resolved the root node, any resolved constructors returned are taken as successfully
|
||||
* assembled instruction bytes. If applicable, the corresponding context registers are compared to
|
||||
* the context at the target address in the program and filtered for compatibility.
|
||||
* <p>
|
||||
* To support bitfield parsing, a list of token formats and fields within must be declared. The
|
||||
* machine-code production may specify constraints in terms of those fields. Such constraints become
|
||||
* patterns that the parser uses to choose a constructor. For example, we may have
|
||||
* <code>op=(0,3);regD=(4,7);imm8=(8,15)</code>. In little endian, this would indicate a 2-byte
|
||||
* token:
|
||||
*
|
||||
* <pre>
|
||||
* +-4----+-4----+-8----------+
|
||||
* | regD | op | imm8 |
|
||||
* +------+------+------------+
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Thus, this constructor is assigned the pattern <code>0101....</code>, which handles
|
||||
* <code>op=5</code>. <code>regD</code> and <code>imm8</code> remain as operands. The operands of
|
||||
* the machine-code production refer to fields and subtables. During disassembly, those operands are
|
||||
* parsed in the order named: left to right, depth first. For the (root) instruction table and each
|
||||
* subtable, the disassembler selects exactly one constructor. The parser may only examine one
|
||||
* machine-code token at a time; however, the token can be large (32 bits is common), and it may
|
||||
* make several sub-table decisions based on fields within a single token, essentially allowing it
|
||||
* to look ahead and parse those fields out of order. In the example, the parser will technically
|
||||
* examine the <code>op</code> field before parsing <code>regD</code>.
|
||||
*
|
||||
* <p>
|
||||
* When parsing a table or subtable, if no constructor's constraints can be matched, parsing fails.
|
||||
* Each token is some number of bytes in size. The parser advances to the next token when it
|
||||
* encounters a semicolon in the machine-code production. Note that when the parser returns to a
|
||||
* parent constructor, i.e., the PDA pops its stack, the parser may return to a previous token. If
|
||||
* that behavior is not desired, a machine-code production may contain ellipses, causing the parser
|
||||
* to advance to the next token, even considering those tokens already examined by operands to the
|
||||
* ellipses' left. Once all operands of the selected instruction constructor have been parsed, the
|
||||
* resulting constructor tree ("prototype") is recorded and returned.
|
||||
*
|
||||
* <p>
|
||||
* To display the instruction's mnemonic, the prototype is walked, generating the tokens ("print
|
||||
* pieces") from the mnemonic production of each constructor. The walk is ordered according to that
|
||||
* mnemonic production. The mnemonic grammar consists of syntactic text and symbols. Any symbols it
|
||||
* uses must also appear in the machine-code production. Where the symbol is a sub-table, it behaves
|
||||
* like a non-terminal in the grammar: It generates the print pieces of the constructor selected for
|
||||
* the sub-table. Where the symbol is a field, it behaves like a terminal. It displays the numeric
|
||||
* value of the field, or in the case of attached names, e.g., register names, it displays the name.
|
||||
*
|
||||
* <p>
|
||||
* To complicate matters, but greatly increase the capability of the disassembler, SLEIGH introduces
|
||||
* temporary symbols and context to the disassembler. A temporary symbol allows the computation of
|
||||
* displayed values from fields. (The value may also be used by the p-code generator.) For example,
|
||||
* a language may permit the expression of immediates as a value and a shift. Temporary symbols
|
||||
* permit the effective value to be computed and displayed. Thus, a temporary symbol is valid in the
|
||||
* mnemonic production. Context serves at least two purposes: 1) To propagate auxiliary information
|
||||
* to sub-tables during disassembly, and 2) To handle persistent state changes in a processor that
|
||||
* modify its decoder, e.g., ARM in THUMB mode. The latter is accomplished by marking regions of
|
||||
* memory with this contextual information. Context is implemented by introducing a context
|
||||
* register. It behaves like a special mutable token, initialized from the disassembler's memory,
|
||||
* the context marked at the instruction's start address, or the language's default context. Like
|
||||
* token fields, context fields can be referred to by a constructor's machine-code production,
|
||||
* either to form constraints or to parse as operands. Fields may be modified by including mutations
|
||||
* in the constructor. Mutations and temporary symbols are defined by assigning an expression to the
|
||||
* field or symbol. Those expressions may refer to other fields and temporary symbols in the scope
|
||||
* of that constructor. Since mutations are meant to be propagated down, they must be applied in
|
||||
* pre-order during parsing. Note that context is not saved on any sort of stack, thus it is
|
||||
* possible for context mutations in a sub-table operand (and its sub-table operands) to affect
|
||||
* parsing of sibling sub-table operands to the right.
|
||||
*
|
||||
* <p>
|
||||
* When disassembling entire subroutines, the disassembler must propagate context changes from
|
||||
* instruction to instruction. Some bits of the context register are marked "global." Those bits,
|
||||
* when instruction parsing succeeds, are taken as the "output context" of the resulting
|
||||
* instruction. Propagation follows from a recursive traversal disassembly strategy, i.e., it heeds
|
||||
* the branch targets of the instruction. The generated p-code is used to determine whether the
|
||||
* instruction has branches and/or fall-through. If the output context differs from the default
|
||||
* context, the disassembler saves it as the initial context for the next instruction. If the
|
||||
* instruction has a branch target, the output context is marked at the target address.
|
||||
*
|
||||
* <h2>Assembly</h2>
|
||||
*
|
||||
* <p>
|
||||
* Conceptually, assembly is a straightforward reversal of the disassembly process; however, the
|
||||
* actual implementation is far more complex. To assemble an instruction there are three distinct
|
||||
* phases: 1) Parsing, 2) Prototype generation, 3) Machine code generation. Each phase may take
|
||||
* advantage of pre-computed artifacts.
|
||||
*
|
||||
* <h3>Parsing Assembly Mnemonics</h3>
|
||||
*
|
||||
* <p>
|
||||
* To parse, we pre-compute a LALR(1) parser based on mnemonic grammar. Because different
|
||||
* constructors may specify the same mnemonic production as others in the same table, we have to
|
||||
* associate all such constructors to the production. This step takes some time, and may be better
|
||||
* suited as a build-time step. Because SLEIGH specifications are not generally concerned with
|
||||
* eliminating ambiguity of printed instructions (rather, it only does so for instruction bytes), we
|
||||
* must consider that the grammar could be ambiguous. To handle this, the action/goto table is
|
||||
* permitted multiple entries per cell, and we allow backtracking. There are also cases where tokens
|
||||
* are not actually separated by spaces. For example, in the {@code ia.sinc} file, there is JMP, and
|
||||
* J^cc, meaning, the lexer must consider J as a token as well as JMP, introducing another source of
|
||||
* possible backtracking. Despite that, parsing an instruction is fairly quick, since the sentences
|
||||
* are rather short. The pre-compute part of this process is implemented in {@link #buildGrammar()}
|
||||
* and {@link #buildParser()}. Parsing is then encapsulated in {@link AssemblyParser}.
|
||||
*
|
||||
* <h3>Prototype Generation</h3>
|
||||
*
|
||||
* <p>
|
||||
* To generate prototypes, we examine each resulting parse tree. If there are no parse trees, then a
|
||||
* syntax error is reported. Otherwise, for each tree, starting at the root production, we consider
|
||||
* all associated constructors, matching each print piece to its corresponding operand on the
|
||||
* machine-code side. For sub-table operands, the production substituted for the associated
|
||||
* non-terminal guides generation, recursively. For other operands, the associated terminal provides
|
||||
* the value or name. To mimic the token advancement of the disassembler, a shift is computed and
|
||||
* stored for each operand. Computing the shift requires computing each operand's length, and so
|
||||
* once the root of each prototype is generated, the instruction length is also known. Patterns and
|
||||
* mutations are applied to mimic the disassembly process: pre-ordered, depth first, left to right,
|
||||
* heeding the computed shift. If a pattern or mutation for a constructor conflicts with what's been
|
||||
* generated so far, the constructor is pruned. If all possible constructors for a sub-table operand
|
||||
* are pruned, then the containing constructor is also pruned.
|
||||
*
|
||||
* <p>
|
||||
* In some cases, an operand appears in the machine-code production, but not the mnemonic
|
||||
* production: so-called "hidden operands." These pose a potential issue for the assembler, because
|
||||
* nothing syntactic can guide prototype generation. For hidden sub-table operands, we must consider
|
||||
* all constructors in the table. Furthermore, all operands of those constructors are considered
|
||||
* "hidden," and so we exhaust recursively. For other hidden operands, the value is left
|
||||
* unspecified. The prototype generation process is encapsulated in
|
||||
* {@link AssemblyConstructStateGenerator}.
|
||||
*
|
||||
* <h3>Machine Code Generation</h3>
|
||||
*
|
||||
* <p>
|
||||
* Machine code generation is a complex process, but it follows a straightforward reversal of the
|
||||
* disassembler's parse phase. For each prototype, we start at the leaves (non-sub-table operands)
|
||||
* and proceed upwards. This is still a depth-first traversal, but unlike disassembly, generation
|
||||
* proceeds in post-order and right to left, as follows. Starting at the root:
|
||||
*
|
||||
* <ol>
|
||||
* <li>Resolve operands from right to left, descending into sub-table operands.</li>
|
||||
* <li>Solve context mutations, in reverse order.</li>
|
||||
* <li>Apply the required patterns.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <p>
|
||||
* Note that for a single prototype, a constructor has already been selected for each sub-table
|
||||
* operand. The resolution of sub-table operands follows the same process as for the root
|
||||
* constructor.
|
||||
*
|
||||
* <p>
|
||||
* For other operands, resolution proceeds by solving the operand's defining expression set equal to
|
||||
* the value specified by the terminal. The resulting values are written into their respective token
|
||||
* or context fields, generating an "assembly pattern." An assembly pattern is simply a masked bit
|
||||
* sequence recording what is expected in the instruction buffer and context register. Each bit is
|
||||
* 0, 1, or unspecified. In many cases, the "defining expression" is simply a field, so "solving"
|
||||
* degenerates simply to "writing" the specified value into the field. Solving expressions is only
|
||||
* required when a terminal defines the value of a temporary symbol. If the value is unspecified,
|
||||
* i.e., it is a hidden operand, then no fields are written. Thus, hidden non-sub-table operands
|
||||
* generate empty patterns.
|
||||
*
|
||||
* <p>
|
||||
* As machine code generation proceeds right to left in a constructor, the resulting assembly
|
||||
* patterns are accumulated. If a generated pattern conflicts with that accumulated so far, the
|
||||
* pattern is pruned, likely halting generation of the current prototype. Once all operands have
|
||||
* been successfully resolved, the constructor's context mutations are solved. These tend to get
|
||||
* complicated since some fields may have values defined by the accumulated pattern, and some may
|
||||
* not. The changes are processed in reverse order from specified in the constructor, since fields
|
||||
* may be mutated in a way that forms data dependences among them. To solve, the field on the
|
||||
* left-hand side of the mutation is read, then it is set equal to the right-hand side and passed to
|
||||
* the solver. Because, from the disassembly perspective, the left-hand side is about to be written,
|
||||
* its value is cleared before being passed to the solver. If successful, the solver returns patterns that
|
||||
* satisfy the equation. Resolution accumulates the patterns. If solving fails, or the patterns
|
||||
* conflict, it is pruned. Finally, the patterns required to select the constructor are applied,
|
||||
* again pruning conflicts. Note that a constructor may specify multiple patterns, e.g., if a
|
||||
* constraint is <code>op == 5 || op == 6</code>. Thus, overall, it is possible a single prototype
|
||||
* will generate multiple assembly patterns. This process is encapsulated in
|
||||
* {@link AssemblyConstructState}.
|
||||
*
|
||||
* <h3>Handling Context and Prefixes</h3>
|
||||
*
|
||||
* <p>
|
||||
* Once the root constructor has been completely resolved, the resulting instruction patterns
|
||||
* comprise the generated instruction bytes. However, we must consider the context pattern, too. In
|
||||
* practice, the assembler is invoked at a particular address, and the program database may provide
|
||||
* an initial context (as marked during previous disassembly). In other words, when patching an
|
||||
* instruction, we have to keep any persistent context in place. Thus, we can further cull patterns
|
||||
* whose context does not match. This intuition is frustrated by the possibility of constructors
|
||||
* with the mnemonic production <code>^instruction</code>, though. These "pure recursive"
|
||||
* constructors are often (ab)used to handle instruction prefixes, e.g.:
|
||||
*
|
||||
* <code>
|
||||
* :^instruction is prefixed=0 & byte=0xff; instruction [ prefixed=1; ] {}
|
||||
* </code>
|
||||
*
|
||||
* <p>
|
||||
* There are no syntactic elements that would cue the assembly parser to use this constructor.
|
||||
* Instead, we rely on the context register. Were it not for these kinds of constructors, we could
|
||||
* use the saved context as input to the prototype generation phase; however, we cannot. Instead, we
|
||||
* use the empty context and delay this step until after machine code generation. During assembler
|
||||
* construction, we pre-compute a "context transition graph." The mnemonic production
|
||||
* <code>[instruction] => [instruction]</code> has associated with it all pure recursive
|
||||
* constructors. Naturally, that production cannot be included in the parser, as it would generate
|
||||
* increasingly deep parse trees <em>ad infinitum</em>. The graph starts with a seed node: the
|
||||
* language's default context. Then each pure recursive constructor is considered as an edge,
|
||||
* leading to the node resulting from applying that constructor, mimicking disassembly. This
|
||||
* proceeds for each unvisited node until no new nodes are produced. This component is encapsulated
|
||||
* in {@link AssemblyContextGraph}.
|
||||
*
|
||||
* <p>
|
||||
* To generate prefixes, we seek the shortest paths from nodes whose context pattern match the
|
||||
* initial context to nodes whose context pattern matches the generated assembly pattern. Note that
|
||||
* the shortest path may be the zero-length path. If no paths are found, assembly fails. Machine
|
||||
* code generation then proceeds by considering each path, and resolving the constructors in
|
||||
* reverse, in the same manner as constructors from the prototype are resolved. Note that the
|
||||
* patterns may need to be shifted to accommodate prefix tokens. This is accomplished by examining
|
||||
* the shift of the nested instruction operand for each constructor. This process is implemented in
|
||||
* {@link AssemblyTreeResolver#resolveRootRecursion(AssemblyResolutionResults)}.
|
||||
*
|
||||
* <h3>Final Steps</h3>
|
||||
*
|
||||
* <p>
|
||||
* As a final fail safe, the generated instructions are fed back through the disassembler and the
|
||||
* resulting constructor trees are compared. If not equivalent, the instruction is dropped. It is
|
||||
* possible (common in fact) that the generated assembly instruction pattern is not fully defined.
|
||||
* By default, the assembler will substitute 0 for each undefined bit. However, the assembler API
|
||||
* allows the retrieval of the generated pattern, since a user may wish to substitute other values.
|
||||
*
|
||||
* <p>
|
||||
* If, in the end, no instructions are generated, a semantic error is reported. Often, the
|
||||
* description is unwieldy, since it comprises a list of reasons each pattern was pruned. From the
|
||||
* user side, it is usually sufficient to say, "sorry." From the language developer side, it may be
|
||||
* useful to manually reconstruct the prototype and discover the conflicts. To that end, the
|
||||
* implementation includes optional diagnostics, but even then, decoding them takes some familiarity
|
||||
* and expertise.
|
||||
*/
|
||||
public class SleighAssemblerBuilder implements AssemblerBuilder {
|
||||
protected static final DbgTimer dbg = SystemUtilities.isInTestingBatchMode() ? DbgTimer.INACTIVE : DbgTimer.ACTIVE;
|
||||
protected static final DbgTimer dbg =
|
||||
SystemUtilities.isInTestingBatchMode() ? DbgTimer.INACTIVE : DbgTimer.ACTIVE;
|
||||
|
||||
protected SleighLanguage lang;
|
||||
protected AssemblyGrammar grammar;
|
||||
@ -220,6 +429,7 @@ public class SleighAssemblerBuilder implements AssemblerBuilder {
|
||||
/**
|
||||
* Convert the given operand symbol to an {@link AssemblySymbol}
|
||||
*
|
||||
* <p>
|
||||
* For subtables, this results in a non-terminal; for all others, the result is a terminal.
|
||||
*
|
||||
* @param cons the constructor to which the operand belongs
|
||||
@ -242,7 +452,9 @@ public class SleighAssemblerBuilder implements AssemblerBuilder {
|
||||
return built;
|
||||
}
|
||||
if (defsym == null) {
|
||||
built = new AssemblyNumericTerminal(name, getBitSize(cons, opsym));
|
||||
HandleTpl htpl = getHandleTpl(cons, opsym);
|
||||
built = htpl == null ? new AssemblyNumericTerminal(name, 0, null)
|
||||
: new AssemblyNumericTerminal(name, htpl.getSize(), htpl.getAddressSpace());
|
||||
}
|
||||
else if (defsym instanceof SubtableSymbol) {
|
||||
built = new AssemblyNonTerminal(name);
|
||||
@ -268,39 +480,40 @@ public class SleighAssemblerBuilder implements AssemblerBuilder {
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the size in bits of a textual operand.
|
||||
* Obtain the p-code result handle for the given operand
|
||||
*
|
||||
* This is a little odd, since the variables in pattern expressions do not have an explicit
|
||||
* size. However, the value exported by a constructor's pCode may have an explicit size given
|
||||
* (in bytes). Thus, there is a special case, where a constructor prints just one operand and
|
||||
* exports that same operand with an explicit size. In that case, the size of the operand is
|
||||
* printed according to that exported size.
|
||||
* <p>
|
||||
* This handles a special case, where a constructor prints just one operand and exports that
|
||||
* same operand, often with an explicit size, or as an address in a given space. In such cases,
|
||||
* the listing displays that operand according to that exported size.
|
||||
*
|
||||
* For disassembly, this information is used simply to truncate the bits before they are
|
||||
* displayed. For assembly, we must do two things: 1) Ensure that the provided value fits in the
|
||||
* given size, and 2) Mask the goal when solving the pattern expression for the operand.
|
||||
* <p>
|
||||
* For assembly, this gives a few opportunities: 1) We can/must ensure the specified value fits,
|
||||
* by checking the size. 2) We can/must mask the goal when solving the defining pattern
|
||||
* expression for the operand. 3) We can/must check that a label's address space matches that
|
||||
* represented by the operand, when used for a numeric terminal.
|
||||
*
|
||||
* @param cons the constructor from which the production is being derived
|
||||
* @param opsym the operand symbol corresponding to the grammatical symbol, whose size we wish
|
||||
* to determine.
|
||||
* @return the size of the operand in bits
|
||||
*/
|
||||
protected int getBitSize(Constructor cons, OperandSymbol opsym) {
|
||||
protected HandleTpl getHandleTpl(Constructor cons, OperandSymbol opsym) {
|
||||
ConstructTpl ctpl = cons.getTempl();
|
||||
if (null == ctpl) {
|
||||
// No pcode, no size specification
|
||||
return 0;
|
||||
return null;
|
||||
}
|
||||
HandleTpl htpl = ctpl.getResult();
|
||||
if (null == htpl) {
|
||||
// If nothing is exported, the size is unspecified
|
||||
return 0;
|
||||
return null;
|
||||
}
|
||||
if (opsym.getIndex() != htpl.getOffsetOperandIndex()) {
|
||||
// If the export is not of the same operand, it does not specify its size
|
||||
return 0;
|
||||
return null;
|
||||
}
|
||||
return htpl.getSize();
|
||||
return htpl;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -326,30 +539,10 @@ public class SleighAssemblerBuilder implements AssemblerBuilder {
|
||||
if (sym.takesOperandIndex()) {
|
||||
indices.add(index);
|
||||
}
|
||||
rhs.add(sym);
|
||||
rhs.addSymbol(sym);
|
||||
}
|
||||
else {
|
||||
String tstr = str.trim();
|
||||
if (tstr.equals("")) {
|
||||
rhs.addWS();
|
||||
}
|
||||
else {
|
||||
char first = tstr.charAt(0);
|
||||
if (!str.startsWith(tstr)) {
|
||||
rhs.addWS();
|
||||
}
|
||||
if (!Character.isLetterOrDigit(first)) {
|
||||
rhs.addWS();
|
||||
}
|
||||
rhs.add(new AssemblyStringTerminal(str.trim()));
|
||||
char last = tstr.charAt(tstr.length() - 1);
|
||||
if (!str.endsWith(tstr)) {
|
||||
rhs.addWS();
|
||||
}
|
||||
if (!Character.isLetterOrDigit(last)) {
|
||||
rhs.addWS();
|
||||
}
|
||||
}
|
||||
rhs.addSeparators(str);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -384,7 +577,7 @@ public class SleighAssemblerBuilder implements AssemblerBuilder {
|
||||
// Ignore. We don't do pcode.
|
||||
}
|
||||
else if (sym instanceof OperandSymbol) {
|
||||
// Ignore. These are terminals, or will be produced by there defining symbol
|
||||
// Ignore. These are terminals, or will be produced by their defining symbols
|
||||
}
|
||||
else if (sym instanceof ValueSymbol) {
|
||||
// Ignore. These are now terminals
|
||||
|
@ -19,12 +19,12 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.BinaryExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
|
||||
/**
|
||||
* A solver that handles expressions of the form A [OP] B
|
||||
* A solver that handles expressions of the form {@code A [OP] B}
|
||||
*
|
||||
* @param <T> the type of expression solved (the operator)
|
||||
*/
|
||||
@ -37,10 +37,10 @@ public abstract class AbstractBinaryExpressionSolver<T extends BinaryExpression>
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(T exp, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException {
|
||||
MaskedLong lval = solver.getValue(exp.getLeft(), vals, res, cur);
|
||||
MaskedLong rval = solver.getValue(exp.getRight(), vals, res, cur);
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException {
|
||||
MaskedLong lval = solver.getValue(exp.getLeft(), vals, cur);
|
||||
MaskedLong rval = solver.getValue(exp.getRight(), vals, cur);
|
||||
|
||||
if (lval != null && !lval.isFullyDefined()) {
|
||||
if (!lval.isFullyUndefined()) {
|
||||
@ -61,23 +61,23 @@ public abstract class AbstractBinaryExpressionSolver<T extends BinaryExpression>
|
||||
return ConstantValueSolver.checkConstAgrees(cval, goal, description);
|
||||
}
|
||||
else if (lval != null) {
|
||||
return solveRightSide(exp.getRight(), lval, goal, vals, res, cur, hints,
|
||||
return solveRightSide(exp.getRight(), lval, goal, vals, cur, hints,
|
||||
description);
|
||||
}
|
||||
else if (rval != null) {
|
||||
return solveLeftSide(exp.getLeft(), rval, goal, vals, res, cur, hints, description);
|
||||
return solveLeftSide(exp.getLeft(), rval, goal, vals, cur, hints, description);
|
||||
}
|
||||
else {
|
||||
// Each solver may provide a strategy for solving expression where both sides are
|
||||
// variable, e.g., two fields being concatenated via OR.
|
||||
return solveTwoSided(exp, goal, vals, res, cur, hints, description);
|
||||
return solveTwoSided(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
}
|
||||
catch (NeedsBackfillException e) {
|
||||
throw e;
|
||||
}
|
||||
catch (SolverException e) {
|
||||
return AssemblyResolution.error(e.getMessage(), description, null);
|
||||
return AssemblyResolution.error(e.getMessage(), description);
|
||||
}
|
||||
catch (AssertionError e) {
|
||||
dbg.println("While solving: " + exp + " (" + description + ")");
|
||||
@ -86,30 +86,30 @@ public abstract class AbstractBinaryExpressionSolver<T extends BinaryExpression>
|
||||
}
|
||||
|
||||
protected AssemblyResolution solveLeftSide(PatternExpression lexp, MaskedLong rval,
|
||||
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
|
||||
MaskedLong goal, Map<String, Long> vals, AssemblyResolvedPatterns cur,
|
||||
Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
return solver.solve(lexp, computeLeft(rval, goal), vals, res, cur, hints, description);
|
||||
return solver.solve(lexp, computeLeft(rval, goal), vals, cur, hints, description);
|
||||
}
|
||||
|
||||
protected AssemblyResolution solveRightSide(PatternExpression rexp, MaskedLong lval,
|
||||
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
|
||||
MaskedLong goal, Map<String, Long> vals, AssemblyResolvedPatterns cur,
|
||||
Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
return solver.solve(rexp, computeRight(lval, goal), vals, res, cur, hints, description);
|
||||
return solver.solve(rexp, computeRight(lval, goal), vals, cur, hints, description);
|
||||
}
|
||||
|
||||
protected AssemblyResolution solveTwoSided(T exp, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException, SolverException {
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
throw new NeedsBackfillException("_two_sided_");
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(T exp, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur) throws NeedsBackfillException {
|
||||
MaskedLong lval = solver.getValue(exp.getLeft(), vals, res, cur);
|
||||
MaskedLong rval = solver.getValue(exp.getRight(), vals, res, cur);
|
||||
public MaskedLong getValue(T exp, Map<String, Long> vals, AssemblyResolvedPatterns cur)
|
||||
throws NeedsBackfillException {
|
||||
MaskedLong lval = solver.getValue(exp.getLeft(), vals, cur);
|
||||
MaskedLong rval = solver.getValue(exp.getRight(), vals, cur);
|
||||
if (lval != null && rval != null) {
|
||||
MaskedLong cval = compute(lval, rval);
|
||||
return cval;
|
||||
@ -130,7 +130,9 @@ public abstract class AbstractBinaryExpressionSolver<T extends BinaryExpression>
|
||||
/**
|
||||
* Compute the right-hand-side value given that the result and the left are known
|
||||
*
|
||||
* NOTE: Assumes commutativity by default
|
||||
* <p>
|
||||
* <b>NOTE:</b> Assumes commutativity by default
|
||||
*
|
||||
* @param lval the left-hand-side value
|
||||
* @param goal the result
|
||||
* @return the right-hand-side value solution
|
||||
@ -150,16 +152,17 @@ public abstract class AbstractBinaryExpressionSolver<T extends BinaryExpression>
|
||||
public abstract MaskedLong compute(MaskedLong lval, MaskedLong rval);
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(T exp, Map<Integer, Object> res) {
|
||||
int ll = solver.getInstructionLength(exp.getLeft(), res);
|
||||
int lr = solver.getInstructionLength(exp.getRight(), res);
|
||||
public int getInstructionLength(T exp) {
|
||||
int ll = solver.getInstructionLength(exp.getLeft());
|
||||
int lr = solver.getInstructionLength(exp.getRight());
|
||||
return Math.max(ll, lr);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc) {
|
||||
MaskedLong lval = solver.valueForResolution(exp.getLeft(), rc);
|
||||
MaskedLong rval = solver.valueForResolution(exp.getRight(), rc);
|
||||
public MaskedLong valueForResolution(T exp, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
MaskedLong lval = solver.valueForResolution(exp.getLeft(), vals, rc);
|
||||
MaskedLong rval = solver.valueForResolution(exp.getRight(), vals, rc);
|
||||
return compute(lval, rval);
|
||||
}
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
|
||||
@ -49,14 +49,13 @@ public abstract class AbstractExpressionSolver<T extends PatternExpression> {
|
||||
* @param exp the expression to solve
|
||||
* @param goal the desired value of the expression
|
||||
* @param vals values of defined symbols
|
||||
* @param res the results of subconstructor resolutions (used for lengths)
|
||||
* @param hints describes techniques applied by calling solvers
|
||||
* @param description the description to give to resolved solutions
|
||||
* @return the resolution
|
||||
* @throws NeedsBackfillException if the expression refers to an undefined symbol
|
||||
*/
|
||||
public abstract AssemblyResolution solve(T exp, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException;
|
||||
|
||||
/**
|
||||
@ -64,33 +63,34 @@ public abstract class AbstractExpressionSolver<T extends PatternExpression> {
|
||||
*
|
||||
* @param exp the expression
|
||||
* @param vals values of defined symbols
|
||||
* @param res the results of subconstructor resolutions (used for lengths)
|
||||
* @return the constant value, or null if it depends on a variable
|
||||
* @throws NeedsBackfillException if the expression refers to an undefined symbol
|
||||
*/
|
||||
public abstract MaskedLong getValue(T exp, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur) throws NeedsBackfillException;
|
||||
public abstract MaskedLong getValue(T exp, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns cur) throws NeedsBackfillException;
|
||||
|
||||
/**
|
||||
* Determines the length of the subconstructor that would be returned had the expression not
|
||||
* depended on an undefined symbol.
|
||||
*
|
||||
* <p>
|
||||
* This is used by the backfilling process to ensure values are written to the correct offset
|
||||
*
|
||||
* @param exp the expression
|
||||
* @param res the results of subconstructor resolutions (used for lengths)
|
||||
* @return the length of filled in token field(s).
|
||||
*/
|
||||
public abstract int getInstructionLength(T exp, Map<Integer, Object> res);
|
||||
public abstract int getInstructionLength(T exp);
|
||||
|
||||
/**
|
||||
* Compute the value of the expression given the (possibly-intermediate) resolution
|
||||
*
|
||||
* @param exp the expression to evaluate
|
||||
* @param vals values of defined symbols
|
||||
* @param rc the resolution on which to evaluate it
|
||||
* @return the result
|
||||
*/
|
||||
public abstract MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc);
|
||||
public abstract MaskedLong valueForResolution(T exp, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc);
|
||||
|
||||
/**
|
||||
* Register this particular solver with the general expression solver
|
||||
|
@ -19,11 +19,11 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.UnaryExpression;
|
||||
|
||||
/**
|
||||
* A solver that handles expressions of the form [OP]A
|
||||
* A solver that handles expressions of the form {@code [OP]A}
|
||||
*
|
||||
* @param <T> the type of expression solved (the operator)
|
||||
*/
|
||||
@ -36,9 +36,9 @@ public abstract class AbstractUnaryExpressionSolver<T extends UnaryExpression>
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(T exp, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException {
|
||||
MaskedLong uval = solver.getValue(exp.getUnary(), vals, res, cur);
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException {
|
||||
MaskedLong uval = solver.getValue(exp.getUnary(), vals, cur);
|
||||
try {
|
||||
if (uval != null && uval.isFullyDefined()) {
|
||||
MaskedLong cval = compute(uval);
|
||||
@ -46,7 +46,7 @@ public abstract class AbstractUnaryExpressionSolver<T extends UnaryExpression>
|
||||
return ConstantValueSolver.checkConstAgrees(cval, goal, description);
|
||||
}
|
||||
}
|
||||
return solver.solve(exp.getUnary(), computeInverse(goal), vals, res, cur, hints,
|
||||
return solver.solve(exp.getUnary(), computeInverse(goal), vals, cur, hints,
|
||||
description);
|
||||
}
|
||||
/*
|
||||
@ -60,9 +60,9 @@ public abstract class AbstractUnaryExpressionSolver<T extends UnaryExpression>
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(T exp, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur) throws NeedsBackfillException {
|
||||
MaskedLong val = solver.getValue(exp.getUnary(), vals, res, cur);
|
||||
public MaskedLong getValue(T exp, Map<String, Long> vals, AssemblyResolvedPatterns cur)
|
||||
throws NeedsBackfillException {
|
||||
MaskedLong val = solver.getValue(exp.getUnary(), vals, cur);
|
||||
if (val != null) {
|
||||
return compute(val);
|
||||
}
|
||||
@ -72,7 +72,9 @@ public abstract class AbstractUnaryExpressionSolver<T extends UnaryExpression>
|
||||
/**
|
||||
* Compute the input value given that the result is known
|
||||
*
|
||||
* NOTE: Assumes an involution by default
|
||||
* <p>
|
||||
* <b>NOTE:</b> Assumes an involution by default
|
||||
*
|
||||
* @param goal the result
|
||||
* @return the input value solution
|
||||
*/
|
||||
@ -89,13 +91,14 @@ public abstract class AbstractUnaryExpressionSolver<T extends UnaryExpression>
|
||||
public abstract MaskedLong compute(MaskedLong val);
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(T exp, Map<Integer, Object> res) {
|
||||
return solver.getInstructionLength(exp.getUnary(), res);
|
||||
public int getInstructionLength(T exp) {
|
||||
return solver.getInstructionLength(exp.getUnary());
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc) {
|
||||
MaskedLong val = solver.valueForResolution(exp.getUnary(), rc);
|
||||
public MaskedLong valueForResolution(T exp, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
MaskedLong val = solver.valueForResolution(exp.getUnary(), vals, rc);
|
||||
return compute(val);
|
||||
}
|
||||
}
|
||||
|
@ -19,12 +19,13 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.ConstantValue;
|
||||
|
||||
/**
|
||||
* "Solves" constant expressions
|
||||
*
|
||||
* <p>
|
||||
* Essentially, this either evaluates successfully when asked for a constant value, or checks that
|
||||
* the goal is equal to the constant. Otherwise, there is no solution.
|
||||
*/
|
||||
@ -36,25 +37,26 @@ public class ConstantValueSolver extends AbstractExpressionSolver<ConstantValue>
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(ConstantValue cv, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) {
|
||||
MaskedLong value = getValue(cv, vals, res, cur);
|
||||
MaskedLong value = getValue(cv, vals, cur);
|
||||
return checkConstAgrees(value, goal, description);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(ConstantValue cv, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur) {
|
||||
public MaskedLong getValue(ConstantValue cv, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns cur) {
|
||||
return MaskedLong.fromLong(cv.getValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(ConstantValue cv, Map<Integer, Object> res) {
|
||||
public int getInstructionLength(ConstantValue cv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(ConstantValue cv, AssemblyResolvedConstructor rc) {
|
||||
public MaskedLong valueForResolution(ConstantValue cv, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
return MaskedLong.fromLong(cv.getValue());
|
||||
}
|
||||
|
||||
@ -62,9 +64,8 @@ public class ConstantValueSolver extends AbstractExpressionSolver<ConstantValue>
|
||||
String description) {
|
||||
if (!value.agrees(goal)) {
|
||||
return AssemblyResolution.error(
|
||||
"Constant value " + value + " does not agree with child requirements", description,
|
||||
null);
|
||||
"Constant value " + value + " does not agree with child requirements", description);
|
||||
}
|
||||
return AssemblyResolution.nop(description, null);
|
||||
return AssemblyResolution.nop(description, null, null);
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ import ghidra.app.plugin.processors.sleigh.expression.ContextField;
|
||||
/**
|
||||
* Solves expressions of a context register field
|
||||
*
|
||||
* <p>
|
||||
* Essentially, this just encodes the goal into the field, if it can be represented in the given
|
||||
* space and format. Otherwise, there is no solution.
|
||||
*/
|
||||
@ -35,33 +36,33 @@ public class ContextFieldSolver extends AbstractExpressionSolver<ContextField> {
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(ContextField cf, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) {
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description) {
|
||||
assert cf.minValue() == 0; // In case someone decides to do signedness there.
|
||||
if (!goal.isInRange(cf.maxValue(), cf.hasSignbit())) {
|
||||
return AssemblyResolution.error("Value " + goal + " is not valid for " + cf,
|
||||
description, null);
|
||||
description);
|
||||
}
|
||||
AssemblyPatternBlock block = AssemblyPatternBlock.fromContextField(cf, goal);
|
||||
return AssemblyResolution.contextOnly(block, description, null);
|
||||
return AssemblyResolution.contextOnly(block, description);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(ContextField cf, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur) {
|
||||
public MaskedLong getValue(ContextField cf, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns cur) {
|
||||
if (cur == null) {
|
||||
return null;
|
||||
}
|
||||
return valueForResolution(cf, cur);
|
||||
return valueForResolution(cf, vals, cur);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(ContextField cf, Map<Integer, Object> res) {
|
||||
public int getInstructionLength(ContextField cf) {
|
||||
return 0; // this is a context field, not an instruction (token) field
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(ContextField cf, AssemblyResolvedConstructor rc) {
|
||||
public MaskedLong valueForResolution(ContextField cf, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
int size = cf.getByteEnd() - cf.getByteStart() + 1;
|
||||
MaskedLong res = rc.readContext(cf.getByteStart(), size);
|
||||
res = res.shiftRight(cf.getShift());
|
||||
|
@ -24,8 +24,8 @@ public enum DefaultSolverHint implements SolverHint {
|
||||
*/
|
||||
GUESSING_REPETITION,
|
||||
/**
|
||||
* A boolean or solver which matches a circular shift is solving the value having guessed a
|
||||
* shift
|
||||
* A boolean {@code or} solver which matches a circular shift is solving the value having
|
||||
* guessed a shift
|
||||
*/
|
||||
GUESSING_CIRCULAR_SHIFT_AMOUNT,
|
||||
/**
|
||||
|
@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.DivExpression;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form A / B
|
||||
* Solves expressions of the form {@code A / B}
|
||||
*/
|
||||
public class DivExpressionSolver extends AbstractBinaryExpressionSolver<DivExpression> {
|
||||
|
||||
@ -37,7 +37,8 @@ public class DivExpressionSolver extends AbstractBinaryExpressionSolver<DivExpre
|
||||
return MaskedLong.fromLong(1);
|
||||
}
|
||||
throw new SolverException(
|
||||
"Encountered a division of the form A / x = B, where A != B. x has many solutions not easily expressed with masking.");
|
||||
"Encountered a division of the form A / x = B, where A != B. x has many solutions " +
|
||||
"not easily expressed with masking.");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -24,10 +24,13 @@ import ghidra.app.plugin.processors.sleigh.expression.EndInstructionValue;
|
||||
/**
|
||||
* "Solves" expressions of {@code inst_next}
|
||||
*
|
||||
* <p>
|
||||
* Works like the constant solver, but takes the value of {@code inst_next}, which is given by the
|
||||
* assembly address and the resulting instruction length.
|
||||
*
|
||||
* NOTE: This solver requires backfill.
|
||||
* <p>
|
||||
* <b>NOTE:</b> This solver requires backfill, since the value of {@code inst_next} is not known
|
||||
* until possible prefixes have been considered.
|
||||
*/
|
||||
public class EndInstructionValueSolver extends AbstractExpressionSolver<EndInstructionValue> {
|
||||
|
||||
@ -37,32 +40,38 @@ public class EndInstructionValueSolver extends AbstractExpressionSolver<EndInstr
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(EndInstructionValue iv, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) {
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description) {
|
||||
throw new AssertionError(
|
||||
"INTERNAL: Should never be asked to solve for " + AssemblyTreeResolver.INST_NEXT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(EndInstructionValue iv, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur)
|
||||
throws NeedsBackfillException {
|
||||
AssemblyResolvedPatterns cur) throws NeedsBackfillException {
|
||||
Long instNext = vals.get(AssemblyTreeResolver.INST_NEXT);
|
||||
if (instNext == null) {
|
||||
throw new NeedsBackfillException(AssemblyTreeResolver.INST_NEXT);
|
||||
}
|
||||
return MaskedLong.fromLong(vals.get(AssemblyTreeResolver.INST_NEXT));
|
||||
return MaskedLong.fromLong(instNext);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(EndInstructionValue iv, Map<Integer, Object> res) {
|
||||
public int getInstructionLength(EndInstructionValue iv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(EndInstructionValue exp, AssemblyResolvedConstructor rc) {
|
||||
// Would need to pass in symbol values, and perhaps consider child resolutions.
|
||||
throw new UnsupportedOperationException(
|
||||
"The solver should never ask for this value given a resolved constructor.");
|
||||
public MaskedLong valueForResolution(EndInstructionValue exp, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
Long instNext = vals.get(AssemblyTreeResolver.INST_NEXT);
|
||||
if (instNext == null) {
|
||||
/**
|
||||
* This method is used in forward state construction, so just leave unknown. This may
|
||||
* cause unresolvable trees to get generated, but we can't know that until we try to
|
||||
* resolve them.
|
||||
*/
|
||||
return MaskedLong.UNKS;
|
||||
}
|
||||
return MaskedLong.fromLong(instNext);
|
||||
}
|
||||
}
|
||||
|
@ -19,12 +19,12 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.LeftShiftExpression;
|
||||
import ghidra.util.Msg;
|
||||
|
||||
/**
|
||||
* {@literal Solves expressions of the form A << B}
|
||||
* Solves expressions of the form {@code A << B}
|
||||
*/
|
||||
public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver<LeftShiftExpression> {
|
||||
|
||||
@ -61,13 +61,12 @@ public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver<Le
|
||||
|
||||
@Override
|
||||
protected AssemblyResolution solveTwoSided(LeftShiftExpression exp, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException, SolverException {
|
||||
// Do not guess the same parameter recursively
|
||||
if (hints.contains(DefaultSolverHint.GUESSING_LEFT_SHIFT_AMOUNT)) {
|
||||
// NOTE: Nested left shifts ought to be written as a left shift by a sum
|
||||
return super.solveTwoSided(exp, goal, vals, res, cur, hints, description);
|
||||
return super.solveTwoSided(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
// Count the number of zeros to the right, and consider this the maximum shift value
|
||||
// Any higher shift amount would produce too many zeros to the right
|
||||
@ -76,24 +75,41 @@ public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver<Le
|
||||
// use of the leading zero count, at least AFAIK. Maybe to better restrict the max???
|
||||
Set<SolverHint> hintsWithLShift =
|
||||
SolverHint.with(hints, DefaultSolverHint.GUESSING_LEFT_SHIFT_AMOUNT);
|
||||
if (maxShift == 64) {
|
||||
// If the goal is 0s, then any shift will do, so long as the shifted value is 0
|
||||
try {
|
||||
// NB. goal is already 0s, so just use it as subgoal for lhs of shift
|
||||
AssemblyResolution lres =
|
||||
solver.solve(exp.getLeft(), goal, vals, cur, hintsWithLShift, description);
|
||||
if (lres.isError()) {
|
||||
throw new SolverException("Solving left:=0 failed");
|
||||
}
|
||||
// If this works, then the rhs can have any value, so nothing to solve for
|
||||
return lres;
|
||||
}
|
||||
catch (SolverException | UnsupportedOperationException e) {
|
||||
Msg.trace(this, "Trying left:=0 in shift resulted in " + e);
|
||||
// Fall through to the guessing method
|
||||
}
|
||||
}
|
||||
for (int shift = maxShift; shift >= 0; shift--) {
|
||||
try {
|
||||
MaskedLong reqr = MaskedLong.fromLong(shift);
|
||||
MaskedLong reql = computeLeft(reqr, goal);
|
||||
|
||||
AssemblyResolution lres =
|
||||
solver.solve(exp.getLeft(), reql, vals, res, cur, hintsWithLShift, description);
|
||||
solver.solve(exp.getLeft(), reql, vals, cur, hintsWithLShift, description);
|
||||
if (lres.isError()) {
|
||||
throw new SolverException("Solving left failed");
|
||||
}
|
||||
AssemblyResolution rres =
|
||||
solver.solve(exp.getRight(), reqr, vals, res, cur, hints, description);
|
||||
solver.solve(exp.getRight(), reqr, vals, cur, hints, description);
|
||||
if (rres.isError()) {
|
||||
throw new SolverException("Solving right failed");
|
||||
}
|
||||
AssemblyResolvedConstructor lsol = (AssemblyResolvedConstructor) lres;
|
||||
AssemblyResolvedConstructor rsol = (AssemblyResolvedConstructor) rres;
|
||||
AssemblyResolvedConstructor sol = lsol.combine(rsol);
|
||||
AssemblyResolvedPatterns lsol = (AssemblyResolvedPatterns) lres;
|
||||
AssemblyResolvedPatterns rsol = (AssemblyResolvedPatterns) rres;
|
||||
AssemblyResolvedPatterns sol = lsol.combine(rsol);
|
||||
if (sol == null) {
|
||||
throw new SolverException(
|
||||
"Left and right solutions conflict for shift=" + shift);
|
||||
@ -105,6 +121,6 @@ public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver<Le
|
||||
// try the next
|
||||
}
|
||||
}
|
||||
return super.solveTwoSided(exp, goal, vals, res, cur, hints, description);
|
||||
return super.solveTwoSided(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
}
|
||||
|
@ -45,6 +45,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Create a masked value from a mask and a long
|
||||
*
|
||||
* <p>
|
||||
* Any positions in {@code msk} set to 0 create an {@code x} in the corresponding position of
|
||||
* the result. Otherwise, the position takes the corresponding bit from {@code val}.
|
||||
*
|
||||
@ -92,6 +93,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Get the mask as a long
|
||||
*
|
||||
* <p>
|
||||
* Positions with a defined bit are {@code 1}; positions with an undefined bit are {@code 0}.
|
||||
*
|
||||
* @return the mask as a long
|
||||
@ -126,6 +128,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Apply an additional mask to this masked long
|
||||
*
|
||||
* <p>
|
||||
* Any {@code 0} bit in {@code msk} will result in an undefined bit in the result. {@code 1}
|
||||
* bits result in a copy of the corresponding bit in the result.
|
||||
*
|
||||
@ -139,6 +142,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Sign extend the masked value, according to its mask, to a full long
|
||||
*
|
||||
* <p>
|
||||
* The leftmost defined bit is taken as the sign bit, and extended to the left.
|
||||
*
|
||||
* @return the sign-extended masked long
|
||||
@ -151,6 +155,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Zero extend the masked value, according to its mask, to a full long
|
||||
*
|
||||
* <p>
|
||||
* All bits to the left of the leftmost defined bit are set to 0.
|
||||
*
|
||||
* @return the zero-extended masked long
|
||||
@ -199,6 +204,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Combine this and another masked long into one, by taking defined bits from either
|
||||
*
|
||||
* <p>
|
||||
* If this masked long agrees with the other, then the two are combined. For each bit position
|
||||
* in the result, the defined bit from either corresponding position is taken. If neither is
|
||||
* defined, then the position is undefined in the result. If both are defined, they must agree.
|
||||
@ -217,6 +223,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift {@code size} bits {@code n} positions circularly in a given direction
|
||||
*
|
||||
* <p>
|
||||
* The shifted bits are the least significant {@code size} bits. The remaining bits are
|
||||
* unaffected.
|
||||
*
|
||||
@ -247,6 +254,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift {@code size} bits {@code n} positions circularly in a given direction
|
||||
*
|
||||
* <p>
|
||||
* The shifted bits are the least significant {@code size} bits. The remaining bits are
|
||||
* unaffected.
|
||||
*
|
||||
@ -265,6 +273,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift the bits {@code n} positions left
|
||||
*
|
||||
* <p>
|
||||
* This implements both a signed and unsigned shift.
|
||||
*
|
||||
* @param n the number of positions.
|
||||
@ -282,6 +291,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift the bits {@code n} positions left
|
||||
*
|
||||
* <p>
|
||||
* This implements both a signed and unsigned shift.
|
||||
*
|
||||
* @param n the number of positions.
|
||||
@ -297,6 +307,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Invert a left shift of {@code n} positions, that is shift right
|
||||
*
|
||||
* <p>
|
||||
* This is different from a normal shift right, in that it inserts unknowns at the left. The
|
||||
* normal right shift inserts zeros or sign bits. Additionally, if any ones would fall off the
|
||||
* right, the inversion is undefined.
|
||||
@ -319,6 +330,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Invert a left shift of {@code n} positions, that is shift right
|
||||
*
|
||||
* <p>
|
||||
* This is different from a normal shift right, in that it inserts unknowns at the left. The
|
||||
* normal right shift inserts zeros or sign bits. Additionally, if any ones would fall off the
|
||||
* right, the inversion is undefined.
|
||||
@ -337,6 +349,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift the bits arithmetically {@code n} positions right
|
||||
*
|
||||
* <p>
|
||||
* This implements a signed shift.
|
||||
*
|
||||
* @param n the number of positions.
|
||||
@ -352,6 +365,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift the bits arithmetically {@code n} positions right
|
||||
*
|
||||
* <p>
|
||||
* This implements a signed shift.
|
||||
*
|
||||
* @param n the number of positions.
|
||||
@ -367,6 +381,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Invert an arithmetic right shift of {@code n} positions, that is shift left
|
||||
*
|
||||
* <p>
|
||||
* This is different from a normal shift left, in that it inserts unknowns at the right. The
|
||||
* normal left shift inserts zeros. Additionally, all bits that fall off the left must match the
|
||||
* resulting sign bit, or else the inversion is undefined.
|
||||
@ -400,6 +415,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Invert an arithmetic right shift of {@code n} positions, that is shift left
|
||||
*
|
||||
* <p>
|
||||
* This is different from a normal shift left, in that it inserts unknowns at the right. The
|
||||
* normal left shift inserts zeros. Additionally, all bits that fall off the left must match the
|
||||
* resulting sign bit, or else the inversion is undefined.
|
||||
@ -418,6 +434,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift the bits logically {@code n} positions right
|
||||
*
|
||||
* <p>
|
||||
* This implements an unsigned shift.
|
||||
*
|
||||
* @param n the number of positions.
|
||||
@ -435,6 +452,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift the bits logically {@code n} positions right
|
||||
*
|
||||
* <p>
|
||||
* This implements an unsigned shift.
|
||||
*
|
||||
* @param n the number of positions.
|
||||
@ -451,6 +469,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Shift the bits positionally {@code n} positions right
|
||||
*
|
||||
* <p>
|
||||
* This fills the left with unknown bits
|
||||
*
|
||||
* @param n
|
||||
@ -463,6 +482,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Invert a logical right shift of {@code n} positions, that is shift left
|
||||
*
|
||||
* <p>
|
||||
* This is different from a normal shift left, in that it inserts unknowns at the right. The
|
||||
* normal left shift inserts zeros. Additionally, if any ones would fall off the left, the
|
||||
* inversion is undefined.
|
||||
@ -486,6 +506,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Invert a logical right shift of {@code n} positions, that is shift left
|
||||
*
|
||||
* <p>
|
||||
* This is different from a normal shift left, in that it inserts unknowns at the right. The
|
||||
* normal left shift inserts zeros. Additionally, if any ones would fall off the left, the
|
||||
* inversion is undefined.
|
||||
@ -504,6 +525,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Reverse the least significant {@code n} bytes
|
||||
*
|
||||
* <p>
|
||||
* This interprets the bits as an {@code n}-byte value and changes the endianness. Any bits
|
||||
* outside of the interpretation are truncated, i.e., become unknown.
|
||||
*
|
||||
@ -517,16 +539,17 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Compute the bitwise AND of this and another masked long
|
||||
*
|
||||
* <p>
|
||||
* To handle unknown bits, the result is derived from the following truth table:
|
||||
*
|
||||
* <pre>{@literal
|
||||
* <pre>
|
||||
* 0 x 1 <= A (this)
|
||||
* 0 0 0 0
|
||||
* x 0 x x
|
||||
* 1 0 x 1
|
||||
* ^
|
||||
* B (that)
|
||||
* }</pre>
|
||||
* </pre>
|
||||
*
|
||||
* @param that the other masked long ({@code B}).
|
||||
* @return the result.
|
||||
@ -547,18 +570,19 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
|
||||
/**
|
||||
* Solves the expression {@code A & B = C, for B, given C and A}
|
||||
*
|
||||
* <p>
|
||||
* To handle unknown bits, the solution is derived from the following truth table, where
|
||||
* {@code *} indicates no solution:
|
||||
*
|
||||
* <pre>{@literal
|
||||
* <pre>
|
||||
* 0 x 1 <= A (that)
|
||||
* 0 x x 0
|
||||
* x x x x
|
||||
* 1 * 1 1
|
||||
* ^
|
||||
* C (this)
|
||||
* }</pre>
|
||||
* </pre>
|
||||
*
|
||||
* @param that the other masked long ({@code A}).
|
||||
* @return the result.
|
||||
@ -587,16 +611,17 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Compute the bitwise OR of this and another masked long
|
||||
*
|
||||
* <p>
|
||||
* To handle unknown bits, the result is derived from the following truth table:
|
||||
*
|
||||
* <pre>{@literal
|
||||
* <pre>
|
||||
* 0 x 1 <= A (this)
|
||||
* 0 0 x 1
|
||||
* x x x 1
|
||||
* 1 1 1 1
|
||||
* ^
|
||||
* B (that)
|
||||
* }</pre>
|
||||
* </pre>
|
||||
*
|
||||
* @param that the other masked long ({@code B}).
|
||||
* @return the result.
|
||||
@ -620,17 +645,18 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Solves the expression A | B = C, for B, given C and A
|
||||
*
|
||||
* <p>
|
||||
* To handle unknown bits, the solution is derived from the following truth table, where
|
||||
* {@code *} indicates no solution:
|
||||
*
|
||||
* <pre>{@literal
|
||||
* <pre>
|
||||
* 0 x 1 <= A (that)
|
||||
* 0 0 0 *
|
||||
* x x x x
|
||||
* 1 1 x x
|
||||
* ^
|
||||
* C (this)
|
||||
* }</pre>
|
||||
* </pre>
|
||||
*
|
||||
* @param that the other masked long ({@code A}).
|
||||
* @return the result.
|
||||
@ -658,16 +684,17 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Compute the bitwise XOR of this and another masked long
|
||||
*
|
||||
* <p>
|
||||
* To handle unknown bits, the result is derived from the following truth table:
|
||||
*
|
||||
* <pre>{@literal
|
||||
* <pre>
|
||||
* 0 x 1 <= A (this)
|
||||
* 0 0 x 1
|
||||
* x x x x
|
||||
* 1 1 x 0
|
||||
* ^
|
||||
* B (that)
|
||||
* }</pre>
|
||||
* </pre>
|
||||
*
|
||||
* @param that the other masked long ({@code B}).
|
||||
* @return the result.
|
||||
@ -696,12 +723,13 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Compute the bitwise NOT
|
||||
*
|
||||
* <p>
|
||||
* To handle unknown bits, the result is derived from the following truth table:
|
||||
*
|
||||
* <pre>{@literal
|
||||
* <pre>
|
||||
* 0 x 1 <= A (this)
|
||||
* 1 x 0
|
||||
* }</pre>
|
||||
* </pre>
|
||||
*
|
||||
* @return the result.
|
||||
*/
|
||||
@ -769,7 +797,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
if (lmv == 2 || rmv == 2) {
|
||||
return 2;
|
||||
}
|
||||
else if (lmv == 3 || rmv == 3) {
|
||||
else if (lmv == 3 && rmv == 3) {
|
||||
return 3;
|
||||
}
|
||||
return 0;
|
||||
@ -893,6 +921,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Compute the arithmetic quotient as a solution to unsigned multiplication
|
||||
*
|
||||
* <p>
|
||||
* This is slightly different than {@link #divideUnsigned(MaskedLong)} in its treatment of
|
||||
* unknowns.
|
||||
*
|
||||
@ -924,6 +953,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Checks if this and another masked long agree
|
||||
*
|
||||
* <p>
|
||||
* Two masked longs agree iff their corresponding defined bit positions are equal. Where either
|
||||
* or both positions are undefined, no check is applied. In the case that both masked longs are
|
||||
* fully-defined, this is the same as an equality check on the values.
|
||||
@ -942,6 +972,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Checks if this and a long agree
|
||||
*
|
||||
* <p>
|
||||
* The masked long agrees with the given long iff the masked long's defined bit positions agree
|
||||
* with the corresponding bit positions in the given long. Where there are undefined bits, no
|
||||
* check is applied. In the case that the masked long is fully-defined, this is the same as an
|
||||
@ -978,10 +1009,12 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Check if the masked value falls within a given range
|
||||
*
|
||||
* <p>
|
||||
* The range is defined by a maximum and a signedness. The maximum must be one less than a
|
||||
* positive power of 2. In other words, it defines a maximum number of bits, including the sign
|
||||
* bit if applicable.
|
||||
*
|
||||
* <p>
|
||||
* The defined bits of this masked long are then checked to fall in the given range. The
|
||||
* effective value is derived by sign/zero extending the value according to its mask. In
|
||||
* general, if any {@code 1} bits exist outside of the given max, the value is rejected, unless
|
||||
@ -1013,6 +1046,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* "Compare" two masked longs
|
||||
*
|
||||
* <p>
|
||||
* This is not meant to reflect a numerical comparison. Rather, this is just to impose an
|
||||
* ordering for the sake of storing these in sorted collections.
|
||||
*/
|
||||
@ -1038,6 +1072,7 @@ public class MaskedLong implements Comparable<MaskedLong> {
|
||||
/**
|
||||
* Check for equality
|
||||
*
|
||||
* <p>
|
||||
* This will only return true if the other object is a masked long, even if this one is
|
||||
* fully-defined, and the value is equal to a given long (or {@link Long}). The other masked
|
||||
* long must have the same mask and value to be considered equal. For other sorts of "equality"
|
||||
|
@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.MinusExpression;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form -A
|
||||
* Solves expressions of the form {@code -A}
|
||||
*/
|
||||
public class MinusExpressionSolver extends AbstractUnaryExpressionSolver<MinusExpression> {
|
||||
|
||||
|
@ -19,12 +19,12 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.MultExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form A * B
|
||||
* Solves expressions of the form {@code A * B}
|
||||
*/
|
||||
public class MultExpressionSolver extends AbstractBinaryExpressionSolver<MultExpression> {
|
||||
|
||||
@ -103,25 +103,24 @@ public class MultExpressionSolver extends AbstractBinaryExpressionSolver<MultExp
|
||||
}
|
||||
|
||||
protected AssemblyResolution tryRep(PatternExpression lexp, MaskedLong rval, MaskedLong repGoal,
|
||||
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException {
|
||||
MaskedLong goal, Map<String, Long> vals, AssemblyResolvedPatterns cur,
|
||||
Set<SolverHint> hints, String description) throws NeedsBackfillException {
|
||||
MaskedLong lval = repGoal.divideUnsigned(rval);
|
||||
if (lval.multiply(rval).agrees(goal)) {
|
||||
return solver.solve(lexp, lval, vals, res, cur, hints, description);
|
||||
return solver.solve(lexp, lval, vals, cur, hints, description);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AssemblyResolution solveLeftSide(PatternExpression lexp, MaskedLong rval,
|
||||
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
|
||||
MaskedLong goal, Map<String, Long> vals, AssemblyResolvedPatterns cur,
|
||||
Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
// Try the usual case first
|
||||
ResultTracker tracker = new ResultTracker();
|
||||
AssemblyResolution sol = tracker.trySolverFunc(() -> {
|
||||
return super.solveLeftSide(lexp, rval, goal, vals, res, cur, hints, description);
|
||||
return super.solveLeftSide(lexp, rval, goal, vals, cur, hints, description);
|
||||
});
|
||||
if (sol != null) {
|
||||
return sol;
|
||||
@ -151,8 +150,8 @@ public class MultExpressionSolver extends AbstractBinaryExpressionSolver<MultExp
|
||||
if (reps > 0) {
|
||||
MaskedLong repRightGoal = MaskedLong.fromMaskAndValue(repMsk, repVal);
|
||||
sol = tracker.trySolverFunc(() -> {
|
||||
return tryRep(lexp, rval, repRightGoal, goal, vals, res, cur,
|
||||
hintsWithRepetition, description);
|
||||
return tryRep(lexp, rval, repRightGoal, goal, vals, cur, hintsWithRepetition,
|
||||
description);
|
||||
});
|
||||
if (sol != null) {
|
||||
return sol;
|
||||
@ -169,8 +168,8 @@ public class MultExpressionSolver extends AbstractBinaryExpressionSolver<MultExp
|
||||
repMsk = -1L >>> i;
|
||||
MaskedLong repLeftGoal = MaskedLong.fromMaskAndValue(repMsk, repVal);
|
||||
sol = tracker.trySolverFunc(() -> {
|
||||
return tryRep(lexp, rval, repLeftGoal, goal, vals, res, cur,
|
||||
hintsWithRepetition, description);
|
||||
return tryRep(lexp, rval, repLeftGoal, goal, vals, cur, hintsWithRepetition,
|
||||
description);
|
||||
});
|
||||
if (sol != null) {
|
||||
return sol;
|
||||
@ -182,10 +181,10 @@ public class MultExpressionSolver extends AbstractBinaryExpressionSolver<MultExp
|
||||
|
||||
@Override
|
||||
protected AssemblyResolution solveRightSide(PatternExpression rexp, MaskedLong lval,
|
||||
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
|
||||
MaskedLong goal, Map<String, Long> vals, AssemblyResolvedPatterns cur,
|
||||
Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
return solveLeftSide(rexp, lval, goal, vals, res, cur, hints, description);
|
||||
return solveLeftSide(rexp, lval, goal, vals, cur, hints, description);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -18,13 +18,15 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
/**
|
||||
* An exception to indicate that the solution of an expression is not yet known
|
||||
*
|
||||
* <p>
|
||||
* Furthermore, it cannot be determined whether or not the expression is even solvable. When this
|
||||
* exception is thrown, a backfill record is placed on the encoded resolution indicating that
|
||||
* exception is thrown, a backfill record is placed on the encoded resolution indicating that the
|
||||
* resolver must attempt to solve the expression again, once the encoding is otherwise complete.
|
||||
* This is needed, most notably, when an encoding depends on the address of the <em>next</em>
|
||||
* instruction, because the length of the current instruction is not known until resolution has
|
||||
* finished.
|
||||
*
|
||||
* <p>
|
||||
* Backfill becomes a possibility when an expression depends on a symbol that is not (yet) defined.
|
||||
* Thus, as a matter of good record keeping, the exception takes the name of the missing symbol.
|
||||
*/
|
||||
@ -33,6 +35,7 @@ public class NeedsBackfillException extends SolverException {
|
||||
|
||||
/**
|
||||
* Construct a backfill exception, resulting from the given missing symbol name
|
||||
*
|
||||
* @param symbol the missing symbol name
|
||||
*/
|
||||
public NeedsBackfillException(String symbol) {
|
||||
@ -42,6 +45,7 @@ public class NeedsBackfillException extends SolverException {
|
||||
|
||||
/**
|
||||
* Retrieve the missing symbol name from the original solution attempt
|
||||
*
|
||||
* @return the missing symbol name
|
||||
*/
|
||||
public String getSymbol() {
|
||||
|
@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.NotExpression;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form ~A
|
||||
* Solves expressions of the form {@code ~A}
|
||||
*/
|
||||
public class NotExpressionSolver extends AbstractUnaryExpressionSolver<NotExpression> {
|
||||
|
||||
|
@ -28,6 +28,7 @@ import ghidra.app.plugin.processors.sleigh.symbol.TripleSymbol;
|
||||
/**
|
||||
* Solves expressions of an operand value
|
||||
*
|
||||
* <p>
|
||||
* These are a sort of named sub-expression, but they may also specify a shift in encoding.
|
||||
*/
|
||||
public class OperandValueSolver extends AbstractExpressionSolver<OperandValue> {
|
||||
@ -39,12 +40,13 @@ public class OperandValueSolver extends AbstractExpressionSolver<OperandValue> {
|
||||
/**
|
||||
* Obtains the "defining expression"
|
||||
*
|
||||
* <p>
|
||||
* This is either the symbol's assigned defining expression, or the expression associated with
|
||||
* its defining symbol.
|
||||
*
|
||||
* @return the defining expression, or null if neither is available
|
||||
*/
|
||||
protected PatternExpression getDefiningExpression(OperandSymbol sym) {
|
||||
public static PatternExpression getDefiningExpression(OperandSymbol sym) {
|
||||
PatternExpression patexp = sym.getDefiningExpression();
|
||||
if (patexp != null) {
|
||||
return patexp;
|
||||
@ -59,62 +61,63 @@ public class OperandValueSolver extends AbstractExpressionSolver<OperandValue> {
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(OperandValue ov, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException {
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException {
|
||||
Constructor cons = ov.getConstructor();
|
||||
OperandSymbol sym = cons.getOperand(ov.getIndex());
|
||||
PatternExpression patexp = getDefiningExpression(sym);
|
||||
if (patexp == null) {
|
||||
if (goal.equals(MaskedLong.ZERO)) {
|
||||
return AssemblyResolution.nop(description, null);
|
||||
return AssemblyResolution.nop(description, null, null);
|
||||
}
|
||||
return AssemblyResolution.error("Operand " + sym.getName() +
|
||||
" is undefined and does not agree with child requirements", description, null);
|
||||
" is undefined and does not agree with child requirements", description);
|
||||
}
|
||||
AssemblyResolution result = solver.solve(patexp, goal, vals, res, cur, hints, description);
|
||||
AssemblyResolution result = solver.solve(patexp, goal, vals, cur, hints, description);
|
||||
if (result.isError()) {
|
||||
AssemblyResolvedError err = (AssemblyResolvedError) result;
|
||||
return AssemblyResolution.error(err.getError(),
|
||||
"Solution to " + sym.getName() + " := " + goal + " = " + patexp,
|
||||
List.of(result));
|
||||
List.of(result), null);
|
||||
}
|
||||
// TODO: Shifting here seems like a hack to me.
|
||||
// I assume this only comes at the top of an expression
|
||||
AssemblyResolvedConstructor con = (AssemblyResolvedConstructor) result;
|
||||
int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res);
|
||||
AssemblyResolvedPatterns con = (AssemblyResolvedPatterns) result;
|
||||
int shamt = AssemblyTreeResolver.computeOffset(sym, cons);
|
||||
return con.shift(shamt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(OperandValue ov, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur) throws NeedsBackfillException {
|
||||
public MaskedLong getValue(OperandValue ov, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns cur) throws NeedsBackfillException {
|
||||
Constructor cons = ov.getConstructor();
|
||||
OperandSymbol sym = cons.getOperand(ov.getIndex());
|
||||
PatternExpression patexp = getDefiningExpression(sym);
|
||||
if (patexp == null) {
|
||||
return MaskedLong.ZERO;
|
||||
}
|
||||
int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res);
|
||||
int shamt = AssemblyTreeResolver.computeOffset(sym, cons);
|
||||
cur = cur == null ? null : cur.truncate(shamt);
|
||||
MaskedLong result = solver.getValue(patexp, vals, res, cur);
|
||||
MaskedLong result = solver.getValue(patexp, vals, cur);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(OperandValue ov, Map<Integer, Object> res) {
|
||||
public int getInstructionLength(OperandValue ov) {
|
||||
Constructor cons = ov.getConstructor();
|
||||
OperandSymbol sym = cons.getOperand(ov.getIndex());
|
||||
PatternExpression patexp = sym.getDefiningExpression();
|
||||
if (patexp == null) {
|
||||
return 0;
|
||||
}
|
||||
int length = solver.getInstructionLength(patexp, res);
|
||||
int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res);
|
||||
int length = solver.getInstructionLength(patexp);
|
||||
int shamt = AssemblyTreeResolver.computeOffset(sym, cons);
|
||||
return length + shamt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(OperandValue ov, AssemblyResolvedConstructor rc) {
|
||||
public MaskedLong valueForResolution(OperandValue ov, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
Constructor cons = ov.getConstructor();
|
||||
OperandSymbol sym = cons.getOperand(ov.getIndex());
|
||||
PatternExpression patexp = sym.getDefiningExpression();
|
||||
@ -135,7 +138,7 @@ public class OperandValueSolver extends AbstractExpressionSolver<OperandValue> {
|
||||
// Since I'm using this just for context, ignore shifting for now.
|
||||
//int shamt = AssemblyTreeResolver.computeOffset(sym, cons, rc.children);
|
||||
// Children would be null here, anyway.
|
||||
return solver.valueForResolution(patexp, rc);
|
||||
return solver.valueForResolution(patexp, vals, rc);
|
||||
// NOTE: To be paranoid, I could check for the existence of TokenField in the expression
|
||||
// And also check if a shift would be performed.
|
||||
}
|
||||
|
@ -17,35 +17,27 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.match.ExpressionMatcher;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
|
||||
import ghidra.app.plugin.processors.sleigh.ParserWalker;
|
||||
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.*;
|
||||
import ghidra.program.model.mem.MemoryAccessException;
|
||||
import ghidra.util.Msg;
|
||||
import ghidra.xml.XmlPullParser;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form A | B
|
||||
* Solves expressions of the form {@code A | B}
|
||||
*/
|
||||
public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpression> {
|
||||
static final PatternExpression DUMMY = new PatternExpression() {
|
||||
@Override
|
||||
public long getValue(ParserWalker walker) throws MemoryAccessException {
|
||||
return 0;
|
||||
}
|
||||
protected static class Matchers implements ExpressionMatcher.Context {
|
||||
protected ExpressionMatcher<ConstantValue> val = var(ConstantValue.class);
|
||||
protected ExpressionMatcher<ConstantValue> size = var(ConstantValue.class);
|
||||
protected ExpressionMatcher<PatternValue> fld = fldSz(size);
|
||||
|
||||
@Override
|
||||
public void restoreXml(XmlPullParser parser, SleighLanguage lang) {
|
||||
// Dummy intentionally left empty
|
||||
}
|
||||
protected ExpressionMatcher<?> neqConst = or(
|
||||
and(shr(sub(opnd(fld), val), size), cv(1)),
|
||||
and(shr(sub(val, opnd(fld)), size), cv(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
protected static final Matchers MATCHERS = new Matchers();
|
||||
|
||||
public OrExpressionSolver() {
|
||||
super(OrExpression.class);
|
||||
@ -62,8 +54,8 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
}
|
||||
|
||||
protected AssemblyResolution tryCatenationExpression(OrExpression exp, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
Set<SolverHint> hints, String description) throws SolverException {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) throws SolverException {
|
||||
/*
|
||||
* If OR is being used to concatenate fields, then we can solve with some symbolic
|
||||
* manipulation. We'll descend to see if this is a tree of ORs with SHIFTs or fields at the
|
||||
@ -71,12 +63,12 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
* component independently.
|
||||
*/
|
||||
Map<Long, PatternExpression> fields = new TreeMap<>();
|
||||
fields.put(0L, new ConstantValue(0));
|
||||
collectComponentsOr(exp, 0, fields, vals, res, cur);
|
||||
collectComponentsOr(exp, 0, fields, vals, cur);
|
||||
fields.computeIfAbsent(0L, __ -> new ConstantValue(0));
|
||||
fields.put(64L, new ConstantValue(0));
|
||||
long lo = 0;
|
||||
PatternExpression fieldExp = null;
|
||||
AssemblyResolvedConstructor result = AssemblyResolution.nop(description, null);
|
||||
AssemblyResolvedPatterns result = AssemblyResolution.nop(description);
|
||||
try (DbgCtx dc = dbg.start("Trying solution of field catenation")) {
|
||||
dbg.println("Original: " + goal + ":= " + exp);
|
||||
for (Map.Entry<Long, PatternExpression> ent : fields.entrySet()) {
|
||||
@ -89,12 +81,12 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
dbg.println("Part(" + hi + ":" + lo + "]:= " + fieldExp);
|
||||
MaskedLong part = goal.shiftLeft(64 - hi).shiftRightPositional(64 - hi + lo);
|
||||
dbg.println("Solving: " + part + ":= " + fieldExp);
|
||||
AssemblyResolution sol = solver.solve(fieldExp, part, vals, res, cur, hints,
|
||||
AssemblyResolution sol = solver.solve(fieldExp, part, vals, cur, hints,
|
||||
description + " with shift " + lo);
|
||||
if (sol.isError()) {
|
||||
return sol;
|
||||
}
|
||||
result = result.combine((AssemblyResolvedConstructor) sol);
|
||||
result = result.combine((AssemblyResolvedPatterns) sol);
|
||||
if (result == null) {
|
||||
throw new SolverException("Solutions to individual fields produced conflict");
|
||||
}
|
||||
@ -107,8 +99,8 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
}
|
||||
|
||||
protected AssemblyResolution tryCircularShiftExpression(OrExpression exp, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
Set<SolverHint> hints, String description) throws SolverException {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) throws SolverException {
|
||||
// If OR is being used to accomplish a circular shift, then we can apply a clever solver.
|
||||
// We'll match against the patterns: (f << (C - g)) | (f >> g)
|
||||
// (f >> (C - g)) | (f << g)
|
||||
@ -144,7 +136,7 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
expShift = sub.getRight();
|
||||
if (expShift.equals(s2)) {
|
||||
PatternExpression c = sub.getLeft();
|
||||
MaskedLong cc = solver.getValue(c, vals, res, cur);
|
||||
MaskedLong cc = solver.getValue(c, vals, cur);
|
||||
if (cc.isFullyDefined()) {
|
||||
// the left side has the subtraction, so the overall shift is the opposite
|
||||
// of the direction of the shift on the left
|
||||
@ -158,7 +150,7 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
expShift = sub.getRight();
|
||||
if (expShift.equals(s1)) {
|
||||
PatternExpression c = sub.getLeft();
|
||||
MaskedLong cc = solver.getValue(c, vals, res, cur);
|
||||
MaskedLong cc = solver.getValue(c, vals, cur);
|
||||
if (cc.isFullyDefined()) {
|
||||
// the right side has the subtraction, so the overall shift is the same
|
||||
// as the direction of the shift on the left
|
||||
@ -174,16 +166,16 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
// At this point, I know it's a circular shift
|
||||
dbg.println("Identified circular shift: value:= " + expValu1 + ", shift:= " + expShift +
|
||||
", size:= " + size + ", dir:= " + (dir == 1 ? "right" : "left"));
|
||||
return solveLeftCircularShift(expValu1, expShift, size, dir, goal, vals, res, cur, hints,
|
||||
return solveLeftCircularShift(expValu1, expShift, size, dir, goal, vals, cur, hints,
|
||||
description);
|
||||
}
|
||||
|
||||
protected AssemblyResolution solveLeftCircularShift(PatternExpression expValue,
|
||||
PatternExpression expShift, int size, int dir, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException, SolverException {
|
||||
MaskedLong valValue = solver.getValue(expValue, vals, res, cur);
|
||||
MaskedLong valShift = solver.getValue(expShift, vals, res, cur);
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
MaskedLong valValue = solver.getValue(expValue, vals, cur);
|
||||
MaskedLong valShift = solver.getValue(expShift, vals, cur);
|
||||
|
||||
if (valValue != null && !valValue.isFullyDefined()) {
|
||||
if (!valValue.isFullyUndefined()) {
|
||||
@ -202,12 +194,12 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
throw new AssertionError("Should not have constants when solving special forms");
|
||||
}
|
||||
else if (valValue != null) {
|
||||
return solver.solve(expShift, computeCircShiftG(valValue, size, dir, goal), vals, res,
|
||||
cur, hints, description);
|
||||
return solver.solve(expShift, computeCircShiftG(valValue, size, dir, goal), vals, cur,
|
||||
hints, description);
|
||||
}
|
||||
else if (valShift != null) {
|
||||
return solver.solve(expValue, computeCircShiftF(valShift, size, dir, goal), vals, res,
|
||||
cur, hints, description);
|
||||
return solver.solve(expValue, computeCircShiftF(valShift, size, dir, goal), vals, cur,
|
||||
hints, description);
|
||||
}
|
||||
|
||||
// Oiy. Try guessing the shift amount, starting at 0
|
||||
@ -221,21 +213,21 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
try {
|
||||
MaskedLong reqShift = MaskedLong.fromLong(shift);
|
||||
MaskedLong reqValue = computeCircShiftF(reqShift, size, dir, goal);
|
||||
AssemblyResolution resValue = solver.solve(expValue, reqValue, vals, res, cur,
|
||||
AssemblyResolution resValue = solver.solve(expValue, reqValue, vals, cur,
|
||||
hintsWithCircularShift, description);
|
||||
if (resValue.isError()) {
|
||||
AssemblyResolvedError err = (AssemblyResolvedError) resValue;
|
||||
throw new SolverException("Solving f failed: " + err.getError());
|
||||
}
|
||||
AssemblyResolution resShift =
|
||||
solver.solve(expShift, reqShift, vals, res, cur, hints, description);
|
||||
solver.solve(expShift, reqShift, vals, cur, hints, description);
|
||||
if (resShift.isError()) {
|
||||
AssemblyResolvedError err = (AssemblyResolvedError) resShift;
|
||||
throw new SolverException("Solving g failed: " + err.getError());
|
||||
}
|
||||
AssemblyResolvedConstructor solValue = (AssemblyResolvedConstructor) resValue;
|
||||
AssemblyResolvedConstructor solShift = (AssemblyResolvedConstructor) resShift;
|
||||
AssemblyResolvedConstructor sol = solValue.combine(solShift);
|
||||
AssemblyResolvedPatterns solValue = (AssemblyResolvedPatterns) resValue;
|
||||
AssemblyResolvedPatterns solShift = (AssemblyResolvedPatterns) resShift;
|
||||
AssemblyResolvedPatterns sol = solValue.combine(solShift);
|
||||
if (sol == null) {
|
||||
throw new SolverException(
|
||||
"value and shift solutions conflict for shift=" + shift);
|
||||
@ -276,11 +268,10 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
|
||||
@Override
|
||||
protected AssemblyResolution solveTwoSided(OrExpression exp, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException, SolverException {
|
||||
try {
|
||||
return tryCatenationExpression(exp, goal, vals, res, cur, hints, description);
|
||||
return tryCatenationExpression(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
catch (Exception e) {
|
||||
dbg.println("while solving: " + goal + "=:" + exp);
|
||||
@ -288,46 +279,73 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
}
|
||||
|
||||
try {
|
||||
return tryCircularShiftExpression(exp, goal, vals, res, cur, hints, description);
|
||||
return tryCircularShiftExpression(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
catch (Exception e) {
|
||||
dbg.println("while solving: " + goal + "=:" + exp);
|
||||
dbg.println(e.getMessage());
|
||||
}
|
||||
|
||||
Map<ExpressionMatcher<?>, PatternExpression> match = MATCHERS.neqConst.match(exp);
|
||||
if (match != null) {
|
||||
long value = MATCHERS.val.get(match).getValue();
|
||||
PatternValue field = MATCHERS.fld.get(match);
|
||||
// Solve for equals, then either return that, or forbid it, depending on goal
|
||||
AssemblyResolution solution =
|
||||
solver.solve(field, MaskedLong.fromLong(value), vals, cur, hints, description);
|
||||
if (goal.equals(MaskedLong.fromMaskAndValue(0, 1))) {
|
||||
return solution;
|
||||
}
|
||||
if (goal.equals(MaskedLong.fromMaskAndValue(1, 1))) {
|
||||
if (solution.isError()) {
|
||||
return AssemblyResolution.nop(description);
|
||||
}
|
||||
if (solution.isBackfill()) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
AssemblyResolvedPatterns forbidden = (AssemblyResolvedPatterns) solution;
|
||||
forbidden = forbidden.withDescription("Solved 'not equals'");
|
||||
return AssemblyResolution.nop(description).withForbids(Set.of(forbidden));
|
||||
}
|
||||
}
|
||||
|
||||
throw new SolverException("Could not solve two-sided OR");
|
||||
}
|
||||
|
||||
void collectComponents(PatternExpression exp, long shift,
|
||||
Map<Long, PatternExpression> components, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur) throws SolverException {
|
||||
AssemblyResolvedPatterns cur) throws SolverException {
|
||||
if (exp instanceof OrExpression) {
|
||||
collectComponentsOr((OrExpression) exp, shift, components, vals, res, cur);
|
||||
collectComponentsOr((OrExpression) exp, shift, components, vals, cur);
|
||||
}
|
||||
else if (exp instanceof LeftShiftExpression) {
|
||||
collectComponentsLeft((LeftShiftExpression) exp, shift, components, vals, res, cur);
|
||||
collectComponentsLeft((LeftShiftExpression) exp, shift, components, vals, cur);
|
||||
}
|
||||
else if (exp instanceof RightShiftExpression) {
|
||||
collectComponentsRight((RightShiftExpression) exp, shift, components, vals, res, cur);
|
||||
collectComponentsRight((RightShiftExpression) exp, shift, components, vals, cur);
|
||||
}
|
||||
else {
|
||||
assert shift < 64;
|
||||
components.put(shift, exp);
|
||||
PatternExpression conflict = components.put(shift, exp);
|
||||
if (conflict != null) {
|
||||
throw new SolverException("Two 'fields' at the same shift indicates conflict");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void collectComponentsOr(OrExpression exp, long shift, Map<Long, PatternExpression> components,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur)
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur)
|
||||
throws SolverException {
|
||||
collectComponents(exp.getLeft(), shift, components, vals, res, cur);
|
||||
collectComponents(exp.getRight(), shift, components, vals, res, cur);
|
||||
collectComponents(exp.getLeft(), shift, components, vals, cur);
|
||||
collectComponents(exp.getRight(), shift, components, vals, cur);
|
||||
}
|
||||
|
||||
void collectComponentsLeft(LeftShiftExpression exp, long shift,
|
||||
Map<Long, PatternExpression> components, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur) throws SolverException {
|
||||
AssemblyResolvedPatterns cur) throws SolverException {
|
||||
MaskedLong adj;
|
||||
try {
|
||||
adj = solver.getValue(exp.getRight(), vals, res, cur);
|
||||
adj = solver.getValue(exp.getRight(), vals, cur);
|
||||
}
|
||||
catch (NeedsBackfillException e) {
|
||||
throw new SolverException("Variable shifts break field catenation solver", e);
|
||||
@ -335,15 +353,15 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
if (adj == null || !adj.isFullyDefined()) {
|
||||
throw new SolverException("Variable shifts break field catenation solver");
|
||||
}
|
||||
collectComponents(exp.getLeft(), shift + adj.val, components, vals, res, cur);
|
||||
collectComponents(exp.getLeft(), shift + adj.val, components, vals, cur);
|
||||
}
|
||||
|
||||
void collectComponentsRight(RightShiftExpression exp, long shift,
|
||||
Map<Long, PatternExpression> components, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur) throws SolverException {
|
||||
AssemblyResolvedPatterns cur) throws SolverException {
|
||||
MaskedLong adj;
|
||||
try {
|
||||
adj = solver.getValue(exp.getRight(), vals, res, cur);
|
||||
adj = solver.getValue(exp.getRight(), vals, cur);
|
||||
}
|
||||
catch (NeedsBackfillException e) {
|
||||
throw new SolverException("Variable shifts break field catenation solver", e);
|
||||
@ -351,6 +369,6 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpress
|
||||
if (adj == null || !adj.isFullyDefined()) {
|
||||
throw new SolverException("Variable shifts break field catenation solver");
|
||||
}
|
||||
collectComponents(exp.getLeft(), shift - adj.val, components, vals, res, cur);
|
||||
collectComponents(exp.getLeft(), shift - adj.val, components, vals, cur);
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PlusExpression;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form A + B
|
||||
* Solves expressions of the form {@code A + B}
|
||||
*/
|
||||
public class PlusExpressionSolver extends AbstractBinaryExpressionSolver<PlusExpression> {
|
||||
|
||||
|
@ -18,24 +18,30 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
import java.util.*;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
|
||||
/**
|
||||
* This singleton class seeks solutions to {@link PatternExpression}s
|
||||
*
|
||||
* It is called naive, because it does not perform algebraic transformations. Rather, it attempts to
|
||||
* fold constants, assuming there is a single variable in the expression, modifying the goal as it
|
||||
* <p>
|
||||
* It is rather naive. It does not perform algebraic transformations. Instead, it attempts to fold
|
||||
* constants, assuming there is a single variable in the expression, modifying the goal as it
|
||||
* descends toward that variable. If it finds a variable, i.e., token or context field, it encodes
|
||||
* the solution, positioned in the field. If the expression is constant, it checks that the goal
|
||||
* agrees. If not, an error is returned.
|
||||
* agrees. If not, an error is returned. There are some common cases where it is forced to solve
|
||||
* expressions involving multiple variables. Those cases are addressed in the derivatives of
|
||||
* {@link AbstractBinaryExpressionSolver} where the situation can be detected. One common example is
|
||||
* field concatenation using the {@code (A << 4) | B} pattern.
|
||||
*
|
||||
* TODO This whole mechanism ought to just be factored directly into {@link PatternExpression}.
|
||||
* <p>
|
||||
* TODO: Perhaps this whole mechanism ought to just be factored directly into
|
||||
* {@link PatternExpression}.
|
||||
*/
|
||||
public class RecursiveDescentSolver {
|
||||
protected static final DbgTimer dbg = DbgTimer.INACTIVE;
|
||||
private static final RecursiveDescentSolver solver = new RecursiveDescentSolver();
|
||||
protected static final DbgTimer DBG = DbgTimer.INACTIVE;
|
||||
private static final RecursiveDescentSolver INSTANCE = new RecursiveDescentSolver();
|
||||
|
||||
// A mapping from each subclass of PatternExpression to the appropriate solver
|
||||
protected Map<Class<?>, AbstractExpressionSolver<?>> registry = new HashMap<>();
|
||||
@ -67,7 +73,7 @@ public class RecursiveDescentSolver {
|
||||
* @return the singleton instance
|
||||
*/
|
||||
public static RecursiveDescentSolver getSolver() {
|
||||
return solver;
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -103,59 +109,52 @@ public class RecursiveDescentSolver {
|
||||
* @param exp the expression to solve
|
||||
* @param goal the desired output (modulo a mask) of the expression
|
||||
* @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
|
||||
* @param res resolved subconstructors, by operand index (see method details)
|
||||
* @param hints describes techniques applied by calling solvers
|
||||
* @param description a description to attach to the encoded solution
|
||||
* @return the encoded solution
|
||||
* @throws NeedsBackfillException a solution may exist, but a required symbol is missing
|
||||
*/
|
||||
protected AssemblyResolution solve(PatternExpression exp, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
Set<SolverHint> hints, String description) throws NeedsBackfillException {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException {
|
||||
try {
|
||||
return getRegistered(exp.getClass()).solve(exp, goal, vals, res, cur, hints,
|
||||
description);
|
||||
return getRegistered(exp.getClass()).solve(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
catch (UnsupportedOperationException e) {
|
||||
dbg.println("Error solving " + exp + " = " + goal);
|
||||
DBG.println("Error solving " + exp + " = " + goal);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Solve a given expression, assuming it outputs a given masked value
|
||||
* Solve a given expression, given a masked-value goal
|
||||
*
|
||||
* <p>
|
||||
* From a simplified perspective, we need only the expression and the desired value to solve it.
|
||||
* Generally speaking, the expression may have only contain a single variable, and the encoded
|
||||
* result represents that single variable. It must be absorbed into the overall instruction
|
||||
* and/or context encoding.
|
||||
* Generally speaking, the expression may only contain a single field, and the encoded result
|
||||
* specifies the bits of the solved field. It must be absorbed into the overall assembly
|
||||
* pattern.
|
||||
*
|
||||
* More realistically, however, these expressions may depend on quite a bit of extra
|
||||
* information. For example, PC-relative encodings (i.e., those involving {@code inst_start} or
|
||||
* <p>
|
||||
* More realistically, these expressions may depend on quite a bit of extra information. For
|
||||
* example, PC-relative encodings (i.e., those involving {@code inst_start} or
|
||||
* {@code inst_next}) need to know the starting address of the resulting instruction. {@code
|
||||
* inst_start} must be provided to the solver by the assembler. {@code inst_next} cannot be
|
||||
* known until the instruction length is known. Thus, expressions using it always result in a
|
||||
* {@link NeedsBackfillException}. The symbols, when known, are provided to the solver via the
|
||||
* {@code vals} parameter.
|
||||
*
|
||||
* Expressions involving {@link OperandValueSolver}s are a little more complicated, because they
|
||||
* specify an offset that affects its encoding in the instruction. To compute this offset, the
|
||||
* lengths of other surrounding operands must be known. Thus, when solving a context change for
|
||||
* a given constructor, its resolved subconstructors must be provided to the solver via the
|
||||
* {@code res} parameter.
|
||||
*
|
||||
* @param exp the expression to solve
|
||||
* @param goal the desired output (modulo a mask) of the expression
|
||||
* @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
|
||||
* @param res resolved subconstructors, by operand index (see method details)
|
||||
* @param description a description to attach to the encoded solution
|
||||
* @return the encoded solution
|
||||
* @throws NeedsBackfillException a solution may exist, but a required symbol is missing
|
||||
*/
|
||||
public AssemblyResolution solve(PatternExpression exp, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, String description)
|
||||
AssemblyResolvedPatterns cur, String description)
|
||||
throws NeedsBackfillException {
|
||||
return solve(exp, goal, vals, res, cur, Set.of(), description);
|
||||
return solve(exp, goal, vals, cur, Set.of(), description);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -163,45 +162,44 @@ public class RecursiveDescentSolver {
|
||||
*
|
||||
* @param exp the (sub-)expression to fold
|
||||
* @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
|
||||
* @param res resolved subconstructors, by operand index (see
|
||||
* {@link #solve(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, String)})
|
||||
* @return the masked solution
|
||||
* @throws NeedsBackfillException it may be folded, but a required symbol is missing
|
||||
*/
|
||||
protected <T extends PatternExpression> MaskedLong getValue(T exp, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur)
|
||||
throws NeedsBackfillException {
|
||||
MaskedLong value = getRegistered(exp.getClass()).getValue(exp, vals, res, cur);
|
||||
dbg.println("Expression: " + value + " =: " + exp);
|
||||
AssemblyResolvedPatterns cur) throws NeedsBackfillException {
|
||||
MaskedLong value = getRegistered(exp.getClass()).getValue(exp, vals, cur);
|
||||
DBG.println("Expression: " + value + " =: " + exp);
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the length of the instruction part of the encoded solution to the given expression
|
||||
*
|
||||
* <p>
|
||||
* This is used to keep operands in their appropriate position when backfilling becomes
|
||||
* applicable. Normally, the instruction length is taken from the encoding of a solution, but if
|
||||
* the solution cannot be determined yet, the instruction length must still be obtained.
|
||||
*
|
||||
* <p>
|
||||
* The length can be determined by finding token fields in the expression.
|
||||
*
|
||||
* @param exp the expression, presumably containing a token field
|
||||
* @param res resolved subconstructors, by operand index (see
|
||||
* {@link #solve(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, String)})
|
||||
* @return the anticipated length, in bytes, of the instruction encoding
|
||||
*/
|
||||
public int getInstructionLength(PatternExpression exp, Map<Integer, Object> res) {
|
||||
return getRegistered(exp.getClass()).getInstructionLength(exp, res);
|
||||
public int getInstructionLength(PatternExpression exp) {
|
||||
return getRegistered(exp.getClass()).getInstructionLength(exp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the value of an expression given a (possibly-intermediate) resolution
|
||||
*
|
||||
* @param exp the expression to evaluate
|
||||
* @param rc the resolution on which to evalute it
|
||||
* @param vals values of defined symbols
|
||||
* @param rc the resolution on which to evaluate it
|
||||
* @return the result
|
||||
*/
|
||||
public MaskedLong valueForResolution(PatternExpression exp, AssemblyResolvedConstructor rc) {
|
||||
return getRegistered(exp.getClass()).valueForResolution(exp, rc);
|
||||
public MaskedLong valueForResolution(PatternExpression exp, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
return getRegistered(exp.getClass()).valueForResolution(exp, vals, rc);
|
||||
}
|
||||
}
|
||||
|
@ -19,12 +19,12 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.RightShiftExpression;
|
||||
import ghidra.util.Msg;
|
||||
|
||||
/**
|
||||
* {@literal Solves expressions of the form A >> B}
|
||||
* Solves expressions of the form {@code A >> B}
|
||||
*/
|
||||
public class RightShiftExpressionSolver
|
||||
extends AbstractBinaryExpressionSolver<RightShiftExpression> {
|
||||
@ -62,15 +62,14 @@ public class RightShiftExpressionSolver
|
||||
|
||||
@Override
|
||||
protected AssemblyResolution solveTwoSided(RightShiftExpression exp, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
Set<SolverHint> hints, String description)
|
||||
throws NeedsBackfillException, SolverException {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) throws NeedsBackfillException, SolverException {
|
||||
// Do the similar thing as in {@link LeftShiftExpressionSolver}
|
||||
|
||||
// Do not guess the same parameter recursively
|
||||
if (hints.contains(DefaultSolverHint.GUESSING_RIGHT_SHIFT_AMOUNT)) {
|
||||
// NOTE: Nested right shifts ought to be written as a right shift by a sum
|
||||
return super.solveTwoSided(exp, goal, vals, res, cur, hints, description);
|
||||
return super.solveTwoSided(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
|
||||
int maxShift = Long.numberOfLeadingZeros(goal.val);
|
||||
@ -82,18 +81,18 @@ public class RightShiftExpressionSolver
|
||||
MaskedLong reql = computeLeft(reqr, goal);
|
||||
|
||||
AssemblyResolution lres =
|
||||
solver.solve(exp.getLeft(), reql, vals, res, cur, hintsWithRShift, description);
|
||||
solver.solve(exp.getLeft(), reql, vals, cur, hintsWithRShift, description);
|
||||
if (lres.isError()) {
|
||||
throw new SolverException("Solving left failed");
|
||||
}
|
||||
AssemblyResolution rres =
|
||||
solver.solve(exp.getRight(), reqr, vals, res, cur, hints, description);
|
||||
solver.solve(exp.getRight(), reqr, vals, cur, hints, description);
|
||||
if (rres.isError()) {
|
||||
throw new SolverException("Solving right failed");
|
||||
}
|
||||
AssemblyResolvedConstructor lsol = (AssemblyResolvedConstructor) lres;
|
||||
AssemblyResolvedConstructor rsol = (AssemblyResolvedConstructor) rres;
|
||||
AssemblyResolvedConstructor sol = lsol.combine(rsol);
|
||||
AssemblyResolvedPatterns lsol = (AssemblyResolvedPatterns) lres;
|
||||
AssemblyResolvedPatterns rsol = (AssemblyResolvedPatterns) rres;
|
||||
AssemblyResolvedPatterns sol = lsol.combine(rsol);
|
||||
if (sol == null) {
|
||||
throw new SolverException(
|
||||
"Left and right solutions conflict for shift=" + shift);
|
||||
@ -105,6 +104,6 @@ public class RightShiftExpressionSolver
|
||||
// try the next
|
||||
}
|
||||
}
|
||||
return super.solveTwoSided(exp, goal, vals, res, cur, hints, description);
|
||||
return super.solveTwoSided(exp, goal, vals, cur, hints, description);
|
||||
}
|
||||
}
|
||||
|
@ -20,11 +20,13 @@ import java.util.*;
|
||||
/**
|
||||
* A type for solver hints
|
||||
*
|
||||
* Hints inform "sub-"solvers of the techniques already being applied by the calling solvers. This
|
||||
* <p>
|
||||
* Hints inform sub-solvers of the techniques already being applied by the calling solvers. This
|
||||
* helps prevent situations where, e.g., two multiplication solvers (applied to repeated or nested
|
||||
* multiplication) both attempt to synthesize new goals for repetition. This sort of expression is
|
||||
* common when decoding immediates in the AArch64 specification.
|
||||
*
|
||||
* <p>
|
||||
* Using an interface implemented by an enumeration (instead of just using the enumeration directly)
|
||||
* eases expansion by extension without modifying the core code.
|
||||
*
|
||||
|
@ -24,6 +24,7 @@ import ghidra.app.plugin.processors.sleigh.expression.StartInstructionValue;
|
||||
/**
|
||||
* "Solves" expression of {@code inst_start}
|
||||
*
|
||||
* <p>
|
||||
* Works like the constant solver, but takes the value of {@code inst_start}, which is given by the
|
||||
* assembly address.
|
||||
*/
|
||||
@ -35,28 +36,26 @@ public class StartInstructionValueSolver extends AbstractExpressionSolver<StartI
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(StartInstructionValue iv, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
Set<SolverHint> hints, String description) {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, Set<SolverHint> hints,
|
||||
String description) {
|
||||
throw new AssertionError(
|
||||
"INTERNAL: Should never be asked to solve for " + AssemblyTreeResolver.INST_START);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(StartInstructionValue iv, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur) {
|
||||
AssemblyResolvedPatterns cur) {
|
||||
return MaskedLong.fromLong(vals.get(AssemblyTreeResolver.INST_START));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(StartInstructionValue exp, Map<Integer, Object> res) {
|
||||
public int getInstructionLength(StartInstructionValue exp) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(StartInstructionValue exp,
|
||||
AssemblyResolvedConstructor rc) {
|
||||
// Would need to pass in symbol values.
|
||||
throw new UnsupportedOperationException(
|
||||
"The solver should never ask for this value given a resolved constructor.");
|
||||
public MaskedLong valueForResolution(StartInstructionValue exp, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
return MaskedLong.fromLong(vals.get(AssemblyTreeResolver.INST_START));
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.SubExpression;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form A - B
|
||||
* Solves expressions of the form {@code A - B}
|
||||
*/
|
||||
public class SubExpressionSolver extends AbstractBinaryExpressionSolver<SubExpression> {
|
||||
|
||||
|
@ -24,6 +24,7 @@ import ghidra.app.plugin.processors.sleigh.expression.TokenField;
|
||||
/**
|
||||
* Solves expressions of a token (instruction encoding) field
|
||||
*
|
||||
* <p>
|
||||
* Essentially, this just encodes the goal into the field, if it can be represented in the given
|
||||
* space and format. Otherwise, there is no solution.
|
||||
*/
|
||||
@ -35,33 +36,33 @@ public class TokenFieldSolver extends AbstractExpressionSolver<TokenField> {
|
||||
|
||||
@Override
|
||||
public AssemblyResolution solve(TokenField tf, MaskedLong goal, Map<String, Long> vals,
|
||||
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
|
||||
String description) {
|
||||
AssemblyResolvedPatterns cur, Set<SolverHint> hints, String description) {
|
||||
assert tf.minValue() == 0; // In case someone decides to do signedness there.
|
||||
if (!goal.isInRange(tf.maxValue(), tf.hasSignbit())) {
|
||||
return AssemblyResolution.error("Value " + goal + " is not valid for " + tf,
|
||||
description, null);
|
||||
description);
|
||||
}
|
||||
AssemblyPatternBlock block = AssemblyPatternBlock.fromTokenField(tf, goal);
|
||||
return AssemblyResolution.instrOnly(block, description, null);
|
||||
return AssemblyResolution.instrOnly(block, description);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong getValue(TokenField tf, Map<String, Long> vals, Map<Integer, Object> res,
|
||||
AssemblyResolvedConstructor cur) {
|
||||
public MaskedLong getValue(TokenField tf, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns cur) {
|
||||
if (cur == null) {
|
||||
return null;
|
||||
}
|
||||
return valueForResolution(tf, cur);
|
||||
return valueForResolution(tf, vals, cur);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInstructionLength(TokenField tf, Map<Integer, Object> res) {
|
||||
public int getInstructionLength(TokenField tf) {
|
||||
return tf.getByteEnd() + 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MaskedLong valueForResolution(TokenField tf, AssemblyResolvedConstructor rc) {
|
||||
public MaskedLong valueForResolution(TokenField tf, Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns rc) {
|
||||
int size = tf.getByteEnd() - tf.getByteStart() + 1;
|
||||
MaskedLong res = rc.readInstruction(tf.getByteStart(), size);
|
||||
if (!tf.isBigEndian()) {
|
||||
|
@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.XorExpression;
|
||||
|
||||
/**
|
||||
* Solves expressions of the form A $xor B
|
||||
* Solves expressions of the form {@code A $xor B}
|
||||
*/
|
||||
public class XorExpressionSolver extends AbstractBinaryExpressionSolver<XorExpression> {
|
||||
|
||||
|
@ -0,0 +1,122 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.expression.*;
|
||||
|
||||
/**
|
||||
* Base implementation for expression matchers
|
||||
*
|
||||
* @param <T> the type of expression matched
|
||||
*/
|
||||
public abstract class AbstractExpressionMatcher<T extends PatternExpression>
|
||||
implements ExpressionMatcher<T> {
|
||||
protected final Set<Class<? extends T>> ops;
|
||||
|
||||
public AbstractExpressionMatcher(Set<Class<? extends T>> ops) {
|
||||
this.ops = Set.copyOf(ops);
|
||||
}
|
||||
|
||||
public AbstractExpressionMatcher(Class<? extends T> cls) {
|
||||
this.ops = Set.of(cls);
|
||||
}
|
||||
|
||||
protected T opMatches(PatternExpression expression) {
|
||||
return ops.stream()
|
||||
.filter(op -> op.isInstance(expression))
|
||||
.map(op -> op.cast(expression))
|
||||
.findAny()
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
protected abstract boolean matchDetails(T expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result);
|
||||
|
||||
@Override
|
||||
public boolean match(PatternExpression expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
T t = opMatches(expression);
|
||||
if (t == null) {
|
||||
return false;
|
||||
}
|
||||
if (!matchDetails(t, result)) {
|
||||
return false;
|
||||
}
|
||||
return recordResult(t, result);
|
||||
}
|
||||
|
||||
protected boolean recordResult(PatternExpression expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
PatternExpression already = result.put(this, expression);
|
||||
if (already == null) {
|
||||
return true;
|
||||
}
|
||||
return expressionsIdenticallyDefined(already, expression);
|
||||
}
|
||||
|
||||
protected static boolean expressionsIdenticallyDefined(PatternExpression a,
|
||||
PatternExpression b) {
|
||||
if (a.getClass() != b.getClass()) {
|
||||
return false;
|
||||
}
|
||||
if (a instanceof EndInstructionValue) {
|
||||
return true;
|
||||
}
|
||||
if (a instanceof StartInstructionValue) {
|
||||
return true;
|
||||
}
|
||||
if (a instanceof ConstantValue) {
|
||||
ConstantValue ca = (ConstantValue) a;
|
||||
ConstantValue cb = (ConstantValue) b;
|
||||
return ca.getValue() == cb.getValue();
|
||||
}
|
||||
if (a instanceof UnaryExpression) {
|
||||
UnaryExpression ua = (UnaryExpression) a;
|
||||
UnaryExpression ub = (UnaryExpression) b;
|
||||
return expressionsIdenticallyDefined(ua.getUnary(), ub.getUnary());
|
||||
}
|
||||
if (a instanceof BinaryExpression) {
|
||||
BinaryExpression ba = (BinaryExpression) a;
|
||||
BinaryExpression bb = (BinaryExpression) b;
|
||||
return expressionsIdenticallyDefined(ba.getLeft(), bb.getLeft()) &&
|
||||
expressionsIdenticallyDefined(ba.getRight(), bb.getRight());
|
||||
}
|
||||
if (a instanceof TokenField) {
|
||||
TokenField ta = (TokenField) a;
|
||||
TokenField tb = (TokenField) b;
|
||||
return ta.getBitStart() == tb.getBitStart() &&
|
||||
ta.getBitEnd() == tb.getBitEnd() &&
|
||||
ta.hasSignbit() == tb.hasSignbit();
|
||||
}
|
||||
if (a instanceof ContextField) {
|
||||
ContextField ca = (ContextField) a;
|
||||
ContextField cb = (ContextField) b;
|
||||
return ca.getStartBit() == cb.getStartBit() &&
|
||||
ca.getEndBit() == cb.getEndBit() &&
|
||||
ca.hasSignbit() == cb.hasSignbit();
|
||||
}
|
||||
if (a instanceof OperandValue) {
|
||||
OperandValue va = (OperandValue) a;
|
||||
OperandValue vb = (OperandValue) b;
|
||||
return va.getConstructor() == vb.getConstructor() &&
|
||||
va.getIndex() == vb.getIndex();
|
||||
}
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
|
||||
/**
|
||||
* A matcher which accept any expression of the required type
|
||||
*
|
||||
* <p>
|
||||
* This requires no further consideration of the expressions operands. If the type matches, the
|
||||
* expression matches.
|
||||
*
|
||||
* @param <T> the type to match
|
||||
*/
|
||||
public class AnyMatcher<T extends PatternExpression> extends AbstractExpressionMatcher<T> {
|
||||
public static AnyMatcher<PatternExpression> any() {
|
||||
return new AnyMatcher<>(PatternExpression.class);
|
||||
}
|
||||
|
||||
public AnyMatcher(Set<Class<? extends T>> ops) {
|
||||
super(ops);
|
||||
}
|
||||
|
||||
public AnyMatcher(Class<T> cls) {
|
||||
super(cls);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchDetails(T expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
return true;
|
||||
}
|
||||
}
|
@ -0,0 +1,91 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.expression.BinaryExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
|
||||
/**
|
||||
* A matcher for a binary expression
|
||||
*
|
||||
* <p>
|
||||
* If the required type matches, the matching descends to the left then right operands.
|
||||
*
|
||||
* @param <T> the type of expression matched
|
||||
*/
|
||||
public class BinaryExpressionMatcher<T extends BinaryExpression>
|
||||
extends AbstractExpressionMatcher<T> {
|
||||
|
||||
/**
|
||||
* A matcher for binary expression allowing commutativity
|
||||
*
|
||||
* <p>
|
||||
* This behaves the same as {@link BinaryExpressionMatcher}, but if the first attempt fails, the
|
||||
* operand match is re-attempted with the operands swapped.
|
||||
*
|
||||
* @param <T> the type of expression matched
|
||||
*/
|
||||
public static class Commutative<T extends BinaryExpression> extends BinaryExpressionMatcher<T> {
|
||||
public Commutative(Set<Class<? extends T>> ops,
|
||||
ExpressionMatcher<?> leftMatcher, ExpressionMatcher<?> rightMatcher) {
|
||||
super(ops, leftMatcher, rightMatcher);
|
||||
}
|
||||
|
||||
public Commutative(Class<T> cls, ExpressionMatcher<?> leftMatcher,
|
||||
ExpressionMatcher<?> rightMatcher) {
|
||||
super(cls, leftMatcher, rightMatcher);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchDetails(T expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
Set<ExpressionMatcher<?>> reset = new HashSet<>(result.keySet());
|
||||
if (leftMatcher.match(expression.getLeft(), result) &&
|
||||
rightMatcher.match(expression.getRight(), result)) {
|
||||
return true;
|
||||
}
|
||||
result.keySet().retainAll(reset);
|
||||
return rightMatcher.match(expression.getLeft(), result) &&
|
||||
leftMatcher.match(expression.getRight(), result);
|
||||
}
|
||||
}
|
||||
|
||||
protected final ExpressionMatcher<?> leftMatcher;
|
||||
protected final ExpressionMatcher<?> rightMatcher;
|
||||
|
||||
public BinaryExpressionMatcher(Set<Class<? extends T>> ops,
|
||||
ExpressionMatcher<?> leftMatcher, ExpressionMatcher<?> rightMatcher) {
|
||||
super(ops);
|
||||
this.leftMatcher = leftMatcher;
|
||||
this.rightMatcher = rightMatcher;
|
||||
}
|
||||
|
||||
public BinaryExpressionMatcher(Class<T> cls, ExpressionMatcher<?> leftMatcher,
|
||||
ExpressionMatcher<?> rightMatcher) {
|
||||
super(cls);
|
||||
this.leftMatcher = leftMatcher;
|
||||
this.rightMatcher = rightMatcher;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchDetails(T expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
return leftMatcher.match(expression.getLeft(), result) &&
|
||||
rightMatcher.match(expression.getRight(), result);
|
||||
}
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.expression.ConstantValue;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
|
||||
/**
|
||||
* A matcher for a given constant value
|
||||
*/
|
||||
public class ConstantValueMatcher extends AbstractExpressionMatcher<ConstantValue> {
|
||||
protected final long value;
|
||||
|
||||
public ConstantValueMatcher(long value) {
|
||||
super(ConstantValue.class);
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchDetails(ConstantValue expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
return expression.getValue() == value;
|
||||
}
|
||||
}
|
@ -0,0 +1,309 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.expression.*;
|
||||
|
||||
/**
|
||||
* A matcher for a form of patten expression
|
||||
*
|
||||
* <p>
|
||||
* Some solvers may need to apply sophisticated heuristics to recognize certain forms that commonly
|
||||
* occur in pattern expressions. These can certainly be programmed manually, but for many cases, the
|
||||
* form recognition can be accomplished by describing the form as an expression matcher. For a
|
||||
* shorter syntax to construct such matchers. See {@link Context}.
|
||||
*
|
||||
* @param <T> the type of expression matched
|
||||
*/
|
||||
public interface ExpressionMatcher<T extends PatternExpression> {
|
||||
|
||||
/**
|
||||
* Attempt to match the given expression, recording the substitutions if successful
|
||||
*
|
||||
* @param expression the expression to match
|
||||
* @return a map of matchers to substituted expressions
|
||||
*/
|
||||
default Map<ExpressionMatcher<?>, PatternExpression> match(PatternExpression expression) {
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result = new HashMap<>();
|
||||
if (match(expression, result)) {
|
||||
return result;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the expression substituted for this matcher from a previous successful match
|
||||
*
|
||||
* <p>
|
||||
* Calling this on the root matcher is relatively useless, as it would simply return the
|
||||
* expression passed to {@link #match(PatternExpression)}. Instead, sub-matchers should be saved
|
||||
* in a variable, allowing their values to be retrieved. See {@link Context}, for an example.
|
||||
*
|
||||
* @param results the previous match results
|
||||
* @return the substituted expression
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
default T get(Map<ExpressionMatcher<?>, PatternExpression> results) {
|
||||
return (T) results.get(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to match the given expression, recording substitutions in the given map
|
||||
*
|
||||
* <p>
|
||||
* Even if the match was unsuccessful, the result map may contain attempted substitutions. Thus,
|
||||
* the map should be discarded if unsuccessful.
|
||||
*
|
||||
* @param expression the expression to match
|
||||
* @param result a map to store matchers to substituted expressions
|
||||
* @return true if successful, false if not
|
||||
*/
|
||||
boolean match(PatternExpression expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result);
|
||||
|
||||
/**
|
||||
* A context for defining expression matcher succinctly
|
||||
*
|
||||
* <p>
|
||||
* Implementations of this interface have easy access to factory methods for each kind of
|
||||
* {@link PatternExpression}. Additionally, the class itself provide a convenient container for
|
||||
* saving important sub-matchers, so that important sub-expression can be readily retrieved. For
|
||||
* example:
|
||||
*
|
||||
* <pre>
|
||||
* static class MyMatchers implements ExpressionMatcher.Context {
|
||||
* ExpressionMatcher<ConstantValue> shamt = var(ConstantValue.class);
|
||||
* ExpressionMatcher<LeftShiftExpression> exp = shl(var(), shamt);
|
||||
* }
|
||||
*
|
||||
* static final MyMatchers MATCHERS = new MyMatchers();
|
||||
*
|
||||
* public long getConstantShift(PatternExpression expression) {
|
||||
* Map<ExpressionMatcher<?>, PatternExpression> result = MATCHERS.exp.match(expression);
|
||||
* if (result == null) {
|
||||
* return -1;
|
||||
* }
|
||||
* return MATCHERS.shamt.get(result).getValue();
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Saving a sub-matcher to a field (as in the example) also permits that sub-matcher to appear
|
||||
* in multiple places. In that case, the sub-matcher must match identical expressions wherever
|
||||
* it appears. For example, if {@code cv} matches any constant value, then {@code plus(cv, cv)}
|
||||
* would match {@code 2 + 2}, but not {@code 2 + 3}.
|
||||
*/
|
||||
interface Context {
|
||||
|
||||
/**
|
||||
* Match the form {@code L & R} or {@code R & L}
|
||||
*
|
||||
* @param left the matcher for the left operand
|
||||
* @param right the matcher for the right operand
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<AndExpression> and(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher.Commutative<>(AndExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L / R}
|
||||
*
|
||||
* @param left the matcher for the dividend
|
||||
* @param right the matcher for the divisor
|
||||
* @return the matcher for the quotient
|
||||
*/
|
||||
default ExpressionMatcher<DivExpression> div(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher<>(DivExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L << R}
|
||||
*
|
||||
* @param left the matcher for the left operand
|
||||
* @param right the matcher for the shift amount
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<LeftShiftExpression> shl(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher<>(LeftShiftExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L * R} or {@code R * L}
|
||||
*
|
||||
* @param left the matcher for the left factor
|
||||
* @param right the matcher for the right factor
|
||||
* @return the matcher for the product
|
||||
*/
|
||||
default ExpressionMatcher<MultExpression> mul(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher.Commutative<>(MultExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L | R} or {@code R | L}
|
||||
*
|
||||
* @param left the matcher for the left operand
|
||||
* @param right the matcher for the right operand
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<OrExpression> or(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher.Commutative<>(OrExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L + R} or {@code R + L}
|
||||
*
|
||||
* @param left the matcher for the left term
|
||||
* @param right the matcher for the right term
|
||||
* @return the matcher for the sum
|
||||
*/
|
||||
default ExpressionMatcher<PlusExpression> plus(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher<>(PlusExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L >> R}
|
||||
*
|
||||
* @param left the matcher for the left operand
|
||||
* @param right the matcher for the shift amount
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<RightShiftExpression> shr(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher<>(RightShiftExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L - R}
|
||||
*
|
||||
* @param left the matcher for the left term
|
||||
* @param right the matcher for the right term
|
||||
* @return the matcher for the difference
|
||||
*/
|
||||
default ExpressionMatcher<SubExpression> sub(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher<>(SubExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code L $xor R} or {@code R $xor L}
|
||||
*
|
||||
* @param left the matcher for the left operand
|
||||
* @param right the matcher for the right operand
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<XorExpression> xor(ExpressionMatcher<?> left,
|
||||
ExpressionMatcher<?> right) {
|
||||
return new BinaryExpressionMatcher<>(XorExpression.class, left, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match a given constant value
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> To match an unspecified constant value, use {@link #var(Class)} with
|
||||
* {@link ConstantValue}.
|
||||
*
|
||||
* @param value the value to match
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<ConstantValue> cv(long value) {
|
||||
return new ConstantValueMatcher(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match any expression
|
||||
*
|
||||
* <p>
|
||||
* This matches any expression without consideration of its operands, except insofar when it
|
||||
* appears in multiple places, it will check that subsequent matches are identical to the
|
||||
* first.
|
||||
*
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<PatternExpression> var() {
|
||||
return AnyMatcher.any();
|
||||
}
|
||||
|
||||
/**
|
||||
* Match any expression of the given type
|
||||
*
|
||||
* @param <T> the type of expression to match
|
||||
* @param cls the class of expression to match
|
||||
* @return the matcher
|
||||
*/
|
||||
default <T extends PatternExpression> ExpressionMatcher<T> var(Class<T> cls) {
|
||||
return new AnyMatcher<>(cls);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match an operand value
|
||||
*
|
||||
* <p>
|
||||
* Typically, this must wrap any use of a field, since that field is considered an operand
|
||||
* from the constructor's perspective.
|
||||
*
|
||||
* @param def the matcher for the operand's defining expression.
|
||||
* @return the operand matcher
|
||||
*/
|
||||
default ExpressionMatcher<OperandValue> opnd(ExpressionMatcher<?> def) {
|
||||
return new OperandValueMatcher(def);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match a field by its size
|
||||
*
|
||||
* <p>
|
||||
* This matches either a {@link TokenField} or a {@link ContextField}. If matched, it then
|
||||
* passes a {@link ConstantValue} of the field's size (in bits) into the given size matcher.
|
||||
*
|
||||
* @param size the matcher for the field's size
|
||||
* @return the field matcher
|
||||
*/
|
||||
default ExpressionMatcher<PatternValue> fldSz(ExpressionMatcher<?> size) {
|
||||
return new FieldSizeMatcher(size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code -U}
|
||||
*
|
||||
* @param unary the child matcher
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<MinusExpression> neg(ExpressionMatcher<?> unary) {
|
||||
return new UnaryExpressionMatcher<>(MinusExpression.class, unary);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the form {@code ~U}
|
||||
*
|
||||
* @param unary the child matcher
|
||||
* @return the matcher
|
||||
*/
|
||||
default ExpressionMatcher<NotExpression> not(ExpressionMatcher<?> unary) {
|
||||
return new UnaryExpressionMatcher<>(NotExpression.class, unary);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.expression.*;
|
||||
|
||||
/**
|
||||
* A matcher for a token or context field, constrained by its size in bits
|
||||
*/
|
||||
public class FieldSizeMatcher extends AbstractExpressionMatcher<PatternValue> {
|
||||
protected final ExpressionMatcher<?> sizeMatcher;
|
||||
|
||||
public FieldSizeMatcher(ExpressionMatcher<?> sizeMatcher) {
|
||||
super(Set.of(ContextField.class, TokenField.class));
|
||||
this.sizeMatcher = sizeMatcher;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchDetails(PatternValue expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
if (expression instanceof ContextField) {
|
||||
ContextField cf = (ContextField) expression;
|
||||
long size = cf.getEndBit() - cf.getStartBit() + 1;
|
||||
return sizeMatcher.match(new ConstantValue(size), result);
|
||||
}
|
||||
if (expression instanceof TokenField) {
|
||||
TokenField tf = (TokenField) expression;
|
||||
long size = tf.getBitEnd() - tf.getBitStart() + 1;
|
||||
return sizeMatcher.match(new ConstantValue(size), result);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
@ -0,0 +1,42 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.OperandValueSolver;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.OperandValue;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
|
||||
/**
|
||||
* A matcher for a constructor's operand value, constrained by its defining expression
|
||||
*/
|
||||
public class OperandValueMatcher extends AbstractExpressionMatcher<OperandValue> {
|
||||
protected final ExpressionMatcher<?> defMatcher;
|
||||
|
||||
public OperandValueMatcher(ExpressionMatcher<?> defMatcher) {
|
||||
super(OperandValue.class);
|
||||
this.defMatcher = defMatcher;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchDetails(OperandValue expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
OperandSymbol opSym = expression.getConstructor().getOperand(expression.getIndex());
|
||||
return defMatcher.match(OperandValueSolver.getDefiningExpression(opSym), result);
|
||||
}
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.expr.match;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.UnaryExpression;
|
||||
|
||||
/**
|
||||
* A matcher for a unnary expression
|
||||
*
|
||||
* <p>
|
||||
* If the required type matches, the matching descends to the child operand.
|
||||
*
|
||||
* @param <T> the type of expression matched
|
||||
*/
|
||||
public class UnaryExpressionMatcher<T extends UnaryExpression>
|
||||
extends AbstractExpressionMatcher<T> {
|
||||
protected final ExpressionMatcher<?> unaryMatcher;
|
||||
|
||||
public UnaryExpressionMatcher(Set<Class<? extends T>> ops, ExpressionMatcher<?> unaryMatcher) {
|
||||
super(ops);
|
||||
this.unaryMatcher = unaryMatcher;
|
||||
}
|
||||
|
||||
public UnaryExpressionMatcher(Class<T> cls, ExpressionMatcher<?> unaryMatcher) {
|
||||
super(cls);
|
||||
this.unaryMatcher = unaryMatcher;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchDetails(T expression,
|
||||
Map<ExpressionMatcher<?>, PatternExpression> result) {
|
||||
return unaryMatcher.match(expression.getUnary(), result);
|
||||
}
|
||||
}
|
@ -34,16 +34,17 @@ import ghidra.generic.util.datastruct.TreeSetValuedTreeMap;
|
||||
/**
|
||||
* Defines a context-free grammar, usually for the purpose of parsing mnemonic assembly instructions
|
||||
*
|
||||
* As in classic computer science, a CFG consists of productions of non-terminals and terminals.
|
||||
* The left-hand side of the a production must be a single non-terminal, but the right-hand side
|
||||
* may be any string of symbols. To avoid overloading the term "String," here we call it a
|
||||
* "Sentential."
|
||||
* <p>
|
||||
* As in classic computer science, a CFG consists of productions of non-terminals and terminals. The
|
||||
* left-hand side of a production must be a single non-terminal, but the right-hand side may be
|
||||
* any string of symbols. To avoid overloading the term "String," here we call it a "Sentential."
|
||||
*
|
||||
* <p>
|
||||
* To define a grammar, simply construct an appropriate subclass (probably {@link AssemblyGrammar})
|
||||
* and call {@link #addProduction(AbstractAssemblyProduction)} or
|
||||
* {@link #addProduction(AssemblyNonTerminal, AssemblySentential)}. The grammar object will collect
|
||||
* the non-terminals and terminals.
|
||||
* {@link #addProduction(AssemblyNonTerminal, AssemblySentential)}.
|
||||
*
|
||||
* <p>
|
||||
* By default, the start symbol is taken from the left-hand side of the first production added to
|
||||
* the grammar.
|
||||
*
|
||||
@ -71,6 +72,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Add a production to the grammar
|
||||
*
|
||||
* @param lhs the left-hand side
|
||||
* @param rhs the right-hand side
|
||||
*/
|
||||
@ -81,6 +83,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Add a production to the grammar
|
||||
*
|
||||
* @param prod the production
|
||||
*/
|
||||
public void addProduction(P prod) {
|
||||
@ -96,7 +99,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
String lhsName = lhs.getName();
|
||||
symbols.put(lhsName, lhs);
|
||||
nonterminals.put(lhsName, lhs);
|
||||
for (AssemblySymbol sym : prod) {
|
||||
for (AssemblySymbol sym : prod.getRHS()) {
|
||||
if (sym instanceof AssemblyNonTerminal) {
|
||||
@SuppressWarnings("unchecked")
|
||||
NT nt = (NT) sym;
|
||||
@ -115,14 +118,15 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Check if the given production is purely recursive, i.e., of the form I => I
|
||||
*
|
||||
* @param prod the production to check
|
||||
* @return true iff the production is purely recursive
|
||||
*/
|
||||
protected boolean isPureRecursive(P prod) {
|
||||
if (prod.size() != 1) {
|
||||
if (prod.getRHS().size() != 1) {
|
||||
return false;
|
||||
}
|
||||
if (!prod.getLHS().equals(prod.getRHS().get(0))) {
|
||||
if (!prod.getLHS().equals(prod.getRHS().getSymbol(0))) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -130,6 +134,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Change the start symbol for the grammar
|
||||
*
|
||||
* @param nt the new start symbol
|
||||
*/
|
||||
public void setStart(AssemblyNonTerminal nt) {
|
||||
@ -138,6 +143,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Change the start symbol for the grammar
|
||||
*
|
||||
* @param startName the name of the new start symbol
|
||||
*/
|
||||
public void setStartName(String startName) {
|
||||
@ -146,6 +152,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get the start symbol for the grammar
|
||||
*
|
||||
* @return the start symbol
|
||||
*/
|
||||
public NT getStart() {
|
||||
@ -154,6 +161,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get the name of the start symbol for the grammar
|
||||
*
|
||||
* @return the name of the start symbol
|
||||
*/
|
||||
public String getStartName() {
|
||||
@ -162,6 +170,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get the named non-terminal
|
||||
*
|
||||
* @param name the name of the desired non-terminal
|
||||
* @return the non-terminal, or null if it is not in this grammar
|
||||
*/
|
||||
@ -171,6 +180,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get the named terminal
|
||||
*
|
||||
* @param name the name of the desired terminal
|
||||
* @return the terminal, or null if it is not in this grammar
|
||||
*/
|
||||
@ -180,6 +190,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Add all the productions of a given grammar to this one
|
||||
*
|
||||
* @param that the grammar whose productions to add
|
||||
*/
|
||||
public void combine(AbstractAssemblyGrammar<NT, P> that) {
|
||||
@ -190,6 +201,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Print the productions of this grammar to the given stream
|
||||
*
|
||||
* @param out the stream
|
||||
*/
|
||||
public void print(PrintStream out) {
|
||||
@ -201,17 +213,19 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
/**
|
||||
* Check that the grammar is consistent
|
||||
*
|
||||
* The grammar is consistent if every non-terminal appearing in the grammar, also appears as
|
||||
* the left-hand side of some production. If not, such non-terminals are said to be undefined.
|
||||
* <p>
|
||||
* The grammar is consistent if every non-terminal appearing in the grammar also appears as the
|
||||
* left-hand side of some production. If not, such non-terminals are said to be undefined.
|
||||
*
|
||||
* @throws AssemblyGrammarException the grammar is inconsistent, i.e., contains undefined
|
||||
* non-terminals.
|
||||
* non-terminals.
|
||||
*/
|
||||
public void verify() throws AssemblyGrammarException {
|
||||
if (!productions.containsKey(startName)) {
|
||||
throw new AssemblyGrammarException("Start symbol has no defining production");
|
||||
}
|
||||
for (P prod : productions.values()) {
|
||||
for (AssemblySymbol sym : prod) {
|
||||
for (AssemblySymbol sym : prod.getRHS()) {
|
||||
if (sym instanceof AssemblyNonTerminal) {
|
||||
AssemblyNonTerminal nt = (AssemblyNonTerminal) sym;
|
||||
if (!(productions.containsKey(nt.getName()))) {
|
||||
@ -233,6 +247,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get the non-terminals
|
||||
*
|
||||
* @return the non-terminals
|
||||
*/
|
||||
public Collection<NT> nonTerminals() {
|
||||
@ -241,6 +256,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get the terminals
|
||||
*
|
||||
* @return the terminals
|
||||
*/
|
||||
public Collection<AssemblyTerminal> terminals() {
|
||||
@ -249,6 +265,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get all productions where the left-hand side non-terminal has the given name
|
||||
*
|
||||
* @param name the name of the non-terminal
|
||||
* @return all productions "defining" the named non-terminal
|
||||
*/
|
||||
@ -261,6 +278,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Get all productions where the left-hand side is the given non-terminal
|
||||
*
|
||||
* @param nt the non-terminal whose defining productions to find
|
||||
* @return all productions "defining" the given non-terminal
|
||||
*/
|
||||
@ -270,6 +288,7 @@ public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P
|
||||
|
||||
/**
|
||||
* Check if the grammar contains any symbol with the given name
|
||||
*
|
||||
* @param name the name to find
|
||||
* @return true iff a terminal or non-terminal has the given name
|
||||
*/
|
||||
|
@ -15,12 +15,7 @@
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.grammars;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.collections4.list.AbstractListDecorator;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
|
||||
|
||||
/**
|
||||
* Defines a production in a context-free grammar, usually for parsing mnemonic assembly
|
||||
@ -29,7 +24,6 @@ import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
|
||||
* @param <NT> the type of non-terminals
|
||||
*/
|
||||
public abstract class AbstractAssemblyProduction<NT extends AssemblyNonTerminal>
|
||||
extends AbstractListDecorator<AssemblySymbol>
|
||||
implements Comparable<AbstractAssemblyProduction<NT>> {
|
||||
private final NT lhs;
|
||||
private final AssemblySentential<NT> rhs;
|
||||
@ -38,6 +32,7 @@ public abstract class AbstractAssemblyProduction<NT extends AssemblyNonTerminal>
|
||||
|
||||
/**
|
||||
* Construct a production with the given LHS and RHS
|
||||
*
|
||||
* @param lhs the left-hand side
|
||||
* @param rhs the right-hand side
|
||||
*/
|
||||
@ -47,16 +42,13 @@ public abstract class AbstractAssemblyProduction<NT extends AssemblyNonTerminal>
|
||||
this.rhs = rhs;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<AssemblySymbol> decorated() {
|
||||
return rhs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the index of the production
|
||||
*
|
||||
* Instead of using deep comparison, the index is often used as the identify of the production
|
||||
* <p>
|
||||
* Instead of using deep comparison, the index is often used as the identity of the production
|
||||
* within a grammar.
|
||||
*
|
||||
* @return the index
|
||||
*/
|
||||
public int getIndex() {
|
||||
@ -65,6 +57,7 @@ public abstract class AbstractAssemblyProduction<NT extends AssemblyNonTerminal>
|
||||
|
||||
/**
|
||||
* Get the left-hand side
|
||||
*
|
||||
* @return the LHS
|
||||
*/
|
||||
public NT getLHS() {
|
||||
@ -73,6 +66,7 @@ public abstract class AbstractAssemblyProduction<NT extends AssemblyNonTerminal>
|
||||
|
||||
/**
|
||||
* Get the right-hand side
|
||||
*
|
||||
* @return the RHS
|
||||
*/
|
||||
public AssemblySentential<NT> getRHS() {
|
||||
@ -123,15 +117,12 @@ public abstract class AbstractAssemblyProduction<NT extends AssemblyNonTerminal>
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblySentential<NT> subList(int fromIndex, int toIndex) {
|
||||
return rhs.subList(fromIndex, toIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the "name" of this production
|
||||
*
|
||||
* <p>
|
||||
* This is mostly just notional and for debugging. The name is taken as the name of the LHS.
|
||||
*
|
||||
* @return the name of the LHS
|
||||
*/
|
||||
public String getName() {
|
||||
|
@ -20,9 +20,10 @@ import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyExtendedNonTerminal;
|
||||
/**
|
||||
* Defines an "extended" grammar
|
||||
*
|
||||
* "Extended grammar" as in a grammar extended with state numbers from an LR0 parser.
|
||||
* See <a href="http://web.cs.dal.ca/~sjackson/lalr1.html">LALR(1) Parsing</a> from Stephen Jackson
|
||||
* of Dalhousie University, Halifax, Nova Scotia, Canada.
|
||||
* <p>
|
||||
* "Extended grammar" as in a grammar extended with state numbers from an LR0 parser. See
|
||||
* <a href="http://web.cs.dal.ca/~sjackson/lalr1.html">LALR(1) Parsing</a> from Stephen Jackson of
|
||||
* Dalhousie University, Halifax, Nova Scotia, Canada.
|
||||
*/
|
||||
public class AssemblyExtendedGrammar
|
||||
extends AbstractAssemblyGrammar<AssemblyExtendedNonTerminal, AssemblyExtendedProduction> {
|
||||
|
@ -29,6 +29,7 @@ public class AssemblyExtendedProduction
|
||||
|
||||
/**
|
||||
* Construct an extended production based on the given ancestor
|
||||
*
|
||||
* @param lhs the extended left-hand side
|
||||
* @param rhs the extended right-hand side
|
||||
* @param finalState the end state of the final symbol of the RHS
|
||||
@ -49,6 +50,7 @@ public class AssemblyExtendedProduction
|
||||
|
||||
/**
|
||||
* Get the final state of this production
|
||||
*
|
||||
* @return the end state of the last symbol of the RHS
|
||||
*/
|
||||
public int getFinalState() {
|
||||
@ -57,6 +59,7 @@ public class AssemblyExtendedProduction
|
||||
|
||||
/**
|
||||
* Get the original production from which this production was derived
|
||||
*
|
||||
* @return the original production
|
||||
*/
|
||||
public AssemblyProduction getAncestor() {
|
||||
|
@ -17,8 +17,6 @@ package ghidra.app.plugin.assembler.sleigh.grammars;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.collections4.map.LazyMap;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
|
||||
import ghidra.app.plugin.processors.sleigh.Constructor;
|
||||
@ -27,6 +25,7 @@ import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
|
||||
/**
|
||||
* Defines a context free grammar, used to parse mnemonic assembly instructions
|
||||
*
|
||||
* <p>
|
||||
* This stores the CFG and the associated semantics for each production. It also has mechanisms for
|
||||
* tracking "purely recursive" productions. These are productions of the form I => I, and they
|
||||
* necessarily create ambiguity. Thus, when constructing a parser, it is useful to identify them
|
||||
@ -35,8 +34,10 @@ import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
|
||||
public class AssemblyGrammar
|
||||
extends AbstractAssemblyGrammar<AssemblyNonTerminal, AssemblyProduction> {
|
||||
// a nested map of semantics by production, by constructor
|
||||
protected final Map<AssemblyProduction, Map<Constructor, AssemblyConstructorSemantic>> semantics =
|
||||
LazyMap.lazyMap(new TreeMap<>(), () -> new TreeMap<>());
|
||||
protected final Map<AssemblyProduction, Map<Constructor, AssemblyConstructorSemantic>> semanticsByProduction =
|
||||
new TreeMap<>();
|
||||
protected final Map<Constructor, AssemblyConstructorSemantic> semanticsByConstructor =
|
||||
new HashMap<>();
|
||||
// a map of purely recursive, e.g., I => I, productions by name of LHS
|
||||
protected final Map<String, AssemblyProduction> pureRecursive = new TreeMap<>();
|
||||
|
||||
@ -58,6 +59,7 @@ public class AssemblyGrammar
|
||||
|
||||
/**
|
||||
* Add a production associated with a SLEIGH constructor semantic
|
||||
*
|
||||
* @param lhs the left-hand side
|
||||
* @param rhs the right-hand side
|
||||
* @param pattern the pattern associated with the constructor
|
||||
@ -68,27 +70,32 @@ public class AssemblyGrammar
|
||||
DisjointPattern pattern, Constructor cons, List<Integer> indices) {
|
||||
AssemblyProduction prod = newProduction(lhs, rhs);
|
||||
addProduction(prod);
|
||||
Map<Constructor, AssemblyConstructorSemantic> map = semantics.get(prod);
|
||||
AssemblyConstructorSemantic sem = map.get(cons);
|
||||
if (sem == null) {
|
||||
sem = new AssemblyConstructorSemantic(cons, indices);
|
||||
map.put(cons, sem);
|
||||
}
|
||||
else if (!indices.equals(sem.getOperandIndices())) {
|
||||
Map<Constructor, AssemblyConstructorSemantic> map =
|
||||
semanticsByProduction.computeIfAbsent(prod, p -> new TreeMap<>());
|
||||
AssemblyConstructorSemantic sem =
|
||||
map.computeIfAbsent(cons, c -> new AssemblyConstructorSemantic(cons, indices));
|
||||
if (!indices.equals(sem.getOperandIndices())) {
|
||||
throw new IllegalStateException(
|
||||
"Productions of the same constructor must have same operand indices");
|
||||
}
|
||||
semanticsByConstructor.put(cons, sem);
|
||||
|
||||
sem.addPattern(pattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the semantics associated with a given production
|
||||
*
|
||||
* @param prod the production
|
||||
* @return all semantics associated with the given production
|
||||
*/
|
||||
public Collection<AssemblyConstructorSemantic> getSemantics(AssemblyProduction prod) {
|
||||
return Collections.unmodifiableCollection(semantics.get(prod).values());
|
||||
return Collections.unmodifiableCollection(
|
||||
semanticsByProduction.computeIfAbsent(prod, p -> new TreeMap<>()).values());
|
||||
}
|
||||
|
||||
public AssemblyConstructorSemantic getSemantic(Constructor cons) {
|
||||
return semanticsByConstructor.get(cons);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -96,13 +103,15 @@ public class AssemblyGrammar
|
||||
super.combine(that);
|
||||
if (that instanceof AssemblyGrammar) {
|
||||
AssemblyGrammar ag = (AssemblyGrammar) that;
|
||||
this.semantics.putAll(ag.semantics);
|
||||
this.semanticsByProduction.putAll(ag.semanticsByProduction);
|
||||
this.semanticsByConstructor.putAll(ag.semanticsByConstructor);
|
||||
this.pureRecursive.putAll(ag.pureRecursive);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all productions in the grammar that are purely recursive
|
||||
*
|
||||
* @return the collection of purely recursive productions
|
||||
*/
|
||||
public Collection<AssemblyProduction> getPureRecursive() {
|
||||
@ -111,6 +120,7 @@ public class AssemblyGrammar
|
||||
|
||||
/**
|
||||
* Obtain, if present, the purely recursive production having the given LHS
|
||||
*
|
||||
* @param lhs the left-hand side
|
||||
* @return the desired production, or null
|
||||
*/
|
||||
|
@ -16,8 +16,9 @@
|
||||
package ghidra.app.plugin.assembler.sleigh.grammars;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.collections4.list.AbstractListDecorator;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
|
||||
@ -25,29 +26,29 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
|
||||
/**
|
||||
* A "string" of symbols
|
||||
*
|
||||
* To avoid overloading the word "String", we call this a "sentential". Technically, to be a
|
||||
* <p>
|
||||
* To avoid overloading the word "string", we call this a "sentential". Technically, to be a
|
||||
* "sentential" in the classic sense, it must be a possible element in the derivation of a sentence
|
||||
* in the grammar starting with the start symbol. We ignore that if only for the sake of naming.
|
||||
*
|
||||
* @param <NT> the type of non-terminals
|
||||
*/
|
||||
public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
AbstractListDecorator<AssemblySymbol> implements Comparable<AssemblySentential<NT>> {
|
||||
public class AssemblySentential<NT extends AssemblyNonTerminal>
|
||||
implements Comparable<AssemblySentential<NT>>, Iterable<AssemblySymbol> {
|
||||
private List<AssemblySymbol> symbols;
|
||||
private final List<AssemblySymbol> unmodifiableSymbols;
|
||||
private boolean finished = false;
|
||||
public static final AssemblyStringTerminal WHITE_SPACE = new WhiteSpace();
|
||||
private static final Pattern PAT_COMMA_WS = Pattern.compile(",\\s+");
|
||||
|
||||
/**
|
||||
* Construct a string from the given list of symbols
|
||||
*
|
||||
* @param symbols the symbols, in order, to copy into the new sentential
|
||||
*/
|
||||
public AssemblySentential(List<? extends AssemblySymbol> symbols) {
|
||||
this.symbols = new ArrayList<>(symbols);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<AssemblySymbol> decorated() {
|
||||
return symbols;
|
||||
this.unmodifiableSymbols = Collections.unmodifiableList(symbols);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -58,19 +59,22 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
*/
|
||||
public AssemblySentential() {
|
||||
this.symbols = new ArrayList<>();
|
||||
this.unmodifiableSymbols = Collections.unmodifiableList(symbols);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a string from any number of symbols
|
||||
*
|
||||
* @param syms the symbols, in order (wrapped via Arrays.asList, so the backing list is fixed-size)
|
||||
*/
|
||||
public AssemblySentential(AssemblySymbol... syms) {
|
||||
this.symbols = Arrays.asList(syms);
|
||||
this.unmodifiableSymbols = Collections.unmodifiableList(symbols);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (symbols.size() == 0) {
|
||||
if (symbols.isEmpty()) {
|
||||
return "e";
|
||||
}
|
||||
Iterator<? extends AssemblySymbol> symIt = symbols.iterator();
|
||||
@ -117,6 +121,7 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
/**
|
||||
* A "whitespace" terminal
|
||||
*
|
||||
* <p>
|
||||
* This terminal represents "optional" whitespace. "Optional" because in certain circumstances,
|
||||
* whitespace is not actually required, i.e., before or after a special character.
|
||||
*/
|
||||
@ -132,7 +137,7 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseToken> match(String buffer, int pos, AssemblyGrammar grammar,
|
||||
Map<String, Long> labels) {
|
||||
AssemblyNumericSymbols symbols) {
|
||||
if (buffer.length() == 0) {
|
||||
return Collections.singleton(new WhiteSpaceParseToken(grammar, this, ""));
|
||||
}
|
||||
@ -158,7 +163,7 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
|
||||
public Collection<String> getSuggestions(String got, AssemblyNumericSymbols symbols) {
|
||||
return Collections.singleton(" ");
|
||||
}
|
||||
}
|
||||
@ -175,6 +180,7 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
/**
|
||||
* The token consumed by a whitespace terminal when it anticipates the end of input
|
||||
*
|
||||
* <p>
|
||||
* "Expected" tokens given by a parse machine when this is the last token it has consumed are
|
||||
* not valid suggestions. The machine should instead suggest a whitespace character.
|
||||
*/
|
||||
@ -185,7 +191,18 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
}
|
||||
|
||||
/**
|
||||
* Add "optional" whitespace, if not already preceded by whitespace
|
||||
* Add a symbol to the right of this sentential
|
||||
*
|
||||
* @param symbol the symbol to add
|
||||
* @return true
|
||||
*/
|
||||
public boolean addSymbol(AssemblySymbol symbol) {
|
||||
return symbols.add(symbol);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add optional whitespace, if not already preceded by whitespace
|
||||
*
|
||||
* @return true if whitespace was added
|
||||
*/
|
||||
public boolean addWS() {
|
||||
@ -193,7 +210,95 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
if (last != null) {
|
||||
return false;
|
||||
}
|
||||
return add(WHITE_SPACE);
|
||||
return addSymbol(WHITE_SPACE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a comma followed by optional whitespace.
|
||||
*/
|
||||
public void addCommaWS() {
|
||||
addSymbol(new AssemblyStringTerminal(","));
|
||||
addWS();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a syntactic terminal element, but with consideration for optional whitespace surrounding
|
||||
* special characters
|
||||
*
|
||||
* @param str the expected terminal
|
||||
*/
|
||||
public void addSeparatorPart(String str) {
|
||||
String tstr = str.trim();
|
||||
if (tstr.equals("")) {
|
||||
addWS();
|
||||
return;
|
||||
}
|
||||
char first = tstr.charAt(0);
|
||||
if (!str.startsWith(tstr)) {
|
||||
addWS();
|
||||
}
|
||||
if (!Character.isLetterOrDigit(first)) {
|
||||
addWS();
|
||||
}
|
||||
addSymbol(new AssemblyStringTerminal(tstr));
|
||||
char last = tstr.charAt(tstr.length() - 1);
|
||||
if (!str.endsWith(tstr)) {
|
||||
addWS();
|
||||
}
|
||||
if (!Character.isLetterOrDigit(last)) {
|
||||
addWS();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the symbols in this sentential
|
||||
*
|
||||
* @return an unmodifiable view of the symbols
|
||||
*/
|
||||
public List<AssemblySymbol> getSymbols() {
|
||||
return unmodifiableSymbols;
|
||||
}
|
||||
|
||||
public AssemblySymbol getSymbol(int pos) {
|
||||
return symbols.get(pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split the given string into pieces matched by the pattern, and the pieces between
|
||||
*
|
||||
* <p>
|
||||
* This invokes the given callbacks as the string is processed from left to right.
|
||||
*
|
||||
* @param str the string to split
|
||||
* @param pat the pattern to match
|
||||
* @param matched the callback for matched portions
|
||||
* @param unmatched the callback for unmatched portions
|
||||
*/
|
||||
private static void forMatchUnmatch(String str, Pattern pat, Consumer<String> matched,
|
||||
Consumer<String> unmatched) {
|
||||
int startU = 0;
|
||||
Matcher mat = pat.matcher(str);
|
||||
while (mat.find()) {
|
||||
if (startU < mat.start()) {
|
||||
unmatched.accept(str.substring(startU, mat.start()));
|
||||
}
|
||||
matched.accept(mat.group());
|
||||
startU = mat.end();
|
||||
}
|
||||
if (startU < str.length()) {
|
||||
unmatched.accept(str.substring(startU));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a syntactic terminal element, but considering that commas contained within may be
|
||||
* followed by optional whitespace
|
||||
*
|
||||
* @param str the expected terminal
|
||||
*/
|
||||
public void addSeparators(String str) {
|
||||
// NB. When displaying print pieces, the disassembler replaces all ",\\s+" with ","
|
||||
forMatchUnmatch(str, PAT_COMMA_WS, matched -> addCommaWS(), this::addSeparatorPart);
|
||||
}
|
||||
|
||||
// If the right-most symbol is whitespace, return it
|
||||
@ -209,18 +314,31 @@ public class AssemblySentential<NT extends AssemblyNonTerminal> extends
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim leading and trailing whitespace, and make the string immutable
|
||||
* Trim leading and trailing whitespace, and make the sentential immutable
|
||||
*/
|
||||
public void finish() {
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
symbols = Collections.unmodifiableList(symbols);
|
||||
symbols = unmodifiableSymbols;
|
||||
finished = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblySentential<NT> subList(int fromIndex, int toIndex) {
|
||||
public Iterator<AssemblySymbol> iterator() {
|
||||
return unmodifiableSymbols.iterator();
|
||||
}
|
||||
|
||||
public AssemblySentential<NT> sub(int fromIndex, int toIndex) {
|
||||
return new AssemblySentential<>(symbols.subList(fromIndex, toIndex));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of symbols, including whitespace, in this sentential
|
||||
*
|
||||
* @return the number of symbols
|
||||
*/
|
||||
public int size() {
|
||||
return symbols.size();
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ import ghidra.generic.util.datastruct.TreeSetValuedTreeMap;
|
||||
/**
|
||||
* A class to compute the first and follow of every non-terminal in a grammar
|
||||
*
|
||||
* <p>
|
||||
* See Alfred V. Aho, Monica S. Lam, Ravi Sethi, Jeffrey D. Ullman, <i>Compilers: Principles,
|
||||
* Techniques, & Tools</i>. Boston, MA: Pearson, 2007, pp. 220-2.
|
||||
*/
|
||||
@ -43,6 +44,7 @@ public class AssemblyFirstFollow {
|
||||
|
||||
/**
|
||||
* Compute the first and follow sets for every non-terminal in the given grammar
|
||||
*
|
||||
* @param grammar the grammar
|
||||
*/
|
||||
public AssemblyFirstFollow(AbstractAssemblyGrammar<?, ?> grammar) {
|
||||
@ -61,7 +63,7 @@ public class AssemblyFirstFollow {
|
||||
while (changed) {
|
||||
changed = false;
|
||||
for (AbstractAssemblyProduction<?> prod : grammar) {
|
||||
if (nullable.containsAll(prod)) {
|
||||
if (nullable.containsAll(prod.getRHS().getSymbols())) {
|
||||
changed |= nullable.add(prod.getLHS());
|
||||
}
|
||||
}
|
||||
@ -81,7 +83,7 @@ public class AssemblyFirstFollow {
|
||||
// Add the first set of each symbol
|
||||
// Terminate after a terminal or non-nullable symbol
|
||||
for (AbstractAssemblyProduction<?> prod : grammar) {
|
||||
for (AssemblySymbol sym : prod) {
|
||||
for (AssemblySymbol sym : prod.getRHS()) {
|
||||
if (sym instanceof AssemblyNonTerminal) {
|
||||
AssemblyNonTerminal nt = (AssemblyNonTerminal) sym;
|
||||
changed |= first.putAll(prod.getLHS(), first.get(nt));
|
||||
@ -116,13 +118,13 @@ public class AssemblyFirstFollow {
|
||||
// Finish the subwalk after a terminal or non-nullable symbol
|
||||
// If you hit the end, add follow(LHS) to follow the current symbol
|
||||
for (AbstractAssemblyProduction<?> prod : grammar) {
|
||||
nextX: for (int i = 0; i < prod.size(); i++) {
|
||||
AssemblySymbol px = prod.get(i);
|
||||
nextX: for (int i = 0; i < prod.getRHS().size(); i++) {
|
||||
AssemblySymbol px = prod.getRHS().getSymbol(i);
|
||||
if (px instanceof AssemblyNonTerminal) {
|
||||
AssemblyNonTerminal X = (AssemblyNonTerminal) px;
|
||||
int j;
|
||||
for (j = i + 1; j < prod.size(); j++) {
|
||||
AssemblySymbol B = prod.get(j);
|
||||
for (j = i + 1; j < prod.getRHS().size(); j++) {
|
||||
AssemblySymbol B = prod.getRHS().getSymbol(j);
|
||||
if (B instanceof AssemblyNonTerminal) {
|
||||
AssemblyNonTerminal nt = (AssemblyNonTerminal) B;
|
||||
changed |= follow.putAll(X, first.get(nt));
|
||||
@ -149,7 +151,9 @@ public class AssemblyFirstFollow {
|
||||
/**
|
||||
* Get the nullable set
|
||||
*
|
||||
* <p>
|
||||
* That is the set of all non-terminals, which through some derivation, can produce epsilon.
|
||||
*
|
||||
* @return the set
|
||||
*/
|
||||
public Collection<AssemblyNonTerminal> getNullable() {
|
||||
@ -159,8 +163,10 @@ public class AssemblyFirstFollow {
|
||||
/**
|
||||
* Get the first set for a given non-terminal
|
||||
*
|
||||
* <p>
|
||||
* That is the set of all terminals, which through some derivation from the given non-terminal,
|
||||
* can appear first in a sentential form.
|
||||
*
|
||||
* @param nt the non-terminal
|
||||
* @return the set
|
||||
*/
|
||||
@ -171,8 +177,10 @@ public class AssemblyFirstFollow {
|
||||
/**
|
||||
* Get the follow set for a given non-terminal
|
||||
*
|
||||
* <p>
|
||||
* That is the set of all terminals, which through some derivation from the start symbol, can
|
||||
* appear immediately after the given non-terminal in a sentential form.
|
||||
*
|
||||
* @param nt the non-terminal
|
||||
* @return the set
|
||||
*/
|
||||
@ -182,6 +190,7 @@ public class AssemblyFirstFollow {
|
||||
|
||||
/**
|
||||
* For debugging, print out the computed sets to the given stream
|
||||
*
|
||||
* @param out the stream
|
||||
*/
|
||||
public void print(PrintStream out) {
|
||||
|
@ -40,6 +40,7 @@ public class AssemblyParseAcceptResult extends AssemblyParseResult {
|
||||
|
||||
/**
|
||||
* Get the tree
|
||||
*
|
||||
* @return the tree
|
||||
*/
|
||||
public AssemblyParseBranch getTree() {
|
||||
|
@ -30,11 +30,11 @@ import ghidra.generic.util.datastruct.TreeSetValuedTreeMap;
|
||||
/**
|
||||
* The Action/Goto table for a LALR(1) parser
|
||||
*
|
||||
* <p>
|
||||
* This table is unconventional in that it permits a single cell to be populated by more than one
|
||||
* action. Typically, such a situation would indicate an ambiguity, or the need for a longer
|
||||
* look-ahead value. Because we do not presume to control the grammar (which was automatically
|
||||
* derived from another source), the parsing algorithm will simply branch, eventually trying both
|
||||
* options.
|
||||
* action. Typically, such a situation would indicate ambiguity, or the need for a longer look-ahead
|
||||
* value. Because we do not presume to control the grammar (which was automatically derived from
|
||||
* another source), the parsing algorithm will simply branch, eventually trying both options.
|
||||
*/
|
||||
public class AssemblyParseActionGotoTable {
|
||||
// A map representing the actual (sparse) table
|
||||
@ -45,6 +45,7 @@ public class AssemblyParseActionGotoTable {
|
||||
|
||||
/**
|
||||
* Add an action entry to the given cell
|
||||
*
|
||||
* @param fromState the state (row) in the table
|
||||
* @param next the symbol (column) in the table
|
||||
* @param action the entry to add to the cell
|
||||
@ -59,6 +60,7 @@ public class AssemblyParseActionGotoTable {
|
||||
|
||||
/**
|
||||
* Add a SHIFT (S<i>n</i>) entry to the given cell
|
||||
*
|
||||
* @param fromState the state (row) in the table
|
||||
* @param next the symbol (column) in the table
|
||||
* @param newState the state (<i>n</i>) after the shift is applied
|
||||
@ -70,6 +72,7 @@ public class AssemblyParseActionGotoTable {
|
||||
|
||||
/**
|
||||
* Add a REDUCE (R<i>n</i>) entry to the given cell
|
||||
*
|
||||
* @param fromState the state (row) in the table
|
||||
* @param next the symbol (column) in the table
|
||||
* @param prod the production (having index <i>n</i>) associated with the reduction
|
||||
@ -81,6 +84,7 @@ public class AssemblyParseActionGotoTable {
|
||||
|
||||
/**
|
||||
* Add a GOTO entry to the given cell
|
||||
*
|
||||
* @param fromState the state (row) in the table
|
||||
* @param next the symbol (column) in the table
|
||||
* @param newState the target state
|
||||
@ -92,6 +96,7 @@ public class AssemblyParseActionGotoTable {
|
||||
|
||||
/**
|
||||
* Add an ACCEPT entry for the given state at the end of input
|
||||
*
|
||||
* @param fromState the state (row) in the table
|
||||
* @return true, if the state does not already accept on end of input
|
||||
*/
|
||||
@ -101,6 +106,7 @@ public class AssemblyParseActionGotoTable {
|
||||
|
||||
/**
|
||||
* Get the terminals that are expected, i.e., have entries for the given state
|
||||
*
|
||||
* @param fromState the state (row) in the table
|
||||
* @return the collection of populated columns (terminals) for the given state
|
||||
*/
|
||||
@ -110,6 +116,7 @@ public class AssemblyParseActionGotoTable {
|
||||
|
||||
/**
|
||||
* Get all entries in a given cell
|
||||
*
|
||||
* @param fromState the state (row) in the table
|
||||
* @param next the symbol (column) in the table
|
||||
* @return all action entries in the given cell
|
||||
|
@ -24,16 +24,18 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential.TruncatedW
|
||||
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseActionGotoTable.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.AsmUtil;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
|
||||
|
||||
/**
|
||||
* A class that implements the LALR(1) parsing algorithm
|
||||
*
|
||||
* Instances of this class store a parse state. In order to work correctly, the class must be
|
||||
* given a properly-constructed Action/Goto table.
|
||||
* <p>
|
||||
* Instances of this class store a parse state. In order to work correctly, the class must be given
|
||||
* a properly-constructed Action/Goto table.
|
||||
*
|
||||
* <p>
|
||||
* This implementation is somewhat unconventional. First, instead of strictly tokenizing and then
|
||||
* parsing, each terminal is given the opportunity to match a token in the input. If none match, it
|
||||
* results in a syntax error (equivalent to the token type having an empty cell in the classical
|
||||
@ -62,8 +64,7 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
// The last token we consumed (i.e., last terminal pushed to the stack)
|
||||
protected AssemblyParseToken lastTok;
|
||||
|
||||
// A set of labels that identify valid tokens for some terminals
|
||||
protected final Map<String, Long> labels; // used for label -> number substitution
|
||||
protected final AssemblyNumericSymbols symbols; // used for symbol -> number substitution
|
||||
|
||||
protected boolean accepted = false; // the machine is in the accepted state
|
||||
protected int error = ERROR_NONE; // non-zero if the machine is in an error state
|
||||
@ -74,24 +75,25 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
|
||||
static int nextMachineId = 0;
|
||||
|
||||
static final DbgTimer dbg = DbgTimer.INACTIVE;
|
||||
static final DbgTimer DBG = DbgTimer.INACTIVE;
|
||||
|
||||
/**
|
||||
* Construct a new parse state
|
||||
*
|
||||
* @param parser the parser driving this machine
|
||||
* @param input the full input line
|
||||
* @param pos the position in the line identifying the next characters to parse
|
||||
* @param symbols the symbols used for symbol-to-number substitution by numeric terminals
|
||||
*/
|
||||
public AssemblyParseMachine(AssemblyParser parser, String input, int pos,
|
||||
AssemblyParseToken lastTok, Map<String, Long> labels) {
|
||||
AssemblyParseToken lastTok, AssemblyNumericSymbols symbols) {
|
||||
this.parser = parser;
|
||||
this.stack.push(0);
|
||||
this.buffer = input;
|
||||
this.pos = pos;
|
||||
this.lastTok = lastTok;
|
||||
this.id = nextMachineId++;
|
||||
this.labels = labels;
|
||||
this.symbols = symbols;
|
||||
}
|
||||
|
||||
/* ********************************************************************************************
|
||||
@ -155,12 +157,12 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
return result;
|
||||
}
|
||||
|
||||
result = SleighUtil.compareInOrder(this.stack, that.stack);
|
||||
result = AsmUtil.compareInOrder(this.stack, that.stack);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
result = SleighUtil.compareInOrder(this.output, that.output);
|
||||
result = AsmUtil.compareInOrder(this.output, that.output);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
@ -182,11 +184,13 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
/**
|
||||
* Duplicate this machine state
|
||||
*
|
||||
* <p>
|
||||
* This is used extensively when branching
|
||||
*
|
||||
* @return the duplicate
|
||||
*/
|
||||
public AssemblyParseMachine copy() {
|
||||
AssemblyParseMachine c = new AssemblyParseMachine(parser, buffer, pos, lastTok, labels);
|
||||
AssemblyParseMachine c = new AssemblyParseMachine(parser, buffer, pos, lastTok, symbols);
|
||||
// leave symbols copied by reference
|
||||
|
||||
c.output.clear();
|
||||
@ -201,25 +205,25 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
c.accepted = accepted;
|
||||
c.error = error;
|
||||
|
||||
dbg.println("Copied " + id + " to " + c.id);
|
||||
DBG.println("Copied " + id + " to " + c.id);
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a given action and continue parsing, exhausting all results after the action
|
||||
*
|
||||
* <p>
|
||||
* The visited list prevents infinite loops or stack overflows resulting from consuming epsilon
|
||||
* and going to the same state. Such loops may involve many states.
|
||||
*
|
||||
* @param a the action
|
||||
* @param tok the token given by the terminal (column) of the entry containing this action
|
||||
* @param results a place to store all the parsing results (each must be accept or error state)
|
||||
* @param visited a collection of machine states already visited
|
||||
*
|
||||
* The visited "collection" prevents infinite loops or stack overflows resulting from
|
||||
* "consuming" epsilon and going to the same state. Such loops may involve many states. It is
|
||||
* also defined as a map here for debugging purposes, so that when a loop is detected, we can
|
||||
* print the ID of the first visit.
|
||||
*/
|
||||
protected void doAction(Action a, AssemblyParseToken tok, Set<AssemblyParseMachine> results,
|
||||
Deque<AssemblyParseMachine> visited) {
|
||||
try (DbgCtx dc = dbg.start("Action: " + a)) {
|
||||
try (DbgCtx dc = DBG.start("Action: " + a)) {
|
||||
if (a instanceof ShiftAction) {
|
||||
AssemblyParseMachine m = copy();
|
||||
m.stack.push(((ShiftAction) a).newStateNum);
|
||||
@ -233,15 +237,15 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
AssemblyParseBranch branch = new AssemblyParseBranch(parser.grammar, prod);
|
||||
AssemblyParseMachine m = copy();
|
||||
m.output.add(prod.getIndex());
|
||||
dbg.println("Prod: " + prod);
|
||||
DBG.println("Prod: " + prod);
|
||||
for (@SuppressWarnings("unused")
|
||||
AssemblySymbol sym : prod) {
|
||||
AssemblySymbol sym : prod.getRHS()) {
|
||||
m.stack.pop();
|
||||
branch.addChild(m.treeStack.pop());
|
||||
}
|
||||
for (Action aa : m.parser.actions.get(m.stack.peek(), prod.getLHS())) {
|
||||
GotoAction ga = (GotoAction) aa;
|
||||
dbg.println("Goto: " + ga);
|
||||
DBG.println("Goto: " + ga);
|
||||
AssemblyParseMachine n = m.copy();
|
||||
n.stack.push(ga.newStateNum);
|
||||
n.treeStack.push(branch);
|
||||
@ -258,6 +262,7 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
|
||||
/**
|
||||
* Consume a given terminal (and corresponding token) and continue parsing
|
||||
*
|
||||
* @param t the terminal
|
||||
* @param tok the corresponding token
|
||||
* @param results a place to store all the parsing results
|
||||
@ -265,10 +270,10 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
*/
|
||||
protected void consume(AssemblyTerminal t, AssemblyParseToken tok,
|
||||
Set<AssemblyParseMachine> results, Deque<AssemblyParseMachine> visited) {
|
||||
try (DbgCtx dc = dbg.start("Matched " + t + " " + tok)) {
|
||||
try (DbgCtx dc = DBG.start("Matched " + t + " " + tok)) {
|
||||
Collection<Action> as = parser.actions.get(stack.peek(), t);
|
||||
assert !as.isEmpty();
|
||||
dbg.println("Actions: " + as);
|
||||
DBG.println("Actions: " + as);
|
||||
for (Action a : as) {
|
||||
doAction(a, tok, results, visited);
|
||||
}
|
||||
@ -278,7 +283,9 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
/**
|
||||
* Look for previous machine states having the same stack and position
|
||||
*
|
||||
* <p>
|
||||
* This would imply we have gone in a loop without consuming anything. We need to prune.
|
||||
*
|
||||
* @param machine the machine state to check
|
||||
* @param visited the stack of previous machine states
|
||||
* @return if there is a loop, the machine state proving it, null otherwise
|
||||
@ -307,15 +314,16 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
|
||||
/**
|
||||
* Parse (or continue parsing) all possible trees from this machine state
|
||||
*
|
||||
* @param results a place to store all the parsing results
|
||||
* @param visited a collection of machine states already visited
|
||||
*/
|
||||
protected void exhaust(Set<AssemblyParseMachine> results, Deque<AssemblyParseMachine> visited) {
|
||||
try (DbgCtx dc = dbg.start("Exhausting machine " + id)) {
|
||||
dbg.println("Machine: " + this);
|
||||
try (DbgCtx dc = DBG.start("Exhausting machine " + id)) {
|
||||
DBG.println("Machine: " + this);
|
||||
AssemblyParseMachine loop = findLoop(this, visited);
|
||||
if (loop != null) {
|
||||
dbg.println("Pruned. Loop of " + loop.id);
|
||||
DBG.println("Pruned. Loop of " + loop.id);
|
||||
return;
|
||||
}
|
||||
try (DequePush<?> push = DequePush.push(visited, this)) {
|
||||
@ -332,7 +340,7 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
}
|
||||
Set<AssemblyTerminal> unmatched = new TreeSet<>(terms);
|
||||
for (AssemblyTerminal t : terms) {
|
||||
for (AssemblyParseToken tok : t.match(buffer, pos, parser.grammar, labels)) {
|
||||
for (AssemblyParseToken tok : t.match(buffer, pos, parser.grammar, symbols)) {
|
||||
unmatched.remove(t);
|
||||
assert buffer.regionMatches(pos, tok.getString(), 0,
|
||||
tok.getString().length());
|
||||
@ -350,9 +358,9 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
newExpected = new TreeSet<>();
|
||||
newExpected.add(AssemblySentential.WHITE_SPACE);
|
||||
}
|
||||
dbg.println("Syntax Error: ");
|
||||
dbg.println(" Expected: " + newExpected);
|
||||
dbg.println(" Got: " + buffer.substring(pos));
|
||||
DBG.println("Syntax Error: ");
|
||||
DBG.println(" Expected: " + newExpected);
|
||||
DBG.println(" Got: " + buffer.substring(pos));
|
||||
m.error = ERROR_SYNTAX;
|
||||
m.got = buffer.substring(pos);
|
||||
m.expected = newExpected;
|
||||
@ -365,6 +373,7 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
|
||||
/**
|
||||
* Parse (or continue parsing) all possible trees from this machine state
|
||||
*
|
||||
* @return the set of all possible trees and errors
|
||||
*/
|
||||
public Set<AssemblyParseMachine> exhaust() {
|
||||
@ -376,6 +385,7 @@ public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
|
||||
|
||||
/**
|
||||
* If in the accepted state, get the resulting parse tree for this machine
|
||||
*
|
||||
* @return the parse tree
|
||||
*/
|
||||
public AssemblyParseBranch getTree() {
|
||||
|
@ -22,6 +22,7 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseBranch;
|
||||
/**
|
||||
* A result of parsing a sentence
|
||||
*
|
||||
* <p>
|
||||
* If the sentence was accepted, this yields a parse tree. If not, this describes the error and
|
||||
* provides suggestions to correct the error.
|
||||
*/
|
||||
@ -29,6 +30,7 @@ public abstract class AssemblyParseResult implements Comparable<AssemblyParseRes
|
||||
|
||||
/**
|
||||
* Construct a successful parse result
|
||||
*
|
||||
* @param tree the tree output by the parser
|
||||
*/
|
||||
public static AssemblyParseAcceptResult accept(AssemblyParseBranch tree) {
|
||||
@ -37,6 +39,7 @@ public abstract class AssemblyParseResult implements Comparable<AssemblyParseRes
|
||||
|
||||
/**
|
||||
* Construct an error parse result
|
||||
*
|
||||
* @param got the input buffer when the error occurred
|
||||
* @param suggestions a subset of strings that would have allowed parsing to proceed
|
||||
*/
|
||||
@ -46,6 +49,7 @@ public abstract class AssemblyParseResult implements Comparable<AssemblyParseRes
|
||||
|
||||
/**
|
||||
* Check if the parse result is successful or an error
|
||||
*
|
||||
* @return true if the result describes an error
|
||||
*/
|
||||
public abstract boolean isError();
|
||||
|
@ -17,25 +17,26 @@ package ghidra.app.plugin.assembler.sleigh.parse;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.collections4.set.AbstractSetDecorator;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.AsmUtil;
|
||||
|
||||
/**
|
||||
* A state in an LR(0) parsing machine
|
||||
*
|
||||
* Each item consists of a kernel and an implied closure. Only the kernel is necessary to define
|
||||
* the item, but the whole closure must be considered when deriving new states.
|
||||
* <p>
|
||||
* Each item consists of a kernel and an implied closure. Only the kernel is necessary to define the
|
||||
* item, but the whole closure must be considered when deriving new states. The kernel can be
|
||||
* retrieved and mutated via {@link #getKernel()}, then the closure derived from it via
|
||||
* {@link #getClosure()}.
|
||||
*/
|
||||
public class AssemblyParseState extends AbstractSetDecorator<AssemblyParseStateItem>
|
||||
implements Comparable<AssemblyParseState> {
|
||||
public class AssemblyParseState implements Comparable<AssemblyParseState> {
|
||||
private final AssemblyGrammar grammar;
|
||||
private final Set<AssemblyParseStateItem> kernel = new LinkedHashSet<>();
|
||||
private Set<AssemblyParseStateItem> closure;
|
||||
|
||||
/**
|
||||
* Construct a new state associated with the given grammar
|
||||
*
|
||||
* @param grammar the grammar
|
||||
*/
|
||||
public AssemblyParseState(AssemblyGrammar grammar) {
|
||||
@ -44,6 +45,7 @@ public class AssemblyParseState extends AbstractSetDecorator<AssemblyParseStateI
|
||||
|
||||
/**
|
||||
* Construct a new state associated with the given grammar, seeded with the given item
|
||||
*
|
||||
* @param grammar the grammar
|
||||
* @param item an item in the state
|
||||
*/
|
||||
@ -52,13 +54,18 @@ public class AssemblyParseState extends AbstractSetDecorator<AssemblyParseStateI
|
||||
kernel.add(item);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Set<AssemblyParseStateItem> decorated() {
|
||||
/**
|
||||
* Get the (mutable) kernel for this state
|
||||
*
|
||||
* @return the kernel
|
||||
*/
|
||||
public Set<AssemblyParseStateItem> getKernel() {
|
||||
return kernel;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the closure of this item, caching the result
|
||||
*
|
||||
* @return the closure
|
||||
*/
|
||||
public Set<AssemblyParseStateItem> getClosure() {
|
||||
@ -93,7 +100,7 @@ public class AssemblyParseState extends AbstractSetDecorator<AssemblyParseStateI
|
||||
return result;
|
||||
}
|
||||
// This only works because TreeSet presents the items in order
|
||||
result = SleighUtil.compareInOrder(this.kernel, that.kernel);
|
||||
result = AsmUtil.compareInOrder(this.kernel, that.kernel);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
|
||||
/**
|
||||
* An item in the state of an LR(0) parser
|
||||
*
|
||||
* <p>
|
||||
* An item is a production with a dot indicating a position while parsing
|
||||
*/
|
||||
public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem> {
|
||||
@ -32,6 +33,7 @@ public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem
|
||||
|
||||
/**
|
||||
* Construct a new item starting at the far left of the given production
|
||||
*
|
||||
* @param prod the production
|
||||
*/
|
||||
public AssemblyParseStateItem(AssemblyProduction prod) {
|
||||
@ -41,19 +43,21 @@ public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem
|
||||
/**
|
||||
* Construct a new item starting immediately before the symbol at the given position in the
|
||||
* given production
|
||||
*
|
||||
* @param prod the production
|
||||
* @param pos the position of the dot
|
||||
*/
|
||||
public AssemblyParseStateItem(AssemblyProduction prod, int pos) {
|
||||
this.prod = prod;
|
||||
this.pos = pos;
|
||||
if (pos > prod.size()) {
|
||||
if (pos > prod.getRHS().size()) {
|
||||
throw new AssertionError("INTERNAL: Attempt to advance beyond end of RHS");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Advance the dot by one position, producing a new item
|
||||
*
|
||||
* @return the new item
|
||||
*/
|
||||
public AssemblyParseStateItem read() {
|
||||
@ -63,20 +67,24 @@ public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem
|
||||
/**
|
||||
* Get the symbol immediately to the right of the dot
|
||||
*
|
||||
* <p>
|
||||
* This is the symbol which must be matched to advance the dot.
|
||||
*
|
||||
* @return the symbol, or null if the item is completed, i.e., the dot is at the far right
|
||||
*/
|
||||
public AssemblySymbol getNext() {
|
||||
if (completed()) {
|
||||
return null;
|
||||
}
|
||||
return prod.get(pos);
|
||||
return prod.getRHS().getSymbol(pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* "Fill" one step out to close a state containing this item
|
||||
*
|
||||
* <p>
|
||||
* To compute the full closure, you must continue stepping out until no new items are generated
|
||||
*
|
||||
* @param grammar the grammar containing the production
|
||||
* @return a subset of items in the closure of a state containing this item
|
||||
*/
|
||||
@ -137,8 +145,9 @@ public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
AssemblySentential<?> prec = prod.subList(0, pos);
|
||||
AssemblySentential<?> proc = prod.subList(pos, prod.size());
|
||||
AssemblySentential<?> rhs = prod.getRHS();
|
||||
AssemblySentential<?> prec = rhs.sub(0, pos);
|
||||
AssemblySentential<?> proc = rhs.sub(pos, rhs.size());
|
||||
StringBuilder sb = new StringBuilder(prod.getIndex() + ". " + prod.getLHS() + " => ");
|
||||
if (prec.size() != 0) {
|
||||
sb.append(prec + " ");
|
||||
@ -153,18 +162,22 @@ public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem
|
||||
/**
|
||||
* Check if this item is completed
|
||||
*
|
||||
* <p>
|
||||
* The item is completed if all symbols have been matched, i.e., the dot is at the far right of
|
||||
* the production.
|
||||
*
|
||||
* @return true iff the item is completed
|
||||
*/
|
||||
public boolean completed() {
|
||||
return (pos == prod.size());
|
||||
return (pos == prod.getRHS().size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the position of the dot
|
||||
*
|
||||
* <p>
|
||||
* The position is the number of symbols to the left of the dot.
|
||||
*
|
||||
* @return the position of the dot
|
||||
*/
|
||||
public int getPos() {
|
||||
@ -173,6 +186,7 @@ public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem
|
||||
|
||||
/**
|
||||
* Get the production associated with this item
|
||||
*
|
||||
* @return the production
|
||||
*/
|
||||
public AssemblyProduction getProduction() {
|
||||
|
@ -32,13 +32,15 @@ public class AssemblyParseTransitionTable {
|
||||
|
||||
/**
|
||||
* Put an entry into the state machine
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> Generally, if this returns non-null, something is probably wrong with your LR(0)
|
||||
* machine generator
|
||||
*
|
||||
* @param fromState the source state
|
||||
* @param next the symbol that is matched
|
||||
* @param newState the destination state
|
||||
* @return the previous value for newState
|
||||
*
|
||||
* NOTE: Generally, if this return non-null, something is probably wrong with your LR(0)
|
||||
* machine generator
|
||||
*/
|
||||
public Integer put(int fromState, AssemblySymbol next, int newState) {
|
||||
return map.put(new TableEntryKey(fromState, next), newState);
|
||||
@ -46,6 +48,7 @@ public class AssemblyParseTransitionTable {
|
||||
|
||||
/**
|
||||
* Get an entry from the state machine
|
||||
*
|
||||
* @param fromState the source state
|
||||
* @param next the symbol that has been matched
|
||||
* @return the destination state
|
||||
@ -56,6 +59,7 @@ public class AssemblyParseTransitionTable {
|
||||
|
||||
/**
|
||||
* Traverse every entry in the table, invoking {@link Consumer#accept(Object)} on each
|
||||
*
|
||||
* @param consumer the callback
|
||||
*/
|
||||
public void forEach(Consumer<TableEntry<Integer>> consumer) {
|
||||
|
@ -31,16 +31,20 @@ import ghidra.app.plugin.assembler.sleigh.util.TableEntry;
|
||||
/**
|
||||
* A class to encapsulate LALR(1) parsing for a given grammar
|
||||
*
|
||||
* This class constructs the Action/Goto table (and all the other trappings) of a LALR(1) parser
|
||||
* and provides a {@link #parse(String)} method to parse actual sentences.
|
||||
* <p>
|
||||
* This class constructs the Action/Goto table (and all the other trappings) of a LALR(1) parser and
|
||||
* provides a {@link #parse(String)} method to parse actual sentences.
|
||||
*
|
||||
* <p>
|
||||
* This implementation is somewhat unconventional in that it permits ambiguous grammars. Instead of
|
||||
* complaining, it produces the set of all possible parse trees. Of course, this comes at the cost
|
||||
* of some efficiency.
|
||||
*
|
||||
* <p>
|
||||
* See Alfred V. Aho, Monica S. Lam, Ravi Sethi, Jeffrey D. Ullman, <i>Compilers: Principles,
|
||||
* Techniques, & Tools</i>. Bostom, MA: Pearson, 2007.
|
||||
* Techniques, & Tools</i>. Boston, MA: Pearson, 2007.
|
||||
*
|
||||
* <p>
|
||||
* See Jackson, Stephen. <a href="http://web.cs.dal.ca/~sjackson/lalr1.html">LALR(1) Parsing</a>.
|
||||
* Halifax, Nova Scotia, Canada: Dalhousie University.
|
||||
* <http://web.cs.dal.ca/~sjackson/lalr1.html>
|
||||
@ -61,15 +65,15 @@ public class AssemblyParser {
|
||||
// the LALR(1) Action/Goto table
|
||||
protected AssemblyParseActionGotoTable actions;
|
||||
|
||||
/** A convenience to specify no labels in {@link #parse(String, Map)} */
|
||||
public static final Map<String, Long> EMPTY_LABELS =
|
||||
Collections.unmodifiableMap(new HashMap<String, Long>());
|
||||
|
||||
protected static final DbgTimer dbg = DbgTimer.INACTIVE;
|
||||
protected static final boolean dbg_detail = false;
|
||||
/**
|
||||
* Change this to {@link DbgTimer#ACTIVE} for verbose diagnostics
|
||||
*/
|
||||
protected static final DbgTimer DBG = DbgTimer.INACTIVE;
|
||||
protected static final boolean DBG_DETAIL = false;
|
||||
|
||||
/**
|
||||
* Construct a LALR(1) parser from the given grammar
|
||||
*
|
||||
* @param grammar the grammar
|
||||
*/
|
||||
public AssemblyParser(AssemblyGrammar grammar) {
|
||||
@ -86,39 +90,39 @@ public class AssemblyParser {
|
||||
grammar.addProduction(start, new AssemblySentential<>(grammar.getStart(), AssemblyEOI.EOI));
|
||||
grammar.setStart(start);
|
||||
|
||||
try (DbgCtx dc = dbg.start("Computing First/Follow for General Grammar")) {
|
||||
try (DbgCtx dc = DBG.start("Computing First/Follow for General Grammar")) {
|
||||
this.ff = new AssemblyFirstFollow(grammar);
|
||||
if (dbg_detail) {
|
||||
printGeneralFF(dbg);
|
||||
if (DBG_DETAIL) {
|
||||
printGeneralFF(DBG);
|
||||
}
|
||||
}
|
||||
|
||||
try (DbgCtx dc = dbg.start("Computing LR0 States and Transition Table")) {
|
||||
try (DbgCtx dc = DBG.start("Computing LR0 States and Transition Table")) {
|
||||
buildLR0Machine();
|
||||
if (dbg_detail) {
|
||||
printLR0States(dbg);
|
||||
printLR0TransitionTable(dbg);
|
||||
if (DBG_DETAIL) {
|
||||
printLR0States(DBG);
|
||||
printLR0TransitionTable(DBG);
|
||||
}
|
||||
}
|
||||
|
||||
try (DbgCtx dc = dbg.start("Computing Extended Grammar")) {
|
||||
try (DbgCtx dc = DBG.start("Computing Extended Grammar")) {
|
||||
buildExtendedGrammar();
|
||||
if (dbg_detail) {
|
||||
printExtendedGrammar(dbg);
|
||||
if (DBG_DETAIL) {
|
||||
printExtendedGrammar(DBG);
|
||||
}
|
||||
}
|
||||
|
||||
try (DbgCtx dc = dbg.start("Computing First/Follow for Extended Grammar")) {
|
||||
try (DbgCtx dc = DBG.start("Computing First/Follow for Extended Grammar")) {
|
||||
this.extff = new AssemblyFirstFollow(extendedGrammar);
|
||||
if (dbg_detail) {
|
||||
printExtendedFF(dbg);
|
||||
if (DBG_DETAIL) {
|
||||
printExtendedFF(DBG);
|
||||
}
|
||||
}
|
||||
|
||||
try (DbgCtx dc = dbg.start("Computing Parse Table")) {
|
||||
try (DbgCtx dc = DBG.start("Computing Parse Table")) {
|
||||
buildActionGotoTable();
|
||||
if (dbg_detail) {
|
||||
printParseTable(dbg);
|
||||
if (DBG_DETAIL) {
|
||||
printParseTable(DBG);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -145,7 +149,7 @@ public class AssemblyParser {
|
||||
AssemblySymbol sym = item.getNext();
|
||||
if (sym != null) {
|
||||
AssemblyParseStateItem ni = item.read();
|
||||
go.get(sym).add(ni);
|
||||
go.get(sym).getKernel().add(ni);
|
||||
}
|
||||
}
|
||||
// Now, add the appropriate entries to the transition table
|
||||
@ -158,10 +162,12 @@ public class AssemblyParser {
|
||||
|
||||
/**
|
||||
* Add a newly-constructed LR0 state, and return its assigned number
|
||||
*
|
||||
* <p>
|
||||
* If the state already exists, this just returns its previously assigned number
|
||||
*
|
||||
* @param state the newly-constructed state
|
||||
* @return the assigned number
|
||||
*
|
||||
* If the state already exists, this just returns its previously assigned number
|
||||
*/
|
||||
protected int addLR0State(AssemblyParseState state) {
|
||||
int num = states.indexOf(state);
|
||||
@ -188,6 +194,7 @@ public class AssemblyParser {
|
||||
|
||||
/**
|
||||
* Extend a production, using the given LR0 start state
|
||||
*
|
||||
* @param prod the production to extend
|
||||
* @param start the starting LR0 state
|
||||
* @return the extended production, if the start state is valid for it
|
||||
@ -195,17 +202,17 @@ public class AssemblyParser {
|
||||
protected AssemblyExtendedProduction extend(AssemblyProduction prod, int start) {
|
||||
AssemblySentential<AssemblyExtendedNonTerminal> extR = new AssemblySentential<>();
|
||||
int curState = start;
|
||||
for (AssemblySymbol sym : prod) {
|
||||
for (AssemblySymbol sym : prod.getRHS()) {
|
||||
int nextState = table.get(curState, sym);
|
||||
if (sym instanceof AssemblyTerminal) {
|
||||
extR.add(sym);
|
||||
extR.addSymbol(sym);
|
||||
}
|
||||
else if (sym instanceof AssemblyNonTerminal) {
|
||||
extR.add(new AssemblyExtendedNonTerminal(curState, (AssemblyNonTerminal) sym,
|
||||
extR.addSymbol(new AssemblyExtendedNonTerminal(curState, (AssemblyNonTerminal) sym,
|
||||
nextState));
|
||||
}
|
||||
else {
|
||||
throw new RuntimeException(
|
||||
throw new AssertionError(
|
||||
"Internal error: all AssemblySymbols must be either terminal or non-terminal");
|
||||
}
|
||||
curState = nextState;
|
||||
@ -265,7 +272,7 @@ public class AssemblyParser {
|
||||
// Make $ accept on any state with a completed start item.
|
||||
nextState: for (i = 0; i < states.size(); i++) {
|
||||
AssemblyParseState state = states.get(i);
|
||||
for (AssemblyParseStateItem item : state) {
|
||||
for (AssemblyParseStateItem item : state.getKernel()) {
|
||||
if (item.completed() && item.getProduction().getLHS().getName().equals("$S")) {
|
||||
actions.putAccept(i);
|
||||
continue nextState;
|
||||
@ -340,24 +347,28 @@ public class AssemblyParser {
|
||||
|
||||
/**
|
||||
* Parse the given sentence
|
||||
*
|
||||
* @param input the sentence to parse
|
||||
* @return all possible parse trees (and possible errors)
|
||||
*/
|
||||
public Iterable<AssemblyParseResult> parse(final String input) {
|
||||
return parse(input, EMPTY_LABELS);
|
||||
return parse(input, AssemblyNumericSymbols.EMPTY);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the given sentence with the given defined labels
|
||||
*
|
||||
* <p>
|
||||
* The tokenizer for numeric terminals also accepts any key in {@code labels}. In such cases,
|
||||
* the resulting token is assigned the value of the label.
|
||||
*
|
||||
* @param input the sentence to parse
|
||||
* @param labels a map of label to number substitutions
|
||||
* @return all possible parse results (trees and errors)
|
||||
*
|
||||
* The tokenizer for numeric terminals also accepts any key in {@code labels.} In such cases,
|
||||
* the resulting token is assigned the value of the label.
|
||||
*/
|
||||
public Collection<AssemblyParseResult> parse(final String input, Map<String, Long> labels) {
|
||||
AssemblyParseMachine init = new AssemblyParseMachine(this, input, 0, null, labels);
|
||||
public Collection<AssemblyParseResult> parse(final String input,
|
||||
AssemblyNumericSymbols symbols) {
|
||||
AssemblyParseMachine init = new AssemblyParseMachine(this, input, 0, null, symbols);
|
||||
Set<AssemblyParseMachine> results = init.exhaust();
|
||||
|
||||
Set<AssemblyParseResult> ret = new LinkedHashSet<>();
|
||||
@ -368,7 +379,7 @@ public class AssemblyParser {
|
||||
else if (m.error != 0) {
|
||||
Set<String> suggestions = new TreeSet<>();
|
||||
for (AssemblyTerminal t : m.expected) {
|
||||
suggestions.addAll(t.getSuggestions(m.got, labels));
|
||||
suggestions.addAll(t.getSuggestions(m.got, symbols));
|
||||
}
|
||||
ret.add(AssemblyParseResult.error(m.got, suggestions));
|
||||
}
|
||||
@ -395,11 +406,11 @@ public class AssemblyParser {
|
||||
for (int i = 0; i < states.size(); i++) {
|
||||
AssemblyParseState state = states.get(i);
|
||||
out.println("I" + i);
|
||||
for (AssemblyParseStateItem item : state) {
|
||||
for (AssemblyParseStateItem item : state.getKernel()) {
|
||||
out.println("K: " + item);
|
||||
}
|
||||
for (AssemblyParseStateItem item : state.getClosure()) {
|
||||
if (!state.contains(item)) {
|
||||
if (!state.getKernel().contains(item)) {
|
||||
out.println("C: " + item);
|
||||
}
|
||||
}
|
||||
@ -519,6 +530,7 @@ public class AssemblyParser {
|
||||
|
||||
/**
|
||||
* Get the grammar used to construct this parser
|
||||
*
|
||||
* @return the grammar
|
||||
*/
|
||||
public AssemblyGrammar getGrammar() {
|
||||
|
@ -0,0 +1,88 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
|
||||
/**
|
||||
* Base for a node in an assembly prototype
|
||||
*/
|
||||
public abstract class AbstractAssemblyState {
|
||||
protected static final DbgTimer DBG = AssemblyTreeResolver.DBG;
|
||||
|
||||
protected final AssemblyTreeResolver resolver;
|
||||
protected final List<AssemblyConstructorSemantic> path;
|
||||
protected final int shift;
|
||||
protected final int length;
|
||||
|
||||
protected final int hash;
|
||||
|
||||
/**
|
||||
* Construct a node
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param path the path to this node for diagnostics
|
||||
* @param shift the (right) shift in bytes for this operand
|
||||
* @param length the length of this operand
|
||||
*/
|
||||
protected AbstractAssemblyState(AssemblyTreeResolver resolver,
|
||||
List<AssemblyConstructorSemantic> path, int shift, int length) {
|
||||
this.resolver = resolver;
|
||||
this.path = path;
|
||||
this.shift = shift;
|
||||
this.length = length;
|
||||
|
||||
this.hash = computeHash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hash;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre compute this nodes hash
|
||||
*
|
||||
* @return the hash
|
||||
*/
|
||||
public abstract int computeHash();
|
||||
|
||||
@Override
|
||||
public abstract boolean equals(Object obj);
|
||||
|
||||
/**
|
||||
* Generate machine (partial) code for this node
|
||||
*
|
||||
* @param fromRight the accumulated patterns thus far, from the right sibling or left-most child
|
||||
* @param errors a place to collect error reports
|
||||
* @return the stream of generated patterns, as accumulated
|
||||
*/
|
||||
protected abstract Stream<AssemblyResolvedPatterns> resolve(AssemblyResolvedPatterns fromRight,
|
||||
Collection<AssemblyResolvedError> errors);
|
||||
|
||||
/**
|
||||
* Get the length in bytes of the operand represented by this node
|
||||
*
|
||||
* @return the length
|
||||
*/
|
||||
public int getLength() {
|
||||
return length;
|
||||
}
|
||||
}
|
@ -0,0 +1,112 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseTreeNode;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
|
||||
/**
|
||||
* Base class for generating prototype nodes ("states") from a parse tree node
|
||||
*
|
||||
* @param <N> the type of parse tree node to process
|
||||
*/
|
||||
public abstract class AbstractAssemblyStateGenerator<N extends AssemblyParseTreeNode> {
|
||||
protected static final DbgTimer DBG = AssemblyTreeResolver.DBG;
|
||||
|
||||
/**
|
||||
* Context to pass along as states are generated
|
||||
*/
|
||||
protected static class GeneratorContext {
|
||||
|
||||
/**
|
||||
* Render the path as a printable string
|
||||
*
|
||||
* @param path the path
|
||||
* @return the string
|
||||
*/
|
||||
public static String pathToString(List<AssemblyConstructorSemantic> path) {
|
||||
return "[" +
|
||||
path.stream().map(sem -> sem.getLocation()).collect(Collectors.joining(",")) + "]";
|
||||
}
|
||||
|
||||
final List<AssemblyConstructorSemantic> path;
|
||||
final int shift;
|
||||
|
||||
/**
|
||||
* Construct a context
|
||||
*
|
||||
* @param path the path of constructors, for diagnostics
|
||||
* @param shift the (right) shift in bytes of the operand whose state is being generated
|
||||
*/
|
||||
public GeneratorContext(List<AssemblyConstructorSemantic> path, int shift) {
|
||||
this.path = List.copyOf(path);
|
||||
this.shift = shift;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a context suitable for descent into an operand
|
||||
*
|
||||
* @param cons the parent constructor
|
||||
* @param shift the shift offset of the operand
|
||||
* @return the context
|
||||
*/
|
||||
public GeneratorContext push(AssemblyConstructorSemantic cons, int shift) {
|
||||
List<AssemblyConstructorSemantic> path = new ArrayList<>(this.path);
|
||||
path.add(cons);
|
||||
return new GeneratorContext(path, this.shift + shift);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a debug line
|
||||
*
|
||||
* @param string the message
|
||||
*/
|
||||
public void dbg(String string) {
|
||||
DBG.println(pathToString(path) + ":" + string);
|
||||
}
|
||||
}
|
||||
|
||||
protected final AssemblyTreeResolver resolver;
|
||||
protected final N node;
|
||||
protected final AssemblyResolvedPatterns fromLeft;
|
||||
|
||||
/**
|
||||
* Construct a generator
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param node the node from which to generate states
|
||||
* @param fromLeft the accumulated patterns from the left sibling or the parent
|
||||
*/
|
||||
public AbstractAssemblyStateGenerator(AssemblyTreeResolver resolver, N node,
|
||||
AssemblyResolvedPatterns fromLeft) {
|
||||
this.resolver = resolver;
|
||||
this.node = node;
|
||||
this.fromLeft = fromLeft;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate states
|
||||
*
|
||||
* @param gc the generator context for this node
|
||||
* @return the stream of prototypes, each including accumulated patterns
|
||||
*/
|
||||
public abstract Stream<AssemblyGeneratedPrototype> generate(GeneratorContext gc);
|
||||
}
|
@ -0,0 +1,214 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.ConstructState;
|
||||
|
||||
/**
|
||||
* The state corresponding to a sub-table operand
|
||||
*
|
||||
* <p>
|
||||
* This is roughly analogous to {@link ConstructState}, but for assembly. It records the assembly
|
||||
* semantic, i.e., SLEIGH constructor, and the child states, one for each operand in the
|
||||
* constructor. Its implementation of {@link #resolve(AssemblyResolvedPatterns, Collection)}
|
||||
* encapsulates, perhaps the very kernel of, machine-code generation. Operands can have their own
|
||||
* complexity, but most of the core machine-code concepts of SLEIGH are handled by constructors.
|
||||
*/
|
||||
public class AssemblyConstructState extends AbstractAssemblyState {
|
||||
|
||||
/**
|
||||
* Compute the farthest end byte (exclusive) among the given operands
|
||||
*
|
||||
* @param operands the operands
|
||||
* @return the farthest end byte
|
||||
*/
|
||||
protected static int computeEnd(List<AbstractAssemblyState> operands) {
|
||||
return operands.stream()
|
||||
.map(s -> s.shift + s.length)
|
||||
.reduce(0, Integer::max);
|
||||
}
|
||||
|
||||
protected final AssemblyConstructorSemantic sem;
|
||||
protected final List<AbstractAssemblyState> children;
|
||||
|
||||
/**
|
||||
* Construct the state for a selected SLEIGH constructor of a sub-table operand
|
||||
*
|
||||
* <p>
|
||||
* The operand's length is computed from the constructors length and the shifts and lengths of
|
||||
* its generated operands.
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param path the path for diagnostics
|
||||
* @param shift the (right) shift of this operand
|
||||
* @param sem the selected SLEIGH constructor
|
||||
* @param children the child state for each operand in the constructor
|
||||
*/
|
||||
public AssemblyConstructState(AssemblyTreeResolver resolver,
        List<AssemblyConstructorSemantic> path, int shift,
        AssemblyConstructorSemantic sem, List<AbstractAssemblyState> children) {
    // The length is the larger of (a) how far the children reach past this operand's shift
    // and (b) the constructor's own minimum length
    super(resolver, path, shift,
        Math.max(computeEnd(children) - shift, sem.cons.getMinimumLength()));
    this.children = children;
    this.sem = sem;
}
|
||||
|
||||
@Override
|
||||
public int computeHash() {
|
||||
return Objects.hash(getClass(), shift, sem, children);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (!(obj instanceof AssemblyConstructState)) {
|
||||
return false;
|
||||
}
|
||||
AssemblyConstructState that = (AssemblyConstructState) obj;
|
||||
if (this.resolver != that.resolver) {
|
||||
return false;
|
||||
}
|
||||
if (this.shift != that.shift) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(this.sem, that.sem)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(this.children, that.children)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return sem.getLocation() + "[" +
|
||||
children.stream().map(s -> s.toString()).collect(Collectors.joining(",")) + "]";
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* <p>
|
||||
* Currently, this is used to generate machine-code from a generated assembly instruction
|
||||
* prototype, but it is not used to apply recursive constructors, i.e., for prefix generation.
|
||||
* TODO: That should change. This performs the reverse of the machine-code parsing process, both
|
||||
* in concept and in implementation. First, it descends to the children. Each child is a
|
||||
* {@link AbstractAssemblyState}, i.e., either another constructor, or a value operand. (There
|
||||
* are also specializations for dealing with hidden constructor and value operands.) Then it
|
||||
* solves context changes, in the reverse order of the specification. Finally, it applies the
|
||||
* patterns, in order to satisfy the constraints specified by the constructor. As a final
|
||||
* detail, it records, for diagnostic purposes, the intermediate child patterns into the parent
|
||||
* pattern.
|
||||
*/
|
||||
@Override
|
||||
protected Stream<AssemblyResolvedPatterns> resolve(AssemblyResolvedPatterns fromRight,
|
||||
Collection<AssemblyResolvedError> errors) {
|
||||
String desc = "Resolving constructor: " + sem.getLocation();
|
||||
return resolveRemainingChildren(fromRight, errors, children)
|
||||
.flatMap(fromChildren -> resolveMutations(fromChildren, errors))
|
||||
.flatMap(fromMutations -> resolvePatterns(fromMutations, errors))
|
||||
.map(pat -> pat.parent(desc, children.size()).withConstructor(sem.cons));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply each possible pattern for the selected constructor
|
||||
*
|
||||
* @param fromMutations the assembly pattern after mutations were solved
|
||||
* @param errors a place to collect errors
|
||||
* @return the stream of patterns, as accumulated with {@code fromMutations}
|
||||
*/
|
||||
protected Stream<AssemblyResolvedPatterns> resolvePatterns(
|
||||
AssemblyResolvedPatterns fromMutations, Collection<AssemblyResolvedError> errors) {
|
||||
return sem.getPatterns()
|
||||
.stream()
|
||||
.map(pat -> {
|
||||
DBG.println(path + ": Constructor pattern: " + pat.lineToString());
|
||||
DBG.println(path + ": Current pattern: " + fromMutations.lineToString());
|
||||
AssemblyResolvedPatterns combined = fromMutations.combine(pat.shift(shift));
|
||||
//DBG.println("Combined pattern: " + combined);
|
||||
return combined;
|
||||
})
|
||||
.filter(ar -> {
|
||||
if (ar == null) {
|
||||
errors.add(AssemblyResolution.error("Pattern conflict",
|
||||
"Resolving " + sem.getLocation() + " in " + path));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Solve the mutations for the selected constructor
|
||||
*
|
||||
* @param fromChildren the assembly pattern as accumulated from the left-most child
|
||||
* @param errors a place to collect errors
|
||||
* @return the stream of patterns, as accumulated with {@code fromChildren}
|
||||
*/
|
||||
protected Stream<AssemblyResolvedPatterns> resolveMutations(
|
||||
AssemblyResolvedPatterns fromChildren, Collection<AssemblyResolvedError> errors) {
|
||||
AssemblyResolution ar = sem.solveContextChanges(fromChildren, resolver.vals);
|
||||
if (ar.isError()) {
|
||||
errors.add((AssemblyResolvedError) ar);
|
||||
return Stream.of();
|
||||
}
|
||||
if (ar.isBackfill()) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
AssemblyResolvedPatterns pat = (AssemblyResolvedPatterns) ar;
|
||||
return Stream.of(pat.solveContextChangesForForbids(sem, resolver.vals));
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive function from resolving all children right-to-left and accumulating the patterns
|
||||
*
|
||||
* <p>
|
||||
* This pops the right-most child in {@code children}, resolves it, and then recurses, passing
|
||||
* the accumulated patterns in as {@code fromRight} with the remaining children.
|
||||
* {@link Stream#flatMap(java.util.function.Function)} makes this somewhat fluent, given the
|
||||
* possibility of multiple resolutions.
|
||||
*
|
||||
* @param fromRight the assembly pattern as accumulated from the right sibling. If this is the
|
||||
* right-most sibling, then this is the pattern accumulated from the parent's right
|
||||
* sibling, as so on. If no such sibling exists, it is the unrestricted (empty)
|
||||
* pattern.
|
||||
* @param errors a place to collect errors
|
||||
* @param children the remaining children to resolve
|
||||
* @return the stream of accumulated patterns
|
||||
*/
|
||||
protected Stream<AssemblyResolvedPatterns> resolveRemainingChildren(
|
||||
AssemblyResolvedPatterns fromRight, Collection<AssemblyResolvedError> errors,
|
||||
List<AbstractAssemblyState> children) {
|
||||
|
||||
// Need to resolve children (as they apply context changes) from right to left
|
||||
if (children.isEmpty()) {
|
||||
return Stream.of(fromRight);
|
||||
}
|
||||
|
||||
AbstractAssemblyState rightMost = children.get(children.size() - 1);
|
||||
return rightMost.resolve(fromRight, errors).flatMap(fromChild -> {
|
||||
return resolveRemainingChildren(fromChild, errors,
|
||||
children.subList(0, children.size() - 1));
|
||||
});
|
||||
}
|
||||
}
|
@ -0,0 +1,212 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseBranch;
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseTreeNode;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.AsmUtil;
|
||||
import ghidra.app.plugin.processors.sleigh.Constructor;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
|
||||
/**
|
||||
* The generator of {@link AssemblyConstructState} from {@link AssemblyParseBranch}
|
||||
*
|
||||
* <p>
|
||||
* In short, this handles the selection of each possible constructor for the production recorded by
|
||||
* a given parse branch.
|
||||
*/
|
||||
public class AssemblyConstructStateGenerator
|
||||
extends AbstractAssemblyStateGenerator<AssemblyParseBranch> {
|
||||
|
||||
/**
|
||||
* Construct the instruction state generator or a sub-table operand state generator
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param node the node from which to generate states
|
||||
* @param fromLeft the accumulated patterns from the left sibling or the parent
|
||||
*/
|
||||
public AssemblyConstructStateGenerator(AssemblyTreeResolver resolver, AssemblyParseBranch node,
|
||||
AssemblyResolvedPatterns fromLeft) {
|
||||
super(resolver, node, fromLeft);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<AssemblyGeneratedPrototype> generate(GeneratorContext gc) {
|
||||
AssemblyProduction production = node.getProduction();
|
||||
return resolver.grammar.getSemantics(production)
|
||||
.stream()
|
||||
.flatMap(sem -> applyConstructor(gc, sem));
|
||||
}
|
||||
|
||||
/**
|
||||
* Arrange the branch's (mnemonic) children according to the machine-code production
|
||||
*
|
||||
* <p>
|
||||
* This orders the parsed children so that each is readily paired to its operand as given by
|
||||
* {@link Constructor#getOperand(int)}.
|
||||
*
|
||||
* @param sem the SLEIGH constructor whose machine-code production to consider
|
||||
* @return the children arranged in constructor operand order
|
||||
*/
|
||||
protected List<AssemblyParseTreeNode> orderOpNodes(AssemblyConstructorSemantic sem) {
|
||||
Constructor cons = sem.getConstructor();
|
||||
List<AssemblyParseTreeNode> result =
|
||||
Arrays.asList(new AssemblyParseTreeNode[cons.getNumOperands()]);
|
||||
int index = 0;
|
||||
AssemblyProduction production = node.getProduction();
|
||||
List<AssemblyParseTreeNode> substitutions = node.getSubstitutions();
|
||||
for (int i = 0; i < production.getRHS().size(); i++) {
|
||||
AssemblySymbol sym = production.getRHS().getSymbol(i);
|
||||
if (!sym.takesOperandIndex()) {
|
||||
continue;
|
||||
}
|
||||
result.set(sem.getOperandIndex(index), substitutions.get(i));
|
||||
index++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate prototypes, considering the given SLEIGH constructor
|
||||
*
|
||||
* <p>
|
||||
* This comprises three steps: apply patterns, apply context changes, apply operands
|
||||
* left-to-right.
|
||||
*
|
||||
* @param gc the generator context for this node
|
||||
* @param sem the SLEIGH constructor to apply
|
||||
* @return the stream of generated (sub) prototypes
|
||||
*/
|
||||
protected Stream<AssemblyGeneratedPrototype> applyConstructor(GeneratorContext gc,
|
||||
AssemblyConstructorSemantic sem) {
|
||||
Stream<AssemblyResolvedPatterns> applied = sem.applyPatternsForward(gc.shift, fromLeft)
|
||||
.filter(pat -> {
|
||||
if (pat == null) {
|
||||
gc.dbg("Conflicting pattern. fromLeft=" + fromLeft + ",sem=" +
|
||||
sem.getLocation());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})
|
||||
.map(pat -> sem.applyContextChangesForward(resolver.vals, pat));
|
||||
List<AssemblyParseTreeNode> opOrdered = orderOpNodes(sem);
|
||||
return applied.flatMap(
|
||||
patterned -> applyOperands(gc, patterned, sem, opOrdered));
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate prototypes by considering all the operands of the given SLEIGH constructor
|
||||
*
|
||||
* <p>
|
||||
* This is the last step of applying a constructor.
|
||||
*
|
||||
* @param gc the generator context for this node
|
||||
* @param fromMutations the patterns as accumulated after context changes
|
||||
* @param sem the selected SLEIGH constructor
|
||||
* @param opOrdered the parsed children ordered as the constructor's operands
|
||||
* @return the stream of generated (sub) prototypes
|
||||
*/
|
||||
protected Stream<AssemblyGeneratedPrototype> applyOperands(GeneratorContext gc,
|
||||
AssemblyResolvedPatterns fromMutations, AssemblyConstructorSemantic sem,
|
||||
List<AssemblyParseTreeNode> opOrdered) {
|
||||
Constructor cons = sem.getConstructor();
|
||||
List<GeneratorContext> siblingGcs =
|
||||
Arrays.asList(new GeneratorContext[cons.getNumOperands()]);
|
||||
return applyRemainingOperands(gc, siblingGcs, fromMutations, sem, opOrdered, List.of());
|
||||
}
|
||||
|
||||
/**
|
||||
* A recursive function for generating child operand prototypes and constructing the parent(s)
|
||||
*
|
||||
* <p>
|
||||
* The implementation generates states for the left-most node not yet considered. It knows which
|
||||
* is next by examining the length of {@code children}, which records the generated state for
|
||||
* each child already considered. It then appends the result to {@code children} and recurses,
|
||||
* using the resulting patterns as {@code fromLeft}. Given that multiple prototypes can be
|
||||
* generated, {@link Stream#flatMap(java.util.function.Function)} makes the recursive invocation
|
||||
* somewhat fluent. The base case occurs when all children have states generated. It constructs
|
||||
* the state for this node, storing the generated children with it.
|
||||
*
|
||||
* <p>
|
||||
* This routine is also operative in computing shifts, since the offset of each operand is
|
||||
* incorporated here. Two accessors are needed to compute the offset:
|
||||
* {@link OperandSymbol#getOffsetBase()} and {@link OperandSymbol#getRelativeOffset()}. The
|
||||
* former identifies which operand's end (exclusive) byte is the base of the offset. The latter
|
||||
* specifies an additional number of bytes to the right. Consider an operand consisting of three
|
||||
* operands, each consuming a 1-byte token.
|
||||
*
|
||||
* <pre>
|
||||
* +-----+-----+-----+
|
||||
* | op0 | op1 | op2 |
|
||||
* +-----+-----+-----+
|
||||
* ^-1 ^0 ^1 ^2
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* A base offset of 0 would indicate that the overall offset is the end of op0 (relative to the
|
||||
* parent op) plus the relative offset. A base offset of -1 is special, but is easy to
|
||||
* conceptualize from the diagram. It indicates the beginning byte of the parent op. Thus every
|
||||
* child operand boundary is numbered. The offset base must always refer to an operand to the
|
||||
* left.
|
||||
*
|
||||
* @param parentGc the generator context for othis node
|
||||
* @param childGcs a list to collect the generator context for each child operand. The root
|
||||
* invocation should pass a fixed-length mutable list of nulls, one for each child.
|
||||
* @param fromLeft the accumulated patterns from the left sibling. The root invocation should
|
||||
* pass the patterns accumulated after context changes.
|
||||
* @param sem the selected SLEIGH constructor, whose operands to generate
|
||||
* @param opOrdered the paresd children ordered as the constructor's operands
|
||||
* @param children the list of children generated so far. The root invocation should pass the
|
||||
* empty list.
|
||||
* @return the stream of generated (sub) prototypes
|
||||
*/
|
||||
protected Stream<AssemblyGeneratedPrototype> applyRemainingOperands(GeneratorContext parentGc,
|
||||
List<GeneratorContext> childGcs, AssemblyResolvedPatterns fromLeft,
|
||||
AssemblyConstructorSemantic sem, List<AssemblyParseTreeNode> opOrdered,
|
||||
List<AbstractAssemblyState> children) {
|
||||
Constructor cons = sem.getConstructor();
|
||||
int opIdx = children.size();
|
||||
if (opIdx == cons.getNumOperands()) {
|
||||
// We're done!
|
||||
return Stream.of(new AssemblyGeneratedPrototype(
|
||||
new AssemblyConstructState(resolver, parentGc.path, parentGc.shift, sem, children),
|
||||
fromLeft));
|
||||
}
|
||||
AssemblyParseTreeNode opNode = opOrdered.get(opIdx);
|
||||
OperandSymbol opSym = cons.getOperand(opIdx);
|
||||
int offset = opSym.getRelativeOffset();
|
||||
int offsetBase = opSym.getOffsetBase();
|
||||
if (-1 != offsetBase) {
|
||||
int baseShift = childGcs.get(offsetBase).shift;
|
||||
int baseLength = children.get(offsetBase).getLength();
|
||||
offset += baseShift - parentGc.shift + baseLength;
|
||||
}
|
||||
|
||||
AbstractAssemblyStateGenerator<?> opGen =
|
||||
resolver.getStateGenerator(opSym, opNode, fromLeft);
|
||||
GeneratorContext opGc = parentGc.push(sem, offset);
|
||||
childGcs.set(opIdx, opGc);
|
||||
return opGen.generate(opGc).flatMap(prot -> {
|
||||
return applyRemainingOperands(parentGc, new ArrayList<>(childGcs), prot.patterns, sem,
|
||||
opOrdered, AsmUtil.extendList(children, prot.state));
|
||||
});
|
||||
}
|
||||
}
|
@ -16,6 +16,7 @@
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.RecursiveDescentSolver;
|
||||
@ -30,19 +31,22 @@ import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol;
|
||||
/**
|
||||
* Describes a SLEIGH constructor semantic
|
||||
*
|
||||
* <p>
|
||||
* These are collected and associated with productions in the grammar based on the given
|
||||
* constructor's print pieces.
|
||||
*/
|
||||
public class AssemblyConstructorSemantic implements Comparable<AssemblyConstructorSemantic> {
|
||||
protected static final RecursiveDescentSolver solver = RecursiveDescentSolver.getSolver();
|
||||
protected static final DbgTimer dbg = AssemblyTreeResolver.dbg;
|
||||
protected static final RecursiveDescentSolver SOLVER = RecursiveDescentSolver.getSolver();
|
||||
protected static final DbgTimer DBG = AssemblyTreeResolver.DBG;
|
||||
|
||||
protected final Set<AssemblyResolvedConstructor> patterns = new HashSet<>();
|
||||
protected final Set<AssemblyResolvedPatterns> patterns = new HashSet<>();
|
||||
protected final Constructor cons;
|
||||
protected final List<Integer> indices;
|
||||
protected final List<ContextChange> contextChanges;
|
||||
protected final List<ContextChange> reversedChanges;
|
||||
|
||||
// A set initialized on first access with forbidden patterns added
|
||||
protected Set<AssemblyResolvedConstructor> upatterns;
|
||||
protected Set<AssemblyResolvedPatterns> upatterns;
|
||||
|
||||
/**
|
||||
* Build a new SLEIGH constructor semantic
|
||||
@ -54,13 +58,28 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
public AssemblyConstructorSemantic(Constructor cons, List<Integer> indices) {
|
||||
this.cons = cons;
|
||||
this.indices = Collections.unmodifiableList(indices);
|
||||
List<ContextChange> changes = new ArrayList<>(cons.getContextChanges());
|
||||
this.contextChanges = List.copyOf(changes);
|
||||
Collections.reverse(changes);
|
||||
this.reversedChanges = List.copyOf(changes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a pattern that would select the constructor
|
||||
*
|
||||
* @param pat the pattern
|
||||
*/
|
||||
public void addPattern(DisjointPattern pat) {
|
||||
addPattern(AssemblyResolution.fromPattern(pat, cons.getMinimumLength(), cons.toString()));
|
||||
addPattern(AssemblyResolution.fromPattern(pat, cons.getMinimumLength(),
|
||||
"Generated constructor pattern " + getLocation(), cons));
|
||||
}
|
||||
|
||||
public void addPattern(AssemblyResolvedConstructor pat) {
|
||||
/**
|
||||
* Record a pattern that would select the constructor
|
||||
*
|
||||
* @param pat the pattern
|
||||
*/
|
||||
public void addPattern(AssemblyResolvedPatterns pat) {
|
||||
if (upatterns != null) {
|
||||
throw new IllegalStateException("Cannot add patterns after a call to getPatterns()");
|
||||
}
|
||||
@ -69,7 +88,26 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return cons.toString() + ":" + patterns.toString();
|
||||
return getLocation();
|
||||
}
|
||||
|
||||
/**
|
||||
* Render the constructor's source location for diagnostics
|
||||
*
|
||||
* @param cons the constructor
|
||||
* @return the location as {@code file:lineno}
|
||||
*/
|
||||
public static String getLocation(Constructor cons) {
|
||||
return cons.getSourceFile() + ":" + cons.getLineno();
|
||||
}
|
||||
|
||||
/**
|
||||
* Render this constructor's source location for diagnostics
|
||||
*
|
||||
* @return the location
|
||||
*/
|
||||
public String getLocation() {
|
||||
return getLocation(cons);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -86,7 +124,7 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
*
|
||||
* @return the patterns
|
||||
*/
|
||||
public Collection<AssemblyResolvedConstructor> getPatterns() {
|
||||
public Collection<AssemblyResolvedPatterns> getPatterns() {
|
||||
if (upatterns == null) {
|
||||
computeAllForbids();
|
||||
}
|
||||
@ -115,6 +153,7 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
/**
|
||||
* Get an iterator over the operand indices
|
||||
*
|
||||
* <p>
|
||||
* If this iterator is advanced for each non-terminal, while simultaneously iterating over the
|
||||
* RHS of the associated production, then this will identify the corresponding operand index for
|
||||
* each non-terminal
|
||||
@ -133,9 +172,9 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
throw new IllegalStateException(
|
||||
"Already computed all forbidden patterns for this constructor");
|
||||
}
|
||||
Set<AssemblyResolvedConstructor> result = new HashSet<>();
|
||||
for (AssemblyResolvedConstructor pat : patterns) {
|
||||
AssemblyResolvedConstructor fpat = withComputedForbids(pat);
|
||||
Set<AssemblyResolvedPatterns> result = new HashSet<>();
|
||||
for (AssemblyResolvedPatterns pat : patterns) {
|
||||
AssemblyResolvedPatterns fpat = withComputedForbids(pat);
|
||||
result.add(fpat);
|
||||
}
|
||||
upatterns = Collections.unmodifiableSet(result);
|
||||
@ -144,6 +183,7 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
/**
|
||||
* Add the list of forbidden patterns to one of the constructor's patterns
|
||||
*
|
||||
* <p>
|
||||
* SLEIGH disambiguates multiple matching patterns by two rules. First, if one is more specific
|
||||
* than ("specializes") another, i.e., it matches on more bits than another pattern, the more
|
||||
* specific pattern is chosen. Second, if the two are equally special, then the one that occurs
|
||||
@ -151,18 +191,20 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
* later-occurring constructor is chosen, we must prevent continued resolution from matching the
|
||||
* more-special or earlier-occurring pattern(s).
|
||||
*
|
||||
* <p>
|
||||
* Essentially, this states, "you may choose any value matching my pattern, except those that
|
||||
* match these forbidden patterns."
|
||||
*
|
||||
* <p>
|
||||
* This takes a given pattern, and searches the rest of the language for any patterns that would
|
||||
* take precedence, and combines them as forbidden patterns with the given pattern.
|
||||
*
|
||||
* @param pat the given pattern
|
||||
* @return the same pattern with forbidden records added
|
||||
*/
|
||||
protected AssemblyResolvedConstructor withComputedForbids(AssemblyResolvedConstructor pat) {
|
||||
protected AssemblyResolvedPatterns withComputedForbids(AssemblyResolvedPatterns pat) {
|
||||
// Forbid anything more specific (or otherwise takes precedence) over me.
|
||||
Set<AssemblyResolvedConstructor> forbids = new HashSet<>();
|
||||
Set<AssemblyResolvedPatterns> forbids = new HashSet<>();
|
||||
SubtableSymbol parent = cons.getParent();
|
||||
|
||||
SleighLanguages.traverseConstructors(parent, new SubtableEntryVisitor() {
|
||||
@ -173,45 +215,58 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
return CONTINUE;
|
||||
}
|
||||
|
||||
/*
|
||||
/**
|
||||
* I had misunderstood the precedence rules originally.
|
||||
*
|
||||
* 1. If one pattern defines a subset of the other pattern, then the more-specific
|
||||
* one is preferred.
|
||||
* one is preferred.
|
||||
*
|
||||
* 2. Otherwise, preference is by line number
|
||||
*
|
||||
* Thus, I need to check if there is any overlap at all. If not, then I don't
|
||||
* need to worry about forbidding anything.
|
||||
* Then, I'll check if it defines a strict subset, and forbid it if so.
|
||||
* Then, I'll check if it defines a strict overset, and skip the line check if so.
|
||||
* Then, I'll check if its line number *precedes* mine, and forbid it if so.
|
||||
* Thus, I need to check if there is any overlap at all. If not, then I don't need
|
||||
* to worry about forbidding anything. Then, I'll check if it defines a strict
|
||||
* subset, and forbid it if so. Then, I'll check if it defines a strict overset, and
|
||||
* skip the line check if so. Then, I'll check if its line number *precedes* mine,
|
||||
* and forbid it if so.
|
||||
*
|
||||
* (I originally though the pattern with the most bits won, no matter whether or
|
||||
* not those bits overlapped.)
|
||||
* (I originally thought the pattern with the most bits won, no matter whether or not
|
||||
* those bits overlapped.)
|
||||
*
|
||||
* There's an additional nuance. Because context is an *input* to the assembler, it
|
||||
* may still cause the selection of a later constructor, despite line number. Thus,
|
||||
* we can't apply the line number rule unless the earlier one also has an overset in
|
||||
* terms of context.
|
||||
*/
|
||||
|
||||
// If the two patterns cannot be combined, then they are disjoint.
|
||||
AssemblyResolvedConstructor sibpat = AssemblyResolution.fromPattern(sibDP,
|
||||
sibcons.getMinimumLength(), "For specialization check");
|
||||
AssemblyResolvedConstructor comb = pat.combine(sibpat);
|
||||
AssemblyResolvedPatterns sibpat = AssemblyResolution.fromPattern(sibDP,
|
||||
sibcons.getMinimumLength(), "For specialization check", sibcons);
|
||||
AssemblyResolvedPatterns comb = pat.combine(sibpat);
|
||||
if (null == comb) {
|
||||
return CONTINUE;
|
||||
}
|
||||
|
||||
// OK, they overlap. Let's see if it's a strict subset
|
||||
if (comb.bitsEqual(sibpat)) {
|
||||
forbids.add(sibpat.withDescription(
|
||||
sibcons + " forbids " + cons + " by pattern specificity"));
|
||||
// My sibling is a strict subset, so it will win the overlap
|
||||
forbids.add(sibpat.withDescription(getLocation(sibcons) + " forbids " +
|
||||
getLocation(cons) + " by pattern specificity"));
|
||||
return CONTINUE;
|
||||
}
|
||||
else if (comb.bitsEqual(pat)) {
|
||||
// I'm a strict subset, so I will no matter the line number
|
||||
// I'm a strict subset, so I will win the overlap
|
||||
return CONTINUE;
|
||||
}
|
||||
|
||||
// We can't apply the line number rule unless the sibling's context is an overset
|
||||
if (!comb.ctx.equals(pat.ctx)) {
|
||||
return CONTINUE;
|
||||
}
|
||||
|
||||
// Finally, check the line number
|
||||
if (sibcons.getId() < cons.getId()) {
|
||||
forbids.add(
|
||||
sibpat.withDescription(sibcons + " forbids " + cons + " by rule position"));
|
||||
forbids.add(sibpat.withDescription(getLocation(sibcons) + " forbids " +
|
||||
getLocation(cons) + " by rule position"));
|
||||
return CONTINUE;
|
||||
}
|
||||
|
||||
@ -226,66 +281,61 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
/**
|
||||
* Solve this constructor's context changes
|
||||
*
|
||||
* <p>
|
||||
* Each value in {@code opvals} must either be a numeric value, e.g., an index from a varnode
|
||||
* list, or another {@link AssemblyResolvedPatterns} for a subconstructor operand.
|
||||
*
|
||||
* <p>
|
||||
* It's helpful to think of the SLEIGH disassembly process here. Normally, once the appropriate
|
||||
* constructor has been identified (by matching patterns), its context changes are applied, and
|
||||
* then its operands parsed (possibly parsing subconstructor operands). Thus, {@code res} can be
|
||||
* thought of as the intermediate result between applying context changes and parsing operands,
|
||||
* except in reverse. The output of this method corresponds to the state before context changes
|
||||
* were applied, i.e., immediately after selecting the constructor. Thus, in reverse, the
|
||||
* context is solved immediately before applying the selected constructor patterns.
|
||||
*
|
||||
* @param res the combined resolution requirements derived from the subconstructors
|
||||
* @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
|
||||
* @param opvals a map from operand index to operand value
|
||||
* @return the resolution with context changes applied in reverse, or an error
|
||||
*
|
||||
* Each value in {@code opvals} must either be a numeric value, e.g., an index from a
|
||||
* varnode list, or another {@link AssemblyResolvedConstructor} for a subconstructor
|
||||
* operand.
|
||||
*
|
||||
* It's helpful to think of the SLEIGH disassembly process here. Normally, once the
|
||||
* appropriate constructor has been identified (by matching patterns), its context
|
||||
* changes are applied, and then its operands parsed (possibly parsing subconstructor
|
||||
* operands). Thus, {@code res} can be thought of as the intermediate result between
|
||||
* applying context changes and parsing operands, except in reverse. The output of this
|
||||
* method corresponds to the state before context changes were applied, i.e.,
|
||||
* immediately after selecting the constructor. Thus, in reverse, the context is solved
|
||||
* immediately before applying the selected constructor patterns.
|
||||
*
|
||||
* @see AssemblyTreeResolver#resolveSelectedChildren(AssemblyProduction, List, List, Collection)
|
||||
*/
|
||||
public AssemblyResolution solveContextChanges(AssemblyResolvedConstructor res,
|
||||
Map<String, Long> vals, Map<Integer, Object> opvals) {
|
||||
List<ContextChange> contextChanges = cons.getContextChanges();
|
||||
List<ContextChange> reversed = new LinkedList<>();
|
||||
for (ContextChange chg : contextChanges) {
|
||||
reversed.add(0, chg);
|
||||
}
|
||||
for (ContextChange chg : reversed) {
|
||||
public AssemblyResolution solveContextChanges(AssemblyResolvedPatterns res,
|
||||
Map<String, Long> vals) {
|
||||
for (ContextChange chg : reversedChanges) {
|
||||
if (chg instanceof ContextOp) {
|
||||
dbg.println("Current: " + res.lineToString());
|
||||
DBG.println("Current: " + res.lineToString());
|
||||
// This seems backwards. That's because we're going backwards.
|
||||
// This is the "write" location for disassembly.
|
||||
ContextOp cop = (ContextOp) chg;
|
||||
dbg.println("Handling context change: " + cop);
|
||||
DBG.println("Handling context change: " + cop);
|
||||
|
||||
// TODO: Is this res or subres?
|
||||
MaskedLong reqval = res.readContextOp(cop);
|
||||
if (reqval.equals(MaskedLong.UNKS)) {
|
||||
dbg.println("Doesn't affect a current requirement");
|
||||
DBG.println("Doesn't affect a current requirement");
|
||||
continue; // this context change does not satisfy any requirement
|
||||
}
|
||||
dbg.println("'read' " + reqval);
|
||||
DBG.println("'read' " + reqval);
|
||||
|
||||
// Remove the requirement that we just read before trying to solve
|
||||
res = res.maskOut(cop);
|
||||
dbg.println("Masked out: " + res.lineToString());
|
||||
DBG.println("Masked out: " + res.lineToString());
|
||||
|
||||
// Now, solve
|
||||
AssemblyResolution sol = AssemblyTreeResolver.solveOrBackfill(
|
||||
cop.getPatternExpression(), reqval, vals, opvals, res, "Solution to " + cop);
|
||||
dbg.println("Solution: " + sol.lineToString());
|
||||
cop.getPatternExpression(), reqval, vals, res, "Solution to " + cop);
|
||||
DBG.println("Solution: " + sol.lineToString());
|
||||
if (sol.isError()) {
|
||||
AssemblyResolvedError err = (AssemblyResolvedError) sol;
|
||||
return AssemblyResolution.error(err.getError(), res);
|
||||
}
|
||||
|
||||
// Now, forward the new requirements to my parents.
|
||||
if (sol instanceof AssemblyResolvedConstructor) {
|
||||
AssemblyResolvedConstructor solcon = (AssemblyResolvedConstructor) sol;
|
||||
AssemblyResolvedConstructor check = res.combine(solcon);
|
||||
if (sol instanceof AssemblyResolvedPatterns) {
|
||||
AssemblyResolvedPatterns solcon = (AssemblyResolvedPatterns) sol;
|
||||
AssemblyResolvedPatterns check = res.combine(solcon);
|
||||
if (null == check) {
|
||||
return AssemblyResolution.error(
|
||||
"A context change caused a conflict: " + sol, res);
|
||||
@ -296,7 +346,7 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
AssemblyResolvedBackfill solbf = (AssemblyResolvedBackfill) sol;
|
||||
res = res.combine(solbf);
|
||||
}
|
||||
dbg.println("Combined: " + res.lineToString());
|
||||
DBG.println("Combined: " + res.lineToString());
|
||||
}
|
||||
}
|
||||
return res;
|
||||
@ -305,28 +355,45 @@ public class AssemblyConstructorSemantic implements Comparable<AssemblyConstruct
|
||||
/**
|
||||
* Apply just context transformations in the forward (disassembly) direction
|
||||
*
|
||||
* @param outer the state before context changes
|
||||
* @return the state after context changes
|
||||
* <p>
|
||||
* Unlike the usual disassembly process, this method does not take into account any information
|
||||
* from the instruction encoding. Any context bits that depend on it are set to unknown
|
||||
* ({@code x}) in the output. This method is used to pre-compute a context transition graph in
|
||||
* order to quickly resolve purely-recursive semantics on the root constructor table.
|
||||
*
|
||||
* Unlike the usual disassembly process, this method does not take into account any
|
||||
* information from the instruction encoding. Any context bits that depend on it are set
|
||||
* to unknown ({@code x}) in the output. This method is used to pre-compute a context
|
||||
* transition graph in order to quickly resolve purely-recursive semantics on the root
|
||||
* constructor table.
|
||||
* @param fromLeft the state before context changes
|
||||
* @return the state after context changes
|
||||
*/
|
||||
public AssemblyResolvedConstructor applyForward(AssemblyResolvedConstructor outer) {
|
||||
AssemblyResolvedConstructor res = outer;
|
||||
public AssemblyResolvedPatterns applyContextChangesForward(Map<String, Long> vals,
|
||||
AssemblyResolvedPatterns fromLeft) {
|
||||
AssemblyResolvedPatterns res = fromLeft;
|
||||
// TODO: Figure out semantics of ContextCommit. Not sure it matters here.
|
||||
for (ContextChange chg : cons.getContextChanges()) {
|
||||
for (ContextChange chg : contextChanges) {
|
||||
if (chg instanceof ContextOp) {
|
||||
ContextOp cop = (ContextOp) chg;
|
||||
MaskedLong val = solver.valueForResolution(cop.getPatternExpression(), res);
|
||||
MaskedLong val = SOLVER.valueForResolution(cop.getPatternExpression(), vals, res);
|
||||
res = res.writeContextOp(cop, val);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply just the instruction patterns in the forward (disassembly) direction
|
||||
*
|
||||
* @param shift the (right) shift in bytes to apply to the patterns before combining
|
||||
* @param fromLeft the accumulated patterns from the left sibling or parent
|
||||
* @return
|
||||
*/
|
||||
public Stream<AssemblyResolvedPatterns> applyPatternsForward(int shift,
|
||||
AssemblyResolvedPatterns fromLeft) {
|
||||
if (patterns.isEmpty()) {
|
||||
DBG.println("No patterns for " + getLocation() + "?" + "(hash=" +
|
||||
System.identityHashCode(this) + ")");
|
||||
}
|
||||
return patterns.stream().map(pat -> fromLeft.combine(pat.shift(shift)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(AssemblyConstructorSemantic that) {
|
||||
// TODO: This could be better
|
||||
|
@ -33,11 +33,13 @@ import ghidra.graph.algo.DijkstraShortestPathsAlgorithm;
|
||||
/**
|
||||
* A graph of possible context changes via the application of various constructors
|
||||
*
|
||||
* <p>
|
||||
* This is used primarily to find optimal paths for the application of recursive rules, i.e., those
|
||||
* of the form I => I. These cannot be resolved without some form of semantic analysis. The most
|
||||
* notable disadvantage to all of this is that you no longer get all of the possible assemblies,
|
||||
* but only those with the fewest rule applications.
|
||||
* notable disadvantage to all of this is that you no longer get all of the possible assemblies, but
|
||||
* only those with the fewest rule applications.
|
||||
*
|
||||
* <p>
|
||||
* Conceivably, this may also be used to prune some possibilities during semantic resolution of a
|
||||
* parse tree. Even better, it may be possible to derive a grammar which accounts for the context
|
||||
* changes already; however, it's unclear how many rules this will generate, and consequently, how
|
||||
@ -58,12 +60,15 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
/**
|
||||
* Build the context change graph for a given language and grammar
|
||||
*
|
||||
* <p>
|
||||
* The grammar must have been constructed from the given language. The language is used just to
|
||||
* obtain the most common default context.
|
||||
*
|
||||
* <p>
|
||||
* At the moment, this graph only expands the recursive rules at the root constructor table,
|
||||
* i.e., "instruction". Thus, the assembler will not be able to process any language that has
|
||||
* <i>purely</i>-recursive rules at subconstructors.
|
||||
* <i>purely</i>-recursive rules at sub-constructors.
|
||||
*
|
||||
* @param lang the language
|
||||
* @param grammar the grammar derived from the given language
|
||||
*/
|
||||
@ -89,17 +94,16 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the optimal, i.e., fewest, sequences of applications to resolve a given context to
|
||||
* the language's default context.
|
||||
* Compute the optimal, i.e., shortest, sequences of applications to resolve a given context to
|
||||
* another, often the language's default context.
|
||||
*
|
||||
* @param src presumably, the language's default context
|
||||
* @param srcTable the name of the SLEIGH constructor table, presumably "instruction"
|
||||
* @param dst the context block being resolved
|
||||
* @param dstTable the name of the SLEIGH constructor table being resolved
|
||||
* @return a collection of sequences of constructor applications from {@code src} to
|
||||
* {@code dst}
|
||||
* @return a collection of sequences of constructor applications from {@code src} to {@code dst}
|
||||
*
|
||||
* NOTE: For assembly, the sequences will need to be applied right-to-left.
|
||||
* NOTE: For assembly, the sequences will need to be applied right-to-left.
|
||||
*/
|
||||
public Collection<Deque<AssemblyConstructorSemantic>> computeOptimalApplications(
|
||||
AssemblyPatternBlock src, String srcTable, AssemblyPatternBlock dst, String dstTable) {
|
||||
@ -140,6 +144,7 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
/**
|
||||
* Gather all the semantics that can be used as state transitions
|
||||
*
|
||||
* <p>
|
||||
* Currently, only semantics from {@code :^instruction} constructors are taken.
|
||||
*/
|
||||
protected void gatherSemantics() {
|
||||
@ -156,14 +161,16 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
/**
|
||||
* A vertex in a context transition graph
|
||||
*
|
||||
* Each vertex consists of a context block and a (sub)table name
|
||||
* <p>
|
||||
* Each vertex consists of a context block and a (sub-)table name
|
||||
*/
|
||||
protected static class Vertex implements Comparable<Vertex> {
|
||||
protected final AssemblyPatternBlock context;
|
||||
protected final String subtable;
|
||||
|
||||
/**
|
||||
* Construct a new vertex with the given block and subtable name
|
||||
* Construct a new vertex with the given block and sub-table name
|
||||
*
|
||||
* @param context the context
|
||||
* @param subtable the name
|
||||
*/
|
||||
@ -175,10 +182,12 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
/**
|
||||
* Check if this and another vertex "agree"
|
||||
*
|
||||
* This doesn't mean they're equal, but that they share a subtable, and the defined bits of
|
||||
* their context blocks agree.
|
||||
* <p>
|
||||
* This does not mean they are equal, but that they share a sub-table, and the defined bits
|
||||
* of their context blocks agree.
|
||||
*
|
||||
* @param that the other vertex
|
||||
* @return true iff they share subtables and defined bits
|
||||
* @return true iff they share sub-tables and defined bits
|
||||
*/
|
||||
public boolean matches(Vertex that) {
|
||||
if (!this.subtable.equals(that.subtable)) {
|
||||
@ -233,10 +242,10 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
/**
|
||||
* A transition in a context transition graph
|
||||
*
|
||||
* <p>
|
||||
* A transition consists of the constructor whose context changes were applied. The operand
|
||||
* index is included for reference and debugging. If we ever need to process rules with
|
||||
* multiple subconstructors, the operand index explains the subtable name of the destination
|
||||
* vertex.
|
||||
* index is included for reference and debugging. If we ever need to process rules with multiple
|
||||
* sub-constructors, the operand index explains the sub-table name of the destination vertex.
|
||||
*/
|
||||
protected static class Edge implements GEdge<Vertex>, Comparable<Edge> {
|
||||
protected final AssemblyConstructorSemantic sem;
|
||||
@ -247,6 +256,7 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
|
||||
/**
|
||||
* Construct a new transition associated with the given constructor and operand index
|
||||
*
|
||||
* @param sem the constructor semantic
|
||||
* @param op the operand index
|
||||
*/
|
||||
@ -332,7 +342,7 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
cachedVertices.add(from);
|
||||
Set<Edge> result = new HashSet<>();
|
||||
for (AssemblyConstructorSemantic sem : semantics.get(from.subtable)) {
|
||||
for (AssemblyResolvedConstructor rc : sem.patterns) {
|
||||
for (AssemblyResolvedPatterns rc : sem.patterns) {
|
||||
AssemblyPatternBlock pattern = rc.ctx;
|
||||
AssemblyPatternBlock outer = from.context.combine(pattern);
|
||||
if (outer == null) {
|
||||
@ -342,9 +352,9 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
continue;
|
||||
}
|
||||
|
||||
AssemblyResolvedConstructor orc =
|
||||
AssemblyResolution.contextOnly(outer, "For context transition", null);
|
||||
AssemblyResolvedConstructor irc = sem.applyForward(orc);
|
||||
AssemblyResolvedPatterns orc =
|
||||
AssemblyResolution.contextOnly(outer, "For context transition");
|
||||
AssemblyResolvedPatterns irc = sem.applyContextChangesForward(Map.of(), orc);
|
||||
AssemblyPatternBlock inner = irc.getContext();
|
||||
|
||||
Constructor ct = sem.getConstructor();
|
||||
@ -377,6 +387,7 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge
|
||||
/**
|
||||
* This operation is not supported.
|
||||
*
|
||||
* <p>
|
||||
* I could implement this using the cached edges, but that may not be, semantically, what a path
|
||||
* computation algorithm actually requires. Instead, I will assume the algorithm only explores
|
||||
* the graph in the same direction as its edges. If not, I will hear about it quickly.
|
||||
|
@ -28,6 +28,7 @@ import ghidra.program.model.listing.DefaultProgramContext;
|
||||
/**
|
||||
* A class that computes the default context for a language, and acts as a pseudo context
|
||||
*
|
||||
* <p>
|
||||
* This class helps maintain context consistency when performing both assembly and disassembly.
|
||||
*/
|
||||
public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgramContext {
|
||||
@ -41,6 +42,7 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr
|
||||
|
||||
/**
|
||||
* Compute the default context at most addresses for the given language
|
||||
*
|
||||
* @param lang the language
|
||||
*/
|
||||
public AssemblyDefaultContext(SleighLanguage lang) {
|
||||
@ -49,6 +51,7 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr
|
||||
|
||||
/**
|
||||
* Compute the default context at the given address for the given language
|
||||
*
|
||||
* @param lang the language
|
||||
* @param at the address
|
||||
*/
|
||||
@ -72,16 +75,23 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr
|
||||
/**
|
||||
* Set the value of the pseudo context register
|
||||
*
|
||||
* <p>
|
||||
* If the provided value has length less than the register, it will be left aligned, and the
|
||||
* remaining bytes will be set to unknown (masked out).
|
||||
*
|
||||
* @param val the value of the register
|
||||
*/
|
||||
public void setContextRegister(byte[] val) {
|
||||
curctx = AssemblyPatternBlock.fromBytes(0, val);
|
||||
}
|
||||
|
||||
public void setContextRegister(AssemblyPatternBlock ctx) {
|
||||
curctx = curctx.combine(ctx);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the default value of the context register
|
||||
*
|
||||
* @return the value as a pattern block for assembly
|
||||
*/
|
||||
public AssemblyPatternBlock getDefault() {
|
||||
@ -90,6 +100,7 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr
|
||||
|
||||
/**
|
||||
* Compute the default value of the context register at the given address
|
||||
*
|
||||
* @param addr the address
|
||||
* @return the value as a pattern block for assembly
|
||||
*/
|
||||
|
@ -0,0 +1,40 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
/**
|
||||
* A tree of generated assembly node states, paired with the resulting patterns
|
||||
*
|
||||
* <p>
|
||||
* This is used as the intermediate result when generating states, since the patterns must be
|
||||
* propagated to each operand as generation proceeds. Usually, the patterns in the final output are
|
||||
* discarded, and machine code generation proceeds using only the state tree.
|
||||
*/
|
||||
public class AssemblyGeneratedPrototype {
|
||||
protected final AbstractAssemblyState state;
|
||||
protected final AssemblyResolvedPatterns patterns;
|
||||
|
||||
public AssemblyGeneratedPrototype(AbstractAssemblyState state,
|
||||
AssemblyResolvedPatterns patterns) {
|
||||
this.state = state;
|
||||
this.patterns = patterns;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return state + " [" + patterns + "]";
|
||||
}
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseTreeNode;
|
||||
import ghidra.app.plugin.processors.sleigh.Constructor;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol;
|
||||
|
||||
/**
|
||||
* The generator of {@link AssemblyConstructState} for a hidden sub-table operand
|
||||
*
|
||||
* <p>
|
||||
* In short, this exhausts all possible constructors in the given sub-table. For well-designed
|
||||
* languages, such exhaustion produces a very small set of possibilities. In general, hidden
|
||||
* sub-table operands are a bad idea.
|
||||
*/
|
||||
public class AssemblyHiddenConstructStateGenerator extends AssemblyConstructStateGenerator {
|
||||
protected final SubtableSymbol subtableSym;
|
||||
|
||||
/**
|
||||
* Construct the hidden sub-table operand state generator
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param node the node from which to generate states
|
||||
* @param fromLeft the accumulated patterns from the left sibling or the parent
|
||||
*/
|
||||
public AssemblyHiddenConstructStateGenerator(AssemblyTreeResolver resolver,
|
||||
SubtableSymbol subtableSym, AssemblyResolvedPatterns fromLeft) {
|
||||
super(resolver, null, fromLeft);
|
||||
this.subtableSym = subtableSym;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<AssemblyGeneratedPrototype> generate(GeneratorContext gc) {
|
||||
return IntStream.range(0, subtableSym.getNumConstructors())
|
||||
.mapToObj(subtableSym::getConstructor)
|
||||
.map(resolver.grammar::getSemantic)
|
||||
.flatMap(sem -> applyConstructor(gc, sem));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<AssemblyParseTreeNode> orderOpNodes(AssemblyConstructorSemantic sem) {
|
||||
// Just provide null operands, since they're hidden, too.
|
||||
Constructor cons = sem.getConstructor();
|
||||
return Arrays.asList(new AssemblyParseTreeNode[cons.getNumOperands()]);
|
||||
}
|
||||
}
|
@ -0,0 +1,63 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
|
||||
public class AssemblyNopState extends AbstractAssemblyState {
|
||||
public AssemblyNopState(AssemblyTreeResolver resolver, List<AssemblyConstructorSemantic> path,
|
||||
int shift, OperandSymbol opSym) {
|
||||
super(resolver, path, shift, opSym.getMinimumLength());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int computeHash() {
|
||||
return "NOP".hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (!(obj instanceof AssemblyNopState)) {
|
||||
return false;
|
||||
}
|
||||
AssemblyNopState that = (AssemblyNopState) obj;
|
||||
if (this.resolver != that.resolver) {
|
||||
return false;
|
||||
}
|
||||
if (this.shift != that.shift) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NOP";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Stream<AssemblyResolvedPatterns> resolve(AssemblyResolvedPatterns fromRight,
|
||||
Collection<AssemblyResolvedError> errors) {
|
||||
return Stream.of(fromRight.nopLeftSibling());
|
||||
}
|
||||
}
|
@ -0,0 +1,55 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
|
||||
/**
|
||||
* The generator of {@link AssemblyOperandState} for a hidden value operand
|
||||
*
|
||||
* <p>
|
||||
* In short, this does nothing, except to hold the place of the operand for diagnostics. Likely, the
|
||||
* "hidden" operand appears in the defining expression of a temporary symbol used in the print
|
||||
* pieces.
|
||||
*/
|
||||
public class AssemblyNopStateGenerator
|
||||
extends AbstractAssemblyStateGenerator<AssemblyParseNumericToken> {
|
||||
protected final OperandSymbol opSym;
|
||||
|
||||
/**
|
||||
* Construct the hidden value operand state generator
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param opSym the operand symbol
|
||||
* @param fromLeft the accumulated patterns from the left sibling or parent
|
||||
*/
|
||||
public AssemblyNopStateGenerator(AssemblyTreeResolver resolver, OperandSymbol opSym,
|
||||
AssemblyResolvedPatterns fromLeft) {
|
||||
super(resolver, null, fromLeft);
|
||||
this.opSym = opSym;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<AssemblyGeneratedPrototype> generate(GeneratorContext gc) {
|
||||
gc.dbg("Generating NOP for " + opSym);
|
||||
return Stream.of(
|
||||
new AssemblyGeneratedPrototype(new AssemblyNopState(resolver, gc.path, gc.shift, opSym),
|
||||
fromLeft));
|
||||
}
|
||||
}
|
@ -0,0 +1,155 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNumericTerminal;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyTerminal;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
|
||||
import ghidra.app.plugin.processors.sleigh.ConstructState;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
|
||||
/**
|
||||
* The state corresponding to a non-sub-table operand
|
||||
*
|
||||
* <p>
|
||||
* This is roughly analogous to {@link ConstructState}, but for assembly. However, it also records
|
||||
* the value of the operand and the actual operand symbol whose value it specifies.
|
||||
*/
|
||||
public class AssemblyOperandState extends AbstractAssemblyState {
|
||||
protected final AssemblyTerminal terminal;
|
||||
protected final long value;
|
||||
protected final OperandSymbol opSym;
|
||||
|
||||
/**
|
||||
* Construct the state for a given operand and selected value
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param path the path for diagnostics
|
||||
* @param shift the (right) shift of this operand
|
||||
* @param terminal the terminal that generated this state
|
||||
* @param value the value of the operand
|
||||
* @param opSym the operand symbol
|
||||
*/
|
||||
public AssemblyOperandState(AssemblyTreeResolver resolver,
|
||||
List<AssemblyConstructorSemantic> path, int shift, AssemblyTerminal terminal,
|
||||
long value, OperandSymbol opSym) {
|
||||
super(resolver, path, shift, opSym.getMinimumLength());
|
||||
this.terminal = terminal;
|
||||
this.value = value;
|
||||
this.opSym = opSym;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int computeHash() {
|
||||
return Objects.hash(getClass(), shift, value, opSym);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (!(obj instanceof AssemblyOperandState)) {
|
||||
return false;
|
||||
}
|
||||
AssemblyOperandState that = (AssemblyOperandState) obj;
|
||||
if (this.resolver != that.resolver) {
|
||||
return false;
|
||||
}
|
||||
if (this.shift != that.shift) {
|
||||
return false;
|
||||
}
|
||||
if (this.value != that.value) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(this.opSym, that.opSym)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return terminal + "=" + value + "(0x" + Long.toHexString(value) + ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the size in bits of this operand's value
|
||||
*
|
||||
* <p>
|
||||
* If this operand does not have a strict size, 0 is returned.
|
||||
*
|
||||
* @return the size
|
||||
*/
|
||||
protected int computeBitsize() {
|
||||
if (!(terminal instanceof AssemblyNumericTerminal)) {
|
||||
return 0;
|
||||
}
|
||||
AssemblyNumericTerminal numeric = (AssemblyNumericTerminal) terminal;
|
||||
return numeric.getBitSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Solve the operand's defining expression set equal to the desired value
|
||||
*
|
||||
* @return the resolved patterns, an error, or a backfill
|
||||
*/
|
||||
protected AssemblyResolution solveNumeric() {
|
||||
int bitsize = computeBitsize();
|
||||
PatternExpression symExp = opSym.getDefiningExpression();
|
||||
if (symExp == null) {
|
||||
symExp = opSym.getDefiningSymbol().getPatternExpression();
|
||||
}
|
||||
DBG.println("Equation: " + symExp + " = " + Long.toHexString(value));
|
||||
String desc = "Solution to " + opSym + " in " + Long.toHexString(value) + " = " + symExp;
|
||||
AssemblyResolution sol =
|
||||
AssemblyTreeResolver.solveOrBackfill(symExp, value, bitsize, resolver.vals, null, desc);
|
||||
DBG.println("Solution: " + sol);
|
||||
AssemblyResolution shifted = sol.shift(shift);
|
||||
DBG.println("Shifted: " + shifted);
|
||||
return shifted;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Stream<AssemblyResolvedPatterns> resolve(AssemblyResolvedPatterns fromRight,
|
||||
Collection<AssemblyResolvedError> errors) {
|
||||
try (DbgCtx dc = DBG.start("Resolving " + terminal)) {
|
||||
AssemblyResolution sol = solveNumeric();
|
||||
if (sol.isError()) {
|
||||
errors.add((AssemblyResolvedError) sol);
|
||||
return Stream.of();
|
||||
}
|
||||
if (sol.isBackfill()) {
|
||||
AssemblyResolvedPatterns combined =
|
||||
fromRight.combine((AssemblyResolvedBackfill) sol);
|
||||
return Stream.of(combined.withRight(fromRight));
|
||||
}
|
||||
AssemblyResolution combined = fromRight.combine((AssemblyResolvedPatterns) sol);
|
||||
if (combined == null) {
|
||||
errors.add(
|
||||
AssemblyResolution.error("Pattern/operand conflict", "Resolving " + terminal));
|
||||
return Stream.of();
|
||||
}
|
||||
AssemblyResolvedPatterns pats = (AssemblyResolvedPatterns) combined;
|
||||
// Do not take constructor from right
|
||||
return Stream.of(pats.withRight(fromRight).withConstructor(null));
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,55 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
|
||||
/**
|
||||
* The generator of {@link AssemblyOperandState} from {@link AssemblyParseNumericToken}
|
||||
*
|
||||
* <p>
|
||||
* In short, this handles generation of a single operand state for the operand and value recorded by
|
||||
* the given parse token.
|
||||
*/
|
||||
public class AssemblyOperandStateGenerator
|
||||
extends AbstractAssemblyStateGenerator<AssemblyParseNumericToken> {
|
||||
protected final OperandSymbol opSym;
|
||||
|
||||
/**
|
||||
* Construct the operand state generator
|
||||
*
|
||||
* @param resolver the resolver
|
||||
* @param node the ndoe from which to generate the state
|
||||
* @param fromLeft the accumulated patterns from the left sibling or parent
|
||||
* @param opSym the operand symbol
|
||||
*/
|
||||
public AssemblyOperandStateGenerator(AssemblyTreeResolver resolver,
|
||||
AssemblyParseNumericToken node, OperandSymbol opSym,
|
||||
AssemblyResolvedPatterns fromLeft) {
|
||||
super(resolver, node, fromLeft);
|
||||
this.opSym = opSym;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<AssemblyGeneratedPrototype> generate(GeneratorContext gc) {
|
||||
return Stream.of(
|
||||
new AssemblyGeneratedPrototype(new AssemblyOperandState(resolver, gc.path, gc.shift,
|
||||
node.getSym(), node.getNumericValue(), opSym), fromLeft));
|
||||
}
|
||||
}
|
@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.SolverException;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.AsmUtil;
|
||||
import ghidra.app.plugin.processors.sleigh.ContextOp;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.ContextField;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.TokenField;
|
||||
@ -31,15 +31,16 @@ import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
|
||||
import ghidra.app.plugin.processors.sleigh.pattern.PatternBlock;
|
||||
import ghidra.program.model.lang.RegisterValue;
|
||||
import ghidra.util.NumericUtilities;
|
||||
import ghidra.util.StringUtilities;
|
||||
|
||||
/**
|
||||
* The analog of {@link PatternBlock}, designed for use by the assembler
|
||||
*
|
||||
* <p>
|
||||
* It is suitable for the assembler because it is represented byte-by-byte, and it offers a number
|
||||
* of useful conversions and operations.
|
||||
*
|
||||
* TODO A lot of this could probably be factored into the {@link PatternBlock} class, but it was
|
||||
* <p>
|
||||
* TODO: A lot of this could probably be factored into the {@link PatternBlock} class, but it was
|
||||
* best to experiment in another class altogether to avoid breaking things.
|
||||
*/
|
||||
public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
@ -53,9 +54,10 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Construct a new pattern block with the given mask, values, and offset
|
||||
*
|
||||
* @param offset an offset (0-up, left-to-right) where the pattern actually starts
|
||||
* @param mask a mask: only {@code 1} bits are included in the pattern
|
||||
* @param vals the value, excluding corresponding {@code 0} bits in the mask
|
||||
* @param mask a mask: only 1 bits are included in the pattern
|
||||
* @param vals the value, excluding corresponding 0 bits in the mask
|
||||
*/
|
||||
protected AssemblyPatternBlock(int offset, byte[] mask, byte[] vals) {
|
||||
assert mask.length == vals.length;
|
||||
@ -66,6 +68,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Construct a new empty pattern block at the given offset, prepared with the given capacity
|
||||
*
|
||||
* @param offset an offset (0-up, left-to-right) where the pattern will start
|
||||
* @param capacity the space to allocate for the mask and values
|
||||
*/
|
||||
@ -77,6 +80,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Get an empty pattern block
|
||||
*
|
||||
* @return the pattern block
|
||||
*/
|
||||
public static AssemblyPatternBlock nop() {
|
||||
@ -85,6 +89,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Get a pattern block with the given (fully-included) values at the given offset
|
||||
*
|
||||
* @param offset the offset (0-up, left-to-right)
|
||||
* @param vals the values
|
||||
* @return a pattern block (having a full mask)
|
||||
@ -100,7 +105,10 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Convert the given long to a pattern block (having offset 0 and a full mask)
|
||||
* NOTE: The result will be 8 bytes in length
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> The result will be 8 bytes in length
|
||||
*
|
||||
* @param value the value to convert
|
||||
* @return the pattern block containing the big-endian representation of the value
|
||||
*/
|
||||
@ -118,7 +126,10 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Convert the given masked long to a pattern block (having offset 0)
|
||||
* NOTE: The result will be 8 bytes in length
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> The result will be 8 bytes in length
|
||||
*
|
||||
* @param ml the masked long, whose values and mask to convert
|
||||
* @return the pattern block containing the big-endian representation of the value
|
||||
*/
|
||||
@ -139,7 +150,9 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Convert a string representation to a pattern block
|
||||
* @see NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String)
|
||||
*
|
||||
* @see NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int,
|
||||
* String)
|
||||
* @param str the string to convert
|
||||
* @return the resulting pattern block
|
||||
*/
|
||||
@ -187,7 +200,8 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a block from a disjoint pattern into an assembly pattern block
|
||||
* Convert a block from a disjoint pattern into an assembly pattern block
|
||||
*
|
||||
* @param pat the pattern to convert
|
||||
* @param context true to select the context block, false to select the instruction block
|
||||
* @return the converted pattern block
|
||||
@ -229,6 +243,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Encode the given masked long into a pattern block as specified by a given token field
|
||||
*
|
||||
* @param tf the token field specifying the location of the value to encode
|
||||
* @param val the value to encode
|
||||
* @return the pattern block with the encoded value
|
||||
@ -262,6 +277,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Encode the given masked long into a pattern block as specified by a given context field
|
||||
*
|
||||
* @param cf the context field specifying the location of the value to encode
|
||||
* @param val the value to encode
|
||||
* @return the pattern block with the encoded value
|
||||
@ -293,11 +309,12 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Convert a register value into a pattern block
|
||||
* @param rv the register value
|
||||
* @return the pattern block
|
||||
*
|
||||
* This is used primarily to compute default context register values, and pass them into an
|
||||
* assembler.
|
||||
*
|
||||
* @param rv the register value
|
||||
* @return the pattern block
|
||||
*/
|
||||
public static AssemblyPatternBlock fromRegisterValue(RegisterValue rv) {
|
||||
byte[] mb = rv.toBytes();
|
||||
@ -310,6 +327,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Allocate a fully-undefined pattern block of the given length
|
||||
*
|
||||
* @param length the length in bytes
|
||||
* @return the block of all unknown bits
|
||||
*/
|
||||
@ -321,6 +339,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Duplicate this pattern block
|
||||
*
|
||||
* @return the duplicate
|
||||
*/
|
||||
public AssemblyPatternBlock copy() {
|
||||
@ -330,6 +349,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Get the length (plus the offset) of this pattern block
|
||||
*
|
||||
* @return the total length
|
||||
*/
|
||||
public int length() {
|
||||
@ -338,6 +358,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Shift, i.e., increase the offset of, this pattern block
|
||||
*
|
||||
* @param amt the amount to shift right
|
||||
* @return the shifted pattern block
|
||||
*/
|
||||
@ -350,6 +371,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Truncate (unshift) this pattern block by removing bytes from the left
|
||||
*
|
||||
* @param amt the amount to truncate or shift left
|
||||
* @return the truncated pattern block
|
||||
*/
|
||||
@ -373,12 +395,13 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
/**
|
||||
* Combine this pattern block with another given block
|
||||
*
|
||||
* <p>
|
||||
* Two blocks can be combined if their corresponding defined bits agree. When blocks are
|
||||
* combined, their bytes are aligned according to their shifts, and the defined bits are taken
|
||||
* from either block. If neither block defines a bit (i.e., the mask bit at that position is
|
||||
* {@code 0} for both input blocks, then the output has an undefined bit in the corresponding
|
||||
* position. If both blocks define the bit, but they have opposite values, then the result is
|
||||
* an error.
|
||||
* from either block. If neither block defines a bit (i.e., the mask bit at that position is 0
|
||||
* for both input blocks), then the output has an undefined bit in the corresponding position. If
|
||||
* both blocks define the bit, but they have opposite values, then the result is an error.
|
||||
*
|
||||
* @param that the other block
|
||||
* @return the new combined block, or null if the blocks disagree for any bit
|
||||
*/
|
||||
@ -487,12 +510,12 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
return result;
|
||||
}
|
||||
|
||||
result = SleighUtil.compareArrays(this.mask, that.mask);
|
||||
result = AsmUtil.compareArrays(this.mask, that.mask);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
result = SleighUtil.compareArrays(this.vals, that.vals);
|
||||
result = AsmUtil.compareArrays(this.vals, that.vals);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
@ -501,6 +524,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Read an array, returning a default if the index is out of bounds
|
||||
*
|
||||
* @param arr the array to read
|
||||
* @param idx the index
|
||||
* @param def the default value
|
||||
@ -517,11 +541,12 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
/**
|
||||
* Encode the given value into a copy of this pattern block as specified by a context operation
|
||||
*
|
||||
* NOTE: this method is given as a special operation, instead of a conversion factory method,
|
||||
* because this is a write operation, not a combine operation. As such, the bits (including
|
||||
* undefined bits) replace the bits in the existing pattern block. Were this a conversion
|
||||
* method, we would lose the distinction between unknown bits being written, and bits whose
|
||||
* values are simply not included in the write.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this method is given as a special operation, instead of a conversion factory
|
||||
* method, because this is a write operation, not a combine operation. As such, the bits
|
||||
* (including undefined bits) replace the bits in the existing pattern block. Were this a
|
||||
* conversion method, we would lose the distinction between unknown bits being written, and bits
|
||||
* whose values are simply not included in the write.
|
||||
*
|
||||
* @param cop the context operation specifying the location of the value to encode
|
||||
* @param val the value to encode
|
||||
@ -565,6 +590,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Read the input of a context operation from this pattern block
|
||||
*
|
||||
* @param cop the context operation
|
||||
* @return the decoded input, as a masked value
|
||||
*/
|
||||
@ -595,12 +621,14 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Set all bits read by a given context operation to unknown
|
||||
*
|
||||
* <p>
|
||||
* This is used during resolution to remove a context requirement passed upward by a child. When
|
||||
* a parent constructor writes the required value to the context register, that requirement need
|
||||
* not be passed further upward, since the write satisfies the requirement.
|
||||
*
|
||||
* @param cop the context operation
|
||||
* @return the result
|
||||
*
|
||||
* This is used during resolution to remove a context requirement passed upward by a child.
|
||||
* When a parent constructor writes the required value to the context register, that
|
||||
* requirement need not be passed further upward, since the write satisfies the requirement.
|
||||
*/
|
||||
public AssemblyPatternBlock maskOut(ContextOp cop) {
|
||||
byte[] newMask = Arrays.copyOf(this.mask, this.mask.length);
|
||||
@ -623,6 +651,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Get the values array
|
||||
*
|
||||
* @return the array
|
||||
*/
|
||||
public byte[] getVals() {
|
||||
@ -631,6 +660,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Get the mask array
|
||||
*
|
||||
* @return the array
|
||||
*/
|
||||
public byte[] getMask() {
|
||||
@ -639,6 +669,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Get the number of undefined bytes preceding the mask and values arrays
|
||||
*
|
||||
* @return the offset
|
||||
*/
|
||||
public int getOffset() {
|
||||
@ -646,7 +677,8 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode {@code} len value bytes in big-endian format, beginning at {@code start}
|
||||
* Decode {@code len} value bytes in big-endian format, beginning at {@code start}
|
||||
*
|
||||
* @param start the first byte to decode
|
||||
* @param len the number of bytes to decode
|
||||
* @return the decoded long
|
||||
@ -664,7 +696,8 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode {@code} len mask bytes in big-endian format, beginning at {@code start}
|
||||
* Decode {@code len} mask bytes in big-endian format, beginning at {@code start}
|
||||
*
|
||||
* @param start the first byte to decode
|
||||
* @param len the number of bytes to decode
|
||||
* @return the decoded long
|
||||
@ -682,7 +715,8 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode {@code} len bytes (values and mask) in big-endian format, beginning at {@code start}
|
||||
* Decode {@code len} bytes (values and mask) in big-endian format, beginning at {@code start}
|
||||
*
|
||||
* @param start the first byte to decode
|
||||
* @param len the number of bytes to decode
|
||||
* @return the decoded masked long
|
||||
@ -692,7 +726,8 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill all unknown bits with {@code 0} bits
|
||||
* Fill all unknown bits with 0 bits
|
||||
*
|
||||
* @return the result
|
||||
*/
|
||||
public AssemblyPatternBlock fillMask() {
|
||||
@ -705,6 +740,7 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
|
||||
/**
|
||||
* Check if there are any unknown bits
|
||||
*
|
||||
* @return true if no unknown bits are present, false otherwise
|
||||
*/
|
||||
public boolean isFullMask() {
|
||||
@ -720,8 +756,9 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if all bits are {@code 0} bits
|
||||
* @return true if all are {@code 0}, false otherwise
|
||||
* Check if all bits are 0 bits
|
||||
*
|
||||
* @return true if all are 0, false otherwise
|
||||
*/
|
||||
public boolean isZero() {
|
||||
if (!isFullMask()) {
|
||||
@ -738,8 +775,10 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
/**
|
||||
* Decode the values array into a {@link BigInteger} of length {@code n} bytes
|
||||
*
|
||||
* <p>
|
||||
* The array is either truncated or zero-extended <em>on the right</em> to match the requested
|
||||
* number of bytes, then decoded in big-endian format as an unsigned value.
|
||||
*
|
||||
* @param n the number of bytes (left-to-right) to decode
|
||||
* @return the decoded big integer
|
||||
*/
|
||||
@ -757,7 +796,9 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
/**
|
||||
* Counts the total number of known bits in the pattern
|
||||
*
|
||||
* <p>
|
||||
* At a slightly lower level, counts the number of 1-bits in the mask.
|
||||
*
|
||||
* @return the count
|
||||
*/
|
||||
public int getSpecificity() {
|
||||
@ -785,19 +826,21 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
|
||||
/**
|
||||
* Get an iterable over all the possible fillings of the value, given a partial mask
|
||||
*
|
||||
* <p>
|
||||
* This is meant to be used idiomatically, as in an enhanced for loop:
|
||||
*
|
||||
* <pre>
|
||||
* {@code
|
||||
* for (byte[] val : pattern.possibleVals()) {
|
||||
* System.out.println(format(val));
|
||||
* }
|
||||
* System.out.println(format(val));
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* NOTE: A single byte array is instantiated with the call to {@link Iterable#iterator()}. Each
|
||||
* call to {@link Iterator#next()} modifies the one byte array and returns it. As such, if you
|
||||
* intend to preserve the value in the array for later use, you <em>must</em> make a copy.
|
||||
* <p>
|
||||
* <b>NOTE:</b> A single byte array is instantiated with the call to
|
||||
* {@link Iterable#iterator()}. Each call to {@link Iterator#next()} modifies the one byte array
|
||||
* and returns it. As such, if you intend to preserve the value in the array for later use, you
|
||||
* <em>must</em> make a copy.
|
||||
*
|
||||
* @return the iterable.
|
||||
*/
|
||||
public Iterable<byte[]> possibleVals() {
|
||||
|
@ -15,24 +15,27 @@
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
|
||||
import ghidra.app.plugin.processors.sleigh.Constructor;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
|
||||
|
||||
/**
|
||||
* The (often intermediate) result of assembly
|
||||
*
|
||||
* These may represent a successful construction ({@link AssemblyResolvedConstructor}, a future
|
||||
* field ({@link AssemblyResolvedBackfill}), or an error ({@link AssemblyResolvedError}).
|
||||
* <p>
|
||||
* These may represent a successful construction ({@link AssemblyResolvedPatterns}), a future field
|
||||
* ({@link AssemblyResolvedBackfill}), or an error ({@link AssemblyResolvedError}).
|
||||
*
|
||||
* <p>
|
||||
* This class also provides the static factory methods for constructing any of its subclasses.
|
||||
*/
|
||||
public abstract class AssemblyResolution implements Comparable<AssemblyResolution> {
|
||||
protected final String description;
|
||||
protected final List<? extends AssemblyResolution> children;
|
||||
protected final List<AssemblyResolution> children;
|
||||
protected final AssemblyResolution right;
|
||||
|
||||
private boolean hashed = false;
|
||||
private int hash;
|
||||
@ -50,12 +53,15 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
|
||||
/**
|
||||
* Construct a resolution
|
||||
*
|
||||
* @param description a textual description used as part of {@link #toString()}
|
||||
* @param children for record keeping, any children used in constructing this resolution
|
||||
*/
|
||||
AssemblyResolution(String description, List<? extends AssemblyResolution> children) {
|
||||
AssemblyResolution(String description, List<? extends AssemblyResolution> children,
|
||||
AssemblyResolution right) {
|
||||
this.description = description;
|
||||
this.children = children == null ? List.of() : children;
|
||||
this.children = children == null ? List.of() : Collections.unmodifiableList(children);
|
||||
this.right = right;
|
||||
}
|
||||
|
||||
/* ********************************************************************************************
|
||||
@ -65,61 +71,69 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
/**
|
||||
* Build the result of successfully resolving a SLEIGH constructor
|
||||
*
|
||||
* NOTE: This is not used strictly for resolved SLEIGH constructors. It may also be used to
|
||||
* store intermediates, e.g., encoded operands, during constructor resolution.
|
||||
* <p>
|
||||
* <b>NOTE:</b> This is not used strictly for resolved SLEIGH constructors. It may also be used
|
||||
* to store intermediates, e.g., encoded operands, during constructor resolution.
|
||||
*
|
||||
* @param ins the instruction pattern block
|
||||
* @param ctx the context pattern block
|
||||
* @param description a description of the resolution
|
||||
* @param sel the children selected to resolve this constructor, or null
|
||||
* @param cons the constructor, or null
|
||||
* @param children the children of this constructor, or null
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedConstructor resolved(AssemblyPatternBlock ins,
|
||||
AssemblyPatternBlock ctx, String description,
|
||||
List<? extends AssemblyResolution> sel) {
|
||||
return new AssemblyResolvedConstructor(description, sel, ins, ctx, null, null);
|
||||
public static AssemblyResolvedPatterns resolved(AssemblyPatternBlock ins,
|
||||
AssemblyPatternBlock ctx, String description, Constructor cons,
|
||||
List<? extends AssemblyResolution> children, AssemblyResolution right) {
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx, null,
|
||||
null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an instruction-only successful resolution result
|
||||
*
|
||||
* @see #resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, Constructor, List, AssemblyResolution)
|
||||
* @param ins the instruction pattern block
|
||||
* @param description a description of the resolution
|
||||
* @param children the children selected to resolve this constructor, or null
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedConstructor instrOnly(AssemblyPatternBlock ins,
|
||||
String description, List<AssemblyResolution> children) {
|
||||
return resolved(ins, AssemblyPatternBlock.nop(), description, children);
|
||||
public static AssemblyResolvedPatterns instrOnly(AssemblyPatternBlock ins,
|
||||
String description) {
|
||||
return resolved(ins, AssemblyPatternBlock.nop(), description, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a context-only successful resolution result
|
||||
*
|
||||
* @see #resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, Constructor, List, AssemblyResolution)
|
||||
* @param ctx the context pattern block
|
||||
* @param description a description of the resolution
|
||||
* @param children the children selected to resolve this constructor, or null
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedConstructor contextOnly(AssemblyPatternBlock ctx,
|
||||
String description, List<AssemblyResolution> children) {
|
||||
return resolved(AssemblyPatternBlock.nop(), ctx, description, children);
|
||||
public static AssemblyResolvedPatterns contextOnly(AssemblyPatternBlock ctx,
|
||||
String description) {
|
||||
return resolved(AssemblyPatternBlock.nop(), ctx, description, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a successful resolution result from a SLEIGH constructor's patterns
|
||||
*
|
||||
* @param pat the constructor's pattern
|
||||
* @param description a description of the resolution
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedConstructor fromPattern(DisjointPattern pat, int minLen,
|
||||
String description) {
|
||||
public static AssemblyResolvedPatterns fromPattern(DisjointPattern pat, int minLen,
|
||||
String description, Constructor cons) {
|
||||
AssemblyPatternBlock ins = AssemblyPatternBlock.fromPattern(pat, minLen, false);
|
||||
AssemblyPatternBlock ctx = AssemblyPatternBlock.fromPattern(pat, 0, true);
|
||||
return resolved(ins, ctx, description, null);
|
||||
return resolved(ins, ctx, description, cons, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a backfill record to attach to a successful resolution result
|
||||
*
|
||||
* @param exp the expression depending on a missing symbol
|
||||
* @param goal the desired value of the expression
|
||||
* @param res the resolution result for child constructors
|
||||
@ -128,41 +142,69 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
* @return the new record
|
||||
*/
|
||||
public static AssemblyResolvedBackfill backfill(PatternExpression exp, MaskedLong goal,
|
||||
Map<Integer, Object> res, int inslen, String description) {
|
||||
return new AssemblyResolvedBackfill(description, exp, goal, res, inslen, 0);
|
||||
int inslen, String description) {
|
||||
return new AssemblyResolvedBackfill(description, exp, goal, inslen, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a new "blank" resolved SLEIGH constructor record
|
||||
*
|
||||
* @param description a description of the resolution
|
||||
* @param sel any children that will be involved in populating this record
|
||||
* @param children any children that will be involved in populating this record
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedConstructor nop(String description,
|
||||
List<? extends AssemblyResolution> sel) {
|
||||
return resolved(AssemblyPatternBlock.nop(), AssemblyPatternBlock.nop(), description, sel);
|
||||
public static AssemblyResolvedPatterns nop(String description,
|
||||
List<? extends AssemblyResolution> children, AssemblyResolution right) {
|
||||
return resolved(AssemblyPatternBlock.nop(), AssemblyPatternBlock.nop(), description, null,
|
||||
children, right);
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a new "blank" resolved SLEIGH constructor record, having no constructor, no children,
|
||||
* and no right sibling
|
||||
*
|
||||
* @param description a description of the resolution
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedPatterns nop(String description) {
|
||||
return resolved(AssemblyPatternBlock.nop(), AssemblyPatternBlock.nop(), description, null,
|
||||
null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an error resolution record
|
||||
*
|
||||
* @param error a description of the error
|
||||
* @param description a description of what the resolver was doing when the error occurred
|
||||
* @param children any children involved in generating the error
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedError error(String error, String description,
|
||||
List<? extends AssemblyResolution> children) {
|
||||
return new AssemblyResolvedError(description, children, error);
|
||||
List<? extends AssemblyResolution> children, AssemblyResolution right) {
|
||||
return new AssemblyResolvedError(description, children, right, error);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an error resolution record, having no children and no right sibling
|
||||
*
|
||||
* @param error a description of the error
|
||||
* @param description a description of what the resolver was doing when the error
|
||||
* occurred
|
||||
* @return the new resolution
|
||||
*/
|
||||
public static AssemblyResolvedError error(String error, String description) {
|
||||
return new AssemblyResolvedError(description, null, null, error);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an error resolution record, based on an intermediate SLEIGH constructor record
|
||||
*
|
||||
* @param error a description of the error
|
||||
* @param res the constructor record that was being populated when the error occurred
|
||||
* @return the new error resolution
|
||||
*/
|
||||
public static AssemblyResolution error(String error, AssemblyResolvedConstructor res) {
|
||||
return error(error, res.description, res.children);
|
||||
public static AssemblyResolution error(String error, AssemblyResolvedPatterns res) {
|
||||
return error(error, res.description, res.children, res.right);
|
||||
}
|
||||
|
||||
/* ********************************************************************************************
|
||||
@ -171,18 +213,21 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
|
||||
/**
|
||||
* Check if this record describes an error
|
||||
*
|
||||
* @return true if the record is an error
|
||||
*/
|
||||
public abstract boolean isError();
|
||||
|
||||
/**
|
||||
* Check if this record describes a backfill
|
||||
*
|
||||
* @return true if the record is a backfill
|
||||
*/
|
||||
public abstract boolean isBackfill();
|
||||
|
||||
/**
|
||||
* Display the resolution result in one line (omitting child details)
|
||||
*
|
||||
* @return the display description
|
||||
*/
|
||||
protected abstract String lineToString();
|
||||
@ -191,11 +236,27 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
* Misc
|
||||
*/
|
||||
|
||||
/**
* Collect this resolution and its chain of right siblings into a new list
*
* @return a new list starting with this resolution, followed by each right sibling in order
*/
protected List<AssemblyResolution> getAllRight() {
|
||||
List<AssemblyResolution> result = new ArrayList<>();
|
||||
collectAllRight(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
* Add this resolution, then each of its right siblings in order, to the given collection
*
* @param into the collection receiving the resolutions
*/
protected void collectAllRight(Collection<AssemblyResolution> into) {
|
||||
into.add(this);
|
||||
if (right == null) {
|
||||
return;
|
||||
}
|
||||
// Continue down the chain of right siblings
right.collectAllRight(into);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the child portion of {@link #toString()}
|
||||
*
|
||||
* <p>
|
||||
* If a subclass has another, possibly additional, notion of children that it would like to
|
||||
* include in {@link #toString()}, it must override this method.
|
||||
*
|
||||
* @see #hasChildren()
|
||||
* @param indent the current indentation
|
||||
* @return the indented description for each child on its own line
|
||||
@ -210,6 +271,7 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
|
||||
/**
|
||||
* Used only by parents: get a multi-line description of this record, indented
|
||||
*
|
||||
* @param indent the current indentation
|
||||
* @return the indented description
|
||||
*/
|
||||
@ -241,9 +303,11 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
/**
|
||||
* Check if this record has children
|
||||
*
|
||||
* <p>
|
||||
* If a subclass has another, possibly additional, notion of children that it would like to
|
||||
* include in {@link #toString()}, it must override this method to return true when such
|
||||
* children are present.
|
||||
*
|
||||
* @see #childrenToString(String)
|
||||
* @return true if this record has children
|
||||
*/
|
||||
@ -256,4 +320,36 @@ public abstract class AssemblyResolution implements Comparable<AssemblyResolutio
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shift the resolution's instruction pattern to the right, if applicable
|
||||
*
|
||||
* <p>
|
||||
* This also shifts any backfill and forbidden pattern records.
|
||||
*
|
||||
* @param amt the number of bytes to shift.
|
||||
* @return the result
|
||||
*/
|
||||
public abstract AssemblyResolution shift(int amt);
|
||||
|
||||
/**
|
||||
* Get this same resolution, but without any right siblings
|
||||
*
* <p>
* This delegates to {@link #withRight(AssemblyResolution)}, passing null as the sibling.
*
|
||||
* @return the resolution
|
||||
*/
|
||||
public AssemblyResolution withoutRight() {
|
||||
return withRight(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get this same resolution, but with the given right sibling
|
||||
*
|
||||
* @param right the right sibling to attach, or null for no right sibling
* @return the resolution
|
||||
*/
|
||||
public abstract AssemblyResolution withRight(AssemblyResolution right);
|
||||
|
||||
/**
|
||||
* Get this same resolution, pushing its right siblings down to its children
|
||||
*/
|
||||
public abstract AssemblyResolution parent(String description, int opCount);
|
||||
}
|
||||
|
@ -16,12 +16,17 @@
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.set.AbstractSetDecorator;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
|
||||
/**
|
||||
* A set of possible assembly resolutions for a single SLEIGH constructor
|
||||
*
|
||||
* <p>
|
||||
* Since the assembler works from the leaves up, it is unclear in what context a given token appears.
|
||||
* Thus, every possible encoding is collected and passed upward. As resolution continues, many of
|
||||
* the possible encodings are pruned out. When the resolver reaches the root, we end up with every
|
||||
@ -29,6 +34,55 @@ import org.apache.commons.collections4.set.AbstractSetDecorator;
|
||||
* encodings, including error records describing the pruned intermediate results.
|
||||
*/
|
||||
public class AssemblyResolutionResults extends AbstractSetDecorator<AssemblyResolution> {
|
||||
protected static final DbgTimer DBG = AssemblyTreeResolver.DBG;
|
||||
|
||||
/**
* A strategy for combining each current resolution with a sequence of patterns
*
* <p>
* This is consumed by {@link AssemblyResolutionResults#apply(Applicator)}, which calls
* {@link #getPatterns(AssemblyResolvedPatterns)}, then
* {@link #combine(AssemblyResolvedPatterns, AssemblyResolution)} for each pattern, and finally
* either {@link #finish(AssemblyResolvedPatterns)} on success or
* {@link #describeError(AssemblyResolvedPatterns, AssemblyResolution)} when the combination is
* null.
*/
public interface Applicator {
|
||||
/**
* Get the patterns to try combining with the given current resolution
*
* @param cur the current resolution
* @return the patterns to combine with it, one at a time
*/
Iterable<? extends AssemblyResolution> getPatterns(AssemblyResolvedPatterns cur);
|
||||
|
||||
/**
* Get the given result with its description taken from another resolution
*
* @param res the result to re-describe
* @param from the resolution whose description is used
* @return the value of {@code res.withDescription(from.description)}
*/
default AssemblyResolvedPatterns setDescription(
|
||||
AssemblyResolvedPatterns res, AssemblyResolution from) {
|
||||
AssemblyResolvedPatterns temp = res.withDescription(from.description);
|
||||
return temp;
|
||||
}
|
||||
|
||||
/**
* Get the given result with the current resolution attached as its right sibling
*
* @param res the result
* @param cur the resolution to attach as the right sibling
* @return the value of {@code res.withRight(cur)}
*/
default AssemblyResolvedPatterns setRight(AssemblyResolvedPatterns res,
|
||||
AssemblyResolvedPatterns cur) {
|
||||
return res.withRight(cur);
|
||||
}
|
||||
|
||||
/**
* Combine the current resolution with a patterns record
*
* <p>
* On success, the result takes the description of {@code pat} and has {@code cur} as its
* right sibling.
*
* @param cur the current resolution
* @param pat the patterns to combine into it
* @return the combined resolution, or null if {@code cur.combine(pat)} yields null
*/
default AssemblyResolvedPatterns combineConstructor(AssemblyResolvedPatterns cur,
|
||||
AssemblyResolvedPatterns pat) {
|
||||
AssemblyResolvedPatterns combined = cur.combine(pat);
|
||||
if (combined == null) {
|
||||
return null;
|
||||
}
|
||||
return setRight(setDescription(combined, pat), cur);
|
||||
}
|
||||
|
||||
/**
* Combine the current resolution with a backfill record
*
* <p>
* The result takes the description of {@code bf} and has {@code cur} as its right sibling.
* NOTE(review): unlike {@link #combineConstructor}, the combined value is not checked for
* null; presumably combining a backfill cannot fail -- confirm against
* {@code AssemblyResolvedPatterns.combine(AssemblyResolvedBackfill)}.
*
* @param cur the current resolution
* @param bf the backfill record to combine into it
* @return the combined resolution
*/
default AssemblyResolvedPatterns combineBackfill(AssemblyResolvedPatterns cur,
|
||||
AssemblyResolvedBackfill bf) {
|
||||
AssemblyResolvedPatterns combined = cur.combine(bf);
|
||||
return setRight(setDescription(combined, bf), cur);
|
||||
}
|
||||
|
||||
/**
* Combine the current resolution with a pattern, dispatching on the kind of pattern
*
* <p>
* Backfill records go to {@link #combineBackfill}; anything else is cast to
* {@link AssemblyResolvedPatterns} and goes to {@link #combineConstructor}.
*
* @param cur the current resolution
* @param pat the pattern to combine, which must not be an error record
* @return the combined resolution, or null if the combination failed
* @throws AssertionError if {@code pat} is an error record
*/
default AssemblyResolvedPatterns combine(AssemblyResolvedPatterns cur,
|
||||
AssemblyResolution pat) {
|
||||
if (pat.isError()) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
if (pat.isBackfill()) {
|
||||
return combineBackfill(cur, (AssemblyResolvedBackfill) pat);
|
||||
}
|
||||
return combineConstructor(cur, (AssemblyResolvedPatterns) pat);
|
||||
}
|
||||
|
||||
/**
* Describe the failure to combine a resolution with a pattern
*
* <p>
* The returned text becomes the error message of the error record generated when
* {@link #combine(AssemblyResolvedPatterns, AssemblyResolution)} returns null.
*
* @param rc the current resolution
* @param pat the pattern that could not be combined with it
* @return the error description
*/
String describeError(AssemblyResolvedPatterns rc, AssemblyResolution pat);
|
||||
|
||||
/**
* Post-process a successfully combined resolution before it is added to the results
*
* <p>
* The default implementation returns the resolution unchanged.
*
* @param resolved the combined resolution
* @return the resolution to add to the results
*/
default AssemblyResolution finish(AssemblyResolvedPatterns resolved) {
|
||||
return resolved;
|
||||
}
|
||||
}
|
||||
|
||||
protected final Set<AssemblyResolution> resolutions;
|
||||
|
||||
/**
|
||||
@ -48,7 +102,7 @@ public class AssemblyResolutionResults extends AbstractSetDecorator<AssemblyReso
|
||||
* @param rc the single resolution entry
|
||||
* @return the new resolution set
|
||||
*/
|
||||
public static AssemblyResolutionResults singleton(AssemblyResolvedConstructor rc) {
|
||||
public static AssemblyResolutionResults singleton(AssemblyResolvedPatterns rc) {
|
||||
return new AssemblyResolutionResults(Collections.singleton(rc));
|
||||
}
|
||||
|
||||
@ -88,4 +142,38 @@ public class AssemblyResolutionResults extends AbstractSetDecorator<AssemblyReso
|
||||
public boolean remove(AssemblyResolution ar) {
|
||||
return this.resolutions.remove(ar);
|
||||
}
|
||||
|
||||
protected AssemblyResolutionResults apply(Applicator applicator) {
|
||||
AssemblyResolutionResults results = new AssemblyResolutionResults();
|
||||
for (AssemblyResolution res : this) {
|
||||
if (res.isError()) {
|
||||
results.add(res);
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) res;
|
||||
DBG.println("Current: " + rc.lineToString());
|
||||
for (AssemblyResolution pat : applicator.getPatterns(rc)) {
|
||||
DBG.println("Pattern: " + pat.lineToString());
|
||||
AssemblyResolvedPatterns combined = applicator.combine(rc, pat);
|
||||
DBG.println("Combined: " + (combined == null ? "(null)" : combined.lineToString()));
|
||||
if (combined == null) {
|
||||
results.add(AssemblyResolution.error(applicator.describeError(rc, pat), rc));
|
||||
continue;
|
||||
}
|
||||
results.add(applicator.finish(combined));
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
protected AssemblyResolutionResults apply(
|
||||
Function<AssemblyResolvedPatterns, AssemblyResolution> function) {
|
||||
return stream().map(res -> {
|
||||
assert !(res instanceof AssemblyResolvedBackfill);
|
||||
if (res.isError()) {
|
||||
return res;
|
||||
}
|
||||
return function.apply((AssemblyResolvedPatterns) res);
|
||||
}).collect(Collectors.toCollection(AssemblyResolutionResults::new));
|
||||
}
|
||||
}
|
||||
|
@ -23,16 +23,17 @@ import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
/**
|
||||
* A {@link AssemblyResolution} indicating the need to solve an expression in the future
|
||||
*
|
||||
* Such records are collected within a {@link AssemblyResolvedConstructor} and then solved just
|
||||
* before the final result(s) are assembled. This is typically required by instructions that refer
|
||||
* to the {@code inst_next} symbol.
|
||||
* <p>
|
||||
* Such records are collected within a {@link AssemblyResolvedPatterns} and then solved just before
|
||||
* the final result(s) are assembled. This is typically required by instructions that refer to the
|
||||
* {@code inst_next} symbol.
|
||||
*
|
||||
* NOTE: These are used internally. The user ought never to see these from the assembly API.
|
||||
* <p>
|
||||
* <b>NOTE:</b> These are used internally. The user ought never to see these from the assembly API.
|
||||
*/
|
||||
public class AssemblyResolvedBackfill extends AssemblyResolution {
|
||||
protected final PatternExpression exp;
|
||||
protected final MaskedLong goal;
|
||||
protected final Map<Integer, Object> res;
|
||||
protected final int inslen;
|
||||
protected final int offset;
|
||||
|
||||
@ -52,31 +53,37 @@ public class AssemblyResolvedBackfill extends AssemblyResolution {
|
||||
/**
|
||||
* @see {@link AssemblyResolution#backfill(PatternExpression, MaskedLong, Map, int, String)}
|
||||
*/
|
||||
AssemblyResolvedBackfill(String description, PatternExpression exp, MaskedLong goal,
|
||||
Map<Integer, Object> res, int inslen, int offset) {
|
||||
super(description, null);
|
||||
AssemblyResolvedBackfill(String description, PatternExpression exp, MaskedLong goal, int inslen,
|
||||
int offset) {
|
||||
super(description, null, null);
|
||||
this.exp = exp;
|
||||
this.goal = goal;
|
||||
this.res = res;
|
||||
this.inslen = inslen;
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Duplicate this record
|
||||
*
|
||||
* @return the duplicate
|
||||
*/
|
||||
AssemblyResolvedBackfill copy() {
|
||||
AssemblyResolvedBackfill cp =
|
||||
new AssemblyResolvedBackfill(description, exp, goal, res, inslen, offset);
|
||||
new AssemblyResolvedBackfill(description, exp, goal, inslen, offset);
|
||||
return cp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolvedBackfill withRight(AssemblyResolution right) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the expected length of the instruction portion of the future encoding
|
||||
*
|
||||
* This is used to make sure that operands following a to-be-determined encoding are placed
|
||||
* properly. Even though the actual encoding cannot yet be determined, its length can.
|
||||
*
|
||||
* @return the total expected length (including the offset)
|
||||
*/
|
||||
public int getInstructionLength() {
|
||||
@ -99,13 +106,14 @@ public class AssemblyResolvedBackfill extends AssemblyResolution {
|
||||
description + ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* Shift the back-fill record's "instruction" pattern to the right.
|
||||
* @param amt the number of bytes to shift the result when solved.
|
||||
* @return the result
|
||||
*/
|
||||
@Override
|
||||
public AssemblyResolvedBackfill shift(int amt) {
|
||||
return new AssemblyResolvedBackfill(description, exp, goal, res, inslen, offset + amt);
|
||||
return new AssemblyResolvedBackfill(description, exp, goal, inslen, offset + amt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolution parent(String description, int opCount) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -117,26 +125,27 @@ public class AssemblyResolvedBackfill extends AssemblyResolution {
|
||||
* {@link NeedsBackfillException}, since that would imply the missing symbol(s) from the
|
||||
* original attempt are still missing. Instead, the method returns an instance of
|
||||
* {@link AssemblyResolvedError}.
|
||||
*
|
||||
* @param solver a solver, usually the same as the one from the original attempt.
|
||||
* @param vals the defined symbols, usually the same, but with the missing symbol(s).
|
||||
* @return the solution result
|
||||
*/
|
||||
public AssemblyResolution solve(RecursiveDescentSolver solver, Map<String, Long> vals,
|
||||
AssemblyResolvedConstructor cur) {
|
||||
AssemblyResolvedPatterns cur) {
|
||||
try {
|
||||
AssemblyResolution ar =
|
||||
solver.solve(exp, goal, vals, res, cur.truncate(offset), description);
|
||||
solver.solve(exp, goal, vals, cur.truncate(offset), description);
|
||||
if (ar.isError()) {
|
||||
return ar;
|
||||
}
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar;
|
||||
return rc.shift(offset);
|
||||
}
|
||||
catch (NeedsBackfillException e) {
|
||||
return AssemblyResolution.error("Solution still requires backfill", description, null);
|
||||
return AssemblyResolution.error("Solution still requires backfill", description);
|
||||
}
|
||||
catch (UnsupportedOperationException e) {
|
||||
return AssemblyResolution.error("Unsupported: " + e.getMessage(), description, null);
|
||||
return AssemblyResolution.error("Unsupported: " + e.getMessage(), description);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ import java.util.List;
|
||||
/**
|
||||
* A {@link AssemblyResolution} indicating the occurrence of a (usually semantic) error
|
||||
*
|
||||
* <p>
|
||||
* The description should indicate where the error occurred. The error message should explain the
|
||||
* actual error. To help the user diagnose the nature of the error, errors in sub-constructors
|
||||
* should be placed as children of an error given by the parent constructor.
|
||||
@ -48,9 +49,9 @@ public class AssemblyResolvedError extends AssemblyResolution {
|
||||
* @see AssemblyResolution#error(String, String, List)
|
||||
*/
|
||||
AssemblyResolvedError(String description, List<? extends AssemblyResolution> children,
|
||||
String error) {
|
||||
super(description, children);
|
||||
AssemblyTreeResolver.dbg.println(error);
|
||||
AssemblyResolution right, String error) {
|
||||
super(description, children, right);
|
||||
AssemblyTreeResolver.DBG.println(error);
|
||||
this.error = error;
|
||||
}
|
||||
|
||||
@ -66,6 +67,7 @@ public class AssemblyResolvedError extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Get a description of the error
|
||||
*
|
||||
* @return the description
|
||||
*/
|
||||
public String getError() {
|
||||
@ -76,4 +78,20 @@ public class AssemblyResolvedError extends AssemblyResolution {
|
||||
public String lineToString() {
|
||||
return error + " (" + description + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolution shift(int amt) {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolution withRight(AssemblyResolution right) {
|
||||
return new AssemblyResolvedError(description, null, right, error);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolution parent(String description, int opCount) {
|
||||
List<AssemblyResolution> allRight = getAllRight();
|
||||
return new AssemblyResolvedError(description, allRight, null, error);
|
||||
}
|
||||
}
|
||||
|
@ -17,8 +17,8 @@ package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.IteratorUtils;
|
||||
import org.apache.commons.collections4.Predicate;
|
||||
@ -27,17 +27,20 @@ import org.apache.commons.lang3.StringUtils;
|
||||
import ghidra.app.plugin.assembler.AssemblySelector;
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.RecursiveDescentSolver;
|
||||
import ghidra.app.plugin.processors.sleigh.ConstructState;
|
||||
import ghidra.app.plugin.processors.sleigh.ContextOp;
|
||||
import ghidra.app.plugin.processors.sleigh.*;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol;
|
||||
|
||||
/**
|
||||
* A {@link AssemblyResolution} indicating successful application of a constructor
|
||||
*
|
||||
* <p>
|
||||
* This is almost analogous to {@link ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern
|
||||
* DisjointPattern}, in that is joins an instruction {@link AssemblyPatternBlock} with a corresponding
|
||||
* context {@link AssemblyPatternBlock}. However, this object is mutable, and it collects backfill records,
|
||||
* as well as forbidden patterns.
|
||||
* DisjointPattern}, in that it joins an instruction {@link AssemblyPatternBlock} with a
|
||||
* corresponding context {@link AssemblyPatternBlock}. However, this object is mutable, and it
|
||||
* collects backfill records, as well as forbidden patterns.
|
||||
*
|
||||
* <p>
|
||||
* When the applied constructor is from the "instruction" subtable, this represents a fully-
|
||||
* constructed instruction with required context. All backfill records ought to be resolved and
|
||||
* applied before the final result is given to the user, i.e., passed into the
|
||||
@ -45,16 +48,17 @@ import ghidra.app.plugin.processors.sleigh.ContextOp;
|
||||
* becomes confined to one of the forbidden patterns, it must be dropped, since the encoding will
|
||||
* actually invoke a more specific SLEIGH constructor.
|
||||
*/
|
||||
public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
public class AssemblyResolvedPatterns extends AssemblyResolution {
|
||||
protected static final String INS = "ins:";
|
||||
protected static final String CTX = "ctx:";
|
||||
protected static final String SEP = ",";
|
||||
|
||||
protected final Constructor cons;
|
||||
protected final AssemblyPatternBlock ins;
|
||||
protected final AssemblyPatternBlock ctx;
|
||||
|
||||
protected final Set<AssemblyResolvedBackfill> backfills;
|
||||
protected final Set<AssemblyResolvedConstructor> forbids;
|
||||
protected final Set<AssemblyResolvedPatterns> forbids;
|
||||
|
||||
@Override
|
||||
protected int computeHash() {
|
||||
@ -71,10 +75,10 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (!(obj instanceof AssemblyResolvedConstructor)) {
|
||||
if (!(obj instanceof AssemblyResolvedPatterns)) {
|
||||
return false;
|
||||
}
|
||||
AssemblyResolvedConstructor that = (AssemblyResolvedConstructor) obj;
|
||||
AssemblyResolvedPatterns that = (AssemblyResolvedPatterns) obj;
|
||||
if (!this.ins.equals(that.ins)) {
|
||||
return false;
|
||||
}
|
||||
@ -93,11 +97,12 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
/**
|
||||
* @see AssemblyResolution#resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, List)
|
||||
*/
|
||||
AssemblyResolvedConstructor(String description,
|
||||
List<? extends AssemblyResolution> children, AssemblyPatternBlock ins,
|
||||
AssemblyPatternBlock ctx, Set<AssemblyResolvedBackfill> backfills,
|
||||
Set<AssemblyResolvedConstructor> forbids) {
|
||||
super(description, children);
|
||||
AssemblyResolvedPatterns(String description, Constructor cons,
|
||||
List<? extends AssemblyResolution> children, AssemblyResolution right,
|
||||
AssemblyPatternBlock ins, AssemblyPatternBlock ctx,
|
||||
Set<AssemblyResolvedBackfill> backfills, Set<AssemblyResolvedPatterns> forbids) {
|
||||
super(description, children, right);
|
||||
this.cons = cons;
|
||||
this.ins = ins;
|
||||
this.ctx = ctx;
|
||||
this.backfills = backfills == null ? Set.of() : backfills;
|
||||
@ -107,15 +112,18 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
/**
|
||||
* Build a new successful SLEIGH constructor resolution from a string representation
|
||||
*
|
||||
* <p>
|
||||
* This was used primarily in testing, to specify expected results.
|
||||
*
|
||||
* @param str the string representation: "{@code ins:[pattern],ctx:[pattern]}"
|
||||
* @see ghidra.util.NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String)
|
||||
* NumericUtilities.convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String)
|
||||
* @see ghidra.util.NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong,
|
||||
* String, int, int, String) NumericUtilities.convertHexStringToMaskedValue(AtomicLong,
|
||||
* AtomicLong, String, int, int, String)
|
||||
* @param description a description of the resolution
|
||||
* @param children any children involved in the resolution
|
||||
* @return the decoded resolution
|
||||
*/
|
||||
public static AssemblyResolvedConstructor fromString(String str, String description,
|
||||
public static AssemblyResolvedPatterns fromString(String str, String description,
|
||||
List<AssemblyResolution> children) {
|
||||
AssemblyPatternBlock ins = null;
|
||||
if (str.startsWith(INS)) {
|
||||
@ -141,17 +149,11 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
return AssemblyResolution.resolved(//
|
||||
ins == null ? AssemblyPatternBlock.nop() : ins,//
|
||||
ctx == null ? AssemblyPatternBlock.nop() : ctx,//
|
||||
description, children);
|
||||
description, null, children, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Shift the resolved instruction pattern to the right
|
||||
*
|
||||
* This also shifts any backfill and forbidden pattern records.
|
||||
* @param amt the number of bytes to shift.
|
||||
* @return the result
|
||||
*/
|
||||
public AssemblyResolvedConstructor shift(int amt) {
|
||||
@Override
|
||||
public AssemblyResolvedPatterns shift(int amt) {
|
||||
if (amt == 0) {
|
||||
return this;
|
||||
}
|
||||
@ -163,43 +165,47 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
newBackfills.add(bf.shift(amt));
|
||||
}
|
||||
|
||||
Set<AssemblyResolvedConstructor> newForbids = new HashSet<>();
|
||||
for (AssemblyResolvedConstructor f : this.forbids) {
|
||||
Set<AssemblyResolvedPatterns> newForbids = new HashSet<>();
|
||||
for (AssemblyResolvedPatterns f : this.forbids) {
|
||||
newForbids.add(f.shift(amt));
|
||||
}
|
||||
return new AssemblyResolvedConstructor(description, children, newIns, ctx,
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, newIns, ctx,
|
||||
Collections.unmodifiableSet(newBackfills), Collections.unmodifiableSet(newForbids));
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate (unshift) the resolved instruction pattern from the left
|
||||
*
|
||||
* NOTE: This drops all backfill and forbidden pattern records, since this method is typically
|
||||
* used to read token fields rather than passed around for resolution.
|
||||
* <b>NOTE:</b> This drops all backfill and forbidden pattern records, since this method is
|
||||
* typically used to read token fields rather than passed around for resolution.
|
||||
*
|
||||
* @param amt the number of bytes to remove from the left
|
||||
* @return the result
|
||||
*/
|
||||
public AssemblyResolvedConstructor truncate(int amt) {
|
||||
public AssemblyResolvedPatterns truncate(int amt) {
|
||||
if (amt == 0) {
|
||||
return this;
|
||||
}
|
||||
AssemblyPatternBlock newIns = this.ins.truncate(amt);
|
||||
|
||||
return new AssemblyResolvedConstructor("Truncated: " + description, null, newIns, ctx, null,
|
||||
null);
|
||||
return new AssemblyResolvedPatterns("Truncated: " + description, cons, null, right,
|
||||
newIns, ctx,
|
||||
null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the current encoding is forbidden by one of the attached patterns
|
||||
*
|
||||
* The pattern become forbidden if this encoding's known bits are an overset of any forbidden
|
||||
* <p>
|
||||
* The pattern becomes forbidden if this encoding's known bits are an overset of any forbidden
|
||||
* pattern's known bits.
|
||||
*
|
||||
* @return an error if the pattern is forbidden, otherwise a copy of this resolution with an updated set of forbidden patterns
|
||||
*/
|
||||
public AssemblyResolution checkNotForbidden() {
|
||||
Set<AssemblyResolvedConstructor> newForbids = new HashSet<>();
|
||||
for (AssemblyResolvedConstructor f : this.forbids) {
|
||||
AssemblyResolvedConstructor check = this.combine(f);
|
||||
Set<AssemblyResolvedPatterns> newForbids = new HashSet<>();
|
||||
for (AssemblyResolvedPatterns f : this.forbids) {
|
||||
AssemblyResolvedPatterns check = this.combine(f);
|
||||
if (null == check) {
|
||||
continue;
|
||||
}
|
||||
@ -209,46 +215,51 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
return AssemblyResolution.error("The result is forbidden by " + f, this);
|
||||
}
|
||||
}
|
||||
return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills,
|
||||
Collections.unmodifiableSet(newForbids));
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx,
|
||||
backfills, Collections.unmodifiableSet(newForbids));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if this and another resolution have equal encodings
|
||||
*
|
||||
* This is like {@link #equals(Object)}, but it ignores backfills records and forbidden
|
||||
* patterns.
|
||||
* <p>
|
||||
* This is like {@link #equals(Object)}, but it ignores backfill records and forbidden patterns.
|
||||
*
|
||||
* @param that the other resolution
|
||||
* @return true if both have equal encodings
|
||||
*/
|
||||
protected boolean bitsEqual(AssemblyResolvedConstructor that) {
|
||||
protected boolean bitsEqual(AssemblyResolvedPatterns that) {
|
||||
return this.ins.equals(that.ins) && this.ctx.equals(that.ctx);
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine the encodings and backfills of the given resolution into this one
|
||||
*
|
||||
* This combines corresponding pattern blocks (assuming they agree), collects backfill
|
||||
* records, and collects forbidden patterns.
|
||||
* <p>
|
||||
* This combines corresponding pattern blocks (assuming they agree), collects backfill records,
|
||||
* and collects forbidden patterns.
|
||||
*
|
||||
* @param that the other resolution
|
||||
* @return the result if successful, or null
|
||||
*/
|
||||
public AssemblyResolvedConstructor combine(AssemblyResolvedConstructor that) {
|
||||
public AssemblyResolvedPatterns combine(AssemblyResolvedPatterns that) {
|
||||
// Not really a backfill, but I would like to re-use code
|
||||
return combineLessBackfill(that, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine a backfill result
|
||||
* @param that the result from backfilling
|
||||
* @param bf the resolved backfilled record
|
||||
* @return the result if successful, or null
|
||||
*
|
||||
* <p>
|
||||
* When a backfill is successful, the result should be combined with the owning resolution. In
|
||||
* addition, for bookkeeping's sake, the resolved record should be removed from the list of
|
||||
* backfills.
|
||||
*
|
||||
* @param that the result from backfilling
|
||||
* @param bf the resolved backfilled record
|
||||
* @return the result if successful, or null
|
||||
*/
|
||||
protected AssemblyResolvedConstructor combineLessBackfill(AssemblyResolvedConstructor that,
|
||||
protected AssemblyResolvedPatterns combineLessBackfill(AssemblyResolvedPatterns that,
|
||||
AssemblyResolvedBackfill bf) {
|
||||
AssemblyPatternBlock newIns = this.ins.combine(that.ins);
|
||||
if (newIns == null) {
|
||||
@ -263,68 +274,87 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
if (bf != null) {
|
||||
newBackfills.remove(bf);
|
||||
}
|
||||
Set<AssemblyResolvedConstructor> newForbids = new HashSet<>(this.forbids);
|
||||
Set<AssemblyResolvedPatterns> newForbids = new HashSet<>(this.forbids);
|
||||
newForbids.addAll(that.forbids);
|
||||
return new AssemblyResolvedConstructor(description, children, newIns, newCtx,
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, newIns, newCtx,
|
||||
Collections.unmodifiableSet(newBackfills), Collections.unmodifiableSet(newForbids));
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine the given backfill record into this resolution
|
||||
*
|
||||
* @param bf the backfill record
|
||||
* @return the result
|
||||
*/
|
||||
public AssemblyResolvedConstructor combine(AssemblyResolvedBackfill bf) {
|
||||
public AssemblyResolvedPatterns combine(AssemblyResolvedBackfill bf) {
|
||||
Set<AssemblyResolvedBackfill> newBackfills = new HashSet<>(this.backfills);
|
||||
newBackfills.add(bf);
|
||||
return new AssemblyResolvedConstructor(description, children, ins, ctx,
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx,
|
||||
Collections.unmodifiableSet(newBackfills), forbids);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new resolution from this one with the given forbidden patterns recorded
|
||||
*
|
||||
* @param more the additional forbidden patterns to record
|
||||
* @return the new resolution
|
||||
*/
|
||||
public AssemblyResolvedConstructor withForbids(Set<AssemblyResolvedConstructor> more) {
|
||||
Set<AssemblyResolvedConstructor> combForbids = new HashSet<>(this.forbids);
|
||||
public AssemblyResolvedPatterns withForbids(Set<AssemblyResolvedPatterns> more) {
|
||||
Set<AssemblyResolvedPatterns> combForbids = new HashSet<>(this.forbids);
|
||||
combForbids.addAll(more);
|
||||
return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills,
|
||||
Collections.unmodifiableSet(more));
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx,
|
||||
backfills, Collections.unmodifiableSet(more));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a copy of this resolution with a new description
|
||||
*
|
||||
* @param desc the new description
|
||||
* @return the copy
|
||||
*/
|
||||
public AssemblyResolvedConstructor withDescription(String desc) {
|
||||
return new AssemblyResolvedConstructor(desc, children, ins, ctx, backfills, forbids);
|
||||
public AssemblyResolvedPatterns withDescription(String desc) {
|
||||
return new AssemblyResolvedPatterns(desc, cons, children, right, ins, ctx, backfills,
|
||||
forbids);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a copy of this resolution with a replaced constructor
|
||||
*
|
||||
* @param cons the new constructor
|
||||
* @return the copy
|
||||
*/
|
||||
public AssemblyResolvedPatterns withConstructor(Constructor cons) {
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx,
|
||||
backfills,
|
||||
forbids);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode the given value into the context block as specified by an operation
|
||||
*
|
||||
* @param cop the context operation specifying the location of the value to encode
|
||||
* @param val the masked value to encode
|
||||
* @return the result
|
||||
*
|
||||
* This is the forward (as in disassembly) direction of applying context operations. The
|
||||
* pattern expression is evaluated, and the result is written as specified.
|
||||
* This is the forward (as in disassembly) direction of applying context operations. The
|
||||
* pattern expression is evaluated, and the result is written as specified.
|
||||
*/
|
||||
public AssemblyResolvedConstructor writeContextOp(ContextOp cop, MaskedLong val) {
|
||||
public AssemblyResolvedPatterns writeContextOp(ContextOp cop, MaskedLong val) {
|
||||
AssemblyPatternBlock newCtx = this.ctx.writeContextOp(cop, val);
|
||||
return new AssemblyResolvedConstructor(description, children, ins, newCtx, backfills,
|
||||
forbids);
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, newCtx,
|
||||
backfills, forbids);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the value from the context located where the given context operation would write
|
||||
*
|
||||
* This is used to read the value from the left-hand-side "variable" of a context operation.
|
||||
* It seems backward, because it is. When assembling, the right-hand-side expression of a
|
||||
* context operation must be solved. This means the "variable" is known from the context(s) of
|
||||
* the resolved children constructors. The value read is then used as the goal in solving the
|
||||
* <p>
|
||||
* This is used to read the value from the left-hand-side "variable" of a context operation. It
|
||||
* seems backward, because it is. When assembling, the right-hand-side expression of a context
|
||||
* operation must be solved. This means the "variable" is known from the context(s) of the
|
||||
* resolved children constructors. The value read is then used as the goal in solving the
|
||||
* expression.
|
||||
*
|
||||
* @param cop the context operation whose "variable" to read.
|
||||
* @return the masked result.
|
||||
*/
|
||||
@ -334,36 +364,60 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Duplicate this resolution, with additional description text appended
|
||||
*
|
||||
* @param append the text to append
|
||||
* @return the duplicate
|
||||
* NOTE: An additional separator {@code ": "} is inserted
|
||||
* @return the duplicate, with the separator {@code ": "} inserted before the appended text
|
||||
*/
|
||||
public AssemblyResolvedConstructor copyAppendDescription(String append) {
|
||||
AssemblyResolvedConstructor cp = new AssemblyResolvedConstructor(
|
||||
description + ": " + append, children, ins.copy(), ctx.copy(), backfills, forbids);
|
||||
public AssemblyResolvedPatterns copyAppendDescription(String append) {
|
||||
AssemblyResolvedPatterns cp = new AssemblyResolvedPatterns(
|
||||
description + ": " + append, cons, children, right, ins.copy(), ctx.copy(), backfills,
|
||||
forbids);
|
||||
return cp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolvedPatterns withRight(AssemblyResolution right) {
|
||||
AssemblyResolvedPatterns cp = new AssemblyResolvedPatterns(description, cons,
|
||||
children, right, ins.copy(), ctx.copy(), backfills, forbids);
|
||||
return cp;
|
||||
}
|
||||
|
||||
public AssemblyResolvedPatterns nopLeftSibling() {
|
||||
return new AssemblyResolvedPatterns("nop-left", null, null, this, ins.copy(),
|
||||
ctx.copy(), backfills, forbids);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolvedPatterns parent(String description, int opCount) {
|
||||
List<AssemblyResolution> allRight = getAllRight();
|
||||
AssemblyResolvedPatterns cp = new AssemblyResolvedPatterns(description, cons,
|
||||
allRight.subList(0, opCount), allRight.get(opCount), ins, ctx, backfills, forbids);
|
||||
return cp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set all bits read by a given context operation to unknown
|
||||
*
|
||||
* @param cop the context operation
|
||||
* @return the result
|
||||
* @see AssemblyPatternBlock#maskOut(ContextOp)
|
||||
*/
|
||||
public AssemblyResolvedConstructor maskOut(ContextOp cop) {
|
||||
public AssemblyResolvedPatterns maskOut(ContextOp cop) {
|
||||
AssemblyPatternBlock newCtx = this.ctx.maskOut(cop);
|
||||
return new AssemblyResolvedConstructor(description, children, ins, newCtx, backfills,
|
||||
forbids);
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, newCtx,
|
||||
backfills, forbids);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply as many backfill records as possible
|
||||
*
|
||||
* <p>
|
||||
* Each backfill record is resolved in turn, if the record cannot be resolved, it remains
|
||||
* listed. If the record can be resolved, but it conflicts, an error record is returned. Each
|
||||
* time a record is resolved and combined successfully, all remaining records are tried again.
|
||||
* The result is the combined resolved backfills, with only the unresolved backfill records
|
||||
* listed.
|
||||
*
|
||||
* @param solver the solver, usually the same as the original attempt to solve.
|
||||
* @param vals the values.
|
||||
* @return the result, or an error.
|
||||
@ -373,15 +427,15 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
return this;
|
||||
}
|
||||
|
||||
AssemblyResolvedConstructor res = this;
|
||||
AssemblyResolvedPatterns res = this;
|
||||
loop: while (true) {
|
||||
for (AssemblyResolvedBackfill bf : res.backfills) {
|
||||
AssemblyResolution ar = bf.solve(solver, vals, this);
|
||||
if (ar.isError()) {
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyResolvedConstructor check = res.combineLessBackfill(rc, bf);
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar;
|
||||
AssemblyResolvedPatterns check = res.combineLessBackfill(rc, bf);
|
||||
if (check == null) {
|
||||
return AssemblyResolution.error("Conflict: Backfill " + bf.description, res);
|
||||
}
|
||||
@ -399,6 +453,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Check if this resolution has pending backfills to apply
|
||||
*
|
||||
* @return true if there are backfills
|
||||
*/
|
||||
public boolean hasBackfills() {
|
||||
@ -407,6 +462,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Check if this resolution includes forbidden patterns
|
||||
*
|
||||
* @return true if there are forbidden patterns
|
||||
*/
|
||||
private boolean hasForbids() {
|
||||
@ -416,43 +472,48 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
/**
|
||||
* Solve and apply context changes in reverse to forbidden patterns
|
||||
*
|
||||
* To avoid circumstances where a context change during disassembly would invoke a more
|
||||
* specific subconstructor than was used to assembly the instruction, we must solve the
|
||||
* forbidden patterns in tandem with the overall resolution. If the context of any forbidden
|
||||
* pattern cannot be solved, we simply drop the forbidden pattern -- the lack of a solution
|
||||
* implies there is no way the context change could produce the forbidden pattern.
|
||||
* <p>
|
||||
* To avoid circumstances where a context change during disassembly would invoke a more specific
|
||||
* sub-constructor than was used to assembly the instruction, we must solve the forbidden
|
||||
* patterns in tandem with the overall resolution. If the context of any forbidden pattern
|
||||
* cannot be solved, we simply drop the forbidden pattern -- the lack of a solution implies
|
||||
* there is no way the context change could produce the forbidden pattern.
|
||||
*
|
||||
* @param sem the constructor whose context changes to solve
|
||||
* @param vals any defined symbols
|
||||
* @param opvals the operand values
|
||||
* @return the result
|
||||
* @see AssemblyConstructorSemantic#solveContextChanges(AssemblyResolvedConstructor, Map, Map)
|
||||
* @see AssemblyConstructorSemantic#solveContextChanges(AssemblyResolvedPatterns, Map, Map)
|
||||
*/
|
||||
public AssemblyResolvedConstructor solveContextChangesForForbids(
|
||||
AssemblyConstructorSemantic sem, Map<String, Long> vals, Map<Integer, Object> opvals) {
|
||||
public AssemblyResolvedPatterns solveContextChangesForForbids(
|
||||
AssemblyConstructorSemantic sem, Map<String, Long> vals) {
|
||||
if (!hasForbids()) {
|
||||
return this;
|
||||
}
|
||||
Set<AssemblyResolvedConstructor> newForbids = new HashSet<>();
|
||||
for (AssemblyResolvedConstructor f : this.forbids) {
|
||||
AssemblyResolution t = sem.solveContextChanges(f, vals, opvals);
|
||||
if (!(t instanceof AssemblyResolvedConstructor)) {
|
||||
Set<AssemblyResolvedPatterns> newForbids = new HashSet<>();
|
||||
for (AssemblyResolvedPatterns f : this.forbids) {
|
||||
AssemblyResolution t = sem.solveContextChanges(f, vals);
|
||||
if (!(t instanceof AssemblyResolvedPatterns)) {
|
||||
// Can't be solved, so it can be dropped
|
||||
continue;
|
||||
}
|
||||
newForbids.add((AssemblyResolvedConstructor) t);
|
||||
newForbids.add((AssemblyResolvedPatterns) t);
|
||||
}
|
||||
return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills,
|
||||
Collections.unmodifiableSet(newForbids));
|
||||
return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx,
|
||||
backfills, Collections.unmodifiableSet(newForbids));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the length of the instruction encoding
|
||||
*
|
||||
* <p>
|
||||
* This is used to ensure each operand is encoded at the correct offset
|
||||
* @return the length of the instruction block
|
||||
*
|
||||
* NOTE: this DOES include the offset
|
||||
* NOTE: this DOES include pending backfills
|
||||
* <p>
|
||||
* <b>NOTE:</b> this DOES include the offset<br>
|
||||
* <b>NOTE:</b> this DOES include pending backfills
|
||||
*
|
||||
* @return the length of the instruction block
|
||||
*/
|
||||
public int getInstructionLength() {
|
||||
int inslen = ins.length();
|
||||
@ -464,10 +525,12 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Get the length of the instruction encoding, excluding trailing undefined bytes
|
||||
* @return the length of the defined bytes in the instruction block
|
||||
*
|
||||
* NOTE: this DOES include the offset
|
||||
* NOTE: this DOES NOT include pending backfills
|
||||
* <p>
|
||||
* <b>NOTE:</b> this DOES include the offset<br>
|
||||
* <b>NOTE:</b> this DOES NOT include pending backfills
|
||||
*
|
||||
* @return the length of the defined bytes in the instruction block
|
||||
*/
|
||||
public int getDefinedInstructionLength() {
|
||||
byte[] imsk = ins.getMask();
|
||||
@ -482,6 +545,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Get the instruction block
|
||||
*
|
||||
* @return the instruction block
|
||||
*/
|
||||
public AssemblyPatternBlock getInstruction() {
|
||||
@ -490,6 +554,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Get the context block
|
||||
*
|
||||
* @return the context block
|
||||
*/
|
||||
public AssemblyPatternBlock getContext() {
|
||||
@ -498,6 +563,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Decode a portion of the instruction block
|
||||
*
|
||||
* @param start the first byte to decode
|
||||
* @param len the number of bytes to decode
|
||||
* @return the read masked value
|
||||
@ -509,6 +575,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
/**
|
||||
* Decode a portion of the context block
|
||||
*
|
||||
* @param start the first byte to decode
|
||||
* @param len the number of bytes to decode
|
||||
* @return the read masked value
|
||||
@ -543,7 +610,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
sb.append(indent);
|
||||
sb.append("backfill: " + bf + "\n");
|
||||
}
|
||||
for (AssemblyResolvedConstructor f : forbids) {
|
||||
for (AssemblyResolvedPatterns f : forbids) {
|
||||
sb.append(indent);
|
||||
sb.append("forbidden: " + f + "\n");
|
||||
}
|
||||
@ -556,20 +623,18 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
* Used for testing and diagnostics: list the constructor line numbers used to resolve this
|
||||
* encoding
|
||||
*
|
||||
* <p>
|
||||
* This includes braces to describe the tree structure
|
||||
*
|
||||
* @see ConstructState#dumpConstructorTree()
|
||||
* @return the constructor tree
|
||||
*/
|
||||
public String dumpConstructorTree() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
// TODO: HACK, but diagnostic
|
||||
Matcher mat = pat.matcher(description);
|
||||
if (mat.find()) {
|
||||
sb.append(mat.group(1));
|
||||
}
|
||||
else {
|
||||
if (cons == null) {
|
||||
return null;
|
||||
}
|
||||
sb.append(cons.getSourceFile() + ":" + cons.getLineno());
|
||||
|
||||
if (children == null) {
|
||||
return sb.toString();
|
||||
@ -577,8 +642,8 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
|
||||
List<String> subs = new ArrayList<>();
|
||||
for (AssemblyResolution c : children) {
|
||||
if (c instanceof AssemblyResolvedConstructor) {
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) c;
|
||||
if (c instanceof AssemblyResolvedPatterns) {
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) c;
|
||||
String s = rc.dumpConstructorTree();
|
||||
if (s != null) {
|
||||
subs.add(s);
|
||||
@ -598,7 +663,9 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
/**
|
||||
* Count the number of bits specified in the resolution patterns
|
||||
*
|
||||
* <p>
|
||||
* Totals the specificity of the instruction and context pattern blocks.
|
||||
*
|
||||
* @return the number of bits in the resulting patterns
|
||||
* @see AssemblyPatternBlock#getSpecificity()
|
||||
*/
|
||||
@ -609,33 +676,34 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
/**
|
||||
* Get an iterable over all the possible fillings of the instruction pattern given a context
|
||||
*
|
||||
* <p>
|
||||
* This is meant to be used idiomatically, as in an enhanced for loop:
|
||||
*
|
||||
* <pre>
|
||||
* {@code
|
||||
* for (byte[] ins : rcon.possibleInsVals(ctx)) {
|
||||
* System.out.println(format(ins));
|
||||
* }
|
||||
* System.out.println(format(ins));
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* This is similar to calling
|
||||
* {@link #getInstruction()}.{@link AssemblyPatternBlock#possibleVals()}, <em>but</em> with
|
||||
* forbidden patterns removed. A context is required so that only those forbidden patterns
|
||||
* matching the given context are actually removed. This method should always be preferred to
|
||||
* the sequence mentioned above, since {@link AssemblyPatternBlock#possibleVals()} on its own
|
||||
* may yield bytes that do not produce the desired instruction.
|
||||
* may yield bytes that do not produce the desired instruction.
|
||||
*
|
||||
* NOTE: The implementation is based on {@link AssemblyPatternBlock#possibleVals()}, so be
|
||||
* aware that a single array is reused for each iterate. You should not retain a pointer to the
|
||||
* array, but rather make a copy.
|
||||
* <p>
|
||||
* <b>NOTE:</b> The implementation is based on {@link AssemblyPatternBlock#possibleVals()}, so
|
||||
* be aware that a single array is reused for each iterate. You should not retain a pointer to
|
||||
* the array, but rather make a copy.
|
||||
*
|
||||
* @param forCtx the context at the assembly address
|
||||
* @return the iterable
|
||||
*/
|
||||
public Iterable<byte[]> possibleInsVals(AssemblyPatternBlock forCtx) {
|
||||
Predicate<byte[]> removeForbidden = (byte[] val) -> {
|
||||
for (AssemblyResolvedConstructor f : forbids) {
|
||||
for (AssemblyResolvedPatterns f : forbids) {
|
||||
// If the forbidden length is larger than us, we can ignore it
|
||||
if (f.getDefinedInstructionLength() > val.length) {
|
||||
continue;
|
||||
@ -663,4 +731,73 @@ public class AssemblyResolvedConstructor extends AssemblyResolution {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
protected static int getOpIndex(String piece) {
|
||||
if (piece.charAt(0) != '\n') {
|
||||
return -1;
|
||||
}
|
||||
return piece.charAt(1) - 'A';
|
||||
}
|
||||
|
||||
/**
|
||||
* If the construct state is a {@code ^instruction} or other purely-recursive constructor, get
|
||||
* its single child.
|
||||
*
|
||||
* @param state the parent state
|
||||
* @return the child state if recursive, or null
|
||||
*/
|
||||
protected static ConstructState getPureRecursion(ConstructState state) {
|
||||
// NB. There can be other operands, but only one can be printed
|
||||
// Furthermore, nothing else can be printed, whether an operand or not
|
||||
List<String> pieces = state.getConstructor().getPrintPieces();
|
||||
if (pieces.size() != 1) {
|
||||
return null;
|
||||
}
|
||||
int opIdx = getOpIndex(pieces.get(0));
|
||||
if (opIdx < 0) {
|
||||
return null;
|
||||
}
|
||||
ConstructState sub = state.getSubState(opIdx);
|
||||
if (sub == null || sub.getConstructor() == null ||
|
||||
sub.getConstructor().getParent() != state.getConstructor().getParent()) {
|
||||
// not recursive
|
||||
return null;
|
||||
}
|
||||
return sub;
|
||||
}
|
||||
|
||||
public boolean equivalentConstructState(ConstructState state) {
|
||||
ConstructState rec = getPureRecursion(state);
|
||||
if (rec != null) {
|
||||
if (state.getConstructor() == cons) {
|
||||
assert children.size() == 1;
|
||||
AssemblyResolvedPatterns recRes = (AssemblyResolvedPatterns) children.get(0);
|
||||
return recRes.equivalentConstructState(rec);
|
||||
}
|
||||
return equivalentConstructState(rec);
|
||||
}
|
||||
if (state.getConstructor() != cons) {
|
||||
return false;
|
||||
}
|
||||
int opCount = cons.getNumOperands();
|
||||
for (int opIdx = 0; opIdx < opCount; opIdx++) {
|
||||
OperandSymbol opSym = cons.getOperand(opIdx);
|
||||
Set<Integer> printed =
|
||||
Arrays.stream(cons.getOpsPrintOrder()).boxed().collect(Collectors.toSet());
|
||||
if (!(opSym.getDefiningSymbol() instanceof SubtableSymbol)) {
|
||||
AssemblyTreeResolver.DBG.println("Operand " + opSym + " is not a sub-table");
|
||||
continue;
|
||||
}
|
||||
if (!printed.contains(opIdx)) {
|
||||
AssemblyTreeResolver.DBG.println("Operand " + opSym + " is hidden");
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedPatterns child = (AssemblyResolvedPatterns) children.get(opIdx);
|
||||
ConstructState subState = state.getSubState(opIdx);
|
||||
if (!child.equivalentConstructState(subState)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
@ -16,27 +16,32 @@
|
||||
package ghidra.app.plugin.assembler.sleigh.sem;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.collections4.IteratorUtils;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.SleighAssemblerBuilder;
|
||||
import ghidra.app.plugin.assembler.sleigh.expr.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AbstractAssemblyStateGenerator.GeneratorContext;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolutionResults.Applicator;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
|
||||
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
|
||||
import ghidra.app.plugin.processors.sleigh.Constructor;
|
||||
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
|
||||
import ghidra.app.plugin.processors.sleigh.*;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.*;
|
||||
import ghidra.program.model.address.Address;
|
||||
import ghidra.program.model.lang.InsufficientBytesException;
|
||||
import ghidra.program.model.lang.UnknownInstructionException;
|
||||
import ghidra.program.model.mem.ByteMemBufferImpl;
|
||||
import ghidra.program.model.mem.MemBuffer;
|
||||
|
||||
/**
|
||||
* The workhorse of semantic resolution for the assembler
|
||||
*
|
||||
* <p>
|
||||
* This class takes a parse tree and some additional information (start address, context, etc.) and
|
||||
* attempts to determine possible encodings using the semantics associated with each branch of the
|
||||
* given parse tree. Details of this process are described in {@link SleighAssemblerBuilder}.
|
||||
@ -44,34 +49,34 @@ import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
|
||||
* @see SleighAssemblerBuilder
|
||||
*/
|
||||
public class AssemblyTreeResolver {
|
||||
protected static final RecursiveDescentSolver solver = RecursiveDescentSolver.getSolver();
|
||||
protected static final DbgTimer dbg = DbgTimer.INACTIVE;
|
||||
protected static final RecursiveDescentSolver SOLVER = RecursiveDescentSolver.getSolver();
|
||||
protected static final DbgTimer DBG = DbgTimer.INACTIVE;
|
||||
|
||||
public static final String INST_START = "inst_start";
|
||||
public static final String INST_NEXT = "inst_next";
|
||||
|
||||
protected final SleighLanguage lang;
|
||||
protected final long instStart;
|
||||
protected final Address at;
|
||||
protected final Map<String, Long> vals = new HashMap<>();
|
||||
protected final AssemblyParseBranch tree;
|
||||
protected final AssemblyGrammar grammar;
|
||||
protected final AssemblyPatternBlock context;
|
||||
protected final AssemblyContextGraph ctxGraph;
|
||||
|
||||
public static final String INST_START = "inst_start";
|
||||
public static final String INST_NEXT = "inst_next";
|
||||
|
||||
/**
|
||||
* Construct a resolver for the given parse tree
|
||||
*
|
||||
* @param lang
|
||||
* @param instStart the byte offset where the instruction will start
|
||||
* @param at the address where the instruction will start
|
||||
* @param tree the parse tree
|
||||
* @param context the context expected at {@code instStart}
|
||||
* @param ctxGraph the context transition graph used to resolve purely-recursive productions
|
||||
*/
|
||||
public AssemblyTreeResolver(SleighLanguage lang, long instStart, AssemblyParseBranch tree,
|
||||
public AssemblyTreeResolver(SleighLanguage lang, Address at, AssemblyParseBranch tree,
|
||||
AssemblyPatternBlock context, AssemblyContextGraph ctxGraph) {
|
||||
this.lang = lang;
|
||||
this.instStart = instStart;
|
||||
this.vals.put(INST_START, lang.getDefaultSpace().getAddressableWordOffset(instStart));
|
||||
this.at = at;
|
||||
this.vals.put(INST_START, at.getAddressableWordOffset());
|
||||
this.tree = tree;
|
||||
this.grammar = tree.getGrammar();
|
||||
this.context = context.fillMask();
|
||||
@ -84,77 +89,324 @@ public class AssemblyTreeResolver {
|
||||
* @return a set of resolutions (encodings and errors)
|
||||
*/
|
||||
public AssemblyResolutionResults resolve() {
|
||||
AssemblyResolutionResults results = resolveBranch(tree);
|
||||
AssemblyResolutionResults ret = new AssemblyResolutionResults();
|
||||
for (AssemblyResolution ar : results) {
|
||||
assert !(ar instanceof AssemblyResolvedBackfill);
|
||||
if (ar.isError()) {
|
||||
ret.add(ar);
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
|
||||
vals.put(INST_NEXT, lang.getDefaultSpace().getAddressableWordOffset(
|
||||
instStart + rc.getInstructionLength()));
|
||||
if (rc.hasBackfills()) {
|
||||
dbg.println("Backfilling: " + rc);
|
||||
}
|
||||
ar = rc.backfill(solver, vals);
|
||||
dbg.println("Backfilled final: " + ar);
|
||||
if (ar.isError()) {
|
||||
ret.add(ar);
|
||||
continue;
|
||||
}
|
||||
rc = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyResolvedPatterns empty = AssemblyResolution.nop("Empty");
|
||||
AssemblyConstructStateGenerator rootGen =
|
||||
new AssemblyConstructStateGenerator(this, tree, empty);
|
||||
|
||||
if (rc.hasBackfills()) {
|
||||
ret.add(AssemblyResolution.error("Solution is incomplete", "failed backfill",
|
||||
List.of(rc)));
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedConstructor ctx =
|
||||
AssemblyResolution.contextOnly(context, "Selecting context", null);
|
||||
AssemblyResolvedConstructor check = rc.combine(ctx);
|
||||
if (null == check) {
|
||||
ret.add(AssemblyResolution.error("Incompatible context", "resolving",
|
||||
List.of(rc)));
|
||||
continue;
|
||||
}
|
||||
rc = check;
|
||||
Collection<AssemblyResolvedError> errors = new ArrayList<>();
|
||||
Stream<AssemblyGeneratedPrototype> protStream =
|
||||
rootGen.generate(new GeneratorContext(List.of(), 0));
|
||||
|
||||
AssemblyResolution fcheck = rc.checkNotForbidden();
|
||||
if (fcheck.isError()) {
|
||||
ret.add(fcheck);
|
||||
continue;
|
||||
if (DBG == DbgTimer.ACTIVE) {
|
||||
try (DbgCtx dc = DBG.start("Prototypes:")) {
|
||||
protStream = protStream.map(prot -> {
|
||||
DBG.println(prot);
|
||||
return prot;
|
||||
}).collect(Collectors.toList()).stream();
|
||||
}
|
||||
rc = (AssemblyResolvedConstructor) fcheck;
|
||||
|
||||
ret.add(rc);
|
||||
}
|
||||
return ret;
|
||||
|
||||
Stream<AssemblyResolvedPatterns> patStream =
|
||||
protStream.map(p -> p.state).distinct().flatMap(s -> s.resolve(empty, errors));
|
||||
|
||||
AssemblyResolutionResults results = new AssemblyResolutionResults();
|
||||
patStream.forEach(results::add);
|
||||
|
||||
results = resolveRootRecursion(results);
|
||||
results = resolvePendingBackfills(results);
|
||||
results = selectContext(results);
|
||||
// TODO: Remove this? It's subsumed by filterByDisassembly, and more accurately....
|
||||
results = filterForbidden(results);
|
||||
results = filterByDisassembly(results);
|
||||
results.addAll(errors);
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a branch of the parse tree
|
||||
* If applicable, get the {@code I => I} production of the grammar
|
||||
*
|
||||
* @param branch the branch
|
||||
* @return the intermediate results
|
||||
* @return the production
|
||||
*/
|
||||
protected AssemblyResolutionResults resolveBranch(AssemblyParseBranch branch) {
|
||||
AssemblyProduction prod = branch.getProduction();
|
||||
AssemblyNonTerminal lhs = prod.getLHS();
|
||||
AssemblyProduction rec = grammar.getPureRecursion(lhs);
|
||||
// Currently, the assembler only allows recursion at the root.
|
||||
// Otherwise, the input context cannot be known.
|
||||
if (rec != null && branch.getParent() == null) {
|
||||
return resolveBranchRecursive(branch, rec);
|
||||
protected AssemblyProduction getRootRecursion() {
|
||||
assert tree.getParent() == null;
|
||||
AssemblyProduction rootProd = tree.getProduction();
|
||||
AssemblyNonTerminal start = rootProd.getLHS();
|
||||
AssemblyProduction rec = grammar.getPureRecursion(start);
|
||||
return rec;
|
||||
}
|
||||
|
||||
/**
|
||||
* If necessary, resolve recursive constructors at the root, usually for prefixes
|
||||
*
|
||||
* <p>
|
||||
* If there are no pure recursive constructors at the root, then this simply returns
|
||||
* {@code temp} unmodified.
|
||||
*
|
||||
* @param temp the resolved root results
|
||||
* @return the results with pure recursive constructors applied to obtain a compatible context
|
||||
*/
|
||||
// Ugh, public so I can refer to it in javadocs...
|
||||
public AssemblyResolutionResults resolveRootRecursion(AssemblyResolutionResults temp) {
|
||||
AssemblyProduction rootRec = getRootRecursion();
|
||||
if (rootRec == null) {
|
||||
return temp;
|
||||
}
|
||||
return resolveBranchNonRecursive(branch);
|
||||
try (DbgCtx dc = DBG.start("Resolving root recursion:")) {
|
||||
AssemblyResolutionResults result = new AssemblyResolutionResults();
|
||||
|
||||
for (AssemblyResolution ar : temp) {
|
||||
if (ar.isError()) {
|
||||
result.add(ar);
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar;
|
||||
AssemblyPatternBlock dst = rc.getContext();
|
||||
// TODO: The desired context may need to be passed in. For now, just take start.
|
||||
AssemblyPatternBlock src = context; // NOTE: This is only correct for "instruction"
|
||||
String table = "instruction";
|
||||
|
||||
DBG.println("Finding paths from " + src + " to " + ar.lineToString());
|
||||
Collection<Deque<AssemblyConstructorSemantic>> paths =
|
||||
ctxGraph.computeOptimalApplications(src, table, dst, table);
|
||||
DBG.println("Found " + paths.size());
|
||||
for (Deque<AssemblyConstructorSemantic> path : paths) {
|
||||
DBG.println(" " + path);
|
||||
result.absorb(applyRecursionPath(path, tree, rootRec, rc));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt a second time to solve operands and context changes
|
||||
*
|
||||
* <p>
|
||||
* Backfills that depended on {@code inst_next} should now easily be solved, since the
|
||||
* instruction length is now known.
|
||||
*
|
||||
* @param temp the resolved results, with backfill pending
|
||||
* @return the results without backfill, possibly with new errors
|
||||
*/
|
||||
protected AssemblyResolutionResults resolvePendingBackfills(AssemblyResolutionResults temp) {
|
||||
return temp.apply(rc -> {
|
||||
if (!rc.hasBackfills()) {
|
||||
return rc;
|
||||
}
|
||||
vals.put(INST_NEXT, at.add(rc.getInstructionLength()).getAddressableWordOffset());
|
||||
DBG.println("Backfilling: " + rc);
|
||||
AssemblyResolution ar = rc.backfill(SOLVER, vals);
|
||||
DBG.println("Backfilled final: " + ar);
|
||||
return ar;
|
||||
}).apply(rc -> {
|
||||
if (rc.hasBackfills()) {
|
||||
return AssemblyResolution.error("Solution is incomplete", "failed backfill",
|
||||
List.of(rc), null);
|
||||
}
|
||||
return rc;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter out results whose context does not match that requested
|
||||
*
|
||||
* @param temp the results whose contexts have not yet been checked
|
||||
* @return the results that pass. Those that do not are replaced with errors.
|
||||
*/
|
||||
protected AssemblyResolutionResults selectContext(AssemblyResolutionResults temp) {
|
||||
AssemblyResolvedPatterns ctx =
|
||||
AssemblyResolution.contextOnly(context, "Selecting context");
|
||||
return temp.apply(rc -> {
|
||||
AssemblyResolvedPatterns check = rc.combine(ctx);
|
||||
if (null == check) {
|
||||
return AssemblyResolution.error("Incompatible context", "resolving", List.of(rc),
|
||||
null);
|
||||
}
|
||||
return check;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter out results that would certainly be disassembled differently than assembled
|
||||
*
|
||||
* <p>
|
||||
* Because of constructor precedence rules, it is possible to assemble a pattern from a
|
||||
* prototype that would not result in equivalent disassembly. This can be detected in some cases
|
||||
* via the "forbids" mechanism, where more specific constructors are recorded with the result.
|
||||
* If the generated pattern matches one of those more-specific constructors, it is forbidden.
|
||||
*
|
||||
* @param temp the results whose forbids have not yet been checked
|
||||
* @return the results that pass. Those that do not are replaced with errors.
|
||||
*/
|
||||
protected AssemblyResolutionResults filterForbidden(AssemblyResolutionResults temp) {
|
||||
return temp.apply(rc -> rc.checkNotForbidden());
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter out results that get disassembled differently than assembled
|
||||
*
|
||||
* <p>
|
||||
* The forbids mechanism is not perfect, so as a final fail safe, we disassemble the result and
|
||||
* compare the prototypes.
|
||||
*
|
||||
* @param temp the results whose disassemblies have not yet been checked
|
||||
* @return the results that pass. Those that do not are replaced with errors.
|
||||
*/
|
||||
protected AssemblyResolutionResults filterByDisassembly(AssemblyResolutionResults temp) {
|
||||
AssemblyDefaultContext asmCtx = new AssemblyDefaultContext(lang);
|
||||
asmCtx.setContextRegister(context);
|
||||
return temp.apply(rc -> {
|
||||
MemBuffer buf =
|
||||
new ByteMemBufferImpl(at, rc.getInstruction().getVals(), lang.isBigEndian());
|
||||
try {
|
||||
SleighInstructionPrototype ip =
|
||||
(SleighInstructionPrototype) lang.parse(buf, asmCtx, false);
|
||||
if (!rc.equivalentConstructState(ip.getRootState())) {
|
||||
return AssemblyResolution.error("Disassembly prototype mismatch", rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
catch (InsufficientBytesException | UnknownInstructionException e) {
|
||||
return AssemblyResolution.error("Disassembly failed: " + e.getMessage(), rc);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the state generator for a given operand and parse tree node
|
||||
*
|
||||
* @param opSym the operand symbol
|
||||
* @param node the corresponding parse tree node, possibly null indicating a hidden operand
|
||||
* @param fromLeft the accumulated patterns from the left sibling or parent
|
||||
* @return the generator
|
||||
*/
|
||||
protected AbstractAssemblyStateGenerator<?> getStateGenerator(OperandSymbol opSym,
|
||||
AssemblyParseTreeNode node, AssemblyResolvedPatterns fromLeft) {
|
||||
if (node == null) {
|
||||
return getHiddenStateGenerator(opSym, fromLeft);
|
||||
}
|
||||
if (node.isNumeric()) {
|
||||
return new AssemblyOperandStateGenerator(this, (AssemblyParseNumericToken) node, opSym,
|
||||
fromLeft);
|
||||
}
|
||||
if (node.isConstructor()) {
|
||||
return new AssemblyConstructStateGenerator(this, (AssemblyParseBranch) node, fromLeft);
|
||||
}
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the state generator for a hidden operand
|
||||
*
|
||||
* @param opSym the operand symbol
|
||||
* @param fromLeft the accumulated patterns from the left sibling or parent
|
||||
* @return the generator
|
||||
*/
|
||||
protected AbstractAssemblyStateGenerator<?> getHiddenStateGenerator(OperandSymbol opSym,
|
||||
AssemblyResolvedPatterns fromLeft) {
|
||||
TripleSymbol defSym = opSym.getDefiningSymbol();
|
||||
if (defSym instanceof SubtableSymbol) {
|
||||
return new AssemblyHiddenConstructStateGenerator(this, (SubtableSymbol) defSym,
|
||||
fromLeft);
|
||||
}
|
||||
return new AssemblyNopStateGenerator(this, opSym, fromLeft);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a constructor pattern
|
||||
*
|
||||
* <p>
|
||||
* TODO: This is currently used only for resolving recursion. Could this be factored with
|
||||
* {@link AssemblyConstructState#resolve(AssemblyResolvedPatterns, Collection)}?
|
||||
*
|
||||
* @param sem the SLEIGH constructor
|
||||
* @param shift the shift
|
||||
* @param fromChildren the results from the single resolved child
|
||||
* @return the results
|
||||
*/
|
||||
protected AssemblyResolutionResults resolvePatterns(AssemblyConstructorSemantic sem, int shift,
|
||||
AssemblyResolutionResults fromChildren) {
|
||||
AssemblyResolutionResults results = fromChildren;
|
||||
results = applyMutations(sem, results);
|
||||
results = applyPatterns(sem, shift, results);
|
||||
results = tryResolveBackfills(results);
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO: Can this be factored?
|
||||
*/
|
||||
protected AssemblyResolutionResults parent(String description, AssemblyResolutionResults temp,
|
||||
int opCount) {
|
||||
return temp.stream()
|
||||
.map(r -> r.parent(description, opCount))
|
||||
.collect(Collectors.toCollection(AssemblyResolutionResults::new));
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO: This is currently used only for resolving recursion. Could this be factored with
|
||||
* {@link AssemblyConstructState#resolveMutations(AssemblyResolvedPatterns, Collection)}?
|
||||
*/
|
||||
protected AssemblyResolutionResults applyMutations(AssemblyConstructorSemantic sem,
|
||||
AssemblyResolutionResults temp) {
|
||||
DBG.println("Applying context mutations:");
|
||||
return temp.apply(rc -> {
|
||||
DBG.println("Current: " + rc.lineToString());
|
||||
AssemblyResolution backctx = sem.solveContextChanges(rc, vals);
|
||||
DBG.println("Mutated: " + backctx.lineToString());
|
||||
return backctx;
|
||||
}).apply(rc -> {
|
||||
return rc.solveContextChangesForForbids(sem, vals);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO: This is currently used only for resolving recursion. Could this be factored with
|
||||
* {@link AssemblyConstructState#resolvePatterns(AssemblyResolvedPatterns, Collection)}?
|
||||
*/
|
||||
protected AssemblyResolutionResults applyPatterns(AssemblyConstructorSemantic sem, int shift,
|
||||
AssemblyResolutionResults temp) {
|
||||
DBG.println("Applying patterns:");
|
||||
Collection<AssemblyResolvedPatterns> patterns =
|
||||
sem.getPatterns().stream().map(p -> p.shift(shift)).collect(Collectors.toList());
|
||||
return temp.apply(new Applicator() {
|
||||
@Override
|
||||
public Iterable<? extends AssemblyResolution> getPatterns(
|
||||
AssemblyResolvedPatterns cur) {
|
||||
return patterns;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolvedPatterns setRight(AssemblyResolvedPatterns res,
|
||||
AssemblyResolvedPatterns cur) {
|
||||
// This is typically applied by parent, so don't insert sibling
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String describeError(AssemblyResolvedPatterns rc, AssemblyResolution pat) {
|
||||
return "The patterns conflict " + pat.lineToString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolvedPatterns combineBackfill(AssemblyResolvedPatterns cur,
|
||||
AssemblyResolvedBackfill bf) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
@Override
|
||||
public AssemblyResolution finish(AssemblyResolvedPatterns resolved) {
|
||||
return resolved.checkNotForbidden();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply constructors as indicated by a path returned by the context resolution graph
|
||||
*
|
||||
* Please note: The path given will be emptied during processing.
|
||||
* <p>
|
||||
* <b>NOTE:</b> The given path will be emptied during processing.
|
||||
*
|
||||
* @param path the path to apply
|
||||
* @param branch the branch corresponding to the production whose LHS has a purely-recursive
|
||||
@ -164,265 +416,37 @@ public class AssemblyTreeResolver {
|
||||
* @return the results
|
||||
*/
|
||||
protected AssemblyResolutionResults applyRecursionPath(Deque<AssemblyConstructorSemantic> path,
|
||||
AssemblyParseBranch branch, AssemblyProduction rec, AssemblyResolvedConstructor child) {
|
||||
AssemblyParseBranch branch, AssemblyProduction rec, AssemblyResolvedPatterns child) {
|
||||
/*
|
||||
* A constructor may have multiple patterns, so I cannot assume I will get at most one
|
||||
* output at each constructor in the path. Start (1) collecting all the results, then (2)
|
||||
* filter out and report the errors, then (3) feed successful resolutions into the next
|
||||
* constructor in the path (or finish).
|
||||
*/
|
||||
AssemblyResolutionResults result = new AssemblyResolutionResults();
|
||||
AssemblyResolutionResults collected = new AssemblyResolutionResults();
|
||||
Set<AssemblyResolvedConstructor> intoNext = new LinkedHashSet<>();
|
||||
intoNext.add(child);
|
||||
AssemblyResolutionResults results = new AssemblyResolutionResults();
|
||||
results.add(child);
|
||||
while (!path.isEmpty()) {
|
||||
AssemblyConstructorSemantic sem = path.pollLast();
|
||||
List<AssemblyParseTreeNode> substs = List.of((AssemblyParseTreeNode) branch);
|
||||
// 1
|
||||
for (final AssemblyResolvedConstructor res : intoNext) {
|
||||
List<AssemblyResolvedConstructor> sel = List.of(res);
|
||||
collected.absorb(resolveSelectedChildren(rec, substs, sel, List.of(sem)));
|
||||
}
|
||||
intoNext.clear();
|
||||
// 2
|
||||
for (AssemblyResolution res : collected) {
|
||||
if (res.isError()) {
|
||||
result.add(res);
|
||||
}
|
||||
else { // 3
|
||||
intoNext.add((AssemblyResolvedConstructor) res);
|
||||
}
|
||||
|
||||
int opIdx = sem.getOperandIndex(0);
|
||||
Constructor cons = sem.getConstructor();
|
||||
OperandSymbol opSym = cons.getOperand(opIdx);
|
||||
if (-1 != opSym.getOffsetBase()) {
|
||||
throw new AssertionError("TODO");
|
||||
}
|
||||
int offset = opSym.getRelativeOffset();
|
||||
results = parent("Resolving recursive constructor: " + cons.getSourceFile() + ":" +
|
||||
cons.getLineno(), results, 1);
|
||||
results = results.apply(rc -> rc.shift(offset));
|
||||
results = resolvePatterns(sem, 0, results).apply(rc -> rc.withConstructor(cons));
|
||||
}
|
||||
result.addAll(intoNext);
|
||||
return result;
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a branch where the production's LHS has a purely-recursive definition
|
||||
*
|
||||
* @param branch the branch
|
||||
* @param rec the purely-recursive definition
|
||||
* @return the results
|
||||
* TODO: This is currently used only for resolving recursion. It seems it's missing from the
|
||||
* refactor?
|
||||
*/
|
||||
protected AssemblyResolutionResults resolveBranchRecursive(AssemblyParseBranch branch,
|
||||
AssemblyProduction rec) {
|
||||
// TODO: There's probably a clever trick regarding since-constructor productions
|
||||
// And short-circuiting once a compatible recursive rule is found.
|
||||
try (DbgCtx dc = dbg.start("Resolving (recursive) branch: " + branch.getProduction())) {
|
||||
AssemblyResolutionResults result = new AssemblyResolutionResults();
|
||||
|
||||
for (AssemblyResolution ar : resolveBranchNonRecursive(branch)) {
|
||||
if (ar.isError()) {
|
||||
result.add(ar);
|
||||
continue;
|
||||
}
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyPatternBlock dst = rc.getContext();
|
||||
// TODO: The desired context may need to be passed in. For now, just take start.
|
||||
AssemblyPatternBlock src = context; // TODO: This is only correct for "instruction"
|
||||
String table = branch.getProduction().getName();
|
||||
|
||||
dbg.println("Finding paths from " + context + " to " + ar.lineToString());
|
||||
Collection<Deque<AssemblyConstructorSemantic>> paths =
|
||||
ctxGraph.computeOptimalApplications(src, table, dst, table);
|
||||
dbg.println("Found " + paths.size());
|
||||
for (Deque<AssemblyConstructorSemantic> path : paths) {
|
||||
dbg.println(" " + path);
|
||||
result.absorb(applyRecursionPath(path, branch, rec, rc));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the given branch, having selected a particular combination of subconstructor results
|
||||
*
|
||||
* @param prod the production
|
||||
* @param substs the branches and tokens corresponding to the symbols of the production's RHS
|
||||
* @param sel the selected subconstructor results
|
||||
* @param semantics the collection of possible constructors for this production
|
||||
* @return the results
|
||||
*/
|
||||
protected AssemblyResolutionResults resolveSelectedChildren(AssemblyProduction prod,
|
||||
List<AssemblyParseTreeNode> substs, List<AssemblyResolvedConstructor> sel,
|
||||
Collection<AssemblyConstructorSemantic> semantics) {
|
||||
|
||||
try (DbgCtx dc = dbg.start("Selecting: " + IteratorUtils.toString(sel.iterator(),
|
||||
(AssemblyResolvedConstructor rc) -> rc.lineToString()))) {
|
||||
AssemblyResolutionResults results = new AssemblyResolutionResults();
|
||||
|
||||
// Pre-check the combined contexts
|
||||
AssemblyPatternBlock combCtx = AssemblyPatternBlock.nop();
|
||||
for (AssemblyResolvedConstructor child : sel) {
|
||||
AssemblyPatternBlock check = combCtx.combine(child.getContext());
|
||||
if (null == check) {
|
||||
results.add(AssemblyResolution.error(
|
||||
"Incompatible context requirements among selected children",
|
||||
"Resolving " + prod, sel));
|
||||
return results;
|
||||
}
|
||||
combCtx = check;
|
||||
}
|
||||
dbg.println("Combined context: " + combCtx);
|
||||
|
||||
AssemblyResolvedConstructor res = AssemblyResolution.nop("Resolving " + prod, sel);
|
||||
|
||||
// OK, now that we have a requirement, seek constructors that are compatible.
|
||||
nextSem: for (AssemblyConstructorSemantic sem : semantics) {
|
||||
try (DbgCtx dc2 = dbg.start("Trying: " + sem)) {
|
||||
Constructor cons = sem.getConstructor();
|
||||
|
||||
// Gather the operand values (from non-constructor semantics)
|
||||
AssemblyResolvedConstructor subres =
|
||||
res.copyAppendDescription("Applying constructor: " + sem);
|
||||
|
||||
Map<Integer, Object> opvals = new HashMap<>();
|
||||
Iterator<Integer> opidxit = sem.getOperandIndexIterator();
|
||||
Iterator<AssemblyResolvedConstructor> selit = sel.iterator();
|
||||
for (int i = 0; i < prod.size(); i++) {
|
||||
AssemblyParseTreeNode child = substs.get(i);
|
||||
AssemblySymbol sym = prod.get(i);
|
||||
if (sym.takesOperandIndex()) {
|
||||
int opidx = opidxit.next();
|
||||
if (child.isNumeric()) {
|
||||
AssemblyParseNumericToken num = (AssemblyParseNumericToken) child;
|
||||
opvals.put(opidx, num.getNumericValue());
|
||||
}
|
||||
else if (child.isConstructor()) {
|
||||
opvals.put(opidx, selit.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now, work out how to write the operand values in
|
||||
opidxit = sem.getOperandIndexIterator();
|
||||
Iterator<AssemblyResolvedConstructor> subit = sel.iterator();
|
||||
for (int i = 0; i < prod.size(); i++) {
|
||||
AssemblyParseTreeNode child = substs.get(i);
|
||||
AssemblySymbol sym = prod.get(i);
|
||||
if (!sym.takesOperandIndex()) {
|
||||
continue;
|
||||
}
|
||||
dbg.println("Current: " + subres.lineToString());
|
||||
int opidx = opidxit.next();
|
||||
OperandSymbol subsym = cons.getOperand(opidx);
|
||||
int shift = computeOffset(subsym, cons, opvals);
|
||||
String symname = subsym.getName();
|
||||
dbg.println("Processing symbol: " + symname);
|
||||
if (child.isNumeric()) {
|
||||
int bitsize = 0;
|
||||
if (sym instanceof AssemblyNumericTerminal) {
|
||||
AssemblyNumericTerminal numeric = (AssemblyNumericTerminal) sym;
|
||||
bitsize = numeric.getBitSize();
|
||||
}
|
||||
Long opval = (Long) opvals.get(opidx); // delay unboxing until solving
|
||||
PatternExpression symexp = subsym.getDefiningExpression();
|
||||
if (symexp == null) {
|
||||
symexp = subsym.getDefiningSymbol().getPatternExpression();
|
||||
}
|
||||
String desc =
|
||||
"Solution to " + sym + " := " + Long.toHexString(opval) + " = " +
|
||||
symexp + " (immediate op:" + opidx + ",shift:" + shift + ")";
|
||||
dbg.println("Writing: " + desc);
|
||||
AssemblyResolution sol =
|
||||
solveOrBackfill(symexp, opval, bitsize, vals, opvals, null, desc);
|
||||
dbg.println("Solution: " + sol);
|
||||
if (null == sol) {
|
||||
throw new AssertionError("Who returned a null solution!? " +
|
||||
"Throw an exception or return an error result, please!");
|
||||
}
|
||||
if (sol.isError()) {
|
||||
AssemblyResolvedError err = (AssemblyResolvedError) sol;
|
||||
results.add(AssemblyResolution.error(err.getError(), subres));
|
||||
continue nextSem;
|
||||
}
|
||||
if (sol instanceof AssemblyResolvedConstructor) {
|
||||
AssemblyResolvedConstructor solcon =
|
||||
(AssemblyResolvedConstructor) sol;
|
||||
AssemblyResolvedConstructor check =
|
||||
subres.combine(solcon.shift(shift));
|
||||
if (null == check) {
|
||||
results.add(AssemblyResolution.error(
|
||||
"Conflict: Immediate operand (token " + i + ") " + sol,
|
||||
subres));
|
||||
continue nextSem;
|
||||
}
|
||||
subres = check;
|
||||
}
|
||||
else {
|
||||
AssemblyResolvedBackfill solbf = (AssemblyResolvedBackfill) sol;
|
||||
subres = subres.combine(solbf.shift(shift));
|
||||
}
|
||||
}
|
||||
else if (child.isConstructor()) {
|
||||
// Write the instruction pattern in, shifted
|
||||
AssemblyResolvedConstructor childrc = subit.next();
|
||||
dbg.println("Writing subtable(opidx:" + opidx + "): " + symname + ": " +
|
||||
childrc.lineToString() + " (shift:" + shift + ")");
|
||||
// I've already combined the contexts
|
||||
AssemblyResolvedConstructor check =
|
||||
subres.combine(childrc.shift(shift));
|
||||
if (null == check) {
|
||||
results.add(AssemblyResolution.error(
|
||||
"Conflict: Subtable operand (token " + i + ")", subres));
|
||||
continue nextSem;
|
||||
}
|
||||
subres = check;
|
||||
}
|
||||
else {
|
||||
dbg.println("Probably encountered a varnode production: " + child);
|
||||
}
|
||||
}
|
||||
|
||||
// Now, write out the proper requirements based on context mutations
|
||||
AssemblyResolution backctx = sem.solveContextChanges(subres, vals, opvals);
|
||||
if (!(backctx instanceof AssemblyResolvedConstructor)) {
|
||||
results.add(backctx);
|
||||
continue;
|
||||
}
|
||||
subres = (AssemblyResolvedConstructor) backctx;
|
||||
subres = subres.solveContextChangesForForbids(sem, vals, opvals);
|
||||
|
||||
// Now, write the actual instruction and context requirements from the constructor
|
||||
// patterns
|
||||
dbg.println("Writing patterns:");
|
||||
for (AssemblyResolvedConstructor pat : sem.getPatterns()) { // use the accessor
|
||||
AssemblyResolvedConstructor temp = subres;
|
||||
dbg.println(" Pattern: " + pat.lineToString());
|
||||
dbg.println(" Current: " + temp.lineToString());
|
||||
AssemblyResolvedConstructor check = temp.combine(pat);
|
||||
if (null == check) {
|
||||
results.add(
|
||||
AssemblyResolution.error("The patterns conflict " + subres, temp));
|
||||
continue;
|
||||
}
|
||||
temp = check;
|
||||
|
||||
dbg.println(" Final: " + temp.lineToString());
|
||||
|
||||
AssemblyResolution fcheck = temp.checkNotForbidden();
|
||||
if (fcheck.isError()) {
|
||||
results.add(fcheck);
|
||||
continue;
|
||||
}
|
||||
temp = (AssemblyResolvedConstructor) fcheck;
|
||||
|
||||
results.add(temp);
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
dbg.println("While processing: " + sem);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
results = tryResolveBackfills(results);
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
protected AssemblyResolutionResults tryResolveBackfills(AssemblyResolutionResults results) {
|
||||
AssemblyResolutionResults res = new AssemblyResolutionResults();
|
||||
next_ar: for (AssemblyResolution ar : results) {
|
||||
@ -431,13 +455,13 @@ public class AssemblyTreeResolver {
|
||||
continue;
|
||||
}
|
||||
while (true) {
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
|
||||
AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar;
|
||||
if (!rc.hasBackfills()) {
|
||||
// finish: The complete solution is known
|
||||
res.add(rc);
|
||||
continue next_ar;
|
||||
}
|
||||
ar = rc.backfill(solver, vals);
|
||||
ar = rc.backfill(SOLVER, vals);
|
||||
if (ar.isError() || ar.isBackfill()) {
|
||||
// fail: It is now known that the solution doesn't exist
|
||||
res.add(ar);
|
||||
@ -454,87 +478,27 @@ public class AssemblyTreeResolver {
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a branch without considering any purely-recursive productions
|
||||
*
|
||||
* This method is used either when the LHS has no purely-recursive definition, or before
|
||||
* considering the purely-recursive definition when it is present.
|
||||
*
|
||||
* @param branch the branch
|
||||
* @return the results
|
||||
*/
|
||||
protected AssemblyResolutionResults resolveBranchNonRecursive(AssemblyParseBranch branch) {
|
||||
try (DbgCtx dc = dbg.start("Resolving (non-recursive) branch: " + branch.getProduction())) {
|
||||
// Resolve children first
|
||||
AssemblyResolutionResults results = new AssemblyResolutionResults();
|
||||
AssemblyProduction prod = branch.getProduction();
|
||||
List<AssemblyParseTreeNode> substs = branch.getSubstitutions();
|
||||
assert prod.size() == substs.size();
|
||||
|
||||
// Sort the wheat and chaff
|
||||
// The resolved ones need to stay in order for the cross product
|
||||
List<HashSet<AssemblyResolvedConstructor>> childRes = new ArrayList<>();
|
||||
List<AssemblyResolvedError> childErr = new ArrayList<>();
|
||||
for (int i = 0; i < prod.size(); i++) {
|
||||
AssemblySymbol sym = prod.get(i);
|
||||
if (!sym.takesOperandIndex()) {
|
||||
continue;
|
||||
}
|
||||
AssemblyParseTreeNode child = substs.get(i);
|
||||
if (child.isConstructor()) {
|
||||
AssemblyResolutionResults rr = resolveBranch((AssemblyParseBranch) child);
|
||||
HashSet<AssemblyResolvedConstructor> childResElem = new HashSet<>();
|
||||
for (AssemblyResolution ar : rr) {
|
||||
if (ar.isError()) {
|
||||
childErr.add((AssemblyResolvedError) ar);
|
||||
}
|
||||
else {
|
||||
childResElem.add((AssemblyResolvedConstructor) ar);
|
||||
}
|
||||
}
|
||||
childRes.add(childResElem);
|
||||
}
|
||||
}
|
||||
|
||||
// Now, search for constructors that are compatible, and resolve them wrt. the
|
||||
// selected resolved children:
|
||||
// This is also where the shifting will happen.
|
||||
Collection<AssemblyConstructorSemantic> semantics = grammar.getSemantics(prod);
|
||||
for (List<AssemblyResolvedConstructor> sel : Sets.cartesianProduct(childRes)) {
|
||||
results.absorb(resolveSelectedChildren(prod, substs,
|
||||
Collections.unmodifiableList(sel), semantics));
|
||||
}
|
||||
if (!childErr.isEmpty()) {
|
||||
results.add(AssemblyResolution.error("Child errors", "Resolving " + prod,
|
||||
Collections.unmodifiableList(childErr)));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the offset of an operand encoded in the instruction block
|
||||
*
|
||||
* <p>
|
||||
* TODO: Currently, there are duplicate mechanisms for resolving a constructor: 1) The newer
|
||||
* mechanism implemented in {@link AssemblyConstructState}, and 2) the older one implemented in
|
||||
* {@link #applyPatterns(AssemblyConstructorSemantic, int, AssemblyResolutionResults)}. The
|
||||
* latter seems to require this method, since it does not have pre-computed shifts as in the
|
||||
* former. We should probably remove the latter in favor of the former....
|
||||
*
|
||||
* @param opsym the operand symbol
|
||||
* @param cons the constructor containing the operand
|
||||
* @param res the selected subconstructor encodings
|
||||
* @return the offset (right shift) to apply to the encoded operand
|
||||
*/
|
||||
public static int computeOffset(OperandSymbol opsym, Constructor cons,
|
||||
Map<Integer, Object> res) {
|
||||
public static int computeOffset(OperandSymbol opsym, Constructor cons) {
|
||||
int offset = opsym.getRelativeOffset();
|
||||
int baseidx = opsym.getOffsetBase();
|
||||
if (baseidx != -1) {
|
||||
OperandSymbol baseop = cons.getOperand(baseidx);
|
||||
Object r = res.get(baseidx);
|
||||
if (r instanceof AssemblyResolvedConstructor) {
|
||||
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) r;
|
||||
offset += rc.getInstructionLength();
|
||||
}
|
||||
else {
|
||||
offset += baseop.getMinimumLength();
|
||||
}
|
||||
offset += computeOffset(baseop, cons, res);
|
||||
offset += baseop.getMinimumLength();
|
||||
offset += computeOffset(baseop, cons);
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
@ -545,51 +509,46 @@ public class AssemblyTreeResolver {
|
||||
* @param exp the expression to solve
|
||||
* @param goal the desired value of the expression
|
||||
* @param vals any defined symbols
|
||||
* @param res the selected subconstructor encodings
|
||||
* @param cur the resolved constructor so far
|
||||
* @param description a description of the result
|
||||
* @return the encoded solution, or a backfill record
|
||||
*/
|
||||
protected static AssemblyResolution solveOrBackfill(PatternExpression exp, MaskedLong goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
String description) {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, String description) {
|
||||
try {
|
||||
return solver.solve(exp, goal, vals, res, cur, description);
|
||||
return SOLVER.solve(exp, goal, vals, cur, description);
|
||||
}
|
||||
catch (NeedsBackfillException bf) {
|
||||
int fieldLength = solver.getInstructionLength(exp, res);
|
||||
return AssemblyResolution.backfill(exp, goal, res, fieldLength, description);
|
||||
int fieldLength = SOLVER.getInstructionLength(exp);
|
||||
return AssemblyResolution.backfill(exp, goal, fieldLength, description);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to solve an expression
|
||||
*
|
||||
* <p>
|
||||
* Converts the given goal to a fully-defined {@link MaskedLong} and then solves as before.
|
||||
*
|
||||
* @see #solveOrBackfill(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor,
|
||||
* String)
|
||||
* @see #solveOrBackfill(PatternExpression, MaskedLong, Map, AssemblyResolvedPatterns, String)
|
||||
*/
|
||||
protected static AssemblyResolution solveOrBackfill(PatternExpression exp, long goal,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
String description) {
|
||||
return solveOrBackfill(exp, MaskedLong.fromLong(goal), vals, res, cur, description);
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, String description) {
|
||||
return solveOrBackfill(exp, MaskedLong.fromLong(goal), vals, cur, description);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to solve an expression
|
||||
*
|
||||
* <p>
|
||||
* Converts the given goal and bits count to a {@link MaskedLong} and then solves as before. As
|
||||
* a special case, if {@code bits == 0}, the goal is considered fully-defined (as if
|
||||
* {@code bits == 64}).
|
||||
*
|
||||
* @see #solveOrBackfill(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor,
|
||||
* String)
|
||||
*
|
||||
* @see #solveOrBackfill(PatternExpression, MaskedLong, Map, AssemblyResolvedPatterns, String)
|
||||
*/
|
||||
protected static AssemblyResolution solveOrBackfill(PatternExpression exp, long goal, int bits,
|
||||
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
|
||||
String description) {
|
||||
Map<String, Long> vals, AssemblyResolvedPatterns cur, String description) {
|
||||
long msk;
|
||||
if (bits == 0 || bits >= 64) {
|
||||
msk = -1L;
|
||||
@ -597,7 +556,6 @@ public class AssemblyTreeResolver {
|
||||
else {
|
||||
msk = ~(-1L << bits);
|
||||
}
|
||||
return solveOrBackfill(exp, MaskedLong.fromMaskAndValue(msk, goal), vals, res, cur,
|
||||
description);
|
||||
return solveOrBackfill(exp, MaskedLong.fromMaskAndValue(msk, goal), vals, cur, description);
|
||||
}
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ public class AssemblyEOI extends AssemblyTerminal {
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseToken> match(String buffer, int pos, AssemblyGrammar grammar,
|
||||
Map<String, Long> labels) {
|
||||
AssemblyNumericSymbols symbols) {
|
||||
if (pos == buffer.length()) {
|
||||
return Collections.singleton(new AssemblyParseToken(grammar, this, ""));
|
||||
}
|
||||
@ -46,7 +46,7 @@ public class AssemblyEOI extends AssemblyTerminal {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
|
||||
public Collection<String> getSuggestions(String got, AssemblyNumericSymbols symbols) {
|
||||
return Collections.singleton("");
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyExtendedGrammar;
|
||||
|
||||
/**
|
||||
* The type of non-terminal for an "extended grammar"
|
||||
*
|
||||
* @see AssemblyExtendedGrammar
|
||||
*/
|
||||
public class AssemblyExtendedNonTerminal extends AssemblyNonTerminal {
|
||||
@ -28,6 +29,7 @@ public class AssemblyExtendedNonTerminal extends AssemblyNonTerminal {
|
||||
|
||||
/**
|
||||
* Construct a new extended non terminal, derived from the given non-terminal
|
||||
*
|
||||
* @param start the start state for the extended non-terminal
|
||||
* @param nt the non-terminal from which the extended non-terminal is derived
|
||||
* @param end the end state for the extended non-terminal
|
||||
|
@ -23,6 +23,7 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
|
||||
/**
|
||||
* A terminal that accepts only a particular numeric value
|
||||
*
|
||||
* <p>
|
||||
* This is different from a fixed string, because it will accept any encoding of the given numeric
|
||||
* value.
|
||||
*/
|
||||
@ -31,10 +32,11 @@ public class AssemblyFixedNumericTerminal extends AssemblyNumericTerminal {
|
||||
|
||||
/**
|
||||
* Construct a terminal that accepts only the given numeric value
|
||||
*
|
||||
* @param val the value to accept
|
||||
*/
|
||||
public AssemblyFixedNumericTerminal(long val) {
|
||||
super("" + val, 0);
|
||||
super("" + val, 0, null);
|
||||
this.val = val;
|
||||
}
|
||||
|
||||
@ -44,16 +46,16 @@ public class AssemblyFixedNumericTerminal extends AssemblyNumericTerminal {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
|
||||
public Collection<String> getSuggestions(String got, AssemblyNumericSymbols symbols) {
|
||||
return Collections.singleton("" + val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
|
||||
AssemblyGrammar grammar, Map<String, Long> labels) {
|
||||
AssemblyGrammar grammar, AssemblyNumericSymbols symbols) {
|
||||
// TODO: Allow label substitution here? For now, no.
|
||||
Collection<AssemblyParseNumericToken> toks =
|
||||
new HashSet<>(super.match(buffer, pos, grammar, new HashMap<String, Long>()));
|
||||
new HashSet<>(super.match(buffer, pos, grammar, AssemblyNumericSymbols.EMPTY));
|
||||
Iterator<AssemblyParseNumericToken> tokit = toks.iterator();
|
||||
while (tokit.hasNext()) {
|
||||
AssemblyParseNumericToken tok = tokit.next();
|
||||
|
@ -19,11 +19,13 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
|
||||
|
||||
/**
|
||||
* The type of non-terminal for an assembly grammar
|
||||
*
|
||||
* @see AssemblyGrammar
|
||||
*/
|
||||
public class AssemblyNonTerminal extends AssemblySymbol {
|
||||
/**
|
||||
* Construct a non-terminal having the given name
|
||||
*
|
||||
* @param name the name
|
||||
*/
|
||||
public AssemblyNonTerminal(String name) {
|
||||
|
@ -24,7 +24,9 @@ import ghidra.app.plugin.processors.sleigh.symbol.ValueMapSymbol;
|
||||
/**
|
||||
* A terminal that accepts only a particular set of numeric values, mapping each to another value
|
||||
*
|
||||
* <p>
|
||||
* This is often used for non-conventional numeric encodings.
|
||||
*
|
||||
* @see ValueMapSymbol
|
||||
*/
|
||||
public class AssemblyNumericMapTerminal extends AssemblyNumericTerminal {
|
||||
@ -32,20 +34,21 @@ public class AssemblyNumericMapTerminal extends AssemblyNumericTerminal {
|
||||
|
||||
/**
|
||||
* Construct a terminal with the given name, accepting only the keys of a given map
|
||||
*
|
||||
* @param name the name
|
||||
* @param map the map from display value to token value
|
||||
*/
|
||||
public AssemblyNumericMapTerminal(String name, Map<Long, Integer> map) {
|
||||
super(name, 0);
|
||||
super(name, 0, null);
|
||||
this.map = map;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
|
||||
AssemblyGrammar grammar, Map<String, Long> labels) {
|
||||
AssemblyGrammar grammar, AssemblyNumericSymbols symbols) {
|
||||
// NOTE: No label substitution
|
||||
Collection<AssemblyParseNumericToken> toks =
|
||||
new HashSet<>(super.match(buffer, pos, grammar, new HashMap<String, Long>()));
|
||||
new HashSet<>(super.match(buffer, pos, grammar, AssemblyNumericSymbols.EMPTY));
|
||||
Collection<AssemblyParseNumericToken> results = new LinkedHashSet<>();
|
||||
for (AssemblyParseNumericToken tok : toks) {
|
||||
Integer mapped = map.get(tok.getNumericValue());
|
||||
@ -58,7 +61,7 @@ public class AssemblyNumericMapTerminal extends AssemblyNumericTerminal {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
|
||||
public Collection<String> getSuggestions(String got, AssemblyNumericSymbols symbols) {
|
||||
Set<String> result = new HashSet<>();
|
||||
for (long k : map.keySet()) {
|
||||
result.add(Long.toString(k));
|
||||
|
@ -0,0 +1,285 @@
|
||||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.symbol;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import ghidra.program.model.address.Address;
|
||||
import ghidra.program.model.address.AddressSpace;
|
||||
import ghidra.program.model.lang.Language;
|
||||
import ghidra.program.model.lang.Register;
|
||||
import ghidra.program.model.listing.Program;
|
||||
import ghidra.program.model.symbol.*;
|
||||
|
||||
/**
|
||||
* A context to hold various symbols offered to the assembler, usable where numbers are expected.
|
||||
*/
|
||||
public final class AssemblyNumericSymbols {
|
||||
/**
 * A shared instance constructed from empty maps: it holds no equates and no labels
 */
public static final AssemblyNumericSymbols EMPTY =
|
||||
new AssemblyNumericSymbols(Map.of(), Map.of(), Map.of());
|
||||
|
||||
/**
|
||||
* Collect labels derived from memory-mapped registers in a language
|
||||
*
|
||||
* <p>
|
||||
* TODO: Use of registers should be limited to operands whose size match the register size.
|
||||
*
|
||||
* @param labels the destination map
|
||||
* @param language the language
|
||||
*/
|
||||
private static void collectLanguageLabels(Map<String, Address> labels, Language language) {
|
||||
for (Register reg : language.getRegisters()) {
|
||||
// TODO/HACK: There ought to be a better mechanism describing suitable symbolic
|
||||
// substitutions for a given operand.
|
||||
if (!reg.getAddressSpace().isRegisterSpace()) {
|
||||
labels.put(reg.getName(), reg.getAddress());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect labels from the program's database
|
||||
*
|
||||
* @param labels the destination map
|
||||
* @param program the source program
|
||||
*/
|
||||
private static void collectProgramLabels(Map<String, Address> labels, Program program) {
|
||||
final SymbolIterator it = program.getSymbolTable().getAllSymbols(false);
|
||||
while (it.hasNext()) {
|
||||
Symbol sym = it.next();
|
||||
if (sym.isExternal()) {
|
||||
continue; // skip externals - will generally be referenced indirectly not directly
|
||||
}
|
||||
SymbolType symbolType = sym.getSymbolType();
|
||||
if (symbolType != SymbolType.LABEL && symbolType != SymbolType.FUNCTION) {
|
||||
continue;
|
||||
}
|
||||
labels.put(sym.getName(), sym.getAddress());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect equates from the program's database
|
||||
*
|
||||
* @param equates the destination map
|
||||
* @param programthe source program
|
||||
*/
|
||||
private static void collectProgramEquates(Map<String, Long> equates, Program program) {
|
||||
final Iterator<Equate> it = program.getEquateTable().getEquates();
|
||||
while (it.hasNext()) {
|
||||
Equate eq = it.next();
|
||||
// Thought is: If that's what the user sees, then that's what the user will type!
|
||||
equates.put(eq.getDisplayName(), eq.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get symbols from a language, when no program is available
|
||||
*
|
||||
* @param language the language
|
||||
* @return the symbols
|
||||
*/
|
||||
public static AssemblyNumericSymbols fromLanguage(Language language) {
|
||||
Map<String, Address> labels = new HashMap<>();
|
||||
collectLanguageLabels(labels, language);
|
||||
return forMaps(Map.of(), labels);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get symbols from a program (and its language)
|
||||
*
|
||||
* <p>
|
||||
* TODO: It might be nice to cache these and use a listener to keep the maps up to date. Will
|
||||
* depend on interactive performance.
|
||||
*
|
||||
* @param program the program
|
||||
* @return the symbols
|
||||
*/
|
||||
public static AssemblyNumericSymbols fromProgram(Program program) {
|
||||
Map<String, Long> equates = new HashMap<>();
|
||||
Map<String, Address> labels = new HashMap<>();
|
||||
collectLanguageLabels(labels, program.getLanguage());
|
||||
collectProgramLabels(labels, program);
|
||||
collectProgramEquates(equates, program);
|
||||
return forMaps(equates, labels);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get symbols for the given equate and label maps
|
||||
*
|
||||
* @param equates the equates
|
||||
* @param labels the labels
|
||||
* @return the symbols
|
||||
*/
|
||||
public static AssemblyNumericSymbols forMaps(Map<String, Long> equates,
|
||||
Map<String, Address> labels) {
|
||||
return new AssemblyNumericSymbols(Map.copyOf(equates), Map.copyOf(labels),
|
||||
groupBySpace(labels));
|
||||
}
|
||||
|
||||
private static Map<AddressSpace, Map<String, Address>> groupBySpace(
|
||||
Map<String, Address> labels) {
|
||||
return Collections.unmodifiableMap(labels.entrySet()
|
||||
.stream()
|
||||
.collect(Collectors.groupingBy(ent -> ent.getValue().getAddressSpace(),
|
||||
Collectors.toUnmodifiableMap(Entry::getKey, Entry::getValue))));
|
||||
}
|
||||
|
||||
/** The names of all equates and labels, sorted so prefix searches can scan a tail set */
private final NavigableSet<String> all;
/** Equate values, keyed by name */
public final Map<String, Long> equates;
/** Label addresses, keyed by name */
public final Map<String, Address> labels;
/** Label addresses, keyed by address space and then by name */
public final Map<AddressSpace, Map<String, Address>> labelsBySpace;

/**
 * Construct the symbols from already-prepared maps
 *
 * <p>
 * The maps are stored as given. The sorted index of all names is built from the keys of the
 * equates and labels.
 *
 * @param equates the equate values, keyed by name
 * @param labels the label addresses, keyed by name
 * @param labelsBySpace the label addresses, keyed by address space and then by name
 */
private AssemblyNumericSymbols(Map<String, Long> equates, Map<String, Address> labels,
		Map<AddressSpace, Map<String, Address>> labelsBySpace) {
	this.equates = equates;
	this.labels = labels;
	this.labelsBySpace = labelsBySpace;

	NavigableSet<String> names = new TreeSet<>();
	names.addAll(equates.keySet());
	names.addAll(labels.keySet());
	this.all = names;
}
|
||||
|
||||
/**
|
||||
* Choose any symbol with the given name
|
||||
*
|
||||
* <p>
|
||||
* This will check equates first, then labels. If an equate is found, its value is returned. If
|
||||
* a label is found, its addressable word offset is returned.
|
||||
*
|
||||
* @param name the name
|
||||
* @return the value, or null
|
||||
*/
|
||||
public Long chooseAny(String name) {
|
||||
Long eq = equates.get(name);
|
||||
if (eq != null) {
|
||||
return eq;
|
||||
}
|
||||
Address addr = labels.get(name);
|
||||
if (addr != null) {
|
||||
return addr.getAddressableWordOffset();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose a label with the given name in the given space
|
||||
*
|
||||
* @param name the name
|
||||
* @param space the address space
|
||||
* @return the addressable word offset of the found label, or null
|
||||
*/
|
||||
public Long chooseBySpace(String name, AddressSpace space) {
|
||||
Map<String, Address> forSpace = labelsBySpace.get(space);
|
||||
if (forSpace == null) {
|
||||
return null;
|
||||
}
|
||||
Address addr = forSpace.get(name);
|
||||
if (addr == null) {
|
||||
return null;
|
||||
}
|
||||
return addr.getAddressableWordOffset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose a symbol with the given name, using the space as a hint
|
||||
*
|
||||
* <p>
|
||||
* If a space is not given, or if that space is the constant space, then this will choose from
|
||||
* all symbols, via {@link #chooseAny(String)}. If a space is given, and it is not the constant
|
||||
* space, then this will choose from symbols in the given space, via
|
||||
* {@link #chooseBySpace(String, AddressSpace)}.
|
||||
*
|
||||
* @param name the name
|
||||
* @param space the address space, or null
|
||||
* @return the equate value, or label addressable word offset, or null
|
||||
*/
|
||||
public Long choose(String name, AddressSpace space) {
|
||||
if (space == null || space.isConstantSpace()) {
|
||||
return chooseAny(name);
|
||||
}
|
||||
return chooseBySpace(name, space);
|
||||
}
|
||||
|
||||
private Collection<String> suggestFrom(String got, Collection<String> keys, int max,
|
||||
boolean sorted) {
|
||||
Set<String> result = new HashSet<>();
|
||||
int count = 0;
|
||||
for (String label : keys) {
|
||||
if (count >= max) {
|
||||
break;
|
||||
}
|
||||
if (label.startsWith(got)) {
|
||||
result.add(label);
|
||||
count++;
|
||||
}
|
||||
else if (sorted) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest up to max symbols having the given prefix
|
||||
*
|
||||
* @param got the prefix
|
||||
* @param max the maximum number of symbols to suggest
|
||||
* @return the collection of symbol names
|
||||
*/
|
||||
public Collection<String> suggestAny(String got, int max) {
|
||||
return suggestFrom(got, all.tailSet(got), max, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest up to max symbols from the given space having the given prefix
|
||||
*
|
||||
* @param got the prefix
|
||||
* @param space the address space
|
||||
* @param max the maximum number of symbols to suggest
|
||||
* @return the collection of symbol names
|
||||
*/
|
||||
public Collection<String> suggestBySpace(String got, AddressSpace space, int max) {
|
||||
Map<String, Address> forSpace = labelsBySpace.get(space);
|
||||
if (forSpace == null) {
|
||||
return Set.of();
|
||||
}
|
||||
// TODO: Should I sort these, perhaps lazily, to speed search?
|
||||
return suggestFrom(got, forSpace.keySet(), max, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest up to max symbols having the given prefix, using space as a hint
|
||||
*
|
||||
* <p>
|
||||
* As in {@link #chooseAny(String)}, if space is null or the constant space, then this will
|
||||
* suggest from all symbols, via {@link #suggestAny(String, int)}. If space is given, and it is
|
||||
* not the constant space, then this will suggest from symbols in the given space, via
|
||||
* {@link #suggestBySpace(String, AddressSpace, int)}.
|
||||
*
|
||||
* @param got the prefix
|
||||
* @param space the space, or null
|
||||
* @param max the maximum number of symbols to suggest
|
||||
* @return the collection of symbol names
|
||||
*/
|
||||
public Collection<String> getSuggestions(String got, AddressSpace space, int max) {
|
||||
if (space == null || space.isConstantSpace()) {
|
||||
return suggestAny(got, max);
|
||||
}
|
||||
return suggestBySpace(got, space, max);
|
||||
}
|
||||
}
|
@ -18,38 +18,46 @@ package ghidra.app.plugin.assembler.sleigh.symbol;
|
||||
import java.util.*;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
|
||||
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParser;
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
|
||||
import ghidra.program.model.address.AddressSpace;
|
||||
|
||||
/**
|
||||
* A terminal that accepts any numeric value or program label
|
||||
* A terminal that accepts any numeric value or program symbol (label, equate)
|
||||
*
|
||||
* <p>
|
||||
* The literal may take any form accepted by UNIX strtol() with base=0. By default, the literal is
|
||||
* interpreted in base 10, but it may be prefixed such that it's interpreted in an alternative
|
||||
* base. With the prefix '0x', it is interpreted in hexadecimal. With the prefix '0', it is
|
||||
* interpreted in octal.
|
||||
* interpreted in base 10, but it may be prefixed such that it's interpreted in an alternative base.
|
||||
* With the prefix '0x', it is interpreted in hexadecimal. With the prefix '0', it is interpreted in
|
||||
* octal.
|
||||
*
|
||||
* <p>
|
||||
* It may also take the value of a label. If this operand is an address operand, the acceptable
|
||||
* labels are restricted to those in the expected address space.
|
||||
*/
|
||||
public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
public static final String PREFIX_HEX = "0x";
|
||||
public static final String PREFIX_OCT = "0";
|
||||
|
||||
/** Some suggestions, other than labels, to provide */
|
||||
protected static final Collection<String> suggestions = Arrays.asList(new String[] { //
|
||||
"0", "1", "0x0", "+0x0", "-0x0", "01" //
|
||||
});
|
||||
protected static final Collection<String> SUGGESTIONS =
|
||||
List.of("0", "1", "0x0", "+0x0", "-0x0", "01");
|
||||
/** The maximum number of labels to suggest */
|
||||
protected static final int MAX_LABEL_SUGGESTIONS = 10;
|
||||
|
||||
protected final int bitsize;
|
||||
protected final AddressSpace space;
|
||||
|
||||
// TODO: Not all numeric literals can be substituted for a label
|
||||
/**
|
||||
* Construct a terminal with the given name, accepting any numeric value or program label
|
||||
*
|
||||
* @param name the name
|
||||
* @param bitsize the maximum size of the value in bits
|
||||
* @param space the address space if this terminal represents an address operand
|
||||
*/
|
||||
public AssemblyNumericTerminal(String name, int bitsize) {
|
||||
public AssemblyNumericTerminal(String name, int bitsize, AddressSpace space) {
|
||||
super(name);
|
||||
this.bitsize = bitsize;
|
||||
this.space = space;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -63,13 +71,16 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
/**
|
||||
* This is only a convenience for testing
|
||||
*
|
||||
* Please use {@link #match(String, int, AssemblyGrammar, Map) match(String, int, AssemblyGrammar, Map<String, Long>)}.
|
||||
* <p>
|
||||
* Please use {@link #match(String, int, AssemblyGrammar, Map) match(String, int,
|
||||
* AssemblyGrammar, Map<String, Long>)}.
|
||||
*
|
||||
* @param buffer the input buffer
|
||||
* @return the parsed token
|
||||
*/
|
||||
public AssemblyParseNumericToken match(String buffer) {
|
||||
Collection<AssemblyParseNumericToken> col =
|
||||
match(buffer, 0, null, AssemblyParser.EMPTY_LABELS);
|
||||
match(buffer, 0, null, AssemblyNumericSymbols.EMPTY);
|
||||
if (col.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
@ -83,7 +94,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
|
||||
AssemblyGrammar grammar, Map<String, Long> labels) {
|
||||
AssemblyGrammar grammar, AssemblyNumericSymbols symbols) {
|
||||
if (pos >= buffer.length()) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
@ -94,20 +105,21 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
return matchLiteral(pos + 1, buffer, pos, true, grammar);
|
||||
}
|
||||
else {
|
||||
return match(pos, buffer, grammar, labels);
|
||||
return match(pos, buffer, grammar, symbols);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to match a sign-less numeric literal, or a program label
|
||||
*
|
||||
* @param s the buffer cursor where the literal or label is expected
|
||||
* @param buffer the input buffer
|
||||
* @param grammar the grammar containing this terminal
|
||||
* @param labels the program labels, mapped to their values
|
||||
* @param symbols the program symbols
|
||||
* @return the parsed token, or null
|
||||
*/
|
||||
protected Collection<AssemblyParseNumericToken> match(int s, String buffer,
|
||||
AssemblyGrammar grammar, Map<String, Long> labels) {
|
||||
AssemblyGrammar grammar, AssemblyNumericSymbols symbols) {
|
||||
if (s >= buffer.length()) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
@ -126,7 +138,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
break;
|
||||
}
|
||||
String lab = buffer.substring(s, b);
|
||||
Long val = labels.get(lab);
|
||||
Long val = symbols.choose(lab, space);
|
||||
if (val == null) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
@ -135,6 +147,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
|
||||
/**
|
||||
* Try to match a numeric literal, after the optional sign, encoded in hex, decimal, or octal
|
||||
*
|
||||
* @param s buffer cursor where the literal is expected
|
||||
* @param buffer the input buffer
|
||||
* @param pos the start offset of the token parsed so far
|
||||
@ -157,6 +170,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
|
||||
/**
|
||||
* Construct a numeric token
|
||||
*
|
||||
* @param str the string value of the token taken verbatim from the buffer
|
||||
* @param num portion of the token following the optional sign and prefix
|
||||
* @param radix the radix of {@code num}
|
||||
@ -192,6 +206,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
|
||||
/**
|
||||
* Try to match a hexadecimal literal, following the optional sign and prefix
|
||||
*
|
||||
* @param s the buffer cursor where the hex portion starts
|
||||
* @param buffer the input buffer
|
||||
* @param pos the start offset of the token parsed so far
|
||||
@ -215,6 +230,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
|
||||
/**
|
||||
* Try to match a decimal literal, following the optional sign and optional prefix
|
||||
*
|
||||
* @param s the buffer cursor where the hex portion starts
|
||||
* @param buffer the input buffer
|
||||
* @param pos the start offset of the token parsed so far
|
||||
@ -238,6 +254,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
|
||||
/**
|
||||
* Try to match an octal literal, following the optional sign and prefix
|
||||
*
|
||||
* @param s the buffer cursor where the hex portion starts
|
||||
* @param buffer the input buffer
|
||||
* @param pos the start offset of the token parsed so far
|
||||
@ -264,18 +281,9 @@ public class AssemblyNumericTerminal extends AssemblyTerminal {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
|
||||
Set<String> s = new TreeSet<>(suggestions);
|
||||
int labelcount = 0;
|
||||
for (String label : labels.keySet()) {
|
||||
if (labelcount >= MAX_LABEL_SUGGESTIONS) {
|
||||
break;
|
||||
}
|
||||
if (label.startsWith(got)) {
|
||||
s.add(label);
|
||||
labelcount++;
|
||||
}
|
||||
}
|
||||
public Collection<String> getSuggestions(String got, AssemblyNumericSymbols symbols) {
|
||||
Set<String> s = new TreeSet<>(SUGGESTIONS);
|
||||
s.addAll(symbols.getSuggestions(got, space, MAX_LABEL_SUGGESTIONS));
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,7 @@ public class AssemblyStringMapTerminal extends AssemblyTerminal {
|
||||
|
||||
/**
|
||||
* Construct a terminal with the given name, accepting only the keys of a given map
|
||||
*
|
||||
* @param name the name
|
||||
* @param map the map from display text to token value
|
||||
*/
|
||||
@ -45,7 +46,7 @@ public class AssemblyStringMapTerminal extends AssemblyTerminal {
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
|
||||
AssemblyGrammar grammar, Map<String, Long> labels) {
|
||||
AssemblyGrammar grammar, AssemblyNumericSymbols symbols) {
|
||||
Collection<AssemblyParseNumericToken> result = new LinkedHashSet<>();
|
||||
for (Entry<String, Integer> ent : map.entries()) {
|
||||
String str = ent.getKey();
|
||||
@ -57,7 +58,7 @@ public class AssemblyStringMapTerminal extends AssemblyTerminal {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getSuggestions(String string, Map<String, Long> labels) {
|
||||
public Collection<String> getSuggestions(String string, AssemblyNumericSymbols symbols) {
|
||||
return map.keySet();
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,7 @@ public class AssemblyStringTerminal extends AssemblyTerminal {
|
||||
|
||||
/**
|
||||
* Construct a terminal that accepts only the given string
|
||||
*
|
||||
* @param str the string to accept
|
||||
*/
|
||||
public AssemblyStringTerminal(String str) {
|
||||
@ -42,7 +43,7 @@ public class AssemblyStringTerminal extends AssemblyTerminal {
|
||||
|
||||
@Override
|
||||
public Collection<AssemblyParseToken> match(String buffer, int pos, AssemblyGrammar grammar,
|
||||
Map<String, Long> labels) {
|
||||
AssemblyNumericSymbols symbols) {
|
||||
if (buffer.regionMatches(pos, str, 0, str.length())) {
|
||||
return Collections.singleton(new AssemblyParseToken(grammar, this, str));
|
||||
}
|
||||
@ -50,7 +51,7 @@ public class AssemblyStringTerminal extends AssemblyTerminal {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
|
||||
public Collection<String> getSuggestions(String got, AssemblyNumericSymbols symbols) {
|
||||
return Collections.singleton(str);
|
||||
}
|
||||
|
||||
|
@ -20,9 +20,11 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AbstractAssemblyGrammar;
|
||||
/**
|
||||
* A symbol in a context-free grammar
|
||||
*
|
||||
* <p>
|
||||
* Symbols can be either terminals or non-terminals. Non-terminals must have a defining production,
|
||||
* i.e., it must appear as the left-hand side of some production in the grammar.
|
||||
* i.e., it must appear as the left-hand side of some production in the grammar.
|
||||
*
|
||||
* <p>
|
||||
* Traditionally, when displayed, non-terminals should be immediately distinguishable from
|
||||
* terminals. In classic CS literature, this usually means non-terminals are in CAPS, and terminals
|
||||
* are in lower-case. Because the assembler doesn't control the names provided by SLEIGH, we
|
||||
@ -35,6 +37,7 @@ public abstract class AssemblySymbol implements Comparable<AssemblySymbol> {
|
||||
|
||||
/**
|
||||
* Construct a new symbol with the given name
|
||||
*
|
||||
* @param name the name
|
||||
*/
|
||||
public AssemblySymbol(String name) {
|
||||
@ -46,6 +49,7 @@ public abstract class AssemblySymbol implements Comparable<AssemblySymbol> {
|
||||
|
||||
/**
|
||||
* Get the name of this symbol
|
||||
*
|
||||
* @return the name
|
||||
*/
|
||||
public String getName() {
|
||||
@ -72,6 +76,7 @@ public abstract class AssemblySymbol implements Comparable<AssemblySymbol> {
|
||||
|
||||
/**
|
||||
* Check if this symbol consumes an operand index of its constructor
|
||||
*
|
||||
* @return true if the symbol represents an operand
|
||||
*/
|
||||
public boolean takesOperandIndex() {
|
||||
|
@ -16,7 +16,6 @@
|
||||
package ghidra.app.plugin.assembler.sleigh.symbol;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
|
||||
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
|
||||
@ -24,13 +23,16 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
|
||||
/**
|
||||
* The type of terminal for an assembly grammar
|
||||
*
|
||||
* <p>
|
||||
* Unlike classical parsing, each terminal provides its own tokenizer. If multiple tokenizers yield
|
||||
* a token, the parser branches, possibly creating multiple, ambiguous trees.
|
||||
*
|
||||
* @see AssemblyGrammar
|
||||
*/
|
||||
public abstract class AssemblyTerminal extends AssemblySymbol {
|
||||
/**
|
||||
* Construct a terminal having the given name
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
public AssemblyTerminal(String name) {
|
||||
@ -39,20 +41,22 @@ public abstract class AssemblyTerminal extends AssemblySymbol {
|
||||
|
||||
/**
|
||||
* Attempt to match a token from the input buffer starting at a given position
|
||||
*
|
||||
* @param buffer the input buffer
|
||||
* @param pos the cursor position in the buffer
|
||||
* @param grammar the grammar containing this terminal
|
||||
* @param labels the program labels, if applicable
|
||||
* @param symbols symbols from the program, suitable for use as numeric terminals
|
||||
* @return the matched token, or null
|
||||
*/
|
||||
public abstract Collection<? extends AssemblyParseToken> match(String buffer, int pos,
|
||||
AssemblyGrammar grammar, Map<String, Long> labels);
|
||||
AssemblyGrammar grammar, AssemblyNumericSymbols symbols);
|
||||
|
||||
/**
|
||||
* Provide a collection of strings that this terminal would have accepted
|
||||
*
|
||||
* @param got the remaining contents of the input buffer
|
||||
* @param labels the program labels, if applicable
|
||||
* @return a, possibly empty, collection of suggestions
|
||||
*/
|
||||
public abstract Collection<String> getSuggestions(String got, Map<String, Long> labels);
|
||||
public abstract Collection<String> getSuggestions(String got, AssemblyNumericSymbols symbols);
|
||||
}
|
||||
|
@ -20,8 +20,7 @@ import java.util.*;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
|
||||
import ghidra.app.plugin.assembler.sleigh.grammars.*;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
|
||||
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
|
||||
@ -38,6 +37,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode
|
||||
|
||||
/**
|
||||
* Construct a branch from the given grammar and production
|
||||
*
|
||||
* @param grammar the grammar containing the production
|
||||
* @param prod the production applied to create this branch
|
||||
*/
|
||||
@ -70,12 +70,14 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode
|
||||
|
||||
/**
|
||||
* Prepend a child to this branch
|
||||
* @param child the child
|
||||
*
|
||||
* <p>
|
||||
* Because LR parsers produce rightmost derivations, they necessarily populate the branches
|
||||
* right to left. During reduction, each child is popped from the stack, traversing them in
|
||||
* reverse order. This method prepends children so that when reduction is complete, the
|
||||
* children are aligned to the corresponding symbols from the RHS of the production.
|
||||
* reverse order. This method prepends children so that when reduction is complete, the children
|
||||
* are aligned to the corresponding symbols from the RHS of the production.
|
||||
*
|
||||
* @param child the child
|
||||
*/
|
||||
public void addChild(AssemblyParseTreeNode child) {
|
||||
assert expects().equals(child.getSym());
|
||||
@ -86,22 +88,26 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode
|
||||
/**
|
||||
* See what symbol is expected next
|
||||
*
|
||||
* <p>
|
||||
* The child added next must be associated with the token expected next.
|
||||
*
|
||||
* @return the symbol
|
||||
*/
|
||||
protected AssemblySymbol expects() {
|
||||
if (!isComplete()) {
|
||||
return prod.get(prod.size() - substs.size() - 1);
|
||||
AssemblySentential<?> rhs = prod.getRHS();
|
||||
return rhs.getSymbol(rhs.size() - substs.size() - 1);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the branch is full
|
||||
* @return true if every symbol on the RHS has a corresonding child
|
||||
*
|
||||
* @return true if every symbol on the RHS has a corresponding child
|
||||
*/
|
||||
protected boolean isComplete() {
|
||||
return prod.size() == substs.size();
|
||||
return prod.getRHS().size() == substs.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -129,6 +135,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode
|
||||
|
||||
/**
|
||||
* Get the production applied to create this branch
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public AssemblyProduction getProduction() {
|
||||
@ -137,6 +144,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode
|
||||
|
||||
/**
|
||||
* Get the list of children, indexed by corresponding symbol from the RHS
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<AssemblyParseTreeNode> getSubstitutions() {
|
||||
@ -150,6 +158,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode
|
||||
|
||||
/**
|
||||
* Get the <em>i</em>th child, corresponding to the <em>i</em>th symbol from the RHS
|
||||
*
|
||||
* @param i the position
|
||||
* @return the child
|
||||
*/
|
||||
|
@ -31,6 +31,7 @@ public class AssemblyParseNumericToken extends AssemblyParseToken {
|
||||
|
||||
/**
|
||||
* Construct a numeric terminal having the given string and numeric values
|
||||
*
|
||||
* @param grammar the grammar containing the terminal
|
||||
* @param term the terminal that matched this token
|
||||
* @param str the portion of the input comprising this token
|
||||
@ -77,6 +78,7 @@ public class AssemblyParseNumericToken extends AssemblyParseToken {
|
||||
|
||||
/**
|
||||
* Get the numeric value of the token
|
||||
*
|
||||
* @return the value
|
||||
*/
|
||||
public long getNumericValue() {
|
||||
|
@ -32,6 +32,7 @@ public class AssemblyParseToken extends AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* Construct a new token having the given string value
|
||||
*
|
||||
* @param grammar the grammar containing the terminal
|
||||
* @param term the terminal that matched this token
|
||||
* @param str the portion of the input comprising this token
|
||||
@ -67,6 +68,7 @@ public class AssemblyParseToken extends AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* Get the portion of the input comprising the token
|
||||
*
|
||||
* @return the string value
|
||||
*/
|
||||
public String getString() {
|
||||
|
@ -29,6 +29,7 @@ public abstract class AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* Construct a node for a tree parsed by the given grammar
|
||||
*
|
||||
* @param grammar the grammar
|
||||
*/
|
||||
public AssemblyParseTreeNode(AssemblyGrammar grammar) {
|
||||
@ -38,14 +39,17 @@ public abstract class AssemblyParseTreeNode {
|
||||
/**
|
||||
* Get the symbol for which this node is substituted
|
||||
*
|
||||
* <p>
|
||||
* For a branch, this is the LHS of the corresponding production. For a token, this is the
|
||||
* terminal whose tokenizer matched it.
|
||||
*
|
||||
* @return the symbol
|
||||
*/
|
||||
public abstract AssemblySymbol getSym();
|
||||
|
||||
/**
|
||||
* Get the branch which contains this node
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public AssemblyParseBranch getParent() {
|
||||
@ -54,6 +58,7 @@ public abstract class AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* Set the branch which contains this node
|
||||
*
|
||||
* @param parent
|
||||
*/
|
||||
protected void setParent(AssemblyParseBranch parent) {
|
||||
@ -63,6 +68,7 @@ public abstract class AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* For debugging: Display this parse tree via the given stream
|
||||
*
|
||||
* @param out the stream
|
||||
*/
|
||||
public void print(PrintStream out) {
|
||||
@ -71,13 +77,15 @@ public abstract class AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* For debugging: Display the tree with the given indent
|
||||
*
|
||||
* @param out the stream
|
||||
* @param indent the indent
|
||||
*/
|
||||
protected abstract void print(PrintStream out, String indent);
|
||||
|
||||
/**
|
||||
* Check if this node yields a subconstructor resolution
|
||||
* Check if this node yields a subconstructor resolution
|
||||
*
|
||||
* @return true if this node yields a subconstructor resolution
|
||||
*/
|
||||
public boolean isConstructor() {
|
||||
@ -86,6 +94,7 @@ public abstract class AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* Check if this node yields a numeric value
|
||||
*
|
||||
* @return true if this node yields a numeric value
|
||||
*/
|
||||
public boolean isNumeric() {
|
||||
@ -94,6 +103,7 @@ public abstract class AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* Get the grammar used to parse the tree
|
||||
*
|
||||
* @return the grammar
|
||||
*/
|
||||
public AssemblyGrammar getGrammar() {
|
||||
@ -102,6 +112,7 @@ public abstract class AssemblyParseTreeNode {
|
||||
|
||||
/**
|
||||
* Generate the string that this node parsed
|
||||
*
|
||||
* @return the string
|
||||
*/
|
||||
public abstract String generateString();
|
||||
|
@ -15,20 +15,21 @@
|
||||
*/
|
||||
package ghidra.app.plugin.assembler.sleigh.util;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Utilities for {@link Collection}s
|
||||
* Utilities for the Assembler
|
||||
*/
|
||||
public class SleighUtil {
|
||||
public class AsmUtil {
|
||||
/**
|
||||
* Compare two collections by their corresponding elements in order
|
||||
*
|
||||
* If the collections have differing sizes, the ordering does not matter. The smaller
|
||||
* collection precedes the larger. Otherwise, each corresponding pair of elements are compared.
|
||||
* Once an unequal pair is found, the collections are ordered by those elements. This is
|
||||
* analogous to {@link String} comparison.
|
||||
* <p>
|
||||
* If the collections have differing sizes, the ordering does not matter. The smaller collection
|
||||
* precedes the larger. Otherwise, each corresponding pair of elements are compared. Once an
|
||||
* unequal pair is found, the collections are ordered by those elements. This is analogous to
|
||||
* {@link String} comparison.
|
||||
*
|
||||
* @param a the first set
|
||||
* @param b the second set
|
||||
* @return a comparison result as in {@link Comparable#compareTo(Object)}
|
||||
@ -53,8 +54,10 @@ public class SleighUtil {
|
||||
/**
|
||||
* Compare two byte arrays by their corresponding entries
|
||||
*
|
||||
* <p>
|
||||
* If the two arrays have differing lengths, the shorter precedes the longer. Otherwise, they
|
||||
* are compared as in C's {@code memcmp}, except that Java {@code byte}s are signed.
|
||||
*
|
||||
* @param a the first array
|
||||
* @param b the second array
|
||||
* @return a comparison result as in {@link Comparable#compareTo(Object)}
|
||||
@ -74,4 +77,22 @@ public class SleighUtil {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extend a list with the given item
|
||||
*
|
||||
* <p>
|
||||
* Used in functional style when the list is immutable.
|
||||
*
|
||||
* @param <T> the type of elements
|
||||
* @param list the list
|
||||
* @param ext the additional item
|
||||
* @return an immutable copy of the list with the given item appended
|
||||
*/
|
||||
public static <T> List<T> extendList(List<T> list, T ext) {
|
||||
@SuppressWarnings("unchecked")
|
||||
T[] arr = (T[]) new Object[list.size() + 1];
|
||||
list.toArray(arr);
|
||||
arr[list.size()] = ext;
|
||||
return List.of(arr);
|
||||
}
|
||||
}
|
@ -21,6 +21,7 @@ import java.util.Stack;
|
||||
/**
|
||||
* A debugging, timing, and diagnostic tool
|
||||
*
|
||||
* <p>
|
||||
* TODO: I should probably remove this and rely on the Msg.trace() method, or at the very least,
|
||||
* refactor this to use that.
|
||||
*/
|
||||
@ -30,6 +31,7 @@ public class DbgTimer extends PrintStream {
|
||||
|
||||
/**
|
||||
* Create a new debugging timer, wrapping the given output stream
|
||||
*
|
||||
* @param out the stream
|
||||
*/
|
||||
public DbgTimer(OutputStream out) {
|
||||
@ -58,6 +60,7 @@ public class DbgTimer extends PrintStream {
|
||||
|
||||
/**
|
||||
* Create a new stream wrapping another
|
||||
*
|
||||
* @param out the stream to wrap
|
||||
*/
|
||||
private TabbingOutputStream(OutputStream out) {
|
||||
@ -66,6 +69,7 @@ public class DbgTimer extends PrintStream {
|
||||
|
||||
/**
|
||||
* Start a new (indented) line of output
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
protected void startln() throws IOException {
|
||||
@ -78,6 +82,7 @@ public class DbgTimer extends PrintStream {
|
||||
|
||||
/**
|
||||
* Workaround: Set the time stack reference
|
||||
*
|
||||
* @param timeStack the stack
|
||||
*/
|
||||
protected void setTimeStack(Stack<Long> timeStack) {
|
||||
@ -172,20 +177,21 @@ public class DbgTimer extends PrintStream {
|
||||
|
||||
/**
|
||||
* Start a new, possibly long-running, task
|
||||
* @param message the message to print when the task begins
|
||||
* @return a context to close when the task ends
|
||||
*
|
||||
* This is meant to be used idiomatically, as in a try-with-resources block:
|
||||
*
|
||||
* <pre>
|
||||
* {@code
|
||||
* try (DbgCtx dc = dbg.start("Twiddling the frobs:")) {
|
||||
* // do some classy twiddling
|
||||
* // do some classy twiddling
|
||||
* } // this will automatically print done and the time elapsed within the try block
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* This idiom is preferred because the task will be stopped even if an error occurs, if the
|
||||
* method returns from within the block, etc.
|
||||
*
|
||||
* @param message the message to print when the task begins
|
||||
* @return a context to close when the task ends
|
||||
*
|
||||
*/
|
||||
public DbgCtx start(Object message) {
|
||||
println(message);
|
||||
@ -197,6 +203,7 @@ public class DbgTimer extends PrintStream {
|
||||
/**
|
||||
* Stop the current task
|
||||
*
|
||||
* <p>
|
||||
* This will print done and the elapsed time since the start of the task. The "current task" is
|
||||
* determined from the stack.
|
||||
*/
|
||||
@ -208,6 +215,7 @@ public class DbgTimer extends PrintStream {
|
||||
|
||||
/**
|
||||
* Replace the wrapped output stream (usually temporarily)
|
||||
*
|
||||
* @see #resetOutputStream(TabbingOutputStream)
|
||||
* @param s the replacement stream
|
||||
* @return the original stream, wrapped in a tabbing stream
|
||||
@ -223,6 +231,7 @@ public class DbgTimer extends PrintStream {
|
||||
|
||||
/**
|
||||
* Put the original tabbing stream back
|
||||
*
|
||||
* @see #setOutputStream(OutputStream)
|
||||
* @param s the original wrapped stream
|
||||
* @return the replacement stream, wrapped in a tabbing stream
|
||||
|
@ -31,6 +31,7 @@ public class TableEntry<T> extends TableEntryKey {
|
||||
|
||||
/**
|
||||
* Create a new table entry with the given value at the given state and symbol
|
||||
*
|
||||
* @param state the row
|
||||
* @param sym the column
|
||||
* @param value the value
|
||||
@ -42,6 +43,7 @@ public class TableEntry<T> extends TableEntryKey {
|
||||
|
||||
/**
|
||||
* Get the value of the entry
|
||||
*
|
||||
* @return the value
|
||||
*/
|
||||
public T getValue() {
|
||||
|
@ -31,6 +31,7 @@ public class TableEntryKey implements Comparable<TableEntryKey> {
|
||||
|
||||
/**
|
||||
* Create a new key for the given state and symbol
|
||||
*
|
||||
* @param state the row
|
||||
* @param sym the column
|
||||
*/
|
||||
@ -79,6 +80,7 @@ public class TableEntryKey implements Comparable<TableEntryKey> {
|
||||
|
||||
/**
|
||||
* Get the state (row) of the key in the table
|
||||
*
|
||||
* @return the state
|
||||
*/
|
||||
public int getState() {
|
||||
@ -87,6 +89,7 @@ public class TableEntryKey implements Comparable<TableEntryKey> {
|
||||
|
||||
/**
|
||||
* Get the symbol (column) of the entry in the table
|
||||
*
|
||||
* @return the symbol
|
||||
*/
|
||||
public AssemblySymbol getSym() {
|
||||
|
@ -21,7 +21,7 @@ import java.util.List;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import generic.hash.SimpleCRC32;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
|
||||
public class ConstructState {
|
||||
private Constructor ct;
|
||||
@ -41,7 +41,11 @@ public class ConstructState {
|
||||
return resolvedStates.get(index);
|
||||
}
|
||||
|
||||
public void addSubState(ConstructState opState) {
|
||||
public int getNumSubStates() {
|
||||
return resolvedStates.size();
|
||||
}
|
||||
|
||||
void addSubState(ConstructState opState) {
|
||||
resolvedStates.add(opState);
|
||||
}
|
||||
|
||||
@ -100,7 +104,8 @@ public class ConstructState {
|
||||
* encoding
|
||||
*
|
||||
* This includes braces to describe the tree structure
|
||||
* @see AssemblyResolvedConstructor#dumpConstructorTree()
|
||||
*
|
||||
* @see AssemblyResolvedPatterns#dumpConstructorTree()
|
||||
* @return the constructor tree
|
||||
*/
|
||||
public String dumpConstructorTree() {
|
||||
|
@ -21,7 +21,7 @@ package ghidra.app.plugin.processors.sleigh;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
|
||||
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
|
||||
import ghidra.app.plugin.processors.sleigh.SleighDebugLogger.SleighDebugMode;
|
||||
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
|
||||
import ghidra.app.plugin.processors.sleigh.symbol.*;
|
||||
@ -40,10 +40,9 @@ import ghidra.util.exception.NotYetImplementedException;
|
||||
/**
|
||||
*
|
||||
*
|
||||
* The InstructionPrototype for sleigh languages.
|
||||
* The prototype is unique up to the tree of Constructors.
|
||||
* Variations in the bit pattern that none of the Constructor
|
||||
* mask/values care about get lumped under the same prototype
|
||||
* The InstructionPrototype for sleigh languages. The prototype is unique up to the tree of
|
||||
* Constructors. Variations in the bit pattern that none of the Constructor mask/values care about
|
||||
* get lumped under the same prototype.
|
||||
*/
|
||||
public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
// Flowflags for resolving flowType
|
||||
@ -126,9 +125,8 @@ public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache the Constructor state which represents the base
|
||||
* mnemonic, and the operands to that mnemonic
|
||||
* Cache the operand states for each operand in printing order
|
||||
* Cache the Constructor state which represents the base mnemonic, and the operands to that
|
||||
* mnemonic. Cache the operand states for each operand in printing order.
|
||||
*/
|
||||
private void cacheMnemonicState() {
|
||||
mnemonicState = rootState;
|
||||
@ -191,8 +189,8 @@ public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk the pcode templates in the order they would be emitted.
|
||||
* Collect flowFlags FlowRecords
|
||||
* Walk the pcode templates in the order they would be emitted. Collect flowFlags FlowRecords
|
||||
*
|
||||
* @param walker the pcode template walker
|
||||
*/
|
||||
public static FlowSummary walkTemplates(OpTplWalker walker) {
|
||||
@ -286,8 +284,8 @@ public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk the Constructor tree gathering ConstructStates which are flow destinations (flowStateList)
|
||||
* flowFlags and delayslot directives
|
||||
* Walk the Constructor tree gathering ConstructStates which are flow destinations
|
||||
* (flowStateList), flowFlags, and delayslot directives
|
||||
*/
|
||||
private void cacheTreeInfo() {
|
||||
OpTplWalker walker = new OpTplWalker(rootState, -1);
|
||||
@ -631,7 +629,9 @@ public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gather all the flow records (perhaps across multiple InstructionPrototypes via crossbuilds) and convert to Addresses
|
||||
* Gather all the flow records (perhaps across multiple InstructionPrototypes via crossbuilds)
|
||||
* and convert to Addresses
|
||||
*
|
||||
* @param res is the resulting flow Addresses
|
||||
* @param parsecontext is the parsing context for the current instruction
|
||||
* @param context is the context for the particular address so crossbuilds can be resolved
|
||||
@ -1458,9 +1458,11 @@ public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reconstruct the ParserContext's internal packed context array and its list of global ContextSet directives
|
||||
* by walking a previously resolved ConstructState tree
|
||||
* @param protoContext is the SleighParserContext containing the tree and holding the context results
|
||||
* Reconstruct the ParserContext's internal packed context array and its list of global
|
||||
* ContextSet directives by walking a previously resolved ConstructState tree
|
||||
*
|
||||
* @param protoContext is the SleighParserContext containing the tree and holding the context
|
||||
* results
|
||||
* @param debug
|
||||
* @throws MemoryAccessException
|
||||
*/
|
||||
@ -1589,7 +1591,7 @@ public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
return newContext;
|
||||
}
|
||||
|
||||
ConstructState getRootState() {
|
||||
public ConstructState getRootState() {
|
||||
return rootState;
|
||||
}
|
||||
|
||||
@ -1607,7 +1609,8 @@ public class SleighInstructionPrototype implements InstructionPrototype {
|
||||
* encoding
|
||||
*
|
||||
* This includes braces to describe the tree structure
|
||||
* @see AssemblyResolvedConstructor#dumpConstructorTree()
|
||||
*
|
||||
* @see AssemblyResolvedPatterns#dumpConstructorTree()
|
||||
* @return the constructor tree
|
||||
*/
|
||||
public String dumpConstructorTree() {
|
||||
|
@ -23,14 +23,13 @@ import ghidra.app.plugin.processors.sleigh.FixedHandle;
|
||||
import ghidra.app.plugin.processors.sleigh.ParserWalker;
|
||||
import ghidra.program.model.address.AddressFactory;
|
||||
import ghidra.program.model.address.AddressSpace;
|
||||
import ghidra.program.model.lang.InstructionContext;
|
||||
import ghidra.xml.XmlElement;
|
||||
import ghidra.xml.XmlPullParser;
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* Placeholder that resolves for a specific InstructionContext into
|
||||
* a FixedHandle representing the semantic value of a Constructor
|
||||
* Placeholder that resolves for a specific {@link InstructionContext} into a {@link FixedHandle}
|
||||
* representing the semantic value of a {@link Constructor}
|
||||
*/
|
||||
public class HandleTpl {
|
||||
|
||||
@ -131,6 +130,7 @@ public class HandleTpl {
|
||||
|
||||
/**
|
||||
* Get the size of the expected value in bits
|
||||
*
|
||||
* @return the number of bits
|
||||
*/
|
||||
public int getSize() {
|
||||
@ -144,4 +144,13 @@ public class HandleTpl {
|
||||
return space.getSpaceId().getSize();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the address space of the value, if applicable
|
||||
*
|
||||
* @return the address space, or null if not applicable
|
||||
*/
|
||||
public AddressSpace getAddressSpace() {
|
||||
return space.getSpaceId();
|
||||
}
|
||||
}
|
||||
|
@ -99,6 +99,18 @@ public class AARCH64BEAssemblyTest extends AbstractAssemblyTest {
|
||||
public void testAssemble_mov_x0_0x8() {
|
||||
assertOneCompatRestExact("mov x0,#0x8", "00:01:80:d2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAssemble_mov_x2_0x0() {
|
||||
// NB: 0 is special here because immediates include a shift. 0 can have any shift
|
||||
assertOneCompatRestExact("mov x2,#0x0", "02:00:80:d2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAssemble_mov_x1_n0x1() {
|
||||
// NB: This uses ~(imm16 << (aa_hw * 16)), so -1 becomes 0 when solving the shift
|
||||
assertOneCompatRestExact("mov x1,#-0x1", "01:00:80:92");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAssemble_sbfiz_x1_x2_0x2_0x20() {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user