GP-3707 ELF RISCV relocation processing improvements

This commit is contained in:
Gary Hayward 2023-08-15 13:24:23 -04:00 committed by ghidra1
parent 6f5ded53f2
commit 1ba91ebb51

View File

@ -13,18 +13,38 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.util.bin.format.elf.relocation;
import java.util.Iterator;
import ghidra.app.util.bin.format.elf.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.*;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.Memory;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.reloc.*;
import ghidra.program.model.reloc.Relocation.Status;
import ghidra.program.model.reloc.RelocationResult;
import ghidra.util.exception.NotFoundException;
/**
* See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc for information on the different riscv elf
* relocation types. Different relocation types are found in different contexts - not all of which are currently handled here.
* The contexts we *attempt* to handle include:
*
* 1. fully linked Elf executables like the Linux `cp` utility which rely on dynamic linking to libraries like libc.so.6
* 2. object files compiled with or without Position Independent code (`-fpic`) support
* 3. Sharable object libraries like `libc.so.6`
* 3. kernel load modules compiled with position independent code (`-fpic`) support
*
* Keep in mind:
*
* 1. You may find multiple relocations at any single address.
* 2. Many relocations and relocation variants are there to support linker/loader optimizations unneeded by Ghidra.
* 3. Some relocations can only name their target indirectly. R_RISCV_PCREL_LO12_I references a R_RISCV_PCREL_HI20 relocation,
* but needs the symbol referenced by that R_RISCV_PCREL_HI20 in order to compute a PC relative offset.
* 4. Many discrete symbols can share the same symbol name, e.g. `.L0`. These symbol names can include non-printing characters like `".L0^B2"`
*
*/
public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
@Override
@ -32,6 +52,84 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
return elf.e_machine() == ElfConstants.EM_RISCV;
}
/**
* Get the adjusted 20 high bits of a 32 bit target. The lower 12 bits will
* be found in a later instruction, using a sign-extended signed addition. If those
* 12 bits will be seen as negative, we need to increment the higher bits by one
* @param target a 32 bit offset
* @return the higher 20 bits possibly incremented by 0x1000
*/
static int getHi20(int target) {
int target_h = target & 0xfffff000;
// the low order 12 bits are sign-extended before adding, so we may need to adjust the high order 20 bits
if ((target & 0x00000800) == 0x800) {
target_h = target_h + 0x1000;
}
return target_h;
}
/**
* get the lower 12 bits of a 32 bit target. These will typically be added - not or'ed - to
* a register holding the higher 20 bits.
* @param target a 32 bit offset
* @return the lower 12 bits of target
*/
static int getLo12(int target) {
return (target & 0x00000fff);
}
/**
* PC relative relocations like R_RISCV_PCREL_LO12_I find their target indirectly,
* using their symbolValue to locate the address of the matching R_RISCV_PCREL_HI20
* or R_RISCV_GOT_HI20.
* That *HI20 relocation's symbol value points to the actual target.
* This function attempts to locate that actual target by querying Ghidra's Relocation
* and Symbol tables. There can be more than one relocation assigned to a given address,
* so we need to search.
*
* Note that this function probably belongs within ElfRelocationContext, but we have no
* published strategy for integration testing that class.
*
* @return the relocation symbol value associated with the linked relocation type.
*/
static int getSymbolValueIndirect(ElfRelocationContext elfRelocationContext, int hi20Addr) {
int target;
// Get the relevant Ghidra tables
Program program = elfRelocationContext.getProgram();
AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
RelocationTable relocationTable = program.getRelocationTable();
// get the possible address of R_RISCV_PCREL_HI20 relocation holding the target address
Address hi20Address = program.getAddressFactory().getAddress(space.getSpaceID(), hi20Addr);
// we need an address set with one holding a single address to retrieve multiple relocations
AddressSet relocationAddressSet = new AddressSet(hi20Address);
// get any relocations at this address - hopefully R_RISCV_PCREL_HI20 and likely R_RISCV_RELAX
Iterator<Relocation> relocs = relocationTable.getRelocations(relocationAddressSet);
// search the relocations for a R_RISCV_PCREL_HI20 relocation
while (relocs.hasNext()) {
Relocation rel = relocs.next();
// there may be other valid relocation types to process
if ((rel.getType() == RISCV_ElfRelocationConstants.R_RISCV_PCREL_HI20) ||
(rel.getType() == RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20)) {
int refSymbolIndex = (int) rel.getValues()[0];
// System.out.println("Matching PCREL_HI20 value is 0x" + Long.toHexString(refSymbolIndex));
// Note that an elf symbol index is not the same thing as a regular symbol index.
ElfSymbol elfSym = elfRelocationContext.getSymbol(refSymbolIndex);
int targetOffset = (int) elfRelocationContext.getSymbolValue(elfSym);
// System.out.println(
// "PCREL_HI20 Symbol \"" + elfRelocationContext.getSymbolName(refSymbolIndex) +
// "\" found with offset 0x" + Long.toHexString(targetOffset));
// compute the target offset from the referred auipc instruction
target = targetOffset - hi20Addr;
// System.out.println("PCREL_HI20 Symbol location is 0x" + Long.toHexString(target));
return target;
}
}
return 0;
}
@Override
public RelocationResult relocate(ElfRelocationContext elfRelocationContext,
ElfRelocation relocation,
@ -49,7 +147,8 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
return RelocationResult.SKIPPED;
}
long addend = relocation.hasAddend() ? relocation.getAddend() : is32 ? memory.getInt(relocationAddress) : memory.getLong(relocationAddress);
long addend = relocation.hasAddend() ? relocation.getAddend()
: is32 ? memory.getInt(relocationAddress) : memory.getLong(relocationAddress);
long offset = relocationAddress.getOffset();
long base = elfRelocationContext.getImageBaseWordAdjustmentOffset();
@ -59,27 +158,11 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
long symbolValue = elfRelocationContext.getSymbolValue(sym);
String symbolName = elfRelocationContext.getSymbolName(symbolIndex);
//TODO remove debug
switch(type) {
case 2:
case 3:
case 5:
break;
default:
// System.out.println("DEBUG RISCV: " +
// type + " " + relocationAddress + " " +
// String.format("%x", symbolValue) + " " +
// String.format("%x", addend) + " " +
// String.format("%x", offset) + " " +
// String.format("%x", base));// + " " +
//String.format("%x", memory.getInt(relocationAddress)));
break;
}
long value64 = 0;
int value32 = 0;
short value16 = 0;
byte value8 = 0;
int target = 0;
int byteLength = 4; // most relocations affect 4-bytes (change if different)
@ -151,7 +234,7 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
case RISCV_ElfRelocationConstants.R_RISCV_TLS_DTPMOD64:
// TLS relocation word64 = S->TLSINDEX
markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTPMOD32", symbolName,
markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTPMOD64", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
@ -169,7 +252,7 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
case RISCV_ElfRelocationConstants.R_RISCV_TLS_TPREL32:
// TLS relocation word32 = TLS + S + A + S_TLS_OFFSET - TLS_DTV_OFFSET
markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTREL32", symbolName,
markAsWarning(program, relocationAddress, "R_RISCV_TLS_TPREL32", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
@ -181,7 +264,7 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
case RISCV_ElfRelocationConstants.R_RISCV_BRANCH:
// PC-relative branch (B-Type)
int target = (int) (addend + symbolValue - offset);
target = (int) (addend + symbolValue - offset);
value32 =
((target & 0x01e) << 7) | ((target & 0x0800) >> 4) | ((target & 0x03e0) << 20) |
((target & 0x1000) << 19) | memory.getInt(relocationAddress);
@ -198,27 +281,18 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
break;
case RISCV_ElfRelocationConstants.R_RISCV_CALL:
// PC-relative call MACRO call,tail (auipc+jalr pair)
target = (int) (addend + symbolValue - offset);
int target_l = target << 20 >> 20;
int target_h = target - target_l;
value32 = (target_h & 0xfffff000) | memory.getInt(relocationAddress);
memory.setInt(relocationAddress, value32);
value32 = ((target_l & 0x00000fff) << 20) | memory.getInt(relocationAddress.add(4));
memory.setInt(relocationAddress.add(4), value32);
break;
// PC-relative call (PLT) MACRO call,tail (auipc+jalr pair) PIC
// Identical processing in Ghidra as the following
case RISCV_ElfRelocationConstants.R_RISCV_CALL_PLT:
// PC-relative call (PLT) MACRO call,tail (auipc+jalr pair) PIC
markAsWarning(program, relocationAddress, "R_RISCV_CALL_PLT", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
case RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20:
// PC-relative GOT reference MACRO la
markAsWarning(program, relocationAddress, "R_RISCV_GOT_HI20", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
// PC-relative call MACRO call,tail (auipc+jalr pair)
target = (int) (addend + symbolValue - offset);
memory.setInt(relocationAddress,
getHi20(target) | memory.getInt(relocationAddress));
memory.setInt(relocationAddress.add(4),
(getLo12(target) << 20) | memory.getInt(relocationAddress.add(4)));
byteLength = 8;
break;
case RISCV_ElfRelocationConstants.R_RISCV_TLS_GOT_HI20:
// PC-relative TLS IE GOT offset MACRO la.tls.ie
@ -232,23 +306,42 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
case RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20:
// PC-relative GOT reference MACRO la
case RISCV_ElfRelocationConstants.R_RISCV_PCREL_HI20:
// PC-relative reference %pcrel_hi(symbol) (U-Type)
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_HI20", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
// PC-relative, not tested on 32 bit objects
target = (int) (addend + symbolValue - offset);
memory.setInt(relocationAddress,
getHi20(target) | memory.getInt(relocationAddress));
break;
case RISCV_ElfRelocationConstants.R_RISCV_PCREL_LO12_I:
// PC-relative reference %pcrel_lo(symbol) (I-Type)
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
// PC-relative reference %pcrel_lo(symbol) (I-Type), relative to the cited pc_rel_hi20
target = getSymbolValueIndirect(elfRelocationContext, (int) symbolValue);
if (target == 0) {
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
}
value32 = ((target & 0x00000fff) << 20) | memory.getInt(relocationAddress);
memory.setInt(relocationAddress, value32);
break;
case RISCV_ElfRelocationConstants.R_RISCV_PCREL_LO12_S:
// PC-relative reference %pcrel_lo(symbol) (S-Type)
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_S", symbolName,
symbolIndex, "TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
// S-type immediates split the 12 bit value into separate 7 bit and 5 bit fields.
// Warning: untested!
target = getSymbolValueIndirect(elfRelocationContext, (int) symbolValue);
if (target == 0) {
markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName,
symbolIndex,
"TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
}
value32 = ((target & 0x000007f) << 25) | (target & 0x00000f80) |
memory.getInt(relocationAddress);
memory.setInt(relocationAddress, value32);
break;
case RISCV_ElfRelocationConstants.R_RISCV_HI20:
// Absolute address %hi(symbol) (U-Type)
@ -298,6 +391,8 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
case RISCV_ElfRelocationConstants.R_RISCV_ADD8:
// 8-bit label addition word8 = old + S + A
markAsWarning(program, relocationAddress, "R_RISCV_ADD8", symbolName, symbolIndex,
"TODO, needs support ", elfRelocationContext.getLog());
value8 = memory.getByte(relocationAddress);
value8 += (byte) symbolValue;
value8 += (byte) addend;
@ -384,34 +479,36 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
"TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
case RISCV_ElfRelocationConstants.R_RISCV_RVC_BRANCH:
case RISCV_ElfRelocationConstants.R_RISCV_RVC_BRANCH: {
// PC-relative branch offset (CB-Type)
target = (short) (addend + symbolValue - offset);
short target_s = (short) (addend + symbolValue - offset);
// 15 13 | 12 11 10|9 7| 6 5 4 3 2|1 0
// C.BEQZ offset[8|4:3] src offset[7:6|2:1|5] C1
value16 = (short) (((target & 0x100) << 4) | ((target & 0x18) << 7) |
((target & 0xc0) >> 1) |
((target & 0x06) << 2) | ((target & 0x20) >> 3) |
value16 = (short) (((target_s & 0x100) << 4) | ((target_s & 0x18) << 7) |
((target_s & 0xc0) >> 1) |
((target_s & 0x06) << 2) | ((target_s & 0x20) >> 3) |
(memory.getShort(relocationAddress) & 0xe383));
byteLength = 2;
memory.setShort(relocationAddress, value16);
byteLength = 2;
break;
}
case RISCV_ElfRelocationConstants.R_RISCV_RVC_JUMP:
// PC-relative jump offset (CJ-Type)
target = (short) (addend + symbolValue - offset);
case RISCV_ElfRelocationConstants.R_RISCV_RVC_JUMP: {
short target_s = (short) (addend + symbolValue - offset);
// Complicated swizzling going on here.
// For details, see The RISC-V Instruction Set Manual Volume I: Unprivileged ISA
// 15 13 | 12 11 10 9 8 7 6 5 3 2|1 0
// C.J offset[11| 4|9:8|10|6|7|3:1|5] C1
value16 = (short) (((target & 0x800) << 1) | ((target & 0x10) << 7) |
((target & 0x300) << 1) |
((target & 0x400) >> 2) | ((target & 0x40) << 1) | ((target & 0x80) >> 1) |
((target & 0x0e) << 2) | ((target & 0x20) >> 3) |
value16 = (short) (((target_s & 0x800) << 1) | ((target_s & 0x10) << 7) |
((target_s & 0x300) << 1) |
((target_s & 0x400) >> 2) | ((target_s & 0x40) << 1) |
((target_s & 0x80) >> 1) |
((target_s & 0x0e) << 2) | ((target_s & 0x20) >> 3) |
(memory.getShort(relocationAddress) & 0xe003));
byteLength = 2;
memory.setShort(relocationAddress, value16);
byteLength = 2;
break;
}
case RISCV_ElfRelocationConstants.R_RISCV_RVC_LUI:
// Absolute address (CI-Type)
@ -444,10 +541,8 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler {
return RelocationResult.UNSUPPORTED;
case RISCV_ElfRelocationConstants.R_RISCV_RELAX:
// Instruction pair can be relaxed
markAsWarning(program, relocationAddress, "R_RISCV_RELAX", symbolName, symbolIndex,
"TODO, needs support ", elfRelocationContext.getLog());
return RelocationResult.UNSUPPORTED;
// Instruction pair can be relaxed by the linker/loader- ignore
return RelocationResult.SKIPPED;
case RISCV_ElfRelocationConstants.R_RISCV_SUB6:
// Local label subtraction