GT-3113 x86/64 linux syscalls

This commit is contained in:
James 2019-08-27 17:06:20 -04:00
parent 586ba6ad92
commit 8302bef89a
12 changed files with 1103 additions and 4 deletions

View File

@ -72,6 +72,8 @@ data/typeinfo/win32/msvcrt/iids.txt||GHIDRA||reviewed||END|
data/typeinfo/win32/msvcrt/syntaxes.txt||GHIDRA||||END|
data/typeinfo/win32/windows_vs12_32.gdt||GHIDRA||||END|
data/typeinfo/win32/windows_vs12_64.gdt||GHIDRA||||END|
data/x64_linux_syscall_numbers||GHIDRA||||END|
data/x86_linux_syscall_numbers||GHIDRA||||END|
ghidra_scripts/AskScript.properties||GHIDRA||||END|
ghidra_scripts/RecursiveStringFinder.py||GHIDRA||||END|
ghidra_scripts/mark_in_out.py||GHIDRA||reviewed||END|

View File

@ -0,0 +1,322 @@
#format = number(decimal) syscall_name
0 read
1 write
2 open
3 close
4 stat
5 fstat
6 lstat
7 poll
8 lseek
9 mmap
10 mprotect
11 munmap
12 brk
13 rt_sigaction
14 rt_sigprocmask
15 rt_sigreturn
16 ioctl
17 pread64
18 pwrite64
19 readv
20 writev
21 access
22 pipe
23 select
24 sched_yield
25 mremap
26 msync
27 mincore
28 madvise
29 shmget
30 shmat
31 shmctl
32 dup
33 dup2
34 pause
35 nanosleep
36 getitimer
37 alarm
38 setitimer
39 getpid
40 sendfile
41 socket
42 connect
43 accept
44 sendto
45 recvfrom
46 sendmsg
47 recvmsg
48 shutdown
49 bind
50 listen
51 getsockname
52 getpeername
53 socketpair
54 setsockopt
55 getsockopt
56 clone
57 fork
58 vfork
59 execve
60 exit
61 wait4
62 kill
63 uname
64 semget
65 semop
66 semctl
67 shmdt
68 msgget
69 msgsnd
70 msgrcv
71 msgctl
72 fcntl
73 flock
74 fsync
75 fdatasync
76 truncate
77 ftruncate
78 getdents
79 getcwd
80 chdir
81 fchdir
82 rename
83 mkdir
84 rmdir
85 creat
86 link
87 unlink
88 symlink
89 readlink
90 chmod
91 fchmod
92 chown
93 fchown
94 lchown
95 umask
96 gettimeofday
97 getrlimit
98 getrusage
99 sysinfo
100 times
101 ptrace
102 getuid
103 syslog
104 getgid
105 setuid
106 setgid
107 geteuid
108 getegid
109 setpgid
110 getppid
111 getpgrp
112 setsid
113 setreuid
114 setregid
115 getgroups
116 setgroups
117 setresuid
118 getresuid
119 setresgid
120 getresgid
121 getpgid
122 setfsuid
123 setfsgid
124 getsid
125 capget
126 capset
127 rt_sigpending
128 rt_sigtimedwait
129 rt_sigqueueinfo
130 rt_sigsuspend
131 sigaltstack
132 utime
133 mknod
134 uselib
135 personality
136 ustat
137 statfs
138 fstatfs
139 sysfs
140 getpriority
141 setpriority
142 sched_setparam
143 sched_getparam
144 sched_setscheduler
145 sched_getscheduler
146 sched_get_priority_max
147 sched_get_priority_min
148 sched_rr_get_interval
149 mlock
150 munlock
151 mlockall
152 munlockall
153 vhangup
154 modify_ldt
155 pivot_root
156 _sysctl
157 prctl
158 arch_prctl
159 adjtimex
160 setrlimit
161 chroot
162 sync
163 acct
164 settimeofday
165 mount
166 umount2
167 swapon
168 swapoff
169 reboot
170 sethostname
171 setdomainname
172 iopl
173 ioperm
174 create_module
175 init_module
176 delete_module
177 get_kernel_syms
178 query_module
179 quotactl
180 nfsservctl
181 getpmsg
182 putpmsg
183 afs_syscall
184 tuxcall
185 security
186 gettid
187 readahead
188 setxattr
189 lsetxattr
190 fsetxattr
191 getxattr
192 lgetxattr
193 fgetxattr
194 listxattr
195 llistxattr
196 flistxattr
197 removexattr
198 lremovexattr
199 fremovexattr
200 tkill
201 time
202 futex
203 sched_setaffinity
204 sched_getaffinity
206 io_setup
207 io_destroy
208 io_getevents
209 io_submit
210 io_cancel
213 epoll_create
216 remap_file_pages
217 getdents64
218 set_tid_address
219 restart_syscall
220 semtimedop
221 fadvise64
222 timer_create
223 timer_settime
224 timer_gettime
225 timer_getoverrun
226 timer_delete
227 clock_settime
228 clock_gettime
229 clock_getres
230 clock_nanosleep
231 exit_group
232 epoll_wait
233 epoll_ctl
234 tgkill
235 utimes
237 mbind
238 set_mempolicy
239 get_mempolicy
240 mq_open
241 mq_unlink
242 mq_timedsend
243 mq_timedreceive
244 mq_notify
245 mq_getsetattr
246 kexec_load
247 waitid
248 add_key
249 request_key
250 keyctl
251 ioprio_set
252 ioprio_get
253 inotify_init
254 inotify_add_watch
255 inotify_rm_watch
256 migrate_pages
257 openat
258 mkdirat
259 mknodat
260 fchownat
261 futimesat
262 newfstatat
263 unlinkat
264 renameat
265 linkat
266 symlinkat
267 readlinkat
268 fchmodat
269 faccessat
270 pselect6
271 ppoll
272 unshare
273 set_robust_list
274 get_robust_list
275 splice
276 tee
277 sync_file_range
278 vmsplice
279 move_pages
280 utimensat
281 epoll_pwait
282 signalfd
283 timerfd_create
284 eventfd
285 fallocate
286 timerfd_settime
287 timerfd_gettime
288 accept4
289 signalfd4
290 eventfd2
291 epoll_create1
292 dup3
293 pipe2
294 inotify_init1
295 preadv
296 pwritev
297 rt_tgsigqueueinfo
298 perf_event_open
299 recvmmsg
300 fanotify_init
301 fanotify_mark
302 prlimit64
303 name_to_handle_at
304 open_by_handle_at
305 clock_adjtime
306 syncfs
307 sendmmsg
308 setns
309 getcpu
310 process_vm_readv
311 process_vm_writev
312 kcmp
313 finit_module
314 sched_setattr
315 sched_getattr
317 seccomp
318 getrandom
319 memfd_create
320 kexec_file_load
321 bpf
323 userfaultfd
324 membarrier
325 mlock2
326 copy_file_range
329 pkey_mprotect
330 pkey_alloc
331 pkey_free

View File

@ -0,0 +1,192 @@
#format = number(decimal) syscall_name
00 setup
01 exit
02 fork
03 read
04 write
05 open
06 close
07 waitpid
08 creat
09 link
10 unlink
11 execve
12 chdir
13 time
14 mknod
15 chmod
16 lchown
17 break
18 oldstat
19 lseek
20 getpid
21 mount
22 umount
23 setuid
24 getuid
25 stime
26 ptrace
27 alarm
28 oldfstat
29 pause
30 utime
31 stty
32 gtty
33 access
34 nice
35 ftime
36 sync
37 kill
38 rename
39 mkdir
40 rmdir
41 dup
42 pipe
43 times
44 prof
45 brk
46 setgid
47 getgid
48 signal
49 geteuid
50 getegid
51 acct
52 umount2
53 lock
54 ioctl
55 fcntl
56 mpx
57 setpgid
58 ulimit
59 oldolduname
60 umask
61 chroot
62 ustat
63 dup2
64 getppid
65 getpgrp
66 setsid
67 sigaction
68 sgetmask
69 ssetmask
70 setreuid
71 setregid
72 sigsuspend
73 sigpending
74 sethostname
75 setrlimit
76 getrlimit
77 getrusage
78 gettimeofday
79 settimeofday
80 getgroups
81 setgroups
82 select
83 symlink
84 oldlstat
85 readlink
86 uselib
87 swapon
88 reboot
89 readdir
90 mmap
91 munmap
92 truncate
93 ftruncate
94 fchmod
95 fchown
96 getpriority
97 setpriority
98 profil
99 statfs
100 fstatfs
101 ioperm
102 socketcall
103 syslog
104 setitimer
105 getitimer
106 stat
107 lstat
108 fstat
109 olduname
110 iopl
111 vhangup
112 idle
113 vm86old
114 wait4
115 swapoff
116 sysinfo
117 ipc
118 fsync
119 sigreturn
120 clone
121 setdomainname
122 uname
123 modify_ldt
124 adjtimex
125 mprotect
126 sigprocmask
127 create_module
128 init_module
129 delete_module
130 get_kernel_syms
131 quotactl
132 getpgid
133 fchdir
134 bdflush
135 sysfs
136 personality
137 afs_syscall
138 setfsuid
139 setfsgid
140 _llseek
141 getdents
142 _newselect
143 flock
144 msync
145 readv
146 writev
147 getsid
148 fdatasync
149 _sysctl
150 mlock
151 munlock
152 mlockall
153 munlockall
154 sched_setparam
155 sched_getparam
156 sched_setscheduler
157 sched_getscheduler
158 sched_yield
159 sched_get_priority_max
160 sched_get_priority_min
161 sched_rr_get_interval
162 nanosleep
163 mremap
164 setresuid
165 getresuid
166 vm86
167 query_module
168 poll
169 nfsservctl
170 setresgid
171 getresgid
172 prctl
173 rt_sigreturn
174 rt_sigaction
175 rt_sigprocmask
176 rt_sigpending
177 rt_sigtimedwait
178 rt_sigqueueinfo
179 rt_sigsuspend
180 pread
181 pwrite
182 chown
183 getcwd
184 capget
185 capset
186 sigaltstack
187 sendfile
188 getpmsg
189 putpmsg
190 vfork

View File

@ -0,0 +1,324 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//Uses overriding references and the symbolic propogator to resolve system calls
//@category Analysis
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import java.util.function.Predicate;
import generic.jar.ResourceFile;
import ghidra.app.cmd.function.ApplyFunctionDataTypesCmd;
import ghidra.app.cmd.memory.AddUninitializedMemoryBlockCmd;
import ghidra.app.plugin.core.analysis.AutoAnalysisManager;
import ghidra.app.plugin.core.analysis.ConstantPropagationContextEvaluator;
import ghidra.app.script.GhidraScript;
import ghidra.app.services.DataTypeManagerService;
import ghidra.app.util.opinion.ElfLoader;
import ghidra.framework.Application;
import ghidra.program.model.address.*;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.lang.BasicCompilerSpec;
import ghidra.program.model.lang.Register;
import ghidra.program.model.listing.*;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.symbol.*;
import ghidra.program.util.ContextEvaluator;
import ghidra.program.util.SymbolicPropogator;
import ghidra.program.util.SymbolicPropogator.Value;
import ghidra.util.Msg;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
/**
 * This script will resolve system calls for x86 or x64 Linux binaries.
 * It assumes that in the x64 case, the syscall native instruction is used to make system calls,
 * and in the x86 case, system calls are made via an indirect call to GS:[0x10].
 * It should be straightforward to modify this script for other cases.
 * <p>
 * Outline of what {@link #run()} does:
 * <ol>
 * <li>finds (or creates) an overlay block named "syscall" on the OTHER space,</li>
 * <li>finds every system call instruction inside a defined function,</li>
 * <li>uses the symbolic propogator to determine the constant in the syscall register,</li>
 * <li>creates one function per system call number in the overlay block and adds an
 * overriding reference from each call site to it,</li>
 * <li>applies function signatures from a data type archive to the created functions.</li>
 * </ol>
 */
public class ResolveX86orX64LinuxSyscallsScript extends GhidraScript {

    //disassembles to "CALL dword ptr GS:[0x10]"
    private static final byte[] x86_bytes =
        { 0x65, (byte) 0xFF, 0x15, 0x10, 0x00, 0x00, 0x00 };

    private static final String X86 = "x86";

    private static final String SYSCALL_SPACE_NAME = "syscall";

    private static final int SYSCALL_SPACE_LENGTH = 0x10000;

    //this is the name of the userop (aka CALLOTHER) in the pcode translation of the
    //native "syscall" instruction
    private static final String SYSCALL_X64_CALLOTHER = "syscall";

    //tests whether an instruction is making a system call
    private Predicate<Instruction> tester;

    //register holding the syscall number
    private String syscallRegister;

    //datatype archive containing signature of system calls
    private String datatypeArchiveName;

    //file containing map from syscall numbers to syscall names
    //note that different architectures can have different system call numbers, even
    //if they're both Linux...
    private String syscallFileName;

    //the type of overriding reference to apply
    private RefType overrideType;

    //the calling convention to use for system calls (must be defined in the appropriate .cspec file)
    private String callingConvention;

    /**
     * Script entry point. Bails out with a popup when the program is not an x86 ELF,
     * when the syscall space has to be created but the program is not held exclusively,
     * when no system call instructions are found, or when none of the system call
     * numbers could be resolved to a constant.
     * @throws Exception if any of the underlying Ghidra operations fails
     */
    @Override
    protected void run() throws Exception {

        if (!(currentProgram.getExecutableFormat().equals(ElfLoader.ELF_NAME) &&
            currentProgram.getLanguage().getProcessor().toString().equals(X86))) {
            popup("This script is intended for x86 or x64 Linux files");
            return;
        }

        //determine whether the executable is 32 or 64 bit and set fields appropriately
        int size = currentProgram.getLanguage().getLanguageDescription().getSize();
        if (size == 64) {
            tester = ResolveX86orX64LinuxSyscallsScript::checkX64Instruction;
            syscallRegister = "RAX";
            datatypeArchiveName = "generic_clib_64";
            syscallFileName = "x64_linux_syscall_numbers";
            overrideType = RefType.CALLOTHER_OVERRIDE_CALL;
            callingConvention = "syscall";
        }
        else {
            tester = ResolveX86orX64LinuxSyscallsScript::checkX86Instruction;
            syscallRegister = "EAX";
            datatypeArchiveName = "generic_clib";
            syscallFileName = "x86_linux_syscall_numbers";
            overrideType = RefType.CALL_OVERRIDE_UNCONDITIONAL;
            callingConvention = "syscall";
        }

        //get the space where the system calls live.
        //If it doesn't exist, create it.
        AddressSpace syscallSpace =
            currentProgram.getAddressFactory().getAddressSpace(SYSCALL_SPACE_NAME);
        if (syscallSpace == null) {
            //don't muck with address spaces if you don't have exclusive access to the program.
            if (!currentProgram.hasExclusiveAccess()) {
                popup("Must have exclusive access to " + currentProgram.getName() +
                    " to run this script");
                return;
            }
            Address startAddr = currentProgram.getAddressFactory().getAddressSpace(
                BasicCompilerSpec.OTHER_SPACE_NAME).getAddress(0x0L);
            AddUninitializedMemoryBlockCmd cmd = new AddUninitializedMemoryBlockCmd(
                SYSCALL_SPACE_NAME, null, this.getClass().getName(), startAddr,
                SYSCALL_SPACE_LENGTH, true, true, true, false, true);
            if (!cmd.applyTo(currentProgram)) {
                popup("Failed to create " + SYSCALL_SPACE_NAME);
                return;
            }
            syscallSpace = currentProgram.getAddressFactory().getAddressSpace(SYSCALL_SPACE_NAME);
        }
        else {
            printf("AddressSpace %s found, continuing...\n", SYSCALL_SPACE_NAME);
        }

        //get all of the functions that contain system calls
        //note that this will not find system call instructions that are not in defined functions
        Map<Function, Set<Address>> funcsToCalls = getSyscallsInFunctions(currentProgram, monitor);
        if (funcsToCalls.isEmpty()) {
            popup("No system calls found (within defined functions)");
            return;
        }

        //get the system call number at each callsite of a system call.
        //note that this is not guaranteed to succeed at a given system call call site -
        //it might be hard (or impossible) to determine a specific constant
        Map<Address, Long> addressesToSyscalls =
            resolveConstants(funcsToCalls, currentProgram, monitor);
        if (addressesToSyscalls.isEmpty()) {
            popup("Couldn't resolve any syscall constants");
            return;
        }

        //get the map from system call numbers to system call names
        //you might have to create this yourself!
        Map<Long, String> syscallNumbersToNames = getSyscallNumberMap();

        //at each system call call site where a constant could be determined, create
        //the system call (if not already created), then add the appropriate overriding reference
        //use syscallNumbersToNames to name the created functions
        //if there's not a name corresponding to the constant use a default
        for (Entry<Address, Long> entry : addressesToSyscalls.entrySet()) {
            Address callSite = entry.getKey();
            Long offset = entry.getValue();

            //the propagated value is not guaranteed to be a sensible syscall number;
            //skip the call site rather than aborting the whole script
            Address callTarget;
            try {
                callTarget = syscallSpace.getAddress(offset);
            }
            catch (AddressOutOfBoundsException e) {
                printf("Value 0x%X at %s is not a valid offset in the %s space, skipping\n",
                    offset, callSite, SYSCALL_SPACE_NAME);
                continue;
            }

            Function callee = currentProgram.getFunctionManager().getFunctionAt(callTarget);
            if (callee == null) {
                String funcName = syscallNumbersToNames.getOrDefault(offset,
                    "syscall_" + String.format("%08X", offset));
                callee = createFunction(callTarget, funcName);
                if (callee == null) {
                    //createFunction reports failure by returning null
                    printf("Couldn't create function %s at %s for the call at %s, skipping\n",
                        funcName, callTarget, callSite);
                    continue;
                }
                callee.setCallingConvention(callingConvention);
            }
            Reference ref = currentProgram.getReferenceManager().addMemoryReference(callSite,
                callTarget, overrideType, SourceType.USER_DEFINED, Reference.MNEMONIC);
            //overriding references must be primary to be active
            currentProgram.getReferenceManager().setPrimary(ref, true);
        }

        //finally, open the appropriate data type archive and apply its function data types
        //to the new system call space, so that the system calls have the correct signatures
        AutoAnalysisManager mgr = AutoAnalysisManager.getAnalysisManager(currentProgram);
        DataTypeManagerService service = mgr.getDataTypeManagerService();
        List<DataTypeManager> dataTypeManagers = new ArrayList<>();
        DataTypeManager archive = service.openDataTypeArchive(datatypeArchiveName);
        if (archive != null) {
            dataTypeManagers.add(archive);
        }
        else {
            printf("Couldn't open data type archive %s\n", datatypeArchiveName);
        }
        dataTypeManagers.add(currentProgram.getDataTypeManager());
        ApplyFunctionDataTypesCmd cmd = new ApplyFunctionDataTypesCmd(dataTypeManagers,
            new AddressSet(syscallSpace.getMinAddress(), syscallSpace.getMaxAddress()),
            SourceType.USER_DEFINED, false, false);
        cmd.applyTo(currentProgram);
    }

    /**
     * Reads the file named by {@code syscallFileName} and builds the map from system call
     * numbers to system call names. Each data line has the form
     * {@code number(decimal) syscall_name}; blank lines and lines whose first non-blank
     * character is {@code #} are ignored. Malformed lines are reported and skipped so that
     * one bad entry does not abort the script. When a number occurs more than once the last
     * entry wins (and a warning is logged).
     * @return map from syscall number to syscall name; empty if the file cannot be found or read
     */
    private Map<Long, String> getSyscallNumberMap() {
        Map<Long, String> syscallMap = new HashMap<>();
        ResourceFile rFile = Application.findDataFileInAnyModule(syscallFileName);
        if (rFile == null) {
            popup("Error opening syscall number file, using default names");
            return syscallMap;
        }
        try (InputStream stream = rFile.getInputStream();
                BufferedReader bReader = new BufferedReader(new InputStreamReader(stream,
                    java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            int lineNumber = 0;
            while ((line = bReader.readLine()) != null) {
                lineNumber++;
                String trimmed = line.trim();
                //lines starting with # are comments
                if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                    continue;
                }
                String[] parts = trimmed.split("\\s+");
                if (parts.length < 2) {
                    Msg.warn(this, syscallFileName + ":" + lineNumber +
                        ": expected \"number name\", skipping: " + trimmed);
                    continue;
                }
                long number;
                try {
                    number = Long.parseLong(parts[0]);
                }
                catch (NumberFormatException e) {
                    Msg.warn(this, syscallFileName + ":" + lineNumber +
                        ": invalid syscall number, skipping: " + trimmed);
                    continue;
                }
                String previous = syscallMap.put(number, parts[1]);
                if (previous != null && !previous.equals(parts[1])) {
                    Msg.warn(this, syscallFileName + ":" + lineNumber + ": syscall number " +
                        number + " redefined from " + previous + " to " + parts[1]);
                }
            }
        }
        catch (IOException e) {
            Msg.showError(this, null, "Error reading syscall map file", e.getMessage(), e);
        }
        return syscallMap;
    }

    /**
     * Scans through all of the functions defined in {@code program} and returns
     * a map which takes a function to the set of address in its body which contain
     * system calls
     * @param program program containing functions
     * @param tMonitor monitor
     * @return map function -> addresses in function containing syscalls
     * @throws CancelledException if the user cancels
     */
    private Map<Function, Set<Address>> getSyscallsInFunctions(Program program,
            TaskMonitor tMonitor) throws CancelledException {
        Map<Function, Set<Address>> funcsToCalls = new HashMap<>();
        for (Function func : program.getFunctionManager().getFunctionsNoStubs(true)) {
            tMonitor.checkCanceled();
            for (Instruction inst : program.getListing().getInstructions(func.getBody(), true)) {
                if (tester.test(inst)) {
                    funcsToCalls.computeIfAbsent(func, f -> new HashSet<>()).add(
                        inst.getAddress());
                }
            }
        }
        return funcsToCalls;
    }

    /**
     * Uses the symbolic propogator to attempt to determine the constant value in
     * the syscall register at each system call instruction. Call sites where no value
     * could be determined are bookmarked and left out of the result.
     *
     * @param funcsToCalls map from functions containing syscalls to address in each function of
     * the system call
     * @param program containing the functions
     * @param tMonitor monitor
     * @return map from addresses of system calls to system call numbers
     * @throws CancelledException if the user cancels
     */
    private Map<Address, Long> resolveConstants(Map<Function, Set<Address>> funcsToCalls,
            Program program, TaskMonitor tMonitor) throws CancelledException {
        Map<Address, Long> addressesToSyscalls = new HashMap<>();
        Register syscallReg = program.getLanguage().getRegister(syscallRegister);
        for (Entry<Function, Set<Address>> entry : funcsToCalls.entrySet()) {
            tMonitor.checkCanceled();
            Function func = entry.getKey();
            Address start = func.getEntryPoint();
            ContextEvaluator eval = new ConstantPropagationContextEvaluator(true);
            SymbolicPropogator symEval = new SymbolicPropogator(program);
            symEval.flowConstants(start, func.getBody(), eval, true, tMonitor);
            for (Address callSite : entry.getValue()) {
                Value val = symEval.getRegisterValue(callSite, syscallReg);
                if (val == null) {
                    createBookmark(callSite, "System Call",
                        "Couldn't resolve value of " + syscallReg);
                    printf("Couldn't resolve value of %s at %s\n", syscallReg, callSite);
                    continue;
                }
                addressesToSyscalls.put(callSite, val.getValue());
            }
        }
        return addressesToSyscalls;
    }

    /**
     * Checks whether an x86 native instruction is a system call, i.e. whether its bytes
     * are exactly those of "CALL dword ptr GS:[0x10]"
     * @param inst instruction to check
     * @return true precisely when the instruction is a system call
     */
    private static boolean checkX86Instruction(Instruction inst) {
        try {
            return Arrays.equals(x86_bytes, inst.getBytes());
        }
        catch (MemoryAccessException e) {
            Msg.info(ResolveX86orX64LinuxSyscallsScript.class,
                "MemoryAccessException at " + inst.getAddress().toString());
            return false;
        }
    }

    /**
     * Checks whether an x64 instruction is a system call, i.e. whether its pcode contains
     * a CALLOTHER op whose userop name is "syscall"
     * @param inst instruction to check
     * @return true precisely when the instruction is a system call
     */
    private static boolean checkX64Instruction(Instruction inst) {
        for (PcodeOp op : inst.getPcode()) {
            if (op.getOpcode() != PcodeOp.CALLOTHER) {
                continue;
            }
            //input 0 of a CALLOTHER op is the index of the userop
            int index = (int) op.getInput(0).getOffset();
            String opName = inst.getProgram().getLanguage().getUserDefinedOpName(index);
            //constant first: tolerate a null name for an unknown index
            if (SYSCALL_X64_CALLOTHER.equals(opName)) {
                return true;
            }
        }
        return false;
    }
}

View File

@ -164,4 +164,52 @@
<range space="stack" first="8" last="39"/>
</localrange>
</prototype>
<!-- Calling convention for Linux x86-64 system calls made with the SYSCALL instruction.
     Up to six arguments are taken from RDI, RSI, RDX, R10, R8 and R9 (in that order; note
     R10 where the ordinary function convention uses RCX) and the result is returned in RAX.
     RCX and R11 are killed by the call; the remaining listed registers are unaffected.
     ResolveX86orX64LinuxSyscallsScript assigns this convention, by name, to the
     functions it creates. -->
<prototype name="syscall" extrapop="8" stackshift="8">
<input pointermax="8">
<pentry minsize="1" maxsize="8">
<register name="RDI"/>
</pentry>
<pentry minsize="1" maxsize="8">
<register name="RSI"/>
</pentry>
<pentry minsize="1" maxsize="8">
<register name="RDX"/>
</pentry>
<pentry minsize="1" maxsize="8">
<register name="R10"/>
</pentry>
<pentry minsize="1" maxsize="8">
<register name="R8"/>
</pentry>
<pentry minsize="1" maxsize="8">
<register name="R9"/>
</pentry>
</input>
<output killedbycall="true">
<pentry minsize="1" maxsize="8">
<register name="RAX"/>
</pentry>
</output>
<unaffected>
<varnode space="ram" offset="0" size="8"/>
<register name="RBX"/>
<register name="RDX"/>
<register name="RBP"/>
<register name="RDI"/>
<register name="RSI"/>
<register name="RSP"/>
<register name="R8"/>
<register name="R9"/>
<register name="R10"/>
<register name="R12"/>
<register name="R13"/>
<register name="R14"/>
<register name="R15"/>
<register name="DF"/>
</unaffected>
<killedbycall>
<register name="RCX"/>
<register name="R11"/>
</killedbycall>
</prototype>
</compiler_spec>

View File

@ -246,6 +246,47 @@
<register name="EAX"/>
</likelytrash>
</prototype>
<!-- Calling convention for 32-bit x86 Linux system calls.
     Up to six arguments are taken from EBX, ECX, EDX, ESI, EDI and EBP (in that order)
     and the result is returned in EAX, which is the only register killed by the call;
     the remaining listed registers are unaffected.
     ResolveX86orX64LinuxSyscallsScript assigns this convention, by name, to the
     functions it creates. -->
<prototype name="syscall" extrapop="4" stackshift="4">
<input>
<pentry minsize="1" maxsize="4">
<register name="EBX"/>
</pentry>
<pentry minsize="1" maxsize="4">
<register name="ECX"/>
</pentry>
<pentry minsize="1" maxsize="4">
<register name="EDX"/>
</pentry>
<pentry minsize="1" maxsize="4">
<register name="ESI"/>
</pentry>
<pentry minsize="1" maxsize="4">
<register name="EDI"/>
</pentry>
<pentry minsize="1" maxsize="4">
<register name="EBP"/>
</pentry>
</input>
<output killedbycall="true">
<pentry minsize="1" maxsize="4">
<register name="EAX"/>
</pentry>
</output>
<unaffected>
<register name="EBX"/>
<register name="ECX"/>
<register name="EDX"/>
<register name="EBP"/>
<register name="EDI"/>
<register name="ESI"/>
<register name="ESP"/>
<register name="DF"/>
</unaffected>
<killedbycall>
<register name="EAX"/>
</killedbycall>
</prototype>
<resolveprototype name="__cdecl/__regparm">
<model name="__cdecl"/> <!-- The default case -->
<model name="__regparm3"/>

View File

@ -4,7 +4,7 @@ AS=gcc
OUTDIR := out
EXAMPLES := dataMutability override custom switch sharedReturn jumpWithinInstruction opaque globalRegVars.so setRegister compilerVsDecompiler noReturn createStructure animals ldiv inline
EXAMPLES := dataMutability override custom switch sharedReturn jumpWithinInstruction opaque globalRegVars.so setRegister compilerVsDecompiler noReturn createStructure animals ldiv inline write
$(EXAMPLES): | $(OUTDIR)
@ -59,5 +59,8 @@ ldiv: ldiv.c
inline: inline.s
$(AS) inline.s -o $(OUTDIR)/inline
write: write.c
$(CC) write.c -o $(OUTDIR)/write -O1
clean:
rm -rf $(OUTDIR)

View File

@ -0,0 +1,27 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
char *hello = "Hello World!\n";
int main(int argc, char **argv){
asm(".intel_syntax noprefix");
asm("mov rax,1");
asm("mov rdi,1");
asm("mov rsi, QWORD PTR hello[rip]");
asm("mov rdx, 13");
asm("syscall");
asm(".att_syntax prefix");
return 0;
}

View File

@ -657,7 +657,134 @@ calling convention, it assumes that the call to \textbf{adjustStack} does not ch
\end{block}
\end{frame}
\subsection{System Calls}
\begin{frame}
\begin{block}{System Calls}
\begin{itemize}
\item \textbf{System calls} are a way for a program to request a service from the operating system.
\item Examples include process control, file management, device management,\ldots
\item A typical implementation uses a special native instruction along with a designated register, which we'll call the
\textbf{system call register}.
\item When the special instruction is executed, the value in the system call register determines which function is called.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\item Open and analyze the file \textbf{write}, then navigate to \textbf{main}.
\item[] Note: \textbf{main} prints \texttt{Hello World!} to the screen using the \textbf{write} system call.
\end{enumerate}
\begin{itemize}
\item Before going further, let's examine what we see.
\begin{itemize}
\item In the decompiler, you should see \textbf{syscall()}, which looks like a function call but isn't (try clicking on it).
\item This is an example of a \textbf{user-defined Pcode op}.
\item Such operations are used when implementing the Pcode for a particular instruction is too hard (or impossible).
\item These operations show up as \textbf{CALLOTHER} Pcode ops in the Pcode field in the Listing. They can have inputs and outputs, but otherwise are treated
as black boxes by the decompiler.
\end{itemize}
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{1}
\item In the decompiler, why is the return value of \textbf{main} \texttt{undefined [16]}?
\end{enumerate}
\pause
\begin{itemize}
\item The \textbf{SYSCALL} instruction is translated to a single \textbf{CALLOTHER} Pcode op (named \textbf{syscall}). The decompiler does not consider this operation to have any
side effects, so when it tries to automatically determine the return type it sees a move to \textbf{RDX} and a move to \textbf{RAX} before the \textbf{RET} instruction.
These registers form a register pair for this architecture, so the decompiler thinks the return value is 16 bytes.
\item So how do we improve the decompilation?
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{itemize}
\item This system call is a call to \textbf{write} since \texttt{1} is written to the system call register (\textbf{RAX}) before the \textbf{syscall}
instruction is executed (search online for ``x64 Linux syscall table'').
\item We'd like the call to \textbf{write} to appear with the correct name, signature, and calling convention.
\item We'd also like cross references, so that we can easily see all calls to \textbf{write}.
\item During execution, the code for the \textbf{write} function is somewhere in the kernel and not in the program's address space.
\item So what should the call target be in Ghidra?
\item Answer: use \textbf{overlay blocks} on the \textbf{OTHER} space.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{itemize}
\item Prior to Ghidra 9.1, the \textbf{OTHER} space was used to store data from a binary that does not get loaded into memory, such as the \texttt{.comment} section of an ELF file.
\item In 9.1, we've extended the ability to make references into the \textbf{OTHER} space.
\item You can't use this space directly, but you can create \textbf{overlay blocks} on the \textbf{OTHER} space.
\item Overlays are a (sort of old school) technique to allow different blocks to be swapped in and out at the same address.
\item For our purposes, they allow us to put things in an artificial memory space without the possibility of conflicting with other uses of that space.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{2}
\item Create an overlay of the \textbf{OTHER} space as follows:
\begin{enumerate}[(i)]
\item Bring up the \textbf{Memory Map} by clicking on the ram chip icon in the tool bar of the Code Browser.
\item Click on the green plus to add a block.
\item Call the block \textbf{syscall\_block}. Have it start at address \texttt{0x0} of the \textbf{OTHER} space and have length \texttt{0x1000}.
For Block Type, select \textbf{Overlay} from the drop-down menu.
\end{enumerate}
\end{enumerate}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{3}
\item Next, go to address \texttt{0x1} in \textbf{syscall\_block} and create a function (in the Listing, select both the address and the \texttt{??} and press \texttt{f}).
\item Edit this new function to give it the name \textbf{write} and the \textbf{syscall} calling convention.
\item If you happen to know the parameters and their types you can add them. Alternatively, select the new function \textbf{write} in the Code Browser, right-click on
\textbf{generic\_clib\_64} in the \textbf{Data Type Manager}, and select \textbf{Apply Function Data Types}.
\item[] Note: the function we've created has no body. It's essentially an address to hang a function signature and to get cross-references.
\end{enumerate}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{6}
\item Now, navigate back to the \textbf{syscall} instruction in \textbf{main}.
\item Click on the instruction in the Listing, then press \texttt{r} to bring up the \textbf{Reference Manager}.
\item Click the green plus to add a reference. Enter \textbf{syscall\_block::1} for the ``To Address'' and \textbf{CALLOTHER\_OVERRIDE\_CALL} for the Ref-Type.
This reference type essentially transforms the \textbf{CALLOTHER} Pcode op to a \textbf{CALL} op before sending the Pcode to the decompiler. The call target is the ``To Address''
of the reference.
\item[] The decompilation should now look as expected.
\end{enumerate}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{System Call Notes}
\begin{enumerate}
\item The script \texttt{ResolveX86orX64LinuxSyscallsScript.java} will do all of this for you. You can run it on this file, but a better demonstration is to run it on a
libc shared object file.
\item The script uses the \textbf{Symbolic Propagator} to determine the value of a register at a particular location.
\item The script requires a mapping from system call numbers to system call names. The x86 and x64 ones come with Ghidra; you will need to supply others.
\item Also, the signatures of most Linux system calls are included with Ghidra (used in step 6 above). The script shows you how to apply function data types programmatically,
but you might have to supply your own data type archive.
\end{enumerate}
\end{block}
\end{frame}
\section{Improving Decompilation: Control Flow}
@ -760,10 +887,22 @@ determine statically.
\begin{block}{Exercise: Opaque Predicates}
\begin{enumerate}
\item Open and analyze the file \textbf{opaque}, then navigate to the function \textbf{main}.
\item \textbf{main} contains an opaque predicate. Find it and fix it with the instruction patcher by changing a conditional jump to an unconditional jump.
\item To patch an instruction, right-click on it in the Listing and select \textbf{Patch Instruction}.
\item Hint: The opaque predicate is based on the fact that if you square an integer and reduce mod 4, you can only ever get 0 or 1. Look for a multiplication, modular reduction (optimized to a bitmask), and comparison in the assembly.
\item \textbf{main} contains an opaque predicate. Find it and fix it by either:
\begin{enumerate}[(i)]
\item Changing a conditional jump to an unconditional jump using the instruction patcher. To patch an instruction, right-click on it in the Listing and select \textbf{Patch Instruction}.
\item Adding a (primary) reference with Ref-Type \textbf{JUMP\_OVERRIDE\_UNCONDITIONAL} on the appropriate conditional jump. The ``To Address'' of the reference should be the jump target.
To the decompiler, this will change the conditional jump to an unconditional jump.
\end{enumerate}
\item[] (hint on next slide)
\end{enumerate}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Exercise: Opaque Predicates}
\begin{itemize}
\item Hint: The opaque predicate is based on the fact that if you square an integer and reduce mod 4, you can only ever get 0 or 1. Look for a multiplication, modular reduction (optimized to a bitmask), and comparison in the assembly.
\end{itemize}
\end{block}
\end{frame}

Binary file not shown.

View File

@ -39,6 +39,7 @@ GhidraClass/ExerciseFiles/Advanced/override.so||GHIDRA||||END|
GhidraClass/ExerciseFiles/Advanced/setRegister||GHIDRA||||END|
GhidraClass/ExerciseFiles/Advanced/sharedReturn||GHIDRA||||END|
GhidraClass/ExerciseFiles/Advanced/switch||GHIDRA||||END|
GhidraClass/ExerciseFiles/Advanced/write||GHIDRA||||END|
GhidraClass/ExerciseFiles/Emulation/Source/README.txt||GHIDRA||||END|
GhidraClass/ExerciseFiles/VersionTracking/WallaceSrc.exe||GHIDRA||||END|
GhidraClass/ExerciseFiles/VersionTracking/WallaceVersion2.exe||GHIDRA||||END|