I created this application as a little challenge and some practice at manually obfuscating an application at the assembly level.
I wrote the application in IA32 assembly and then manually obfuscated it using a couple of different methods.
Here I will show how to solve the challenge in 2 different ways.
Lastly I will show how the obfuscation could have been done better so that it would have been a lot more difficult to solve this using a simple static disassembly.
The Challenge
We are given the static disassembly below of a 32bit linux application which says whether or not the author is going to some event:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86 | ./going-or-not-obf: file format elf32-i386
Disassembly of section .text:
08048060 <.text>:
8048060: 89 c2 mov edx,eax
8048062: bf 25 00 00 00 mov edi,0x25
8048067: eb 4d jmp 0x80480b6
8048069: b3 32 mov bl,0x32
804806b: 5e pop esi
804806c: 31 c0 xor eax,eax
804806e: 74 6c je 0x80480dc
8048070: b7 6a mov bh,0x6a
8048072: e8 17 00 00 00 call 0x804808e
8048077: b1 04 mov cl,0x4
8048079: 8a 06 mov al,BYTE PTR [esi]
804807b: 29 cc sub esp,ecx
804807d: 41 inc ecx
804807e: 30 c8 xor al,cl
8048080: 31 c9 xor ecx,ecx
8048082: 83 f8 04 cmp eax,0x4
8048085: 74 12 je 0x8048099
8048087: 8d 4d f1 lea ecx,[ebp-0xf]
804808a: b2 10 mov dl,0x10
804808c: eb 09 jmp 0x8048097
804808e: 31 db xor ebx,ebx
8048090: 31 c9 xor ecx,ecx
8048092: 89 ca mov edx,ecx
8048094: ff 24 24 jmp DWORD PTR [esp]
8048097: eb 05 jmp 0x804809e
8048099: 8d 4d e5 lea ecx,[ebp-0x1b]
804809c: b2 0c mov dl,0xc
804809e: 31 c0 xor eax,eax
80480a0: b0 08 mov al,0x8
80480a2: bb 04 00 00 00 mov ebx,0x4
80480a7: 29 d8 sub eax,ebx
80480a9: 29 c3 sub ebx,eax
80480ab: 43 inc ebx
80480ac: cd 80 int 0x80
80480ae: 31 c0 xor eax,eax
80480b0: 31 db xor ebx,ebx
80480b2: fe c0 inc al
80480b4: cd 80 int 0x80
80480b6: e8 ae ff ff ff call 0x8048069
80480bb: ed in eax,dx
80480bc: 4e dec esi
80480bd: 65 23 2a and ebp,DWORD PTR gs:[edx]
80480c0: 2d 2b 23 64 30 sub eax,0x3064232b
80480c5: 2b 2a sub ebp,DWORD PTR [edx]
80480c7: 64 29 25 64 0d 4e 65 sub DWORD PTR fs:0x654e0d64,esp
80480ce: 23 2a and ebp,DWORD PTR [edx]
80480d0: 2d 2b 23 64 29 sub eax,0x2964232b
80480d5: 25 64 0d ee 89 and eax,0x89ee0d64
80480da: 89 c5 mov ebp,eax
80480dc: b0 c9 mov al,0xc9
80480de: 01 f8 add eax,edi
80480e0: eb 1f jmp 0x8048101
80480e2: 8d 55 00 lea edx,[ebp+0x0]
80480e5: 88 0c 24 mov BYTE PTR [esp],cl
80480e8: 4c dec esp
80480e9: 68 e9 80 04 08 push 0x80480e9
80480ee: 85 d2 test edx,edx
80480f0: 38 02 cmp BYTE PTR [edx],al
80480f2: 0f 84 78 ff ff ff je 0x8048070
80480f8: 89 fb mov ebx,edi
80480fa: 83 c3 1f add ebx,0x1f
80480fd: 30 1a xor BYTE PTR [edx],bl
80480ff: 4a dec edx
8048100: c3 ret
8048101: 31 ed xor ebp,ebp
8048103: 31 c9 xor ecx,ecx
8048105: 31 d2 xor edx,edx
8048107: 42 inc edx
8048108: 8d 2c 0c lea ebp,[esp+ecx*1]
804810b: 8a 0c 16 mov cl,BYTE PTR [esi+edx*1]
804810e: 38 c1 cmp cl,al
8048110: 74 d0 je 0x80480e2
8048112: 88 0c 24 mov BYTE PTR [esp],cl
8048115: 83 ec 01 sub esp,0x1
8048118: 42 inc edx
8048119: 89 e4 mov esp,esp
804811b: 83 f9 00 cmp ecx,0x0
804811e: 7f eb jg 0x804810b
8048120: 89 ed mov ebp,ebp
8048122: c3 ret
|
The challenge is to figure out whether or not the author is going based solely on this static disassembly.
Method 1: The Easy Way
In this method we'll rebuild the application and simply run it to get the answer.
The first step is to copy the instruction into a new nasm file, if we do that we get:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85 | global _start
section .text
_start:
mov edx,eax
mov edi,0x25
jmp 0x80480b6
mov bl,0x32
pop esi
xor eax,eax
je 0x80480dc
mov bh,0x6a
call 0x804808e
mov cl,0x4
mov al,BYTE PTR [esi]
sub esp,ecx
inc ecx
xor al,cl
xor ecx,ecx
cmp eax,0x4
je 0x8048099
lea ecx,[ebp-0xf]
mov dl,0x10
jmp 0x8048097
xor ebx,ebx
xor ecx,ecx
mov edx,ecx
jmp DWORD PTR [esp]
jmp 0x804809e
lea ecx,[ebp-0x1b]
mov dl,0xc
xor eax,eax
mov al,0x8
mov ebx,0x4
sub eax,ebx
sub ebx,eax
inc ebx
int 0x80
xor eax,eax
xor ebx,ebx
inc al
int 0x80
call 0x8048069
in eax,dx
dec esi
and ebp,DWORD PTR gs:[edx]
sub eax,0x3064232b
sub ebp,DWORD PTR [edx]
sub DWORD PTR fs:0x654e0d64,esp
and ebp,DWORD PTR [edx]
sub eax,0x2964232b
and eax,0x89ee0d64
mov ebp,eax
mov al,0xc9
add eax,edi
jmp 0x8048101
lea edx,[ebp+0x0]
mov BYTE PTR [esp],cl
dec esp
push 0x80480e9
test edx,edx
cmp BYTE PTR [edx],al
je 0x8048070
mov ebx,edi
add ebx,0x1f
xor BYTE PTR [edx],bl
dec edx
ret
xor ebp,ebp
xor ecx,ecx
xor edx,edx
inc edx
lea ebp,[esp+ecx*1]
mov cl,BYTE PTR [esi+edx*1]
cmp cl,al
je 0x80480e2
mov BYTE PTR [esp],cl
sub esp,0x1
inc edx
mov esp,esp
cmp ecx,0x0
jg 0x804810b
mov ebp,ebp
ret
|
When we try to assemble this we get:
| [email protected]:~# nasm -felf32 -o going-or-not-obf-test1 going-or-not-obf-test1.nasm going-or-not-obf-test1.nasm:16: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:29: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:47: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:49: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:50: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:51: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:59: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:63: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:67: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:75: error: comma, colon or end of line expected
going-or-not-obf-test1.nasm:78: error: comma, colon or end of line expected
|
Looking at the lines that have caused the errors:
1
2
3
4
5
6
7
8
9
10
11
12 | [email protected]:~# for i in 16 29 47 49 50 51 59 63 67 75 78; do cat -n going-or-not-obf-test1.nasm | grep "^[ ]*$i"; done
16 mov al,BYTE PTR [esi]
29 jmp DWORD PTR [esp]
47 and ebp,DWORD PTR gs:[edx]
49 sub ebp,DWORD PTR [edx]
50 sub DWORD PTR fs:0x654e0d64,esp
51 and ebp,DWORD PTR [edx]
59 mov BYTE PTR [esp],cl
63 cmp BYTE PTR [edx],al
67 xor BYTE PTR [edx],bl
75 mov cl,BYTE PTR [esi+edx*1]
78 mov BYTE PTR [esp],cl
|
You can see that it's all the lines that have [SIZE] PTR. We will remove every DWORD PTR and BYTE PTR, and for the lines that had BYTE, put that before the first operand, so they end up like this:
1
2
3
4
5
6
7
8
9
10
11
12 | [email protected]:~# for i in 16 29 47 49 50 51 59 63 67 75 78; do cat -n going-or-not-obf-test2.nasm | grep "^[ ]*$i"; done
16 mov BYTE al, [esi]
29 jmp [esp]
47 and ebp, gs:[edx]
49 sub ebp, [edx]
50 sub fs:0x654e0d64,esp
51 and ebp, [edx]
59 mov BYTE [esp],cl
63 cmp BYTE [edx],al
67 xor BYTE [edx],bl
75 mov BYTE cl,[esi+edx*1]
78 mov BYTE [esp],cl
|
Now we try to assemble it again:
| [email protected]:~# nasm -felf32 -o going-or-not-obf-test2 going-or-not-obf-test2.nasm
going-or-not-obf-test2.nasm:47: error: invalid combination of opcode and operands
going-or-not-obf-test2.nasm:50: error: invalid combination of opcode and operands
|
So there is still a problem with 2 lines, it looks as if these instructions are invalid, this could possibly be data, what we shall do is replace these 2 instructions with the raw opcodes from the disassembly, so our application ends up like this:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85 | global _start
section .text
_start:
mov edx,eax
mov edi,0x25
jmp 0x80480b6
mov bl,0x32
pop esi
xor eax,eax
je 0x80480dc
mov bh,0x6a
call 0x804808e
mov cl,0x4
mov BYTE al, [esi]
sub esp,ecx
inc ecx
xor al,cl
xor ecx,ecx
cmp eax,0x4
je 0x8048099
lea ecx,[ebp-0xf]
mov dl,0x10
jmp 0x8048097
xor ebx,ebx
xor ecx,ecx
mov edx,ecx
jmp [esp]
jmp 0x804809e
lea ecx,[ebp-0x1b]
mov dl,0xc
xor eax,eax
mov al,0x8
mov ebx,0x4
sub eax,ebx
sub ebx,eax
inc ebx
int 0x80
xor eax,eax
xor ebx,ebx
inc al
int 0x80
call 0x8048069
in eax,dx
dec esi
db 0x65,0x23,0x2a
sub eax,0x3064232b
sub ebp, [edx]
db 0x64,0x29,0x25,0x64,0x0d,0x4e,0x65
and ebp, [edx]
sub eax,0x2964232b
and eax,0x89ee0d64
mov ebp,eax
mov al,0xc9
add eax,edi
jmp 0x8048101
lea edx,[ebp+0x0]
mov BYTE [esp],cl
dec esp
push 0x80480e9
test edx,edx
cmp BYTE [edx],al
je 0x8048070
mov ebx,edi
add ebx,0x1f
xor BYTE [edx],bl
dec edx
ret
xor ebp,ebp
xor ecx,ecx
xor edx,edx
inc edx
lea ebp,[esp+ecx*1]
mov BYTE cl,[esi+edx*1]
cmp cl,al
je 0x80480e2
mov BYTE [esp],cl
sub esp,0x1
inc edx
mov esp,esp
cmp ecx,0x0
jg 0x804810b
mov ebp,ebp
ret
|
If we assemble this and test it out:
So it assembles and links now but we get a segmentation fault. Let's investigate why:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96 | [email protected]:~# gdb -q ./going-or-not-obf-test3
Reading symbols from /root/going-or-not-obf-test3...(no debugging symbols found)...done.
(gdb) r
Starting program: /root/going-or-not-obf-test3
Program received signal SIGSEGV, Segmentation fault.
0x080480b6 in _start ()
(gdb) x/i $eip
=> 0x80480b6 <_start+86>: add BYTE PTR [eax],al
(gdb) print/x $eax
$1 = 0x0
(gdb) disassemble
Dump of assembler code for function _start:
0x08048060 <+0>: mov edx,eax
0x08048062 <+2>: mov edi,0x25
0x08048067 <+7>: jmp 0x80480b6 <_start+86>
0x0804806c <+12>: mov bl,0x32
0x0804806e <+14>: pop esi
0x0804806f <+15>: xor eax,eax
0x08048071 <+17>: je 0x80480dc <_start+124>
0x08048077 <+23>: mov bh,0x6a
0x08048079 <+25>: call 0x804808e <_start+46>
0x0804807e <+30>: mov cl,0x4
0x08048080 <+32>: mov al,BYTE PTR [esi]
0x08048082 <+34>: sub esp,ecx
0x08048084 <+36>: inc ecx
0x08048085 <+37>: xor al,cl
0x08048087 <+39>: xor ecx,ecx
0x08048089 <+41>: cmp eax,0x4
0x0804808c <+44>: je 0x8048099 <_start+57>
0x08048092 <+50>: lea ecx,[ebp-0xf]
0x08048095 <+53>: mov dl,0x10
0x08048097 <+55>: jmp 0x8048097 <_start+55>
0x0804809c <+60>: xor ebx,ebx
0x0804809e <+62>: xor ecx,ecx
0x080480a0 <+64>: mov edx,ecx
0x080480a2 <+66>: jmp DWORD PTR [esp]
0x080480a5 <+69>: jmp 0x804809e <_start+62>
0x080480aa <+74>: lea ecx,[ebp-0x1b]
0x080480ad <+77>: mov dl,0xc
0x080480af <+79>: xor eax,eax
0x080480b1 <+81>: mov al,0x8
0x080480b3 <+83>: mov ebx,0x4
0x080480b8 <+88>: sub eax,ebx
0x080480ba <+90>: sub ebx,eax
0x080480bc <+92>: inc ebx
0x080480bd <+93>: int 0x80
0x080480bf <+95>: xor eax,eax
0x080480c1 <+97>: xor ebx,ebx
0x080480c3 <+99>: inc al
0x080480c5 <+101>: int 0x80
---Type <return> to continue, or q <return> to quit---
0x080480c7 <+103>: call 0x8048069 <_start+9>
0x080480cc <+108>: in eax,dx
0x080480cd <+109>: dec esi
0x080480ce <+110>: and ebp,DWORD PTR gs:[edx]
0x080480d1 <+113>: sub eax,0x3064232b
0x080480d6 <+118>: sub ebp,DWORD PTR [edx]
0x080480d8 <+120>: sub DWORD PTR fs:0x654e0d64,esp
0x080480df <+127>: and ebp,DWORD PTR [edx]
0x080480e1 <+129>: sub eax,0x2964232b
0x080480e6 <+134>: and eax,0x89ee0d64
0x080480eb <+139>: mov ebp,eax
0x080480ed <+141>: mov al,0xc9
0x080480ef <+143>: add eax,edi
0x080480f1 <+145>: jmp 0x8048101 <_start+161>
0x080480f6 <+150>: lea edx,[ebp+0x0]
0x080480f9 <+153>: mov BYTE PTR [esp],cl
0x080480fc <+156>: dec esp
0x080480fd <+157>: push 0x80480e9
0x08048102 <+162>: test edx,edx
0x08048104 <+164>: cmp BYTE PTR [edx],al
0x08048106 <+166>: je 0x8048070 <_start+16>
0x0804810c <+172>: mov ebx,edi
0x0804810e <+174>: add ebx,0x1f
0x08048111 <+177>: xor BYTE PTR [edx],bl
0x08048113 <+179>: dec edx
0x08048114 <+180>: ret
0x08048115 <+181>: xor ebp,ebp
0x08048117 <+183>: xor ecx,ecx
0x08048119 <+185>: xor edx,edx
0x0804811b <+187>: inc edx
0x0804811c <+188>: lea ebp,[esp+ecx*1]
0x0804811f <+191>: mov cl,BYTE PTR [esi+edx*1]
0x08048122 <+194>: cmp cl,al
0x08048124 <+196>: je 0x80480e2 <_start+130>
0x0804812a <+202>: mov BYTE PTR [esp],cl
0x0804812d <+205>: sub esp,0x1
0x08048130 <+208>: inc edx
0x08048131 <+209>: mov esp,esp
0x08048133 <+211>: cmp ecx,0x0
---Type <return> to continue, or q <return> to quit---
0x08048136 <+214>: jg 0x804810b <_start+171>
0x0804813c <+220>: mov ebp,ebp
0x0804813e <+222>: ret
End of assembler dump.
|
So it looks as if we've landed in the middle of an instruction.
Near the start of the application (on line 16 above), it jumps to a certain memory address which is in the middle of an instruction. The resulting instruction, as seen on line 9, tries to add AL to the byte at the address pointed to by the EAX register.
On line 11 you can see that the value in EAX is 0, which is what caused the segfault, 0 is an invalid memory address.
The reason for this is that the original application jumped to static memory addresses; in the rebuilt application the instructions sit at different addresses, so this will need to be fixed for the application to work.
What we need to do is replace any fixed memory addresses with labels. We can find where in the application the memory addresses are meant to go by looking at the original disassembly.
Once we have done this the resulting application is as follows:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97 | global _start
section .text
_start:
mov edx,eax
mov edi,0x25
jmp One
Two:
mov bl,0x32
pop esi
xor eax,eax
je Three
Eight:
mov bh,0x6a
call Nine
mov cl,0x4
mov BYTE al, [esi]
sub esp,ecx
inc ecx
xor al,cl
xor ecx,ecx
cmp eax,0x4
je Eleven
lea ecx,[ebp-0xf]
mov dl,0x10
jmp Twelve
Nine:
xor ebx,ebx
xor ecx,ecx
mov edx,ecx
jmp [esp]
Twelve:
jmp Ten
Eleven:
lea ecx,[ebp-0x1b]
mov dl,0xc
Ten:
xor eax,eax
mov al,0x8
mov ebx,0x4
sub eax,ebx
sub ebx,eax
inc ebx
int 0x80
xor eax,eax
xor ebx,ebx
inc al
int 0x80
One:
call Two
in eax,dx
dec esi
db 0x65,0x23,0x2a
sub eax,0x3064232b
sub ebp, [edx]
db 0x64,0x29,0x25,0x64,0x0d,0x4e,0x65
and ebp, [edx]
sub eax,0x2964232b
and eax,0x89ee0d64
mov ebp,eax
Three:
mov al,0xc9
add eax,edi
jmp Four
Six:
lea edx,[ebp+0x0]
mov BYTE [esp],cl
dec esp
Seven:
push Seven
test edx,edx
cmp BYTE [edx],al
je Eight
mov ebx,edi
add ebx,0x1f
xor BYTE [edx],bl
dec edx
ret
Four:
xor ebp,ebp
xor ecx,ecx
xor edx,edx
inc edx
lea ebp,[esp+ecx*1]
Five:
mov BYTE cl,[esi+edx*1]
cmp cl,al
je Six
mov BYTE [esp],cl
sub esp,0x1
inc edx
mov esp,esp
cmp ecx,0x0
jg Five
mov ebp,ebp
ret
|
There are a few values here (on lines 55, 59 and 60) which look like memory addresses, but they aren't valid memory addresses in the original disassembly, so they could just be normal values or, as they are in the same section as the invalid instructions, part of some data.
With this done we can test this application:
So we have our answer, the author is not going :-)
Method 2: The Hard Way
Here we will attempt to understand the application and figure out what the application does without building and running it.
Although you would have needed some understanding of IA32 to do the previous method, obviously you will need a better understanding of it to do this.
The first step would be what we have already done. Well, there would be no need for the ability to assemble the application, or even have a valid nasm file but we would need to replace any known addresses with labels because this will make the disassembly significantly easier to read.
For this we will just use the nasm file above (going-or-not-obf-test4.nasm), just because it will make this post a little shorter :-)
What we do now is follow the control flow of the application and simplify it as we go, by replacing more complex sequences with less complex ones (or even a single instruction in some cases) and removing any dead instructions (instructions which have no effect on the application at all) altogether.
This process is manual deobfuscation and can be applied to small sections of applications instead of just full applications like the last method.
Let's start with the first instruction mov edx,eax
, this looks like it is a junk line (or dead code) mainly because this is the first instruction of the application, if this was just a code segment instead of a full application this code would be more likely to be meaningful.
The second instruction mov edi,0x25
, is also very difficult to quickly determine its usefulness to the application, what we need to do here is take note of the value inside the EDI register.
The next 4 instructions do something interesting, if you follow the control flow of the application and line the instructions sequentially you get:
| jmp One
One:
call Two
Two:
mov bl,0x32
pop esi
|
So the 3rd instruction (on line 5) is not related here, and is similar to the previous mov instruction, just make a note that bl contains 0x32.
The other 3 instructions use a technique found in some shellcode to get an address in memory when the code might be loaded at a different point in memory.
It's called the JMP-CALL-POP technique, and it gets the address of whatever immediately follows the call instruction (the return address that call pushes onto the stack) into the register used in the pop instruction.
Knowing this we can replace the entire code above with:
Let's look at the next 4 instructions:
| xor eax,eax
je Three
Three:
mov al,0xc9
add eax,edi
|
So here, on line 5, we use the EDI register. Because xor eax,eax always sets the zero flag, the je on line 2 is always taken, so it is effectively an unconditional jump. We zero EAX, set AL to 0xc9 (201), add EDI (0x25 or 37) to it and store the result (0xee) in EAX. This series of instructions is what is called constant unfolding, where a series of instructions is executed to work out the actual required value instead of just assigning the value to begin with.
We could use the opposite, a common compiler optimization constant folding, to decrease the complexity of this code, so these 4 instructions could be replaced by:
The next 5 instructions are:
| jmp Four
Four:
xor ebp,ebp
xor ecx,ecx
xor edx,edx
inc edx
|
This set of instructions just sets EBP and ECX to 0 and EDX to 1. Now it's obvious that the instruction at the beginning was dead code because EDX hasn't been used at all and now it has been overwritten.
We can rewrite the application so far in a much more simplified way:
| _start:
mov edi,0x25
mov bl,0x32
mov esi, One
mov eax,0xee
xor ebp,ebp
xor ecx,ecx
mov edx,0x1
|
As you can see, this is much easier to read than the previous code that was jumping about all over the place.
I kept the assignment to EDI (on line 2) there because, although I've removed the need for it in assigning the value of EAX (on line 5), it still might be used in the future.
Also, the assignment to bl (on line 3) still might not be needed but we shall keep it there just in case.
Let's quickly review the state of the registers:
| EDI = 0x25
BL = 0x32
ESI = (Address of One) One
EAX = 0xee
EBP = 0x0
ECX = 0x0
EDX = 0x1
|
The register state and code rewrite should be constantly updated as you go through the code.
The next instruction is lea ebp,[esp+ecx*1]
, which is the same as EBP = ESP + ECX * 1 or EBP = ESP + 0 * 1 or EBP = ESP.
After this instruction we enter the following loop:
1
2
3
4
5
6
7
8
9
10
11
12 | Five:
mov BYTE cl,[esi+edx*1]
cmp cl,al
je Six
mov BYTE [esp],cl
sub esp,0x1
inc edx
mov esp,esp
cmp ecx,0x0
jg Five
mov ebp,ebp
ret
|
So this first moves a byte at ESI + EDX * 1, which is basically just ESI + EDX, into the cl register. We know at this point the value inside EDX is 1 and that ESI points to some address in the middle of the application, so our loop will start getting data 1 byte after that address.
This byte is then compared with al, which we know is 0xee, and if they are the same execution will jump to Six.
Providing the jump to Six isn't taken, the byte is moved to the top of the stack (which ESP points to), ESP is adjusted accordingly, EDX is incremented by 1 and the loop is rerun.
The mov instruction on line 8 doesn't do anything, dead code which can be removed.
Now we can find all of the data that is being worked on here:
| 4e 65 23 2a 2d 2b 23 64 30 2b 2a 64 29 25 64 0d 4e 65 23 2a 2d 2b 23 64 29 25 64 0d ee
|
The starting address of this data is 80480bc in the original disassembly, which is 1 byte after the address of the instruction following the call instruction in the jmp-call-pop routine at the start of the application.
It ends with the ee value because this is the point at which the jump to Six is taken.
Also, notice that nowhere here is a 0x0 (or 00) byte, this means that the jg (jump if greater than) instruction on line 10 will always be taken, every byte there is above 0 so the 2 instructions after are dead code and can be removed from the analysis and the jg can be replaced with a jmp.
It is clear that this data, which is sitting in the middle of the application, is being put on the stack for some reason, the lea instruction right before the loop just saved the address pointing to the beginning of the new location of the data on the stack into the EBP register.
We could try to figure out how meaningful this data is now but it would be best to have a look to see what the application does with it first.
Now let's take the jump to Six:
| lea edx,[ebp+0x0]
mov BYTE [esp],cl
dec esp
|
First it loads the address of the data on the stack, currently in EBP, into EDX.
cl, which is currently 0xee, is put onto the stack and ESP is adjusted accordingly.
We then enter into the 2nd loop:
| Seven:
push Seven
test edx,edx
cmp BYTE [edx],al
je Eight
mov ebx,edi
add ebx,0x1f
xor BYTE [edx],bl
dec edx
ret
|
This is a very unusual loop, you will only see this type of code when reversing obfuscated code.
It started by pushing its own address to the stack, this allows the ret on line 10 to return to Seven.
The test instruction on line 3 is dead code because all test does is set EFLAGS, but they are immediately overwritten by the cmp instruction that follows.
Lines 4 and 5 again test the value of a byte in the data, this time pointed to by EDX, against 0xee and jump to Eight when it's reached.
The next 2 instructions, lines 6 and 7, move the value from EDI into EBX and add 0x1f to it. We already know that 0x25 is currently in EDI, so EBX = 0x25 + 0x1f or EBX = 0x44.
The byte in the data is then xor'd with bl (or 0x44) and EDX is decremented.
Clearly this is a simple xor encoding of the data. I wrote a python script a while ago to xor a number of bytes with 1 byte and output both the resulting bytes as ascii characters, and the same but with the characters reversed (due to little endian architectures); here is the script:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 | #!/usr/bin/env python
import sys
string = sys.argv[1]
xor = sys.argv[2]
decoded = ""
for c in string:
decoded += chr(ord(c) ^ ord(xor))
print "String as is:"
print decoded
print "\n\nString reversed:"
print decoded[::-1]
|
This script is very simple. One thing to bear in mind, though, is that because we are dealing with data outside of the printable ascii range (0x20 - 0x7e), we can't just type the characters on the command line.
So we run the script like this:
| [email protected]:~# python xor-and-ascii.py $(python -c 'print "\x4e\x65\x23\x2a\x2d\x2b\x23\x64\x30\x2b\x2a\x64\x29\x25\x64\x0d\x4e\x65\x23\x2a\x2d\x2b\x23\x64\x29\x25\x64\x0d"') $(python -c 'print "\x44"')
String as is:
!gniog ton ma I
!gniog ma I
String reversed:
I am going!
I am not going!
|
So now we know what that data is in the middle of the application, clearly it was done like this to confuse but we have reversed enough of the application now to figure out what this is.
With this is mind, we no longer need those 2 loops, or any of the code aimed at moving and decoding the data, we can simply put it in as is.
Let's review our rewritten application:
1
2
3
4
5
6
7
8
9
10
11
12 | _start:
mov edi,0x25
mov esi,One
mov ebp,not+0xf
mov ebx,0x44
mov ecx,0xee
mov eax,ecx
mov edx,am
One:
db 0xed
am: db "I am going!",0xa
not: db "I am not going!",0xa
|
I have obviously removed most of the code because it simply isn't needed now. I've made sure that EBP still points to the end of the data and EDX to the beginning, just in case there is some reason for this, but most of the code so far was devoted to decoding the data, which is no longer needed.
Now for the registers:
| EDI = 0x25
EBX = 0x44
ESI = (Address of One) One
EAX = 0xee
EBP = (Address of the end of the data) not+0xf
ECX = 0xee
EDX = (Address of the beginning of the data) am
|
The next 5 instructions show another weird use of call and jmp:
| Eight:
mov bh,0x6a
call Nine
Nine:
xor ebx,ebx
xor ecx,ecx
mov edx,ecx
jmp [esp]
|
Firstly there is an assignment to bh (the second 8 bits of the EBX register) but then, on line 5, the whole EBX register is cleared using xor so line 2 is dead code.
The call instruction on line 3 and the jmp instruction on line 8 seem to be used just to confuse the reverser; there is no other reason for them. Bear in mind, though, that the call will have left 4 bytes (its return address) on the stack, next to the decoded data, which haven't been cleaned up (this could affect the application in some way).
The rest of this code just zeroes out EBX, ECX and EDX.
The next 8 instructions are very interesting:
| mov cl,0x4
mov BYTE al, [esi]
sub esp,ecx
inc ecx
xor al,cl
xor ecx,ecx
cmp eax,0x4
je Eleven
|
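Lines 1 and 3 adjust ESP by 4 bytes, apparently to account for the return address left on the stack by the call, jmp sequence earlier. (Note that sub moves ESP a further 4 bytes down rather than back up, so the stack is not actually restored; however, ESP is not used again before the application exits, so this is harmless.)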
The rest xor's 0x5 with the byte at One and compares the result with 0x4. We can test this out in python, we know the byte at One is 0xed, so:
| [email protected]:~# python
Python 2.7.3 (default, Mar 14 2014, 11:57:14)
[GCC 4.7.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> a = "\xed"
>>> b = "\x05"
>>> hex(ord(a) ^ ord(b))
'0xe8'
|
This isn't equal to 0x4 so the jump on line 8 will not be taken.
The next instruction lea ecx,[ebp-0xf]
loads EBP - 0xf (15) into ECX, so ECX will now point to somewhere in the middle of the data (it will actually point to the first of the final 16 bytes, which is the start of the string I am not going! followed by its newline).
We can probably guess at what this is going to do from here but let's finish the analysis.
0x10 is then loaded into EDX and then 2 unconditional jumps are taken:
| jmp Twelve
Twelve:
jmp Ten
|
The only reason for these jumps is to confuse the reverser, we can just ignore them.
The next 7 lines are a very important part of the application:
| xor eax,eax
mov al,0x8
mov ebx,0x4
sub eax,ebx
sub ebx,eax
inc ebx
int 0x80
|
So lines 1-4 set EAX to 0x4, lines 5 and 6 set EBX to 0x1 and then interrupt 0x80 is raised.
Interrupt 0x80 is a special interrupt which initiates a system call, the system call number has to be stored in EAX, which is 0x4 at this moment in time.
We can figure out what system call this is:
| [email protected]:~# grep ' 4$' /usr/include/i386-linux-gnu/asm/unistd_32.h
#define __NR_write 4
|
This makes sense, the prototype for this syscall is:
| ssize_t write(int fd, const void *buf, size_t count);
|
Each of the arguments go in EBX, ECX and EDX. So to write to stdout, EBX should be 1 which it is.
ECX should point to the string, which it currently points to I am not going!, and EDX should contain the number of characters to print which it does.
The last 4 instructions just run another syscall, exit, you can check this yourself if you wish:
| xor eax,eax
xor ebx,ebx
inc al
int 0x80
|
Obviously we could now write this in a much simpler way, but there is no need; we know exactly what this application does and how it does it.
Improving Obfuscation
As I mentioned earlier, the obfuscation could have been done better to make the reversing process harder. I actually purposefully made the obfuscation weaker than I could have to make the challenge easier.
Inserting more junk data inbetween some instructions could make the static disassembly significantly more difficult to read and understand.
I actually had to add a byte (0x89) at the end of the data section because, without it, the next few instructions were being obfuscated in a way that made them unreadable:
| 80480d5: 25 64 0d ee 89 and eax,0x89ee0d64
80480da: c5 b0 c9 01 f8 eb lds esi,FWORD PTR [eax-0x1407fe37]
80480e0: 1f pop ds
80480e1: 8d 55 00 lea edx,[ebp+0x0]
80480e4: 88 0c 24 mov BYTE PTR [esp],cl
80480e7: 4c dec esp
|
The disassembly shown here has had the last byte of the data removed and is the last line of the data section; and a few lines after.
As you can see the byte following the data section has been moved to the data section and as a result the next few instructions have been incorrectly disassembled.
This method can be implemented throughout the whole application, making most of the instructions disassemble incorrectly.
Constant unfolding could be improved here, for instance:
| mov al,0x8
mov ebx,0x4
sub eax,ebx
sub ebx,eax
inc ebx
int 0x80
|
Could be rewritten to:
| push 0xff7316ca
xor [esp], 0x8ce931
mov eax, 0xffffffff
sub eax, [esp]
push eax
shl [esp], 0x4
sub [esp], 0x3f
pop ebx
int 0x80
|
They both do the same thing but the second is a little harder to read, you could obviously keep extending this by implementing more and more complex algorithms to work out your required value.
This can also be applied to references to memory addresses, for instance, if you want to jump to a certain memory address, do some maths to work out the memory address before jumping there.
More advanced instructions could be used like imul, idiv, cmpsb, rol, stosb, rep, movsx, fadd, fcom... The list goes on...
The MMX and other unusual registers could have been taken advantage of.
Also, the key to decrypt the data could have been a command line argument or somehow retrieved from outside of the application; this way it would have been extremely difficult to decode the data.
Conclusion
There are sometimes easier ways to get a result other than reversing the whole application, maybe just understanding a few bits might be enough.
Although there are ways to make the reverser's job more difficult, it's never possible to make it impossible to reverse, providing the reverser is able to run the application (if the CPU can see the instructions, then so can the reverser).
A good knowledge of assembly is needed to do any type of indepth reverse engineering.
Further Reading
Reversing: Secrets of Reverse Engineering by Eldad Eilam
Intel® 64 and IA-32 Architectures Developer's Manual