This is another challenge from the Volga CTF Quals 2016, involving an x64 ELF executable that encodes files. Our objective is to recover the clear text data from the encrypted file.

Here’s the description for this challenge:

This binary does something with the data. The transformation must be reversible, but the details are unknown. It shouldn’t be too difficult to reverse that transformation and obtain the flag, should it?

You can download the executable from the CTF Writeup repository on GitHub.

What I’m going to do in this writeup is simple:

  1. Reverse the algorithm. I will use IDA Pro, but you can also use radare2.
  2. Write a compatible encoder in C++.
  3. Invert the transformation and implement decoding functionality.

For starters, you may have noticed that this executable does not like to be run inside a debugger; you can easily pinpoint where the anti-debugger check is performed by setting a breakpoint on the exit() function.

00401070 Protection proc near 
00401070     sub     rsp, 8
00401070     ; terminate the process if the LD_PRELOAD variable
00401070     ; is set
00401074     mov     edi, offset name ; "LD_PRELOAD"
00401079     call    _getenv
0040107e     test    rax, rax
00401081     jnz     short _terminate
00401081     ; terminate if the PTRACE_TRACEME ptrace request
00401081     ; returns -1 (meaning that a debugger is already
00401081     ; attached)
00401083     xor     ecx, ecx
00401085     xor     edx, edx
00401087     xor     esi, esi
00401089     xor     edi, edi        ; request (PTRACE_TRACEME)
0040108b     xor     eax, eax
0040108d     call    _ptrace
00401092     test    rax, rax
00401095     js      short _terminate
00401097     add     rsp, 8
0040109b     retn
0040109c _terminate:
0040109c     xor     edi, edi        ; status (0)
0040109e     call    _exit
0040109e Protection endp

We could have chosen to hook the ptrace() function and disable this check by using the LD_PRELOAD environment variable, but the code at address 401074 would have prevented us from being able to do it. What we really want to remove instead is the debugger protection and, to be honest, it’s easier to just patch the program. It’s not required if you don’t plan on stepping through the code or if you don’t mind skipping it manually with a breakpoint each time you start the program.

Now that we got rid of the protection, let’s take a look at the caller.

004019C0 init proc near
004019C0     push    r15
004019C2     mov     r15d, edi
004019C5     push    r14
004019C7     mov     r14, rsi
004019CA     push    r13
004019CC     mov     r13, rdx
004019CF     push    r12
004019CF     ; array contents:
004019CF     ; Initialization01 (004013D0)
004019CF     ; Protection (00401070)
004019CF     ; CppIoStreamConstructor (004012F0)
004019D1     lea     r12, InitializationFunctionsList1
004019D8     push    rbp
004019D8     ; array contents: Initialization02 (004013B0)
004019D9     lea     rbp, InitializationFunctionsList2
004019E0     push    rbx
004019E1     sub     rbp, r12
004019E4     xor     ebx, ebx
004019E6     sar     rbp, 3
004019EA     sub     rsp, 8
004019EE     call    _init_proc
004019F3     test    rbp, rbp
004019F6     jz      short loc_401A16
004019F6     ; it's pretty obvious that someone has messed up
004019F6     ; this opcode. replace it with a function call
004019F6     ; to address 004013B0
004019F8     nop     dword ptr [rax+rax+00000000h]
00401A00 loc_401A00:
00401A00     mov     rdx, r13
00401A03     mov     rsi, r14
00401A06     mov     edi, r15d
00401A06     ; functions called:
00401A06     ; 1. Initialization01: program initialization
00401A06     ; 2. Protection: LD_PRELOAD/debugger check
00401A06     ; 3. CppIoStreamConstructor: constructor and destructor
00401A06     ;    (using atexit) for the std::ios_base c++ object
00401A09     call    qword ptr [r12+rbx*8]
00401A0D     add     rbx, 1
00401A11     cmp     rbx, rbp
00401A14     jnz     short loc_401A00
00401A16 loc_401A16:
00401A16     add     rsp, 8
00401A1A     pop     rbx
00401A1B     pop     rbp
00401A1C     pop     r12
00401A1E     pop     r13
00401A20     pop     r14
00401A22     pop     r15
00401A24     retn
00401A24 init endp

If you played the previous level (named Broken) you will remember that the init() function had been patched to remove a function call that was required for the algorithm to work as intended; this challenge is no exception and you will have to fix the instruction located at address 004019F8. Again, the function we need to call can be found inside the second array.

There’s not much else to say about the rest of the functions referenced here; they’re just used for initialization and we don’t really care to analyze them, provided that we take a memory snapshot once it’s done. Let’s see how the rest of the program works; it’s pretty long, so I will use annotated pseudo-code to illustrate its internals.

// 004010B0
int main(int argc, char *argv[])
    // 004010d0
    if (argc != 3)
        return 0;

    const char *input_file_path = argv[1];
    const char *output_file_path = argv[2];

    // 0040111a
    std::fstream input_file(input_file_path);

    // 00401135
    std::fstream output_file(output_file_path);

    // 004011c5
    input_file.seekg(0, std::ios_base::end);

    // 004011d2
    std::streamsize file_size = input_file.tellg();

    // 004011ee
    input_file.seekg(0, std::ios_base::beg);

    // 004011fa
    std::uint8_t *buffer = new std::uint8_t[file_size];

    // 0040121D
    input_file.read(buffer, file_size);

    // we don't care much about the following function, as it doesn't touch
    // our buffer in any way. Take a memory snapshot after this call

    // 0040122C
    call sub_401932;

    // this is what we need to analyze

    // 0040123A
    call sub_401836;

    // 00401248
    output_file.write(buffer, file_size);

    return 0;

// 00401836
void sub_401836(uint8_t *input_buffer, uint8_t *output_buffer, uint32_t buffer_size)
    // rdi = input_buffer
    // rsi = output_buffer
    // rdx = buffer_size

    // an xmmword is 16 bytes long; this function encodes one block
    // at a time by calling sub_40179e

    // 00401860
    rdx = shr rdx, 0x04;

    // 00401870
    if (rdx == 0)

        // 00401877
        xmm0 = *rdi;

        // 0040187b
        call sub_40179e;

        // 00401880
        *rsi = xmm0;

        // 00401884
        rdi += 0x10;

        // 00401888
        rsi += 0x10;

        // 0040188c
    } while (rdx != 0); // 00401864

    // 0040189f

// 0040179e
void sub_40179e()
    // this is the real encoding function; it's nothing too fancy
    // and you can easily invert each transformation by performing
    // the same operations in reverse order

    // 004017ac
    xmm7 = xmm0;

    // 004017c8
    xmm0 = xmmword_602128;

    // 004017d1
    pxor xmm7, xmm0;

    // 004017d6
    r15 = 0x10;

        // 004017e1
        call sub_4015d0;

        // 004017e6
        call sub_40167d;

        // 004017eb
        call sub_40169f;

        // 004017f0
        xmm0 = xmmword_602128[r15];

        // 004017f9
        pxor xmm7, xmm0;

        // 004017fe
        r15 += 0x10;
    } while (r15 < 0x0A); // 00401802

    // 0040180b
    call sub_4015d0;

    // 00401810
    call sub_40167d;

    // 00401815
    xmm0 = xmmword_602128[r15];

    // 0040181e
    pxor xmm7, xmm0;

    // 00401823
    xmm0 = xmm7

    // 00401835

// 004015d0
void sub_4015d0()
    // 004015d9
    [rsp] = xmm7;

    // 004015de
    for (offset = 0; offset < 0x0c; offset += 0x04)
        ebx = [rsp + offset];
        call sub_40161f;
        [rsp + offset] = eax;

    // 00401615
    xmm7 = [rsp];

    // 0040161e

// 0040167d
void sub_40167d()
    // 00401694
    xmm0 = xmmword_401683;

    // 00401698
    pshufb xmm7, xmm0;

    // 0040169e

// 0040169f
void __usercall sub_40169f()
    // 004016a8
    [rsp] = xmm7;

    // 004016ad
    for (offset = 0; offset < 0x0c; offset += 0x04)
        edi = [rsp + offset];
        call sub_4016e9;
        [esp] = eax;

    // 004016df
    xmm7 = [rsp];

    // 004016e8

// 0040161f
void __usercall sub_40161f()
    // input: ebx
    // output: eax

    movzx   ecx, bl
    mov     al, byte ptr qword_602268[ecx]
    movzx   ecx, bh
    mov     ah, byte ptr qword_602268[ecx]
    shl     eax, 10h
    shr     ebx, 10h
    movzx   ecx, bl
    mov     al, byte ptr qword_602268[ecx]
    movzx   ecx, bh
    mov     ah, byte ptr qword_602268[ecx]
    rol     eax, 10h
    add     rsp, 8

There are two more functions we need to analyze: sub_40161f and sub_4016e9; I’ve added some spacing to make the listings easier to follow and understand.

0040161F sub_40161f proc near
0040161F     ; input: ebx
0040161F     ; output: eax
0040161F     push    rcx
00401620     jmp     short loc_40162B
00401622     db 0x00
00401623     db 0x00
00401624     db 0x00
00401625     db 0xE9
00401626     db 0xA6
00401627     db 0x01
00401628     db 0x40
00401629     db 0xE9
0040162A     db 0x04
0040162B loc_40162B:
0040162B     xor     rax, rax
0040162E     jmp     short loc_401646


00401646 loc_401646:
00401646     movzx   ecx, bl
00401649     mov     al, byte ptr qword_602268[ecx]
00401650     movzx   ecx, bh
00401653     mov     ah, byte ptr qword_602268[ecx]
0040165A     shl     eax, 10h
0040165D     shr     ebx, 10h
00401660     movzx   ecx, bl
00401663     mov     al, byte ptr qword_602268[ecx]
0040166A     movzx   ecx, bh
0040166D     mov     ah, byte ptr qword_602268[ecx]
00401674     rol     eax, 10h
00401677     add     rsp, 8
0040167B     nop
0040167C     retn
0040167C sub_40161f endp

This function is pretty easy to invert; just execute the same opcodes in reverse order and you will obtain the input value. Let’s take a look at the second procedure:

004016E9 sub_4016e9 proc near
004016E9     ; input: edi
004016E9     ; output: eax
004016E9     xor     eax, eax
004016EB     jz      short loc_4016EE
004016ED     db 0xE8
004016EE loc_4016EE:
004016EE     movzx   r8, dil
004016F2     shr     edi, 8
004016F5     movzx   r9, dil
004016F9     shr     edi, 8
004016FC     movzx   r10, dil
00401700     shr     edi, 8
00401703     movzx   r11, dil
00401707     xor     eax, eax
00401709     mov     r12b, byte_602394[r11]
00401710     mov     dil, byte_602494[r8]
00401717     xor     edi, r9d
0040171A     xor     edi, r10d
0040171D     xor     edi, r12d
00401720     and     edi, 0FFh
00401726     or      eax, edi
00401728     shl     eax, 8
0040172B     mov     r12b, byte_602494[r11]
00401732     mov     dil, byte_602394[r10]
00401739     xor     edi, r8d
0040173C     xor     edi, r9d
0040173F     xor     edi, r12d
00401742     and     edi, 0FFh
00401748     or      eax, edi
0040174A     shl     eax, 8
0040174D     jb      short loc_401752
0040174F     jnb     short loc_401752
00401751     db 0E9h
00401752 loc_401752:
00401752     mov     r12b, byte_602494[r10]
00401759     mov     dil, byte_602394[r9]
00401760     xor     edi, r8d
00401763     xor     edi, r12d
00401766     xor     edi, r11d
00401769     and     edi, 0FFh
0040176F     or      eax, edi
00401771     shl     eax, 8
00401774     mov     r12b, byte_602494[r9]
0040177B     mov     dil, byte_602394[r8]
00401782     xor     edi, r12d
00401785     xor     edi, r10d
00401788     xor     edi, r11d
0040178B     and     edi, 0FFh
00401791     or      eax, edi
00401793     retn
00401793 sub_4016e9 endp

This is slightly harder, and will require some work; first of all, write down how the sub_4016e9 function encodes the input value:

output[3] = byte_602494[input[0]] ^ input[1] ^ input[2] ^ byte_602394[input[3]];
output[2] = byte_602394[input[2]] ^ input[0] ^ input[1] ^ byte_602494[input[3]];
output[1] = byte_602394[input[1]] ^ input[0] ^ input[3] ^ byte_602494[input[2]];
output[0] = byte_602394[input[0]] ^ input[2] ^ input[3] ^ byte_602494[input[1]];

Then, invert the operands so that the input value is on the left:

input[1] = output[3] ^ byte_602494[input[0]] ^ input[2] ^ byte_602394[input[3]]
input[0] = output[2] ^ byte_602394[input[2]] ^ input[1] ^ byte_602494[input[3]]
input[3] = output[1] ^ byte_602394[input[1]] ^ input[0] ^ byte_602494[input[2]]
input[2] = output[0] ^ byte_602394[input[0]] ^ input[3] ^ byte_602494[input[1]]

We can now brute force the system and obtain the input value:

// ...

for (std::uint16_t input1 = 0x00; !found && input1 <= 0xFF; input1++)
    input[1] = static_cast<std::uint8_t>(input1);

    for (std::uint16_t input2 = 0x00; !found && input2 <= 0xFF; input2++)
        input[2] = static_cast<std::uint8_t>(input2);

        for (std::uint16_t input3 = 0x00; !found && input3 <= 0xFF; input3++)
            input[3] = static_cast<std::uint8_t>(input3);

            input[0] = output[2] ^ byte_602394[input[2]] ^ input[1] ^ byte_602494[input[3]];

            if (input[1] != (output[3] ^ byte_602494[input[0]] ^ input[2] ^ byte_602394[input[3]]))

            if (input[2] != (output[0] ^ byte_602394[input[0]] ^ input[3] ^ byte_602494[input[1]]))

            if (input[3] != (output[1] ^ byte_602394[input[1]] ^ input[0] ^ byte_602494[input[2]]))

            found = true;

// ...

We have pretty much analyzed everything we needed to invert the transformation algorithm, and we can now decode the encrypted file:

alessandro at tachikoma in ~/Projects/untransformer (master)
$ untransformer decode flag.transformed flag.decoded && cat flag.decoded

I have published the whole source code on my GitHub page in case you want to take a look at the decoder.

If you managed to get this far, thanks for reading! I hope you found this writeup interesting; I know I at least had fun writing it.

This is a pretty nice challenge from the VolgaCTF 2016 Quals; sadly, I couldn’t join the r/OpenToAllCTFteam and play because I was too busy, but I noticed it was missing a writeup and decided to write one.

The first thing I always do when I want to analyze an executable is to run it inside a disposable virtual machine; the first thing you will notice is that it doesn’t seem to be doing anything at all, and that it will close itself after half a minute with the following message: “The processing has taken too long, terminating the process…”. It’s obvious we will not gain any further knowledge from this executable by just launching it. Fire up your favorite disassembler and let’s take a look at the entry point.

This is roughly what happens inside the main entry point:

  • A couple of structures are initialized.
  • Four threads are started using a function that wraps pthread_create.
  • The program pauses until the threads have returned (using a wrapper for pthread_join).
  • A printf() call outputs a string.

This is the function that is used to create each thread. We are running under Linux x64, meaning that the vast majority of the functions we will encounter here will use the __fastcall convention: arguments are primarily passed using the registers (RDI, RSI, RDX, RCX, R8, R9, XMM0–XMM7). The start routine is the second argument of the function, and will therefore end up inside the RSI register.

.text:0000000000401340 ; void __fastcall CreateThread(pthread_t *thread, void *(__cdecl *start_routine)(void *), void *thread_argument)
.text:0000000000401340 CreateThread proc near                  ; CODE XREF: main+E9^p
.text:0000000000401340                                         ; main+102^p ...
.text:0000000000401340     push    rbx
.text:0000000000401341     mov     ebx, ecx
.text:0000000000401343     mov     rcx, rdx                    ; arg
.text:0000000000401346     mov     rdx, rsi                    ; start_routine
.text:0000000000401349     xor     esi, esi                    ; attr
.text:000000000040134B     call    _pthread_create
.text:0000000000401350     test    eax, eax
.text:0000000000401352     jnz     short loc_401356
.text:0000000000401354     pop     rbx
.text:0000000000401355     retn
.text:0000000000401356 ; ---------------------------------------------------------------------------
.text:0000000000401356 loc_401356:                             ; CODE XREF: CreateThread+12 ^j
.text:0000000000401356     mov     edi, offset s               ; "Failed to spawn thread\n"
.text:000000000040135B     call    _perror
.text:0000000000401360     mov     edi, ebx                    ; status
.text:0000000000401362     call    _exit
.text:0000000000401362 CreateThread endp

You can now track down the start routines used to create the threads:

  • 0x00400E20: ComputeSHA256Hash
  • 0x00400E60: ComputeSHA512Hash
  • 0x00400F40: Thread3
  • 0x00400EA0: TimeoutThread

The first two threads will compute the hash of a buffer and terminate; remember the two structures that are initialized right at the start of the main entry point? They hold both the input buffer and the pointer where the resulting digest will be stored. They are not particularly interesting, and I will not talk much about them.

00000000 HashedBuffer struc ; (sizeof=0x18, mappedto_3) ; XREF: 00007FFC02AF11C0/r
00000000                                         ; main/r ...
00000000 pbuffer dq ?                            ; XREF: main+8C/w main+AA/w ; offset
00000008 size dd ?                               ; XREF: main+9F/w main+B5/w
0000000C     db ? ; undefined
0000000D     db ? ; undefined
0000000E     db ? ; undefined
0000000F     db ? ; undefined
00000010 pdigest dq ?                            ; XREF: main+60/w main+C1/w ; offset
00000018 HashedBuffer ends

.text:0000000000400E20 ; uint8_t __fastcall ComputeSHA256Hash(struct HashedBuffer *hashed_buffer_obj)
.text:0000000000400E20 ComputeSHA256Hash proc near
.text:0000000000400E20     push    rbx
.text:0000000000400E21     mov     rbx, rdi
.text:0000000000400E24     mov     edi, offset thread1_sha256_context ; context
.text:0000000000400E29     call    _SHA256_Init
.text:0000000000400E2E     movsxd  rdx, dword ptr [rbx+8] ; len
.text:0000000000400E32     mov     rsi, [rbx]  ; data
.text:0000000000400E35     mov     edi, offset thread1_sha256_context ; context
.text:0000000000400E3A     call    _SHA256_Update
.text:0000000000400E3F     mov     rdi, [rbx+10h] ; digest
.text:0000000000400E43     mov     esi, offset thread1_sha256_context ; context
.text:0000000000400E48     call    _SHA256_Final
.text:0000000000400E4D     mov     rax, [rbx+10h]
.text:0000000000400E51     pop     rbx
.text:0000000000400E52     retn
.text:0000000000400E52 ComputeSHA256Hash endp

.text:0000000000400E60 ; uint8_t __fastcall ComputeSHA512Hash(struct HashedBuffer *hashed_buffer_obj)
.text:0000000000400E60 ComputeSHA512Hash proc near
.text:0000000000400E60     push    rbx
.text:0000000000400E61     mov     rbx, rdi
.text:0000000000400E64     mov     edi, offset thread2_sha512_context ; context
.text:0000000000400E69     call    _SHA512_Init
.text:0000000000400E6E     movsxd  rdx, dword ptr [rbx+8] ; len
.text:0000000000400E72     mov     rsi, [rbx]  ; data
.text:0000000000400E75     mov     edi, offset thread2_sha512_context ; context
.text:0000000000400E7A     call    _SHA512_Update
.text:0000000000400E7F     mov     rdi, [rbx+10h] ; digest
.text:0000000000400E83     mov     esi, offset thread2_sha512_context ; context
.text:0000000000400E88     call    _SHA512_Final
.text:0000000000400E8D     mov     rax, [rbx+10h]
.text:0000000000400E91     pop     rbx
.text:0000000000400E92     retn
.text:0000000000400E92 ComputeSHA512Hash endp

The fourth thread is the one that prints the timeout message and terminates the process by calling exit().

.text:0000000000400EA0 ; void *__cdecl TimeoutThread(void *unused)
.text:0000000000400EA0 TimeoutThread proc near                 ; DATA XREF: main+12C^o
.text:0000000000400EA0     sub     rsp, 8
.text:0000000000400EA4     mov     edi, 30                     ; seconds
.text:0000000000400EA9     call    _sleep
.text:0000000000400EAE     mov     edi, 1
.text:0000000000400EB3     mov     edx, offset aTheProcessingH ; "The processing has taken too long, term"...
.text:0000000000400EB8     mov     esi, offset unk_401970
.text:0000000000400EBD     xor     eax, eax
.text:0000000000400EBF     call    ___printf_chk
.text:0000000000400EC4     mov     edi, 5                      ; status
.text:0000000000400EC9     call    _exit
.text:0000000000400EC9 TimeoutThread endp

Let’s take a look at the third thread; you will eventually notice there’s something spying on you once you start setting breakpoints around. The program will stop working and the execution will not even reach the main entry point. This is caused by a function that computes the hash of a selected number of functions defined in the program and terminates the process in case any of the calculated signatures don’t match.

.text:0000000000401580 ; void ModuleInitialization(void)
.text:0000000000401580 ModuleInitialization proc near          ; DATA XREF: start+16^o
.text:0000000000401580     push    r15
.text:0000000000401582     mov     r15d, edi
.text:0000000000401585     push    r14
.text:0000000000401587     mov     r14, rsi
.text:000000000040158A     push    r13
.text:000000000040158C     mov     r13, rdx
.text:000000000040158F     push    r12
.text:0000000000401591     lea     r12, InitializationCallbacks
.text:0000000000401598     push    rbp
.text:0000000000401599     lea     rbp, InitializationCallbacks2
.text:00000000004015A0     push    rbx
.text:00000000004015A1     sub     rbp, r12
.text:00000000004015A4     xor     ebx, ebx
.text:00000000004015A6     sar     rbp, 3
.text:00000000004015AA     sub     rsp, 8
.text:00000000004015AE     call    _init_proc
.text:00000000004015B3     test    rbp, rbp
.text:00000000004015B6     jz      short loc_4015D6
.text:00000000004015B8     nop     dword ptr [rax+rax+00000000h]
.text:00000000004015C0 loc_4015C0:                             ; CODE XREF: ModuleInitialization+54 vj
.text:00000000004015C0     mov     rdx, r13                    ;
.text:00000000004015C0                                         ; Functions called: PatchProtection, Initialize
.text:00000000004015C3     mov     rsi, r14
.text:00000000004015C6     mov     edi, r15d
.text:00000000004015C9     call    qword ptr [r12+rbx*8]
.text:00000000004015CD     add     rbx, 1
.text:00000000004015D1     cmp     rbx, rbp
.text:00000000004015D4     jnz     short loc_4015C0
.text:00000000004015D6 loc_4015D6:                             ; CODE XREF: ModuleInitialization+36 ^j
.text:00000000004015D6     add     rsp, 8
.text:00000000004015DA     pop     rbx
.text:00000000004015DB     pop     rbp
.text:00000000004015DC     pop     r12
.text:00000000004015DE     pop     r13
.text:00000000004015E0     pop     r14
.text:00000000004015E2     pop     r15
.text:00000000004015E4     retn
.text:00000000004015E4 ModuleInitialization endp

If you are curious about this, you can declare such a function using the __attribute__((constructor)) attribute in your C or C++ code.

As you can see, it’s calling a couple of initialization functions taken from an array; this array is accessed at virtual address 0x00401591 and contains both the patching protection (0x00400A50) that is interfering with us and a function that initializes the internal state of the program (0x00400DF0). The huge nop instruction at virtual address 0x004015B8 is a clear indication that an opcode has been removed. Did you notice that the second array is referenced but its value is actually never used? Replace the instruction with a call to the function pointer stored inside the second array (0x00400DD0).

Now we have to disable the protection; I have forced the jump at virtual address 0x00401541, but you can probably just skip the whole function.

Let’s go back to the thread we were analyzing (0x00400F40). If you step through the code, you will notice that it deadlocks inside a sem_wait call. Do you remember how sem_init works? When you create a new semaphore, you can set the initial value; this value is increased using sem_post and decreased using sem_wait. If you wait on a semaphore that is currently set to 0, you will have to wait until someone increments it using sem_post.

The whole situation becomes a lot easier to understand once you give the semaphores a name. I have named them (surprise) semaphore1 (0x00400F77), semaphore2 (0x00400F8D) and semaphore3 (0x00400FA3). They have all been initialized to 0: keep this in mind because it’s important.

Once the semaphores are initialized, a couple more threads are created: Thread5 (0x00400ED0) and Thread6 (0x004012C0). Both functions perform (more or less) the same operations:

// pseudo-code for Thread5 and Thread6
void ThreadEntryPoint()
	// keep in mind that you have two almost identical threads that
	// are performing the same operations!
	// increment semaphore3 twice; this will allow Thread3 to call
	// sem_wait(semaphore3) twice after Thread5 and Thread6 are created.
	_sem_post(semaphore3) // semaphore3 += 2

		// wait for Thread3 to give us the ok to proceed
		// we obviously need two sem_post(semaphore1) calls in order
		// to unlock both threads
		sem_wait(semaphore1); // semaphore1 -= 2
		// update the program state
		// ...

		// this will tell Thread3 that it can proceed
	} while (sem_post(semaphore3) == 0); // semaphore3 += 2

Now that we know how the two threads are working, let’s go back to Thread3. We have another nop at virtual address 0x00401073 and we need to replace it with another function call; remember the patch protection function at virtual address 0x00400A50? One of the routines that it was protecting was not referenced by anything else in the program, meaning that it’s the one we need to call.

.text:0000000000400AE0 loc_400AE0:                             ; CODE XREF: PatchProtection+78 ^j
.text:0000000000400AE0     mov     rdi, cs:sub401450_address   ;
.text:0000000000400AE0                                         ; this is the virtual address of the function: 0x00401450
.text:0000000000400AE7     test    rdi, rdi
.text:0000000000400AEA     jz      short loc_400B02
.text:0000000000400AEC     mov     rdx, cs:sub401450_code_hash_ptr ; expected_hash
.text:0000000000400AF3     mov     esi, cs:sub401450_code_length ; length
.text:0000000000400AF9     call    VerifyMemoryHash
.text:0000000000400AFE     test    eax, eax
.text:0000000000400B00     jnz     short loc_400B07

You will notice that this is not enough to fix the program, because it will now deadlock somewhere else inside the code we added; if you take a closer look at where it gets stuck you will realize that the only possible explanation is that one of the semaphores inside the threads #5 and #6 has been changed.

Open the Thread6 function and fix the semaphore inside the loop:

.text:0000000000401311 BF 00 35 60 00        mov     edi, offset semaphore1 ; change this to semaphore 2
.text:0000000000401316 E8 D5 F5 FF FF        call    _sem_wait
.text:000000000040131B 85 C0                 test    eax, eax
.text:000000000040131D 74 B9                 jz      short loc_4012D8

Here’s a summary of the patching I have done:

# open the executable in read/write
radare2 -w broken

# disable the patch protection
[0x00400d35]> s 0x401541
[0x00401541]> wa jmp 0x401530

# add the missing initialization function
[0x00401541]> s 0x4015b8
[0x004015b8]> wa call 0x400DD0
[0x004015b8]> s 0x004015bd
[0x004015bd]> wa nop
[0x004015bd]> s 0x004015be
[0x004015be]> wa nop
[0x004015be]> s 0x004015bf
[0x004015bf]> wa nop

# restore the function call inside Thread3
[0x004015b8]> s 0x0401073
[0x00401073]> wa call 0x401450

# fix the semaphore inside Thread6
[0x00401073]> s 0x00401311
[0x00401311]> wa mov edi, 0x6034E0

# save everything and close
[0x00401311]> wc
[0x00401311]> q

Run it again and you will get the flag: VolgaCTF{avoid_de@dl0cks_they_br3ak_your_@pp}

Hello everyone!

I have released an update for the new x64 client; you can grab it from the MultiWars2 page. I have kept the old version around in case the new one gives you any issues.

P.S.: thank you Jim for testing the release! Have fun!

I’ve fixed an issue with the command line handling code in MultiWars2; you should now be able to correctly pass arguments to Gw2.exe.

As a reminder, this is how to automatically authenticate yourself when launching the game:

MultiWars2LT.exe -email youremail@example.com -password yourpassword -nopatchui

I’ve released a new version of my Guild Wars 2 launcher, and I’ve renamed it to MultiWars2LT.

I have simplified it a lot compared to the previous release by removing both the debugger engine and the user interface. I have other projects I have to dedicate my time to right now, and I currently don’t have much time to spare for MultiWars2. This is why I’ve rewritten it as a simple command line tool; I know it doesn’t have any fancy bells and whistles, but it should get the job done.

Don’t worry, I will keep updating it in case compatibility issues arise with new patches.