Solving SmokeStack, from the third Flare-On Challenge


Note: This article was published right after the third Flare-On Challenge ended.

Official writeups can be found here: 2016 Flare-On Challenge solutions from fireeye.com

SmokeStack is the fifth level of the third edition of the Flare On Challenge organized by FireEye. I've decided to write a post about it because this is one of the two levels I've enjoyed the most (the other being CHIMERA).

I will be using the assembly I've annotated from the start to make things easier to understand. A little warning: this post is (really) verbose, as I've included the assembly code in its entirety.

Let's get started!

The application expects the user to pass 10 characters as the first parameter.

; function starts at virtual address 0x00402F30
_main proc
    ; ...

    ; 0x00402F76
    cmp     [ebp+argc], 1
    jle     __exit

    ; 0x00402F80
    mov     eax, [ebp+argv]
    mov     ecx, [eax+4]
    push    ecx
    call    _strlen
    jl      __exit

    ; ...
_main endp

Each character is then sign-extended to a 2-byte value and copied to a global buffer. The code is sometimes pretty verbose, which is a clear indication that it was not compiled with optimization flags.

; function starts at virtual address 0x00402F30
_main proc
    ; ...

    ; 0x00402F9C
    mov     [ebp+i], 0
    jmp     short __vm_stack_initialization_loop

    ; 0x00402FAE
__vm_stack_initialization_loop:
    cmp     [ebp+i], 0Ah
    jge     short __start_vm_execution_loop

    ; 0x00402FB4
    mov     eax, [ebp+argv]
    mov     ecx, [eax+4]

    mov     edx, [ebp+i]
    movsx   ax, byte ptr [ecx+edx]

    mov     ecx, [ebp+i]
    mov     vm_stack[ecx*2], ax

    jmp     short __vm_stack_initialization_loop_condition

    ; 0x00402FA5
__vm_stack_initialization_loop_condition:
    mov     edx, [ebp+i]
    add     edx, 1
    mov     [ebp+i], edx

    ; ...
_main endp

Take a look at the cross references and notice how this is the only place where the program accesses this buffer directly, as the rest of the code will make use of the following two functions to access it:

; Pushes a 16-bit word onto the virtual machine stack.
;
; The stack pointer is incremented first and the word is then stored at
; vm_stack[vm_stack_pointer], so vm_stack_pointer always indexes the most
; recently pushed word. The routine returns with a plain retn, leaving the
; argument on the native stack for the caller to remove.
;
; function starts at virtual address 0x00401000
VMStack_push proc value:word
    push    ebp
    mov     ebp, esp

    ; vm_stack_pointer += 1 (16-bit arithmetic)
    mov     ax, vm_stack_pointer
    add     ax, 1
    mov     vm_stack_pointer, ax

    ; vm_stack[vm_stack_pointer] = value (every slot is 2 bytes wide)
    movzx   ecx, vm_stack_pointer
    mov     dx, [ebp+value]
    mov     vm_stack[ecx*2], dx

    pop     ebp
    retn
VMStack_push endp

; Pops the top 16-bit word from the virtual machine stack and returns it
; in ax.
;
; The word at vm_stack[vm_stack_pointer] is read first and the stack pointer
; is decremented afterwards, mirroring the increment-then-store order used
; by VMStack_push.
;
; function starts at virtual address 0x00401080
VMStack_pop proc
    push    ebp
    mov     ebp, esp
    push    ecx             ; presumably reserves the stack slot used by the local `word`

    ; word = vm_stack[vm_stack_pointer]
    movzx   eax, vm_stack_pointer
    mov     cx, vm_stack[eax*2]
    mov     [ebp+word], cx

    ; vm_stack_pointer -= 1 (16-bit arithmetic)
    mov     dx, vm_stack_pointer
    sub     dx, 1
    mov     vm_stack_pointer, dx

    ; return value
    mov     ax, [ebp+word]

    mov     esp, ebp
    pop     ebp
    retn
VMStack_pop endp

The counter grows when a value is saved, and decreases when a value is removed; it is pretty obvious that this is some kind of LIFO stack implementation.

Back to the entry point: the buffer has been populated with our string and the first initialization phase ends at virtual address 0x00402FCF. The next function we're going to enter is located at virtual address 0x00401610 and will soak up most of the execution time - this is where we are going to focus our efforts.

The first part is not really interesting, as it's just initialization of values and function pointers.

; Virtual machine entry point: installs the opcode handlers, resets the
; machine state and runs handlers until the instruction pointer reaches
; vm_code_size. The value left in vm_register_A is returned in ax.
;
; function starts at address 0x00401610
VMMain proc
    ;
    ; initialization
    ;

    push    ebp
    mov     ebp, esp

    call    InitializeVMOpcodeHandlers

    ; both general purpose registers start at zero
    xor     eax, eax
    mov     vm_register_A, ax

    xor     ecx, ecx
    mov     vm_register_B, cx

    ; the stack pointer starts at 9, the index of the last of the ten words
    ; that _main copied into vm_stack from the command line argument
    mov     edx, 9
    mov     vm_stack_pointer, dx

    ; execution starts from the first word of the program
    xor     eax, eax
    mov     vm_instruction_pointer, ax

    ;
    ; main loop
    ;

    ; 0x0040163D
__vm_execution_loop:
    ; leave the loop as soon as vm_instruction_pointer >= vm_code_size
    ; (both values are zero-extended before being compared)
    movzx   ecx, vm_instruction_pointer
    movzx   edx, vm_code_size
    cmp     ecx, edx
    jge     short __last_vm_instruction_reached

    ; 0x0040164F
    call    VMFetchAndExecuteNextOpcode
    jmp     short __vm_execution_loop

    ; 0x00401656
__last_vm_instruction_reached:

    ; the virtual machine exit code is taken from the first register
    mov     ax, vm_register_A
    pop     ebp
    retn
VMMain endp

; Fills the vm_opcode_handlers table with the address of each handler.
;
; Entries are 4 bytes wide and VMFetchAndExecuteNextOpcode indexes the table
; with the opcode value, so the byte offset of an entry divided by four is
; the opcode served by that handler.
;
; function starts at virtual address 0x00401570
InitializeVMOpcodeHandlers proc
    push    ebp
    mov     ebp, esp

    mov     vm_opcode_handlers, VMOpcodeHandler_push                ; opcode 0x00
    mov     vm_opcode_handlers+4, VMOpcodeHandler_pop               ; opcode 0x01
    mov     vm_opcode_handlers+8, VMOpcodeHandler_add               ; opcode 0x02
    mov     vm_opcode_handlers+0Ch, VMOpcodeHandler_sub             ; opcode 0x03
    mov     vm_opcode_handlers+10h, VMOpcodeHandler_RotateRight     ; opcode 0x04
    mov     vm_opcode_handlers+14h, VMOpcodeHandler_RotateLeft      ; opcode 0x05
    mov     vm_opcode_handlers+18h, VMOpcodeHandler_xor             ; opcode 0x06
    mov     vm_opcode_handlers+1Ch, VMOpcodeHandler_not             ; opcode 0x07
    mov     vm_opcode_handlers+20h, VMOpcodeHandler_eq              ; opcode 0x08
    mov     vm_opcode_handlers+24h, VMOpcodeHandler_sel             ; opcode 0x09
    mov     vm_opcode_handlers+28h, VMOpcodeHandler_jmp             ; opcode 0x0A
    mov     vm_opcode_handlers+2Ch, VMOpcodeHandler_pushRegister    ; opcode 0x0B
    mov     vm_opcode_handlers+30h, VMOpcodeHandler_mov             ; opcode 0x0C
    mov     vm_opcode_handlers+34h, VMOpcodeHandler_nop             ; opcode 0x0D

    pop     ebp
    retn
InitializeVMOpcodeHandlers endp

The second part of the routine will loop until a counter reaches the end, each time calling the following function:

; Executes a single virtual machine instruction: the word found at
; vm_instructions[vm_instruction_pointer] is used as an index into the
; vm_opcode_handlers table and the selected handler is called.
;
; The instruction pointer is not touched here; every handler updates it on
; its own. The fetched word is not range-checked before it is used as a
; table index.
;
; function starts at virtual address 0x00401540
VMFetchAndExecuteNextOpcode proc
    push    ebp
    mov     ebp, esp
    push    ecx             ; presumably reserves the stack slot used by the local `opcode`

    ; opcode = vm_instructions[vm_instruction_pointer] (2-byte elements)
    movzx   eax, vm_instruction_pointer
    mov     cx, ds:vm_instructions[eax*2]
    mov     [ebp+opcode], cx

    ; call vm_opcode_handlers[opcode] (4-byte function pointers)
    movzx   edx, [ebp+opcode]
    mov     eax, vm_opcode_handlers[edx*4]
    call    eax

    mov     esp, ebp
    pop     ebp
    retn
VMFetchAndExecuteNextOpcode endp

Let's get an overview of what is happening:

  • The function pointer array is used in conjunction with indexes found in a statically allocated buffer that has been initialized at compile-time.
  • A counter is incremented each time one of those function pointers is used.
  • As we already noticed, all values are extended (or truncated) to 16-bit words.

We can now draw some conclusions:

  • Each one of the indexes used to access the function pointer array is in fact an opcode.
  • The counter that is incremented at each call is the instruction pointer. The fetch routine does not advance it; each handler does so itself, because instructions are not all the same length: some opcodes are followed by an immediate value.
  • You have probably already guessed it by now, but the global memory buffer is the virtual machine stack.
  • The virtual machine is heavily stack-based, and operates on 2-byte words.

And Now for Something Completely Different: a dump of the analyzed opcode handlers. I have put the sel opcode at the top of the list since it's the most unusual one.

; Select: pops three words and pushes one of the first two back, using the
; third as the selector.
;
;   first_word  = pop()     ; pushed when the selector is 1
;   second_word = pop()     ; pushed for any other selector value
;   third_word  = pop()     ; the selector
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x00401360
VMOpcodeHandler_sel proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    call    VMStack_pop
    mov     [ebp+third_word], ax

    ; selector == 1 ?
    movzx   eax, [ebp+third_word]
    cmp     eax, 1
    jnz     short loc_401399

    ; yes: keep the first popped word
    movzx   ecx, [ebp+first_word]
    push    ecx             ; value
    call    VMStack_push

    add     esp, 4
    jmp     short loc_4013A6

loc_401399:
    ; no: keep the second popped word
    movzx   edx, [ebp+second_word]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

loc_4013A6:
    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_sel endp

; Rotate right: pops a count (first_word) and a value (second_word), then
; pushes
;
;   ((value >> count) | (value << (16 - count))) & 0FFFFh
;
; and advances the instruction pointer by one word.
;
; function starts at virtual address 0x00401180
VMOpcodeHandler_RotateRight proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch
    push    esi

    call    VMStack_pop

    mov     [ebp+first_word], ax
    call    VMStack_pop

    ; eax = value >> count
    ; (the value is zero-extended, so its sign bit is clear and sar
    ; shifts in zeroes)
    mov     [ebp+second_word], ax
    movzx   eax, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    sar     eax, cl

    ; edx = value << (16 - count)
    movzx   edx, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    mov     esi, 10h
    sub     esi, ecx
    mov     ecx, esi
    shl     edx, cl

    ; merge the two halves and keep the low 16 bits only
    or      eax, edx
    and     eax, 0FFFFh
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    pop     esi
    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_RotateRight endp

; Rotate left: pops a count (first_word) and a value (second_word), then
; pushes
;
;   ((value << count) | (value >> (16 - count))) & 0FFFFh
;
; and advances the instruction pointer by one word.
;
; function starts at virtual address 0x004011F0
VMOpcodeHandler_RotateLeft proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch
    push    esi

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; eax = value << count
    movzx   eax, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    shl     eax, cl

    ; edx = value >> (16 - count)
    ; (the value is zero-extended, so its sign bit is clear and sar
    ; shifts in zeroes)
    movzx   edx, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    mov     esi, 10h
    sub     esi, ecx
    mov     ecx, esi
    sar     edx, cl

    ; merge the two halves and keep the low 16 bits only
    or      eax, edx
    and     eax, 0FFFFh
    mov     [ebp+value], ax

    movzx   edx, [ebp+value]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    pop     esi
    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_RotateLeft endp

; Push immediate: pushes the word that follows the opcode in the
; instruction stream. The instruction is two words long, so the instruction
; pointer is advanced twice: once to reach the operand and once to step
; past it.
;
; function starts at virtual address 0x00401030
VMOpcodeHandler_push proc
    push    ebp
    mov     ebp, esp
    push    ecx             ; presumably reserves the stack slot used by the local `immediate`

    ; vm_instruction_pointer += 1 (now indexes the operand)
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    ; immediate = vm_instructions[vm_instruction_pointer]
    movzx   ecx, vm_instruction_pointer
    mov     dx, ds:vm_instructions[ecx*2]
    mov     [ebp+immediate], dx

    movzx   eax, [ebp+immediate]
    push    eax             ; value
    call    VMStack_push
    add     esp, 4

    ; vm_instruction_pointer += 1 (now indexes the next opcode)
    mov     cx, vm_instruction_pointer
    add     cx, 1
    mov     vm_instruction_pointer, cx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_push endp

; Pop: advances the instruction pointer by one word and removes the top
; word from the virtual machine stack. The popped value is not stored
; anywhere by this handler.
;
; function starts at virtual address 0x004010C0
VMOpcodeHandler_pop proc
    push    ebp
    mov     ebp, esp

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    call    VMStack_pop

    pop     ebp
    retn
VMOpcodeHandler_pop endp

; Add: pops two words and pushes their sum. Only the low 16 bits of the sum
; are kept, so the addition wraps around. The instruction pointer is then
; advanced by one word.
;
; function starts at virtual address 0x004010E0
VMOpcodeHandler_add proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; result = first_word + second_word (truncated to 16 bits by the store)
    movzx   eax, [ebp+first_word]
    movzx   ecx, [ebp+second_word]
    add     eax, ecx
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_add endp

; Subtract: pops two words and pushes (second popped - first popped), i.e.
; the word that was on top of the stack is subtracted from the one below
; it. Only the low 16 bits of the difference are kept. The instruction
; pointer is then advanced by one word.
;
; function starts at virtual address 0x00401130
VMOpcodeHandler_sub proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; result = second_word - first_word (truncated to 16 bits by the store)
    movzx   eax, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    sub     eax, ecx
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_sub endp

; Xor: pops two words and pushes their bitwise exclusive or, then advances
; the instruction pointer by one word.
;
; function starts at virtual address 0x00401260
VMOpcodeHandler_xor proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; result = first_word ^ second_word
    movzx   eax, [ebp+first_word]
    movzx   ecx, [ebp+second_word]
    xor     eax, ecx
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_xor endp

; Not: pops one word and pushes its bitwise complement, limited to 16 bits,
; then advances the instruction pointer by one word.
;
; function starts at virtual address 0x004012B0
VMOpcodeHandler_not proc
    push    ebp
    mov     ebp, esp

    sub     esp, 8

    call    VMStack_pop
    mov     [ebp+word], ax

    ; result = ~word & 0FFFFh
    movzx   eax, [ebp+word]
    not     eax
    and     eax, 0FFFFh
    mov     [ebp+result], ax

    movzx   ecx, [ebp+result]
    push    ecx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     dx, vm_instruction_pointer
    add     dx, 1
    mov     vm_instruction_pointer, dx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_not endp

; Equal: pops two words and pushes 1 when they are equal, 0 otherwise, then
; advances the instruction pointer by one word.
;
; function starts at virtual address 0x00401300
VMOpcodeHandler_eq proc 
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; first_word == second_word ?
    movzx   eax, [ebp+first_word]
    movzx   ecx, [ebp+second_word]
    cmp     eax, ecx
    jnz     short loc_40132F

    ; equal: result = 1
    mov     edx, 1
    mov     [ebp+result], dx
    jmp     short loc_401335

loc_40132F:
    ; different: result = 0
    xor     eax, eax
    mov     [ebp+result], ax

loc_401335:
    movzx   ecx, [ebp+result]
    push    ecx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     dx, vm_instruction_pointer
    add     dx, 1
    mov     vm_instruction_pointer, dx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_eq endp

; Jump: pops a word and stores it into the instruction pointer as is. The
; destination is therefore an absolute word index into vm_instructions, and
; no further increment is applied.
;
; function starts at virtual address 0x004013C0
VMOpcodeHandler_jmp proc
    push    ebp
    mov     ebp, esp

    ; vm_instruction_pointer = pop()
    call    VMStack_pop
    mov     vm_instruction_pointer, ax

    pop     ebp
    retn
VMOpcodeHandler_jmp endp

; Push register: pushes the content of the register selected by the word
; that follows the opcode. The instruction is two words long.
;
; The jump table at off_401464 is not part of this listing; going by the
; order of the labels below, operands 0..3 presumably select register A,
; register B, the stack pointer and the instruction pointer respectively.
;
; Notes:
;   - the instruction pointer is read after it has been advanced onto the
;     operand, so that is the value pushed for the instruction pointer case;
;   - an operand above 3 skips the whole selection, and the local `word` is
;     pushed without having been written by this function.
;
; function starts at virtual address 0x004013D0
VMOpcodeHandler_pushRegister proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    ; vm_instruction_pointer += 1 (now indexes the operand)
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    ; opcode_parameter = vm_instructions[vm_instruction_pointer]
    movzx   ecx, vm_instruction_pointer
    mov     dx, ds:vm_instructions[ecx*2]
    mov     [ebp+opcode_parameter], dx

    ; dispatch on the operand; values above 3 have no jump table entry
    movzx   eax, [ebp+opcode_parameter]
    mov     [ebp+opcode_parameter_alias], eax
    cmp     [ebp+opcode_parameter_alias], 3
    ja      short __opcode_handler_end

    mov     ecx, [ebp+opcode_parameter_alias]
    jmp     ds:off_401464[ecx*4]

__read_accumulator:
    mov     dx, vm_register_A
    mov     [ebp+word], dx
    jmp     short __opcode_handler_end

__read_base_stack_pointer:
    mov     ax, vm_register_B
    mov     [ebp+word], ax
    jmp     short __opcode_handler_end

__read_stack_pointer:
    mov     cx, vm_stack_pointer
    mov     [ebp+word], cx
    jmp     short __opcode_handler_end

__read_instruction_pointer:
    mov     dx, vm_instruction_pointer
    mov     [ebp+word], dx

__opcode_handler_end:
    ; push the selected value
    movzx   eax, [ebp+word]
    push    eax
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1 (now indexes the next opcode)
    mov     cx, vm_instruction_pointer
    add     cx, 1
    mov     vm_instruction_pointer, cx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_pushRegister endp

; Move to register: pops a word and stores it into the register selected by
; the word that follows the opcode. The instruction is two words long.
;
; The jump table at off_401510 is not part of this listing; going by the
; order of the labels below, operands 0..3 presumably select register A,
; register B, the stack pointer and the instruction pointer respectively.
;
; Notes:
;   - the value is popped before the operand is validated, so an operand
;     above 3 still consumes one word from the stack;
;   - the final increment is applied in every case, including a write to
;     the instruction pointer: execution then resumes at (value + 1).
;
; function starts at virtual address 0x00401480
VMOpcodeHandler_mov proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    ; vm_instruction_pointer += 1 (now indexes the operand)
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    ; vm_register_id = vm_instructions[vm_instruction_pointer]
    movzx   ecx, vm_instruction_pointer
    mov     dx, ds:vm_instructions[ecx*2]
    mov     [ebp+vm_register_id], dx

    ; value = pop()
    call    VMStack_pop
    mov     [ebp+value], ax

    ; dispatch on the operand; values above 3 have no jump table entry
    movzx   eax, [ebp+vm_register_id]
    mov     [ebp+vm_register_id_copy], eax
    cmp     [ebp+vm_register_id_copy], 3
    ja      short __increment_instruction_pointer

    mov     ecx, [ebp+vm_register_id_copy]
    jmp     ds:off_401510[ecx*4]

__set_accumulator:
    mov     dx, [ebp+value]
    mov     vm_register_A, dx
    jmp     short __increment_instruction_pointer

__set_base_stack_pointer:
    mov     ax, [ebp+value]
    mov     vm_register_B, ax
    jmp     short __increment_instruction_pointer

__set_stack_pointer:
    mov     cx, [ebp+value]
    mov     vm_stack_pointer, cx
    jmp     short __increment_instruction_pointer

__set_instruction_pointer:
    mov     dx, [ebp+value]
    mov     vm_instruction_pointer, dx

 __increment_instruction_pointer:
    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_mov endp

; No operation: only advances the instruction pointer by one word.
;
; function starts at virtual address 0x00401520
VMOpcodeHandler_nop proc
    push    ebp
    mov     ebp, esp

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    pop     ebp
    retn
VMOpcodeHandler_nop endp

You should now have a good understanding of how each opcode works, but you can't help but agree with me when I say that debugging this is anything but comfortable. For this reason, I have chosen to dump the virtual machine code (see virtual address 0x0040A140) and write a simple disassembler.

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Mnemonic printed for each opcode; the position inside the table is the
// opcode value, following the order in which InitializeVMOpcodeHandlers
// installs the handlers. Both push variants share the same mnemonic and are
// told apart by their operand in the listing.
const std::vector<std::string> mnemonics =
{
    "push",     // 0x00: push <immediate>
    "pop",      // 0x01
    "add",      // 0x02
    "sub",      // 0x03
    "trm1",     // 0x04: served by VMOpcodeHandler_RotateRight
    "trm2",     // 0x05: served by VMOpcodeHandler_RotateLeft
    "xor",      // 0x06
    "not",      // 0x07
    "eq",       // 0x08
    "sel",      // 0x09
    "jmp",      // 0x0A
    "push",     // 0x0B: push <register>
    "mov",      // 0x0C: mov <register>, <top of the stack>
    "nop"       // 0x0D
};

int main(int argc, char *argv[]);
const char *GetRegisterName(std::uint16_t register_id);

namespace
{
    // Opcodes that need special treatment while printing the listing. The
    // values are the positions of the matching entries in `mnemonics`.
    const std::uint16_t kOpcodePushImmediate = 0;
    const std::uint16_t kOpcodeJump = 10;
    const std::uint16_t kOpcodePushRegister = 11;
    const std::uint16_t kOpcodeMovRegister = 12;

    // Loads the whole file at `path` into memory.
    // Throws std::runtime_error when the file can't be opened, measured or
    // read.
    std::vector<std::uint8_t> ReadDump(const char *path)
    {
        std::ifstream input_file(path, std::ios_base::in |
            std::ios_base::binary);

        if (!input_file)
            throw std::runtime_error("Failed to open the input file");

        input_file.seekg(0, std::ios_base::end);
        if (!input_file)
            throw std::runtime_error("Seek failed");

        // tellg() reports failures by returning -1; that value must never
        // reach the vector constructor
        const std::streamoff input_file_size = input_file.tellg();
        if (!input_file || input_file_size < 0)
            throw std::runtime_error("Failed to get the file size");

        input_file.seekg(0);
        if (!input_file)
            throw std::runtime_error("Seek failed");

        std::vector<std::uint8_t> buffer(
            static_cast<std::size_t>(input_file_size));

        if (!buffer.empty())
        {
            input_file.read(reinterpret_cast<char *>(buffer.data()),
                static_cast<std::streamsize>(buffer.size()));

            if (!input_file)
                throw std::runtime_error("Failed to read the file");
        }

        return buffer;
    }

    // Returns the 16-bit word stored at `word_index`. The two bytes are
    // combined by hand as a little-endian value (the dump comes from an x86
    // executable), so the result does not depend on the byte order or on
    // the alignment rules of the machine running the disassembler.
    // Throws std::runtime_error when the word lies past the end of `code`.
    std::uint16_t ReadWord(const std::vector<std::uint8_t> &code,
        std::size_t word_index)
    {
        const std::size_t offset = word_index * 2;
        if (offset + 1 >= code.size())
            throw std::runtime_error("Truncated instruction stream");

        return static_cast<std::uint16_t>(code[offset] |
            (code[offset + 1] << 8));
    }

    // Prints one line per instruction: the address (as a word index), the
    // opcode, the mnemonic and the operand, when the opcode has one.
    // A trailing odd byte, if any, is ignored because it can't hold a word.
    // Throws std::runtime_error on an unknown opcode, on an unknown register
    // id and on an operand that is missing from the end of the dump.
    void Disassemble(const std::vector<std::uint8_t> &code,
        std::ostream &output)
    {
        const std::size_t word_count = code.size() / 2;
        std::size_t instruction_pointer = 0;

        while (instruction_pointer < word_count)
        {
            const std::size_t instruction_address = instruction_pointer;

            // the opcode is kept in its own variable: the jump check at
            // the end of the iteration must look at it, and not at
            // whatever operand was consumed in the meantime
            const std::uint16_t opcode =
                ReadWord(code, instruction_pointer++);

            output << std::hex << std::setfill('0') << std::setw(4)
                << instruction_address;

            output << "\t\t";

            output << std::hex << std::setfill('0') << std::setw(2) <<
                static_cast<unsigned int>(opcode);

            // the dump is untrusted input; never index the mnemonic table
            // with a value that has not been validated
            if (opcode >= mnemonics.size())
                throw std::runtime_error("Invalid opcode");

            output << "\t" << mnemonics[opcode] << " ";

            // opcodes that require an immediate parameter need to
            // increment the instruction pointer twice
            switch (opcode)
            {
                // push <immediate>
                case kOpcodePushImmediate:
                {
                    const std::uint16_t value =
                        ReadWord(code, instruction_pointer++);

                    output << "0x" << std::hex << std::setfill('0') <<
                        std::setw(4) << value;

                    // also show the ascii encoding; 0x7E ('~') is the
                    // last printable character
                    if (value >= 0x20 && value <= 0x7E)
                    {
                        output << " ; '" << static_cast<char>(value)
                            << "'";
                    }

                    break;
                }

                // push <register_id>
                case kOpcodePushRegister:
                {
                    const std::uint16_t value =
                        ReadWord(code, instruction_pointer++);

                    output << GetRegisterName(value);

                    break;
                }

                // mov <register_id>, stack[sp]
                case kOpcodeMovRegister:
                {
                    const std::uint16_t value =
                        ReadWord(code, instruction_pointer++);

                    output << GetRegisterName(value);
                    output << ", ST(0)";

                    break;
                }

                default:
                    break;
            }

            output << '\n';

            // visually separate the code that follows a jump instruction:
            // the separator line only carries the address of the jump
            if (opcode == kOpcodeJump)
            {
                output << std::hex << std::setfill('0') << std::setw(4)
                    << instruction_address << '\n';
            }
        }
    }
}

// Entry point: expects the path of the dumped virtual machine code as the
// only argument and prints its disassembly to the standard output.
// Returns 0 on success and 1 on a usage error, an I/O error or a malformed
// dump.
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        std::cout << "Usage:\n";
        std::cout << "smokestack_disasm dump" << std::endl;

        return 1;
    }

    std::vector<std::uint8_t> buffer;

    try
    {
        buffer = ReadDump(argv[1]);
    }

    catch (const std::exception &exception)
    {
        std::cout << exception.what() << std::endl;
        return 1;
    }

    try
    {
        Disassemble(buffer, std::cout);
    }

    catch (const std::exception &exception)
    {
        // decoding errors always happen in the middle of a line; close it
        // before reporting the problem
        std::cout << "\n" << exception.what() << std::endl;
        return 1;
    }

    std::cout.flush();
    return 0;
}

// Translates a virtual machine register id (the operand of the register
// based push and mov opcodes) into the name shown in the listing.
// Throws std::runtime_error when the id is not one of the four known
// registers.
const char *GetRegisterName(std::uint16_t register_id)
{
    // the position inside the table is the register id
    static const char *const register_names[] =
    {
        "ax", "bp", "sp", "ip"
    };

    if (register_id >= sizeof(register_names) / sizeof(register_names[0]))
        throw std::runtime_error("Invalid register id");

    return register_names[register_id];
}

The following is the full output of the disassembler, including my own comments.

The correct string is then the one that allows the virtual machine to execute the program to the end: kYwxCbJoLp. There's no need to analyze the rest of the executable; pass those characters back to the program and it will print our flag: A_p0p_pu$H_&_a_Jmp@flare-on.com.