Solving SmokeStack, from the third Flare-On Challenge


Note: This article was published right after the third Flare-On Challenge ended.

Official writeups can be found here: 2016 Flare-On Challenge solutions from fireeye.com

SmokeStack is the fifth level of the third edition of the Flare On Challenge organized by FireEye. I’ve decided to write a post about it because this is one of the two levels I’ve enjoyed the most (the other being CHIMERA).

I will be using the assembly I’ve annotated from the start to make things easier to understand. A little warning: this post is (really) verbose, as I’ve included the assembly code in its entirety.

Let’s get started!

The application expects the user to pass 10 characters as the first parameter.

; Excerpt of the program entry point: command line validation performed
; before the virtual machine is set up.
;
; function starts at virtual address 0x00402F30
_main proc
    ; ...

    ; 0x00402F76
    ; leave unless at least one argument was supplied (argc > 1)
    cmp     [ebp+argc], 1
    jle     __exit

    ; 0x00402F80
    ; strlen(argv[1])
    mov     eax, [ebp+argv]
    mov     ecx, [eax+4]
    push    ecx
    call    _strlen
    ; NOTE(review): no flag-setting instruction is shown between the call
    ; and this jump; the comparison of the returned length against the
    ; expected 10 characters was presumably left out of this excerpt
    jl      __exit

    ; ...
_main endp

Each character is then extended to a 2-byte value and copied to a global buffer. The code is sometimes pretty verbose, which is a clear indication that it was not compiled with optimization flags.

; Excerpt of the program entry point: copies the first 10 characters of
; argv[1] into vm_stack, one 16-bit word per character.
;
; The blocks are listed in logical order; the address comments show that
; the increment step (0x00402FA5) sits before the loop check (0x00402FAE)
; in the binary.
;
; function starts at virtual address 0x00402F30
_main proc
    ; ...

    ; 0x00402F9C
    ; i = 0, then go straight to the loop check
    mov     [ebp+i], 0
    jmp     short __vm_stack_initialization_loop

    ; 0x00402FAE
    ; loop check: leave once i >= 10
__vm_stack_initialization_loop:
    cmp     [ebp+i], 0Ah
    jge     short __start_vm_execution_loop

    ; 0x00402FB4
    ; ecx = argv[1]
    mov     eax, [ebp+argv]
    mov     ecx, [eax+4]

    ; ax = argv[1][i], sign-extended from 8 to 16 bits
    mov     edx, [ebp+i]
    movsx   ax, byte ptr [ecx+edx]

    ; vm_stack[i] = ax (entries are 2 bytes wide)
    mov     ecx, [ebp+i]
    mov     vm_stack[ecx*2], ax

    jmp     short __vm_stack_initialization_loop_condition

    ; 0x00402FA5
    ; despite the label name, this is the increment step (i += 1)
__vm_stack_initialization_loop_condition:
    mov     edx, [ebp+i]
    add     edx, 1
    mov     [ebp+i], edx

    ; ...
_main endp

Take a look at the cross references and notice how this is the only place where the program accesses this buffer directly, as the rest of the code will make use of the following two functions to access it:

; Pushes a 16-bit value on the virtual machine stack.
;
; In:  value (stack argument, 16-bit); every caller shown in this listing
;      removes it afterwards with "add esp, 4"
; Out: vm_stack_pointer is incremented by one, then the value is stored
;      at vm_stack[vm_stack_pointer]
;
; No bounds check on the stack pointer is performed here.
;
; function starts at virtual address 0x00401000
VMStack_push proc value:word
    push    ebp
    mov     ebp, esp

    ; vm_stack_pointer += 1 (pre-increment)
    mov     ax, vm_stack_pointer
    add     ax, 1
    mov     vm_stack_pointer, ax

    ; vm_stack[vm_stack_pointer] = value
    movzx   ecx, vm_stack_pointer
    mov     dx, [ebp+value]
    mov     vm_stack[ecx*2], dx

    pop     ebp
    retn
VMStack_push endp

; Pops a 16-bit value from the virtual machine stack.
;
; Out: ax = vm_stack[vm_stack_pointer] as it was on entry;
;      vm_stack_pointer is then decremented by one
;
; No bounds check on the stack pointer is performed here.
;
; function starts at virtual address 0x00401080
VMStack_pop proc
    push    ebp
    mov     ebp, esp
    ; appears to only reserve the slot used for the local [ebp+word];
    ; it is released by "mov esp, ebp" below, never popped
    push    ecx

    ; word = vm_stack[vm_stack_pointer]
    movzx   eax, vm_stack_pointer
    mov     cx, vm_stack[eax*2]
    mov     [ebp+word], cx

    ; vm_stack_pointer -= 1 (post-decrement)
    mov     dx, vm_stack_pointer
    sub     dx, 1
    mov     vm_stack_pointer, dx

    ; return value
    mov     ax, [ebp+word]

    mov     esp, ebp
    pop     ebp
    retn
VMStack_pop endp

The counter grows when a value is saved, and decreases when a value is removed; it is pretty obvious that this is some kind of LIFO stack implementation.

Back to the entry point: the buffer has been populated with our string and the first initialization phase ends at virtual address 0x00402FCF. The next function we’re going to enter is located at virtual address 0x00401610 and will soak up most of the execution time - this is where we are going to focus our efforts.

The first part is not really interesting, as it’s just initialization of values and function pointers.

; Virtual machine entry point: resets the machine state, then executes
; instructions until the instruction pointer reaches vm_code_size.
;
; Out: ax = vm_register_A (the virtual machine exit code)
;
; function starts at address 0x00401610
VMMain proc
    ;
    ; initialization
    ;

    push    ebp
    mov     ebp, esp

    call    InitializeVMOpcodeHandlers

    ; vm_register_A = 0
    xor     eax, eax
    mov     vm_register_A, ax

    ; vm_register_B = 0
    xor     ecx, ecx
    mov     vm_register_B, cx

    ; vm_stack_pointer = 9: VMStack_pop reads vm_stack[vm_stack_pointer],
    ; so the first pop returns the word at index 9
    mov     edx, 9
    mov     vm_stack_pointer, dx

    ; vm_instruction_pointer = 0
    xor     eax, eax
    mov     vm_instruction_pointer, ax

    ;
    ; main loop
    ;

    ; 0x0040163D
    ; stop as soon as vm_instruction_pointer >= vm_code_size; both values
    ; are zero-extended 16-bit words
__vm_execution_loop:
    movzx   ecx, vm_instruction_pointer
    movzx   edx, vm_code_size
    cmp     ecx, edx
    jge     short __last_vm_instruction_reached

    ; 0x0040164F
    ; the handler that gets called is responsible for advancing the
    ; instruction pointer
    call    VMFetchAndExecuteNextOpcode
    jmp     short __vm_execution_loop

    ; 0x00401656
__last_vm_instruction_reached:

    ; the virtual machine exit code is taken from the first register
    mov     ax, vm_register_A
    pop     ebp
    retn
VMMain endp

; Fills the vm_opcode_handlers table with the address of each opcode
; handler. Entries are 4 bytes wide and VMFetchAndExecuteNextOpcode indexes
; the table with the opcode value, so the entry at offset (N * 4) handles
; opcode N.
;
; function starts at virtual address 0x00401570
InitializeVMOpcodeHandlers proc
    push    ebp
    mov     ebp, esp

    mov     vm_opcode_handlers, VMOpcodeHandler_push                ; opcode 0x00
    mov     vm_opcode_handlers+4, VMOpcodeHandler_pop               ; opcode 0x01
    mov     vm_opcode_handlers+8, VMOpcodeHandler_add               ; opcode 0x02
    mov     vm_opcode_handlers+0Ch, VMOpcodeHandler_sub             ; opcode 0x03
    mov     vm_opcode_handlers+10h, VMOpcodeHandler_RotateRight     ; opcode 0x04
    mov     vm_opcode_handlers+14h, VMOpcodeHandler_RotateLeft      ; opcode 0x05
    mov     vm_opcode_handlers+18h, VMOpcodeHandler_xor             ; opcode 0x06
    mov     vm_opcode_handlers+1Ch, VMOpcodeHandler_not             ; opcode 0x07
    mov     vm_opcode_handlers+20h, VMOpcodeHandler_eq              ; opcode 0x08
    mov     vm_opcode_handlers+24h, VMOpcodeHandler_sel             ; opcode 0x09
    mov     vm_opcode_handlers+28h, VMOpcodeHandler_jmp             ; opcode 0x0a
    mov     vm_opcode_handlers+2Ch, VMOpcodeHandler_pushRegister    ; opcode 0x0b
    mov     vm_opcode_handlers+30h, VMOpcodeHandler_mov             ; opcode 0x0c
    mov     vm_opcode_handlers+34h, VMOpcodeHandler_nop             ; opcode 0x0d

    pop     ebp
    retn
InitializeVMOpcodeHandlers endp

The second part of the routine will loop until a counter reaches the end, each time calling the following function:

; Reads the 16-bit opcode at vm_instructions[vm_instruction_pointer] and
; calls the matching entry of vm_opcode_handlers.
;
; The opcode is used as a table index without any range check, and the
; instruction pointer is not modified here: advancing it is left to the
; handler.
;
; function starts at virtual address 0x00401540
VMFetchAndExecuteNextOpcode proc
    push    ebp
    mov     ebp, esp
    ; appears to only reserve the slot used for the local [ebp+opcode]
    push    ecx

    ; opcode = vm_instructions[vm_instruction_pointer]
    movzx   eax, vm_instruction_pointer
    mov     cx, ds:vm_instructions[eax*2]
    mov     [ebp+opcode], cx

    ; vm_opcode_handlers[opcode]()
    movzx   edx, [ebp+opcode]
    mov     eax, vm_opcode_handlers[edx*4]
    call    eax

    mov     esp, ebp
    pop     ebp
    retn
VMFetchAndExecuteNextOpcode endp

Let’s get an overview of what is happening:

  • The function pointer array is used in conjunction with indexes found in a statically allocated buffer that has been initialized at compile-time.
  • A counter is incremented each time one of those function pointers is used.
  • As we already noticed, all values are extended (or truncated) to 16-bit words.

We can now draw some conclusions:

  • Each one of the indexes used to access the function pointer array is in fact an opcode.
  • The counter that is incremented at each call is the instruction pointer. It is not incremented automatically because the opcode length is not always the same, as they may optionally require immediate values.
  • You have probably already guessed it by now, but the global memory buffer is the virtual machine stack.
  • The virtual machine is heavily stack-based, and operates on 2-byte words.

And Now for Something Completely Different: a dump of the analyzed opcode handlers. I have put the sel opcode at the top of the list since it’s the most unusual one.

; Opcode 0x09 (sel): pops three words and uses the last one popped to decide
; which of the other two is pushed back on the stack.
;
;   first  = pop()      (top of the stack)
;   second = pop()
;   third  = pop()      (the selector)
;   push(third == 1 ? first : second)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x00401360
VMOpcodeHandler_sel proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    call    VMStack_pop
    mov     [ebp+third_word], ax

    ; selector check: only the exact value 1 selects the first word
    movzx   eax, [ebp+third_word]
    cmp     eax, 1
    jnz     short loc_401399

    ; selector == 1: keep the word that was on top of the stack
    movzx   ecx, [ebp+first_word]
    push    ecx             ; value
    call    VMStack_push

    add     esp, 4
    jmp     short loc_4013A6

    ; selector != 1: keep the word that was below it
loc_401399:
    movzx   edx, [ebp+second_word]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
loc_4013A6:
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_sel endp

; Opcode 0x04: rotates a 16-bit word to the right.
;
;   first  = pop()      (rotation count)
;   second = pop()      (value to rotate)
;   push(((second >> first) | (second << (0x10 - first))) & 0xFFFF)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x00401180
VMOpcodeHandler_RotateRight proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch
    push    esi

    call    VMStack_pop

    mov     [ebp+first_word], ax
    call    VMStack_pop

    ; eax = second >> first; the value was zero-extended to 32 bits, so
    ; the arithmetic shift shifts in zeroes here
    mov     [ebp+second_word], ax
    movzx   eax, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    sar     eax, cl

    ; edx = second << (0x10 - first)
    movzx   edx, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    mov     esi, 10h
    sub     esi, ecx
    mov     ecx, esi
    shl     edx, cl
    ; merge both halves and keep the low 16 bits only
    or      eax, edx
    and     eax, 0FFFFh
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    pop     esi
    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_RotateRight endp

; Opcode 0x05: rotates a 16-bit word to the left.
;
;   first  = pop()      (rotation count)
;   second = pop()      (value to rotate)
;   push(((second << first) | (second >> (0x10 - first))) & 0xFFFF)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x004011F0
VMOpcodeHandler_RotateLeft proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch
    push    esi

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; eax = second << first
    movzx   eax, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    shl     eax, cl

    ; edx = second >> (0x10 - first); the value was zero-extended to
    ; 32 bits, so the arithmetic shift shifts in zeroes here
    movzx   edx, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    mov     esi, 10h
    sub     esi, ecx
    mov     ecx, esi
    sar     edx, cl
    ; merge both halves and keep the low 16 bits only
    or      eax, edx
    and     eax, 0FFFFh
    mov     [ebp+value], ax

    movzx   edx, [ebp+value]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    pop     esi
    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_RotateLeft endp

; Opcode 0x00 (push <immediate>): pushes the word that follows the opcode
; in vm_instructions. The instruction is two words long, so the
; instruction pointer is advanced twice.
;
; function starts at virtual address 0x00401030
VMOpcodeHandler_push proc
    push    ebp
    mov     ebp, esp
    ; appears to only reserve the slot used for the local [ebp+immediate]
    push    ecx

    ; vm_instruction_pointer += 1 (now points at the immediate)
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    ; immediate = vm_instructions[vm_instruction_pointer]
    movzx   ecx, vm_instruction_pointer
    mov     dx, ds:vm_instructions[ecx*2]
    mov     [ebp+immediate], dx

    movzx   eax, [ebp+immediate]
    push    eax             ; value
    call    VMStack_push
    add     esp, 4

    ; vm_instruction_pointer += 1 (now points at the next opcode)
    mov     cx, vm_instruction_pointer
    add     cx, 1
    mov     vm_instruction_pointer, cx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_push endp

; Opcode 0x01 (pop): advances the instruction pointer by one word, then
; removes the word on top of the virtual machine stack. The popped value
; is not stored anywhere by this handler.
;
; function starts at virtual address 0x004010C0
VMOpcodeHandler_pop proc
    push    ebp
    mov     ebp, esp

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    call    VMStack_pop

    pop     ebp
    retn
VMOpcodeHandler_pop endp

; Opcode 0x02 (add): pops two words and pushes their sum.
;
;   first  = pop()
;   second = pop()
;   push(first + second)    (only the low 16 bits are kept)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x004010E0
VMOpcodeHandler_add proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; the sum is computed on 32 bits and truncated by the 16-bit store
    movzx   eax, [ebp+first_word]
    movzx   ecx, [ebp+second_word]
    add     eax, ecx
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_add endp

; Opcode 0x03 (sub): pops two words and pushes their difference.
;
;   first  = pop()      (top of the stack)
;   second = pop()
;   push(second - first)    (only the low 16 bits are kept)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x00401130
VMOpcodeHandler_sub proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    ; operand order matters: the word popped last is the minuend
    movzx   eax, [ebp+second_word]
    movzx   ecx, [ebp+first_word]
    sub     eax, ecx
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_sub endp

; Opcode 0x06 (xor): pops two words and pushes their bitwise exclusive or.
;
;   first  = pop()
;   second = pop()
;   push(first ^ second)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x00401260
VMOpcodeHandler_xor proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    movzx   eax, [ebp+first_word]
    movzx   ecx, [ebp+second_word]
    xor     eax, ecx
    mov     [ebp+result], ax

    movzx   edx, [ebp+result]
    push    edx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_xor endp

; Opcode 0x07 (not): pops one word and pushes its bitwise complement,
; limited to 16 bits.
;
;   push(~pop() & 0xFFFF)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x004012B0
VMOpcodeHandler_not proc
    push    ebp
    mov     ebp, esp

    sub     esp, 8

    call    VMStack_pop
    mov     [ebp+word], ax

    ; the complement is computed on 32 bits, hence the mask
    movzx   eax, [ebp+word]
    not     eax
    and     eax, 0FFFFh
    mov     [ebp+result], ax

    movzx   ecx, [ebp+result]
    push    ecx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     dx, vm_instruction_pointer
    add     dx, 1
    mov     vm_instruction_pointer, dx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_not endp

; Opcode 0x08 (eq): pops two words and pushes 1 when they are equal,
; 0 otherwise.
;
;   first  = pop()
;   second = pop()
;   push(first == second ? 1 : 0)
;
; The instruction pointer is then advanced by one word.
;
; function starts at virtual address 0x00401300
VMOpcodeHandler_eq proc 
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    call    VMStack_pop
    mov     [ebp+first_word], ax

    call    VMStack_pop
    mov     [ebp+second_word], ax

    movzx   eax, [ebp+first_word]
    movzx   ecx, [ebp+second_word]
    cmp     eax, ecx
    jnz     short loc_40132F

    ; equal: result = 1
    mov     edx, 1
    mov     [ebp+result], dx
    jmp     short loc_401335

    ; different: result = 0
loc_40132F:
    xor     eax, eax
    mov     [ebp+result], ax

loc_401335:
    movzx   ecx, [ebp+result]
    push    ecx             ; value
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1
    mov     dx, vm_instruction_pointer
    add     dx, 1
    mov     vm_instruction_pointer, dx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_eq endp

; Opcode 0x0a (jmp): pops one word and stores it in the instruction
; pointer. Unlike the other handlers, no increment follows: execution
; resumes exactly at the popped address.
;
; function starts at virtual address 0x004013C0
VMOpcodeHandler_jmp proc
    push    ebp
    mov     ebp, esp

    ; vm_instruction_pointer = pop()
    call    VMStack_pop
    mov     vm_instruction_pointer, ax

    pop     ebp
    retn
VMOpcodeHandler_jmp endp

; Opcode 0x0b (push <register_id>): pushes the value of a virtual machine
; register. The register id is the word that follows the opcode, so the
; instruction is two words long and the instruction pointer is advanced
; twice.
;
; The jump table at off_401464 is not part of this listing; it presumably
; maps the ids 0..3 to the four labels below, in order.
;
; function starts at virtual address 0x004013D0
VMOpcodeHandler_pushRegister proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    ; vm_instruction_pointer += 1 (now points at the register id)
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    ; opcode_parameter = vm_instructions[vm_instruction_pointer]
    movzx   ecx, vm_instruction_pointer
    mov     dx, ds:vm_instructions[ecx*2]
    mov     [ebp+opcode_parameter], dx

    ; ids above 3 skip the dispatch; [ebp+word] has not been written on
    ; that path, yet it is still pushed at __opcode_handler_end
    movzx   eax, [ebp+opcode_parameter]
    mov     [ebp+opcode_parameter_alias], eax
    cmp     [ebp+opcode_parameter_alias], 3
    ja      short __opcode_handler_end

    mov     ecx, [ebp+opcode_parameter_alias]
    jmp     ds:off_401464[ecx*4]

__read_accumulator:
    mov     dx, vm_register_A
    mov     [ebp+word], dx
    jmp     short __opcode_handler_end

__read_base_stack_pointer:
    mov     ax, vm_register_B
    mov     [ebp+word], ax
    jmp     short __opcode_handler_end

__read_stack_pointer:
    mov     cx, vm_stack_pointer
    mov     [ebp+word], cx
    jmp     short __opcode_handler_end

    ; the value read here has already been incremented once above, so it
    ; is the address of the register id operand
__read_instruction_pointer:
    mov     dx, vm_instruction_pointer
    mov     [ebp+word], dx

__opcode_handler_end:
    movzx   eax, [ebp+word]
    push    eax
    call    VMStack_push

    add     esp, 4

    ; vm_instruction_pointer += 1 (now points at the next opcode)
    mov     cx, vm_instruction_pointer
    add     cx, 1
    mov     vm_instruction_pointer, cx

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_pushRegister endp

; Opcode 0x0c (mov <register_id>, ST(0)): pops one word and stores it in a
; virtual machine register. The register id is the word that follows the
; opcode, so the instruction is two words long.
;
; The jump table at off_401510 is not part of this listing; it presumably
; maps the ids 0..3 to the four labels below, in order.
;
; function starts at virtual address 0x00401480
VMOpcodeHandler_mov proc
    push    ebp
    mov     ebp, esp

    sub     esp, 0Ch

    ; vm_instruction_pointer += 1 (now points at the register id)
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    ; vm_register_id = vm_instructions[vm_instruction_pointer]
    movzx   ecx, vm_instruction_pointer
    mov     dx, ds:vm_instructions[ecx*2]
    mov     [ebp+vm_register_id], dx

    ; the value is popped before the id is validated, so it is discarded
    ; when the id is above 3
    call    VMStack_pop
    mov     [ebp+value], ax

    movzx   eax, [ebp+vm_register_id]
    mov     [ebp+vm_register_id_copy], eax
    cmp     [ebp+vm_register_id_copy], 3
    ja      short __increment_instruction_pointer

    mov     ecx, [ebp+vm_register_id_copy]
    jmp     ds:off_401510[ecx*4]

__set_accumulator:
    mov     dx, [ebp+value]
    mov     vm_register_A, dx
    jmp     short __increment_instruction_pointer

__set_base_stack_pointer:
    mov     ax, [ebp+value]
    mov     vm_register_B, ax
    jmp     short __increment_instruction_pointer

__set_stack_pointer:
    mov     cx, [ebp+value]
    mov     vm_stack_pointer, cx
    jmp     short __increment_instruction_pointer

    ; falls through to the increment below, so execution resumes at
    ; value + 1
__set_instruction_pointer:
    mov     dx, [ebp+value]
    mov     vm_instruction_pointer, dx

    ; vm_instruction_pointer += 1
 __increment_instruction_pointer:
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    mov     esp, ebp
    pop     ebp
    retn
VMOpcodeHandler_mov endp

; Opcode 0x0d (nop): only advances the instruction pointer by one word.
;
; function starts at virtual address 0x00401520
VMOpcodeHandler_nop proc
    push    ebp
    mov     ebp, esp

    ; vm_instruction_pointer += 1
    mov     ax, vm_instruction_pointer
    add     ax, 1
    mov     vm_instruction_pointer, ax

    pop     ebp
    retn
VMOpcodeHandler_nop endp

You should now have a good understanding of how each opcode works, but you can’t help but agree with me when I say that debugging this is anything but comfortable. For this reason, I have chosen to dump the virtual machine code (see virtual address 0x0040A140) and write a simple disassembler.

#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Mnemonic printed for each opcode, indexed by the opcode value. The order
// follows the handler table filled by InitializeVMOpcodeHandlers:
// "trm1" and "trm2" are the slots of the RotateRight and RotateLeft
// handlers, and the second "push" (opcode 11) is the push-register handler.
const std::vector<std::string> mnemonics =
{
    "push", "pop", "add", "sub",    // opcodes 0 - 3
    "trm1", "trm2", "xor", "not",   // opcodes 4 - 7
    "eq", "sel", "jmp", "push",     // opcodes 8 - 11
    "mov", "nop"                    // opcodes 12 - 13
};

int main(int argc, char *argv[]);
const char *GetRegisterName(std::uint16_t register_id);

int main(int argc, char *argv[])
{
    static_cast<void>(argc);
    static_cast<void>(argv);
    
    if (argc != 2)
    {
        std::cout << "Usage:\n";
        std::cout << "smokestack_disasm dump" << std::endl;

        return 1;
    }

    std::vector<std::uint8_t> buffer;

    try
    {
        std::fstream input_file;
        input_file.open(argv[1], std::ios_base::in |
            std::ios_base::binary);

        if (!input_file)
            throw std::runtime_error("Failed to open the input file");

        input_file.seekg(0, std::ios_base::end);
        if (!input_file)
            throw std::runtime_error("Seek failed");

        std::streamsize input_file_size = input_file.tellg();
        if (!input_file)
            throw std::runtime_error("Failed to get the file size");

        input_file.seekg(0);
        if (!input_file)
            throw std::runtime_error("Seek failed");

        buffer.resize(input_file_size);
        if (buffer.size() != input_file_size)
            throw std::runtime_error("Memory allocation failed");

        input_file.read(reinterpret_cast<char *>(buffer.data()),
            input_file_size);

        if (!input_file)
            throw std::runtime_error("Failed to read the file");

        input_file.close();
    }

    catch (const std::exception &exception)
    {
        std::cout << exception.what() << std::endl;
        return 1;
    }

    const std::uint8_t *ptr = buffer.data();

    while (ptr < buffer.data() + buffer.size())
    {
        std::uint32_t instruction_pointer = (ptr - buffer.data()) / 2;

        std::cout << std::hex << std::setfill('0') << std::setw(4)
            << instruction_pointer;

        std::cout << "\t\t";

        std::cout << std::hex << std::setfill('0') << std::setw(2) <<
            static_cast<int>(*ptr);

        std::cout << "\t" << mnemonics[*ptr] << " ";

        // opcodes that require immediate parameters needs to
        // increment the instruction pointer twice
        switch (*ptr)
        {
            // push <immediate>
            case 0:
            {
                ptr += 2;

                std::uint16_t value = 
                    *reinterpret_cast<const std::uint16_t *>(ptr);

                std::cout << "0x" << std::hex << std::setfill('0') <<
                    std::setw(4) << value;

                // also show ascii encoding
                if (value >= 0x20 && value <= 0x7D)
                {
                    std::cout << " ; '" << static_cast<char>(value)
                        << "'";
                }

                break;
            }

            // push <register_id>
            case 11:
            {
                ptr += 2;

                std::uint16_t value =
                    *reinterpret_cast<const std::uint16_t *>(ptr);

                std::cout << GetRegisterName(value);

                break;
            }

            // mov <register_id>, stack[sp]
            case 12:
            {
                ptr += 2;

                std::uint16_t value =
                    *reinterpret_cast<const std::uint16_t *>(ptr);

                std::cout << GetRegisterName(value);
                std::cout << ", ST(0)";

                break;
            }

            default:
                break;
        }

        std::cout << std::endl;

        // show an empty line after we have printed a jump instruction
        if (*ptr == 10)
        {
            std::cout << std::hex << std::setfill('0') << std::setw(4)
                << instruction_pointer << std::endl;
        }

        ptr += 2;
    }

    return 0;
}

// Returns the name printed for a virtual machine register id (0 to 3).
// Throws std::runtime_error when the id does not name a register.
const char *GetRegisterName(std::uint16_t register_id)
{
    static const char *const register_names[] =
    {
        "ax", "bp", "sp", "ip"
    };

    const std::uint16_t register_count = static_cast<std::uint16_t>(
        sizeof(register_names) / sizeof(register_names[0]));

    if (register_id >= register_count)
        throw std::runtime_error("Invalid register id");

    return register_names[register_id];
}

The following is the full output of the disassembler, including my own comments.

0000    00  push 0x0021
0002    02  add           ; \ adds 0x21 to the last character in the
0003    00  push 0x0091   ; / program argument
0005    08  eq 
0006    00  push 0x0016
0008    00  push 0x000c   ; \ this is what we should take. last char
000a    09  sel           ; / is: 0x91 - 0x21 = 'p'
000b    0a  jmp 
000b
000c    0b  push ax       ; \
000e    00  push 0x000c   ; | ax is set to 0 during startup
0010    02  add           ; | ax = ST(0) = 0 + 0x0c
0011    0c  mov ax, ST(0) ; /
0013    00  push 0x001d   ; \
0015    0a  jmp           ; / we're going to jump to address 0x001d
0015
0016    0b  push ax
0018    00  push 0x0063
001a    02  add 
001b    0c  mov ax, ST(0)
001d    00  push 0x0018   ; \
001f    06  xor           ; | next character: 0x54 ^ 0x18 = 'L'
0020    00  push 0x0054   ; |
0022    08  eq            ; /
0023    00  push 0x0033   ; \
0025    00  push 0x0029   ; | we're going to jump to 0x0029
0027    09  sel           ; |
0028    0a  jmp           ; /
0028
0029    0b  push ax       ; \
002b    00  push 0x002c   ; | ax is still 0x0C; result is 0x38
002d    02  add           ; | and is saved to ax again
002e    0c  mov ax, ST(0) ; /
0030    00  push 0x003d   ; \
0032    0a  jmp           ; / we're going to jump to 0x003d
0032
0033    00  push 0x000e
0035    01  pop 
0036    0b  push ax
0038    00  push 0x0059
003a    02  add 
003b    0c  mov ax, ST(0)
003d    0b  push ax       ; } 0x38 is pushed again on stack
003f    00  push 0x0000   ; \
0041    0c  mov bx, ST(0) ; / bx = 0x0000
0043    00  push 0x0009   ; \
0045    0c  mov ax, ST(0) ; / ax = 0x0009
0045
0047    0b  push bx       ; \
0049    00  push 0x0002   ; |
004b    02  add           ; | bx += 0x0002
004c    0c  mov bx, ST(0) ; /
004e    0b  push ax       ; \
0050    00  push 0x0001   ; |
0052    03  sub           ; | ax -= 0x0001
0053    0c  mov ax, ST(0) ; /
0055    0b  push ax       ; \
0057    00  push 0x0000   ; | condition: ax == 0x0000
0059    08  eq            ; /
005a    00  push 0x0047   ; \
005c    00  push 0x0060   ; | false:resume the loop (0x0047).
005e    09  sel           ; | true: leave the loop (0x0060). bx is set
005f    0a  jmp           ; /        to (2 * 9)
005f
0060    0c  mov ax, ST(0) ; } ax = 0x0038 again; char: 0x005d + 0x0012 = 0x006f ('o')
0062    0b  push bx       ; } push 0x0012
0064    03  sub           ; } 0x006f - 0x0012 = 0x005d
0065    00  push 0x005d   ; \
0067    08  eq            ; |
0068    00  push 0x007c   ; | the condition must be true and we need
006a    00  push 0x006e   ; | to jump to 0x006e
006c    09  sel           ; |
006d    0a  jmp           ; /
006d
006e    0b  push ax       ; } push 0x0038
0070    00  push 0x0007   ; \
0072    03  sub           ; | ax = 0x0038 - 0x0007 = 0x0031
0073    0c  mov ax, ST(0) ; /
0075    00  push 0x005b   ; \
0077    0c  mov bx, ST(0) ; / bx = 0x005b
0079    00  push 0x0087   ; \
007b    0a  jmp           ; / jmp 0x0087
007b
007c    00  push 0x0036   ; '6'
007e    0c  mov bx, ST(0)
0080    0b  push ax
0082    0b  push bx
0084    02  add 
0085    0c  mov bx, ST(0)
0087    0b  push bx       ; \ (bx = 0x005b)
0089    00  push 0x0058   ; | 0x0058 + 0x005b = 0x00b3
008b    02  add           ; /
008c    06  xor           ; } 0x00b3 ^ 0x004a = 0x00f9 -> char 'J'
008d    00  push 0x00f9   ; \
008f    08  eq            ; |
0090    00  push 0x00a0   ; | jmp (xor_result == 0x00f9 ? 0x96 : 0xa0)
0092    00  push 0x0096   ; |
0094    09  sel           ; |
0095    0a  jmp           ; /
0095
0096    0b  push ax       ; \ (ax = 0x0031)
0098    00  push 0x004d   ; |
009a    06  xor           ; | ax = 0x0031 ^ 0x004d = 0x007c
009b    0c  mov ax, ST(0) ; /
009d    00  push 0x00ae   ; \
009f    0a  jmp           ; / jmp 0x00ae
009f
00a0    00  push 0x0323
00a2    00  push 0x012b
00a4    03  sub 
00a5    0c  mov bx, ST(0)
00a7    0b  push ax
00a9    0b  push bx
00ab    02  add 
00ac    0c  mov bx, ST(0)
00ae    0c  mov bx, ST(0) ; } bx = character 'b'
00b0    0b  push bx       ; } push 0x0062
00b2    0b  push bx       ; \
00b4    00  push 0x0001   ; | bx -= 0x0001
00b6    03  sub           ; |
00b7    0c  mov bx, ST(0) ; /
00b9    00  push 0x0003   ; \
00bb    02  add           ; / ST(0) += 0x0003
00bc    0b  push bx       ; \
00be    00  push 0x0000   ; |
00c0    08  eq            ; |
00c1    00  push 0x00b2   ; | loop while bx != 0x0000
00c3    00  push 0x00c7   ; |
00c5    09  sel           ; |
00c6    0a  jmp           ; /
00c6
00c7    07  not           ; \ bx = 0x62 + (0x62 * 3)
00c7                      ; / not(0x188) = 0xfe77
00c8    00  push 0xfe77   ; \
00ca    08  eq            ; |
00cb    00  push 0x00d8   ; | condition must be true
00cd    00  push 0x00d1   ; | to jump to 0x00d1
00cf    09  sel           ; |
00d0    0a  jmp           ; /
00d0
00d1    0b  push ax       ; \
00d3    00  push 0x0058   ; |
00d5    02  add           ; | ax = 0x007c + 0x0058 = 0x00d4
00d6    0c  mov ax, ST(0) ; /
00d8    00  push 0x0003   ; \
00da    04  trm1          ; | x: character at position 4
00db    00  push 0x008c   ; |
00dd    02  add           ; | condition = ((x >> 0x0003) | 
00de    00  push 0x6094   ; |     (x << (0x0010 - 0x0003)) & 0xffff
00e0    08  eq            ; |     + 0x8C) == 0x6094
00e1    00  push 0x00ee   ; |
00e3    00  push 0x00e7   ; | we need to jump to 0x00e7
00e5    09  sel           ; |
00e6    0a  jmp           ; /
00e6
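00e7    0b  push ax       ; \
00e9    00  push 0x00e7   ; | ax = 0x00d4 + 0x00e7 = 0x01bb; the ax values
00eb    02  add           ; | annotated further down leave this addition
00ec    0c  mov ax, ST(0) ; / out, so 0x00e7 must be added to each of them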
00ee    0b  push bx       ; \
00f0    02  add           ; | bx is 0x0000
00f1    00  push 0x000c   ; |
00f3    06  xor           ; | the next word we're going to use is
00f4    00  push 0x0074   ; | the third character
00f6    08  eq            ; |
00f7    00  push 0x0107   ; | if ((0x0000 + 'x') ^ 0x000c == 0x0074)
00f9    00  push 0x00fd   ; |     jmp 0x00fd <- take this jump
00fb    09  sel           ; | else
00fc    0a  jmp           ; /     jmp 0x0107
00fc
00fd    0b  push ax       ; \
00ff    00  push 0x0009   ; | ax = 0x00d4 - 0x0009 = 0x00cb
0101    03  sub           ; |
0102    0c  mov ax, ST(0) ; /
0104    00  push 0x011d   ; \
0106    0a  jmp           ; / jmp 0x011d
0106
0107    00  push 0x000a
0109    0c  mov bx, ST(0)
010b    0b  push bx
010d    00  push 0x0001
010f    03  sub 
0110    0c  mov bx, ST(0)
0112    0b  push bx
0114    00  push 0x0000
0116    08  eq 
0117    00  push 0x010b
0119    00  push 0x011d
011b    09  sel 
011c    0a  jmp 
011c
011d    00  push 0x0006   ; \ trm2(0x0006, character at position 2)
011f    05  trm2          ; |
0120    00  push 0x1dc0   ; | condition = (shl('w', 0x0006) |
0122    08  eq            ; |     sar('w', 0x10 - 0x0006)) &
0122                      ; |     0xffff == 0x1dc0;
0122                      ; |
0123    00  push 0x0133   ; | if (condition)
0125    00  push 0x0129   ; |     jmp 0x0129 <- take this jump
0127    09  sel           ; | else
0128    0a  jmp           ; /     jmp 0x0133
0128
0129    0b  push ax       ; \
012b    00  push 0x0071   ; |
012d    02  add           ; | ax = 0x00cb + 0x0071 = 0x013c
012e    0c  mov ax, ST(0) ; /
0130    00  push 0x013d   ; \
0132    0a  jmp           ; / jmp 0x013d
0132
0133    0b  push ax
0135    00  push 0x0077   ; 'w'
0137    02  add 
0138    0c  mov ax, ST(0)
013a    00  push 0x013d
013c    0a  jmp 
013c
013d    00  push 0x0016   ; \
013f    02  add           ; | this is the character at position 1
0140    00  push 0x000e   ; |
0142    03  sub           ; | condition = (0x0016 + 'Y' -
0143    00  push 0x0061   ; |      0x000E) == 0x0061;
0145    08  eq            ; |
0146    00  push 0x0153   ; | if (condition)
0148    00  push 0x014c   ; |     jmp 0x014c
014a    09  sel           ; | else
014b    0a  jmp           ; /     jmp 0x0153
014b
014c    0b  push ax       ; \ 
014e    00  push 0x002c   ; |
0150    03  sub           ; | ax = 0x013c - 0x002c = 0x0110
0151    0c  mov ax, ST(0) ; /
0153    0c  mov bx, ST(0) ; } bx = 'k' -> character at position 0
0155    0b  push bx
0157    00  push 0x212c
0159    0b  push bx       ; \
015b    00  push 0x0001   ; | this loop subtracts 0x07 from 0x212c
015d    03  sub           ; | for 'k' (0x6b) times
015e    0c  mov bx, ST(0) ; |
0160    00  push 0x0007   ; | bx--;
0162    03  sub           ; /
0163    0b  push bx       ; \
0165    00  push 0x0000   ; | if (bx == 0)
0167    08  eq            ; |     jmp 0x016e
0168    00  push 0x0159   ; | else
016a    00  push 0x016e   ; |     jmp 0x0159
016c    09  sel           ; |
016d    0a  jmp           ; /
016d
016e    00  push 0x01ca   ; \
0170    06  xor           ; | if ((loop_result ^ 0x01ca) == 0x1ff5)
0171    00  push 0x1ff5   ; |     jmp 0x017a
0173    08  eq            ; | else
0174    00  push 0x0181   ; |     jmp 0x0181
0176    00  push 0x017a   ; |
0178    09  sel           ; |
0179    0a  jmp           ; /
0179
017a    0b  push ax       ; \
017c    00  push 0x0012   ; | ax = 0x0110 + 0x0012 = 0x0122
017e    02  add           ; |
017f    0c  mov ax, ST(0) ; /
0181    0d  nop

The correct string is then the one that allows the virtual machine to execute the program to the end: kYwxCbJoLp. There’s no need to analyze the rest of the executable; pass those characters back to the program and it will print our flag: A_p0p_pu$H_&_a_Jmp@flare-on.com.