## This is a sample script to show how to use the python bindings in Malcat
## It is meant to be run against a PE file.

import malcat
import itertools
import datetime


####### analysis.file
# analysis.file exposes the raw bytes of the file currently being analysed.
print("############### TESTING FILE OBJECT ####################")
current_file = analysis.file
file_size = len(current_file)
print(f"The current file is {current_file.name} (path: {current_file.path}) and is {file_size} bytes big")
# bytes can be fetched either through read(offset, size) or through slice syntax
print(f"The first 6 bytes of the file are {current_file.read(0, 6)}")
print(f"The next 6 bytes of the file are {current_file[6:12]}")

# search() hands back an (offset, length) pair; this script treats a falsy length as "no match"
hit_offset, hit_size = current_file.search(r"PE\x00\x00", start=0, size=file_size)
if not hit_size:
    print("Pattern not found!")
else:
    print(f"Pattern found! {hit_size} bytes at #{hit_offset:x}")

# search_all() yields one (offset, length) pair per match
for hit_offset, hit_size in current_file.search_all(r"\xAA{4,}+"):
    print(f"One pattern found: {hit_size} bytes at #{hit_offset:x}")

# The file can also be modified (each write is undoable). Uncomment to test:
# analysis.file.write(2, 56)
# analysis.file[2] = 56
# Writing several bytes at once works too:
# analysis.file.write(2, b"ab")
# analysis.file[2:4] = b"ab"


####### analysis.map
# analysis.map converts between the address notations printed below:
# effective addresses, physical offsets (#), virtual addresses (0x) and RVAs (@).
print("\n\n############### TESTING MAP OBJECT ####################")
address_map = analysis.map
address = 0x1100
phys_offset = address_map.to_phys(address)
virt_address = address_map.to_virt(address)
rva = address_map.to_rva(address)
print(f"Effective address {address:x} has offset=#{phys_offset:x}, va=0x{virt_address:x}, rva=@{rva:x}")

# gui.print can be used in place of print when formatting tags are wanted (format=True)
ep_rva = analysis.struct['OptionalHeader']['AddressOfEntryPoint']
if ep_rva is not None:
    ep_offset = address_map.to_phys(address_map.from_rva(ep_rva))
    gui.print(f"RVA @[rva]{ep_rva:x}[/rva] has offset=#[fa]{ep_offset:x}[/fa] ", format=True)

print(f"Imagebase=0x{address_map.base:x} - total size of address space (physical + virtual) = {len(address_map)} bytes")

# look up the region that holds a given RVA and describe its physical/virtual extents
region = address_map.get_region_for_rva(address)
if region is not None:
    phys_start, virt_start = region.phys, region.virt
    phys_end = phys_start + region.phys_size
    virt_end = virt_start + region.virt_size
    print(f"RVA @{address:x} lies in region {region.name} (physical: [#{phys_start:x}-#{phys_end:x}[, virtual: [0x{virt_start:x}-0x{virt_end:x}[, exec={region.exec}, read={region.read}, write={region.write})")

# iterating over the map yields its regions
region_names = ", ".join(r.name for r in address_map)
print(f"List of regions: {region_names}")

# regions can be tested for by name with the `in` operator
print("File has overlay" if "overlay" in address_map else "File has no overlay")

####### analysis.struct
print("\n\n############### TESTING STRUCT OBJECT ####################")
if "OptionalHeader" in analysis.struct:
    structs = analysis.struct
    # Subscript access (["name"] or [index]) returns the field *value*
    # (when the field is a leaf, i.e. not a struct, bitfield or array).
    opt_values = structs["OptionalHeader"]
    # Attribute access (.name) returns a detailed accessor instead:
    # value, offset, size, bytes, name, etc.
    opt_fields = structs.OptionalHeader

    print(f"EntryPoint: 0x{opt_values['AddressOfEntryPoint']:x}")
    ep = opt_fields.AddressOfEntryPoint
    print(f"EntryPoint: {ep}")
    print(f"{ep.name}: 0x{ep.value:x} (at effective address {ep.address:x}, aka offset #{ep.offset:x}), size of field: {ep.size}, bytes: {ep.bytes}")
    # .at(field_name) is a synonym for .field_name
    print(f"EntryPoint: {opt_fields.at('AddressOfEntryPoint').value:x}")
    # .at(index) gives detailed access to the i-th field
    print(f"OptionalHeader[0] is field {opt_fields.at(0).name}")
    # whereas [index] goes straight to the value
    print(f"OptionalHeader[0]: {opt_fields[0]}")

    # enums
    machine = structs.PE.Machine
    print(f"PE.Machine = {machine.value}, has_enum = {machine.has_enum}, enum = {machine.enum}")

    # arrays
    data_dirs = opt_values["DataDirectory"]
    print(f"has exports ? {data_dirs[0]['Size'] > 0}")
    print(f"number of data directories:  {data_dirs.count}")
    for index, directory in enumerate(data_dirs):
        # an entry is described either by a file offset or by an RVA
        if "Offset" not in directory:
            start = directory["Rva"]
            print(f"    DataDirectory[{index}]: 0x{start:x}-0x{start + directory['Size']:x}")
        else:
            start = directory["Offset"]
            print(f"    DataDirectory[{index}]: #{start:x}-#{start + directory['Size']:x}")

    # detailed info on array elements is obtained with .at(index) rather than [index]:
    for index in range(data_dirs.count):
        print(f"    DataDirectory[{index}] is located at #{data_dirs.at(index).offset:x}")

    # structures
    # all fields of a structure can be enumerated by index as well
    for index in range(opt_fields.count):
        field = opt_fields.at(index)    # detailed access to the i-th field/array element
        value = opt_fields[index]       # <=> field.value
        # some fields have an enum equivalent
        enum_suffix = ('<=> ' + field.enum) if field.has_enum else ''
        print(f"    {field.name}: {value} {enum_suffix}")

    # bitfields
    characteristics = structs.PE.Characteristics
    print(f"is executable ? {characteristics['ExecutableImage']}")
    print(f"is executable ? {characteristics[1]}")  # access by index

    # writing
    # atomic fields can be written (with some restrictions, e.g. written data may not be larger on disk)
    # uncomment to test:
    # analysis.struct["PE"]["TimeDateStamp"] = datetime.datetime.now()
    # analysis.struct.MZ.InitialSS.value = 5      # you can also write by setting the .value field


####### fns
print("\n\n############### TESTING FNS OBJECT ####################")

# analysis.fns[ea_start:ea_end] returns an iterator over all functions in this address range
fns = list(analysis.fns[analysis.map.from_phys(0) : analysis.map.from_phys(0x5000)])
print(f"there are {len(fns)} functions in range[#0-#5000[")

# find_forward / find_backward locate the nearest function from a given address;
# the script checks for None before using the result
first_function = analysis.fns.find_forward(0)
if first_function is not None:
    print(f"first function of file: {first_function} at [0x{analysis.map.to_virt(first_function.start):x} - 0x{analysis.map.to_virt(first_function.end):x}[")
last_function = analysis.fns.find_backward(len(analysis.map))
if last_function is not None:
    print(f"last function of file: {last_function} at 0x{analysis.map.to_virt(last_function.start):x} ({len(last_function)} bytes)")

# describe the first 3 functions
for fn in itertools.islice(analysis.fns, 3):
    print(f"* function {fn.fullname} at #{analysis.map.to_phys(fn.start):x}")
    print(f"  module part of name: {fn.module}")        # X1.X2.Y<Z1, Z2>(A1, A2) -> X1.X2
    print(f"  identifier part of name: {fn.name}")      # X1.X2.Y<Z1, Z2>(A1, A2) -> Y
    print(f"  template parameters: {fn.template_args}") # X1.X2.Y<Z1, Z2>(A1, A2) -> [Z1, Z2]
    print(f"  arguments: {fn.args}")                    # X1.X2.Y<Z1, Z2>(A1, A2) -> [A1, A2]
    print(f"  number of instructions: {fn.num_instructions}")
    print(f"  number of intra-jumps: {fn.num_intra_jumps}")
    print(f"  number of basic blocks: {fn.num_bb}")
    print("  opcode stats:")
    # One num_<type> counter is read from the function for every instruction type.
    # The loop variable is not named `type`: at module level that would rebind
    # the builtin type() for everything executed afterwards.
    for insn_type in malcat.Instruction.Type.__members__.values():
        counter_name = "num_" + insn_type.name.lower()
        print(f"    {counter_name}: {getattr(fn, counter_name)}")




####### cfg
print("\n\n############### TESTING CFG OBJECT ####################")
# The CFG, or control flow graph, divides executable code into a graph of basic blocks.
# Basic blocks are contiguous file ranges that satisfy:
#  * control flow always starts at the beginning of the block for every possible execution of the program
#  * control flow always goes to the end of the block for every possible execution of the program
#  * the basic block is located in a single region
#  * basic blocks have incoming and outgoing edges, which can be of 4 types: STEP, JUMP, CALL or EXCEPTION
# In order to simplify the interface a bit and make code easier to read, non-code regions (i.e. data)
# also belong to special basic blocks named data blocks, which have neither incoming nor outgoing edges.

# analysis.cfg[ea_start:ea_end] returns an iterator over all basic blocks in this address range
bbs = list(analysis.cfg[analysis.map.from_phys(0) : analysis.map.from_phys(0x5000)])
print(f"there are {len(bbs)} basic blocks in range[#0-#5000[")

# describe first 10 basic blocks
for bb in itertools.islice(analysis.cfg, 10):
    block_kind = 'CODE' if bb.code else 'DATA'
    print(f"* BasicBlock at [#{analysis.map.to_phys(bb.start):x}-#{analysis.map.to_phys(bb.end):x}[ ({block_kind})")
    # incoming edges (at most 5 are shown)
    for inc_edge in itertools.islice(bb.incoming, 5):
        # analysis.cfg[<ea>] returns the basic block containing address <ea>
        source_bb = analysis.cfg[inc_edge.address]
        print(f"    > incoming edge ({inc_edge.type}) from [#{analysis.map.to_phys(source_bb.start):x}:#{analysis.map.to_phys(source_bb.end):x}[")

    # analysis.cfg.align(x) aligns address <x> to the previous instruction boundary.
    # It is more precise than analysis.asm.align since it uses the basic block start address
    # as a known valid instruction start.
    if bb.code:
        last_instr_adr = analysis.cfg.align(bb.end - 1)
        # last_instr_adr is an effective address: convert it before printing it with the
        # '#' (physical offset) prefix, as is done for every other offset in this script
        print(f"    Last instruction is at #{analysis.map.to_phys(last_instr_adr):x} : {analysis.asm[last_instr_adr]}")


####### analysis.xrefs
print("\n\n############### TESTING XREF OBJECT ####################")
# Iterating analysis.xrefs yields one reference list per referenced target;
# each list in turn iterates over the individual references to that target.
for reflist in itertools.islice(analysis.xrefs, 5):
    print(f"* 0x{analysis.a2v(reflist.target):x} is referenced by:")
    for ref in reflist:
        print(f"    * {analysis.ppa(ref.address)} ({ref.type})")

# analysis.xrefs[<ea>] returns the references to one address; membership is checked
# first with `in` so that only referenced functions are looked up
first_function = analysis.fns.find_forward(0)
if first_function is not None and first_function.address in analysis.xrefs:
    reflist = analysis.xrefs[first_function.address]
    # the colon belongs at the end of the sentence, introducing the list that follows
    print(f"\nFunction {first_function.name} is referenced {len(reflist)} times:")
    for source in reflist:
        print(f"    * {analysis.ppa(source.address)} ({source.type})")


####### analysis.syms
print("\n\n############### TESTING SYMS OBJECT ####################")
symbols = analysis.syms
address_map = analysis.map

# slicing by address range iterates over the symbols in that range; show the first 5
for symbol in itertools.islice(symbols[0:len(address_map)], 5):
    virt = address_map.to_virt(symbol.address)
    print(f"* 0x{virt:x} = {symbol.name} ({symbol.type})")

print()
# per-category counters are exposed as number_of_<category> attributes
categories = ("functions", "labels", "imports", "exports", "entrypoints", "variables", "typedefs")
for category in categories:
    counter = getattr(symbols, "number_of_" + category)
    print(f"* Number of {category}: {counter}")

# indexing by symbol name gives an address; indexing by address gives the symbols defined there
if "EntryPoint" in symbols:
    ep_address = symbols["EntryPoint"]
    ep_virt = address_map.to_virt(ep_address)
    print(f"\n* Entrypoint defined at address 0x{ep_virt:x}")
    print(f"* List of symbols defined at 0x{ep_virt:x}:")
    for symbol in symbols[ep_address]:
        virt = address_map.to_virt(symbol.address)
        print(f"    * 0x{virt:x} = {symbol.name} ({symbol.type})")


####### analysis.sigs
print("\n\n############### TESTING SIGS OBJECT ####################")
# every entry of analysis.sigs is reported as a matching rule, followed by
# the (offset, size) location of each match of each of its patterns
for rule in analysis.sigs:
    print(f"yara rule {rule.name} ({rule.id}) : {rule.type} matched !")
    for pattern in rule.patterns:
        pattern_id = pattern.id
        for start, length in pattern.matches:
            end = start + length
            print(f"    - pattern {pattern_id} matched at #{start:x}-#{end:x}")


print("\n\n############### TESTING STRINGS OBJECT ####################")
# dump the first 15 strings together with their metadata
for found in itertools.islice(analysis.strings, 15):
    offset = analysis.map.to_phys(found.address)
    print(f"* string {found.text!r} found at #{offset:x}: {found.type}:{found.encoding} [tag={found.tag}] [score={found.score}] [entropy={found.entropy}]: bytes={found.bytes}")
# membership test by string content
negation = '' if 'VirtualProtect' in analysis.strings else 'not'
print(f"sample has {negation} the string VirtualProtect")


print("\n\n############### TESTING ENTROPY OBJECT ####################")
# analysis.entropy is sliced by address, so the middle of the file is first
# converted from a physical offset to an address
half_size = analysis.file.size // 2
mid_address = analysis.map.from_phys(half_size)
first_half = analysis.entropy[:mid_address]
print(f"* Entropy of first half of the file: {first_half} ({100 * first_half // 255}%)")
second_half = analysis.entropy[mid_address:]
print(f"* Entropy of second half of the file: {second_half} ({100 * second_half // 255}%)")


print("\n\n############### TESTING CARVED FILES OBJECT ####################")
# list the sub-files carved inside the .rsrc region, if the file has one
address_map = analysis.map
if ".rsrc" in address_map:
    rsrc = address_map[".rsrc"]
    carved_in_rsrc = analysis.carved[rsrc.start:rsrc.end]
    for carved_file in carved_in_rsrc:
        offset = address_map.to_phys(carved_file.address)
        print(f"* Found sub-file '{carved_file.name}' in .rsrc at #{offset:x} ({carved_file.size} bytes): {carved_file.type} [{carved_file.category}]")

print("\n\n############### TESTING VIRTUAL FILES OBJECT ####################")

# print the path and size of every virtual file
for virtual_file in analysis.vfiles:
    path, size = virtual_file.path, virtual_file.size
    print(f"* Virtual file '{path}' ({size} bytes)")

print("\n\n############### TESTING CONSTANTS ####################")

# print every known constant together with its pretty-printed address
for constant in analysis.constants:
    location = analysis.ppa(constant.address)
    print(f"constant found {constant.name} ({constant.category}) at {location} !")
