from filetypes.base import *
import malcat
import struct
import string
from filetypes.CFB_office import parse_vba, SummaryAnalyzer, OleStreamAnalyzer, Summary, decompile_vba, codepage2codec
from filetypes.CFB_msi import parse_msi, decompile_msi


# https://interoperability.blob.core.windows.net/files/MS-OVBA/[MS-OVBA].pdf
# Maps an upper-case CLSID string (without braces) to a human-readable label.
# get_type() looks up the root directory entry's CLSID in this table; labels
# carrying a "Known Related to CVE-..." suffix flag classes tied to exploits.
CFB_CLSIDs = {
    # MSI database (this exact label is what the analyzer compares against to
    # enable MSI-specific handling)
    "000C1084-0000-0000-C000-000000000046": "MSI Installer",
    # Package Object
    "F20DA720-C02F-11CE-927B-0800095AE340": "OLE Package Object",
    "00020C01-0000-0000-C000-000000000046": "OLE Package Object",
    "00022601-0000-0000-C000-000000000046": "OLE Package Object",
    "00022602-0000-0000-C000-000000000046": "OLE Package Object",
    "00022603-0000-0000-C000-000000000046": "OLE Package Object",
    "0003000C-0000-0000-C000-000000000046": "OLE Package Object",
    "0003000D-0000-0000-C000-000000000046": "OLE Package Object",
    "0003000E-0000-0000-C000-000000000046": "OLE Package Object",

    # Equation Object
    "00021700-0000-0000-C000-000000000046": "Microsoft Equation 2.0 (Known Related to CVE-2017-11882 or CVE-2018-0802)",
    "0002CE02-0000-0000-C000-000000000046": "Microsoft Equation 3.0 (Known Related to CVE-2017-11882 or CVE-2018-0802)",
    "0002CE03-0000-0000-C000-000000000046": "MathType Equation Object",

    # Word / Excel / Powerpoint document classes
    "00020900-0000-0000-C000-000000000046": "Word.Document.6",
    "00020906-0000-0000-C000-000000000046": "Word.Document.8",
    "F4754C9B-64F5-4B40-8AF4-679732AC0607": "Word.Document.12",
    "00020810-0000-0000-C000-000000000046": "Excel.Sheet.5",
    "00020811-0000-0000-C000-000000000046": "Excel.Chart.5",
    "00020820-0000-0000-C000-000000000046": "Excel.Sheet.8",
    "00020821-0000-0000-C000-000000000046": "Excel.Chart.8",
    "00020830-0000-0000-C000-000000000046": "Excel.Sheet.12",
    "00020832-0000-0000-C000-000000000046": "Excel.SheetMacroEnabled.12",
    "00020833-0000-0000-C000-000000000046": "Excel.SheetBinaryMacroEnabled.12",
    "64818D10-4F9B-11CF-86EA-00AA00B929E8": "Powerpoint.Show.8",
    "64818D11-4F9B-11CF-86EA-00AA00B929E8": "Powerpoint.Slide.8",
    "CF4F55F4-8F87-4D47-80BB-5808164BB3F8": "Powerpoint.Show.12",
    "048EB43E-2059-422F-95E0-557DA96038AF": "Powerpoint.Slide.12",

    # Miscellaneous COM classes (monikers, ActiveX controls, script engines, ...)
    "00000300-0000-0000-C000-000000000046": "StdOleLink (Known Related to CVE-2017-0199, CVE-2017-8570 or CVE-2017-8759)",
    "00000535-0000-0010-8000-00AA006D2EA4": "ADODB.RecordSet (Known Related to CVE-2015-0097)",
    "05741520-C4EB-440A-AC3F-9643BBC9F847": "otkloadr.WRLoader (Known Related to CVE-2015-1641)",
    "0CF774D0-F077-11D1-B1BC-00C04F86C324": "HTML.HostEncode",
    "0D43FE01-F093-11CF-8940-00A0C9054228": "Scripting.FileSystemObject",
    "0E59F1D5-1FBE-11D0-8FF2-00A0D10038BC": "MSScriptControl.ScriptControl  (Known Related to CVE-2015-0097)",
    "1461A561-24E8-4BA3-8D4A-FFEEF980556B": "BCSAddin.Connect (Known Related to CVE-2016-0042)",
    "14CE31DC-ABC2-484C-B061-CF3416AED8FF": "Loads WUAEXT.DLL (Known Related to CVE-2015-6128)",
    "1D8A9B47-3A28-4CE2-8A4B-BD34E45BCEEB": "UPnP.DescriptionDocument",
    "1EFB6596-857C-11D1-B16A-00C0F0283628": "MSCOMCTL.TabStrip (Known Related to CVE-2012-1856 & CVE-2013-3906)",
    "23CE100B-1390-49D6-BA00-F17D3AEE149C": "UmOutlookAddin.UmEvmCtrl (Known Related to CVE-2016-0042)",
    "33BD73C2-7BB4-48F4-8DBC-82B8B313AE16": "osf.SandboxManager (Known Related To CVE-2015-1770)",
    "33FD0563-D81A-4393-83CC-0195B1DA2F91": "UPnP.DescriptionDocumentEx",
    "394C052E-B830-11D0-9A86-00C04FD8DBF7": "Loads ELSEXT.DLL (Known Related to CVE-2015-6128)",
    "3BA59FA5-41BF-4820-98E4-04645A806698": "osf.SandboxContent (Known Related To CVE-2015-1770)",
    "41B9BE05-B3AF-460C-BF0B-2CDD44A093B1": "Search.XmlContentFilter",
    "4315D437-5B8C-11D0-BD3B-00A0C911CE86": "Device Moniker (Known Related to CVE-2016-0015)",
    "4C599241-6926-101B-9992-00000B65C6F9": "Forms.Image (Known Related to CVE-2015-2424)",
    "44F9A03B-A3EC-4F3B-9364-08E0007F21DF": "Control.TaskSymbol (Known Related to CVE-2015-1642 & CVE-2015-2424)",
    "46E31370-3F7A-11CE-BED6-00AA00611080": "Forms.MultiPage",
    "4D3263E4-CAB7-11D2-802A-0080C703929C": "AutoCAD 2000-2002 Document",
    "5E4405B0-5374-11CE-8E71-0020AF04B1D7": "AutoCAD R14 Document",
    "6A221957-2D85-42A7-8E19-BE33950D1DEB": "AutoCAD 2013 Document",
    "6AD4AE40-2FF1-4D88-B27A-F76FC7B40440": "BCSAddin.ManageSolutionHelper (Known Related to CVE-2016-0042)",
    "6E182020-F460-11CE-9BCD-00AA00608E01": "Forms.Frame",
    "66833FE6-8583-11D1-B16A-00C0F0283628": "MSCOMCTL.Toolbar (Known Related to CVE-2012-0158 & CVE-2012-1856)",
    "799ED9EA-FB5E-11D1-B7D6-00C04FC2AAE2": "Microsoft.VbaAddin (Known Related to CVE-2016-0042)",
    "79EAC9D0-BAF9-11CE-8C82-00AA004BA90B": "StdHlink",
    "79EAC9D1-BAF9-11CE-8C82-00AA004BA90B": "StdHlinkBrowseContext",
    "79EAC9E2-BAF9-11CE-8C82-00AA004BA90B": "(http:) Asychronous Pluggable Protocol Handler",
    "79EAC9E3-BAF9-11CE-8C82-00AA004BA90B": "(ftp:) Asychronous Pluggable Protocol Handler",
    "79EAC9E5-BAF9-11CE-8C82-00AA004BA90B": "(https:) Asychronous Pluggable Protocol Handler",
    "79EAC9E6-BAF9-11CE-8C82-00AA004BA90B": "(mk:) Asychronous Pluggable Protocol Handler",
    "79EAC9E7-BAF9-11CE-8C82-00AA004BA90B": "(file:, local:) Asychronous Pluggable Protocol Handler",
    "7AABBB95-79BE-4C0F-8024-EB6AF271231C": "AutoCAD 2007-2009 Document",
    "85131630-480C-11D2-B1F9-00C04F86C324": "JSFile.HostEncode",
    "85131631-480C-11D2-B1F9-00C04F86C324": "VBSFile.HostEncode",
    "8627E73B-B5AA-4643-A3B0-570EDA17E3E7": "UmOutlookAddin.ButtonBar (Known Related to CVE-2016-0042)",
    "88D96A0C-F192-11D4-A65F-0040963251E5": "Msxml2.SAXXMLReader.6.0 (Known Related to CVE-2022-30190)",
    "8E75D913-3D21-11D2-85C4-080009A0C626": "AutoCAD 2004-2006 Document",
    "975797FC-4E2A-11D0-B702-00C04FD8DBF7": "Loads ELSEXT.DLL (Known Related to CVE-2015-6128)",
    "A08A033D-1A75-4AB6-A166-EAD02F547959": "otkloadr.WRAssembly  (Known Related to CVE-2015-1641)",
    "B54F3741-5B07-11CF-A4B0-00AA004A55E8": "VBS, VBScript",
    "BDD1F04B-858B-11D1-B16A-00C0F0283628": "MSCOMCTL.ListViewCtrl (Known Related to CVE-2012-0158)",
    "C08AFD90-F2A1-11D1-8455-00A0C91F3880": "ShellBrowserWindow",
    "C62A69F0-16DC-11CE-9E98-00AA00574A4F": "Forms.Form",
    "C74190B6-8589-11D1-B16A-00C0F0283628": "MSCOMCTL.TreeCtrl (Known Related to CVE-2012-0158)",
    "CCD068CD-1260-4AEA-B040-A87974EB3AEF": "UmOutlookAddin.RoomsCTP (Known Related to CVE-2016-0042)",
    "CDDBCC7C-BE18-4A58-9CBF-D62A012272CE": "osf.Sandbox (Known Related To CVE-2015-1770)",
    "CDF1C8AA-2D25-43C7-8AFE-01F73A3C66DA": "UmOutlookAddin.InspectorContext (Known Related to CVE-2016-0042)",
    "D27CDB6E-AE6D-11CF-96B8-444553540000": "Shockwave Flash Object",
    "D50FED35-0A08-4B17-B3E0-A8DD0EDE375D": "UmOutlookAddin.PlayOnPhoneDlg (Known Related to CVE-2016-0042)",
    "D70E31AD-2614-49F2-B0FC-ACA781D81F3E": "AutoCAD 2010-2012 Document",
    "D93CE8B5-3BF8-462C-A03F-DED2730078BA": "Loads WUAEXT.DLL (Known Related to CVE-2015-6128)",
    "DD9DA666-8594-11D1-B16A-00C0F0283628": "MSCOMCTL.ImageComboCtrl (Known Related to CVE-2014-1761)",
    "E5CA59F5-57C4-4DD8-9BD6-1DEEEDD27AF4": "InkEd.InkEdit",
    "E8CC4CBF-FDFF-11D0-B865-00A0C9081C1D": "Loads OCI.DLL (Known Related to CVE-2015-6128)",
    "ECABAFC9-7F19-11D2-978E-0000F8757E2A": "Loads MQRT.DLL (Known Related to CVE-2015-6128)",
    "ECF44975-786E-462F-B02A-CBCCB1A2C4A2": "UmOutlookAddin.FormRegionContext (Known Related to CVE-2016-0042)",
    "F414C260-6AC0-11CF-B6D1-00AA00BBBB58": "ECMAScript, JavaScript, JScript, LiveScript",
    "F959DBBB-3867-41F2-8E5F-3B8BEFAA81B3": "UmOutlookAddin.FormRegionAddin (Known Related to CVE-2016-0042)",
}

def toprintable(s):
    """
    Return a display-safe copy of the string `s`.

    Characters found in string.printable are kept as-is, NUL characters are
    dropped and every other character is replaced by a literal "\\xNN"
    escape built from its code point (more than two hex digits are emitted
    for code points above 0xff).
    """
    pieces = []
    for ch in s:
        if ch in string.printable:
            pieces.append(ch)
        elif ch != "\x00":
            pieces.append("\\x{:02x}".format(ord(ch)))
        # NUL characters contribute nothing to the output
    return "".join(pieces)

def align(val, what, down=False):
    """
    Round `val` to a multiple of `what`.

    The value is rounded up to the next multiple by default, or down to the
    previous one when `down` is true. A value that is already a multiple of
    `what` is returned unchanged.
    """
    remainder = val % what
    if not remainder:
        return val
    if down:
        return val - remainder
    return val + what - remainder

class FileHeader(Struct):
    """
    Compound file header structure.

    Fields are yielded in their on-disk order, so the order of the statements
    below must not be changed. Parsing is aborted with a FatalError when the
    byte order mark or the sector shift holds an unexpected value.
    """

    def parse(self):
        yield Bytes(8, name="Signature")
        yield GUID(name="CLSID", comment="must be set to zero")
        yield UInt16(name="MinorVersion", comment="version number for nonbreaking changes. This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004")
        yield UInt16(name="MajorVersion", comment="version number for breaking changes. This field MUST be set to either 0x0003 (version 3) or 0x0004 (version 4)")
        bo = yield UInt16(name="ByteOrder", comment="this field MUST be set to 0xFFFE. This field is a byte order mark for all integer fields, specifying little-endian byte order")
        # both the expected mark and its byte-swapped form are accepted here
        if bo not in (0xFFFE, 0xFEFF):
            raise FatalError("Invalid byte order")
        ss = yield UInt16(name="SectorShift", comment="this field MUST be set to 0x0009, or 0x000c, depending on the Major Version field. This field specifies the sector size of the compound file as a power of 2")
        # only 512-byte (shift 9) and 4096-byte (shift 12) sectors are supported
        if ss not in (9, 12):
            raise FatalError("Invalid sector shift")
        yield UInt16(name="MiniSectorShift", comment="this field MUST be set to 0x0006. This field specifies the sector size of the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes")
        yield Unused(6, name="Reserved")
        yield UInt32(name="NumberOfDirectorySectors", comment="the count of the number of directory sectors in the compound file")
        yield UInt32(name="NumberOfFATSectors", comment="contains the count of the number of FAT sectors in the compound file")
        yield UInt32(name="FirstDirectorySector", comment="the starting sector number for the directory stream")
        yield UInt32(name="TransactionSignatureNumber", comment="MAY contain a sequence number that is incremented every time the compound file is saved by an implementation that supports file transactions. This is the field that MUST be set to all zeroes if file transactions are not implemented")
        yield UInt32(name="MiniStreamCutoffSize", comment="MUST be set to 0x00001000. This field specifies the maximum size of a user-defined data stream that is allocated from the mini FAT and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than or equal to this cutoff size must be allocated as normal sectors from the FAT")
        yield UInt32(name="FirstMiniFATSector", comment="contains the starting sector number for the mini FAT")
        yield UInt32(name="NumberOfMiniFATSectors", comment="contains the count of the number of mini FAT sectors in the compound file")
        yield UInt32(name="FirstDIFATSector", comment="contains the starting sector number for the DIFAT")
        yield UInt32(name="NumberOfDIFATSectors", comment="contains the count of the number of DIFAT sectors in the compound file")


class DirectoryEntry(Struct):
    """
    One entry of the compound file directory.

    Fields are yielded in their on-disk order. The layout of the trailing
    stream size depends on the sector size of the owning parser: with
    512-byte sectors only the low 32 bits are interpreted and the high
    32 bits are exposed as an unused field, otherwise the size is read as a
    single 64-bit integer.
    """

    def parse(self):
        yield StringUtf16le(32, name="Name", zero_terminated=True, comment="directory name")
        yield UInt16(name="NameLength", comment="directory name length including zero terminator")
        yield UInt8(name="ObjectType", values=[
            ("Unallocated", 0),
            ("Storage", 1),
            ("Stream", 2),
            ("Root", 5),
        ])
        yield UInt8(name="NodeType", comment="Color in the red-black tree", values=[
            ("Red", 0),
            ("Black", 1),
        ])
        yield UInt32(name="LeftSibling", comment="contains the stream ID of the left sibling. If there is no left sibling, the field MUST be set to NOSTREAM (0xFFFFFFFF)")
        # the description used to say "left sibling" here (copy/paste slip)
        yield UInt32(name="RightSibling", comment="contains the stream ID of the right sibling. If there is no right sibling, the field MUST be set to NOSTREAM (0xFFFFFFFF)")
        yield UInt32(name="Child", comment="contains the stream ID of a child object. If there is no child object, including all entries for stream objects, the field MUST be set to NOSTREAM (0xFFFFFFFF)")
        yield GUID(name="CLSID", comment="object class for root / storege dirs")
        yield UInt32(name="State", comment="contains the user-defined flags if this entry is for a storage object or root storage object")
        yield Filetime(name="CreationTime", comment="creation time for a storage object (FILETIME struct)")
        yield Filetime(name="ModificationTime", comment="modification time for a storage object (FILETIME struct)")
        yield UInt32(name="StartingSector", comment="contains the first sector location if this is a stream object. For a root storage object, this field MUST contain the first sector of the mini stream, if the mini stream exists. For a storage object, this field MUST be set to all zeroes")
        if self.parser.sector_size == 512:
            # 512-byte sectors: the high half of the size is not interpreted
            yield UInt32(name="StreamSize", comment="contains the size of the user-defined data if this is a stream object. For a root storage object, this field contains the size of the mini stream. For a storage object, this field MUST be set to all zeroes")
            yield Unused(4, name="StreamSizeHigh", comment="SizeHigh, should be set to 0 or 0xffffffff")
        else:
            yield UInt64(name="StreamSize", comment="contains the size of the user-defined data if this is a stream object. For a root storage object, this field contains the size of the mini stream. For a storage object, this field MUST be set to all zeroes")






class CFBAnalyzer(FileTypeAnalyzer):
    category = malcat.FileType.DOCUMENT
    name = "CFB"
    regexp = r"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"

    def __init__(self):
        """Initialise the base analyzer and reset every piece of parsing state."""
        FileTypeAnalyzer.__init__(self)

        # sector geometry and limits
        self.sectors = 0
        self.sector_size = None
        self.minisector_size = None
        self.minifat_threshold = None
        self.ministream_first_sector = None

        # allocation tables
        self.difats = []
        self.fats = []
        self.minifats = []

        # directory entries and the path -> entry mapping
        self.directory = []
        self.filesystem = {}
        self.fragmented = False

        # remaining analyzer state
        self.header = None
        self.codec = None
        self.msi_db = None
        self.vba_modules = []

    def decode_stream_name(self, name):
        def mime2char(val):
            if val < 10:
                return chr(ord("0") + val)
            elif val < 10 + 26:
                return chr(ord("A") + val - 10)
            elif val < 10 + 26 + 26:
                return chr(ord("a") + val - 10 - 26)
            elif val == 10 + 26 + 26:
                return "."
            else:
                return "_"
        r = ""
        is_msi = self.type == "MSI Installer"
        for c in name:
            c = ord(c)
            if c == 0:
                break
            if c in (1, 3, 5, 0x4840):
                pass
            elif c >= 0x3800 and c < 0x4800 and is_msi:
                c = c - 0x3800
                r += mime2char(c & 0x3f)
                r += mime2char((c >> 6) & 0x3f)
            elif c >= 0x4800 and c < 0x4840 and is_msi:
                r += mime2char(c - 0x4800)
            else:
                r += chr(c)
        return r

    def iter_file(self, first_sector, size, force_FAT=False):
        sector = first_sector
        if size is None or size >= self.minifat_threshold or force_FAT:
            sector_size = self.sector_size
        else:
            sector_size = self.minisector_size
        left = size
        while left > 0 and sector <= 0xFFFFFFFA:
            off = self.sector2offset(sector, size, force_FAT=force_FAT)
            yield off, min(left, sector_size)
            left -= sector_size
            sector = self.sector2next(sector, size, force_FAT=force_FAT)

    def sector2offset(self, sector, size=None, force_FAT=False, raise_exception=True):
        if sector > 0xFFFFFFFA:
            raise FatalError("Invalid sector")
        if size is None or size >= self.minifat_threshold or force_FAT:
            if sector >= self.sectors:
                if raise_exception:
                    raise FatalError("FAT sector too big: {:x}".format(sector))
                else:
                    return 0
            return (sector + 1) * self.sector_size
        else:
            if not self.minifats:
                raise FatalError("No mini FAT")
            mini_offset = sector * self.minisector_size
            sect = self.ministream_first_sector
            for i in range(mini_offset // self.sector_size):
                sect = self.sector2next(sect)
            if sect > 0xFFFFFFFA:
                raise FatalError("Could not get to ministream offset")
            return self.sector2offset(sect) + mini_offset % self.sector_size

    def fatsector2sector(self, fat_sector_index):
        if fat_sector_index < 109:
            return self.difats[0][fat_sector_index]
        else:
            fat_sector_index -= 109
            difat_entries = self.sector_size // 4 - 1   # -1 because last difat sector links to next difat sector
            difat_sector = fat_sector_index // difat_entries
            if difat_sector >= len(self.difats):
                raise FatalError("Fat sector above difat limits: {} vs {}".format(fat_sector_index + 109, 109 + difat_entries * len(self.difats)))
            return self.difats[1 + difat_sector][fat_sector_index % difat_entries]

    def sector2next(self, sector, size=None, force_FAT=False):
        if sector > 0xFFFFFFFA:
            raise FatalError("Invalid sector")
        if size is None or size >= self.minifat_threshold or force_FAT:
            fat_tables = self.fats
        else:
            fat_tables = self.minifats
        for fat in fat_tables:
            if sector < fat.count:
                return fat[sector]
            sector -= fat.count
        raise FatalError("No FAT Entry for sector {:x}".format(sector))

    
    def read_entry(self, entry):
        res = []
        for offset, sz in self.iter_file(entry["StartingSector"], entry["StreamSize"]):
            res.append(self.read(offset, sz))
        return b"".join(res)

    def get_entry_path(self, i, seen=None):
        if seen is None:
            seen = set()
        if i in seen:
            return [""]
        seen.add(i)
        if i is None or i >= len(self.directory):
            return ["<DETACHED>"]
        elif i == 0:
            return [""]
        else:
            name = self.decode_stream_name(self.directory[i]["Name"])
            return self.get_entry_path(self.parents.get(i, None), seen) + [name]
    
    def read_file(self, path):
        direntry = self.filesystem.get(path, None)
        if direntry is None:
            raise KeyError("No file named {}".format(path))
        return self.read_entry(direntry)

    def read_stream(self, vfile, password=None):
        return self.read_file(vfile.path)

    def unpack_stream(self, vfile, password=None):
        return self.unpack_buffer(self.read_stream(vfile))

    def unpack_buffer(self, buffer):
        """Run `buffer` through the OfficeRleUnpack transform and return the result as a bytearray."""
        # imported on use, exactly where the transform is needed
        from transforms.compress import OfficeRleUnpack
        unpacker = OfficeRleUnpack()
        return bytearray(unpacker.run(buffer))

    def decompile(self):
        return "No VBA found - nothing to decompile"

    def get_type(self):
        if self.directory:
            res = CFB_CLSIDs.get(self.directory[0]["CLSID"].upper(), None)
            if res:
                return res
            elif "/Workbook" in self.filesystem:
                return "Excel"
            elif "/Book" in self.filesystem:
                return "Excel"
            elif "/WordDocument" in self.filesystem:
                return "Word"
            return "<UnknownType>"


    def parse(self, hint):
        """
        Parse the whole compound file (generator driven by the analyzer framework).

        Steps, in order: file header, DIFAT, FAT, mini FAT, directory,
        per-stream sections, virtual filesystem, summary streams, then
        MSI- or VBA-specific parsing, and finally registration of every
        stream as a virtual file. `hint` is not used.

        Raises FatalError on structural inconsistencies, including loops in
        the DIFAT, mini FAT or directory sector chains.
        """
        fh = yield FileHeader(category=Type.HEADER)
        self.add_section("FileHeader", fh.offset, fh.size)
        self.sector_size = pow(2, fh["SectorShift"])
        self.sectors = fh["NumberOfFATSectors"] * self.sector_size // 4
        self.minisector_size = pow(2, fh["MiniSectorShift"])
        self.minifat_threshold = fh["MiniStreamCutoffSize"]
        self.confirm()

        # difat
        cur_size = 109 * 4
        last_sector = -1
        difat = yield Array(109, UInt32(), name="DIFAT", comment="the first 109 FAT sector locations of the compound file", category=Type.FIXUP)
        self.difats.append(difat)
        start = difat.offset
        nextdifat = fh["FirstDIFATSector"]
        declared_num_difat_sectors = fh["NumberOfDIFATSectors"]
        effective_num_difat_sectors = 0
        while nextdifat < 0xFFFFFFFA:
            effective_num_difat_sectors += 1
            if effective_num_difat_sectors > declared_num_difat_sectors:
                # a chain longer than declared is invalid anyway; failing here
                # also keeps a looping chain from being followed forever
                raise FatalError("Invalid number of difat sectors: chain is longer than the {} declared sectors".format(declared_num_difat_sectors))
            self.jump(self.sector2offset(nextdifat))
            difat = yield Array(self.sector_size // 4, UInt32(), name="DIFAT", category=Type.FIXUP)
            self.difats.append(difat)
            if last_sector != nextdifat - 1:
                self.add_section("DIFAT", start, cur_size, r=False, discardable=True)
                cur_size = 0
                start = difat.offset
            cur_size += self.sector_size
            last_sector = nextdifat
            # the last entry of a DIFAT sector links to the next DIFAT sector
            nextdifat = difat[difat.count - 1]
        if cur_size:
            self.add_section("DIFAT", start, cur_size, r=False, discardable=True)
        if effective_num_difat_sectors != declared_num_difat_sectors:
            raise FatalError("Invalid number of difat sectors: found {} sectors vs {} declared".format(effective_num_difat_sectors, declared_num_difat_sectors))

        # fat (consecutive FAT sectors are merged into a single array)
        cur_size = 0
        last_sector = None
        for i in range(fh["NumberOfFATSectors"]):
            fat_sector = self.fatsector2sector(i)
            if fat_sector > 0xFFFFFFFA:
                raise FatalError("Invalid DIFAT (cannot reach FAT sector {:d})".format(i))
            if last_sector is None or last_sector != fat_sector - 1:
                if cur_size:
                    fat = yield Array(cur_size, UInt32(), name="FAT", category=Type.FIXUP)
                    self.add_section("FAT", fat.offset, fat.size, r=False, discardable=True)
                    self.fats.append(fat)
                cur_size = 0
                self.jump(self.sector2offset(fat_sector))
            cur_size += self.sector_size // 4
            last_sector = fat_sector
        if cur_size:
            fat = yield Array(cur_size, UInt32(), name="FAT", category=Type.FIXUP)
            self.add_section("FAT", fat.offset, fat.size, r=False, discardable=True)
            self.fats.append(fat)

        self.confirm()

        # minifat
        mfsect = fh["FirstMiniFATSector"]
        last_sector = None
        cur_size = 0
        seen_sectors = set()
        while mfsect <= 0xFFFFFFFA:
            if mfsect in seen_sectors:
                raise FatalError("Loop detected in MiniFAT sector chain")
            seen_sectors.add(mfsect)
            if last_sector is None or last_sector != mfsect - 1:
                if cur_size:
                    mf = yield Array(cur_size, UInt32(), name="MiniFAT", category=Type.FIXUP)
                    self.add_section("MINIFAT", mf.offset, mf.size, r=False, discardable=True)
                    self.minifats.append(mf)
                cur_size = 0
                self.jump(self.sector2offset(mfsect))
            cur_size += self.sector_size // 4
            last_sector = mfsect
            mfsect = self.sector2next(mfsect)
        if cur_size:
            mf = yield Array(cur_size, UInt32(), name="MiniFAT", category=Type.FIXUP)
            self.add_section("MINIFAT", mf.offset, mf.size, r=False, discardable=True)
            self.minifats.append(mf)

        self.confirm()

        # directories (the loop always flushes the pending run before leaving)
        dirs_per_sector = self.sector_size // 128
        last_sector = fh["FirstDirectorySector"]
        self.jump(self.sector2offset(last_sector))
        cur_size = dirs_per_sector
        seen_sectors = {last_sector}
        while True:
            nexts = self.sector2next(last_sector)
            if nexts <= 0xFFFFFFFA:
                if nexts in seen_sectors:
                    raise FatalError("Loop detected in directory sector chain")
                seen_sectors.add(nexts)
            if nexts != last_sector + 1:
                somedirs = yield Array(cur_size, DirectoryEntry(), name="Directories", category=Type.HEADER)
                self.add_section("Directory", somedirs.offset, somedirs.size, r=False, discardable=True)
                for el in somedirs:
                    self.directory.append(el)
                cur_size = 0
                if nexts > 0xFFFFFFFA:
                    break
                self.jump(self.sector2offset(nexts))
                self.fragmented = True
            cur_size += dirs_per_sector
            last_sector = nexts

        self.ministream_first_sector = self.directory[0]["StartingSector"]
        # First guess, based on the root CLSID only: the filesystem is still
        # empty here, but decode_stream_name() already needs to know whether
        # this is an MSI database.
        self.type = self.get_type()
        type_is_final = self.type != "<UnknownType>"
        if type_is_final:
            self.add_metadata("Type", self.type)

        #streams
        self.parents = {}
        self.filesystem = {}
        for i, entry in enumerate(self.directory):
            if entry["ObjectType"] == 0:
                continue
            if entry["Child"] != 0xffffffff:
                self._link_children(entry["Child"], i)
            if i == 0 or entry["NameLength"] == 0 or entry["StreamSize"] == 0 or entry["StartingSector"] > 0xFFFFFFFA:
                continue
            # NOTE(review): the root entry is skipped by the test above, so the
            # "MiniStream" branch and force_FAT=(i==0) below are never active
            if i == 0:
                fname = "MiniStream"
            else:
                fname = toprintable(self.decode_stream_name(entry["Name"]))
            start = None
            last = None
            blocks = []
            minifile =  entry["StreamSize"] < self.minifat_threshold
            try:
                # merge physically consecutive pieces into blocks
                for offset, sz in self.iter_file(entry["StartingSector"], entry["StreamSize"], force_FAT=(i==0)):
                    if last is not None and offset != last:
                        blocks.append((start, last-start))
                        start = None
                        self.fragmented = True
                    if start is None:
                        start = offset
                    last = offset + sz
                if start is not None:
                    blocks.append((start, last-start))
                if len(blocks) ==  1:
                    start, sz = blocks[0]
                    self.jump(start)
                    if "SummaryInformation" in fname:
                        sname = self.decode_stream_name(entry["Name"])
                        yield Summary("Document" in sname, name=sname, category=Type.META)
                for block in blocks:
                    start, sz = block
                    if minifile:
                        ssz = align(sz, self.minisector_size)
                    else:
                        ssz = align(sz, self.sector_size)
                    self.add_section(fname, start, ssz)
            except ParsingError as e:
                print(e)
                continue
        self.confirm()

        SPECIAL_FILES = { }
        # parse filesystem
        for i, entry in enumerate(self.directory):
            if entry["ObjectType"] == 0 or entry["StartingSector"] > 0xFFFFFFFA:
                continue
            fpath = "/".join(self.get_entry_path(i))
            self.filesystem[fpath] = entry
            rawname = entry["Name"].lower().strip().replace("\x00", "")
            if rawname and ord(rawname[0]) < 10:
                if rawname == "\x01ole10native":
                    SPECIAL_FILES[fpath] = ("Office.OleNative", "read_stream")
                elif rawname == "\x01ole":
                    SPECIAL_FILES[fpath] = ("Office.OleStream", "read_stream")
                elif rawname == "\x01compobj":
                    SPECIAL_FILES[fpath] = ("Office.CompObj", "read_stream")
                elif rawname == "\x03objinfo":
                    SPECIAL_FILES[fpath] = ("Office.ObjInfo", "read_stream")

        if not type_is_final:
            # The stream-name fallbacks of get_type() can only match once the
            # filesystem has been built. The result cannot become "MSI
            # Installer" here (that label comes from the CLSID lookup alone),
            # so names decoded so far stay valid.
            self.type = self.get_type()
            self.add_metadata("Type", self.type)

        # parse summaries
        for summary_path in "/SummaryInformation", "/DocumentSummaryInformation":
            if summary_path in self.filesystem:
                try:
                    data = self.read_entry(self.filesystem[summary_path])
                    fake_file = malcat.FileBuffer(data, summary_path)
                    parser = SummaryAnalyzer()
                    parser.run(fake_file, hint=summary_path[1:])
                    if parser.summary_metadata:
                        SPECIAL_FILES[summary_path] = ("Office.Summary", "read_stream")
                        for cat, d in parser.summary_metadata.items():
                            for k, v in d.items():
                                self.add_metadata(k, v, f"{summary_path[1:]}-{cat}")
                    if parser.codec and self.codec is None:
                        self.codec = parser.codec
                except ParsingError as e:
                    print(e)

        # type-specific parsing
        try:
            if self.type == "MSI Installer":
                self.set_architecture(malcat.Architecture.MSI)
                # parse msi tables
                self.msi_db = parse_msi(self)
                self.decompile = lambda: decompile_msi(self)
            else:
                # parse vba dir
                self.set_architecture(malcat.Architecture.VBA)
                vbadir, self.vba_modules, meta = parse_vba(self)
                for k, v in meta.items():
                    self.add_metadata(k, v, category="VbaProject")
                if vbadir:
                    SPECIAL_FILES[vbadir] = ("Office.VbaDir", "unpack_stream")
                self.decompile = lambda: decompile_vba(self)

        except ParsingError as e:
            print(e)

        # add vfiles
        for path, entry in self.filesystem.items():
            if entry["ObjectType"] != 2:
                continue
            subtype, operation = SPECIAL_FILES.get(path, ("", "read_stream"))
            self.add_file(path, entry["StreamSize"], operation, subtype)

    def _link_children(self, first_child, parent):
        """
        Record `parent` in self.parents for every entry of the sibling tree rooted at `first_child`.

        Iterative on purpose: the sibling links come from the parsed file and
        a degenerate tree would otherwise exhaust the interpreter stack.
        Indexes outside the directory and already visited entries are ignored.
        """
        visited = set()
        pending = [first_child]
        while pending:
            index = pending.pop()
            if index >= len(self.directory) or index in visited:
                continue
            visited.add(index)
            self.parents[index] = parent
            node = self.directory[index]
            # right is pushed first so that the left subtree is handled first
            for link in ("RightSibling", "LeftSibling"):
                if node[link] != 0xffffffff:
                    pending.append(node[link])




##################################################################
