Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
864817b
DIS-1352 Add a thumbcache idx parser code
Miauwkeru Oct 27, 2022
6832a26
DIS-1352 Add testdata to git lfs
Miauwkeru Oct 27, 2022
3a4aafb
Add tools to extract thumbcache
Miauwkeru Oct 27, 2022
47030a9
Apply suggestions from code review
Miauwkeru Oct 27, 2022
b336264
Use a string instead of binary string
Miauwkeru Oct 27, 2022
73c050b
Add constants instead of magic numbers
Miauwkeru Oct 27, 2022
b8ff8ad
Add typehints
Miauwkeru Oct 27, 2022
ca4fc94
Add the missed constants and typehints
Miauwkeru Oct 27, 2022
45778e2
DIS-1352 Add correct readme
Miauwkeru Oct 28, 2022
09a1d4a
Add fix for test_thumbcache assertion
Miauwkeru Oct 28, 2022
d1e15c8
Add some user information to the tools
Miauwkeru Oct 28, 2022
bb3fcfd
Improve error handling
Miauwkeru Oct 28, 2022
4f400a4
Resolve an issue of unknown entries
Miauwkeru Oct 28, 2022
6c7ab88
Add an additional relation with the INDEX_HEADER_V2
Miauwkeru Oct 28, 2022
c2f49fe
Made the pytest results a bit more clear
Miauwkeru Oct 28, 2022
cbec47a
Apply suggestions from code review
Miauwkeru Oct 28, 2022
e64b193
Remove data argument from __init__
Miauwkeru Oct 28, 2022
a5fab7d
Rename instances of file to fh
Miauwkeru Oct 28, 2022
e94b03f
Apply suggestions from code review
Miauwkeru Oct 28, 2022
def18c5
Rename cstruct variables
Miauwkeru Nov 1, 2022
158c9b7
Use comments consistently in the c_thumbcache headers
Miauwkeru Nov 2, 2022
efc9caa
Use argparse.exit for error conditions
Miauwkeru Nov 2, 2022
b9a87ce
Add docs link to documentation
Miauwkeru Nov 2, 2022
90608c0
Define python_requires
Miauwkeru Nov 2, 2022
fedb801
Put important classes in __init__.py
Miauwkeru Nov 2, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.db filter=lfs diff=lfs merge=lfs -text
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# dissect.thumbcache
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the default template for the readme. See dissect.target for example (but you know ;))


This is a project to parse the Windows thumbcache.

## Windows Vista+

The project currently only supports the indexed thumbcache format used by Windows Vista and later. The Windows XP format is not implemented yet.
79 changes: 79 additions & 0 deletions dissect/thumbcache/c_thumbcache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from dissect.cstruct import cstruct

# cstruct definitions for the thumbcache index file (INDEX_HEADER_*, *_ENTRY)
# and for the thumbcache data files (CACHE_HEADER*, CACHE_ENTRY*).
# NOTE: despite its name, this definition also holds the cache file structures.
c_thumbcache_index_def = """
struct INDEX_HEADER_V1 {
    char    signature[4];       // 0x00
    uint32  version;            // 0x04
    uint32  unknown1;           // 0x08
    uint32  used_entries;       // 0x0C
    uint32  total_entries;      // 0x10
    uint32  unknown2;           // 0x14
};                              // 0x18

struct INDEX_HEADER_V2 {
    char    signature[8];       // 0x00
    uint64  version;            // 0x08
    uint32  unknown1;           // 0x10
    uint32  used_entries;       // 0x14
    uint32  total_entries;      // 0x18
    uint32  unknown2;           // 0x1C
};                              // 0x20

struct VISTA_ENTRY {
    char    hash[8];
    uint64  last_modified;
    uint32  flags;
};

struct WINDOWS7_ENTRY {
    char    hash[8];
    uint32  flags;
};

struct WINDOWS8_ENTRY {
    char    hash[8];
    uint32  flags;
    uint32  unknown;            // Is sometimes filled with information, couldn't figure out what it meant yet though.
};

struct CACHE_HEADER {
    char    signature[4];
    uint32  version;
    uint32  type;
    uint32  size;
    uint32  offset;
    uint32  entries;
};

// Same fields as CACHE_HEADER, but with ``offset`` and ``size`` swapped.
struct CACHE_HEADER_VISTA {
    char    signature[4];
    uint32  version;
    uint32  type;
    uint32  offset;
    uint32  size;
    uint32  entries;
};

struct CACHE_ENTRY {
    char    signature[4];
    uint32  size;
    char    hash[8];
    uint32  identifier_size;
    uint32  padding_size;
    uint32  data_size;
    uint32  _unknown3;
};

// Same as CACHE_ENTRY, with an additional ``extension`` field after the hash.
struct CACHE_ENTRY_VISTA {
    char    signature[4];
    uint32  size;
    char    hash[8];
    wchar   extension[4];
    uint32  identifier_size;
    uint32  padding_size;
    uint32  data_size;
    uint32  _unknown3;
};
"""

# Parser instance that exposes the structures above as attributes,
# e.g. ``c_thumbcache_index.INDEX_HEADER_V1``.
c_thumbcache_index = cstruct()
c_thumbcache_index.load(c_thumbcache_index_def)
8 changes: 8 additions & 0 deletions dissect/thumbcache/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class NotAThumbnailIndexFileError(Exception):
    """Signals that the parsed data is not a thumbnail index file."""


class InvalidSignatureError(Exception):
    """Signals that a signature differs from the value that was expected."""
153 changes: 153 additions & 0 deletions dissect/thumbcache/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
from __future__ import annotations

from datetime import datetime
from typing import BinaryIO, Iterator

from dissect.cstruct import Structure
from dissect.util import ts

from dissect.thumbcache.c_thumbcache import c_thumbcache_index
from dissect.thumbcache.exceptions import NotAThumbnailIndexFileError
from dissect.thumbcache.util import ThumbnailType

# Number of uint32 cache offsets that follow each index entry header, keyed by
# thumbcache version. The same count, multiplied by 8 bytes, is also skipped
# directly after an INDEX_HEADER_V2 when the index header is parsed.
INDEX_ENTRIES = {
ThumbnailType.WINDOWS_7: 5,
ThumbnailType.WINDOWS_81: 11,
ThumbnailType.WINDOWS_10: 14,
ThumbnailType.WINDOWS_VISTA: 5,
}


class ThumbnailIndex:
    """Parser for a thumbcache index file.

    The header is parsed lazily on first access; parsing it also leaves ``file``
    positioned at the first index entry.

    Args:
        file: A binary file-like object positioned at the start of the index data.
    """

    _signature = b"IMMM"

    def __init__(self, file: BinaryIO):
        self.file = file
        self._header = None

    @property
    def header(self) -> Structure:
        """The index header structure, parsed on first access and cached afterwards."""
        if self._header is None:
            self._header = self._find_header(self.file)
        return self._header

    def _find_header(self, file: BinaryIO) -> Structure:
        """Search for the header signature, and put ``file`` at the correct position.

        Two header layouts exist. In the first, the ``IMMM`` signature sits at
        offset 0 (``INDEX_HEADER_V1``). In the second, the signature and version
        fields use a 64-bit format and ``IMMM`` is found at offset 4
        (``INDEX_HEADER_V2``); that header is followed by a version dependent
        number of additional bytes which are skipped.

        NOTE(review): the original docstring also referred to a header field with
        the value ``0C 00 30 20``; its meaning is not evident from this code.

        Args:
            file: The file to read the header and indexes from.

        Returns:
            An ``INDEX_HEADER_V1`` or ``INDEX_HEADER_V2`` structure.

        Raises:
            NotAThumbnailIndexFileError: If the ``IMMM`` signature could not be found
                at a known offset, or if the header version is not supported.
        """
        position = file.tell()
        buffer = file.read(len(c_thumbcache_index.INDEX_HEADER_V1))
        offset = buffer.find(self._signature)

        if offset == 0:
            # The V1 header is exactly the buffer that was read, so ``file`` already
            # points at the first entry.
            return c_thumbcache_index.INDEX_HEADER_V1(buffer)

        if offset != 4:
            raise NotAThumbnailIndexFileError()

        # The V2 header is larger than the bytes read so far; re-read it from the start.
        file.seek(position)
        header = c_thumbcache_index.INDEX_HEADER_V2(file)

        # From looking at the index files, it has a specific amount of information
        # depending on the number of index_db files.
        # TODO: see if it does anything interesting, or it might have something to do with the icon_cache.
        entry_count = INDEX_ENTRIES.get(header.version)
        if entry_count is None:
            # Without a known layout the amount of bytes to skip cannot be determined.
            raise NotAThumbnailIndexFileError(f"Unsupported thumbcache index version: {header.version}")

        file.read(entry_count * 8)
        return header

    @property
    def version(self) -> int:
        """The version value stored in the index header."""
        return self.header.version

    @property
    def type(self) -> ThumbnailType:
        """The header version converted to a :class:`ThumbnailType`."""
        return ThumbnailType(self.version)

    @property
    def total_entries(self) -> int:
        """The ``total_entries`` value of the index header."""
        return self.header.total_entries

    @property
    def used_entries(self) -> int:
        """The ``used_entries`` value of the index header."""
        return self.header.used_entries

    def entries(self) -> Iterator[IndexEntry]:
        """Yield every entry slot of the index, whether it is in use or not.

        Use :meth:`IndexEntry.in_use` to filter out the unused slots. Entries are
        read sequentially from the current position of ``file``.
        """
        for _ in range(self.total_entries):
            entry = IndexEntry(self.file, self.type)
            # Entries are lazy; touch both properties so the entry is fully read
            # and ``file`` advances to the start of the next entry.
            entry.header
            entry.cache_offsets

            yield entry


class IndexEntry:
    """A single entry of a thumbcache index file.

    Both the entry header and the cache offsets are read lazily from ``file``,
    starting at the position ``file`` has at that moment.

    Args:
        file: A binary file-like object positioned at the start of the entry.
        type: The thumbcache version, which selects the entry layout.
        data: Unused; only kept so existing callers that pass it keep working.
    """

    def __init__(self, file: BinaryIO, type: ThumbnailType, data=None) -> None:
        self.file = file
        self.type = type
        self._header = None
        self._data = None

    @property
    def header(self):
        """The entry header structure, read on first access and cached afterwards."""
        if self._header is None:
            self._header = self._select_header()
        return self._header

    def _select_header(self):
        """Selects header version according to the thumbnailtype."""
        if self.type == ThumbnailType.WINDOWS_VISTA:
            return c_thumbcache_index.VISTA_ENTRY(self.file)
        elif self.type == ThumbnailType.WINDOWS_7:
            return c_thumbcache_index.WINDOWS7_ENTRY(self.file)
        else:
            return c_thumbcache_index.WINDOWS8_ENTRY(self.file)

    def in_use(self) -> bool:
        """Return whether the entry is in use, i.e. its hash is not eight zero bytes."""
        return self.identifier != b"\x00" * 8

    @property
    def identifier(self) -> bytes:
        """The 8-byte hash stored in the entry header."""
        return self.header.hash

    @property
    def flags(self) -> int:
        """The flags value stored in the entry header."""
        return self.header.flags

    @property
    def cache_offsets(self) -> list[int]:
        """Retrieve the index data entries.

        These are offsets into the thumbcache files, where the order specifies in which of the files.
        More information about the order can be found in :class:`Thumbcache`.
        """
        if self._data is None:
            # The offsets directly follow the entry header, so make sure the header
            # has been consumed before reading them.
            self.header

            size = INDEX_ENTRIES.get(self.type)
            self._data = c_thumbcache_index.uint32[size](self.file)
            if self.type > ThumbnailType.WINDOWS_7:
                # Alignment step: skip 4 bytes when the number of offsets is odd.
                self.file.read((size % 2) * 4)
        return self._data

    @property
    def last_modified(self) -> datetime | None:
        """The last modified timestamp of the entry, or ``None`` if the layout has none.

        Only the Windows Vista entry layout carries a ``last_modified`` field; it
        was only present in that initial release of the format.
        """
        if self.type == ThumbnailType.WINDOWS_VISTA:
            # Go through the property so this also works before the header was read.
            return ts.wintimestamp(self.header.last_modified)
        return None

    def __repr__(self) -> str:
        return (
            f"identifier={self.identifier.hex()} flags={hex(self.flags)} "
            f"cache_offsets={[hex(x) for x in self.cache_offsets]}"
        )
66 changes: 66 additions & 0 deletions dissect/thumbcache/thumbcache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from pathlib import Path
from typing import Iterator

from dissect.thumbcache.index import IndexEntry, ThumbnailIndex
from dissect.thumbcache.thumbcache_file import ThumbcacheEntry, ThumbcacheFile


class Thumbcache:
    """This class combines the thumbnailindex and thumbcachefile together.

    The class looks up all files inside ``path`` that have the same ``prefix``.
    The file whose name ends with ``_idx.db`` is used as the index file, all other
    matches are treated as cache files.

    Args:
        path: The directory that contains the thumbcache files.
        prefix: The start of the name to search for.
    """

    # Offset value that is skipped when resolving index entries.
    # NOTE(review): presumably marks "no thumbnail in this cache file" - confirm.
    _UNUSED_OFFSET = 0xFFFFFFFF

    def __init__(self, path: Path, prefix="thumbcache") -> None:
        self._mapping: dict[int, Path] = {}
        self.index_file, self.cache_files = self._populate_files(path, prefix)

    def _populate_files(self, path: Path, prefix: str) -> tuple[Path, list[Path]]:
        """Split the files in ``path`` that start with ``prefix`` into index and cache files.

        Returns:
            A tuple of the index file (``None`` if there is none) and the list of
            cache files, sorted by path so the result is deterministic.
        """
        cache_files = []
        index_file = None
        # ``glob`` yields files in arbitrary order; sort for reproducible results.
        for file in sorted(path.glob(f"{prefix}*")):
            if file.name.endswith("_idx.db"):
                index_file = file
            else:
                cache_files.append(file)
        return index_file, cache_files

    @property
    def mapping(self) -> dict[int, Path]:
        """Mapping of the ``type`` of every cache file to the path of that file.

        The mapping is built on first access by opening each cache file.
        """
        if not self._mapping:
            for file in self.cache_files:
                with file.open("rb") as cache_file:
                    self._mapping[ThumbcacheFile(cache_file).type] = file
        return self._mapping

    def entries(self) -> Iterator[tuple[Path, ThumbcacheEntry]]:
        """Iterates through all the specific entries from the thumbcache files."""
        for entry in self.index_entries():
            yield from self._entries_from_offsets(entry.cache_offsets)

    def index_entries(self) -> Iterator[IndexEntry]:
        """Iterates through all the index entries that are in use.

        Raises:
            FileNotFoundError: If no index file was found for the given path and prefix.
        """
        if self.index_file is None:
            raise FileNotFoundError("No thumbcache index file (*_idx.db) was found")

        with self.index_file.open("rb") as i_file:
            for entry in ThumbnailIndex(i_file).entries():
                if entry.in_use():
                    yield entry

    def _entries_from_offsets(self, offsets: list[int]) -> Iterator[tuple[Path, ThumbcacheEntry]]:
        """Retrieve Thumbcache entries from a ThumbcacheFile using offsets.

        The position of an offset in ``offsets`` selects the cache file through
        :attr:`mapping`. Offsets without a matching cache file are skipped.
        """
        for idx, offset in enumerate(offsets):
            if offset == self._UNUSED_OFFSET:
                continue

            cache_path = self.mapping.get(idx)
            if cache_path is None:
                # The cache file this offset points into is not available.
                continue

            with cache_path.open("rb") as cache_file:
                yield cache_path, ThumbcacheFile(cache_file)[offset]
Loading