diff --git a/pyOneNote/FileNode.py b/pyOneNote/FileNode.py index a5f04d0..0269763 100644 --- a/pyOneNote/FileNode.py +++ b/pyOneNote/FileNode.py @@ -1,3 +1,4 @@ +import logging import uuid import struct from datetime import datetime, timedelta @@ -13,21 +14,36 @@ def __init__(self, file): class FileNodeList: def __init__(self, file, document, file_chunk_reference): - file.seek(file_chunk_reference.stp) + self.file = file + self.document = document + self.file_chunk_reference = file_chunk_reference self.end = file_chunk_reference.stp + file_chunk_reference.cb - self.fragments = [] + # self.fragments = [] + def __iter__(self): # FileNodeList can contain one or more FileNodeListFragment - while True: - section_end = file_chunk_reference.stp + file_chunk_reference.cb - fragment = FileNodeListFragment(file, document, section_end) - self.fragments.append(fragment) - if fragment.nextFragment.isFcrNil(): - break - file_chunk_reference = fragment.nextFragment - file.seek(fragment.nextFragment.stp) + self.section_start = self.file_chunk_reference.stp + self.section_end = self.file_chunk_reference.stp + self.file_chunk_reference.cb + self.is_iter_end = False + return self + + def __next__(self): + if self.is_iter_end: + raise StopIteration + + self.file.seek(self.section_start) + fragment = FileNodeListFragment(self.file, self.document, self.section_end) + # fragment = FileNodeListFragment(self.file, self.document, self.section_end) + + self.section_start = fragment.nextFragment.stp + self.section_end = fragment.nextFragment.stp + fragment.nextFragment.cb + if fragment.nextFragment.isFcrNil(): + self.is_iter_end = True + return fragment + +# orig class FileNodeListFragment: def __init__(self, file, document, end): self.fileNodes = [] @@ -103,11 +119,11 @@ def __init__(self, file): class FileNode: count = 0 + def __init__(self, file, document): - self.document= document + self.document = document self.file_node_header = FileNodeHeader(file) - if DEBUG: - print(str(file.tell()) + ' ' + self.file_node_header.file_node_type + ' ' + str(self.file_node_header.baseType)) + logging.debug(str(file.tell()) + ' ' + self.file_node_header.file_node_type + ' ' + str(self.file_node_header.baseType)) # potential error self.children = [] FileNode.count += 1 if self.file_node_header.file_node_type == "ObjectGroupStartFND": @@ -174,7 +190,7 @@ def __init__(self, file, document): # no data part self.data = None else: - p = 1 + pass current_offset = file.tell() if self.file_node_header.baseType == 2: @@ -382,7 +398,6 @@ def __init__(self, file, file_node_header): self.guidReference, = struct.unpack('<16s', file.read(16)) self.guidReference = uuid.UUID(bytes_le=self.guidReference) current_offset = file.tell() - file.seek(self.ref.stp) self.fileDataStoreObject = FileDataStoreObject(file, self.ref) file.seek(current_offset) @@ -471,17 +486,17 @@ def __init__(self, file, document): def __str__(self): return ' ({}, {})'.format( - self.document._global_identification_table[self.current_revision][self.guidIndex], - self.n) + self.document._global_identification_table[self.current_revision][self.guidIndex], + self.n) def __repr__(self): return ' ({}, {})'.format( - self.document._global_identification_table[self.current_revision][self.guidIndex], - self.n) + self.document._global_identification_table[self.current_revision][self.guidIndex], + self.n) class JCID: - _jcid_name_mapping= { + _jcid_name_mapping = { 0x00120001: "jcidReadOnlyPersistablePropertyContainerForAuthor", 0x00020001: "jcidPersistablePropertyContainerForTOC", 0x00020001: "jcidPersistablePropertyContainerForTOCSection", @@ -544,15 +559,32 @@ def __str__(self): class FileDataStoreObject: def __init__(self, file, fileNodeChunkReference): + file.seek(fileNodeChunkReference.stp) self.guidHeader, self.cbLength, self.unused, self.reserved = struct.unpack('<16sQ4s8s', file.read(36)) - self.FileData, = struct.unpack('{}s'.format(self.cbLength), file.read(self.cbLength)) + self.content_pos = file.tell() file.seek(fileNodeChunkReference.stp + fileNodeChunkReference.cb - 16) - self.guidFooter, = struct.unpack('16s', file.read(16)) + self.guidFooter = file.read(16) self.guidHeader = uuid.UUID(bytes_le=self.guidHeader) self.guidFooter = uuid.UUID(bytes_le=self.guidFooter) + self.file = file + + def readinto(self, dst, chunk_size=4096): + self.file.seek(self.content_pos) + + while True: + chunk = self.file.read(chunk_size) + if not chunk: + break + dst.write(chunk) + + def read_content(self): + self.file.seek(self.content_pos) + return self.file.read() def __str__(self): - return self.FileData[:128].hex() + self.file.seek(self.content_pos) + chunk_128 = self.file.read(128) + return chunk_128.hex() class ObjectSpaceObjectPropSet: @@ -651,9 +683,8 @@ def get_compact_ids(stream_of_context_ids, count): data.append(stream_of_context_ids.read()) return data - def get_properties(self): - if self._formated_properties is not None : + if self._formated_properties is not None: return self._formated_properties self._formated_properties = {} @@ -670,7 +701,7 @@ def get_properties(self): except: propertyVal = self.rgData[i].Data.hex() else: - property_name_lower = propertyName.lower() + property_name_lower = propertyName.lower() if 'time' in property_name_lower: if len(self.rgData[i]) == 8: timestamp_in_nano, = struct.unpack('{", "").replace("}", "") guid = guid.lower() - if not guid in self._files: - self._files[guid] = {"extension": "", "content": "", "identity": ""} - self._files[guid]["extension"] = node.data.Extension.StringData - self._files[guid]["identity"] = str(node.data.oid) - return self._files - + if not guid in files: + files[guid] = {"extension": "", "content": "", "identity": ""} + files[guid]["extension"] = node.data.Extension.StringData + files[guid]["identity"] = str(node.data.oid) + + for guid, file in files.items(): + yield guid, { + "extension": file["extension"], + "content": file["content"].data.fileDataStoreObject, + "identity": file["identity"] + } def get_global_identification_table(self): return self._global_identification_table def get_json(self): files_in_hex = {} - for key, file in self.get_files().items(): + for key, file in self.get_files(): files_in_hex[key] = {'extension': file['extension'], - 'content': file['content'].hex(), + 'content': file['content'].read_content().hex(), 'identity': file['identity']} res = { @@ -84,6 +88,3 @@ def get_json(self): def __str__(self): return '{}\n{}\n{}'.format(str(self.header), str(self.rootFileNode)) - - -