Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 63 additions & 34 deletions pyOneNote/FileNode.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import uuid
import struct
from datetime import datetime, timedelta
Expand All @@ -13,21 +14,36 @@ def __init__(self, file):

class FileNodeList:
def __init__(self, file, document, file_chunk_reference):
file.seek(file_chunk_reference.stp)
self.file = file
self.document = document
self.file_chunk_reference = file_chunk_reference
self.end = file_chunk_reference.stp + file_chunk_reference.cb
self.fragments = []
# self.fragments = []

def __iter__(self):
# FileNodeList can contain one or more FileNodeListFragment
while True:
section_end = file_chunk_reference.stp + file_chunk_reference.cb
fragment = FileNodeListFragment(file, document, section_end)
self.fragments.append(fragment)
if fragment.nextFragment.isFcrNil():
break
file_chunk_reference = fragment.nextFragment
file.seek(fragment.nextFragment.stp)
self.section_start = self.file_chunk_reference.stp
self.section_end = self.file_chunk_reference.stp + self.file_chunk_reference.cb
self.is_iter_end = False
return self

    def __next__(self):
        """Return the next FileNodeListFragment in the list.

        Fragments are chained through each fragment's ``nextFragment`` chunk
        reference; iteration ends after returning the fragment whose
        ``nextFragment`` is the nil reference (fcrNil).

        Raises:
            StopIteration: on the call after the final fragment was returned.
        """
        if self.is_iter_end:
            raise StopIteration

        # Seek to the current fragment's start offset and parse it up to the
        # fragment's end boundary.
        self.file.seek(self.section_start)
        fragment = FileNodeListFragment(self.file, self.document, self.section_end)
        # fragment = FileNodeListFragment(self.file, self.document, self.section_end)

        # Advance the cursor to the region described by the fragment's
        # nextFragment reference (stp = start offset, cb = size in bytes).
        self.section_start = fragment.nextFragment.stp
        self.section_end = fragment.nextFragment.stp + fragment.nextFragment.cb

        # A nil reference marks the end of the chain; the current fragment is
        # still returned, and the next call raises StopIteration.
        if fragment.nextFragment.isFcrNil():
            self.is_iter_end = True
        return fragment


# orig
class FileNodeListFragment:
def __init__(self, file, document, end):
self.fileNodes = []
Expand Down Expand Up @@ -103,11 +119,11 @@ def __init__(self, file):

class FileNode:
count = 0

def __init__(self, file, document):
self.document= document
self.document = document
self.file_node_header = FileNodeHeader(file)
if DEBUG:
print(str(file.tell()) + ' ' + self.file_node_header.file_node_type + ' ' + str(self.file_node_header.baseType))
logging.debug(str(file.tell()) + ' ' + self.file_node_header.file_node_type + ' ' + str(self.file_node_header.baseType)) # potential error
self.children = []
FileNode.count += 1
if self.file_node_header.file_node_type == "ObjectGroupStartFND":
Expand Down Expand Up @@ -174,7 +190,7 @@ def __init__(self, file, document):
# no data part
self.data = None
else:
p = 1
pass

current_offset = file.tell()
if self.file_node_header.baseType == 2:
Expand Down Expand Up @@ -382,7 +398,6 @@ def __init__(self, file, file_node_header):
self.guidReference, = struct.unpack('<16s', file.read(16))
self.guidReference = uuid.UUID(bytes_le=self.guidReference)
current_offset = file.tell()
file.seek(self.ref.stp)
self.fileDataStoreObject = FileDataStoreObject(file, self.ref)
file.seek(current_offset)

Expand Down Expand Up @@ -471,17 +486,17 @@ def __init__(self, file, document):

def __str__(self):
return '<ExtendedGUID> ({}, {})'.format(
self.document._global_identification_table[self.current_revision][self.guidIndex],
self.n)
self.document._global_identification_table[self.current_revision][self.guidIndex],
self.n)

def __repr__(self):
return '<ExtendedGUID> ({}, {})'.format(
self.document._global_identification_table[self.current_revision][self.guidIndex],
self.n)
self.document._global_identification_table[self.current_revision][self.guidIndex],
self.n)


class JCID:
_jcid_name_mapping= {
_jcid_name_mapping = {
0x00120001: "jcidReadOnlyPersistablePropertyContainerForAuthor",
0x00020001: "jcidPersistablePropertyContainerForTOC",
0x00020001: "jcidPersistablePropertyContainerForTOCSection",
Expand Down Expand Up @@ -544,15 +559,32 @@ def __str__(self):

class FileDataStoreObject:
def __init__(self, file, fileNodeChunkReference):
file.seek(fileNodeChunkReference.stp)
self.guidHeader, self.cbLength, self.unused, self.reserved = struct.unpack('<16sQ4s8s', file.read(36))
self.FileData, = struct.unpack('{}s'.format(self.cbLength), file.read(self.cbLength))
self.content_pos = file.tell()
file.seek(fileNodeChunkReference.stp + fileNodeChunkReference.cb - 16)
self.guidFooter, = struct.unpack('16s', file.read(16))
self.guidFooter = file.read(16)
self.guidHeader = uuid.UUID(bytes_le=self.guidHeader)
self.guidFooter = uuid.UUID(bytes_le=self.guidFooter)
self.file = file

    def readinto(self, dst, chunk_size=4096):
        """Stream this object's embedded file data into *dst* in chunks.

        Args:
            dst: a writable binary file-like object.
            chunk_size: bytes read from the backing file per iteration.
        """
        # Rewind the backing file to where the FileData payload begins.
        self.file.seek(self.content_pos)

        # NOTE(review): this copies from content_pos to EOF of the backing
        # file, not just cbLength bytes of FileData -- confirm whether the
        # trailing guidFooter (and any following data) should be included.
        while True:
            chunk = self.file.read(chunk_size)
            if not chunk:
                break
            dst.write(chunk)

    def read_content(self):
        """Return the embedded file data as one bytes object.

        Seeks to the start of the FileData payload and reads to the end of
        the backing file.  NOTE(review): like readinto(), this reads to EOF
        rather than stopping after cbLength bytes -- confirm intended.
        """
        self.file.seek(self.content_pos)
        return self.file.read()

def __str__(self):
return self.FileData[:128].hex()
self.file.seek(self.content_pos)
chunk_128 = self.file.read(128)
return chunk_128.hex()


class ObjectSpaceObjectPropSet:
Expand Down Expand Up @@ -651,9 +683,8 @@ def get_compact_ids(stream_of_context_ids, count):
data.append(stream_of_context_ids.read())
return data


def get_properties(self):
if self._formated_properties is not None :
if self._formated_properties is not None:
return self._formated_properties

self._formated_properties = {}
Expand All @@ -670,7 +701,7 @@ def get_properties(self):
except:
propertyVal = self.rgData[i].Data.hex()
else:
property_name_lower = propertyName.lower()
property_name_lower = propertyName.lower()
if 'time' in property_name_lower:
if len(self.rgData[i]) == 8:
timestamp_in_nano, = struct.unpack('<Q', self.rgData[i])
Expand All @@ -685,10 +716,10 @@ def get_properties(self):
size, = struct.unpack('<f', self.rgData[i])
propertyVal = PropertySet.half_inch_size_to_pixels(size)
elif 'langid' in property_name_lower:
lcid, =struct.unpack('<H', self.rgData[i])
lcid, = struct.unpack('<H', self.rgData[i])
propertyVal = '{}({})'.format(PropertySet.lcid_to_string(lcid), lcid)
elif 'languageid' in property_name_lower:
lcid, =struct.unpack('<I', self.rgData[i])
lcid, = struct.unpack('<I', self.rgData[i])
propertyVal = '{}({})'.format(PropertySet.lcid_to_string(lcid), lcid)
else:
if isinstance(self.rgData[i], list):
Expand All @@ -698,14 +729,13 @@ def get_properties(self):
self._formated_properties[propertyName] = propertyVal
return self._formated_properties


def __str__(self):
result = ''
for propertyName, propertyVal in self.get_properties().items():
result += '{}{}: {}\n'.format(self.indent, propertyName, propertyVal)
return result

[staticmethod]
@staticmethod
def half_inch_size_to_pixels(picture_width, dpi=96):
# Number of pixels per half-inch
pixels_per_half_inch = dpi / 2
Expand All @@ -715,7 +745,7 @@ def half_inch_size_to_pixels(picture_width, dpi=96):

return int(pixels)

[staticmethod]
@staticmethod
def time32_to_datetime(time32):
# Define the starting time (12:00 A.M., January 1, 1980, UTC)
start = datetime(1980, 1, 1, 0, 0, 0)
Expand All @@ -728,8 +758,7 @@ def time32_to_datetime(time32):

return dt


[staticmethod]
@staticmethod
def parse_filetime(filetime):
# Define the number of 100-nanosecond intervals in 1 second
intervals_per_second = 10 ** 7
Expand All @@ -748,7 +777,7 @@ def parse_filetime(filetime):

return dt

[staticmethod]
@staticmethod
def lcid_to_string(lcid):
return locale.windows_locale.get(lcid, 'Unknown LCID')

Expand Down
9 changes: 4 additions & 5 deletions pyOneNote/Header.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def __init__(self, file):
self.guidLegacyFileVersion = uuid.UUID(bytes_le=self.guidLegacyFileVersion)
self.guidFileFormat = uuid.UUID(bytes_le=self.guidFileFormat)
self.guidAncestor = uuid.UUID(bytes_le=self.guidAncestor)
self.guidFileVersion = uuid.UUID(bytes_le=self.guidFileVersion )
self.guidFileVersion = uuid.UUID(bytes_le=self.guidFileVersion)
self.guidDenyReadFileVersion = uuid.UUID(bytes_le=self.guidDenyReadFileVersion)

self.fcrHashedChunkList = FileChunkReference64x32(self.fcrHashedChunkList)
Expand All @@ -108,17 +108,16 @@ def __init__(self, file):
self.fcrLegacyTransactionLog = FileChunkReference32(self.fcrLegacyTransactionLog)
self.fcrLegacyFileNodeListRoot = FileChunkReference32(self.fcrLegacyFileNodeListRoot)


def convert_to_dictionary(self):
res = {}
for key, item in self.__dict__.items():
if not key.startswith('_') and not key == 'rgbReserved':
if isinstance(item, uuid.UUID):
res[key] = str(item)
elif isinstance(item, FileChunkReference64x32) or \
isinstance(item, FileChunkReference32) or \
isinstance(item, FileNodeChunkReference):
isinstance(item, FileChunkReference32) or \
isinstance(item, FileNodeChunkReference):
res[key] = str(item)
else:
res[key] = item
return res
return res
67 changes: 46 additions & 21 deletions pyOneNote/Main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pyOneNote.Header import *
from pyOneNote.FileNode import *
from io import FileIO

from pyOneNote.OneDocument import *
import math
import sys
Expand All @@ -8,7 +8,10 @@
import argparse
import json

log = logging.getLogger()
log = logging.getLogger("pyOneNoteLogger")
log.setLevel(logging.DEBUG)
logging.basicConfig(format='%(message)s')


def check_valid(file):
if file.read(16) in (
Expand All @@ -35,7 +38,7 @@ def process_onenote_file(file, output_dir, extension, json_output):

print('\n\nProperties\n####################################################################')
indent = '\t'
file_metadata ={}
file_metadata = {}
for propertySet in data['properties']:
print('{}{}({}):'.format(indent, propertySet['type'], propertySet['identity']))
if propertySet['type'] == "jcidEmbeddedFileNode":
Expand All @@ -45,42 +48,62 @@ def process_onenote_file(file, output_dir, extension, json_output):
if 'PictureContainer' in propertySet['val']:
file_metadata[propertySet['val']['PictureContainer'][0]] = propertySet['val']


for property_name, property_val in propertySet['val'].items():
print('{}{}: {}'.format(indent+'\t', property_name, str(property_val)))
print('{}{}: {}'.format(indent + '\t', property_name, str(property_val)))
print("")

print('\n\nEmbedded Files\n####################################################################')
indent = '\t'
for name, file in data['files'].items():
print('{}{} ({}):'.format(indent, name, file['identity']))
print('\t{}Extension: {}'.format(indent, file['extension']))
if(file['identity'] in file_metadata):
if file['identity'] in file_metadata:
for property_name, property_val in file_metadata[file['identity']].items():
print('{}{}: {}'.format(indent+'\t', property_name, str(property_val)))
print('{}'.format( get_hex_format(file['content'][:256], 16, indent+'\t')))
print('{}{}: {}'.format(indent + '\t', property_name, str(property_val)))
print('{}'.format(get_hex_format(file['content'][:256], 16, indent + '\t')))

if extension and not extension.startswith("."):
extension = "." + extension

counter = 0
for file_guid, file in document.get_files().items():
for file_guid, file in document.get_files():
with open(
os.path.join(output_dir,
"file_{}{}{}".format(counter, file["extension"], extension)), "wb"
) as output_file:
output_file.write(file["content"])
file["content"].readinto(output_file)
counter += 1

return json.dumps(data)


def process_onenote_file_v2(file, output_dir, extension):
    """Memory-optimized extraction path: stream every embedded file in a
    OneNote document straight to disk instead of materializing contents.

    Args:
        file: binary file object open on the .one document; it is validated
            and then rewound to offset 0 before parsing.
        output_dir: directory where extracted files are written.
        extension: optional extra extension appended to each output filename
            (a leading dot is added if missing).

    Side effects:
        Writes one file per embedded object, named "<guid><ext><extension>".
        Terminates the process (SystemExit) if the input is not a valid
        OneNote file.
    """
    if not check_valid(file):
        log.error("please provide valid One file")
        # Use sys.exit() rather than the interactive-only exit() builtin.
        sys.exit()

    # check_valid() consumed the leading magic bytes; restart at offset 0.
    file.seek(0)
    document = OneDocment(file)

    # Normalize the user-supplied extension so it always starts with a dot.
    if extension and not extension.startswith("."):
        extension = "." + extension

    # Fix: the original loop variable was named `file`, shadowing the `file`
    # parameter; use `entry` for the per-file metadata dict instead.
    for file_guid, entry in document.get_files():
        with open(
            os.path.join(output_dir,
                         "{}{}{}".format(file_guid, entry["extension"], extension)), "wb"
        ) as output_file:
            # Stream the payload in chunks rather than loading it whole.
            entry["content"].readinto(output_file)


def get_hex_format(hex_str, col, indent):
res = ''
chars = (col*2)
for i in range(math.ceil( len(hex_str)/chars)):
segment = hex_str[i*chars: (i+1)*chars]
res += indent + ' '.join([segment[i:i+2] for i in range(0, len(segment), 2)]) +'\n'
chars = (col * 2)
for i in range(math.ceil(len(hex_str) / chars)):
segment = hex_str[i * chars: (i + 1) * chars]
res += indent + ' '.join([segment[i:i + 2] for i in range(0, len(segment), 2)]) + '\n'
return res


Expand All @@ -89,18 +112,20 @@ def main():
p.add_argument("-f", "--file", action="store", help="File to analyze", required=True)
p.add_argument("-o", "--output-dir", action="store", default="./", help="Path where store extracted files")
p.add_argument("-e", "--extension", action="store", default="", help="Append this extension to extracted file(s)")
p.add_argument("-j", "--json", action="store_true", default=False, help="Generate JSON output only, no dumps or prints")
p.add_argument("-j", "--json", action="store_true", default=False, help="Generate JSON output only, no dumps or log.infos")
p.add_argument("--optimize", action="store_true", default=False, help="Optimize memory usage")

args = p.parse_args()

if not os.path.exists(args.file):
sys.exit("File: %s doesn't exist", args.file)

with open(args.file, "rb") as file:
process_onenote_file(file, args.output_dir, args.extension, args.json)

with FileIO(args.file, 'rb') as file:
if args.optimize:
process_onenote_file_v2(file, args.output_dir, args.extension)
else:
process_onenote_file(file, args.output_dir, args.extension, args.json)


if __name__ == "__main__":
main()


Loading