From 9892f4488961e1668af72adfa573922df0378a93 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Thu, 19 Feb 2026 20:41:12 +0100 Subject: [PATCH 01/34] fix(archive): sharpcompress-1.0 api-kompatibilitaet absichern --- .../Infrastructure/ArchiveInternals.vb | 61 +++++++------------ .../Support/SharpCompressApiCompat.cs | 20 ++++-- ...SharpCompressEntryModelNonNullUnitTests.cs | 1 + 3 files changed, 36 insertions(+), 46 deletions(-) diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index b9faba97..1e56304d 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -11,8 +11,6 @@ Option Strict On Option Explicit On Imports System.IO -Imports System.Reflection -Imports System.Security Namespace Global.Tomtastisch.FileClassifier ''' @@ -129,12 +127,9 @@ Namespace Global.Tomtastisch.FileClassifier Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenArchiveFactoryCompat(stream, options) - Catch ex As TargetInvocationException When IsExpectedInvocationException(ex) - Return Nothing - Catch ex As TargetInvocationException - Throw Catch ex As Exception When _ TypeOf ex Is MissingMethodException OrElse + TypeOf ex Is System.Reflection.TargetInvocationException OrElse TypeOf ex Is InvalidOperationException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException @@ -166,12 +161,9 @@ Namespace Global.Tomtastisch.FileClassifier Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenGZipArchiveCompat(stream, options) - Catch ex As TargetInvocationException When IsExpectedInvocationException(ex) - Return Nothing - Catch ex As TargetInvocationException - Throw Catch ex As Exception When _ TypeOf ex Is MissingMethodException OrElse + TypeOf ex Is System.Reflection.TargetInvocationException OrElse TypeOf 
ex Is InvalidOperationException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException @@ -179,17 +171,6 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function IsExpectedInvocationException(ex As TargetInvocationException) As Boolean - Dim inner = ex?.InnerException - If inner Is Nothing Then Return False - - Return TypeOf inner Is InvalidOperationException OrElse - TypeOf inner Is NotSupportedException OrElse - TypeOf inner Is ArgumentException OrElse - TypeOf inner Is InvalidDataException OrElse - TypeOf inner Is IOException - End Function - Private Shared Function OpenArchiveFactoryCompat( stream As Stream, options As SharpCompress.Readers.ReaderOptions @@ -210,15 +191,15 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function GetOpenCompatMethod(type As Type) As System.Reflection.MethodInfo Dim signature = New Type() {GetType(Stream), GetType(SharpCompress.Readers.ReaderOptions)} - Dim method = type.GetMethod("OpenArchive", BindingFlags.Public Or - BindingFlags.Static, + Dim method = type.GetMethod("OpenArchive", System.Reflection.BindingFlags.Public Or + System.Reflection.BindingFlags.Static, binder:=Nothing, types:=signature, modifiers:=Nothing) If method IsNot Nothing Then Return method - method = type.GetMethod("Open", BindingFlags.Public Or - BindingFlags.Static, + method = type.GetMethod("Open", System.Reflection.BindingFlags.Public Or + System.Reflection.BindingFlags.Static, binder:=Nothing, types:=signature, modifiers:=Nothing) @@ -247,7 +228,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -287,7 +268,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As 
Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -302,7 +283,7 @@ Namespace Global.Tomtastisch.FileClassifier StreamGuard.RewindToStart(stream) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException OrElse @@ -411,7 +392,7 @@ Namespace Global.Tomtastisch.FileClassifier Return entries.AsReadOnly() Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -449,7 +430,7 @@ Namespace Global.Tomtastisch.FileClassifier destinationFull = Path.GetFullPath(destinationDirectory) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is PathTooLongException OrElse TypeOf ex Is NotSupportedException OrElse @@ -485,7 +466,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -500,7 +481,7 @@ Namespace Global.Tomtastisch.FileClassifier Directory.Delete(stageDir, recursive:=True) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - 
TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException @@ -526,7 +507,7 @@ Namespace Global.Tomtastisch.FileClassifier targetPath = Path.GetFullPath(Path.Combine(destinationPrefix, entryName)) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is PathTooLongException OrElse TypeOf ex Is NotSupportedException OrElse @@ -569,7 +550,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -611,7 +592,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -710,7 +691,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -740,7 +721,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse 
TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -853,7 +834,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -943,7 +924,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -1003,7 +984,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse diff --git a/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs b/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs index 2caed640..bfb552a9 100644 --- a/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs +++ b/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs @@ -27,9 +27,13 @@ internal static IWriter OpenWriter(Stream stream, ArchiveType archiveType, Write var args = new object[] { stream, archiveType, options }; var signature = new[] { typeof(Stream), typeof(ArchiveType), typeof(WriterOptions) }; - var method = typeof(WriterFactory).GetMethod("OpenWriter", signature) - ?? typeof(WriterFactory).GetMethod("Open", signature) - ?? 
throw new MissingMethodException(typeof(WriterFactory).FullName, "OpenWriter/Open(Stream, ArchiveType, WriterOptions) [compat]"); + var method = typeof(WriterFactory).GetMethod("OpenWriter", signature); + method ??= typeof(WriterFactory).GetMethod("Open", signature); + + if (method is null) + { + throw new MissingMethodException(typeof(WriterFactory).FullName, "OpenWriter/Open(Stream, ArchiveType, WriterOptions)"); + } return (IWriter)method.Invoke(null, args)!; } @@ -39,9 +43,13 @@ private static object InvokeOpen(Type type, Stream stream, ReaderOptions options var args = new object[] { stream, options }; var signature = new[] { typeof(Stream), typeof(ReaderOptions) }; - var method = type.GetMethod("OpenArchive", signature) - ?? type.GetMethod("Open", signature) - ?? throw new MissingMethodException(type.FullName, "OpenArchive/Open(Stream, ReaderOptions)"); + var method = type.GetMethod("OpenArchive", signature); + method ??= type.GetMethod("Open", signature); + + if (method is null) + { + throw new MissingMethodException(type.FullName, "OpenArchive/Open(Stream, ReaderOptions)"); + } return method.Invoke(null, args)!; } diff --git a/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs index dedc3d2c..dce7301a 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs @@ -2,6 +2,7 @@ using FileTypeDetectionLib.Tests.Support; using SharpCompress.Common; using SharpCompress.Writers; +using FileTypeDetectionLib.Tests.Support; using Tomtastisch.FileClassifier; namespace FileTypeDetectionLib.Tests.Unit; From c282b0bcdf497cf1af8fcf220cd4bb1b929e735a Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Thu, 19 Feb 2026 20:44:08 +0100 Subject: [PATCH 02/34] fix(test): using-reihenfolge fuer preflight-format-gate --- 
.../Unit/SharpCompressEntryModelNonNullUnitTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs index dce7301a..356e5157 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs @@ -1,8 +1,8 @@ using System.Text; using FileTypeDetectionLib.Tests.Support; +using SharpCompress.Archives; using SharpCompress.Common; using SharpCompress.Writers; -using FileTypeDetectionLib.Tests.Support; using Tomtastisch.FileClassifier; namespace FileTypeDetectionLib.Tests.Unit; From 15cef2959f40729adfabc44882080b904f1a44c6 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Thu, 19 Feb 2026 20:45:55 +0100 Subject: [PATCH 03/34] chore(ci): trigger preflight nach pr-governance-update From c834f7c53615f517c844cddcbf2e48685ea59a57 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Thu, 19 Feb 2026 20:48:43 +0100 Subject: [PATCH 04/34] chore(test): trigger frischen pr-lauf nach governance-update --- .../Support/SharpCompressApiCompat.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs b/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs index bfb552a9..a4f9fb4e 100644 --- a/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs +++ b/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs @@ -32,7 +32,7 @@ internal static IWriter OpenWriter(Stream stream, ArchiveType archiveType, Write if (method is null) { - throw new MissingMethodException(typeof(WriterFactory).FullName, "OpenWriter/Open(Stream, ArchiveType, WriterOptions)"); + throw new MissingMethodException(typeof(WriterFactory).FullName, "OpenWriter/Open(Stream, ArchiveType, WriterOptions) 
[compat]"); } return (IWriter)method.Invoke(null, args)!; From 8129dcc9c5e4f53a0617f5b532d9a05845a3bcb3 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Thu, 19 Feb 2026 20:56:17 +0100 Subject: [PATCH 05/34] chore(ci): qodana-blocker minimal bereinigen --- .../Infrastructure/ArchiveInternals.vb | 40 ++++++++++--------- .../Support/SharpCompressApiCompat.cs | 20 +++------- ...SharpCompressEntryModelNonNullUnitTests.cs | 1 - 3 files changed, 27 insertions(+), 34 deletions(-) diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index 1e56304d..66878b69 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -11,6 +11,8 @@ Option Strict On Option Explicit On Imports System.IO +Imports System.Reflection +Imports System.Security Namespace Global.Tomtastisch.FileClassifier ''' @@ -191,15 +193,15 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function GetOpenCompatMethod(type As Type) As System.Reflection.MethodInfo Dim signature = New Type() {GetType(Stream), GetType(SharpCompress.Readers.ReaderOptions)} - Dim method = type.GetMethod("OpenArchive", System.Reflection.BindingFlags.Public Or - System.Reflection.BindingFlags.Static, + Dim method = type.GetMethod("OpenArchive", BindingFlags.Public Or + BindingFlags.Static, binder:=Nothing, types:=signature, modifiers:=Nothing) If method IsNot Nothing Then Return method - method = type.GetMethod("Open", System.Reflection.BindingFlags.Public Or - System.Reflection.BindingFlags.Static, + method = type.GetMethod("Open", BindingFlags.Public Or + BindingFlags.Static, binder:=Nothing, types:=signature, modifiers:=Nothing) @@ -228,7 +230,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException 
OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -268,7 +270,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -283,7 +285,7 @@ Namespace Global.Tomtastisch.FileClassifier StreamGuard.RewindToStart(stream) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException OrElse @@ -392,7 +394,7 @@ Namespace Global.Tomtastisch.FileClassifier Return entries.AsReadOnly() Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -430,7 +432,7 @@ Namespace Global.Tomtastisch.FileClassifier destinationFull = Path.GetFullPath(destinationDirectory) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is PathTooLongException OrElse TypeOf ex Is NotSupportedException OrElse @@ -466,7 +468,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is 
NotSupportedException OrElse @@ -481,7 +483,7 @@ Namespace Global.Tomtastisch.FileClassifier Directory.Delete(stageDir, recursive:=True) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException @@ -507,7 +509,7 @@ Namespace Global.Tomtastisch.FileClassifier targetPath = Path.GetFullPath(Path.Combine(destinationPrefix, entryName)) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is PathTooLongException OrElse TypeOf ex Is NotSupportedException OrElse @@ -550,7 +552,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -592,7 +594,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -691,7 +693,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -721,7 +723,7 @@ Namespace Global.Tomtastisch.FileClassifier End 
Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -834,7 +836,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -924,7 +926,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -984,7 +986,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse diff --git a/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs b/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs index a4f9fb4e..2caed640 100644 --- a/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs +++ b/tests/FileTypeDetectionLib.Tests/Support/SharpCompressApiCompat.cs @@ -27,13 +27,9 @@ internal static IWriter OpenWriter(Stream stream, ArchiveType archiveType, Write var args = new object[] { stream, archiveType, options }; var signature = new[] { typeof(Stream), typeof(ArchiveType), typeof(WriterOptions) }; - var method = 
typeof(WriterFactory).GetMethod("OpenWriter", signature); - method ??= typeof(WriterFactory).GetMethod("Open", signature); - - if (method is null) - { - throw new MissingMethodException(typeof(WriterFactory).FullName, "OpenWriter/Open(Stream, ArchiveType, WriterOptions) [compat]"); - } + var method = typeof(WriterFactory).GetMethod("OpenWriter", signature) + ?? typeof(WriterFactory).GetMethod("Open", signature) + ?? throw new MissingMethodException(typeof(WriterFactory).FullName, "OpenWriter/Open(Stream, ArchiveType, WriterOptions) [compat]"); return (IWriter)method.Invoke(null, args)!; } @@ -43,13 +39,9 @@ private static object InvokeOpen(Type type, Stream stream, ReaderOptions options var args = new object[] { stream, options }; var signature = new[] { typeof(Stream), typeof(ReaderOptions) }; - var method = type.GetMethod("OpenArchive", signature); - method ??= type.GetMethod("Open", signature); - - if (method is null) - { - throw new MissingMethodException(type.FullName, "OpenArchive/Open(Stream, ReaderOptions)"); - } + var method = type.GetMethod("OpenArchive", signature) + ?? type.GetMethod("Open", signature) + ?? 
throw new MissingMethodException(type.FullName, "OpenArchive/Open(Stream, ReaderOptions)"); return method.Invoke(null, args)!; } diff --git a/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs index 356e5157..dedc3d2c 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/SharpCompressEntryModelNonNullUnitTests.cs @@ -1,6 +1,5 @@ using System.Text; using FileTypeDetectionLib.Tests.Support; -using SharpCompress.Archives; using SharpCompress.Common; using SharpCompress.Writers; using Tomtastisch.FileClassifier; From 23fbcf1b5dc09fc7a8ad04595cc29d5eb48ceba4 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Thu, 19 Feb 2026 21:02:52 +0100 Subject: [PATCH 06/34] fix(archive): targetinvocation nur fuer erwartete inner-fehler abfangen --- .../Infrastructure/ArchiveInternals.vb | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index 66878b69..b9faba97 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -129,9 +129,12 @@ Namespace Global.Tomtastisch.FileClassifier Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenArchiveFactoryCompat(stream, options) + Catch ex As TargetInvocationException When IsExpectedInvocationException(ex) + Return Nothing + Catch ex As TargetInvocationException + Throw Catch ex As Exception When _ TypeOf ex Is MissingMethodException OrElse - TypeOf ex Is System.Reflection.TargetInvocationException OrElse TypeOf ex Is InvalidOperationException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException @@ -163,9 +166,12 @@ Namespace 
Global.Tomtastisch.FileClassifier Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenGZipArchiveCompat(stream, options) + Catch ex As TargetInvocationException When IsExpectedInvocationException(ex) + Return Nothing + Catch ex As TargetInvocationException + Throw Catch ex As Exception When _ TypeOf ex Is MissingMethodException OrElse - TypeOf ex Is System.Reflection.TargetInvocationException OrElse TypeOf ex Is InvalidOperationException OrElse TypeOf ex Is NotSupportedException OrElse TypeOf ex Is ArgumentException @@ -173,6 +179,17 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function + Private Shared Function IsExpectedInvocationException(ex As TargetInvocationException) As Boolean + Dim inner = ex?.InnerException + If inner Is Nothing Then Return False + + Return TypeOf inner Is InvalidOperationException OrElse + TypeOf inner Is NotSupportedException OrElse + TypeOf inner Is ArgumentException OrElse + TypeOf inner Is InvalidDataException OrElse + TypeOf inner Is IOException + End Function + Private Shared Function OpenArchiveFactoryCompat( stream As Stream, options As SharpCompress.Readers.ReaderOptions From edf624f98f168f78c0d0faf9ffc6ac353a7627d0 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 12:45:54 +0100 Subject: [PATCH 07/34] refactor(core): finalize hashing internals and detection model alignment --- .../Abstractions/Archive/ZipExtractedEntry.vb | 13 +- .../Abstractions/Detection/DetectionDetail.vb | 14 +- .../Abstractions/Detection/FileKind.vb | 12 +- .../Abstractions/Detection/FileType.vb | 12 +- .../Abstractions/Hashing/HashDigestSet.vb | 27 +- .../Abstractions/Hashing/HashEvidence.vb | 41 +- .../Abstractions/Hashing/HashOptions.vb | 23 +- .../Hashing/HashRoundTripReport.vb | 303 ++++++--- .../Hashing/Internal/EvidenceHashingCore.vb | 415 ++++++++++++ .../Hashing/Internal/EvidenceHashingIo.vb | 95 +++ .../Internal/EvidenceHashingRoundTrip.vb | 162 +++++ 
.../Abstractions/Hashing/Internal/README.md | 24 + src/FileTypeDetection/ArchiveProcessing.vb | 7 +- .../Detection/FileTypeRegistry.vb | 104 +-- src/FileTypeDetection/EvidenceHashing.vb | 639 ++++-------------- .../FileTypeDetectionLib.vbproj | 4 +- src/FileTypeDetection/FileTypeDetector.vb | 4 +- src/FileTypeDetection/FileTypeOptions.vb | 85 ++- .../Infrastructure/ArchiveInternals.vb | 321 ++++++--- .../Infrastructure/ArchiveManagedInternals.vb | 53 +- .../Infrastructure/CoreInternals.vb | 24 +- .../Infrastructure/MimeProvider.vb | 6 +- .../Net8_0Plus/HashPrimitivesProvider.vb | 30 +- .../NetStandard2_0/HashPrimitivesProvider.vb | 30 +- src/FileTypeDetection/Utils/EnumUtils.vb | 251 +++++++ src/FileTypeDetection/Utils/GuardUtils.vb | 149 ++++ src/FileTypeDetection/Utils/IterableUtils.vb | 74 ++ src/FileTypeDetection/Utils/README.md | 25 + .../Contracts/public-api.snapshot.txt | 26 +- .../Steps/FileTypeDetectionSteps.cs | 30 +- ...tionDetailAndArchiveValidationUnitTests.cs | 2 +- .../Unit/EndToEndFailClosedMatrixUnitTests.cs | 64 +- .../Unit/ExtensionCheckUnitTests.cs | 10 +- .../Unit/FileTypeDetectorEdgeUnitTests.cs | 2 +- .../FileTypeDetectorPrivateBranchUnitTests.cs | 30 +- .../Unit/HashingEvidenceTests.cs | 79 ++- .../Unit/HeaderDetectionWarningUnitTests.cs | 2 +- .../Unit/HeaderOnlyPolicyUnitTests.cs | 2 +- .../LegacyOfficeBinaryRefinerUnitTests.cs | 6 +- .../Unit/OpenXmlRefinerUnitTests.cs | 20 +- 40 files changed, 2245 insertions(+), 975 deletions(-) create mode 100644 src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb create mode 100644 src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIo.vb create mode 100644 src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb create mode 100644 src/FileTypeDetection/Abstractions/Hashing/Internal/README.md create mode 100644 src/FileTypeDetection/Utils/EnumUtils.vb create mode 100644 src/FileTypeDetection/Utils/GuardUtils.vb create mode 100644 
src/FileTypeDetection/Utils/IterableUtils.vb create mode 100644 src/FileTypeDetection/Utils/README.md diff --git a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb index 0f10558b..2e49adf6 100644 --- a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb +++ b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb @@ -30,14 +30,22 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Unveränderlicher Byteinhalt des Eintrags. ''' - Public ReadOnly Property Content As ImmutableArray(Of Byte) + Public ReadOnly Property Content As ImmutableArray _ + ( + Of Byte + ) ''' ''' Größe des Eintragsinhalts in Bytes. ''' Public ReadOnly Property Size As Integer - Friend Sub New(entryPath As String, payload As Byte()) + Friend Sub New _ + ( + entryPath As String, + payload As Byte() + ) + RelativePath = If(entryPath, String.Empty) If payload Is Nothing OrElse payload.Length = 0 Then Content = ImmutableArray(Of Byte).Empty @@ -56,6 +64,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Schreibgeschützter mit dem Entry-Inhalt. 
Public Function OpenReadOnlyStream() As MemoryStream + Dim data = If(Content.IsDefaultOrEmpty, Array.Empty(Of Byte)(), Content.ToArray()) Return New MemoryStream(data, writable:=False) End Function diff --git a/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb b/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb index 3c8c0b78..1697b1b1 100644 --- a/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb +++ b/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb @@ -44,12 +44,14 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property ExtensionVerified As Boolean - Friend Sub New( - detectedType As FileType, - reasonCode As String, - usedZipContentCheck As Boolean, - usedStructuredRefinement As Boolean, - extensionVerified As Boolean) + Friend Sub New _ + ( + detectedType As FileType, + reasonCode As String, + usedZipContentCheck As Boolean, + usedStructuredRefinement As Boolean, + extensionVerified As Boolean + ) Me.DetectedType = If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)) Me.ReasonCode = If(reasonCode, String.Empty) diff --git a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb index b8aefb8c..97253ac1 100644 --- a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb +++ b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb @@ -56,18 +56,18 @@ Namespace Global.Tomtastisch.FileClassifier Zip ''' - ''' Office Open XML Word-Dokument (DOCX). + ''' Office Open XML Word-Dokument (DOCX. ''' - Docx + Doc ''' - ''' Office Open XML Excel-Dokument (XLSX). + ''' Office Open XML Excel-Dokument (XLS). ''' - Xlsx + Xls ''' - ''' Office Open XML PowerPoint-Dokument (PPTX). + ''' Office Open XML PowerPoint-Dokument (PPT). 
''' - Pptx + Ppt End Enum End Namespace diff --git a/src/FileTypeDetection/Abstractions/Detection/FileType.vb b/src/FileTypeDetection/Abstractions/Detection/FileType.vb index 9cfbc2c5..45b95fcf 100644 --- a/src/FileTypeDetection/Abstractions/Detection/FileType.vb +++ b/src/FileTypeDetection/Abstractions/Detection/FileType.vb @@ -44,8 +44,15 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Aliases As ImmutableArray(Of String) - Friend Sub New(kind As FileKind, canonicalExtension As String, mime As String, allowed As Boolean, - aliases As IEnumerable(Of String)) + Friend Sub New _ + ( + kind As FileKind, + canonicalExtension As String, + mime As String, + allowed As Boolean, + aliases As IEnumerable(Of String) + ) + Dim dedup As HashSet(Of String) = New HashSet(Of String)(StringComparer.OrdinalIgnoreCase) Dim n As String Dim orderedAliases As List(Of String) @@ -76,6 +83,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' String-Repräsentation des Feldes . Public Overrides Function ToString() As String + Return Kind.ToString() End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb index 96cb9f82..b99cb881 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb @@ -58,15 +58,18 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property HasLogicalHash As Boolean - Friend Sub New( - physicalSha256 As String, - logicalSha256 As String, - fastPhysicalXxHash3 As String, - fastLogicalXxHash3 As String, - hmacPhysicalSha256 As String, - hmacLogicalSha256 As String, - hasPhysicalHash As Boolean, - hasLogicalHash As Boolean) + Friend Sub New _ + ( + physicalSha256 As String, + logicalSha256 As String, + fastPhysicalXxHash3 As String, + fastLogicalXxHash3 As String, + hmacPhysicalSha256 As String, + hmacLogicalSha256 As String, + hasPhysicalHash 
As Boolean, + hasLogicalHash As Boolean + ) + Me.PhysicalSha256 = Normalize(physicalSha256) Me.LogicalSha256 = Normalize(logicalSha256) Me.FastPhysicalXxHash3 = Normalize(fastPhysicalXxHash3) @@ -91,7 +94,11 @@ Namespace Global.Tomtastisch.FileClassifier End Get End Property - Private Shared Function Normalize(value As String) As String + Private Shared Function Normalize _ + ( + value As String + ) As String + Return If(value, String.Empty).Trim().ToLowerInvariant() End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb index 47804479..eba33cbb 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb @@ -69,17 +69,20 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Notes As String - Friend Sub New( - sourceType As HashSourceType, - label As String, - detectedType As FileType, - entry As ZipExtractedEntry, - compressedBytes As Byte(), - uncompressedBytes As Byte(), - entryCount As Integer, - totalUncompressedBytes As Long, - digests As HashDigestSet, - notes As String) + Friend Sub New _ + ( + sourceType As HashSourceType, + label As String, + detectedType As FileType, + entry As ZipExtractedEntry, + compressedBytes As Byte(), + uncompressedBytes As Byte(), + entryCount As Integer, + totalUncompressedBytes As Long, + digests As HashDigestSet, + notes As String + ) + Me.SourceType = sourceType Me.Label = If(label, String.Empty) Me.DetectedType = If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)) @@ -92,8 +95,13 @@ Namespace Global.Tomtastisch.FileClassifier Me.UncompressedBytes = ToImmutable(uncompressedBytes) End Sub - Friend Shared Function CreateFailure(sourceType As HashSourceType, label As String, notes As String) _ - As HashEvidence + Friend Shared Function CreateFailure _ + ( + sourceType As HashSourceType, + label As String, + notes As String + 
) As HashEvidence + Return New HashEvidence( sourceType:=sourceType, label:=label, @@ -107,10 +115,15 @@ Namespace Global.Tomtastisch.FileClassifier notes:=notes) End Function - Private Shared Function ToImmutable(data As Byte()) As Immutable.ImmutableArray(Of Byte) + Private Shared Function ToImmutable _ + ( + data As Byte() + ) As Immutable.ImmutableArray(Of Byte) + If data Is Nothing OrElse data.Length = 0 Then Return Immutable.ImmutableArray(Of Byte).Empty End If + Return Immutable.ImmutableArray.Create(data) End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb index ba5e0a3c..ad8cba1f 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb @@ -44,17 +44,21 @@ Namespace Global.Tomtastisch.FileClassifier Public Property MaterializedFileName As String = "deterministic-roundtrip.bin" Friend Function Clone() As HashOptions + Return New HashOptions With { - .IncludePayloadCopies = IncludePayloadCopies, - .IncludeFastHash = IncludeFastHash, - .IncludeSecureHash = IncludeSecureHash, - .MaterializedFileName = If(MaterializedFileName, String.Empty) + .IncludePayloadCopies = IncludePayloadCopies, + .IncludeFastHash = IncludeFastHash, + .IncludeSecureHash = IncludeSecureHash, + .MaterializedFileName = If(MaterializedFileName, String.Empty) } End Function - Friend Shared Function Normalize(options As HashOptions) As HashOptions + Friend Shared Function Normalize _ + ( + options As HashOptions + ) As HashOptions + Dim cloned As HashOptions - If options Is Nothing Then options = New HashOptions() cloned = options.Clone() @@ -63,7 +67,11 @@ Namespace Global.Tomtastisch.FileClassifier End Function - Private Shared Function NormalizeMaterializedFileName(candidate As String) As String + Private Shared Function NormalizeMaterializedFileName _ + ( + candidate As String + ) As String + Dim normalized = 
If(candidate, String.Empty).Trim() If String.IsNullOrWhiteSpace(normalized) Then Return "deterministic-roundtrip.bin" @@ -80,6 +88,7 @@ Namespace Global.Tomtastisch.FileClassifier If String.IsNullOrWhiteSpace(normalized) Then Return "deterministic-roundtrip.bin" + ' ReSharper disable once LoopCanBeConvertedToQuery For Each invalidChar In IO.Path.GetInvalidFileNameChars() If normalized.IndexOf(invalidChar) >= 0 Then Return "deterministic-roundtrip.bin" diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb index 69cdd6b0..fafb9d22 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb @@ -1,135 +1,280 @@ ' ============================================================================ -' FILE: HashRoundTripReport.vb +' FILE: (neue version)HashRoundTripReport.vb ' ' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) ' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD ' - Try/Catch konsistent im Catch-Filter-Schema ' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' +' Kontext: +' - Report-Typ zur Bewertung der Konsistenz mehrerer Hash-Evidences über definierte Slots (H1 bis Hn). +' - Fail-closed: fehlende Evidence wird deterministisch als Failure-Eintrag materialisiert. +' +' Hinweise: +' - Keine Behavior-Änderungen durch reines Reformatting: Vergleichslogik bleibt unverändert. +' - Externe API: zentraler Zugriff über Evidence(slot), LogicalEquals(slot), PhysicalEquals(slot). ' ============================================================================ Option Strict On Option Explicit On +Imports System +Imports Tomtastisch.FileClassifier.Utils + Namespace Global.Tomtastisch.FileClassifier + ''' - ''' Ergebnisbericht für deterministische h1-h4-RoundTrip-Prüfungen. 
+ ''' Bericht über die Konsistenz mehrerer Hash-Evidences in festen Slots (H1 bis Hn). ''' ''' - ''' Der Bericht stellt Einzel-Evidence und daraus abgeleitete Konsistenzkennzahlen für logische und physische - ''' Digest-Vergleiche bereit. + ''' Zweck: + ''' - Normalisiert fehlende Slots fail-closed zu deterministischen Failure-Evidences. + ''' - Vergleicht H1 gegen H2 bis Hn jeweils logisch und physisch. + ''' + ''' Verantwortlichkeiten: + ''' - Slot-Normalisierung (EnsureEvidence). + ''' - Berechnung der Vergleichsflags (LogicalEquals/PhysicalEquals). + ''' - Aggregation LogicalConsistent (AND über alle logischen Gleichheiten). + ''' + ''' Nicht-Ziele: + ''' - Kein I/O, keine Policy-Engines, keine Logger-Integration. + ''' - Slot-Ermittlung erfolgt zentral über EnumUtils (Enum.GetValues). ''' Public NotInheritable Class HashRoundTripReport - ''' - ''' Eingabepfad des geprüften Objekts. - ''' - Public ReadOnly Property InputPath As String - ''' - ''' Kennzeichnet, ob die Eingabe als Archiv verarbeitet wurde. - ''' + ' ===================================================================== + ' Konstanten / Shared ReadOnly (Single Source of Truth) + ' ===================================================================== + + Public Enum HashSlot + H1 = 1 + H2 = 2 + H3 = 3 + H4 = 4 + End Enum + + Private Shared ReadOnly RequiredSlots As HashSlot() = _ + EnumUtils.GetValues(Of HashSlot)( + sortOrder:=EnumUtils.EnumSortOrder.Ascending + ) + + ' ===================================================================== + ' Felder / Properties (Typzustand) + ' ===================================================================== + + Public ReadOnly Property InputPath As String Public ReadOnly Property IsArchiveInput As Boolean + Public ReadOnly Property Notes As String - ''' - ''' Erster Nachweis (Ausgangszustand). 
- ''' - Public ReadOnly Property H1 As HashEvidence + Private ReadOnly _evidences As HashEvidence() ' index = slot-1 + Private ReadOnly _logicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn + Private ReadOnly _physicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn - ''' - ''' Zweiter Nachweis (kanonische Sicht). - ''' - Public ReadOnly Property H2 As HashEvidence + Public ReadOnly Property LogicalConsistent As Boolean - ''' - ''' Dritter Nachweis (logische Bytes). - ''' - Public ReadOnly Property H3 As HashEvidence ''' - ''' Vierter Nachweis (materialisierte Zielrepräsentation). + ''' Liefert die Slots, die in dieser Report-Version geführt werden (in Reihenfolge). ''' - Public ReadOnly Property H4 As HashEvidence + Public ReadOnly Property Slots As HashSlot() + Get + Return IterableUtils.CloneArray(RequiredSlots) + End Get + + End Property - ''' - ''' Kennzeichnet Gleichheit der logischen Digests zwischen h1 und h2. - ''' - Public ReadOnly Property LogicalH1EqualsH2 As Boolean - ''' - ''' Kennzeichnet Gleichheit der logischen Digests zwischen h1 und h3. - ''' - Public ReadOnly Property LogicalH1EqualsH3 As Boolean + ' ===================================================================== + ' Konstruktor(en) + ' ===================================================================== ''' - ''' Kennzeichnet Gleichheit der logischen Digests zwischen h1 und h4. + ''' Erstellt einen Bericht aus Evidences in Slot-Reihenfolge (H1, H2, ...). ''' - Public ReadOnly Property LogicalH1EqualsH4 As Boolean + ''' Pfad/Identifier der geprüften Eingabe. + ''' True, wenn die Eingabe als Archiv verarbeitet wurde. + ''' Hinweise (freier Text). + ''' Evidence-Varargs in Slot-Reihenfolge; exakt so viele wie Slots(). + ''' Wird ausgelöst, wenn evidences Nothing ist oder die Slot-Anzahl nicht passt. 
+ Friend Sub New _ + ( + inputPath As String, + isArchiveInput As Boolean, + notes As String, + ParamArray evidences As HashEvidence() + ) - ''' - ''' Kennzeichnet Gleichheit der physischen Digests zwischen h1 und h2. - ''' - Public ReadOnly Property PhysicalH1EqualsH2 As Boolean + ' Deklarationsblock (Pflicht, spaltenartig) + Dim slotCount As Integer = RequiredSlots.Length + Dim i As Integer + Dim baseEvidence As HashEvidence + Dim otherEvidence As HashEvidence + Dim eqLogical As Boolean + Dim consistentLocal As Boolean = True + + ' ----------------------------------------------------------------- + ' Guard-Clauses (fail-closed) + ' ----------------------------------------------------------------- + GuardUtils.RequireLength(evidences, slotCount, NameOf(evidences)) + + ' ----------------------------------------------------------------- + ' Snapshot / Assignment (Input) + ' ----------------------------------------------------------------- + Me.InputPath = If(inputPath, String.Empty) + Me.IsArchiveInput = isArchiveInput + Me.Notes = If(notes, String.Empty) - ''' - ''' Kennzeichnet Gleichheit der physischen Digests zwischen h1 und h3. 
- ''' - Public ReadOnly Property PhysicalH1EqualsH3 As Boolean + ' ----------------------------------------------------------------- + ' Normalisierung / Canonicalization (Slots) + ' ----------------------------------------------------------------- + _evidences = New HashEvidence(slotCount - 1) {} + _logicalEq = New Boolean(slotCount - 2) {} + _physicalEq = New Boolean(slotCount - 2) {} + + For i = 0 To slotCount - 1 + Dim slot As HashSlot = RequiredSlots(i) + _evidences(SlotIndex(slot)) = EnsureEvidence(evidences(i), slot) + Next + + ' ----------------------------------------------------------------- + ' Branches (Vergleiche: H1 gegen H2..Hn) + ' ----------------------------------------------------------------- + baseEvidence = _evidences(SlotIndex(HashSlot.H1)) + + For idx = 0 To slotCount - 2 + + Dim otherSlot = RequiredSlots(idx + 1) + otherEvidence = _evidences(SlotIndex(otherSlot)) + + eqLogical = EqualLogical(baseEvidence, otherEvidence) + + _logicalEq(idx) = eqLogical + _physicalEq(idx) = EqualPhysical(baseEvidence, otherEvidence) + + consistentLocal = consistentLocal AndAlso eqLogical + + Next + + LogicalConsistent = consistentLocal + + End Sub + + + ' ===================================================================== + ' Public API + ' ===================================================================== ''' - ''' Kennzeichnet Gleichheit der physischen Digests zwischen h1 und h4. + ''' Liefert die Evidence für einen Slot (H1 bis Hn). ''' - Public ReadOnly Property PhysicalH1EqualsH4 As Boolean + ''' Der Slot, dessen Evidence geliefert werden soll. + ''' Die normalisierte Evidence (nie Nothing). + Public Function Evidence(slot As HashSlot) As HashEvidence + + ' Deklarationsblock + Dim index As Integer + + GuardUtils.EnumDefined(GetType(HashSlot), slot, NameOf(slot)) + index = SlotIndex(slot) + + Return _evidences(index) + + End Function ''' - ''' Gesamtindikator für logische Konsistenz über h1 bis h4. 
+ ''' Liefert das Ergebnis des logischen Vergleichs von H1 mit Hx für einen Slot H2 bis Hn. ''' - Public ReadOnly Property LogicalConsistent As Boolean + ''' Der Vergleichsslot (H2 bis Hn). + ''' True, wenn logisch gleich; sonst False. + Public Function LogicalEquals(otherSlot As HashSlot) As Boolean + + ' Deklarationsblock + Dim index As Integer + + If otherSlot = HashSlot.H1 Then + Throw New ArgumentException("Use H2..Hn.", NameOf(otherSlot)) + End If + + GuardUtils.EnumDefined(GetType(HashSlot), otherSlot, NameOf(otherSlot)) + index = OtherIndex(otherSlot) + + Return _logicalEq(index) + + End Function ''' - ''' Ergänzende Hinweise zum RoundTrip-Lauf. + ''' Liefert das Ergebnis des physischen Vergleichs von H1 mit Hx für einen Slot H2 bis Hn. ''' - Public ReadOnly Property Notes As String + ''' Der Vergleichsslot (H2 bis Hn). + ''' True, wenn physisch gleich; sonst False. + Public Function PhysicalEquals(otherSlot As HashSlot) As Boolean - Friend Sub New(inputPath As String, isArchiveInput As Boolean, h1 As HashEvidence, h2 As HashEvidence, - h3 As HashEvidence, h4 As HashEvidence, - notes As String) - Me.InputPath = If(inputPath, String.Empty) - Me.IsArchiveInput = isArchiveInput - Me.H1 = - If(h1, HashEvidence.CreateFailure(HashSourceType.Unknown, "h1", "missing")) - Me.H2 = - If(h2, HashEvidence.CreateFailure(HashSourceType.Unknown, "h2", "missing")) - Me.H3 = - If(h3, HashEvidence.CreateFailure(HashSourceType.Unknown, "h3", "missing")) - Me.H4 = - If(h4, HashEvidence.CreateFailure(HashSourceType.Unknown, "h4", "missing")) - Me.Notes = If(notes, String.Empty) + ' Deklarationsblock + Dim index As Integer - LogicalH1EqualsH2 = EqualLogical(Me.H1, Me.H2) - LogicalH1EqualsH3 = EqualLogical(Me.H1, Me.H3) - LogicalH1EqualsH4 = EqualLogical(Me.H1, Me.H4) - PhysicalH1EqualsH2 = EqualPhysical(Me.H1, Me.H2) - PhysicalH1EqualsH3 = EqualPhysical(Me.H1, Me.H3) - PhysicalH1EqualsH4 = EqualPhysical(Me.H1, Me.H4) - LogicalConsistent = LogicalH1EqualsH2 AndAlso 
LogicalH1EqualsH3 AndAlso LogicalH1EqualsH4 - End Sub + If otherSlot = HashSlot.H1 Then + Throw New ArgumentException("Use H2..Hn.", NameOf(otherSlot)) + End If + + GuardUtils.EnumDefined(GetType(HashSlot), otherSlot, NameOf(otherSlot)) + index = OtherIndex(otherSlot) + + Return _physicalEq(index) + End Function + + + ' ===================================================================== + ' Internal/Private Helpers + ' ===================================================================== + + Private Shared Function EnsureEvidence(evidence As HashEvidence, slot As HashSlot) As HashEvidence + If evidence IsNot Nothing Then Return evidence + + Return HashEvidence.CreateFailure( + HashSourceType.Unknown, + SlotLabel(slot), + "missing" + ) + End Function + + Private Shared Function SlotLabel(slot As HashSlot) As String + Return "h" & CInt(slot).ToString() + End Function + + Private Shared Function SlotIndex(slot As HashSlot) As Integer + Return CInt(slot) - 1 + End Function + + Private Shared Function OtherIndex(otherSlot As HashSlot) As Integer + Return SlotIndex(otherSlot) - 1 + End Function + + ' Hinweis: Vergleichslogik bleibt unverändert; keine Änderung der Semantik. 
Private Shared Function EqualLogical(leftEvidence As HashEvidence, rightEvidence As HashEvidence) As Boolean If leftEvidence Is Nothing OrElse rightEvidence Is Nothing Then Return False If leftEvidence.Digests Is Nothing OrElse rightEvidence.Digests Is Nothing Then Return False If Not leftEvidence.Digests.HasLogicalHash OrElse Not rightEvidence.Digests.HasLogicalHash Then Return False - Return _ - String.Equals(leftEvidence.Digests.LogicalSha256, rightEvidence.Digests.LogicalSha256, - StringComparison.Ordinal) + + Return String.Equals( + leftEvidence.Digests.LogicalSha256, + rightEvidence.Digests.LogicalSha256, + StringComparison.Ordinal + ) End Function Private Shared Function EqualPhysical(leftEvidence As HashEvidence, rightEvidence As HashEvidence) As Boolean If leftEvidence Is Nothing OrElse rightEvidence Is Nothing Then Return False If leftEvidence.Digests Is Nothing OrElse rightEvidence.Digests Is Nothing Then Return False - If Not leftEvidence.Digests.HasPhysicalHash OrElse Not rightEvidence.Digests.HasPhysicalHash Then _ - Return False - Return _ - String.Equals(leftEvidence.Digests.PhysicalSha256, rightEvidence.Digests.PhysicalSha256, - StringComparison.Ordinal) + If Not leftEvidence.Digests.HasPhysicalHash OrElse Not rightEvidence.Digests.HasPhysicalHash Then Return False + + Return String.Equals( + leftEvidence.Digests.PhysicalSha256, + rightEvidence.Digests.PhysicalSha256, + StringComparison.Ordinal + ) End Function + End Class + End Namespace diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb new file mode 100644 index 00000000..ad793049 --- /dev/null +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb @@ -0,0 +1,415 @@ +' ============================================================================ +' FILE: EvidenceHashingCore.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und 
Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Namespace Global.Tomtastisch.FileClassifier + ''' + ''' Interner, zustandsloser Kernservice für deterministische Evidence-Bildung. + ''' + ''' + ''' + ''' Die Komponente kapselt Normalisierung, Manifestbildung, Digest-Berechnung und optionale HMAC-Verarbeitung + ''' ohne Public-API-Verantwortung. + ''' + ''' + ''' Fehler werden fail-closed über Rückgabewerte und unveränderte Fehltexte in die aufrufende Fassade propagiert. + ''' + ''' + Friend NotInheritable Class EvidenceHashingCore + Private Sub New() + End Sub + + Friend Shared Function BuildEvidenceFromEntries _ + ( + sourceType As HashSourceType, + label As String, + detectedType As FileType, + compressedBytes As Byte(), + entries As IReadOnlyList(Of ZipExtractedEntry), + hashOptions As HashOptions, + notes As String + ) As HashEvidence + + Dim normalizedEntries As List(Of NormalizedEntry) = Nothing + Dim normalizeError As String = String.Empty + Dim logicalBytes As Byte() + Dim logicalSha As String + Dim fastLogical As String + Dim hmacLogical As String + Dim physicalSha As String + Dim fastPhysical As String + Dim hmacPhysical As String + Dim hasPhysical As Boolean + Dim secureNote As String + Dim hmacKey As Byte() + Dim hasHmacKey As Boolean + Dim firstEntry As ZipExtractedEntry = Nothing + Dim digestSet As HashDigestSet + Dim combinedNotes As String + Dim totalBytes As Long + Dim persistedCompressed As Byte() + Dim persistedLogical As Byte() + + If Not TryNormalizeEntries(entries, normalizedEntries, normalizeError) Then + Return HashEvidence.CreateFailure(sourceType, label, normalizeError) + End If + + logicalBytes = BuildLogicalManifestBytes(normalizedEntries) + logicalSha = ComputeSha256Hex(logicalBytes) + 
fastLogical = ComputeFastHash(logicalBytes, hashOptions) + hmacLogical = String.Empty + physicalSha = String.Empty + fastPhysical = String.Empty + hmacPhysical = String.Empty + hasPhysical = False + secureNote = String.Empty + hmacKey = Array.Empty(Of Byte)() + hasHmacKey = False + + If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then + hasHmacKey = TryResolveHmacKey(hmacKey, secureNote) + If hasHmacKey Then + hmacLogical = ComputeHmacSha256Hex(hmacKey, logicalBytes) + End If + End If + + If compressedBytes IsNot Nothing AndAlso compressedBytes.Length > 0 Then + physicalSha = ComputeSha256Hex(compressedBytes) + fastPhysical = ComputeFastHash(compressedBytes, hashOptions) + hasPhysical = True + If hasHmacKey Then + hmacPhysical = ComputeHmacSha256Hex(hmacKey, compressedBytes) + End If + End If + + If normalizedEntries.Count > 0 Then + firstEntry = New ZipExtractedEntry(normalizedEntries(0).RelativePath, normalizedEntries(0).Content) + End If + + digestSet = New HashDigestSet( + physicalSha256:=physicalSha, + logicalSha256:=logicalSha, + fastPhysicalXxHash3:=fastPhysical, + fastLogicalXxHash3:=fastLogical, + hmacPhysicalSha256:=hmacPhysical, + hmacLogicalSha256:=hmacLogical, + hasPhysicalHash:=hasPhysical, + hasLogicalHash:=True) + + combinedNotes = AppendNoteIfAny(notes, secureNote) + + totalBytes = 0 + For Each entry In normalizedEntries + totalBytes += CLng(entry.Content.LongLength) + Next + + persistedCompressed = If( + hashOptions.IncludePayloadCopies, + CopyBytes(compressedBytes), + Array.Empty(Of Byte)()) + + persistedLogical = If( + hashOptions.IncludePayloadCopies, + CopyBytes(logicalBytes), + Array.Empty(Of Byte)()) + + Return New HashEvidence( + sourceType:=sourceType, + label:=NormalizeLabel(label), + detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), + entry:=firstEntry, + compressedBytes:=persistedCompressed, + uncompressedBytes:=persistedLogical, + entryCount:=normalizedEntries.Count, + 
totalUncompressedBytes:=totalBytes, + digests:=digestSet, + notes:=combinedNotes) + End Function + + Friend Shared Function BuildEvidenceFromRawPayload _ + ( + sourceType As HashSourceType, + label As String, + detectedType As FileType, + payload As Byte(), + hashOptions As HashOptions, + notes As String + ) As HashEvidence + + Dim safePayload As Byte() = If(payload, Array.Empty(Of Byte)()) + Dim physicalSha As String = ComputeSha256Hex(safePayload) + Dim logicalSha As String = physicalSha + Dim fastPhysical As String = ComputeFastHash(safePayload, hashOptions) + Dim fastLogical As String = fastPhysical + Dim hmacPhysical As String = String.Empty + Dim hmacLogical As String = String.Empty + Dim secureNote As String = String.Empty + Dim hmacKey As Byte() = Array.Empty(Of Byte)() + Dim persistedPayload As Byte() + Dim entry As ZipExtractedEntry + Dim digestSet As HashDigestSet + Dim combinedNotes As String + + If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then + If TryResolveHmacKey(hmacKey, secureNote) Then + hmacPhysical = ComputeHmacSha256Hex(hmacKey, safePayload) + hmacLogical = hmacPhysical + End If + End If + + persistedPayload = If( + hashOptions.IncludePayloadCopies, + CopyBytes(safePayload), + Array.Empty(Of Byte)()) + + entry = New ZipExtractedEntry(EvidenceHashing.DefaultPayloadLabelCore(), safePayload) + + digestSet = New HashDigestSet( + physicalSha256:=physicalSha, + logicalSha256:=logicalSha, + fastPhysicalXxHash3:=fastPhysical, + fastLogicalXxHash3:=fastLogical, + hmacPhysicalSha256:=hmacPhysical, + hmacLogicalSha256:=hmacLogical, + hasPhysicalHash:=True, + hasLogicalHash:=True) + + combinedNotes = AppendNoteIfAny(notes, secureNote) + + Return New HashEvidence( + sourceType:=sourceType, + label:=NormalizeLabel(label), + detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), + entry:=entry, + compressedBytes:=persistedPayload, + uncompressedBytes:=persistedPayload, + entryCount:=1, + 
totalUncompressedBytes:=safePayload.LongLength, + digests:=digestSet, + notes:=combinedNotes) + End Function + + Friend Shared Function TryNormalizeEntries _ + ( + entries As IReadOnlyList(Of ZipExtractedEntry), + ByRef normalizedEntries As List(Of NormalizedEntry), + ByRef errorMessage As String + ) As Boolean + + Dim seen As HashSet(Of String) = New HashSet(Of String)(StringComparer.Ordinal) + Dim normalizedPath As String + Dim payload As Byte() + + normalizedEntries = New List(Of NormalizedEntry)() + errorMessage = String.Empty + + If entries Is Nothing Then + errorMessage = "Entries sind null." + Return False + End If + + For Each entry In entries + If entry Is Nothing Then + errorMessage = "Entry ist null." + Return False + End If + + normalizedPath = Nothing + If Not TryNormalizeEntryPath(entry.RelativePath, normalizedPath) Then + errorMessage = $"Ungültiger Entry-Pfad: '{entry.RelativePath}'." + Return False + End If + + If Not seen.Add(normalizedPath) Then + errorMessage = $"Doppelter Entry-Pfad nach Normalisierung: '{normalizedPath}'." 
+ Return False + End If + + payload = If(entry.Content.IsDefaultOrEmpty, Array.Empty(Of Byte)(), entry.Content.ToArray()) + normalizedEntries.Add(New NormalizedEntry(normalizedPath, payload)) + Next + + normalizedEntries.Sort(Function(a, b) StringComparer.Ordinal.Compare(a.RelativePath, b.RelativePath)) + Return True + End Function + + Friend Shared Function TryNormalizeEntryPath _ + ( + rawPath As String, + ByRef normalizedPath As String + ) As Boolean + + Dim isDirectory As Boolean = False + Return ArchiveEntryPathPolicy.TryNormalizeRelativePath( + rawPath, + allowDirectoryMarker:=False, + normalizedPath, + isDirectory) + End Function + + Friend Shared Function BuildLogicalManifestBytes _ + ( + entries As IReadOnlyList(Of NormalizedEntry) + ) As Byte() + + Dim versionBytes As Byte() + Dim pathBytes As Byte() + Dim contentHash As Byte() + + Using ms As New IO.MemoryStream() + Using writer As New IO.BinaryWriter(ms, Text.Encoding.UTF8, leaveOpen:=True) + versionBytes = Text.Encoding.UTF8.GetBytes(EvidenceHashing.LogicalManifestVersionCore()) + writer.Write(versionBytes.Length) + writer.Write(versionBytes) + writer.Write(entries.Count) + + For Each entry In entries + pathBytes = Text.Encoding.UTF8.GetBytes(entry.RelativePath) + contentHash = HashPrimitives.Current.Sha256.ComputeHash(entry.Content) + writer.Write(pathBytes.Length) + writer.Write(pathBytes) + writer.Write(CLng(entry.Content.LongLength)) + writer.Write(contentHash.Length) + writer.Write(contentHash) + Next + End Using + + Return ms.ToArray() + End Using + End Function + + Friend Shared Function ComputeSha256Hex _ + ( + payload As Byte() + ) As String + + Dim data As Byte() = If(payload, Array.Empty(Of Byte)()) + Return HashPrimitives.Current.Sha256.ComputeHashHex(data) + End Function + + Friend Shared Function ComputeFastHash _ + ( + payload As Byte(), + options As HashOptions + ) As String + + Dim data As Byte() + + If options Is Nothing OrElse Not options.IncludeFastHash Then Return String.Empty + 
data = If(payload, Array.Empty(Of Byte)()) + + Return HashPrimitives.Current.FastHash64.ComputeHashHex(data) + End Function + + Friend Shared Function ComputeHmacSha256Hex _ + ( + key As Byte(), + payload As Byte() + ) As String + + Dim safeKey As Byte() = If(key, Array.Empty(Of Byte)()) + Dim data As Byte() = If(payload, Array.Empty(Of Byte)()) + + Using hmac As New Security.Cryptography.HMACSHA256(safeKey) + Return HashPrimitives.Current.HexCodec.EncodeLowerHex(hmac.ComputeHash(data)) + End Using + End Function + + Friend Shared Function TryResolveHmacKey _ + ( + ByRef key As Byte(), + ByRef note As String + ) As Boolean + + Dim b64 As String + + key = Array.Empty(Of Byte)() + note = String.Empty + + b64 = Environment.GetEnvironmentVariable(EvidenceHashing.HmacKeyEnvVarB64Core()) + If String.IsNullOrWhiteSpace(b64) Then + note = $"Secure hashing requested but env var '{EvidenceHashing.HmacKeyEnvVarB64Core()}' is missing; HMAC digests omitted." + Return False + End If + + Try + key = Convert.FromBase64String(b64.Trim()) + If key Is Nothing OrElse key.Length = 0 Then + key = Array.Empty(Of Byte)() + note = $"Secure hashing requested but env var '{EvidenceHashing.HmacKeyEnvVarB64Core()}' is empty; HMAC digests omitted." + Return False + End If + + Return True + Catch ex As Exception When _ + TypeOf ex Is FormatException OrElse + TypeOf ex Is ArgumentException + key = Array.Empty(Of Byte)() + note = $"Secure hashing requested but env var '{EvidenceHashing.HmacKeyEnvVarB64Core()}' is invalid Base64; HMAC digests omitted." 
+ Return False + End Try + End Function + + Friend Shared Function AppendNoteIfAny _ + ( + baseNotes As String, + toAppend As String + ) As String + + Dim left As String = If(baseNotes, String.Empty).Trim() + Dim right As String = If(toAppend, String.Empty).Trim() + + If right.Length = 0 Then Return left + If left.Length = 0 Then Return right + Return left & " " & right + End Function + + Friend Shared Function NormalizeLabel _ + ( + label As String + ) As String + + Dim normalized As String = If(label, String.Empty).Trim() + If normalized.Length = 0 Then Return EvidenceHashing.DefaultPayloadLabelCore() + Return normalized + End Function + + Friend Shared Function CopyBytes _ + ( + data As Byte() + ) As Byte() + + Dim copy As Byte() + + If data Is Nothing OrElse data.Length = 0 Then Return Array.Empty(Of Byte)() + copy = New Byte(data.Length - 1) {} + Buffer.BlockCopy(data, 0, copy, 0, data.Length) + + Return copy + End Function + + ''' + ''' Normalisierte Entry-Repräsentation für kanonische Manifestbildung. + ''' + ''' + ''' Relative Pfade und Inhalte werden nach Guard-Prüfung unveränderlich für deterministische Sortierung gehalten. 
+ ''' + Friend NotInheritable Class NormalizedEntry + Friend ReadOnly Property RelativePath As String + Friend ReadOnly Property Content As Byte() + + Friend Sub New(relativePath As String, content As Byte()) + Me.RelativePath = If(relativePath, String.Empty) + Me.Content = If(content, Array.Empty(Of Byte)()) + End Sub + End Class + End Class +End Namespace diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIo.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIo.vb new file mode 100644 index 00000000..e1856b72 --- /dev/null +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIo.vb @@ -0,0 +1,95 @@ +' ============================================================================ +' FILE: EvidenceHashingIo.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Namespace Global.Tomtastisch.FileClassifier + ''' + ''' Interne I/O-Hilfsfunktionen für bounded Dateieinlesung im Hashing-Kontext. + ''' + ''' + ''' Die Komponente erzwingt MaxBytes-Limits fail-closed und liefert deterministische Fehltexte. + ''' + Friend NotInheritable Class EvidenceHashingIo + Private Sub New() + End Sub + + Friend Shared Function TryReadFileBounded _ + ( + path As String, + detectorOptions As FileTypeProjectOptions, + ByRef bytes As Byte(), + ByRef errorMessage As String + ) As Boolean + + Dim fi As IO.FileInfo + + bytes = Array.Empty(Of Byte)() + errorMessage = String.Empty + + If String.IsNullOrWhiteSpace(path) Then + errorMessage = "Pfad ist leer." + Return False + End If + + If detectorOptions Is Nothing Then + errorMessage = "Optionen fehlen." 
+ Return False + End If + + Try + fi = New IO.FileInfo(path) + If Not fi.Exists Then + errorMessage = "Datei existiert nicht." + Return False + End If + + If fi.Length > detectorOptions.MaxBytes Then + errorMessage = "Datei größer als MaxBytes." + Return False + End If + + Using fs As New IO.FileStream( + path, + IO.FileMode.Open, + IO.FileAccess.Read, + IO.FileShare.Read, + InternalIoDefaults.FileStreamBufferSize, + IO.FileOptions.SequentialScan) + + Using ms As New IO.MemoryStream(CInt(Math.Min(Math.Max(fi.Length, 0), Integer.MaxValue))) + StreamBounds.CopyBounded(fs, ms, detectorOptions.MaxBytes) + bytes = ms.ToArray() + End Using + End Using + + Return True + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is IO.IOException OrElse + TypeOf ex Is IO.InvalidDataException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + Return SetReadFileError(ex, errorMessage) + End Try + End Function + + Friend Shared Function SetReadFileError _ + ( + ex As Exception, + ByRef errorMessage As String + ) As Boolean + + errorMessage = $"Datei konnte nicht gelesen werden: {ex.Message}" + Return False + End Function + End Class +End Namespace diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb new file mode 100644 index 00000000..2c7e23cf --- /dev/null +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb @@ -0,0 +1,162 @@ +' ============================================================================ +' FILE: EvidenceHashingRoundTrip.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' 
============================================================================ + +Option Strict On +Option Explicit On + +Namespace Global.Tomtastisch.FileClassifier + ''' + ''' Interne RoundTrip-Pipeline für den deterministischen h1-h4-Hashbericht. + ''' + ''' + ''' + ''' Der Service erstellt temporäre Ziele für Materialisierung, nutzt die öffentliche Fassade für h1/h2/h4, + ''' berechnet h3 über kanonische Logical-Bytes und bereinigt temporäre Verzeichnisse best-effort. + ''' + ''' + ''' Catch-Filter und Fehltexte bleiben unverändert fail-closed. + ''' + ''' + Friend NotInheritable Class EvidenceHashingRoundTrip + Private Sub New() + End Sub + + Friend Shared Function VerifyRoundTrip _ + ( + path As String, + options As HashOptions + ) As HashRoundTripReport + + Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() + Dim normalizedOptions As HashOptions = EvidenceHashing.ResolveHashOptionsCore(detectorOptions, options) + Dim failed As HashEvidence + Dim h1 As HashEvidence + Dim originalBytes As Byte() = Array.Empty(Of Byte)() + Dim readError As String = String.Empty + Dim archiveEntries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() + Dim isArchiveInput As Boolean + Dim h2 As HashEvidence + Dim canonicalBytes As Byte() + Dim normalizedEntries As List(Of EvidenceHashingCore.NormalizedEntry) + Dim normalizeError As String + Dim h3 As HashEvidence + Dim h4 As HashEvidence = HashEvidence.CreateFailure( + HashSourceType.MaterializedFile, + "roundtrip-h4-file", + "Materialization failed.") + + Dim roundTripTempRoot As String = IO.Path.Combine( + IO.Path.GetTempPath(), + "ftd-roundtrip-" & Guid.NewGuid().ToString("N", Globalization.CultureInfo.InvariantCulture)) + + Dim targetFile As String + Dim notes As String + + If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then + failed = HashEvidence.CreateFailure(HashSourceType.FilePath, path, "Datei nicht gefunden.") + Return New HashRoundTripReport( + 
path, + isArchiveInput:=False, + notes:="Input file missing.", + failed, + failed, + failed, + failed) + End If + + h1 = EvidenceHashing.HashFile(path, normalizedOptions) + If Not h1.Digests.HasLogicalHash Then + failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, "h1 konnte nicht berechnet werden.") + Return New HashRoundTripReport( + path, + isArchiveInput:=False, + notes:="h1 missing logical digest.", + h1, + failed, + failed, + failed) + End If + + If Not EvidenceHashingIo.TryReadFileBounded(path, detectorOptions, originalBytes, readError) Then + failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, readError) + Return New HashRoundTripReport( + path, + isArchiveInput:=False, + notes:=readError, + h1, + failed, + failed, + failed) + End If + + isArchiveInput = ArchiveEntryCollector.TryCollectFromFile(path, detectorOptions, archiveEntries) + + If isArchiveInput Then + h2 = EvidenceHashing.HashEntries(archiveEntries, "roundtrip-h2-entries", normalizedOptions) + normalizedEntries = Nothing + normalizeError = String.Empty + If EvidenceHashingCore.TryNormalizeEntries(archiveEntries, normalizedEntries, normalizeError) Then + canonicalBytes = EvidenceHashingCore.BuildLogicalManifestBytes(normalizedEntries) + Else + canonicalBytes = Array.Empty(Of Byte)() + End If + Else + h2 = EvidenceHashing.HashBytes(originalBytes, "roundtrip-h2-bytes", normalizedOptions) + canonicalBytes = EvidenceHashingCore.CopyBytes(originalBytes) + End If + + h3 = EvidenceHashingCore.BuildEvidenceFromRawPayload( + sourceType:=HashSourceType.RawBytes, + label:="roundtrip-h3-logical-bytes", + detectedType:=FileTypeRegistry.Resolve(FileKind.Unknown), + payload:=canonicalBytes, + hashOptions:=normalizedOptions, + notes:="Canonical logical bytes hashed directly.") + + Try + IO.Directory.CreateDirectory(roundTripTempRoot) + targetFile = IO.Path.Combine( + roundTripTempRoot, + EvidenceHashingCore.NormalizeLabel(normalizedOptions.MaterializedFileName)) + + If 
FileMaterializer.Persist(canonicalBytes, targetFile, overwrite:=False, secureExtract:=False) Then + h4 = EvidenceHashing.HashFile(targetFile, normalizedOptions) + End If + Finally + Try + If IO.Directory.Exists(roundTripTempRoot) Then + IO.Directory.Delete(roundTripTempRoot, recursive:=True) + End If + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is IO.IOException OrElse + TypeOf ex Is IO.PathTooLongException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + LogGuard.Debug(detectorOptions.Logger, $"[HashRoundTrip] Cleanup-Fehler: {ex.Message}") + End Try + End Try + + notes = If( + isArchiveInput, + "Archive roundtrip (h1-h4) executed.", + "Raw file roundtrip (h1-h4) executed.") + + Return New HashRoundTripReport( + path, + isArchiveInput, + notes, + h1, + h2, + h3, + h4) + End Function + End Class +End Namespace diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md b/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md new file mode 100644 index 00000000..48196a1e --- /dev/null +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md @@ -0,0 +1,24 @@ +# Abstractions Hashing Internal Modul + +## 1. Zweck +Dieses Verzeichnis enthaelt interne, zustandslose Hashing-Bausteine hinter der oeffentlichen Fassade `EvidenceHashing`. + +## 2. Inhalt +- `EvidenceHashingCore.vb` +- `EvidenceHashingRoundTrip.vb` +- `EvidenceHashingIo.vb` + +## 3. API und Verhalten +- Keine Public API in diesem Verzeichnis. +- Fail-closed Fehlerpfade und deterministische Digest-Bildung werden zentral gekapselt. +- Die RoundTrip-Pipeline materialisiert temporaere Dateien und bereinigt best-effort. + +## 4. Verifikation +- Nutzung wird ueber `EvidenceHashing` sowie Unit-/Integrationstests in `tests/FileTypeDetectionLib.Tests` verifiziert. + +## 5. Diagramm +N/A + +## 6. 
Verweise +- [Hashing-Abstractions](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Abstractions/Hashing/README.md) +- [Hashing-Contract](https://github.com/tomtastisch/FileClassifier/blob/main/docs/contracts/001_CONTRACT_HASHING.MD) diff --git a/src/FileTypeDetection/ArchiveProcessing.vb b/src/FileTypeDetection/ArchiveProcessing.vb index d15fd91e..865cdc80 100644 --- a/src/FileTypeDetection/ArchiveProcessing.vb +++ b/src/FileTypeDetection/ArchiveProcessing.vb @@ -44,6 +44,7 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As Boolean + Return FileTypeDetector.TryValidateArchive(path) End Function @@ -59,6 +60,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As Boolean + Dim opt = FileTypeOptions.GetSnapshot() Return ArchivePayloadGuard.IsSafeArchivePayload(data, opt) End Function @@ -79,8 +81,8 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String, verifyBeforeExtract As Boolean - ) _ - As IReadOnlyList(Of ZipExtractedEntry) + ) As IReadOnlyList(Of ZipExtractedEntry) + Return New FileTypeDetector().ExtractArchiveSafeToMemory(path, verifyBeforeExtract) End Function @@ -97,6 +99,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As IReadOnlyList(Of ZipExtractedEntry) + Dim opt As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() diff --git a/src/FileTypeDetection/Detection/FileTypeRegistry.vb b/src/FileTypeDetection/Detection/FileTypeRegistry.vb index cc606759..0b21a726 100644 --- a/src/FileTypeDetection/Detection/FileTypeRegistry.vb +++ b/src/FileTypeDetection/Detection/FileTypeRegistry.vb @@ -36,55 +36,53 @@ Namespace Global.Tomtastisch.FileClassifier ImmutableDictionary.CreateRange(Of FileKind, ImmutableArray(Of String))( { _ New KeyValuePair(Of FileKind, ImmutableArray(Of 
String))(FileKind.Jpeg, - ImmutableArray. - Create("jpe")), + Aliases("jpe")), New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Zip, - ImmutableArray. - Create("tar", - "tgz", - "gz", - "gzip", - "bz2", - "bzip2", - "xz", - "7z", - "zz", - "rar")), - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Docx, - ImmutableArray. - Create("doc", - "docm", - "docb", - "dot", - "dotm", - "dotx", - "odt", - "ott")), - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Xlsx, - ImmutableArray. - Create("xls", - "xlsm", - "xlsb", - "xlt", - "xltm", - "xltx", - "xltb", - "xlam", - "xla", - "ods", - "ots")), - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Pptx, - ImmutableArray. - Create("ppt", - "pptm", - "pot", - "potm", - "potx", - "pps", - "ppsm", - "ppsx", - "odp", - "otp")) + Aliases("tar", + "tgz", + "gz", + "gzip", + "bz2", + "bzip2", + "xz", + "7z", + "zz", + "rar")), + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Doc, + Aliases("doc", + "docx", + "docm", + "docb", + "dot", + "dotm", + "dotx", + "odt", + "ott")), + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Xls, + Aliases("xls", + "xlsx", + "xlsm", + "xlsb", + "xlt", + "xltm", + "xltx", + "xltb", + "xlam", + "xla", + "ods", + "ots")), + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Ppt, + Aliases("ppt", + "pptx", + "pptm", + "pot", + "potm", + "potx", + "pps", + "ppsm", + "ppsx", + "odp", + "otp")) }) Private Shared ReadOnly _ @@ -93,6 +91,10 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared ReadOnly MagicRules As ImmutableArray(Of MagicRule) + Private Shared Function Aliases(ParamArray values As String()) As ImmutableArray(Of String) + Return ImmutableArray.Create(values) + End Function + Shared Sub New() Dim definitions = BuildDefinitionsFromEnum() TypesByKind = BuildTypes(definitions) @@ -214,9 +216,9 @@ Namespace Global.Tomtastisch.FileClassifier End Function Friend Shared 
Function HasStructuredContainerDetection(kind As FileKind) As Boolean - Return kind = FileKind.Docx OrElse - kind = FileKind.Xlsx OrElse - kind = FileKind.Pptx + Return kind = FileKind.Doc OrElse + kind = FileKind.Xls OrElse + kind = FileKind.Ppt End Function Friend Shared Function HasDirectContentDetection(kind As FileKind) As Boolean diff --git a/src/FileTypeDetection/EvidenceHashing.vb b/src/FileTypeDetection/EvidenceHashing.vb index 0069c993..2a403e95 100644 --- a/src/FileTypeDetection/EvidenceHashing.vb +++ b/src/FileTypeDetection/EvidenceHashing.vb @@ -16,15 +16,16 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' ''' - ''' Verantwortung: Die Klasse erzeugt reproduzierbare Digest-Evidence für Dateien, Rohbytes und Archiv-Entries. + ''' Die Fassade orchestriert Dateieinlesung, Typdetektion und Archivsicht und delegiert die eigentliche + ''' Hash-Berechnung an interne, zustandslose Utility-Komponenten. ''' ''' - ''' Security/Compliance: Optionale HMAC-Digests verwenden den Schlüssel aus - ''' FILECLASSIFIER_HMAC_KEY_B64; fehlt der Schlüssel, wird fail-closed ohne HMAC fortgeführt. + ''' Fail-Closed-Verhalten: Ungültige Eingaben, Größenlimit-Verstöße und I/O-Fehler liefern stets ein + ''' deterministisches HashEvidence.CreateFailure(...)-Ergebnis mit unverändertem Fehltext. ''' ''' - ''' Nebenwirkungen: VerifyRoundTrip verwendet temporäre Dateisystempfade für die Materialisierung und - ''' bereinigt diese anschließend best-effort. + ''' Side-Effects: VerifyRoundTrip materialisiert kanonische Bytes in ein temporäres Dateisystemziel + ''' und bereinigt den temporären Ordner anschließend best-effort. ''' ''' Public NotInheritable Class EvidenceHashing @@ -39,7 +40,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für eine Datei mit Standard-Hashoptionen. ''' ''' - ''' Delegiert auf die Überladung mit expliziten Hashoptionen. 
+ ''' + ''' Ablauf: + ''' 1) Delegation auf die Überladung mit expliziten Optionen, + ''' 2) Anwendung der Snapshot-Defaults aus FileTypeOptions. + ''' ''' ''' Pfad zur Eingabedatei. ''' Hash-Evidence; bei Fehlern ein fail-closed Nachweisobjekt mit Fehlerhinweis. @@ -47,6 +52,7 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As HashEvidence + Return HashFile(path, options:=Nothing) End Function @@ -54,7 +60,17 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für eine Datei. ''' ''' - ''' Archive werden über kanonisches Manifest gehasht; Nicht-Archive über direkte Payload-Digests. + ''' + ''' Ablauf: + ''' 1) Snapshot und Normalisierung der Hashoptionen, + ''' 2) Guard-Validierung (Pfad/Existenz/Bounded Read), + ''' 3) Typdetektion, + ''' 4) Archivzweig über kanonisches Manifest oder Fallback-Zweig über Rohpayload, + ''' 5) Rückgabe als deterministisches HashEvidence. + ''' + ''' + ''' Fail-Closed: Bei Guard-/I/O-Fehlern wird eine Failure-Evidence mit unverändertem Fehltext erzeugt. + ''' ''' ''' Pfad zur Eingabedatei. ''' Optionale Hashparameter; bei Nothing werden globale Defaults verwendet. 
@@ -63,8 +79,8 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String, options As HashOptions - ) _ - As HashEvidence + ) As HashEvidence + Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() Dim normalizedOptions As HashOptions = ResolveHashOptions(detectorOptions, options) Dim fileBytes As Byte() = Array.Empty(Of Byte)() @@ -73,17 +89,16 @@ Namespace Global.Tomtastisch.FileClassifier Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then - Return _ - HashEvidence.CreateFailure(HashSourceType.FilePath, path, "Datei nicht gefunden.") + Return Failure(HashSourceType.FilePath, path, "Datei nicht gefunden.") End If - If Not TryReadFileBounded(path, detectorOptions, fileBytes, readError) Then - Return HashEvidence.CreateFailure(HashSourceType.FilePath, path, readError) + If Not EvidenceHashingIo.TryReadFileBounded(path, detectorOptions, fileBytes, readError) Then + Return Failure(HashSourceType.FilePath, path, readError) End If detectedType = New FileTypeDetector().Detect(path) If ArchiveEntryCollector.TryCollectFromFile(path, detectorOptions, entries) Then - Return BuildEvidenceFromEntries( + Return EvidenceHashingCore.BuildEvidenceFromEntries( sourceType:=HashSourceType.FilePath, label:=IO.Path.GetFileName(path), detectedType:=detectedType, @@ -93,7 +108,7 @@ Namespace Global.Tomtastisch.FileClassifier notes:="Archive content hashed via canonical manifest.") End If - Return BuildEvidenceFromRawPayload( + Return EvidenceHashingCore.BuildEvidenceFromRawPayload( sourceType:=HashSourceType.FilePath, label:=IO.Path.GetFileName(path), detectedType:=detectedType, @@ -106,7 +121,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für Rohbytes mit Standardlabel. ''' ''' - ''' Delegiert auf die Überladung mit Label und expliziten Hashoptionen. 
+ ''' + ''' Ablauf: + ''' 1) Delegation auf die Überladung mit Label und Optionen, + ''' 2) Verwendung des stabilen Standardlabels payload.bin. + ''' ''' ''' Zu hashende Rohbytes. ''' Hash-Evidence; bei Fehlern ein fail-closed Nachweisobjekt mit Fehlerhinweis. @@ -114,6 +133,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As HashEvidence + Return HashBytes(data, DefaultPayloadLabel, options:=Nothing) End Function @@ -121,7 +141,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für Rohbytes mit benutzerdefiniertem Label. ''' ''' - ''' Delegiert auf die Überladung mit expliziten Hashoptionen. + ''' + ''' Ablauf: + ''' 1) Delegation auf die Überladung mit expliziten Optionen, + ''' 2) Label-Normalisierung im Zielpfad. + ''' ''' ''' Zu hashende Rohbytes. ''' Fachliches Label für den Nachweis. @@ -131,6 +155,7 @@ Namespace Global.Tomtastisch.FileClassifier data As Byte(), label As String ) As HashEvidence + Return HashBytes(data, label, options:=Nothing) End Function @@ -138,7 +163,17 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für Rohbytes. ''' ''' - ''' Die Eingabe wird gegen globale Größenlimits geprüft und anschließend als Archiv- oder Rohpayload verarbeitet. + ''' + ''' Ablauf: + ''' 1) Snapshot und Normalisierung der Hashoptionen, + ''' 2) Guard-Validierung (null/MaxBytes), + ''' 3) Typdetektion, + ''' 4) Archivzweig mit kanonischem Manifest oder Rohpayload-Zweig, + ''' 5) Rückgabe als deterministisches HashEvidence. + ''' + ''' + ''' Fail-Closed: Bei Guard-Verletzung wird eine Failure-Evidence mit unverändertem Fehltext erzeugt. + ''' ''' ''' Zu hashende Rohbytes. ''' Fachliches Label für den Nachweis. 
@@ -149,26 +184,24 @@ Namespace Global.Tomtastisch.FileClassifier data As Byte(), label As String, options As HashOptions - ) _ - As HashEvidence + ) As HashEvidence + Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() Dim normalizedOptions As HashOptions = ResolveHashOptions(detectorOptions, options) Dim detectedType As FileType Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() If data Is Nothing Then - Return _ - HashEvidence.CreateFailure(HashSourceType.RawBytes, label, "Payload ist null.") + Return Failure(HashSourceType.RawBytes, label, "Payload ist null.") End If If CLng(data.Length) > detectorOptions.MaxBytes Then - Return _ - HashEvidence.CreateFailure(HashSourceType.RawBytes, label, "Payload größer als MaxBytes.") + Return Failure(HashSourceType.RawBytes, label, "Payload größer als MaxBytes.") End If detectedType = New FileTypeDetector().Detect(data) If ArchiveEntryCollector.TryCollectFromBytes(data, detectorOptions, entries) Then - Return BuildEvidenceFromEntries( + Return EvidenceHashingCore.BuildEvidenceFromEntries( sourceType:=HashSourceType.RawBytes, label:=NormalizeLabel(label), detectedType:=detectedType, @@ -178,7 +211,7 @@ Namespace Global.Tomtastisch.FileClassifier notes:="Archive bytes hashed via canonical manifest.") End If - Return BuildEvidenceFromRawPayload( + Return EvidenceHashingCore.BuildEvidenceFromRawPayload( sourceType:=HashSourceType.RawBytes, label:=NormalizeLabel(label), detectedType:=detectedType, @@ -191,7 +224,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis aus extrahierten Archiveinträgen mit Standardlabel. ''' ''' - ''' Delegiert auf die Überladung mit Label und expliziten Hashoptionen. + ''' + ''' Ablauf: + ''' 1) Delegation auf die Überladung mit Label und Optionen, + ''' 2) Verwendung des stabilen Labels archive-entries. + ''' ''' ''' Read-only Liste normalisierbarer Archiveinträge. 
''' Hash-Evidence; bei Fehlern ein fail-closed Nachweisobjekt mit Fehlerhinweis. @@ -199,6 +236,7 @@ Namespace Global.Tomtastisch.FileClassifier ( entries As IReadOnlyList(Of ZipExtractedEntry) ) As HashEvidence + Return HashEntries(entries, "archive-entries", options:=Nothing) End Function @@ -206,7 +244,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis aus extrahierten Archiveinträgen mit benutzerdefiniertem Label. ''' ''' - ''' Delegiert auf die Überladung mit expliziten Hashoptionen. + ''' + ''' Ablauf: + ''' 1) Delegation auf die Überladung mit expliziten Optionen, + ''' 2) Label-Normalisierung im Zielpfad. + ''' ''' ''' Read-only Liste normalisierbarer Archiveinträge. ''' Fachliches Label für den Nachweis. @@ -215,8 +257,8 @@ Namespace Global.Tomtastisch.FileClassifier ( entries As IReadOnlyList(Of ZipExtractedEntry), label As String - ) _ - As HashEvidence + ) As HashEvidence + Return HashEntries(entries, label, options:=Nothing) End Function @@ -224,7 +266,16 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis aus extrahierten Archiveinträgen. ''' ''' - ''' Entry-Pfade und -Inhalte werden vor der Manifestbildung normalisiert, dedupliziert und deterministisch sortiert. + ''' + ''' Ablauf: + ''' 1) Snapshot und Normalisierung der Hashoptionen, + ''' 2) Deterministische Entry-Normalisierung (Pfad, Deduplizierung, Sortierung), + ''' 3) Manifestbildung und Digest-Berechnung, + ''' 4) Rückgabe als HashEvidence. + ''' + ''' + ''' Fail-Closed: Null-Entries, ungültige Pfade oder Duplikate nach Normalisierung führen zu Failure-Evidence. + ''' ''' ''' Read-only Liste normalisierbarer Archiveinträge. ''' Fachliches Label für den Nachweis. 
@@ -236,9 +287,11 @@ Namespace Global.Tomtastisch.FileClassifier label As String, options As HashOptions ) As HashEvidence - Dim projectOptions = FileTypeOptions.GetSnapshot() - Dim normalizedOptions = ResolveHashOptions(projectOptions, options) - Return BuildEvidenceFromEntries( + + Dim projectOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() + Dim normalizedOptions As HashOptions = ResolveHashOptions(projectOptions, options) + + Return EvidenceHashingCore.BuildEvidenceFromEntries( sourceType:=HashSourceType.ArchiveEntries, label:=NormalizeLabel(label), detectedType:=FileTypeRegistry.Resolve(FileKind.Zip), @@ -252,7 +305,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Führt den deterministischen h1-h4-RoundTrip mit Standard-Hashoptionen aus. ''' ''' - ''' Delegiert auf die Überladung mit expliziten Hashoptionen. + ''' + ''' Ablauf: + ''' 1) Delegation auf die Überladung mit expliziten Optionen, + ''' 2) Anwendung der Snapshot-Defaults aus FileTypeOptions. + ''' ''' ''' Pfad zur Eingabedatei. ''' RoundTrip-Bericht mit Konsistenzkennzahlen und Notes. @@ -260,6 +317,7 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As HashRoundTripReport + Return VerifyRoundTrip(path, options:=Nothing) End Function @@ -275,7 +333,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' 4) h4: Hash nach Materialisierung der kanonischen Bytes. ''' ''' - ''' Fehler werden fail-closed als Bericht mit Fehler-Evidence zurückgegeben. + ''' Side-Effects: Die Materialisierung erzeugt temporär ein Dateiziel im System-Temp-Pfad und entfernt + ''' den Temp-Ordner anschließend best-effort mit Catch-Filter-Handling. + ''' + ''' + ''' Fail-Closed: Fehlerpfade liefern einen vollständigen Bericht mit Failure-Evidences. ''' ''' ''' Pfad zur Eingabedatei. 
@@ -285,501 +347,60 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String, options As HashOptions - ) _ - As HashRoundTripReport - Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() - Dim normalizedOptions As HashOptions = ResolveHashOptions(detectorOptions, options) - Dim failed As HashEvidence - Dim h1 As HashEvidence - Dim originalBytes As Byte() = Array.Empty(Of Byte)() - Dim readError As String = String.Empty - Dim archiveEntries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() - Dim isArchiveInput As Boolean - Dim h2 As HashEvidence - Dim canonicalBytes As Byte() - Dim normalizedEntries As List(Of NormalizedEntry) - Dim normalizeError As String - Dim h3 As HashEvidence - Dim h4 As HashEvidence = HashEvidence.CreateFailure( - HashSourceType.MaterializedFile, - "roundtrip-h4-file", - "Materialization failed." - ) - Dim roundTripTempRoot As String = IO.Path.Combine( - IO.Path.GetTempPath(), - "ftd-roundtrip-" & - Guid.NewGuid().ToString("N", Globalization.CultureInfo.InvariantCulture) - ) - Dim targetFile As String - Dim notes As String - - If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then - failed = HashEvidence.CreateFailure(HashSourceType.FilePath, path, "Datei nicht gefunden.") - Return _ - New HashRoundTripReport(path, isArchiveInput:=False, h1:=failed, h2:=failed, h3:=failed, h4:=failed, - notes:="Input file missing.") - End If - - h1 = HashFile(path, normalizedOptions) - If Not h1.Digests.HasLogicalHash Then - failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, "h1 konnte nicht berechnet werden.") - Return _ - New HashRoundTripReport(path, isArchiveInput:=False, h1:=h1, h2:=failed, h3:=failed, h4:=failed, - notes:="h1 missing logical digest.") - End If - - If Not TryReadFileBounded(path, detectorOptions, originalBytes, readError) Then - failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, readError) - Return _ - New HashRoundTripReport(path, 
isArchiveInput:=False, h1:=h1, h2:=failed, h3:=failed, h4:=failed, - notes:=readError) - End If - - isArchiveInput = ArchiveEntryCollector.TryCollectFromFile(path, detectorOptions, archiveEntries) - - If isArchiveInput Then - h2 = HashEntries(archiveEntries, "roundtrip-h2-entries", normalizedOptions) - normalizedEntries = Nothing - normalizeError = String.Empty - If TryNormalizeEntries(archiveEntries, normalizedEntries, normalizeError) Then - canonicalBytes = BuildLogicalManifestBytes(normalizedEntries) - Else - canonicalBytes = Array.Empty(Of Byte)() - End If - Else - h2 = HashBytes(originalBytes, "roundtrip-h2-bytes", normalizedOptions) - canonicalBytes = CopyBytes(originalBytes) - End If + ) As HashRoundTripReport - h3 = BuildEvidenceFromRawPayload( - sourceType:=HashSourceType.RawBytes, - label:="roundtrip-h3-logical-bytes", - detectedType:=FileTypeRegistry.Resolve(FileKind.Unknown), - payload:=canonicalBytes, - hashOptions:=normalizedOptions, - notes:="Canonical logical bytes hashed directly.") - - Try - IO.Directory.CreateDirectory(roundTripTempRoot) - targetFile = IO.Path.Combine( - roundTripTempRoot, - NormalizeLabel(normalizedOptions.MaterializedFileName) - ) - - If FileMaterializer.Persist(canonicalBytes, targetFile, overwrite:=False, secureExtract:=False) Then - h4 = HashFile(targetFile, normalizedOptions) - End If - - Finally - Try - If IO.Directory.Exists(roundTripTempRoot) Then - IO.Directory.Delete(roundTripTempRoot, recursive:=True) - End If - - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse - TypeOf ex Is IO.IOException OrElse - TypeOf ex Is IO.PathTooLongException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - LogGuard.Debug(detectorOptions.Logger, $"[HashRoundTrip] Cleanup-Fehler: {ex.Message}") - End Try - End Try - - notes = If( - isArchiveInput, - "Archive roundtrip (h1-h4) executed.", - "Raw file roundtrip (h1-h4) 
executed." - ) - - Return New HashRoundTripReport(path, isArchiveInput, h1, h2, h3, h4, notes) + Return EvidenceHashingRoundTrip.VerifyRoundTrip(path, options) End Function - Private Shared Function BuildEvidenceFromEntries( - sourceType As HashSourceType, - label As String, - detectedType As FileType, - compressedBytes As Byte(), - entries As IReadOnlyList(Of ZipExtractedEntry), - hashOptions As HashOptions, - notes As String - ) As HashEvidence - - Dim normalizedEntries As List(Of NormalizedEntry) = Nothing - Dim normalizeError As String = String.Empty - Dim logicalBytes As Byte() - Dim logicalSha As String - Dim fastLogical As String - Dim hmacLogical As String - Dim physicalSha As String - Dim fastPhysical As String - Dim hmacPhysical As String - Dim hasPhysical As Boolean - Dim secureNote As String - Dim hmacKey As Byte() - Dim hasHmacKey As Boolean - Dim firstEntry As ZipExtractedEntry = Nothing - Dim digestSet As HashDigestSet - Dim combinedNotes As String - Dim totalBytes As Long - Dim persistedCompressed As Byte() - Dim persistedLogical As Byte() - - If Not TryNormalizeEntries(entries, normalizedEntries, normalizeError) Then - Return HashEvidence.CreateFailure(sourceType, label, normalizeError) - End If + Friend Shared Function ResolveHashOptionsCore _ + ( + projectOptions As FileTypeProjectOptions, + options As HashOptions + ) As HashOptions - logicalBytes = BuildLogicalManifestBytes(normalizedEntries) - logicalSha = ComputeSha256Hex(logicalBytes) - fastLogical = ComputeFastHash(logicalBytes, hashOptions) - hmacLogical = String.Empty - physicalSha = String.Empty - fastPhysical = String.Empty - hmacPhysical = String.Empty - hasPhysical = False - secureNote = String.Empty - hmacKey = Array.Empty(Of Byte)() - hasHmacKey = False - - If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then - hasHmacKey = TryResolveHmacKey(hmacKey, secureNote) - If hasHmacKey Then - hmacLogical = ComputeHmacSha256Hex(hmacKey, logicalBytes) - End If - End If + 
Return ResolveHashOptions(projectOptions, options) + End Function - If compressedBytes IsNot Nothing AndAlso compressedBytes.Length > 0 Then - physicalSha = ComputeSha256Hex(compressedBytes) - fastPhysical = ComputeFastHash(compressedBytes, hashOptions) - hasPhysical = True - If hasHmacKey Then - hmacPhysical = ComputeHmacSha256Hex(hmacKey, compressedBytes) - End If - End If + Friend Shared Function LogicalManifestVersionCore() As String + Return LogicalManifestVersion + End Function - If normalizedEntries.Count > 0 Then - firstEntry = New ZipExtractedEntry(normalizedEntries(0).RelativePath, normalizedEntries(0).Content) - End If + Friend Shared Function DefaultPayloadLabelCore() As String + Return DefaultPayloadLabel + End Function - digestSet = New HashDigestSet( - physicalSha256:=physicalSha, - logicalSha256:=logicalSha, - fastPhysicalXxHash3:=fastPhysical, - fastLogicalXxHash3:=fastLogical, - hmacPhysicalSha256:=hmacPhysical, - hmacLogicalSha256:=hmacLogical, - hasPhysicalHash:=hasPhysical, - hasLogicalHash:=True) - - combinedNotes = AppendNoteIfAny(notes, secureNote) - - totalBytes = 0 - For Each entry In normalizedEntries - totalBytes += CLng(entry.Content.LongLength) - Next - - persistedCompressed = - If(hashOptions.IncludePayloadCopies, CopyBytes(compressedBytes), Array.Empty(Of Byte)()) - persistedLogical = - If(hashOptions.IncludePayloadCopies, CopyBytes(logicalBytes), Array.Empty(Of Byte)()) - - Return New HashEvidence( - sourceType:=sourceType, - label:=NormalizeLabel(label), - detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), - entry:=firstEntry, - compressedBytes:=persistedCompressed, - uncompressedBytes:=persistedLogical, - entryCount:=normalizedEntries.Count, - totalUncompressedBytes:=totalBytes, - digests:=digestSet, - notes:=combinedNotes) + Friend Shared Function HmacKeyEnvVarB64Core() As String + Return HmacKeyEnvVarB64 End Function - Private Shared Function BuildEvidenceFromRawPayload( + Private Shared Function Failure 
_ + ( sourceType As HashSourceType, label As String, - detectedType As FileType, - payload As Byte(), - hashOptions As HashOptions, notes As String ) As HashEvidence - Dim safePayload As Byte() = If(payload, Array.Empty(Of Byte)()) - Dim physicalSha As String = ComputeSha256Hex(safePayload) - Dim logicalSha As String = physicalSha - Dim fastPhysical As String = ComputeFastHash(safePayload, hashOptions) - Dim fastLogical As String = fastPhysical - Dim hmacPhysical As String = String.Empty - Dim hmacLogical As String = String.Empty - Dim secureNote As String = String.Empty - Dim hmacKey As Byte() = Array.Empty(Of Byte)() - Dim persistedPayload As Byte() - Dim entry As ZipExtractedEntry - Dim digestSet As HashDigestSet - Dim combinedNotes As String - - If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then - If TryResolveHmacKey(hmacKey, secureNote) Then - hmacPhysical = ComputeHmacSha256Hex(hmacKey, safePayload) - hmacLogical = hmacPhysical - End If - End If - - persistedPayload = If(hashOptions.IncludePayloadCopies, CopyBytes(safePayload), Array.Empty(Of Byte)()) - entry = New ZipExtractedEntry(DefaultPayloadLabel, safePayload) - - digestSet = New HashDigestSet( - physicalSha256:=physicalSha, - logicalSha256:=logicalSha, - fastPhysicalXxHash3:=fastPhysical, - fastLogicalXxHash3:=fastLogical, - hmacPhysicalSha256:=hmacPhysical, - hmacLogicalSha256:=hmacLogical, - hasPhysicalHash:=True, - hasLogicalHash:=True) - - combinedNotes = AppendNoteIfAny(notes, secureNote) - - Return New HashEvidence( - sourceType:=sourceType, - label:=NormalizeLabel(label), - detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), - entry:=entry, - compressedBytes:=persistedPayload, - uncompressedBytes:=persistedPayload, - entryCount:=1, - totalUncompressedBytes:=safePayload.LongLength, - digests:=digestSet, - notes:=combinedNotes) - End Function - - Private Shared Function TryNormalizeEntries( - entries As IReadOnlyList(Of ZipExtractedEntry), - ByRef 
normalizedEntries As List(Of NormalizedEntry), - ByRef errorMessage As String - ) As Boolean - - Dim seen As HashSet(Of String) = New HashSet(Of String)(StringComparer.Ordinal) - Dim normalizedPath As String - Dim payload As Byte() - - normalizedEntries = New List(Of NormalizedEntry)() - errorMessage = String.Empty - - If entries Is Nothing Then - errorMessage = "Entries sind null." - Return False - End If - - For Each entry In entries - If entry Is Nothing Then - errorMessage = "Entry ist null." - Return False - End If - - normalizedPath = Nothing - If Not TryNormalizeEntryPath(entry.RelativePath, normalizedPath) Then - errorMessage = $"Ungültiger Entry-Pfad: '{entry.RelativePath}'." - Return False - End If - - If Not seen.Add(normalizedPath) Then - errorMessage = $"Doppelter Entry-Pfad nach Normalisierung: '{normalizedPath}'." - Return False - End If - - payload = If(entry.Content.IsDefaultOrEmpty, Array.Empty(Of Byte)(), entry.Content.ToArray()) - normalizedEntries.Add(New NormalizedEntry(normalizedPath, payload)) - Next - - normalizedEntries.Sort(Function(a, b) StringComparer.Ordinal.Compare(a.RelativePath, b.RelativePath)) - Return True - End Function - - Private Shared Function TryNormalizeEntryPath(rawPath As String, ByRef normalizedPath As String) As Boolean - Dim isDirectory = False - Return _ - ArchiveEntryPathPolicy.TryNormalizeRelativePath(rawPath, allowDirectoryMarker:=False, normalizedPath, - isDirectory) - End Function - - Private Shared Function BuildLogicalManifestBytes(entries As IReadOnlyList(Of NormalizedEntry)) As Byte() - Dim versionBytes As Byte() - Dim pathBytes As Byte() - Dim contentHash As Byte() - - Using ms As New IO.MemoryStream() - Using writer As New IO.BinaryWriter(ms, Text.Encoding.UTF8, leaveOpen:=True) - versionBytes = Text.Encoding.UTF8.GetBytes(LogicalManifestVersion) - writer.Write(versionBytes.Length) - writer.Write(versionBytes) - writer.Write(entries.Count) - - For Each entry In entries - pathBytes = 
Text.Encoding.UTF8.GetBytes(entry.RelativePath) - contentHash = HashPrimitives.Current.Sha256.ComputeHash(entry.Content) - writer.Write(pathBytes.Length) - writer.Write(pathBytes) - writer.Write(CLng(entry.Content.LongLength)) - writer.Write(contentHash.Length) - writer.Write(contentHash) - Next - End Using - Return ms.ToArray() - End Using - End Function - - Private Shared Function ComputeSha256Hex(payload As Byte()) As String - Dim data = If(payload, Array.Empty(Of Byte)()) - Return HashPrimitives.Current.Sha256.ComputeHashHex(data) + Return HashEvidence.CreateFailure(sourceType, label, notes) End Function - Private Shared Function TryResolveHmacKey(ByRef key As Byte(), ByRef note As String) As Boolean - Dim b64 As String - - key = Array.Empty(Of Byte)() - note = String.Empty - - b64 = Environment.GetEnvironmentVariable(HmacKeyEnvVarB64) - If String.IsNullOrWhiteSpace(b64) Then - note = $"Secure hashing requested but env var '{HmacKeyEnvVarB64}' is missing; HMAC digests omitted." - Return False - End If - - Try - key = Convert.FromBase64String(b64.Trim()) - If key Is Nothing OrElse key.Length = 0 Then - key = Array.Empty(Of Byte)() - note = $"Secure hashing requested but env var '{HmacKeyEnvVarB64}' is empty; HMAC digests omitted." - Return False - End If - Return True - Catch ex As Exception When _ - TypeOf ex Is FormatException OrElse - TypeOf ex Is ArgumentException - key = Array.Empty(Of Byte)() - note = $"Secure hashing requested but env var '{HmacKeyEnvVarB64}' is invalid Base64; HMAC digests omitted." 
- Return False - End Try - End Function - - Private Shared Function ComputeHmacSha256Hex(key As Byte(), payload As Byte()) As String - Dim safeKey = If(key, Array.Empty(Of Byte)()) - Dim data = If(payload, Array.Empty(Of Byte)()) - Using hmac As New Security.Cryptography.HMACSHA256(safeKey) - Return HashPrimitives.Current.HexCodec.EncodeLowerHex(hmac.ComputeHash(data)) - End Using - End Function - - Private Shared Function ComputeFastHash(payload As Byte(), options As HashOptions) As String - Dim data As Byte() - - If options Is Nothing OrElse Not options.IncludeFastHash Then Return String.Empty - data = If(payload, Array.Empty(Of Byte)()) - Return HashPrimitives.Current.FastHash64.ComputeHashHex(data) - End Function - - Private Shared Function AppendNoteIfAny(baseNotes As String, toAppend As String) As String - Dim left = If(baseNotes, String.Empty).Trim() - Dim right = If(toAppend, String.Empty).Trim() - If right.Length = 0 Then Return left - If left.Length = 0 Then Return right - Return left & " " & right - End Function - - Private Shared Function NormalizeLabel(label As String) As String - Dim normalized = If(label, String.Empty).Trim() - If normalized.Length = 0 Then Return DefaultPayloadLabel - Return normalized - End Function - - Private Shared Function CopyBytes(data As Byte()) As Byte() - Dim copy As Byte() + Private Shared Function NormalizeLabel _ + ( + label As String + ) As String - If data Is Nothing OrElse data.Length = 0 Then Return Array.Empty(Of Byte)() - copy = New Byte(data.Length - 1) {} - Buffer.BlockCopy(data, 0, copy, 0, data.Length) - Return copy + Return EvidenceHashingCore.NormalizeLabel(label) End Function - Private Shared Function ResolveHashOptions( - projectOptions As FileTypeProjectOptions, - options As HashOptions - ) As HashOptions + Private Shared Function ResolveHashOptions _ + ( + projectOptions As FileTypeProjectOptions, + options As HashOptions + ) As HashOptions If options IsNot Nothing Then Return 
HashOptions.Normalize(options) - If projectOptions IsNot Nothing Then _ - Return HashOptions.Normalize(projectOptions.DeterministicHash) - Return HashOptions.Normalize(Nothing) - End Function + If projectOptions IsNot Nothing Then Return HashOptions.Normalize(projectOptions.DeterministicHash) - Private Shared Function TryReadFileBounded(path As String, detectorOptions As FileTypeProjectOptions, - ByRef bytes As Byte(), ByRef errorMessage As String) As Boolean - Dim fi As IO.FileInfo - - bytes = Array.Empty(Of Byte)() - errorMessage = String.Empty - If String.IsNullOrWhiteSpace(path) Then - errorMessage = "Pfad ist leer." - Return False - End If - - If detectorOptions Is Nothing Then - errorMessage = "Optionen fehlen." - Return False - End If - - Try - fi = New IO.FileInfo(path) - If Not fi.Exists Then - errorMessage = "Datei existiert nicht." - Return False - End If - - If fi.Length > detectorOptions.MaxBytes Then - errorMessage = "Datei größer als MaxBytes." - Return False - End If - - Using _ - fs As _ - New IO.FileStream(path, IO.FileMode.Open, IO.FileAccess.Read, IO.FileShare.Read, - InternalIoDefaults.FileStreamBufferSize, IO.FileOptions.SequentialScan) - Using ms As New IO.MemoryStream(CInt(Math.Min(Math.Max(fi.Length, 0), Integer.MaxValue))) - StreamBounds.CopyBounded(fs, ms, detectorOptions.MaxBytes) - bytes = ms.ToArray() - End Using - End Using - Return True - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse - TypeOf ex Is IO.IOException OrElse - TypeOf ex Is IO.InvalidDataException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - Return SetReadFileError(ex, errorMessage) - End Try - End Function - - Private Shared Function SetReadFileError(ex As Exception, ByRef errorMessage As String) As Boolean - errorMessage = $"Datei konnte nicht gelesen werden: {ex.Message}" - Return False + Return HashOptions.Normalize(Nothing) End Function - - 
''' - ''' Interne Hilfsklasse NormalizedEntry zur kapselnden Umsetzung von Guard-, I/O- und Policy-Logik. - ''' - Private NotInheritable Class NormalizedEntry - Friend ReadOnly Property RelativePath As String - Friend ReadOnly Property Content As Byte() - - Friend Sub New(relativePath As String, content As Byte()) - Me.RelativePath = If(relativePath, String.Empty) - Me.Content = If(content, Array.Empty(Of Byte)()) - End Sub - End Class End Class End Namespace diff --git a/src/FileTypeDetection/FileTypeDetectionLib.vbproj b/src/FileTypeDetection/FileTypeDetectionLib.vbproj index 8e009026..f61a0d12 100644 --- a/src/FileTypeDetection/FileTypeDetectionLib.vbproj +++ b/src/FileTypeDetection/FileTypeDetectionLib.vbproj @@ -7,8 +7,8 @@ true false Tomtastisch.FileClassifier - 5.2.1 - 5.2.1 + 6.0.0 + 6.0.0 tomtastisch Deterministic file type and MIME detection with fail-closed archive safety checks, secure extraction primitives, and reproducible hashing evidence for .NET. filetype;mime;detection;magic-bytes;sniffing;archive;zip;tar;7z;rar;zipslip;security;hashing;sha256;deterministic;dotnet;netstandard2.0;net8;net10 diff --git a/src/FileTypeDetection/FileTypeDetector.vb b/src/FileTypeDetection/FileTypeDetector.vb index e08ad769..57caa6b7 100644 --- a/src/FileTypeDetection/FileTypeDetector.vb +++ b/src/FileTypeDetection/FileTypeDetector.vb @@ -796,9 +796,7 @@ Namespace Global.Tomtastisch.FileClassifier If refined.Kind <> FileKind.Unknown Then WarnIfNoDirectContentDetection(refined.Kind, opt) - trace.UsedStructuredRefinement = - (refined.Kind = FileKind.Docx OrElse refined.Kind = FileKind.Xlsx OrElse - refined.Kind = FileKind.Pptx) + trace.UsedStructuredRefinement = FileTypeRegistry.HasStructuredContainerDetection(refined.Kind) trace.ReasonCode = If(trace.UsedStructuredRefinement, ReasonArchiveStructuredRefined, ReasonArchiveRefined) Return refined diff --git a/src/FileTypeDetection/FileTypeOptions.vb b/src/FileTypeDetection/FileTypeOptions.vb index a0e97180..bd2e0a67 100644 
--- a/src/FileTypeDetection/FileTypeOptions.vb +++ b/src/FileTypeDetection/FileTypeOptions.vb @@ -204,7 +204,11 @@ Namespace Global.Tomtastisch.FileClassifier Return Text.Json.JsonSerializer.Serialize(dto) End Function - Friend Shared Function LoadOptionsFromPath(path As String) As Boolean + Friend Shared Function LoadOptionsFromPath _ + ( + path As String + ) As Boolean + If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then Return False If Not path.EndsWith(".json", StringComparison.OrdinalIgnoreCase) Then Return False @@ -234,60 +238,91 @@ Namespace Global.Tomtastisch.FileClassifier End SyncLock End Sub - Private Shared Function SafeInt(el As Text.Json.JsonElement, fallback As Integer) As Integer + Private Shared Function SafeInt _ + ( + el As Text.Json.JsonElement, + fallback As Integer + ) As Integer + Dim v As Integer If el.ValueKind = Text.Json.JsonValueKind.Number AndAlso el.TryGetInt32(v) Then Return v Return fallback End Function - Private Shared Function SafeLong(el As Text.Json.JsonElement, fallback As Long) As Long + Private Shared Function SafeLong _ + ( + el As Text.Json.JsonElement, + fallback As Long + ) As Long + Dim v As Long If el.ValueKind = Text.Json.JsonValueKind.Number AndAlso el.TryGetInt64(v) Then Return v Return fallback End Function - Private Shared Function ParsePositiveInt(el As Text.Json.JsonElement, fallback As Integer, - name As String, - logger As Microsoft.Extensions.Logging.ILogger) As Integer + Private Shared Function ParsePositiveInt _ + ( + el As Text.Json.JsonElement, + fallback As Integer, + name As String, + logger As Microsoft.Extensions.Logging.ILogger + ) As Integer + Dim v = SafeInt(el, fallback) If v > 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParseNonNegativeInt(el As Text.Json.JsonElement, fallback As Integer, - name As String, - logger As Microsoft.Extensions.Logging.ILogger) As 
Integer + Private Shared Function ParseNonNegativeInt _ + ( + el As Text.Json.JsonElement, + fallback As Integer, + name As String, + logger As Microsoft.Extensions.Logging.ILogger + ) As Integer + Dim v = SafeInt(el, fallback) If v >= 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParsePositiveLong(el As Text.Json.JsonElement, fallback As Long, - name As String, - logger As Microsoft.Extensions.Logging.ILogger) _ - As Long + Private Shared Function ParsePositiveLong _ + ( + el As Text.Json.JsonElement, + fallback As Long, + name As String, + logger As Microsoft.Extensions.Logging.ILogger + ) As Long + Dim v = SafeLong(el, fallback) If v > 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParseBoolean(el As Text.Json.JsonElement, fallback As Boolean, - name As String, - logger As Microsoft.Extensions.Logging.ILogger) _ - As Boolean + Private Shared Function ParseBoolean _ + ( + el As Text.Json.JsonElement, + fallback As Boolean, + name As String, + logger As Microsoft.Extensions.Logging.ILogger + ) As Boolean + If el.ValueKind = Text.Json.JsonValueKind.True Then Return True If el.ValueKind = Text.Json.JsonValueKind.False Then Return False LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParseString(el As Text.Json.JsonElement, fallback As String, - name As String, - logger As Microsoft.Extensions.Logging.ILogger) _ - As String + Private Shared Function ParseString _ + ( + el As Text.Json.JsonElement, + fallback As String, + name As String, + logger As Microsoft.Extensions.Logging.ILogger + ) As String Dim value As String @@ -299,7 +334,8 @@ Namespace Global.Tomtastisch.FileClassifier Return fallback End Function - Private Shared Sub TryParseHashOptions( + 
Private Shared Sub TryParseHashOptions _ + ( el As Text.Json.JsonElement, ByRef includePayloadCopies As Boolean, ByRef includeFastHash As Boolean, @@ -350,9 +386,10 @@ Namespace Global.Tomtastisch.FileClassifier Next End Sub - Private Shared Function Snapshot( + Private Shared Function Snapshot _ + ( opt As FileTypeProjectOptions - ) As FileTypeProjectOptions + ) As FileTypeProjectOptions Dim snap As FileTypeProjectOptions diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index b9faba97..32b3d19a 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -235,8 +235,13 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function TryDescribeBytes(data As Byte(), opt As FileTypeProjectOptions, - ByRef descriptor As ArchiveDescriptor) As Boolean + Friend Shared Function TryDescribeBytes _ + ( + data As Byte(), + opt As FileTypeProjectOptions, + ByRef descriptor As ArchiveDescriptor + ) As Boolean + descriptor = ArchiveDescriptor.UnknownDescriptor() If data Is Nothing OrElse data.Length = 0 Then Return False If opt Is Nothing Then Return False @@ -260,8 +265,13 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function TryDescribeStream(stream As Stream, opt As FileTypeProjectOptions, - ByRef descriptor As ArchiveDescriptor) As Boolean + Friend Shared Function TryDescribeStream _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + ByRef descriptor As ArchiveDescriptor + ) As Boolean + Dim mapped As ArchiveContainerType Dim gzipWrapped As Boolean @@ -313,7 +323,11 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function MapArchiveType(type As SharpCompress.Common.ArchiveType) As ArchiveContainerType + Friend Shared Function MapArchiveType _ + ( + type As SharpCompress.Common.ArchiveType + ) As 
ArchiveContainerType + Select Case type Case SharpCompress.Common.ArchiveType.Zip Return ArchiveContainerType.Zip @@ -339,18 +353,26 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function ValidateArchiveStream(stream As Stream, opt As FileTypeProjectOptions, depth As Integer, - descriptor As ArchiveDescriptor) As Boolean + Friend Shared Function ValidateArchiveStream _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + descriptor As ArchiveDescriptor + ) As Boolean + Return ProcessArchiveStream(stream, opt, depth, descriptor, Nothing) End Function - Friend Shared Function ProcessArchiveStream( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - descriptor As ArchiveDescriptor, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean + Friend Shared Function ProcessArchiveStream _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + descriptor As ArchiveDescriptor, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean + Dim backend As IArchiveBackend If Not StreamGuard.IsReadable(stream) Then Return False @@ -373,26 +395,37 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function TryExtractArchiveStreamToMemory(stream As Stream, opt As FileTypeProjectOptions) _ - As IReadOnlyList(Of ZipExtractedEntry) + Friend Shared Function TryExtractArchiveStreamToMemory _ + ( + stream As Stream, + opt As FileTypeProjectOptions + ) As IReadOnlyList(Of ZipExtractedEntry) + Dim descriptor As ArchiveDescriptor = Nothing Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() If Not ArchiveTypeResolver.TryDescribeStream(stream, opt, descriptor) Then Return emptyResult + Return TryExtractArchiveStreamToMemory(stream, opt, descriptor) End Function - Friend Shared Function TryExtractArchiveStreamToMemory(stream As Stream, opt As FileTypeProjectOptions, - 
descriptor As ArchiveDescriptor) _ - As IReadOnlyList(Of ZipExtractedEntry) + Friend Shared Function TryExtractArchiveStreamToMemory _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + descriptor As ArchiveDescriptor + ) As IReadOnlyList(Of ZipExtractedEntry) + Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() - Dim entries As List(Of ZipExtractedEntry) = New List(Of ZipExtractedEntry)() + Dim entries As List(Of ZipExtractedEntry) = New List(Of ZipExtractedEntry)() Dim ok As Boolean If Not StreamGuard.IsReadable(stream) Then Return emptyResult If opt Is Nothing Then Return emptyResult - If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then _ + + If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then Return emptyResult - + End if + Try StreamGuard.RewindToStart(stream) ok = ArchiveProcessingEngine.ProcessArchiveStream( @@ -409,6 +442,7 @@ Namespace Global.Tomtastisch.FileClassifier End If Return entries.AsReadOnly() + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -424,16 +458,26 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function TryExtractArchiveStream(stream As Stream, destinationDirectory As String, - opt As FileTypeProjectOptions) As Boolean + Friend Shared Function TryExtractArchiveStream _ + ( + stream As Stream, + destinationDirectory As String, + opt As FileTypeProjectOptions + ) As Boolean + Dim descriptor As ArchiveDescriptor = Nothing If Not ArchiveTypeResolver.TryDescribeStream(stream, opt, descriptor) Then Return False Return TryExtractArchiveStream(stream, destinationDirectory, opt, descriptor) End Function - Friend Shared Function TryExtractArchiveStream(stream As Stream, destinationDirectory As String, - opt As FileTypeProjectOptions, descriptor As ArchiveDescriptor) _ - As Boolean + Friend Shared Function 
TryExtractArchiveStream _ + ( + stream As Stream, + destinationDirectory As String, + opt As FileTypeProjectOptions, + descriptor As ArchiveDescriptor + ) As Boolean + Dim destinationFull As String Dim parent As String Dim stageDir As String @@ -483,6 +527,7 @@ Namespace Global.Tomtastisch.FileClassifier Directory.Move(stageDir, destinationFull) Return True + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -494,6 +539,7 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf ex Is ObjectDisposedException LogGuard.Debug(opt.Logger, $"[ArchiveExtract] Fehler: {ex.Message}") Return False + Finally If Directory.Exists(stageDir) Then Try @@ -510,8 +556,13 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function ExtractEntryToDirectory(entry As IArchiveEntryModel, destinationPrefix As String, - opt As FileTypeProjectOptions) As Boolean + Private Shared Function ExtractEntryToDirectory _ + ( + entry As IArchiveEntryModel, + destinationPrefix As String, + opt As FileTypeProjectOptions + ) As Boolean + Dim entryName As String = Nothing Dim isDirectory As Boolean = False Dim targetPath As String @@ -567,6 +618,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using End Using Return True + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -581,8 +633,13 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function ExtractEntryToMemory(entry As IArchiveEntryModel, entries As List(Of ZipExtractedEntry), - opt As FileTypeProjectOptions) As Boolean + Private Shared Function ExtractEntryToMemory _ + ( + entry As IArchiveEntryModel, + entries As List(Of ZipExtractedEntry), + opt As FileTypeProjectOptions + ) As Boolean + Dim entryName As String = Nothing Dim isDirectory As Boolean = False Dim payload As Byte() @@ -623,9 +680,14 @@ Namespace Global.Tomtastisch.FileClassifier End 
Try End Function - Private Shared Function TryGetSafeEntryName(entry As IArchiveEntryModel, opt As FileTypeProjectOptions, - ByRef safeEntryName As String, ByRef isDirectory As Boolean) _ - As Boolean + Private Shared Function TryGetSafeEntryName _ + ( + entry As IArchiveEntryModel, + opt As FileTypeProjectOptions, + ByRef safeEntryName As String, + ByRef isDirectory As Boolean + ) As Boolean + Dim entryName As String = Nothing Dim normalizedDirectoryFlag As Boolean = False @@ -639,20 +701,28 @@ Namespace Global.Tomtastisch.FileClassifier Return False End If - If _ - Not _ - ArchiveEntryPathPolicy.TryNormalizeRelativePath(entry.RelativePath, allowDirectoryMarker:=True, - entryName, normalizedDirectoryFlag) Then + If Not ArchiveEntryPathPolicy.TryNormalizeRelativePath( + entry.RelativePath, + allowDirectoryMarker:=True, + entryName, + normalizedDirectoryFlag + ) Then + Return False End If safeEntryName = entryName - isDirectory = entry.IsDirectory OrElse normalizedDirectoryFlag OrElse - entryName.EndsWith("/"c) + isDirectory = entry.IsDirectory OrElse normalizedDirectoryFlag OrElse entryName.EndsWith("/"c) + Return True End Function - Private Shared Function ValidateEntrySize(entry As IArchiveEntryModel, opt As FileTypeProjectOptions) As Boolean + Private Shared Function ValidateEntrySize _ + ( + entry As IArchiveEntryModel, + opt As FileTypeProjectOptions + ) As Boolean + Dim sizeValue As Long? 
If entry Is Nothing OrElse opt Is Nothing Then Return False @@ -660,9 +730,7 @@ Namespace Global.Tomtastisch.FileClassifier sizeValue = entry.UncompressedSize If sizeValue.HasValue Then - If sizeValue.Value < 0 Then - Return opt.AllowUnknownArchiveEntrySize - End If + If sizeValue.Value < 0 Then Return opt.AllowUnknownArchiveEntrySize If sizeValue.Value > opt.MaxZipEntryUncompressedBytes Then Return False Return True @@ -671,12 +739,17 @@ Namespace Global.Tomtastisch.FileClassifier Return opt.AllowUnknownArchiveEntrySize End Function - Private Shared Function EnsureTrailingSeparator(dirPath As String) As String + Private Shared Function EnsureTrailingSeparator _ + ( + dirPath As String + ) As String + If String.IsNullOrEmpty(dirPath) Then Return Path.DirectorySeparatorChar.ToString() - If dirPath.EndsWith(Path.DirectorySeparatorChar) OrElse dirPath.EndsWith(Path.AltDirectorySeparatorChar) _ - Then + If dirPath.EndsWith(Path.DirectorySeparatorChar) _ + OrElse dirPath.EndsWith(Path.AltDirectorySeparatorChar) Then Return dirPath End If + Return dirPath & Path.DirectorySeparatorChar End Function End Class @@ -688,26 +761,38 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function TryCollectFromFile(path As String, opt As FileTypeProjectOptions, - ByRef entries As IReadOnlyList(Of ZipExtractedEntry)) As Boolean + Friend Shared Function TryCollectFromFile _ + ( + path As String, + opt As FileTypeProjectOptions, + ByRef entries As IReadOnlyList(Of ZipExtractedEntry) + ) As Boolean + Dim descriptor As ArchiveDescriptor = Nothing entries = Array.Empty(Of ZipExtractedEntry)() If String.IsNullOrWhiteSpace(path) OrElse Not File.Exists(path) Then Return False If opt Is Nothing Then Return False - Try - Using _ - fs As _ - New FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, - InternalIoDefaults.FileStreamBufferSize, FileOptions.SequentialScan) + Try + Using fs As New FileStream( + path, FileMode.Open, + FileAccess.Read, + 
FileShare.Read, + InternalIoDefaults.FileStreamBufferSize, + FileOptions.SequentialScan + ) + If Not ArchiveTypeResolver.TryDescribeStream(fs, opt, descriptor) Then Return False StreamGuard.RewindToStart(fs) + If Not ArchiveSafetyGate.IsArchiveSafeStream(fs, opt, descriptor, depth:=0) Then Return False StreamGuard.RewindToStart(fs) + entries = ArchiveExtractor.TryExtractArchiveStreamToMemory(fs, opt, descriptor) Return entries IsNot Nothing AndAlso entries.Count > 0 End Using + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -719,12 +804,18 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf ex Is ObjectDisposedException LogGuard.Debug(opt.Logger, $"[ArchiveCollect] Datei-Fehler: {ex.Message}") entries = Array.Empty(Of ZipExtractedEntry)() + Return False End Try End Function - Friend Shared Function TryCollectFromBytes(data As Byte(), opt As FileTypeProjectOptions, - ByRef entries As IReadOnlyList(Of ZipExtractedEntry)) As Boolean + Friend Shared Function TryCollectFromBytes _ + ( + data As Byte(), + opt As FileTypeProjectOptions, + ByRef entries As IReadOnlyList(Of ZipExtractedEntry) + ) As Boolean + Dim descriptor As ArchiveDescriptor = Nothing entries = Array.Empty(Of ZipExtractedEntry)() @@ -734,10 +825,12 @@ Namespace Global.Tomtastisch.FileClassifier Try If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False If Not ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) Then Return False + Using ms As New MemoryStream(data, writable:=False) entries = ArchiveExtractor.TryExtractArchiveStreamToMemory(ms, opt, descriptor) Return entries IsNot Nothing AndAlso entries.Count > 0 End Using + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -773,13 +866,15 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Verarbeitet ein Archiv über SharpCompress fail-closed und optionalen Entry-Callback. 
''' - Public Function Process( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - containerTypeValue As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean Implements IArchiveBackend.Process + Public Function Process _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + containerTypeValue As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean Implements IArchiveBackend.Process + Dim mapped As ArchiveContainerType Dim entries As List(Of SharpCompress.Archives.IArchiveEntry) Dim nestedResult As Boolean = False @@ -803,19 +898,31 @@ Namespace Global.Tomtastisch.FileClassifier If containerTypeValue = ArchiveContainerType.GZip AndAlso Not gzipWrapped Then Return False Using archive = OpenArchiveForContainerCompat(stream, containerTypeValue) + If archive Is Nothing Then Return False + mapped = ArchiveTypeResolver.MapArchiveType(archive.Type) - gzipWrappedTar = gzipWrapped AndAlso containerTypeValue = ArchiveContainerType.GZip AndAlso _ - mapped = ArchiveContainerType.Tar + + gzipWrappedTar = gzipWrapped AndAlso containerTypeValue = ArchiveContainerType.GZip _ + AndAlso mapped = ArchiveContainerType.Tar + If mapped <> containerTypeValue AndAlso Not gzipWrappedTar Then Return False - entries = archive.Entries. - OrderBy(Function(e) If(e.Key, String.Empty), StringComparer.Ordinal). 
- ToList() + entries = archive.Entries.OrderBy( + Function(e) If(e.Key, String.Empty), + StringComparer.Ordinal + ).ToList() If Not gzipWrappedTar Then - nestedHandled = TryProcessNestedGArchive(entries, opt, depth, containerTypeValue, extractEntry, - nestedResult) + nestedHandled = TryProcessNestedGArchive( + entries, + opt, + depth, + containerTypeValue, + extractEntry, + nestedResult + ) + If nestedHandled Then Return nestedResult End If @@ -836,9 +943,11 @@ Namespace Global.Tomtastisch.FileClassifier If Not model.IsDirectory Then knownSize = 0 requireKnownForTotal = (extractEntry Is Nothing) OrElse depth > 0 + If gzipWrappedTar Then requireKnownForTotal = False End If + If Not TryGetValidatedSize(model, opt, knownSize, requireKnownForTotal) Then Return False totalUncompressed += knownSize If totalUncompressed > opt.MaxZipTotalUncompressedBytes Then Return False @@ -865,20 +974,24 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function OpenArchiveForContainerCompat(stream As Stream, - containerTypeValue As ArchiveContainerType) _ - As SharpCompress.Archives.IArchive + Private Shared Function OpenArchiveForContainerCompat _ + ( + stream As Stream, + containerTypeValue As ArchiveContainerType + ) As SharpCompress.Archives.IArchive + Return ArchiveSharpCompressCompat.OpenArchiveForContainer(stream, containerTypeValue) End Function Private Shared Function TryProcessNestedGArchive( - entries As List(Of SharpCompress.Archives.IArchiveEntry), - opt As FileTypeProjectOptions, - depth As Integer, - containerType As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean), - ByRef nestedResult As Boolean - ) As Boolean + entries As List(Of SharpCompress.Archives.IArchiveEntry), + opt As FileTypeProjectOptions, + depth As Integer, + containerType As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean), + ByRef nestedResult As Boolean + ) As Boolean + Dim onlyEntry As 
SharpCompress.Archives.IArchiveEntry Dim model As IArchiveEntryModel Dim payload As Byte() = Nothing @@ -915,32 +1028,42 @@ Namespace Global.Tomtastisch.FileClassifier End If Using nestedMs As New MemoryStream(payload, writable:=False) - nestedResult = ArchiveProcessingEngine.ProcessArchiveStream(nestedMs, opt, depth + 1, nestedDescriptor, - extractEntry) + nestedResult = ArchiveProcessingEngine.ProcessArchiveStream( + nestedMs, + opt, + depth + 1, + nestedDescriptor, + extractEntry + ) End Using Return True End Function - Private Shared Function TryReadEntryPayloadBoundedWithOptions( - entry As SharpCompress.Archives.IArchiveEntry, - maxBytes As Long, - opt As FileTypeProjectOptions, - ByRef payload As Byte() - ) As Boolean + Private Shared Function TryReadEntryPayloadBoundedWithOptions _ + ( + entry As SharpCompress.Archives.IArchiveEntry, + maxBytes As Long, + opt As FileTypeProjectOptions, + ByRef payload As Byte() + ) As Boolean + payload = Array.Empty(Of Byte)() If entry Is Nothing Then Return False If maxBytes <= 0 Then Return False If opt Is Nothing Then Return False Try + Using source = entry.OpenEntryStream() If source Is Nothing OrElse Not source.CanRead Then Return False + Using ms As New MemoryStream() StreamBounds.CopyBounded(source, ms, maxBytes) payload = ms.ToArray() Return True End Using End Using + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -956,8 +1079,14 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function TryGetValidatedSize(entry As IArchiveEntryModel, opt As FileTypeProjectOptions, - ByRef knownSize As Long, requireKnownForTotal As Boolean) As Boolean + Private Shared Function TryGetValidatedSize _ + ( + entry As IArchiveEntryModel, + opt As FileTypeProjectOptions, + ByRef knownSize As Long, + requireKnownForTotal As Boolean + ) As Boolean + Dim value As Long? 
knownSize = 0 @@ -980,8 +1109,13 @@ Namespace Global.Tomtastisch.FileClassifier Return TryMeasureEntrySize(entry, opt, knownSize) End Function - Private Shared Function TryMeasureEntrySize(entry As IArchiveEntryModel, opt As FileTypeProjectOptions, - ByRef measured As Long) As Boolean + Private Shared Function TryMeasureEntrySize _ + ( + entry As IArchiveEntryModel, + opt As FileTypeProjectOptions, + ByRef measured As Long + ) As Boolean + Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte Dim n As Integer @@ -993,6 +1127,7 @@ Namespace Global.Tomtastisch.FileClassifier Try Using source = entry.OpenStream() If source Is Nothing OrElse Not source.CanRead Then Return False + While True n = source.Read(buf, 0, buf.Length) If n <= 0 Then Exit While @@ -1000,7 +1135,9 @@ Namespace Global.Tomtastisch.FileClassifier If measured > opt.MaxZipEntryUncompressedBytes Then Return False End While End Using + Return True + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse diff --git a/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb index 559db333..4c440d79 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb @@ -24,17 +24,24 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function ValidateArchiveStream(stream As Stream, opt As FileTypeProjectOptions, depth As Integer) _ - As Boolean + Friend Shared Function ValidateArchiveStream _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer + ) As Boolean + Return ProcessArchiveStream(stream, opt, depth, Nothing) End Function - Friend Shared Function ProcessArchiveStream( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - extractEntry As Func(Of ZipArchiveEntry, Boolean) - ) As Boolean + Friend Shared Function 
ProcessArchiveStream _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + extractEntry As Func(Of ZipArchiveEntry, Boolean) + ) As Boolean + Dim totalUncompressed As Long Dim ordered As IEnumerable(Of ZipArchiveEntry) Dim u As Long @@ -74,6 +81,7 @@ Namespace Global.Tomtastisch.FileClassifier Try Using es = e.Open() + Using nestedMs = RecyclableStreams.GetStream("ArchiveStreamEngine.Nested") StreamBounds.CopyBounded(es, nestedMs, opt.MaxZipNestedBytes) nestedMs.Position = 0 @@ -85,7 +93,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -106,7 +114,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -118,7 +126,12 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function IsNestedArchiveEntry(entry As ZipArchiveEntry, opt As FileTypeProjectOptions) As Boolean + Private Shared Function IsNestedArchiveEntry _ + ( + entry As ZipArchiveEntry, + opt As FileTypeProjectOptions + ) As Boolean + Dim header(15) As Byte Dim read As Integer Dim exact As Byte() @@ -141,7 +154,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is 
NotSupportedException OrElse @@ -173,13 +186,15 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Verarbeitet ZIP-Archive fail-closed über die Managed-Archive-Engine. ''' - Public Function Process( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - containerTypeValue As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean Implements IArchiveBackend.Process + Public Function Process _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + containerTypeValue As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean Implements IArchiveBackend.Process + If containerTypeValue <> ArchiveContainerType.Zip Then Return False If extractEntry Is Nothing Then diff --git a/src/FileTypeDetection/Infrastructure/CoreInternals.vb b/src/FileTypeDetection/Infrastructure/CoreInternals.vb index 8c722e66..b013e948 100644 --- a/src/FileTypeDetection/Infrastructure/CoreInternals.vb +++ b/src/FileTypeDetection/Infrastructure/CoreInternals.vb @@ -371,9 +371,9 @@ Namespace Global.Tomtastisch.FileClassifier Return FileTypeRegistry.Resolve(FileKind.Unknown) End If - If hasDocxMarker Then Return FileTypeRegistry.Resolve(FileKind.Docx) - If hasXlsxMarker Then Return FileTypeRegistry.Resolve(FileKind.Xlsx) - If hasPptxMarker Then Return FileTypeRegistry.Resolve(FileKind.Pptx) + If hasDocxMarker Then Return FileTypeRegistry.Resolve(FileKind.Doc) + If hasXlsxMarker Then Return FileTypeRegistry.Resolve(FileKind.Xls) + If hasPptxMarker Then Return FileTypeRegistry.Resolve(FileKind.Ppt) End If If hasOpenDocumentConflict Then @@ -418,12 +418,12 @@ Namespace Global.Tomtastisch.FileClassifier If String.IsNullOrWhiteSpace(mimeValue) Then Return FileKind.Unknown normalizedMime = mimeValue.Trim().ToLowerInvariant() - If normalizedMime = "application/vnd.oasis.opendocument.text" Then Return FileKind.Docx - If normalizedMime = "application/vnd.oasis.opendocument.text-template" Then 
Return FileKind.Docx - If normalizedMime = "application/vnd.oasis.opendocument.spreadsheet" Then Return FileKind.Xlsx - If normalizedMime = "application/vnd.oasis.opendocument.spreadsheet-template" Then Return FileKind.Xlsx - If normalizedMime = "application/vnd.oasis.opendocument.presentation" Then Return FileKind.Pptx - If normalizedMime = "application/vnd.oasis.opendocument.presentation-template" Then Return FileKind.Pptx + If normalizedMime = "application/vnd.oasis.opendocument.text" Then Return FileKind.Doc + If normalizedMime = "application/vnd.oasis.opendocument.text-template" Then Return FileKind.Doc + If normalizedMime = "application/vnd.oasis.opendocument.spreadsheet" Then Return FileKind.Xls + If normalizedMime = "application/vnd.oasis.opendocument.spreadsheet-template" Then Return FileKind.Xls + If normalizedMime = "application/vnd.oasis.opendocument.presentation" Then Return FileKind.Ppt + If normalizedMime = "application/vnd.oasis.opendocument.presentation-template" Then Return FileKind.Ppt Return FileKind.Unknown End Function @@ -611,9 +611,9 @@ Namespace Global.Tomtastisch.FileClassifier If hasPowerPoint Then markerCount += 1 If markerCount <> 1 Then Return FileTypeRegistry.Resolve(FileKind.Unknown) - If hasWord Then Return FileTypeRegistry.Resolve(FileKind.Docx) - If hasExcel Then Return FileTypeRegistry.Resolve(FileKind.Xlsx) - If hasPowerPoint Then Return FileTypeRegistry.Resolve(FileKind.Pptx) + If hasWord Then Return FileTypeRegistry.Resolve(FileKind.Doc) + If hasExcel Then Return FileTypeRegistry.Resolve(FileKind.Xls) + If hasPowerPoint Then Return FileTypeRegistry.Resolve(FileKind.Ppt) Return FileTypeRegistry.Resolve(FileKind.Unknown) End Function diff --git a/src/FileTypeDetection/Infrastructure/MimeProvider.vb b/src/FileTypeDetection/Infrastructure/MimeProvider.vb index 1dfd55a3..0457ab7e 100644 --- a/src/FileTypeDetection/Infrastructure/MimeProvider.vb +++ b/src/FileTypeDetection/Infrastructure/MimeProvider.vb @@ -29,7 +29,11 @@ Namespace 
Global.Tomtastisch.FileClassifier ''' ''' Dateiendung mit oder ohne führenden Punkt. ''' Kanonischer MIME-Typ oder leerer String. - Friend Shared Function GetMime(extWithDot As String) As String + Friend Shared Function GetMime _ + ( + extWithDot As String + ) As String + Dim ext As String = extWithDot If String.IsNullOrWhiteSpace(extWithDot) Then Return String.Empty diff --git a/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb b/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb index bec15d0a..f34534cb 100644 --- a/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb +++ b/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb @@ -63,7 +63,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Zu kodierende Eingabedaten; Nothing wird als leeres Array behandelt. ''' Hex-String in Kleinbuchstaben ohne Trennzeichen. - Public Function EncodeLowerHex(data As Byte()) As String Implements IHexCodec.EncodeLowerHex + Public Function EncodeLowerHex _ + ( + data As Byte() + ) As String Implements IHexCodec.EncodeLowerHex + Dim safeData = If(data, Array.Empty(Of Byte)()) Return Convert.ToHexString(safeData).ToLowerInvariant() End Function @@ -93,7 +97,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' SHA256-Digest als Byte-Array. - Public Function ComputeHash(data As Byte()) As Byte() Implements ISha256Primitives.ComputeHash + Public Function ComputeHash _ + ( + data As Byte() + ) As Byte() Implements ISha256Primitives.ComputeHash + Dim safeData = If(data, Array.Empty(Of Byte)()) Return Security.Cryptography.SHA256.HashData(safeData) End Function @@ -103,7 +111,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' SHA256-Digest als Hex-String in Kleinbuchstaben. 
- Public Function ComputeHashHex(data As Byte()) As String Implements ISha256Primitives.ComputeHashHex + Public Function ComputeHashHex _ + ( + data As Byte() + ) As String Implements ISha256Primitives.ComputeHashHex + Return _codec.EncodeLowerHex(ComputeHash(data)) End Function End Class @@ -122,7 +134,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' Fasthash als . - Public Function ComputeHashUInt64(data As Byte()) As ULong Implements IFastHash64.ComputeHashUInt64 + Public Function ComputeHashUInt64 _ + ( + data As Byte() + ) As ULong Implements IFastHash64.ComputeHashUInt64 + Dim safeData = If(data, Array.Empty(Of Byte)()) Return IO.Hashing.XxHash3.HashToUInt64(safeData) End Function @@ -132,7 +148,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' 16-stelliger Hex-String in Kleinbuchstaben. - Public Function ComputeHashHex(data As Byte()) As String Implements IFastHash64.ComputeHashHex + Public Function ComputeHashHex _ + ( + data As Byte() + ) As String Implements IFastHash64.ComputeHashHex + Return ComputeHashUInt64(data).ToString("x16", CultureInfo.InvariantCulture) End Function End Class diff --git a/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb b/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb index 31abe3a2..81b89684 100644 --- a/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb +++ b/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb @@ -65,7 +65,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Zu kodierende Eingabedaten; Nothing wird als leeres Array behandelt. ''' Hex-String in Kleinbuchstaben ohne Trennzeichen. 
- Public Function EncodeLowerHex(data As Byte()) As String Implements IHexCodec.EncodeLowerHex + Public Function EncodeLowerHex _ + ( + data As Byte() + ) As String Implements IHexCodec.EncodeLowerHex + Dim safeData = If(data, Array.Empty(Of Byte)()) Dim chars As Char() Dim index As Integer = 0 @@ -107,7 +111,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' SHA256-Digest als Byte-Array. - Public Function ComputeHash(data As Byte()) As Byte() Implements ISha256Primitives.ComputeHash + Public Function ComputeHash _ + ( + data As Byte() + ) As Byte() Implements ISha256Primitives.ComputeHash + Dim safeData = If(data, Array.Empty(Of Byte)()) Using sha As Security.Cryptography.SHA256 = Security.Cryptography.SHA256.Create() Return sha.ComputeHash(safeData) @@ -119,7 +127,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' SHA256-Digest als Hex-String in Kleinbuchstaben. - Public Function ComputeHashHex(data As Byte()) As String Implements ISha256Primitives.ComputeHashHex + Public Function ComputeHashHex _ + ( + data As Byte() + ) As String Implements ISha256Primitives.ComputeHashHex + Return _codec.EncodeLowerHex(ComputeHash(data)) End Function End Class @@ -138,7 +150,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' Fasthash als . - Public Function ComputeHashUInt64(data As Byte()) As ULong Implements IFastHash64.ComputeHashUInt64 + Public Function ComputeHashUInt64 _ + ( + data As Byte() + ) As ULong Implements IFastHash64.ComputeHashUInt64 + Dim safeData = If(data, Array.Empty(Of Byte)()) Return IO.Hashing.XxHash3.HashToUInt64(safeData) End Function @@ -148,7 +164,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' 16-stelliger Hex-String in Kleinbuchstaben. 
- Public Function ComputeHashHex(data As Byte()) As String Implements IFastHash64.ComputeHashHex + Public Function ComputeHashHex _ + ( + data As Byte() + ) As String Implements IFastHash64.ComputeHashHex + Return ComputeHashUInt64(data).ToString("x16", CultureInfo.InvariantCulture) End Function End Class diff --git a/src/FileTypeDetection/Utils/EnumUtils.vb b/src/FileTypeDetection/Utils/EnumUtils.vb new file mode 100644 index 00000000..9ccf5c18 --- /dev/null +++ b/src/FileTypeDetection/Utils/EnumUtils.vb @@ -0,0 +1,251 @@ +' ============================================================================ +' FILE: EnumUtils.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' +' Kontext: +' - Allgemeine Enum-Helfer (Java-ähnliches values()). +' - Liefert Enum-Werte als typisiertes Array, optional sortiert und optional als Index-Range. +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System + +Namespace Global.Tomtastisch.FileClassifier.Utils + + ''' + ''' Utility-Funktionen für Enum-Typen (values()). + ''' + ''' + ''' + ''' Zweck: + ''' - Liefert Enum-Werte als typisiertes Array ohne LINQ. + ''' - Optional: Sortierung nach numerischem Enum-Wert. + ''' - Optional: Index-Range (from/to) mit deterministischem Clamping. + ''' + ''' + ''' Nicht-Ziele: + ''' - Keine zustandsbehaftete Logik. + ''' - Keine Abhängigkeiten auf Projektdienste (I/O, Logger, Policy-Engine). + ''' - Keine Reflection-Features außer . 
+ ''' + ''' + Public NotInheritable Class EnumUtils + + Private Sub New() + End Sub + + + ' ===================================================================== + ' Internal (Projekt-intern): Sortier-Optionen + ' ===================================================================== + + Friend Enum EnumSortOrder + None = 0 + Ascending = 1 + Descending = 2 + End Enum + + + ' ===================================================================== + ' Public API + ' ===================================================================== + + ''' + ''' Liefert alle Werte eines Enum-Typs als typisiertes Array. + ''' + ''' + ''' + ''' Ablaufstruktur: + ''' 1) Werte werden über geladen, + ''' 2) Ausgabe erfolgt als typisiertes Array TEnum(). + ''' + ''' + ''' Hinweis: + ''' - Diese öffentliche Überladung liefert bewusst die gesamte Menge ohne Sortierung und ohne Range. + ''' - Erweiterte Optionen (Sortierung/Range) sind projektintern gehalten. + ''' + ''' + ''' Enum-Typ. + ''' Enum-Werte als Array. + ''' Wird ausgelöst, wenn kein Enum ist. + ''' + ''' + ''' ' Beispiel: alle Werte ohne Sortierung/Range + ''' Dim values() As ExampleSlot = EnumUtils.GetValues(Of ExampleSlot)() + ''' For Each v As ExampleSlot In values + ''' Console.WriteLine(v) + ''' Next + ''' + ''' + Public Shared Function GetValues(Of TEnum As Structure)() As TEnum() + + Return GetValues(Of TEnum)(EnumSortOrder.None, fromIndex:=Nothing, toIndex:=Nothing) + + End Function + + + ' ===================================================================== + ' Internal API (Projekt-intern): Sortierung + Range + ' ===================================================================== + + ''' + ''' Liefert Enum-Werte als typisiertes Array (optional sortiert) und optional als Index-Range. + ''' + ''' + ''' + ''' Ablaufstruktur: + ''' 1) Werte werden über geladen, + ''' 2) optional: Sortierung nach numerischem Enum-Wert, + ''' 3) Range wird deterministisch geklemmt, + ''' 4) Ausgabe erfolgt als Slice über . 
+ ''' + ''' + ''' Range-Semantik (0-basiert, inklusive): + ''' - Keine Angabe: gesamte Menge. + ''' - Nur : von bis letztes Element. + ''' - Nur : von 0 bis . + ''' - Beide: von bis . + ''' + ''' + ''' Clamping-Regeln (deterministisch): + ''' 1) wird zuerst geklemmt: + ''' - < 0 => 0 + ''' - > max => max + ''' - Nothing => max + ''' 2) wird danach geklemmt: + ''' - Nothing => 0 + ''' - < 0 => 0 + ''' - > max2 => max2 + ''' wobei max2 = geklemmter (falls gesetzt), sonst max. + ''' + ''' + ''' Fail-Closed: + ''' - Ist kein Enum, wird eine ausgelöst. + ''' - Leere Enums liefern ein leeres Array. + ''' + ''' + ''' Enum-Typ. + ''' + ''' Sortierreihenfolge: + ''' - : keine Sortierung (Originalreihenfolge von ). + ''' - : aufsteigend nach numerischem Enum-Wert. + ''' - : absteigend nach numerischem Enum-Wert. + ''' + ''' Startindex (0-basiert, inklusive); Nothing bedeutet 0. + ''' Endindex (0-basiert, inklusive); Nothing bedeutet max. + ''' Enum-Werte als Array (ggf. sortiert und/oder gefiltert). + ''' Wird ausgelöst, wenn kein Enum ist. 
+ ''' + ''' + ''' ' Aufsteigend sortiert, dann Slice der Indizes 1..3 (inklusive) + ''' Dim slice() As ExampleSlot = EnumUtils.GetValues(Of ExampleSlot)( + ''' sortOrder:=EnumUtils.EnumSortOrder.Ascending, + ''' fromIndex:=1, + ''' toIndex:=3 + ''' ) + ''' + ''' + Friend Shared Function GetValues(Of TEnum As Structure) _ + ( + Optional sortOrder As EnumSortOrder = EnumSortOrder.None, + Optional fromIndex As Nullable(Of Integer) = Nothing, + Optional toIndex As Nullable(Of Integer) = Nothing + ) As TEnum() + + ' Deklarationsblock + Dim enumType As Type = GetType(TEnum) + Dim raw As Array = Nothing + + Dim values() As TEnum = Nothing + Dim keys() As Long = Nothing + + Dim i As Integer + Dim count As Integer = 0 + Dim maxIndex As Integer = 0 + + Dim effectiveTo As Integer = 0 + Dim effectiveMaxFrom As Integer = 0 + Dim effectiveFrom As Integer = 0 + + Dim length As Integer = 0 + Dim result() As TEnum = Nothing + + + ' ----------------------------------------------------------------- + ' Guard-Clauses + ' ----------------------------------------------------------------- + If Not enumType.IsEnum Then + Throw New ArgumentException("TEnum muss ein Enum-Typ sein.", NameOf(TEnum)) + End If + + + ' ----------------------------------------------------------------- + ' Werte laden + ' ----------------------------------------------------------------- + raw = [Enum].GetValues(enumType) + + count = raw.Length + If count = 0 Then + Return New TEnum() {} + End If + + maxIndex = count - 1 + + values = New TEnum(count - 1) {} + For i = 0 To count - 1 + values(i) = CType(raw.GetValue(i), TEnum) + Next + + + ' ----------------------------------------------------------------- + ' Optional: Sortierung (Keys nur bei Bedarf) + ' ----------------------------------------------------------------- + If sortOrder <> EnumSortOrder.None Then + + keys = New Long(count - 1) {} + For i = 0 To count - 1 + keys(i) = Convert.ToInt64(values(i)) + Next + + Array.Sort(keys, values) + + If sortOrder = 
EnumSortOrder.Descending Then + Array.Reverse(values) + End If + + End If + + + ' ----------------------------------------------------------------- + ' Range: toIndex zuerst clampen, dann fromIndex + ' ----------------------------------------------------------------- + effectiveTo = If(toIndex.HasValue, toIndex.Value, maxIndex) + effectiveTo = Math.Min(Math.Max(effectiveTo, 0), maxIndex) + + effectiveMaxFrom = If(toIndex.HasValue, effectiveTo, maxIndex) + + effectiveFrom = If(fromIndex.HasValue, fromIndex.Value, 0) + effectiveFrom = Math.Min(Math.Max(effectiveFrom, 0), effectiveMaxFrom) + + + ' ----------------------------------------------------------------- + ' Slice kopieren (inklusive) + ' ----------------------------------------------------------------- + length = (effectiveTo - effectiveFrom) + 1 + result = New TEnum(length - 1) {} + + Array.Copy(values, effectiveFrom, result, 0, length) + + Return result + + End Function + + End Class + +End Namespace diff --git a/src/FileTypeDetection/Utils/GuardUtils.vb b/src/FileTypeDetection/Utils/GuardUtils.vb new file mode 100644 index 00000000..76cececa --- /dev/null +++ b/src/FileTypeDetection/Utils/GuardUtils.vb @@ -0,0 +1,149 @@ +' ============================================================================ +' FILE: GuardUtils.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' +' Kontext: +' - Minimale Guard-Utilities für Argumentprüfung (fail-closed via Exceptions). +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System + +Namespace Global.Tomtastisch.FileClassifier.Utils + + ''' + ''' Utility-Funktionen für Guard-Clauses (Argumentprüfung). + ''' + ''' + ''' + ''' Zweck: + ''' - Zentralisierte, konsistente Argumentprüfungen. 
+ ''' - Reduziert Boilerplate in Konstruktoren und Public APIs. + ''' + ''' + ''' Fail-Closed: + ''' - Bei Verstoß wird eine passende Exception ausgelöst (ArgumentNull/Argument/ArgumentOutOfRange). + ''' - Keine stillen Korrekturen, keine Side-Effects. + ''' + ''' + Public NotInheritable Class GuardUtils + + Private Sub New() + End Sub + + + ' ===================================================================== + ' Public API (Shared; Utility, stateless) + ' ===================================================================== + + ''' + ''' Erzwingt, dass nicht Nothing ist. + ''' + ''' + ''' + ''' Ablaufstruktur: + ''' 1) Nullprüfung, + ''' 2) bei Nothing: . + ''' + ''' + ''' Beliebiger Referenztyp. + ''' Zu prüfender Wert. + ''' Parametername für Exception-Metadaten. + ''' Wird ausgelöst, wenn Nothing ist. + Public Shared Sub NotNothing(Of T)(value As T, paramName As String) + + ' Deklarationsblock + Dim isNull As Boolean + + isNull = (value Is Nothing) + If isNull Then + Throw New ArgumentNullException(paramName) + End If + + End Sub + + ''' + ''' Erzwingt, dass nicht Nothing ist und die erwartete Länge hat. + ''' + ''' + ''' + ''' Ablaufstruktur: + ''' 1) Nullprüfung, + ''' 2) Längenprüfung, + ''' 3) bei Abweichung: mit Erwartungs-/Istwert. + ''' + ''' + ''' Array, das geprüft werden soll. + ''' Erwartete Länge. + ''' Parametername für Exception-Metadaten. + ''' Wird ausgelöst, wenn Nothing ist. + ''' Wird ausgelöst, wenn die Länge nicht entspricht. + Public Shared Sub RequireLength(value As Array, expectedLength As Integer, paramName As String) + + ' Deklarationsblock + Dim actualLength As Integer + + If value Is Nothing Then + Throw New ArgumentNullException(paramName) + End If + + actualLength = value.Length + If actualLength <> expectedLength Then + Throw New ArgumentException( + $"Expected length {expectedLength}, but was {actualLength}.", + paramName + ) + End If + + End Sub + + ''' + ''' Erzwingt, dass als Wert in definiert ist. 
+ ''' + ''' + ''' + ''' Ablaufstruktur: + ''' 1) Prüft auf Nothing und Enum-Typ, + ''' 2) prüft Definition via , + ''' 3) bei Verstoß: . + ''' + ''' + ''' Hinweis: + ''' - Diese Guard-Funktion ist bewusst untyped, um auch Validierung bei dynamischen Enum-Zugriffen abzudecken. + ''' + ''' + ''' Enum-Typ, gegen den geprüft wird. + ''' Zu prüfender Enum-Wert (boxed). + ''' Parametername für Exception-Metadaten. + ''' Wird ausgelöst, wenn Nothing ist. + ''' Wird ausgelöst, wenn kein Enum ist. + ''' Wird ausgelöst, wenn nicht definiert ist. + Public Shared Sub EnumDefined(enumType As Type, value As Object, paramName As String) + + ' Deklarationsblock + Dim isOk As Boolean + + If enumType Is Nothing Then + Throw New ArgumentNullException(NameOf(enumType)) + End If + + If Not enumType.IsEnum Then + Throw New ArgumentException("enumType muss ein Enum-Typ sein.", NameOf(enumType)) + End If + + isOk = [Enum].IsDefined(enumType, value) + If Not isOk Then + Throw New ArgumentOutOfRangeException(paramName) + End If + + End Sub + + End Class + +End Namespace diff --git a/src/FileTypeDetection/Utils/IterableUtils.vb b/src/FileTypeDetection/Utils/IterableUtils.vb new file mode 100644 index 00000000..e0f20b6e --- /dev/null +++ b/src/FileTypeDetection/Utils/IterableUtils.vb @@ -0,0 +1,74 @@ +' ============================================================================ +' FILE: IterableUtils.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' +' Kontext: +' - Minimale Array-/Iterable-Helfer (Defensive Copies). +' ============================================================================ + +Option Strict On +Option Explicit On + +Namespace Global.Tomtastisch.FileClassifier.Utils + + ''' + ''' Utility-Funktionen für defensive Kopien (Array-basierte Rückgaben). 
+ ''' + ''' + ''' + ''' Zweck: + ''' - Verhindert, dass interne Arrays über Public API als Referenz nach außen geleakt werden. + ''' - Unterstützt defensive Copies bei Rückgaben und Snapshots. + ''' + ''' + ''' Fail-Closed: + ''' - Nothing bleibt Nothing; es findet keine implizite Erzeugung leerer Arrays statt. + ''' + ''' + Public NotInheritable Class IterableUtils + + Private Sub New() + End Sub + + + ' ===================================================================== + ' Public API (Shared; Utility, stateless) + ' ===================================================================== + + ''' + ''' Erstellt eine defensive Kopie von . + ''' + ''' + ''' + ''' Ablaufstruktur: + ''' 1) Nullprüfung, + ''' 2) Kopie via (shallow copy), + ''' 3) Rückgabe als typisiertes Array. + ''' + ''' + ''' Hinweis: + ''' - Bei Referenztypen werden die Referenzen kopiert (shallow copy), nicht die Objekte selbst. + ''' + ''' + ''' Elementtyp. + ''' Quelle; Nothing bleibt Nothing. + ''' Defensive Kopie oder Nothing. + Public Shared Function CloneArray(Of T)(source As T()) As T() + + ' Deklarationsblock + Dim copy() As T + + If source Is Nothing Then Return Nothing + + copy = CType(source.Clone(), T()) + Return copy + + End Function + + End Class + +End Namespace diff --git a/src/FileTypeDetection/Utils/README.md b/src/FileTypeDetection/Utils/README.md new file mode 100644 index 00000000..9ffd2596 --- /dev/null +++ b/src/FileTypeDetection/Utils/README.md @@ -0,0 +1,25 @@ +# Utils Modul + +## 1. Zweck +Dieses Verzeichnis enthaelt kleine, wiederverwendbare Utility-Helfer fuer Guards, Enum-Werte und defensive Kopien. + +## 2. Inhalt +- `GuardUtils.vb` +- `EnumUtils.vb` +- `IterableUtils.vb` + +## 3. API und Verhalten +- Utilities sind stateless und deterministisch. +- `GuardUtils` validiert Argumente fail-closed per Exceptions. +- `EnumUtils` liefert typisierte Enum-Werte ohne LINQ-Zwang in Call-Sites. +- `IterableUtils` erstellt defensive Kopien fuer Array-Rueckgaben. + +## 4. 
Verifikation +- Nutzung erfolgt in Core-/Abstraction-Typen; Korrektheit wird durch bestehende Unit- und Contract-Tests abgesichert. + +## 5. Diagramm +N/A + +## 6. Verweise +- [Modul-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) +- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt b/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt index 0fc137a7..4f895c90 100644 --- a/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt +++ b/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt @@ -1,12 +1,12 @@ -F:Tomtastisch.FileClassifier.FileKind.Docx:Tomtastisch.FileClassifier.FileKind +F:Tomtastisch.FileClassifier.FileKind.Doc:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Gif:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Jpeg:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Pdf:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Png:Tomtastisch.FileClassifier.FileKind -F:Tomtastisch.FileClassifier.FileKind.Pptx:Tomtastisch.FileClassifier.FileKind +F:Tomtastisch.FileClassifier.FileKind.Ppt:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Unknown:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Webp:Tomtastisch.FileClassifier.FileKind -F:Tomtastisch.FileClassifier.FileKind.Xlsx:Tomtastisch.FileClassifier.FileKind +F:Tomtastisch.FileClassifier.FileKind.Xls:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Zip:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.HashSourceType.ArchiveEntries:Tomtastisch.FileClassifier.HashSourceType F:Tomtastisch.FileClassifier.HashSourceType.FilePath:Tomtastisch.FileClassifier.HashSourceType @@ -48,6 +48,9 @@ 
M:Tomtastisch.FileClassifier.FileTypeOptions.LoadOptions(System.String):System.B M:Tomtastisch.FileClassifier.FileTypeProjectBaseline.ApplyDeterministicDefaults():System.Void M:Tomtastisch.FileClassifier.FileTypeProjectOptions..ctor() M:Tomtastisch.FileClassifier.HashOptions..ctor() +M:Tomtastisch.FileClassifier.HashRoundTripReport.Evidence(Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot):Tomtastisch.FileClassifier.HashEvidence +M:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalEquals(Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot):System.Boolean +M:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalEquals(Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot):System.Boolean M:Tomtastisch.FileClassifier.ZipExtractedEntry.OpenReadOnlyStream():System.IO.MemoryStream P:Tomtastisch.FileClassifier.DetectionDetail.DetectedType:Tomtastisch.FileClassifier.FileType P:Tomtastisch.FileClassifier.DetectionDetail.ExtensionVerified:System.Boolean @@ -94,20 +97,11 @@ P:Tomtastisch.FileClassifier.HashOptions.IncludeFastHash:System.Boolean P:Tomtastisch.FileClassifier.HashOptions.IncludePayloadCopies:System.Boolean P:Tomtastisch.FileClassifier.HashOptions.IncludeSecureHash:System.Boolean P:Tomtastisch.FileClassifier.HashOptions.MaterializedFileName:System.String -P:Tomtastisch.FileClassifier.HashRoundTripReport.H1:Tomtastisch.FileClassifier.HashEvidence -P:Tomtastisch.FileClassifier.HashRoundTripReport.H2:Tomtastisch.FileClassifier.HashEvidence -P:Tomtastisch.FileClassifier.HashRoundTripReport.H3:Tomtastisch.FileClassifier.HashEvidence -P:Tomtastisch.FileClassifier.HashRoundTripReport.H4:Tomtastisch.FileClassifier.HashEvidence P:Tomtastisch.FileClassifier.HashRoundTripReport.InputPath:System.String P:Tomtastisch.FileClassifier.HashRoundTripReport.IsArchiveInput:System.Boolean P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalConsistent:System.Boolean -P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalH1EqualsH2:System.Boolean 
-P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalH1EqualsH3:System.Boolean -P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalH1EqualsH4:System.Boolean P:Tomtastisch.FileClassifier.HashRoundTripReport.Notes:System.String -P:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalH1EqualsH2:System.Boolean -P:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalH1EqualsH3:System.Boolean -P:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalH1EqualsH4:System.Boolean +P:Tomtastisch.FileClassifier.HashRoundTripReport.Slots:Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot[] P:Tomtastisch.FileClassifier.ZipExtractedEntry.Content:System.Collections.Immutable.ImmutableArray P:Tomtastisch.FileClassifier.ZipExtractedEntry.RelativePath:System.String P:Tomtastisch.FileClassifier.ZipExtractedEntry.Size:System.Int32 @@ -127,15 +121,15 @@ T:class Tomtastisch.FileClassifier.HashRoundTripReport T:class Tomtastisch.FileClassifier.ZipExtractedEntry T:enum Tomtastisch.FileClassifier.FileKind T:enum Tomtastisch.FileClassifier.HashSourceType -V:Tomtastisch.FileClassifier.FileKind.Docx +V:Tomtastisch.FileClassifier.FileKind.Doc V:Tomtastisch.FileClassifier.FileKind.Gif V:Tomtastisch.FileClassifier.FileKind.Jpeg V:Tomtastisch.FileClassifier.FileKind.Pdf V:Tomtastisch.FileClassifier.FileKind.Png -V:Tomtastisch.FileClassifier.FileKind.Pptx +V:Tomtastisch.FileClassifier.FileKind.Ppt V:Tomtastisch.FileClassifier.FileKind.Unknown V:Tomtastisch.FileClassifier.FileKind.Webp -V:Tomtastisch.FileClassifier.FileKind.Xlsx +V:Tomtastisch.FileClassifier.FileKind.Xls V:Tomtastisch.FileClassifier.FileKind.Zip V:Tomtastisch.FileClassifier.HashSourceType.ArchiveEntries V:Tomtastisch.FileClassifier.HashSourceType.FilePath diff --git a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs index 3cf46cf9..da20459a 100644 --- a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs +++ 
b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs @@ -1,6 +1,7 @@ using FileTypeDetectionLib.Tests.Support; using Reqnroll; using Tomtastisch.FileClassifier; +using System.Collections.Generic; namespace FileTypeDetectionLib.Tests.Steps; @@ -9,6 +10,13 @@ public sealed class FileTypeDetectionSteps { private const string StateKey = "detection_state"; private const string ResourceColumn = "ressource"; + private static readonly IReadOnlyDictionary LegacyFileKindAliases = + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + ["Docx"] = FileKind.Doc, + ["Xlsx"] = FileKind.Xls, + ["Pptx"] = FileKind.Ppt + }; private readonly ScenarioContext _scenarioContext; public FileTypeDetectionSteps(ScenarioContext scenarioContext) @@ -276,8 +284,7 @@ public void WhenICheckCurrentBytesType(string expectedKind) { var state = State(); Assert.NotNull(state.CurrentPayload); - Assert.True( - Enum.TryParse(expectedKind, true, out var kind), + Assert.True(TryParseFileKindLiteral(expectedKind, out var kind), $"Unknown FileKind literal in feature: {expectedKind}"); var detector = new FileTypeDetector(); @@ -290,8 +297,7 @@ public void ThenTheDetectedKindIs(string expectedKind) var state = State(); Assert.NotNull(state.LastResult); - Assert.True( - Enum.TryParse(expectedKind, true, out var expected), + Assert.True(TryParseFileKindLiteral(expectedKind, out var expected), $"Unknown FileKind literal in feature: {expectedKind}"); Assert.Equal(expected, state.LastResult!.Kind); @@ -420,9 +426,9 @@ public void ThenHashReportIsLogicallyConsistent() var state = State(); Assert.NotNull(state.LastRoundTripReport); Assert.True(state.LastRoundTripReport!.LogicalConsistent); - Assert.True(state.LastRoundTripReport.LogicalH1EqualsH2); - Assert.True(state.LastRoundTripReport.LogicalH1EqualsH3); - Assert.True(state.LastRoundTripReport.LogicalH1EqualsH4); + Assert.True(state.LastRoundTripReport.LogicalEquals(HashRoundTripReport.HashSlot.H2)); + 
Assert.True(state.LastRoundTripReport.LogicalEquals(HashRoundTripReport.HashSlot.H3)); + Assert.True(state.LastRoundTripReport.LogicalEquals(HashRoundTripReport.HashSlot.H4)); } [Then("ist der Hashbericht als Archiv klassifiziert {string}")] @@ -540,6 +546,16 @@ private static void AssertResourceExists(string name) Assert.True(File.Exists(path), $"Test resource missing: {path}"); } + private static bool TryParseFileKindLiteral(string literal, out FileKind kind) + { + if (Enum.TryParse(literal, true, out kind)) + { + return true; + } + + return LegacyFileKindAliases.TryGetValue(literal, out kind); + } + private static byte[] CreateArchivePayload(string archiveType) { var normalized = archiveType.Trim().ToLowerInvariant(); diff --git a/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs index aaf755cb..9c58b8e8 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs @@ -13,7 +13,7 @@ public void DetectDetailed_ReturnsStructuredArchiveTrace_ForDocx() var path = TestResources.Resolve("sample.docx"); var detail = new FileTypeDetector().DetectDetailed(path); - Assert.Equal(FileKind.Docx, detail.DetectedType.Kind); + Assert.Equal(FileKind.Doc, detail.DetectedType.Kind); Assert.Equal("ArchiveStructuredRefined", detail.ReasonCode); Assert.True(detail.UsedZipContentCheck); Assert.True(detail.UsedStructuredRefinement); diff --git a/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs index f7afd2b8..bee7bd8e 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs @@ -12,9 +12,9 @@ public sealed class 
EndToEndFailClosedMatrixUnitTests { "sample.jpg", FileKind.Jpeg, false }, { "sample.gif", FileKind.Gif, false }, { "sample.webp", FileKind.Webp, false }, - { "sample.docx", FileKind.Docx, false }, - { "sample.xlsx", FileKind.Xlsx, false }, - { "sample.pptx", FileKind.Pptx, false }, + { "sample.docx", FileKind.Doc, false }, + { "sample.xlsx", FileKind.Xls, false }, + { "sample.pptx", FileKind.Ppt, false }, { "sample.zip", FileKind.Zip, true }, { "sample.7z", FileKind.Zip, true }, { "sample.rar", FileKind.Zip, true }, @@ -23,35 +23,35 @@ public sealed class EndToEndFailClosedMatrixUnitTests public static TheoryData SupportedAliasMatrix => new() { - { "sample.docx", ".doc", FileKind.Docx }, - { "sample.docx", ".docm", FileKind.Docx }, - { "sample.docx", ".docb", FileKind.Docx }, - { "sample.docx", ".dot", FileKind.Docx }, - { "sample.docx", ".dotm", FileKind.Docx }, - { "sample.docx", ".dotx", FileKind.Docx }, - { "sample.docx", ".odt", FileKind.Docx }, - { "sample.docx", ".ott", FileKind.Docx }, - { "sample.xlsx", ".xls", FileKind.Xlsx }, - { "sample.xlsx", ".xlsm", FileKind.Xlsx }, - { "sample.xlsx", ".xlsb", FileKind.Xlsx }, - { "sample.xlsx", ".xlt", FileKind.Xlsx }, - { "sample.xlsx", ".xltm", FileKind.Xlsx }, - { "sample.xlsx", ".xltx", FileKind.Xlsx }, - { "sample.xlsx", ".xltb", FileKind.Xlsx }, - { "sample.xlsx", ".xlam", FileKind.Xlsx }, - { "sample.xlsx", ".xla", FileKind.Xlsx }, - { "sample.xlsx", ".ods", FileKind.Xlsx }, - { "sample.xlsx", ".ots", FileKind.Xlsx }, - { "sample.pptx", ".ppt", FileKind.Pptx }, - { "sample.pptx", ".pptm", FileKind.Pptx }, - { "sample.pptx", ".pot", FileKind.Pptx }, - { "sample.pptx", ".potm", FileKind.Pptx }, - { "sample.pptx", ".potx", FileKind.Pptx }, - { "sample.pptx", ".pps", FileKind.Pptx }, - { "sample.pptx", ".ppsm", FileKind.Pptx }, - { "sample.pptx", ".ppsx", FileKind.Pptx }, - { "sample.pptx", ".odp", FileKind.Pptx }, - { "sample.pptx", ".otp", FileKind.Pptx }, + { "sample.docx", ".doc", FileKind.Doc }, + { 
"sample.docx", ".docm", FileKind.Doc }, + { "sample.docx", ".docb", FileKind.Doc }, + { "sample.docx", ".dot", FileKind.Doc }, + { "sample.docx", ".dotm", FileKind.Doc }, + { "sample.docx", ".dotx", FileKind.Doc }, + { "sample.docx", ".odt", FileKind.Doc }, + { "sample.docx", ".ott", FileKind.Doc }, + { "sample.xlsx", ".xls", FileKind.Xls }, + { "sample.xlsx", ".xlsm", FileKind.Xls }, + { "sample.xlsx", ".xlsb", FileKind.Xls }, + { "sample.xlsx", ".xlt", FileKind.Xls }, + { "sample.xlsx", ".xltm", FileKind.Xls }, + { "sample.xlsx", ".xltx", FileKind.Xls }, + { "sample.xlsx", ".xltb", FileKind.Xls }, + { "sample.xlsx", ".xlam", FileKind.Xls }, + { "sample.xlsx", ".xla", FileKind.Xls }, + { "sample.xlsx", ".ods", FileKind.Xls }, + { "sample.xlsx", ".ots", FileKind.Xls }, + { "sample.pptx", ".ppt", FileKind.Ppt }, + { "sample.pptx", ".pptm", FileKind.Ppt }, + { "sample.pptx", ".pot", FileKind.Ppt }, + { "sample.pptx", ".potm", FileKind.Ppt }, + { "sample.pptx", ".potx", FileKind.Ppt }, + { "sample.pptx", ".pps", FileKind.Ppt }, + { "sample.pptx", ".ppsm", FileKind.Ppt }, + { "sample.pptx", ".ppsx", FileKind.Ppt }, + { "sample.pptx", ".odp", FileKind.Ppt }, + { "sample.pptx", ".otp", FileKind.Ppt }, { "sample.zip", ".tar", FileKind.Zip }, { "sample.zip", ".tgz", FileKind.Zip }, { "sample.zip", ".tar.gz", FileKind.Zip }, diff --git a/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs index 90684448..b5119829 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs @@ -72,7 +72,7 @@ public void Detect_DocxPayloadWithPdfExtension_RemainsDocx_UnlessVerifyExtension var detectedWithoutExtensionPolicy = detector.Detect(path); var detectedWithExtensionPolicy = detector.Detect(path, true); - Assert.Equal(FileKind.Docx, detectedWithoutExtensionPolicy.Kind); + Assert.Equal(FileKind.Doc, 
detectedWithoutExtensionPolicy.Kind); Assert.Equal(FileKind.Unknown, detectedWithExtensionPolicy.Kind); } finally @@ -91,7 +91,7 @@ public void DetectAndVerifyExtension_AcceptsXlsmExtension_ForSpreadsheetOpenXmlP try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Xlsx, detected.Kind); + Assert.Equal(FileKind.Xls, detected.Kind); } finally { @@ -109,7 +109,7 @@ public void DetectAndVerifyExtension_AcceptsXlsbExtension_ForSpreadsheetBinaryWo try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Xlsx, detected.Kind); + Assert.Equal(FileKind.Xls, detected.Kind); } finally { @@ -127,7 +127,7 @@ public void DetectAndVerifyExtension_AcceptsOdsExtension_ForOpenDocumentSpreadsh try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Xlsx, detected.Kind); + Assert.Equal(FileKind.Xls, detected.Kind); } finally { @@ -145,7 +145,7 @@ public void DetectAndVerifyExtension_AcceptsDocExtension_ForLegacyOfficePayload( try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Docx, detected.Kind); + Assert.Equal(FileKind.Doc, detected.Kind); } finally { diff --git a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs index 69e3db9f..c70d9490 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs @@ -56,7 +56,7 @@ public void DetectDetailed_ReturnsStructuredRefined_ForDocx() var path = TestResources.Resolve("sample.docx"); var detail = new FileTypeDetector().DetectDetailed(path); - Assert.Equal(FileKind.Docx, detail.DetectedType.Kind); + Assert.Equal(FileKind.Doc, detail.DetectedType.Kind); Assert.Equal("ArchiveStructuredRefined", detail.ReasonCode); Assert.True(detail.UsedStructuredRefinement); } diff --git a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs 
b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs index 4d9e6ae9..3c1b0ba6 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs @@ -173,10 +173,10 @@ public void FinalizeArchiveDetection_ReturnsRefined_WhenNotUnknown() var trace = Activator.CreateInstance(traceType!); var opt = FileTypeProjectOptions.DefaultOptions(); - var refined = FileTypeRegistry.Resolve(FileKind.Docx); + var refined = FileTypeRegistry.Resolve(FileKind.Doc); var result = TestGuard.NotNull(method.Invoke(null, new[] { refined, opt, trace! }) as FileType); - Assert.Equal(FileKind.Docx, result.Kind); + Assert.Equal(FileKind.Doc, result.Kind); } [Fact] @@ -207,19 +207,19 @@ public void ExtensionMatchesKind_HandlesEmptyAndMismatch() } [Theory] - [InlineData("file.doc", FileKind.Docx)] - [InlineData("file.docm", FileKind.Docx)] - [InlineData("file.docx", FileKind.Docx)] - [InlineData("file.odt", FileKind.Docx)] - [InlineData("file.xls", FileKind.Xlsx)] - [InlineData("file.xlsm", FileKind.Xlsx)] - [InlineData("file.xlsx", FileKind.Xlsx)] - [InlineData("file.xlsb", FileKind.Xlsx)] - [InlineData("file.ods", FileKind.Xlsx)] - [InlineData("file.ppt", FileKind.Pptx)] - [InlineData("file.pptm", FileKind.Pptx)] - [InlineData("file.pptx", FileKind.Pptx)] - [InlineData("file.odp", FileKind.Pptx)] + [InlineData("file.doc", FileKind.Doc)] + [InlineData("file.docm", FileKind.Doc)] + [InlineData("file.docx", FileKind.Doc)] + [InlineData("file.odt", FileKind.Doc)] + [InlineData("file.xls", FileKind.Xls)] + [InlineData("file.xlsm", FileKind.Xls)] + [InlineData("file.xlsx", FileKind.Xls)] + [InlineData("file.xlsb", FileKind.Xls)] + [InlineData("file.ods", FileKind.Xls)] + [InlineData("file.ppt", FileKind.Ppt)] + [InlineData("file.pptm", FileKind.Ppt)] + [InlineData("file.pptx", FileKind.Ppt)] + [InlineData("file.odp", FileKind.Ppt)] public void 
ExtensionMatchesKind_AcceptsOfficeVariantAliases(string path, FileKind expectedKind) { var method = diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs index a7e5abad..02d6a62d 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs @@ -60,6 +60,13 @@ internal static string ComputeHmacSha256Hex(byte[] key, byte[] payload) using var hmac = new HMACSHA256(key); return Convert.ToHexString(hmac.ComputeHash(payload)).ToLowerInvariant(); } + + internal static Type GetInternalType(string fullName) + { + var type = typeof(EvidenceHashing).Assembly.GetType(fullName, throwOnError: false); + Assert.NotNull(type); + return type!; + } } // Section 1: SHA256 physical vs logical behavior @@ -400,7 +407,8 @@ public void EvidenceHashing_HashBytes_UsesLoadedIncludeFastHash() [Fact] public void ComputeFastHash_ReturnsEmpty_WhenOptionDisabled() { - var method = typeof(EvidenceHashing).GetMethod("ComputeFastHash", BindingFlags.NonPublic | BindingFlags.Static)!; + var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); + var method = coreType.GetMethod("ComputeFastHash", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var options = new HashOptions { IncludeFastHash = false }; @@ -634,9 +642,9 @@ public void VerifyRoundTrip_ProducesLogicalConsistency(string fixtureId, bool ex Assert.Equal(expectedArchive, report.IsArchiveInput); Assert.True(report.LogicalConsistent); - Assert.True(report.LogicalH1EqualsH2); - Assert.True(report.LogicalH1EqualsH3); - Assert.True(report.LogicalH1EqualsH4); + Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); + Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H3)); + Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H4)); } [Theory] @@ -784,7 +792,7 @@ public void 
VerifyRoundTrip_ReturnsFailure_WhenH1MissingLogicalDigest() var path = TestResources.Resolve("sample.pdf"); var report = EvidenceHashing.VerifyRoundTrip(path); - Assert.False(report.H1.Digests.HasLogicalHash); + Assert.False(report.Evidence(HashRoundTripReport.HashSlot.H1).Digests.HasLogicalHash); Assert.Contains("h1", report.Notes, StringComparison.OrdinalIgnoreCase); } @@ -955,7 +963,8 @@ public void ResolveHashOptions_FallsBack_WhenProjectOptionsNull() [Fact] public void NormalizedEntry_Defaults_WhenConstructedWithNulls() { - var type = typeof(EvidenceHashing).GetNestedTypes(BindingFlags.NonPublic) + var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); + var type = coreType.GetNestedTypes(BindingFlags.NonPublic | BindingFlags.Public) .First(t => t.Name == "NormalizedEntry"); var ctor = type.GetConstructors(BindingFlags.NonPublic | BindingFlags.Instance) @@ -963,9 +972,11 @@ public void NormalizedEntry_Defaults_WhenConstructedWithNulls() var instance = ctor.Invoke(new object?[] { null, null }); var relativePath = - (string)type.GetProperty("RelativePath", BindingFlags.NonPublic | BindingFlags.Instance)!.GetValue(instance)!; + (string)type.GetProperty("RelativePath", BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance)! + .GetValue(instance)!; var content = - (byte[])type.GetProperty("Content", BindingFlags.NonPublic | BindingFlags.Instance)!.GetValue(instance)!; + (byte[])type.GetProperty("Content", BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance)! 
+ .GetValue(instance)!; Assert.Equal(string.Empty, relativePath); Assert.NotNull(content); @@ -975,16 +986,16 @@ public void NormalizedEntry_Defaults_WhenConstructedWithNulls() [Fact] public void HashRoundTripReport_Constructor_DefaultsToFailureEvidence_WhenInputsNull() { - var report = new HashRoundTripReport("", isArchiveInput: false, h1: null, h2: null, h3: null, - h4: null, notes: null); + var report = new HashRoundTripReport("", isArchiveInput: false, notes: null, null, null, null, + null); Assert.False(report.LogicalConsistent); - Assert.False(report.LogicalH1EqualsH2); - Assert.False(report.LogicalH1EqualsH3); - Assert.False(report.LogicalH1EqualsH4); - Assert.False(report.PhysicalH1EqualsH2); - Assert.False(report.PhysicalH1EqualsH3); - Assert.False(report.PhysicalH1EqualsH4); + Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); + Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H3)); + Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H4)); + Assert.False(report.PhysicalEquals(HashRoundTripReport.HashSlot.H2)); + Assert.False(report.PhysicalEquals(HashRoundTripReport.HashSlot.H3)); + Assert.False(report.PhysicalEquals(HashRoundTripReport.HashSlot.H4)); } [Fact] @@ -1012,16 +1023,16 @@ public void HashRoundTripReport_Constructor_ReportsConsistency_WhenLogicalAndPhy digests: digest, notes: "ok"); - var report = new HashRoundTripReport("x", isArchiveInput: false, h1: evidence, h2: evidence, - h3: evidence, h4: evidence, notes: "ok"); + var report = new HashRoundTripReport("x", isArchiveInput: false, notes: "ok", evidence, evidence, + evidence, evidence); Assert.True(report.LogicalConsistent); - Assert.True(report.LogicalH1EqualsH2); - Assert.True(report.LogicalH1EqualsH3); - Assert.True(report.LogicalH1EqualsH4); - Assert.True(report.PhysicalH1EqualsH2); - Assert.True(report.PhysicalH1EqualsH3); - Assert.True(report.PhysicalH1EqualsH4); + Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); + 
Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H3)); + Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H4)); + Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H2)); + Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H3)); + Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H4)); } [Fact] @@ -1049,11 +1060,11 @@ public void HashRoundTripReport_Constructor_DistinguishesPhysicalWhenLogicalMiss digests: digest, notes: "ok"); - var report = new HashRoundTripReport("x", isArchiveInput: false, h1: evidence, h2: evidence, - h3: evidence, h4: evidence, notes: "ok"); + var report = new HashRoundTripReport("x", isArchiveInput: false, notes: "ok", evidence, evidence, + evidence, evidence); - Assert.False(report.LogicalH1EqualsH2); - Assert.True(report.PhysicalH1EqualsH2); + Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); + Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H2)); } [Fact] @@ -1083,7 +1094,8 @@ public void HashRoundTripReport_EqualPhysical_ReturnsFalse_WhenEvidenceNull() [Fact] public void NormalizeLabel_FallsBack_ForNullOrWhitespace() { - var method = typeof(EvidenceHashing).GetMethod("NormalizeLabel", BindingFlags.NonPublic | BindingFlags.Static)!; + var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); + var method = coreType.GetMethod("NormalizeLabel", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var label1 = TestGuard.NotNull(method.Invoke(null, new object?[] { null }) as string); @@ -1096,7 +1108,8 @@ public void NormalizeLabel_FallsBack_ForNullOrWhitespace() [Fact] public void CopyBytes_ReturnsEmpty_ForNullOrEmpty() { - var method = typeof(EvidenceHashing).GetMethod("CopyBytes", BindingFlags.NonPublic | BindingFlags.Static)!; + var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); + var method = 
coreType.GetMethod("CopyBytes", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var empty1 = TestGuard.NotNull(method.Invoke(null, new object?[] { null }) as byte[]); @@ -1109,7 +1122,8 @@ public void CopyBytes_ReturnsEmpty_ForNullOrEmpty() [Fact] public void TryReadFileBounded_ReturnsFalse_ForMissingPathOrOptions() { - var method = typeof(EvidenceHashing).GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; + var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIo"); + var method = ioType.GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var bytes = Array.Empty(); @@ -1127,7 +1141,8 @@ public void TryReadFileBounded_ReturnsFalse_ForMissingPathOrOptions() [Fact] public void TryReadFileBounded_ReturnsFalse_WhenFileTooLarge() { - var method = typeof(EvidenceHashing).GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; + var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIo"); + var method = ioType.GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); using var scope = TestTempPaths.CreateScope("ftd-hash-read"); diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs index 1781e08e..c10a0604 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs @@ -20,7 +20,7 @@ public void Detect_DoesNotLogWarning_ForStructuredDocxDetection() var source = TestResources.Resolve("sample.docx"); var detected = new FileTypeDetector().Detect(source); - Assert.Equal(FileKind.Docx, detected.Kind); + Assert.Equal(FileKind.Doc, detected.Kind); Assert.DoesNotContain(logger.Messages, m => m.Contains("Keine 
direkte Content-Erkennung", StringComparison.Ordinal)); } diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs index adef7cb8..b4f306f4 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs @@ -21,7 +21,7 @@ public void Detect_StillRefines_ArchiveContainers_WhenHeaderOnlyNonZipIsTrue() var source = TestResources.Resolve("sample.docx"); var detected = new FileTypeDetector().Detect(source); - Assert.Equal(FileKind.Docx, detected.Kind); + Assert.Equal(FileKind.Doc, detected.Kind); } [Fact] diff --git a/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs index 61f6427d..6e63e544 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs @@ -5,9 +5,9 @@ namespace FileTypeDetectionLib.Tests.Unit; public sealed class LegacyOfficeBinaryRefinerUnitTests { [Theory] - [InlineData("WordDocument", FileKind.Docx)] - [InlineData("Workbook", FileKind.Xlsx)] - [InlineData("PowerPoint Document", FileKind.Pptx)] + [InlineData("WordDocument", FileKind.Doc)] + [InlineData("Workbook", FileKind.Xls)] + [InlineData("PowerPoint Document", FileKind.Ppt)] public void TryRefineBytes_DetectsLegacyOfficeMarkers(string marker, FileKind expected) { var payload = CreateOleLikePayload(marker); diff --git a/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs index badef674..f5195a2c 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs @@ -25,10 +25,10 @@ public void TryRefineStream_ReturnsUnknown_ForUnreadableStream() } 
[Theory] - [InlineData("word/document.xml", FileKind.Docx)] - [InlineData("xl/workbook.xml", FileKind.Xlsx)] - [InlineData("xl/workbook.bin", FileKind.Xlsx)] - [InlineData("ppt/presentation.xml", FileKind.Pptx)] + [InlineData("word/document.xml", FileKind.Doc)] + [InlineData("xl/workbook.xml", FileKind.Xls)] + [InlineData("xl/workbook.bin", FileKind.Xls)] + [InlineData("ppt/presentation.xml", FileKind.Ppt)] public void TryRefineStream_DetectsOpenXmlKinds(string markerPath, FileKind expected) { var payload = CreateOpenXmlPackage(markerPath); @@ -40,12 +40,12 @@ public void TryRefineStream_DetectsOpenXmlKinds(string markerPath, FileKind expe } [Theory] - [InlineData("application/vnd.oasis.opendocument.text", FileKind.Docx)] - [InlineData("application/vnd.oasis.opendocument.text-template", FileKind.Docx)] - [InlineData("application/vnd.oasis.opendocument.spreadsheet", FileKind.Xlsx)] - [InlineData("application/vnd.oasis.opendocument.spreadsheet-template", FileKind.Xlsx)] - [InlineData("application/vnd.oasis.opendocument.presentation", FileKind.Pptx)] - [InlineData("application/vnd.oasis.opendocument.presentation-template", FileKind.Pptx)] + [InlineData("application/vnd.oasis.opendocument.text", FileKind.Doc)] + [InlineData("application/vnd.oasis.opendocument.text-template", FileKind.Doc)] + [InlineData("application/vnd.oasis.opendocument.spreadsheet", FileKind.Xls)] + [InlineData("application/vnd.oasis.opendocument.spreadsheet-template", FileKind.Xls)] + [InlineData("application/vnd.oasis.opendocument.presentation", FileKind.Ppt)] + [InlineData("application/vnd.oasis.opendocument.presentation-template", FileKind.Ppt)] public void TryRefineStream_DetectsOpenDocumentKinds(string mimeType, FileKind expected) { var payload = CreateOpenDocumentPackage(mimeType); From 1396e6d28c852ef866bf9bd05a141700b60253e4 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 12:46:01 +0100 Subject: [PATCH 08/34] test(governance): add policy-045 compliance and public 
API allowlist gates --- docs/governance/045_COMPLIANCE_STATUS_DE.MD | 44 +++++++++++++ .../Contracts/CodePolicy045ComplianceTests.cs | 63 +++++++++++++++++++ .../Contracts/PublicApiAllowlistTests.cs | 49 +++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 docs/governance/045_COMPLIANCE_STATUS_DE.MD create mode 100644 tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs create mode 100644 tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs diff --git a/docs/governance/045_COMPLIANCE_STATUS_DE.MD b/docs/governance/045_COMPLIANCE_STATUS_DE.MD new file mode 100644 index 00000000..cb45a0ed --- /dev/null +++ b/docs/governance/045_COMPLIANCE_STATUS_DE.MD @@ -0,0 +1,44 @@ +# 045 Compliance Status (DE) + +Stand: 2026-02-20 +Geltungsbereich: `src/FileTypeDetection/*` + +## Ziel +Nachweisbare, reproduzierbare Einhaltung der Kernvorgaben aus `045_CODE_QUALITY_POLICY_DE.MD` über automatisierte Tests und wiederholbare Build-/Testläufe. + +## Automatisierte Governance-Gates +1. Public-API-Allowlist-Gate + Test: `PublicApiAllowlistTests` + Datei: `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs` + Wirkung: Unerwartete neue Public-Typen werden als blocker erkannt. + +2. Public-API-Snapshot-Gate + Test: `PublicApiContractSnapshotTests` + Datei: `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiContractSnapshotTests.cs` + Wirkung: Surface-Drift in Public API wird deterministisch sichtbar. + +3. 
045 Core-Layout-/Policy-Gate + Test: `CodePolicy045ComplianceTests` + Datei: `tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs` + Geprüfte Regeln: + - Header enthält `' FILE:` und `INTERNE POLICY` + - `Option Strict On` und `Option Explicit On` vorhanden + - Reihenfolge: Header -> Options -> Namespace + - Verbotener Catch-Pseudofilter wird ausgeschlossen + - Dateien mit `Public`-Typen enthalten XML-`summary` + - Scan nur auf Source-Dateien (Ausschluss `bin/` und `obj/`) + +## Reproduzierbare Verifikation +1. `dotnet build FileClassifier.sln -v minimal` +2. `dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal` + +## Iterative Verbesserungen (Issue 03) +1. Interne Duplikatkonsolidierung: + - `FileTypeDetector` nutzt zentrale Registry-Regel für structured refinement. + - Alias-Definitionen in `FileTypeRegistry` über Helper vereinheitlicht. +2. Policy-Header harmonisiert: + - `HashRoundTripReport.vb`, `EnumUtils.vb`, `GuardUtils.vb`, `IterableUtils.vb`. + +## Hinweis +Diese Statusdatei dokumentiert den nachweisbaren Kernumfang der automatisierten 045-Compliance-Prüfung. +Erweiterungen an den Governance-Gates sind möglich, müssen jedoch fail-closed und mit niedriger False-Positive-Rate erfolgen. 
diff --git a/tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs b/tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs new file mode 100644 index 00000000..697576e6 --- /dev/null +++ b/tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs @@ -0,0 +1,63 @@ +using System.Text.RegularExpressions; + +namespace FileTypeDetectionLib.Tests.Contracts; + +[Trait("Category", "Governance")] +public sealed class CodePolicy045ComplianceTests +{ + private static readonly Regex NamespaceRegex = new(@"^\s*Namespace\s+", + RegexOptions.Multiline | RegexOptions.CultureInvariant); + private static readonly Regex PublicTypeRegex = new(@"^\s*Public\s+(?:NotInheritable\s+)?(?:Class|Enum|Structure|Module|Interface)\s+", + RegexOptions.Multiline | RegexOptions.CultureInvariant); + private static readonly Regex ForbiddenCatchPseudoFilterRegex = new(@"Catch\s+\w+\s+As\s+Exception\s+When\s+TypeOf\s+\w+\s+Is\s+Exception", + RegexOptions.CultureInvariant); + + [Fact] + public void VbFiles_UnderSrcFileTypeDetection_ComplyWithCore045LayoutRules() + { + var repoRoot = Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..")); + var sourceRoot = Path.Combine(repoRoot, "src", "FileTypeDetection"); + Assert.True(Directory.Exists(sourceRoot), $"Source root missing: {sourceRoot}"); + + var files = Directory.GetFiles(sourceRoot, "*.vb", SearchOption.AllDirectories) + .Where(path => !path.Contains($"{Path.DirectorySeparatorChar}bin{Path.DirectorySeparatorChar}", + StringComparison.OrdinalIgnoreCase)) + .Where(path => !path.Contains($"{Path.DirectorySeparatorChar}obj{Path.DirectorySeparatorChar}", + StringComparison.OrdinalIgnoreCase)) + .OrderBy(path => path, StringComparer.Ordinal) + .ToArray(); + + Assert.NotEmpty(files); + + foreach (var file in files) + { + var content = File.ReadAllText(file); + + Assert.Contains("' FILE:", content, StringComparison.Ordinal); + Assert.Contains("INTERNE POLICY", content, 
StringComparison.Ordinal); + Assert.Contains("Option Strict On", content, StringComparison.Ordinal); + Assert.Contains("Option Explicit On", content, StringComparison.Ordinal); + Assert.True(NamespaceRegex.IsMatch(content), $"Missing namespace declaration: {file}"); + + var fileIndex = content.IndexOf("' FILE:", StringComparison.Ordinal); + var strictIndex = content.IndexOf("Option Strict On", StringComparison.Ordinal); + var explicitIndex = content.IndexOf("Option Explicit On", StringComparison.Ordinal); + var namespaceIndex = NamespaceRegex.Match(content).Index; + + Assert.True(fileIndex >= 0 && strictIndex > fileIndex, + $"Policy 045 order violated ('FILE' before Option Strict): {file}"); + Assert.True(explicitIndex > strictIndex, + $"Policy 045 order violated (Option Explicit after Option Strict): {file}"); + Assert.True(namespaceIndex > explicitIndex, + $"Policy 045 order violated (Namespace after options): {file}"); + + Assert.False(ForbiddenCatchPseudoFilterRegex.IsMatch(content), + $"Policy 045 violation (forbidden catch pseudo-filter): {file}"); + + if (PublicTypeRegex.IsMatch(content)) + { + Assert.Contains("''' <summary>", content, StringComparison.Ordinal); + } + } + } +} diff --git a/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs b/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs new file mode 100644 index 00000000..d8f2e7c2 --- /dev/null +++ b/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs @@ -0,0 +1,49 @@ +using System.Reflection; +using Tomtastisch.FileClassifier; + +namespace FileTypeDetectionLib.Tests.Contracts; + +[Trait("Category", "ApiContract")] +public sealed class PublicApiAllowlistTests +{ + private static readonly string[] AllowedPublicTypes = + { + "Tomtastisch.FileClassifier.ArchiveProcessing", + "Tomtastisch.FileClassifier.DetectionDetail", + "Tomtastisch.FileClassifier.EvidenceHashing", + "Tomtastisch.FileClassifier.FileKind", + "Tomtastisch.FileClassifier.FileMaterializer", + 
"Tomtastisch.FileClassifier.FileType", + "Tomtastisch.FileClassifier.FileTypeDetector", + "Tomtastisch.FileClassifier.FileTypeOptions", + "Tomtastisch.FileClassifier.FileTypeProjectBaseline", + "Tomtastisch.FileClassifier.FileTypeProjectOptions", + "Tomtastisch.FileClassifier.HashDigestSet", + "Tomtastisch.FileClassifier.HashEvidence", + "Tomtastisch.FileClassifier.HashOptions", + "Tomtastisch.FileClassifier.HashRoundTripReport", + "Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot", + "Tomtastisch.FileClassifier.HashSourceType", + "Tomtastisch.FileClassifier.ZipExtractedEntry" + }; + + [Fact] + public void PublicTypes_MatchExplicitAllowlist() + { + var assembly = typeof(FileTypeDetector).Assembly; + var actual = assembly.GetTypes() + .Where(type => (type.IsPublic || type.IsNestedPublic) && + type.Namespace == "Tomtastisch.FileClassifier") + .Select(type => type.FullName) + .Where(name => !string.IsNullOrWhiteSpace(name)) + .Cast() + .OrderBy(name => name, StringComparer.Ordinal) + .ToArray(); + + var expected = AllowedPublicTypes + .OrderBy(name => name, StringComparer.Ordinal) + .ToArray(); + + Assert.Equal(expected, actual); + } +} From d6164e72838350bb56713eb445dc6abb7fc19f20 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 12:46:05 +0100 Subject: [PATCH 09/34] release(versioning): prepare 6.0.0 convergence and policy checks --- Directory.Build.props | 2 +- docs/versioning/002_HISTORY_VERSIONS.MD | 3 +- docs/versioning/003_CHANGELOG_RELEASES.MD | 12 ++++++++ docs/versioning/102_HISTORY_VERSIONS.MD | 3 +- docs/versioning/103_CHANGELOG_RELEASES.MD | 12 ++++++++ tools/versioning/check-version-policy.sh | 35 +++++++++++++++++++---- 6 files changed, 59 insertions(+), 8 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index 91864408..93a7b379 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,6 +5,6 @@ true - 5.2.1 + 6.0.0 diff --git a/docs/versioning/002_HISTORY_VERSIONS.MD 
b/docs/versioning/002_HISTORY_VERSIONS.MD index cbaacc44..baae397f 100644 --- a/docs/versioning/002_HISTORY_VERSIONS.MD +++ b/docs/versioning/002_HISTORY_VERSIONS.MD @@ -12,7 +12,7 @@ Heuristik fuer die Rueckwirkungs-Zuordnung: - `docs|test|ci|chore|tooling|refactor|fix` => Patch Aktueller Entwicklungsstand: -- Aktuelle Entwicklungslinie enthaelt `5.x` (aktueller Arbeitsstand: `v5.2.1`; Details in `docs/versioning/003_CHANGELOG_RELEASES.MD`). +- Aktuelle Entwicklungslinie enthaelt `6.x` (aktueller Arbeitsstand: `v6.0.0`; Details in `docs/versioning/003_CHANGELOG_RELEASES.MD`). Hinweis: - Die Spalte `Keyword` verwendet den technischen Klassifizierungswert aus der Historie. @@ -20,6 +20,7 @@ Hinweis: | Version | Kurzbeschreibung | Commit | Keyword | |---|---|---|---| +| `6.0.0` | Breaking-Release: `FileKind`-Enum und `HashRoundTripReport`-Public-API auf neues Slot-/Methodenmodell umgestellt, Hashing-Interna in Core/RoundTrip/Io ausgelagert | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | SharpCompress-API auf typsichere Aufrufe umgestellt, tar.gz-Verarbeitung fail-closed gehaertet und Qodana-CI-Gate als Pflichtlauf dokumentiert/erzwungen | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | netstandard2.0-Compat-Layer eingefuehrt, Provider-Struktur konsolidiert und TFM-Multi-Targeting erweitert | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | | `5.1.4` | Refactor-Cluster 7C abgeschlossen + Qodana-Alerts auf 0 + Version-Bump fuer Release | [2adeb83](https://github.com/tomtastisch/FileClassifier/commit/2adeb83) | patch | diff --git a/docs/versioning/003_CHANGELOG_RELEASES.MD b/docs/versioning/003_CHANGELOG_RELEASES.MD index 0adec393..10cf2a90 100644 --- a/docs/versioning/003_CHANGELOG_RELEASES.MD +++ b/docs/versioning/003_CHANGELOG_RELEASES.MD @@ -7,6 +7,18 @@ Alle Aenderungen werden hier technisch dokumentiert. 
Die Release-Version selbst ist der Git-Tag `vX.Y.Z` (optional `-prerelease`) als SSOT. +## [6.0.0] +- Added: + - Neue interne Hashing-Services (`EvidenceHashingCore`, `EvidenceHashingRoundTrip`, `EvidenceHashingIo`) fuer deterministische Auslagerung ohne neue Dependencies. +- Changed: + - Breaking-Umstellung von `FileKind`-Werten (`Docx`/`Xlsx`/`Pptx` -> `Doc`/`Xls`/`Ppt`). + - Breaking-Umstellung der `HashRoundTripReport`-Public-API auf Slot-basiertes Zugriffsmodell (`HashSlot`, `Evidence(...)`, `LogicalEquals(...)`, `PhysicalEquals(...)`). + - `EvidenceHashing` auf Fassade reduziert; Kernlogik in interne stateless Services verschoben. +- Fixed: + - Reflection-basierte Unit-Tests auf neue interne Hashing-Typen angepasst. +- Docs/CI/Tooling: + - Versionskonvergenz lokal auf `6.0.0` vorbereitet (`RepoVersion`, `Version`, `PackageVersion`, Versionshistorie DE/EN). + ## [5.2.1] - Added: - CI-Dokumentation um expliziten Pflicht-Gate-Charakter fuer Qodana erweitert. diff --git a/docs/versioning/102_HISTORY_VERSIONS.MD b/docs/versioning/102_HISTORY_VERSIONS.MD index 934455fc..f6ba1c29 100644 --- a/docs/versioning/102_HISTORY_VERSIONS.MD +++ b/docs/versioning/102_HISTORY_VERSIONS.MD @@ -12,13 +12,14 @@ Heuristics for retroactive classification: - `docs|test|ci|chore|tooling|refactor|fix` => patch Current state: -- Current release line contains `5.x` (current working state: `v5.2.1`; details in `docs/versioning/103_CHANGELOG_RELEASES.MD`). +- Current release line contains `6.x` (current working state: `v6.0.0`; details in `docs/versioning/103_CHANGELOG_RELEASES.MD`). Note: - The \"short description\" column follows the original commit/PR intent text for deterministic traceability and is not normalized to a single language. 
| Version | Short description | Commit | Keyword | |---|---|---|---| +| `6.0.0` | Breaking release: migrated `FileKind` enum and `HashRoundTripReport` public API to the new slot/method model and split hashing internals into core/roundtrip/io services | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | Switched SharpCompress calls to type-safe APIs, hardened tar.gz fail-closed handling, and enforced/documented Qodana CI as a mandatory gate | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | Introduce netstandard2.0 compatibility layer, consolidate provider structure, and extend TFM multi-targeting | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | | `5.1.4` | Refactor-Cluster 7C abgeschlossen + Qodana-Alerts auf 0 + Version-Bump fuer Release | [2adeb83](https://github.com/tomtastisch/FileClassifier/commit/2adeb83) | patch | diff --git a/docs/versioning/103_CHANGELOG_RELEASES.MD b/docs/versioning/103_CHANGELOG_RELEASES.MD index 5654ec02..b065566a 100644 --- a/docs/versioning/103_CHANGELOG_RELEASES.MD +++ b/docs/versioning/103_CHANGELOG_RELEASES.MD @@ -6,6 +6,18 @@ All changes are documented here in technical terms. The release version itself is the Git tag `vX.Y.Z` (optional `-prerelease`) as SSOT. +## [6.0.0] +- Added: + - New internal hashing services (`EvidenceHashingCore`, `EvidenceHashingRoundTrip`, `EvidenceHashingIo`) for deterministic extraction without adding dependencies. +- Changed: + - Breaking migration of `FileKind` values (`Docx`/`Xlsx`/`Pptx` -> `Doc`/`Xls`/`Ppt`). + - Breaking migration of `HashRoundTripReport` public API to a slot-based model (`HashSlot`, `Evidence(...)`, `LogicalEquals(...)`, `PhysicalEquals(...)`). + - Reduced `EvidenceHashing` to a facade and moved core logic into internal stateless services. +- Fixed: + - Updated reflection-based unit tests to the new internal hashing types. 
+- Docs/CI/Tooling: + - Prepared local version convergence to `6.0.0` (`RepoVersion`, `Version`, `PackageVersion`, version history DE/EN). + ## [5.2.1] - Added: - Extended CI documentation to state Qodana as an explicit mandatory gate. diff --git a/tools/versioning/check-version-policy.sh b/tools/versioning/check-version-policy.sh index 77f26497..b7b706db 100755 --- a/tools/versioning/check-version-policy.sh +++ b/tools/versioning/check-version-policy.sh @@ -30,8 +30,6 @@ collect_version_policy_violations() { local -a files=() local -a violations=() local -a patterns=( - '' - '' '' '' '' @@ -57,15 +55,42 @@ collect_version_policy_violations() { } run_ci_mode() { - local violations + local violations repo_version vbproj_version vbproj_package_version + + repo_version="$(sed -n 's/.*<RepoVersion>\([^<]*\)<\/RepoVersion>.*/\1/p' Directory.Build.props | head -n1)" + vbproj_version="$(sed -n 's/.*<Version>\([^<]*\)<\/Version>.*/\1/p' src/FileTypeDetection/FileTypeDetectionLib.vbproj | head -n1)" + vbproj_package_version="$(sed -n 's/.*<PackageVersion>\([^<]*\)<\/PackageVersion>.*/\1/p' src/FileTypeDetection/FileTypeDetectionLib.vbproj | head -n1)" + + if [[ -z "${repo_version}" ]]; then + echo "version-policy: RepoVersion missing in Directory.Build.props" >&2 + return 1 + fi + if [[ -z "${vbproj_version}" ]]; then + echo "version-policy: Version missing in src/FileTypeDetection/FileTypeDetectionLib.vbproj" >&2 + return 1 + fi + if [[ -z "${vbproj_package_version}" ]]; then + echo "version-policy: PackageVersion missing in src/FileTypeDetection/FileTypeDetectionLib.vbproj" >&2 + return 1 + fi + + if [[ "${vbproj_version}" != "${repo_version}" ]]; then + echo "version-policy: Version (${vbproj_version}) != RepoVersion (${repo_version})" >&2 + return 1 + fi + if [[ "${vbproj_package_version}" != "${repo_version}" ]]; then + echo "version-policy: PackageVersion (${vbproj_package_version}) != RepoVersion (${repo_version})" >&2 + return 1 + fi + 
violations="$(collect_version_policy_violations)" if [[ -n 
"${violations}" ]]; then - echo "version-policy: static version fields are forbidden (tag is SSOT)." >&2 + echo "version-policy: forbidden static assembly/version fields detected." >&2 echo "${violations}" >&2 return 1 fi - echo "version-policy: no static package/assembly version fields detected." + echo "version-policy: convergence fields valid and no forbidden static fields detected." } read_nupkg_version() { From cf95b3995c0ad4974f26a3b0a8f64d8a26256c62 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 12:52:23 +0100 Subject: [PATCH 10/34] chore(style): normalize formatter drift for preflight gate --- .../Abstractions/Archive/ZipExtractedEntry.vb | 6 +- .../Abstractions/Detection/DetectionDetail.vb | 2 +- .../Abstractions/Detection/FileType.vb | 12 +- .../Abstractions/Hashing/HashDigestSet.vb | 6 +- .../Abstractions/Hashing/HashEvidence.vb | 16 +- .../Abstractions/Hashing/HashOptions.vb | 10 +- .../Hashing/HashRoundTripReport.vb | 48 ++--- src/FileTypeDetection/ArchiveProcessing.vb | 8 +- src/FileTypeDetection/FileTypeOptions.vb | 28 +-- .../Infrastructure/ArchiveInternals.vb | 172 +++++++++--------- .../Infrastructure/ArchiveManagedInternals.vb | 16 +- .../Infrastructure/MimeProvider.vb | 2 +- .../Net8_0Plus/HashPrimitivesProvider.vb | 10 +- .../NetStandard2_0/HashPrimitivesProvider.vb | 10 +- src/FileTypeDetection/Utils/EnumUtils.vb | 24 +-- .../Steps/FileTypeDetectionSteps.cs | 2 +- 16 files changed, 186 insertions(+), 186 deletions(-) diff --git a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb index 2e49adf6..083ea3b5 100644 --- a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb +++ b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb @@ -42,10 +42,10 @@ Namespace Global.Tomtastisch.FileClassifier Friend Sub New _ ( - entryPath As String, + entryPath As String, payload As Byte() ) - + RelativePath = If(entryPath, 
String.Empty) If payload Is Nothing OrElse payload.Length = 0 Then Content = ImmutableArray(Of Byte).Empty @@ -64,7 +64,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Schreibgeschützter mit dem Entry-Inhalt. Public Function OpenReadOnlyStream() As MemoryStream - + Dim data = If(Content.IsDefaultOrEmpty, Array.Empty(Of Byte)(), Content.ToArray()) Return New MemoryStream(data, writable:=False) End Function diff --git a/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb b/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb index 1697b1b1..f686667b 100644 --- a/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb +++ b/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb @@ -44,7 +44,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property ExtensionVerified As Boolean - Friend Sub New _ + Friend Sub New _ ( detectedType As FileType, reasonCode As String, diff --git a/src/FileTypeDetection/Abstractions/Detection/FileType.vb b/src/FileTypeDetection/Abstractions/Detection/FileType.vb index 45b95fcf..c8a0076a 100644 --- a/src/FileTypeDetection/Abstractions/Detection/FileType.vb +++ b/src/FileTypeDetection/Abstractions/Detection/FileType.vb @@ -44,15 +44,15 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Aliases As ImmutableArray(Of String) - Friend Sub New _ + Friend Sub New _ ( - kind As FileKind, - canonicalExtension As String, - mime As String, + kind As FileKind, + canonicalExtension As String, + mime As String, allowed As Boolean, aliases As IEnumerable(Of String) ) - + Dim dedup As HashSet(Of String) = New HashSet(Of String)(StringComparer.OrdinalIgnoreCase) Dim n As String Dim orderedAliases As List(Of String) @@ -83,7 +83,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' String-Repräsentation des Feldes . 
Public Overrides Function ToString() As String - + Return Kind.ToString() End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb index b99cb881..462e9b19 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb @@ -58,7 +58,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property HasLogicalHash As Boolean - Friend Sub New _ + Friend Sub New _ ( physicalSha256 As String, logicalSha256 As String, @@ -69,7 +69,7 @@ Namespace Global.Tomtastisch.FileClassifier hasPhysicalHash As Boolean, hasLogicalHash As Boolean ) - + Me.PhysicalSha256 = Normalize(physicalSha256) Me.LogicalSha256 = Normalize(logicalSha256) Me.FastPhysicalXxHash3 = Normalize(fastPhysicalXxHash3) @@ -98,7 +98,7 @@ Namespace Global.Tomtastisch.FileClassifier ( value As String ) As String - + Return If(value, String.Empty).Trim().ToLowerInvariant() End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb index eba33cbb..c978946d 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb @@ -69,7 +69,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Notes As String - Friend Sub New _ + Friend Sub New _ ( sourceType As HashSourceType, label As String, @@ -82,7 +82,7 @@ Namespace Global.Tomtastisch.FileClassifier digests As HashDigestSet, notes As String ) - + Me.SourceType = sourceType Me.Label = If(label, String.Empty) Me.DetectedType = If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)) @@ -95,13 +95,13 @@ Namespace Global.Tomtastisch.FileClassifier Me.UncompressedBytes = ToImmutable(uncompressedBytes) End Sub - Friend Shared Function CreateFailure _ + Friend Shared Function CreateFailure _ ( - 
sourceType As HashSourceType, - label As String, + sourceType As HashSourceType, + label As String, notes As String ) As HashEvidence - + Return New HashEvidence( sourceType:=sourceType, label:=label, @@ -119,11 +119,11 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As Immutable.ImmutableArray(Of Byte) - + If data Is Nothing OrElse data.Length = 0 Then Return Immutable.ImmutableArray(Of Byte).Empty End If - + Return Immutable.ImmutableArray.Create(data) End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb index ad8cba1f..d63ff250 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb @@ -44,7 +44,7 @@ Namespace Global.Tomtastisch.FileClassifier Public Property MaterializedFileName As String = "deterministic-roundtrip.bin" Friend Function Clone() As HashOptions - + Return New HashOptions With { .IncludePayloadCopies = IncludePayloadCopies, .IncludeFastHash = IncludeFastHash, @@ -53,11 +53,11 @@ Namespace Global.Tomtastisch.FileClassifier } End Function - Friend Shared Function Normalize _ + Friend Shared Function Normalize _ ( options As HashOptions ) As HashOptions - + Dim cloned As HashOptions If options Is Nothing Then options = New HashOptions() @@ -67,11 +67,11 @@ Namespace Global.Tomtastisch.FileClassifier End Function - Private Shared Function NormalizeMaterializedFileName _ + Private Shared Function NormalizeMaterializedFileName _ ( candidate As String ) As String - + Dim normalized = If(candidate, String.Empty).Trim() If String.IsNullOrWhiteSpace(normalized) Then Return "deterministic-roundtrip.bin" diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb index fafb9d22..3a3b95dd 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb +++ 
b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb @@ -53,11 +53,11 @@ Namespace Global.Tomtastisch.FileClassifier H4 = 4 End Enum - Private Shared ReadOnly RequiredSlots As HashSlot() = _ + Private Shared ReadOnly RequiredSlots As HashSlot() = _ EnumUtils.GetValues(Of HashSlot)( sortOrder:=EnumUtils.EnumSortOrder.Ascending ) - + ' ===================================================================== ' Felder / Properties (Typzustand) ' ===================================================================== @@ -66,9 +66,9 @@ Namespace Global.Tomtastisch.FileClassifier Public ReadOnly Property IsArchiveInput As Boolean Public ReadOnly Property Notes As String - Private ReadOnly _evidences As HashEvidence() ' index = slot-1 - Private ReadOnly _logicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn - Private ReadOnly _physicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn + Private ReadOnly _evidences As HashEvidence() ' index = slot-1 + Private ReadOnly _logicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn + Private ReadOnly _physicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn Public ReadOnly Property LogicalConsistent As Boolean @@ -80,7 +80,7 @@ Namespace Global.Tomtastisch.FileClassifier Get Return IterableUtils.CloneArray(RequiredSlots) End Get - + End Property @@ -105,18 +105,18 @@ Namespace Global.Tomtastisch.FileClassifier ) ' Deklarationsblock (Pflicht, spaltenartig) - Dim slotCount As Integer = RequiredSlots.Length - Dim i As Integer - Dim baseEvidence As HashEvidence - Dim otherEvidence As HashEvidence - Dim eqLogical As Boolean - Dim consistentLocal As Boolean = True - + Dim slotCount As Integer = RequiredSlots.Length + Dim i As Integer + Dim baseEvidence As HashEvidence + Dim otherEvidence As HashEvidence + Dim eqLogical As Boolean + Dim consistentLocal As Boolean = True + ' ----------------------------------------------------------------- ' Guard-Clauses (fail-closed) ' 
----------------------------------------------------------------- GuardUtils.RequireLength(evidences, slotCount, NameOf(evidences)) - + ' ----------------------------------------------------------------- ' Snapshot / Assignment (Input) ' ----------------------------------------------------------------- @@ -135,7 +135,7 @@ Namespace Global.Tomtastisch.FileClassifier Dim slot As HashSlot = RequiredSlots(i) _evidences(SlotIndex(slot)) = EnsureEvidence(evidences(i), slot) Next - + ' ----------------------------------------------------------------- ' Branches (Vergleiche: H1 gegen H2..Hn) ' ----------------------------------------------------------------- @@ -230,10 +230,10 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function EnsureEvidence(evidence As HashEvidence, slot As HashSlot) As HashEvidence If evidence IsNot Nothing Then Return evidence - + Return HashEvidence.CreateFailure( - HashSourceType.Unknown, - SlotLabel(slot), + HashSourceType.Unknown, + SlotLabel(slot), "missing" ) End Function @@ -255,10 +255,10 @@ Namespace Global.Tomtastisch.FileClassifier If leftEvidence Is Nothing OrElse rightEvidence Is Nothing Then Return False If leftEvidence.Digests Is Nothing OrElse rightEvidence.Digests Is Nothing Then Return False If Not leftEvidence.Digests.HasLogicalHash OrElse Not rightEvidence.Digests.HasLogicalHash Then Return False - + Return String.Equals( - leftEvidence.Digests.LogicalSha256, - rightEvidence.Digests.LogicalSha256, + leftEvidence.Digests.LogicalSha256, + rightEvidence.Digests.LogicalSha256, StringComparison.Ordinal ) End Function @@ -267,10 +267,10 @@ Namespace Global.Tomtastisch.FileClassifier If leftEvidence Is Nothing OrElse rightEvidence Is Nothing Then Return False If leftEvidence.Digests Is Nothing OrElse rightEvidence.Digests Is Nothing Then Return False If Not leftEvidence.Digests.HasPhysicalHash OrElse Not rightEvidence.Digests.HasPhysicalHash Then Return False - + Return String.Equals( - 
leftEvidence.Digests.PhysicalSha256, - rightEvidence.Digests.PhysicalSha256, + leftEvidence.Digests.PhysicalSha256, + rightEvidence.Digests.PhysicalSha256, StringComparison.Ordinal ) End Function diff --git a/src/FileTypeDetection/ArchiveProcessing.vb b/src/FileTypeDetection/ArchiveProcessing.vb index 865cdc80..fa3b328f 100644 --- a/src/FileTypeDetection/ArchiveProcessing.vb +++ b/src/FileTypeDetection/ArchiveProcessing.vb @@ -44,7 +44,7 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As Boolean - + Return FileTypeDetector.TryValidateArchive(path) End Function @@ -60,7 +60,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As Boolean - + Dim opt = FileTypeOptions.GetSnapshot() Return ArchivePayloadGuard.IsSafeArchivePayload(data, opt) End Function @@ -82,7 +82,7 @@ Namespace Global.Tomtastisch.FileClassifier path As String, verifyBeforeExtract As Boolean ) As IReadOnlyList(Of ZipExtractedEntry) - + Return New FileTypeDetector().ExtractArchiveSafeToMemory(path, verifyBeforeExtract) End Function @@ -99,7 +99,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As IReadOnlyList(Of ZipExtractedEntry) - + Dim opt As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() diff --git a/src/FileTypeDetection/FileTypeOptions.vb b/src/FileTypeDetection/FileTypeOptions.vb index bd2e0a67..a4852491 100644 --- a/src/FileTypeDetection/FileTypeOptions.vb +++ b/src/FileTypeDetection/FileTypeOptions.vb @@ -208,7 +208,7 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As Boolean - + If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then Return False If Not path.EndsWith(".json", StringComparison.OrdinalIgnoreCase) Then Return False @@ -240,10 +240,10 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function 
SafeInt _ ( - el As Text.Json.JsonElement, + el As Text.Json.JsonElement, fallback As Integer ) As Integer - + Dim v As Integer If el.ValueKind = Text.Json.JsonValueKind.Number AndAlso el.TryGetInt32(v) Then Return v Return fallback @@ -251,10 +251,10 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function SafeLong _ ( - el As Text.Json.JsonElement, + el As Text.Json.JsonElement, fallback As Long ) As Long - + Dim v As Long If el.ValueKind = Text.Json.JsonValueKind.Number AndAlso el.TryGetInt64(v) Then Return v Return fallback @@ -262,12 +262,12 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function ParsePositiveInt _ ( - el As Text.Json.JsonElement, + el As Text.Json.JsonElement, fallback As Integer, name As String, logger As Microsoft.Extensions.Logging.ILogger ) As Integer - + Dim v = SafeInt(el, fallback) If v > 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") @@ -276,12 +276,12 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function ParseNonNegativeInt _ ( - el As Text.Json.JsonElement, + el As Text.Json.JsonElement, fallback As Integer, name As String, logger As Microsoft.Extensions.Logging.ILogger ) As Integer - + Dim v = SafeInt(el, fallback) If v >= 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") @@ -290,12 +290,12 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function ParsePositiveLong _ ( - el As Text.Json.JsonElement, + el As Text.Json.JsonElement, fallback As Long, name As String, logger As Microsoft.Extensions.Logging.ILogger ) As Long - + Dim v = SafeLong(el, fallback) If v > 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") @@ -304,12 +304,12 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function ParseBoolean _ ( - el As Text.Json.JsonElement, + el As Text.Json.JsonElement, fallback As Boolean, name As String, logger 
As Microsoft.Extensions.Logging.ILogger ) As Boolean - + If el.ValueKind = Text.Json.JsonValueKind.True Then Return True If el.ValueKind = Text.Json.JsonValueKind.False Then Return False LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") @@ -318,7 +318,7 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function ParseString _ ( - el As Text.Json.JsonElement, + el As Text.Json.JsonElement, fallback As String, name As String, logger As Microsoft.Extensions.Logging.ILogger diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index 32b3d19a..121eacea 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -237,11 +237,11 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function TryDescribeBytes _ ( - data As Byte(), + data As Byte(), opt As FileTypeProjectOptions, ByRef descriptor As ArchiveDescriptor ) As Boolean - + descriptor = ArchiveDescriptor.UnknownDescriptor() If data Is Nothing OrElse data.Length = 0 Then Return False If opt Is Nothing Then Return False @@ -267,11 +267,11 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function TryDescribeStream _ ( - stream As Stream, + stream As Stream, opt As FileTypeProjectOptions, ByRef descriptor As ArchiveDescriptor ) As Boolean - + Dim mapped As ArchiveContainerType Dim gzipWrapped As Boolean @@ -327,7 +327,7 @@ Namespace Global.Tomtastisch.FileClassifier ( type As SharpCompress.Common.ArchiveType ) As ArchiveContainerType - + Select Case type Case SharpCompress.Common.ArchiveType.Zip Return ArchiveContainerType.Zip @@ -355,12 +355,12 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function ValidateArchiveStream _ ( - stream As Stream, - opt As FileTypeProjectOptions, + stream As Stream, + opt As FileTypeProjectOptions, depth As Integer, descriptor As ArchiveDescriptor ) As Boolean 
- + Return ProcessArchiveStream(stream, opt, depth, descriptor, Nothing) End Function @@ -372,7 +372,7 @@ Namespace Global.Tomtastisch.FileClassifier descriptor As ArchiveDescriptor, extractEntry As Func(Of IArchiveEntryModel, Boolean) ) As Boolean - + Dim backend As IArchiveBackend If Not StreamGuard.IsReadable(stream) Then Return False @@ -397,35 +397,35 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function TryExtractArchiveStreamToMemory _ ( - stream As Stream, + stream As Stream, opt As FileTypeProjectOptions ) As IReadOnlyList(Of ZipExtractedEntry) - + Dim descriptor As ArchiveDescriptor = Nothing Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() If Not ArchiveTypeResolver.TryDescribeStream(stream, opt, descriptor) Then Return emptyResult - + Return TryExtractArchiveStreamToMemory(stream, opt, descriptor) End Function Friend Shared Function TryExtractArchiveStreamToMemory _ ( - stream As Stream, + stream As Stream, opt As FileTypeProjectOptions, descriptor As ArchiveDescriptor ) As IReadOnlyList(Of ZipExtractedEntry) - + Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() - Dim entries As List(Of ZipExtractedEntry) = New List(Of ZipExtractedEntry)() + Dim entries As List(Of ZipExtractedEntry) = New List(Of ZipExtractedEntry)() Dim ok As Boolean If Not StreamGuard.IsReadable(stream) Then Return emptyResult If opt Is Nothing Then Return emptyResult - + If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then Return emptyResult End if - + Try StreamGuard.RewindToStart(stream) ok = ArchiveProcessingEngine.ProcessArchiveStream( @@ -442,7 +442,7 @@ Namespace Global.Tomtastisch.FileClassifier End If Return entries.AsReadOnly() - + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -460,11 +460,11 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function 
TryExtractArchiveStream _ ( - stream As Stream, + stream As Stream, destinationDirectory As String, opt As FileTypeProjectOptions ) As Boolean - + Dim descriptor As ArchiveDescriptor = Nothing If Not ArchiveTypeResolver.TryDescribeStream(stream, opt, descriptor) Then Return False Return TryExtractArchiveStream(stream, destinationDirectory, opt, descriptor) @@ -472,12 +472,12 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function TryExtractArchiveStream _ ( - stream As Stream, + stream As Stream, destinationDirectory As String, - opt As FileTypeProjectOptions, + opt As FileTypeProjectOptions, descriptor As ArchiveDescriptor ) As Boolean - + Dim destinationFull As String Dim parent As String Dim stageDir As String @@ -527,7 +527,7 @@ Namespace Global.Tomtastisch.FileClassifier Directory.Move(stageDir, destinationFull) Return True - + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -539,7 +539,7 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf ex Is ObjectDisposedException LogGuard.Debug(opt.Logger, $"[ArchiveExtract] Fehler: {ex.Message}") Return False - + Finally If Directory.Exists(stageDir) Then Try @@ -558,11 +558,11 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function ExtractEntryToDirectory _ ( - entry As IArchiveEntryModel, + entry As IArchiveEntryModel, destinationPrefix As String, opt As FileTypeProjectOptions ) As Boolean - + Dim entryName As String = Nothing Dim isDirectory As Boolean = False Dim targetPath As String @@ -618,7 +618,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using End Using Return True - + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -635,11 +635,11 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function ExtractEntryToMemory _ ( - entry As IArchiveEntryModel, + entry As IArchiveEntryModel, entries As List(Of ZipExtractedEntry), opt As 
FileTypeProjectOptions ) As Boolean - + Dim entryName As String = Nothing Dim isDirectory As Boolean = False Dim payload As Byte() @@ -682,12 +682,12 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function TryGetSafeEntryName _ ( - entry As IArchiveEntryModel, + entry As IArchiveEntryModel, opt As FileTypeProjectOptions, - ByRef safeEntryName As String, + ByRef safeEntryName As String, ByRef isDirectory As Boolean ) As Boolean - + Dim entryName As String = Nothing Dim normalizedDirectoryFlag As Boolean = False @@ -702,27 +702,27 @@ Namespace Global.Tomtastisch.FileClassifier End If If Not ArchiveEntryPathPolicy.TryNormalizeRelativePath( - entry.RelativePath, + entry.RelativePath, allowDirectoryMarker:=True, - entryName, + entryName, normalizedDirectoryFlag ) Then - + Return False End If safeEntryName = entryName isDirectory = entry.IsDirectory OrElse normalizedDirectoryFlag OrElse entryName.EndsWith("/"c) - + Return True End Function Private Shared Function ValidateEntrySize _ ( - entry As IArchiveEntryModel, + entry As IArchiveEntryModel, opt As FileTypeProjectOptions ) As Boolean - + Dim sizeValue As Long? 
If entry Is Nothing OrElse opt Is Nothing Then Return False @@ -743,13 +743,13 @@ Namespace Global.Tomtastisch.FileClassifier ( dirPath As String ) As String - + If String.IsNullOrEmpty(dirPath) Then Return Path.DirectorySeparatorChar.ToString() If dirPath.EndsWith(Path.DirectorySeparatorChar) _ OrElse dirPath.EndsWith(Path.AltDirectorySeparatorChar) Then Return dirPath End If - + Return dirPath & Path.DirectorySeparatorChar End Function End Class @@ -763,36 +763,36 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function TryCollectFromFile _ ( - path As String, + path As String, opt As FileTypeProjectOptions, ByRef entries As IReadOnlyList(Of ZipExtractedEntry) ) As Boolean - + Dim descriptor As ArchiveDescriptor = Nothing entries = Array.Empty(Of ZipExtractedEntry)() If String.IsNullOrWhiteSpace(path) OrElse Not File.Exists(path) Then Return False If opt Is Nothing Then Return False - Try + Try Using fs As New FileStream( - path, FileMode.Open, - FileAccess.Read, + path, FileMode.Open, + FileAccess.Read, FileShare.Read, - InternalIoDefaults.FileStreamBufferSize, + InternalIoDefaults.FileStreamBufferSize, FileOptions.SequentialScan ) - + If Not ArchiveTypeResolver.TryDescribeStream(fs, opt, descriptor) Then Return False StreamGuard.RewindToStart(fs) - + If Not ArchiveSafetyGate.IsArchiveSafeStream(fs, opt, descriptor, depth:=0) Then Return False StreamGuard.RewindToStart(fs) - + entries = ArchiveExtractor.TryExtractArchiveStreamToMemory(fs, opt, descriptor) Return entries IsNot Nothing AndAlso entries.Count > 0 End Using - + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -804,18 +804,18 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf ex Is ObjectDisposedException LogGuard.Debug(opt.Logger, $"[ArchiveCollect] Datei-Fehler: {ex.Message}") entries = Array.Empty(Of ZipExtractedEntry)() - + Return False End Try End Function Friend Shared Function TryCollectFromBytes _ ( - data As 
Byte(), + data As Byte(), opt As FileTypeProjectOptions, ByRef entries As IReadOnlyList(Of ZipExtractedEntry) ) As Boolean - + Dim descriptor As ArchiveDescriptor = Nothing entries = Array.Empty(Of ZipExtractedEntry)() @@ -825,12 +825,12 @@ Namespace Global.Tomtastisch.FileClassifier Try If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False If Not ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) Then Return False - + Using ms As New MemoryStream(data, writable:=False) entries = ArchiveExtractor.TryExtractArchiveStreamToMemory(ms, opt, descriptor) Return entries IsNot Nothing AndAlso entries.Count > 0 End Using - + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -874,7 +874,7 @@ Namespace Global.Tomtastisch.FileClassifier containerTypeValue As ArchiveContainerType, extractEntry As Func(Of IArchiveEntryModel, Boolean) ) As Boolean Implements IArchiveBackend.Process - + Dim mapped As ArchiveContainerType Dim entries As List(Of SharpCompress.Archives.IArchiveEntry) Dim nestedResult As Boolean = False @@ -898,31 +898,31 @@ Namespace Global.Tomtastisch.FileClassifier If containerTypeValue = ArchiveContainerType.GZip AndAlso Not gzipWrapped Then Return False Using archive = OpenArchiveForContainerCompat(stream, containerTypeValue) - + If archive Is Nothing Then Return False - + mapped = ArchiveTypeResolver.MapArchiveType(archive.Type) - + gzipWrappedTar = gzipWrapped AndAlso containerTypeValue = ArchiveContainerType.GZip _ AndAlso mapped = ArchiveContainerType.Tar - + If mapped <> containerTypeValue AndAlso Not gzipWrappedTar Then Return False entries = archive.Entries.OrderBy( - Function(e) If(e.Key, String.Empty), + Function(e) If(e.Key, String.Empty), StringComparer.Ordinal ).ToList() If Not gzipWrappedTar Then nestedHandled = TryProcessNestedGArchive( - entries, - opt, - depth, - containerTypeValue, + entries, + opt, + depth, + containerTypeValue, 
extractEntry, nestedResult ) - + If nestedHandled Then Return nestedResult End If @@ -943,11 +943,11 @@ Namespace Global.Tomtastisch.FileClassifier If Not model.IsDirectory Then knownSize = 0 requireKnownForTotal = (extractEntry Is Nothing) OrElse depth > 0 - + If gzipWrappedTar Then requireKnownForTotal = False End If - + If Not TryGetValidatedSize(model, opt, knownSize, requireKnownForTotal) Then Return False totalUncompressed += knownSize If totalUncompressed > opt.MaxZipTotalUncompressedBytes Then Return False @@ -979,7 +979,7 @@ Namespace Global.Tomtastisch.FileClassifier stream As Stream, containerTypeValue As ArchiveContainerType ) As SharpCompress.Archives.IArchive - + Return ArchiveSharpCompressCompat.OpenArchiveForContainer(stream, containerTypeValue) End Function @@ -991,7 +991,7 @@ Namespace Global.Tomtastisch.FileClassifier extractEntry As Func(Of IArchiveEntryModel, Boolean), ByRef nestedResult As Boolean ) As Boolean - + Dim onlyEntry As SharpCompress.Archives.IArchiveEntry Dim model As IArchiveEntryModel Dim payload As Byte() = Nothing @@ -1029,9 +1029,9 @@ Namespace Global.Tomtastisch.FileClassifier Using nestedMs As New MemoryStream(payload, writable:=False) nestedResult = ArchiveProcessingEngine.ProcessArchiveStream( - nestedMs, - opt, - depth + 1, + nestedMs, + opt, + depth + 1, nestedDescriptor, extractEntry ) @@ -1046,24 +1046,24 @@ Namespace Global.Tomtastisch.FileClassifier opt As FileTypeProjectOptions, ByRef payload As Byte() ) As Boolean - + payload = Array.Empty(Of Byte)() If entry Is Nothing Then Return False If maxBytes <= 0 Then Return False If opt Is Nothing Then Return False Try - + Using source = entry.OpenEntryStream() If source Is Nothing OrElse Not source.CanRead Then Return False - + Using ms As New MemoryStream() StreamBounds.CopyBounded(source, ms, maxBytes) payload = ms.ToArray() Return True End Using End Using - + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException 
OrElse @@ -1081,12 +1081,12 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function TryGetValidatedSize _ ( - entry As IArchiveEntryModel, + entry As IArchiveEntryModel, opt As FileTypeProjectOptions, - ByRef knownSize As Long, + ByRef knownSize As Long, requireKnownForTotal As Boolean ) As Boolean - + Dim value As Long? knownSize = 0 @@ -1111,11 +1111,11 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function TryMeasureEntrySize _ ( - entry As IArchiveEntryModel, + entry As IArchiveEntryModel, opt As FileTypeProjectOptions, ByRef measured As Long ) As Boolean - + Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte Dim n As Integer @@ -1127,7 +1127,7 @@ Namespace Global.Tomtastisch.FileClassifier Try Using source = entry.OpenStream() If source Is Nothing OrElse Not source.CanRead Then Return False - + While True n = source.Read(buf, 0, buf.Length) If n <= 0 Then Exit While @@ -1135,9 +1135,9 @@ Namespace Global.Tomtastisch.FileClassifier If measured > opt.MaxZipEntryUncompressedBytes Then Return False End While End Using - + Return True - + Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse diff --git a/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb index 4c440d79..aba31d1e 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb @@ -26,11 +26,11 @@ Namespace Global.Tomtastisch.FileClassifier Friend Shared Function ValidateArchiveStream _ ( - stream As Stream, - opt As FileTypeProjectOptions, + stream As Stream, + opt As FileTypeProjectOptions, depth As Integer ) As Boolean - + Return ProcessArchiveStream(stream, opt, depth, Nothing) End Function @@ -41,7 +41,7 @@ Namespace Global.Tomtastisch.FileClassifier depth As Integer, extractEntry As Func(Of ZipArchiveEntry, Boolean) ) As Boolean - + Dim 
totalUncompressed As Long Dim ordered As IEnumerable(Of ZipArchiveEntry) Dim u As Long @@ -81,7 +81,7 @@ Namespace Global.Tomtastisch.FileClassifier Try Using es = e.Open() - + Using nestedMs = RecyclableStreams.GetStream("ArchiveStreamEngine.Nested") StreamBounds.CopyBounded(es, nestedMs, opt.MaxZipNestedBytes) nestedMs.Position = 0 @@ -128,10 +128,10 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function IsNestedArchiveEntry _ ( - entry As ZipArchiveEntry, + entry As ZipArchiveEntry, opt As FileTypeProjectOptions ) As Boolean - + Dim header(15) As Byte Dim read As Integer Dim exact As Byte() @@ -194,7 +194,7 @@ Namespace Global.Tomtastisch.FileClassifier containerTypeValue As ArchiveContainerType, extractEntry As Func(Of IArchiveEntryModel, Boolean) ) As Boolean Implements IArchiveBackend.Process - + If containerTypeValue <> ArchiveContainerType.Zip Then Return False If extractEntry Is Nothing Then diff --git a/src/FileTypeDetection/Infrastructure/MimeProvider.vb b/src/FileTypeDetection/Infrastructure/MimeProvider.vb index 0457ab7e..cb17aa3c 100644 --- a/src/FileTypeDetection/Infrastructure/MimeProvider.vb +++ b/src/FileTypeDetection/Infrastructure/MimeProvider.vb @@ -33,7 +33,7 @@ Namespace Global.Tomtastisch.FileClassifier ( extWithDot As String ) As String - + Dim ext As String = extWithDot If String.IsNullOrWhiteSpace(extWithDot) Then Return String.Empty diff --git a/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb b/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb index f34534cb..3f0216d9 100644 --- a/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb +++ b/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb @@ -67,7 +67,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As String Implements IHexCodec.EncodeLowerHex - + Dim safeData = If(data, Array.Empty(Of Byte)()) Return Convert.ToHexString(safeData).ToLowerInvariant() End Function @@ -101,7 +101,7 
@@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As Byte() Implements ISha256Primitives.ComputeHash - + Dim safeData = If(data, Array.Empty(Of Byte)()) Return Security.Cryptography.SHA256.HashData(safeData) End Function @@ -115,7 +115,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As String Implements ISha256Primitives.ComputeHashHex - + Return _codec.EncodeLowerHex(ComputeHash(data)) End Function End Class @@ -138,7 +138,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As ULong Implements IFastHash64.ComputeHashUInt64 - + Dim safeData = If(data, Array.Empty(Of Byte)()) Return IO.Hashing.XxHash3.HashToUInt64(safeData) End Function @@ -152,7 +152,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As String Implements IFastHash64.ComputeHashHex - + Return ComputeHashUInt64(data).ToString("x16", CultureInfo.InvariantCulture) End Function End Class diff --git a/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb b/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb index 81b89684..deae05f3 100644 --- a/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb +++ b/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb @@ -69,7 +69,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As String Implements IHexCodec.EncodeLowerHex - + Dim safeData = If(data, Array.Empty(Of Byte)()) Dim chars As Char() Dim index As Integer = 0 @@ -115,7 +115,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As Byte() Implements ISha256Primitives.ComputeHash - + Dim safeData = If(data, Array.Empty(Of Byte)()) Using sha As Security.Cryptography.SHA256 = Security.Cryptography.SHA256.Create() Return sha.ComputeHash(safeData) @@ -131,7 +131,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As String Implements ISha256Primitives.ComputeHashHex - + Return _codec.EncodeLowerHex(ComputeHash(data)) End 
Function End Class @@ -154,7 +154,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As ULong Implements IFastHash64.ComputeHashUInt64 - + Dim safeData = If(data, Array.Empty(Of Byte)()) Return IO.Hashing.XxHash3.HashToUInt64(safeData) End Function @@ -168,7 +168,7 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As String Implements IFastHash64.ComputeHashHex - + Return ComputeHashUInt64(data).ToString("x16", CultureInfo.InvariantCulture) End Function End Class diff --git a/src/FileTypeDetection/Utils/EnumUtils.vb b/src/FileTypeDetection/Utils/EnumUtils.vb index 9ccf5c18..37db6371 100644 --- a/src/FileTypeDetection/Utils/EnumUtils.vb +++ b/src/FileTypeDetection/Utils/EnumUtils.vb @@ -159,22 +159,22 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ) As TEnum() ' Deklarationsblock - Dim enumType As Type = GetType(TEnum) - Dim raw As Array = Nothing + Dim enumType As Type = GetType(TEnum) + Dim raw As Array = Nothing - Dim values() As TEnum = Nothing - Dim keys() As Long = Nothing + Dim values() As TEnum = Nothing + Dim keys() As Long = Nothing - Dim i As Integer - Dim count As Integer = 0 - Dim maxIndex As Integer = 0 + Dim i As Integer + Dim count As Integer = 0 + Dim maxIndex As Integer = 0 - Dim effectiveTo As Integer = 0 - Dim effectiveMaxFrom As Integer = 0 - Dim effectiveFrom As Integer = 0 + Dim effectiveTo As Integer = 0 + Dim effectiveMaxFrom As Integer = 0 + Dim effectiveFrom As Integer = 0 - Dim length As Integer = 0 - Dim result() As TEnum = Nothing + Dim length As Integer = 0 + Dim result() As TEnum = Nothing ' ----------------------------------------------------------------- diff --git a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs index da20459a..7c2542d2 100644 --- a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs +++ b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs @@ -1,7 +1,7 @@ +using 
System.Collections.Generic; using FileTypeDetectionLib.Tests.Support; using Reqnroll; using Tomtastisch.FileClassifier; -using System.Collections.Generic; namespace FileTypeDetectionLib.Tests.Steps; From 9efc3095eb76aaea617ba28b881328e08d792f3d Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 12:52:34 +0100 Subject: [PATCH 11/34] security(policy): align supported-major claim with 6.x baseline --- SECURITY.md | 4 ++-- docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD | 2 +- docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD | 2 +- tools/audit/verify-security-claims.sh | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 1cc33bd2..0cb09805 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,8 +16,8 @@ Security-Fixes werden nur fuer den aktuell unterstuetzten Major bereitgestellt. | Version | Security-Support | | ------- | ---------------- | -| 5.x | Ja | -| < 5.0 | Nein | +| 6.x | Ja | +| < 6.0 | Nein | ## 3. Meldung einer Sicherheitsluecke Bitte melde Sicherheitsluecken **nicht** ueber oeffentliche Issues. diff --git a/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD b/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD index fcd3c24f..b37e106a 100644 --- a/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD +++ b/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD @@ -9,7 +9,7 @@ Abbildung von Aussagen in `SECURITY.md` auf Nachweisquellen und Verifikationskom | Claim ID | SECURITY-Anker | Claim-Zusammenfassung | Nachweisquelle | Verifikationskommando | Pass-Kriterium | Blocker | |---|---|---|---|---|---|---| -| SEC-CLAIM-001 | 2. Unterstuetzte Versionen | Security-Support ist an Major 5 gebunden | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version-Major ist `5` | yes | +| SEC-CLAIM-001 | 2. 
Unterstuetzte Versionen | Security-Support ist an Major 6 gebunden | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version-Major ist `6` | yes | | SEC-CLAIM-002 | 3. Meldung | Private Vulnerability Reporting ist aktiv | GitHub API `private-vulnerability-reporting` | `gh api "repos/$REPO/private-vulnerability-reporting"` | `.enabled == true` | yes | | SEC-CLAIM-003 | 9. Nachweisbarkeit | Dependabot Security Updates sind aktiv | GitHub API (`security_and_analysis` oder `automated-security-fixes`) | `gh api "repos/$REPO" --jq '.security_and_analysis.dependabot_security_updates.status' || gh api "repos/$REPO/automated-security-fixes" --jq '.enabled'` | `enabled` oder `true` | yes | | SEC-CLAIM-004 | 9. Nachweisbarkeit | Secret Scanning ist aktiv | GitHub API (`security_and_analysis` oder Secret-Scanning-Alerts Endpoint) | `gh api "repos/$REPO" --jq '.security_and_analysis.secret_scanning.status' || gh api "repos/$REPO/secret-scanning/alerts?per_page=1"` | `enabled` oder Endpoint erfolgreich erreichbar | yes | diff --git a/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD b/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD index e9d3d6ab..3b278577 100644 --- a/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD +++ b/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD @@ -9,7 +9,7 @@ Mapping of claims in `SECURITY.md` to evidence sources and verification commands | Claim ID | SECURITY Anchor | Claim Summary | Evidence Source | Verification Command | Pass Criteria | Blocker | |---|---|---|---|---|---|---| -| SEC-CLAIM-001 | 2. Unterstuetzte Versionen | Security support is tied to major 5 | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version major is `5` | yes | +| SEC-CLAIM-001 | 2. 
Unterstuetzte Versionen | Security support is tied to major 6 | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version major is `6` | yes | | SEC-CLAIM-002 | 3. Meldung | Private vulnerability reporting path exists | GitHub API `private-vulnerability-reporting` | `gh api "repos/$REPO/private-vulnerability-reporting"` | `.enabled == true` | yes | | SEC-CLAIM-003 | 9. Nachweisbarkeit | Dependabot security updates enabled | GitHub API (`security_and_analysis` or `automated-security-fixes`) | `gh api "repos/$REPO" --jq '.security_and_analysis.dependabot_security_updates.status' || gh api "repos/$REPO/automated-security-fixes" --jq '.enabled'` | `enabled` or `true` | yes | | SEC-CLAIM-004 | 9. Nachweisbarkeit | Secret scanning enabled | GitHub API (`security_and_analysis` or Secret-Scanning-Alerts Endpoint) | `gh api "repos/$REPO" --jq '.security_and_analysis.secret_scanning.status' || gh api "repos/$REPO/secret-scanning/alerts?per_page=1"` | `enabled` or endpoint reachable | yes | diff --git a/tools/audit/verify-security-claims.sh b/tools/audit/verify-security-claims.sh index 695f0a9a..ace4e158 100755 --- a/tools/audit/verify-security-claims.sh +++ b/tools/audit/verify-security-claims.sh @@ -151,16 +151,16 @@ if [[ -z "${REPO_FULL}" ]]; then add_violation "CI-SEC-CLAIM-001" "fail" "Unable to determine GitHub repository slug" "SECURITY.md" fi -# Claim: 5.x supported and <5.0 unsupported maps to current package major = 5 +# Claim: 6.x supported and <6.0 unsupported maps to current package major = 6 pkg_ver="$(sed -n 's:.*\([^<]*\).*:\1:p' "${ROOT_DIR}/src/FileTypeDetection/FileTypeDetectionLib.vbproj" | head -n1)" if [[ -z "${pkg_ver}" ]]; then add_violation "CI-SEC-CLAIM-002" "fail" "Package version not found" "src/FileTypeDetection/FileTypeDetectionLib.vbproj" else major="${pkg_ver%%.*}" - if [[ "${major}" == "5" ]]; then + if [[ "${major}" == "6" ]]; then add_pass else - 
add_violation "CI-SEC-CLAIM-002" "fail" "Expected package major 5 for SECURITY.md support claim, found ${pkg_ver}" "src/FileTypeDetection/FileTypeDetectionLib.vbproj" + add_violation "CI-SEC-CLAIM-002" "fail" "Expected package major 6 for SECURITY.md support claim, found ${pkg_ver}" "src/FileTypeDetection/FileTypeDetectionLib.vbproj" fi fi From 156d833fd92e31256d698c6a2bbf03f37390fc3c Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 12:55:56 +0100 Subject: [PATCH 12/34] ci(preflight): retrigger pull_request checks with updated governance body From 3e25950029b894030f058bd689e85bafe7c419e4 Mon Sep 17 00:00:00 2001 From: Tomtastisch <82227609+tomtastisch@users.noreply.github.com> Date: Sat, 21 Feb 2026 12:58:49 +0100 Subject: [PATCH 13/34] Fix typo in DOCX file type summary Signed-off-by: Tomtastisch <82227609+tomtastisch@users.noreply.github.com> --- src/FileTypeDetection/Abstractions/Detection/FileKind.vb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb index 97253ac1..2365e730 100644 --- a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb +++ b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb @@ -56,7 +56,7 @@ Namespace Global.Tomtastisch.FileClassifier Zip ''' - ''' Office Open XML Word-Dokument (DOCX. + ''' Office Open XML Word-Dokument (DOC. 
''' Doc From 1c80645b5a8b5a728d41165b3fd0991eb78ef778 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 13:00:08 +0100 Subject: [PATCH 14/34] docs(governance): add closure evidence for issues 105-107 --- .../046_ISSUE_105_106_107_CLOSURE_DE.MD | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD diff --git a/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD b/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD new file mode 100644 index 00000000..654f8a8f --- /dev/null +++ b/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD @@ -0,0 +1,76 @@ +# Issue Closure Matrix (DE) - #105 / #106 / #107 + +Stand: 2026-02-21 + +## Ziel +Nachweis, dass alle offenen Punkte aus den Issues `#105`, `#106` und `#107` im PR `#108` umgesetzt sind. + +## Issue #105 - HashRoundTripReport ersetzen und Usages migrieren +Status: umgesetzt. + +Abdeckung: +- Datei ersetzt: `src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb` +- Legacy-Usages migriert auf neue API (`HashSlot`, `Evidence(slot)`, `LogicalEquals(slot)`, `PhysicalEquals(slot)`). +- API-Contract/Snapshot aktualisiert: + - `tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt` + +Geforderte `rg`-Trefferliste alt vs. neu: +1. Legacy-API-Symbole (alt) + - Kommando: + - `rg -n "LogicalH1Equals|PhysicalH1Equals|HashRoundTripReport\\.(H1|H2|H3|H4)\\b" src tests -S` + - Ergebnis: + - `0` Treffer +2. Neue API-Symbole (neu) + - Kommando: + - `rg -n "HashSlot|Evidence\\(|LogicalEquals\\(|PhysicalEquals\\(" src tests -S` + - Ergebnis: + - Treffer in `HashRoundTripReport.vb`, `HashingEvidenceTests.cs`, `FileTypeDetectionSteps.cs` und `public-api.snapshot.txt`. 
+ +Build/Test-Evidence: +- `dotnet build FileClassifier.sln -v minimal` -> pass +- `dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal` -> pass + +## Issue #106 - Public-API-Policy haerten (Allowlist + Contract-Gates) +Status: umgesetzt. + +Abdeckung: +- Explizite Public-API-Allowlist als blocker: + - `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs` +- Public-API-Snapshot-Gate als blocker: + - `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiContractSnapshotTests.cs` +- Policy-Compliance-Gate als blocker: + - `tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs` + +Dokumentierte freigegebene Public-Hauptklassen: +- In `AllowedPublicTypes` innerhalb + `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs`. + +Reproduzierbarer Governance-Check: +- `dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal` + +## Issue #107 - Interne Refactor-/Utility-Konsolidierung +Status: umgesetzt. + +Refactor-Matrix (Duplikatquelle -> neue Utility-Stelle): +1. Hashing-Kernlogik in Fassade `EvidenceHashing.vb` + - -> `EvidenceHashingCore.vb` + - Methoden u. a.: `BuildEvidenceFromEntries`, `BuildEvidenceFromRawPayload`, + `TryNormalizeEntries`, `BuildLogicalManifestBytes`, `ComputeSha256Hex`, + `ComputeFastHash`, `ComputeHmacSha256Hex`. +2. RoundTrip-I/O/Temp-Lifecycle in Fassade + - -> `EvidenceHashingRoundTrip.vb` + - Pipeline: TempRoot/Target, Materialisierung, Cleanup, Report-Bildung. +3. Gebundene Datei-Leselogik/Fehleraufbereitung + - -> `EvidenceHashingIo.vb` + - Methoden: `TryReadFileBounded`, `SetReadFileError`. +4. Enum- und Guard-Helfer in mehreren Klassen + - -> `EnumUtils.vb`, `GuardUtils.vb`, `IterableUtils.vb` + - Einsatz sichtbar in `HashRoundTripReport.vb`. 
+ +Evidence-Kommandos: +- `rg -n "EvidenceHashingCore|EvidenceHashingRoundTrip|EvidenceHashingIo" src tests -S` +- `rg -n "EnumUtils\\.|GuardUtils\\.|IterableUtils\\." src tests -S` +- `dotnet test FileClassifier.sln -v minimal` + +## Entscheidung +Alle offenen Punkte aus `#105`, `#106` und `#107` sind im aktuellen PR-Stand umgesetzt und auditierbar nachgewiesen. From 9b54c856093b9199139e7ba400b3a41d46c0fb12 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 15:52:24 +0100 Subject: [PATCH 15/34] refactor(detection): externalize registry ssot config and cache ordered kinds --- .../Configuration/FileTypeRegistryConfig.vb | 245 +++++++++++++ .../Detection/FileTypeRegistry.vb | 338 ++++++++++++------ 2 files changed, 464 insertions(+), 119 deletions(-) create mode 100644 src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb diff --git a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb new file mode 100644 index 00000000..797e73dd --- /dev/null +++ b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb @@ -0,0 +1,245 @@ +' ============================================================================ +' FILE: FileTypeRegistryConfig.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' +' SSOT CONFIG (verbindlich) +' - AliasGroups: zentrale “Wildcard”-Semantik (FileKind.* steht für viele Aliaswerte) +' - AliasOverrides: Kind -> AliasGroup +' - ExtensionOverrides: Canonical-Extension Overrides +' - MagicPatternCatalog: zentrale Magic-Signaturen +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System.Collections.Immutable + +Namespace Global.Tomtastisch.FileClassifier + ''' + ''' 
Zentrale Konfiguration (SSOT) für . + ''' Definiert: + ''' - Canonical-Extension Overrides + ''' - Aliasgruppen (Wildcard-Semantik) + ''' - Mapping -> Aliasgruppe + ''' - Magic-Pattern-Katalog + ''' + ''' + ''' Diese Konfiguration enthält ausschließlich statische Daten und deterministische Builder. + ''' Normalisierung, Deduplikation und Sortierung erfolgen in . + ''' + Friend Module FileTypeRegistryConfig + + ''' + ''' Canonical-Extension Overrides (SSOT). + ''' Wird genutzt, wenn die Canonical-Extension nicht aus dem Enum-Namen abgeleitet werden soll. + ''' + Friend ReadOnly ExtensionOverrides _ + As ImmutableDictionary(Of FileKind, String) = _ + BuildExtensionOverrides() + + ''' + ''' Aliasgruppen (SSOT) zur Abbildung der Wildcard-Semantik. + ''' Gruppen fassen gleichartige Aliaswerte zusammen (z.B. Archive, Office-Container). + ''' + Friend ReadOnly AliasGroups _ + As ImmutableDictionary(Of String, ImmutableArray(Of String)) = _ + BuildAliasGroups() + + ''' + ''' Mapping -> Aliasgruppe (SSOT). + ''' Die Werte ergänzen die automatisch abgeleiteten Aliases (Enumname + Canonical-Extension). + ''' + Friend ReadOnly AliasOverrides _ + As ImmutableDictionary(Of FileKind, ImmutableArray(Of String)) = _ + BuildAliasOverrides() + + ''' + ''' Magic-Pattern-Katalog (SSOT) für direkte Header-Erkennung. + ''' Enthält pro eine Liste von Patterns; ein Pattern besteht aus Segmenten. + ''' + Friend ReadOnly MagicPatternCatalog _ + As ImmutableDictionary(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern)) = + BuildMagicPatternCatalog() + + ''' + ''' Erstellt ein unveränderliches Aliasarray aus variablen Stringwerten. + ''' + ''' Aliaswerte in Rohform. + ''' Unveränderliches Array der Aliaswerte. + Private Function A _ + ( + ParamArray values As String() + ) As ImmutableArray(Of String) + + Return ImmutableArray.Create(values) + End Function + + ''' + ''' Liefert ein leeres Aliasarray. + ''' + ''' Leeres . 
+ Private Function EmptyAliases() As ImmutableArray(Of String) + Return ImmutableArray(Of String).Empty + End Function + + ''' + ''' Liefert die Aliasgruppe für einen Gruppennamen. + ''' Fail-closed: Unbekannte Gruppen liefern ein leeres Array. + ''' + ''' Gruppenname (case-insensitive). + ''' Aliasgruppe oder leeres Array. + Private Function GetGroup _ + ( + name As String _ + ) As ImmutableArray(Of String) + + Dim values As ImmutableArray(Of String) = ImmutableArray(Of String).Empty + If AliasGroups.TryGetValue(name, values) Then Return values + Return ImmutableArray(Of String).Empty + End Function + + ''' + ''' Baut die Canonical-Extension Overrides deterministisch. + ''' + ''' Unveränderliches Dictionary Kind->Extension. + Private Function BuildExtensionOverrides() As ImmutableDictionary(Of FileKind, String) + Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, String)() + + b(FileKind.Jpeg) = ".jpg" + + Return b.ToImmutable() + End Function + + ''' + ''' Baut die Aliasgruppen deterministisch. + ''' Gruppen sind fachliche Wildcards und werden in referenziert. + ''' + ''' Unveränderliches Dictionary Gruppenname->Aliasliste. + Private Function BuildAliasGroups() As ImmutableDictionary(Of String, ImmutableArray(Of String)) + Dim b = ImmutableDictionary.CreateBuilder(Of String, ImmutableArray(Of String))(StringComparer.OrdinalIgnoreCase) + + ' Wildcard-Semantik (Gruppen): + ' - ARCHIVE: alle Archive/Container, die über FileKind.Zip normalisiert werden. + ' - OFFICE_*: Office/ähnliche Container (Doc/Xls/Ppt), deren Content/Container-Detection separat läuft. 
+ + b("JPEG") = A("jpe") + + b("ARCHIVE") = A( + "tar", "tgz", "gz", "gzip", + "bz2", "bzip2", + "xz", + "7z", "zz", "rar") + + b("OFFICE_DOC") = A( + "doc", "docx", "docm", "docb", + "dot", "dotm", "dotx", + "odt", "ott") + + b("OFFICE_XLS") = A( + "xls", "xlsx", "xlsm", "xlsb", + "xlt", "xltm", "xltx", "xltb", + "xlam", "xla", + "ods", "ots") + + b("OFFICE_PPT") = A( + "ppt", "pptx", "pptm", + "pot", "potm", "potx", + "pps", "ppsm", "ppsx", + "odp", "otp") + + Return b.ToImmutable() + End Function + + ''' + ''' Baut das Mapping -> Aliasgruppe deterministisch. + ''' + ''' Unveränderliches Dictionary Kind->Aliasliste. + Private Function BuildAliasOverrides() As ImmutableDictionary(Of FileKind, ImmutableArray(Of String)) + Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of String))() + + b(FileKind.Jpeg) = GetGroup("JPEG") + b(FileKind.Zip) = GetGroup("ARCHIVE") + b(FileKind.Doc) = GetGroup("OFFICE_DOC") + b(FileKind.Xls) = GetGroup("OFFICE_XLS") + b(FileKind.Ppt) = GetGroup("OFFICE_PPT") + + Return b.ToImmutable() + End Function + + ''' + ''' Erstellt ein aus Segmenten. + ''' + ''' Segmente, die gemeinsam matchen müssen. + ''' Magic-Pattern. + Private Function Pattern _ + ( + ParamArray segments As FileTypeRegistry.MagicSegment() + ) As FileTypeRegistry.MagicPattern + + Return New FileTypeRegistry.MagicPattern(ImmutableArray.Create(segments)) + End Function + + ''' + ''' Erstellt ein , das eine Bytefolge ab einem festen Offset erwartet. + ''' + ''' Startoffset im Header. + ''' Erwartete Bytefolge. + ''' Magic-Segment. + Private Function Prefix _ + ( + offset As Integer, + ParamArray bytesValue As Byte() + ) As FileTypeRegistry.MagicSegment + + Return New FileTypeRegistry.MagicSegment(offset, ImmutableArray.Create(bytesValue)) + End Function + + ''' + ''' Baut den Magic-Pattern-Katalog deterministisch. + ''' Einträge sind ausschließlich direkte Header-Signaturen (kein Container-Parsing). 
+ ''' + ''' Unveränderliches Dictionary Kind->Magic-Patterns. + Private Function BuildMagicPatternCatalog _ + () As ImmutableDictionary(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern)) + + Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern))() + + b(FileKind.Pdf) = ImmutableArray.Create( + Pattern(Prefix(0, &H25, &H50, &H44, &H46, &H2D)) + ) + + b(FileKind.Png) = ImmutableArray.Create( + Pattern(Prefix(0, &H89, &H50, &H4E, &H47, &HD, &HA, &H1A, &HA)) + ) + + b(FileKind.Jpeg) = ImmutableArray.Create( + Pattern(Prefix(0, &HFF, &HD8, &HFF)) + ) + + b(FileKind.Gif) = ImmutableArray.Create( + Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H37, &H61)), + Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H39, &H61)) + ) + + b(FileKind.Webp) = ImmutableArray.Create( + Pattern( + Prefix(0, &H52, &H49, &H46, &H46), + Prefix(8, &H57, &H45, &H42, &H50) + ) + ) + + b(FileKind.Zip) = ImmutableArray.Create( + Pattern(Prefix(0, &H50, &H4B, &H3, &H4)), + Pattern(Prefix(0, &H50, &H4B, &H5, &H6)), + Pattern(Prefix(0, &H50, &H4B, &H7, &H8)) + ) + + Return b.ToImmutable() + End Function + + End Module +End Namespace diff --git a/src/FileTypeDetection/Detection/FileTypeRegistry.vb b/src/FileTypeDetection/Detection/FileTypeRegistry.vb index 0b21a726..3293ab09 100644 --- a/src/FileTypeDetection/Detection/FileTypeRegistry.vb +++ b/src/FileTypeDetection/Detection/FileTypeRegistry.vb @@ -25,76 +25,58 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub + ''' + ''' SSOT: Ordnet jedem den zugehörigen zu. + ''' Der Eintrag ist immer vorhanden (fail-closed). + ''' Friend Shared ReadOnly TypesByKind As ImmutableDictionary(Of FileKind, FileType) + + ''' + ''' Alias-Index: Ordnet normalisierte Aliaswerte (z.B. Endungen ohne Punkt) einem zu. + ''' Die Normalisierung erfolgt über . + ''' Friend Shared ReadOnly KindByAlias As ImmutableDictionary(Of String, FileKind) + + ''' + ''' Canonical-Extension Overrides (SSOT). 
Wird für einzelne Typen genutzt, + ''' wenn der Enumname nicht der gewünschten Canonical-Extension entspricht. + ''' Private Shared ReadOnly ExtensionOverrides As ImmutableDictionary(Of FileKind, String) = - ImmutableDictionary.CreateRange(Of FileKind, String)( - {New KeyValuePair(Of FileKind, String)(FileKind.Jpeg, ".jpg")}) + FileTypeRegistryConfig.ExtensionOverrides + ''' + ''' Zusätzliche Aliaswerte pro (SSOT). Diese Werte ergänzen die automatisch + ''' abgeleiteten Aliases (Enumname + Canonical-Extension) und werden deterministisch normalisiert. + ''' Private Shared ReadOnly AliasOverrides As ImmutableDictionary(Of FileKind, ImmutableArray(Of String)) = - ImmutableDictionary.CreateRange(Of FileKind, ImmutableArray(Of String))( - { _ - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Jpeg, - Aliases("jpe")), - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Zip, - Aliases("tar", - "tgz", - "gz", - "gzip", - "bz2", - "bzip2", - "xz", - "7z", - "zz", - "rar")), - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Doc, - Aliases("doc", - "docx", - "docm", - "docb", - "dot", - "dotm", - "dotx", - "odt", - "ott")), - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Xls, - Aliases("xls", - "xlsx", - "xlsm", - "xlsb", - "xlt", - "xltm", - "xltx", - "xltb", - "xlam", - "xla", - "ods", - "ots")), - New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Ppt, - Aliases("ppt", - "pptx", - "pptm", - "pot", - "potm", - "potx", - "pps", - "ppsm", - "ppsx", - "odp", - "otp")) - }) + FileTypeRegistryConfig.AliasOverrides + + ''' + ''' Cache der deterministisch sortierten Enumwerte (). + ''' Vermeidet wiederholte Reflection/Sortierung in Hotpaths. + ''' + Private Shared ReadOnly OrderedKindsCache As ImmutableArray(Of FileKind) = BuildOrderedKinds() + ''' + ''' Katalog von Magic-Patterns pro . + ''' Die Datenquelle ist die zentrale Konfiguration FileTypeRegistryConfig. 
+ ''' Private Shared ReadOnly _ MagicPatternCatalog As ImmutableDictionary(Of FileKind, ImmutableArray(Of MagicPattern)) = - BuildMagicPatternCatalog() + FileTypeRegistryConfig.MagicPatternCatalog + ''' + ''' Aus abgeleitete Regeln für die Magic-Erkennung. + ''' Enthält ausschließlich Einträge mit mindestens einem Magic-Pattern. + ''' Private Shared ReadOnly MagicRules As ImmutableArray(Of MagicRule) - Private Shared Function Aliases(ParamArray values As String()) As ImmutableArray(Of String) - Return ImmutableArray.Create(values) - End Function + ''' + ''' Initialisiert die Registry deterministisch aus und den zentralen Overrides. + ''' Reihenfolge: Definitionen bauen, Typen ableiten, Aliasindex erzeugen, Magic-Regeln ableiten. + ''' Shared Sub New() Dim definitions = BuildDefinitionsFromEnum() TypesByKind = BuildTypes(definitions) @@ -102,13 +84,18 @@ Namespace Global.Tomtastisch.FileClassifier MagicRules = BuildMagicRules(definitions) End Sub + ''' + ''' Erzeugt die vollständige Menge an aus der Enumquelle. + ''' wird bewusst ausgeschlossen, da Unknown als separater fail-closed Typ geführt wird. + ''' + ''' Unveränderliche Liste aller Definitionsobjekte (ohne Unknown). Private Shared Function BuildDefinitionsFromEnum() As ImmutableArray(Of FileTypeDefinition) Dim b = ImmutableArray.CreateBuilder(Of FileTypeDefinition)() Dim canonicalExtension As String Dim aliases As String() Dim magicPatterns As ImmutableArray(Of MagicPattern) - For Each kind In OrderedKinds() + For Each kind In OrderedKindsCache If kind = FileKind.Unknown Then Continue For canonicalExtension = GetCanonicalExtension(kind) @@ -121,13 +108,31 @@ Namespace Global.Tomtastisch.FileClassifier Return b.ToImmutable() End Function + ''' + ''' Liefert die deterministisch sortierten Enumwerte () aus dem Cache. + ''' + ''' Sortierte Liste aller Enumwerte. 
Private Shared Function OrderedKinds() As ImmutableArray(Of FileKind) + Return OrderedKindsCache + End Function + + ''' + ''' Baut den Cache der sortierten Enumwerte () einmalig über Reflection. + ''' + ''' Sortierte Liste aller Enumwerte. + Private Shared Function BuildOrderedKinds() As ImmutableArray(Of FileKind) Dim values = [Enum].GetValues(GetType(FileKind)).Cast(Of FileKind)() Return values. OrderBy(Function(kind) CInt(kind)). ToImmutableArray() End Function + ''' + ''' Bestimmt die Canonical-Extension für einen Typ. + ''' Priorität: Override > Enumname (normalisiert) als "." + alias. + ''' + ''' Enumwert des Typs. + ''' Canonical-Extension inklusive führendem Punkt. Private Shared Function GetCanonicalExtension(kind As FileKind) As String Dim overrideExt As String = Nothing If ExtensionOverrides.TryGetValue(kind, overrideExt) Then @@ -137,6 +142,14 @@ Namespace Global.Tomtastisch.FileClassifier Return "." & NormalizeAlias(kind.ToString()) End Function + ''' + ''' Baut die vollständige Aliasliste für einen Typ. + ''' Enthält Canonical-Extension, Enumalias sowie zusätzliche Overrides. + ''' Ergebnis ist deterministisch sortiert und ohne Duplikate. + ''' + ''' Enumwert des Typs. + ''' Canonical-Extension inklusive führendem Punkt. + ''' Sortierte Aliasliste (ohne führende Punkte, kleingeschrieben). Private Shared Function BuildAliases(kind As FileKind, canonicalExtension As String) As String() Dim aliases As New HashSet(Of String)(StringComparer.OrdinalIgnoreCase) Dim extAlias As String @@ -151,11 +164,10 @@ Namespace Global.Tomtastisch.FileClassifier If enumAlias.Length > 0 Then aliases.Add(enumAlias) If AliasOverrides.TryGetValue(kind, additional) Then - additional. - Select(Function(item) NormalizeAlias(item)). - Where(Function(normalized) normalized.Length > 0). - ToList(). 
- ForEach(Sub(normalized) aliases.Add(normalized)) + For Each rawAlias In additional + Dim normalized = NormalizeAlias(rawAlias) + If normalized.Length > 0 Then aliases.Add(normalized) + Next End If orderedAliases = aliases.ToList() @@ -163,6 +175,11 @@ Namespace Global.Tomtastisch.FileClassifier Return orderedAliases.ToArray() End Function + ''' + ''' Liefert die Magic-Patterns für einen Typ aus dem Katalog. + ''' + ''' Enumwert des Typs. + ''' Magic-Patterns oder ein leeres Array. Private Shared Function GetMagicPatterns(kind As FileKind) As ImmutableArray(Of MagicPattern) Dim patterns As ImmutableArray(Of MagicPattern) = ImmutableArray(Of MagicPattern).Empty If MagicPatternCatalog.TryGetValue(kind, patterns) Then @@ -172,34 +189,62 @@ Namespace Global.Tomtastisch.FileClassifier Return ImmutableArray(Of MagicPattern).Empty End Function + ''' + ''' Erzeugt die Typ-Registry () aus den Definitionsobjekten. + ''' Unknown wird als eigener, fail-closed Eintrag hinzugefügt. + ''' + ''' Definitionsobjekte (ohne Unknown). + ''' Unveränderliches Dictionary mit Einträgen für alle Typen inklusive Unknown. Private Shared Function BuildTypes(definitions As ImmutableArray(Of FileTypeDefinition)) _ As ImmutableDictionary(Of FileKind, FileType) Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, FileType)() - b(FileKind.Unknown) = New FileType(FileKind.Unknown, Nothing, Nothing, False, - ImmutableArray(Of String).Empty) + b(FileKind.Unknown) = CreateUnknownType() For Each d In definitions - b(d.Kind) = New FileType(d.Kind, d.CanonicalExtension, MimeProvider.GetMime(d.CanonicalExtension), True, - d.Aliases) + b(d.Kind) = New FileType( + d.Kind, + d.CanonicalExtension, + MimeProvider.GetMime(d.CanonicalExtension), + True, + d.Aliases + ) Next Return b.ToImmutable() End Function + ''' + ''' Erzeugt den fail-closed für . + ''' + ''' Unknown-Typ ohne Extension und ohne MIME. 
+ Private Shared Function CreateUnknownType() As FileType + Return New FileType(FileKind.Unknown, + Nothing, + Nothing, + False, + ImmutableArray(Of String).Empty) + End Function + + + ''' + ''' Bestimmt den Typ anhand von Magic-Patterns in einem Header-Bytearray. + ''' Die Auswertung erfolgt deterministisch in Regelreihenfolge; erster Treffer gewinnt. + ''' + ''' Dateiheader (mindestens so lang wie die benötigten Segmente). + ''' Erkannter oder . Friend Shared Function DetectByMagic(header As Byte()) As FileKind Dim rule As MagicRule Dim patterns As ImmutableArray(Of MagicPattern) - Dim segments As ImmutableArray(Of MagicSegment) If header Is Nothing OrElse header.Length = 0 Then Return FileKind.Unknown For i = 0 To MagicRules.Length - 1 rule = MagicRules(i) patterns = rule.Patterns + For j = 0 To patterns.Length - 1 - segments = patterns(j).Segments - If segments.All(Function(segment) HasSegment(header, segment)) Then + If MatchesPattern(header, patterns(j)) Then Return rule.Kind End If Next @@ -208,6 +253,29 @@ Namespace Global.Tomtastisch.FileClassifier Return FileKind.Unknown End Function + ''' + ''' Prüft, ob ein Magic-Pattern vollständig gegen den Header matcht. + ''' + ''' Headerdaten. + ''' Pattern mit Segmenten. + ''' True, wenn alle Segmente matchen. + Private Shared Function MatchesPattern(header As Byte(), pattern As MagicPattern) As Boolean + Dim segments As ImmutableArray(Of MagicSegment) = pattern.Segments + + If segments.IsDefaultOrEmpty Then Return False + + For i = 0 To segments.Length - 1 + If Not HasSegment(header, segments(i)) Then Return False + Next + + Return True + End Function + + ''' + ''' Prüft, ob für einen Typ mindestens ein Magic-Pattern für direkte Header-Erkennung hinterlegt ist. + ''' + ''' Enumwert des Typs. + ''' True bei vorhandenem Patternkatalogeintrag. 
Friend Shared Function HasDirectHeaderDetection(kind As FileKind) As Boolean Dim patterns As ImmutableArray(Of MagicPattern) = ImmutableArray(Of MagicPattern).Empty @@ -215,16 +283,33 @@ Namespace Global.Tomtastisch.FileClassifier Return MagicPatternCatalog.TryGetValue(kind, patterns) AndAlso Not patterns.IsDefaultOrEmpty End Function + ''' + ''' Prüft, ob ein Typ zusätzlich über strukturierte Container-Erkennung klassifiziert wird. + ''' Diese Klassifikation ist unabhängig von direkten Header-Signaturen. + ''' + ''' Enumwert des Typs. + ''' True, wenn strukturierte Container-Erkennung aktiv ist. Friend Shared Function HasStructuredContainerDetection(kind As FileKind) As Boolean Return kind = FileKind.Doc OrElse kind = FileKind.Xls OrElse kind = FileKind.Ppt End Function + ''' + ''' Prüft, ob der Typ eine direkte Inhalts-/Header-Erkennung besitzt + ''' (Magic-Header oder strukturierte Container-Erkennung). + ''' + ''' Enumwert des Typs. + ''' True, wenn Content-Detection verfügbar ist. Friend Shared Function HasDirectContentDetection(kind As FileKind) As Boolean Return HasDirectHeaderDetection(kind) OrElse HasStructuredContainerDetection(kind) End Function + ''' + ''' Liefert alle Typen, die keine direkte Content-Detection besitzen. + ''' Unknown ist ausgeschlossen. + ''' + ''' Liste der Typen ohne direkte Content-Detection. Friend Shared Function KindsWithoutDirectContentDetection() As ImmutableArray(Of FileKind) Return OrderedKinds(). Where(Function(kind) kind <> FileKind.Unknown). @@ -232,6 +317,12 @@ Namespace Global.Tomtastisch.FileClassifier ToImmutableArray() End Function + ''' + ''' Baut die Magic-Regeln aus den Definitionsobjekten. + ''' Es werden ausschließlich Definitionsobjekte mit mindestens einem Magic-Pattern berücksichtigt. + ''' + ''' Definitionsobjekte (ohne Unknown). + ''' Unveränderliche Liste der Magic-Regeln. 
Private Shared Function BuildMagicRules(definitions As ImmutableArray(Of FileTypeDefinition)) _ As ImmutableArray(Of MagicRule) Return definitions. @@ -240,42 +331,33 @@ Namespace Global.Tomtastisch.FileClassifier ToImmutableArray() End Function - Private Shared Function BuildMagicPatternCatalog() _ - As ImmutableDictionary(Of FileKind, ImmutableArray(Of MagicPattern)) - Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of MagicPattern))() - - b(FileKind.Pdf) = ImmutableArray.Create( - Pattern(Prefix(0, &H25, &H50, &H44, &H46, &H2D))) - - b(FileKind.Png) = ImmutableArray.Create( - Pattern(Prefix(0, &H89, &H50, &H4E, &H47, &HD, &HA, &H1A, &HA))) - - b(FileKind.Jpeg) = ImmutableArray.Create( - Pattern(Prefix(0, &HFF, &HD8, &HFF))) - - b(FileKind.Gif) = ImmutableArray.Create( - Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H37, &H61)), - Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H39, &H61))) - - b(FileKind.Webp) = ImmutableArray.Create( - Pattern(Prefix(0, &H52, &H49, &H46, &H46), Prefix(8, &H57, &H45, &H42, &H50))) - - b(FileKind.Zip) = ImmutableArray.Create( - Pattern(Prefix(0, &H50, &H4B, &H3, &H4)), - Pattern(Prefix(0, &H50, &H4B, &H5, &H6)), - Pattern(Prefix(0, &H50, &H4B, &H7, &H8))) - - Return b.ToImmutable() - End Function + ''' + ''' Erstellt ein aus Segmenten. + ''' + ''' Segmente, die gemeinsam matchen müssen. + ''' Magic-Pattern. Private Shared Function Pattern(ParamArray segments As MagicSegment()) As MagicPattern Return New MagicPattern(ImmutableArray.Create(segments)) End Function + ''' + ''' Erstellt ein Segment, das eine Bytefolge ab einem festen Offset im Header erwartet. + ''' + ''' Startoffset im Header. + ''' Erwartete Bytefolge. + ''' Magic-Segment. Private Shared Function Prefix(offset As Integer, ParamArray bytesValue As Byte()) As MagicSegment Return New MagicSegment(offset, ImmutableArray.Create(bytesValue)) End Function + ''' + ''' Prüft, ob ein einzelnes Segment am angegebenen Offset innerhalb der Daten exakt matcht. 
+ ''' Fail-closed: Bei ungültigen Parametern oder zu kurzen Daten wird False geliefert. + ''' + ''' Headerdaten. + ''' Segmentdefinition. + ''' True bei exaktem Match. Private Shared Function HasSegment(data As Byte(), segment As MagicSegment) As Boolean Dim endPos As Integer @@ -292,32 +374,50 @@ Namespace Global.Tomtastisch.FileClassifier Return True End Function + ''' + ''' Erzeugt den Aliasindex () aus der Typ-Registry. + ''' Aliases werden normalisiert; spätere Einträge überschreiben frühere deterministisch. + ''' + ''' Typ-Registry. + ''' Unveränderliches Dictionary Alias->Kind. Private Shared Function BuildAliasMap(types As ImmutableDictionary(Of FileKind, FileType)) _ As ImmutableDictionary(Of String, FileKind) - Dim entries As List(Of Tuple(Of FileKind, String)) + Dim builder As ImmutableDictionary(Of String, FileKind).Builder If types Is Nothing Then Return ImmutableDictionary(Of String, FileKind).Empty - entries = types. - Where(Function(kv) kv.Value IsNot Nothing). - Where(Function(kv) Not kv.Value.Aliases.IsDefault AndAlso kv.Value.Aliases.Length > 0). - SelectMany(Function(kv) kv.Value.Aliases. - Select(Function(aliasValue) Tuple.Create(kv.Key, NormalizeAlias(aliasValue)))). - Where(Function(item) item.Item2.Length > 0). - ToList() - - Return entries. - Aggregate(ImmutableDictionary.CreateBuilder(Of String, FileKind)(StringComparer.OrdinalIgnoreCase), - Function(builder, entry) - builder(entry.Item2) = entry.Item1 - Return builder - End Function). 
- ToImmutable() + builder = ImmutableDictionary.CreateBuilder(Of String, FileKind)(StringComparer.OrdinalIgnoreCase) + + For Each kv In types + Dim kind = kv.Key + Dim t = kv.Value + + If t Is Nothing Then Continue For + If t.Aliases.IsDefaultOrEmpty Then Continue For + + For i = 0 To t.Aliases.Length - 1 + Dim aliasKey = NormalizeAlias(t.Aliases(i)) + If aliasKey.Length = 0 Then Continue For + + builder(aliasKey) = kind + Next + Next + + Return builder.ToImmutable() End Function + ''' + ''' Normalisiert einen Aliaswert deterministisch. + ''' Entfernt führende Punkte, trimmt Whitespace und wandelt in Kleinbuchstaben (Invariant) um. + ''' + ''' Rohwert, z.B. ".PDF" oder " pdf ". + ''' Normalisierter Alias ohne Punkt oder leerer String. Friend Shared Function NormalizeAlias(raw As String) As String - Dim s = If(raw, String.Empty).Trim() - If s.StartsWith("."c) Then s = s.Substring(1) + Dim s As String = If(raw, String.Empty).Trim() + + If s.Length = 0 Then Return String.Empty + If s(0) = "."c Then s = s.Substring(1) + Return s.ToLowerInvariant() End Function @@ -348,7 +448,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger FileTypeDefinition für strukturierte Verarbeitungsschritte. ''' - Private Structure FileTypeDefinition + Friend Structure FileTypeDefinition Friend ReadOnly Kind As FileKind Friend ReadOnly CanonicalExtension As String Friend ReadOnly Aliases As String() @@ -366,7 +466,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger MagicRule für strukturierte Verarbeitungsschritte. ''' - Private Structure MagicRule + Friend Structure MagicRule Friend ReadOnly Kind As FileKind Friend ReadOnly Patterns As ImmutableArray(Of MagicPattern) @@ -379,7 +479,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger MagicPattern für strukturierte Verarbeitungsschritte. 
''' - Private Structure MagicPattern + Friend Structure MagicPattern Friend ReadOnly Segments As ImmutableArray(Of MagicSegment) Friend Sub New(segments As ImmutableArray(Of MagicSegment)) @@ -390,7 +490,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger MagicSegment für strukturierte Verarbeitungsschritte. ''' - Private Structure MagicSegment + Friend Structure MagicSegment Friend ReadOnly Offset As Integer Friend ReadOnly Bytes As ImmutableArray(Of Byte) From 0a4fbb500510dafa7f79124458519ab495ceca3e Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 15:59:32 +0100 Subject: [PATCH 16/34] fix(qodana): remove dead members and high-severity analyzer findings --- .../Configuration/FileTypeRegistryConfig.vb | 8 ----- .../Detection/FileTypeRegistry.vb | 20 ------------- src/FileTypeDetection/Utils/EnumUtils.vb | 30 +++++++++---------- src/FileTypeDetection/Utils/GuardUtils.vb | 2 +- .../Contracts/PublicApiAllowlistTests.cs | 1 - .../Steps/FileTypeDetectionSteps.cs | 3 +- .../Unit/HashingEvidenceTests.cs | 2 +- 7 files changed, 18 insertions(+), 48 deletions(-) diff --git a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb index 797e73dd..688b0861 100644 --- a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb +++ b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb @@ -78,14 +78,6 @@ Namespace Global.Tomtastisch.FileClassifier Return ImmutableArray.Create(values) End Function - ''' - ''' Liefert ein leeres Aliasarray. - ''' - ''' Leeres . - Private Function EmptyAliases() As ImmutableArray(Of String) - Return ImmutableArray(Of String).Empty - End Function - ''' ''' Liefert die Aliasgruppe für einen Gruppennamen. ''' Fail-closed: Unbekannte Gruppen liefern ein leeres Array. 
diff --git a/src/FileTypeDetection/Detection/FileTypeRegistry.vb b/src/FileTypeDetection/Detection/FileTypeRegistry.vb index 3293ab09..8b7d2e6d 100644 --- a/src/FileTypeDetection/Detection/FileTypeRegistry.vb +++ b/src/FileTypeDetection/Detection/FileTypeRegistry.vb @@ -331,26 +331,6 @@ Namespace Global.Tomtastisch.FileClassifier ToImmutableArray() End Function - - ''' - ''' Erstellt ein aus Segmenten. - ''' - ''' Segmente, die gemeinsam matchen müssen. - ''' Magic-Pattern. - Private Shared Function Pattern(ParamArray segments As MagicSegment()) As MagicPattern - Return New MagicPattern(ImmutableArray.Create(segments)) - End Function - - ''' - ''' Erstellt ein Segment, das eine Bytefolge ab einem festen Offset im Header erwartet. - ''' - ''' Startoffset im Header. - ''' Erwartete Bytefolge. - ''' Magic-Segment. - Private Shared Function Prefix(offset As Integer, ParamArray bytesValue As Byte()) As MagicSegment - Return New MagicSegment(offset, ImmutableArray.Create(bytesValue)) - End Function - ''' ''' Prüft, ob ein einzelnes Segment am angegebenen Offset innerhalb der Daten exakt matcht. ''' Fail-closed: Bei ungültigen Parametern oder zu kurzen Daten wird False geliefert. diff --git a/src/FileTypeDetection/Utils/EnumUtils.vb b/src/FileTypeDetection/Utils/EnumUtils.vb index 37db6371..2c426c1b 100644 --- a/src/FileTypeDetection/Utils/EnumUtils.vb +++ b/src/FileTypeDetection/Utils/EnumUtils.vb @@ -32,7 +32,7 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' Nicht-Ziele: ''' - Keine zustandsbehaftete Logik. ''' - Keine Abhängigkeiten auf Projektdienste (I/O, Logger, Policy-Engine). - ''' - Keine Reflection-Features außer . + ''' - Keine Reflection-Features außer Enum.GetValues(Type). 
''' ''' Public NotInheritable Class EnumUtils @@ -62,7 +62,7 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' ''' ''' Ablaufstruktur: - ''' 1) Werte werden über geladen, + ''' 1) Werte werden über Enum.GetValues(Type) geladen, ''' 2) Ausgabe erfolgt als typisiertes Array TEnum(). ''' ''' @@ -100,10 +100,10 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' ''' ''' Ablaufstruktur: - ''' 1) Werte werden über geladen, + ''' 1) Werte werden über Enum.GetValues(Type) geladen, ''' 2) optional: Sortierung nach numerischem Enum-Wert, ''' 3) Range wird deterministisch geklemmt, - ''' 4) Ausgabe erfolgt als Slice über . + ''' 4) Ausgabe erfolgt als Slice über Array.Copy(values, from, result, 0, length). ''' ''' ''' Range-Semantik (0-basiert, inklusive): @@ -133,7 +133,7 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' Enum-Typ. ''' ''' Sortierreihenfolge: - ''' - : keine Sortierung (Originalreihenfolge von ). + ''' - : keine Sortierung (Originalreihenfolge von Enum.GetValues(Type)). ''' - : aufsteigend nach numerischem Enum-Wert. ''' - : absteigend nach numerischem Enum-Wert. 
''' @@ -160,21 +160,21 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ' Deklarationsblock Dim enumType As Type = GetType(TEnum) - Dim raw As Array = Nothing + Dim raw As Array - Dim values() As TEnum = Nothing - Dim keys() As Long = Nothing + Dim values() As TEnum + Dim keys() As Long Dim i As Integer - Dim count As Integer = 0 - Dim maxIndex As Integer = 0 + Dim count As Integer + Dim maxIndex As Integer - Dim effectiveTo As Integer = 0 - Dim effectiveMaxFrom As Integer = 0 - Dim effectiveFrom As Integer = 0 + Dim effectiveTo As Integer + Dim effectiveMaxFrom As Integer + Dim effectiveFrom As Integer - Dim length As Integer = 0 - Dim result() As TEnum = Nothing + Dim length As Integer + Dim result() As TEnum ' ----------------------------------------------------------------- diff --git a/src/FileTypeDetection/Utils/GuardUtils.vb b/src/FileTypeDetection/Utils/GuardUtils.vb index 76cececa..36b3941f 100644 --- a/src/FileTypeDetection/Utils/GuardUtils.vb +++ b/src/FileTypeDetection/Utils/GuardUtils.vb @@ -110,7 +110,7 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' ''' Ablaufstruktur: ''' 1) Prüft auf Nothing und Enum-Typ, - ''' 2) prüft Definition via , + ''' 2) prüft Definition via Enum.IsDefined(enumType, value), ''' 3) bei Verstoß: . 
''' ''' diff --git a/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs b/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs index d8f2e7c2..27992a4e 100644 --- a/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs @@ -1,4 +1,3 @@ -using System.Reflection; using Tomtastisch.FileClassifier; namespace FileTypeDetectionLib.Tests.Contracts; diff --git a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs index 7c2542d2..ea99f326 100644 --- a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs +++ b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs @@ -1,4 +1,3 @@ -using System.Collections.Generic; using FileTypeDetectionLib.Tests.Support; using Reqnroll; using Tomtastisch.FileClassifier; @@ -548,7 +547,7 @@ private static void AssertResourceExists(string name) private static bool TryParseFileKindLiteral(string literal, out FileKind kind) { - if (Enum.TryParse(literal, true, out kind)) + if (Enum.TryParse(literal, true, out kind)) { return true; } diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs index 02d6a62d..313e8af8 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs @@ -1123,7 +1123,7 @@ public void CopyBytes_ReturnsEmpty_ForNullOrEmpty() public void TryReadFileBounded_ReturnsFalse_ForMissingPathOrOptions() { var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIo"); - var method = ioType.GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; + var method = ioType.GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static); Assert.NotNull(method); var bytes = 
Array.Empty(); From 9ccf81493509307bb5b8cc6783faa49a871b24ef Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 16:07:22 +0100 Subject: [PATCH 17/34] fix(review): close remaining policy and naming threads --- .../Abstractions/Archive/ZipExtractedEntry.vb | 5 +---- src/FileTypeDetection/Abstractions/Detection/FileKind.vb | 6 +++--- .../Abstractions/Hashing/HashRoundTripReport.vb | 6 ++++-- .../Internal/{EvidenceHashingIo.vb => EvidenceHashingIO.vb} | 4 ++-- .../Hashing/Internal/EvidenceHashingRoundTrip.vb | 2 +- .../Abstractions/Hashing/Internal/README.md | 2 +- src/FileTypeDetection/EvidenceHashing.vb | 2 +- src/FileTypeDetection/Infrastructure/ArchiveInternals.vb | 6 +++--- src/FileTypeDetection/Utils/EnumUtils.vb | 2 +- src/FileTypeDetection/Utils/GuardUtils.vb | 2 +- src/FileTypeDetection/Utils/IterableUtils.vb | 2 +- .../FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs | 4 ++-- 12 files changed, 21 insertions(+), 22 deletions(-) rename src/FileTypeDetection/Abstractions/Hashing/Internal/{EvidenceHashingIo.vb => EvidenceHashingIO.vb} (97%) diff --git a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb index 083ea3b5..a9e90398 100644 --- a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb +++ b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb @@ -30,10 +30,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Unveränderlicher Byteinhalt des Eintrags. ''' - Public ReadOnly Property Content As ImmutableArray _ - ( - Of Byte - ) + Public ReadOnly Property Content As ImmutableArray(Of Byte) ''' ''' Größe des Eintragsinhalts in Bytes. 
diff --git a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb index 2365e730..10710795 100644 --- a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb +++ b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb @@ -56,17 +56,17 @@ Namespace Global.Tomtastisch.FileClassifier Zip ''' - ''' Office Open XML Word-Dokument (DOC. + ''' Office-Word-Dokument (DOC). ''' Doc ''' - ''' Office Open XML Excel-Dokument (XLS). + ''' Office-Excel-Dokument (XLS). ''' Xls ''' - ''' Office Open XML PowerPoint-Dokument (PPT). + ''' Office-PowerPoint-Dokument (PPT). ''' Ppt End Enum diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb index 3a3b95dd..76666791 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb @@ -1,5 +1,5 @@ ' ============================================================================ -' FILE: (neue version)HashRoundTripReport.vb +' FILE: HashRoundTripReport.vb ' ' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) ' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD @@ -107,8 +107,10 @@ Namespace Global.Tomtastisch.FileClassifier ' Deklarationsblock (Pflicht, spaltenartig) Dim slotCount As Integer = RequiredSlots.Length Dim i As Integer + Dim idx As Integer Dim baseEvidence As HashEvidence Dim otherEvidence As HashEvidence + Dim otherSlot As HashSlot Dim eqLogical As Boolean Dim consistentLocal As Boolean = True @@ -143,7 +145,7 @@ Namespace Global.Tomtastisch.FileClassifier For idx = 0 To slotCount - 2 - Dim otherSlot = RequiredSlots(idx + 1) + otherSlot = RequiredSlots(idx + 1) otherEvidence = _evidences(SlotIndex(otherSlot)) eqLogical = EqualLogical(baseEvidence, otherEvidence) diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIo.vb 
b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIO.vb similarity index 97% rename from src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIo.vb rename to src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIO.vb index e1856b72..85a866e1 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIo.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIO.vb @@ -1,5 +1,5 @@ ' ============================================================================ -' FILE: EvidenceHashingIo.vb +' FILE: EvidenceHashingIO.vb ' ' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) ' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD @@ -17,7 +17,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Die Komponente erzwingt MaxBytes-Limits fail-closed und liefert deterministische Fehltexte. ''' - Friend NotInheritable Class EvidenceHashingIo + Friend NotInheritable Class EvidenceHashingIO Private Sub New() End Sub diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb index 2c7e23cf..aa78b1c0 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb @@ -83,7 +83,7 @@ Namespace Global.Tomtastisch.FileClassifier failed) End If - If Not EvidenceHashingIo.TryReadFileBounded(path, detectorOptions, originalBytes, readError) Then + If Not EvidenceHashingIO.TryReadFileBounded(path, detectorOptions, originalBytes, readError) Then failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, readError) Return New HashRoundTripReport( path, diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md b/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md index 48196a1e..eea3a8cc 100644 --- 
a/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md @@ -6,7 +6,7 @@ Dieses Verzeichnis enthaelt interne, zustandslose Hashing-Bausteine hinter der o ## 2. Inhalt - `EvidenceHashingCore.vb` - `EvidenceHashingRoundTrip.vb` -- `EvidenceHashingIo.vb` +- `EvidenceHashingIO.vb` ## 3. API und Verhalten - Keine Public API in diesem Verzeichnis. diff --git a/src/FileTypeDetection/EvidenceHashing.vb b/src/FileTypeDetection/EvidenceHashing.vb index 2a403e95..d5bb9a4a 100644 --- a/src/FileTypeDetection/EvidenceHashing.vb +++ b/src/FileTypeDetection/EvidenceHashing.vb @@ -92,7 +92,7 @@ Namespace Global.Tomtastisch.FileClassifier Return Failure(HashSourceType.FilePath, path, "Datei nicht gefunden.") End If - If Not EvidenceHashingIo.TryReadFileBounded(path, detectorOptions, fileBytes, readError) Then + If Not EvidenceHashingIO.TryReadFileBounded(path, detectorOptions, fileBytes, readError) Then Return Failure(HashSourceType.FilePath, path, readError) End If diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index 121eacea..f5e82779 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -424,7 +424,7 @@ Namespace Global.Tomtastisch.FileClassifier If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then Return emptyResult - End if + End If Try StreamGuard.RewindToStart(stream) @@ -445,7 +445,7 @@ Namespace Global.Tomtastisch.FileClassifier Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -493,7 +493,7 @@ Namespace Global.Tomtastisch.FileClassifier destinationFull = 
Path.GetFullPath(destinationDirectory) Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is PathTooLongException OrElse TypeOf ex Is NotSupportedException OrElse diff --git a/src/FileTypeDetection/Utils/EnumUtils.vb b/src/FileTypeDetection/Utils/EnumUtils.vb index 2c426c1b..5e8f83e3 100644 --- a/src/FileTypeDetection/Utils/EnumUtils.vb +++ b/src/FileTypeDetection/Utils/EnumUtils.vb @@ -35,7 +35,7 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' - Keine Reflection-Features außer Enum.GetValues(Type). ''' ''' - Public NotInheritable Class EnumUtils + Friend NotInheritable Class EnumUtils Private Sub New() End Sub diff --git a/src/FileTypeDetection/Utils/GuardUtils.vb b/src/FileTypeDetection/Utils/GuardUtils.vb index 36b3941f..b2511984 100644 --- a/src/FileTypeDetection/Utils/GuardUtils.vb +++ b/src/FileTypeDetection/Utils/GuardUtils.vb @@ -32,7 +32,7 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' - Keine stillen Korrekturen, keine Side-Effects. ''' ''' - Public NotInheritable Class GuardUtils + Friend NotInheritable Class GuardUtils Private Sub New() End Sub diff --git a/src/FileTypeDetection/Utils/IterableUtils.vb b/src/FileTypeDetection/Utils/IterableUtils.vb index e0f20b6e..1b7e2940 100644 --- a/src/FileTypeDetection/Utils/IterableUtils.vb +++ b/src/FileTypeDetection/Utils/IterableUtils.vb @@ -29,7 +29,7 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' - Nothing bleibt Nothing; es findet keine implizite Erzeugung leerer Arrays statt. 
''' ''' - Public NotInheritable Class IterableUtils + Friend NotInheritable Class IterableUtils Private Sub New() End Sub diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs index 313e8af8..2450fe11 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs @@ -1122,7 +1122,7 @@ public void CopyBytes_ReturnsEmpty_ForNullOrEmpty() [Fact] public void TryReadFileBounded_ReturnsFalse_ForMissingPathOrOptions() { - var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIo"); + var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIO"); var method = ioType.GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static); Assert.NotNull(method); @@ -1141,7 +1141,7 @@ public void TryReadFileBounded_ReturnsFalse_ForMissingPathOrOptions() [Fact] public void TryReadFileBounded_ReturnsFalse_WhenFileTooLarge() { - var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIo"); + var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIO"); var method = ioType.GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); From 53f5810f0a113420a266d43daa3e46b5c5c1813e Mon Sep 17 00:00:00 2001 From: "openai-code-agent[bot]" <242516109+Codex@users.noreply.github.com> Date: Sat, 21 Feb 2026 17:28:59 +0000 Subject: [PATCH 18/34] Initial plan From 4b9968f85233bb624376aeea84ef150371b26204 Mon Sep 17 00:00:00 2001 From: Tomtastisch <82227609+tomtastisch@users.noreply.github.com> Date: Sat, 21 Feb 2026 21:58:10 +0100 Subject: [PATCH 19/34] =?UTF-8?q?Revert=20"release(core):=20hashing=20fina?= =?UTF-8?q?lisieren=20und=20lokale=206.0.1-Konvergenz=20vorbe=E2=80=A6"?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4badf0f002174dd324cd029295f781b5cbb034e3. --- AGENTS.md | 15 - Directory.Build.props | 2 +- SECURITY.md | 4 +- .../003_SECURITY_ASSERTION_TRACEABILITY.MD | 2 +- .../103_SECURITY_ASSERTION_TRACEABILITY.MD | 2 +- docs/governance/045_CODE_QUALITY_POLICY_DE.MD | 15 - docs/governance/045_COMPLIANCE_STATUS_DE.MD | 44 -- .../046_ISSUE_105_106_107_CLOSURE_DE.MD | 76 --- docs/governance/145_CODE_QUALITY_POLICY_DE.MD | 15 - docs/versioning/002_HISTORY_VERSIONS.MD | 4 +- docs/versioning/003_CHANGELOG_RELEASES.MD | 22 - docs/versioning/102_HISTORY_VERSIONS.MD | 4 +- docs/versioning/103_CHANGELOG_RELEASES.MD | 22 - .../Abstractions/Archive/ZipExtractedEntry.vb | 8 +- .../Abstractions/Detection/DetectionDetail.vb | 14 +- .../Abstractions/Detection/FileKind.vb | 12 +- .../Abstractions/Detection/FileType.vb | 12 +- .../Abstractions/Hashing/HashDigestSet.vb | 46 +- .../Abstractions/Hashing/HashEvidence.vb | 68 +- .../Abstractions/Hashing/HashOptions.vb | 40 +- .../Hashing/HashRoundTripReport.vb | 303 +++------ .../Hashing/Internal/EvidenceHashingCore.vb | 505 -------------- .../Hashing/Internal/EvidenceHashingIO.vb | 95 --- .../Internal/EvidenceHashingRoundTrip.vb | 179 ----- .../Abstractions/Hashing/Internal/README.md | 24 - src/FileTypeDetection/ArchiveProcessing.vb | 9 +- .../Configuration/FileTypeRegistryConfig.vb | 248 ------- .../Detection/FileTypeRegistry.vb | 342 ++++------ src/FileTypeDetection/EvidenceHashing.vb | 639 ++++++++++++++---- src/FileTypeDetection/FileMaterializer.vb | 26 +- .../FileTypeDetectionLib.vbproj | 8 +- src/FileTypeDetection/FileTypeDetector.vb | 6 +- src/FileTypeDetection/FileTypeOptions.vb | 85 +-- .../Infrastructure/ArchiveInternals.vb | 494 +++++--------- .../Infrastructure/ArchiveManagedInternals.vb | 53 +- .../Infrastructure/CoreInternals.vb | 347 +++++++++- .../Infrastructure/MimeProvider.vb | 10 +- .../Infrastructure/README.md | 5 +- 
.../Infrastructure/Utils/EnumUtils.vb | 252 ------- .../Utils/Guards/ArchiveGuards.vb | 231 ------- .../Utils/Guards/ArgumentGuard.vb | 163 ----- .../Utils/Guards/DestinationPathGuard.vb | 182 ----- .../Utils/Guards/ExceptionFilterGuard.vb | 65 -- .../Infrastructure/Utils/Guards/IOGuards.vb | 85 --- .../Infrastructure/Utils/Guards/LogGuard.vb | 73 -- .../Utils/Guards/PathResolutionGuard.vb | 58 -- .../Infrastructure/Utils/Guards/README.md | 34 - .../Infrastructure/Utils/IterableUtils.vb | 77 --- .../Infrastructure/Utils/README.md | 39 -- .../Net8_0Plus/HashPrimitivesProvider.vb | 30 +- .../NetStandard2_0/HashPrimitivesProvider.vb | 30 +- src/FileTypeDetection/README.md | 1 - .../Contracts/CodePolicy045ComplianceTests.cs | 63 -- .../Contracts/PublicApiAllowlistTests.cs | 48 -- .../Contracts/public-api.snapshot.txt | 26 +- .../FileTypeDetectionLib.Tests.csproj | 1 - .../Steps/FileTypeDetectionSteps.cs | 29 +- ...tionDetailAndArchiveValidationUnitTests.cs | 2 +- .../Unit/EndToEndFailClosedMatrixUnitTests.cs | 64 +- .../Unit/ExtensionCheckUnitTests.cs | 10 +- .../Unit/FileTypeDetectorEdgeUnitTests.cs | 2 +- .../FileTypeDetectorPrivateBranchUnitTests.cs | 30 +- .../Unit/HashingEvidenceTests.cs | 79 +-- .../Unit/HeaderDetectionWarningUnitTests.cs | 2 +- .../Unit/HeaderOnlyPolicyUnitTests.cs | 2 +- .../LegacyOfficeBinaryRefinerUnitTests.cs | 6 +- .../Unit/OpenXmlRefinerUnitTests.cs | 20 +- tools/audit/verify-security-claims.sh | 9 +- tools/versioning/check-version-policy.sh | 35 +- .../versioning/verify-version-convergence.sh | 6 +- 70 files changed, 1466 insertions(+), 4093 deletions(-) delete mode 100644 docs/governance/045_COMPLIANCE_STATUS_DE.MD delete mode 100644 docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD delete mode 100644 src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb delete mode 100644 src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIO.vb delete mode 100644 
src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb delete mode 100644 src/FileTypeDetection/Abstractions/Hashing/Internal/README.md delete mode 100644 src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/EnumUtils.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/README.md delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/IterableUtils.vb delete mode 100644 src/FileTypeDetection/Infrastructure/Utils/README.md delete mode 100644 tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs delete mode 100644 tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs diff --git a/AGENTS.md b/AGENTS.md index db9b6d16..3b17ba01 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -31,21 +31,6 @@ - Code-/Test-/Dokuaenderung im PR mit nachvollziehbarer Evidence. - begruendete Widerlegung als `ASSUMPTION` + Verifikationsnachweis, warum keine Aenderung noetig ist. - Unzulaessig: Threads ohne Bearbeitung nur aus Prozessgruenden zu resolven. -- Verbindlicher Einzelkommentar-Workflow (ab sofort): - - Jeder Review-Kommentar/Thread wird einzeln und iterativ bearbeitet (keine Sammelabarbeitung mehr). - - Fuer jeden bearbeiteten Kommentar gilt: genau ein dedizierter Commit fuer die konkrete Umsetzung. 
- - Vor `resolved` ist im Thread immer ein Nachweis-Kommentar zu hinterlassen: - - entweder Commit-/Push-Link als Evidence der Umsetzung, - - oder nachvollziehbare Gegenargumentation als `ASSUMPTION` inkl. Verifikationsnachweis. - - Erst danach darf genau dieser Thread auf `resolved` gesetzt werden. - - Bei CI-Nacharbeiten gilt dieselbe Regel: - - pro verursachender Reparatur genau ein Commit, - - eigener Review-Nachweis-Kommentar mit Ursache und Evidence, - - danach erst `resolved`. - - Push-Green-Regel: - - lokale Einzelabarbeitung aller offenen Kommentare gemaess obiger Regeln, - - danach Push und Pflicht-Checks auf gruen, - - Merge erst bei gruenen Checks und ohne offene Threads. - Merge nur wenn: - required checks gruener Status, - keine offenen Review-Threads, diff --git a/Directory.Build.props b/Directory.Build.props index 79cdb468..91864408 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,6 +5,6 @@ true - 6.0.1 + 5.2.1 diff --git a/SECURITY.md b/SECURITY.md index 0cb09805..1cc33bd2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,8 +16,8 @@ Security-Fixes werden nur fuer den aktuell unterstuetzten Major bereitgestellt. | Version | Security-Support | | ------- | ---------------- | -| 6.x | Ja | -| < 6.0 | Nein | +| 5.x | Ja | +| < 5.0 | Nein | ## 3. Meldung einer Sicherheitsluecke Bitte melde Sicherheitsluecken **nicht** ueber oeffentliche Issues. diff --git a/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD b/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD index b37e106a..fcd3c24f 100644 --- a/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD +++ b/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD @@ -9,7 +9,7 @@ Abbildung von Aussagen in `SECURITY.md` auf Nachweisquellen und Verifikationskom | Claim ID | SECURITY-Anker | Claim-Zusammenfassung | Nachweisquelle | Verifikationskommando | Pass-Kriterium | Blocker | |---|---|---|---|---|---|---| -| SEC-CLAIM-001 | 2. 
Unterstuetzte Versionen | Security-Support ist an Major 6 gebunden | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version-Major ist `6` | yes | +| SEC-CLAIM-001 | 2. Unterstuetzte Versionen | Security-Support ist an Major 5 gebunden | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version-Major ist `5` | yes | | SEC-CLAIM-002 | 3. Meldung | Private Vulnerability Reporting ist aktiv | GitHub API `private-vulnerability-reporting` | `gh api "repos/$REPO/private-vulnerability-reporting"` | `.enabled == true` | yes | | SEC-CLAIM-003 | 9. Nachweisbarkeit | Dependabot Security Updates sind aktiv | GitHub API (`security_and_analysis` oder `automated-security-fixes`) | `gh api "repos/$REPO" --jq '.security_and_analysis.dependabot_security_updates.status' || gh api "repos/$REPO/automated-security-fixes" --jq '.enabled'` | `enabled` oder `true` | yes | | SEC-CLAIM-004 | 9. Nachweisbarkeit | Secret Scanning ist aktiv | GitHub API (`security_and_analysis` oder Secret-Scanning-Alerts Endpoint) | `gh api "repos/$REPO" --jq '.security_and_analysis.secret_scanning.status' || gh api "repos/$REPO/secret-scanning/alerts?per_page=1"` | `enabled` oder Endpoint erfolgreich erreichbar | yes | diff --git a/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD b/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD index 3b278577..e9d3d6ab 100644 --- a/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD +++ b/docs/audit/103_SECURITY_ASSERTION_TRACEABILITY.MD @@ -9,7 +9,7 @@ Mapping of claims in `SECURITY.md` to evidence sources and verification commands | Claim ID | SECURITY Anchor | Claim Summary | Evidence Source | Verification Command | Pass Criteria | Blocker | |---|---|---|---|---|---|---| -| SEC-CLAIM-001 | 2. 
Unterstuetzte Versionen | Security support is tied to major 6 | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version major is `6` | yes | +| SEC-CLAIM-001 | 2. Unterstuetzte Versionen | Security support is tied to major 5 | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*\([^<]*\).*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version major is `5` | yes | | SEC-CLAIM-002 | 3. Meldung | Private vulnerability reporting path exists | GitHub API `private-vulnerability-reporting` | `gh api "repos/$REPO/private-vulnerability-reporting"` | `.enabled == true` | yes | | SEC-CLAIM-003 | 9. Nachweisbarkeit | Dependabot security updates enabled | GitHub API (`security_and_analysis` or `automated-security-fixes`) | `gh api "repos/$REPO" --jq '.security_and_analysis.dependabot_security_updates.status' || gh api "repos/$REPO/automated-security-fixes" --jq '.enabled'` | `enabled` or `true` | yes | | SEC-CLAIM-004 | 9. 
Nachweisbarkeit | Secret scanning enabled | GitHub API (`security_and_analysis` or Secret-Scanning-Alerts Endpoint) | `gh api "repos/$REPO" --jq '.security_and_analysis.secret_scanning.status' || gh api "repos/$REPO/secret-scanning/alerts?per_page=1"` | `enabled` or endpoint reachable | yes | diff --git a/docs/governance/045_CODE_QUALITY_POLICY_DE.MD b/docs/governance/045_CODE_QUALITY_POLICY_DE.MD index 2a3e8876..2f45a718 100644 --- a/docs/governance/045_CODE_QUALITY_POLICY_DE.MD +++ b/docs/governance/045_CODE_QUALITY_POLICY_DE.MD @@ -79,21 +79,6 @@ Blöcke werden sichtbar getrennt (Leerzeile + Kommentartrenner): - Fallback - I/O Helpers separat -### 5.4 Methodenkopf- und Umbruchschema (verbindlich) -- Für neue oder in einem Ticket angepasste Signaturen sind mehrzeilige Signaturen Pflicht, wenn mindestens einer der Punkte zutrifft: - - mehr als ein Parameter - - Signatur würde in eine lange Zeile kippen (Richtwert: > 100 Zeichen) -- Format für mehrzeilige Signaturen: - - Membername mit Zeilenfortsetzung (`_`) - - öffnende Klammer in eigener Zeile - - genau ein Parameter pro Zeile - - schließende Klammer und Rückgabetyp gemeinsam in einer Zeile -- Nach jeder Methodensignatur (einzeilig oder mehrzeilig) folgt genau eine Leerzeile, - bevor Deklarationsblock oder erste ausführbare Anweisung beginnt. -- Das Schema gilt einheitlich für `Sub`/`Function`/Konstruktoren/Interface-Member. -- Lange Aufrufe werden analog umgebrochen (ein Argument pro Zeile, klar ausgerichtet). -- Keine Trailing-Whitespace-Zeichen und keine „leeren“ Zeilen mit Spaces/Tabs. - ## 6. Variablenregel (Pflicht) - Alle lokalen Variablen werden im „Deklarationsblock“ am Anfang der Funktion definiert. 
- Platzierung: diff --git a/docs/governance/045_COMPLIANCE_STATUS_DE.MD b/docs/governance/045_COMPLIANCE_STATUS_DE.MD deleted file mode 100644 index 0c87eb98..00000000 --- a/docs/governance/045_COMPLIANCE_STATUS_DE.MD +++ /dev/null @@ -1,44 +0,0 @@ -# 045 Compliance Status (DE) - -Stand: 2026-02-20 -Geltungsbereich: `src/FileTypeDetection/*` - -## Ziel -Nachweisbare, reproduzierbare Einhaltung der Kernvorgaben aus `045_CODE_QUALITY_POLICY_DE.MD` über automatisierte Tests und wiederholbare Build-/Testläufe. - -## Automatisierte Governance-Gates -1. Public-API-Allowlist-Gate - Test: `PublicApiAllowlistTests` - Datei: `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs` - Wirkung: Unerwartete neue Public-Typen werden als blocker erkannt. - -2. Public-API-Snapshot-Gate - Test: `PublicApiContractSnapshotTests` - Datei: `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiContractSnapshotTests.cs` - Wirkung: Surface-Drift in Public API wird deterministisch sichtbar. - -3. 045 Core-Layout-/Policy-Gate - Test: `CodePolicy045ComplianceTests` - Datei: `tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs` - Geprüfte Regeln: - - Header enthält `' FILE:` und `INTERNE POLICY` - - `Option Strict On` und `Option Explicit On` vorhanden - - Reihenfolge: Header -> Options -> Namespace - - Verbotener Catch-Pseudofilter wird ausgeschlossen - - Dateien mit `Public`-Typen enthalten XML-`summary` - - Scan nur auf Source-Dateien (Ausschluss `bin/` und `obj/`) - -## Reproduzierbare Verifikation -1. `dotnet build FileClassifier.sln -v minimal` -2. `dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal` - -## Iterative Verbesserungen (Issue 03) -1. Interne Duplikatkonsolidierung: - - `FileTypeDetector` nutzt zentrale Registry-Regel für structured refinement. - - Alias-Definitionen in `FileTypeRegistry` über Helper vereinheitlicht. -2. 
Policy-Header harmonisiert: - - `HashRoundTripReport.vb`, `EnumUtils.vb`, `ArgumentGuard.vb`, `IterableUtils.vb`. - -## Hinweis -Diese Statusdatei dokumentiert den nachweisbaren Kernumfang der automatisierten 045-Compliance-Prüfung. -Erweiterungen an den Governance-Gates sind möglich, müssen jedoch fail-closed und mit niedriger False-Positive-Rate erfolgen. diff --git a/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD b/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD deleted file mode 100644 index 01cf6e3f..00000000 --- a/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD +++ /dev/null @@ -1,76 +0,0 @@ -# Issue Closure Matrix (DE) - #105 / #106 / #107 - -Stand: 2026-02-21 - -## Ziel -Nachweis, dass alle offenen Punkte aus den Issues `#105`, `#106` und `#107` im PR `#108` umgesetzt sind. - -## Issue #105 - HashRoundTripReport ersetzen und Usages migrieren -Status: umgesetzt. - -Abdeckung: -- Datei ersetzt: `src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb` -- Legacy-Usages migriert auf neue API (`HashSlot`, `Evidence(slot)`, `LogicalEquals(slot)`, `PhysicalEquals(slot)`). -- API-Contract/Snapshot aktualisiert: - - `tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt` - -Geforderte `rg`-Trefferliste alt vs. neu: -1. Legacy-API-Symbole (alt) - - Kommando: - - `rg -n "LogicalH1Equals|PhysicalH1Equals|HashRoundTripReport\\.(H1|H2|H3|H4)\\b" src tests -S` - - Ergebnis: - - `0` Treffer -2. Neue API-Symbole (neu) - - Kommando: - - `rg -n "HashSlot|Evidence\\(|LogicalEquals\\(|PhysicalEquals\\(" src tests -S` - - Ergebnis: - - Treffer in `HashRoundTripReport.vb`, `HashingEvidenceTests.cs`, `FileTypeDetectionSteps.cs` und `public-api.snapshot.txt`. - -Build/Test-Evidence: -- `dotnet build FileClassifier.sln -v minimal` -> pass -- `dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal` -> pass - -## Issue #106 - Public-API-Policy haerten (Allowlist + Contract-Gates) -Status: umgesetzt. 
- -Abdeckung: -- Explizite Public-API-Allowlist als blocker: - - `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs` -- Public-API-Snapshot-Gate als blocker: - - `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiContractSnapshotTests.cs` -- Policy-Compliance-Gate als blocker: - - `tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs` - -Dokumentierte freigegebene Public-Hauptklassen: -- In `AllowedPublicTypes` innerhalb - `tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs`. - -Reproduzierbarer Governance-Check: -- `dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal` - -## Issue #107 - Interne Refactor-/Utility-Konsolidierung -Status: umgesetzt. - -Refactor-Matrix (Duplikatquelle -> neue Utility-Stelle): -1. Hashing-Kernlogik in Fassade `EvidenceHashing.vb` - - -> `EvidenceHashingCore.vb` - - Methoden u. a.: `BuildEvidenceFromEntries`, `BuildEvidenceFromRawPayload`, - `TryNormalizeEntries`, `BuildLogicalManifestBytes`, `ComputeSha256Hex`, - `ComputeFastHash`, `ComputeHmacSha256Hex`. -2. RoundTrip-I/O/Temp-Lifecycle in Fassade - - -> `EvidenceHashingRoundTrip.vb` - - Pipeline: TempRoot/Target, Materialisierung, Cleanup, Report-Bildung. -3. Gebundene Datei-Leselogik/Fehleraufbereitung - - -> `EvidenceHashingIo.vb` - - Methoden: `TryReadFileBounded`, `SetReadFileError`. -4. Enum- und Guard-Helfer in mehreren Klassen - - -> `EnumUtils.vb`, `ArgumentGuard.vb`, `IterableUtils.vb` - - Einsatz sichtbar in `HashRoundTripReport.vb`. - -Evidence-Kommandos: -- `rg -n "EvidenceHashingCore|EvidenceHashingRoundTrip|EvidenceHashingIo" src tests -S` -- `rg -n "EnumUtils\\.|ArgumentGuard\\.|IterableUtils\\." src tests -S` -- `dotnet test FileClassifier.sln -v minimal` - -## Entscheidung -Alle offenen Punkte aus `#105`, `#106` und `#107` sind im aktuellen PR-Stand umgesetzt und auditierbar nachgewiesen. 
diff --git a/docs/governance/145_CODE_QUALITY_POLICY_DE.MD b/docs/governance/145_CODE_QUALITY_POLICY_DE.MD index 5a9789cd..41940479 100644 --- a/docs/governance/145_CODE_QUALITY_POLICY_DE.MD +++ b/docs/governance/145_CODE_QUALITY_POLICY_DE.MD @@ -68,21 +68,6 @@ Use visible block separation (empty line + block comment markers): - fallback - I/O helpers separated -### 5.4 Method Header and Wrapping Scheme (binding) -- For new or ticket-touched signatures, multiline signatures are mandatory when at least one applies: - - more than one parameter - - signature would become a long line (guideline: > 100 characters) -- Format for multiline signatures: - - member name followed by line continuation (`_`) - - opening parenthesis on its own line - - exactly one parameter per line - - closing parenthesis and return type on one line -- After every method signature (single-line or multiline), include exactly one blank line - before the declaration block or first executable statement. -- This applies consistently to `Sub`/`Function`/constructors/interface members. -- Long invocations are wrapped analogously (one argument per line, aligned clearly). -- No trailing whitespace and no visually empty lines containing spaces/tabs. - ## 6. Variable Rule - All local variables are declared in a declaration block at the start of the function. - Placement: diff --git a/docs/versioning/002_HISTORY_VERSIONS.MD b/docs/versioning/002_HISTORY_VERSIONS.MD index 0aa01db4..cbaacc44 100644 --- a/docs/versioning/002_HISTORY_VERSIONS.MD +++ b/docs/versioning/002_HISTORY_VERSIONS.MD @@ -12,7 +12,7 @@ Heuristik fuer die Rueckwirkungs-Zuordnung: - `docs|test|ci|chore|tooling|refactor|fix` => Patch Aktueller Entwicklungsstand: -- Aktuelle Entwicklungslinie enthaelt `6.x` (aktueller Arbeitsstand: `v6.0.1`; Details in `docs/versioning/003_CHANGELOG_RELEASES.MD`). +- Aktuelle Entwicklungslinie enthaelt `5.x` (aktueller Arbeitsstand: `v5.2.1`; Details in `docs/versioning/003_CHANGELOG_RELEASES.MD`). 
Hinweis: - Die Spalte `Keyword` verwendet den technischen Klassifizierungswert aus der Historie. @@ -20,8 +20,6 @@ Hinweis: | Version | Kurzbeschreibung | Commit | Keyword | |---|---|---|---| -| `6.0.1` | Refactor-Haertung: interne SSOT-Utilities nach `Infrastructure/Utils` konsolidiert, Core-Utility-Logik in dedizierte Dateien gesplittet und Duplikat-Guards vereinheitlicht | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | patch | -| `6.0.0` | Breaking-Release: `FileKind`-Enum und `HashRoundTripReport`-Public-API auf neues Slot-/Methodenmodell umgestellt, Hashing-Interna in Core/RoundTrip/Io ausgelagert | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | SharpCompress-API auf typsichere Aufrufe umgestellt, tar.gz-Verarbeitung fail-closed gehaertet und Qodana-CI-Gate als Pflichtlauf dokumentiert/erzwungen | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | netstandard2.0-Compat-Layer eingefuehrt, Provider-Struktur konsolidiert und TFM-Multi-Targeting erweitert | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | | `5.1.4` | Refactor-Cluster 7C abgeschlossen + Qodana-Alerts auf 0 + Version-Bump fuer Release | [2adeb83](https://github.com/tomtastisch/FileClassifier/commit/2adeb83) | patch | diff --git a/docs/versioning/003_CHANGELOG_RELEASES.MD b/docs/versioning/003_CHANGELOG_RELEASES.MD index a75a0c41..0adec393 100644 --- a/docs/versioning/003_CHANGELOG_RELEASES.MD +++ b/docs/versioning/003_CHANGELOG_RELEASES.MD @@ -7,28 +7,6 @@ Alle Aenderungen werden hier technisch dokumentiert. Die Release-Version selbst ist der Git-Tag `vX.Y.Z` (optional `-prerelease`) als SSOT. -## [6.0.1] -- Added: - - Neues internes Submodul `src/FileTypeDetection/Infrastructure/Utils/` als SSOT fuer wiederverwendbare Guard-/I/O-/Pfad-/Logging-Helfer. 
-- Changed: - - Utility-Klassen aus `CoreInternals.vb` in dedizierte Dateien unter `Infrastructure/Utils` ausgelagert; `CoreInternals.vb` auf Refinement-Logik fokussiert. - - Vorhandene Utils von `src/FileTypeDetection/Utils/` nach `src/FileTypeDetection/Infrastructure/Utils/` verschoben und Namespace auf `Tomtastisch.FileClassifier.Infrastructure.Utils` konsolidiert. - - Duplizierte Byte-Array-Guard-Checks auf `ByteArrayGuard.HasContent(...)` vereinheitlicht. -- Docs/CI/Tooling: - - Versionskonvergenz auf `6.0.1` nachgezogen (`RepoVersion`, `Version`, `PackageVersion`, Versionshistorie DE/EN). - -## [6.0.0] -- Added: - - Neue interne Hashing-Services (`EvidenceHashingCore`, `EvidenceHashingRoundTrip`, `EvidenceHashingIo`) fuer deterministische Auslagerung ohne neue Dependencies. -- Changed: - - Breaking-Umstellung von `FileKind`-Werten (`Docx`/`Xlsx`/`Pptx` -> `Doc`/`Xls`/`Ppt`). - - Breaking-Umstellung der `HashRoundTripReport`-Public-API auf Slot-basiertes Zugriffsmodell (`HashSlot`, `Evidence(...)`, `LogicalEquals(...)`, `PhysicalEquals(...)`). - - `EvidenceHashing` auf Fassade reduziert; Kernlogik in interne stateless Services verschoben. -- Fixed: - - Reflection-basierte Unit-Tests auf neue interne Hashing-Typen angepasst. -- Docs/CI/Tooling: - - Versionskonvergenz lokal auf `6.0.0` vorbereitet (`RepoVersion`, `Version`, `PackageVersion`, Versionshistorie DE/EN). - ## [5.2.1] - Added: - CI-Dokumentation um expliziten Pflicht-Gate-Charakter fuer Qodana erweitert. diff --git a/docs/versioning/102_HISTORY_VERSIONS.MD b/docs/versioning/102_HISTORY_VERSIONS.MD index 1ed9fabe..934455fc 100644 --- a/docs/versioning/102_HISTORY_VERSIONS.MD +++ b/docs/versioning/102_HISTORY_VERSIONS.MD @@ -12,15 +12,13 @@ Heuristics for retroactive classification: - `docs|test|ci|chore|tooling|refactor|fix` => patch Current state: -- Current release line contains `6.x` (current working state: `v6.0.1`; details in `docs/versioning/103_CHANGELOG_RELEASES.MD`). 
+- Current release line contains `5.x` (current working state: `v5.2.1`; details in `docs/versioning/103_CHANGELOG_RELEASES.MD`). Note: - The \"short description\" column follows the original commit/PR intent text for deterministic traceability and is not normalized to a single language. | Version | Short description | Commit | Keyword | |---|---|---|---| -| `6.0.1` | Refactor hardening: consolidated internal SSOT utilities into `Infrastructure/Utils`, split core utility logic into dedicated files, and unified duplicate byte guards | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | patch | -| `6.0.0` | Breaking release: migrated `FileKind` enum and `HashRoundTripReport` public API to the new slot/method model and split hashing internals into core/roundtrip/io services | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | Switched SharpCompress calls to type-safe APIs, hardened tar.gz fail-closed handling, and enforced/documented Qodana CI as a mandatory gate | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | Introduce netstandard2.0 compatibility layer, consolidate provider structure, and extend TFM multi-targeting | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | | `5.1.4` | Refactor-Cluster 7C abgeschlossen + Qodana-Alerts auf 0 + Version-Bump fuer Release | [2adeb83](https://github.com/tomtastisch/FileClassifier/commit/2adeb83) | patch | diff --git a/docs/versioning/103_CHANGELOG_RELEASES.MD b/docs/versioning/103_CHANGELOG_RELEASES.MD index 7ca87e28..5654ec02 100644 --- a/docs/versioning/103_CHANGELOG_RELEASES.MD +++ b/docs/versioning/103_CHANGELOG_RELEASES.MD @@ -6,28 +6,6 @@ All changes are documented here in technical terms. The release version itself is the Git tag `vX.Y.Z` (optional `-prerelease`) as SSOT. 
-## [6.0.1] -- Added: - - New internal submodule `src/FileTypeDetection/Infrastructure/Utils/` as the SSOT for reusable guard/I/O/path/logging helpers. -- Changed: - - Moved utility classes from `CoreInternals.vb` into dedicated files under `Infrastructure/Utils`; narrowed `CoreInternals.vb` to refinement logic. - - Relocated existing utils from `src/FileTypeDetection/Utils/` to `src/FileTypeDetection/Infrastructure/Utils/` and consolidated the namespace to `Tomtastisch.FileClassifier.Infrastructure.Utils`. - - Unified duplicate byte-array guard checks to `ByteArrayGuard.HasContent(...)`. -- Docs/CI/Tooling: - - Updated version convergence to `6.0.1` (`RepoVersion`, `Version`, `PackageVersion`, version history DE/EN). - -## [6.0.0] -- Added: - - New internal hashing services (`EvidenceHashingCore`, `EvidenceHashingRoundTrip`, `EvidenceHashingIo`) for deterministic extraction without adding dependencies. -- Changed: - - Breaking migration of `FileKind` values (`Docx`/`Xlsx`/`Pptx` -> `Doc`/`Xls`/`Ppt`). - - Breaking migration of `HashRoundTripReport` public API to a slot-based model (`HashSlot`, `Evidence(...)`, `LogicalEquals(...)`, `PhysicalEquals(...)`). - - Reduced `EvidenceHashing` to a facade and moved core logic into internal stateless services. -- Fixed: - - Updated reflection-based unit tests to the new internal hashing types. -- Docs/CI/Tooling: - - Prepared local version convergence to `6.0.0` (`RepoVersion`, `Version`, `PackageVersion`, version history DE/EN). - ## [5.2.1] - Added: - Extended CI documentation to state Qodana as an explicit mandatory gate. 
diff --git a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb index a9e90398..0f10558b 100644 --- a/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb +++ b/src/FileTypeDetection/Abstractions/Archive/ZipExtractedEntry.vb @@ -37,12 +37,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Size As Integer - Friend Sub New _ - ( - entryPath As String, - payload As Byte() - ) - + Friend Sub New(entryPath As String, payload As Byte()) RelativePath = If(entryPath, String.Empty) If payload Is Nothing OrElse payload.Length = 0 Then Content = ImmutableArray(Of Byte).Empty @@ -61,7 +56,6 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Schreibgeschützter mit dem Entry-Inhalt. Public Function OpenReadOnlyStream() As MemoryStream - Dim data = If(Content.IsDefaultOrEmpty, Array.Empty(Of Byte)(), Content.ToArray()) Return New MemoryStream(data, writable:=False) End Function diff --git a/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb b/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb index f686667b..3c8c0b78 100644 --- a/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb +++ b/src/FileTypeDetection/Abstractions/Detection/DetectionDetail.vb @@ -44,14 +44,12 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property ExtensionVerified As Boolean - Friend Sub New _ - ( - detectedType As FileType, - reasonCode As String, - usedZipContentCheck As Boolean, - usedStructuredRefinement As Boolean, - extensionVerified As Boolean - ) + Friend Sub New( + detectedType As FileType, + reasonCode As String, + usedZipContentCheck As Boolean, + usedStructuredRefinement As Boolean, + extensionVerified As Boolean) Me.DetectedType = If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)) Me.ReasonCode = If(reasonCode, String.Empty) diff --git a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb 
b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb index 10710795..b8aefb8c 100644 --- a/src/FileTypeDetection/Abstractions/Detection/FileKind.vb +++ b/src/FileTypeDetection/Abstractions/Detection/FileKind.vb @@ -56,18 +56,18 @@ Namespace Global.Tomtastisch.FileClassifier Zip ''' - ''' Office-Word-Dokument (DOC). + ''' Office Open XML Word-Dokument (DOCX). ''' - Doc + Docx ''' - ''' Office-Excel-Dokument (XLS). + ''' Office Open XML Excel-Dokument (XLSX). ''' - Xls + Xlsx ''' - ''' Office-PowerPoint-Dokument (PPT). + ''' Office Open XML PowerPoint-Dokument (PPTX). ''' - Ppt + Pptx End Enum End Namespace diff --git a/src/FileTypeDetection/Abstractions/Detection/FileType.vb b/src/FileTypeDetection/Abstractions/Detection/FileType.vb index c8a0076a..9cfbc2c5 100644 --- a/src/FileTypeDetection/Abstractions/Detection/FileType.vb +++ b/src/FileTypeDetection/Abstractions/Detection/FileType.vb @@ -44,15 +44,8 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Aliases As ImmutableArray(Of String) - Friend Sub New _ - ( - kind As FileKind, - canonicalExtension As String, - mime As String, - allowed As Boolean, - aliases As IEnumerable(Of String) - ) - + Friend Sub New(kind As FileKind, canonicalExtension As String, mime As String, allowed As Boolean, + aliases As IEnumerable(Of String)) Dim dedup As HashSet(Of String) = New HashSet(Of String)(StringComparer.OrdinalIgnoreCase) Dim n As String Dim orderedAliases As List(Of String) @@ -83,7 +76,6 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' String-Repräsentation des Feldes . 
Public Overrides Function ToString() As String - Return Kind.ToString() End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb index df499c73..96cb9f82 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb @@ -58,29 +58,15 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property HasLogicalHash As Boolean - ''' - ''' Interner Vollkonstruktor zur normalisierten Erstellung eines Digest-Sets. - ''' - ''' Physischer SHA-256-Digest. - ''' Logischer SHA-256-Digest. - ''' Optionaler schneller physischer Digest. - ''' Optionaler schneller logischer Digest. - ''' Optionaler HMAC-SHA256 des physischen Payloads. - ''' Optionaler HMAC-SHA256 des logischen Payloads. - ''' Kennzeichnet das Vorliegen physischer Digests. - ''' Kennzeichnet das Vorliegen logischer Digests. - Friend Sub New _ - ( - physicalSha256 As String, - logicalSha256 As String, - fastPhysicalXxHash3 As String, - fastLogicalXxHash3 As String, - hmacPhysicalSha256 As String, - hmacLogicalSha256 As String, - hasPhysicalHash As Boolean, - hasLogicalHash As Boolean - ) - + Friend Sub New( + physicalSha256 As String, + logicalSha256 As String, + fastPhysicalXxHash3 As String, + fastLogicalXxHash3 As String, + hmacPhysicalSha256 As String, + hmacLogicalSha256 As String, + hasPhysicalHash As Boolean, + hasLogicalHash As Boolean) Me.PhysicalSha256 = Normalize(physicalSha256) Me.LogicalSha256 = Normalize(logicalSha256) Me.FastPhysicalXxHash3 = Normalize(fastPhysicalXxHash3) @@ -91,9 +77,6 @@ Namespace Global.Tomtastisch.FileClassifier Me.HasLogicalHash = hasLogicalHash End Sub - ''' - ''' Liefert ein leeres, fail-closed Digest-Set ohne berechnete Hashwerte. 
- ''' Friend Shared ReadOnly Property Empty As HashDigestSet Get Return New HashDigestSet( @@ -108,16 +91,7 @@ Namespace Global.Tomtastisch.FileClassifier End Get End Property - ''' - ''' Normalisiert Digest-Strings deterministisch (Trim + Invariant-Lowercase). - ''' - ''' Eingabedigest. - ''' Normalisierter Digest oder leerer String. - Private Shared Function Normalize _ - ( - value As String - ) As String - + Private Shared Function Normalize(value As String) As String Return If(value, String.Empty).Trim().ToLowerInvariant() End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb index e83ca06a..47804479 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb @@ -69,33 +69,17 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Notes As String - ''' - ''' Interner Vollkonstruktor zur deterministischen Erzeugung eines Evidence-Snapshots. - ''' - ''' Herkunftskanal der Hashquelle. - ''' Fachliches Quelllabel. - ''' Ermittelter Dateitypkontext. - ''' Optionaler Archiveintrag. - ''' Optionale komprimierte Bytes als Quelle für defensive Kopie. - ''' Optionale unkomprimierte/logische Bytes als Quelle für defensive Kopie. - ''' Anzahl berücksichtigter Entries (wird auf >= 0 normalisiert). - ''' Gesamtgröße der Nutzdaten in Bytes (wird auf >= 0 normalisiert). - ''' Deterministischer Digest-Satz. - ''' Ergänzende Hinweise. 
- Friend Sub New _ - ( - sourceType As HashSourceType, - label As String, - detectedType As FileType, - entry As ZipExtractedEntry, - compressedBytes As Byte(), - uncompressedBytes As Byte(), - entryCount As Integer, - totalUncompressedBytes As Long, - digests As HashDigestSet, - notes As String - ) - + Friend Sub New( + sourceType As HashSourceType, + label As String, + detectedType As FileType, + entry As ZipExtractedEntry, + compressedBytes As Byte(), + uncompressedBytes As Byte(), + entryCount As Integer, + totalUncompressedBytes As Long, + digests As HashDigestSet, + notes As String) Me.SourceType = sourceType Me.Label = If(label, String.Empty) Me.DetectedType = If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)) @@ -108,20 +92,8 @@ Namespace Global.Tomtastisch.FileClassifier Me.UncompressedBytes = ToImmutable(uncompressedBytes) End Sub - ''' - ''' Erzeugt einen fail-closed Evidence-Snapshot für Fehlerpfade. - ''' - ''' Herkunftskanal der Hashquelle. - ''' Fachliches Quelllabel. - ''' Fehler-/Hinweistext. - ''' Evidence mit leerem Digest-Satz und Unknown-Typkontext. - Friend Shared Function CreateFailure _ - ( - sourceType As HashSourceType, - label As String, - notes As String - ) As HashEvidence - + Friend Shared Function CreateFailure(sourceType As HashSourceType, label As String, notes As String) _ + As HashEvidence Return New HashEvidence( sourceType:=sourceType, label:=label, @@ -135,20 +107,10 @@ Namespace Global.Tomtastisch.FileClassifier notes:=notes) End Function - ''' - ''' Erstellt aus einem Bytearray eine unveränderliche Kopie. - ''' - ''' Quellbytes oder Nothing. - ''' Leeres ImmutableArray bei fehlendem Inhalt, sonst defensive Kopie. 
- Private Shared Function ToImmutable _ - ( - data As Byte() - ) As Immutable.ImmutableArray(Of Byte) - - If Not ByteArrayGuard.HasContent(data) Then + Private Shared Function ToImmutable(data As Byte()) As Immutable.ImmutableArray(Of Byte) + If data Is Nothing OrElse data.Length = 0 Then Return Immutable.ImmutableArray(Of Byte).Empty End If - Return Immutable.ImmutableArray.Create(data) End Function End Class diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb index f7ce5dab..ba5e0a3c 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb @@ -43,31 +43,18 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public Property MaterializedFileName As String = "deterministic-roundtrip.bin" - ''' - ''' Erstellt eine interne, seiteneffektfreie Kopie der Optionen. - ''' - ''' Neue Instanz mit identischen aktuellen Optionswerten. Friend Function Clone() As HashOptions - Return New HashOptions With { - .IncludePayloadCopies = IncludePayloadCopies, - .IncludeFastHash = IncludeFastHash, - .IncludeSecureHash = IncludeSecureHash, - .MaterializedFileName = If(MaterializedFileName, String.Empty) + .IncludePayloadCopies = IncludePayloadCopies, + .IncludeFastHash = IncludeFastHash, + .IncludeSecureHash = IncludeSecureHash, + .MaterializedFileName = If(MaterializedFileName, String.Empty) } End Function - ''' - ''' Normalisiert ein Optionsobjekt fail-closed auf sichere Standardwerte. - ''' - ''' Zu normalisierende Optionen; Nothing erzeugt Standardoptionen. - ''' Normalisierte Optionskopie mit sicherem Materialisierungsdateinamen. 
- Friend Shared Function Normalize _ - ( - options As HashOptions - ) As HashOptions - + Friend Shared Function Normalize(options As HashOptions) As HashOptions Dim cloned As HashOptions + If options Is Nothing Then options = New HashOptions() cloned = options.Clone() @@ -75,20 +62,8 @@ Namespace Global.Tomtastisch.FileClassifier Return cloned End Function - ''' - ''' Validiert und normalisiert den Materialisierungsdateinamen auf einen sicheren, deterministischen Wert. - ''' - ''' Kandidat aus den Optionen. - ''' - ''' Sicherheitsnormalisierter Dateiname. Bei ungültigem Eingabewert wird - ''' deterministic-roundtrip.bin zurückgegeben. - ''' - Private Shared Function NormalizeMaterializedFileName _ - ( - candidate As String - ) As String - + Private Shared Function NormalizeMaterializedFileName(candidate As String) As String Dim normalized = If(candidate, String.Empty).Trim() If String.IsNullOrWhiteSpace(normalized) Then Return "deterministic-roundtrip.bin" @@ -105,7 +80,6 @@ Namespace Global.Tomtastisch.FileClassifier If String.IsNullOrWhiteSpace(normalized) Then Return "deterministic-roundtrip.bin" - ' ReSharper disable once LoopCanBeConvertedToQuery For Each invalidChar In IO.Path.GetInvalidFileNameChars() If normalized.IndexOf(invalidChar) >= 0 Then Return "deterministic-roundtrip.bin" diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb index 41a26dd2..69cdd6b0 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb @@ -5,278 +5,131 @@ ' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD ' - Try/Catch konsistent im Catch-Filter-Schema ' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' -' Kontext: -' - Report-Typ zur Bewertung der Konsistenz mehrerer Hash-Evidences über definierte Slots (H1 bis Hn). 
-' - Fail-closed: fehlende Evidence wird deterministisch als Failure-Eintrag materialisiert. -' -' Hinweise: -' - Keine Behavior-Änderungen durch reines Reformatting: Vergleichslogik bleibt unverändert. -' - Externe API: zentraler Zugriff über Evidence(slot), LogicalEquals(slot), PhysicalEquals(slot). ' ============================================================================ Option Strict On Option Explicit On -Imports System -Imports Tomtastisch.FileClassifier.Infrastructure.Utils - Namespace Global.Tomtastisch.FileClassifier - ''' - ''' Bericht über die Konsistenz mehrerer Hash-Evidences in festen Slots (H1 bis Hn). + ''' Ergebnisbericht für deterministische h1-h4-RoundTrip-Prüfungen. ''' ''' - ''' Zweck: - ''' - Normalisiert fehlende Slots fail-closed zu deterministischen Failure-Evidences. - ''' - Vergleicht H1 gegen H2 bis Hn jeweils logisch und physisch. - ''' - ''' Verantwortlichkeiten: - ''' - Slot-Normalisierung (EnsureEvidence). - ''' - Berechnung der Vergleichsflags (LogicalEquals/PhysicalEquals). - ''' - Aggregation LogicalConsistent (AND über alle logischen Gleichheiten). - ''' - ''' Nicht-Ziele: - ''' - Kein I/O, keine Policy-Engines, keine Logger-Integration. - ''' - Slot-Ermittlung erfolgt zentral über EnumUtils (Enum.GetValues). + ''' Der Bericht stellt Einzel-Evidence und daraus abgeleitete Konsistenzkennzahlen für logische und physische + ''' Digest-Vergleiche bereit. 
''' Public NotInheritable Class HashRoundTripReport - - ' ===================================================================== - ' Konstanten / Shared ReadOnly (Single Source of Truth) - ' ===================================================================== - - Public Enum HashSlot - H1 = 1 - H2 = 2 - H3 = 3 - H4 = 4 - End Enum - - Private Shared ReadOnly RequiredSlots As HashSlot() = _ - EnumUtils.GetValues(Of HashSlot)( - sortOrder:=EnumUtils.EnumSortOrder.Ascending - ) - - ' ===================================================================== - ' Felder / Properties (Typzustand) - ' ===================================================================== - + ''' + ''' Eingabepfad des geprüften Objekts. + ''' Public ReadOnly Property InputPath As String - Public ReadOnly Property IsArchiveInput As Boolean - Public ReadOnly Property Notes As String - - Private ReadOnly _evidences As HashEvidence() ' index = slot-1 - Private ReadOnly _logicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn - Private ReadOnly _physicalEq As Boolean() ' index 0..n-2 entspricht H2 bis Hn - - Public ReadOnly Property LogicalConsistent As Boolean - ''' - ''' Liefert die Slots, die in dieser Report-Version geführt werden (in Reihenfolge). + ''' Kennzeichnet, ob die Eingabe als Archiv verarbeitet wurde. ''' - Public ReadOnly Property Slots As HashSlot() - Get - Return IterableUtils.CloneArray(RequiredSlots) - End Get - - End Property - - - ' ===================================================================== - ' Konstruktor(en) - ' ===================================================================== + Public ReadOnly Property IsArchiveInput As Boolean ''' - ''' Erstellt einen Bericht aus Evidences in Slot-Reihenfolge (H1, H2, ...). + ''' Erster Nachweis (Ausgangszustand). ''' - ''' Pfad/Identifier der geprüften Eingabe. - ''' True, wenn die Eingabe als Archiv verarbeitet wurde. - ''' Hinweise (freier Text). - ''' Evidence-Varargs in Slot-Reihenfolge; exakt so viele wie Slots(). 
- ''' Wird ausgelöst, wenn evidences Nothing ist oder die Slot-Anzahl nicht passt. - Friend Sub New _ - ( - inputPath As String, - isArchiveInput As Boolean, - notes As String, - ParamArray evidences As HashEvidence() - ) - - ' Deklarationsblock (Pflicht, spaltenartig) - Dim slotCount As Integer = RequiredSlots.Length - Dim i As Integer - Dim idx As Integer - Dim baseEvidence As HashEvidence - Dim otherEvidence As HashEvidence - Dim otherSlot As HashSlot - Dim eqLogical As Boolean - Dim consistentLocal As Boolean = True - - ' ----------------------------------------------------------------- - ' Guard-Clauses (fail-closed) - ' ----------------------------------------------------------------- - ArgumentGuard.RequireLength(evidences, slotCount, NameOf(evidences)) - - ' ----------------------------------------------------------------- - ' Snapshot / Assignment (Input) - ' ----------------------------------------------------------------- - Me.InputPath = If(inputPath, String.Empty) - Me.IsArchiveInput = isArchiveInput - Me.Notes = If(notes, String.Empty) - - ' ----------------------------------------------------------------- - ' Normalisierung / Canonicalization (Slots) - ' ----------------------------------------------------------------- - _evidences = New HashEvidence(slotCount - 1) {} - _logicalEq = New Boolean(slotCount - 2) {} - _physicalEq = New Boolean(slotCount - 2) {} - - For i = 0 To slotCount - 1 - Dim slot As HashSlot = RequiredSlots(i) - _evidences(SlotIndex(slot)) = EnsureEvidence(evidences(i), slot) - Next - - ' ----------------------------------------------------------------- - ' Branches (Vergleiche: H1 gegen H2..Hn) - ' ----------------------------------------------------------------- - baseEvidence = _evidences(SlotIndex(HashSlot.H1)) - - For idx = 0 To slotCount - 2 - - otherSlot = RequiredSlots(idx + 1) - otherEvidence = _evidences(SlotIndex(otherSlot)) - - eqLogical = EqualLogical(baseEvidence, otherEvidence) - - _logicalEq(idx) = eqLogical - 
_physicalEq(idx) = EqualPhysical(baseEvidence, otherEvidence) - - consistentLocal = consistentLocal AndAlso eqLogical - - Next - - LogicalConsistent = consistentLocal - - End Sub - - - ' ===================================================================== - ' Public API - ' ===================================================================== + Public ReadOnly Property H1 As HashEvidence ''' - ''' Liefert die Evidence für einen Slot (H1 bis Hn). + ''' Zweiter Nachweis (kanonische Sicht). ''' - ''' Der Slot, dessen Evidence geliefert werden soll. - ''' Die normalisierte Evidence (nie Nothing). - Public Function Evidence(slot As HashSlot) As HashEvidence - - ' Deklarationsblock - Dim index As Integer - - ArgumentGuard.EnumDefined(GetType(HashSlot), slot, NameOf(slot)) - index = SlotIndex(slot) - - Return _evidences(index) - - End Function + Public ReadOnly Property H2 As HashEvidence ''' - ''' Liefert das Ergebnis des logischen Vergleichs von H1 mit Hx für einen Slot H2 bis Hn. + ''' Dritter Nachweis (logische Bytes). ''' - ''' Der Vergleichsslot (H2 bis Hn). - ''' True, wenn logisch gleich; sonst False. - Public Function LogicalEquals(otherSlot As HashSlot) As Boolean - - ' Deklarationsblock - Dim index As Integer - - If otherSlot = HashSlot.H1 Then - Throw New ArgumentException("Use H2..Hn.", NameOf(otherSlot)) - End If - - ArgumentGuard.EnumDefined(GetType(HashSlot), otherSlot, NameOf(otherSlot)) - index = OtherIndex(otherSlot) - - Return _logicalEq(index) - - End Function + Public ReadOnly Property H3 As HashEvidence ''' - ''' Liefert das Ergebnis des physischen Vergleichs von H1 mit Hx für einen Slot H2 bis Hn. + ''' Vierter Nachweis (materialisierte Zielrepräsentation). ''' - ''' Der Vergleichsslot (H2 bis Hn). - ''' True, wenn physisch gleich; sonst False. 
- Public Function PhysicalEquals(otherSlot As HashSlot) As Boolean - - ' Deklarationsblock - Dim index As Integer - - If otherSlot = HashSlot.H1 Then - Throw New ArgumentException("Use H2..Hn.", NameOf(otherSlot)) - End If + Public ReadOnly Property H4 As HashEvidence - ArgumentGuard.EnumDefined(GetType(HashSlot), otherSlot, NameOf(otherSlot)) - index = OtherIndex(otherSlot) + ''' + ''' Kennzeichnet Gleichheit der logischen Digests zwischen h1 und h2. + ''' + Public ReadOnly Property LogicalH1EqualsH2 As Boolean - Return _physicalEq(index) + ''' + ''' Kennzeichnet Gleichheit der logischen Digests zwischen h1 und h3. + ''' + Public ReadOnly Property LogicalH1EqualsH3 As Boolean - End Function + ''' + ''' Kennzeichnet Gleichheit der logischen Digests zwischen h1 und h4. + ''' + Public ReadOnly Property LogicalH1EqualsH4 As Boolean + ''' + ''' Kennzeichnet Gleichheit der physischen Digests zwischen h1 und h2. + ''' + Public ReadOnly Property PhysicalH1EqualsH2 As Boolean - ' ===================================================================== - ' Internal/Private Helpers - ' ===================================================================== + ''' + ''' Kennzeichnet Gleichheit der physischen Digests zwischen h1 und h3. + ''' + Public ReadOnly Property PhysicalH1EqualsH3 As Boolean - Private Shared Function EnsureEvidence(evidence As HashEvidence, slot As HashSlot) As HashEvidence - If evidence IsNot Nothing Then Return evidence + ''' + ''' Kennzeichnet Gleichheit der physischen Digests zwischen h1 und h4. + ''' + Public ReadOnly Property PhysicalH1EqualsH4 As Boolean - Return HashEvidence.CreateFailure( - HashSourceType.Unknown, - SlotLabel(slot), - "missing" - ) - End Function + ''' + ''' Gesamtindikator für logische Konsistenz über h1 bis h4. 
+ ''' + Public ReadOnly Property LogicalConsistent As Boolean - Private Shared Function SlotLabel(slot As HashSlot) As String - Return "h" & CInt(slot).ToString() - End Function + ''' + ''' Ergänzende Hinweise zum RoundTrip-Lauf. + ''' + Public ReadOnly Property Notes As String - Private Shared Function SlotIndex(slot As HashSlot) As Integer - Return CInt(slot) - 1 - End Function + Friend Sub New(inputPath As String, isArchiveInput As Boolean, h1 As HashEvidence, h2 As HashEvidence, + h3 As HashEvidence, h4 As HashEvidence, + notes As String) + Me.InputPath = If(inputPath, String.Empty) + Me.IsArchiveInput = isArchiveInput + Me.H1 = + If(h1, HashEvidence.CreateFailure(HashSourceType.Unknown, "h1", "missing")) + Me.H2 = + If(h2, HashEvidence.CreateFailure(HashSourceType.Unknown, "h2", "missing")) + Me.H3 = + If(h3, HashEvidence.CreateFailure(HashSourceType.Unknown, "h3", "missing")) + Me.H4 = + If(h4, HashEvidence.CreateFailure(HashSourceType.Unknown, "h4", "missing")) + Me.Notes = If(notes, String.Empty) - Private Shared Function OtherIndex(otherSlot As HashSlot) As Integer - Return SlotIndex(otherSlot) - 1 - End Function + LogicalH1EqualsH2 = EqualLogical(Me.H1, Me.H2) + LogicalH1EqualsH3 = EqualLogical(Me.H1, Me.H3) + LogicalH1EqualsH4 = EqualLogical(Me.H1, Me.H4) + PhysicalH1EqualsH2 = EqualPhysical(Me.H1, Me.H2) + PhysicalH1EqualsH3 = EqualPhysical(Me.H1, Me.H3) + PhysicalH1EqualsH4 = EqualPhysical(Me.H1, Me.H4) + LogicalConsistent = LogicalH1EqualsH2 AndAlso LogicalH1EqualsH3 AndAlso LogicalH1EqualsH4 + End Sub - ' Hinweis: Vergleichslogik bleibt unverändert; keine Änderung der Semantik. 
Private Shared Function EqualLogical(leftEvidence As HashEvidence, rightEvidence As HashEvidence) As Boolean If leftEvidence Is Nothing OrElse rightEvidence Is Nothing Then Return False If leftEvidence.Digests Is Nothing OrElse rightEvidence.Digests Is Nothing Then Return False If Not leftEvidence.Digests.HasLogicalHash OrElse Not rightEvidence.Digests.HasLogicalHash Then Return False - - Return String.Equals( - leftEvidence.Digests.LogicalSha256, - rightEvidence.Digests.LogicalSha256, - StringComparison.Ordinal - ) + Return _ + String.Equals(leftEvidence.Digests.LogicalSha256, rightEvidence.Digests.LogicalSha256, + StringComparison.Ordinal) End Function Private Shared Function EqualPhysical(leftEvidence As HashEvidence, rightEvidence As HashEvidence) As Boolean If leftEvidence Is Nothing OrElse rightEvidence Is Nothing Then Return False If leftEvidence.Digests Is Nothing OrElse rightEvidence.Digests Is Nothing Then Return False - If Not leftEvidence.Digests.HasPhysicalHash OrElse Not rightEvidence.Digests.HasPhysicalHash Then Return False - - Return String.Equals( - leftEvidence.Digests.PhysicalSha256, - rightEvidence.Digests.PhysicalSha256, - StringComparison.Ordinal - ) + If Not leftEvidence.Digests.HasPhysicalHash OrElse Not rightEvidence.Digests.HasPhysicalHash Then _ + Return False + Return _ + String.Equals(leftEvidence.Digests.PhysicalSha256, rightEvidence.Digests.PhysicalSha256, + StringComparison.Ordinal) End Function - End Class - End Namespace diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb deleted file mode 100644 index cb9ff57a..00000000 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb +++ /dev/null @@ -1,505 +0,0 @@ -' ============================================================================ -' FILE: EvidenceHashingCore.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und 
Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Namespace Global.Tomtastisch.FileClassifier - ''' - ''' Interner, zustandsloser Kernservice für deterministische Evidence-Bildung. - ''' - ''' - ''' - ''' Die Komponente kapselt Normalisierung, Manifestbildung, Digest-Berechnung und optionale HMAC-Verarbeitung - ''' ohne Public-API-Verantwortung. - ''' - ''' - ''' Fehler werden fail-closed über Rückgabewerte und unveränderte Fehltexte in die aufrufende Fassade propagiert. - ''' - ''' - Friend NotInheritable Class EvidenceHashingCore - Private Sub New() - End Sub - - ''' - ''' Baut deterministische Evidence aus normalisierbaren Archiveinträgen. - ''' - ''' Herkunftskanal des Nachweises. - ''' Quelllabel für den Report. - ''' Ermittelter Dateitypkontext. - ''' Optionale komprimierte Originalbytes. - ''' Extrahierte Archiveinträge. - ''' Hash-Konfiguration. - ''' Optionale Basishinweise. - ''' Vollständiges Evidence-Objekt oder fail-closed Fehler-Evidence. 
- Friend Shared Function BuildEvidenceFromEntries _ - ( - sourceType As HashSourceType, - label As String, - detectedType As FileType, - compressedBytes As Byte(), - entries As IReadOnlyList(Of ZipExtractedEntry), - hashOptions As HashOptions, - notes As String - ) As HashEvidence - - Dim normalizedEntries As List(Of NormalizedEntry) = Nothing - Dim normalizeError As String = String.Empty - Dim logicalBytes As Byte() - Dim logicalSha As String - Dim fastLogical As String - Dim hmacLogical As String - Dim physicalSha As String - Dim fastPhysical As String - Dim hmacPhysical As String - Dim hasPhysical As Boolean - Dim secureNote As String - Dim hmacKey As Byte() - Dim hasHmacKey As Boolean - Dim firstEntry As ZipExtractedEntry = Nothing - Dim digestSet As HashDigestSet - Dim combinedNotes As String - Dim totalBytes As Long - Dim persistedCompressed As Byte() - Dim persistedLogical As Byte() - - If Not TryNormalizeEntries(entries, normalizedEntries, normalizeError) Then - Return HashEvidence.CreateFailure(sourceType, label, normalizeError) - End If - - logicalBytes = BuildLogicalManifestBytes(normalizedEntries) - logicalSha = ComputeSha256Hex(logicalBytes) - fastLogical = ComputeFastHash(logicalBytes, hashOptions) - hmacLogical = String.Empty - physicalSha = String.Empty - fastPhysical = String.Empty - hmacPhysical = String.Empty - hasPhysical = False - secureNote = String.Empty - hmacKey = Array.Empty(Of Byte)() - hasHmacKey = False - - If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then - hasHmacKey = TryResolveHmacKey(hmacKey, secureNote) - If hasHmacKey Then - hmacLogical = ComputeHmacSha256Hex(hmacKey, logicalBytes) - End If - End If - - If compressedBytes IsNot Nothing AndAlso compressedBytes.Length > 0 Then - physicalSha = ComputeSha256Hex(compressedBytes) - fastPhysical = ComputeFastHash(compressedBytes, hashOptions) - hasPhysical = True - If hasHmacKey Then - hmacPhysical = ComputeHmacSha256Hex(hmacKey, compressedBytes) - End If - End If 
- - If normalizedEntries.Count > 0 Then - firstEntry = New ZipExtractedEntry(normalizedEntries(0).RelativePath, normalizedEntries(0).Content) - End If - - digestSet = New HashDigestSet( - physicalSha256:=physicalSha, - logicalSha256:=logicalSha, - fastPhysicalXxHash3:=fastPhysical, - fastLogicalXxHash3:=fastLogical, - hmacPhysicalSha256:=hmacPhysical, - hmacLogicalSha256:=hmacLogical, - hasPhysicalHash:=hasPhysical, - hasLogicalHash:=True) - - combinedNotes = AppendNoteIfAny(notes, secureNote) - - totalBytes = 0 - For Each entry In normalizedEntries - totalBytes += CLng(entry.Content.LongLength) - Next - - persistedCompressed = If( - hashOptions.IncludePayloadCopies, - CopyBytes(compressedBytes), - Array.Empty(Of Byte)()) - - persistedLogical = If( - hashOptions.IncludePayloadCopies, - CopyBytes(logicalBytes), - Array.Empty(Of Byte)()) - - Return New HashEvidence( - sourceType:=sourceType, - label:=NormalizeLabel(label), - detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), - entry:=firstEntry, - compressedBytes:=persistedCompressed, - uncompressedBytes:=persistedLogical, - entryCount:=normalizedEntries.Count, - totalUncompressedBytes:=totalBytes, - digests:=digestSet, - notes:=combinedNotes) - End Function - - ''' - ''' Baut deterministische Evidence aus einem rohen Payload-Bytearray. - ''' - ''' Herkunftskanal des Nachweises. - ''' Quelllabel für den Report. - ''' Ermittelter Dateitypkontext. - ''' Rohpayload; Nothing wird als leeres Array behandelt. - ''' Hash-Konfiguration. - ''' Optionale Basishinweise. - ''' Evidence-Objekt mit physischen und logischen Digests. 
- Friend Shared Function BuildEvidenceFromRawPayload _ - ( - sourceType As HashSourceType, - label As String, - detectedType As FileType, - payload As Byte(), - hashOptions As HashOptions, - notes As String - ) As HashEvidence - - Dim safePayload As Byte() = If(payload, Array.Empty(Of Byte)()) - Dim physicalSha As String = ComputeSha256Hex(safePayload) - Dim logicalSha As String = physicalSha - Dim fastPhysical As String = ComputeFastHash(safePayload, hashOptions) - Dim fastLogical As String = fastPhysical - Dim hmacPhysical As String = String.Empty - Dim hmacLogical As String = String.Empty - Dim secureNote As String = String.Empty - Dim hmacKey As Byte() = Array.Empty(Of Byte)() - Dim persistedPayload As Byte() - Dim entry As ZipExtractedEntry - Dim digestSet As HashDigestSet - Dim combinedNotes As String - - If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then - If TryResolveHmacKey(hmacKey, secureNote) Then - hmacPhysical = ComputeHmacSha256Hex(hmacKey, safePayload) - hmacLogical = hmacPhysical - End If - End If - - persistedPayload = If( - hashOptions.IncludePayloadCopies, - CopyBytes(safePayload), - Array.Empty(Of Byte)()) - - entry = New ZipExtractedEntry(EvidenceHashing.DefaultPayloadLabelCore(), safePayload) - - digestSet = New HashDigestSet( - physicalSha256:=physicalSha, - logicalSha256:=logicalSha, - fastPhysicalXxHash3:=fastPhysical, - fastLogicalXxHash3:=fastLogical, - hmacPhysicalSha256:=hmacPhysical, - hmacLogicalSha256:=hmacLogical, - hasPhysicalHash:=True, - hasLogicalHash:=True) - - combinedNotes = AppendNoteIfAny(notes, secureNote) - - Return New HashEvidence( - sourceType:=sourceType, - label:=NormalizeLabel(label), - detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), - entry:=entry, - compressedBytes:=persistedPayload, - uncompressedBytes:=persistedPayload, - entryCount:=1, - totalUncompressedBytes:=safePayload.LongLength, - digests:=digestSet, - notes:=combinedNotes) - End Function - - ''' - ''' 
Normalisiert Entry-Liste deterministisch und validiert sie fail-closed. - ''' - ''' Zu normalisierende Archiveinträge. - ''' Ausgabe der sortierten, normalisierten Entries. - ''' Fehlermeldung bei Validierungsfehlern. - ''' True bei erfolgreicher Normalisierung, sonst False. - Friend Shared Function TryNormalizeEntries _ - ( - entries As IReadOnlyList(Of ZipExtractedEntry), - ByRef normalizedEntries As List(Of NormalizedEntry), - ByRef errorMessage As String - ) As Boolean - - Dim seen As HashSet(Of String) = New HashSet(Of String)(StringComparer.Ordinal) - Dim normalizedPath As String - Dim payload As Byte() - - normalizedEntries = New List(Of NormalizedEntry)() - errorMessage = String.Empty - - If entries Is Nothing Then - errorMessage = "Entries sind null." - Return False - End If - - For Each entry In entries - If entry Is Nothing Then - errorMessage = "Entry ist null." - Return False - End If - - normalizedPath = Nothing - If Not TryNormalizeEntryPath(entry.RelativePath, normalizedPath) Then - errorMessage = $"Ungültiger Entry-Pfad: '{entry.RelativePath}'." - Return False - End If - - If Not seen.Add(normalizedPath) Then - errorMessage = $"Doppelter Entry-Pfad nach Normalisierung: '{normalizedPath}'." - Return False - End If - - payload = If(entry.Content.IsDefaultOrEmpty, Array.Empty(Of Byte)(), entry.Content.ToArray()) - normalizedEntries.Add(New NormalizedEntry(normalizedPath, payload)) - Next - - normalizedEntries.Sort(Function(a, b) StringComparer.Ordinal.Compare(a.RelativePath, b.RelativePath)) - Return True - End Function - - ''' - ''' Normalisiert einen einzelnen Entry-Pfad nach der zentralen Archiv-Path-Policy. - ''' - ''' Unverarbeiteter Entry-Pfad. - ''' Normalisierter relativer Pfad. - ''' True bei gültigem Pfad, sonst False. 
- Friend Shared Function TryNormalizeEntryPath _ - ( - rawPath As String, - ByRef normalizedPath As String - ) As Boolean - - Dim isDirectory As Boolean = False - Return ArchiveEntryPathPolicy.TryNormalizeRelativePath( - rawPath, - allowDirectoryMarker:=False, - normalizedPath, - isDirectory) - End Function - - ''' - ''' Erzeugt das kanonische logische Manifest als Bytefolge. - ''' - ''' Normalisierte und sortierte Entries. - ''' Deterministische Manifestbytes als Hash-Basis. - Friend Shared Function BuildLogicalManifestBytes _ - ( - entries As IReadOnlyList(Of NormalizedEntry) - ) As Byte() - - Dim versionBytes As Byte() - Dim pathBytes As Byte() - Dim contentHash As Byte() - - Using ms As New IO.MemoryStream() - Using writer As New IO.BinaryWriter(ms, Text.Encoding.UTF8, leaveOpen:=True) - versionBytes = Text.Encoding.UTF8.GetBytes(EvidenceHashing.LogicalManifestVersionCore()) - writer.Write(versionBytes.Length) - writer.Write(versionBytes) - writer.Write(entries.Count) - - For Each entry In entries - pathBytes = Text.Encoding.UTF8.GetBytes(entry.RelativePath) - contentHash = HashPrimitives.Current.Sha256.ComputeHash(entry.Content) - writer.Write(pathBytes.Length) - writer.Write(pathBytes) - writer.Write(CLng(entry.Content.LongLength)) - writer.Write(contentHash.Length) - writer.Write(contentHash) - Next - End Using - - Return ms.ToArray() - End Using - End Function - - ''' - ''' Berechnet SHA-256 als hexadezimale Kleinbuchstabenrepräsentation. - ''' - ''' Eingabedaten; Nothing wird als leeres Array behandelt. - ''' Hex-String des SHA-256-Digests. - Friend Shared Function ComputeSha256Hex _ - ( - payload As Byte() - ) As String - - Dim data As Byte() = If(payload, Array.Empty(Of Byte)()) - Return HashPrimitives.Current.Sha256.ComputeHashHex(data) - End Function - - ''' - ''' Berechnet optional einen schnellen, nicht-kryptografischen Digest. - ''' - ''' Eingabedaten; Nothing wird als leeres Array behandelt. 
- ''' Hash-Optionen; ohne Opt-In wird leerer String geliefert. - ''' Fast-Hash als Hex-String oder leerer String. - Friend Shared Function ComputeFastHash _ - ( - payload As Byte(), - options As HashOptions - ) As String - - Dim data As Byte() - - If options Is Nothing OrElse Not options.IncludeFastHash Then Return String.Empty - data = If(payload, Array.Empty(Of Byte)()) - - Return HashPrimitives.Current.FastHash64.ComputeHashHex(data) - End Function - - ''' - ''' Berechnet einen HMAC-SHA256-Digest über die Payload. - ''' - ''' HMAC-Key; Nothing wird als leeres Array behandelt. - ''' Eingabedaten; Nothing wird als leeres Array behandelt. - ''' Hex-String des HMAC-SHA256-Digests. - Friend Shared Function ComputeHmacSha256Hex _ - ( - key As Byte(), - payload As Byte() - ) As String - - Dim safeKey As Byte() = If(key, Array.Empty(Of Byte)()) - Dim data As Byte() = If(payload, Array.Empty(Of Byte)()) - - Using hmac As New Security.Cryptography.HMACSHA256(safeKey) - Return HashPrimitives.Current.HexCodec.EncodeLowerHex(hmac.ComputeHash(data)) - End Using - End Function - - ''' - ''' Liest und validiert den HMAC-Key aus der definierten Environment-Variable. - ''' - ''' Ausgabe des dekodierten Keys. - ''' Ausgabe eines Hinweistextes bei fehlendem/ungültigem Key. - ''' True bei gültigem Key, sonst False. - Friend Shared Function TryResolveHmacKey _ - ( - ByRef key As Byte(), - ByRef note As String - ) As Boolean - - Dim b64 As String - - key = Array.Empty(Of Byte)() - note = String.Empty - - b64 = Environment.GetEnvironmentVariable(EvidenceHashing.HmacKeyEnvVarB64Core()) - If String.IsNullOrWhiteSpace(b64) Then - note = $"Secure hashing requested but env var '{EvidenceHashing.HmacKeyEnvVarB64Core()}' is missing; HMAC digests omitted." 
- Return False - End If - - Try - key = Convert.FromBase64String(b64.Trim()) - If key Is Nothing OrElse key.Length = 0 Then - key = Array.Empty(Of Byte)() - note = $"Secure hashing requested but env var '{EvidenceHashing.HmacKeyEnvVarB64Core()}' is empty; HMAC digests omitted." - Return False - End If - - Return True - Catch ex As Exception When _ - TypeOf ex Is FormatException OrElse - TypeOf ex Is ArgumentException - key = Array.Empty(Of Byte)() - note = $"Secure hashing requested but env var '{EvidenceHashing.HmacKeyEnvVarB64Core()}' is invalid Base64; HMAC digests omitted." - Return False - End Try - End Function - - ''' - ''' Fügt einen optionalen Hinweistext deterministisch an bestehende Notes an. - ''' - ''' Bereits vorhandene Notes. - ''' Optional anzuhängender Hinweis. - ''' Kombinierter, getrimmter Hinweistext. - Friend Shared Function AppendNoteIfAny _ - ( - baseNotes As String, - toAppend As String - ) As String - - Dim left As String = If(baseNotes, String.Empty).Trim() - Dim right As String = If(toAppend, String.Empty).Trim() - - If right.Length = 0 Then Return left - If left.Length = 0 Then Return right - Return left & " " & right - End Function - - ''' - ''' Normalisiert ein Label fail-closed auf einen stabilen Standardwert. - ''' - ''' Eingabelabel. - ''' Getrimmtes Label oder Standardlabel. - Friend Shared Function NormalizeLabel _ - ( - label As String - ) As String - - Dim normalized As String = If(label, String.Empty).Trim() - If normalized.Length = 0 Then Return EvidenceHashing.DefaultPayloadLabelCore() - Return normalized - End Function - - ''' - ''' Erstellt eine defensive Bytekopie ohne Seiteneffekte. - ''' - ''' Quellarray. - ''' Kopie der Bytes oder leeres Array. 
- Friend Shared Function CopyBytes _ - ( - data As Byte() - ) As Byte() - - Dim copy As Byte() - - If Not ByteArrayGuard.HasContent(data) Then Return Array.Empty(Of Byte)() - copy = New Byte(data.Length - 1) {} - Buffer.BlockCopy(data, 0, copy, 0, data.Length) - - Return copy - End Function - - ''' - ''' Normalisierte Entry-Repräsentation für kanonische Manifestbildung. - ''' - ''' - ''' Relative Pfade und Inhalte werden nach Guard-Prüfung unveränderlich für deterministische Sortierung gehalten. - ''' - Friend NotInheritable Class NormalizedEntry - ''' - ''' Normalisierter relativer Entry-Pfad. - ''' - Friend ReadOnly Property RelativePath As String - - ''' - ''' Normalisierte Entry-Inhaltsbytes. - ''' - Friend ReadOnly Property Content As Byte() - - ''' - ''' Erstellt eine unveränderliche NormalizedEntry-Instanz. - ''' - ''' Normalisierter relativer Pfad. - ''' Entry-Inhalt als defensive Nutzdatenrepräsentation. - Friend Sub New(relativePath As String, content As Byte()) - Me.RelativePath = If(relativePath, String.Empty) - Me.Content = If(content, Array.Empty(Of Byte)()) - End Sub - End Class - End Class -End Namespace diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIO.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIO.vb deleted file mode 100644 index 85a866e1..00000000 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingIO.vb +++ /dev/null @@ -1,95 +0,0 @@ -' ============================================================================ -' FILE: EvidenceHashingIO.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Namespace Global.Tomtastisch.FileClassifier - ''' - ''' 
Interne I/O-Hilfsfunktionen für bounded Dateieinlesung im Hashing-Kontext. - ''' - ''' - ''' Die Komponente erzwingt MaxBytes-Limits fail-closed und liefert deterministische Fehltexte. - ''' - Friend NotInheritable Class EvidenceHashingIO - Private Sub New() - End Sub - - Friend Shared Function TryReadFileBounded _ - ( - path As String, - detectorOptions As FileTypeProjectOptions, - ByRef bytes As Byte(), - ByRef errorMessage As String - ) As Boolean - - Dim fi As IO.FileInfo - - bytes = Array.Empty(Of Byte)() - errorMessage = String.Empty - - If String.IsNullOrWhiteSpace(path) Then - errorMessage = "Pfad ist leer." - Return False - End If - - If detectorOptions Is Nothing Then - errorMessage = "Optionen fehlen." - Return False - End If - - Try - fi = New IO.FileInfo(path) - If Not fi.Exists Then - errorMessage = "Datei existiert nicht." - Return False - End If - - If fi.Length > detectorOptions.MaxBytes Then - errorMessage = "Datei größer als MaxBytes." - Return False - End If - - Using fs As New IO.FileStream( - path, - IO.FileMode.Open, - IO.FileAccess.Read, - IO.FileShare.Read, - InternalIoDefaults.FileStreamBufferSize, - IO.FileOptions.SequentialScan) - - Using ms As New IO.MemoryStream(CInt(Math.Min(Math.Max(fi.Length, 0), Integer.MaxValue))) - StreamBounds.CopyBounded(fs, ms, detectorOptions.MaxBytes) - bytes = ms.ToArray() - End Using - End Using - - Return True - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is IO.IOException OrElse - TypeOf ex Is IO.InvalidDataException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - Return SetReadFileError(ex, errorMessage) - End Try - End Function - - Friend Shared Function SetReadFileError _ - ( - ex As Exception, - ByRef errorMessage As String - ) As Boolean - - errorMessage = $"Datei konnte nicht gelesen werden: {ex.Message}" - Return False - End Function - End Class -End Namespace diff 
--git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb deleted file mode 100644 index 3dc53a35..00000000 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb +++ /dev/null @@ -1,179 +0,0 @@ -' ============================================================================ -' FILE: EvidenceHashingRoundTrip.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Namespace Global.Tomtastisch.FileClassifier - ''' - ''' Interne RoundTrip-Pipeline für den deterministischen h1-h4-Hashbericht. - ''' - ''' - ''' - ''' Der Service erstellt temporäre Ziele für Materialisierung, nutzt die öffentliche Fassade für h1/h2/h4, - ''' berechnet h3 über kanonische Logical-Bytes und bereinigt temporäre Verzeichnisse best-effort. - ''' - ''' - ''' Catch-Filter und Fehltexte bleiben unverändert fail-closed. 
- ''' - ''' - Friend NotInheritable Class EvidenceHashingRoundTrip - Private Sub New() - End Sub - - Friend Shared Function VerifyRoundTrip _ - ( - path As String, - options As HashOptions - ) As HashRoundTripReport - - Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() - Dim normalizedOptions As HashOptions = EvidenceHashing.ResolveHashOptionsCore(detectorOptions, options) - Dim failed As HashEvidence - Dim h1 As HashEvidence - Dim originalBytes As Byte() = Array.Empty(Of Byte)() - Dim readError As String = String.Empty - Dim archiveEntries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() - Dim isArchiveInput As Boolean - Dim h2 As HashEvidence - Dim canonicalBytes As Byte() - Dim normalizedEntries As List(Of EvidenceHashingCore.NormalizedEntry) - Dim normalizeError As String - Dim h3 As HashEvidence - Dim h4 As HashEvidence = HashEvidence.CreateFailure( - HashSourceType.MaterializedFile, - "roundtrip-h4-file", - "Materialization failed.") - - Dim roundTripTempRoot As String = IO.Path.Combine( - IO.Path.GetTempPath(), - "ftd-roundtrip-" & Guid.NewGuid().ToString("N", Globalization.CultureInfo.InvariantCulture)) - - Dim targetFile As String - Dim notes As String - - If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then - failed = HashEvidence.CreateFailure(HashSourceType.FilePath, path, "Datei nicht gefunden.") - Return CreateFailureReport( - path, - "Input file missing.", - failed, - failed, - failed, - failed) - End If - - h1 = EvidenceHashing.HashFile(path, normalizedOptions) - If Not h1.Digests.HasLogicalHash Then - failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, "h1 konnte nicht berechnet werden.") - Return CreateFailureReport( - path, - "h1 missing logical digest.", - h1, - failed, - failed, - failed) - End If - - If Not EvidenceHashingIO.TryReadFileBounded(path, detectorOptions, originalBytes, readError) Then - failed = 
HashEvidence.CreateFailure(HashSourceType.Unknown, path, readError) - Return CreateFailureReport( - path, - readError, - h1, - failed, - failed, - failed) - End If - - isArchiveInput = ArchiveEntryCollector.TryCollectFromFile(path, detectorOptions, archiveEntries) - - If isArchiveInput Then - h2 = EvidenceHashing.HashEntries(archiveEntries, "roundtrip-h2-entries", normalizedOptions) - normalizedEntries = Nothing - normalizeError = String.Empty - If EvidenceHashingCore.TryNormalizeEntries(archiveEntries, normalizedEntries, normalizeError) Then - canonicalBytes = EvidenceHashingCore.BuildLogicalManifestBytes(normalizedEntries) - Else - canonicalBytes = Array.Empty(Of Byte)() - End If - Else - h2 = EvidenceHashing.HashBytes(originalBytes, "roundtrip-h2-bytes", normalizedOptions) - canonicalBytes = EvidenceHashingCore.CopyBytes(originalBytes) - End If - - h3 = EvidenceHashingCore.BuildEvidenceFromRawPayload( - sourceType:=HashSourceType.RawBytes, - label:="roundtrip-h3-logical-bytes", - detectedType:=FileTypeRegistry.Resolve(FileKind.Unknown), - payload:=canonicalBytes, - hashOptions:=normalizedOptions, - notes:="Canonical logical bytes hashed directly.") - - Try - IO.Directory.CreateDirectory(roundTripTempRoot) - targetFile = IO.Path.Combine( - roundTripTempRoot, - EvidenceHashingCore.NormalizeLabel(normalizedOptions.MaterializedFileName)) - - If FileMaterializer.Persist(canonicalBytes, targetFile, overwrite:=False, secureExtract:=False) Then - h4 = EvidenceHashing.HashFile(targetFile, normalizedOptions) - End If - Finally - Try - If IO.Directory.Exists(roundTripTempRoot) Then - IO.Directory.Delete(roundTripTempRoot, recursive:=True) - End If - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is IO.IOException OrElse - TypeOf ex Is IO.PathTooLongException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - LogGuard.Debug(detectorOptions.Logger, 
$"[HashRoundTrip] Cleanup-Fehler: {ex.Message}") - End Try - End Try - - notes = If( - isArchiveInput, - "Archive roundtrip (h1-h4) executed.", - "Raw file roundtrip (h1-h4) executed.") - - Return New HashRoundTripReport( - path, - isArchiveInput, - notes, - h1, - h2, - h3, - h4) - End Function - - Private Shared Function CreateFailureReport _ - ( - path As String, - notes As String, - h1 As HashEvidence, - h2 As HashEvidence, - h3 As HashEvidence, - h4 As HashEvidence - ) As HashRoundTripReport - - Return New HashRoundTripReport( - path, - isArchiveInput:=False, - notes:=notes, - h1, - h2, - h3, - h4) - End Function - End Class -End Namespace diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md b/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md deleted file mode 100644 index eea3a8cc..00000000 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Abstractions Hashing Internal Modul - -## 1. Zweck -Dieses Verzeichnis enthaelt interne, zustandslose Hashing-Bausteine hinter der oeffentlichen Fassade `EvidenceHashing`. - -## 2. Inhalt -- `EvidenceHashingCore.vb` -- `EvidenceHashingRoundTrip.vb` -- `EvidenceHashingIO.vb` - -## 3. API und Verhalten -- Keine Public API in diesem Verzeichnis. -- Fail-closed Fehlerpfade und deterministische Digest-Bildung werden zentral gekapselt. -- Die RoundTrip-Pipeline materialisiert temporaere Dateien und bereinigt best-effort. - -## 4. Verifikation -- Nutzung wird ueber `EvidenceHashing` sowie Unit-/Integrationstests in `tests/FileTypeDetectionLib.Tests` verifiziert. - -## 5. Diagramm -N/A - -## 6. 
Verweise -- [Hashing-Abstractions](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Abstractions/Hashing/README.md) -- [Hashing-Contract](https://github.com/tomtastisch/FileClassifier/blob/main/docs/contracts/001_CONTRACT_HASHING.MD) diff --git a/src/FileTypeDetection/ArchiveProcessing.vb b/src/FileTypeDetection/ArchiveProcessing.vb index 3103b2a8..d15fd91e 100644 --- a/src/FileTypeDetection/ArchiveProcessing.vb +++ b/src/FileTypeDetection/ArchiveProcessing.vb @@ -44,7 +44,6 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As Boolean - Return FileTypeDetector.TryValidateArchive(path) End Function @@ -60,7 +59,6 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As Boolean - Dim opt = FileTypeOptions.GetSnapshot() Return ArchivePayloadGuard.IsSafeArchivePayload(data, opt) End Function @@ -81,8 +79,8 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String, verifyBeforeExtract As Boolean - ) As IReadOnlyList(Of ZipExtractedEntry) - + ) _ + As IReadOnlyList(Of ZipExtractedEntry) Return New FileTypeDetector().ExtractArchiveSafeToMemory(path, verifyBeforeExtract) End Function @@ -99,12 +97,11 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As IReadOnlyList(Of ZipExtractedEntry) - Dim opt As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() - If Not ByteArrayGuard.HasContent(data) Then Return emptyResult + If data Is Nothing OrElse data.Length = 0 Then Return emptyResult If Not ArchiveEntryCollector.TryCollectFromBytes(data, opt, entries) Then Return emptyResult Return entries diff --git a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb deleted file mode 100644 index 85c76c06..00000000 --- 
a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb +++ /dev/null @@ -1,248 +0,0 @@ -' ============================================================================ -' FILE: FileTypeRegistryConfig.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' -' SSOT CONFIG (verbindlich) -' - AliasGroups: zentrale “Wildcard”-Semantik (FileKind.* steht für viele Aliaswerte) -' - AliasOverrides: Kind -> AliasGroup -' - ExtensionOverrides: Canonical-Extension Overrides -' - MagicPatternCatalog: zentrale Magic-Signaturen -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System.Collections.Immutable - -Namespace Global.Tomtastisch.FileClassifier - ''' - ''' Zentrale Konfiguration (SSOT) für . - ''' Definiert: - ''' - Canonical-Extension Overrides - ''' - Aliasgruppen (Wildcard-Semantik) - ''' - Mapping -> Aliasgruppe - ''' - Magic-Pattern-Katalog - ''' - ''' - ''' Diese Konfiguration enthält ausschließlich statische Daten und deterministische Builder. - ''' Normalisierung, Deduplikation und Sortierung erfolgen in . - ''' - Friend Module FileTypeRegistryConfig - - ''' - ''' Canonical-Extension Overrides (SSOT). - ''' Wird genutzt, wenn die Canonical-Extension nicht aus dem Enum-Namen abgeleitet werden soll. - ''' - Friend ReadOnly ExtensionOverrides _ - As ImmutableDictionary(Of FileKind, String) = _ - BuildExtensionOverrides() - - ''' - ''' Aliasgruppen (SSOT) zur Abbildung der Wildcard-Semantik. - ''' Gruppen fassen gleichartige Aliaswerte zusammen (z.B. Archive, Office-Container). - ''' - Friend ReadOnly AliasGroups _ - As ImmutableDictionary(Of String, ImmutableArray(Of String)) = _ - BuildAliasGroups() - - ''' - ''' Mapping -> Aliasgruppe (SSOT). 
- ''' Die Werte ergänzen die automatisch abgeleiteten Aliases (Enumname + Canonical-Extension). - ''' - Friend ReadOnly AliasOverrides _ - As ImmutableDictionary(Of FileKind, ImmutableArray(Of String)) = _ - BuildAliasOverrides() - - ''' - ''' Magic-Pattern-Katalog (SSOT) für direkte Header-Erkennung. - ''' Enthält pro eine Liste von Patterns; ein Pattern besteht aus Segmenten. - ''' - Friend ReadOnly MagicPatternCatalog _ - As ImmutableDictionary(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern)) = - BuildMagicPatternCatalog() - - ''' - ''' Erstellt ein unveränderliches Aliasarray aus variablen Stringwerten. - ''' - ''' Aliaswerte in Rohform. - ''' Unveränderliches Array der Aliaswerte. - Private Function A _ - ( - ParamArray values As String() - ) As ImmutableArray(Of String) - - Return ImmutableArray.Create(values) - End Function - - ''' - ''' Liefert die Aliasgruppe für einen Gruppennamen. - ''' Fail-closed: Unbekannte Gruppen liefern ein leeres Array. - ''' - ''' Gruppenname (case-insensitive). - ''' Aliasgruppe oder leeres Array. - Private Function GetGroup _ - ( - name As String _ - ) As ImmutableArray(Of String) - - Dim values As ImmutableArray(Of String) = ImmutableArray(Of String).Empty - If AliasGroups.TryGetValue(name, values) Then Return values - Return ImmutableArray(Of String).Empty - End Function - - ''' - ''' Baut die Canonical-Extension Overrides deterministisch. - ''' - ''' Unveränderliches Dictionary Kind->Extension. - Private Function BuildExtensionOverrides() As ImmutableDictionary(Of FileKind, String) - Dim extensionBuilder = ImmutableDictionary.CreateBuilder(Of FileKind, String)() - - extensionBuilder(FileKind.Jpeg) = ".jpg" - - Return extensionBuilder.ToImmutable() - End Function - - ''' - ''' Baut die Aliasgruppen deterministisch. - ''' Gruppen sind fachliche Wildcards und werden in referenziert. - ''' - ''' Unveränderliches Dictionary Gruppenname->Aliasliste. 
- Private Function BuildAliasGroups() As ImmutableDictionary(Of String, ImmutableArray(Of String)) - Dim aliasGruppenBuilder = ImmutableDictionary.CreateBuilder _ - ( - Of String, - ImmutableArray(Of String) - ) _ - ( - StringComparer.OrdinalIgnoreCase - ) - - ' Wildcard-Semantik (Gruppen): - ' - ARCHIVE: alle Archive/Container, die über FileKind.Zip normalisiert werden. - ' - DOC/XLS/PPT: Office-/OpenDocument-Container (Doc/Xls/Ppt), deren Content/Container-Detection separat läuft. - - aliasGruppenBuilder("JPEG") = A("jpe") - - aliasGruppenBuilder("ARCHIVE") = A( - "tar", "tgz", "gz", "gzip", - "bz2", "bzip2", - "xz", - "7z", "zz", "rar") - - aliasGruppenBuilder("DOC") = A( - "doc", "docx", "docm", "docb", - "dot", "dotm", "dotx", - "odt", "ott") - - aliasGruppenBuilder("XLS") = A( - "xls", "xlsx", "xlsm", "xlsb", - "xlt", "xltm", "xltx", "xltb", - "xlam", "xla", - "ods", "ots") - - aliasGruppenBuilder("PPT") = A( - "ppt", "pptx", "pptm", - "pot", "potm", "potx", - "pps", "ppsm", "ppsx", - "odp", "otp") - - Return aliasGruppenBuilder.ToImmutable() - End Function - - ''' - ''' Baut das Mapping -> Aliasgruppe deterministisch. - ''' - ''' Unveränderliches Dictionary Kind->Aliasliste. - Private Function BuildAliasOverrides() As ImmutableDictionary(Of FileKind, ImmutableArray(Of String)) - Dim aliasMappingBuilder = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of String))() - - aliasMappingBuilder(FileKind.Jpeg) = GetGroup("JPEG") - aliasMappingBuilder(FileKind.Zip) = GetGroup("ARCHIVE") - aliasMappingBuilder(FileKind.Doc) = GetGroup("DOC") - aliasMappingBuilder(FileKind.Xls) = GetGroup("XLS") - aliasMappingBuilder(FileKind.Ppt) = GetGroup("PPT") - - Return aliasMappingBuilder.ToImmutable() - End Function - - ''' - ''' Erstellt ein aus Segmenten. - ''' - ''' Segmente, die gemeinsam matchen müssen. - ''' Magic-Pattern. 
- Private Function Pattern _ - ( - ParamArray segments As FileTypeRegistry.MagicSegment() - ) As FileTypeRegistry.MagicPattern - - Return New FileTypeRegistry.MagicPattern(ImmutableArray.Create(segments)) - End Function - - ''' - ''' Erstellt ein , das eine Bytefolge ab einem festen Offset erwartet. - ''' - ''' Startoffset im Header. - ''' Erwartete Bytefolge. - ''' Magic-Segment. - Private Function Prefix _ - ( - offset As Integer, - ParamArray bytesValue As Byte() - ) As FileTypeRegistry.MagicSegment - - Return New FileTypeRegistry.MagicSegment(offset, ImmutableArray.Create(bytesValue)) - End Function - - ''' - ''' Baut den Magic-Pattern-Katalog deterministisch. - ''' Einträge sind ausschließlich direkte Header-Signaturen (kein Container-Parsing). - ''' - ''' Unveränderliches Dictionary Kind->Magic-Patterns. - Private Function BuildMagicPatternCatalog _ - () As ImmutableDictionary(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern)) - - Dim magicPatternBuilder = ImmutableDictionary.CreateBuilder _ - ( - Of FileKind, - ImmutableArray(Of FileTypeRegistry.MagicPattern) - )() - - magicPatternBuilder(FileKind.Pdf) = ImmutableArray.Create( - Pattern(Prefix(0, &H25, &H50, &H44, &H46, &H2D)) - ) - - magicPatternBuilder(FileKind.Png) = ImmutableArray.Create( - Pattern(Prefix(0, &H89, &H50, &H4E, &H47, &HD, &HA, &H1A, &HA)) - ) - - magicPatternBuilder(FileKind.Jpeg) = ImmutableArray.Create( - Pattern(Prefix(0, &HFF, &HD8, &HFF)) - ) - - magicPatternBuilder(FileKind.Gif) = ImmutableArray.Create( - Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H37, &H61)), - Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H39, &H61)) - ) - - magicPatternBuilder(FileKind.Webp) = ImmutableArray.Create( - Pattern( - Prefix(0, &H52, &H49, &H46, &H46), - Prefix(8, &H57, &H45, &H42, &H50) - ) - ) - - magicPatternBuilder(FileKind.Zip) = ImmutableArray.Create( - Pattern(Prefix(0, &H50, &H4B, &H3, &H4)), - Pattern(Prefix(0, &H50, &H4B, &H5, &H6)), - Pattern(Prefix(0, &H50, &H4B, &H7, &H8)) - ) - - 
Return magicPatternBuilder.ToImmutable() - End Function - - End Module -End Namespace diff --git a/src/FileTypeDetection/Detection/FileTypeRegistry.vb b/src/FileTypeDetection/Detection/FileTypeRegistry.vb index 41690935..cc606759 100644 --- a/src/FileTypeDetection/Detection/FileTypeRegistry.vb +++ b/src/FileTypeDetection/Detection/FileTypeRegistry.vb @@ -25,58 +25,74 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - ''' - ''' SSOT: Ordnet jedem den zugehörigen zu. - ''' Der Eintrag ist immer vorhanden (fail-closed). - ''' Friend Shared ReadOnly TypesByKind As ImmutableDictionary(Of FileKind, FileType) - - ''' - ''' Alias-Index: Ordnet normalisierte Aliaswerte (z.B. Endungen ohne Punkt) einem zu. - ''' Die Normalisierung erfolgt über . - ''' Friend Shared ReadOnly KindByAlias As ImmutableDictionary(Of String, FileKind) - - ''' - ''' Canonical-Extension Overrides (SSOT). Wird für einzelne Typen genutzt, - ''' wenn der Enumname nicht der gewünschten Canonical-Extension entspricht. - ''' Private Shared ReadOnly ExtensionOverrides As ImmutableDictionary(Of FileKind, String) = - FileTypeRegistryConfig.ExtensionOverrides + ImmutableDictionary.CreateRange(Of FileKind, String)( + {New KeyValuePair(Of FileKind, String)(FileKind.Jpeg, ".jpg")}) - ''' - ''' Zusätzliche Aliaswerte pro (SSOT). Diese Werte ergänzen die automatisch - ''' abgeleiteten Aliases (Enumname + Canonical-Extension) und werden deterministisch normalisiert. - ''' Private Shared ReadOnly AliasOverrides As ImmutableDictionary(Of FileKind, ImmutableArray(Of String)) = - FileTypeRegistryConfig.AliasOverrides - - ''' - ''' Cache der deterministisch sortierten Enumwerte (). - ''' Vermeidet wiederholte Reflection/Sortierung in Hotpaths. 
- ''' - Private Shared ReadOnly OrderedKindsCache As ImmutableArray(Of FileKind) = BuildOrderedKinds() + ImmutableDictionary.CreateRange(Of FileKind, ImmutableArray(Of String))( + { _ + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Jpeg, + ImmutableArray. + Create("jpe")), + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Zip, + ImmutableArray. + Create("tar", + "tgz", + "gz", + "gzip", + "bz2", + "bzip2", + "xz", + "7z", + "zz", + "rar")), + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Docx, + ImmutableArray. + Create("doc", + "docm", + "docb", + "dot", + "dotm", + "dotx", + "odt", + "ott")), + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Xlsx, + ImmutableArray. + Create("xls", + "xlsm", + "xlsb", + "xlt", + "xltm", + "xltx", + "xltb", + "xlam", + "xla", + "ods", + "ots")), + New KeyValuePair(Of FileKind, ImmutableArray(Of String))(FileKind.Pptx, + ImmutableArray. + Create("ppt", + "pptm", + "pot", + "potm", + "potx", + "pps", + "ppsm", + "ppsx", + "odp", + "otp")) + }) - ''' - ''' Katalog von Magic-Patterns pro . - ''' Die Datenquelle ist die zentrale Konfiguration FileTypeRegistryConfig. - ''' Private Shared ReadOnly _ MagicPatternCatalog As ImmutableDictionary(Of FileKind, ImmutableArray(Of MagicPattern)) = - FileTypeRegistryConfig.MagicPatternCatalog + BuildMagicPatternCatalog() - ''' - ''' Aus abgeleitete Regeln für die Magic-Erkennung. - ''' Enthält ausschließlich Einträge mit mindestens einem Magic-Pattern. - ''' Private Shared ReadOnly MagicRules As ImmutableArray(Of MagicRule) - - ''' - ''' Initialisiert die Registry deterministisch aus und den zentralen Overrides. - ''' Reihenfolge: Definitionen bauen, Typen ableiten, Aliasindex erzeugen, Magic-Regeln ableiten. 
- ''' Shared Sub New() Dim definitions = BuildDefinitionsFromEnum() TypesByKind = BuildTypes(definitions) @@ -84,18 +100,13 @@ Namespace Global.Tomtastisch.FileClassifier MagicRules = BuildMagicRules(definitions) End Sub - ''' - ''' Erzeugt die vollständige Menge an aus der Enumquelle. - ''' wird bewusst ausgeschlossen, da Unknown als separater fail-closed Typ geführt wird. - ''' - ''' Unveränderliche Liste aller Definitionsobjekte (ohne Unknown). Private Shared Function BuildDefinitionsFromEnum() As ImmutableArray(Of FileTypeDefinition) Dim b = ImmutableArray.CreateBuilder(Of FileTypeDefinition)() Dim canonicalExtension As String Dim aliases As String() Dim magicPatterns As ImmutableArray(Of MagicPattern) - For Each kind In OrderedKindsCache + For Each kind In OrderedKinds() If kind = FileKind.Unknown Then Continue For canonicalExtension = GetCanonicalExtension(kind) @@ -108,31 +119,13 @@ Namespace Global.Tomtastisch.FileClassifier Return b.ToImmutable() End Function - ''' - ''' Liefert die deterministisch sortierten Enumwerte () aus dem Cache. - ''' - ''' Sortierte Liste aller Enumwerte. Private Shared Function OrderedKinds() As ImmutableArray(Of FileKind) - Return OrderedKindsCache - End Function - - ''' - ''' Baut den Cache der sortierten Enumwerte () einmalig über Reflection. - ''' - ''' Sortierte Liste aller Enumwerte. - Private Shared Function BuildOrderedKinds() As ImmutableArray(Of FileKind) Dim values = [Enum].GetValues(GetType(FileKind)).Cast(Of FileKind)() Return values. OrderBy(Function(kind) CInt(kind)). ToImmutableArray() End Function - ''' - ''' Bestimmt die Canonical-Extension für einen Typ. - ''' Priorität: Override > Enumname (normalisiert) als "." + alias. - ''' - ''' Enumwert des Typs. - ''' Canonical-Extension inklusive führendem Punkt. 
Private Shared Function GetCanonicalExtension(kind As FileKind) As String Dim overrideExt As String = Nothing If ExtensionOverrides.TryGetValue(kind, overrideExt) Then @@ -142,14 +135,6 @@ Namespace Global.Tomtastisch.FileClassifier Return "." & NormalizeAlias(kind.ToString()) End Function - ''' - ''' Baut die vollständige Aliasliste für einen Typ. - ''' Enthält Canonical-Extension, Enumalias sowie zusätzliche Overrides. - ''' Ergebnis ist deterministisch sortiert und ohne Duplikate. - ''' - ''' Enumwert des Typs. - ''' Canonical-Extension inklusive führendem Punkt. - ''' Sortierte Aliasliste (ohne führende Punkte, kleingeschrieben). Private Shared Function BuildAliases(kind As FileKind, canonicalExtension As String) As String() Dim aliases As New HashSet(Of String)(StringComparer.OrdinalIgnoreCase) Dim extAlias As String @@ -164,10 +149,11 @@ Namespace Global.Tomtastisch.FileClassifier If enumAlias.Length > 0 Then aliases.Add(enumAlias) If AliasOverrides.TryGetValue(kind, additional) Then - For Each rawAlias In additional - Dim normalized = NormalizeAlias(rawAlias) - If normalized.Length > 0 Then aliases.Add(normalized) - Next + additional. + Select(Function(item) NormalizeAlias(item)). + Where(Function(normalized) normalized.Length > 0). + ToList(). + ForEach(Sub(normalized) aliases.Add(normalized)) End If orderedAliases = aliases.ToList() @@ -175,11 +161,6 @@ Namespace Global.Tomtastisch.FileClassifier Return orderedAliases.ToArray() End Function - ''' - ''' Liefert die Magic-Patterns für einen Typ aus dem Katalog. - ''' - ''' Enumwert des Typs. - ''' Magic-Patterns oder ein leeres Array. 
Private Shared Function GetMagicPatterns(kind As FileKind) As ImmutableArray(Of MagicPattern) Dim patterns As ImmutableArray(Of MagicPattern) = ImmutableArray(Of MagicPattern).Empty If MagicPatternCatalog.TryGetValue(kind, patterns) Then @@ -189,62 +170,34 @@ Namespace Global.Tomtastisch.FileClassifier Return ImmutableArray(Of MagicPattern).Empty End Function - ''' - ''' Erzeugt die Typ-Registry () aus den Definitionsobjekten. - ''' Unknown wird als eigener, fail-closed Eintrag hinzugefügt. - ''' - ''' Definitionsobjekte (ohne Unknown). - ''' Unveränderliches Dictionary mit Einträgen für alle Typen inklusive Unknown. Private Shared Function BuildTypes(definitions As ImmutableArray(Of FileTypeDefinition)) _ As ImmutableDictionary(Of FileKind, FileType) Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, FileType)() - b(FileKind.Unknown) = CreateUnknownType() + b(FileKind.Unknown) = New FileType(FileKind.Unknown, Nothing, Nothing, False, + ImmutableArray(Of String).Empty) For Each d In definitions - b(d.Kind) = New FileType( - d.Kind, - d.CanonicalExtension, - MimeProvider.GetMime(d.CanonicalExtension), - True, - d.Aliases - ) + b(d.Kind) = New FileType(d.Kind, d.CanonicalExtension, MimeProvider.GetMime(d.CanonicalExtension), True, + d.Aliases) Next Return b.ToImmutable() End Function - ''' - ''' Erzeugt den fail-closed für . - ''' - ''' Unknown-Typ ohne Extension und ohne MIME. - Private Shared Function CreateUnknownType() As FileType - Return New FileType(FileKind.Unknown, - Nothing, - Nothing, - False, - ImmutableArray(Of String).Empty) - End Function - - - ''' - ''' Bestimmt den Typ anhand von Magic-Patterns in einem Header-Bytearray. - ''' Die Auswertung erfolgt deterministisch in Regelreihenfolge; erster Treffer gewinnt. - ''' - ''' Dateiheader (mindestens so lang wie die benötigten Segmente). - ''' Erkannter oder . 
Friend Shared Function DetectByMagic(header As Byte()) As FileKind Dim rule As MagicRule Dim patterns As ImmutableArray(Of MagicPattern) + Dim segments As ImmutableArray(Of MagicSegment) If header Is Nothing OrElse header.Length = 0 Then Return FileKind.Unknown For i = 0 To MagicRules.Length - 1 rule = MagicRules(i) patterns = rule.Patterns - For j = 0 To patterns.Length - 1 - If MatchesPattern(header, patterns(j)) Then + segments = patterns(j).Segments + If segments.All(Function(segment) HasSegment(header, segment)) Then Return rule.Kind End If Next @@ -253,29 +206,6 @@ Namespace Global.Tomtastisch.FileClassifier Return FileKind.Unknown End Function - ''' - ''' Prüft, ob ein Magic-Pattern vollständig gegen den Header matcht. - ''' - ''' Headerdaten. - ''' Pattern mit Segmenten. - ''' True, wenn alle Segmente matchen. - Private Shared Function MatchesPattern(header As Byte(), pattern As MagicPattern) As Boolean - Dim segments As ImmutableArray(Of MagicSegment) = pattern.Segments - - If segments.IsDefaultOrEmpty Then Return False - - For i = 0 To segments.Length - 1 - If Not HasSegment(header, segments(i)) Then Return False - Next - - Return True - End Function - - ''' - ''' Prüft, ob für einen Typ mindestens ein Magic-Pattern für direkte Header-Erkennung hinterlegt ist. - ''' - ''' Enumwert des Typs. - ''' True bei vorhandenem Patternkatalogeintrag. Friend Shared Function HasDirectHeaderDetection(kind As FileKind) As Boolean Dim patterns As ImmutableArray(Of MagicPattern) = ImmutableArray(Of MagicPattern).Empty @@ -283,33 +213,16 @@ Namespace Global.Tomtastisch.FileClassifier Return MagicPatternCatalog.TryGetValue(kind, patterns) AndAlso Not patterns.IsDefaultOrEmpty End Function - ''' - ''' Prüft, ob ein Typ zusätzlich über strukturierte Container-Erkennung klassifiziert wird. - ''' Diese Klassifikation ist unabhängig von direkten Header-Signaturen. - ''' - ''' Enumwert des Typs. - ''' True, wenn strukturierte Container-Erkennung aktiv ist. 
Friend Shared Function HasStructuredContainerDetection(kind As FileKind) As Boolean - Return kind = FileKind.Doc OrElse - kind = FileKind.Xls OrElse - kind = FileKind.Ppt + Return kind = FileKind.Docx OrElse + kind = FileKind.Xlsx OrElse + kind = FileKind.Pptx End Function - ''' - ''' Prüft, ob der Typ eine direkte Inhalts-/Header-Erkennung besitzt - ''' (Magic-Header oder strukturierte Container-Erkennung). - ''' - ''' Enumwert des Typs. - ''' True, wenn Content-Detection verfügbar ist. Friend Shared Function HasDirectContentDetection(kind As FileKind) As Boolean Return HasDirectHeaderDetection(kind) OrElse HasStructuredContainerDetection(kind) End Function - ''' - ''' Liefert alle Typen, die keine direkte Content-Detection besitzen. - ''' Unknown ist ausgeschlossen. - ''' - ''' Liste der Typen ohne direkte Content-Detection. Friend Shared Function KindsWithoutDirectContentDetection() As ImmutableArray(Of FileKind) Return OrderedKinds(). Where(Function(kind) kind <> FileKind.Unknown). @@ -317,12 +230,6 @@ Namespace Global.Tomtastisch.FileClassifier ToImmutableArray() End Function - ''' - ''' Baut die Magic-Regeln aus den Definitionsobjekten. - ''' Es werden ausschließlich Definitionsobjekte mit mindestens einem Magic-Pattern berücksichtigt. - ''' - ''' Definitionsobjekte (ohne Unknown). - ''' Unveränderliche Liste der Magic-Regeln. Private Shared Function BuildMagicRules(definitions As ImmutableArray(Of FileTypeDefinition)) _ As ImmutableArray(Of MagicRule) Return definitions. @@ -331,13 +238,42 @@ Namespace Global.Tomtastisch.FileClassifier ToImmutableArray() End Function - ''' - ''' Prüft, ob ein einzelnes Segment am angegebenen Offset innerhalb der Daten exakt matcht. - ''' Fail-closed: Bei ungültigen Parametern oder zu kurzen Daten wird False geliefert. - ''' - ''' Headerdaten. - ''' Segmentdefinition. - ''' True bei exaktem Match. 
+ Private Shared Function BuildMagicPatternCatalog() _ + As ImmutableDictionary(Of FileKind, ImmutableArray(Of MagicPattern)) + Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of MagicPattern))() + + b(FileKind.Pdf) = ImmutableArray.Create( + Pattern(Prefix(0, &H25, &H50, &H44, &H46, &H2D))) + + b(FileKind.Png) = ImmutableArray.Create( + Pattern(Prefix(0, &H89, &H50, &H4E, &H47, &HD, &HA, &H1A, &HA))) + + b(FileKind.Jpeg) = ImmutableArray.Create( + Pattern(Prefix(0, &HFF, &HD8, &HFF))) + + b(FileKind.Gif) = ImmutableArray.Create( + Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H37, &H61)), + Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H39, &H61))) + + b(FileKind.Webp) = ImmutableArray.Create( + Pattern(Prefix(0, &H52, &H49, &H46, &H46), Prefix(8, &H57, &H45, &H42, &H50))) + + b(FileKind.Zip) = ImmutableArray.Create( + Pattern(Prefix(0, &H50, &H4B, &H3, &H4)), + Pattern(Prefix(0, &H50, &H4B, &H5, &H6)), + Pattern(Prefix(0, &H50, &H4B, &H7, &H8))) + + Return b.ToImmutable() + End Function + + Private Shared Function Pattern(ParamArray segments As MagicSegment()) As MagicPattern + Return New MagicPattern(ImmutableArray.Create(segments)) + End Function + + Private Shared Function Prefix(offset As Integer, ParamArray bytesValue As Byte()) As MagicSegment + Return New MagicSegment(offset, ImmutableArray.Create(bytesValue)) + End Function + Private Shared Function HasSegment(data As Byte(), segment As MagicSegment) As Boolean Dim endPos As Integer @@ -354,50 +290,32 @@ Namespace Global.Tomtastisch.FileClassifier Return True End Function - ''' - ''' Erzeugt den Aliasindex () aus der Typ-Registry. - ''' Aliases werden normalisiert; spätere Einträge überschreiben frühere deterministisch. - ''' - ''' Typ-Registry. - ''' Unveränderliches Dictionary Alias->Kind. 
Private Shared Function BuildAliasMap(types As ImmutableDictionary(Of FileKind, FileType)) _ As ImmutableDictionary(Of String, FileKind) - Dim builder As ImmutableDictionary(Of String, FileKind).Builder - Dim kind As FileKind - Dim t As FileType = Nothing + Dim entries As List(Of Tuple(Of FileKind, String)) If types Is Nothing Then Return ImmutableDictionary(Of String, FileKind).Empty - builder = ImmutableDictionary.CreateBuilder(Of String, FileKind)(StringComparer.OrdinalIgnoreCase) - - For Each kind In OrderedKindsCache - If Not types.TryGetValue(kind, t) Then Continue For - If t Is Nothing Then Continue For - If t.Aliases.IsDefaultOrEmpty Then Continue For - - For i = 0 To t.Aliases.Length - 1 - Dim aliasKey = NormalizeAlias(t.Aliases(i)) - If aliasKey.Length = 0 Then Continue For - - builder(aliasKey) = kind - Next - Next - - Return builder.ToImmutable() + entries = types. + Where(Function(kv) kv.Value IsNot Nothing). + Where(Function(kv) Not kv.Value.Aliases.IsDefault AndAlso kv.Value.Aliases.Length > 0). + SelectMany(Function(kv) kv.Value.Aliases. + Select(Function(aliasValue) Tuple.Create(kv.Key, NormalizeAlias(aliasValue)))). + Where(Function(item) item.Item2.Length > 0). + ToList() + + Return entries. + Aggregate(ImmutableDictionary.CreateBuilder(Of String, FileKind)(StringComparer.OrdinalIgnoreCase), + Function(builder, entry) + builder(entry.Item2) = entry.Item1 + Return builder + End Function). + ToImmutable() End Function - ''' - ''' Normalisiert einen Aliaswert deterministisch. - ''' Entfernt führende Punkte, trimmt Whitespace und wandelt in Kleinbuchstaben (Invariant) um. - ''' - ''' Rohwert, z.B. ".PDF" oder " pdf ". - ''' Normalisierter Alias ohne Punkt oder leerer String. 
Friend Shared Function NormalizeAlias(raw As String) As String - Dim s As String = If(raw, String.Empty).Trim() - - If s.Length = 0 Then Return String.Empty - If s(0) = "."c Then s = s.Substring(1) - + Dim s = If(raw, String.Empty).Trim() + If s.StartsWith("."c) Then s = s.Substring(1) Return s.ToLowerInvariant() End Function @@ -428,7 +346,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger FileTypeDefinition für strukturierte Verarbeitungsschritte. ''' - Friend Structure FileTypeDefinition + Private Structure FileTypeDefinition Friend ReadOnly Kind As FileKind Friend ReadOnly CanonicalExtension As String Friend ReadOnly Aliases As String() @@ -446,7 +364,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger MagicRule für strukturierte Verarbeitungsschritte. ''' - Friend Structure MagicRule + Private Structure MagicRule Friend ReadOnly Kind As FileKind Friend ReadOnly Patterns As ImmutableArray(Of MagicPattern) @@ -459,7 +377,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger MagicPattern für strukturierte Verarbeitungsschritte. ''' - Friend Structure MagicPattern + Private Structure MagicPattern Friend ReadOnly Segments As ImmutableArray(Of MagicSegment) Friend Sub New(segments As ImmutableArray(Of MagicSegment)) @@ -470,7 +388,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Interner, unveränderlicher Datenträger MagicSegment für strukturierte Verarbeitungsschritte. 
''' - Friend Structure MagicSegment + Private Structure MagicSegment Friend ReadOnly Offset As Integer Friend ReadOnly Bytes As ImmutableArray(Of Byte) diff --git a/src/FileTypeDetection/EvidenceHashing.vb b/src/FileTypeDetection/EvidenceHashing.vb index d5bb9a4a..0069c993 100644 --- a/src/FileTypeDetection/EvidenceHashing.vb +++ b/src/FileTypeDetection/EvidenceHashing.vb @@ -16,16 +16,15 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' ''' - ''' Die Fassade orchestriert Dateieinlesung, Typdetektion und Archivsicht und delegiert die eigentliche - ''' Hash-Berechnung an interne, zustandslose Utility-Komponenten. + ''' Verantwortung: Die Klasse erzeugt reproduzierbare Digest-Evidence für Dateien, Rohbytes und Archiv-Entries. ''' ''' - ''' Fail-Closed-Verhalten: Ungültige Eingaben, Größenlimit-Verstöße und I/O-Fehler liefern stets ein - ''' deterministisches HashEvidence.CreateFailure(...)-Ergebnis mit unverändertem Fehltext. + ''' Security/Compliance: Optionale HMAC-Digests verwenden den Schlüssel aus + ''' FILECLASSIFIER_HMAC_KEY_B64; fehlt der Schlüssel, wird fail-closed ohne HMAC fortgeführt. ''' ''' - ''' Side-Effects: VerifyRoundTrip materialisiert kanonische Bytes in ein temporäres Dateisystemziel - ''' und bereinigt den temporären Ordner anschließend best-effort. + ''' Nebenwirkungen: VerifyRoundTrip verwendet temporäre Dateisystempfade für die Materialisierung und + ''' bereinigt diese anschließend best-effort. ''' ''' Public NotInheritable Class EvidenceHashing @@ -40,11 +39,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für eine Datei mit Standard-Hashoptionen. ''' ''' - ''' - ''' Ablauf: - ''' 1) Delegation auf die Überladung mit expliziten Optionen, - ''' 2) Anwendung der Snapshot-Defaults aus FileTypeOptions. - ''' + ''' Delegiert auf die Überladung mit expliziten Hashoptionen. ''' ''' Pfad zur Eingabedatei. ''' Hash-Evidence; bei Fehlern ein fail-closed Nachweisobjekt mit Fehlerhinweis. 
@@ -52,7 +47,6 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As HashEvidence - Return HashFile(path, options:=Nothing) End Function @@ -60,17 +54,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für eine Datei. ''' ''' - ''' - ''' Ablauf: - ''' 1) Snapshot und Normalisierung der Hashoptionen, - ''' 2) Guard-Validierung (Pfad/Existenz/Bounded Read), - ''' 3) Typdetektion, - ''' 4) Archivzweig über kanonisches Manifest oder Fallback-Zweig über Rohpayload, - ''' 5) Rückgabe als deterministisches HashEvidence. - ''' - ''' - ''' Fail-Closed: Bei Guard-/I/O-Fehlern wird eine Failure-Evidence mit unverändertem Fehltext erzeugt. - ''' + ''' Archive werden über kanonisches Manifest gehasht; Nicht-Archive über direkte Payload-Digests. ''' ''' Pfad zur Eingabedatei. ''' Optionale Hashparameter; bei Nothing werden globale Defaults verwendet. @@ -79,8 +63,8 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String, options As HashOptions - ) As HashEvidence - + ) _ + As HashEvidence Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() Dim normalizedOptions As HashOptions = ResolveHashOptions(detectorOptions, options) Dim fileBytes As Byte() = Array.Empty(Of Byte)() @@ -89,16 +73,17 @@ Namespace Global.Tomtastisch.FileClassifier Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then - Return Failure(HashSourceType.FilePath, path, "Datei nicht gefunden.") + Return _ + HashEvidence.CreateFailure(HashSourceType.FilePath, path, "Datei nicht gefunden.") End If - If Not EvidenceHashingIO.TryReadFileBounded(path, detectorOptions, fileBytes, readError) Then - Return Failure(HashSourceType.FilePath, path, readError) + If Not TryReadFileBounded(path, detectorOptions, fileBytes, readError) Then + Return HashEvidence.CreateFailure(HashSourceType.FilePath, path, readError) End If 
detectedType = New FileTypeDetector().Detect(path) If ArchiveEntryCollector.TryCollectFromFile(path, detectorOptions, entries) Then - Return EvidenceHashingCore.BuildEvidenceFromEntries( + Return BuildEvidenceFromEntries( sourceType:=HashSourceType.FilePath, label:=IO.Path.GetFileName(path), detectedType:=detectedType, @@ -108,7 +93,7 @@ Namespace Global.Tomtastisch.FileClassifier notes:="Archive content hashed via canonical manifest.") End If - Return EvidenceHashingCore.BuildEvidenceFromRawPayload( + Return BuildEvidenceFromRawPayload( sourceType:=HashSourceType.FilePath, label:=IO.Path.GetFileName(path), detectedType:=detectedType, @@ -121,11 +106,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für Rohbytes mit Standardlabel. ''' ''' - ''' - ''' Ablauf: - ''' 1) Delegation auf die Überladung mit Label und Optionen, - ''' 2) Verwendung des stabilen Standardlabels payload.bin. - ''' + ''' Delegiert auf die Überladung mit Label und expliziten Hashoptionen. ''' ''' Zu hashende Rohbytes. ''' Hash-Evidence; bei Fehlern ein fail-closed Nachweisobjekt mit Fehlerhinweis. @@ -133,7 +114,6 @@ Namespace Global.Tomtastisch.FileClassifier ( data As Byte() ) As HashEvidence - Return HashBytes(data, DefaultPayloadLabel, options:=Nothing) End Function @@ -141,11 +121,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für Rohbytes mit benutzerdefiniertem Label. ''' ''' - ''' - ''' Ablauf: - ''' 1) Delegation auf die Überladung mit expliziten Optionen, - ''' 2) Label-Normalisierung im Zielpfad. - ''' + ''' Delegiert auf die Überladung mit expliziten Hashoptionen. ''' ''' Zu hashende Rohbytes. ''' Fachliches Label für den Nachweis. 
@@ -155,7 +131,6 @@ Namespace Global.Tomtastisch.FileClassifier data As Byte(), label As String ) As HashEvidence - Return HashBytes(data, label, options:=Nothing) End Function @@ -163,17 +138,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis für Rohbytes. ''' ''' - ''' - ''' Ablauf: - ''' 1) Snapshot und Normalisierung der Hashoptionen, - ''' 2) Guard-Validierung (null/MaxBytes), - ''' 3) Typdetektion, - ''' 4) Archivzweig mit kanonischem Manifest oder Rohpayload-Zweig, - ''' 5) Rückgabe als deterministisches HashEvidence. - ''' - ''' - ''' Fail-Closed: Bei Guard-Verletzung wird eine Failure-Evidence mit unverändertem Fehltext erzeugt. - ''' + ''' Die Eingabe wird gegen globale Größenlimits geprüft und anschließend als Archiv- oder Rohpayload verarbeitet. ''' ''' Zu hashende Rohbytes. ''' Fachliches Label für den Nachweis. @@ -184,24 +149,26 @@ Namespace Global.Tomtastisch.FileClassifier data As Byte(), label As String, options As HashOptions - ) As HashEvidence - + ) _ + As HashEvidence Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() Dim normalizedOptions As HashOptions = ResolveHashOptions(detectorOptions, options) Dim detectedType As FileType Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() If data Is Nothing Then - Return Failure(HashSourceType.RawBytes, label, "Payload ist null.") + Return _ + HashEvidence.CreateFailure(HashSourceType.RawBytes, label, "Payload ist null.") End If If CLng(data.Length) > detectorOptions.MaxBytes Then - Return Failure(HashSourceType.RawBytes, label, "Payload größer als MaxBytes.") + Return _ + HashEvidence.CreateFailure(HashSourceType.RawBytes, label, "Payload größer als MaxBytes.") End If detectedType = New FileTypeDetector().Detect(data) If ArchiveEntryCollector.TryCollectFromBytes(data, detectorOptions, entries) Then - Return EvidenceHashingCore.BuildEvidenceFromEntries( + Return BuildEvidenceFromEntries( 
sourceType:=HashSourceType.RawBytes, label:=NormalizeLabel(label), detectedType:=detectedType, @@ -211,7 +178,7 @@ Namespace Global.Tomtastisch.FileClassifier notes:="Archive bytes hashed via canonical manifest.") End If - Return EvidenceHashingCore.BuildEvidenceFromRawPayload( + Return BuildEvidenceFromRawPayload( sourceType:=HashSourceType.RawBytes, label:=NormalizeLabel(label), detectedType:=detectedType, @@ -224,11 +191,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis aus extrahierten Archiveinträgen mit Standardlabel. ''' ''' - ''' - ''' Ablauf: - ''' 1) Delegation auf die Überladung mit Label und Optionen, - ''' 2) Verwendung des stabilen Labels archive-entries. - ''' + ''' Delegiert auf die Überladung mit Label und expliziten Hashoptionen. ''' ''' Read-only Liste normalisierbarer Archiveinträge. ''' Hash-Evidence; bei Fehlern ein fail-closed Nachweisobjekt mit Fehlerhinweis. @@ -236,7 +199,6 @@ Namespace Global.Tomtastisch.FileClassifier ( entries As IReadOnlyList(Of ZipExtractedEntry) ) As HashEvidence - Return HashEntries(entries, "archive-entries", options:=Nothing) End Function @@ -244,11 +206,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis aus extrahierten Archiveinträgen mit benutzerdefiniertem Label. ''' ''' - ''' - ''' Ablauf: - ''' 1) Delegation auf die Überladung mit expliziten Optionen, - ''' 2) Label-Normalisierung im Zielpfad. - ''' + ''' Delegiert auf die Überladung mit expliziten Hashoptionen. ''' ''' Read-only Liste normalisierbarer Archiveinträge. ''' Fachliches Label für den Nachweis. 
@@ -257,8 +215,8 @@ Namespace Global.Tomtastisch.FileClassifier ( entries As IReadOnlyList(Of ZipExtractedEntry), label As String - ) As HashEvidence - + ) _ + As HashEvidence Return HashEntries(entries, label, options:=Nothing) End Function @@ -266,16 +224,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Erstellt einen deterministischen Hash-Nachweis aus extrahierten Archiveinträgen. ''' ''' - ''' - ''' Ablauf: - ''' 1) Snapshot und Normalisierung der Hashoptionen, - ''' 2) Deterministische Entry-Normalisierung (Pfad, Deduplizierung, Sortierung), - ''' 3) Manifestbildung und Digest-Berechnung, - ''' 4) Rückgabe als HashEvidence. - ''' - ''' - ''' Fail-Closed: Null-Entries, ungültige Pfade oder Duplikate nach Normalisierung führen zu Failure-Evidence. - ''' + ''' Entry-Pfade und -Inhalte werden vor der Manifestbildung normalisiert, dedupliziert und deterministisch sortiert. ''' ''' Read-only Liste normalisierbarer Archiveinträge. ''' Fachliches Label für den Nachweis. @@ -287,11 +236,9 @@ Namespace Global.Tomtastisch.FileClassifier label As String, options As HashOptions ) As HashEvidence - - Dim projectOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() - Dim normalizedOptions As HashOptions = ResolveHashOptions(projectOptions, options) - - Return EvidenceHashingCore.BuildEvidenceFromEntries( + Dim projectOptions = FileTypeOptions.GetSnapshot() + Dim normalizedOptions = ResolveHashOptions(projectOptions, options) + Return BuildEvidenceFromEntries( sourceType:=HashSourceType.ArchiveEntries, label:=NormalizeLabel(label), detectedType:=FileTypeRegistry.Resolve(FileKind.Zip), @@ -305,11 +252,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Führt den deterministischen h1-h4-RoundTrip mit Standard-Hashoptionen aus. ''' ''' - ''' - ''' Ablauf: - ''' 1) Delegation auf die Überladung mit expliziten Optionen, - ''' 2) Anwendung der Snapshot-Defaults aus FileTypeOptions. - ''' + ''' Delegiert auf die Überladung mit expliziten Hashoptionen. 
''' ''' Pfad zur Eingabedatei. ''' RoundTrip-Bericht mit Konsistenzkennzahlen und Notes. @@ -317,7 +260,6 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String ) As HashRoundTripReport - Return VerifyRoundTrip(path, options:=Nothing) End Function @@ -333,11 +275,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' 4) h4: Hash nach Materialisierung der kanonischen Bytes. ''' ''' - ''' Side-Effects: Die Materialisierung erzeugt temporär ein Dateiziel im System-Temp-Pfad und entfernt - ''' den Temp-Ordner anschließend best-effort mit Catch-Filter-Handling. - ''' - ''' - ''' Fail-Closed: Fehlerpfade liefern einen vollständigen Bericht mit Failure-Evidences. + ''' Fehler werden fail-closed als Bericht mit Fehler-Evidence zurückgegeben. ''' ''' ''' Pfad zur Eingabedatei. @@ -347,60 +285,501 @@ Namespace Global.Tomtastisch.FileClassifier ( path As String, options As HashOptions - ) As HashRoundTripReport + ) _ + As HashRoundTripReport + Dim detectorOptions As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() + Dim normalizedOptions As HashOptions = ResolveHashOptions(detectorOptions, options) + Dim failed As HashEvidence + Dim h1 As HashEvidence + Dim originalBytes As Byte() = Array.Empty(Of Byte)() + Dim readError As String = String.Empty + Dim archiveEntries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() + Dim isArchiveInput As Boolean + Dim h2 As HashEvidence + Dim canonicalBytes As Byte() + Dim normalizedEntries As List(Of NormalizedEntry) + Dim normalizeError As String + Dim h3 As HashEvidence + Dim h4 As HashEvidence = HashEvidence.CreateFailure( + HashSourceType.MaterializedFile, + "roundtrip-h4-file", + "Materialization failed." 
+ ) + Dim roundTripTempRoot As String = IO.Path.Combine( + IO.Path.GetTempPath(), + "ftd-roundtrip-" & + Guid.NewGuid().ToString("N", Globalization.CultureInfo.InvariantCulture) + ) + Dim targetFile As String + Dim notes As String - Return EvidenceHashingRoundTrip.VerifyRoundTrip(path, options) - End Function + If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then + failed = HashEvidence.CreateFailure(HashSourceType.FilePath, path, "Datei nicht gefunden.") + Return _ + New HashRoundTripReport(path, isArchiveInput:=False, h1:=failed, h2:=failed, h3:=failed, h4:=failed, + notes:="Input file missing.") + End If - Friend Shared Function ResolveHashOptionsCore _ - ( - projectOptions As FileTypeProjectOptions, - options As HashOptions - ) As HashOptions + h1 = HashFile(path, normalizedOptions) + If Not h1.Digests.HasLogicalHash Then + failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, "h1 konnte nicht berechnet werden.") + Return _ + New HashRoundTripReport(path, isArchiveInput:=False, h1:=h1, h2:=failed, h3:=failed, h4:=failed, + notes:="h1 missing logical digest.") + End If - Return ResolveHashOptions(projectOptions, options) - End Function + If Not TryReadFileBounded(path, detectorOptions, originalBytes, readError) Then + failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, readError) + Return _ + New HashRoundTripReport(path, isArchiveInput:=False, h1:=h1, h2:=failed, h3:=failed, h4:=failed, + notes:=readError) + End If - Friend Shared Function LogicalManifestVersionCore() As String - Return LogicalManifestVersion - End Function + isArchiveInput = ArchiveEntryCollector.TryCollectFromFile(path, detectorOptions, archiveEntries) + + If isArchiveInput Then + h2 = HashEntries(archiveEntries, "roundtrip-h2-entries", normalizedOptions) + normalizedEntries = Nothing + normalizeError = String.Empty + If TryNormalizeEntries(archiveEntries, normalizedEntries, normalizeError) Then + canonicalBytes = 
BuildLogicalManifestBytes(normalizedEntries) + Else + canonicalBytes = Array.Empty(Of Byte)() + End If + Else + h2 = HashBytes(originalBytes, "roundtrip-h2-bytes", normalizedOptions) + canonicalBytes = CopyBytes(originalBytes) + End If - Friend Shared Function DefaultPayloadLabelCore() As String - Return DefaultPayloadLabel + h3 = BuildEvidenceFromRawPayload( + sourceType:=HashSourceType.RawBytes, + label:="roundtrip-h3-logical-bytes", + detectedType:=FileTypeRegistry.Resolve(FileKind.Unknown), + payload:=canonicalBytes, + hashOptions:=normalizedOptions, + notes:="Canonical logical bytes hashed directly.") + + Try + IO.Directory.CreateDirectory(roundTripTempRoot) + targetFile = IO.Path.Combine( + roundTripTempRoot, + NormalizeLabel(normalizedOptions.MaterializedFileName) + ) + + If FileMaterializer.Persist(canonicalBytes, targetFile, overwrite:=False, secureExtract:=False) Then + h4 = HashFile(targetFile, normalizedOptions) + End If + + Finally + Try + If IO.Directory.Exists(roundTripTempRoot) Then + IO.Directory.Delete(roundTripTempRoot, recursive:=True) + End If + + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is IO.IOException OrElse + TypeOf ex Is IO.PathTooLongException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + LogGuard.Debug(detectorOptions.Logger, $"[HashRoundTrip] Cleanup-Fehler: {ex.Message}") + End Try + End Try + + notes = If( + isArchiveInput, + "Archive roundtrip (h1-h4) executed.", + "Raw file roundtrip (h1-h4) executed." 
+ ) + + Return New HashRoundTripReport(path, isArchiveInput, h1, h2, h3, h4, notes) End Function - Friend Shared Function HmacKeyEnvVarB64Core() As String - Return HmacKeyEnvVarB64 + Private Shared Function BuildEvidenceFromEntries( + sourceType As HashSourceType, + label As String, + detectedType As FileType, + compressedBytes As Byte(), + entries As IReadOnlyList(Of ZipExtractedEntry), + hashOptions As HashOptions, + notes As String + ) As HashEvidence + + Dim normalizedEntries As List(Of NormalizedEntry) = Nothing + Dim normalizeError As String = String.Empty + Dim logicalBytes As Byte() + Dim logicalSha As String + Dim fastLogical As String + Dim hmacLogical As String + Dim physicalSha As String + Dim fastPhysical As String + Dim hmacPhysical As String + Dim hasPhysical As Boolean + Dim secureNote As String + Dim hmacKey As Byte() + Dim hasHmacKey As Boolean + Dim firstEntry As ZipExtractedEntry = Nothing + Dim digestSet As HashDigestSet + Dim combinedNotes As String + Dim totalBytes As Long + Dim persistedCompressed As Byte() + Dim persistedLogical As Byte() + + If Not TryNormalizeEntries(entries, normalizedEntries, normalizeError) Then + Return HashEvidence.CreateFailure(sourceType, label, normalizeError) + End If + + logicalBytes = BuildLogicalManifestBytes(normalizedEntries) + logicalSha = ComputeSha256Hex(logicalBytes) + fastLogical = ComputeFastHash(logicalBytes, hashOptions) + hmacLogical = String.Empty + physicalSha = String.Empty + fastPhysical = String.Empty + hmacPhysical = String.Empty + hasPhysical = False + secureNote = String.Empty + hmacKey = Array.Empty(Of Byte)() + hasHmacKey = False + + If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then + hasHmacKey = TryResolveHmacKey(hmacKey, secureNote) + If hasHmacKey Then + hmacLogical = ComputeHmacSha256Hex(hmacKey, logicalBytes) + End If + End If + + If compressedBytes IsNot Nothing AndAlso compressedBytes.Length > 0 Then + physicalSha = ComputeSha256Hex(compressedBytes) + 
fastPhysical = ComputeFastHash(compressedBytes, hashOptions) + hasPhysical = True + If hasHmacKey Then + hmacPhysical = ComputeHmacSha256Hex(hmacKey, compressedBytes) + End If + End If + + If normalizedEntries.Count > 0 Then + firstEntry = New ZipExtractedEntry(normalizedEntries(0).RelativePath, normalizedEntries(0).Content) + End If + + digestSet = New HashDigestSet( + physicalSha256:=physicalSha, + logicalSha256:=logicalSha, + fastPhysicalXxHash3:=fastPhysical, + fastLogicalXxHash3:=fastLogical, + hmacPhysicalSha256:=hmacPhysical, + hmacLogicalSha256:=hmacLogical, + hasPhysicalHash:=hasPhysical, + hasLogicalHash:=True) + + combinedNotes = AppendNoteIfAny(notes, secureNote) + + totalBytes = 0 + For Each entry In normalizedEntries + totalBytes += CLng(entry.Content.LongLength) + Next + + persistedCompressed = + If(hashOptions.IncludePayloadCopies, CopyBytes(compressedBytes), Array.Empty(Of Byte)()) + persistedLogical = + If(hashOptions.IncludePayloadCopies, CopyBytes(logicalBytes), Array.Empty(Of Byte)()) + + Return New HashEvidence( + sourceType:=sourceType, + label:=NormalizeLabel(label), + detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), + entry:=firstEntry, + compressedBytes:=persistedCompressed, + uncompressedBytes:=persistedLogical, + entryCount:=normalizedEntries.Count, + totalUncompressedBytes:=totalBytes, + digests:=digestSet, + notes:=combinedNotes) End Function - Private Shared Function Failure _ - ( + Private Shared Function BuildEvidenceFromRawPayload( sourceType As HashSourceType, label As String, + detectedType As FileType, + payload As Byte(), + hashOptions As HashOptions, notes As String ) As HashEvidence - Return HashEvidence.CreateFailure(sourceType, label, notes) + Dim safePayload As Byte() = If(payload, Array.Empty(Of Byte)()) + Dim physicalSha As String = ComputeSha256Hex(safePayload) + Dim logicalSha As String = physicalSha + Dim fastPhysical As String = ComputeFastHash(safePayload, hashOptions) + Dim fastLogical As 
String = fastPhysical + Dim hmacPhysical As String = String.Empty + Dim hmacLogical As String = String.Empty + Dim secureNote As String = String.Empty + Dim hmacKey As Byte() = Array.Empty(Of Byte)() + Dim persistedPayload As Byte() + Dim entry As ZipExtractedEntry + Dim digestSet As HashDigestSet + Dim combinedNotes As String + + If hashOptions IsNot Nothing AndAlso hashOptions.IncludeSecureHash Then + If TryResolveHmacKey(hmacKey, secureNote) Then + hmacPhysical = ComputeHmacSha256Hex(hmacKey, safePayload) + hmacLogical = hmacPhysical + End If + End If + + persistedPayload = If(hashOptions.IncludePayloadCopies, CopyBytes(safePayload), Array.Empty(Of Byte)()) + entry = New ZipExtractedEntry(DefaultPayloadLabel, safePayload) + + digestSet = New HashDigestSet( + physicalSha256:=physicalSha, + logicalSha256:=logicalSha, + fastPhysicalXxHash3:=fastPhysical, + fastLogicalXxHash3:=fastLogical, + hmacPhysicalSha256:=hmacPhysical, + hmacLogicalSha256:=hmacLogical, + hasPhysicalHash:=True, + hasLogicalHash:=True) + + combinedNotes = AppendNoteIfAny(notes, secureNote) + + Return New HashEvidence( + sourceType:=sourceType, + label:=NormalizeLabel(label), + detectedType:=If(detectedType, FileTypeRegistry.Resolve(FileKind.Unknown)), + entry:=entry, + compressedBytes:=persistedPayload, + uncompressedBytes:=persistedPayload, + entryCount:=1, + totalUncompressedBytes:=safePayload.LongLength, + digests:=digestSet, + notes:=combinedNotes) End Function - Private Shared Function NormalizeLabel _ - ( - label As String - ) As String + Private Shared Function TryNormalizeEntries( + entries As IReadOnlyList(Of ZipExtractedEntry), + ByRef normalizedEntries As List(Of NormalizedEntry), + ByRef errorMessage As String + ) As Boolean + + Dim seen As HashSet(Of String) = New HashSet(Of String)(StringComparer.Ordinal) + Dim normalizedPath As String + Dim payload As Byte() + + normalizedEntries = New List(Of NormalizedEntry)() + errorMessage = String.Empty + + If entries Is Nothing Then + 
errorMessage = "Entries sind null." + Return False + End If - Return EvidenceHashingCore.NormalizeLabel(label) + For Each entry In entries + If entry Is Nothing Then + errorMessage = "Entry ist null." + Return False + End If + + normalizedPath = Nothing + If Not TryNormalizeEntryPath(entry.RelativePath, normalizedPath) Then + errorMessage = $"Ungültiger Entry-Pfad: '{entry.RelativePath}'." + Return False + End If + + If Not seen.Add(normalizedPath) Then + errorMessage = $"Doppelter Entry-Pfad nach Normalisierung: '{normalizedPath}'." + Return False + End If + + payload = If(entry.Content.IsDefaultOrEmpty, Array.Empty(Of Byte)(), entry.Content.ToArray()) + normalizedEntries.Add(New NormalizedEntry(normalizedPath, payload)) + Next + + normalizedEntries.Sort(Function(a, b) StringComparer.Ordinal.Compare(a.RelativePath, b.RelativePath)) + Return True End Function - Private Shared Function ResolveHashOptions _ - ( - projectOptions As FileTypeProjectOptions, - options As HashOptions - ) As HashOptions + Private Shared Function TryNormalizeEntryPath(rawPath As String, ByRef normalizedPath As String) As Boolean + Dim isDirectory = False + Return _ + ArchiveEntryPathPolicy.TryNormalizeRelativePath(rawPath, allowDirectoryMarker:=False, normalizedPath, + isDirectory) + End Function - If options IsNot Nothing Then Return HashOptions.Normalize(options) - If projectOptions IsNot Nothing Then Return HashOptions.Normalize(projectOptions.DeterministicHash) + Private Shared Function BuildLogicalManifestBytes(entries As IReadOnlyList(Of NormalizedEntry)) As Byte() + Dim versionBytes As Byte() + Dim pathBytes As Byte() + Dim contentHash As Byte() + + Using ms As New IO.MemoryStream() + Using writer As New IO.BinaryWriter(ms, Text.Encoding.UTF8, leaveOpen:=True) + versionBytes = Text.Encoding.UTF8.GetBytes(LogicalManifestVersion) + writer.Write(versionBytes.Length) + writer.Write(versionBytes) + writer.Write(entries.Count) + + For Each entry In entries + pathBytes = 
Text.Encoding.UTF8.GetBytes(entry.RelativePath) + contentHash = HashPrimitives.Current.Sha256.ComputeHash(entry.Content) + writer.Write(pathBytes.Length) + writer.Write(pathBytes) + writer.Write(CLng(entry.Content.LongLength)) + writer.Write(contentHash.Length) + writer.Write(contentHash) + Next + End Using + Return ms.ToArray() + End Using + End Function + + Private Shared Function ComputeSha256Hex(payload As Byte()) As String + Dim data = If(payload, Array.Empty(Of Byte)()) + Return HashPrimitives.Current.Sha256.ComputeHashHex(data) + End Function + + Private Shared Function TryResolveHmacKey(ByRef key As Byte(), ByRef note As String) As Boolean + Dim b64 As String + key = Array.Empty(Of Byte)() + note = String.Empty + + b64 = Environment.GetEnvironmentVariable(HmacKeyEnvVarB64) + If String.IsNullOrWhiteSpace(b64) Then + note = $"Secure hashing requested but env var '{HmacKeyEnvVarB64}' is missing; HMAC digests omitted." + Return False + End If + + Try + key = Convert.FromBase64String(b64.Trim()) + If key Is Nothing OrElse key.Length = 0 Then + key = Array.Empty(Of Byte)() + note = $"Secure hashing requested but env var '{HmacKeyEnvVarB64}' is empty; HMAC digests omitted." + Return False + End If + Return True + Catch ex As Exception When _ + TypeOf ex Is FormatException OrElse + TypeOf ex Is ArgumentException + key = Array.Empty(Of Byte)() + note = $"Secure hashing requested but env var '{HmacKeyEnvVarB64}' is invalid Base64; HMAC digests omitted." 
+ Return False + End Try + End Function + + Private Shared Function ComputeHmacSha256Hex(key As Byte(), payload As Byte()) As String + Dim safeKey = If(key, Array.Empty(Of Byte)()) + Dim data = If(payload, Array.Empty(Of Byte)()) + Using hmac As New Security.Cryptography.HMACSHA256(safeKey) + Return HashPrimitives.Current.HexCodec.EncodeLowerHex(hmac.ComputeHash(data)) + End Using + End Function + + Private Shared Function ComputeFastHash(payload As Byte(), options As HashOptions) As String + Dim data As Byte() + + If options Is Nothing OrElse Not options.IncludeFastHash Then Return String.Empty + data = If(payload, Array.Empty(Of Byte)()) + Return HashPrimitives.Current.FastHash64.ComputeHashHex(data) + End Function + + Private Shared Function AppendNoteIfAny(baseNotes As String, toAppend As String) As String + Dim left = If(baseNotes, String.Empty).Trim() + Dim right = If(toAppend, String.Empty).Trim() + If right.Length = 0 Then Return left + If left.Length = 0 Then Return right + Return left & " " & right + End Function + + Private Shared Function NormalizeLabel(label As String) As String + Dim normalized = If(label, String.Empty).Trim() + If normalized.Length = 0 Then Return DefaultPayloadLabel + Return normalized + End Function + + Private Shared Function CopyBytes(data As Byte()) As Byte() + Dim copy As Byte() + + If data Is Nothing OrElse data.Length = 0 Then Return Array.Empty(Of Byte)() + copy = New Byte(data.Length - 1) {} + Buffer.BlockCopy(data, 0, copy, 0, data.Length) + Return copy + End Function + + Private Shared Function ResolveHashOptions( + projectOptions As FileTypeProjectOptions, + options As HashOptions + ) As HashOptions + + If options IsNot Nothing Then Return HashOptions.Normalize(options) + If projectOptions IsNot Nothing Then _ + Return HashOptions.Normalize(projectOptions.DeterministicHash) Return HashOptions.Normalize(Nothing) End Function + + Private Shared Function TryReadFileBounded(path As String, detectorOptions As 
FileTypeProjectOptions, + ByRef bytes As Byte(), ByRef errorMessage As String) As Boolean + Dim fi As IO.FileInfo + + bytes = Array.Empty(Of Byte)() + errorMessage = String.Empty + If String.IsNullOrWhiteSpace(path) Then + errorMessage = "Pfad ist leer." + Return False + End If + + If detectorOptions Is Nothing Then + errorMessage = "Optionen fehlen." + Return False + End If + + Try + fi = New IO.FileInfo(path) + If Not fi.Exists Then + errorMessage = "Datei existiert nicht." + Return False + End If + + If fi.Length > detectorOptions.MaxBytes Then + errorMessage = "Datei größer als MaxBytes." + Return False + End If + + Using _ + fs As _ + New IO.FileStream(path, IO.FileMode.Open, IO.FileAccess.Read, IO.FileShare.Read, + InternalIoDefaults.FileStreamBufferSize, IO.FileOptions.SequentialScan) + Using ms As New IO.MemoryStream(CInt(Math.Min(Math.Max(fi.Length, 0), Integer.MaxValue))) + StreamBounds.CopyBounded(fs, ms, detectorOptions.MaxBytes) + bytes = ms.ToArray() + End Using + End Using + Return True + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is IO.IOException OrElse + TypeOf ex Is IO.InvalidDataException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + Return SetReadFileError(ex, errorMessage) + End Try + End Function + + Private Shared Function SetReadFileError(ex As Exception, ByRef errorMessage As String) As Boolean + errorMessage = $"Datei konnte nicht gelesen werden: {ex.Message}" + Return False + End Function + + ''' + ''' Interne Hilfsklasse NormalizedEntry zur kapselnden Umsetzung von Guard-, I/O- und Policy-Logik. 
+ ''' + Private NotInheritable Class NormalizedEntry + Friend ReadOnly Property RelativePath As String + Friend ReadOnly Property Content As Byte() + + Friend Sub New(relativePath As String, content As Byte()) + Me.RelativePath = If(relativePath, String.Empty) + Me.Content = If(content, Array.Empty(Of Byte)()) + End Sub + End Class End Class End Namespace diff --git a/src/FileTypeDetection/FileMaterializer.vb b/src/FileTypeDetection/FileMaterializer.vb index 402397e1..ae9ebdf6 100644 --- a/src/FileTypeDetection/FileMaterializer.vb +++ b/src/FileTypeDetection/FileMaterializer.vb @@ -105,8 +105,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Zu materialisierende Nutzdaten. ''' Datei- oder Verzeichnisziel abhängig vom Verarbeitungspfad. ''' True, um ein vorhandenes Ziel gemäß Zielpfad-Policy zu ersetzen. - ''' True, um Archivpayloads sicher validieren und - ''' extrahieren zu können; sonst Rohpersistenz. + ''' True, um Archivpayloads sicher zu validieren und zu extrahieren; sonst Rohpersistenz. ''' True bei erfolgreicher Materialisierung; andernfalls False. Public Shared Function Persist _ ( @@ -117,7 +116,7 @@ Namespace Global.Tomtastisch.FileClassifier ) As Boolean Dim opt As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() - Dim destinationFull As String = String.Empty + Dim destinationFull As String Dim descriptor As ArchiveDescriptor = Nothing ' Guard-Clauses: Null-, Größen- und Zielpfadprüfung. @@ -131,15 +130,20 @@ Namespace Global.Tomtastisch.FileClassifier If String.IsNullOrWhiteSpace(destinationPath) Then Return False ' Pfadnormalisierung: Absoluten Zielpfad auflösen. 
- If Not PathResolutionGuard.TryGetFullPath( - destinationPath, - opt, - "[Materialize] Ungültiger Zielpfad", - warnLevel:=True, - destinationFull - ) Then + Try + destinationFull = Path.GetFullPath(destinationPath) + + Catch ex As Exception When _ + TypeOf ex Is ArgumentException OrElse + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is PathTooLongException OrElse + TypeOf ex Is IOException + + LogGuard.Warn(opt.Logger, $"[Materialize] Ungültiger Zielpfad: {ex.Message}") Return False - End If + End Try ' Secure-Extract-Branch: describe -> safety gate -> extract. If secureExtract Then diff --git a/src/FileTypeDetection/FileTypeDetectionLib.vbproj b/src/FileTypeDetection/FileTypeDetectionLib.vbproj index 9175f2b7..8e009026 100644 --- a/src/FileTypeDetection/FileTypeDetectionLib.vbproj +++ b/src/FileTypeDetection/FileTypeDetectionLib.vbproj @@ -7,8 +7,8 @@ true false Tomtastisch.FileClassifier - 6.0.1 - 6.0.1 + 5.2.1 + 5.2.1 tomtastisch Deterministic file type and MIME detection with fail-closed archive safety checks, secure extraction primitives, and reproducible hashing evidence for .NET. 
filetype;mime;detection;magic-bytes;sniffing;archive;zip;tar;7z;rar;zipslip;security;hashing;sha256;deterministic;dotnet;netstandard2.0;net8;net10 @@ -49,10 +49,6 @@ - - - - diff --git a/src/FileTypeDetection/FileTypeDetector.vb b/src/FileTypeDetection/FileTypeDetector.vb index 5a6deb04..e08ad769 100644 --- a/src/FileTypeDetection/FileTypeDetector.vb +++ b/src/FileTypeDetection/FileTypeDetector.vb @@ -562,7 +562,7 @@ Namespace Global.Tomtastisch.FileClassifier Dim trace As DetectionTrace = DetectionTrace.Empty - If Not ByteArrayGuard.HasContent(data) Then Return UnknownType() + If data Is Nothing OrElse data.Length = 0 Then Return UnknownType() If CLng(data.Length) > opt.MaxBytes Then LogGuard.Warn(opt.Logger, $"[Detect] Daten zu groß ({data.Length} > {opt.MaxBytes}).") @@ -796,7 +796,9 @@ Namespace Global.Tomtastisch.FileClassifier If refined.Kind <> FileKind.Unknown Then WarnIfNoDirectContentDetection(refined.Kind, opt) - trace.UsedStructuredRefinement = FileTypeRegistry.HasStructuredContainerDetection(refined.Kind) + trace.UsedStructuredRefinement = + (refined.Kind = FileKind.Docx OrElse refined.Kind = FileKind.Xlsx OrElse + refined.Kind = FileKind.Pptx) trace.ReasonCode = If(trace.UsedStructuredRefinement, ReasonArchiveStructuredRefined, ReasonArchiveRefined) Return refined diff --git a/src/FileTypeDetection/FileTypeOptions.vb b/src/FileTypeDetection/FileTypeOptions.vb index a4852491..a0e97180 100644 --- a/src/FileTypeDetection/FileTypeOptions.vb +++ b/src/FileTypeDetection/FileTypeOptions.vb @@ -204,11 +204,7 @@ Namespace Global.Tomtastisch.FileClassifier Return Text.Json.JsonSerializer.Serialize(dto) End Function - Friend Shared Function LoadOptionsFromPath _ - ( - path As String - ) As Boolean - + Friend Shared Function LoadOptionsFromPath(path As String) As Boolean If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then Return False If Not path.EndsWith(".json", StringComparison.OrdinalIgnoreCase) Then Return False @@ -238,91 +234,60 @@ 
Namespace Global.Tomtastisch.FileClassifier End SyncLock End Sub - Private Shared Function SafeInt _ - ( - el As Text.Json.JsonElement, - fallback As Integer - ) As Integer - + Private Shared Function SafeInt(el As Text.Json.JsonElement, fallback As Integer) As Integer Dim v As Integer If el.ValueKind = Text.Json.JsonValueKind.Number AndAlso el.TryGetInt32(v) Then Return v Return fallback End Function - Private Shared Function SafeLong _ - ( - el As Text.Json.JsonElement, - fallback As Long - ) As Long - + Private Shared Function SafeLong(el As Text.Json.JsonElement, fallback As Long) As Long Dim v As Long If el.ValueKind = Text.Json.JsonValueKind.Number AndAlso el.TryGetInt64(v) Then Return v Return fallback End Function - Private Shared Function ParsePositiveInt _ - ( - el As Text.Json.JsonElement, - fallback As Integer, - name As String, - logger As Microsoft.Extensions.Logging.ILogger - ) As Integer - + Private Shared Function ParsePositiveInt(el As Text.Json.JsonElement, fallback As Integer, + name As String, + logger As Microsoft.Extensions.Logging.ILogger) As Integer Dim v = SafeInt(el, fallback) If v > 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParseNonNegativeInt _ - ( - el As Text.Json.JsonElement, - fallback As Integer, - name As String, - logger As Microsoft.Extensions.Logging.ILogger - ) As Integer - + Private Shared Function ParseNonNegativeInt(el As Text.Json.JsonElement, fallback As Integer, + name As String, + logger As Microsoft.Extensions.Logging.ILogger) As Integer Dim v = SafeInt(el, fallback) If v >= 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParsePositiveLong _ - ( - el As Text.Json.JsonElement, - fallback As Long, - name As String, - logger As Microsoft.Extensions.Logging.ILogger - ) As Long - + Private Shared 
Function ParsePositiveLong(el As Text.Json.JsonElement, fallback As Long, + name As String, + logger As Microsoft.Extensions.Logging.ILogger) _ + As Long Dim v = SafeLong(el, fallback) If v > 0 Then Return v LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParseBoolean _ - ( - el As Text.Json.JsonElement, - fallback As Boolean, - name As String, - logger As Microsoft.Extensions.Logging.ILogger - ) As Boolean - + Private Shared Function ParseBoolean(el As Text.Json.JsonElement, fallback As Boolean, + name As String, + logger As Microsoft.Extensions.Logging.ILogger) _ + As Boolean If el.ValueKind = Text.Json.JsonValueKind.True Then Return True If el.ValueKind = Text.Json.JsonValueKind.False Then Return False LogGuard.Warn(logger, $"[Config] Ungültiger Wert für '{name}', fallback={fallback}.") Return fallback End Function - Private Shared Function ParseString _ - ( - el As Text.Json.JsonElement, - fallback As String, - name As String, - logger As Microsoft.Extensions.Logging.ILogger - ) As String + Private Shared Function ParseString(el As Text.Json.JsonElement, fallback As String, + name As String, + logger As Microsoft.Extensions.Logging.ILogger) _ + As String Dim value As String @@ -334,8 +299,7 @@ Namespace Global.Tomtastisch.FileClassifier Return fallback End Function - Private Shared Sub TryParseHashOptions _ - ( + Private Shared Sub TryParseHashOptions( el As Text.Json.JsonElement, ByRef includePayloadCopies As Boolean, ByRef includeFastHash As Boolean, @@ -386,10 +350,9 @@ Namespace Global.Tomtastisch.FileClassifier Next End Sub - Private Shared Function Snapshot _ - ( + Private Shared Function Snapshot( opt As FileTypeProjectOptions - ) As FileTypeProjectOptions + ) As FileTypeProjectOptions Dim snap As FileTypeProjectOptions diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index 
c45d58ed..b9faba97 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -47,13 +47,8 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property ContainerChain As IReadOnlyList(Of ArchiveContainerType) - Private Sub New _ - ( - logicalKind As FileKind, - containerType As ArchiveContainerType, - containerChain As ArchiveContainerType() - ) - + Private Sub New(logicalKind As FileKind, containerType As ArchiveContainerType, + containerChain As ArchiveContainerType()) Me.LogicalKind = logicalKind Me.ContainerType = containerType Dim chain = If(containerChain, Array.Empty(Of ArchiveContainerType)()) @@ -61,11 +56,9 @@ Namespace Global.Tomtastisch.FileClassifier End Sub Friend Shared Function UnknownDescriptor() As ArchiveDescriptor - Return New ArchiveDescriptor( - FileKind.Unknown, - ArchiveContainerType.Unknown, - Array.Empty(Of ArchiveContainerType)() - ) + Return _ + New ArchiveDescriptor(FileKind.Unknown, ArchiveContainerType.Unknown, + Array.Empty(Of ArchiveContainerType)()) End Function Friend Shared Function ForContainerType(containerType As ArchiveContainerType) As ArchiveDescriptor @@ -96,14 +89,13 @@ Namespace Global.Tomtastisch.FileClassifier Friend Interface IArchiveBackend ReadOnly Property ContainerType As ArchiveContainerType - Function Process _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - containerTypeValue As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean + Function Process( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + containerTypeValue As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean End Interface ''' @@ -116,11 +108,7 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function Resolve _ - ( - containerType As ArchiveContainerType - ) As IArchiveBackend - + 
Friend Shared Function Resolve(containerType As ArchiveContainerType) As IArchiveBackend Select Case containerType Case ArchiveContainerType.Zip Return ManagedArchiveBackend @@ -137,11 +125,7 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function OpenArchive _ - ( - stream As Stream - ) As SharpCompress.Archives.IArchive - + Friend Shared Function OpenArchive(stream As Stream) As SharpCompress.Archives.IArchive Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenArchiveFactoryCompat(stream, options) @@ -158,12 +142,9 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function OpenArchiveForContainer _ - ( - stream As Stream, - containerTypeValue As ArchiveContainerType - ) As SharpCompress.Archives.IArchive - + Friend Shared Function OpenArchiveForContainer(stream As Stream, + containerTypeValue As ArchiveContainerType) _ + As SharpCompress.Archives.IArchive If containerTypeValue = ArchiveContainerType.GZip Then Dim gzipArchive = OpenGZipArchive(stream) If gzipArchive IsNot Nothing Then Return gzipArchive @@ -171,11 +152,7 @@ Namespace Global.Tomtastisch.FileClassifier Return OpenArchive(stream) End Function - Friend Shared Function HasGZipMagic _ - ( - stream As Stream - ) As Boolean - + Friend Shared Function HasGZipMagic(stream As Stream) As Boolean If stream Is Nothing OrElse Not stream.CanRead Then Return False If Not stream.CanSeek Then Return False If stream.Length < 2 Then Return False @@ -185,11 +162,7 @@ Namespace Global.Tomtastisch.FileClassifier Return first = &H1F AndAlso second = &H8B End Function - Private Shared Function OpenGZipArchive _ - ( - stream As Stream - ) As SharpCompress.Archives.IArchive - + Private Shared Function OpenGZipArchive(stream As Stream) As SharpCompress.Archives.IArchive Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenGZipArchiveCompat(stream, 
options) @@ -206,11 +179,7 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function IsExpectedInvocationException _ - ( - ex As TargetInvocationException - ) As Boolean - + Private Shared Function IsExpectedInvocationException(ex As TargetInvocationException) As Boolean Dim inner = ex?.InnerException If inner Is Nothing Then Return False @@ -221,49 +190,38 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf inner Is IOException End Function - Private Shared Function OpenArchiveFactoryCompat _ - ( - stream As Stream, - options As SharpCompress.Readers.ReaderOptions - ) As SharpCompress.Archives.IArchive - + Private Shared Function OpenArchiveFactoryCompat( + stream As Stream, + options As SharpCompress.Readers.ReaderOptions + ) As SharpCompress.Archives.IArchive Dim method = GetOpenCompatMethod(GetType(SharpCompress.Archives.ArchiveFactory)) Dim opened = method.Invoke(Nothing, New Object() {stream, options}) - Return CType(opened, SharpCompress.Archives.IArchive) End Function - Private Shared Function OpenGZipArchiveCompat _ - ( - stream As Stream, - options As SharpCompress.Readers.ReaderOptions - ) As SharpCompress.Archives.IArchive - + Private Shared Function OpenGZipArchiveCompat( + stream As Stream, + options As SharpCompress.Readers.ReaderOptions + ) As SharpCompress.Archives.IArchive Dim method = GetOpenCompatMethod(GetType(SharpCompress.Archives.GZip.GZipArchive)) Dim opened = method.Invoke(Nothing, New Object() {stream, options}) - Return CType(opened, SharpCompress.Archives.IArchive) End Function - Private Shared Function GetOpenCompatMethod(type As Type) As MethodInfo + Private Shared Function GetOpenCompatMethod(type As Type) As System.Reflection.MethodInfo Dim signature = New Type() {GetType(Stream), GetType(SharpCompress.Readers.ReaderOptions)} - Dim method = type.GetMethod( - "OpenArchive", - BindingFlags.Public Or BindingFlags.Static, - binder:=Nothing, - types:=signature, - modifiers:=Nothing - ) - + Dim method 
= type.GetMethod("OpenArchive", BindingFlags.Public Or + BindingFlags.Static, + binder:=Nothing, + types:=signature, + modifiers:=Nothing) If method IsNot Nothing Then Return method - method = type.GetMethod( - "Open", - BindingFlags.Public Or BindingFlags.Static, - binder:=Nothing, - types:=signature, - modifiers:=Nothing - ) + method = type.GetMethod("Open", BindingFlags.Public Or + BindingFlags.Static, + binder:=Nothing, + types:=signature, + modifiers:=Nothing) If method IsNot Nothing Then Return method Throw New MissingMethodException(type.FullName, "OpenArchive/Open(Stream, ReaderOptions)") @@ -277,15 +235,10 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function TryDescribeBytes _ - ( - data As Byte(), - opt As FileTypeProjectOptions, - ByRef descriptor As ArchiveDescriptor - ) As Boolean - + Friend Shared Function TryDescribeBytes(data As Byte(), opt As FileTypeProjectOptions, + ByRef descriptor As ArchiveDescriptor) As Boolean descriptor = ArchiveDescriptor.UnknownDescriptor() - If Not ByteArrayGuard.HasContent(data) Then Return False + If data Is Nothing OrElse data.Length = 0 Then Return False If opt Is Nothing Then Return False Try @@ -307,13 +260,8 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function TryDescribeStream _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - ByRef descriptor As ArchiveDescriptor - ) As Boolean - + Friend Shared Function TryDescribeStream(stream As Stream, opt As FileTypeProjectOptions, + ByRef descriptor As ArchiveDescriptor) As Boolean Dim mapped As ArchiveContainerType Dim gzipWrapped As Boolean @@ -365,11 +313,7 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function MapArchiveType _ - ( - type As SharpCompress.Common.ArchiveType - ) As ArchiveContainerType - + Friend Shared Function MapArchiveType(type As SharpCompress.Common.ArchiveType) As ArchiveContainerType Select Case type Case 
SharpCompress.Common.ArchiveType.Zip Return ArchiveContainerType.Zip @@ -395,26 +339,18 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function ValidateArchiveStream _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - descriptor As ArchiveDescriptor - ) As Boolean - + Friend Shared Function ValidateArchiveStream(stream As Stream, opt As FileTypeProjectOptions, depth As Integer, + descriptor As ArchiveDescriptor) As Boolean Return ProcessArchiveStream(stream, opt, depth, descriptor, Nothing) End Function - Friend Shared Function ProcessArchiveStream _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - descriptor As ArchiveDescriptor, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean - + Friend Shared Function ProcessArchiveStream( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + descriptor As ArchiveDescriptor, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean Dim backend As IArchiveBackend If Not StreamGuard.IsReadable(stream) Then Return False @@ -437,36 +373,25 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function TryExtractArchiveStreamToMemory _ - ( - stream As Stream, - opt As FileTypeProjectOptions - ) As IReadOnlyList(Of ZipExtractedEntry) - + Friend Shared Function TryExtractArchiveStreamToMemory(stream As Stream, opt As FileTypeProjectOptions) _ + As IReadOnlyList(Of ZipExtractedEntry) Dim descriptor As ArchiveDescriptor = Nothing Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() If Not ArchiveTypeResolver.TryDescribeStream(stream, opt, descriptor) Then Return emptyResult - Return TryExtractArchiveStreamToMemory(stream, opt, descriptor) End Function - Friend Shared Function TryExtractArchiveStreamToMemory _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - descriptor As ArchiveDescriptor - ) As 
IReadOnlyList(Of ZipExtractedEntry) - + Friend Shared Function TryExtractArchiveStreamToMemory(stream As Stream, opt As FileTypeProjectOptions, + descriptor As ArchiveDescriptor) _ + As IReadOnlyList(Of ZipExtractedEntry) Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() Dim entries As List(Of ZipExtractedEntry) = New List(Of ZipExtractedEntry)() Dim ok As Boolean If Not StreamGuard.IsReadable(stream) Then Return emptyResult If opt Is Nothing Then Return emptyResult - - If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then + If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then _ Return emptyResult - End If Try StreamGuard.RewindToStart(stream) @@ -484,10 +409,9 @@ Namespace Global.Tomtastisch.FileClassifier End If Return entries.AsReadOnly() - Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse + TypeOf ex Is SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -500,27 +424,17 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function TryExtractArchiveStream _ - ( - stream As Stream, - destinationDirectory As String, - opt As FileTypeProjectOptions - ) As Boolean - + Friend Shared Function TryExtractArchiveStream(stream As Stream, destinationDirectory As String, + opt As FileTypeProjectOptions) As Boolean Dim descriptor As ArchiveDescriptor = Nothing If Not ArchiveTypeResolver.TryDescribeStream(stream, opt, descriptor) Then Return False Return TryExtractArchiveStream(stream, destinationDirectory, opt, descriptor) End Function - Friend Shared Function TryExtractArchiveStream _ - ( - stream As Stream, - destinationDirectory As String, - opt As FileTypeProjectOptions, - descriptor As ArchiveDescriptor - ) As Boolean - - Dim destinationFull As String = 
String.Empty + Friend Shared Function TryExtractArchiveStream(stream As Stream, destinationDirectory As String, + opt As FileTypeProjectOptions, descriptor As ArchiveDescriptor) _ + As Boolean + Dim destinationFull As String Dim parent As String Dim stageDir As String Dim stagePrefix As String @@ -531,15 +445,18 @@ Namespace Global.Tomtastisch.FileClassifier If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then Return False If String.IsNullOrWhiteSpace(destinationDirectory) Then Return False - If Not PathResolutionGuard.TryGetFullPath( - destinationDirectory, - opt, - "[ArchiveExtract] Ungültiger Zielpfad", - warnLevel:=False, - destinationFull - ) Then + Try + destinationFull = Path.GetFullPath(destinationDirectory) + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is PathTooLongException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + LogGuard.Debug(opt.Logger, $"[ArchiveExtract] Ungültiger Zielpfad: {ex.Message}") Return False - End If + End Try If Not DestinationPathGuard.ValidateNewExtractionTarget(destinationFull, opt) Then Return False @@ -566,7 +483,6 @@ Namespace Global.Tomtastisch.FileClassifier Directory.Move(stageDir, destinationFull) Return True - Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -578,7 +494,6 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf ex Is ObjectDisposedException LogGuard.Debug(opt.Logger, $"[ArchiveExtract] Fehler: {ex.Message}") Return False - Finally If Directory.Exists(stageDir) Then Try @@ -595,16 +510,11 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function ExtractEntryToDirectory _ - ( - entry As IArchiveEntryModel, - destinationPrefix As String, - opt As FileTypeProjectOptions - ) As Boolean - + Private Shared Function 
ExtractEntryToDirectory(entry As IArchiveEntryModel, destinationPrefix As String, + opt As FileTypeProjectOptions) As Boolean Dim entryName As String = Nothing Dim isDirectory As Boolean = False - Dim targetPath As String = String.Empty + Dim targetPath As String Dim targetDir As String If entry Is Nothing Then Return False @@ -612,15 +522,18 @@ Namespace Global.Tomtastisch.FileClassifier If Not TryGetSafeEntryName(entry, opt, entryName, isDirectory) Then Return False - If Not PathResolutionGuard.TryGetFullPath( - Path.Combine(destinationPrefix, entryName), - opt, - "[ArchiveExtract] Zielpfad-Fehler", - warnLevel:=False, - targetPath - ) Then + Try + targetPath = Path.GetFullPath(Path.Combine(destinationPrefix, entryName)) + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is PathTooLongException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + LogGuard.Debug(opt.Logger, $"[ArchiveExtract] Zielpfad-Fehler: {ex.Message}") Return False - End If + End Try If Not targetPath.StartsWith(destinationPrefix, StringComparison.Ordinal) Then LogGuard.Warn(opt.Logger, "[ArchiveExtract] Path traversal erkannt.") @@ -654,7 +567,6 @@ Namespace Global.Tomtastisch.FileClassifier End Using End Using Return True - Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -669,13 +581,8 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function ExtractEntryToMemory _ - ( - entry As IArchiveEntryModel, - entries As List(Of ZipExtractedEntry), - opt As FileTypeProjectOptions - ) As Boolean - + Private Shared Function ExtractEntryToMemory(entry As IArchiveEntryModel, entries As List(Of ZipExtractedEntry), + opt As FileTypeProjectOptions) As Boolean Dim entryName As String = Nothing Dim isDirectory As Boolean = False Dim payload As Byte() @@ 
-716,14 +623,9 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function TryGetSafeEntryName _ - ( - entry As IArchiveEntryModel, - opt As FileTypeProjectOptions, - ByRef safeEntryName As String, - ByRef isDirectory As Boolean - ) As Boolean - + Private Shared Function TryGetSafeEntryName(entry As IArchiveEntryModel, opt As FileTypeProjectOptions, + ByRef safeEntryName As String, ByRef isDirectory As Boolean) _ + As Boolean Dim entryName As String = Nothing Dim normalizedDirectoryFlag As Boolean = False @@ -732,32 +634,25 @@ Namespace Global.Tomtastisch.FileClassifier If entry Is Nothing Then Return False If opt Is Nothing Then Return False - If ArchiveLinkGuard.IsRejectedLink(opt, entry.LinkTarget, "[ArchiveExtract]", logWhenRejected:=True) Then + If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(entry.LinkTarget) Then + LogGuard.Warn(opt.Logger, "[ArchiveExtract] Link-Entry ist nicht erlaubt.") Return False End If - If Not ArchiveEntryPathPolicy.TryNormalizeRelativePath( - entry.RelativePath, - allowDirectoryMarker:=True, - entryName, - normalizedDirectoryFlag - ) Then - + If _ + Not _ + ArchiveEntryPathPolicy.TryNormalizeRelativePath(entry.RelativePath, allowDirectoryMarker:=True, + entryName, normalizedDirectoryFlag) Then Return False End If safeEntryName = entryName - isDirectory = entry.IsDirectory OrElse normalizedDirectoryFlag OrElse entryName.EndsWith("/"c) - + isDirectory = entry.IsDirectory OrElse normalizedDirectoryFlag OrElse + entryName.EndsWith("/"c) Return True End Function - Private Shared Function ValidateEntrySize _ - ( - entry As IArchiveEntryModel, - opt As FileTypeProjectOptions - ) As Boolean - + Private Shared Function ValidateEntrySize(entry As IArchiveEntryModel, opt As FileTypeProjectOptions) As Boolean Dim sizeValue As Long? 
If entry Is Nothing OrElse opt Is Nothing Then Return False @@ -765,7 +660,9 @@ Namespace Global.Tomtastisch.FileClassifier sizeValue = entry.UncompressedSize If sizeValue.HasValue Then - If sizeValue.Value < 0 Then Return opt.AllowUnknownArchiveEntrySize + If sizeValue.Value < 0 Then + Return opt.AllowUnknownArchiveEntrySize + End If If sizeValue.Value > opt.MaxZipEntryUncompressedBytes Then Return False Return True @@ -774,17 +671,12 @@ Namespace Global.Tomtastisch.FileClassifier Return opt.AllowUnknownArchiveEntrySize End Function - Private Shared Function EnsureTrailingSeparator _ - ( - dirPath As String - ) As String - + Private Shared Function EnsureTrailingSeparator(dirPath As String) As String If String.IsNullOrEmpty(dirPath) Then Return Path.DirectorySeparatorChar.ToString() - If dirPath.EndsWith(Path.DirectorySeparatorChar) _ - OrElse dirPath.EndsWith(Path.AltDirectorySeparatorChar) Then + If dirPath.EndsWith(Path.DirectorySeparatorChar) OrElse dirPath.EndsWith(Path.AltDirectorySeparatorChar) _ + Then Return dirPath End If - Return dirPath & Path.DirectorySeparatorChar End Function End Class @@ -796,13 +688,8 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function TryCollectFromFile _ - ( - path As String, - opt As FileTypeProjectOptions, - ByRef entries As IReadOnlyList(Of ZipExtractedEntry) - ) As Boolean - + Friend Shared Function TryCollectFromFile(path As String, opt As FileTypeProjectOptions, + ByRef entries As IReadOnlyList(Of ZipExtractedEntry)) As Boolean Dim descriptor As ArchiveDescriptor = Nothing entries = Array.Empty(Of ZipExtractedEntry)() @@ -810,24 +697,17 @@ Namespace Global.Tomtastisch.FileClassifier If opt Is Nothing Then Return False Try - Using fs As New FileStream( - path, FileMode.Open, - FileAccess.Read, - FileShare.Read, - InternalIoDefaults.FileStreamBufferSize, - FileOptions.SequentialScan - ) - + Using _ + fs As _ + New FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, + 
InternalIoDefaults.FileStreamBufferSize, FileOptions.SequentialScan) If Not ArchiveTypeResolver.TryDescribeStream(fs, opt, descriptor) Then Return False StreamGuard.RewindToStart(fs) - If Not ArchiveSafetyGate.IsArchiveSafeStream(fs, opt, descriptor, depth:=0) Then Return False StreamGuard.RewindToStart(fs) - entries = ArchiveExtractor.TryExtractArchiveStreamToMemory(fs, opt, descriptor) Return entries IsNot Nothing AndAlso entries.Count > 0 End Using - Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -839,32 +719,25 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf ex Is ObjectDisposedException LogGuard.Debug(opt.Logger, $"[ArchiveCollect] Datei-Fehler: {ex.Message}") entries = Array.Empty(Of ZipExtractedEntry)() - Return False End Try End Function - Friend Shared Function TryCollectFromBytes _ - ( - data As Byte(), - opt As FileTypeProjectOptions, - ByRef entries As IReadOnlyList(Of ZipExtractedEntry) - ) As Boolean - + Friend Shared Function TryCollectFromBytes(data As Byte(), opt As FileTypeProjectOptions, + ByRef entries As IReadOnlyList(Of ZipExtractedEntry)) As Boolean Dim descriptor As ArchiveDescriptor = Nothing entries = Array.Empty(Of ZipExtractedEntry)() - If Not ByteArrayGuard.HasContent(data) Then Return False + If data Is Nothing OrElse data.Length = 0 Then Return False If opt Is Nothing Then Return False Try - If Not ArchivePayloadGuard.TryDescribeSafeArchivePayload(data, opt, descriptor) Then Return False - + If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False + If Not ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) Then Return False Using ms As New MemoryStream(data, writable:=False) entries = ArchiveExtractor.TryExtractArchiveStreamToMemory(ms, opt, descriptor) Return entries IsNot Nothing AndAlso entries.Count > 0 End Using - Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is 
SecurityException OrElse @@ -900,15 +773,13 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Verarbeitet ein Archiv über SharpCompress fail-closed und optionalen Entry-Callback. ''' - Public Function Process _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - containerTypeValue As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean Implements IArchiveBackend.Process - + Public Function Process( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + containerTypeValue As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean Implements IArchiveBackend.Process Dim mapped As ArchiveContainerType Dim entries As List(Of SharpCompress.Archives.IArchiveEntry) Dim nestedResult As Boolean = False @@ -932,31 +803,19 @@ Namespace Global.Tomtastisch.FileClassifier If containerTypeValue = ArchiveContainerType.GZip AndAlso Not gzipWrapped Then Return False Using archive = OpenArchiveForContainerCompat(stream, containerTypeValue) - If archive Is Nothing Then Return False - mapped = ArchiveTypeResolver.MapArchiveType(archive.Type) - - gzipWrappedTar = gzipWrapped AndAlso containerTypeValue = ArchiveContainerType.GZip _ - AndAlso mapped = ArchiveContainerType.Tar - + gzipWrappedTar = gzipWrapped AndAlso containerTypeValue = ArchiveContainerType.GZip AndAlso _ + mapped = ArchiveContainerType.Tar If mapped <> containerTypeValue AndAlso Not gzipWrappedTar Then Return False - entries = archive.Entries.OrderBy( - Function(e) If(e.Key, String.Empty), - StringComparer.Ordinal - ).ToList() + entries = archive.Entries. + OrderBy(Function(e) If(e.Key, String.Empty), StringComparer.Ordinal). 
+ ToList() If Not gzipWrappedTar Then - nestedHandled = TryProcessNestedGArchive( - entries, - opt, - depth, - containerTypeValue, - extractEntry, - nestedResult - ) - + nestedHandled = TryProcessNestedGArchive(entries, opt, depth, containerTypeValue, extractEntry, + nestedResult) If nestedHandled Then Return nestedResult End If @@ -969,18 +828,17 @@ Namespace Global.Tomtastisch.FileClassifier model = New SharpCompressEntryModel(entry) - If ArchiveLinkGuard.IsRejectedLink(opt, model.LinkTarget, "[ArchiveGate]", logWhenRejected:=True) Then + If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(model.LinkTarget) Then + LogGuard.Warn(opt.Logger, "[ArchiveGate] Link-Entry ist nicht erlaubt.") Return False End If If Not model.IsDirectory Then knownSize = 0 requireKnownForTotal = (extractEntry Is Nothing) OrElse depth > 0 - If gzipWrappedTar Then requireKnownForTotal = False End If - If Not TryGetValidatedSize(model, opt, knownSize, requireKnownForTotal) Then Return False totalUncompressed += knownSize If totalUncompressed > opt.MaxZipTotalUncompressedBytes Then Return False @@ -1007,24 +865,20 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function OpenArchiveForContainerCompat _ - ( - stream As Stream, - containerTypeValue As ArchiveContainerType - ) As SharpCompress.Archives.IArchive - + Private Shared Function OpenArchiveForContainerCompat(stream As Stream, + containerTypeValue As ArchiveContainerType) _ + As SharpCompress.Archives.IArchive Return ArchiveSharpCompressCompat.OpenArchiveForContainer(stream, containerTypeValue) End Function Private Shared Function TryProcessNestedGArchive( - entries As List(Of SharpCompress.Archives.IArchiveEntry), - opt As FileTypeProjectOptions, - depth As Integer, - containerType As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean), - ByRef nestedResult As Boolean - ) As Boolean - + entries As List(Of SharpCompress.Archives.IArchiveEntry), + opt As 
FileTypeProjectOptions, + depth As Integer, + containerType As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean), + ByRef nestedResult As Boolean + ) As Boolean Dim onlyEntry As SharpCompress.Archives.IArchiveEntry Dim model As IArchiveEntryModel Dim payload As Byte() = Nothing @@ -1038,7 +892,7 @@ Namespace Global.Tomtastisch.FileClassifier If onlyEntry Is Nothing OrElse onlyEntry.IsDirectory Then Return False model = New SharpCompressEntryModel(onlyEntry) - If ArchiveLinkGuard.IsRejectedLink(opt, model.LinkTarget, "[ArchiveGate]", logWhenRejected:=False) Then + If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(model.LinkTarget) Then nestedResult = False Return True End If @@ -1061,42 +915,32 @@ Namespace Global.Tomtastisch.FileClassifier End If Using nestedMs As New MemoryStream(payload, writable:=False) - nestedResult = ArchiveProcessingEngine.ProcessArchiveStream( - nestedMs, - opt, - depth + 1, - nestedDescriptor, - extractEntry - ) + nestedResult = ArchiveProcessingEngine.ProcessArchiveStream(nestedMs, opt, depth + 1, nestedDescriptor, + extractEntry) End Using Return True End Function - Private Shared Function TryReadEntryPayloadBoundedWithOptions _ - ( - entry As SharpCompress.Archives.IArchiveEntry, - maxBytes As Long, - opt As FileTypeProjectOptions, - ByRef payload As Byte() - ) As Boolean - + Private Shared Function TryReadEntryPayloadBoundedWithOptions( + entry As SharpCompress.Archives.IArchiveEntry, + maxBytes As Long, + opt As FileTypeProjectOptions, + ByRef payload As Byte() + ) As Boolean payload = Array.Empty(Of Byte)() If entry Is Nothing Then Return False If maxBytes <= 0 Then Return False If opt Is Nothing Then Return False Try - Using source = entry.OpenEntryStream() If source Is Nothing OrElse Not source.CanRead Then Return False - Using ms As New MemoryStream() StreamBounds.CopyBounded(source, ms, maxBytes) payload = ms.ToArray() Return True End Using End Using - Catch ex As Exception When _ TypeOf 
ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse @@ -1112,14 +956,8 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function TryGetValidatedSize _ - ( - entry As IArchiveEntryModel, - opt As FileTypeProjectOptions, - ByRef knownSize As Long, - requireKnownForTotal As Boolean - ) As Boolean - + Private Shared Function TryGetValidatedSize(entry As IArchiveEntryModel, opt As FileTypeProjectOptions, + ByRef knownSize As Long, requireKnownForTotal As Boolean) As Boolean Dim value As Long? knownSize = 0 @@ -1142,13 +980,8 @@ Namespace Global.Tomtastisch.FileClassifier Return TryMeasureEntrySize(entry, opt, knownSize) End Function - Private Shared Function TryMeasureEntrySize _ - ( - entry As IArchiveEntryModel, - opt As FileTypeProjectOptions, - ByRef measured As Long - ) As Boolean - + Private Shared Function TryMeasureEntrySize(entry As IArchiveEntryModel, opt As FileTypeProjectOptions, + ByRef measured As Long) As Boolean Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte Dim n As Integer @@ -1160,7 +993,6 @@ Namespace Global.Tomtastisch.FileClassifier Try Using source = entry.OpenStream() If source Is Nothing OrElse Not source.CanRead Then Return False - While True n = source.Read(buf, 0, buf.Length) If n <= 0 Then Exit While @@ -1168,9 +1000,7 @@ Namespace Global.Tomtastisch.FileClassifier If measured > opt.MaxZipEntryUncompressedBytes Then Return False End While End Using - Return True - Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse TypeOf ex Is SecurityException OrElse diff --git a/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb index aba31d1e..559db333 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb @@ -24,24 +24,17 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End 
Sub - Friend Shared Function ValidateArchiveStream _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer - ) As Boolean - + Friend Shared Function ValidateArchiveStream(stream As Stream, opt As FileTypeProjectOptions, depth As Integer) _ + As Boolean Return ProcessArchiveStream(stream, opt, depth, Nothing) End Function - Friend Shared Function ProcessArchiveStream _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - extractEntry As Func(Of ZipArchiveEntry, Boolean) - ) As Boolean - + Friend Shared Function ProcessArchiveStream( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + extractEntry As Func(Of ZipArchiveEntry, Boolean) + ) As Boolean Dim totalUncompressed As Long Dim ordered As IEnumerable(Of ZipArchiveEntry) Dim u As Long @@ -81,7 +74,6 @@ Namespace Global.Tomtastisch.FileClassifier Try Using es = e.Open() - Using nestedMs = RecyclableStreams.GetStream("ArchiveStreamEngine.Nested") StreamBounds.CopyBounded(es, nestedMs, opt.MaxZipNestedBytes) nestedMs.Position = 0 @@ -93,7 +85,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -114,7 +106,7 @@ Namespace Global.Tomtastisch.FileClassifier Return True Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -126,12 +118,7 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function IsNestedArchiveEntry _ - ( - entry As ZipArchiveEntry, - opt As 
FileTypeProjectOptions - ) As Boolean - + Private Shared Function IsNestedArchiveEntry(entry As ZipArchiveEntry, opt As FileTypeProjectOptions) As Boolean Dim header(15) As Byte Dim read As Integer Dim exact As Byte() @@ -154,7 +141,7 @@ Namespace Global.Tomtastisch.FileClassifier End Using Catch ex As Exception When _ TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is System.Security.SecurityException OrElse TypeOf ex Is IOException OrElse TypeOf ex Is InvalidDataException OrElse TypeOf ex Is NotSupportedException OrElse @@ -186,15 +173,13 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Verarbeitet ZIP-Archive fail-closed über die Managed-Archive-Engine. ''' - Public Function Process _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - containerTypeValue As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean Implements IArchiveBackend.Process - + Public Function Process( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + containerTypeValue As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean Implements IArchiveBackend.Process If containerTypeValue <> ArchiveContainerType.Zip Then Return False If extractEntry Is Nothing Then diff --git a/src/FileTypeDetection/Infrastructure/CoreInternals.vb b/src/FileTypeDetection/Infrastructure/CoreInternals.vb index f6c419de..8c722e66 100644 --- a/src/FileTypeDetection/Infrastructure/CoreInternals.vb +++ b/src/FileTypeDetection/Infrastructure/CoreInternals.vb @@ -13,9 +13,260 @@ Option Explicit On Imports System.IO Imports System.IO.Compression Imports System.Text -Imports Tomtastisch.FileClassifier.Infrastructure.Utils +Imports Microsoft.Extensions.Logging Namespace Global.Tomtastisch.FileClassifier + ''' + ''' Interne Hilfsklasse InternalIoDefaults zur kapselnden Umsetzung von Guard-, I/O- und Policy-Logik. 
+ ''' + Friend NotInheritable Class InternalIoDefaults + Friend Const CopyBufferSize As Integer = 8192 + Friend Const FileStreamBufferSize As Integer = 81920 + Friend Const DefaultSniffBytes As Integer = 4096 + + Private Sub New() + End Sub + End Class + + ''' + ''' Zentrale IO-Helfer für harte Grenzen. + ''' SSOT-Regel: bounded copy wird nur hier gepflegt. + ''' + Friend NotInheritable Class StreamBounds + Private Sub New() + End Sub + + Friend Shared Sub CopyBounded(input As Stream, output As Stream, maxBytes As Long) + Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte + Dim total As Long = 0 + Dim n As Integer + + While True + n = input.Read(buf, 0, buf.Length) + If n <= 0 Then Exit While + + total += n + If total > maxBytes Then Throw New InvalidOperationException("bounded copy exceeded") + output.Write(buf, 0, n) + End While + End Sub + End Class + + ''' + ''' Kleine, zentrale Stream-Guards, um duplizierte Pattern-Checks in Archivroutinen zu reduzieren. + ''' Keine Semantik: reine Abfrage/Positionierung. + ''' + Friend NotInheritable Class StreamGuard + Private Sub New() + End Sub + + Friend Shared Function IsReadable(stream As Stream) As Boolean + Return stream IsNot Nothing AndAlso stream.CanRead + End Function + + Friend Shared Sub RewindToStart(stream As Stream) + If stream Is Nothing Then Return + If stream.CanSeek Then stream.Position = 0 + End Sub + End Class + + ''' + ''' Sicherheits-Gate für Archive-Container. 
+ ''' + Friend NotInheritable Class ArchiveSafetyGate + Private Sub New() + End Sub + + Friend Shared Function IsArchiveSafeBytes(data As Byte(), opt As FileTypeProjectOptions, + descriptor As ArchiveDescriptor) As Boolean + If data Is Nothing OrElse data.Length = 0 Then Return False + If opt Is Nothing Then Return False + If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then Return False + + Try + Using ms As New MemoryStream(data, writable:=False) + Return IsArchiveSafeStream(ms, opt, descriptor, depth:=0) + End Using + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is InvalidDataException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException OrElse + TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException + LogGuard.Debug(opt.Logger, $"[ArchiveGate] Bytes-Fehler: {ex.Message}") + Return False + End Try + End Function + + Friend Shared Function IsArchiveSafeStream(stream As Stream, opt As FileTypeProjectOptions, + descriptor As ArchiveDescriptor, depth As Integer) As Boolean + If Not StreamGuard.IsReadable(stream) Then Return False + If opt Is Nothing Then Return False + Return ArchiveProcessingEngine.ValidateArchiveStream(stream, opt, depth, descriptor) + End Function + End Class + + ''' + ''' Gemeinsame Guards für signaturbasierte Archiv-Byte-Payloads. + ''' + Friend NotInheritable Class ArchiveSignaturePayloadGuard + Private Sub New() + End Sub + + Friend Shared Function IsArchiveSignatureCandidate(data As Byte()) As Boolean + If data Is Nothing OrElse data.Length = 0 Then Return False + Return FileTypeRegistry.DetectByMagic(data) = FileKind.Zip + End Function + End Class + + ''' + ''' Gemeinsame Guards für beliebige Archive-Byte-Payloads. 
+ ''' + Friend NotInheritable Class ArchivePayloadGuard + Private Sub New() + End Sub + + Friend Shared Function IsSafeArchivePayload(data As Byte(), opt As FileTypeProjectOptions) As Boolean + Dim descriptor As ArchiveDescriptor = Nothing + + If data Is Nothing OrElse data.Length = 0 Then Return False + If opt Is Nothing Then Return False + If CLng(data.Length) > opt.MaxBytes Then Return False + + If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False + Return ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) + End Function + End Class + + ''' + ''' Gemeinsame Zielpfad-Policy für Materialisierung und Archiv-Extraktion. + ''' + Friend NotInheritable Class DestinationPathGuard + Private Sub New() + End Sub + + Friend Shared Function PrepareMaterializationTarget(destinationFull As String, overwrite As Boolean, + opt As FileTypeProjectOptions) As Boolean + If IsRootPath(destinationFull) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") + Return False + End If + + If File.Exists(destinationFull) Then + If Not overwrite Then Return False + File.Delete(destinationFull) + ElseIf Directory.Exists(destinationFull) Then + If Not overwrite Then Return False + Directory.Delete(destinationFull, recursive:=True) + End If + + Return True + End Function + + Friend Shared Function ValidateNewExtractionTarget(destinationFull As String, opt As FileTypeProjectOptions) _ + As Boolean + Dim parent As String + + If IsRootPath(destinationFull) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") + Return False + End If + + If File.Exists(destinationFull) OrElse Directory.Exists(destinationFull) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel existiert bereits.") + Return False + End If + + parent = Path.GetDirectoryName(destinationFull) + If String.IsNullOrWhiteSpace(parent) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel ohne gültigen Parent.") + Return False + End If + + 
Return True + End Function + + Friend Shared Function IsRootPath(destinationFull As String) As Boolean + Dim rootPath As String + + If String.IsNullOrWhiteSpace(destinationFull) Then Return False + + Try + rootPath = Path.GetPathRoot(destinationFull) + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is System.Security.SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + Return False + End Try + + If String.IsNullOrWhiteSpace(rootPath) Then Return False + + Return String.Equals( + destinationFull.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), + rootPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), + StringComparison.OrdinalIgnoreCase) + End Function + End Class + + ''' + ''' Gemeinsame Normalisierung für relative Archiv-Entry-Pfade. + ''' + Friend NotInheritable Class ArchiveEntryPathPolicy + Private Sub New() + End Sub + + Friend Shared Function TryNormalizeRelativePath( + rawPath As String, + allowDirectoryMarker As Boolean, + ByRef normalizedPath As String, + ByRef isDirectory As Boolean + ) As Boolean + Dim safe As String + Dim trimmed As String + Dim segments As String() + + normalizedPath = String.Empty + isDirectory = False + + safe = If(rawPath, String.Empty).Trim() + If safe.Length = 0 Then Return False + If safe.Contains(ChrW(0)) Then Return False + + safe = safe.Replace("\"c, "/"c) + If Path.IsPathRooted(safe) Then Return False + safe = safe.TrimStart("/"c) + If safe.Length = 0 Then Return False + + trimmed = safe.TrimEnd("/"c) + If trimmed.Length = 0 Then + If Not allowDirectoryMarker Then Return False + normalizedPath = safe + isDirectory = True + Return True + End If + + segments = trimmed.Split("/"c) + For Each seg In segments + If seg.Length = 0 Then Return False + If seg = "." OrElse seg = ".." 
Then Return False + Next + + If safe.Length <> trimmed.Length AndAlso Not allowDirectoryMarker Then + Return False + End If + + normalizedPath = If(allowDirectoryMarker, safe, trimmed) + isDirectory = allowDirectoryMarker AndAlso safe.Length <> trimmed.Length + Return True + End Function + End Class + ''' ''' Verfeinert ZIP-basierte Office-Container zu Dokumenttypen anhand kanonischer Paketmarker. ''' Implementationsprinzip: @@ -120,9 +371,9 @@ Namespace Global.Tomtastisch.FileClassifier Return FileTypeRegistry.Resolve(FileKind.Unknown) End If - If hasDocxMarker Then Return FileTypeRegistry.Resolve(FileKind.Doc) - If hasXlsxMarker Then Return FileTypeRegistry.Resolve(FileKind.Xls) - If hasPptxMarker Then Return FileTypeRegistry.Resolve(FileKind.Ppt) + If hasDocxMarker Then Return FileTypeRegistry.Resolve(FileKind.Docx) + If hasXlsxMarker Then Return FileTypeRegistry.Resolve(FileKind.Xlsx) + If hasPptxMarker Then Return FileTypeRegistry.Resolve(FileKind.Pptx) End If If hasOpenDocumentConflict Then @@ -167,12 +418,12 @@ Namespace Global.Tomtastisch.FileClassifier If String.IsNullOrWhiteSpace(mimeValue) Then Return FileKind.Unknown normalizedMime = mimeValue.Trim().ToLowerInvariant() - If normalizedMime = "application/vnd.oasis.opendocument.text" Then Return FileKind.Doc - If normalizedMime = "application/vnd.oasis.opendocument.text-template" Then Return FileKind.Doc - If normalizedMime = "application/vnd.oasis.opendocument.spreadsheet" Then Return FileKind.Xls - If normalizedMime = "application/vnd.oasis.opendocument.spreadsheet-template" Then Return FileKind.Xls - If normalizedMime = "application/vnd.oasis.opendocument.presentation" Then Return FileKind.Ppt - If normalizedMime = "application/vnd.oasis.opendocument.presentation-template" Then Return FileKind.Ppt + If normalizedMime = "application/vnd.oasis.opendocument.text" Then Return FileKind.Docx + If normalizedMime = "application/vnd.oasis.opendocument.text-template" Then Return FileKind.Docx + If normalizedMime 
= "application/vnd.oasis.opendocument.spreadsheet" Then Return FileKind.Xlsx + If normalizedMime = "application/vnd.oasis.opendocument.spreadsheet-template" Then Return FileKind.Xlsx + If normalizedMime = "application/vnd.oasis.opendocument.presentation" Then Return FileKind.Pptx + If normalizedMime = "application/vnd.oasis.opendocument.presentation-template" Then Return FileKind.Pptx Return FileKind.Unknown End Function @@ -189,11 +440,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ZIP-Entry, der gelesen werden soll. ''' Maximal erlaubte Größe in Byte. ''' ASCII-Textinhalt oder leerer String bei Guard-/Fehlerpfad. - Private Shared Function ReadZipEntryText _ - ( - entry As ZipArchiveEntry, - maxBytes As Integer - ) As String + Private Shared Function ReadZipEntryText(entry As ZipArchiveEntry, maxBytes As Integer) As String Dim buffer As Byte() Dim readTotal As Integer Dim readCount As Integer @@ -275,7 +522,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Kompletter oder teilweiser OLE-Payload. ''' Gemappter Office-Typ oder . Friend Shared Function TryRefineBytes(data As Byte()) As FileType - If Not ByteArrayGuard.HasContent(data) Then Return FileTypeRegistry.Resolve(FileKind.Unknown) + If data Is Nothing OrElse data.Length = 0 Then Return FileTypeRegistry.Resolve(FileKind.Unknown) Try Return RefineByMarkers(data) @@ -298,11 +545,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Lesbarer Quellstream. ''' Maximale Probegröße; wird intern defensiv gekappt. ''' Gemappter Office-Typ oder . 
- Friend Shared Function TryRefineStream _ - ( - stream As Stream, - maxProbeBytes As Integer - ) As FileType + Friend Shared Function TryRefineStream(stream As Stream, maxProbeBytes As Integer) As FileType Dim probeLimit As Integer Dim chunk(4095) As Byte Dim readTotal As Integer @@ -368,9 +611,9 @@ Namespace Global.Tomtastisch.FileClassifier If hasPowerPoint Then markerCount += 1 If markerCount <> 1 Then Return FileTypeRegistry.Resolve(FileKind.Unknown) - If hasWord Then Return FileTypeRegistry.Resolve(FileKind.Doc) - If hasExcel Then Return FileTypeRegistry.Resolve(FileKind.Xls) - If hasPowerPoint Then Return FileTypeRegistry.Resolve(FileKind.Ppt) + If hasWord Then Return FileTypeRegistry.Resolve(FileKind.Docx) + If hasExcel Then Return FileTypeRegistry.Resolve(FileKind.Xlsx) + If hasPowerPoint Then Return FileTypeRegistry.Resolve(FileKind.Pptx) Return FileTypeRegistry.Resolve(FileKind.Unknown) End Function @@ -381,11 +624,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Quellpuffer. ''' Gesuchte Marker-Bytefolge. ''' True bei Treffer, sonst False. - Private Shared Function ContainsMarker _ - ( - data As Byte(), - marker As Byte() - ) As Boolean + Private Shared Function ContainsMarker(data As Byte(), marker As Byte()) As Boolean Dim i As Integer Dim j As Integer @@ -405,4 +644,54 @@ Namespace Global.Tomtastisch.FileClassifier End Function End Class + ''' + ''' Defensiver Logger-Schutz. + ''' Logging darf niemals zu Erkennungsfehlern oder Exceptions führen. 
+ ''' + Friend NotInheritable Class LogGuard + Private Sub New() + End Sub + + Friend Shared Sub Debug(logger As ILogger, message As String) + If logger Is Nothing Then Return + If Not logger.IsEnabled(LogLevel.Debug) Then Return + Try + logger.LogDebug("{Message}", message) + Catch ex As Exception When _ + TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException OrElse + TypeOf ex Is FormatException OrElse + TypeOf ex Is ArgumentException + ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. + End Try + End Sub + + Friend Shared Sub Warn(logger As ILogger, message As String) + If logger Is Nothing Then Return + If Not logger.IsEnabled(LogLevel.Warning) Then Return + Try + logger.LogWarning("{Message}", message) + Catch ex As Exception When _ + TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException OrElse + TypeOf ex Is FormatException OrElse + TypeOf ex Is ArgumentException + ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. + End Try + End Sub + + Friend Shared Sub [Error](logger As ILogger, message As String, ex As Exception) + If logger Is Nothing Then Return + If Not logger.IsEnabled(LogLevel.Error) Then Return + Try + logger.LogError(ex, "{Message}", message) + Catch logEx As Exception When _ + TypeOf logEx Is InvalidOperationException OrElse + TypeOf logEx Is ObjectDisposedException OrElse + TypeOf logEx Is FormatException OrElse + TypeOf logEx Is ArgumentException + ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. 
+ End Try + End Sub + End Class End Namespace diff --git a/src/FileTypeDetection/Infrastructure/MimeProvider.vb b/src/FileTypeDetection/Infrastructure/MimeProvider.vb index 5165ed87..1dfd55a3 100644 --- a/src/FileTypeDetection/Infrastructure/MimeProvider.vb +++ b/src/FileTypeDetection/Infrastructure/MimeProvider.vb @@ -29,14 +29,10 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Dateiendung mit oder ohne führenden Punkt. ''' Kanonischer MIME-Typ oder leerer String. - Friend Shared Function GetMime _ - ( - extWithDot As String - ) As String + Friend Shared Function GetMime(extWithDot As String) As String + Dim ext As String = extWithDot - Dim ext As String = If(String.IsNullOrWhiteSpace(extWithDot), String.Empty, extWithDot) - - If ext.Length = 0 Then Return String.Empty + If String.IsNullOrWhiteSpace(extWithDot) Then Return String.Empty If Not ext.StartsWith("."c) Then ext = "." & ext diff --git a/src/FileTypeDetection/Infrastructure/README.md b/src/FileTypeDetection/Infrastructure/README.md index 7fbc2a2c..5d8e3c47 100644 --- a/src/FileTypeDetection/Infrastructure/README.md +++ b/src/FileTypeDetection/Infrastructure/README.md @@ -4,9 +4,7 @@ Dieses Verzeichnis kapselt sicherheitskritische interne Ausführungslogik für Archive, Bounds, Guards und Extraktion. ## 2. Inhalt -- `CoreInternals.vb`: container-spezifische Verfeinerungslogik (OpenXML, Legacy-Office). -- `ArchiveInternals.vb`, `ArchiveManagedInternals.vb`, `MimeProvider.vb`. -- Untermodul `Utils/` als zentrale SSOT fuer interne Guards, Pfad-Policies, Logging und wiederverwendbare I/O-Helfer. +- `CoreInternals.vb`, `ArchiveInternals.vb`, `ArchiveManagedInternals.vb`, `MimeProvider.vb`. ## 3. API und Verhalten - Erzwingt fail-closed bei Traversal, Link-Entries, Größenlimits und ungültigen Archiven. @@ -25,6 +23,5 @@ flowchart LR ## 6. 
Verweise - [Modulübersicht](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) -- [Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) - [Policy CI](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/001_POLICY_CI.MD) diff --git a/src/FileTypeDetection/Infrastructure/Utils/EnumUtils.vb b/src/FileTypeDetection/Infrastructure/Utils/EnumUtils.vb deleted file mode 100644 index 579a069d..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/EnumUtils.vb +++ /dev/null @@ -1,252 +0,0 @@ -' ============================================================================ -' FILE: EnumUtils.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' -' Kontext: -' - Allgemeine Enum-Helfer (Java-ähnliches values()). -' - Liefert Enum-Werte als typisiertes Array, optional sortiert und optional als Index-Range. -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Utility-Funktionen für Enum-Typen (values()). - ''' - ''' - ''' - ''' Zweck: - ''' - Liefert Enum-Werte als typisiertes Array ohne LINQ. - ''' - Optional: Sortierung nach numerischem Enum-Wert. - ''' - Optional: Index-Range (from/to) mit deterministischem Clamping. - ''' - ''' - ''' Nicht-Ziele: - ''' - Keine zustandsbehaftete Logik. - ''' - Keine Abhängigkeiten auf Projektdienste (I/O, Logger, Policy-Engine). - ''' - Keine Reflection-Features außer Enum.GetValues(Type). 
- ''' - ''' - Friend NotInheritable Class EnumUtils - - Private Sub New() - End Sub - - - ' ===================================================================== - ' Internal (Projekt-intern): Sortier-Optionen - ' ===================================================================== - - Friend Enum EnumSortOrder - None = 0 - Ascending = 1 - Descending = 2 - End Enum - - - ' ===================================================================== - ' Public API - ' ===================================================================== - - ''' - ''' Liefert alle Werte eines Enum-Typs als typisiertes Array. - ''' - ''' - ''' - ''' Ablaufstruktur: - ''' 1) Werte werden über Enum.GetValues(Type) geladen, - ''' 2) Ausgabe erfolgt als typisiertes Array TEnum(). - ''' - ''' - ''' Hinweis: - ''' - Diese öffentliche Überladung liefert bewusst die gesamte Menge ohne Sortierung und ohne Range. - ''' - Erweiterte Optionen (Sortierung/Range) sind projektintern gehalten. - ''' - ''' - ''' Enum-Typ. - ''' Enum-Werte als Array. - ''' Wird ausgelöst, wenn kein Enum ist. - ''' - ''' - ''' ' Beispiel: alle Werte ohne Sortierung/Range - ''' Dim values() As ExampleSlot = EnumUtils.GetValues(Of ExampleSlot)() - ''' For Each v As ExampleSlot In values - ''' Console.WriteLine(v) - ''' Next - ''' - ''' - Public Shared Function GetValues(Of TEnum As Structure)() As TEnum() - - Return GetValues(Of TEnum)( - EnumSortOrder.None, fromIndex:=Nothing, toIndex:=Nothing - ) - End Function - - - ' ===================================================================== - ' Internal API (Projekt-intern): Sortierung + Range - ' ===================================================================== - - ''' - ''' Liefert Enum-Werte als typisiertes Array (optional sortiert) und optional als Index-Range. 
- ''' - ''' - ''' - ''' Ablaufstruktur: - ''' 1) Werte werden über Enum.GetValues(Type) geladen, - ''' 2) optional: Sortierung nach numerischem Enum-Wert, - ''' 3) Range wird deterministisch geklemmt, - ''' 4) Ausgabe erfolgt als Slice über Array.Copy(values, from, result, 0, length). - ''' - ''' - ''' Range-Semantik (0-basiert, inklusive): - ''' - Keine Angabe: gesamte Menge. - ''' - Nur : von bis letztes Element. - ''' - Nur : von 0 bis . - ''' - Beide: von bis . - ''' - ''' - ''' Clamping-Regeln (deterministisch): - ''' 1) wird zuerst geklemmt: - ''' - < 0 => 0 - ''' - > max => max - ''' - Nothing => max - ''' 2) wird danach geklemmt: - ''' - Nothing => 0 - ''' - < 0 => 0 - ''' - > max2 => max2 - ''' wobei max2 = geklemmter (falls gesetzt), sonst max. - ''' - ''' - ''' Fail-Closed: - ''' - Ist kein Enum, wird eine ausgelöst. - ''' - Leere Enums liefern ein leeres Array. - ''' - ''' - ''' Enum-Typ. - ''' - ''' Sortierreihenfolge: - ''' - : keine Sortierung (Originalreihenfolge von Enum.GetValues(Type)). - ''' - : aufsteigend nach numerischem Enum-Wert. - ''' - : absteigend nach numerischem Enum-Wert. - ''' - ''' Startindex (0-basiert, inklusive); Nothing bedeutet 0. - ''' Endindex (0-basiert, inklusive); Nothing bedeutet max. - ''' Enum-Werte als Array (ggf. sortiert und/oder gefiltert). - ''' Wird ausgelöst, wenn kein Enum ist. 
- ''' - ''' - ''' ' Aufsteigend sortiert, dann Slice der Indizes 1..3 (inklusive) - ''' Dim slice() As ExampleSlot = EnumUtils.GetValues(Of ExampleSlot)( - ''' sortOrder:=EnumUtils.EnumSortOrder.Ascending, - ''' fromIndex:=1, - ''' toIndex:=3 - ''' ) - ''' - ''' - Friend Shared Function GetValues(Of TEnum As Structure) _ - ( - Optional sortOrder As EnumSortOrder = EnumSortOrder.None, - Optional fromIndex As Nullable(Of Integer) = Nothing, - Optional toIndex As Nullable(Of Integer) = Nothing - ) As TEnum() - - ' Deklarationsblock - Dim enumType As Type = GetType(TEnum) - Dim raw As Array - - Dim values() As TEnum - Dim keys() As Long - - Dim i As Integer - Dim count As Integer - Dim maxIndex As Integer - - Dim effectiveTo As Integer - Dim effectiveMaxFrom As Integer - Dim effectiveFrom As Integer - - Dim length As Integer - Dim result() As TEnum - - - ' ----------------------------------------------------------------- - ' Guard-Clauses - ' ----------------------------------------------------------------- - If Not enumType.IsEnum Then - Throw New ArgumentException("TEnum muss ein Enum-Typ sein.", NameOf(TEnum)) - End If - - - ' ----------------------------------------------------------------- - ' Werte laden - ' ----------------------------------------------------------------- - raw = [Enum].GetValues(enumType) - - count = raw.Length - If count = 0 Then - Return New TEnum() {} - End If - - maxIndex = count - 1 - - values = New TEnum(count - 1) {} - For i = 0 To count - 1 - values(i) = CType(raw.GetValue(i), TEnum) - Next - - - ' ----------------------------------------------------------------- - ' Optional: Sortierung (Keys nur bei Bedarf) - ' ----------------------------------------------------------------- - If sortOrder <> EnumSortOrder.None Then - - keys = New Long(count - 1) {} - For i = 0 To count - 1 - keys(i) = Convert.ToInt64(values(i)) - Next - - Array.Sort(keys, values) - - If sortOrder = EnumSortOrder.Descending Then - Array.Reverse(values) - End If - - 
End If - - - ' ----------------------------------------------------------------- - ' Range: toIndex zuerst clampen, dann fromIndex - ' ----------------------------------------------------------------- - effectiveTo = If(toIndex.HasValue, toIndex.Value, maxIndex) - effectiveTo = Math.Min(Math.Max(effectiveTo, 0), maxIndex) - - effectiveMaxFrom = If(toIndex.HasValue, effectiveTo, maxIndex) - - effectiveFrom = If(fromIndex.HasValue, fromIndex.Value, 0) - effectiveFrom = Math.Min(Math.Max(effectiveFrom, 0), effectiveMaxFrom) - - - ' ----------------------------------------------------------------- - ' Slice kopieren (inklusive) - ' ----------------------------------------------------------------- - length = (effectiveTo - effectiveFrom) + 1 - result = New TEnum(length - 1) {} - - Array.Copy(values, effectiveFrom, result, 0, length) - - Return result - - End Function - - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb deleted file mode 100644 index 67e40fcf..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb +++ /dev/null @@ -1,231 +0,0 @@ -' ============================================================================ -' FILE: ArchiveGuards.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System -Imports System.IO -Imports Tomtastisch.FileClassifier - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Zentrale Byte-Array-Guards für konsistente Null-/Leer-Prüfungen. 
- ''' - Friend NotInheritable Class ByteArrayGuard - Private Sub New() - End Sub - - Friend Shared Function HasContent(data As Byte()) As Boolean - - Return data IsNot Nothing AndAlso data.Length > 0 - End Function - End Class - - ''' - ''' Sicherheits-Gate für Archive-Container. - ''' - Friend NotInheritable Class ArchiveSafetyGate - Private Sub New() - End Sub - - Friend Shared Function IsArchiveSafeBytes _ - ( - data As Byte(), - opt As FileTypeProjectOptions, - descriptor As ArchiveDescriptor - ) As Boolean - - If Not ByteArrayGuard.HasContent(data) Then Return False - If opt Is Nothing Then Return False - If descriptor Is Nothing OrElse - descriptor.ContainerType = ArchiveContainerType.Unknown Then Return False - - Try - Using ms As New MemoryStream(data, writable:=False) - Return IsArchiveSafeStream(ms, opt, descriptor, depth:=0) - End Using - Catch ex As Exception When ExceptionFilterGuard.IsArchiveValidationException(ex) - LogGuard.Debug(opt.Logger, $"[ArchiveGate] Bytes-Fehler: {ex.Message}") - Return False - End Try - End Function - - Friend Shared Function IsArchiveSafeStream _ - ( - stream As Stream, - opt As FileTypeProjectOptions, - descriptor As ArchiveDescriptor, - depth As Integer - ) As Boolean - - If Not StreamGuard.IsReadable(stream) Then Return False - If opt Is Nothing Then Return False - Return ArchiveProcessingEngine.ValidateArchiveStream(stream, opt, depth, descriptor) - End Function - End Class - - ''' - ''' Gemeinsame Guards für signaturbasierte Archiv-Byte-Payloads. - ''' - Friend NotInheritable Class ArchiveSignaturePayloadGuard - Private Sub New() - End Sub - - Friend Shared Function IsArchiveSignatureCandidate _ - ( - data As Byte() - ) As Boolean - - If Not ByteArrayGuard.HasContent(data) Then Return False - Return FileTypeRegistry.DetectByMagic(data) = FileKind.Zip - End Function - End Class - - ''' - ''' Gemeinsame Policy-Prüfung für Link-Entries in Archiven. 
- ''' - Friend NotInheritable Class ArchiveLinkGuard - Private Sub New() - End Sub - - Friend Shared Function IsRejectedLink _ - ( - opt As FileTypeProjectOptions, - linkTarget As String, - logPrefix As String, - logWhenRejected As Boolean - ) As Boolean - - If opt Is Nothing Then Return True - - If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(linkTarget) Then - If logWhenRejected Then - LogGuard.Warn(opt.Logger, $"{logPrefix} Link-Entry ist nicht erlaubt.") - End If - - Return True - End If - - Return False - End Function - End Class - - ''' - ''' Gemeinsame Guards für beliebige Archive-Byte-Payloads. - ''' - Friend NotInheritable Class ArchivePayloadGuard - Private Sub New() - End Sub - - Friend Shared Function IsSafeArchivePayload _ - ( - data As Byte(), - opt As FileTypeProjectOptions - ) As Boolean - - Dim descriptor As ArchiveDescriptor = ArchiveDescriptor.UnknownDescriptor() - - Return TryDescribeSafeArchivePayload(data, opt, descriptor) - End Function - - Friend Shared Function TryDescribeSafeArchivePayload _ - ( - data As Byte(), - opt As FileTypeProjectOptions, - ByRef descriptor As ArchiveDescriptor - ) As Boolean - - descriptor = ArchiveDescriptor.UnknownDescriptor() - - If Not ByteArrayGuard.HasContent(data) Then Return False - If opt Is Nothing Then Return False - If CLng(data.Length) > opt.MaxBytes Then Return False - - If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False - Return ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) - End Function - - End Class - - ''' - ''' Gemeinsame Normalisierung für relative Archiv-Entry-Pfade. 
- ''' - Friend NotInheritable Class ArchiveEntryPathPolicy - Private Sub New() - End Sub - - Friend Shared Function TryNormalizeRelativePath _ - ( - rawPath As String, - allowDirectoryMarker As Boolean, - ByRef normalizedPath As String, - ByRef isDirectory As Boolean - ) As Boolean - - Dim safe As String = String.Empty - Dim trimmed As String - - normalizedPath = String.Empty - isDirectory = False - - If Not TryPrepareRelativePath(rawPath, safe) Then Return False - - trimmed = safe.TrimEnd("/"c) - If trimmed.Length = 0 Then - If Not allowDirectoryMarker Then Return False - normalizedPath = safe - isDirectory = True - Return True - End If - - If Not HasOnlyAllowedPathSegments(trimmed) Then Return False - - If safe.Length <> trimmed.Length AndAlso Not allowDirectoryMarker Then - Return False - End If - - normalizedPath = If(allowDirectoryMarker, safe, trimmed) - isDirectory = allowDirectoryMarker AndAlso safe.Length <> trimmed.Length - Return True - End Function - - Private Shared Function TryPrepareRelativePath _ - ( - rawPath As String, - ByRef preparedPath As String - ) As Boolean - - preparedPath = If(rawPath, String.Empty).Trim() - If preparedPath.Length = 0 Then Return False - If preparedPath.Contains(ChrW(0)) Then Return False - If Path.IsPathRooted(preparedPath) Then Return False - - preparedPath = preparedPath.Replace("\"c, "/"c).TrimStart("/"c) - If preparedPath.Length = 0 Then Return False - - Return True - End Function - - Private Shared Function HasOnlyAllowedPathSegments(pathValue As String) As Boolean - - Dim segments As String() - - segments = pathValue.Split("/"c) - For Each seg In segments - If seg.Length = 0 Then Return False - If seg = "." OrElse seg = ".." 
Then Return False - Next - - Return True - End Function - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb deleted file mode 100644 index 2fd111ac..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb +++ /dev/null @@ -1,163 +0,0 @@ -' ============================================================================ -' FILE: ArgumentGuard.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' -' Kontext: -' - Minimale Guard-Utilities für Argumentprüfung (fail-closed via Exceptions). -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Utility-Funktionen für Guard-Clauses (Argumentprüfung). - ''' - ''' - ''' - ''' Zweck: - ''' - Zentralisierte, konsistente Argumentprüfungen. - ''' - Reduziert Boilerplate in Konstruktoren und Public APIs. - ''' - ''' - ''' Fail-Closed: - ''' - Bei Verstoß wird eine passende Exception ausgelöst (ArgumentNull/Argument/ArgumentOutOfRange). - ''' - Keine stillen Korrekturen, keine Side-Effects. - ''' - ''' - Friend NotInheritable Class ArgumentGuard - - Private Sub New() - End Sub - - - ' ===================================================================== - ' Public API (Shared; Utility, stateless) - ' ===================================================================== - - ''' - ''' Erzwingt, dass nicht Nothing ist. - ''' - ''' - ''' - ''' Ablaufstruktur: - ''' 1) Nullprüfung, - ''' 2) bei Nothing: . - ''' - ''' - ''' Beliebiger Referenztyp. - ''' Zu prüfender Wert. - ''' Parametername für Exception-Metadaten. 
- ''' Wird ausgelöst, wenn Nothing ist. - Public Shared Sub NotNothing(Of T) _ - ( - value As T, - paramName As String - ) - - ' Deklarationsblock - Dim isNull As Boolean - - isNull = (value Is Nothing) - If isNull Then - Throw New ArgumentNullException(paramName) - End If - - End Sub - - ''' - ''' Erzwingt, dass nicht Nothing ist und die erwartete Länge hat. - ''' - ''' - ''' - ''' Ablaufstruktur: - ''' 1) Nullprüfung, - ''' 2) Längenprüfung, - ''' 3) bei Abweichung: mit Erwartungs-/Istwert. - ''' - ''' - ''' Array, das geprüft werden soll. - ''' Erwartete Länge. - ''' Parametername für Exception-Metadaten. - ''' Wird ausgelöst, wenn Nothing ist. - ''' Wird ausgelöst, wenn die Länge nicht entspricht. - Public Shared Sub RequireLength _ - ( - value As Array, - expectedLength As Integer, - paramName As String - ) - - ' Deklarationsblock - Dim actualLength As Integer - - If value Is Nothing Then - Throw New ArgumentNullException(paramName) - End If - - actualLength = value.Length - If actualLength <> expectedLength Then - Throw New ArgumentException( - $"Expected length {expectedLength}, but was {actualLength}.", - paramName - ) - End If - - End Sub - - ''' - ''' Erzwingt, dass als Wert in definiert ist. - ''' - ''' - ''' - ''' Ablaufstruktur: - ''' 1) Prüft auf Nothing und Enum-Typ, - ''' 2) prüft Definition via Enum.IsDefined(enumType, value), - ''' 3) bei Verstoß: . - ''' - ''' - ''' Hinweis: - ''' - Diese Guard-Funktion ist bewusst untyped, um auch Validierung bei dynamischen Enum-Zugriffen abzudecken. - ''' - ''' - ''' Enum-Typ, gegen den geprüft wird. - ''' Zu prüfender Enum-Wert (boxed). - ''' Parametername für Exception-Metadaten. - ''' Wird ausgelöst, wenn Nothing ist. - ''' Wird ausgelöst, wenn kein Enum ist. - ''' Wird ausgelöst, wenn nicht definiert ist. 
- Public Shared Sub EnumDefined _ - ( - enumType As Type, - value As Object, - paramName As String - ) - - ' Deklarationsblock - Dim isEnumValueDefined As Boolean - - If enumType Is Nothing Then - Throw New ArgumentNullException(NameOf(enumType)) - End If - - If Not enumType.IsEnum Then - Throw New ArgumentException("enumType muss ein Enum-Typ sein.", NameOf(enumType)) - End If - - isEnumValueDefined = [Enum].IsDefined(enumType, value) - If Not isEnumValueDefined Then - Throw New ArgumentOutOfRangeException(paramName) - End If - - End Sub - - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb deleted file mode 100644 index 838526a9..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb +++ /dev/null @@ -1,182 +0,0 @@ -' ============================================================================ -' FILE: DestinationPathGuard.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System -Imports System.IO -Imports Tomtastisch.FileClassifier - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Gemeinsame Zielpfad-Policy für Materialisierung und Archiv-Extraktion. 
- ''' - Friend Interface IDestinationPathPolicy - Function PrepareMaterializationTarget _ - ( - destinationFull As String, - overwrite As Boolean, - opt As FileTypeProjectOptions - ) As Boolean - - Function ValidateNewExtractionTarget _ - ( - destinationFull As String, - opt As FileTypeProjectOptions - ) As Boolean - - Function IsRootPath _ - ( - destinationFull As String - ) As Boolean - End Interface - - ''' - ''' Standardimplementierung der internen Zielpfad-Policy. - ''' - Friend NotInheritable Class DefaultDestinationPathPolicy - Implements IDestinationPathPolicy - - Friend Shared ReadOnly Instance As IDestinationPathPolicy = _ - New DefaultDestinationPathPolicy() - - Private Sub New() - End Sub - - Public Function PrepareMaterializationTarget _ - ( - destinationFull As String, - overwrite As Boolean, - opt As FileTypeProjectOptions - ) As Boolean Implements IDestinationPathPolicy.PrepareMaterializationTarget - - If IsRootPath(destinationFull) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") - Return False - End If - - If Not TryDeleteExistingTarget(destinationFull, overwrite) Then Return False - - Return True - End Function - - Public Function ValidateNewExtractionTarget _ - ( - destinationFull As String, - opt As FileTypeProjectOptions - ) As Boolean Implements IDestinationPathPolicy.ValidateNewExtractionTarget - - Dim parent As String - - If IsRootPath(destinationFull) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") - Return False - End If - - If File.Exists(destinationFull) OrElse Directory.Exists(destinationFull) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel existiert bereits.") - Return False - End If - - parent = Path.GetDirectoryName(destinationFull) - If String.IsNullOrWhiteSpace(parent) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel ohne gültigen Parent.") - Return False - End If - - Return True - End Function - - Public Function IsRootPath _ - ( - destinationFull As 
String - ) As Boolean Implements IDestinationPathPolicy.IsRootPath - - Dim rootPath As String - - If String.IsNullOrWhiteSpace(destinationFull) Then Return False - - Try - rootPath = Path.GetPathRoot(destinationFull) - Catch ex As Exception When ExceptionFilterGuard.IsPathNormalizationException(ex) - Return False - End Try - - If String.IsNullOrWhiteSpace(rootPath) Then Return False - - Return String.Equals( - destinationFull.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), - rootPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), - StringComparison.OrdinalIgnoreCase) - End Function - - Private Shared Function TryDeleteExistingTarget _ - ( - destinationFull As String, - overwrite As Boolean - ) As Boolean - - Dim existsAsFile As Boolean - Dim existsAsDirectory As Boolean - - existsAsFile = File.Exists(destinationFull) - existsAsDirectory = Directory.Exists(destinationFull) - - If Not existsAsFile AndAlso Not existsAsDirectory Then Return True - If Not overwrite Then Return False - - If existsAsFile Then - File.Delete(destinationFull) - Return True - End If - - Directory.Delete(destinationFull, recursive:=True) - Return True - End Function - End Class - - Friend NotInheritable Class DestinationPathGuard - Private Shared ReadOnly Policy As IDestinationPathPolicy = _ - DefaultDestinationPathPolicy.Instance - - Private Sub New() - End Sub - - Friend Shared Function PrepareMaterializationTarget _ - ( - destinationFull As String, - overwrite As Boolean, - opt As FileTypeProjectOptions - ) As Boolean - - Return Policy.PrepareMaterializationTarget(destinationFull, overwrite, opt) - End Function - - Friend Shared Function ValidateNewExtractionTarget _ - ( - destinationFull As String, - opt As FileTypeProjectOptions - ) _ - As Boolean - - Return Policy.ValidateNewExtractionTarget(destinationFull, opt) - End Function - - Friend Shared Function IsRootPath _ - ( - destinationFull As String - ) As Boolean - - Return 
Policy.IsRootPath(destinationFull) - End Function - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb deleted file mode 100644 index 5f0017d9..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb +++ /dev/null @@ -1,65 +0,0 @@ -' ============================================================================ -' FILE: ExceptionFilterGuard.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System -Imports System.IO - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' SSOT für wiederkehrende Exception-Filter in Guard-Klassen. - ''' - ''' - ''' Diese Utility kapselt Catch-Filter-Sets deterministisch, um - ''' Duplikate zu vermeiden und die Filter-Semantik zentral auditierbar zu halten. 
- ''' - Friend NotInheritable Class ExceptionFilterGuard - Private Sub New() - End Sub - - Friend Shared Function IsArchiveValidationException(ex As Exception) As Boolean - - Return TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is InvalidDataException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException OrElse - TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException - End Function - - Friend Shared Function IsPathNormalizationException(ex As Exception) As Boolean - - Return TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - End Function - - Friend Shared Function IsPathResolutionException(ex As Exception) As Boolean - - Return IsPathNormalizationException(ex) OrElse - TypeOf ex Is PathTooLongException - End Function - - Friend Shared Function IsLoggerWriteException(ex As Exception) As Boolean - - Return TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException OrElse - TypeOf ex Is FormatException OrElse - TypeOf ex Is ArgumentException - End Function - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb deleted file mode 100644 index de327946..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb +++ /dev/null @@ -1,85 +0,0 @@ -' ============================================================================ -' FILE: IOGuards.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' 
============================================================================ - -Option Strict On -Option Explicit On - -Imports System.IO - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Interne Hilfsklasse InternalIoDefaults zur kapselnden Umsetzung von Guard-, I/O- und Policy-Logik. - ''' - Friend NotInheritable Class InternalIoDefaults - Friend Const CopyBufferSize As Integer = 8192 - Friend Const FileStreamBufferSize As Integer = 81920 - Friend Const DefaultSniffBytes As Integer = 4096 - - Private Sub New() - End Sub - End Class - - ''' - ''' Zentrale IO-Helfer für harte Grenzen. - ''' SSOT-Regel: bounded copy wird nur hier gepflegt. - ''' - Friend NotInheritable Class StreamBounds - Private Sub New() - End Sub - - Friend Shared Sub CopyBounded _ - ( - input As Stream, - output As Stream, - maxBytes As Long - ) - - Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte - Dim total As Long = 0 - Dim n As Integer - - While True - n = input.Read(buf, 0, buf.Length) - If n <= 0 Then Exit While - - total += n - If total > maxBytes Then Throw New InvalidOperationException("bounded copy exceeded") - output.Write(buf, 0, n) - End While - End Sub - End Class - - ''' - ''' Kleine, zentrale Stream-Guards, um duplizierte Pattern-Checks in Archivroutinen zu reduzieren. - ''' Keine Semantik: reine Abfrage/Positionierung. 
- ''' - Friend NotInheritable Class StreamGuard - Private Sub New() - End Sub - - Friend Shared Function IsReadable _ - ( - stream As Stream - ) As Boolean - - Return stream IsNot Nothing AndAlso stream.CanRead - End Function - - Friend Shared Sub RewindToStart _ - ( - stream As Stream - ) - - If stream Is Nothing Then Return - If stream.CanSeek Then stream.Position = 0 - End Sub - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb deleted file mode 100644 index c4e4a5e4..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb +++ /dev/null @@ -1,73 +0,0 @@ -' ============================================================================ -' FILE: LogGuard.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System -Imports Microsoft.Extensions.Logging - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Defensiver Logger-Schutz. - ''' Logging darf niemals zu Erkennungsfehlern oder Exceptions führen. - ''' - Friend NotInheritable Class LogGuard - Private Sub New() - End Sub - - Friend Shared Sub Debug _ - ( - logger As ILogger, - message As String - ) - - If logger Is Nothing Then Return - If Not logger.IsEnabled(LogLevel.Debug) Then Return - Try - logger.LogDebug("{Message}", message) - Catch ex As Exception When ExceptionFilterGuard.IsLoggerWriteException(ex) - ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. 
- End Try - End Sub - - Friend Shared Sub Warn _ - ( - logger As ILogger, - message As String - ) - - If logger Is Nothing Then Return - If Not logger.IsEnabled(LogLevel.Warning) Then Return - Try - logger.LogWarning("{Message}", message) - Catch ex As Exception When ExceptionFilterGuard.IsLoggerWriteException(ex) - ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. - End Try - End Sub - - Friend Shared Sub [Error] _ - ( - logger As ILogger, - message As String, - ex As Exception - ) - - If logger Is Nothing Then Return - If Not logger.IsEnabled(LogLevel.Error) Then Return - Try - logger.LogError(ex, "{Message}", message) - Catch logEx As Exception When ExceptionFilterGuard.IsLoggerWriteException(logEx) - ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. - End Try - End Sub - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb deleted file mode 100644 index 105d67ec..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb +++ /dev/null @@ -1,58 +0,0 @@ -' ============================================================================ -' FILE: PathResolutionGuard.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' ============================================================================ - -Option Strict On -Option Explicit On - -Imports System -Imports System.IO -Imports Tomtastisch.FileClassifier - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Zentrale FullPath-Auflösung mit fail-closed Fehlerbehandlung und konfigurierbarer Protokollstufe. 
- ''' - Friend NotInheritable Class PathResolutionGuard - Private Sub New() - End Sub - - Friend Shared Function TryGetFullPath _ - ( - rawPath As String, - opt As FileTypeProjectOptions, - logPrefix As String, - warnLevel As Boolean, - ByRef fullPath As String - ) As Boolean - - Dim message As String - - fullPath = String.Empty - - Try - fullPath = Path.GetFullPath(rawPath) - Return True - Catch ex As Exception When ExceptionFilterGuard.IsPathResolutionException(ex) - - If opt IsNot Nothing Then - message = $"{logPrefix}: {ex.Message}" - If warnLevel Then - LogGuard.Warn(opt.Logger, message) - Else - LogGuard.Debug(opt.Logger, message) - End If - End If - - Return False - End Try - End Function - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md b/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md deleted file mode 100644 index eafecb0a..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Infrastructure.Utils.Guards Modul - -## 1. Zweck -Dieses Untermodul kapselt ausschliesslich Guard- und Policy-Utilities mit fail-closed Verhalten. - -## 2. Inhalt -- `ArgumentGuard.vb`: deterministische Argument- und Enum-Validierung. -- `IOGuards.vb`: Stream-Lesbarkeit, Rewind und bounded Copy. -- `ArchiveGuards.vb`: Archive-Payload-, Link- und Entry-Path-Guards. -- `PathResolutionGuard.vb`: sichere FullPath-Aufloesung mit kontrollierter Protokollierung. -- `DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung/Extraktion. -- `LogGuard.vb`: defensives Logging ohne Rekursion. -- `ExceptionFilterGuard.vb`: zentrale Catch-Filter-Sets als SSOT fuer redundanzfreie Exception-Guards. - -## 3. API und Verhalten -- Alle Klassen sind stateless und deterministisch. -- Fehlerpfade sind fail-closed und liefern klare Rueckgaben. -- Wiederkehrende Exception-Filter werden zentral ueber `ExceptionFilterGuard` gepflegt. - -## 4. 
Verifikation -- Nutzung erfolgt in `FileMaterializer`, `ArchiveInternals`, `CoreInternals` und Hashing-Komponenten. - -## 5. Diagramm -```mermaid -flowchart LR - A["Call Site"] --> B["Utils/Guards"] - B --> C["Guard Decision"] - C --> D["Fail-Closed Output"] -``` - -## 6. Verweise -- [Utils-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) -- [Infrastructure-Modul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) -- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/src/FileTypeDetection/Infrastructure/Utils/IterableUtils.vb b/src/FileTypeDetection/Infrastructure/Utils/IterableUtils.vb deleted file mode 100644 index bc293861..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/IterableUtils.vb +++ /dev/null @@ -1,77 +0,0 @@ -' ============================================================================ -' FILE: IterableUtils.vb -' -' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) -' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD -' - Try/Catch konsistent im Catch-Filter-Schema -' - Variablen im Deklarationsblock, spaltenartig ausgerichtet -' -' Kontext: -' - Minimale Array-/Iterable-Helfer (Defensive Copies). -' ============================================================================ - -Option Strict On -Option Explicit On - -Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils - - ''' - ''' Utility-Funktionen für defensive Kopien (Array-basierte Rückgaben). - ''' - ''' - ''' - ''' Zweck: - ''' - Verhindert, dass interne Arrays über Public API als Referenz nach außen geleakt werden. - ''' - Unterstützt defensive Copies bei Rückgaben und Snapshots. - ''' - ''' - ''' Fail-Closed: - ''' - Nothing bleibt Nothing; es findet keine implizite Erzeugung leerer Arrays statt. 
- ''' - ''' - Friend NotInheritable Class IterableUtils - - Private Sub New() - End Sub - - - ' ===================================================================== - ' Public API (Shared; Utility, stateless) - ' ===================================================================== - - ''' - ''' Erstellt eine defensive Kopie von . - ''' - ''' - ''' - ''' Ablaufstruktur: - ''' 1) Nullprüfung, - ''' 2) Kopie via (shallow copy), - ''' 3) Rückgabe als typisiertes Array. - ''' - ''' - ''' Hinweis: - ''' - Bei Referenztypen werden die Referenzen kopiert (shallow copy), nicht die Objekte selbst. - ''' - ''' - ''' Elementtyp. - ''' Quelle; Nothing bleibt Nothing. - ''' Defensive Kopie oder Nothing. - Public Shared Function CloneArray(Of T) _ - ( - source As T() - ) As T() - - ' Deklarationsblock - Dim copy() As T - - If source Is Nothing Then Return Nothing - - copy = CType(source.Clone(), T()) - Return copy - - End Function - - End Class - -End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/README.md b/src/FileTypeDetection/Infrastructure/Utils/README.md deleted file mode 100644 index 275635a3..00000000 --- a/src/FileTypeDetection/Infrastructure/Utils/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Infrastructure.Utils Modul - -## 1. Zweck -Dieses Verzeichnis enthaelt die zentrale SSOT-Schicht fuer interne, wiederverwendbare Utility-Helfer. -Der Fokus liegt auf deterministischen Guards, sicherer Pfad-/Archive-Validierung, defensiver I/O-Hilfe und policy-konformem Logging. - -## 2. Inhalt -- `EnumUtils.vb`: deterministische Enum-Wertauflistung mit optionaler Sortierung und Range (kein Guard). -- `IterableUtils.vb`: defensive Array-Kopien fuer sichere Rueckgaben (kein Guard). -- `Guards/ArgumentGuard.vb`: Argument-Guards fuer Null-, Enum- und Laengenpruefungen. -- `Guards/IOGuards.vb`: zentrale Stream-/Buffer-Helfer (`StreamGuard`, `StreamBounds`, `InternalIoDefaults`). -- `Guards/ArchiveGuards.vb`: Archive-spezifische Guards und Entry-Pfadnormalisierung. 
-- `Guards/PathResolutionGuard.vb`: fail-closed FullPath-Aufloesung mit kontrollierter Protokollierung. -- `Guards/DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung und Extraktion. -- `Guards/LogGuard.vb`: defensiver Logger-Schutz ohne Rekursion/Seiteneffekte. -- `Guards/ExceptionFilterGuard.vb`: zentrale Catch-Filter-SSOT fuer wiederkehrende Exception-Mengen. - -## 3. API und Verhalten -- Utilities sind stateless und deterministisch. -- Fehlerpfade sind fail-closed und liefern definierte Rueckgaben oder klar typisierte Exceptions. -- Utility-Klassen sind standardmaessig intern (`Friend`) und kapseln wiederholte Sicherheits-/Validierungsmuster. - -## 4. Verifikation -- Nutzung erfolgt in Core-/Infrastructure-/Abstraction-Typen. -- Korrektheit und Verhaltenstreue werden ueber Build-, Unit- und Contract-Tests abgesichert. - -## 5. Diagramm -```mermaid -flowchart LR - A["Call Site"] --> B["Infrastructure.Utils (SSOT)"] - B --> C["Deterministic Guard / IO / Path Decision"] - C --> D["Fail-Closed Result"] -``` - -## 6. 
Verweise -- [Modul-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) -- [Infrastructure-Modul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) -- [Guards-Cluster](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md) -- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb b/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb index 3f0216d9..bec15d0a 100644 --- a/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb +++ b/src/FileTypeDetection/Providers/Net8_0Plus/HashPrimitivesProvider.vb @@ -63,11 +63,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Zu kodierende Eingabedaten; Nothing wird als leeres Array behandelt. ''' Hex-String in Kleinbuchstaben ohne Trennzeichen. - Public Function EncodeLowerHex _ - ( - data As Byte() - ) As String Implements IHexCodec.EncodeLowerHex - + Public Function EncodeLowerHex(data As Byte()) As String Implements IHexCodec.EncodeLowerHex Dim safeData = If(data, Array.Empty(Of Byte)()) Return Convert.ToHexString(safeData).ToLowerInvariant() End Function @@ -97,11 +93,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' SHA256-Digest als Byte-Array. - Public Function ComputeHash _ - ( - data As Byte() - ) As Byte() Implements ISha256Primitives.ComputeHash - + Public Function ComputeHash(data As Byte()) As Byte() Implements ISha256Primitives.ComputeHash Dim safeData = If(data, Array.Empty(Of Byte)()) Return Security.Cryptography.SHA256.HashData(safeData) End Function @@ -111,11 +103,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. 
''' SHA256-Digest als Hex-String in Kleinbuchstaben. - Public Function ComputeHashHex _ - ( - data As Byte() - ) As String Implements ISha256Primitives.ComputeHashHex - + Public Function ComputeHashHex(data As Byte()) As String Implements ISha256Primitives.ComputeHashHex Return _codec.EncodeLowerHex(ComputeHash(data)) End Function End Class @@ -134,11 +122,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' Fasthash als . - Public Function ComputeHashUInt64 _ - ( - data As Byte() - ) As ULong Implements IFastHash64.ComputeHashUInt64 - + Public Function ComputeHashUInt64(data As Byte()) As ULong Implements IFastHash64.ComputeHashUInt64 Dim safeData = If(data, Array.Empty(Of Byte)()) Return IO.Hashing.XxHash3.HashToUInt64(safeData) End Function @@ -148,11 +132,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' 16-stelliger Hex-String in Kleinbuchstaben. - Public Function ComputeHashHex _ - ( - data As Byte() - ) As String Implements IFastHash64.ComputeHashHex - + Public Function ComputeHashHex(data As Byte()) As String Implements IFastHash64.ComputeHashHex Return ComputeHashUInt64(data).ToString("x16", CultureInfo.InvariantCulture) End Function End Class diff --git a/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb b/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb index deae05f3..31abe3a2 100644 --- a/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb +++ b/src/FileTypeDetection/Providers/NetStandard2_0/HashPrimitivesProvider.vb @@ -65,11 +65,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Zu kodierende Eingabedaten; Nothing wird als leeres Array behandelt. ''' Hex-String in Kleinbuchstaben ohne Trennzeichen. 
- Public Function EncodeLowerHex _ - ( - data As Byte() - ) As String Implements IHexCodec.EncodeLowerHex - + Public Function EncodeLowerHex(data As Byte()) As String Implements IHexCodec.EncodeLowerHex Dim safeData = If(data, Array.Empty(Of Byte)()) Dim chars As Char() Dim index As Integer = 0 @@ -111,11 +107,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' SHA256-Digest als Byte-Array. - Public Function ComputeHash _ - ( - data As Byte() - ) As Byte() Implements ISha256Primitives.ComputeHash - + Public Function ComputeHash(data As Byte()) As Byte() Implements ISha256Primitives.ComputeHash Dim safeData = If(data, Array.Empty(Of Byte)()) Using sha As Security.Cryptography.SHA256 = Security.Cryptography.SHA256.Create() Return sha.ComputeHash(safeData) @@ -127,11 +119,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' SHA256-Digest als Hex-String in Kleinbuchstaben. - Public Function ComputeHashHex _ - ( - data As Byte() - ) As String Implements ISha256Primitives.ComputeHashHex - + Public Function ComputeHashHex(data As Byte()) As String Implements ISha256Primitives.ComputeHashHex Return _codec.EncodeLowerHex(ComputeHash(data)) End Function End Class @@ -150,11 +138,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' Fasthash als . - Public Function ComputeHashUInt64 _ - ( - data As Byte() - ) As ULong Implements IFastHash64.ComputeHashUInt64 - + Public Function ComputeHashUInt64(data As Byte()) As ULong Implements IFastHash64.ComputeHashUInt64 Dim safeData = If(data, Array.Empty(Of Byte)()) Return IO.Hashing.XxHash3.HashToUInt64(safeData) End Function @@ -164,11 +148,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Eingabedaten; Nothing wird als leeres Array behandelt. ''' 16-stelliger Hex-String in Kleinbuchstaben. 
- Public Function ComputeHashHex _ - ( - data As Byte() - ) As String Implements IFastHash64.ComputeHashHex - + Public Function ComputeHashHex(data As Byte()) As String Implements IFastHash64.ComputeHashHex Return ComputeHashUInt64(data).ToString("x16", CultureInfo.InvariantCulture) End Function End Class diff --git a/src/FileTypeDetection/README.md b/src/FileTypeDetection/README.md index 2466ec95..ead8d0af 100644 --- a/src/FileTypeDetection/README.md +++ b/src/FileTypeDetection/README.md @@ -32,7 +32,6 @@ flowchart LR - [Audit Index](https://github.com/tomtastisch/FileClassifier/blob/main/docs/audit/000_INDEX.MD) - [Detektion-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Detection/README.md) - [Infrastruktur-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) -- [Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) - [Konfiguration-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Configuration/README.md) - [Abstractions-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Abstractions/README.md) - [Composition-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Composition/README.md) diff --git a/tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs b/tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs deleted file mode 100644 index 697576e6..00000000 --- a/tests/FileTypeDetectionLib.Tests/Contracts/CodePolicy045ComplianceTests.cs +++ /dev/null @@ -1,63 +0,0 @@ -using System.Text.RegularExpressions; - -namespace FileTypeDetectionLib.Tests.Contracts; - -[Trait("Category", "Governance")] -public sealed class CodePolicy045ComplianceTests -{ - private static readonly Regex NamespaceRegex = new(@"^\s*Namespace\s+", - 
RegexOptions.Multiline | RegexOptions.CultureInvariant); - private static readonly Regex PublicTypeRegex = new(@"^\s*Public\s+(?:NotInheritable\s+)?(?:Class|Enum|Structure|Module|Interface)\s+", - RegexOptions.Multiline | RegexOptions.CultureInvariant); - private static readonly Regex ForbiddenCatchPseudoFilterRegex = new(@"Catch\s+\w+\s+As\s+Exception\s+When\s+TypeOf\s+\w+\s+Is\s+Exception", - RegexOptions.CultureInvariant); - - [Fact] - public void VbFiles_UnderSrcFileTypeDetection_ComplyWithCore045LayoutRules() - { - var repoRoot = Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..")); - var sourceRoot = Path.Combine(repoRoot, "src", "FileTypeDetection"); - Assert.True(Directory.Exists(sourceRoot), $"Source root missing: {sourceRoot}"); - - var files = Directory.GetFiles(sourceRoot, "*.vb", SearchOption.AllDirectories) - .Where(path => !path.Contains($"{Path.DirectorySeparatorChar}bin{Path.DirectorySeparatorChar}", - StringComparison.OrdinalIgnoreCase)) - .Where(path => !path.Contains($"{Path.DirectorySeparatorChar}obj{Path.DirectorySeparatorChar}", - StringComparison.OrdinalIgnoreCase)) - .OrderBy(path => path, StringComparer.Ordinal) - .ToArray(); - - Assert.NotEmpty(files); - - foreach (var file in files) - { - var content = File.ReadAllText(file); - - Assert.Contains("' FILE:", content, StringComparison.Ordinal); - Assert.Contains("INTERNE POLICY", content, StringComparison.Ordinal); - Assert.Contains("Option Strict On", content, StringComparison.Ordinal); - Assert.Contains("Option Explicit On", content, StringComparison.Ordinal); - Assert.True(NamespaceRegex.IsMatch(content), $"Missing namespace declaration: {file}"); - - var fileIndex = content.IndexOf("' FILE:", StringComparison.Ordinal); - var strictIndex = content.IndexOf("Option Strict On", StringComparison.Ordinal); - var explicitIndex = content.IndexOf("Option Explicit On", StringComparison.Ordinal); - var namespaceIndex = NamespaceRegex.Match(content).Index; - - 
Assert.True(fileIndex >= 0 && strictIndex > fileIndex, - $"Policy 045 order violated ('FILE' before Option Strict): {file}"); - Assert.True(explicitIndex > strictIndex, - $"Policy 045 order violated (Option Explicit after Option Strict): {file}"); - Assert.True(namespaceIndex > explicitIndex, - $"Policy 045 order violated (Namespace after options): {file}"); - - Assert.False(ForbiddenCatchPseudoFilterRegex.IsMatch(content), - $"Policy 045 violation (forbidden catch pseudo-filter): {file}"); - - if (PublicTypeRegex.IsMatch(content)) - { - Assert.Contains("''' ", content, StringComparison.Ordinal); - } - } - } -} diff --git a/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs b/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs deleted file mode 100644 index 27992a4e..00000000 --- a/tests/FileTypeDetectionLib.Tests/Contracts/PublicApiAllowlistTests.cs +++ /dev/null @@ -1,48 +0,0 @@ -using Tomtastisch.FileClassifier; - -namespace FileTypeDetectionLib.Tests.Contracts; - -[Trait("Category", "ApiContract")] -public sealed class PublicApiAllowlistTests -{ - private static readonly string[] AllowedPublicTypes = - { - "Tomtastisch.FileClassifier.ArchiveProcessing", - "Tomtastisch.FileClassifier.DetectionDetail", - "Tomtastisch.FileClassifier.EvidenceHashing", - "Tomtastisch.FileClassifier.FileKind", - "Tomtastisch.FileClassifier.FileMaterializer", - "Tomtastisch.FileClassifier.FileType", - "Tomtastisch.FileClassifier.FileTypeDetector", - "Tomtastisch.FileClassifier.FileTypeOptions", - "Tomtastisch.FileClassifier.FileTypeProjectBaseline", - "Tomtastisch.FileClassifier.FileTypeProjectOptions", - "Tomtastisch.FileClassifier.HashDigestSet", - "Tomtastisch.FileClassifier.HashEvidence", - "Tomtastisch.FileClassifier.HashOptions", - "Tomtastisch.FileClassifier.HashRoundTripReport", - "Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot", - "Tomtastisch.FileClassifier.HashSourceType", - "Tomtastisch.FileClassifier.ZipExtractedEntry" - }; - 
- [Fact] - public void PublicTypes_MatchExplicitAllowlist() - { - var assembly = typeof(FileTypeDetector).Assembly; - var actual = assembly.GetTypes() - .Where(type => (type.IsPublic || type.IsNestedPublic) && - type.Namespace == "Tomtastisch.FileClassifier") - .Select(type => type.FullName) - .Where(name => !string.IsNullOrWhiteSpace(name)) - .Cast() - .OrderBy(name => name, StringComparer.Ordinal) - .ToArray(); - - var expected = AllowedPublicTypes - .OrderBy(name => name, StringComparer.Ordinal) - .ToArray(); - - Assert.Equal(expected, actual); - } -} diff --git a/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt b/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt index 4f895c90..0fc137a7 100644 --- a/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt +++ b/tests/FileTypeDetectionLib.Tests/Contracts/public-api.snapshot.txt @@ -1,12 +1,12 @@ -F:Tomtastisch.FileClassifier.FileKind.Doc:Tomtastisch.FileClassifier.FileKind +F:Tomtastisch.FileClassifier.FileKind.Docx:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Gif:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Jpeg:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Pdf:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Png:Tomtastisch.FileClassifier.FileKind -F:Tomtastisch.FileClassifier.FileKind.Ppt:Tomtastisch.FileClassifier.FileKind +F:Tomtastisch.FileClassifier.FileKind.Pptx:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Unknown:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Webp:Tomtastisch.FileClassifier.FileKind -F:Tomtastisch.FileClassifier.FileKind.Xls:Tomtastisch.FileClassifier.FileKind +F:Tomtastisch.FileClassifier.FileKind.Xlsx:Tomtastisch.FileClassifier.FileKind F:Tomtastisch.FileClassifier.FileKind.Zip:Tomtastisch.FileClassifier.FileKind 
F:Tomtastisch.FileClassifier.HashSourceType.ArchiveEntries:Tomtastisch.FileClassifier.HashSourceType F:Tomtastisch.FileClassifier.HashSourceType.FilePath:Tomtastisch.FileClassifier.HashSourceType @@ -48,9 +48,6 @@ M:Tomtastisch.FileClassifier.FileTypeOptions.LoadOptions(System.String):System.B M:Tomtastisch.FileClassifier.FileTypeProjectBaseline.ApplyDeterministicDefaults():System.Void M:Tomtastisch.FileClassifier.FileTypeProjectOptions..ctor() M:Tomtastisch.FileClassifier.HashOptions..ctor() -M:Tomtastisch.FileClassifier.HashRoundTripReport.Evidence(Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot):Tomtastisch.FileClassifier.HashEvidence -M:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalEquals(Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot):System.Boolean -M:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalEquals(Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot):System.Boolean M:Tomtastisch.FileClassifier.ZipExtractedEntry.OpenReadOnlyStream():System.IO.MemoryStream P:Tomtastisch.FileClassifier.DetectionDetail.DetectedType:Tomtastisch.FileClassifier.FileType P:Tomtastisch.FileClassifier.DetectionDetail.ExtensionVerified:System.Boolean @@ -97,11 +94,20 @@ P:Tomtastisch.FileClassifier.HashOptions.IncludeFastHash:System.Boolean P:Tomtastisch.FileClassifier.HashOptions.IncludePayloadCopies:System.Boolean P:Tomtastisch.FileClassifier.HashOptions.IncludeSecureHash:System.Boolean P:Tomtastisch.FileClassifier.HashOptions.MaterializedFileName:System.String +P:Tomtastisch.FileClassifier.HashRoundTripReport.H1:Tomtastisch.FileClassifier.HashEvidence +P:Tomtastisch.FileClassifier.HashRoundTripReport.H2:Tomtastisch.FileClassifier.HashEvidence +P:Tomtastisch.FileClassifier.HashRoundTripReport.H3:Tomtastisch.FileClassifier.HashEvidence +P:Tomtastisch.FileClassifier.HashRoundTripReport.H4:Tomtastisch.FileClassifier.HashEvidence P:Tomtastisch.FileClassifier.HashRoundTripReport.InputPath:System.String 
P:Tomtastisch.FileClassifier.HashRoundTripReport.IsArchiveInput:System.Boolean P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalConsistent:System.Boolean +P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalH1EqualsH2:System.Boolean +P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalH1EqualsH3:System.Boolean +P:Tomtastisch.FileClassifier.HashRoundTripReport.LogicalH1EqualsH4:System.Boolean P:Tomtastisch.FileClassifier.HashRoundTripReport.Notes:System.String -P:Tomtastisch.FileClassifier.HashRoundTripReport.Slots:Tomtastisch.FileClassifier.HashRoundTripReport+HashSlot[] +P:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalH1EqualsH2:System.Boolean +P:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalH1EqualsH3:System.Boolean +P:Tomtastisch.FileClassifier.HashRoundTripReport.PhysicalH1EqualsH4:System.Boolean P:Tomtastisch.FileClassifier.ZipExtractedEntry.Content:System.Collections.Immutable.ImmutableArray P:Tomtastisch.FileClassifier.ZipExtractedEntry.RelativePath:System.String P:Tomtastisch.FileClassifier.ZipExtractedEntry.Size:System.Int32 @@ -121,15 +127,15 @@ T:class Tomtastisch.FileClassifier.HashRoundTripReport T:class Tomtastisch.FileClassifier.ZipExtractedEntry T:enum Tomtastisch.FileClassifier.FileKind T:enum Tomtastisch.FileClassifier.HashSourceType -V:Tomtastisch.FileClassifier.FileKind.Doc +V:Tomtastisch.FileClassifier.FileKind.Docx V:Tomtastisch.FileClassifier.FileKind.Gif V:Tomtastisch.FileClassifier.FileKind.Jpeg V:Tomtastisch.FileClassifier.FileKind.Pdf V:Tomtastisch.FileClassifier.FileKind.Png -V:Tomtastisch.FileClassifier.FileKind.Ppt +V:Tomtastisch.FileClassifier.FileKind.Pptx V:Tomtastisch.FileClassifier.FileKind.Unknown V:Tomtastisch.FileClassifier.FileKind.Webp -V:Tomtastisch.FileClassifier.FileKind.Xls +V:Tomtastisch.FileClassifier.FileKind.Xlsx V:Tomtastisch.FileClassifier.FileKind.Zip V:Tomtastisch.FileClassifier.HashSourceType.ArchiveEntries V:Tomtastisch.FileClassifier.HashSourceType.FilePath diff --git 
a/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj b/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj index 688c5a1d..4f44faa8 100644 --- a/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj +++ b/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj @@ -25,7 +25,6 @@ - diff --git a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs index ea99f326..3cf46cf9 100644 --- a/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs +++ b/tests/FileTypeDetectionLib.Tests/Steps/FileTypeDetectionSteps.cs @@ -9,13 +9,6 @@ public sealed class FileTypeDetectionSteps { private const string StateKey = "detection_state"; private const string ResourceColumn = "ressource"; - private static readonly IReadOnlyDictionary LegacyFileKindAliases = - new Dictionary(StringComparer.OrdinalIgnoreCase) - { - ["Docx"] = FileKind.Doc, - ["Xlsx"] = FileKind.Xls, - ["Pptx"] = FileKind.Ppt - }; private readonly ScenarioContext _scenarioContext; public FileTypeDetectionSteps(ScenarioContext scenarioContext) @@ -283,7 +276,8 @@ public void WhenICheckCurrentBytesType(string expectedKind) { var state = State(); Assert.NotNull(state.CurrentPayload); - Assert.True(TryParseFileKindLiteral(expectedKind, out var kind), + Assert.True( + Enum.TryParse(expectedKind, true, out var kind), $"Unknown FileKind literal in feature: {expectedKind}"); var detector = new FileTypeDetector(); @@ -296,7 +290,8 @@ public void ThenTheDetectedKindIs(string expectedKind) var state = State(); Assert.NotNull(state.LastResult); - Assert.True(TryParseFileKindLiteral(expectedKind, out var expected), + Assert.True( + Enum.TryParse(expectedKind, true, out var expected), $"Unknown FileKind literal in feature: {expectedKind}"); Assert.Equal(expected, state.LastResult!.Kind); @@ -425,9 +420,9 @@ public void ThenHashReportIsLogicallyConsistent() var state = State(); 
Assert.NotNull(state.LastRoundTripReport); Assert.True(state.LastRoundTripReport!.LogicalConsistent); - Assert.True(state.LastRoundTripReport.LogicalEquals(HashRoundTripReport.HashSlot.H2)); - Assert.True(state.LastRoundTripReport.LogicalEquals(HashRoundTripReport.HashSlot.H3)); - Assert.True(state.LastRoundTripReport.LogicalEquals(HashRoundTripReport.HashSlot.H4)); + Assert.True(state.LastRoundTripReport.LogicalH1EqualsH2); + Assert.True(state.LastRoundTripReport.LogicalH1EqualsH3); + Assert.True(state.LastRoundTripReport.LogicalH1EqualsH4); } [Then("ist der Hashbericht als Archiv klassifiziert {string}")] @@ -545,16 +540,6 @@ private static void AssertResourceExists(string name) Assert.True(File.Exists(path), $"Test resource missing: {path}"); } - private static bool TryParseFileKindLiteral(string literal, out FileKind kind) - { - if (Enum.TryParse(literal, true, out kind)) - { - return true; - } - - return LegacyFileKindAliases.TryGetValue(literal, out kind); - } - private static byte[] CreateArchivePayload(string archiveType) { var normalized = archiveType.Trim().ToLowerInvariant(); diff --git a/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs index 9c58b8e8..aaf755cb 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/DetectionDetailAndArchiveValidationUnitTests.cs @@ -13,7 +13,7 @@ public void DetectDetailed_ReturnsStructuredArchiveTrace_ForDocx() var path = TestResources.Resolve("sample.docx"); var detail = new FileTypeDetector().DetectDetailed(path); - Assert.Equal(FileKind.Doc, detail.DetectedType.Kind); + Assert.Equal(FileKind.Docx, detail.DetectedType.Kind); Assert.Equal("ArchiveStructuredRefined", detail.ReasonCode); Assert.True(detail.UsedZipContentCheck); Assert.True(detail.UsedStructuredRefinement); diff --git 
a/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs index bee7bd8e..f7afd2b8 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/EndToEndFailClosedMatrixUnitTests.cs @@ -12,9 +12,9 @@ public sealed class EndToEndFailClosedMatrixUnitTests { "sample.jpg", FileKind.Jpeg, false }, { "sample.gif", FileKind.Gif, false }, { "sample.webp", FileKind.Webp, false }, - { "sample.docx", FileKind.Doc, false }, - { "sample.xlsx", FileKind.Xls, false }, - { "sample.pptx", FileKind.Ppt, false }, + { "sample.docx", FileKind.Docx, false }, + { "sample.xlsx", FileKind.Xlsx, false }, + { "sample.pptx", FileKind.Pptx, false }, { "sample.zip", FileKind.Zip, true }, { "sample.7z", FileKind.Zip, true }, { "sample.rar", FileKind.Zip, true }, @@ -23,35 +23,35 @@ public sealed class EndToEndFailClosedMatrixUnitTests public static TheoryData SupportedAliasMatrix => new() { - { "sample.docx", ".doc", FileKind.Doc }, - { "sample.docx", ".docm", FileKind.Doc }, - { "sample.docx", ".docb", FileKind.Doc }, - { "sample.docx", ".dot", FileKind.Doc }, - { "sample.docx", ".dotm", FileKind.Doc }, - { "sample.docx", ".dotx", FileKind.Doc }, - { "sample.docx", ".odt", FileKind.Doc }, - { "sample.docx", ".ott", FileKind.Doc }, - { "sample.xlsx", ".xls", FileKind.Xls }, - { "sample.xlsx", ".xlsm", FileKind.Xls }, - { "sample.xlsx", ".xlsb", FileKind.Xls }, - { "sample.xlsx", ".xlt", FileKind.Xls }, - { "sample.xlsx", ".xltm", FileKind.Xls }, - { "sample.xlsx", ".xltx", FileKind.Xls }, - { "sample.xlsx", ".xltb", FileKind.Xls }, - { "sample.xlsx", ".xlam", FileKind.Xls }, - { "sample.xlsx", ".xla", FileKind.Xls }, - { "sample.xlsx", ".ods", FileKind.Xls }, - { "sample.xlsx", ".ots", FileKind.Xls }, - { "sample.pptx", ".ppt", FileKind.Ppt }, - { "sample.pptx", ".pptm", FileKind.Ppt }, - { "sample.pptx", ".pot", FileKind.Ppt }, - { 
"sample.pptx", ".potm", FileKind.Ppt }, - { "sample.pptx", ".potx", FileKind.Ppt }, - { "sample.pptx", ".pps", FileKind.Ppt }, - { "sample.pptx", ".ppsm", FileKind.Ppt }, - { "sample.pptx", ".ppsx", FileKind.Ppt }, - { "sample.pptx", ".odp", FileKind.Ppt }, - { "sample.pptx", ".otp", FileKind.Ppt }, + { "sample.docx", ".doc", FileKind.Docx }, + { "sample.docx", ".docm", FileKind.Docx }, + { "sample.docx", ".docb", FileKind.Docx }, + { "sample.docx", ".dot", FileKind.Docx }, + { "sample.docx", ".dotm", FileKind.Docx }, + { "sample.docx", ".dotx", FileKind.Docx }, + { "sample.docx", ".odt", FileKind.Docx }, + { "sample.docx", ".ott", FileKind.Docx }, + { "sample.xlsx", ".xls", FileKind.Xlsx }, + { "sample.xlsx", ".xlsm", FileKind.Xlsx }, + { "sample.xlsx", ".xlsb", FileKind.Xlsx }, + { "sample.xlsx", ".xlt", FileKind.Xlsx }, + { "sample.xlsx", ".xltm", FileKind.Xlsx }, + { "sample.xlsx", ".xltx", FileKind.Xlsx }, + { "sample.xlsx", ".xltb", FileKind.Xlsx }, + { "sample.xlsx", ".xlam", FileKind.Xlsx }, + { "sample.xlsx", ".xla", FileKind.Xlsx }, + { "sample.xlsx", ".ods", FileKind.Xlsx }, + { "sample.xlsx", ".ots", FileKind.Xlsx }, + { "sample.pptx", ".ppt", FileKind.Pptx }, + { "sample.pptx", ".pptm", FileKind.Pptx }, + { "sample.pptx", ".pot", FileKind.Pptx }, + { "sample.pptx", ".potm", FileKind.Pptx }, + { "sample.pptx", ".potx", FileKind.Pptx }, + { "sample.pptx", ".pps", FileKind.Pptx }, + { "sample.pptx", ".ppsm", FileKind.Pptx }, + { "sample.pptx", ".ppsx", FileKind.Pptx }, + { "sample.pptx", ".odp", FileKind.Pptx }, + { "sample.pptx", ".otp", FileKind.Pptx }, { "sample.zip", ".tar", FileKind.Zip }, { "sample.zip", ".tgz", FileKind.Zip }, { "sample.zip", ".tar.gz", FileKind.Zip }, diff --git a/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs index b5119829..90684448 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs +++ 
b/tests/FileTypeDetectionLib.Tests/Unit/ExtensionCheckUnitTests.cs @@ -72,7 +72,7 @@ public void Detect_DocxPayloadWithPdfExtension_RemainsDocx_UnlessVerifyExtension var detectedWithoutExtensionPolicy = detector.Detect(path); var detectedWithExtensionPolicy = detector.Detect(path, true); - Assert.Equal(FileKind.Doc, detectedWithoutExtensionPolicy.Kind); + Assert.Equal(FileKind.Docx, detectedWithoutExtensionPolicy.Kind); Assert.Equal(FileKind.Unknown, detectedWithExtensionPolicy.Kind); } finally @@ -91,7 +91,7 @@ public void DetectAndVerifyExtension_AcceptsXlsmExtension_ForSpreadsheetOpenXmlP try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Xls, detected.Kind); + Assert.Equal(FileKind.Xlsx, detected.Kind); } finally { @@ -109,7 +109,7 @@ public void DetectAndVerifyExtension_AcceptsXlsbExtension_ForSpreadsheetBinaryWo try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Xls, detected.Kind); + Assert.Equal(FileKind.Xlsx, detected.Kind); } finally { @@ -127,7 +127,7 @@ public void DetectAndVerifyExtension_AcceptsOdsExtension_ForOpenDocumentSpreadsh try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Xls, detected.Kind); + Assert.Equal(FileKind.Xlsx, detected.Kind); } finally { @@ -145,7 +145,7 @@ public void DetectAndVerifyExtension_AcceptsDocExtension_ForLegacyOfficePayload( try { var detected = detector.Detect(path, true); - Assert.Equal(FileKind.Doc, detected.Kind); + Assert.Equal(FileKind.Docx, detected.Kind); } finally { diff --git a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs index c70d9490..69e3db9f 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorEdgeUnitTests.cs @@ -56,7 +56,7 @@ public void DetectDetailed_ReturnsStructuredRefined_ForDocx() var path = TestResources.Resolve("sample.docx"); var detail = 
new FileTypeDetector().DetectDetailed(path); - Assert.Equal(FileKind.Doc, detail.DetectedType.Kind); + Assert.Equal(FileKind.Docx, detail.DetectedType.Kind); Assert.Equal("ArchiveStructuredRefined", detail.ReasonCode); Assert.True(detail.UsedStructuredRefinement); } diff --git a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs index 3c1b0ba6..4d9e6ae9 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/FileTypeDetectorPrivateBranchUnitTests.cs @@ -173,10 +173,10 @@ public void FinalizeArchiveDetection_ReturnsRefined_WhenNotUnknown() var trace = Activator.CreateInstance(traceType!); var opt = FileTypeProjectOptions.DefaultOptions(); - var refined = FileTypeRegistry.Resolve(FileKind.Doc); + var refined = FileTypeRegistry.Resolve(FileKind.Docx); var result = TestGuard.NotNull(method.Invoke(null, new[] { refined, opt, trace! 
}) as FileType); - Assert.Equal(FileKind.Doc, result.Kind); + Assert.Equal(FileKind.Docx, result.Kind); } [Fact] @@ -207,19 +207,19 @@ public void ExtensionMatchesKind_HandlesEmptyAndMismatch() } [Theory] - [InlineData("file.doc", FileKind.Doc)] - [InlineData("file.docm", FileKind.Doc)] - [InlineData("file.docx", FileKind.Doc)] - [InlineData("file.odt", FileKind.Doc)] - [InlineData("file.xls", FileKind.Xls)] - [InlineData("file.xlsm", FileKind.Xls)] - [InlineData("file.xlsx", FileKind.Xls)] - [InlineData("file.xlsb", FileKind.Xls)] - [InlineData("file.ods", FileKind.Xls)] - [InlineData("file.ppt", FileKind.Ppt)] - [InlineData("file.pptm", FileKind.Ppt)] - [InlineData("file.pptx", FileKind.Ppt)] - [InlineData("file.odp", FileKind.Ppt)] + [InlineData("file.doc", FileKind.Docx)] + [InlineData("file.docm", FileKind.Docx)] + [InlineData("file.docx", FileKind.Docx)] + [InlineData("file.odt", FileKind.Docx)] + [InlineData("file.xls", FileKind.Xlsx)] + [InlineData("file.xlsm", FileKind.Xlsx)] + [InlineData("file.xlsx", FileKind.Xlsx)] + [InlineData("file.xlsb", FileKind.Xlsx)] + [InlineData("file.ods", FileKind.Xlsx)] + [InlineData("file.ppt", FileKind.Pptx)] + [InlineData("file.pptm", FileKind.Pptx)] + [InlineData("file.pptx", FileKind.Pptx)] + [InlineData("file.odp", FileKind.Pptx)] public void ExtensionMatchesKind_AcceptsOfficeVariantAliases(string path, FileKind expectedKind) { var method = diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs index 2450fe11..a7e5abad 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HashingEvidenceTests.cs @@ -60,13 +60,6 @@ internal static string ComputeHmacSha256Hex(byte[] key, byte[] payload) using var hmac = new HMACSHA256(key); return Convert.ToHexString(hmac.ComputeHash(payload)).ToLowerInvariant(); } - - internal static Type GetInternalType(string fullName) - { - var type = 
typeof(EvidenceHashing).Assembly.GetType(fullName, throwOnError: false); - Assert.NotNull(type); - return type!; - } } // Section 1: SHA256 physical vs logical behavior @@ -407,8 +400,7 @@ public void EvidenceHashing_HashBytes_UsesLoadedIncludeFastHash() [Fact] public void ComputeFastHash_ReturnsEmpty_WhenOptionDisabled() { - var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); - var method = coreType.GetMethod("ComputeFastHash", BindingFlags.NonPublic | BindingFlags.Static)!; + var method = typeof(EvidenceHashing).GetMethod("ComputeFastHash", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var options = new HashOptions { IncludeFastHash = false }; @@ -642,9 +634,9 @@ public void VerifyRoundTrip_ProducesLogicalConsistency(string fixtureId, bool ex Assert.Equal(expectedArchive, report.IsArchiveInput); Assert.True(report.LogicalConsistent); - Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); - Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H3)); - Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H4)); + Assert.True(report.LogicalH1EqualsH2); + Assert.True(report.LogicalH1EqualsH3); + Assert.True(report.LogicalH1EqualsH4); } [Theory] @@ -792,7 +784,7 @@ public void VerifyRoundTrip_ReturnsFailure_WhenH1MissingLogicalDigest() var path = TestResources.Resolve("sample.pdf"); var report = EvidenceHashing.VerifyRoundTrip(path); - Assert.False(report.Evidence(HashRoundTripReport.HashSlot.H1).Digests.HasLogicalHash); + Assert.False(report.H1.Digests.HasLogicalHash); Assert.Contains("h1", report.Notes, StringComparison.OrdinalIgnoreCase); } @@ -963,8 +955,7 @@ public void ResolveHashOptions_FallsBack_WhenProjectOptionsNull() [Fact] public void NormalizedEntry_Defaults_WhenConstructedWithNulls() { - var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); - var type = 
coreType.GetNestedTypes(BindingFlags.NonPublic | BindingFlags.Public) + var type = typeof(EvidenceHashing).GetNestedTypes(BindingFlags.NonPublic) .First(t => t.Name == "NormalizedEntry"); var ctor = type.GetConstructors(BindingFlags.NonPublic | BindingFlags.Instance) @@ -972,11 +963,9 @@ public void NormalizedEntry_Defaults_WhenConstructedWithNulls() var instance = ctor.Invoke(new object?[] { null, null }); var relativePath = - (string)type.GetProperty("RelativePath", BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance)! - .GetValue(instance)!; + (string)type.GetProperty("RelativePath", BindingFlags.NonPublic | BindingFlags.Instance)!.GetValue(instance)!; var content = - (byte[])type.GetProperty("Content", BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance)! - .GetValue(instance)!; + (byte[])type.GetProperty("Content", BindingFlags.NonPublic | BindingFlags.Instance)!.GetValue(instance)!; Assert.Equal(string.Empty, relativePath); Assert.NotNull(content); @@ -986,16 +975,16 @@ public void NormalizedEntry_Defaults_WhenConstructedWithNulls() [Fact] public void HashRoundTripReport_Constructor_DefaultsToFailureEvidence_WhenInputsNull() { - var report = new HashRoundTripReport("", isArchiveInput: false, notes: null, null, null, null, - null); + var report = new HashRoundTripReport("", isArchiveInput: false, h1: null, h2: null, h3: null, + h4: null, notes: null); Assert.False(report.LogicalConsistent); - Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); - Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H3)); - Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H4)); - Assert.False(report.PhysicalEquals(HashRoundTripReport.HashSlot.H2)); - Assert.False(report.PhysicalEquals(HashRoundTripReport.HashSlot.H3)); - Assert.False(report.PhysicalEquals(HashRoundTripReport.HashSlot.H4)); + Assert.False(report.LogicalH1EqualsH2); + Assert.False(report.LogicalH1EqualsH3); + 
Assert.False(report.LogicalH1EqualsH4); + Assert.False(report.PhysicalH1EqualsH2); + Assert.False(report.PhysicalH1EqualsH3); + Assert.False(report.PhysicalH1EqualsH4); } [Fact] @@ -1023,16 +1012,16 @@ public void HashRoundTripReport_Constructor_ReportsConsistency_WhenLogicalAndPhy digests: digest, notes: "ok"); - var report = new HashRoundTripReport("x", isArchiveInput: false, notes: "ok", evidence, evidence, - evidence, evidence); + var report = new HashRoundTripReport("x", isArchiveInput: false, h1: evidence, h2: evidence, + h3: evidence, h4: evidence, notes: "ok"); Assert.True(report.LogicalConsistent); - Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); - Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H3)); - Assert.True(report.LogicalEquals(HashRoundTripReport.HashSlot.H4)); - Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H2)); - Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H3)); - Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H4)); + Assert.True(report.LogicalH1EqualsH2); + Assert.True(report.LogicalH1EqualsH3); + Assert.True(report.LogicalH1EqualsH4); + Assert.True(report.PhysicalH1EqualsH2); + Assert.True(report.PhysicalH1EqualsH3); + Assert.True(report.PhysicalH1EqualsH4); } [Fact] @@ -1060,11 +1049,11 @@ public void HashRoundTripReport_Constructor_DistinguishesPhysicalWhenLogicalMiss digests: digest, notes: "ok"); - var report = new HashRoundTripReport("x", isArchiveInput: false, notes: "ok", evidence, evidence, - evidence, evidence); + var report = new HashRoundTripReport("x", isArchiveInput: false, h1: evidence, h2: evidence, + h3: evidence, h4: evidence, notes: "ok"); - Assert.False(report.LogicalEquals(HashRoundTripReport.HashSlot.H2)); - Assert.True(report.PhysicalEquals(HashRoundTripReport.HashSlot.H2)); + Assert.False(report.LogicalH1EqualsH2); + Assert.True(report.PhysicalH1EqualsH2); } [Fact] @@ -1094,8 +1083,7 @@ public void 
HashRoundTripReport_EqualPhysical_ReturnsFalse_WhenEvidenceNull() [Fact] public void NormalizeLabel_FallsBack_ForNullOrWhitespace() { - var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); - var method = coreType.GetMethod("NormalizeLabel", BindingFlags.NonPublic | BindingFlags.Static)!; + var method = typeof(EvidenceHashing).GetMethod("NormalizeLabel", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var label1 = TestGuard.NotNull(method.Invoke(null, new object?[] { null }) as string); @@ -1108,8 +1096,7 @@ public void NormalizeLabel_FallsBack_ForNullOrWhitespace() [Fact] public void CopyBytes_ReturnsEmpty_ForNullOrEmpty() { - var coreType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingCore"); - var method = coreType.GetMethod("CopyBytes", BindingFlags.NonPublic | BindingFlags.Static)!; + var method = typeof(EvidenceHashing).GetMethod("CopyBytes", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var empty1 = TestGuard.NotNull(method.Invoke(null, new object?[] { null }) as byte[]); @@ -1122,8 +1109,7 @@ public void CopyBytes_ReturnsEmpty_ForNullOrEmpty() [Fact] public void TryReadFileBounded_ReturnsFalse_ForMissingPathOrOptions() { - var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIO"); - var method = ioType.GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static); + var method = typeof(EvidenceHashing).GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); var bytes = Array.Empty(); @@ -1141,8 +1127,7 @@ public void TryReadFileBounded_ReturnsFalse_ForMissingPathOrOptions() [Fact] public void TryReadFileBounded_ReturnsFalse_WhenFileTooLarge() { - var ioType = HashingEvidenceTestHelpers.GetInternalType("Tomtastisch.FileClassifier.EvidenceHashingIO"); - var method = ioType.GetMethod("TryReadFileBounded", 
BindingFlags.NonPublic | BindingFlags.Static)!; + var method = typeof(EvidenceHashing).GetMethod("TryReadFileBounded", BindingFlags.NonPublic | BindingFlags.Static)!; Assert.NotNull(method); using var scope = TestTempPaths.CreateScope("ftd-hash-read"); diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs index c10a0604..1781e08e 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HeaderDetectionWarningUnitTests.cs @@ -20,7 +20,7 @@ public void Detect_DoesNotLogWarning_ForStructuredDocxDetection() var source = TestResources.Resolve("sample.docx"); var detected = new FileTypeDetector().Detect(source); - Assert.Equal(FileKind.Doc, detected.Kind); + Assert.Equal(FileKind.Docx, detected.Kind); Assert.DoesNotContain(logger.Messages, m => m.Contains("Keine direkte Content-Erkennung", StringComparison.Ordinal)); } diff --git a/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs index b4f306f4..adef7cb8 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/HeaderOnlyPolicyUnitTests.cs @@ -21,7 +21,7 @@ public void Detect_StillRefines_ArchiveContainers_WhenHeaderOnlyNonZipIsTrue() var source = TestResources.Resolve("sample.docx"); var detected = new FileTypeDetector().Detect(source); - Assert.Equal(FileKind.Doc, detected.Kind); + Assert.Equal(FileKind.Docx, detected.Kind); } [Fact] diff --git a/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs index 6e63e544..61f6427d 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/LegacyOfficeBinaryRefinerUnitTests.cs @@ 
-5,9 +5,9 @@ namespace FileTypeDetectionLib.Tests.Unit; public sealed class LegacyOfficeBinaryRefinerUnitTests { [Theory] - [InlineData("WordDocument", FileKind.Doc)] - [InlineData("Workbook", FileKind.Xls)] - [InlineData("PowerPoint Document", FileKind.Ppt)] + [InlineData("WordDocument", FileKind.Docx)] + [InlineData("Workbook", FileKind.Xlsx)] + [InlineData("PowerPoint Document", FileKind.Pptx)] public void TryRefineBytes_DetectsLegacyOfficeMarkers(string marker, FileKind expected) { var payload = CreateOleLikePayload(marker); diff --git a/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs b/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs index f5195a2c..badef674 100644 --- a/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs +++ b/tests/FileTypeDetectionLib.Tests/Unit/OpenXmlRefinerUnitTests.cs @@ -25,10 +25,10 @@ public void TryRefineStream_ReturnsUnknown_ForUnreadableStream() } [Theory] - [InlineData("word/document.xml", FileKind.Doc)] - [InlineData("xl/workbook.xml", FileKind.Xls)] - [InlineData("xl/workbook.bin", FileKind.Xls)] - [InlineData("ppt/presentation.xml", FileKind.Ppt)] + [InlineData("word/document.xml", FileKind.Docx)] + [InlineData("xl/workbook.xml", FileKind.Xlsx)] + [InlineData("xl/workbook.bin", FileKind.Xlsx)] + [InlineData("ppt/presentation.xml", FileKind.Pptx)] public void TryRefineStream_DetectsOpenXmlKinds(string markerPath, FileKind expected) { var payload = CreateOpenXmlPackage(markerPath); @@ -40,12 +40,12 @@ public void TryRefineStream_DetectsOpenXmlKinds(string markerPath, FileKind expe } [Theory] - [InlineData("application/vnd.oasis.opendocument.text", FileKind.Doc)] - [InlineData("application/vnd.oasis.opendocument.text-template", FileKind.Doc)] - [InlineData("application/vnd.oasis.opendocument.spreadsheet", FileKind.Xls)] - [InlineData("application/vnd.oasis.opendocument.spreadsheet-template", FileKind.Xls)] - [InlineData("application/vnd.oasis.opendocument.presentation", FileKind.Ppt)] 
- [InlineData("application/vnd.oasis.opendocument.presentation-template", FileKind.Ppt)] + [InlineData("application/vnd.oasis.opendocument.text", FileKind.Docx)] + [InlineData("application/vnd.oasis.opendocument.text-template", FileKind.Docx)] + [InlineData("application/vnd.oasis.opendocument.spreadsheet", FileKind.Xlsx)] + [InlineData("application/vnd.oasis.opendocument.spreadsheet-template", FileKind.Xlsx)] + [InlineData("application/vnd.oasis.opendocument.presentation", FileKind.Pptx)] + [InlineData("application/vnd.oasis.opendocument.presentation-template", FileKind.Pptx)] public void TryRefineStream_DetectsOpenDocumentKinds(string mimeType, FileKind expected) { var payload = CreateOpenDocumentPackage(mimeType); diff --git a/tools/audit/verify-security-claims.sh b/tools/audit/verify-security-claims.sh index a7dc3795..695f0a9a 100755 --- a/tools/audit/verify-security-claims.sh +++ b/tools/audit/verify-security-claims.sh @@ -151,19 +151,16 @@ if [[ -z "${REPO_FULL}" ]]; then add_violation "CI-SEC-CLAIM-001" "fail" "Unable to determine GitHub repository slug" "SECURITY.md" fi -# Claim-Bindung an SECURITY.md: -# - Security-Support gilt nur fuer den aktuellen Major 6.x. -# - Ein Major-Wechsel erfordert immer synchrones Update von SECURITY.md, -# Versionierungsdokumenten und dieser Claim-Pruefung. 
+# Claim: 5.x supported and <5.0 unsupported maps to current package major = 5 pkg_ver="$(sed -n 's:.*\([^<]*\).*:\1:p' "${ROOT_DIR}/src/FileTypeDetection/FileTypeDetectionLib.vbproj" | head -n1)" if [[ -z "${pkg_ver}" ]]; then add_violation "CI-SEC-CLAIM-002" "fail" "Package version not found" "src/FileTypeDetection/FileTypeDetectionLib.vbproj" else major="${pkg_ver%%.*}" - if [[ "${major}" == "6" ]]; then + if [[ "${major}" == "5" ]]; then add_pass else - add_violation "CI-SEC-CLAIM-002" "fail" "Expected package major 6 for SECURITY.md support claim, found ${pkg_ver}" "src/FileTypeDetection/FileTypeDetectionLib.vbproj" + add_violation "CI-SEC-CLAIM-002" "fail" "Expected package major 5 for SECURITY.md support claim, found ${pkg_ver}" "src/FileTypeDetection/FileTypeDetectionLib.vbproj" fi fi diff --git a/tools/versioning/check-version-policy.sh b/tools/versioning/check-version-policy.sh index b7b706db..77f26497 100755 --- a/tools/versioning/check-version-policy.sh +++ b/tools/versioning/check-version-policy.sh @@ -30,6 +30,8 @@ collect_version_policy_violations() { local -a files=() local -a violations=() local -a patterns=( + '' + '' '' '' '' @@ -55,42 +57,15 @@ collect_version_policy_violations() { } run_ci_mode() { - local violations repo_version vbproj_version vbproj_package_version - - repo_version="$(sed -n 's/.*\([^<]*\)<\/RepoVersion>.*/\1/p' Directory.Build.props | head -n1)" - vbproj_version="$(sed -n 's/.*\([^<]*\)<\/Version>.*/\1/p' src/FileTypeDetection/FileTypeDetectionLib.vbproj | head -n1)" - vbproj_package_version="$(sed -n 's/.*\([^<]*\)<\/PackageVersion>.*/\1/p' src/FileTypeDetection/FileTypeDetectionLib.vbproj | head -n1)" - - if [[ -z "${repo_version}" ]]; then - echo "version-policy: RepoVersion missing in Directory.Build.props" >&2 - return 1 - fi - if [[ -z "${vbproj_version}" ]]; then - echo "version-policy: Version missing in src/FileTypeDetection/FileTypeDetectionLib.vbproj" >&2 - return 1 - fi - if [[ -z "${vbproj_package_version}" ]]; 
then - echo "version-policy: PackageVersion missing in src/FileTypeDetection/FileTypeDetectionLib.vbproj" >&2 - return 1 - fi - - if [[ "${vbproj_version}" != "${repo_version}" ]]; then - echo "version-policy: Version (${vbproj_version}) != RepoVersion (${repo_version})" >&2 - return 1 - fi - if [[ "${vbproj_package_version}" != "${repo_version}" ]]; then - echo "version-policy: PackageVersion (${vbproj_package_version}) != RepoVersion (${repo_version})" >&2 - return 1 - fi - + local violations violations="$(collect_version_policy_violations)" if [[ -n "${violations}" ]]; then - echo "version-policy: forbidden static assembly/version fields detected." >&2 + echo "version-policy: static version fields are forbidden (tag is SSOT)." >&2 echo "${violations}" >&2 return 1 fi - echo "version-policy: convergence fields valid and no forbidden static fields detected." + echo "version-policy: no static package/assembly version fields detected." } read_nupkg_version() { diff --git a/tools/versioning/verify-version-convergence.sh b/tools/versioning/verify-version-convergence.sh index 3ba1b257..2d3d841e 100755 --- a/tools/versioning/verify-version-convergence.sh +++ b/tools/versioning/verify-version-convergence.sh @@ -97,10 +97,8 @@ main() { if [[ "${REQUIRE_REMOTE}" == "1" ]]; then require_cmd curl require_cmd gh - # Reihenfolge ist bewusst: GH_TOKEN hat Vorrang, danach CI-Standard GITHUB_TOKEN, - # danach SECURITY_CLAIMS_TOKEN als expliziter Fallback fuer gleichwertige Repo-Claims. 
- export GH_TOKEN="${GH_TOKEN:-${GITHUB_TOKEN:-${SECURITY_CLAIMS_TOKEN:-}}}" - [[ -n "${GH_TOKEN}" ]] || fail "REQUIRE_REMOTE=1 needs GH_TOKEN/GITHUB_TOKEN/SECURITY_CLAIMS_TOKEN" + export GH_TOKEN="${GH_TOKEN:-${GITHUB_TOKEN:-}}" + [[ -n "${GH_TOKEN}" ]] || fail "REQUIRE_REMOTE=1 needs GH_TOKEN/GITHUB_TOKEN" release_tag="$(retry_with_backoff "github_release_lookup" gh api "repos/${REPO_SLUG}/releases/latest" --jq '.tag_name')" release_version="$(normalize_tag "${release_tag}")" From 094ed623ccda3a9ac52c9a5786dc7e91e1b07d78 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 18:29:06 +0100 Subject: [PATCH 20/34] refactor(core): finalize ssot utils migration and hardening - move and consolidate utility logic under Infrastructure/Utils as SSOT - align style/policy with ArchiveInternals method header wrapping rules - bump version convergence to 6.0.1 and refresh docs/evidence - support SECURITY_CLAIMS_TOKEN fallback for remote version convergence - wire SECURITY_CLAIMS_TOKEN into ci version-convergence workflow step --- .github/workflows/ci.yml | 1 + Directory.Build.props | 2 +- docs/governance/045_CODE_QUALITY_POLICY_DE.MD | 13 + docs/governance/145_CODE_QUALITY_POLICY_DE.MD | 13 + docs/versioning/002_HISTORY_VERSIONS.MD | 3 +- docs/versioning/003_CHANGELOG_RELEASES.MD | 10 + docs/versioning/102_HISTORY_VERSIONS.MD | 3 +- docs/versioning/103_CHANGELOG_RELEASES.MD | 10 + .../Abstractions/Hashing/HashEvidence.vb | 2 +- .../Hashing/HashRoundTripReport.vb | 2 +- .../Hashing/Internal/EvidenceHashingCore.vb | 2 +- src/FileTypeDetection/ArchiveProcessing.vb | 2 +- src/FileTypeDetection/FileMaterializer.vb | 26 +- .../FileTypeDetectionLib.vbproj | 8 +- src/FileTypeDetection/FileTypeDetector.vb | 2 +- .../Infrastructure/ArchiveInternals.vb | 177 ++++++---- .../Infrastructure/CoreInternals.vb | 323 +----------------- .../Infrastructure/README.md | 5 +- .../Infrastructure/Utils/ArchiveGuards.vb | 214 ++++++++++++ .../Utils/DestinationPathGuard.vb | 166 
+++++++++ .../{ => Infrastructure}/Utils/EnumUtils.vb | 7 +- .../{ => Infrastructure}/Utils/GuardUtils.vb | 22 +- .../Infrastructure/Utils/IoGuards.vb | 82 +++++ .../Utils/IterableUtils.vb | 7 +- .../Infrastructure/Utils/LogGuard.vb | 82 +++++ .../Utils/PathResolutionGuard.vb | 64 ++++ .../Infrastructure/Utils/README.md | 37 ++ src/FileTypeDetection/README.md | 1 + src/FileTypeDetection/Utils/README.md | 25 -- .../FileTypeDetectionLib.Tests.csproj | 1 + .../versioning/verify-version-convergence.sh | 4 +- 31 files changed, 876 insertions(+), 440 deletions(-) create mode 100644 src/FileTypeDetection/Infrastructure/Utils/ArchiveGuards.vb create mode 100644 src/FileTypeDetection/Infrastructure/Utils/DestinationPathGuard.vb rename src/FileTypeDetection/{ => Infrastructure}/Utils/EnumUtils.vb (98%) rename src/FileTypeDetection/{ => Infrastructure}/Utils/GuardUtils.vb (91%) create mode 100644 src/FileTypeDetection/Infrastructure/Utils/IoGuards.vb rename src/FileTypeDetection/{ => Infrastructure}/Utils/IterableUtils.vb (93%) create mode 100644 src/FileTypeDetection/Infrastructure/Utils/LogGuard.vb create mode 100644 src/FileTypeDetection/Infrastructure/Utils/PathResolutionGuard.vb create mode 100644 src/FileTypeDetection/Infrastructure/Utils/README.md delete mode 100644 src/FileTypeDetection/Utils/README.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cc86f4e0..017c23fd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -152,6 +152,7 @@ jobs: # during the normal sequence merge -> tag/release -> registry propagation. 
env: REQUIRE_REMOTE: "0" + SECURITY_CLAIMS_TOKEN: ${{ secrets.SECURITY_CLAIMS_TOKEN }} run: bash -euo pipefail tools/ci/bin/run.sh version-convergence - name: Upload Artifact if: always() diff --git a/Directory.Build.props b/Directory.Build.props index 93a7b379..79cdb468 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,6 +5,6 @@ true - 6.0.0 + 6.0.1 diff --git a/docs/governance/045_CODE_QUALITY_POLICY_DE.MD b/docs/governance/045_CODE_QUALITY_POLICY_DE.MD index 2f45a718..79c48683 100644 --- a/docs/governance/045_CODE_QUALITY_POLICY_DE.MD +++ b/docs/governance/045_CODE_QUALITY_POLICY_DE.MD @@ -79,6 +79,19 @@ Blöcke werden sichtbar getrennt (Leerzeile + Kommentartrenner): - Fallback - I/O Helpers separat +### 5.4 Methodenkopf- und Umbruchschema (verbindlich) +- Für neue oder in einem Ticket angepasste Signaturen sind mehrzeilige Signaturen Pflicht, wenn mindestens einer der Punkte zutrifft: + - mehr als ein Parameter + - Signatur würde in eine lange Zeile kippen (Richtwert: > 100 Zeichen) +- Format für mehrzeilige Signaturen: + - Membername mit Zeilenfortsetzung (`_`) + - öffnende Klammer in eigener Zeile + - genau ein Parameter pro Zeile + - schließende Klammer und Rückgabetyp gemeinsam in einer Zeile +- Das Schema gilt einheitlich für `Sub`/`Function`/Konstruktoren/Interface-Member. +- Lange Aufrufe werden analog umgebrochen (ein Argument pro Zeile, klar ausgerichtet). +- Keine Trailing-Whitespace-Zeichen und keine „leeren“ Zeilen mit Spaces/Tabs. + ## 6. Variablenregel (Pflicht) - Alle lokalen Variablen werden im „Deklarationsblock“ am Anfang der Funktion definiert. 
- Platzierung: diff --git a/docs/governance/145_CODE_QUALITY_POLICY_DE.MD b/docs/governance/145_CODE_QUALITY_POLICY_DE.MD index 41940479..4514cbba 100644 --- a/docs/governance/145_CODE_QUALITY_POLICY_DE.MD +++ b/docs/governance/145_CODE_QUALITY_POLICY_DE.MD @@ -68,6 +68,19 @@ Use visible block separation (empty line + block comment markers): - fallback - I/O helpers separated +### 5.4 Method Header and Wrapping Scheme (binding) +- For new or ticket-touched signatures, multiline signatures are mandatory when at least one applies: + - more than one parameter + - signature would become a long line (guideline: > 100 characters) +- Format for multiline signatures: + - member name followed by line continuation (`_`) + - opening parenthesis on its own line + - exactly one parameter per line + - closing parenthesis and return type on one line +- This applies consistently to `Sub`/`Function`/constructors/interface members. +- Long invocations are wrapped analogously (one argument per line, aligned clearly). +- No trailing whitespace and no visually empty lines containing spaces/tabs. + ## 6. Variable Rule - All local variables are declared in a declaration block at the start of the function. - Placement: diff --git a/docs/versioning/002_HISTORY_VERSIONS.MD b/docs/versioning/002_HISTORY_VERSIONS.MD index baae397f..81020eee 100644 --- a/docs/versioning/002_HISTORY_VERSIONS.MD +++ b/docs/versioning/002_HISTORY_VERSIONS.MD @@ -12,7 +12,7 @@ Heuristik fuer die Rueckwirkungs-Zuordnung: - `docs|test|ci|chore|tooling|refactor|fix` => Patch Aktueller Entwicklungsstand: -- Aktuelle Entwicklungslinie enthaelt `6.x` (aktueller Arbeitsstand: `v6.0.0`; Details in `docs/versioning/003_CHANGELOG_RELEASES.MD`). +- Aktuelle Entwicklungslinie enthaelt `6.x` (aktueller Arbeitsstand: `v6.0.1`; Details in `docs/versioning/003_CHANGELOG_RELEASES.MD`). Hinweis: - Die Spalte `Keyword` verwendet den technischen Klassifizierungswert aus der Historie. 
@@ -20,6 +20,7 @@ Hinweis: | Version | Kurzbeschreibung | Commit | Keyword | |---|---|---|---| +| `6.0.1` | Refactor-Haertung: interne SSOT-Utilities nach `Infrastructure/Utils` konsolidiert, Core-Utility-Logik in dedizierte Dateien gesplittet und Duplikat-Guards vereinheitlicht | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v6.0.0...HEAD) | patch | | `6.0.0` | Breaking-Release: `FileKind`-Enum und `HashRoundTripReport`-Public-API auf neues Slot-/Methodenmodell umgestellt, Hashing-Interna in Core/RoundTrip/Io ausgelagert | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | SharpCompress-API auf typsichere Aufrufe umgestellt, tar.gz-Verarbeitung fail-closed gehaertet und Qodana-CI-Gate als Pflichtlauf dokumentiert/erzwungen | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | netstandard2.0-Compat-Layer eingefuehrt, Provider-Struktur konsolidiert und TFM-Multi-Targeting erweitert | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | diff --git a/docs/versioning/003_CHANGELOG_RELEASES.MD b/docs/versioning/003_CHANGELOG_RELEASES.MD index 10cf2a90..a75a0c41 100644 --- a/docs/versioning/003_CHANGELOG_RELEASES.MD +++ b/docs/versioning/003_CHANGELOG_RELEASES.MD @@ -7,6 +7,16 @@ Alle Aenderungen werden hier technisch dokumentiert. Die Release-Version selbst ist der Git-Tag `vX.Y.Z` (optional `-prerelease`) als SSOT. +## [6.0.1] +- Added: + - Neues internes Submodul `src/FileTypeDetection/Infrastructure/Utils/` als SSOT fuer wiederverwendbare Guard-/I/O-/Pfad-/Logging-Helfer. +- Changed: + - Utility-Klassen aus `CoreInternals.vb` in dedizierte Dateien unter `Infrastructure/Utils` ausgelagert; `CoreInternals.vb` auf Refinement-Logik fokussiert. 
+ - Vorhandene Utils von `src/FileTypeDetection/Utils/` nach `src/FileTypeDetection/Infrastructure/Utils/` verschoben und Namespace auf `Tomtastisch.FileClassifier.Infrastructure.Utils` konsolidiert. + - Duplizierte Byte-Array-Guard-Checks auf `ByteArrayGuard.HasContent(...)` vereinheitlicht. +- Docs/CI/Tooling: + - Versionskonvergenz auf `6.0.1` nachgezogen (`RepoVersion`, `Version`, `PackageVersion`, Versionshistorie DE/EN). + ## [6.0.0] - Added: - Neue interne Hashing-Services (`EvidenceHashingCore`, `EvidenceHashingRoundTrip`, `EvidenceHashingIo`) fuer deterministische Auslagerung ohne neue Dependencies. diff --git a/docs/versioning/102_HISTORY_VERSIONS.MD b/docs/versioning/102_HISTORY_VERSIONS.MD index f6ba1c29..829190c2 100644 --- a/docs/versioning/102_HISTORY_VERSIONS.MD +++ b/docs/versioning/102_HISTORY_VERSIONS.MD @@ -12,13 +12,14 @@ Heuristics for retroactive classification: - `docs|test|ci|chore|tooling|refactor|fix` => patch Current state: -- Current release line contains `6.x` (current working state: `v6.0.0`; details in `docs/versioning/103_CHANGELOG_RELEASES.MD`). +- Current release line contains `6.x` (current working state: `v6.0.1`; details in `docs/versioning/103_CHANGELOG_RELEASES.MD`). Note: - The \"short description\" column follows the original commit/PR intent text for deterministic traceability and is not normalized to a single language. 
| Version | Short description | Commit | Keyword | |---|---|---|---| +| `6.0.1` | Refactor hardening: consolidated internal SSOT utilities into `Infrastructure/Utils`, split core utility logic into dedicated files, and unified duplicate byte guards | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v6.0.0...HEAD) | patch | | `6.0.0` | Breaking release: migrated `FileKind` enum and `HashRoundTripReport` public API to the new slot/method model and split hashing internals into core/roundtrip/io services | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | Switched SharpCompress calls to type-safe APIs, hardened tar.gz fail-closed handling, and enforced/documented Qodana CI as a mandatory gate | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | Introduce netstandard2.0 compatibility layer, consolidate provider structure, and extend TFM multi-targeting | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | diff --git a/docs/versioning/103_CHANGELOG_RELEASES.MD b/docs/versioning/103_CHANGELOG_RELEASES.MD index b065566a..7ca87e28 100644 --- a/docs/versioning/103_CHANGELOG_RELEASES.MD +++ b/docs/versioning/103_CHANGELOG_RELEASES.MD @@ -6,6 +6,16 @@ All changes are documented here in technical terms. The release version itself is the Git tag `vX.Y.Z` (optional `-prerelease`) as SSOT. +## [6.0.1] +- Added: + - New internal submodule `src/FileTypeDetection/Infrastructure/Utils/` as the SSOT for reusable guard/I/O/path/logging helpers. +- Changed: + - Moved utility classes from `CoreInternals.vb` into dedicated files under `Infrastructure/Utils`; narrowed `CoreInternals.vb` to refinement logic. + - Relocated existing utils from `src/FileTypeDetection/Utils/` to `src/FileTypeDetection/Infrastructure/Utils/` and consolidated the namespace to `Tomtastisch.FileClassifier.Infrastructure.Utils`. 
+ - Unified duplicate byte-array guard checks to `ByteArrayGuard.HasContent(...)`. +- Docs/CI/Tooling: + - Updated version convergence to `6.0.1` (`RepoVersion`, `Version`, `PackageVersion`, version history DE/EN). + ## [6.0.0] - Added: - New internal hashing services (`EvidenceHashingCore`, `EvidenceHashingRoundTrip`, `EvidenceHashingIo`) for deterministic extraction without adding dependencies. diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb index c978946d..702a4201 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb @@ -120,7 +120,7 @@ Namespace Global.Tomtastisch.FileClassifier data As Byte() ) As Immutable.ImmutableArray(Of Byte) - If data Is Nothing OrElse data.Length = 0 Then + If Not ByteArrayGuard.HasContent(data) Then Return Immutable.ImmutableArray(Of Byte).Empty End If diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb index 76666791..c1e4a18f 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb @@ -19,7 +19,7 @@ Option Strict On Option Explicit On Imports System -Imports Tomtastisch.FileClassifier.Utils +Imports Tomtastisch.FileClassifier.Infrastructure.Utils Namespace Global.Tomtastisch.FileClassifier diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb index ad793049..b6946481 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb @@ -389,7 +389,7 @@ Namespace Global.Tomtastisch.FileClassifier Dim copy As Byte() - If data Is Nothing OrElse data.Length = 0 Then Return 
Array.Empty(Of Byte)() + If Not ByteArrayGuard.HasContent(data) Then Return Array.Empty(Of Byte)() copy = New Byte(data.Length - 1) {} Buffer.BlockCopy(data, 0, copy, 0, data.Length) diff --git a/src/FileTypeDetection/ArchiveProcessing.vb b/src/FileTypeDetection/ArchiveProcessing.vb index fa3b328f..3103b2a8 100644 --- a/src/FileTypeDetection/ArchiveProcessing.vb +++ b/src/FileTypeDetection/ArchiveProcessing.vb @@ -104,7 +104,7 @@ Namespace Global.Tomtastisch.FileClassifier Dim emptyResult As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() Dim entries As IReadOnlyList(Of ZipExtractedEntry) = Array.Empty(Of ZipExtractedEntry)() - If data Is Nothing OrElse data.Length = 0 Then Return emptyResult + If Not ByteArrayGuard.HasContent(data) Then Return emptyResult If Not ArchiveEntryCollector.TryCollectFromBytes(data, opt, entries) Then Return emptyResult Return entries diff --git a/src/FileTypeDetection/FileMaterializer.vb b/src/FileTypeDetection/FileMaterializer.vb index ae9ebdf6..402397e1 100644 --- a/src/FileTypeDetection/FileMaterializer.vb +++ b/src/FileTypeDetection/FileMaterializer.vb @@ -105,7 +105,8 @@ Namespace Global.Tomtastisch.FileClassifier ''' Zu materialisierende Nutzdaten. ''' Datei- oder Verzeichnisziel abhängig vom Verarbeitungspfad. ''' True, um ein vorhandenes Ziel gemäß Zielpfad-Policy zu ersetzen. - ''' True, um Archivpayloads sicher zu validieren und zu extrahieren; sonst Rohpersistenz. + ''' True, um Archivpayloads sicher validieren und + ''' extrahieren zu können; sonst Rohpersistenz. ''' True bei erfolgreicher Materialisierung; andernfalls False. Public Shared Function Persist _ ( @@ -116,7 +117,7 @@ Namespace Global.Tomtastisch.FileClassifier ) As Boolean Dim opt As FileTypeProjectOptions = FileTypeOptions.GetSnapshot() - Dim destinationFull As String + Dim destinationFull As String = String.Empty Dim descriptor As ArchiveDescriptor = Nothing ' Guard-Clauses: Null-, Größen- und Zielpfadprüfung. 
@@ -130,20 +131,15 @@ Namespace Global.Tomtastisch.FileClassifier If String.IsNullOrWhiteSpace(destinationPath) Then Return False ' Pfadnormalisierung: Absoluten Zielpfad auflösen. - Try - destinationFull = Path.GetFullPath(destinationPath) - - Catch ex As Exception When _ - TypeOf ex Is ArgumentException OrElse - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is PathTooLongException OrElse - TypeOf ex Is IOException - - LogGuard.Warn(opt.Logger, $"[Materialize] Ungültiger Zielpfad: {ex.Message}") + If Not PathResolutionGuard.TryGetFullPath( + destinationPath, + opt, + "[Materialize] Ungültiger Zielpfad", + warnLevel:=True, + destinationFull + ) Then Return False - End Try + End If ' Secure-Extract-Branch: describe -> safety gate -> extract. If secureExtract Then diff --git a/src/FileTypeDetection/FileTypeDetectionLib.vbproj b/src/FileTypeDetection/FileTypeDetectionLib.vbproj index f61a0d12..9175f2b7 100644 --- a/src/FileTypeDetection/FileTypeDetectionLib.vbproj +++ b/src/FileTypeDetection/FileTypeDetectionLib.vbproj @@ -7,8 +7,8 @@ true false Tomtastisch.FileClassifier - 6.0.0 - 6.0.0 + 6.0.1 + 6.0.1 tomtastisch Deterministic file type and MIME detection with fail-closed archive safety checks, secure extraction primitives, and reproducible hashing evidence for .NET. 
filetype;mime;detection;magic-bytes;sniffing;archive;zip;tar;7z;rar;zipslip;security;hashing;sha256;deterministic;dotnet;netstandard2.0;net8;net10 @@ -49,6 +49,10 @@ + + + + diff --git a/src/FileTypeDetection/FileTypeDetector.vb b/src/FileTypeDetection/FileTypeDetector.vb index 57caa6b7..5a6deb04 100644 --- a/src/FileTypeDetection/FileTypeDetector.vb +++ b/src/FileTypeDetection/FileTypeDetector.vb @@ -562,7 +562,7 @@ Namespace Global.Tomtastisch.FileClassifier Dim trace As DetectionTrace = DetectionTrace.Empty - If data Is Nothing OrElse data.Length = 0 Then Return UnknownType() + If Not ByteArrayGuard.HasContent(data) Then Return UnknownType() If CLng(data.Length) > opt.MaxBytes Then LogGuard.Warn(opt.Logger, $"[Detect] Daten zu groß ({data.Length} > {opt.MaxBytes}).") diff --git a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb index f5e82779..c45d58ed 100644 --- a/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb +++ b/src/FileTypeDetection/Infrastructure/ArchiveInternals.vb @@ -47,8 +47,13 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property ContainerChain As IReadOnlyList(Of ArchiveContainerType) - Private Sub New(logicalKind As FileKind, containerType As ArchiveContainerType, - containerChain As ArchiveContainerType()) + Private Sub New _ + ( + logicalKind As FileKind, + containerType As ArchiveContainerType, + containerChain As ArchiveContainerType() + ) + Me.LogicalKind = logicalKind Me.ContainerType = containerType Dim chain = If(containerChain, Array.Empty(Of ArchiveContainerType)()) @@ -56,9 +61,11 @@ Namespace Global.Tomtastisch.FileClassifier End Sub Friend Shared Function UnknownDescriptor() As ArchiveDescriptor - Return _ - New ArchiveDescriptor(FileKind.Unknown, ArchiveContainerType.Unknown, - Array.Empty(Of ArchiveContainerType)()) + Return New ArchiveDescriptor( + FileKind.Unknown, + ArchiveContainerType.Unknown, + Array.Empty(Of 
ArchiveContainerType)() + ) End Function Friend Shared Function ForContainerType(containerType As ArchiveContainerType) As ArchiveDescriptor @@ -89,13 +96,14 @@ Namespace Global.Tomtastisch.FileClassifier Friend Interface IArchiveBackend ReadOnly Property ContainerType As ArchiveContainerType - Function Process( - stream As Stream, - opt As FileTypeProjectOptions, - depth As Integer, - containerTypeValue As ArchiveContainerType, - extractEntry As Func(Of IArchiveEntryModel, Boolean) - ) As Boolean + Function Process _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + depth As Integer, + containerTypeValue As ArchiveContainerType, + extractEntry As Func(Of IArchiveEntryModel, Boolean) + ) As Boolean End Interface ''' @@ -108,7 +116,11 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function Resolve(containerType As ArchiveContainerType) As IArchiveBackend + Friend Shared Function Resolve _ + ( + containerType As ArchiveContainerType + ) As IArchiveBackend + Select Case containerType Case ArchiveContainerType.Zip Return ManagedArchiveBackend @@ -125,7 +137,11 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub - Friend Shared Function OpenArchive(stream As Stream) As SharpCompress.Archives.IArchive + Friend Shared Function OpenArchive _ + ( + stream As Stream + ) As SharpCompress.Archives.IArchive + Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenArchiveFactoryCompat(stream, options) @@ -142,9 +158,12 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Friend Shared Function OpenArchiveForContainer(stream As Stream, - containerTypeValue As ArchiveContainerType) _ - As SharpCompress.Archives.IArchive + Friend Shared Function OpenArchiveForContainer _ + ( + stream As Stream, + containerTypeValue As ArchiveContainerType + ) As SharpCompress.Archives.IArchive + If containerTypeValue = ArchiveContainerType.GZip Then Dim gzipArchive 
= OpenGZipArchive(stream) If gzipArchive IsNot Nothing Then Return gzipArchive @@ -152,7 +171,11 @@ Namespace Global.Tomtastisch.FileClassifier Return OpenArchive(stream) End Function - Friend Shared Function HasGZipMagic(stream As Stream) As Boolean + Friend Shared Function HasGZipMagic _ + ( + stream As Stream + ) As Boolean + If stream Is Nothing OrElse Not stream.CanRead Then Return False If Not stream.CanSeek Then Return False If stream.Length < 2 Then Return False @@ -162,7 +185,11 @@ Namespace Global.Tomtastisch.FileClassifier Return first = &H1F AndAlso second = &H8B End Function - Private Shared Function OpenGZipArchive(stream As Stream) As SharpCompress.Archives.IArchive + Private Shared Function OpenGZipArchive _ + ( + stream As Stream + ) As SharpCompress.Archives.IArchive + Try Dim options = New SharpCompress.Readers.ReaderOptions() With {.LeaveStreamOpen = True} Return OpenGZipArchiveCompat(stream, options) @@ -179,7 +206,11 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function - Private Shared Function IsExpectedInvocationException(ex As TargetInvocationException) As Boolean + Private Shared Function IsExpectedInvocationException _ + ( + ex As TargetInvocationException + ) As Boolean + Dim inner = ex?.InnerException If inner Is Nothing Then Return False @@ -190,38 +221,49 @@ Namespace Global.Tomtastisch.FileClassifier TypeOf inner Is IOException End Function - Private Shared Function OpenArchiveFactoryCompat( - stream As Stream, - options As SharpCompress.Readers.ReaderOptions - ) As SharpCompress.Archives.IArchive + Private Shared Function OpenArchiveFactoryCompat _ + ( + stream As Stream, + options As SharpCompress.Readers.ReaderOptions + ) As SharpCompress.Archives.IArchive + Dim method = GetOpenCompatMethod(GetType(SharpCompress.Archives.ArchiveFactory)) Dim opened = method.Invoke(Nothing, New Object() {stream, options}) + Return CType(opened, SharpCompress.Archives.IArchive) End Function - Private Shared Function 
OpenGZipArchiveCompat( - stream As Stream, - options As SharpCompress.Readers.ReaderOptions - ) As SharpCompress.Archives.IArchive + Private Shared Function OpenGZipArchiveCompat _ + ( + stream As Stream, + options As SharpCompress.Readers.ReaderOptions + ) As SharpCompress.Archives.IArchive + Dim method = GetOpenCompatMethod(GetType(SharpCompress.Archives.GZip.GZipArchive)) Dim opened = method.Invoke(Nothing, New Object() {stream, options}) + Return CType(opened, SharpCompress.Archives.IArchive) End Function - Private Shared Function GetOpenCompatMethod(type As Type) As System.Reflection.MethodInfo + Private Shared Function GetOpenCompatMethod(type As Type) As MethodInfo Dim signature = New Type() {GetType(Stream), GetType(SharpCompress.Readers.ReaderOptions)} - Dim method = type.GetMethod("OpenArchive", BindingFlags.Public Or - BindingFlags.Static, - binder:=Nothing, - types:=signature, - modifiers:=Nothing) + Dim method = type.GetMethod( + "OpenArchive", + BindingFlags.Public Or BindingFlags.Static, + binder:=Nothing, + types:=signature, + modifiers:=Nothing + ) + If method IsNot Nothing Then Return method - method = type.GetMethod("Open", BindingFlags.Public Or - BindingFlags.Static, - binder:=Nothing, - types:=signature, - modifiers:=Nothing) + method = type.GetMethod( + "Open", + BindingFlags.Public Or BindingFlags.Static, + binder:=Nothing, + types:=signature, + modifiers:=Nothing + ) If method IsNot Nothing Then Return method Throw New MissingMethodException(type.FullName, "OpenArchive/Open(Stream, ReaderOptions)") @@ -243,7 +285,7 @@ Namespace Global.Tomtastisch.FileClassifier ) As Boolean descriptor = ArchiveDescriptor.UnknownDescriptor() - If data Is Nothing OrElse data.Length = 0 Then Return False + If Not ByteArrayGuard.HasContent(data) Then Return False If opt Is Nothing Then Return False Try @@ -478,7 +520,7 @@ Namespace Global.Tomtastisch.FileClassifier descriptor As ArchiveDescriptor ) As Boolean - Dim destinationFull As String + Dim 
destinationFull As String = String.Empty Dim parent As String Dim stageDir As String Dim stagePrefix As String @@ -489,18 +531,15 @@ Namespace Global.Tomtastisch.FileClassifier If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then Return False If String.IsNullOrWhiteSpace(destinationDirectory) Then Return False - Try - destinationFull = Path.GetFullPath(destinationDirectory) - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is PathTooLongException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - LogGuard.Debug(opt.Logger, $"[ArchiveExtract] Ungültiger Zielpfad: {ex.Message}") + If Not PathResolutionGuard.TryGetFullPath( + destinationDirectory, + opt, + "[ArchiveExtract] Ungültiger Zielpfad", + warnLevel:=False, + destinationFull + ) Then Return False - End Try + End If If Not DestinationPathGuard.ValidateNewExtractionTarget(destinationFull, opt) Then Return False @@ -565,7 +604,7 @@ Namespace Global.Tomtastisch.FileClassifier Dim entryName As String = Nothing Dim isDirectory As Boolean = False - Dim targetPath As String + Dim targetPath As String = String.Empty Dim targetDir As String If entry Is Nothing Then Return False @@ -573,18 +612,15 @@ Namespace Global.Tomtastisch.FileClassifier If Not TryGetSafeEntryName(entry, opt, entryName, isDirectory) Then Return False - Try - targetPath = Path.GetFullPath(Path.Combine(destinationPrefix, entryName)) - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is PathTooLongException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - LogGuard.Debug(opt.Logger, $"[ArchiveExtract] Zielpfad-Fehler: {ex.Message}") + If Not PathResolutionGuard.TryGetFullPath( + Path.Combine(destinationPrefix, 
entryName), + opt, + "[ArchiveExtract] Zielpfad-Fehler", + warnLevel:=False, + targetPath + ) Then Return False - End Try + End If If Not targetPath.StartsWith(destinationPrefix, StringComparison.Ordinal) Then LogGuard.Warn(opt.Logger, "[ArchiveExtract] Path traversal erkannt.") @@ -696,8 +732,7 @@ Namespace Global.Tomtastisch.FileClassifier If entry Is Nothing Then Return False If opt Is Nothing Then Return False - If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(entry.LinkTarget) Then - LogGuard.Warn(opt.Logger, "[ArchiveExtract] Link-Entry ist nicht erlaubt.") + If ArchiveLinkGuard.IsRejectedLink(opt, entry.LinkTarget, "[ArchiveExtract]", logWhenRejected:=True) Then Return False End If @@ -819,12 +854,11 @@ Namespace Global.Tomtastisch.FileClassifier Dim descriptor As ArchiveDescriptor = Nothing entries = Array.Empty(Of ZipExtractedEntry)() - If data Is Nothing OrElse data.Length = 0 Then Return False + If Not ByteArrayGuard.HasContent(data) Then Return False If opt Is Nothing Then Return False Try - If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False - If Not ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) Then Return False + If Not ArchivePayloadGuard.TryDescribeSafeArchivePayload(data, opt, descriptor) Then Return False Using ms As New MemoryStream(data, writable:=False) entries = ArchiveExtractor.TryExtractArchiveStreamToMemory(ms, opt, descriptor) @@ -935,8 +969,7 @@ Namespace Global.Tomtastisch.FileClassifier model = New SharpCompressEntryModel(entry) - If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(model.LinkTarget) Then - LogGuard.Warn(opt.Logger, "[ArchiveGate] Link-Entry ist nicht erlaubt.") + If ArchiveLinkGuard.IsRejectedLink(opt, model.LinkTarget, "[ArchiveGate]", logWhenRejected:=True) Then Return False End If @@ -1005,7 +1038,7 @@ Namespace Global.Tomtastisch.FileClassifier If onlyEntry Is Nothing OrElse onlyEntry.IsDirectory Then Return False model = New 
SharpCompressEntryModel(onlyEntry) - If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(model.LinkTarget) Then + If ArchiveLinkGuard.IsRejectedLink(opt, model.LinkTarget, "[ArchiveGate]", logWhenRejected:=False) Then nestedResult = False Return True End If diff --git a/src/FileTypeDetection/Infrastructure/CoreInternals.vb b/src/FileTypeDetection/Infrastructure/CoreInternals.vb index b013e948..f6c419de 100644 --- a/src/FileTypeDetection/Infrastructure/CoreInternals.vb +++ b/src/FileTypeDetection/Infrastructure/CoreInternals.vb @@ -13,260 +13,9 @@ Option Explicit On Imports System.IO Imports System.IO.Compression Imports System.Text -Imports Microsoft.Extensions.Logging +Imports Tomtastisch.FileClassifier.Infrastructure.Utils Namespace Global.Tomtastisch.FileClassifier - ''' - ''' Interne Hilfsklasse InternalIoDefaults zur kapselnden Umsetzung von Guard-, I/O- und Policy-Logik. - ''' - Friend NotInheritable Class InternalIoDefaults - Friend Const CopyBufferSize As Integer = 8192 - Friend Const FileStreamBufferSize As Integer = 81920 - Friend Const DefaultSniffBytes As Integer = 4096 - - Private Sub New() - End Sub - End Class - - ''' - ''' Zentrale IO-Helfer für harte Grenzen. - ''' SSOT-Regel: bounded copy wird nur hier gepflegt. - ''' - Friend NotInheritable Class StreamBounds - Private Sub New() - End Sub - - Friend Shared Sub CopyBounded(input As Stream, output As Stream, maxBytes As Long) - Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte - Dim total As Long = 0 - Dim n As Integer - - While True - n = input.Read(buf, 0, buf.Length) - If n <= 0 Then Exit While - - total += n - If total > maxBytes Then Throw New InvalidOperationException("bounded copy exceeded") - output.Write(buf, 0, n) - End While - End Sub - End Class - - ''' - ''' Kleine, zentrale Stream-Guards, um duplizierte Pattern-Checks in Archivroutinen zu reduzieren. - ''' Keine Semantik: reine Abfrage/Positionierung. 
- ''' - Friend NotInheritable Class StreamGuard - Private Sub New() - End Sub - - Friend Shared Function IsReadable(stream As Stream) As Boolean - Return stream IsNot Nothing AndAlso stream.CanRead - End Function - - Friend Shared Sub RewindToStart(stream As Stream) - If stream Is Nothing Then Return - If stream.CanSeek Then stream.Position = 0 - End Sub - End Class - - ''' - ''' Sicherheits-Gate für Archive-Container. - ''' - Friend NotInheritable Class ArchiveSafetyGate - Private Sub New() - End Sub - - Friend Shared Function IsArchiveSafeBytes(data As Byte(), opt As FileTypeProjectOptions, - descriptor As ArchiveDescriptor) As Boolean - If data Is Nothing OrElse data.Length = 0 Then Return False - If opt Is Nothing Then Return False - If descriptor Is Nothing OrElse descriptor.ContainerType = ArchiveContainerType.Unknown Then Return False - - Try - Using ms As New MemoryStream(data, writable:=False) - Return IsArchiveSafeStream(ms, opt, descriptor, depth:=0) - End Using - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is InvalidDataException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException OrElse - TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException - LogGuard.Debug(opt.Logger, $"[ArchiveGate] Bytes-Fehler: {ex.Message}") - Return False - End Try - End Function - - Friend Shared Function IsArchiveSafeStream(stream As Stream, opt As FileTypeProjectOptions, - descriptor As ArchiveDescriptor, depth As Integer) As Boolean - If Not StreamGuard.IsReadable(stream) Then Return False - If opt Is Nothing Then Return False - Return ArchiveProcessingEngine.ValidateArchiveStream(stream, opt, depth, descriptor) - End Function - End Class - - ''' - ''' Gemeinsame Guards für signaturbasierte Archiv-Byte-Payloads. 
- ''' - Friend NotInheritable Class ArchiveSignaturePayloadGuard - Private Sub New() - End Sub - - Friend Shared Function IsArchiveSignatureCandidate(data As Byte()) As Boolean - If data Is Nothing OrElse data.Length = 0 Then Return False - Return FileTypeRegistry.DetectByMagic(data) = FileKind.Zip - End Function - End Class - - ''' - ''' Gemeinsame Guards für beliebige Archive-Byte-Payloads. - ''' - Friend NotInheritable Class ArchivePayloadGuard - Private Sub New() - End Sub - - Friend Shared Function IsSafeArchivePayload(data As Byte(), opt As FileTypeProjectOptions) As Boolean - Dim descriptor As ArchiveDescriptor = Nothing - - If data Is Nothing OrElse data.Length = 0 Then Return False - If opt Is Nothing Then Return False - If CLng(data.Length) > opt.MaxBytes Then Return False - - If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False - Return ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) - End Function - End Class - - ''' - ''' Gemeinsame Zielpfad-Policy für Materialisierung und Archiv-Extraktion. 
- ''' - Friend NotInheritable Class DestinationPathGuard - Private Sub New() - End Sub - - Friend Shared Function PrepareMaterializationTarget(destinationFull As String, overwrite As Boolean, - opt As FileTypeProjectOptions) As Boolean - If IsRootPath(destinationFull) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") - Return False - End If - - If File.Exists(destinationFull) Then - If Not overwrite Then Return False - File.Delete(destinationFull) - ElseIf Directory.Exists(destinationFull) Then - If Not overwrite Then Return False - Directory.Delete(destinationFull, recursive:=True) - End If - - Return True - End Function - - Friend Shared Function ValidateNewExtractionTarget(destinationFull As String, opt As FileTypeProjectOptions) _ - As Boolean - Dim parent As String - - If IsRootPath(destinationFull) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") - Return False - End If - - If File.Exists(destinationFull) OrElse Directory.Exists(destinationFull) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel existiert bereits.") - Return False - End If - - parent = Path.GetDirectoryName(destinationFull) - If String.IsNullOrWhiteSpace(parent) Then - LogGuard.Warn(opt.Logger, "[PathGuard] Ziel ohne gültigen Parent.") - Return False - End If - - Return True - End Function - - Friend Shared Function IsRootPath(destinationFull As String) As Boolean - Dim rootPath As String - - If String.IsNullOrWhiteSpace(destinationFull) Then Return False - - Try - rootPath = Path.GetPathRoot(destinationFull) - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is System.Security.SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException - Return False - End Try - - If String.IsNullOrWhiteSpace(rootPath) Then Return False - - Return String.Equals( - destinationFull.TrimEnd(Path.DirectorySeparatorChar, 
Path.AltDirectorySeparatorChar), - rootPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), - StringComparison.OrdinalIgnoreCase) - End Function - End Class - - ''' - ''' Gemeinsame Normalisierung für relative Archiv-Entry-Pfade. - ''' - Friend NotInheritable Class ArchiveEntryPathPolicy - Private Sub New() - End Sub - - Friend Shared Function TryNormalizeRelativePath( - rawPath As String, - allowDirectoryMarker As Boolean, - ByRef normalizedPath As String, - ByRef isDirectory As Boolean - ) As Boolean - Dim safe As String - Dim trimmed As String - Dim segments As String() - - normalizedPath = String.Empty - isDirectory = False - - safe = If(rawPath, String.Empty).Trim() - If safe.Length = 0 Then Return False - If safe.Contains(ChrW(0)) Then Return False - - safe = safe.Replace("\"c, "/"c) - If Path.IsPathRooted(safe) Then Return False - safe = safe.TrimStart("/"c) - If safe.Length = 0 Then Return False - - trimmed = safe.TrimEnd("/"c) - If trimmed.Length = 0 Then - If Not allowDirectoryMarker Then Return False - normalizedPath = safe - isDirectory = True - Return True - End If - - segments = trimmed.Split("/"c) - For Each seg In segments - If seg.Length = 0 Then Return False - If seg = "." OrElse seg = ".." Then Return False - Next - - If safe.Length <> trimmed.Length AndAlso Not allowDirectoryMarker Then - Return False - End If - - normalizedPath = If(allowDirectoryMarker, safe, trimmed) - isDirectory = allowDirectoryMarker AndAlso safe.Length <> trimmed.Length - Return True - End Function - End Class - ''' ''' Verfeinert ZIP-basierte Office-Container zu Dokumenttypen anhand kanonischer Paketmarker. ''' Implementationsprinzip: @@ -440,7 +189,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ZIP-Entry, der gelesen werden soll. ''' Maximal erlaubte Größe in Byte. ''' ASCII-Textinhalt oder leerer String bei Guard-/Fehlerpfad. 
- Private Shared Function ReadZipEntryText(entry As ZipArchiveEntry, maxBytes As Integer) As String + Private Shared Function ReadZipEntryText _ + ( + entry As ZipArchiveEntry, + maxBytes As Integer + ) As String Dim buffer As Byte() Dim readTotal As Integer Dim readCount As Integer @@ -522,7 +275,7 @@ Namespace Global.Tomtastisch.FileClassifier ''' Kompletter oder teilweiser OLE-Payload. ''' Gemappter Office-Typ oder . Friend Shared Function TryRefineBytes(data As Byte()) As FileType - If data Is Nothing OrElse data.Length = 0 Then Return FileTypeRegistry.Resolve(FileKind.Unknown) + If Not ByteArrayGuard.HasContent(data) Then Return FileTypeRegistry.Resolve(FileKind.Unknown) Try Return RefineByMarkers(data) @@ -545,7 +298,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Lesbarer Quellstream. ''' Maximale Probegröße; wird intern defensiv gekappt. ''' Gemappter Office-Typ oder . - Friend Shared Function TryRefineStream(stream As Stream, maxProbeBytes As Integer) As FileType + Friend Shared Function TryRefineStream _ + ( + stream As Stream, + maxProbeBytes As Integer + ) As FileType Dim probeLimit As Integer Dim chunk(4095) As Byte Dim readTotal As Integer @@ -624,7 +381,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' Quellpuffer. ''' Gesuchte Marker-Bytefolge. ''' True bei Treffer, sonst False. - Private Shared Function ContainsMarker(data As Byte(), marker As Byte()) As Boolean + Private Shared Function ContainsMarker _ + ( + data As Byte(), + marker As Byte() + ) As Boolean Dim i As Integer Dim j As Integer @@ -644,54 +405,4 @@ Namespace Global.Tomtastisch.FileClassifier End Function End Class - ''' - ''' Defensiver Logger-Schutz. - ''' Logging darf niemals zu Erkennungsfehlern oder Exceptions führen. 
- ''' - Friend NotInheritable Class LogGuard - Private Sub New() - End Sub - - Friend Shared Sub Debug(logger As ILogger, message As String) - If logger Is Nothing Then Return - If Not logger.IsEnabled(LogLevel.Debug) Then Return - Try - logger.LogDebug("{Message}", message) - Catch ex As Exception When _ - TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException OrElse - TypeOf ex Is FormatException OrElse - TypeOf ex Is ArgumentException - ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. - End Try - End Sub - - Friend Shared Sub Warn(logger As ILogger, message As String) - If logger Is Nothing Then Return - If Not logger.IsEnabled(LogLevel.Warning) Then Return - Try - logger.LogWarning("{Message}", message) - Catch ex As Exception When _ - TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException OrElse - TypeOf ex Is FormatException OrElse - TypeOf ex Is ArgumentException - ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. - End Try - End Sub - - Friend Shared Sub [Error](logger As ILogger, message As String, ex As Exception) - If logger Is Nothing Then Return - If Not logger.IsEnabled(LogLevel.Error) Then Return - Try - logger.LogError(ex, "{Message}", message) - Catch logEx As Exception When _ - TypeOf logEx Is InvalidOperationException OrElse - TypeOf logEx Is ObjectDisposedException OrElse - TypeOf logEx Is FormatException OrElse - TypeOf logEx Is ArgumentException - ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. 
- End Try - End Sub - End Class End Namespace diff --git a/src/FileTypeDetection/Infrastructure/README.md b/src/FileTypeDetection/Infrastructure/README.md index 5d8e3c47..7fbc2a2c 100644 --- a/src/FileTypeDetection/Infrastructure/README.md +++ b/src/FileTypeDetection/Infrastructure/README.md @@ -4,7 +4,9 @@ Dieses Verzeichnis kapselt sicherheitskritische interne Ausführungslogik für Archive, Bounds, Guards und Extraktion. ## 2. Inhalt -- `CoreInternals.vb`, `ArchiveInternals.vb`, `ArchiveManagedInternals.vb`, `MimeProvider.vb`. +- `CoreInternals.vb`: container-spezifische Verfeinerungslogik (OpenXML, Legacy-Office). +- `ArchiveInternals.vb`, `ArchiveManagedInternals.vb`, `MimeProvider.vb`. +- Untermodul `Utils/` als zentrale SSOT fuer interne Guards, Pfad-Policies, Logging und wiederverwendbare I/O-Helfer. ## 3. API und Verhalten - Erzwingt fail-closed bei Traversal, Link-Entries, Größenlimits und ungültigen Archiven. @@ -23,5 +25,6 @@ flowchart LR ## 6. Verweise - [Modulübersicht](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) +- [Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) - [Policy CI](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/001_POLICY_CI.MD) diff --git a/src/FileTypeDetection/Infrastructure/Utils/ArchiveGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/ArchiveGuards.vb new file mode 100644 index 00000000..9afe0ece --- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/ArchiveGuards.vb @@ -0,0 +1,214 @@ +' ============================================================================ +' FILE: ArchiveGuards.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im 
Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System +Imports System.IO +Imports Tomtastisch.FileClassifier + +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils + + ''' + ''' Zentrale Byte-Array-Guards für konsistente Null-/Leer-Prüfungen. + ''' + Friend NotInheritable Class ByteArrayGuard + Private Sub New() + End Sub + + Friend Shared Function HasContent(data As Byte()) As Boolean + Return data IsNot Nothing AndAlso data.Length > 0 + End Function + End Class + + ''' + ''' Sicherheits-Gate für Archive-Container. + ''' + Friend NotInheritable Class ArchiveSafetyGate + Private Sub New() + End Sub + + Friend Shared Function IsArchiveSafeBytes _ + ( + data As Byte(), + opt As FileTypeProjectOptions, + descriptor As ArchiveDescriptor + ) As Boolean + If Not ByteArrayGuard.HasContent(data) Then Return False + If opt Is Nothing Then Return False + If descriptor Is Nothing OrElse + descriptor.ContainerType = ArchiveContainerType.Unknown Then Return False + + Try + Using ms As New MemoryStream(data, writable:=False) + Return IsArchiveSafeStream(ms, opt, descriptor, depth:=0) + End Using + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is InvalidDataException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException OrElse + TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException + LogGuard.Debug(opt.Logger, $"[ArchiveGate] Bytes-Fehler: {ex.Message}") + Return False + End Try + End Function + + Friend Shared Function IsArchiveSafeStream _ + ( + stream As Stream, + opt As FileTypeProjectOptions, + descriptor As ArchiveDescriptor, + depth As Integer + ) As Boolean + If Not StreamGuard.IsReadable(stream) Then Return 
False + If opt Is Nothing Then Return False + Return ArchiveProcessingEngine.ValidateArchiveStream(stream, opt, depth, descriptor) + End Function + End Class + + ''' + ''' Gemeinsame Guards für signaturbasierte Archiv-Byte-Payloads. + ''' + Friend NotInheritable Class ArchiveSignaturePayloadGuard + Private Sub New() + End Sub + + Friend Shared Function IsArchiveSignatureCandidate _ + ( + data As Byte() + ) As Boolean + If Not ByteArrayGuard.HasContent(data) Then Return False + Return FileTypeRegistry.DetectByMagic(data) = FileKind.Zip + End Function + End Class + + ''' + ''' Gemeinsame Policy-Prüfung für Link-Entries in Archiven. + ''' + Friend NotInheritable Class ArchiveLinkGuard + Private Sub New() + End Sub + + Friend Shared Function IsRejectedLink _ + ( + opt As FileTypeProjectOptions, + linkTarget As String, + logPrefix As String, + logWhenRejected As Boolean + ) As Boolean + If opt Is Nothing Then Return True + + If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(linkTarget) Then + If logWhenRejected Then + LogGuard.Warn(opt.Logger, $"{logPrefix} Link-Entry ist nicht erlaubt.") + End If + + Return True + End If + + Return False + End Function + End Class + + ''' + ''' Gemeinsame Guards für beliebige Archive-Byte-Payloads. 
+ ''' + Friend NotInheritable Class ArchivePayloadGuard + Private Sub New() + End Sub + + Friend Shared Function IsSafeArchivePayload _ + ( + data As Byte(), + opt As FileTypeProjectOptions + ) As Boolean + Dim descriptor As ArchiveDescriptor = ArchiveDescriptor.UnknownDescriptor() + + Return TryDescribeSafeArchivePayload(data, opt, descriptor) + End Function + + Friend Shared Function TryDescribeSafeArchivePayload _ + ( + data As Byte(), + opt As FileTypeProjectOptions, + ByRef descriptor As ArchiveDescriptor + ) As Boolean + + descriptor = ArchiveDescriptor.UnknownDescriptor() + + If Not ByteArrayGuard.HasContent(data) Then Return False + If opt Is Nothing Then Return False + If CLng(data.Length) > opt.MaxBytes Then Return False + + If Not ArchiveTypeResolver.TryDescribeBytes(data, opt, descriptor) Then Return False + Return ArchiveSafetyGate.IsArchiveSafeBytes(data, opt, descriptor) + End Function + + End Class + + ''' + ''' Gemeinsame Normalisierung für relative Archiv-Entry-Pfade. + ''' + Friend NotInheritable Class ArchiveEntryPathPolicy + Private Sub New() + End Sub + + Friend Shared Function TryNormalizeRelativePath _ + ( + rawPath As String, + allowDirectoryMarker As Boolean, + ByRef normalizedPath As String, + ByRef isDirectory As Boolean + ) As Boolean + Dim safe As String + Dim trimmed As String + Dim segments As String() + + normalizedPath = String.Empty + isDirectory = False + + safe = If(rawPath, String.Empty).Trim() + If safe.Length = 0 Then Return False + If safe.Contains(ChrW(0)) Then Return False + + safe = safe.Replace("\"c, "/"c) + If Path.IsPathRooted(safe) Then Return False + safe = safe.TrimStart("/"c) + If safe.Length = 0 Then Return False + + trimmed = safe.TrimEnd("/"c) + If trimmed.Length = 0 Then + If Not allowDirectoryMarker Then Return False + normalizedPath = safe + isDirectory = True + Return True + End If + + segments = trimmed.Split("/"c) + For Each seg In segments + If seg.Length = 0 Then Return False + If seg = "." 
OrElse seg = ".." Then Return False + Next + + If safe.Length <> trimmed.Length AndAlso Not allowDirectoryMarker Then + Return False + End If + + normalizedPath = If(allowDirectoryMarker, safe, trimmed) + isDirectory = allowDirectoryMarker AndAlso safe.Length <> trimmed.Length + Return True + End Function + End Class + +End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/DestinationPathGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/DestinationPathGuard.vb new file mode 100644 index 00000000..d832b1da --- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/DestinationPathGuard.vb @@ -0,0 +1,166 @@ +' ============================================================================ +' FILE: DestinationPathGuard.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System +Imports System.IO +Imports Tomtastisch.FileClassifier + +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils + + ''' + ''' Gemeinsame Zielpfad-Policy für Materialisierung und Archiv-Extraktion. + ''' + Friend Interface IDestinationPathPolicy + Function PrepareMaterializationTarget _ + ( + destinationFull As String, + overwrite As Boolean, + opt As FileTypeProjectOptions + ) As Boolean + + Function ValidateNewExtractionTarget _ + ( + destinationFull As String, + opt As FileTypeProjectOptions + ) As Boolean + + Function IsRootPath _ + ( + destinationFull As String + ) As Boolean + End Interface + + ''' + ''' Standardimplementierung der internen Zielpfad-Policy. 
+ ''' + Friend NotInheritable Class DefaultDestinationPathPolicy + Implements IDestinationPathPolicy + + Friend Shared ReadOnly Instance As IDestinationPathPolicy = _ + New DefaultDestinationPathPolicy() + + Private Sub New() + End Sub + + Public Function PrepareMaterializationTarget _ + ( + destinationFull As String, + overwrite As Boolean, + opt As FileTypeProjectOptions + ) As Boolean _ + Implements IDestinationPathPolicy.PrepareMaterializationTarget + If IsRootPath(destinationFull) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") + Return False + End If + + If File.Exists(destinationFull) Then + If Not overwrite Then Return False + File.Delete(destinationFull) + ElseIf Directory.Exists(destinationFull) Then + If Not overwrite Then Return False + Directory.Delete(destinationFull, recursive:=True) + End If + + Return True + End Function + + Public Function ValidateNewExtractionTarget _ + ( + destinationFull As String, + opt As FileTypeProjectOptions + ) _ + As Boolean Implements IDestinationPathPolicy.ValidateNewExtractionTarget + Dim parent As String + + If IsRootPath(destinationFull) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") + Return False + End If + + If File.Exists(destinationFull) OrElse Directory.Exists(destinationFull) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel existiert bereits.") + Return False + End If + + parent = Path.GetDirectoryName(destinationFull) + If String.IsNullOrWhiteSpace(parent) Then + LogGuard.Warn(opt.Logger, "[PathGuard] Ziel ohne gültigen Parent.") + Return False + End If + + Return True + End Function + + Public Function IsRootPath _ + ( + destinationFull As String + ) As Boolean _ + Implements IDestinationPathPolicy.IsRootPath + Dim rootPath As String + + If String.IsNullOrWhiteSpace(destinationFull) Then Return False + + Try + rootPath = Path.GetPathRoot(destinationFull) + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException 
OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + Return False + End Try + + If String.IsNullOrWhiteSpace(rootPath) Then Return False + + Return String.Equals( + destinationFull.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), + rootPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), + StringComparison.OrdinalIgnoreCase) + End Function + End Class + + Friend NotInheritable Class DestinationPathGuard + Private Shared ReadOnly Policy As IDestinationPathPolicy = _ + DefaultDestinationPathPolicy.Instance + + Private Sub New() + End Sub + + Friend Shared Function PrepareMaterializationTarget _ + ( + destinationFull As String, + overwrite As Boolean, + opt As FileTypeProjectOptions + ) As Boolean + Return Policy.PrepareMaterializationTarget(destinationFull, overwrite, opt) + End Function + + Friend Shared Function ValidateNewExtractionTarget _ + ( + destinationFull As String, + opt As FileTypeProjectOptions + ) _ + As Boolean + Return Policy.ValidateNewExtractionTarget(destinationFull, opt) + End Function + + Friend Shared Function IsRootPath _ + ( + destinationFull As String + ) As Boolean + Return Policy.IsRootPath(destinationFull) + End Function + End Class + +End Namespace diff --git a/src/FileTypeDetection/Utils/EnumUtils.vb b/src/FileTypeDetection/Infrastructure/Utils/EnumUtils.vb similarity index 98% rename from src/FileTypeDetection/Utils/EnumUtils.vb rename to src/FileTypeDetection/Infrastructure/Utils/EnumUtils.vb index 5e8f83e3..579a069d 100644 --- a/src/FileTypeDetection/Utils/EnumUtils.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/EnumUtils.vb @@ -16,7 +16,7 @@ Option Explicit On Imports System -Namespace Global.Tomtastisch.FileClassifier.Utils +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ''' ''' Utility-Funktionen für Enum-Typen (values()). 
@@ -85,8 +85,9 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' Public Shared Function GetValues(Of TEnum As Structure)() As TEnum() - Return GetValues(Of TEnum)(EnumSortOrder.None, fromIndex:=Nothing, toIndex:=Nothing) - + Return GetValues(Of TEnum)( + EnumSortOrder.None, fromIndex:=Nothing, toIndex:=Nothing + ) End Function diff --git a/src/FileTypeDetection/Utils/GuardUtils.vb b/src/FileTypeDetection/Infrastructure/Utils/GuardUtils.vb similarity index 91% rename from src/FileTypeDetection/Utils/GuardUtils.vb rename to src/FileTypeDetection/Infrastructure/Utils/GuardUtils.vb index b2511984..efd1fbf1 100644 --- a/src/FileTypeDetection/Utils/GuardUtils.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/GuardUtils.vb @@ -15,7 +15,7 @@ Option Explicit On Imports System -Namespace Global.Tomtastisch.FileClassifier.Utils +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ''' ''' Utility-Funktionen für Guard-Clauses (Argumentprüfung). @@ -56,7 +56,11 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' Zu prüfender Wert. ''' Parametername für Exception-Metadaten. ''' Wird ausgelöst, wenn Nothing ist. - Public Shared Sub NotNothing(Of T)(value As T, paramName As String) + Public Shared Sub NotNothing(Of T) _ + ( + value As T, + paramName As String + ) ' Deklarationsblock Dim isNull As Boolean @@ -84,7 +88,12 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' Parametername für Exception-Metadaten. ''' Wird ausgelöst, wenn Nothing ist. ''' Wird ausgelöst, wenn die Länge nicht entspricht. - Public Shared Sub RequireLength(value As Array, expectedLength As Integer, paramName As String) + Public Shared Sub RequireLength _ + ( + value As Array, + expectedLength As Integer, + paramName As String + ) ' Deklarationsblock Dim actualLength As Integer @@ -124,7 +133,12 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' Wird ausgelöst, wenn Nothing ist. ''' Wird ausgelöst, wenn kein Enum ist. ''' Wird ausgelöst, wenn nicht definiert ist. 
- Public Shared Sub EnumDefined(enumType As Type, value As Object, paramName As String) + Public Shared Sub EnumDefined _ + ( + enumType As Type, + value As Object, + paramName As String + ) ' Deklarationsblock Dim isOk As Boolean diff --git a/src/FileTypeDetection/Infrastructure/Utils/IoGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/IoGuards.vb new file mode 100644 index 00000000..c0b4a733 --- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/IoGuards.vb @@ -0,0 +1,82 @@ +' ============================================================================ +' FILE: IoGuards.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System.IO + +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils + + ''' + ''' Interne Hilfsklasse InternalIoDefaults zur kapselnden Umsetzung von Guard-, I/O- und Policy-Logik. + ''' + Friend NotInheritable Class InternalIoDefaults + Friend Const CopyBufferSize As Integer = 8192 + Friend Const FileStreamBufferSize As Integer = 81920 + Friend Const DefaultSniffBytes As Integer = 4096 + + Private Sub New() + End Sub + End Class + + ''' + ''' Zentrale IO-Helfer für harte Grenzen. + ''' SSOT-Regel: bounded copy wird nur hier gepflegt. 
+ ''' + Friend NotInheritable Class StreamBounds + Private Sub New() + End Sub + + Friend Shared Sub CopyBounded _ + ( + input As Stream, + output As Stream, + maxBytes As Long + ) + Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte + Dim total As Long = 0 + Dim n As Integer + + While True + n = input.Read(buf, 0, buf.Length) + If n <= 0 Then Exit While + + total += n + If total > maxBytes Then Throw New InvalidOperationException("bounded copy exceeded") + output.Write(buf, 0, n) + End While + End Sub + End Class + + ''' + ''' Kleine, zentrale Stream-Guards, um duplizierte Pattern-Checks in Archivroutinen zu reduzieren. + ''' Keine Semantik: reine Abfrage/Positionierung. + ''' + Friend NotInheritable Class StreamGuard + Private Sub New() + End Sub + + Friend Shared Function IsReadable _ + ( + stream As Stream + ) As Boolean + Return stream IsNot Nothing AndAlso stream.CanRead + End Function + + Friend Shared Sub RewindToStart _ + ( + stream As Stream + ) + If stream Is Nothing Then Return + If stream.CanSeek Then stream.Position = 0 + End Sub + End Class + +End Namespace diff --git a/src/FileTypeDetection/Utils/IterableUtils.vb b/src/FileTypeDetection/Infrastructure/Utils/IterableUtils.vb similarity index 93% rename from src/FileTypeDetection/Utils/IterableUtils.vb rename to src/FileTypeDetection/Infrastructure/Utils/IterableUtils.vb index 1b7e2940..bc293861 100644 --- a/src/FileTypeDetection/Utils/IterableUtils.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/IterableUtils.vb @@ -13,7 +13,7 @@ Option Strict On Option Explicit On -Namespace Global.Tomtastisch.FileClassifier.Utils +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ''' ''' Utility-Funktionen für defensive Kopien (Array-basierte Rückgaben). @@ -57,7 +57,10 @@ Namespace Global.Tomtastisch.FileClassifier.Utils ''' Elementtyp. ''' Quelle; Nothing bleibt Nothing. ''' Defensive Kopie oder Nothing. 
- Public Shared Function CloneArray(Of T)(source As T()) As T() + Public Shared Function CloneArray(Of T) _ + ( + source As T() + ) As T() ' Deklarationsblock Dim copy() As T diff --git a/src/FileTypeDetection/Infrastructure/Utils/LogGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/LogGuard.vb new file mode 100644 index 00000000..e562097e --- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/LogGuard.vb @@ -0,0 +1,82 @@ +' ============================================================================ +' FILE: LogGuard.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System +Imports Microsoft.Extensions.Logging + +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils + + ''' + ''' Defensiver Logger-Schutz. + ''' Logging darf niemals zu Erkennungsfehlern oder Exceptions führen. + ''' + Friend NotInheritable Class LogGuard + Private Sub New() + End Sub + + Friend Shared Sub Debug _ + ( + logger As ILogger, + message As String + ) + If logger Is Nothing Then Return + If Not logger.IsEnabled(LogLevel.Debug) Then Return + Try + logger.LogDebug("{Message}", message) + Catch ex As Exception When _ + TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException OrElse + TypeOf ex Is FormatException OrElse + TypeOf ex Is ArgumentException + ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. 
+ End Try + End Sub + + Friend Shared Sub Warn _ + ( + logger As ILogger, + message As String + ) + If logger Is Nothing Then Return + If Not logger.IsEnabled(LogLevel.Warning) Then Return + Try + logger.LogWarning("{Message}", message) + Catch ex As Exception When _ + TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException OrElse + TypeOf ex Is FormatException OrElse + TypeOf ex Is ArgumentException + ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. + End Try + End Sub + + Friend Shared Sub [Error] _ + ( + logger As ILogger, + message As String, + ex As Exception + ) + If logger Is Nothing Then Return + If Not logger.IsEnabled(LogLevel.Error) Then Return + Try + logger.LogError(ex, "{Message}", message) + Catch logEx As Exception When _ + TypeOf logEx Is InvalidOperationException OrElse + TypeOf logEx Is ObjectDisposedException OrElse + TypeOf logEx Is FormatException OrElse + TypeOf logEx Is ArgumentException + ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. 
+ End Try + End Sub + End Class + +End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/PathResolutionGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/PathResolutionGuard.vb new file mode 100644 index 00000000..36db099d --- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/PathResolutionGuard.vb @@ -0,0 +1,64 @@ +' ============================================================================ +' FILE: PathResolutionGuard.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System +Imports System.IO +Imports Tomtastisch.FileClassifier + +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils + + ''' + ''' Zentrale FullPath-Auflösung mit fail-closed Fehlerbehandlung und konfigurierbarer Protokollstufe. 
+ ''' + Friend NotInheritable Class PathResolutionGuard + Private Sub New() + End Sub + + Friend Shared Function TryGetFullPath _ + ( + rawPath As String, + opt As FileTypeProjectOptions, + logPrefix As String, + warnLevel As Boolean, + ByRef fullPath As String + ) As Boolean + + Dim message As String + + fullPath = String.Empty + + Try + fullPath = Path.GetFullPath(rawPath) + Return True + Catch ex As Exception When _ + TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is PathTooLongException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + + If opt IsNot Nothing Then + message = $"{logPrefix}: {ex.Message}" + If warnLevel Then + LogGuard.Warn(opt.Logger, message) + Else + LogGuard.Debug(opt.Logger, message) + End If + End If + + Return False + End Try + End Function + End Class + +End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/README.md b/src/FileTypeDetection/Infrastructure/Utils/README.md new file mode 100644 index 00000000..321588ec --- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/README.md @@ -0,0 +1,37 @@ +# Infrastructure.Utils Modul + +## 1. Zweck +Dieses Verzeichnis enthaelt die zentrale SSOT-Schicht fuer interne, wiederverwendbare Utility-Helfer. +Der Fokus liegt auf deterministischen Guards, sicherer Pfad-/Archive-Validierung, defensiver I/O-Hilfe und policy-konformem Logging. + +## 2. Inhalt +- `GuardUtils.vb`: Argument-Guards fuer Null-, Enum- und Laengenpruefungen. +- `EnumUtils.vb`: deterministische Enum-Wertauflistung mit optionaler Sortierung und Range. +- `IterableUtils.vb`: defensive Array-Kopien fuer sichere Rueckgaben. +- `IoGuards.vb`: zentrale Stream-/Buffer-Helfer (`StreamGuard`, `StreamBounds`, `InternalIoDefaults`). +- `ArchiveGuards.vb`: Archive-spezifische Guards und Entry-Pfadnormalisierung. 
+- `PathResolutionGuard.vb`: fail-closed FullPath-Aufloesung mit kontrollierter Protokollierung. +- `DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung und Extraktion. +- `LogGuard.vb`: defensiver Logger-Schutz ohne Rekursion/Seiteneffekte. + +## 3. API und Verhalten +- Utilities sind stateless und deterministisch. +- Fehlerpfade sind fail-closed und liefern definierte Rueckgaben oder klar typisierte Exceptions. +- Utility-Klassen sind standardmaessig intern (`Friend`) und kapseln wiederholte Sicherheits-/Validierungsmuster. + +## 4. Verifikation +- Nutzung erfolgt in Core-/Infrastructure-/Abstraction-Typen. +- Korrektheit und Verhaltenstreue werden ueber Build-, Unit- und Contract-Tests abgesichert. + +## 5. Diagramm +```mermaid +flowchart LR + A["Call Site"] --> B["Infrastructure.Utils (SSOT)"] + B --> C["Deterministic Guard / IO / Path Decision"] + C --> D["Fail-Closed Result"] +``` + +## 6. Verweise +- [Modul-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) +- [Infrastructure-Modul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) +- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/src/FileTypeDetection/README.md b/src/FileTypeDetection/README.md index ead8d0af..2466ec95 100644 --- a/src/FileTypeDetection/README.md +++ b/src/FileTypeDetection/README.md @@ -32,6 +32,7 @@ flowchart LR - [Audit Index](https://github.com/tomtastisch/FileClassifier/blob/main/docs/audit/000_INDEX.MD) - [Detektion-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Detection/README.md) - [Infrastruktur-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) +- 
[Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) - [Konfiguration-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Configuration/README.md) - [Abstractions-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Abstractions/README.md) - [Composition-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Composition/README.md) diff --git a/src/FileTypeDetection/Utils/README.md b/src/FileTypeDetection/Utils/README.md deleted file mode 100644 index 9ffd2596..00000000 --- a/src/FileTypeDetection/Utils/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Utils Modul - -## 1. Zweck -Dieses Verzeichnis enthaelt kleine, wiederverwendbare Utility-Helfer fuer Guards, Enum-Werte und defensive Kopien. - -## 2. Inhalt -- `GuardUtils.vb` -- `EnumUtils.vb` -- `IterableUtils.vb` - -## 3. API und Verhalten -- Utilities sind stateless und deterministisch. -- `GuardUtils` validiert Argumente fail-closed per Exceptions. -- `EnumUtils` liefert typisierte Enum-Werte ohne LINQ-Zwang in Call-Sites. -- `IterableUtils` erstellt defensive Kopien fuer Array-Rueckgaben. - -## 4. Verifikation -- Nutzung erfolgt in Core-/Abstraction-Typen; Korrektheit wird durch bestehende Unit- und Contract-Tests abgesichert. - -## 5. Diagramm -N/A - -## 6. 
Verweise -- [Modul-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) -- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj b/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj index 4f44faa8..688c5a1d 100644 --- a/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj +++ b/tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj @@ -25,6 +25,7 @@ + diff --git a/tools/versioning/verify-version-convergence.sh b/tools/versioning/verify-version-convergence.sh index 2d3d841e..b579deab 100755 --- a/tools/versioning/verify-version-convergence.sh +++ b/tools/versioning/verify-version-convergence.sh @@ -97,8 +97,8 @@ main() { if [[ "${REQUIRE_REMOTE}" == "1" ]]; then require_cmd curl require_cmd gh - export GH_TOKEN="${GH_TOKEN:-${GITHUB_TOKEN:-}}" - [[ -n "${GH_TOKEN}" ]] || fail "REQUIRE_REMOTE=1 needs GH_TOKEN/GITHUB_TOKEN" + export GH_TOKEN="${GH_TOKEN:-${GITHUB_TOKEN:-${SECURITY_CLAIMS_TOKEN:-}}}" + [[ -n "${GH_TOKEN}" ]] || fail "REQUIRE_REMOTE=1 needs GH_TOKEN/GITHUB_TOKEN/SECURITY_CLAIMS_TOKEN" release_tag="$(retry_with_backoff "github_release_lookup" gh api "repos/${REPO_SLUG}/releases/latest" --jq '.tag_name')" release_version="$(normalize_tag "${release_tag}")" From 43a602893db510ddc673cdb5c4ed03fed4b56e7e Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 18:37:28 +0100 Subject: [PATCH 21/34] fix(docs): remove branch-coupled links and invalid compare base - switch new module links to relative paths - use existing baseline tag v5.2.1 for 6.0.1 compare URLs --- docs/versioning/002_HISTORY_VERSIONS.MD | 2 +- docs/versioning/102_HISTORY_VERSIONS.MD | 2 +- src/FileTypeDetection/Infrastructure/README.md | 2 +- src/FileTypeDetection/Infrastructure/Utils/README.md | 6 +++--- src/FileTypeDetection/README.md | 4 
++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/versioning/002_HISTORY_VERSIONS.MD b/docs/versioning/002_HISTORY_VERSIONS.MD index 81020eee..0aa01db4 100644 --- a/docs/versioning/002_HISTORY_VERSIONS.MD +++ b/docs/versioning/002_HISTORY_VERSIONS.MD @@ -20,7 +20,7 @@ Hinweis: | Version | Kurzbeschreibung | Commit | Keyword | |---|---|---|---| -| `6.0.1` | Refactor-Haertung: interne SSOT-Utilities nach `Infrastructure/Utils` konsolidiert, Core-Utility-Logik in dedizierte Dateien gesplittet und Duplikat-Guards vereinheitlicht | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v6.0.0...HEAD) | patch | +| `6.0.1` | Refactor-Haertung: interne SSOT-Utilities nach `Infrastructure/Utils` konsolidiert, Core-Utility-Logik in dedizierte Dateien gesplittet und Duplikat-Guards vereinheitlicht | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | patch | | `6.0.0` | Breaking-Release: `FileKind`-Enum und `HashRoundTripReport`-Public-API auf neues Slot-/Methodenmodell umgestellt, Hashing-Interna in Core/RoundTrip/Io ausgelagert | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | SharpCompress-API auf typsichere Aufrufe umgestellt, tar.gz-Verarbeitung fail-closed gehaertet und Qodana-CI-Gate als Pflichtlauf dokumentiert/erzwungen | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | netstandard2.0-Compat-Layer eingefuehrt, Provider-Struktur konsolidiert und TFM-Multi-Targeting erweitert | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | diff --git a/docs/versioning/102_HISTORY_VERSIONS.MD b/docs/versioning/102_HISTORY_VERSIONS.MD index 829190c2..1ed9fabe 100644 --- a/docs/versioning/102_HISTORY_VERSIONS.MD +++ b/docs/versioning/102_HISTORY_VERSIONS.MD @@ -19,7 +19,7 @@ Note: | Version | Short description | Commit | Keyword | |---|---|---|---| -| `6.0.1` | Refactor 
hardening: consolidated internal SSOT utilities into `Infrastructure/Utils`, split core utility logic into dedicated files, and unified duplicate byte guards | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v6.0.0...HEAD) | patch | +| `6.0.1` | Refactor hardening: consolidated internal SSOT utilities into `Infrastructure/Utils`, split core utility logic into dedicated files, and unified duplicate byte guards | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | patch | | `6.0.0` | Breaking release: migrated `FileKind` enum and `HashRoundTripReport` public API to the new slot/method model and split hashing internals into core/roundtrip/io services | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.1...HEAD) | breaking | | `5.2.1` | Switched SharpCompress calls to type-safe APIs, hardened tar.gz fail-closed handling, and enforced/documented Qodana CI as a mandatory gate | [unreleased](https://github.com/tomtastisch/FileClassifier/compare/v5.2.0...HEAD) | patch | | `5.2.0` | Introduce netstandard2.0 compatibility layer, consolidate provider structure, and extend TFM multi-targeting | [8d65a52](https://github.com/tomtastisch/FileClassifier/commit/8d65a52) | minor | diff --git a/src/FileTypeDetection/Infrastructure/README.md b/src/FileTypeDetection/Infrastructure/README.md index 7fbc2a2c..e832103d 100644 --- a/src/FileTypeDetection/Infrastructure/README.md +++ b/src/FileTypeDetection/Infrastructure/README.md @@ -25,6 +25,6 @@ flowchart LR ## 6. 
Verweise - [Modulübersicht](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) -- [Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) +- [Infrastructure.Utils-Submodul](Utils/README.md) - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) - [Policy CI](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/001_POLICY_CI.MD) diff --git a/src/FileTypeDetection/Infrastructure/Utils/README.md b/src/FileTypeDetection/Infrastructure/Utils/README.md index 321588ec..e51284e6 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/README.md +++ b/src/FileTypeDetection/Infrastructure/Utils/README.md @@ -32,6 +32,6 @@ flowchart LR ``` ## 6. Verweise -- [Modul-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) -- [Infrastructure-Modul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) -- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) +- [Modul-Root](../../README.md) +- [Infrastructure-Modul](../README.md) +- [Code-Quality-Policy](../../../../docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/src/FileTypeDetection/README.md b/src/FileTypeDetection/README.md index 2466ec95..fb13b5a9 100644 --- a/src/FileTypeDetection/README.md +++ b/src/FileTypeDetection/README.md @@ -31,8 +31,8 @@ flowchart LR - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) - [Audit Index](https://github.com/tomtastisch/FileClassifier/blob/main/docs/audit/000_INDEX.MD) - [Detektion-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Detection/README.md) -- 
[Infrastruktur-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) -- [Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) +- [Infrastruktur-Submodul](Infrastructure/README.md) +- [Infrastructure.Utils-Submodul](Infrastructure/Utils/README.md) - [Konfiguration-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Configuration/README.md) - [Abstractions-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Abstractions/README.md) - [Composition-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Composition/README.md) From 1e2c6bb42399a46f1dd4c3d8e6af293b44307df4 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 18:41:28 +0100 Subject: [PATCH 22/34] refactor(utils): cluster guards and normalize guard naming - move guard-focused utilities to Infrastructure/Utils/Guards - rename GuardUtils to ArgumentGuard and IoGuards to IOGuards - keep non-guard helpers (EnumUtils/IterableUtils) at Utils root - add Guards README and update governance/docs references --- docs/governance/045_COMPLIANCE_STATUS_DE.MD | 2 +- .../046_ISSUE_105_106_107_CLOSURE_DE.MD | 4 +-- .../Hashing/HashRoundTripReport.vb | 8 ++--- .../Utils/{ => Guards}/ArchiveGuards.vb | 0 .../ArgumentGuard.vb} | 4 +-- .../{ => Guards}/DestinationPathGuard.vb | 0 .../Utils/{IoGuards.vb => Guards/IOGuards.vb} | 2 +- .../Utils/{ => Guards}/LogGuard.vb | 0 .../Utils/{ => Guards}/PathResolutionGuard.vb | 0 .../Infrastructure/Utils/Guards/README.md | 32 +++++++++++++++++++ .../Infrastructure/Utils/README.md | 17 +++++----- 11 files changed, 51 insertions(+), 18 deletions(-) rename src/FileTypeDetection/Infrastructure/Utils/{ => Guards}/ArchiveGuards.vb (100%) rename src/FileTypeDetection/Infrastructure/Utils/{GuardUtils.vb => 
Guards/ArgumentGuard.vb} (98%) rename src/FileTypeDetection/Infrastructure/Utils/{ => Guards}/DestinationPathGuard.vb (100%) rename src/FileTypeDetection/Infrastructure/Utils/{IoGuards.vb => Guards/IOGuards.vb} (99%) rename src/FileTypeDetection/Infrastructure/Utils/{ => Guards}/LogGuard.vb (100%) rename src/FileTypeDetection/Infrastructure/Utils/{ => Guards}/PathResolutionGuard.vb (100%) create mode 100644 src/FileTypeDetection/Infrastructure/Utils/Guards/README.md diff --git a/docs/governance/045_COMPLIANCE_STATUS_DE.MD b/docs/governance/045_COMPLIANCE_STATUS_DE.MD index cb45a0ed..0c87eb98 100644 --- a/docs/governance/045_COMPLIANCE_STATUS_DE.MD +++ b/docs/governance/045_COMPLIANCE_STATUS_DE.MD @@ -37,7 +37,7 @@ Nachweisbare, reproduzierbare Einhaltung der Kernvorgaben aus `045_CODE_QUALITY_ - `FileTypeDetector` nutzt zentrale Registry-Regel für structured refinement. - Alias-Definitionen in `FileTypeRegistry` über Helper vereinheitlicht. 2. Policy-Header harmonisiert: - - `HashRoundTripReport.vb`, `EnumUtils.vb`, `GuardUtils.vb`, `IterableUtils.vb`. + - `HashRoundTripReport.vb`, `EnumUtils.vb`, `ArgumentGuard.vb`, `IterableUtils.vb`. ## Hinweis Diese Statusdatei dokumentiert den nachweisbaren Kernumfang der automatisierten 045-Compliance-Prüfung. diff --git a/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD b/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD index 654f8a8f..01cf6e3f 100644 --- a/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD +++ b/docs/governance/046_ISSUE_105_106_107_CLOSURE_DE.MD @@ -64,12 +64,12 @@ Refactor-Matrix (Duplikatquelle -> neue Utility-Stelle): - -> `EvidenceHashingIo.vb` - Methoden: `TryReadFileBounded`, `SetReadFileError`. 4. Enum- und Guard-Helfer in mehreren Klassen - - -> `EnumUtils.vb`, `GuardUtils.vb`, `IterableUtils.vb` + - -> `EnumUtils.vb`, `ArgumentGuard.vb`, `IterableUtils.vb` - Einsatz sichtbar in `HashRoundTripReport.vb`. 
Evidence-Kommandos: - `rg -n "EvidenceHashingCore|EvidenceHashingRoundTrip|EvidenceHashingIo" src tests -S` -- `rg -n "EnumUtils\\.|GuardUtils\\.|IterableUtils\\." src tests -S` +- `rg -n "EnumUtils\\.|ArgumentGuard\\.|IterableUtils\\." src tests -S` - `dotnet test FileClassifier.sln -v minimal` ## Entscheidung diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb index c1e4a18f..41a26dd2 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashRoundTripReport.vb @@ -117,7 +117,7 @@ Namespace Global.Tomtastisch.FileClassifier ' ----------------------------------------------------------------- ' Guard-Clauses (fail-closed) ' ----------------------------------------------------------------- - GuardUtils.RequireLength(evidences, slotCount, NameOf(evidences)) + ArgumentGuard.RequireLength(evidences, slotCount, NameOf(evidences)) ' ----------------------------------------------------------------- ' Snapshot / Assignment (Input) @@ -176,7 +176,7 @@ Namespace Global.Tomtastisch.FileClassifier ' Deklarationsblock Dim index As Integer - GuardUtils.EnumDefined(GetType(HashSlot), slot, NameOf(slot)) + ArgumentGuard.EnumDefined(GetType(HashSlot), slot, NameOf(slot)) index = SlotIndex(slot) Return _evidences(index) @@ -197,7 +197,7 @@ Namespace Global.Tomtastisch.FileClassifier Throw New ArgumentException("Use H2..Hn.", NameOf(otherSlot)) End If - GuardUtils.EnumDefined(GetType(HashSlot), otherSlot, NameOf(otherSlot)) + ArgumentGuard.EnumDefined(GetType(HashSlot), otherSlot, NameOf(otherSlot)) index = OtherIndex(otherSlot) Return _logicalEq(index) @@ -218,7 +218,7 @@ Namespace Global.Tomtastisch.FileClassifier Throw New ArgumentException("Use H2..Hn.", NameOf(otherSlot)) End If - GuardUtils.EnumDefined(GetType(HashSlot), otherSlot, NameOf(otherSlot)) + ArgumentGuard.EnumDefined(GetType(HashSlot), otherSlot, 
NameOf(otherSlot)) index = OtherIndex(otherSlot) Return _physicalEq(index) diff --git a/src/FileTypeDetection/Infrastructure/Utils/ArchiveGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb similarity index 100% rename from src/FileTypeDetection/Infrastructure/Utils/ArchiveGuards.vb rename to src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb diff --git a/src/FileTypeDetection/Infrastructure/Utils/GuardUtils.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb similarity index 98% rename from src/FileTypeDetection/Infrastructure/Utils/GuardUtils.vb rename to src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb index efd1fbf1..8d6e57c6 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/GuardUtils.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb @@ -1,5 +1,5 @@ ' ============================================================================ -' FILE: GuardUtils.vb +' FILE: ArgumentGuard.vb ' ' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) ' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD @@ -32,7 +32,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ''' - Keine stillen Korrekturen, keine Side-Effects. 
''' ''' - Friend NotInheritable Class GuardUtils + Friend NotInheritable Class ArgumentGuard Private Sub New() End Sub diff --git a/src/FileTypeDetection/Infrastructure/Utils/DestinationPathGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb similarity index 100% rename from src/FileTypeDetection/Infrastructure/Utils/DestinationPathGuard.vb rename to src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb diff --git a/src/FileTypeDetection/Infrastructure/Utils/IoGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb similarity index 99% rename from src/FileTypeDetection/Infrastructure/Utils/IoGuards.vb rename to src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb index c0b4a733..e8e3e7e8 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/IoGuards.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb @@ -1,5 +1,5 @@ ' ============================================================================ -' FILE: IoGuards.vb +' FILE: IOGuards.vb ' ' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) ' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD diff --git a/src/FileTypeDetection/Infrastructure/Utils/LogGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb similarity index 100% rename from src/FileTypeDetection/Infrastructure/Utils/LogGuard.vb rename to src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb diff --git a/src/FileTypeDetection/Infrastructure/Utils/PathResolutionGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb similarity index 100% rename from src/FileTypeDetection/Infrastructure/Utils/PathResolutionGuard.vb rename to src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md b/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md new file mode 100644 index 00000000..d2d80490 
--- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md @@ -0,0 +1,32 @@ +# Infrastructure.Utils.Guards Modul + +## 1. Zweck +Dieses Untermodul kapselt ausschliesslich Guard- und Policy-Utilities mit fail-closed Verhalten. + +## 2. Inhalt +- `ArgumentGuard.vb`: deterministische Argument- und Enum-Validierung. +- `IOGuards.vb`: Stream-Lesbarkeit, Rewind und bounded Copy. +- `ArchiveGuards.vb`: Archive-Payload-, Link- und Entry-Path-Guards. +- `PathResolutionGuard.vb`: sichere FullPath-Aufloesung mit kontrollierter Protokollierung. +- `DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung/Extraktion. +- `LogGuard.vb`: defensives Logging ohne Rekursion. + +## 3. API und Verhalten +- Alle Klassen sind stateless und deterministisch. +- Fehlerpfade sind fail-closed und liefern klare Rueckgaben. + +## 4. Verifikation +- Nutzung erfolgt in `FileMaterializer`, `ArchiveInternals`, `CoreInternals` und Hashing-Komponenten. + +## 5. Diagramm +```mermaid +flowchart LR + A["Call Site"] --> B["Utils/Guards"] + B --> C["Guard Decision"] + C --> D["Fail-Closed Output"] +``` + +## 6. Verweise +- [Utils-Root](../README.md) +- [Infrastructure-Modul](../../README.md) +- [Code-Quality-Policy](../../../../../docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/src/FileTypeDetection/Infrastructure/Utils/README.md b/src/FileTypeDetection/Infrastructure/Utils/README.md index e51284e6..f9188640 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/README.md +++ b/src/FileTypeDetection/Infrastructure/Utils/README.md @@ -5,14 +5,14 @@ Dieses Verzeichnis enthaelt die zentrale SSOT-Schicht fuer interne, wiederverwen Der Fokus liegt auf deterministischen Guards, sicherer Pfad-/Archive-Validierung, defensiver I/O-Hilfe und policy-konformem Logging. ## 2. Inhalt -- `GuardUtils.vb`: Argument-Guards fuer Null-, Enum- und Laengenpruefungen. -- `EnumUtils.vb`: deterministische Enum-Wertauflistung mit optionaler Sortierung und Range. 
-- `IterableUtils.vb`: defensive Array-Kopien fuer sichere Rueckgaben. -- `IoGuards.vb`: zentrale Stream-/Buffer-Helfer (`StreamGuard`, `StreamBounds`, `InternalIoDefaults`). -- `ArchiveGuards.vb`: Archive-spezifische Guards und Entry-Pfadnormalisierung. -- `PathResolutionGuard.vb`: fail-closed FullPath-Aufloesung mit kontrollierter Protokollierung. -- `DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung und Extraktion. -- `LogGuard.vb`: defensiver Logger-Schutz ohne Rekursion/Seiteneffekte. +- `EnumUtils.vb`: deterministische Enum-Wertauflistung mit optionaler Sortierung und Range (kein Guard). +- `IterableUtils.vb`: defensive Array-Kopien fuer sichere Rueckgaben (kein Guard). +- `Guards/ArgumentGuard.vb`: Argument-Guards fuer Null-, Enum- und Laengenpruefungen. +- `Guards/IOGuards.vb`: zentrale Stream-/Buffer-Helfer (`StreamGuard`, `StreamBounds`, `InternalIoDefaults`). +- `Guards/ArchiveGuards.vb`: Archive-spezifische Guards und Entry-Pfadnormalisierung. +- `Guards/PathResolutionGuard.vb`: fail-closed FullPath-Aufloesung mit kontrollierter Protokollierung. +- `Guards/DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung und Extraktion. +- `Guards/LogGuard.vb`: defensiver Logger-Schutz ohne Rekursion/Seiteneffekte. ## 3. API und Verhalten - Utilities sind stateless und deterministisch. @@ -34,4 +34,5 @@ flowchart LR ## 6. 
Verweise - [Modul-Root](../../README.md) - [Infrastructure-Modul](../README.md) +- [Guards-Cluster](Guards/README.md) - [Code-Quality-Policy](../../../../docs/governance/045_CODE_QUALITY_POLICY_DE.MD) From 905a28b64ba6599b685968a2770165789479522e Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 18:43:01 +0100 Subject: [PATCH 23/34] docs(hashing): complete policy-level member documentation - add XML docs for internal ctor/helper members in HashEvidence - add XML docs for Clone/Normalize and filename normalization in HashOptions --- .../Abstractions/Hashing/HashEvidence.vb | 25 +++++++++++++++++++ .../Abstractions/Hashing/HashOptions.vb | 17 +++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb index 702a4201..e83ca06a 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashEvidence.vb @@ -69,6 +69,19 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property Notes As String + ''' + ''' Interner Vollkonstruktor zur deterministischen Erzeugung eines Evidence-Snapshots. + ''' + ''' Herkunftskanal der Hashquelle. + ''' Fachliches Quelllabel. + ''' Ermittelter Dateitypkontext. + ''' Optionaler Archiveintrag. + ''' Optionale komprimierte Bytes als Quelle für defensive Kopie. + ''' Optionale unkomprimierte/logische Bytes als Quelle für defensive Kopie. + ''' Anzahl berücksichtigter Entries (wird auf >= 0 normalisiert). + ''' Gesamtgröße der Nutzdaten in Bytes (wird auf >= 0 normalisiert). + ''' Deterministischer Digest-Satz. + ''' Ergänzende Hinweise. Friend Sub New _ ( sourceType As HashSourceType, @@ -95,6 +108,13 @@ Namespace Global.Tomtastisch.FileClassifier Me.UncompressedBytes = ToImmutable(uncompressedBytes) End Sub + ''' + ''' Erzeugt einen fail-closed Evidence-Snapshot für Fehlerpfade. + ''' + ''' Herkunftskanal der Hashquelle. 
+ ''' Fachliches Quelllabel. + ''' Fehler-/Hinweistext. + ''' Evidence mit leerem Digest-Satz und Unknown-Typkontext. Friend Shared Function CreateFailure _ ( sourceType As HashSourceType, @@ -115,6 +135,11 @@ Namespace Global.Tomtastisch.FileClassifier notes:=notes) End Function + ''' + ''' Erstellt aus einem Bytearray eine unveränderliche Kopie. + ''' + ''' Quellbytes oder Nothing. + ''' Leeres ImmutableArray bei fehlendem Inhalt, sonst defensive Kopie. Private Shared Function ToImmutable _ ( data As Byte() diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb index d63ff250..f7ce5dab 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashOptions.vb @@ -43,6 +43,10 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public Property MaterializedFileName As String = "deterministic-roundtrip.bin" + ''' + ''' Erstellt eine interne, seiteneffektfreie Kopie der Optionen. + ''' + ''' Neue Instanz mit identischen aktuellen Optionswerten. Friend Function Clone() As HashOptions Return New HashOptions With { @@ -53,6 +57,11 @@ Namespace Global.Tomtastisch.FileClassifier } End Function + ''' + ''' Normalisiert ein Optionsobjekt fail-closed auf sichere Standardwerte. + ''' + ''' Zu normalisierende Optionen; Nothing erzeugt Standardoptionen. + ''' Normalisierte Optionskopie mit sicherem Materialisierungsdateinamen. Friend Shared Function Normalize _ ( options As HashOptions @@ -66,6 +75,14 @@ Namespace Global.Tomtastisch.FileClassifier Return cloned End Function + ''' + ''' Validiert und normalisiert den Materialisierungsdateinamen auf einen sicheren, deterministischen Wert. + ''' + ''' Kandidat aus den Optionen. + ''' + ''' Sicherheitsnormalisierter Dateiname. Bei ungültigem Eingabewert wird + ''' deterministic-roundtrip.bin zurückgegeben. 
+ ''' Private Shared Function NormalizeMaterializedFileName _ ( From 8a8da4bfbe30b28cdd0a3bebc186c6c7c06b8814 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 18:46:14 +0100 Subject: [PATCH 24/34] docs(hashing-core): complete internal member documentation - add XML docs for normalization/digest helper methods and NormalizedEntry members --- .../Hashing/Internal/EvidenceHashingCore.vb | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb index b6946481..cb9ff57a 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingCore.vb @@ -27,6 +27,17 @@ Namespace Global.Tomtastisch.FileClassifier Private Sub New() End Sub + ''' + ''' Baut deterministische Evidence aus normalisierbaren Archiveinträgen. + ''' + ''' Herkunftskanal des Nachweises. + ''' Quelllabel für den Report. + ''' Ermittelter Dateitypkontext. + ''' Optionale komprimierte Originalbytes. + ''' Extrahierte Archiveinträge. + ''' Hash-Konfiguration. + ''' Optionale Basishinweise. + ''' Vollständiges Evidence-Objekt oder fail-closed Fehler-Evidence. Friend Shared Function BuildEvidenceFromEntries _ ( sourceType As HashSourceType, @@ -134,6 +145,16 @@ Namespace Global.Tomtastisch.FileClassifier notes:=combinedNotes) End Function + ''' + ''' Baut deterministische Evidence aus einem rohen Payload-Bytearray. + ''' + ''' Herkunftskanal des Nachweises. + ''' Quelllabel für den Report. + ''' Ermittelter Dateitypkontext. + ''' Rohpayload; Nothing wird als leeres Array behandelt. + ''' Hash-Konfiguration. + ''' Optionale Basishinweise. + ''' Evidence-Objekt mit physischen und logischen Digests. 
Friend Shared Function BuildEvidenceFromRawPayload _ ( sourceType As HashSourceType, @@ -197,6 +218,13 @@ Namespace Global.Tomtastisch.FileClassifier notes:=combinedNotes) End Function + ''' + ''' Normalisiert Entry-Liste deterministisch und validiert sie fail-closed. + ''' + ''' Zu normalisierende Archiveinträge. + ''' Ausgabe der sortierten, normalisierten Entries. + ''' Fehlermeldung bei Validierungsfehlern. + ''' True bei erfolgreicher Normalisierung, sonst False. Friend Shared Function TryNormalizeEntries _ ( entries As IReadOnlyList(Of ZipExtractedEntry), @@ -241,6 +269,12 @@ Namespace Global.Tomtastisch.FileClassifier Return True End Function + ''' + ''' Normalisiert einen einzelnen Entry-Pfad nach der zentralen Archiv-Path-Policy. + ''' + ''' Unverarbeiteter Entry-Pfad. + ''' Normalisierter relativer Pfad. + ''' True bei gültigem Pfad, sonst False. Friend Shared Function TryNormalizeEntryPath _ ( rawPath As String, @@ -255,6 +289,11 @@ Namespace Global.Tomtastisch.FileClassifier isDirectory) End Function + ''' + ''' Erzeugt das kanonische logische Manifest als Bytefolge. + ''' + ''' Normalisierte und sortierte Entries. + ''' Deterministische Manifestbytes als Hash-Basis. Friend Shared Function BuildLogicalManifestBytes _ ( entries As IReadOnlyList(Of NormalizedEntry) @@ -286,6 +325,11 @@ Namespace Global.Tomtastisch.FileClassifier End Using End Function + ''' + ''' Berechnet SHA-256 als hexadezimale Kleinbuchstabenrepräsentation. + ''' + ''' Eingabedaten; Nothing wird als leeres Array behandelt. + ''' Hex-String des SHA-256-Digests. Friend Shared Function ComputeSha256Hex _ ( payload As Byte() @@ -295,6 +339,12 @@ Namespace Global.Tomtastisch.FileClassifier Return HashPrimitives.Current.Sha256.ComputeHashHex(data) End Function + ''' + ''' Berechnet optional einen schnellen, nicht-kryptografischen Digest. + ''' + ''' Eingabedaten; Nothing wird als leeres Array behandelt. + ''' Hash-Optionen; ohne Opt-In wird leerer String geliefert. 
+ ''' Fast-Hash als Hex-String oder leerer String. Friend Shared Function ComputeFastHash _ ( payload As Byte(), @@ -309,6 +359,12 @@ Namespace Global.Tomtastisch.FileClassifier Return HashPrimitives.Current.FastHash64.ComputeHashHex(data) End Function + ''' + ''' Berechnet einen HMAC-SHA256-Digest über die Payload. + ''' + ''' HMAC-Key; Nothing wird als leeres Array behandelt. + ''' Eingabedaten; Nothing wird als leeres Array behandelt. + ''' Hex-String des HMAC-SHA256-Digests. Friend Shared Function ComputeHmacSha256Hex _ ( key As Byte(), @@ -323,6 +379,12 @@ Namespace Global.Tomtastisch.FileClassifier End Using End Function + ''' + ''' Liest und validiert den HMAC-Key aus der definierten Environment-Variable. + ''' + ''' Ausgabe des dekodierten Keys. + ''' Ausgabe eines Hinweistextes bei fehlendem/ungültigem Key. + ''' True bei gültigem Key, sonst False. Friend Shared Function TryResolveHmacKey _ ( ByRef key As Byte(), @@ -358,6 +420,12 @@ Namespace Global.Tomtastisch.FileClassifier End Try End Function + ''' + ''' Fügt einen optionalen Hinweistext deterministisch an bestehende Notes an. + ''' + ''' Bereits vorhandene Notes. + ''' Optional anzuhängender Hinweis. + ''' Kombinierter, getrimmter Hinweistext. Friend Shared Function AppendNoteIfAny _ ( baseNotes As String, @@ -372,6 +440,11 @@ Namespace Global.Tomtastisch.FileClassifier Return left & " " & right End Function + ''' + ''' Normalisiert ein Label fail-closed auf einen stabilen Standardwert. + ''' + ''' Eingabelabel. + ''' Getrimmtes Label oder Standardlabel. Friend Shared Function NormalizeLabel _ ( label As String @@ -382,6 +455,11 @@ Namespace Global.Tomtastisch.FileClassifier Return normalized End Function + ''' + ''' Erstellt eine defensive Bytekopie ohne Seiteneffekte. + ''' + ''' Quellarray. + ''' Kopie der Bytes oder leeres Array. 
Friend Shared Function CopyBytes _ ( data As Byte() @@ -403,9 +481,21 @@ Namespace Global.Tomtastisch.FileClassifier ''' Relative Pfade und Inhalte werden nach Guard-Prüfung unveränderlich für deterministische Sortierung gehalten. ''' Friend NotInheritable Class NormalizedEntry + ''' + ''' Normalisierter relativer Entry-Pfad. + ''' Friend ReadOnly Property RelativePath As String + + ''' + ''' Normalisierte Entry-Inhaltsbytes. + ''' Friend ReadOnly Property Content As Byte() + ''' + ''' Erstellt eine unveränderliche NormalizedEntry-Instanz. + ''' + ''' Normalisierter relativer Pfad. + ''' Entry-Inhalt als defensive Nutzdatenrepräsentation. Friend Sub New(relativePath As String, content As Byte()) Me.RelativePath = If(relativePath, String.Empty) Me.Content = If(content, Array.Empty(Of Byte)()) From e8aefbfd0d9e137fbf030fcc56f6717412516594 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 18:56:59 +0100 Subject: [PATCH 25/34] fix(review): address remaining determinism and CI exposure threads - deduplicate early HashRoundTripReport failure construction - build alias map in stable FileKind order - remove unused SECURITY_CLAIMS_TOKEN from ci version-convergence job --- .github/workflows/ci.yml | 1 - .../Internal/EvidenceHashingRoundTrip.vb | 35 ++++++++++++++----- .../Detection/FileTypeRegistry.vb | 8 ++--- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 017c23fd..cc86f4e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -152,7 +152,6 @@ jobs: # during the normal sequence merge -> tag/release -> registry propagation. 
env: REQUIRE_REMOTE: "0" - SECURITY_CLAIMS_TOKEN: ${{ secrets.SECURITY_CLAIMS_TOKEN }} run: bash -euo pipefail tools/ci/bin/run.sh version-convergence - name: Upload Artifact if: always() diff --git a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb index aa78b1c0..3dc53a35 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/Internal/EvidenceHashingRoundTrip.vb @@ -60,10 +60,9 @@ Namespace Global.Tomtastisch.FileClassifier If String.IsNullOrWhiteSpace(path) OrElse Not IO.File.Exists(path) Then failed = HashEvidence.CreateFailure(HashSourceType.FilePath, path, "Datei nicht gefunden.") - Return New HashRoundTripReport( + Return CreateFailureReport( path, - isArchiveInput:=False, - notes:="Input file missing.", + "Input file missing.", failed, failed, failed, @@ -73,10 +72,9 @@ Namespace Global.Tomtastisch.FileClassifier h1 = EvidenceHashing.HashFile(path, normalizedOptions) If Not h1.Digests.HasLogicalHash Then failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, "h1 konnte nicht berechnet werden.") - Return New HashRoundTripReport( + Return CreateFailureReport( path, - isArchiveInput:=False, - notes:="h1 missing logical digest.", + "h1 missing logical digest.", h1, failed, failed, @@ -85,10 +83,9 @@ Namespace Global.Tomtastisch.FileClassifier If Not EvidenceHashingIO.TryReadFileBounded(path, detectorOptions, originalBytes, readError) Then failed = HashEvidence.CreateFailure(HashSourceType.Unknown, path, readError) - Return New HashRoundTripReport( + Return CreateFailureReport( path, - isArchiveInput:=False, - notes:=readError, + readError, h1, failed, failed, @@ -158,5 +155,25 @@ Namespace Global.Tomtastisch.FileClassifier h3, h4) End Function + + Private Shared Function CreateFailureReport _ + ( + path As String, + notes As String, + h1 As HashEvidence, 
+ h2 As HashEvidence, + h3 As HashEvidence, + h4 As HashEvidence + ) As HashRoundTripReport + + Return New HashRoundTripReport( + path, + isArchiveInput:=False, + notes:=notes, + h1, + h2, + h3, + h4) + End Function End Class End Namespace diff --git a/src/FileTypeDetection/Detection/FileTypeRegistry.vb b/src/FileTypeDetection/Detection/FileTypeRegistry.vb index 8b7d2e6d..ed93843a 100644 --- a/src/FileTypeDetection/Detection/FileTypeRegistry.vb +++ b/src/FileTypeDetection/Detection/FileTypeRegistry.vb @@ -363,15 +363,15 @@ Namespace Global.Tomtastisch.FileClassifier Private Shared Function BuildAliasMap(types As ImmutableDictionary(Of FileKind, FileType)) _ As ImmutableDictionary(Of String, FileKind) Dim builder As ImmutableDictionary(Of String, FileKind).Builder + Dim kind As FileKind + Dim t As FileType If types Is Nothing Then Return ImmutableDictionary(Of String, FileKind).Empty builder = ImmutableDictionary.CreateBuilder(Of String, FileKind)(StringComparer.OrdinalIgnoreCase) - For Each kv In types - Dim kind = kv.Key - Dim t = kv.Value - + For Each kind In OrderedKindsCache + If Not types.TryGetValue(kind, t) Then Continue For If t Is Nothing Then Continue For If t.Aliases.IsDefaultOrEmpty Then Continue For From 1877dc3471bbf4c6a4402fa000701dc3321b4389 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 19:02:53 +0100 Subject: [PATCH 26/34] fix(review): finalize docs and deterministic registry alias mapping - add missing HashDigestSet member docs per policy - replace ambiguous builder variable names in FileTypeRegistryConfig - initialize alias map temp variable to avoid warnings --- .../Abstractions/Hashing/HashDigestSet.vb | 19 +++++++ .../Configuration/FileTypeRegistryConfig.vb | 50 +++++++++---------- .../Detection/FileTypeRegistry.vb | 2 +- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb index 
462e9b19..df499c73 100644 --- a/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb +++ b/src/FileTypeDetection/Abstractions/Hashing/HashDigestSet.vb @@ -58,6 +58,17 @@ Namespace Global.Tomtastisch.FileClassifier ''' Public ReadOnly Property HasLogicalHash As Boolean + ''' + ''' Interner Vollkonstruktor zur normalisierten Erstellung eines Digest-Sets. + ''' + ''' Physischer SHA-256-Digest. + ''' Logischer SHA-256-Digest. + ''' Optionaler schneller physischer Digest. + ''' Optionaler schneller logischer Digest. + ''' Optionaler HMAC-SHA256 des physischen Payloads. + ''' Optionaler HMAC-SHA256 des logischen Payloads. + ''' Kennzeichnet das Vorliegen physischer Digests. + ''' Kennzeichnet das Vorliegen logischer Digests. Friend Sub New _ ( physicalSha256 As String, @@ -80,6 +91,9 @@ Namespace Global.Tomtastisch.FileClassifier Me.HasLogicalHash = hasLogicalHash End Sub + ''' + ''' Liefert ein leeres, fail-closed Digest-Set ohne berechnete Hashwerte. + ''' Friend Shared ReadOnly Property Empty As HashDigestSet Get Return New HashDigestSet( @@ -94,6 +108,11 @@ Namespace Global.Tomtastisch.FileClassifier End Get End Property + ''' + ''' Normalisiert Digest-Strings deterministisch (Trim + Invariant-Lowercase). + ''' + ''' Eingabedigest. + ''' Normalisierter Digest oder leerer String. Private Shared Function Normalize _ ( value As String diff --git a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb index 688b0861..9e50a053 100644 --- a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb +++ b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb @@ -99,11 +99,11 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Unveränderliches Dictionary Kind->Extension. 
Private Function BuildExtensionOverrides() As ImmutableDictionary(Of FileKind, String) - Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, String)() + Dim extensionBuilder = ImmutableDictionary.CreateBuilder(Of FileKind, String)() - b(FileKind.Jpeg) = ".jpg" + extensionBuilder(FileKind.Jpeg) = ".jpg" - Return b.ToImmutable() + Return extensionBuilder.ToImmutable() End Function ''' @@ -112,38 +112,38 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Unveränderliches Dictionary Gruppenname->Aliasliste. Private Function BuildAliasGroups() As ImmutableDictionary(Of String, ImmutableArray(Of String)) - Dim b = ImmutableDictionary.CreateBuilder(Of String, ImmutableArray(Of String))(StringComparer.OrdinalIgnoreCase) + Dim aliasGruppenBuilder = ImmutableDictionary.CreateBuilder(Of String, ImmutableArray(Of String))(StringComparer.OrdinalIgnoreCase) ' Wildcard-Semantik (Gruppen): ' - ARCHIVE: alle Archive/Container, die über FileKind.Zip normalisiert werden. ' - OFFICE_*: Office/ähnliche Container (Doc/Xls/Ppt), deren Content/Container-Detection separat läuft. - b("JPEG") = A("jpe") + aliasGruppenBuilder("JPEG") = A("jpe") - b("ARCHIVE") = A( + aliasGruppenBuilder("ARCHIVE") = A( "tar", "tgz", "gz", "gzip", "bz2", "bzip2", "xz", "7z", "zz", "rar") - b("OFFICE_DOC") = A( + aliasGruppenBuilder("OFFICE_DOC") = A( "doc", "docx", "docm", "docb", "dot", "dotm", "dotx", "odt", "ott") - b("OFFICE_XLS") = A( + aliasGruppenBuilder("OFFICE_XLS") = A( "xls", "xlsx", "xlsm", "xlsb", "xlt", "xltm", "xltx", "xltb", "xlam", "xla", "ods", "ots") - b("OFFICE_PPT") = A( + aliasGruppenBuilder("OFFICE_PPT") = A( "ppt", "pptx", "pptm", "pot", "potm", "potx", "pps", "ppsm", "ppsx", "odp", "otp") - Return b.ToImmutable() + Return aliasGruppenBuilder.ToImmutable() End Function ''' @@ -151,15 +151,15 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Unveränderliches Dictionary Kind->Aliasliste. 
Private Function BuildAliasOverrides() As ImmutableDictionary(Of FileKind, ImmutableArray(Of String)) - Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of String))() + Dim aliasMappingBuilder = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of String))() - b(FileKind.Jpeg) = GetGroup("JPEG") - b(FileKind.Zip) = GetGroup("ARCHIVE") - b(FileKind.Doc) = GetGroup("OFFICE_DOC") - b(FileKind.Xls) = GetGroup("OFFICE_XLS") - b(FileKind.Ppt) = GetGroup("OFFICE_PPT") + aliasMappingBuilder(FileKind.Jpeg) = GetGroup("JPEG") + aliasMappingBuilder(FileKind.Zip) = GetGroup("ARCHIVE") + aliasMappingBuilder(FileKind.Doc) = GetGroup("OFFICE_DOC") + aliasMappingBuilder(FileKind.Xls) = GetGroup("OFFICE_XLS") + aliasMappingBuilder(FileKind.Ppt) = GetGroup("OFFICE_PPT") - Return b.ToImmutable() + Return aliasMappingBuilder.ToImmutable() End Function ''' @@ -198,39 +198,39 @@ Namespace Global.Tomtastisch.FileClassifier Private Function BuildMagicPatternCatalog _ () As ImmutableDictionary(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern)) - Dim b = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern))() + Dim magicPatternBuilder = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern))() - b(FileKind.Pdf) = ImmutableArray.Create( + magicPatternBuilder(FileKind.Pdf) = ImmutableArray.Create( Pattern(Prefix(0, &H25, &H50, &H44, &H46, &H2D)) ) - b(FileKind.Png) = ImmutableArray.Create( + magicPatternBuilder(FileKind.Png) = ImmutableArray.Create( Pattern(Prefix(0, &H89, &H50, &H4E, &H47, &HD, &HA, &H1A, &HA)) ) - b(FileKind.Jpeg) = ImmutableArray.Create( + magicPatternBuilder(FileKind.Jpeg) = ImmutableArray.Create( Pattern(Prefix(0, &HFF, &HD8, &HFF)) ) - b(FileKind.Gif) = ImmutableArray.Create( + magicPatternBuilder(FileKind.Gif) = ImmutableArray.Create( Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H37, &H61)), Pattern(Prefix(0, &H47, &H49, &H46, &H38, &H39, 
&H61)) ) - b(FileKind.Webp) = ImmutableArray.Create( + magicPatternBuilder(FileKind.Webp) = ImmutableArray.Create( Pattern( Prefix(0, &H52, &H49, &H46, &H46), Prefix(8, &H57, &H45, &H42, &H50) ) ) - b(FileKind.Zip) = ImmutableArray.Create( + magicPatternBuilder(FileKind.Zip) = ImmutableArray.Create( Pattern(Prefix(0, &H50, &H4B, &H3, &H4)), Pattern(Prefix(0, &H50, &H4B, &H5, &H6)), Pattern(Prefix(0, &H50, &H4B, &H7, &H8)) ) - Return b.ToImmutable() + Return magicPatternBuilder.ToImmutable() End Function End Module diff --git a/src/FileTypeDetection/Detection/FileTypeRegistry.vb b/src/FileTypeDetection/Detection/FileTypeRegistry.vb index ed93843a..41690935 100644 --- a/src/FileTypeDetection/Detection/FileTypeRegistry.vb +++ b/src/FileTypeDetection/Detection/FileTypeRegistry.vb @@ -364,7 +364,7 @@ Namespace Global.Tomtastisch.FileClassifier As ImmutableDictionary(Of String, FileKind) Dim builder As ImmutableDictionary(Of String, FileKind).Builder Dim kind As FileKind - Dim t As FileType + Dim t As FileType = Nothing If types Is Nothing Then Return ImmutableDictionary(Of String, FileKind).Empty From 721afbc3d46b10778e1157130c0c5b3625af36e0 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 19:26:07 +0100 Subject: [PATCH 27/34] fix(policy): enforce method-header spacing and guard exception ssot --- docs/governance/045_CODE_QUALITY_POLICY_DE.MD | 2 + docs/governance/145_CODE_QUALITY_POLICY_DE.MD | 2 + .../Infrastructure/README.md | 2 +- .../Utils/Guards/ArchiveGuards.vb | 17 +++-- .../Utils/Guards/DestinationPathGuard.vb | 13 ++-- .../Utils/Guards/ExceptionFilterGuard.vb | 65 +++++++++++++++++++ .../Infrastructure/Utils/Guards/IOGuards.vb | 3 + .../Infrastructure/Utils/Guards/LogGuard.vb | 21 ++---- .../Utils/Guards/PathResolutionGuard.vb | 8 +-- .../Infrastructure/Utils/Guards/README.md | 8 ++- .../Infrastructure/Utils/README.md | 9 +-- src/FileTypeDetection/README.md | 4 +- 12 files changed, 107 insertions(+), 47 deletions(-) create mode 100644 
src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb diff --git a/docs/governance/045_CODE_QUALITY_POLICY_DE.MD b/docs/governance/045_CODE_QUALITY_POLICY_DE.MD index 79c48683..2a3e8876 100644 --- a/docs/governance/045_CODE_QUALITY_POLICY_DE.MD +++ b/docs/governance/045_CODE_QUALITY_POLICY_DE.MD @@ -88,6 +88,8 @@ Blöcke werden sichtbar getrennt (Leerzeile + Kommentartrenner): - öffnende Klammer in eigener Zeile - genau ein Parameter pro Zeile - schließende Klammer und Rückgabetyp gemeinsam in einer Zeile +- Nach jeder Methodensignatur (einzeilig oder mehrzeilig) folgt genau eine Leerzeile, + bevor Deklarationsblock oder erste ausführbare Anweisung beginnt. - Das Schema gilt einheitlich für `Sub`/`Function`/Konstruktoren/Interface-Member. - Lange Aufrufe werden analog umgebrochen (ein Argument pro Zeile, klar ausgerichtet). - Keine Trailing-Whitespace-Zeichen und keine „leeren“ Zeilen mit Spaces/Tabs. diff --git a/docs/governance/145_CODE_QUALITY_POLICY_DE.MD b/docs/governance/145_CODE_QUALITY_POLICY_DE.MD index 4514cbba..5a9789cd 100644 --- a/docs/governance/145_CODE_QUALITY_POLICY_DE.MD +++ b/docs/governance/145_CODE_QUALITY_POLICY_DE.MD @@ -77,6 +77,8 @@ Use visible block separation (empty line + block comment markers): - opening parenthesis on its own line - exactly one parameter per line - closing parenthesis and return type on one line +- After every method signature (single-line or multiline), include exactly one blank line + before the declaration block or first executable statement. - This applies consistently to `Sub`/`Function`/constructors/interface members. - Long invocations are wrapped analogously (one argument per line, aligned clearly). - No trailing whitespace and no visually empty lines containing spaces/tabs. 
diff --git a/src/FileTypeDetection/Infrastructure/README.md b/src/FileTypeDetection/Infrastructure/README.md index e832103d..7fbc2a2c 100644 --- a/src/FileTypeDetection/Infrastructure/README.md +++ b/src/FileTypeDetection/Infrastructure/README.md @@ -25,6 +25,6 @@ flowchart LR ## 6. Verweise - [Modulübersicht](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) -- [Infrastructure.Utils-Submodul](Utils/README.md) +- [Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) - [Policy CI](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/001_POLICY_CI.MD) diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb index 9afe0ece..56c9066e 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb @@ -24,6 +24,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils End Sub Friend Shared Function HasContent(data As Byte()) As Boolean + Return data IsNot Nothing AndAlso data.Length > 0 End Function End Class @@ -41,6 +42,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils opt As FileTypeProjectOptions, descriptor As ArchiveDescriptor ) As Boolean + If Not ByteArrayGuard.HasContent(data) Then Return False If opt Is Nothing Then Return False If descriptor Is Nothing OrElse @@ -50,15 +52,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils Using ms As New MemoryStream(data, writable:=False) Return IsArchiveSafeStream(ms, opt, descriptor, depth:=0) End Using - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is 
IOException OrElse - TypeOf ex Is InvalidDataException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException OrElse - TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException + Catch ex As Exception When ExceptionFilterGuard.IsArchiveValidationException(ex) LogGuard.Debug(opt.Logger, $"[ArchiveGate] Bytes-Fehler: {ex.Message}") Return False End Try @@ -71,6 +65,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils descriptor As ArchiveDescriptor, depth As Integer ) As Boolean + If Not StreamGuard.IsReadable(stream) Then Return False If opt Is Nothing Then Return False Return ArchiveProcessingEngine.ValidateArchiveStream(stream, opt, depth, descriptor) @@ -88,6 +83,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ( data As Byte() ) As Boolean + If Not ByteArrayGuard.HasContent(data) Then Return False Return FileTypeRegistry.DetectByMagic(data) = FileKind.Zip End Function @@ -107,6 +103,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils logPrefix As String, logWhenRejected As Boolean ) As Boolean + If opt Is Nothing Then Return True If opt.RejectArchiveLinks AndAlso Not String.IsNullOrWhiteSpace(linkTarget) Then @@ -133,6 +130,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils data As Byte(), opt As FileTypeProjectOptions ) As Boolean + Dim descriptor As ArchiveDescriptor = ArchiveDescriptor.UnknownDescriptor() Return TryDescribeSafeArchivePayload(data, opt, descriptor) @@ -171,6 +169,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ByRef normalizedPath As String, ByRef isDirectory As Boolean ) As Boolean + Dim safe As String Dim trimmed As String Dim segments As String() diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb index d832b1da..99d41008 100644 --- 
a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb @@ -58,6 +58,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils opt As FileTypeProjectOptions ) As Boolean _ Implements IDestinationPathPolicy.PrepareMaterializationTarget + If IsRootPath(destinationFull) Then LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") Return False @@ -80,6 +81,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils opt As FileTypeProjectOptions ) _ As Boolean Implements IDestinationPathPolicy.ValidateNewExtractionTarget + Dim parent As String If IsRootPath(destinationFull) Then @@ -106,18 +108,14 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils destinationFull As String ) As Boolean _ Implements IDestinationPathPolicy.IsRootPath + Dim rootPath As String If String.IsNullOrWhiteSpace(destinationFull) Then Return False Try rootPath = Path.GetPathRoot(destinationFull) - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException + Catch ex As Exception When ExceptionFilterGuard.IsPathNormalizationException(ex) Return False End Try @@ -143,6 +141,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils overwrite As Boolean, opt As FileTypeProjectOptions ) As Boolean + Return Policy.PrepareMaterializationTarget(destinationFull, overwrite, opt) End Function @@ -152,6 +151,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils opt As FileTypeProjectOptions ) _ As Boolean + Return Policy.ValidateNewExtractionTarget(destinationFull, opt) End Function @@ -159,6 +159,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ( destinationFull As String ) As Boolean + Return 
Policy.IsRootPath(destinationFull) End Function End Class diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb new file mode 100644 index 00000000..5f0017d9 --- /dev/null +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/ExceptionFilterGuard.vb @@ -0,0 +1,65 @@ +' ============================================================================ +' FILE: ExceptionFilterGuard.vb +' +' INTERNE POLICY (DIN-/Norm-orientiert, verbindlich) +' - Datei- und Type-Struktur gemäß docs/governance/045_CODE_QUALITY_POLICY_DE.MD +' - Try/Catch konsistent im Catch-Filter-Schema +' - Variablen im Deklarationsblock, spaltenartig ausgerichtet +' ============================================================================ + +Option Strict On +Option Explicit On + +Imports System +Imports System.IO + +Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils + + ''' + ''' SSOT für wiederkehrende Exception-Filter in Guard-Klassen. + ''' + ''' + ''' Diese Utility kapselt Catch-Filter-Sets deterministisch, um + ''' Duplikate zu vermeiden und die Filter-Semantik zentral auditierbar zu halten. 
+ ''' + Friend NotInheritable Class ExceptionFilterGuard + Private Sub New() + End Sub + + Friend Shared Function IsArchiveValidationException(ex As Exception) As Boolean + + Return TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is InvalidDataException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException OrElse + TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException + End Function + + Friend Shared Function IsPathNormalizationException(ex As Exception) As Boolean + + Return TypeOf ex Is UnauthorizedAccessException OrElse + TypeOf ex Is Security.SecurityException OrElse + TypeOf ex Is IOException OrElse + TypeOf ex Is NotSupportedException OrElse + TypeOf ex Is ArgumentException + End Function + + Friend Shared Function IsPathResolutionException(ex As Exception) As Boolean + + Return IsPathNormalizationException(ex) OrElse + TypeOf ex Is PathTooLongException + End Function + + Friend Shared Function IsLoggerWriteException(ex As Exception) As Boolean + + Return TypeOf ex Is InvalidOperationException OrElse + TypeOf ex Is ObjectDisposedException OrElse + TypeOf ex Is FormatException OrElse + TypeOf ex Is ArgumentException + End Function + End Class + +End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb index e8e3e7e8..de327946 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/IOGuards.vb @@ -40,6 +40,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils output As Stream, maxBytes As Long ) + Dim buf(InternalIoDefaults.CopyBufferSize - 1) As Byte Dim total As Long = 0 Dim n As Integer @@ -67,6 +68,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ( stream As Stream ) As Boolean + Return stream IsNot 
Nothing AndAlso stream.CanRead End Function @@ -74,6 +76,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ( stream As Stream ) + If stream Is Nothing Then Return If stream.CanSeek Then stream.Position = 0 End Sub diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb index e562097e..c4e4a5e4 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/LogGuard.vb @@ -28,15 +28,12 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils logger As ILogger, message As String ) + If logger Is Nothing Then Return If Not logger.IsEnabled(LogLevel.Debug) Then Return Try logger.LogDebug("{Message}", message) - Catch ex As Exception When _ - TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException OrElse - TypeOf ex Is FormatException OrElse - TypeOf ex Is ArgumentException + Catch ex As Exception When ExceptionFilterGuard.IsLoggerWriteException(ex) ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. End Try End Sub @@ -46,15 +43,12 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils logger As ILogger, message As String ) + If logger Is Nothing Then Return If Not logger.IsEnabled(LogLevel.Warning) Then Return Try logger.LogWarning("{Message}", message) - Catch ex As Exception When _ - TypeOf ex Is InvalidOperationException OrElse - TypeOf ex Is ObjectDisposedException OrElse - TypeOf ex Is FormatException OrElse - TypeOf ex Is ArgumentException + Catch ex As Exception When ExceptionFilterGuard.IsLoggerWriteException(ex) ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. 
End Try End Sub @@ -65,15 +59,12 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils message As String, ex As Exception ) + If logger Is Nothing Then Return If Not logger.IsEnabled(LogLevel.Error) Then Return Try logger.LogError(ex, "{Message}", message) - Catch logEx As Exception When _ - TypeOf logEx Is InvalidOperationException OrElse - TypeOf logEx Is ObjectDisposedException OrElse - TypeOf logEx Is FormatException OrElse - TypeOf logEx Is ArgumentException + Catch logEx As Exception When ExceptionFilterGuard.IsLoggerWriteException(logEx) ' Keine Rekursion im Logger-Schutz: Logging-Fehler werden bewusst fail-closed unterdrückt. End Try End Sub diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb index 36db099d..105d67ec 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/PathResolutionGuard.vb @@ -39,13 +39,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils Try fullPath = Path.GetFullPath(rawPath) Return True - Catch ex As Exception When _ - TypeOf ex Is UnauthorizedAccessException OrElse - TypeOf ex Is Security.SecurityException OrElse - TypeOf ex Is IOException OrElse - TypeOf ex Is PathTooLongException OrElse - TypeOf ex Is NotSupportedException OrElse - TypeOf ex Is ArgumentException + Catch ex As Exception When ExceptionFilterGuard.IsPathResolutionException(ex) If opt IsNot Nothing Then message = $"{logPrefix}: {ex.Message}" diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md b/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md index d2d80490..eafecb0a 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md @@ -10,10 +10,12 @@ Dieses Untermodul kapselt ausschliesslich Guard- und Policy-Utilities mit fail-c - 
`PathResolutionGuard.vb`: sichere FullPath-Aufloesung mit kontrollierter Protokollierung. - `DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung/Extraktion. - `LogGuard.vb`: defensives Logging ohne Rekursion. +- `ExceptionFilterGuard.vb`: zentrale Catch-Filter-Sets als SSOT fuer redundanzfreie Exception-Guards. ## 3. API und Verhalten - Alle Klassen sind stateless und deterministisch. - Fehlerpfade sind fail-closed und liefern klare Rueckgaben. +- Wiederkehrende Exception-Filter werden zentral ueber `ExceptionFilterGuard` gepflegt. ## 4. Verifikation - Nutzung erfolgt in `FileMaterializer`, `ArchiveInternals`, `CoreInternals` und Hashing-Komponenten. @@ -27,6 +29,6 @@ flowchart LR ``` ## 6. Verweise -- [Utils-Root](../README.md) -- [Infrastructure-Modul](../../README.md) -- [Code-Quality-Policy](../../../../../docs/governance/045_CODE_QUALITY_POLICY_DE.MD) +- [Utils-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) +- [Infrastructure-Modul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) +- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/src/FileTypeDetection/Infrastructure/Utils/README.md b/src/FileTypeDetection/Infrastructure/Utils/README.md index f9188640..275635a3 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/README.md +++ b/src/FileTypeDetection/Infrastructure/Utils/README.md @@ -13,6 +13,7 @@ Der Fokus liegt auf deterministischen Guards, sicherer Pfad-/Archive-Validierung - `Guards/PathResolutionGuard.vb`: fail-closed FullPath-Aufloesung mit kontrollierter Protokollierung. - `Guards/DestinationPathGuard.vb`: Zielpfad-Policy fuer Materialisierung und Extraktion. - `Guards/LogGuard.vb`: defensiver Logger-Schutz ohne Rekursion/Seiteneffekte. 
+- `Guards/ExceptionFilterGuard.vb`: zentrale Catch-Filter-SSOT fuer wiederkehrende Exception-Mengen. ## 3. API und Verhalten - Utilities sind stateless und deterministisch. @@ -32,7 +33,7 @@ flowchart LR ``` ## 6. Verweise -- [Modul-Root](../../README.md) -- [Infrastructure-Modul](../README.md) -- [Guards-Cluster](Guards/README.md) -- [Code-Quality-Policy](../../../../docs/governance/045_CODE_QUALITY_POLICY_DE.MD) +- [Modul-Root](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/README.md) +- [Infrastructure-Modul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) +- [Guards-Cluster](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/Guards/README.md) +- [Code-Quality-Policy](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/045_CODE_QUALITY_POLICY_DE.MD) diff --git a/src/FileTypeDetection/README.md b/src/FileTypeDetection/README.md index fb13b5a9..2466ec95 100644 --- a/src/FileTypeDetection/README.md +++ b/src/FileTypeDetection/README.md @@ -31,8 +31,8 @@ flowchart LR - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) - [Audit Index](https://github.com/tomtastisch/FileClassifier/blob/main/docs/audit/000_INDEX.MD) - [Detektion-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Detection/README.md) -- [Infrastruktur-Submodul](Infrastructure/README.md) -- [Infrastructure.Utils-Submodul](Infrastructure/Utils/README.md) +- [Infrastruktur-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) +- [Infrastructure.Utils-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/Utils/README.md) - [Konfiguration-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Configuration/README.md) - 
[Abstractions-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Abstractions/README.md) - [Composition-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Composition/README.md) From 774eabc9647bce7d0c169daba0df913f9a80dc17 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 19:28:28 +0100 Subject: [PATCH 28/34] fix(review): align guard semantics and registry naming refinements --- .../Configuration/FileTypeRegistryConfig.vb | 29 +++++++---- .../Utils/Guards/ArchiveGuards.vb | 48 +++++++++++++------ .../Utils/Guards/ArgumentGuard.vb | 6 +-- .../Utils/Guards/DestinationPathGuard.vb | 9 ++-- 4 files changed, 59 insertions(+), 33 deletions(-) diff --git a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb index 9e50a053..85c76c06 100644 --- a/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb +++ b/src/FileTypeDetection/Configuration/FileTypeRegistryConfig.vb @@ -112,11 +112,18 @@ Namespace Global.Tomtastisch.FileClassifier ''' ''' Unveränderliches Dictionary Gruppenname->Aliasliste. Private Function BuildAliasGroups() As ImmutableDictionary(Of String, ImmutableArray(Of String)) - Dim aliasGruppenBuilder = ImmutableDictionary.CreateBuilder(Of String, ImmutableArray(Of String))(StringComparer.OrdinalIgnoreCase) + Dim aliasGruppenBuilder = ImmutableDictionary.CreateBuilder _ + ( + Of String, + ImmutableArray(Of String) + ) _ + ( + StringComparer.OrdinalIgnoreCase + ) ' Wildcard-Semantik (Gruppen): ' - ARCHIVE: alle Archive/Container, die über FileKind.Zip normalisiert werden. - ' - OFFICE_*: Office/ähnliche Container (Doc/Xls/Ppt), deren Content/Container-Detection separat läuft. + ' - DOC/XLS/PPT: Office-/OpenDocument-Container (Doc/Xls/Ppt), deren Content/Container-Detection separat läuft. 
aliasGruppenBuilder("JPEG") = A("jpe") @@ -126,18 +133,18 @@ Namespace Global.Tomtastisch.FileClassifier "xz", "7z", "zz", "rar") - aliasGruppenBuilder("OFFICE_DOC") = A( + aliasGruppenBuilder("DOC") = A( "doc", "docx", "docm", "docb", "dot", "dotm", "dotx", "odt", "ott") - aliasGruppenBuilder("OFFICE_XLS") = A( + aliasGruppenBuilder("XLS") = A( "xls", "xlsx", "xlsm", "xlsb", "xlt", "xltm", "xltx", "xltb", "xlam", "xla", "ods", "ots") - aliasGruppenBuilder("OFFICE_PPT") = A( + aliasGruppenBuilder("PPT") = A( "ppt", "pptx", "pptm", "pot", "potm", "potx", "pps", "ppsm", "ppsx", @@ -155,9 +162,9 @@ Namespace Global.Tomtastisch.FileClassifier aliasMappingBuilder(FileKind.Jpeg) = GetGroup("JPEG") aliasMappingBuilder(FileKind.Zip) = GetGroup("ARCHIVE") - aliasMappingBuilder(FileKind.Doc) = GetGroup("OFFICE_DOC") - aliasMappingBuilder(FileKind.Xls) = GetGroup("OFFICE_XLS") - aliasMappingBuilder(FileKind.Ppt) = GetGroup("OFFICE_PPT") + aliasMappingBuilder(FileKind.Doc) = GetGroup("DOC") + aliasMappingBuilder(FileKind.Xls) = GetGroup("XLS") + aliasMappingBuilder(FileKind.Ppt) = GetGroup("PPT") Return aliasMappingBuilder.ToImmutable() End Function @@ -198,7 +205,11 @@ Namespace Global.Tomtastisch.FileClassifier Private Function BuildMagicPatternCatalog _ () As ImmutableDictionary(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern)) - Dim magicPatternBuilder = ImmutableDictionary.CreateBuilder(Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern))() + Dim magicPatternBuilder = ImmutableDictionary.CreateBuilder _ + ( + Of FileKind, + ImmutableArray(Of FileTypeRegistry.MagicPattern) + )() magicPatternBuilder(FileKind.Pdf) = ImmutableArray.Create( Pattern(Prefix(0, &H25, &H50, &H44, &H46, &H2D)) diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb index 56c9066e..67e40fcf 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb +++ 
b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArchiveGuards.vb @@ -170,21 +170,13 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ByRef isDirectory As Boolean ) As Boolean - Dim safe As String + Dim safe As String = String.Empty Dim trimmed As String - Dim segments As String() normalizedPath = String.Empty isDirectory = False - safe = If(rawPath, String.Empty).Trim() - If safe.Length = 0 Then Return False - If safe.Contains(ChrW(0)) Then Return False - - safe = safe.Replace("\"c, "/"c) - If Path.IsPathRooted(safe) Then Return False - safe = safe.TrimStart("/"c) - If safe.Length = 0 Then Return False + If Not TryPrepareRelativePath(rawPath, safe) Then Return False trimmed = safe.TrimEnd("/"c) If trimmed.Length = 0 Then @@ -194,11 +186,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils Return True End If - segments = trimmed.Split("/"c) - For Each seg In segments - If seg.Length = 0 Then Return False - If seg = "." OrElse seg = ".." Then Return False - Next + If Not HasOnlyAllowedPathSegments(trimmed) Then Return False If safe.Length <> trimmed.Length AndAlso Not allowDirectoryMarker Then Return False @@ -208,6 +196,36 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils isDirectory = allowDirectoryMarker AndAlso safe.Length <> trimmed.Length Return True End Function + + Private Shared Function TryPrepareRelativePath _ + ( + rawPath As String, + ByRef preparedPath As String + ) As Boolean + + preparedPath = If(rawPath, String.Empty).Trim() + If preparedPath.Length = 0 Then Return False + If preparedPath.Contains(ChrW(0)) Then Return False + If Path.IsPathRooted(preparedPath) Then Return False + + preparedPath = preparedPath.Replace("\"c, "/"c).TrimStart("/"c) + If preparedPath.Length = 0 Then Return False + + Return True + End Function + + Private Shared Function HasOnlyAllowedPathSegments(pathValue As String) As Boolean + + Dim segments As String() + + segments = pathValue.Split("/"c) + For Each seg In 
segments + If seg.Length = 0 Then Return False + If seg = "." OrElse seg = ".." Then Return False + Next + + Return True + End Function End Class End Namespace diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb index 8d6e57c6..2fd111ac 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/ArgumentGuard.vb @@ -141,7 +141,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ) ' Deklarationsblock - Dim isOk As Boolean + Dim isEnumValueDefined As Boolean If enumType Is Nothing Then Throw New ArgumentNullException(NameOf(enumType)) @@ -151,8 +151,8 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils Throw New ArgumentException("enumType muss ein Enum-Typ sein.", NameOf(enumType)) End If - isOk = [Enum].IsDefined(enumType, value) - If Not isOk Then + isEnumValueDefined = [Enum].IsDefined(enumType, value) + If Not isEnumValueDefined Then Throw New ArgumentOutOfRangeException(paramName) End If diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb index 99d41008..7d3863d0 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb @@ -56,8 +56,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils destinationFull As String, overwrite As Boolean, opt As FileTypeProjectOptions - ) As Boolean _ - Implements IDestinationPathPolicy.PrepareMaterializationTarget + ) As Boolean Implements IDestinationPathPolicy.PrepareMaterializationTarget If IsRootPath(destinationFull) Then LogGuard.Warn(opt.Logger, "[PathGuard] Ziel darf kein Root-Verzeichnis sein.") @@ -79,8 +78,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils ( 
destinationFull As String, opt As FileTypeProjectOptions - ) _ - As Boolean Implements IDestinationPathPolicy.ValidateNewExtractionTarget + ) As Boolean Implements IDestinationPathPolicy.ValidateNewExtractionTarget Dim parent As String @@ -106,8 +104,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils Public Function IsRootPath _ ( destinationFull As String - ) As Boolean _ - Implements IDestinationPathPolicy.IsRootPath + ) As Boolean Implements IDestinationPathPolicy.IsRootPath Dim rootPath As String From 4077858c15ba2cab0a6e32a3ac5f8f84272ed6f7 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 19:29:30 +0100 Subject: [PATCH 29/34] refactor(guard): deduplicate destination deletion flow --- .../Utils/Guards/DestinationPathGuard.vb | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb index 7d3863d0..838526a9 100644 --- a/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb +++ b/src/FileTypeDetection/Infrastructure/Utils/Guards/DestinationPathGuard.vb @@ -63,13 +63,7 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils Return False End If - If File.Exists(destinationFull) Then - If Not overwrite Then Return False - File.Delete(destinationFull) - ElseIf Directory.Exists(destinationFull) Then - If Not overwrite Then Return False - Directory.Delete(destinationFull, recursive:=True) - End If + If Not TryDeleteExistingTarget(destinationFull, overwrite) Then Return False Return True End Function @@ -123,6 +117,30 @@ Namespace Global.Tomtastisch.FileClassifier.Infrastructure.Utils rootPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar), StringComparison.OrdinalIgnoreCase) End Function + + Private Shared Function TryDeleteExistingTarget _ + ( + destinationFull As String, + overwrite As Boolean 
+ ) As Boolean + + Dim existsAsFile As Boolean + Dim existsAsDirectory As Boolean + + existsAsFile = File.Exists(destinationFull) + existsAsDirectory = Directory.Exists(destinationFull) + + If Not existsAsFile AndAlso Not existsAsDirectory Then Return True + If Not overwrite Then Return False + + If existsAsFile Then + File.Delete(destinationFull) + Return True + End If + + Directory.Delete(destinationFull, recursive:=True) + Return True + End Function End Class Friend NotInheritable Class DestinationPathGuard From 6591d86d4a73e8fe916286dea24fe8b623bbbfb9 Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 20:22:55 +0100 Subject: [PATCH 30/34] docs(agents): enforce per-comment evidence and resolve workflow --- AGENTS.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 3b17ba01..db9b6d16 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -31,6 +31,21 @@ - Code-/Test-/Dokuaenderung im PR mit nachvollziehbarer Evidence. - begruendete Widerlegung als `ASSUMPTION` + Verifikationsnachweis, warum keine Aenderung noetig ist. - Unzulaessig: Threads ohne Bearbeitung nur aus Prozessgruenden zu resolven. +- Verbindlicher Einzelkommentar-Workflow (ab sofort): + - Jeder Review-Kommentar/Thread wird einzeln und iterativ bearbeitet (keine Sammelabarbeitung mehr). + - Fuer jeden bearbeiteten Kommentar gilt: genau ein dedizierter Commit fuer die konkrete Umsetzung. + - Vor `resolved` ist im Thread immer ein Nachweis-Kommentar zu hinterlassen: + - entweder Commit-/Push-Link als Evidence der Umsetzung, + - oder nachvollziehbare Gegenargumentation als `ASSUMPTION` inkl. Verifikationsnachweis. + - Erst danach darf genau dieser Thread auf `resolved` gesetzt werden. + - Bei CI-Nacharbeiten gilt dieselbe Regel: + - pro verursachender Reparatur genau ein Commit, + - eigener Review-Nachweis-Kommentar mit Ursache und Evidence, + - danach erst `resolved`. 
+ - Push-Green-Regel: + - lokale Einzelabarbeitung aller offenen Kommentare gemaess obiger Regeln, + - danach Push und Pflicht-Checks auf gruen, + - Merge erst bei gruenen Checks und ohne offene Threads. - Merge nur wenn: - required checks gruener Status, - keine offenen Review-Threads, From 9455ce34fb89aa6177c53d5479efff0970702f4a Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 20:25:53 +0100 Subject: [PATCH 31/34] refactor(mime): normalize ext initialization and remove redundant guard --- src/FileTypeDetection/Infrastructure/MimeProvider.vb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/FileTypeDetection/Infrastructure/MimeProvider.vb b/src/FileTypeDetection/Infrastructure/MimeProvider.vb index cb17aa3c..5165ed87 100644 --- a/src/FileTypeDetection/Infrastructure/MimeProvider.vb +++ b/src/FileTypeDetection/Infrastructure/MimeProvider.vb @@ -34,9 +34,9 @@ Namespace Global.Tomtastisch.FileClassifier extWithDot As String ) As String - Dim ext As String = extWithDot + Dim ext As String = If(String.IsNullOrWhiteSpace(extWithDot), String.Empty, extWithDot) - If String.IsNullOrWhiteSpace(extWithDot) Then Return String.Empty + If ext.Length = 0 Then Return String.Empty If Not ext.StartsWith("."c) Then ext = "." 
& ext From ce3d892ad336d1e0459a7610b3902f6bf628dc2b Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 20:26:11 +0100 Subject: [PATCH 32/34] docs(versioning): clarify GH token fallback precedence --- tools/versioning/verify-version-convergence.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/versioning/verify-version-convergence.sh b/tools/versioning/verify-version-convergence.sh index b579deab..3ba1b257 100755 --- a/tools/versioning/verify-version-convergence.sh +++ b/tools/versioning/verify-version-convergence.sh @@ -97,6 +97,8 @@ main() { if [[ "${REQUIRE_REMOTE}" == "1" ]]; then require_cmd curl require_cmd gh + # Reihenfolge ist bewusst: GH_TOKEN hat Vorrang, danach CI-Standard GITHUB_TOKEN, + # danach SECURITY_CLAIMS_TOKEN als expliziter Fallback fuer gleichwertige Repo-Claims. export GH_TOKEN="${GH_TOKEN:-${GITHUB_TOKEN:-${SECURITY_CLAIMS_TOKEN:-}}}" [[ -n "${GH_TOKEN}" ]] || fail "REQUIRE_REMOTE=1 needs GH_TOKEN/GITHUB_TOKEN/SECURITY_CLAIMS_TOKEN" From 2a73134c66ba928ca54f47fe0c47d2068473e03a Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 20:27:13 +0100 Subject: [PATCH 33/34] docs(security-claims): annotate major-version claim coupling --- tools/audit/verify-security-claims.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/audit/verify-security-claims.sh b/tools/audit/verify-security-claims.sh index ace4e158..a7dc3795 100755 --- a/tools/audit/verify-security-claims.sh +++ b/tools/audit/verify-security-claims.sh @@ -151,7 +151,10 @@ if [[ -z "${REPO_FULL}" ]]; then add_violation "CI-SEC-CLAIM-001" "fail" "Unable to determine GitHub repository slug" "SECURITY.md" fi -# Claim: 6.x supported and <6.0 unsupported maps to current package major = 6 +# Claim-Bindung an SECURITY.md: +# - Security-Support gilt nur fuer den aktuellen Major 6.x. +# - Ein Major-Wechsel erfordert immer synchrones Update von SECURITY.md, +# Versionierungsdokumenten und dieser Claim-Pruefung. 
pkg_ver="$(sed -n 's:.*\([^<]*\).*:\1:p' "${ROOT_DIR}/src/FileTypeDetection/FileTypeDetectionLib.vbproj" | head -n1)" if [[ -z "${pkg_ver}" ]]; then add_violation "CI-SEC-CLAIM-002" "fail" "Package version not found" "src/FileTypeDetection/FileTypeDetectionLib.vbproj" From f91274600998ce5dc1467e1292e91c76e2c54f5c Mon Sep 17 00:00:00 2001 From: GitHub Copilot Agent Date: Sat, 21 Feb 2026 22:18:35 +0100 Subject: [PATCH 34/34] docs(governance): add full PR-108 review audit matrix with evidence --- docs/governance/047_PR_108_REVIEW_AUDIT_DE.MD | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 docs/governance/047_PR_108_REVIEW_AUDIT_DE.MD diff --git a/docs/governance/047_PR_108_REVIEW_AUDIT_DE.MD b/docs/governance/047_PR_108_REVIEW_AUDIT_DE.MD new file mode 100644 index 00000000..8a6ea993 --- /dev/null +++ b/docs/governance/047_PR_108_REVIEW_AUDIT_DE.MD @@ -0,0 +1,74 @@ +# PR-108 Review-Audit (DE) + +- Quelle: PR #108 (https://github.com/tomtastisch/FileClassifier/pull/108) +- Stand: automatisierter Snapshot via GitHub GraphQL + manuell verifizierte Evidence-Mapping-Liste +- Review-Threads gesamt: 53 +- Thread-Status: UMGESETZT=44, ASSUMPTION=9, OFFEN=0 +- GitHub-Resolved-Flag: unresolved=0 +- PR-Konversationskommentare von @tomtastisch: 2 + +## Audit-Tabelle (Review-Threads) + +| # | Erstkommentar | Autor | Befund (gekürzt) | Status | Evidence | Auflösungsnotiz (gekürzt) | +| --- | --- | --- | --- | --- | --- | --- | +| 1 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836118721) | `copilot-pull-request-reviewer` | The XML documentation comment for the Doc enum value has a typo: it reads "Office Open XML Word-Dokument (DOCX." but is missing the closing parenthesis. It should be "(DOCX)." ```s… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/3e25950) | Typo in DOCX summary korrigiert; Enum-Dokumentation vereinheitlicht. 
| +| 2 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836118733) | `copilot-pull-request-reviewer` | The variable `idx` is used in the For loop at line 144, but it is not declared in the "Deklarationsblock" section (lines 107-113) as required by the internal policy. According to t… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | Deklarationsblock angepasst; idx explizit im Methoden-Deklarationsbereich geführt. | +| 3 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836121176) | `chatgpt-codex-connector` | **![P2 Badge](https://img.shields.io/badge/P2-yellow?style=flat) Restrict internal utility classes to Friend visibility** `EnumUtils` is declared `Public` e… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | Interne Utility-Typen auf Friend/NotInheritable gehärtet; Public-API-Expansion vermieden. | +| 4 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836124228) | `tomtastisch` | bitte korrekt benennen: EvidenceHashingIO | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | Benennung auf EvidenceHashingIO vereinheitlicht (Datei- und Typname). | +| 5 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836126215) | `copilot-pull-request-reviewer` | The FILE header comment contains "(neue version)" which appears to be a development artifact. This should be cleaned up to just "FILE: HashRoundTripReport.vb" to match the establis… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | Header-/Kommentarartefakte bereinigt; Policy-konforme FILE-Header konsolidiert. | +| 6 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836126220) | `copilot-pull-request-reviewer` | VB.NET keywords should be consistently cased. 
Line 427 uses lowercase "End if" but should be "End If" (capital I) to match the language standard and the rest of the codebase. ```su… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | VB-Keyword-Casing bereinigt (End If) und Style-Drift entfernt. | +| 7 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836126223) | `copilot-pull-request-reviewer` | Inconsistent indentation in the Catch filter clause. Line 448 has extra indentation (4 additional spaces) compared to the other TypeOf checks in the same filter (lines 447, 449-454… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | Catch-Filter-Indentation auf einheitliches Layout gebracht. | +| 8 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836126227) | `copilot-pull-request-reviewer` | Inconsistent indentation in the Catch filter clause. Line 496 has extra indentation (4 additional spaces) compared to the other TypeOf checks in the same filter (lines 495, 497-500… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | Catch-Filter-Indentation auf einheitliches Layout gebracht. | +| 9 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836126232) | `copilot-pull-request-reviewer` | The property declaration is unnecessarily split across multiple lines (lines 33-36). The type `ImmutableArray(Of Byte)` should be kept on the same line as the property declaration … | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | Property-Deklaration auf policy-konformes Single-Line-Layout normalisiert. | +| 10 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836126235) | `copilot-pull-request-reviewer` | The XML documentation comment is incomplete. Line 59 shows "(DOC." but is missing the closing parenthesis. It should be "(DOC)." 
or more appropriately describe what DOC represents … | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9ccf814) | XML-Dokumentation für Office/DOC-Domäne präzisiert und geschlossen. | +| 11 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836390250) | `tomtastisch` | IOGruards nicht IoGuard IO immer beide Buchstaben Uppercase | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | IO-Guard-Benennung vereinheitlicht (IO uppercase) und konsolidiert. | +| 12 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836393241) | `tomtastisch` | Benennung ist nciht passend Einheitlich bleiben in der benneung, andere klassen sind immer als *...Guard.vb, entsprechend: diese klasse logisch sinnvoll benennen, jedoch wenn Guard… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Guard-Namensschema vereinheitlicht (*Guard) und Redundanzen entfernt. | +| 13 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836394356) | `tomtastisch` | Guard-utillities sollte unter Utils/Guards/* gekapselt/geclustert werden. Dies steigert die striktheit + lesbarkeit und ist sinnvoll bezüglich logisch getrennter Zuordnung | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Guards nach Infrastructure/Utils/Guards geclustert und Imports angepasst. | +| 14 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836394786) | `tomtastisch` | Gleiche Anmerkung wie in : https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836394356 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Cluster-Änderung aus Referenzthread übernommen (gleiche Anmerkung). 
| +| 15 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836395189) | `tomtastisch` | Gleiche Anmerkung wie in: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836394356 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Cluster-Änderung aus Referenzthread übernommen (gleiche Anmerkung). | +| 16 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836395308) | `tomtastisch` | Gleiche Anmerkung wie in: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836394356 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Cluster-Änderung aus Referenzthread übernommen (gleiche Anmerkung). | +| 17 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836395872) | `tomtastisch` | Gleiche anmerkung wie in: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836394356 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Cluster-Änderung aus Referenzthread übernommen (gleiche Anmerkung). | +| 18 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836395938) | `tomtastisch` | Gleiche anmerkung wie in: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836394356 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Cluster-Änderung aus Referenzthread übernommen (gleiche Anmerkung). | +| 19 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836396242) | `tomtastisch` | Gleiche anmerkung wie in: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836394356 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Cluster-Änderung aus Referenzthread übernommen (gleiche Anmerkung). 
| +| 20 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836396351) | `tomtastisch` | Gleiche anmerkung wie in: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836394356 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1e2c6bb) | Cluster-Änderung aus Referenzthread übernommen (gleiche Anmerkung). | +| 21 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836399164) | `tomtastisch` | Fehlende dokumentation entsprechend der Code-Policy | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/905a28b) | Member-Dokumentation für Hashing-Typen nach Policy ergänzt. | +| 22 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836399321) | `tomtastisch` | Fehlende dokumentation entsprechend der Code-Policy | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/8a8da4b) | Interne Hashing-Core-Dokumentation vollständig ergänzt. | +| 23 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836401562) | `tomtastisch` | Fehlende dokumentation entsprechend der Code-Policy anscheinend nur teilweise dokumentiert | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/077f14f) | Ergänzende interne Doku-Lücken geschlossen (policy-konform). | +| 24 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836403769) | `tomtastisch` | Alle drei Erstellungen von HashRoundTripReport haben starke Ähnlichkei (redundanz)! Zusammenführen und sinnvoll durch einen einzigen return erzeugen durch entsprechend logisch/sin… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1209ad8) | Umgesetzt in Commit 1209ad8: CreateFailureReport(...) eingefuehrt und alle Fail-Pfade in EvidenceHashingRoundTrip zentralisiert. 
Evidence: https://github.com/tomtastisch/FileClassi… | +| 25 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836403838) | `copilot-pull-request-reviewer` | `BuildAliasMap` iteriert direkt über `ImmutableDictionary` (`For Each kv In types`). Die Enumerationsreihenfolge ist nicht garantiert stabil; damit ist das im Kommentar erwähnte „d… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1209ad8) | Umgesetzt in Commit 1209ad8: BuildAliasMap iteriert deterministisch ueber OrderedKindsCache + TryGetValue. Evidence: https://github.com/tomtastisch/FileClassifier/commit/1209ad8 | +| 26 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836403840) | `copilot-pull-request-reviewer` | Im Job `version-convergence` wird `SECURITY_CLAIMS_TOKEN` als Secret in die Umgebung injiziert, obwohl `REQUIRE_REMOTE: "0"` gesetzt ist. Das erhöht unnötig die Secret-Exposure-Flä… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1209ad8) | Umgesetzt in Commit 1209ad8: SECURITY_CLAIMS_TOKEN wurde aus dem version-convergence Job bei REQUIRE_REMOTE=0 entfernt. Evidence: https://github.com/tomtastisch/FileClassifier/comm… | +| 27 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836403845) | `copilot-pull-request-reviewer` | PR-Beschreibung/Titel sprechen von lokaler Konvergenz auf `6.0.0`, aber `RepoVersion` wird hier auf `6.0.1` gesetzt (und weitere Dateien/Dokus folgen ebenfalls `6.0.1`). Bitte entw… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/pull/108) | Umgesetzt: PR-Metadaten auf 6.0.1 vereinheitlicht (Titel + Body), RepoVersion bleibt 6.0.1. 
Evidence: https://github.com/tomtastisch/FileClassifier/pull/108 | +| 28 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836406984) | `tomtastisch` | Allgemein ist die klasse zu stark auf h1,h2...,h4 spezifiziert, dies ist stark fehleranfällig, wenn sich in zukünftigen anpassungen die h+n erweitern. Hierbei wäre ein dynamisches … | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1209ad8) | ASSUMPTION + Evidence: Vollstaendig dynamisches Hn-Modell in EvidenceHashingRoundTrip waere ausserhalb des aktuellen API-stabilen Scope; Redundanz wurde ohne Vertragsbruch reduzier… | +| 29 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836407638) | `tomtastisch` | Fehlende dokumentation entsprechend der Code-Policy | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/479dd6d) | Umgesetzt in Commit 479dd6d: fehlende Dokumentation in HashDigestSet.vb vollstaendig ergaenzt. Evidence: https://github.com/tomtastisch/FileClassifier/commit/479dd6d | +| 30 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836409254) | `tomtastisch` | `b = ...` ist nicht eindeutig in dessen Benennung und sollte angepasst werden, son das es ein prägnantem deutschen und zuordenbaren Ausdruck gemäß der policy und best-practise für… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/479dd6d) | Umgesetzt in Commit 479dd6d: unpraezises b wurde in FileTypeRegistryConfig auf sprechende Builder-Bezeichner umgestellt. Evidence: https://github.com/tomtastisch/FileClassifier/com… | +| 31 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836414353) | `tomtastisch` | # Policy Semantik-Regel einhalten/ergänzen Leerzeile nach Methodenkopf bitte immer beachten, diese Leerzeile ist/sollte in der Code-Policy bereits vorgegeben sein. 
Wenn nciht: nach… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1b6f8db) | Umgesetzt in Commit 1b6f8db: Leerzeile nach Methodenkopf in Guard-Dateien umgesetzt und Policy 045/145 explizit ergaenzt. Evidence: https://github.com/tomtastisch/FileClassifier/co… | +| 32 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836418150) | `tomtastisch` | Es gibt sehr oft identische TypeOf-Blöcke Überlegung: Util-klasse für bestimmte valiadte*(...) methoden mit interner redundanzfreier clustering-/map-logik o.Ä. welche dann entspre… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1b6f8db) | Umgesetzt in Commit 1b6f8db: ExceptionFilterGuard als SSOT eingefuehrt und redundante Catch-TypeOf-Bloecke zentralisiert. Evidence: https://github.com/tomtastisch/FileClassifier/co… | +| 33 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836421216) | `tomtastisch` | Line 178 bis 198 ist stark vereinfachter, beziehungsweise sinnvoller dynamisch umzusetzen ohne redundanter ablaufe durch hilfsfunktionen/interner vereinheitliung durch interne meth… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/86225ec) | Umgesetzt in Commit 86225ec: TryPrepareRelativePath + HasOnlyAllowedPathSegments extrahiert, Pfadnormalisierung in ArchiveGuards dynamisch vereinfacht. Evidence: https://github.com… | +| 34 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836422624) | `tomtastisch` | ```suggestion Dim aliasGruppenBuilder = ImmutableDictionary.CreateBuilder( Of String, ImmutableArray(Of String) )( … | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/86225ec) | Umgesetzt in Commit 86225ec: aliasGruppenBuilder auf das vorgeschlagene mehrzeilige Builder-Format umgestellt. 
Evidence: https://github.com/tomtastisch/FileClassifier/commit/86225e… | +| 35 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836425395) | `tomtastisch` | Benennungen ohne Office_* Bitte, da diese (siehe extension-zuordnung und Deklaration/doku) sowohl ms-office, als auch open-office abdecken (sollten) und entsprechend reicht Doc, XL… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/86225ec) | Umgesetzt in Commit 86225ec: Aliasgruppenbezeichner von OFFICE_* auf DOC/XLS/PPT vereinheitlicht. Evidence: https://github.com/tomtastisch/FileClassifier/commit/86225ec | +| 36 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836426264) | `tomtastisch` | ```suggestion Dim magicPatternBuilder = ImmutableDictionary.CreateBuilder( Of FileKind, ImmutableArray(Of FileTypeRegistry.MagicPattern… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/86225ec) | Umgesetzt in Commit 86225ec: magicPatternBuilder auf das vorgeschlagene mehrzeilige Builder-Format umgestellt. Evidence: https://github.com/tomtastisch/FileClassifier/commit/86225e… | +| 37 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836427094) | `tomtastisch` | isOk ist zwar logisch korrekt aber inhaltlich schwer zuzuordnen/zu breit gefächert | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/86225ec) | Umgesetzt in Commit 86225ec: isOk in ArgumentGuard auf isEnumValueDefined praezisiert. 
Evidence: https://github.com/tomtastisch/FileClassifier/commit/86225ec | +| 38 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836428195) | `tomtastisch` | ```suggestion Public Function PrepareMaterializationTarget _ ( destinationFull As String, overwrite As Boolean, … | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/86225ec) | Umgesetzt in Commit 86225ec: Signaturformat in DestinationPathGuard auf vorgeschlagenes Layout + Leerzeile angepasst. Evidence: https://github.com/tomtastisch/FileClassifier/commit… | +| 39 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836438317) | `tomtastisch` | Zeile 66 bis 72 ist logisch redundant und kann sinnvoll zusammengeführt werden Beispiel: wenn man die Methoden `File.Delete(destinationFull)` und `Directory.Delete(destinationFul… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/808b7a2) | Umgesetzt in Commit 808b7a2: redundante Delete-Branches in TryDeleteExistingTarget(...) zentralisiert. Evidence: https://github.com/tomtastisch/FileClassifier/commit/808b7a2 | +| 40 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836440516) | `tomtastisch` | Bezüglich der catch-blöcke, wie im anderen review-kommentar bereits erwähnt: Bitte um redundante catch blocke kümmern und lösung umsetzen/vorschlagen | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1b6f8db) | Umgesetzt in Commit 1b6f8db: redundante Catch-Filter in LogGuard ueber ExceptionFilterGuard.IsLoggerWriteException(...) dedupliziert. Evidence: https://github.com/tomtastisch/FileC… | +| 41 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836442521) | `tomtastisch` | Using-keyowrd sinnvoll hier? | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/edf624f) | Using-Verwendung für Stream-Lebensdauer ist erforderlich und bereits fail-closed umgesetzt. 
| +| 42 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836442825) | `tomtastisch` | Using-keyowrd sinnvoll hier? | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/edf624f) | Gleiche Begründung wie Thread 41; kritische Streams sind per Using abgesichert. | +| 43 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836444882) | `copilot-pull-request-reviewer` | In this test, the new `HashRoundTripReport` ctor expects exactly one evidence per slot (H1..H4). The call passes 5 `null` evidences after `notes`, so `ArgumentGuard.RequireLength(.… | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1396e6d) | Ctor-Arity wurde validiert; Tests bestätigen korrekte Slot-Anzahl ohne Behavior-Drift. | +| 44 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836444891) | `copilot-pull-request-reviewer` | PR description states local deterministic version convergence to `6.0.0`, but this change sets `RepoVersion` to `6.0.1` (and other files in the PR also use `6.0.1`). Please align t… | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/pull/108) | Umgesetzt: PR-Beschreibung auf 6.0.1 aktualisiert und mit RepoVersion abgeglichen. Evidence: https://github.com/tomtastisch/FileClassifier/pull/108 | +| 45 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836461136) | `tomtastisch` | Variable `ext` ist semantisch besser deklarierter: ```suggestion Dim ext As String = If(String.IsNullOrWhiteSpace(extWithDot), String.Empty, extWithDot) ``` | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/ff730f0) | Umgesetzt in Commit ff730f0: ext-Initialisierung auf If(String.IsNullOrWhiteSpace(...)) vereinheitlicht. 
Evidence: https://github.com/tomtastisch/FileClassifier/commit/ff730f0 | +| 46 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836462535) | `tomtastisch` | kann entfernt werden wenn `https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836461136` umgesetzt wird | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/ff730f0) | Umgesetzt in Commit ff730f0: redundante Folgezeile nach ext-Normalisierung entfernt. Evidence: https://github.com/tomtastisch/FileClassifier/commit/ff730f0 | +| 47 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836467869) | `tomtastisch` | Prüfung ob versionierung aufgrund der Menge an Änderungen - bei Sinnvoller Aufgliederung in entsprechende Umsetzungsblöcke - nicht unter umständen bereits höher ist (also: 6.*.* )? | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/d6164e7) | Patch-Bump 6.0.1 bleibt korrekt: Refactor/Hardening ohne neue Public-Features/Breaking API. | +| 48 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836470732) | `tomtastisch` | ```suggestion Assert.True( fileIndex >= 0 && strictIndex > fileIndex, $"Policy 045 order violated ('FILE' before Option Stri… | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1396e6d) | Einzeiler-Stilvorschlag nicht zwingend; bestehendes Format bleibt policy-konform und lesbar. | +| 49 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836471718) | `tomtastisch` | Kann man als Einzeiler schreiben: ```suggestion .Where(type => (type.IsPublic // type.IsNestedPublic) && type.Namespace == "Tomtastisch.FileClassifier")… | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/1396e6d) | Einzeiler-Stilvorschlag nicht zwingend; bestehendes Format bleibt policy-konform und lesbar. 
| +| 50 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836479408) | `tomtastisch` | Wird zu: ` / < 5.2.1 / Nein / / > 5.2.1 / Ja / ` | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9efc309) | SECURITY.md bleibt eingefroren (Repo-Contract); Security-Claim-Kopplung stattdessen im Audit-Skript dokumentiert. | +| 51 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836480145) | `tomtastisch` | Prüfung entsprechend Kommentar: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836467869 | `ASSUMPTION` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/d6164e7) | Versioning erneut geprüft; 6.0.1-Konsistenz über RepoVersion/Version/PackageVersion/SVT belegt. | +| 52 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836481328) | `tomtastisch` | Sollte zur Vermeidung von Missverständnissen Dokumentiert werden (Eincode-kommentar) zur Erläuterung gegenüber anderer Entwickler | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/74fa1db) | Umgesetzt in Commit 74fa1db: Inline-Kommentar zur GH_TOKEN/GITHUB_TOKEN/SECURITY_CLAIMS_TOKEN-Fallback-Reihenfolge ergaenzt. Evidence: https://github.com/tomtastisch/FileClassifier… | +| 53 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#discussion_r2836482528) | `tomtastisch` | Siehe Kommentar: https://github.com/tomtastisch/FileClassifier/pull/108/changes#r2836467869 | `UMGESETZT` | [Nachweis](https://github.com/tomtastisch/FileClassifier/commit/9820930) | Umgesetzt in Commit 9820930: Kommentar zur Kopplung von SECURITY-Claim und Major-Version im Security-Claims-Skript ergaenzt. 
Evidence: https://github.com/tomtastisch/FileClassifier… | + +## PR-Konversationskommentare von @tomtastisch + +| # | Kommentar | Auszug | +| --- | --- | --- | +| 1 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#issuecomment-3939130713) | @codex[agent] review | +| 2 | [Link](https://github.com/tomtastisch/FileClassifier/pull/108#issuecomment-3939149500) | Prüfung ob unter "Utils/*", beziehungsweise nach Anpassung "Utils/Guards/*" wirklich alle Util-klassen Guard-bezeichnungen korrekt im namen haben oder eine andere Zuordnung sinnvoller wäre | +