-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathextract-all-streams.php
More file actions
84 lines (64 loc) · 2.17 KB
/
extract-all-streams.php
File metadata and controls
84 lines (64 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
<?php
/**
 * Example: Extract all streams from a compound file to individual files
 *
 * This example demonstrates how to:
 * - Open a compound binary file
 * - Iterate through all directory entries
 * - Extract stream contents to separate files
 * - Handle both little-endian and big-endian files
 *
 * Every stream entry is written to "<sanitized name>.bin" inside $outputDir.
 * Characters outside [a-zA-Z0-9_.-] in a stream name are replaced with "_";
 * when two streams end up with the same sanitized name, later ones receive a
 * numeric suffix ("_2", "_3", ...) so that no extracted stream is overwritten.
 */

require_once __DIR__ . '/../vendor/autoload.php';

use DanRossiter\BinaryCompoundFile\CompoundFile;
use DanRossiter\BinaryCompoundFile\DirectoryEntry;
use DanRossiter\BinaryCompoundFile\StorageType;

// Configuration
$inputFile = __DIR__ . '/../tests/fixtures/Dan Rossiter Resume-BE.doc';
$outputDir = __DIR__ . '/../extracted_streams';

// Open the compound binary file
$fp = fopen($inputFile, 'rb');
if ($fp === false) {
    die("Failed to open file: $inputFile\n");
}

printf("Opened file: %s\n", $inputFile);

// Parse the compound file
$cfb = new CompoundFile($fp);

// Validate the file
if (!$cfb->isValid()) {
    // Release the handle before bailing out.
    fclose($fp);
    die("Invalid compound file format\n");
}

printf(
    "Valid compound file. Version: %d, Sector size: %d bytes\n",
    $cfb->getHeader()->getRevision(),
    $cfb->getHeader()->getSectSize()
);

// Create output directory
if (!is_dir($outputDir)) {
    // The second is_dir() tolerates another process creating the directory
    // between our check and the mkdir() call.
    if (!mkdir($outputDir, 0755, true) && !is_dir($outputDir)) {
        fclose($fp);
        die("Failed to create output directory: $outputDir\n");
    }

    printf("Created output directory: %s\n", $outputDir);
}

// Get all directories
$directories = $cfb->getDirectories();
printf("Found %d directory entries\n\n", count($directories));

// Extract all streams
$streamCount = 0;
$totalBytes = 0;
$failedCount = 0;

// Lower-cased output filenames already written during this run. Keys are
// lower-cased so collisions are also caught on case-insensitive filesystems.
$usedFilenames = array();

foreach ($directories as $name => $dir) {
    // Only process stream entries (not storage or root)
    if ($dir->getMse() !== StorageType::STREAM) {
        continue;
    }

    // Get stream content
    $content = $cfb->getStream($dir);
    $size = strlen($content);

    // Create safe filename. Array keys that look like integers are stored as
    // ints by PHP, hence the explicit string cast.
    $baseName = preg_replace('/[^a-zA-Z0-9_\-.]/', '_', (string) $name);
    if ($baseName === null || $baseName === '') {
        // preg_replace() failed or the stream had an empty name; avoid
        // producing a bare ".bin" file.
        $baseName = 'unnamed';
    }

    // Distinct stream names can sanitize to the same filename (for example
    // names that differ only in a replaced character), so disambiguate with a
    // numeric suffix instead of silently overwriting an earlier stream.
    $filename = $baseName . '.bin';
    for ($suffix = 2; isset($usedFilenames[strtolower($filename)]); $suffix++) {
        $filename = $baseName . '_' . $suffix . '.bin';
    }
    $usedFilenames[strtolower($filename)] = true;

    $outputPath = $outputDir . '/' . $filename;

    // Write to file; report a failed write instead of counting it as extracted
    if (file_put_contents($outputPath, $content) === false) {
        printf("Failed to write: %-30s -> %s\n", $name, $filename);
        $failedCount++;
        continue;
    }

    printf("Extracted: %-30s %8d bytes -> %s\n", $name, $size, $filename);

    $streamCount++;
    $totalBytes += $size;
}

fclose($fp);

printf("\nExtracted %d streams, %d total bytes\n", $streamCount, $totalBytes);
if ($failedCount > 0) {
    printf("Failed to write %d streams\n", $failedCount);
}
printf("Output directory: %s\n", $outputDir);