|
1 | 1 | package robaho.net.httpserver.extras; |
2 | 2 |
|
3 | 3 | import java.io.BufferedInputStream; |
| 4 | +import java.io.BufferedOutputStream; |
4 | 5 | import java.io.ByteArrayOutputStream; |
| 6 | +import java.io.File; |
| 7 | +import java.io.FileOutputStream; |
5 | 8 | import java.io.IOException; |
6 | 9 | import java.io.InputStream; |
| 10 | +import java.io.OutputStream; |
| 11 | +import java.nio.charset.Charset; |
7 | 12 | import java.nio.charset.StandardCharsets; |
8 | 13 | import java.nio.file.Path; |
9 | | -import java.util.Collections; |
| 14 | +import java.util.HashMap; |
| 15 | +import java.util.LinkedList; |
10 | 16 | import java.util.List; |
11 | 17 | import java.util.Map; |
| 18 | +import java.util.regex.Matcher; |
| 19 | +import java.util.regex.Pattern; |
12 | 20 |
|
13 | | -/** This is a work in progress and does not work!!! */ |
| 21 | +/** |
| 22 | + * parse multipart form data |
| 23 | + */ |
14 | 24 | public class MultipartFormParser { |
| 25 | + /** |
| 26 | + * a multipart part. |
| 27 | + * |
| 28 | + * either data or file will be non-null, but not both. |
| 29 | + * |
| 30 | + * @param contentType if non-null, the content type of the data |
| 31 | + * @param filename if non-null, the form provided filename |
| 32 | + * @param file if non-null, points to the uploaded file data (the name may |
| 33 | + * differ from filename). This file is marked as delete on exit. |
| 34 | + * @param data if non-null, contains the part data as a String. |
| 35 | + */ |
| 36 | + public record Part(String contentType, String filename, String data, File file) { |
| 37 | + |
| 38 | + } |
| 39 | + |
| 40 | + private record PartMetadata(String name, String filename) { |
| 41 | + |
| 42 | + } |
15 | 43 |
|
16 | 44 | /** |
17 | 45 | * parse a multipart input stream, write files to storage. The caller is |
18 | 46 | * responsible to delete files when they are no longer needed. |
19 | 47 | * |
20 | 48 | * @return a map of key to either a String (non-file) or a File |
21 | 49 | */ |
22 | | - public static Map<String, List<Object>> parse(String encoding, String content_type, InputStream is, Path storage) throws IOException { |
23 | | - if (encoding == null) { |
24 | | - encoding = StandardCharsets.ISO_8859_1.name(); |
| 50 | + public static Map<String, List<Part>> parse(String encoding, String content_type, InputStream is, Path storage) throws IOException { |
| 51 | + Charset charset = encoding == null ? StandardCharsets.ISO_8859_1 : Charset.forName(encoding); |
| 52 | + |
| 53 | + if (!content_type.contains("boundary=")) { |
| 54 | + throw new IllegalStateException("content type does not contain boundary"); |
25 | 55 | } |
26 | 56 |
|
| 57 | + String boundary = content_type.split("boundary=")[1]; |
| 58 | + |
27 | 59 | is = new BufferedInputStream(is); |
28 | 60 |
|
29 | | - String boundary = content_type.split("boundary=")[1]; |
| 61 | + Map<String, List<Part>> results = new HashMap<>(); |
| 62 | + |
| 63 | + // the CRLF is considered part of the boundary |
| 64 | + byte[] boundaryCheck = ("\r\n--" + boundary).getBytes(charset); |
30 | 65 |
|
31 | | - byte[] boundaryCheck = ("--" + boundary).getBytes(encoding); |
32 | | - byte[] buffer = new byte[8192]; |
| 66 | + List<String> headers = new LinkedList<>(); |
33 | 67 |
|
| 68 | + System.out.println("reading until start of part"); |
| 69 | + // read until boundary found |
| 70 | + int matchCount = 0; |
34 | 71 | while (true) { |
35 | | - System.out.println("reading part header"); |
36 | | - while (true) { |
37 | | - String s = readLine(encoding, is); |
38 | | - if (s == null) { |
| 72 | + int c = is.read(); |
| 73 | + if (c == -1) { |
| 74 | + return results; |
| 75 | + } |
| 76 | + if (c == boundaryCheck[matchCount]) { |
| 77 | + matchCount++; |
| 78 | + if (matchCount == boundaryCheck.length) { |
| 79 | + System.out.println("found boundary marker"); |
39 | 80 | break; |
40 | 81 | } |
41 | | - System.out.println(":::" + s); |
42 | | - if ("".equals(s)) { |
43 | | - break; |
| 82 | + } else { |
| 83 | + matchCount = 0; |
| 84 | + if (c == boundaryCheck[matchCount]) { |
| 85 | + matchCount++; |
44 | 86 | } |
45 | 87 | } |
46 | | - System.out.println("reading part data"); |
| 88 | + } |
| 89 | + |
| 90 | + // read to end of line |
| 91 | + String s = readLine(charset, is); |
| 92 | + if (s == null || "--".equals(s)) { |
| 93 | + return results; |
| 94 | + } |
| 95 | + |
| 96 | + headers.clear(); |
| 97 | + |
| 98 | + while (true) { |
| 99 | + // read part headers until blank line |
| 100 | + System.out.println("reading part headers"); |
47 | 101 | while (true) { |
48 | | - String s = readLine(encoding, is); |
| 102 | + s = readLine(charset, is); |
49 | 103 | if (s == null) { |
50 | | - break; |
| 104 | + return results; |
51 | 105 | } |
52 | | - System.out.println(">>>" + s); |
53 | 106 | if ("".equals(s)) { |
54 | 107 | break; |
55 | 108 | } |
| 109 | + headers.add(s); |
56 | 110 | } |
57 | | - if (is.available() == 0) { |
58 | | - break; |
| 111 | + |
| 112 | + System.out.println("reading part data"); |
| 113 | + // read part data - need to detect end of part |
| 114 | + PartMetadata meta = parseHeaders(headers); |
| 115 | + |
| 116 | + Runnable addToResults; |
| 117 | + OutputStream os; |
| 118 | + if (meta.filename == null) { |
| 119 | + var bos = new ByteArrayOutputStream(); |
| 120 | + os = bos; |
| 121 | + addToResults = () -> results.computeIfAbsent(meta.name, k -> new LinkedList<Part>()).add(new Part(null, null, bos.toString(charset), null)); |
| 122 | + } else { |
| 123 | + File file = Path.of(storage.toString(), meta.filename).toFile(); |
| 124 | + file.deleteOnExit(); |
| 125 | + os = new BufferedOutputStream(new FileOutputStream(file)); |
| 126 | + addToResults = () -> results.computeIfAbsent(meta.name, k -> new LinkedList<Part>()).add(new Part(null, meta.filename, null, file)); |
| 127 | + } |
| 128 | + |
| 129 | + try (os) { |
| 130 | + matchCount = 0; |
| 131 | + while (true) { |
| 132 | + int c = is.read(); |
| 133 | + if (c == -1) { |
| 134 | + return results; |
| 135 | + } |
| 136 | + if (c == boundaryCheck[matchCount]) { |
| 137 | + matchCount++; |
| 138 | + if (matchCount == boundaryCheck.length) { |
| 139 | + System.out.println("found boundary marker"); |
| 140 | + break; |
| 141 | + } |
| 142 | + } else { |
| 143 | + if (matchCount > 0) { |
| 144 | + os.write(boundaryCheck, 0, matchCount); |
| 145 | + matchCount = 0; |
| 146 | + } |
| 147 | + if (c == boundaryCheck[matchCount]) { |
| 148 | + matchCount++; |
| 149 | + } else { |
| 150 | + os.write(c); |
| 151 | + } |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + |
| 156 | + addToResults.run(); |
| 157 | + |
| 158 | + // read to end of line |
| 159 | + s = readLine(charset, is); |
| 160 | + if ("--".equals(s)) { |
| 161 | + return results; |
| 162 | + } |
| 163 | + } |
| 164 | + } |
| 165 | + |
| 166 | + private static final Pattern optionPattern = Pattern.compile("\\s(?<key>.*)=\"(?<value>.*)\""); |
| 167 | + |
| 168 | + private static PartMetadata parseHeaders(List<String> headers) { |
| 169 | + String name = null; |
| 170 | + String filename = null; |
| 171 | + for (var header : headers) { |
| 172 | + String[] parts = header.split(":", 2); |
| 173 | + if ("content-disposition".equalsIgnoreCase(parts[0])) { |
| 174 | + String[] options = parts[1].split(";"); |
| 175 | + for (var option : options) { |
| 176 | + Matcher m = optionPattern.matcher(option); |
| 177 | + if (m.matches()) { |
| 178 | + String key = m.group("key"); |
| 179 | + String value = m.group("value"); |
| 180 | + if ("name".equals(key)) { |
| 181 | + name = value; |
| 182 | + } |
| 183 | + if ("filename".equals(key)) { |
| 184 | + filename = value; |
| 185 | + } |
| 186 | + } |
| 187 | + } |
| 188 | + |
59 | 189 | } |
60 | 190 | } |
61 | | - System.out.println("finished reading form"); |
62 | | - return Collections.EMPTY_MAP; |
| 191 | + return new PartMetadata(name, filename); |
63 | 192 | } |
64 | 193 |
|
65 | | - private static String readLine(String encoding, InputStream is) throws IOException { |
| 194 | + private static String readLine(Charset charset, InputStream is) throws IOException { |
66 | 195 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); |
67 | 196 | boolean prevCR = false; |
68 | 197 | while (true) { |
69 | 198 | int c = is.read(); |
70 | 199 | if (c == -1) { |
71 | 200 | if (bos.size() > 0) { |
72 | | - return bos.toString(encoding); |
| 201 | + return bos.toString(charset); |
73 | 202 | } |
74 | 203 | return null; |
75 | 204 | } |
76 | 205 | if (c == '\r') { |
77 | 206 | prevCR = true; |
78 | 207 | } else if (c == '\n') { |
79 | 208 | if (prevCR) { |
80 | | - return bos.toString(encoding); |
| 209 | + return bos.toString(charset); |
81 | 210 | } else { |
82 | 211 | bos.write(c); |
83 | 212 | } |
|
0 commit comments