Skip to content

Commit b0b76c3

Browse files
committed
multipart form parsing
1 parent b03d1cd commit b0b76c3

File tree

3 files changed

+262
-26
lines changed

3 files changed

+262
-26
lines changed

src/main/java/robaho/net/httpserver/extras/MultipartFormParser.java

Lines changed: 155 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,212 @@
11
package robaho.net.httpserver.extras;
22

33
import java.io.BufferedInputStream;
4+
import java.io.BufferedOutputStream;
45
import java.io.ByteArrayOutputStream;
6+
import java.io.File;
7+
import java.io.FileOutputStream;
58
import java.io.IOException;
69
import java.io.InputStream;
10+
import java.io.OutputStream;
11+
import java.nio.charset.Charset;
712
import java.nio.charset.StandardCharsets;
813
import java.nio.file.Path;
9-
import java.util.Collections;
14+
import java.util.HashMap;
15+
import java.util.LinkedList;
1016
import java.util.List;
1117
import java.util.Map;
18+
import java.util.regex.Matcher;
19+
import java.util.regex.Pattern;
1220

13-
/** This is a work in progress and does not work!!! */
21+
/**
22+
* Parses multipart/form-data request bodies into form fields and uploaded files.
23+
*/
1424
public class MultipartFormParser {
25+
/**
26+
* A single part of a multipart form submission.
27+
*
28+
* Exactly one of data or file is non-null.
29+
*
30+
* @param contentType if non-null, the content type of the data
31+
* @param filename if non-null, the form provided filename
32+
* @param data if non-null, contains the part data as a String.
33+
* @param file if non-null, points to the uploaded file data (the name may
34+
* differ from filename). This file is marked as delete on exit.
35+
*/
36+
public record Part(String contentType, String filename, String data, File file) {
37+
38+
}
39+
40+
/** Form field name and upload filename taken from a part's Content-Disposition header; either may be null. */
private record PartMetadata(String name, String filename) {
41+
42+
}
1543

1644
/**
1745
* parse a multipart input stream, write files to storage. The caller is
1846
* responsible to delete files when they are no longer needed.
1947
*
2048
* @return a map of key to either a String (non-file) or a File
2149
*/
22-
public static Map<String, List<Object>> parse(String encoding, String content_type, InputStream is, Path storage) throws IOException {
23-
if (encoding == null) {
24-
encoding = StandardCharsets.ISO_8859_1.name();
50+
public static Map<String, List<Part>> parse(String encoding, String content_type, InputStream is, Path storage) throws IOException {
51+
Charset charset = encoding == null ? StandardCharsets.ISO_8859_1 : Charset.forName(encoding);
52+
53+
if (!content_type.contains("boundary=")) {
54+
throw new IllegalStateException("content type does not contain boundary");
2555
}
2656

57+
String boundary = content_type.split("boundary=")[1];
58+
2759
is = new BufferedInputStream(is);
2860

29-
String boundary = content_type.split("boundary=")[1];
61+
Map<String, List<Part>> results = new HashMap<>();
62+
63+
// the CRLF is considered part of the boundary
64+
byte[] boundaryCheck = ("\r\n--" + boundary).getBytes(charset);
3065

31-
byte[] boundaryCheck = ("--" + boundary).getBytes(encoding);
32-
byte[] buffer = new byte[8192];
66+
List<String> headers = new LinkedList<>();
3367

68+
System.out.println("reading until start of part");
69+
// read until boundary found
70+
int matchCount = 0;
3471
while (true) {
35-
System.out.println("reading part header");
36-
while (true) {
37-
String s = readLine(encoding, is);
38-
if (s == null) {
72+
int c = is.read();
73+
if (c == -1) {
74+
return results;
75+
}
76+
if (c == boundaryCheck[matchCount]) {
77+
matchCount++;
78+
if (matchCount == boundaryCheck.length) {
79+
System.out.println("found boundary marker");
3980
break;
4081
}
41-
System.out.println(":::" + s);
42-
if ("".equals(s)) {
43-
break;
82+
} else {
83+
matchCount = 0;
84+
if (c == boundaryCheck[matchCount]) {
85+
matchCount++;
4486
}
4587
}
46-
System.out.println("reading part data");
88+
}
89+
90+
// read to end of line
91+
String s = readLine(charset, is);
92+
if (s == null || "--".equals(s)) {
93+
return results;
94+
}
95+
96+
headers.clear();
97+
98+
while (true) {
99+
// read part headers until blank line
100+
System.out.println("reading part headers");
47101
while (true) {
48-
String s = readLine(encoding, is);
102+
s = readLine(charset, is);
49103
if (s == null) {
50-
break;
104+
return results;
51105
}
52-
System.out.println(">>>" + s);
53106
if ("".equals(s)) {
54107
break;
55108
}
109+
headers.add(s);
56110
}
57-
if (is.available() == 0) {
58-
break;
111+
112+
System.out.println("reading part data");
113+
// read part data - need to detect end of part
114+
PartMetadata meta = parseHeaders(headers);
115+
116+
Runnable addToResults;
117+
OutputStream os;
118+
if (meta.filename == null) {
119+
var bos = new ByteArrayOutputStream();
120+
os = bos;
121+
addToResults = () -> results.computeIfAbsent(meta.name, k -> new LinkedList<Part>()).add(new Part(null, null, bos.toString(charset), null));
122+
} else {
123+
File file = Path.of(storage.toString(), meta.filename).toFile();
124+
file.deleteOnExit();
125+
os = new BufferedOutputStream(new FileOutputStream(file));
126+
addToResults = () -> results.computeIfAbsent(meta.name, k -> new LinkedList<Part>()).add(new Part(null, meta.filename, null, file));
127+
}
128+
129+
try (os) {
130+
matchCount = 0;
131+
while (true) {
132+
int c = is.read();
133+
if (c == -1) {
134+
return results;
135+
}
136+
if (c == boundaryCheck[matchCount]) {
137+
matchCount++;
138+
if (matchCount == boundaryCheck.length) {
139+
System.out.println("found boundary marker");
140+
break;
141+
}
142+
} else {
143+
if (matchCount > 0) {
144+
os.write(boundaryCheck, 0, matchCount);
145+
matchCount = 0;
146+
}
147+
if (c == boundaryCheck[matchCount]) {
148+
matchCount++;
149+
} else {
150+
os.write(c);
151+
}
152+
}
153+
}
154+
}
155+
156+
addToResults.run();
157+
158+
// read to end of line
159+
s = readLine(charset, is);
160+
if ("--".equals(s)) {
161+
return results;
162+
}
163+
}
164+
}
165+
166+
private static final Pattern optionPattern = Pattern.compile("\\s(?<key>.*)=\"(?<value>.*)\"");
167+
168+
private static PartMetadata parseHeaders(List<String> headers) {
169+
String name = null;
170+
String filename = null;
171+
for (var header : headers) {
172+
String[] parts = header.split(":", 2);
173+
if ("content-disposition".equalsIgnoreCase(parts[0])) {
174+
String[] options = parts[1].split(";");
175+
for (var option : options) {
176+
Matcher m = optionPattern.matcher(option);
177+
if (m.matches()) {
178+
String key = m.group("key");
179+
String value = m.group("value");
180+
if ("name".equals(key)) {
181+
name = value;
182+
}
183+
if ("filename".equals(key)) {
184+
filename = value;
185+
}
186+
}
187+
}
188+
59189
}
60190
}
61-
System.out.println("finished reading form");
62-
return Collections.EMPTY_MAP;
191+
return new PartMetadata(name, filename);
63192
}
64193

65-
private static String readLine(String encoding, InputStream is) throws IOException {
194+
private static String readLine(Charset charset, InputStream is) throws IOException {
66195
ByteArrayOutputStream bos = new ByteArrayOutputStream();
67196
boolean prevCR = false;
68197
while (true) {
69198
int c = is.read();
70199
if (c == -1) {
71200
if (bos.size() > 0) {
72-
return bos.toString(encoding);
201+
return bos.toString(charset);
73202
}
74203
return null;
75204
}
76205
if (c == '\r') {
77206
prevCR = true;
78207
} else if (c == '\n') {
79208
if (prevCR) {
80-
return bos.toString(encoding);
209+
return bos.toString(charset);
81210
} else {
82211
bos.write(c);
83212
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package robaho.net.httpserver.extras;
2+
3+
import java.io.ByteArrayInputStream;
4+
import java.io.ByteArrayOutputStream;
5+
import java.io.File;
6+
import java.io.IOException;
7+
import java.io.UnsupportedEncodingException;
8+
import java.nio.file.Files;
9+
import java.nio.file.Path;
10+
import java.util.Arrays;
11+
import java.util.List;
12+
13+
import org.testng.Assert;
14+
import org.testng.annotations.Test;
15+
16+
import robaho.net.httpserver.extras.MultipartFormParser.Part;
17+
import static robaho.net.httpserver.extras.MultipartFormParser.parse;
18+
19+
public class MultipartFormParserTest {
20+
21+
@Test
22+
public void testFiles() throws UnsupportedEncodingException, IOException {
23+
var body = "trash1\r\n";
24+
body += "------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n";
25+
body += "Content-Disposition: form-data; name=\"uploads[]\"; filename=\"A.txt\"\r\n";
26+
body += "Content-Type: text/plain\r\n";
27+
body += "\r\n\r\n";
28+
body += "@11X";
29+
body += "111Y\r\n";
30+
body += "111Z\rCCCC\nCCCC\r\nCCCCC@\r\n\r\n";
31+
body += "------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n";
32+
body += "Content-Disposition: form-data; name=\"uploads[]\"; filename=\"B.txt\"\r\n";
33+
body += "Content-Type: text/plain\r\n";
34+
body += "\r\n\r\n";
35+
body += "@22X";
36+
body += "222Y\r\n";
37+
body += "222Z\r222W\n2220\r\n666@\r\n";
38+
body += "------WebKitFormBoundaryvef1fLxmoUdYZWXp--\r\n";
39+
40+
Path storage = Path.of("/tmp", "parser_test");
41+
storage.toFile().mkdirs();
42+
43+
var results = parse("UTF-8", "Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryvef1fLxmoUdYZWXp", new ByteArrayInputStream(body.getBytes("UTF-8")), storage);
44+
45+
Assert.assertEquals(results.size(), 1);
46+
List<Part> values = results.get("uploads[]");
47+
Assert.assertEquals(values.size(), 2);
48+
49+
var s = "\r\n";
50+
s += "@11X";
51+
s += "111Y\r\n";
52+
s += "111Z\rCCCC\nCCCC\r\nCCCCC@\r\n";
53+
54+
Assert.assertEquals(s.getBytes("UTF-8"), Files.readAllBytes((values.get(0).file()).toPath()), "file1 failed");
55+
56+
s = "\r\n";
57+
s += "@22X";
58+
s += "222Y\r\n";
59+
s += "222Z\r222W\n2220\r\n666@";
60+
61+
Assert.assertEquals(s.getBytes("UTF-8"), Files.readAllBytes((values.get(1).file()).toPath()), "file2 failed");
62+
}
63+
64+
@Test
65+
public void testBinary() throws IOException {
66+
String hex
67+
= // data generated using curl --form
68+
"0d 0a "
69+
+ "2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d " // ----------------
70+
+ "2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 35 38 32 34 33 32 " // ----------582432
71+
+ "38 64 62 37 65 32 33 30 61 35 0d 0a 43 6f 6e 74 " // 8db7e230a5..Cont
72+
+ "65 6e 74 2d 44 69 73 70 6f 73 69 74 69 6f 6e 3a " // ent-Disposition:
73+
+ "20 66 6f 72 6d 2d 64 61 74 61 3b 20 6e 61 6d 65 " // form-data; name
74+
+ "3d 22 70 69 63 74 75 72 65 5b 75 70 6c 6f 61 64 " // ="picture[upload
75+
+ "65 64 5f 64 61 74 61 5d 22 3b 20 66 69 6c 65 6e " // ed_data]"; filen
76+
+ "61 6d 65 3d 22 73 6d 61 6c 6c 2e 70 6e 67 22 0d " // ame="small.png".
77+
+ "0a 43 6f 6e 74 65 6e 74 2d 54 79 70 65 3a 20 69 " // .Content-Type: i
78+
+ "6d 61 67 65 2f 70 6e 67 0d 0a 0d 0a 89 50 4e 47 " // mage/png.....PNG
79+
+ "0d 0a 1a 0a 00 00 00 0d 49 48 44 52 00 00 01 00 " // ........IHDR....
80+
+ "00 00 01 00 01 03 00 00 00 66 bc 3a 25 00 00 00 " // .........f.:%...
81+
+ "03 50 4c 54 45 b5 d0 d0 63 04 16 ea 00 00 00 1f " // .PLTE...c.......
82+
+ "49 44 41 54 68 81 ed c1 01 0d 00 00 00 c2 a0 f7 " // IDATh...........
83+
+ "4f 6d 0e 37 a0 00 00 00 00 00 00 00 00 be 0d 21 " // Om.7...........!
84+
+ "00 00 01 9a 60 e1 d5 00 00 00 00 49 45 4e 44 ae " // ....`......IEND.
85+
+ "42 60 82 0d 0a 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d " // B`...-----------
86+
+ "2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 35 " // ---------------5
87+
+ "38 32 34 33 32 38 64 62 37 65 32 33 30 61 35 2d " // 824328db7e230a5-
88+
+ "2d 0d 0a"; // -..
89+
90+
ByteArrayOutputStream bos = new ByteArrayOutputStream();
91+
Arrays.stream(hex.split(" ")).map(s -> Integer.parseInt(s, 16)).forEach(i -> bos.write(i));
92+
byte[] input = bos.toByteArray();
93+
94+
String boundary = "------------------------5824328db7e230a5";
95+
96+
Path storage = Path.of("/tmp", "parser_test");
97+
storage.toFile().mkdirs();
98+
99+
var results = parse("UTF-8", "Content-Type: multipart/form-data; boundary=" + boundary, new ByteArrayInputStream(input), storage);
100+
101+
Assert.assertEquals(results.size(), 1);
102+
List<Part> values = results.get("picture[uploaded_data]");
103+
Assert.assertEquals(values.size(), 1);
104+
105+
Assert.assertEquals(Files.readAllBytes(Path.of("src/test/resources/small.png")), Files.readAllBytes((values.get(0).file()).toPath()), "parse failed");
106+
}
107+
}

src/test/resources/small.png

103 Bytes
Loading

0 commit comments

Comments
 (0)