Skip to content

Commit d0031f6

Browse files
committed
Use Tokenizer cursor instead of indexOf/substring scanning for dot-segment removal.
1 parent effa8ed commit d0031f6

File tree

6 files changed

+287
-278
lines changed

6 files changed

+287
-278
lines changed

httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java

Lines changed: 7 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@
3636
import java.util.Arrays;
3737
import java.util.BitSet;
3838
import java.util.Collections;
39-
import java.util.LinkedList;
4039
import java.util.List;
4140

4241
import org.apache.hc.core5.http.HttpHost;
4342
import org.apache.hc.core5.http.NameValuePair;
4443
import org.apache.hc.core5.http.URIScheme;
4544
import org.apache.hc.core5.http.message.BasicNameValuePair;
4645
import org.apache.hc.core5.http.message.ParserCursor;
46+
import org.apache.hc.core5.net.uri.Rfc3986Uri;
4747
import org.apache.hc.core5.util.Args;
4848
import org.apache.hc.core5.util.TextUtils;
4949
import org.apache.hc.core5.util.Tokenizer;
@@ -1118,58 +1118,16 @@ public URIBuilder normalizeSyntax() {
11181118
* @since 5.3
11191119
*/
11201120
public URIBuilder optimize() {
1121-
final String scheme = this.scheme;
1122-
if (scheme != null) {
1123-
this.scheme = TextUtils.toLowerCase(scheme);
1124-
}
1125-
1126-
if (this.pathRootless) {
1121+
final String raw = this.toString();
1122+
try {
1123+
final Rfc3986Uri u = Rfc3986Uri.parse(raw).optimize();
1124+
return new URIBuilder(u.toString());
1125+
} catch (final IllegalArgumentException | URISyntaxException ex) {
11271126
return this;
11281127
}
1129-
1130-
// Force Percent-Encoding re-encoding
1131-
this.encodedSchemeSpecificPart = null;
1132-
this.encodedAuthority = null;
1133-
this.encodedUserInfo = null;
1134-
this.encodedPath = null;
1135-
this.encodedQuery = null;
1136-
this.encodedFragment = null;
1137-
1138-
final String host = this.host;
1139-
if (host != null) {
1140-
this.host = TextUtils.toLowerCase(host);
1141-
}
1142-
1143-
if (this.pathSegments != null) {
1144-
final List<String> inputSegments = this.pathSegments;
1145-
if (!inputSegments.isEmpty()) {
1146-
final LinkedList<String> outputSegments = new LinkedList<>();
1147-
for (final String inputSegment : inputSegments) {
1148-
if (!inputSegment.isEmpty() && !".".equals(inputSegment)) {
1149-
if ("..".equals(inputSegment)) {
1150-
if (!outputSegments.isEmpty()) {
1151-
outputSegments.removeLast();
1152-
}
1153-
} else {
1154-
outputSegments.addLast(inputSegment);
1155-
}
1156-
}
1157-
}
1158-
if (!inputSegments.isEmpty()) {
1159-
final String lastSegment = inputSegments.get(inputSegments.size() - 1);
1160-
if (lastSegment.isEmpty()) {
1161-
outputSegments.addLast("");
1162-
}
1163-
}
1164-
this.pathSegments = outputSegments;
1165-
} else {
1166-
this.pathSegments = Collections.singletonList("");
1167-
}
1168-
}
1169-
1170-
return this;
11711128
}
11721129

1130+
11731131
/**
11741132
* Converts this instance to a URI string.
11751133
*

httpcore5/src/main/java/org/apache/hc/core5/net/uri/DotSegments.java

Lines changed: 55 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@
3232
import org.apache.hc.core5.annotation.Contract;
3333
import org.apache.hc.core5.annotation.Internal;
3434
import org.apache.hc.core5.annotation.ThreadingBehavior;
35+
import org.apache.hc.core5.util.Tokenizer;
3536

3637
/**
37-
* RFC 3986 §5.2.4 dot-segment removal.
38+
* Dot-segment removal.
3839
* <p>
3940
* - Preserves empty segments inside the path (e.g. {@code "/a//b"}).
4041
* - Does <strong>not</strong> preserve the artificial leading empty segment of absolute paths.
@@ -51,102 +52,114 @@ static String remove(final String path) {
5152
return path == null ? null : "";
5253
}
5354

54-
final boolean absolute = path.startsWith("/");
55-
final boolean hadTrailingSlash = path.endsWith("/");
55+
final char[] buf = path.toCharArray();
56+
final int end = buf.length;
5657

57-
final Deque<String> out = new ArrayDeque<>();
58+
final boolean absolute = buf[0] == '/';
59+
final boolean hadTrailingSlash = buf[end - 1] == '/';
5860

59-
int i = 0;
60-
final int n = path.length();
61-
boolean firstSegment = true; // suppress the artificial leading "" for absolute paths
62-
boolean forceTrailingSlash = false; // terminal "." or ".." wants slash in most cases
61+
final Deque<int[]> out = new ArrayDeque<>();
62+
boolean firstSegment = true; // suppress artificial leading "" for absolute paths
63+
boolean forceTrailingSlash = false; // terminal "." or ".." wants trailing slash in most cases
6364

64-
while (i <= n) {
65-
final int j = i < n ? path.indexOf('/', i) : -1;
65+
final Tokenizer.Cursor cursor = new Tokenizer.Cursor(0, end);
66+
while (!cursor.atEnd()) {
67+
final int segStart = cursor.getPos();
6668

67-
final String seg;
68-
if (j == -1) {
69-
seg = path.substring(i, n);
70-
i = n + 1;
69+
// scan to next '/' (or end)
70+
int i = segStart;
71+
while (i < end && buf[i] != '/') {
72+
i++;
73+
}
74+
final int segEnd = i;
75+
76+
// advance cursor past this segment and optional '/'
77+
if (i < end) {
78+
cursor.updatePos(i + 1);
7179
} else {
72-
seg = path.substring(i, j);
73-
i = j + 1;
80+
cursor.updatePos(i);
7481
}
82+
final boolean isLast = cursor.atEnd();
7583

76-
// Skip the artificial leading empty segment for absolute paths.
77-
if (firstSegment && absolute && seg.isEmpty()) {
84+
// Skip artificial leading empty segment for absolute paths (path begins with '/')
85+
if (firstSegment && absolute && segStart == segEnd) {
7886
firstSegment = false;
79-
if (j == -1) {
80-
break; // path was "/" only
81-
}
8287
continue;
8388
}
8489
firstSegment = false;
8590

86-
final boolean isLast = j == -1;
91+
final int len = segEnd - segStart;
8792

88-
if (seg.equals(".")) {
89-
// Drop "."; if last, remember to add trailing slash (except for empty relative).
93+
if (len == 1 && buf[segStart] == '.') {
94+
// drop "."
9095
if (isLast && (absolute || !out.isEmpty())) {
9196
forceTrailingSlash = true;
9297
}
93-
} else if (seg.equals("..")) {
98+
} else if (len == 2 && buf[segStart] == '.' && buf[segStart + 1] == '.') {
99+
// handle ".."
94100
if (!out.isEmpty()) {
95-
final String last = out.peekLast();
96-
if (!last.equals("..")) {
101+
final int[] last = out.peekLast();
102+
if (!isDotDot(buf, last)) {
97103
out.removeLast();
98104
} else if (!absolute) {
99-
out.addLast("..");
105+
out.addLast(new int[]{segStart, segEnd});
100106
}
101107
} else if (!absolute) {
102-
out.addLast("..");
108+
out.addLast(new int[]{segStart, segEnd});
103109
}
104-
// Terminal ".." prefers trailing slash, but not for pure relative "..".
105110
if (isLast && (absolute || !out.isEmpty())) {
106111
forceTrailingSlash = true;
107112
}
108113
} else {
109-
// Normal (and internal empty) segments preserved.
110-
out.addLast(seg);
114+
// normal (and internal empty) segments preserved verbatim
115+
out.addLast(new int[]{segStart, segEnd});
111116
}
112117

113-
if (j == -1) {
114-
break;
115-
}
118+
// No explicit break needed: the loop ends once the cursor reaches the end of the path.
119+
// A trailing '/' in the input is not a segment here; it is re-emitted below via hadTrailingSlash.
116120
}
117121

118-
// Rebuild
122+
// Rebuild result
119123
final StringBuilder b = new StringBuilder(path.length());
120124
if (absolute) {
121125
b.append('/');
122126
}
123127
boolean first = true;
124-
for (final String seg : out) {
128+
for (final int[] seg : out) {
125129
if (!first) {
126130
b.append('/');
127131
}
128-
b.append(seg);
132+
b.append(buf, seg[0], seg[1] - seg[0]);
129133
first = false;
130134
}
131135

132-
// Keep original trailing slash OR add one for terminal "."/".."
133-
// BUT: do not add for pure relative ".." (i.e., out = [".."], not absolute, and original had no trailing slash).
136+
// Keep original trailing slash OR add for terminal "."/".."
137+
// BUT: do not add for pure relative ".." (out = [".."], not absolute, and original had no trailing slash).
134138
final boolean wantsTrailing =
135139
hadTrailingSlash
136-
|| forceTrailingSlash && (absolute || !out.isEmpty() && !"..".equals(out.peekLast()));
140+
|| forceTrailingSlash && (absolute || !out.isEmpty() && !isDotDot(buf, out.peekLast()));
137141

138142
if (wantsTrailing && (b.length() == 0 || b.charAt(b.length() - 1) != '/')) {
139143
b.append('/');
140144
}
141145

142-
// Absolute path that reduced to empty -> "/"
146+
// Absolute path reduced to empty -> "/"
143147
if (absolute && b.length() == 0) {
144148
b.append('/');
145149
}
146150

147151
return b.toString();
148152
}
149153

154+
private static boolean isDotDot(final char[] buf, final int[] seg) {
155+
if (seg == null) {
156+
return false;
157+
}
158+
final int len = seg[1] - seg[0];
159+
return len == 2 && buf[seg[0]] == '.' && buf[seg[0] + 1] == '.';
160+
}
161+
150162
private DotSegments() {
163+
// no instances
151164
}
152165
}

0 commit comments

Comments
 (0)