Skip to content

Commit 50464ae

Browse files
akiselev98karianna
andauthored
Optimize decorator parsing (#458)
* Optimize decorator parsing Profiling showed that a relatively high percentage of CPU time and memory allocations are coming from decorator parsing. This makes sense, since this parsing is applied to every unified log line which goes through gctoolkit. To speed things up, we: - Fold the tags regex into the broader decorator regex, avoiding the need for a second matcher, and removing the need for an expensive negative lookbehind in the tag pattern - Add a start of line anchor to the decorator regex - Defer sanitization of tags until getTags() is called for the first time To reduce unnecessary memory allocations, match groups are retrieved once and stored in an array. In testing, these optimizations cut the running time of gctoolkit roughly in half. * Update parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java * Update parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java * Use constants to refer to decorator group indices * Use decoratorMatcher groupCount to initialize decorators array --------- Co-authored-by: Martijn Verburg <martijnverburg@gmail.com>
1 parent 2b918d0 commit 50464ae

File tree

2 files changed

+44
-30
lines changed

2 files changed

+44
-30
lines changed

parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -64,54 +64,64 @@ public class Decorators {
6464
private static final long TWENTY_YEARS_IN_MILLIS = 731L * 24L * 60L * 60L * 1000L;
6565
private static final long TWENTY_YEARS_IN_NANO = 731L * 24L * 60L * 60L * 1000L;
6666

67+
private static final int DATE_STAMP_GROUP = 0;
68+
private static final int UPTIME_GROUP = 1;
69+
private static final int TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP = 2;
70+
private static final int UPTIME_MILLIS_GROUP = 3;
71+
private static final int TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP = 4;
72+
private static final int UPTIME_NANOS_GROUP = 5;
73+
private static final int PID_GROUP = 6;
74+
private static final int TID_GROUP = 7;
75+
private static final int LOG_LEVEL_GROUP = 8;
76+
private static final int TAGS_GROUP = 9;
77+
6778
int numberOfDecorators;
6879

69-
private Matcher decoratorMatcher = null;
80+
private String[] decoratorGroups;
7081
private String tags;
7182

7283
public Decorators(String line) {
7384
extractValues(line);
7485
}
7586

7687
private void extractValues(String line) {
77-
7888
if (!line.startsWith("["))
7989
return;
8090

81-
decoratorMatcher = UnifiedLoggingTokens.DECORATORS.matcher(line);
91+
Matcher decoratorMatcher = UnifiedLoggingTokens.DECORATORS.matcher(line);
8292
if (!decoratorMatcher.find()) {
8393
return;
8494
}
8595

86-
for ( int i = 1; i <= decoratorMatcher.groupCount(); i++) {
87-
if ( decoratorMatcher.group(i) != null)
96+
// Retrieving a group from a matcher calls substring each time
97+
// Store all the groups in an array ahead of time to avoid paying this cost unnecessarily
98+
decoratorGroups = new String[decoratorMatcher.groupCount()];
99+
for (int i = 1; i <= decoratorMatcher.groupCount(); i++) {
100+
String group = decoratorMatcher.group(i);
101+
decoratorGroups[i-1] = group;
102+
if (group != null)
88103
numberOfDecorators++;
89104
}
90-
91-
Matcher tagMatcher = UnifiedLoggingTokens.TAGS.matcher(line);
92-
if (tagMatcher.find()) {
93-
numberOfDecorators++;
94-
tags = String.join(",", Arrays.asList(tagMatcher.group(1).trim().split(",")));
95-
}
96105
}
97106

98107
// For some reason, ISO_DATE_TIME doesn't like that time-zone is -0100. It wants -01:00.
99108
private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
100109

101110
public ZonedDateTime getDateStamp() {
102111
try {
103-
String value = decoratorMatcher.group(1);
112+
String value = decoratorGroups[DATE_STAMP_GROUP];
104113
if (value != null) {
105114
TemporalAccessor temporalAccessor = formatter.parse(value.substring(1, value.length()-1));
106-
return ZonedDateTime.from(temporalAccessor); }
115+
return ZonedDateTime.from(temporalAccessor);
116+
}
107117
} catch (NullPointerException npe) {
108118
LOGGER.log(Level.SEVERE, npe.getMessage(), npe);
109119
}
110120
return null;
111121
}
112122

113123
public double getUpTime() {
114-
String value = decoratorMatcher.group(2);
124+
String value = decoratorGroups[UPTIME_GROUP];
115125
if (value != null) {
116126
value = value.replace(",", ".");
117127
return Double.parseDouble(unboxValue(value, 1));
@@ -121,24 +131,24 @@ public double getUpTime() {
121131

122132
private long extractClock(int groupIndex, long threshold) {
123133
long clockReading = -1L;
124-
String stringValue = decoratorMatcher.group(groupIndex);
134+
String stringValue = decoratorGroups[groupIndex];
125135
if (stringValue != null) {
126136
clockReading = Long.parseLong(unboxValue(stringValue, 2));
127-
if (decoratorMatcher.group(groupIndex + 1) == null)
137+
if (decoratorGroups[groupIndex + 1] == null)
128138
if (clockReading < threshold)
129139
clockReading = -1L;
130140
}
131141
return clockReading;
132142
}
133143

134144
public long getTimeMillis() {
135-
return extractClock(3, TWENTY_YEARS_IN_MILLIS);
145+
return extractClock(TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP, TWENTY_YEARS_IN_MILLIS);
136146
}
137147

138148
public long getUptimeMillis() {
139-
String value = decoratorMatcher.group(4);
149+
String value = decoratorGroups[UPTIME_MILLIS_GROUP];
140150
if (value == null) {
141-
value = decoratorMatcher.group(3);
151+
value = decoratorGroups[TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP];
142152
}
143153
if (value != null) {
144154
long longValue = Long.parseLong(unboxValue(value, 2));
@@ -149,13 +159,13 @@ public long getUptimeMillis() {
149159
}
150160

151161
public long getTimeNano() {
152-
return extractClock(5, TWENTY_YEARS_IN_NANO);
162+
return extractClock(TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP, TWENTY_YEARS_IN_NANO);
153163
}
154164

155165
public long getUptimeNano() {
156-
String value = decoratorMatcher.group(6);
166+
String value = decoratorGroups[UPTIME_NANOS_GROUP];
157167
if (value == null) {
158-
value = decoratorMatcher.group(5);
168+
value = decoratorGroups[TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP];
159169
}
160170
if (value != null) {
161171
long longValue = Long.parseLong(unboxValue(value, 2));
@@ -166,23 +176,23 @@ public long getUptimeNano() {
166176
}
167177

168178
public int getPid() {
169-
String value = decoratorMatcher.group(7);
179+
String value = decoratorGroups[PID_GROUP];
170180
if (value != null) {
171181
return Integer.parseInt(unboxValue(value));
172182
}
173183
return -1;
174184
}
175185

176186
public int getTid() {
177-
String value = decoratorMatcher.group(8);
187+
String value = decoratorGroups[TID_GROUP];
178188
if (value != null) {
179189
return Integer.parseInt(unboxValue(value));
180190
}
181191
return -1;
182192
}
183193

184194
public Optional<UnifiedLoggingLevel> getLogLevel() {
185-
String level = decoratorMatcher.group(9);
195+
String level = decoratorGroups[LOG_LEVEL_GROUP];
186196
if (level != null)
187197
try {
188198
return Optional.of(UnifiedLoggingLevel.valueOf(unboxValue(level)));
@@ -211,10 +221,15 @@ private String unboxValue(String boxedValue) {
211221
}
212222

213223
public boolean tagsContain(String tagList) {
214-
return tags.contains(tagList);
224+
return getTags().contains(tagList);
215225
}
216226

217227
public String getTags() {
228+
if (tags == null && decoratorGroups[TAGS_GROUP] != null) {
229+
tags = String.join(",", Arrays.asList(
230+
unboxValue(decoratorGroups[TAGS_GROUP]).trim().split(",")
231+
));
232+
}
218233
return tags;
219234
}
220235
}

parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@ public interface UnifiedLoggingTokens extends GenericTokens {
3232
String TIME_NANOS = "\\[\\d+ns\\]";
3333
String PID_TID = "\\[\\d+\\]";
3434
String UNIFIED_LOG_LEVEL_BLOCK = "\\[(?:error|warning|info|debug|trace|develop) *\\]";
35-
Pattern DECORATORS = Pattern.compile("(" + DATE_STAMP + ")?(" + UPTIME + ")?(" + TIME_MILLIS + ")?(" + TIME_MILLIS + ")?(" + TIME_NANOS + ")?(" + TIME_NANOS + ")?(" + PID_TID + ")?(" + PID_TID + ")?(" + UNIFIED_LOG_LEVEL_BLOCK + ")?");
36-
//Using zero-width negative lookbehind to miss capturing records formatted like [0x1f03].
37-
//[0.081s][trace][safepoint] Thread: 0x00007fd0d2006800 [0x1f03] State: _at_safepoint _has_called_back 0 _at_poll_safepoint 0
38-
Pattern TAGS = Pattern.compile(".*(?<=^|\\])\\[([a-z0-9,. ]+)\\]");
35+
// We combine TAGS with the DECORATORS for performance reasons.
36+
String TAGS = "\\[[a-z0-9,. ]+\\]";
37+
Pattern DECORATORS = Pattern.compile("^(" + DATE_STAMP + ")?(" + UPTIME + ")?(" + TIME_MILLIS + ")?(" + TIME_MILLIS + ")?(" + TIME_NANOS + ")?(" + TIME_NANOS + ")?(" + PID_TID + ")?(" + PID_TID + ")?(" + UNIFIED_LOG_LEVEL_BLOCK + ")?(" + TAGS + ")?");
3938

4039
String UNIFIED_META_RECORD = "Metaspace: " + BEFORE_AFTER_CONFIGURED;
4140
String WORKER_SUMMARY_REAL = "Min:\\s+" + TIME + ", Avg:\\s+" + TIME + ", Max:\\s+" + TIME + ", Diff:\\s+" + TIME + ", Sum:\\s+" + TIME + ", Workers:\\s+" + COUNTER;

0 commit comments

Comments
 (0)