From 6039bf581ef86114d027fa3733551ddc2cc11b41 Mon Sep 17 00:00:00 2001 From: Anthony Kiselev Date: Fri, 17 Oct 2025 14:58:33 -0500 Subject: [PATCH 1/5] Optimize decorator parsing Profiling showed that a relatively high percentage of CPU time and memory allocations are coming from decorator parsing. This makes sense, since this parsing is applied to every unified log line which goes through gctoolkit. To speed things up, we: - Fold the tags regex into the broader decorator regex, avoiding the need for a second matcher, and removing the need for an expensive negative lookbehind in the tag pattern - Add a start of line anchor to the decorator regex - Defer sanitization of tags until getTags() is called for the first time To reduce unnecessary memory allocations, match groups are retrieved once and stored in an array. In testing, these optimizations cut the running time of gctoolkit roughly in half. --- .../gctoolkit/parser/jvm/Decorators.java | 50 ++++++++++--------- .../parser/unified/UnifiedLoggingTokens.java | 6 +-- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java index 955db7d3..548f2877 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java @@ -66,7 +66,7 @@ public class Decorators { int numberOfDecorators; - private Matcher decoratorMatcher = null; + private String[] decoratorGroups; private String tags; public Decorators(String line) { @@ -74,25 +74,23 @@ public Decorators(String line) { } private void extractValues(String line) { - if (!line.startsWith("[")) return; - decoratorMatcher = UnifiedLoggingTokens.DECORATORS.matcher(line); + Matcher decoratorMatcher = UnifiedLoggingTokens.DECORATORS.matcher(line); if (!decoratorMatcher.find()) { return; } - for ( int i = 1; i <= decoratorMatcher.groupCount(); i++) { - if ( decoratorMatcher.group(i) != null) + // Retrieving a group from a matcher calls substring each time + // Store all the groups in an array ahead of time to avoid paying this cost unnecessarily + decoratorGroups = new String[11]; + for (int i = 1; i <= decoratorMatcher.groupCount(); i++) { + String group = decoratorMatcher.group(i); + decoratorGroups[i] = group; + if ( group != null) numberOfDecorators++; } - - Matcher tagMatcher = UnifiedLoggingTokens.TAGS.matcher(line); - if (tagMatcher.find()) { - numberOfDecorators++; - tags = String.join(",", Arrays.asList(tagMatcher.group(1).trim().split(","))); - } } // For some reason, ISO_DATE_TIME doesn't like that time-zone is -0100. It wants -01:00. @@ -100,10 +98,11 @@ private void extractValues(String line) { public ZonedDateTime getDateStamp() { try { - String value = decoratorMatcher.group(1); + String value = decoratorGroups[1]; if (value != null) { TemporalAccessor temporalAccessor = formatter.parse(value.substring(1, value.length()-1)); - return ZonedDateTime.from(temporalAccessor); } + return ZonedDateTime.from(temporalAccessor); + } } catch (NullPointerException npe) { LOGGER.log(Level.SEVERE, npe.getMessage(), npe); } @@ -111,7 +110,7 @@ public ZonedDateTime getDateStamp() { } public double getUpTime() { - String value = decoratorMatcher.group(2); + String value = decoratorGroups[2]; if (value != null) { value = value.replace(",", "."); return Double.parseDouble(unboxValue(value, 1)); @@ -121,10 +120,10 @@ public double getUpTime() { private long extractClock(int groupIndex, long threshold) { long clockReading = -1L; - String stringValue = decoratorMatcher.group(groupIndex); + String stringValue = decoratorGroups[groupIndex]; if (stringValue != null) { clockReading = Long.parseLong(unboxValue(stringValue, 2)); - if (decoratorMatcher.group(groupIndex + 1) == null) + if (decoratorGroups[groupIndex + 1] == null) if (clockReading < threshold) clockReading = -1L; } @@ -136,9 +135,9 @@ public long getTimeMillis() { } public long getUptimeMillis() { - String value = decoratorMatcher.group(4); + String value = decoratorGroups[4]; if (value == null) { - value = decoratorMatcher.group(3); + value = decoratorGroups[3]; } if (value != null) { long longValue = Long.parseLong(unboxValue(value, 2)); @@ -153,9 +152,9 @@ public long getTimeNano() { } public long getUptimeNano() { - String value = decoratorMatcher.group(6); + String value = decoratorGroups[6]; if (value == null) { - value = decoratorMatcher.group(5); + value = decoratorGroups[5]; } if (value != null) { long longValue = Long.parseLong(unboxValue(value, 2)); @@ -166,7 +165,7 @@ public long getUptimeNano() { } public int getPid() { - String value = decoratorMatcher.group(7); + String value = decoratorGroups[7]; if (value != null) { return Integer.parseInt(unboxValue(value)); } @@ -174,7 +173,7 @@ public int getPid() { } public int getTid() { - String value = decoratorMatcher.group(8); + String value = decoratorGroups[8]; if (value != null) { return Integer.parseInt(unboxValue(value)); } @@ -182,7 +181,7 @@ public int getTid() { } public Optional getLogLevel() { - String level = decoratorMatcher.group(9); + String level = decoratorGroups[9]; if (level != null) try { return Optional.of(UnifiedLoggingLevel.valueOf(unboxValue(level))); @@ -211,10 +210,13 @@ private String unboxValue(String boxedValue) { } public boolean tagsContain(String tagList) { - return tags.contains(tagList); + return getTags().contains(tagList); } public String getTags() { + if (tags == null && decoratorGroups[10] != null) { + tags = String.join(",", Arrays.asList(unboxValue(decoratorGroups[10]).trim().split(","))); + } return tags; } } \ No newline at end of file diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java index 59b008fd..cb6e787e 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java @@ -32,10 +32,8 @@ public interface UnifiedLoggingTokens extends GenericTokens { String TIME_NANOS = "\\[\\d+ns\\]"; String PID_TID = "\\[\\d+\\]"; String UNIFIED_LOG_LEVEL_BLOCK = "\\[(?:error|warning|info|debug|trace|develop) *\\]"; - Pattern DECORATORS = Pattern.compile("(" + DATE_STAMP + ")?(" + UPTIME + ")?(" + TIME_MILLIS + ")?(" + TIME_MILLIS + ")?(" + TIME_NANOS + ")?(" + TIME_NANOS + ")?(" + PID_TID + ")?(" + PID_TID + ")?(" + UNIFIED_LOG_LEVEL_BLOCK + ")?"); - //Using zero-width negative lookbehind to miss capturing records formatted like [0x1f03]. - //[0.081s][trace][safepoint] Thread: 0x00007fd0d2006800 [0x1f03] State: _at_safepoint _has_called_back 0 _at_poll_safepoint 0 - Pattern TAGS = Pattern.compile(".*(?<=^|\\])\\[([a-z0-9,. ]+)\\]"); + String TAGS = "\\[[a-z0-9,. ]+\\]"; + Pattern DECORATORS = Pattern.compile("^(" + DATE_STAMP + ")?(" + UPTIME + ")?(" + TIME_MILLIS + ")?(" + TIME_MILLIS + ")?(" + TIME_NANOS + ")?(" + TIME_NANOS + ")?(" + PID_TID + ")?(" + PID_TID + ")?(" + UNIFIED_LOG_LEVEL_BLOCK + ")?(" + TAGS + ")?"); String UNIFIED_META_RECORD = "Metaspace: " + BEFORE_AFTER_CONFIGURED; String WORKER_SUMMARY_REAL = "Min:\\s+" + TIME + ", Avg:\\s+" + TIME + ", Max:\\s+" + TIME + ", Diff:\\s+" + TIME + ", Sum:\\s+" + TIME + ", Workers:\\s+" + COUNTER; From 13b132daebeca67d87cf4e0e1cc10b675a16887e Mon Sep 17 00:00:00 2001 From: Martijn Verburg Date: Mon, 20 Oct 2025 09:31:37 +1300 Subject: [PATCH 2/5] Update parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java --- .../java/com/microsoft/gctoolkit/parser/jvm/Decorators.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java index 548f2877..2dec1040 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java @@ -88,7 +88,7 @@ private void extractValues(String line) { for (int i = 1; i <= decoratorMatcher.groupCount(); i++) { String group = decoratorMatcher.group(i); decoratorGroups[i] = group; - if ( group != null) + if (group != null) numberOfDecorators++; } } From cf5795b9574df96df6813c4fac6310a2c0ef52b0 Mon Sep 17 00:00:00 2001 From: Martijn Verburg Date: Mon, 20 Oct 2025 09:31:43 +1300 Subject: [PATCH 3/5] Update parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java --- .../microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java | 1 + 1 file changed, 1 insertion(+) diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java index cb6e787e..e6bb694c 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/unified/UnifiedLoggingTokens.java @@ -32,6 +32,7 @@ public interface UnifiedLoggingTokens extends GenericTokens { String TIME_NANOS = "\\[\\d+ns\\]"; String PID_TID = "\\[\\d+\\]"; String UNIFIED_LOG_LEVEL_BLOCK = "\\[(?:error|warning|info|debug|trace|develop) *\\]"; + // We combine TAGS with the DECORATORS for performance reasons. String TAGS = "\\[[a-z0-9,. ]+\\]"; Pattern DECORATORS = Pattern.compile("^(" + DATE_STAMP + ")?(" + UPTIME + ")?(" + TIME_MILLIS + ")?(" + TIME_MILLIS + ")?(" + TIME_NANOS + ")?(" + TIME_NANOS + ")?(" + PID_TID + ")?(" + PID_TID + ")?(" + UNIFIED_LOG_LEVEL_BLOCK + ")?(" + TAGS + ")?"); From aeea867829785cb28da5d0990d911672903cbeac Mon Sep 17 00:00:00 2001 From: Anthony Kiselev Date: Wed, 29 Oct 2025 13:23:51 -0500 Subject: [PATCH 4/5] Use constants to refer to decorator group indices --- .../gctoolkit/parser/jvm/Decorators.java | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java index 2dec1040..676b316d 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java @@ -64,6 +64,17 @@ public class Decorators { private static final long TWENTY_YEARS_IN_MILLIS = 731L * 24L * 60L * 60L * 1000L; private static final long TWENTY_YEARS_IN_NANO = 731L * 24L * 60L * 60L * 1000L; + private static final int DATE_STAMP_GROUP = 1; + private static final int UPTIME_GROUP = 2; + private static final int TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP = 3; + private static final int UPTIME_MILLIS_GROUP = 4; + private static final int TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP = 5; + private static final int UPTIME_NANOS_GROUP = 6; + private static final int PID_GROUP = 7; + private static final int TID_GROUP = 8; + private static final int LOG_LEVEL_GROUP = 9; + private static final int TAGS_GROUP = 10; + int numberOfDecorators; private String[] decoratorGroups; @@ -98,7 +109,7 @@ private void extractValues(String line) { public ZonedDateTime getDateStamp() { try { - String value = decoratorGroups[1]; + String value = decoratorGroups[DATE_STAMP_GROUP]; if (value != null) { TemporalAccessor temporalAccessor = formatter.parse(value.substring(1, value.length()-1)); return ZonedDateTime.from(temporalAccessor); @@ -110,7 +121,7 @@ public ZonedDateTime getDateStamp() { } public double getUpTime() { - String value = decoratorGroups[2]; + String value = decoratorGroups[UPTIME_GROUP]; if (value != null) { value = value.replace(",", "."); return Double.parseDouble(unboxValue(value, 1)); @@ -131,13 +142,13 @@ private long extractClock(int groupIndex, long threshold) { } public long getTimeMillis() { - return extractClock(3, TWENTY_YEARS_IN_MILLIS); + return extractClock(TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP, TWENTY_YEARS_IN_MILLIS); } public long getUptimeMillis() { - String value = decoratorGroups[4]; + String value = decoratorGroups[UPTIME_MILLIS_GROUP]; if (value == null) { - value = decoratorGroups[3]; + value = decoratorGroups[TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP]; } if (value != null) { long longValue = Long.parseLong(unboxValue(value, 2)); @@ -148,13 +159,13 @@ public long getUptimeMillis() { } public long getTimeNano() { - return extractClock(5, TWENTY_YEARS_IN_NANO); + return extractClock(TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP, TWENTY_YEARS_IN_NANO); } public long getUptimeNano() { - String value = decoratorGroups[6]; + String value = decoratorGroups[UPTIME_NANOS_GROUP]; if (value == null) { - value = decoratorGroups[5]; + value = decoratorGroups[TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP]; } if (value != null) { long longValue = Long.parseLong(unboxValue(value, 2)); @@ -165,7 +176,7 @@ public long getUptimeNano() { } public int getPid() { - String value = decoratorGroups[7]; + String value = decoratorGroups[PID_GROUP]; if (value != null) { return Integer.parseInt(unboxValue(value)); } @@ -173,7 +184,7 @@ public int getPid() { } public int getTid() { - String value = decoratorGroups[8]; + String value = decoratorGroups[TID_GROUP]; if (value != null) { return Integer.parseInt(unboxValue(value)); } @@ -181,7 +192,7 @@ public int getTid() { } public Optional getLogLevel() { - String level = decoratorGroups[9]; + String level = decoratorGroups[LOG_LEVEL_GROUP]; if (level != null) try { return Optional.of(UnifiedLoggingLevel.valueOf(unboxValue(level))); @@ -214,8 +225,10 @@ public boolean tagsContain(String tagList) { } public String getTags() { - if (tags == null && decoratorGroups[10] != null) { - tags = String.join(",", Arrays.asList(unboxValue(decoratorGroups[10]).trim().split(","))); + if (tags == null && decoratorGroups[TAGS_GROUP] != null) { + tags = String.join(",", Arrays.asList( + unboxValue(decoratorGroups[TAGS_GROUP]).trim().split(",") + )); } return tags; } From ca74dfd2a209043af9ea2747c76cf00f09151035 Mon Sep 17 00:00:00 2001 From: Anthony Kiselev Date: Thu, 30 Oct 2025 08:32:04 -0500 Subject: [PATCH 5/5] Use decoratorMatcher groupCount to initialize decorators array --- .../gctoolkit/parser/jvm/Decorators.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java index 676b316d..11659c39 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/jvm/Decorators.java @@ -64,16 +64,16 @@ public class Decorators { private static final long TWENTY_YEARS_IN_MILLIS = 731L * 24L * 60L * 60L * 1000L; private static final long TWENTY_YEARS_IN_NANO = 731L * 24L * 60L * 60L * 1000L; - private static final int DATE_STAMP_GROUP = 1; - private static final int UPTIME_GROUP = 2; - private static final int TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP = 3; - private static final int UPTIME_MILLIS_GROUP = 4; - private static final int TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP = 5; - private static final int UPTIME_NANOS_GROUP = 6; - private static final int PID_GROUP = 7; - private static final int TID_GROUP = 8; - private static final int LOG_LEVEL_GROUP = 9; - private static final int TAGS_GROUP = 10; + private static final int DATE_STAMP_GROUP = 0; + private static final int UPTIME_GROUP = 1; + private static final int TIME_MILLIS_OR_MAYBE_UPTIME_MILLIS_GROUP = 2; + private static final int UPTIME_MILLIS_GROUP = 3; + private static final int TIME_NANOS_OR_MAYBE_UPTIME_NANOS_GROUP = 4; + private static final int UPTIME_NANOS_GROUP = 5; + private static final int PID_GROUP = 6; + private static final int TID_GROUP = 7; + private static final int LOG_LEVEL_GROUP = 8; + private static final int TAGS_GROUP = 9; int numberOfDecorators; @@ -95,10 +95,10 @@ private void extractValues(String line) { // Retrieving a group from a matcher calls substring each time // Store all the groups in an array ahead of time to avoid paying this cost unnecessarily - decoratorGroups = new String[11]; + decoratorGroups = new String[decoratorMatcher.groupCount()]; for (int i = 1; i <= decoratorMatcher.groupCount(); i++) { String group = decoratorMatcher.group(i); - decoratorGroups[i] = group; + decoratorGroups[i-1] = group; if (group != null) numberOfDecorators++; }