From 99369e9ea64a079e4da9e96dd74dcbd117e675b7 Mon Sep 17 00:00:00 2001 From: khope Date: Tue, 10 Feb 2026 18:07:55 +0900 Subject: [PATCH] feat: add flame graph, CLI monitor, OTLP export, and install script - Flame graph: d3-flamegraph visualization with 60s auto-reset window, pause/resume, server-side reset via /flame-graph?reset=true - CLI (argus top): htop-style terminal monitor in new argus-cli module, zero external dependencies, parallel HTTP polling - OTLP: push-based metrics export to OpenTelemetry collectors, hand-coded JSON (no SDK), configurable via -Dargus.otlp.* properties - install.sh: one-line installer with PATH setup for zsh/bash/fish - Fix CDN URL (d3-flamegraph -> d3-flame-graph package name) - Fix demo duration passthrough in Gradle tasks (default 5min) - Update README with new features, installation, and configuration Co-Authored-By: Claude Opus 4.6 --- README.md | 161 +++++++--- argus-cli/build.gradle.kts | 29 ++ .../main/java/io/argus/cli/ArgusClient.java | 290 ++++++++++++++++++ .../src/main/java/io/argus/cli/ArgusTop.java | 104 +++++++ .../java/io/argus/cli/MetricsSnapshot.java | 62 ++++ .../java/io/argus/cli/TerminalRenderer.java | 240 +++++++++++++++ .../io/argus/core/config/AgentConfig.java | 94 +++++- .../src/main/resources/public/css/style.css | 56 ++++ .../src/main/resources/public/index.html | 20 ++ .../src/main/resources/public/js/app.js | 105 ++++++- .../java/io/argus/server/ArgusServer.java | 35 ++- .../server/analysis/FlameGraphAnalyzer.java | 215 +++++++++++++ .../server/handler/ArgusChannelHandler.java | 45 ++- .../argus/server/http/HttpResponseHelper.java | 4 + .../argus/server/metrics/OtlpJsonBuilder.java | 261 ++++++++++++++++ .../server/metrics/OtlpMetricsExporter.java | 102 ++++++ .../server/websocket/EventBroadcaster.java | 8 + install.sh | 171 +++++++++++ .../build.gradle.kts | 30 +- settings.gradle.kts | 1 + 20 files changed, 1958 insertions(+), 75 deletions(-) create mode 100644 argus-cli/build.gradle.kts create mode 100644 argus-cli/src/main/java/io/argus/cli/ArgusClient.java create mode 100644 argus-cli/src/main/java/io/argus/cli/ArgusTop.java create mode 100644 argus-cli/src/main/java/io/argus/cli/MetricsSnapshot.java create mode 100644 argus-cli/src/main/java/io/argus/cli/TerminalRenderer.java create mode 100644 argus-server/src/main/java/io/argus/server/analysis/FlameGraphAnalyzer.java create mode 100644 argus-server/src/main/java/io/argus/server/metrics/OtlpJsonBuilder.java create mode 100644 argus-server/src/main/java/io/argus/server/metrics/OtlpMetricsExporter.java create mode 100755 install.sh diff --git a/README.md b/README.md index a6d7342..0a87f1a 100644 --- a/README.md +++ b/README.md @@ -8,37 +8,47 @@ Inspired by **Argus Panoptes** from Greek mythology - the giant with a hundred eyes who never slept and watched over everything - this project observes and analyzes all Virtual Threads in the JVM in real-time. -A next-generation real-time visualization profiler for JVM 21+ environments, focusing on Virtual Threads (Project Loom) monitoring and memory analysis. +A lightweight, zero-dependency JVM monitoring tool for Java 21+ environments. Real-time dashboard, terminal CLI, flame graphs, and OpenTelemetry export — all powered by JDK Flight Recorder. ## Features +### Real-time Dashboard +- **Interactive Charts**: WebSocket-based streaming with Chart.js visualizations +- **Flame Graph**: Continuous profiling visualization with d3-flamegraph (zoom, hover, export) +- **Dual Tabs**: Virtual Threads tab + JVM Overview tab + +### CLI Monitor (`argus top`) +- **htop-style Terminal UI**: CPU, heap, GC, virtual threads at a glance +- **ANSI Color Coding**: Green/yellow/red thresholds for instant status +- **Zero Dependencies**: Standalone JAR, connects to any running Argus server + ### Virtual Thread Monitoring - **Thread Lifecycle**: Track creation, termination, and pinning of virtual threads - **Pinning Detection**: Identify pinned threads with detailed stack traces -- **Real-time State Tracking**: Monitor running, pinned, and ended thread states +- **Carrier Thread Analysis**: Per-carrier virtual thread distribution ### Memory & GC Monitoring - **GC Events**: Real-time garbage collection tracking with pause time analysis - **Heap Usage**: Before/after heap visualization with trend analysis - **Allocation Rate**: Track object allocation rate and top allocating classes - **Metaspace Monitoring**: Monitor metaspace usage and growth rate -- **GC Overhead**: Calculate GC overhead percentage with warnings -### CPU & Performance Monitoring -- **CPU Utilization**: JVM and system CPU tracking with history +### CPU & Profiling +- **CPU Utilization**: JVM and system CPU tracking with 60s history - **Method Profiling**: Hot method detection via execution sampling +- **Flame Graph**: Interactive flame graph from continuous profiling data - **Lock Contention**: Monitor thread contention and lock wait times -### Correlation Analysis -- **GC ↔ CPU Correlation**: Detect CPU spikes related to GC events -- **GC ↔ Pinning Correlation**: Identify pinning increases during GC -- **Automatic Recommendations**: Get actionable insights based on metrics +### Observability Export +- **Prometheus**: `/prometheus` endpoint for scraping +- **OTLP Export**: Push metrics to OpenTelemetry collectors (hand-coded, no SDK) +- **Data Export**: Export events in CSV, JSON, or JSONL formats -### Core Features +### Core Architecture - **JFR Streaming**: Low-overhead event collection using JDK Flight Recorder -- **Real-time Dashboard**: WebSocket-based streaming with interactive charts -- **Lock-free Architecture**: High-performance ring buffer for event collection -- **Data Export**: Export events in CSV, JSON, or JSONL formats +- **Lock-free Ring Buffer**: High-performance event collection +- **Zero External Dependencies**: Only Netty for HTTP server (no Jackson, no Gson, no OTEL SDK) +- **Correlation Analysis**: Cross-metric correlation with automatic recommendations ## Requirements @@ -47,48 +57,93 @@ A next-generation real-time visualization profiler for JVM 21+ environments, foc ## Installation -### Option 1: Download via curl (Recommended) +### Option 1: One-line Install (Recommended) ```bash -# Download the latest agent JAR -curl -LO https://github.com/rlaope/argus/releases/latest/download/argus-agent.jar +curl -fsSL https://raw.githubusercontent.com/rlaope/argus/master/install.sh | bash +``` -# Or download a specific version -curl -LO https://github.com/rlaope/argus/releases/download/v0.1.0/argus-agent.jar +This downloads the agent + CLI, installs to `~/.argus/`, and adds the `argus` command to your PATH. + +```bash +# Install a specific version +curl -fsSL https://raw.githubusercontent.com/rlaope/argus/master/install.sh | bash -s -- v0.3.0 ``` -### Option 2: Build from Source +After installation, restart your terminal or run `source ~/.zshrc` (or `~/.bashrc`). + +### Option 2: Manual Download + +```bash +# Download JARs from GitHub Releases +curl -LO https://github.com/rlaope/argus/releases/latest/download/argus-agent-0.3.0.jar +curl -LO https://github.com/rlaope/argus/releases/latest/download/argus-cli-0.3.0-all.jar + +# Run the CLI directly +java -jar argus-cli-0.3.0-all.jar +``` + +### Option 3: Build from Source ```bash git clone https://github.com/rlaope/argus.git cd argus ./gradlew build +./gradlew :argus-cli:fatJar -# JARs are located at: -# argus-agent/build/libs/argus-agent-x.x.x-SNAPSHOT.jar -# argus-server/build/libs/argus-server-x.x.x-SNAPSHOT.jar +# JARs: +# argus-agent/build/libs/argus-agent-0.3.0.jar +# argus-cli/build/libs/argus-cli-0.3.0-all.jar ``` ## Quick Start -### Run with Java Agent +### 1. Attach Argus to Your App ```bash -java -javaagent:argus-agent.jar \ - --enable-preview \ +java -javaagent:$(argus-agent --path) \ -jar your-application.jar + +# Or with the JAR path directly +java -javaagent:~/.argus/argus-agent.jar \ + -jar your-application.jar +``` + +### 2. Open the Dashboard + +``` +http://localhost:9202/ ``` -### With Built-in Dashboard Server +### 3. Use the CLI Monitor ```bash -java -javaagent:argus-agent.jar \ - -Dargus.server.enabled=true \ - --enable-preview \ +# Connect to local Argus server +argus + +# Custom host/port and refresh interval +argus --host 192.168.1.100 --port 9202 --interval 2 + +# Disable colors (for piping/logging) +argus --no-color +``` + +### 4. Enable Profiling & Flame Graph + +```bash +java -javaagent:~/.argus/argus-agent.jar \ + -Dargus.profiling.enabled=true \ + -Dargus.contention.enabled=true \ -jar your-application.jar +``` -# Open dashboard: http://localhost:9202/ -# View metrics: curl http://localhost:9202/metrics +### 5. Export Metrics to OpenTelemetry + +```bash +java -javaagent:~/.argus/argus-agent.jar \ + -Dargus.otlp.enabled=true \ + -Dargus.otlp.endpoint=http://localhost:4318/v1/metrics \ + -jar your-application.jar ``` ### Configuration @@ -111,30 +166,37 @@ The agent accepts the following system properties: | `argus.contention.enabled` | `false` | Enable lock contention tracking | | `argus.contention.threshold` | `50` | Minimum contention duration (ms) | | `argus.correlation.enabled` | `true` | Enable correlation analysis | - -See [Configuration Guide](docs/configuration.md) for detailed documentation. +| `argus.otlp.enabled` | `false` | Enable OTLP metrics export | +| `argus.otlp.endpoint` | `http://localhost:4318/v1/metrics` | OTLP collector endpoint | +| `argus.otlp.interval` | `15000` | OTLP push interval in milliseconds | +| `argus.otlp.headers` | *(empty)* | Auth headers (`key=val,key=val`) | +| `argus.otlp.service.name` | `argus` | OTLP resource service name | ## Architecture ``` ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ argus-agent │───▶│ argus-core │◀───│ argus-server │ -│ (JFR Stream) │ │ (Ring Buffer) │ │ (WebSocket) │ +│ (JFR Stream) │ │ (Config/Buffer) │ │ (Netty/Analysis)│ └─────────────────┘ └─────────────────┘ └─────────────────┘ - │ │ - │ JFR Events │ - ▼ ▼ -┌─────────────────┐ ┌─────────────────┐ -│ Target JVM │ │ Frontend │ -│ (Virtual Threads)│ │ (Visualization)│ -└─────────────────┘ └─────────────────┘ + │ ▲ │ + │ │ ▼ + ▼ ┌──────┴──────┐ ┌─────────────────┐ +┌─────────────────┐ │ argus-cli │ │ argus-frontend │ +│ Target JVM │ │ (argus top) │ │ (Dashboard UI) │ +└─────────────────┘ └─────────────┘ └─────────────────┘ + │ │ + HTTP Polling WebSocket + + (10 endpoints) Flame Graph ``` ## Modules -- **argus-core**: Core event models, ring buffer, and serialization -- **argus-agent**: Java agent with JFR streaming engine -- **argus-server**: WebSocket server for event streaming +- **argus-core**: Shared config, event models, ring buffer +- **argus-agent**: Java agent entry point with JFR streaming engine +- **argus-server**: Netty HTTP/WebSocket server, 10 analyzers, Prometheus + OTLP export +- **argus-frontend**: Static HTML/JS dashboard with Chart.js and d3-flamegraph +- **argus-cli**: Standalone terminal monitor (`argus top`), zero external dependencies ## JFR Events Captured @@ -172,6 +234,17 @@ See [Configuration Guide](docs/configuration.md) for detailed documentation. | `/method-profiling` | Hot methods (Top 20) | | `/contention-analysis` | Lock contention hotspots | | `/correlation` | Correlation analysis and recommendations | +| `/flame-graph` | Flame graph data (JSON or `?format=collapsed`) | +| `/prometheus` | Prometheus metrics endpoint | +| `/carrier-threads` | Carrier thread distribution | +| `/active-threads` | Currently active virtual threads | + +## Uninstall + +```bash +rm -rf ~/.argus +# Then remove the PATH line from ~/.zshrc or ~/.bashrc +``` ## Contributing diff --git a/argus-cli/build.gradle.kts b/argus-cli/build.gradle.kts new file mode 100644 index 0000000..3f2e035 --- /dev/null +++ b/argus-cli/build.gradle.kts @@ -0,0 +1,29 @@ +plugins { + id("java") + id("application") +} + +application { + mainClass.set("io.argus.cli.ArgusTop") +} + +dependencies { + implementation(project(":argus-core")) +} + +tasks.jar { + manifest { + attributes["Main-Class"] = "io.argus.cli.ArgusTop" + } +} + +// Fat JAR for standalone execution +tasks.register("fatJar") { + archiveClassifier.set("all") + duplicatesStrategy = DuplicatesStrategy.EXCLUDE + manifest { + attributes["Main-Class"] = "io.argus.cli.ArgusTop" + } + from(configurations.runtimeClasspath.get().map { if (it.isDirectory) it else zipTree(it) }) + with(tasks.jar.get()) +} diff --git a/argus-cli/src/main/java/io/argus/cli/ArgusClient.java b/argus-cli/src/main/java/io/argus/cli/ArgusClient.java new file mode 100644 index 0000000..908395c --- /dev/null +++ b/argus-cli/src/main/java/io/argus/cli/ArgusClient.java @@ -0,0 +1,290 @@ +package io.argus.cli; + +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; + +/** + * HTTP client that polls Argus server endpoints for metrics data. + */ +public final class ArgusClient { + + private final String baseUrl; + private final HttpClient httpClient; + + public ArgusClient(String host, int port) { + this.baseUrl = "http://" + host + ":" + port; + this.httpClient = HttpClient.newBuilder() + .connectTimeout(Duration.ofSeconds(5)) + .build(); + } + + /** + * Fetches all metrics from the server in parallel. + * + * @return a complete metrics snapshot + */ + public MetricsSnapshot fetchAll() { + try { + // Fetch all endpoints in parallel + var healthFuture = fetchAsync("/health"); + var metricsFuture = fetchAsync("/metrics"); + var cpuFuture = fetchAsync("/cpu-metrics"); + var gcFuture = fetchAsync("/gc-analysis"); + var activeFuture = fetchAsync("/active-threads"); + var pinningFuture = fetchAsync("/pinning-analysis"); + var carrierFuture = fetchAsync("/carrier-threads"); + var profilingFuture = fetchAsync("/method-profiling"); + var metaspaceFuture = fetchAsync("/metaspace-metrics"); + var contentionFuture = fetchAsync("/contention-analysis"); + + // Wait for all + CompletableFuture.allOf(healthFuture, metricsFuture, cpuFuture, gcFuture, + activeFuture, pinningFuture, carrierFuture, profilingFuture, + metaspaceFuture, contentionFuture).join(); + + String health = healthFuture.join(); + String metrics = metricsFuture.join(); + String cpu = cpuFuture.join(); + String gc = gcFuture.join(); + String active = activeFuture.join(); + String pinning = pinningFuture.join(); + String carrier = carrierFuture.join(); + String profiling = profilingFuture.join(); + String metaspace = metaspaceFuture.join(); + String contention = contentionFuture.join(); + + return buildSnapshot(health, metrics, cpu, gc, active, pinning, + carrier, profiling, metaspace, contention); + } catch (Exception e) { + return MetricsSnapshot.disconnected(); + } + } + + private CompletableFuture fetchAsync(String path) { + HttpRequest request = HttpRequest.newBuilder() + .uri(URI.create(baseUrl + path)) + .timeout(Duration.ofSeconds(3)) + .GET() + .build(); + + return httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) + .thenApply(resp -> resp.statusCode() == 200 ? resp.body() : "") + .exceptionally(e -> ""); + } + + private MetricsSnapshot buildSnapshot(String health, String metrics, String cpu, + String gc, String active, String pinning, + String carrier, String profiling, + String metaspace, String contention) { + int clientCount = jsonInt(health, "clients"); + + long totalEvents = jsonLong(metrics, "totalEvents"); + long startEvents = jsonLong(metrics, "startEvents"); + long endEvents = jsonLong(metrics, "endEvents"); + long pinnedEvents = jsonLong(metrics, "pinnedEvents"); + int activeThreads = countJsonArrayElements(active); + + double cpuJvm = jsonDouble(cpu, "currentJvmPercent"); + double cpuMachine = jsonDouble(cpu, "currentMachinePercent"); + double cpuPeak = jsonDouble(cpu, "peakJvmTotal") * 100; + + long gcTotal = jsonLong(gc, "totalGCEvents"); + double gcPause = jsonDouble(gc, "totalPauseTimeMs"); + double gcOverhead = jsonDouble(gc, "gcOverheadPercent"); + long heapUsed = jsonLong(gc, "currentHeapUsed"); + long heapCommitted = jsonLong(gc, "currentHeapCommitted"); + + double metaUsed = jsonDouble(metaspace, "currentUsedMB"); + long classes = jsonLong(metaspace, "currentClassCount"); + + int carriers = jsonInt(carrier, "totalCarriers"); + double avgVt = jsonDouble(carrier, "avgVirtualThreadsPerCarrier"); + + long totalPinned = jsonLong(pinning, "totalPinnedEvents"); + int uniqueStacks = jsonInt(pinning, "uniqueStackTraces"); + + long samples = jsonLong(profiling, "totalSamples"); + List hotMethods = parseHotMethods(profiling); + + long contentionEvts = jsonLong(contention, "totalContentionEvents"); + double contentionTime = jsonDouble(contention, "totalContentionTimeMs"); + List hotspots = parseContentionHotspots(contention); + + return new MetricsSnapshot(true, clientCount, totalEvents, startEvents, + endEvents, pinnedEvents, activeThreads, cpuJvm, cpuMachine, cpuPeak, + gcTotal, gcPause, gcOverhead, heapUsed, heapCommitted, + metaUsed, classes, carriers, avgVt, totalPinned, uniqueStacks, + samples, hotMethods, contentionEvts, contentionTime, hotspots); + } + + // Simple JSON value parsers (no external library) + + private static long jsonLong(String json, String key) { + String val = extractJsonValue(json, key); + if (val == null || val.isEmpty()) return 0; + try { + return Long.parseLong(val); + } catch (NumberFormatException e) { + return 0; + } + } + + private static int jsonInt(String json, String key) { + return (int) jsonLong(json, key); + } + + private static double jsonDouble(String json, String key) { + String val = extractJsonValue(json, key); + if (val == null || val.isEmpty()) return 0.0; + try { + return Double.parseDouble(val); + } catch (NumberFormatException e) { + return 0.0; + } + } + + private static String extractJsonValue(String json, String key) { + if (json == null || json.isEmpty()) return null; + String pattern = "\"" + key + "\":"; + int idx = json.indexOf(pattern); + if (idx < 0) return null; + int start = idx + pattern.length(); + if (start >= json.length()) return null; + + char first = json.charAt(start); + if (first == '"') { + int end = json.indexOf('"', start + 1); + return end > start ? json.substring(start + 1, end) : null; + } + + int end = start; + while (end < json.length()) { + char c = json.charAt(end); + if (c == ',' || c == '}' || c == ']') break; + end++; + } + return json.substring(start, end).trim(); + } + + private static int countJsonArrayElements(String json) { + if (json == null || json.isEmpty() || "[]".equals(json.trim())) return 0; + int count = 0; + boolean inString = false; + int depth = 0; + for (int i = 0; i < json.length(); i++) { + char c = json.charAt(i); + if (c == '"' && (i == 0 || json.charAt(i - 1) != '\\')) { + inString = !inString; + } else if (!inString) { + if (c == '{') { + if (depth == 1) count++; + depth++; + } else if (c == '}') { + depth--; + } else if (c == '[') { + depth++; + } else if (c == ']') { + depth--; + } + } + } + return count; + } + + private static List parseHotMethods(String json) { + List methods = new ArrayList<>(); + if (json == null || json.isEmpty()) return methods; + + int idx = json.indexOf("\"topMethods\":["); + if (idx < 0) return methods; + + String section = json.substring(idx); + int arrEnd = findMatchingBracket(section, section.indexOf('[')); + if (arrEnd < 0) return methods; + String arr = section.substring(section.indexOf('['), arrEnd + 1); + + int pos = 0; + while (pos < arr.length() && methods.size() < 5) { + int objStart = arr.indexOf('{', pos); + if (objStart < 0) break; + int objEnd = arr.indexOf('}', objStart); + if (objEnd < 0) break; + String obj = arr.substring(objStart, objEnd + 1); + + String className = extractJsonStringValue(obj, "className"); + String methodName = extractJsonStringValue(obj, "methodName"); + double pct = jsonDouble(obj, "percentage"); + + if (className != null && methodName != null) { + methods.add(new MetricsSnapshot.HotMethodInfo(className, methodName, pct)); + } + pos = objEnd + 1; + } + return methods; + } + + private static List parseContentionHotspots(String json) { + List hotspots = new ArrayList<>(); + if (json == null || json.isEmpty()) return hotspots; + + int idx = json.indexOf("\"hotspots\":["); + if (idx < 0) return hotspots; + + String section = json.substring(idx); + int arrEnd = findMatchingBracket(section, section.indexOf('[')); + if (arrEnd < 0) return hotspots; + String arr = section.substring(section.indexOf('['), arrEnd + 1); + + int pos = 0; + while (pos < arr.length() && hotspots.size() < 3) { + int objStart = arr.indexOf('{', pos); + if (objStart < 0) break; + int objEnd = arr.indexOf('}', objStart); + if (objEnd < 0) break; + String obj = arr.substring(objStart, objEnd + 1); + + String monitor = extractJsonStringValue(obj, "monitorClass"); + long count = jsonLong(obj, "eventCount"); + + if (monitor != null) { + hotspots.add(new MetricsSnapshot.ContentionHotspot(monitor, count)); + } + pos = objEnd + 1; + } + return hotspots; + } + + private static String extractJsonStringValue(String json, String key) { + String pattern = "\"" + key + "\":\""; + int idx = json.indexOf(pattern); + if (idx < 0) return null; + int start = idx + pattern.length(); + int end = json.indexOf('"', start); + return end > start ? json.substring(start, end) : null; + } + + private static int findMatchingBracket(String s, int openIdx) { + if (openIdx < 0) return -1; + int depth = 0; + boolean inString = false; + for (int i = openIdx; i < s.length(); i++) { + char c = s.charAt(i); + if (c == '"' && (i == 0 || s.charAt(i - 1) != '\\')) { + inString = !inString; + } else if (!inString) { + if (c == '[') depth++; + else if (c == ']') { + depth--; + if (depth == 0) return i; + } + } + } + return -1; + } +} diff --git a/argus-cli/src/main/java/io/argus/cli/ArgusTop.java b/argus-cli/src/main/java/io/argus/cli/ArgusTop.java new file mode 100644 index 0000000..546f68d --- /dev/null +++ b/argus-cli/src/main/java/io/argus/cli/ArgusTop.java @@ -0,0 +1,104 @@ +package io.argus.cli; + +import java.io.IOException; + +/** + * Terminal-based JVM monitoring tool for Argus. + * + *

Connects to a running Argus server via HTTP and displays + * real-time metrics in an htop-like terminal interface. + * + *

Usage: {@code java -jar argus-cli.jar [--host HOST] [--port PORT] [--interval SECS] [--no-color]} + */ +public final class ArgusTop { + + private static final String DEFAULT_HOST = "localhost"; + private static final int DEFAULT_PORT = 9202; + private static final int DEFAULT_INTERVAL = 1; + + public static void main(String[] args) { + String host = DEFAULT_HOST; + int port = DEFAULT_PORT; + int interval = DEFAULT_INTERVAL; + boolean color = true; + + // Parse arguments + for (int i = 0; i < args.length; i++) { + switch (args[i]) { + case "--host", "-h" -> { + if (i + 1 < args.length) host = args[++i]; + } + case "--port", "-p" -> { + if (i + 1 < args.length) port = Integer.parseInt(args[++i]); + } + case "--interval", "-i" -> { + if (i + 1 < args.length) interval = Integer.parseInt(args[++i]); + } + case "--no-color" -> color = false; + case "--help" -> { + printUsage(); + return; + } + } + } + + ArgusClient client = new ArgusClient(host, port); + TerminalRenderer renderer = new TerminalRenderer(color, host, port, interval); + + // Shutdown hook for clean exit + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + System.out.print("\033[?25h"); // Show cursor + System.out.println("\nArgus CLI stopped."); + })); + + // Hide cursor + System.out.print("\033[?25l"); + + System.out.println("Connecting to Argus server at " + host + ":" + port + "..."); + + // Main loop + final long intervalMs = interval * 1000L; + while (!Thread.currentThread().isInterrupted()) { + MetricsSnapshot snapshot = client.fetchAll(); + renderer.render(snapshot); + + try { + // Check for 'q' key (non-blocking stdin check) + if (System.in.available() > 0) { + int ch = System.in.read(); + if (ch == 'q' || ch == 'Q') { + break; + } + } + Thread.sleep(intervalMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } catch (IOException e) { + // Ignore stdin errors + } + } + + System.out.print("\033[?25h"); // Show cursor + } + + private static void printUsage() { + System.out.println(""" + Argus Top - Terminal JVM Monitor + + Usage: argus-top [OPTIONS] + + Options: + --host, -h HOST Server host (default: localhost) + --port, -p PORT Server port (default: 9202) + --interval, -i SECS Refresh interval in seconds (default: 1) + --no-color Disable ANSI colors + --help Show this help + + Examples: + argus-top + argus-top --port 9202 --interval 2 + argus-top --host 192.168.1.100 --port 9202 --no-color + """); + } +} diff --git a/argus-cli/src/main/java/io/argus/cli/MetricsSnapshot.java b/argus-cli/src/main/java/io/argus/cli/MetricsSnapshot.java new file mode 100644 index 0000000..c54b0c4 --- /dev/null +++ b/argus-cli/src/main/java/io/argus/cli/MetricsSnapshot.java @@ -0,0 +1,62 @@ +package io.argus.cli; + +import java.util.List; +import java.util.Map; + +/** + * Immutable snapshot of all metrics from one poll cycle. + */ +public record MetricsSnapshot( + // Connection + boolean connected, + int clientCount, + + // Basic metrics + long totalEvents, + long startEvents, + long endEvents, + long pinnedEvents, + int activeThreads, + + // CPU + double cpuJvmPercent, + double cpuMachinePercent, + double cpuPeakJvm, + + // GC + long gcTotalEvents, + double gcTotalPauseMs, + double gcOverheadPercent, + long heapUsedBytes, + long heapCommittedBytes, + + // Metaspace + double metaspaceUsedMB, + long classCount, + + // Carrier threads + int carrierCount, + double avgVtPerCarrier, + + // Pinning + long totalPinnedEvents, + int uniquePinningStacks, + + // Profiling + long profilingSamples, + List hotMethods, + + // Contention + long contentionEvents, + double contentionTimeMs, + List contentionHotspots +) { + public static MetricsSnapshot disconnected() { + return new MetricsSnapshot(false, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, List.of(), 0, 0, List.of()); + } + + public record HotMethodInfo(String className, String methodName, double percentage) {} + public record ContentionHotspot(String monitorClass, long eventCount) {} +} diff --git a/argus-cli/src/main/java/io/argus/cli/TerminalRenderer.java b/argus-cli/src/main/java/io/argus/cli/TerminalRenderer.java new file mode 100644 index 0000000..57c05a2 --- /dev/null +++ b/argus-cli/src/main/java/io/argus/cli/TerminalRenderer.java @@ -0,0 +1,240 @@ +package io.argus.cli; + +/** + * Renders metrics data to the terminal using ANSI escape codes. + */ +public final class TerminalRenderer { + + private static final String RESET = "\033[0m"; + private static final String BOLD = "\033[1m"; + private static final String DIM = "\033[2m"; + private static final String GREEN = "\033[32m"; + private static final String YELLOW = "\033[33m"; + private static final String RED = "\033[31m"; + private static final String CYAN = "\033[36m"; + private static final String WHITE = "\033[37m"; + private static final String BG_RED = "\033[41m"; + private static final String CLEAR_SCREEN = "\033[2J\033[H"; + + private final boolean useColor; + private final String host; + private final int port; + private final int intervalSec; + private long startTime; + + public TerminalRenderer(boolean useColor, String host, int port, int intervalSec) { + this.useColor = useColor; + this.host = host; + this.port = port; + this.intervalSec = intervalSec; + this.startTime = System.currentTimeMillis(); + } + + /** + * Renders a full metrics snapshot to the terminal. + */ + public void render(MetricsSnapshot snap) { + StringBuilder sb = new StringBuilder(2048); + sb.append(CLEAR_SCREEN); + + if (!snap.connected()) { + renderDisconnected(sb); + System.out.print(sb); + System.out.flush(); + return; + } + + renderHeader(sb); + renderSeparator(sb); + renderSystemMetrics(sb, snap); + renderSeparator(sb); + renderVirtualThreads(sb, snap); + renderSeparator(sb); + renderProfilingContention(sb, snap); + renderSeparator(sb); + renderFooter(sb); + + System.out.print(sb); + System.out.flush(); + } + + private void renderDisconnected(StringBuilder sb) { + renderHeader(sb); + sb.append('\n'); + sb.append(color(RED, BOLD)).append(" CONNECTION FAILED") + .append(color(RESET)).append('\n'); + sb.append(color(DIM)).append(" Cannot connect to Argus server at ") + .append(host).append(':').append(port).append(color(RESET)).append('\n'); + sb.append(color(DIM)).append(" Retrying every ").append(intervalSec) + .append("s...").append(color(RESET)).append('\n'); + } + + private void renderHeader(StringBuilder sb) { + String uptime = formatUptime(System.currentTimeMillis() - startTime); + sb.append(color(BOLD, CYAN)).append(" Argus JVM Monitor") + .append(color(RESET, DIM)).append(" | ") + .append(host).append(':').append(port) + .append(" | uptime ").append(uptime) + .append(" | refresh ").append(intervalSec).append('s') + .append(color(RESET)).append('\n'); + } + + private void renderSeparator(StringBuilder sb) { + sb.append(color(DIM)).append(" ").append("─".repeat(65)) + .append(color(RESET)).append('\n'); + } + + private void renderSystemMetrics(StringBuilder sb, MetricsSnapshot snap) { + // CPU + sb.append(" CPU "); + appendProgressBar(sb, snap.cpuJvmPercent(), 16); + sb.append(colorByThreshold(snap.cpuJvmPercent(), 70, 90)) + .append(String.format(" %5.1f%%", snap.cpuJvmPercent())) + .append(color(RESET)).append(" JVM | ") + .append(String.format("%5.1f%%", snap.cpuMachinePercent())) + .append(" Machine\n"); + + // Heap + double heapUsedMB = snap.heapUsedBytes() / (1024.0 * 1024.0); + double heapCommitMB = snap.heapCommittedBytes() / (1024.0 * 1024.0); + double heapPct = heapCommitMB > 0 ? (heapUsedMB / heapCommitMB) * 100 : 0; + sb.append(" Heap "); + appendProgressBar(sb, heapPct, 16); + sb.append(colorByThreshold(heapPct, 70, 90)) + .append(String.format(" %s/%s", formatBytes(snap.heapUsedBytes()), formatBytes(snap.heapCommittedBytes()))) + .append(color(RESET)); + sb.append(" | GC: ") + .append(colorByThreshold(snap.gcOverheadPercent(), 2, 5)) + .append(String.format("%.1f%%", snap.gcOverheadPercent())) + .append(color(RESET)).append(" overhead\n"); + + // Metaspace + GC + sb.append(" Meta ") + .append(String.format("%.0fMB", snap.metaspaceUsedMB())) + .append(" (").append(formatNumber(snap.classCount())).append(" classes)") + .append(" | GC events: ") + .append(formatNumber(snap.gcTotalEvents())) + .append(" (").append(String.format("%.0fms", snap.gcTotalPauseMs())).append(" total)\n"); + } + + private void renderVirtualThreads(StringBuilder sb, MetricsSnapshot snap) { + sb.append(" Virtual Threads: ") + .append(color(BOLD)).append(formatNumber(snap.activeThreads())) + .append(color(RESET)).append(" active | ") + .append(formatNumber(snap.startEvents())).append(" total"); + + if (snap.totalPinnedEvents() > 0) { + sb.append(" | ").append(color(YELLOW)) + .append(formatNumber(snap.totalPinnedEvents())).append(" pinned") + .append(color(RESET)) + .append(" (").append(snap.uniquePinningStacks()).append(" stacks)"); + } + sb.append('\n'); + + sb.append(" Carriers: ") + .append(snap.carrierCount()).append(" threads") + .append(" | avg ").append(String.format("%.1f", snap.avgVtPerCarrier())) + .append(" VT/carrier\n"); + } + + private void renderProfilingContention(StringBuilder sb, MetricsSnapshot snap) { + // Hot Methods column + if (snap.profilingSamples() > 0 && !snap.hotMethods().isEmpty()) { + sb.append(color(BOLD)).append(" Hot Methods") + .append(color(RESET, DIM)) + .append(" (").append(formatNumber(snap.profilingSamples())).append(" samples)") + .append(color(RESET)).append('\n'); + + for (var m : snap.hotMethods()) { + String shortClass = shortenClassName(m.className()); + sb.append(colorByThreshold(m.percentage(), 30, 60)) + .append(String.format(" %5.1f%%", m.percentage())) + .append(color(RESET)).append(' ') + .append(shortClass).append('.').append(m.methodName()) + .append('\n'); + } + } else { + sb.append(color(DIM)).append(" Profiling: disabled") + .append(color(RESET)).append('\n'); + } + + // Contention + if (snap.contentionEvents() > 0 && !snap.contentionHotspots().isEmpty()) { + sb.append(color(BOLD)).append(" Contention") + .append(color(RESET, DIM)) + .append(" (").append(formatNumber(snap.contentionEvents())).append(" events, ") + .append(String.format("%.0fms", snap.contentionTimeMs())).append(")") + .append(color(RESET)).append('\n'); + + for (var h : snap.contentionHotspots()) { + sb.append(" ").append(shortenClassName(h.monitorClass())) + .append(" (").append(h.eventCount()).append(" events)\n"); + } + } + } + + private void renderFooter(StringBuilder sb) { + sb.append(color(DIM)) + .append(" q: quit | Ctrl+C: exit") + .append(color(RESET)).append('\n'); + } + + // --- Helpers --- + + private void appendProgressBar(StringBuilder sb, double percent, int width) { + int filled = (int) Math.round(percent / 100.0 * width); + filled = Math.max(0, Math.min(width, filled)); + + sb.append('['); + sb.append(colorByThreshold(percent, 70, 90)); + for (int i = 0; i < width; i++) { + sb.append(i < filled ? '\u2588' : '\u2591'); + } + sb.append(color(RESET)); + sb.append(']'); + } + + private String colorByThreshold(double value, double warn, double crit) { + if (!useColor) return ""; + if (value >= crit) return RED; + if (value >= warn) return YELLOW; + return GREEN; + } + + private String color(String... codes) { + if (!useColor) return ""; + return String.join("", codes); + } + + private static String formatUptime(long ms) { + long sec = ms / 1000; + if (sec < 60) return sec + "s"; + long min = sec / 60; + sec %= 60; + if (min < 60) return min + "m " + sec + "s"; + long hr = min / 60; + min %= 60; + return hr + "h " + min + "m"; + } + + private static String formatBytes(long bytes) { + if (bytes <= 0) return "0B"; + double mb = bytes / (1024.0 * 1024.0); + if (mb >= 1024) return String.format("%.1fG", mb / 1024.0); + return String.format("%.0fM", mb); + } + + private static String formatNumber(long n) { + if (n < 1000) return String.valueOf(n); + if (n < 1_000_000) return String.format("%.1fK", n / 1000.0); + return String.format("%.1fM", n / 1_000_000.0); + } + + private static String shortenClassName(String className) { + if (className == null) return "?"; + // Keep last 2 segments: "io.argus.server.ArgusServer" -> "server.ArgusServer" + String[] parts = className.split("\\."); + if (parts.length <= 2) return className; + return parts[parts.length - 2] + "." + parts[parts.length - 1]; + } +} diff --git a/argus-core/src/main/java/io/argus/core/config/AgentConfig.java b/argus-core/src/main/java/io/argus/core/config/AgentConfig.java index 42f8080..3f12585 100644 --- a/argus-core/src/main/java/io/argus/core/config/AgentConfig.java +++ b/argus-core/src/main/java/io/argus/core/config/AgentConfig.java @@ -23,6 +23,11 @@ *

  • {@code argus.contention.threshold} - Minimum contention time to track in ms (default: 50)
  • *
  • {@code argus.correlation.enabled} - Enable correlation analysis (default: true)
  • *
  • {@code argus.metrics.prometheus.enabled} - Enable Prometheus metrics endpoint (default: true)
  • + *
  • {@code argus.otlp.enabled} - Enable OTLP metrics push export (default: false)
  • + *
  • {@code argus.otlp.endpoint} - OTLP collector endpoint (default: http://localhost:4318/v1/metrics)
  • + *
  • {@code argus.otlp.interval} - OTLP push interval in ms (default: 15000)
  • + *
  • {@code argus.otlp.headers} - OTLP auth headers as key=val,key=val (default: empty)
  • + *
  • {@code argus.otlp.service.name} - OTLP resource service name (default: argus)
  • * */ public final class AgentConfig { @@ -42,6 +47,11 @@ public final class AgentConfig { private static final int DEFAULT_CONTENTION_THRESHOLD_MS = 50; // Higher threshold for less noise private static final boolean DEFAULT_CORRELATION_ENABLED = true; private static final boolean DEFAULT_PROMETHEUS_ENABLED = true; + private static final boolean DEFAULT_OTLP_ENABLED = false; + private static final String DEFAULT_OTLP_ENDPOINT = "http://localhost:4318/v1/metrics"; + private static final int DEFAULT_OTLP_INTERVAL_MS = 15000; + private static final String DEFAULT_OTLP_HEADERS = ""; + private static final String DEFAULT_OTLP_SERVICE_NAME = "argus"; private final int bufferSize; private final int serverPort; @@ -58,6 +68,11 @@ public final class AgentConfig { private final int contentionThresholdMs; private final boolean correlationEnabled; private final boolean prometheusEnabled; + private final boolean otlpEnabled; + private final String otlpEndpoint; + private final int otlpIntervalMs; + private final String otlpHeaders; + private final String otlpServiceName; private AgentConfig(int bufferSize, int serverPort, boolean serverEnabled, boolean gcEnabled, boolean cpuEnabled, int cpuIntervalMs, @@ -65,7 +80,9 @@ private AgentConfig(int bufferSize, int serverPort, boolean serverEnabled, boolean metaspaceEnabled, boolean profilingEnabled, int profilingIntervalMs, boolean contentionEnabled, int contentionThresholdMs, boolean correlationEnabled, - boolean prometheusEnabled) { + boolean prometheusEnabled, boolean otlpEnabled, + String otlpEndpoint, int otlpIntervalMs, + String otlpHeaders, String otlpServiceName) { this.bufferSize = bufferSize; this.serverPort = serverPort; this.serverEnabled = serverEnabled; @@ -81,6 +98,11 @@ private AgentConfig(int bufferSize, int serverPort, boolean serverEnabled, this.contentionThresholdMs = contentionThresholdMs; this.correlationEnabled = correlationEnabled; this.prometheusEnabled = prometheusEnabled; + this.otlpEnabled = otlpEnabled; + this.otlpEndpoint = otlpEndpoint; + this.otlpIntervalMs = otlpIntervalMs; + this.otlpHeaders = otlpHeaders; + this.otlpServiceName = otlpServiceName; } /** @@ -113,10 +135,17 @@ public static AgentConfig fromSystemProperties() { System.getProperty("argus.correlation.enabled", String.valueOf(DEFAULT_CORRELATION_ENABLED))); boolean prometheusEnabled = Boolean.parseBoolean( System.getProperty("argus.metrics.prometheus.enabled", String.valueOf(DEFAULT_PROMETHEUS_ENABLED))); + boolean otlpEnabled = Boolean.parseBoolean( + System.getProperty("argus.otlp.enabled", String.valueOf(DEFAULT_OTLP_ENABLED))); + String otlpEndpoint = System.getProperty("argus.otlp.endpoint", DEFAULT_OTLP_ENDPOINT); + int otlpIntervalMs = Integer.getInteger("argus.otlp.interval", DEFAULT_OTLP_INTERVAL_MS); + String otlpHeaders = System.getProperty("argus.otlp.headers", DEFAULT_OTLP_HEADERS); + String otlpServiceName = System.getProperty("argus.otlp.service.name", DEFAULT_OTLP_SERVICE_NAME); return new AgentConfig(bufferSize, serverPort, serverEnabled, gcEnabled, cpuEnabled, cpuIntervalMs, allocationEnabled, allocationThreshold, metaspaceEnabled, profilingEnabled, profilingIntervalMs, - contentionEnabled, contentionThresholdMs, correlationEnabled, prometheusEnabled); + contentionEnabled, contentionThresholdMs, correlationEnabled, prometheusEnabled, + otlpEnabled, otlpEndpoint, otlpIntervalMs, otlpHeaders, otlpServiceName); } /** @@ -129,7 +158,9 @@ public static AgentConfig defaults() { DEFAULT_GC_ENABLED, DEFAULT_CPU_ENABLED, DEFAULT_CPU_INTERVAL_MS, DEFAULT_ALLOCATION_ENABLED, DEFAULT_ALLOCATION_THRESHOLD, DEFAULT_METASPACE_ENABLED, DEFAULT_PROFILING_ENABLED, DEFAULT_PROFILING_INTERVAL_MS, DEFAULT_CONTENTION_ENABLED, - DEFAULT_CONTENTION_THRESHOLD_MS, DEFAULT_CORRELATION_ENABLED, DEFAULT_PROMETHEUS_ENABLED); + DEFAULT_CONTENTION_THRESHOLD_MS, DEFAULT_CORRELATION_ENABLED, DEFAULT_PROMETHEUS_ENABLED, + DEFAULT_OTLP_ENABLED, DEFAULT_OTLP_ENDPOINT, DEFAULT_OTLP_INTERVAL_MS, + DEFAULT_OTLP_HEADERS, DEFAULT_OTLP_SERVICE_NAME); } /** @@ -201,6 +232,26 @@ public boolean isPrometheusEnabled() { return prometheusEnabled; } + public boolean isOtlpEnabled() { + return otlpEnabled; + } + + public String getOtlpEndpoint() { + return otlpEndpoint; + } + + public int getOtlpIntervalMs() { + return otlpIntervalMs; + } + + public String getOtlpHeaders() { + return otlpHeaders; + } + + public String getOtlpServiceName() { + return otlpServiceName; + } + @Override public String toString() { return "AgentConfig{" + @@ -219,6 +270,10 @@ public String toString() { ", contentionThresholdMs=" + contentionThresholdMs + ", correlationEnabled=" + correlationEnabled + ", prometheusEnabled=" + prometheusEnabled + + ", otlpEnabled=" + otlpEnabled + + ", otlpEndpoint='" + otlpEndpoint + '\'' + + ", otlpIntervalMs=" + otlpIntervalMs + + ", otlpServiceName='" + otlpServiceName + '\'' + '}'; } @@ -241,6 +296,11 @@ public static final class Builder { private int contentionThresholdMs = DEFAULT_CONTENTION_THRESHOLD_MS; private boolean correlationEnabled = DEFAULT_CORRELATION_ENABLED; private boolean prometheusEnabled = DEFAULT_PROMETHEUS_ENABLED; + private boolean otlpEnabled = DEFAULT_OTLP_ENABLED; + private String otlpEndpoint = DEFAULT_OTLP_ENDPOINT; + private int otlpIntervalMs = DEFAULT_OTLP_INTERVAL_MS; + private String otlpHeaders = DEFAULT_OTLP_HEADERS; + private String otlpServiceName = DEFAULT_OTLP_SERVICE_NAME; private Builder() { } @@ -320,12 +380,38 @@ public Builder prometheusEnabled(boolean prometheusEnabled) { return this; } + public Builder otlpEnabled(boolean otlpEnabled) { + this.otlpEnabled = otlpEnabled; + return this; + } + + public Builder otlpEndpoint(String otlpEndpoint) { + this.otlpEndpoint = otlpEndpoint; + return this; + } + + public Builder otlpIntervalMs(int otlpIntervalMs) { + this.otlpIntervalMs = otlpIntervalMs; + return this; + } + + public Builder otlpHeaders(String otlpHeaders) { + this.otlpHeaders = otlpHeaders; + return this; + } + + public Builder otlpServiceName(String otlpServiceName) { + this.otlpServiceName = otlpServiceName; + return this; + } + public AgentConfig build() { return new AgentConfig(bufferSize, serverPort, serverEnabled, gcEnabled, cpuEnabled, cpuIntervalMs, allocationEnabled, allocationThreshold, metaspaceEnabled, profilingEnabled, profilingIntervalMs, contentionEnabled, contentionThresholdMs, - correlationEnabled, prometheusEnabled); + correlationEnabled, prometheusEnabled, otlpEnabled, + otlpEndpoint, otlpIntervalMs, otlpHeaders, otlpServiceName); } } } diff --git a/argus-frontend/src/main/resources/public/css/style.css b/argus-frontend/src/main/resources/public/css/style.css index 81320e3..5d48833 100644 --- a/argus-frontend/src/main/resources/public/css/style.css +++ b/argus-frontend/src/main/resources/public/css/style.css @@ -580,6 +580,62 @@ main { opacity: 0.5; } +/* Flame Graph */ +.flamegraph-section { + margin-bottom: 1.5rem; +} + +.flamegraph-section .section-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 1rem; +} + +.flamegraph-actions { + display: flex; + align-items: center; + gap: 1rem; +} + +.flamegraph-actions .btn-small { + padding: 0.25rem 0.75rem; + font-size: 0.8rem; + background: var(--card-bg); + border: 1px solid var(--border-color); + color: var(--text-secondary); + border-radius: 4px; + cursor: pointer; + text-decoration: none; +} + +.flamegraph-actions .btn-small:hover { + color: var(--text-primary); + border-color: var(--accent-blue); +} + +.flamegraph-container { + background: var(--card-bg); + border: 1px solid var(--border-color); + border-radius: 8px; + padding: 1rem; + min-height: 300px; + overflow: hidden; +} + +.flamegraph-placeholder { + display: flex; + align-items: center; + justify-content: center; + min-height: 280px; + color: var(--text-secondary); + font-style: italic; +} + +.flamegraph-container svg { + width: 100% !important; +} + /* Main Tab Navigation (top-level) */ .main-tab-nav { display: flex; diff --git a/argus-frontend/src/main/resources/public/index.html b/argus-frontend/src/main/resources/public/index.html index de3c588..6da8f80 100644 --- a/argus-frontend/src/main/resources/public/index.html +++ b/argus-frontend/src/main/resources/public/index.html @@ -6,6 +6,9 @@ Argus - Virtual Thread Profiler + + +
    @@ -351,6 +354,23 @@

    Metaspace Usage

    + +
    +
    +

    Flame Graph

    +
    + Samples: 0 + + + + Download +
    +
    +
    +
    Enable profiling (-Dargus.profiling.enabled=true) to see flame graph
    +
    +
    +
    diff --git a/argus-frontend/src/main/resources/public/js/app.js b/argus-frontend/src/main/resources/public/js/app.js index e538dad..865db76 100644 --- a/argus-frontend/src/main/resources/public/js/app.js +++ b/argus-frontend/src/main/resources/public/js/app.js @@ -149,7 +149,15 @@ const elements = { // Recommendations recommendationsList: document.getElementById('recommendations-list'), - refreshRecommendationsBtn: document.getElementById('refresh-recommendations') + refreshRecommendationsBtn: document.getElementById('refresh-recommendations'), + + // Flame graph + flamegraphContainer: document.getElementById('flamegraph-container'), + flamegraphSamples: document.getElementById('flamegraph-samples'), + flamegraphTimestamp: document.getElementById('flamegraph-timestamp'), + flamegraphPause: document.getElementById('flamegraph-pause'), + flamegraphReset: document.getElementById('flamegraph-reset'), + flamegraphDownload: document.getElementById('flamegraph-download') }; const maxEvents = 500; @@ -204,6 +212,7 @@ function init() { fetchMethodProfiling(); fetchContentionAnalysis(); fetchCorrelation(); + fetchFlameGraph(); // Setup periodic updates setInterval(updateCharts, 1000); @@ -216,6 +225,7 @@ function init() { setInterval(fetchMethodProfiling, 5000); setInterval(fetchContentionAnalysis, 5000); setInterval(fetchCorrelation, 10000); + setInterval(fetchFlameGraph, 5000); setInterval(() => { renderThreadStateView(elements.threadsContainer, elements.threadCount); }, 1000); @@ -244,6 +254,38 @@ function setupEventListeners() { }); }); + // Flame graph pause/resume + if (elements.flamegraphPause) { + elements.flamegraphPause.addEventListener('click', () => { + flameGraphPaused = !flameGraphPaused; + elements.flamegraphPause.textContent = flameGraphPaused ? 'Resume' : 'Pause'; + if (flameGraphPaused && elements.flamegraphTimestamp) { + elements.flamegraphTimestamp.textContent = + 'Paused at ' + new Date().toLocaleTimeString(); + } + }); + } + + // Flame graph reset (clears server data + resets chart) + if (elements.flamegraphReset) { + elements.flamegraphReset.addEventListener('click', async () => { + await fetch('/flame-graph?reset=true'); + flameChart = null; + flameGraphPaused = false; + if (elements.flamegraphPause) { + elements.flamegraphPause.textContent = 'Pause'; + } + if (elements.flamegraphContainer) { + elements.flamegraphContainer.innerHTML = + '
    Collecting samples...
    '; + } + if (elements.flamegraphTimestamp) { + elements.flamegraphTimestamp.textContent = + 'Reset at ' + new Date().toLocaleTimeString(); + } + }); + } + // Help modal elements.helpBtn.addEventListener('click', () => elements.helpModal.classList.remove('hidden')); elements.helpModalBackdrop.addEventListener('click', () => elements.helpModal.classList.add('hidden')); @@ -736,6 +778,67 @@ async function fetchContentionAnalysis() { } } +// Flame graph state +let flameChart = null; +let flameGraphPaused = false; + +async function fetchFlameGraph() { + if (flameGraphPaused) return; + try { + const response = await fetch('/flame-graph'); + if (response.ok) { + const data = await response.json(); + if (!data.error && data.value > 0) { + renderFlameGraph(data); + if (elements.flamegraphSamples) { + elements.flamegraphSamples.textContent = formatNumber(data.value); + } + if (elements.flamegraphTimestamp) { + elements.flamegraphTimestamp.textContent = + 'Updated: ' + new Date().toLocaleTimeString(); + } + } + } + } catch (e) { + // Flame graph / profiling might not be enabled + } +} + +function renderFlameGraph(data) { + if (!elements.flamegraphContainer || typeof flamegraph === 'undefined') return; + + const container = elements.flamegraphContainer; + const width = container.clientWidth - 32; + + if (!flameChart) { + // Clear placeholder + container.innerHTML = ''; + + flameChart = flamegraph() + .width(width) + .cellHeight(18) + .transitionDuration(300) + .minFrameSize(2) + .transitionEase(d3.easeCubic) + .sort(true) + .title('') + .selfValue(false); + + d3.select(container) + .datum(data) + .call(flameChart); + } else { + flameChart.width(width); + flameChart.update(data); + } + + // Update download link + if (elements.flamegraphDownload) { + elements.flamegraphDownload.href = '/flame-graph?format=collapsed'; + elements.flamegraphDownload.download = 'argus-flamegraph.collapsed'; + } +} + async function fetchCorrelation() { try { const response = await fetch('/correlation'); diff --git a/argus-server/src/main/java/io/argus/server/ArgusServer.java b/argus-server/src/main/java/io/argus/server/ArgusServer.java index 24652a2..04a9a37 100644 --- a/argus-server/src/main/java/io/argus/server/ArgusServer.java +++ b/argus-server/src/main/java/io/argus/server/ArgusServer.java @@ -15,9 +15,12 @@ import io.argus.server.analysis.CPUAnalyzer; import io.argus.server.analysis.GCAnalyzer; import io.argus.server.analysis.MetaspaceAnalyzer; +import io.argus.server.analysis.FlameGraphAnalyzer; import io.argus.server.analysis.MethodProfilingAnalyzer; import io.argus.server.analysis.PinningAnalyzer; import io.argus.server.handler.ArgusChannelHandler; +import io.argus.server.metrics.OtlpJsonBuilder; +import io.argus.server.metrics.OtlpMetricsExporter; import io.argus.server.metrics.PrometheusMetricsCollector; import io.argus.server.metrics.ServerMetrics; import io.argus.core.config.AgentConfig; @@ -92,9 +95,11 @@ public final class ArgusServer { private final AllocationAnalyzer allocationAnalyzer = new AllocationAnalyzer(); private final MetaspaceAnalyzer metaspaceAnalyzer = new MetaspaceAnalyzer(); private final MethodProfilingAnalyzer methodProfilingAnalyzer = new MethodProfilingAnalyzer(); + private final FlameGraphAnalyzer flameGraphAnalyzer = new FlameGraphAnalyzer(); private final ContentionAnalyzer contentionAnalyzer = new ContentionAnalyzer(); private CorrelationAnalyzer correlationAnalyzer; private PrometheusMetricsCollector prometheusCollector; + private OtlpMetricsExporter otlpExporter; private final ThreadStateManager threadStateManager = new ThreadStateManager(); private final EventJsonSerializer serializer = new EventJsonSerializer(); private EventBroadcaster broadcaster; @@ -187,6 +192,18 @@ public void start() throws InterruptedException { contentionEventBuffer != null ? contentionAnalyzer : null); } + // Initialize OTLP metrics exporter if enabled + if (config.isOtlpEnabled()) { + OtlpJsonBuilder otlpJsonBuilder = new OtlpJsonBuilder( + config, metrics, activeThreads, + pinningAnalyzer, carrierAnalyzer, gcAnalyzer, cpuAnalyzer, + allocationEventBuffer != null ? allocationAnalyzer : null, + metaspaceEventBuffer != null ? metaspaceAnalyzer : null, + executionSampleEventBuffer != null ? methodProfilingAnalyzer : null, + contentionEventBuffer != null ? contentionAnalyzer : null); + otlpExporter = new OtlpMetricsExporter(config, otlpJsonBuilder); + } + // Initialize broadcaster with all event buffers broadcaster = new EventBroadcaster( eventBuffer, gcEventBuffer, cpuEventBuffer, @@ -194,7 +211,9 @@ public void start() throws InterruptedException { executionSampleEventBuffer, contentionEventBuffer, clients, metrics, activeThreads, recentEvents, threadEvents, pinningAnalyzer, carrierAnalyzer, gcAnalyzer, cpuAnalyzer, - allocationAnalyzer, metaspaceAnalyzer, methodProfilingAnalyzer, contentionAnalyzer, + allocationAnalyzer, metaspaceAnalyzer, methodProfilingAnalyzer, + executionSampleEventBuffer != null ? flameGraphAnalyzer : null, + contentionAnalyzer, correlationAnalyzer, threadStateManager, serializer); // Initialize Netty @@ -217,6 +236,7 @@ protected void initChannel(SocketChannel ch) { allocationEventBuffer != null ? allocationAnalyzer : null, metaspaceEventBuffer != null ? metaspaceAnalyzer : null, executionSampleEventBuffer != null ? methodProfilingAnalyzer : null, + executionSampleEventBuffer != null ? flameGraphAnalyzer : null, contentionEventBuffer != null ? contentionAnalyzer : null, correlationAnalyzer, broadcaster, @@ -231,6 +251,11 @@ protected void initChannel(SocketChannel ch) { // Start event broadcasting broadcaster.start(); + // Start OTLP exporter + if (otlpExporter != null) { + otlpExporter.start(); + } + // Log startup info LOG.log(System.Logger.Level.INFO, "Started on port " + port); LOG.log(System.Logger.Level.INFO, "Dashboard: http://localhost:" + port + "/"); @@ -240,6 +265,10 @@ protected void initChannel(SocketChannel ch) { if (config.isPrometheusEnabled()) { LOG.log(System.Logger.Level.INFO, "Prometheus endpoint: http://localhost:" + port + "/prometheus"); } + if (config.isOtlpEnabled()) { + LOG.log(System.Logger.Level.INFO, "OTLP export: pushing to " + config.getOtlpEndpoint() + + " every " + config.getOtlpIntervalMs() + "ms"); + } } /** @@ -250,6 +279,10 @@ public void stop() { return; } + if (otlpExporter != null) { + otlpExporter.stop(); + } + if (broadcaster != null) { broadcaster.stop(); } diff --git a/argus-server/src/main/java/io/argus/server/analysis/FlameGraphAnalyzer.java b/argus-server/src/main/java/io/argus/server/analysis/FlameGraphAnalyzer.java new file mode 100644 index 0000000..77ba615 --- /dev/null +++ b/argus-server/src/main/java/io/argus/server/analysis/FlameGraphAnalyzer.java @@ -0,0 +1,215 @@ +package io.argus.server.analysis; + +import io.argus.core.event.ExecutionSampleEvent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Analyzes execution sample events and builds flame graph data. + * + *

    Aggregates stack traces into a tree structure suitable for d3-flamegraph + * visualization and collapsed stack format for external tools. + */ +public final class FlameGraphAnalyzer { + + private static final long DEFAULT_WINDOW_MS = 60_000; // 60 seconds + + private final AtomicLong totalSamples = new AtomicLong(0); + private final long windowMs; + private volatile long windowStartTime; + private FlameNode root = new FlameNode("root"); + + public FlameGraphAnalyzer() { + this(DEFAULT_WINDOW_MS); + } + + public FlameGraphAnalyzer(long windowMs) { + this.windowMs = windowMs; + this.windowStartTime = System.currentTimeMillis(); + } + + /** + * Records an execution sample event for flame graph aggregation. + * + * @param event the execution sample event to record + */ + public void recordSample(ExecutionSampleEvent event) { + if (event == null || event.stackTrace() == null || event.stackTrace().isBlank()) { + return; + } + + totalSamples.incrementAndGet(); + + List frames = parseStackTrace(event.stackTrace()); + if (frames.isEmpty()) { + return; + } + + synchronized (root) { + // Auto-rotate: if time window has passed, start fresh + if (System.currentTimeMillis() - windowStartTime > windowMs) { + root = new FlameNode("root"); + windowStartTime = System.currentTimeMillis(); + } + + root.value++; + FlameNode current = root; + for (String frame : frames) { + current = current.children.computeIfAbsent(frame, FlameNode::new); + current.value++; + } + } + } + + /** + * Returns the flame graph data as d3-flamegraph compatible JSON. + * + * @return JSON string for d3-flamegraph + */ + public String getFlameGraphJson() { + synchronized (root) { + StringBuilder sb = new StringBuilder(4096); + appendNodeJson(sb, root); + return sb.toString(); + } + } + + /** + * Returns the flame graph data in collapsed stack format. + * + *

    Each line: {@code frame1;frame2;frame3 count} + * + * @return collapsed stacks string + */ + public String getCollapsedStacks() { + synchronized (root) { + StringBuilder sb = new StringBuilder(4096); + List path = new ArrayList<>(); + collectCollapsedStacks(root, path, sb); + return sb.toString(); + } + } + + /** + * Returns the total number of samples recorded. + * + * @return total sample count + */ + public long getTotalSamples() { + return totalSamples.get(); + } + + /** + * Clears all recorded data and resets the time window. + */ + public void clear() { + synchronized (root) { + totalSamples.set(0); + root = new FlameNode("root"); + windowStartTime = System.currentTimeMillis(); + } + } + + /** + * Returns the remaining seconds in the current time window. + */ + public int getWindowRemainingSeconds() { + long elapsed = System.currentTimeMillis() - windowStartTime; + long remaining = Math.max(0, windowMs - elapsed); + return (int) (remaining / 1000); + } + + /** + * Parses a stack trace string into a list of frame names. + * JFR stack traces are top-frame-first; this reverses them so + * the root (entry point) is first for flame graph rendering. + */ + private List parseStackTrace(String stackTrace) { + String[] lines = stackTrace.split("\n"); + List frames = new ArrayList<>(lines.length); + + for (String line : lines) { + String trimmed = line.trim(); + if (trimmed.startsWith("at ")) { + String frame = trimmed.substring(3); + // Remove source info: "class.method(line:N)" -> "class.method" + int parenIdx = frame.indexOf('('); + if (parenIdx > 0) { + frame = frame.substring(0, parenIdx); + } + if (!frame.isEmpty()) { + frames.add(frame); + } + } + } + + // Reverse: JFR is leaf-first, flame graph needs root-first + java.util.Collections.reverse(frames); + return frames; + } + + private void appendNodeJson(StringBuilder sb, FlameNode node) { + sb.append("{\"name\":\""); + escapeJson(sb, node.name); + sb.append("\",\"value\":").append(node.value); + + if (!node.children.isEmpty()) { + sb.append(",\"children\":["); + boolean first = true; + for (FlameNode child : node.children.values()) { + if (!first) sb.append(','); + first = false; + appendNodeJson(sb, child); + } + sb.append(']'); + } + + sb.append('}'); + } + + private void collectCollapsedStacks(FlameNode node, List path, StringBuilder sb) { + if (node.children.isEmpty() && !path.isEmpty()) { + // Leaf node: output the path with count + sb.append(String.join(";", path)); + sb.append(' ').append(node.value).append('\n'); + return; + } + + for (Map.Entry entry : node.children.entrySet()) { + path.add(entry.getKey()); + collectCollapsedStacks(entry.getValue(), path, sb); + path.remove(path.size() - 1); + } + } + + private void escapeJson(StringBuilder sb, String text) { + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + switch (c) { + case '"' -> sb.append("\\\""); + case '\\' -> sb.append("\\\\"); + case '\n' -> sb.append("\\n"); + case '\r' -> sb.append("\\r"); + case '\t' -> sb.append("\\t"); + default -> sb.append(c); + } + } + } + + /** + * Internal tree node for flame graph aggregation. + */ + private static final class FlameNode { + final String name; + int value; + final Map children = new ConcurrentHashMap<>(); + + FlameNode(String name) { + this.name = name; + } + } +} diff --git a/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java b/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java index 27b1fdc..a608c79 100644 --- a/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java +++ b/argus-server/src/main/java/io/argus/server/handler/ArgusChannelHandler.java @@ -6,6 +6,7 @@ import io.argus.server.analysis.AllocationAnalyzer; import io.argus.server.analysis.ContentionAnalyzer; import io.argus.server.analysis.CorrelationAnalyzer; +import io.argus.server.analysis.FlameGraphAnalyzer; import io.argus.server.analysis.CPUAnalyzer; import io.argus.server.analysis.GCAnalyzer; import io.argus.server.analysis.MetaspaceAnalyzer; @@ -47,6 +48,7 @@ public final class ArgusChannelHandler extends SimpleChannelInboundHandlerHand-coded OTLP JSON format following the OpenTelemetry specification, + * maintaining the project's zero-dependency philosophy. + */ +public final class OtlpJsonBuilder { + + private final AgentConfig config; + private final ServerMetrics metrics; + private final ActiveThreadsRegistry activeThreads; + private final PinningAnalyzer pinningAnalyzer; + private final CarrierThreadAnalyzer carrierAnalyzer; + private final GCAnalyzer gcAnalyzer; + private final CPUAnalyzer cpuAnalyzer; + private final AllocationAnalyzer allocationAnalyzer; + private final MetaspaceAnalyzer metaspaceAnalyzer; + private final MethodProfilingAnalyzer methodProfilingAnalyzer; + private final ContentionAnalyzer contentionAnalyzer; + + public OtlpJsonBuilder(AgentConfig config, ServerMetrics metrics, + ActiveThreadsRegistry activeThreads, + PinningAnalyzer pinningAnalyzer, + CarrierThreadAnalyzer carrierAnalyzer, + GCAnalyzer gcAnalyzer, CPUAnalyzer cpuAnalyzer, + AllocationAnalyzer allocationAnalyzer, + MetaspaceAnalyzer metaspaceAnalyzer, + MethodProfilingAnalyzer methodProfilingAnalyzer, + ContentionAnalyzer contentionAnalyzer) { + this.config = config; + this.metrics = metrics; + this.activeThreads = activeThreads; + this.pinningAnalyzer = pinningAnalyzer; + this.carrierAnalyzer = carrierAnalyzer; + this.gcAnalyzer = gcAnalyzer; + this.cpuAnalyzer = cpuAnalyzer; + this.allocationAnalyzer = allocationAnalyzer; + this.metaspaceAnalyzer = metaspaceAnalyzer; + this.methodProfilingAnalyzer = methodProfilingAnalyzer; + this.contentionAnalyzer = contentionAnalyzer; + } + + /** + * Builds the complete OTLP JSON metrics payload. + * + * @return OTLP JSON string + */ + public String build() { + long nowNano = System.currentTimeMillis() * 1_000_000L; + StringBuilder sb = new StringBuilder(4096); + + sb.append("{\"resourceMetrics\":[{"); + appendResource(sb); + sb.append(",\"scopeMetrics\":[{"); + sb.append("\"scope\":{\"name\":\"io.argus.metrics\",\"version\":\"") + .append(getVersion()).append("\"},"); + sb.append("\"metrics\":["); + + boolean first = true; + first = appendVirtualThreadMetrics(sb, nowNano, first); + first = appendGcMetrics(sb, nowNano, first); + first = appendCpuMetrics(sb, nowNano, first); + first = appendAllocationMetrics(sb, nowNano, first); + first = appendMetaspaceMetrics(sb, nowNano, first); + first = appendProfilingMetrics(sb, nowNano, first); + appendContentionMetrics(sb, nowNano, first); + + sb.append("]}]}]}"); + return sb.toString(); + } + + private void appendResource(StringBuilder sb) { + sb.append("\"resource\":{\"attributes\":["); + appendStringAttribute(sb, "service.name", config.getOtlpServiceName()); + sb.append(','); + appendStringAttribute(sb, "service.version", getVersion()); + sb.append(','); + appendStringAttribute(sb, "telemetry.sdk.name", "argus"); + sb.append(','); + appendStringAttribute(sb, "telemetry.sdk.language", "java"); + sb.append("]}"); + } + + // --- Virtual Thread Metrics (always enabled) --- + + private boolean appendVirtualThreadMetrics(StringBuilder sb, long nowNano, boolean first) { + first = appendGauge(sb, first, "argus_virtual_threads_active", + "Currently active virtual threads", nowNano, activeThreads.size()); + first = appendSum(sb, first, "argus_virtual_threads_started_total", + "Total virtual threads started", nowNano, metrics.getStartEvents()); + first = appendSum(sb, first, "argus_virtual_threads_ended_total", + "Total virtual threads ended", nowNano, metrics.getEndEvents()); + first = appendSum(sb, first, "argus_virtual_threads_pinned_total", + "Total pinning events", nowNano, metrics.getPinnedEvents()); + first = appendSum(sb, first, "argus_virtual_threads_submit_failed_total", + "Total submit failures", nowNano, metrics.getSubmitFailedEvents()); + + var pinAnalysis = pinningAnalyzer.getAnalysis(); + first = appendGauge(sb, first, "argus_virtual_threads_pinned_unique_stacks", + "Unique pinning stack traces", nowNano, pinAnalysis.uniqueStackTraces()); + + var carrierAnalysis = carrierAnalyzer.getAnalysis(); + first = appendGauge(sb, first, "argus_carrier_threads_total", + "Total carrier threads", nowNano, carrierAnalysis.totalCarriers()); + first = appendGaugeDouble(sb, first, "argus_carrier_threads_avg_per_carrier", + "Average virtual threads per carrier", nowNano, carrierAnalysis.avgVirtualThreadsPerCarrier()); + return first; + } + + // --- GC Metrics --- + + private boolean appendGcMetrics(StringBuilder sb, long nowNano, boolean first) { + if (!config.isGcEnabled()) return first; + + var analysis = gcAnalyzer.getAnalysis(); + first = appendSum(sb, first, "argus_gc_events_total", + "Total GC events", nowNano, analysis.totalGCEvents()); + first = appendSumDouble(sb, first, "argus_gc_pause_time_seconds_total", + "Total GC pause time in seconds", nowNano, analysis.totalPauseTimeMs() / 1000.0); + first = appendGaugeDouble(sb, first, "argus_gc_pause_time_seconds_max", + "Maximum GC pause time", nowNano, analysis.maxPauseTimeMs() / 1000.0); + first = appendGaugeDouble(sb, first, "argus_gc_overhead_ratio", + "GC overhead percentage", nowNano, analysis.gcOverheadPercent()); + first = appendGauge(sb, first, "argus_heap_used_bytes", + "Current heap used", nowNano, analysis.currentHeapUsed()); + first = appendGauge(sb, first, "argus_heap_committed_bytes", + "Current heap committed", nowNano, analysis.currentHeapCommitted()); + return first; + } + + // --- CPU Metrics --- + + private boolean appendCpuMetrics(StringBuilder sb, long nowNano, boolean first) { + if (!config.isCpuEnabled()) return first; + + var analysis = cpuAnalyzer.getAnalysis(); + first = appendGaugeDouble(sb, first, "argus_cpu_jvm_user_ratio", + "JVM user CPU ratio", nowNano, analysis.currentJvmUser()); + first = appendGaugeDouble(sb, first, "argus_cpu_jvm_system_ratio", + "JVM system CPU ratio", nowNano, analysis.currentJvmSystem()); + first = appendGaugeDouble(sb, first, "argus_cpu_machine_total_ratio", + "Machine total CPU ratio", nowNano, analysis.currentMachineTotal()); + return first; + } + + // --- Allocation Metrics --- + + private boolean appendAllocationMetrics(StringBuilder sb, long nowNano, boolean first) { + if (allocationAnalyzer == null) return first; + + var analysis = allocationAnalyzer.getAnalysis(); + first = appendSum(sb, first, "argus_allocation_total", + "Total allocations", nowNano, analysis.totalAllocations()); + first = appendSum(sb, first, "argus_allocation_bytes_total", + "Total bytes allocated", nowNano, analysis.totalBytesAllocated()); + first = appendGaugeDouble(sb, first, "argus_allocation_rate_bytes_per_second", + "Allocation rate", nowNano, analysis.allocationRateBytesPerSec()); + return first; + } + + // --- Metaspace Metrics --- + + private boolean appendMetaspaceMetrics(StringBuilder sb, long nowNano, boolean first) { + if (metaspaceAnalyzer == null) return first; + + var analysis = metaspaceAnalyzer.getAnalysis(); + first = appendGauge(sb, first, "argus_metaspace_used_bytes", + "Metaspace used", nowNano, analysis.currentUsed()); + first = appendGauge(sb, first, "argus_metaspace_committed_bytes", + "Metaspace committed", nowNano, analysis.currentCommitted()); + first = appendGauge(sb, first, "argus_metaspace_classes_loaded", + "Loaded classes", nowNano, analysis.currentClassCount()); + return first; + } + + // --- Profiling Metrics --- + + private boolean appendProfilingMetrics(StringBuilder sb, long nowNano, boolean first) { + if (methodProfilingAnalyzer == null) return first; + + var analysis = methodProfilingAnalyzer.getAnalysis(); + first = appendSum(sb, first, "argus_profiling_samples_total", + "Total profiling samples", nowNano, analysis.totalSamples()); + return first; + } + + // --- Contention Metrics --- + + private boolean appendContentionMetrics(StringBuilder sb, long nowNano, boolean first) { + if (contentionAnalyzer == null) return first; + + var analysis = contentionAnalyzer.getAnalysis(); + first = appendSum(sb, first, "argus_contention_events_total", + "Total contention events", nowNano, analysis.totalContentionEvents()); + appendSumDouble(sb, first, "argus_contention_time_seconds_total", + "Total contention time", nowNano, analysis.totalContentionTimeMs() / 1000.0); + return first; + } + + // --- Helper methods for OTLP JSON metric types --- + + private boolean appendGauge(StringBuilder sb, boolean first, String name, + String description, long nowNano, long value) { + if (!first) sb.append(','); + sb.append("{\"name\":\"").append(name).append("\","); + sb.append("\"description\":\"").append(description).append("\","); + sb.append("\"gauge\":{\"dataPoints\":[{"); + sb.append("\"timeUnixNano\":\"").append(nowNano).append("\","); + sb.append("\"asInt\":\"").append(value).append("\"}]}}"); + return false; + } + + private boolean appendGaugeDouble(StringBuilder sb, boolean first, String name, + String description, long nowNano, double value) { + if (!first) sb.append(','); + sb.append("{\"name\":\"").append(name).append("\","); + sb.append("\"description\":\"").append(description).append("\","); + sb.append("\"gauge\":{\"dataPoints\":[{"); + sb.append("\"timeUnixNano\":\"").append(nowNano).append("\","); + sb.append("\"asDouble\":").append(value).append("}]}}"); + return false; + } + + private boolean appendSum(StringBuilder sb, boolean first, String name, + String description, long nowNano, long value) { + if (!first) sb.append(','); + sb.append("{\"name\":\"").append(name).append("\","); + sb.append("\"description\":\"").append(description).append("\","); + sb.append("\"sum\":{\"dataPoints\":[{"); + sb.append("\"timeUnixNano\":\"").append(nowNano).append("\","); + sb.append("\"asInt\":\"").append(value).append("\"}],"); + sb.append("\"aggregationTemporality\":2,\"isMonotonic\":true}}"); + return false; + } + + private boolean appendSumDouble(StringBuilder sb, boolean first, String name, + String description, long nowNano, double value) { + if (!first) sb.append(','); + sb.append("{\"name\":\"").append(name).append("\","); + sb.append("\"description\":\"").append(description).append("\","); + sb.append("\"sum\":{\"dataPoints\":[{"); + sb.append("\"timeUnixNano\":\"").append(nowNano).append("\","); + sb.append("\"asDouble\":").append(value).append("}],"); + sb.append("\"aggregationTemporality\":2,\"isMonotonic\":true}}"); + return false; + } + + private void appendStringAttribute(StringBuilder sb, String key, String value) { + sb.append("{\"key\":\"").append(key).append("\",\"value\":{\"stringValue\":\"").append(value).append("\"}}"); + } + + private String getVersion() { + return "0.4.0"; + } +} diff --git a/argus-server/src/main/java/io/argus/server/metrics/OtlpMetricsExporter.java b/argus-server/src/main/java/io/argus/server/metrics/OtlpMetricsExporter.java new file mode 100644 index 0000000..b072fae --- /dev/null +++ b/argus-server/src/main/java/io/argus/server/metrics/OtlpMetricsExporter.java @@ -0,0 +1,102 @@ +package io.argus.server.metrics; + +import io.argus.core.config.AgentConfig; + +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Duration; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +/** + * Pushes metrics to an OpenTelemetry collector in OTLP JSON format. + * + *

    Uses {@link java.net.http.HttpClient} (JDK built-in) to POST metrics + * at a configurable interval. No external SDK dependency required. + */ +public final class OtlpMetricsExporter { + + private static final System.Logger LOG = System.getLogger(OtlpMetricsExporter.class.getName()); + + private final AgentConfig config; + private final OtlpJsonBuilder jsonBuilder; + private final HttpClient httpClient; + private final ScheduledExecutorService scheduler; + + public OtlpMetricsExporter(AgentConfig config, OtlpJsonBuilder jsonBuilder) { + this.config = config; + this.jsonBuilder = jsonBuilder; + this.httpClient = HttpClient.newBuilder() + .connectTimeout(Duration.ofSeconds(10)) + .build(); + this.scheduler = Executors.newSingleThreadScheduledExecutor( + r -> Thread.ofPlatform().name("argus-otlp-exporter").daemon(true).unstarted(r) + ); + } + + /** + * Starts the periodic OTLP metrics push. + */ + public void start() { + long intervalMs = config.getOtlpIntervalMs(); + scheduler.scheduleAtFixedRate(this::pushMetrics, intervalMs, intervalMs, TimeUnit.MILLISECONDS); + LOG.log(System.Logger.Level.INFO, "OTLP exporter started (endpoint: {0}, interval: {1}ms)", + config.getOtlpEndpoint(), intervalMs); + } + + /** + * Stops the OTLP metrics exporter. + */ + public void stop() { + scheduler.shutdown(); + try { + if (!scheduler.awaitTermination(5, TimeUnit.SECONDS)) { + scheduler.shutdownNow(); + } + } catch (InterruptedException e) { + scheduler.shutdownNow(); + Thread.currentThread().interrupt(); + } + LOG.log(System.Logger.Level.INFO, "OTLP exporter stopped"); + } + + private void pushMetrics() { + try { + String json = jsonBuilder.build(); + + HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() + .uri(URI.create(config.getOtlpEndpoint())) + .header("Content-Type", "application/json") + .timeout(Duration.ofSeconds(10)) + .POST(HttpRequest.BodyPublishers.ofString(json)); + + // Add custom headers (e.g., auth tokens) + String headers = config.getOtlpHeaders(); + if (headers != null && !headers.isBlank()) { + for (String header : headers.split(",")) { + int eq = header.indexOf('='); + if (eq > 0) { + String key = header.substring(0, eq).trim(); + String value = header.substring(eq + 1).trim(); + requestBuilder.header(key, value); + } + } + } + + HttpResponse response = httpClient.send( + requestBuilder.build(), HttpResponse.BodyHandlers.ofString()); + + if (response.statusCode() >= 400) { + LOG.log(System.Logger.Level.WARNING, + "OTLP push failed: HTTP {0} - {1}", response.statusCode(), response.body()); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } catch (Exception e) { + LOG.log(System.Logger.Level.WARNING, "OTLP push error: {0}", e.getMessage()); + } + } +} diff --git a/argus-server/src/main/java/io/argus/server/websocket/EventBroadcaster.java b/argus-server/src/main/java/io/argus/server/websocket/EventBroadcaster.java index 3c3e479..f819cfa 100644 --- a/argus-server/src/main/java/io/argus/server/websocket/EventBroadcaster.java +++ b/argus-server/src/main/java/io/argus/server/websocket/EventBroadcaster.java @@ -15,6 +15,7 @@ import io.argus.server.analysis.CPUAnalyzer; import io.argus.server.analysis.GCAnalyzer; import io.argus.server.analysis.MetaspaceAnalyzer; +import io.argus.server.analysis.FlameGraphAnalyzer; import io.argus.server.analysis.MethodProfilingAnalyzer; import io.argus.server.analysis.PinningAnalyzer; import io.argus.server.metrics.ServerMetrics; @@ -67,6 +68,7 @@ public final class EventBroadcaster { private final AllocationAnalyzer allocationAnalyzer; private final MetaspaceAnalyzer metaspaceAnalyzer; private final MethodProfilingAnalyzer methodProfilingAnalyzer; + private final FlameGraphAnalyzer flameGraphAnalyzer; private final ContentionAnalyzer contentionAnalyzer; private final CorrelationAnalyzer correlationAnalyzer; private final ThreadStateManager threadStateManager; @@ -96,6 +98,7 @@ public final class EventBroadcaster { * @param allocationAnalyzer the allocation analyzer (can be null) * @param metaspaceAnalyzer the metaspace analyzer (can be null) * @param methodProfilingAnalyzer the method profiling analyzer (can be null) + * @param flameGraphAnalyzer the flame graph analyzer (can be null) * @param contentionAnalyzer the contention analyzer (can be null) * @param correlationAnalyzer the correlation analyzer (can be null) * @param threadStateManager the thread state manager for real-time state tracking @@ -121,6 +124,7 @@ public EventBroadcaster( AllocationAnalyzer allocationAnalyzer, MetaspaceAnalyzer metaspaceAnalyzer, MethodProfilingAnalyzer methodProfilingAnalyzer, + FlameGraphAnalyzer flameGraphAnalyzer, ContentionAnalyzer contentionAnalyzer, CorrelationAnalyzer correlationAnalyzer, ThreadStateManager threadStateManager, @@ -144,6 +148,7 @@ public EventBroadcaster( this.allocationAnalyzer = allocationAnalyzer; this.metaspaceAnalyzer = metaspaceAnalyzer; this.methodProfilingAnalyzer = methodProfilingAnalyzer; + this.flameGraphAnalyzer = flameGraphAnalyzer; this.contentionAnalyzer = contentionAnalyzer; this.correlationAnalyzer = correlationAnalyzer; this.threadStateManager = threadStateManager; @@ -295,6 +300,9 @@ private void drainAndBroadcast() { if (executionSampleEventBuffer != null && methodProfilingAnalyzer != null) { executionSampleEventBuffer.drain(event -> { methodProfilingAnalyzer.recordExecutionSample(event); + if (flameGraphAnalyzer != null) { + flameGraphAnalyzer.recordSample(event); + } }); } diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..a266cd4 --- /dev/null +++ b/install.sh @@ -0,0 +1,171 @@ +#!/usr/bin/env bash +# +# Argus Installer +# +# Downloads argus-agent and argus-cli from GitHub releases, +# installs them to ~/.argus/, and creates the 'argus' command. +# +# Usage: +# curl -fsSL https://raw.githubusercontent.com/rlaope/argus/master/install.sh | bash +# +# Or with a specific version: +# curl -fsSL https://raw.githubusercontent.com/rlaope/argus/master/install.sh | bash -s -- v0.3.0 +# + +set -euo pipefail + +REPO="rlaope/argus" +INSTALL_DIR="$HOME/.argus" +BIN_DIR="$INSTALL_DIR/bin" +VERSION="${1:-latest}" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +CYAN='\033[0;36m' +BOLD='\033[1m' +RESET='\033[0m' + +info() { echo -e "${CYAN}[INFO]${RESET} $*"; } +ok() { echo -e "${GREEN}[OK]${RESET} $*"; } +warn() { echo -e "${YELLOW}[WARN]${RESET} $*"; } +error() { echo -e "${RED}[ERROR]${RESET} $*" >&2; } + +# --- Pre-flight checks --- + +if ! command -v java &>/dev/null; then + error "Java is not installed. Argus requires Java 21+." + exit 1 +fi + +JAVA_VER=$(java -version 2>&1 | head -1 | sed 's/.*"\([0-9]*\).*/\1/') +if [ "$JAVA_VER" -lt 21 ] 2>/dev/null; then + warn "Java $JAVA_VER detected. Argus requires Java 21+." +fi + +if ! command -v curl &>/dev/null; then + error "curl is required to download Argus." + exit 1 +fi + +# --- Resolve version --- + +if [ "$VERSION" = "latest" ]; then + info "Resolving latest release..." + VERSION=$(curl -fsSL "https://api.github.com/repos/$REPO/releases/latest" \ + | grep '"tag_name"' | head -1 | sed 's/.*"tag_name"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') + + if [ -z "$VERSION" ]; then + warn "Could not resolve latest release. Using 'v0.3.0' as fallback." + VERSION="v0.3.0" + fi +fi + +# Strip 'v' prefix for JAR filenames +VER_NUM="${VERSION#v}" + +echo "" +echo -e "${BOLD} Argus Installer${RESET}" +echo -e " Version: ${CYAN}${VERSION}${RESET}" +echo -e " Install: ${CYAN}${INSTALL_DIR}${RESET}" +echo "" + +# --- Download --- + +mkdir -p "$INSTALL_DIR" "$BIN_DIR" + +DOWNLOAD_BASE="https://github.com/$REPO/releases/download/$VERSION" + +info "Downloading argus-agent-${VER_NUM}.jar ..." +curl -fSL "$DOWNLOAD_BASE/argus-agent-${VER_NUM}.jar" -o "$INSTALL_DIR/argus-agent.jar" \ + || { error "Failed to download argus-agent. Check version: $VERSION"; exit 1; } +ok "argus-agent.jar" + +info "Downloading argus-cli-${VER_NUM}-all.jar ..." +curl -fSL "$DOWNLOAD_BASE/argus-cli-${VER_NUM}-all.jar" -o "$INSTALL_DIR/argus-cli.jar" \ + || { warn "argus-cli not found in release. CLI may not be available in $VERSION."; } +ok "argus-cli.jar" + +# --- Create wrapper scripts --- + +# argus - CLI monitor (argus top) +cat > "$BIN_DIR/argus" << 'WRAPPER' +#!/usr/bin/env bash +java -jar "$HOME/.argus/argus-cli.jar" "$@" +WRAPPER +chmod +x "$BIN_DIR/argus" + +# argus-agent - prints the agent JAR path (for -javaagent) +cat > "$BIN_DIR/argus-agent" << 'WRAPPER' +#!/usr/bin/env bash +if [ "$1" = "--path" ] || [ "$1" = "-p" ]; then + echo "$HOME/.argus/argus-agent.jar" +else + echo "Argus Agent JAR: $HOME/.argus/argus-agent.jar" + echo "" + echo "Usage:" + echo " java -javaagent:\$(argus-agent --path) -jar your-app.jar" + echo "" + echo " Or directly:" + echo " java -javaagent:$HOME/.argus/argus-agent.jar -jar your-app.jar" +fi +WRAPPER +chmod +x "$BIN_DIR/argus-agent" + +# --- Add to PATH --- + +SHELL_NAME=$(basename "$SHELL" 2>/dev/null || echo "bash") +PROFILE="" + +case "$SHELL_NAME" in + zsh) PROFILE="$HOME/.zshrc" ;; + bash) + if [ -f "$HOME/.bash_profile" ]; then + PROFILE="$HOME/.bash_profile" + else + PROFILE="$HOME/.bashrc" + fi + ;; + fish) PROFILE="$HOME/.config/fish/config.fish" ;; + *) PROFILE="$HOME/.profile" ;; +esac + +PATH_LINE="export PATH=\"\$HOME/.argus/bin:\$PATH\"" + +if [ "$SHELL_NAME" = "fish" ]; then + PATH_LINE="set -gx PATH \$HOME/.argus/bin \$PATH" +fi + +if [ -n "$PROFILE" ] && ! grep -q '.argus/bin' "$PROFILE" 2>/dev/null; then + echo "" >> "$PROFILE" + echo "# Argus JVM Monitor" >> "$PROFILE" + echo "$PATH_LINE" >> "$PROFILE" + ok "Added to PATH in $PROFILE" +else + ok "PATH already configured in $PROFILE" +fi + +# --- Done --- + +echo "" +echo -e "${GREEN}${BOLD} Argus installed successfully!${RESET}" +echo "" +echo -e " ${BOLD}Quick Start:${RESET}" +echo "" +echo -e " 1. Restart your terminal or run:" +echo -e " ${CYAN}source $PROFILE${RESET}" +echo "" +echo -e " 2. Attach to your Java app:" +echo -e " ${CYAN}java -javaagent:\$(argus-agent --path) -jar your-app.jar${RESET}" +echo "" +echo -e " 3. Open the dashboard:" +echo -e " ${CYAN}http://localhost:9202/${RESET}" +echo "" +echo -e " 4. Or use the CLI monitor:" +echo -e " ${CYAN}argus${RESET}" +echo -e " ${CYAN}argus --port 9202 --interval 2${RESET}" +echo "" +echo -e " ${BOLD}Uninstall:${RESET}" +echo -e " ${CYAN}rm -rf ~/.argus${RESET} and remove the PATH line from $PROFILE" +echo "" diff --git a/samples/virtual-thread-simulation/build.gradle.kts b/samples/virtual-thread-simulation/build.gradle.kts index c6428bb..11ccb29 100644 --- a/samples/virtual-thread-simulation/build.gradle.kts +++ b/samples/virtual-thread-simulation/build.gradle.kts @@ -45,19 +45,13 @@ tasks.register("runSimulation") { languageVersion.set(JavaLanguageVersion.of(21)) }) - // Pass duration property if provided: -Dduration=10 - val duration = System.getProperty("duration") - jvmArgs( "--enable-preview", "-javaagent:${rootProject.projectDir}/argus-agent/build/libs/argus-agent-${rootProject.property("argusVersion")}.jar", "-Dargus.server.enabled=true", - "-Dargus.server.port=9202" + "-Dargus.server.port=9202", + "-Dduration=${System.getProperty("duration") ?: "300"}" ) - - if (duration != null) { - jvmArgs("-Dduration=$duration") - } } // Run metrics demo with GC/CPU activity @@ -72,9 +66,6 @@ tasks.register("runMetricsDemo") { languageVersion.set(JavaLanguageVersion.of(21)) }) - // Duration in seconds (default: 60) - val duration = System.getProperty("duration") - jvmArgs( "--enable-preview", "-Xmx512m", // Enough heap for JFR + Netty + app @@ -84,12 +75,9 @@ tasks.register("runMetricsDemo") { "-Dargus.server.enabled=true", "-Dargus.server.port=9202", "-Dargus.gc.enabled=true", - "-Dargus.cpu.enabled=true" + "-Dargus.cpu.enabled=true", + "-Dduration=${System.getProperty("duration") ?: "300"}" ) - - if (duration != null) { - jvmArgs("-Dduration=$duration") - } } // Run metrics demo with ALL features enabled (including high-overhead ones) @@ -104,8 +92,6 @@ tasks.register("runMetricsDemoFull") { languageVersion.set(JavaLanguageVersion.of(21)) }) - val duration = System.getProperty("duration") - jvmArgs( "--enable-preview", "-Xmx1g", // More heap for full profiling @@ -125,10 +111,8 @@ tasks.register("runMetricsDemoFull") { "-Dargus.profiling.interval=50", // 50ms interval (lower overhead) "-Dargus.contention.enabled=true", "-Dargus.contention.threshold=20", // 20ms threshold - "-Dargus.correlation.enabled=true" + "-Dargus.correlation.enabled=true", + // Default: 5 minutes (override with -Dduration=N) + "-Dduration=${System.getProperty("duration") ?: "300"}" ) - - if (duration != null) { - jvmArgs("-Dduration=$duration") - } } diff --git a/settings.gradle.kts b/settings.gradle.kts index 953d9f3..9a5074b 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -8,6 +8,7 @@ include("argus-core") include("argus-agent") include("argus-server") include("argus-frontend") +include("argus-cli") // Sample projects include("samples:virtual-thread-demo")