Skip to content

Commit d05ea5b

Browse files
Merge pull request #163 from IBM/160-generate-learning-dataset-javacores
#160 Generating learning dataset
2 parents 014f813 + 42a71f4 commit d05ea5b

File tree

5 files changed

+1574
-1
lines changed

5 files changed

+1574
-1
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ waitress # Production WSGI server
66
flask # WSGI server used while developing the code
77
tqdm
88
haralyzer
9+
pandas

src/javacore_analyser/ai/__init__.py

Whitespace-only changes.
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#
2+
# Copyright IBM Corp. 2025 - 2025
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
6+
import sys
7+
8+
import pandas as pd
9+
10+
from javacore_analyser.javacore_analyser_batch import generate_javecore_set_data
11+
12+
13+
def main(input_files):
    """
    Generate 'data.csv' with per-snapshot thread parameters that can be used as model input.

    Processes one or more javacore files (or archives with javacores) and writes one CSV row
    per thread snapshot with the columns: name, cpu_usage, allocated_mem, state,
    blocking_threads, stack_trace and stack_trace_depth. Rows are ordered by the length of
    the flattened stack trace text, longest first. After writing, the file is read back and
    printed as a sanity check that it can be loaded.

    :param input_files: one or more javacore files (or archives with javacores) from which
        to generate the csv file
    :return: No value returned. Writes 'data.csv' to the current working directory.
    """
    # Fixed column list: guarantees a header row even when there are no snapshots,
    # so the read-back below does not fail on an empty file.
    columns = ['name', 'cpu_usage', 'allocated_mem', 'state',
               'blocking_threads', 'stack_trace', 'stack_trace_depth']

    javacore_set = generate_javecore_set_data(input_files)
    data = []
    for thread in javacore_set.threads:
        for snapshot in thread.thread_snapshots:
            name = snapshot.name
            cpu_usage = snapshot.cpu_usage
            allocated_mem = snapshot.allocated_mem
            state = snapshot.state
            blocking_threads = len(snapshot.blocking)
            stack_trace = snapshot.stack_trace
            stack_trace_depth = snapshot.get_java_stack_depth()
            if stack_trace is None:
                stack_trace = ""
            else:
                # Flatten to a single line so each snapshot occupies one CSV record.
                stack_trace = stack_trace.to_string().replace("\n", " ").replace("\r", " ")
            data.append({'name': name, 'cpu_usage': cpu_usage, 'allocated_mem': allocated_mem, 'state': state,
                         'blocking_threads': blocking_threads, 'stack_trace': stack_trace,
                         'stack_trace_depth': stack_trace_depth})

    # Longest stack trace text first.
    data.sort(key=lambda java_thread: len(java_thread['stack_trace']), reverse=True)
    pd.DataFrame(data, columns=columns).to_csv('data.csv', index=False)

    # Check if we can load data
    df = pd.read_csv('data.csv')
    print("Displaying data:")
    print(df)
48+
49+
50+
if __name__ == '__main__':
    # Everything after the script name is passed through as the input files.
    cli_inputs = sys.argv[1:]
    main(cli_inputs)

0 commit comments

Comments
 (0)