Skip to content

Commit 23c874e

Browse files
committed
refine list core job
1 parent eece0b3 commit 23c874e

File tree

4 files changed

+413
-518
lines changed

4 files changed

+413
-518
lines changed

lib/jobs/list_core.js

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
'use strict';
2+
3+
const os = require('os');
4+
const fs = require('fs');
5+
const fsx = require('fs').promises;
6+
const path = require('path');
7+
const exec = require('child_process').exec;
8+
9+
// Period of the core-listing job in milliseconds (one minute). The same value
// is used as the look-back window when deciding whether a core file is recent
// enough to report.
const REPORT_INTERVAL = 60000;
10+
11+
/**
 * Decide whether a directory entry looks like a core file.
 *
 * Users may rename core files through the kernel core pattern, so a name is
 * accepted when it begins with any of the known prefixes. Names ending in
 * '.gz' are rejected.
 *
 * @param {string[]} prefixList - accepted file-name prefixes
 * @param {string} item - file name to test
 * @returns {boolean} true when `item` matches a prefix and is not a '.gz' file
 */
function check(prefixList, item) {
  for (const candidate of prefixList) {
    if (!item.startsWith(candidate)) {
      continue;
    }
    if (item.endsWith('.gz')) {
      continue;
    }
    return true;
  }
  return false;
}
18+
19+
class ListCoreJob {
  /**
   * Core files are looked for in the following locations:
   * 1. Directories given by the user in config.json:
   *    coredir: [dir1, dir2, ...]
   * 2. The directory named by /proc/sys/kernel/core_pattern.
   *    That file controls where core files are written and how they are
   *    named, e.g.
   *      echo "/opt/corefile/core-%e-%p-%t" > /proc/sys/kernel/core_pattern
   *    makes core files land in /opt/corefile.
   *    Format specifiers:
   *      %p - insert pid into filename
   *      %u - insert current uid into filename
   *      %g - insert current gid into filename
   *      %s - insert signal that caused the coredump into the filename
   *      %t - insert UNIX time that the coredump occurred into filename
   *      %h - insert hostname where the coredump happened into filename
   *      %e - insert coredumping executable name into filename
   *    Note: only this plain-path form is handled; patterns that hand the
   *    dump to a third-party program are not considered.
   * 3. The PWD of every running Node process (collected in run(), Linux only).
   *
   * @param {{coredir?: (string|string[])}} [config] - agent configuration
   */
  constructor(config) {
    this.coreFileNamePrefix = ['core'];
    // Always start from an array so run() can spread it, even when
    // config.coredir has an unexpected type.
    this.coredir = [];

    const configured = config && config.coredir;
    if (Array.isArray(configured)) {
      // Copy so that directories added below never leak into the caller's config.
      this.coredir = [...configured];
    } else if (typeof configured === 'string' && configured) {
      this.coredir = [configured];
    }

    // For linux, also check the core dir specified by
    // /proc/sys/kernel/core_pattern, e.g. '/tmp/core_%e.%p'
    if (os.platform() !== 'linux') {
      return;
    }
    if (!fs.existsSync('/proc/sys/kernel/core_pattern')) {
      return;
    }

    const patt = fs.readFileSync('/proc/sys/kernel/core_pattern', 'utf8').trim().split(' ')[0];
    if (patt.indexOf('%') <= 0) {
      return;
    }

    // /tmp/core_%e.%p -> dir '/tmp', name 'core_%e'
    const parsed = path.parse(patt);
    const coredir_ = parsed.dir;
    if (!fs.existsSync(coredir_)) {
      return;
    }

    try {
      fs.accessSync(coredir_, fs.constants.R_OK);
    } catch (e) {
      console.log(coredir_ + ' is unaccessible: ' + e.message);
      return;
    }

    if (!this.coredir.includes(coredir_)) {
      this.coredir.push(coredir_);
    }

    // An empty prefix (pattern such as '/tmp/%e.%p') would make every file in
    // every scanned directory look like a core file, so it is not registered.
    const prefix = parsed.name.split('%')[0];
    if (prefix && !this.coreFileNamePrefix.includes(prefix)) {
      this.coreFileNamePrefix.push(prefix);
    }
  }

  /**
   * List the pids of the processes whose command line contains "node ".
   *
   * @returns {Promise<string[]>} pids as strings; empty when nothing matches
   */
  async getNodePids() {
    const cmd = 'ps -e -o pid,args | grep -E "node " | grep -v grep';
    // child_process.exec is callback based, so it is adapted to a promise here.
    const stdout = await new Promise((resolve, reject) => {
      exec(cmd, (err, out) => {
        if (!err) {
          resolve(out);
          return;
        }
        // grep exits with status 1 when no line matched: not a failure,
        // just "no node process".
        if (err.code === 1) {
          resolve('');
          return;
        }
        reject(err);
      });
    });

    const pids = [];
    for (const line of String(stdout).split('\n')) {
      // ps may pad the pid column with leading spaces, so trim every line
      // before taking the first field.
      const pid = line.trim().split(/\s+/)[0];
      if (pid) {
        pids.push(pid);
      }
    }
    return pids;
  }

  /**
   * Read the PWD environment variable of a process from /proc/<pid>/environ.
   *
   * @param {string|number} pid - process id
   * @returns {Promise<string|null>} the PWD value, or null when the environ
   *   file cannot be read or holds no non-empty PWD entry
   */
  async getNodePWD(pid) {
    const environFile = `/proc/${pid}/environ`;

    let env;
    try {
      env = await fsx.readFile(environFile, 'utf8');
    } catch (err) {
      // Ignore this process: it may have exited or belong to another user.
      return null;
    }

    const marker = 'PWD=';
    // Entries are NUL separated; match the exact name so that variables that
    // merely start with "PWD" are not mistaken for it.
    const entry = env.split('\u0000').find((kv) => kv.startsWith(marker));
    if (!entry) {
      return null;
    }

    // Slice instead of split('=') so values containing '=' stay intact.
    const pwd = entry.slice(marker.length);
    return pwd || null;
  }

  /**
   * Collect the recent core files of every directory in `dirs`.
   *
   * @param {string[]} dirs - directories to scan
   * @returns {Promise<{ok: boolean, data: Array<{path: string, size: number, ctime: Date}>}>}
   */
  async findCores(dirs) {
    const corelist = { ok: true, data: [] };
    for (const dir of dirs) {
      const list = await this.statDir(dir);
      corelist.data.push(...list);
    }
    return corelist;
  }

  /**
   * Scan one directory for core files changed within the last report interval.
   *
   * @param {string} dir - directory to scan
   * @returns {Promise<Array<{path: string, size: number, ctime: Date}>>}
   *   matching files; empty when the directory cannot be read
   */
  async statDir(dir) {
    const results = [];

    let files;
    try {
      files = await fsx.readdir(dir);
    } catch (err) {
      // A missing or unreadable directory must not abort the whole report.
      return results;
    }

    for (const name of files) {
      if (!check(this.coreFileNamePrefix, name)) {
        continue;
      }

      const file = path.join(dir, name);
      let stat;
      try {
        stat = await fsx.stat(file);
      } catch (err) {
        // The entry disappeared between readdir and stat: skip it.
        continue;
      }

      // bypass directory
      if (!stat.isFile()) {
        continue;
      }

      // bypass cores last changed more than one report interval ago
      if (stat.ctimeMs < Date.now() - REPORT_INTERVAL) {
        continue;
      }

      results.push({
        path: file,
        size: stat.size,
        ctime: stat.ctime
      });
    }

    return results;
  }

  /**
   * Run the job once.
   *
   * @returns {Promise<{type: string, metrics: object}|null>} the coredump
   *   report, or null on non-linux platforms or when there is no directory
   *   to scan
   */
  async run() {
    // Not linux: nothing to do, nothing to report.
    if (os.platform() !== 'linux') {
      return null;
    }

    // Find the pids of the Node processes that are currently running.
    const pids = await this.getNodePids();

    // Resolve each pid to the PWD of its process; unresolved ones (null) are
    // dropped so they never reach readdir().
    const resolved = await Promise.all(pids.map((pid) => this.getNodePWD(pid)));
    const pwds = resolved.filter(Boolean);

    // Merge the directories and remove duplicates.
    const dirs = Array.from(new Set([...this.coredir, ...pwds]));
    if (dirs.length === 0) {
      return null;
    }

    // Look for matching core files in those directories.
    const cores = await this.findCores(dirs);
    return {
      type: 'coredump',
      metrics: cores
    };
  }

  static reportInterval = REPORT_INTERVAL; // 1 min
}
180+
181+
// The job class is the sole export of this module.
module.exports = ListCoreJob;

0 commit comments

Comments
 (0)