|
| 1 | +'use strict'; |
| 2 | + |
| 3 | +const os = require('os'); |
| 4 | +const fs = require('fs'); |
| 5 | +const fsx = require('fs').promises; |
| 6 | +const path = require('path'); |
| 7 | +const exec = require('child_process').exec; |
| 8 | + |
// One minute, in milliseconds. Exposed as ListCoreJob.reportInterval and also
// used by statDir() as the look-back window when filtering core files by ctime.
const REPORT_INTERVAL = 60 * 1000;
| 10 | + |
/**
 * Decide whether a directory entry name looks like a core file.
 *
 * The user may have renamed core files through core_pattern, so a name is
 * accepted when it starts with any of the known prefixes. Names ending in
 * `.gz` are rejected.
 *
 * @param {string[]} prefixList - Accepted file-name prefixes (e.g. ['core']).
 * @param {string} item - File name (not a full path) to test.
 * @returns {boolean} true when `item` matches a prefix and is not a `.gz` file.
 */
function check(prefixList, item) {
  return prefixList.some((prefix) => {
    // An empty prefix would make startsWith() true for every file name and
    // turn the whole directory into "core files"; ignore it.
    if (prefix === '') {
      return false;
    }
    return item.startsWith(prefix) && !item.endsWith('.gz');
  });
}
| 18 | + |
class ListCoreJob {
  /**
   * Job that lists recently created core dump files.
   *
   * Core files are looked up in the following locations:
   * 1. Directories given by the user in config.json:
   *      coredir: [dir1, dir2, ...]
   * 2. The directory part of /proc/sys/kernel/core_pattern, which controls
   *    where the kernel stores core files and how it names them, e.g.
   *      echo "/opt/corefile/core-%e-%p-%t" > /proc/sys/kernel/core_pattern
   *    stores core files under /opt/corefile. Supported specifiers:
   *      %p - insert pid into filename
   *      %u - insert current uid into filename
   *      %g - insert current gid into filename
   *      %s - insert signal that caused the coredump into the filename
   *      %t - insert UNIX time that the coredump occurred into filename
   *      %h - insert hostname where the coredump happened into filename
   *      %e - insert coredumping executable name into filename
   *    Only this plain-path form is handled; patterns that hand the dump to
   *    a third-party program are not considered.
   * 3. The PWD of every running Node process (collected in run()).
   *    The original notes mention /cores for macOS, but run() currently
   *    skips every non-Linux platform.
   *
   * @param {{coredir?: (string|string[])}} [config] - Agent configuration.
   */
  constructor(config) {
    this.coreFileNamePrefix = ['core'];

    // Always end up with an array, and copy the configured list so that
    // adding the core_pattern directory below never mutates the caller's
    // config object.
    const configured = config && config.coredir;
    if (Array.isArray(configured)) {
      this.coredir = configured.filter((dir) => typeof dir === 'string' && dir !== '');
    } else if (typeof configured === 'string' && configured !== '') {
      this.coredir = [configured];
    } else {
      this.coredir = [];
    }

    // On Linux, also check the directory named by
    // /proc/sys/kernel/core_pattern, e.g. '/tmp/core_%e.%p'.
    if (os.platform() !== 'linux') {
      return;
    }

    let patt;
    try {
      patt = fs.readFileSync('/proc/sys/kernel/core_pattern', 'utf8').trim().split(' ')[0];
    } catch (e) {
      // A missing file simply means there is nothing to add; anything else
      // is worth a log line but must not prevent the job from being built.
      if (e.code !== 'ENOENT') {
        console.log('/proc/sys/kernel/core_pattern is unaccessible: ' + e.message);
      }
      return;
    }

    // A leading '|' pipes the dump to a helper program (not handled here);
    // a pattern without any '%' specifier is not treated as a custom location.
    if (patt.startsWith('|') || patt.indexOf('%') <= 0) {
      return;
    }

    // e.g. /tmp/core_%e.%p -> dir '/tmp', name 'core_%e'
    const parsed = path.parse(patt);
    const coredir_ = parsed.dir;
    if (!fs.existsSync(coredir_)) {
      return;
    }

    try {
      fs.accessSync(coredir_, fs.constants.R_OK);
    } catch (e) {
      console.log(coredir_ + ' is unaccessible: ' + e.message);
      return;
    }

    if (!this.coredir.includes(coredir_)) {
      this.coredir.push(coredir_);
    }

    // Everything before the first specifier is the literal file-name prefix.
    // An empty prefix (pattern such as '/tmp/%e.core') is skipped because it
    // would match every file in the directory.
    const prefix = parsed.name.split('%')[0];
    if (prefix !== '' && !this.coreFileNamePrefix.includes(prefix)) {
      this.coreFileNamePrefix.push(prefix);
    }
  }

  /**
   * List the pids of running processes whose command line contains "node ".
   *
   * @returns {Promise<string[]>} Pids as decimal strings; empty when no
   *   process matches or when the `ps` pipeline fails.
   */
  async getNodePids() {
    // exec() is callback based and returns a ChildProcess, so it has to be
    // adapted to a promise before it can be awaited.
    const stdout = await new Promise((resolve) => {
      exec('ps -e -o pid,args | grep -E "node " | grep -v grep', (err, out) => {
        if (!err) {
          resolve(out);
          return;
        }
        // grep exits with status 1 when nothing matched: not an error here.
        if (err.code !== 1) {
          console.log('list node processes failed: ' + err.message);
        }
        resolve('');
      });
    });

    const pids = [];
    for (const line of String(stdout).split('\n')) {
      // ps right-aligns the pid column, so lines may start with spaces.
      const pid = line.trim().split(/\s+/)[0];
      if (/^\d+$/.test(pid)) {
        pids.push(pid);
      }
    }
    return pids;
  }

  /**
   * Read the PWD environment variable of a process from /proc/<pid>/environ.
   *
   * NOTE(review): environ holds the environment the process was started
   * with, so this is the start-up PWD rather than the current working
   * directory — confirm that is the intended lookup.
   *
   * @param {string|number} pid - Process id.
   * @returns {Promise<string|null>} The PWD value, or null when the file
   *   cannot be read or carries no non-empty PWD entry.
   */
  async getNodePWD(pid) {
    let env;
    try {
      env = await fsx.readFile(`/proc/${pid}/environ`, 'utf8');
    } catch (err) {
      // Process is gone or belongs to another user: ignore it.
      return null;
    }

    // Entries are NUL separated "KEY=value" strings.
    for (const entry of env.split('\u0000')) {
      if (entry.startsWith('PWD=')) {
        // Keep everything after the first '=' so paths containing '=' survive.
        return entry.slice('PWD='.length) || null;
      }
    }

    return null;
  }

  /**
   * Collect core files from several directories.
   *
   * @param {string[]} dirs - Directories to scan.
   * @returns {Promise<{ok: boolean, data: Array<{path: string, size: number, ctime: Date}>}>}
   */
  async findCores(dirs) {
    const corelist = { ok: true, data: [] };
    for (const dir of dirs) {
      const list = await this.statDir(dir);
      corelist.data.push(...list);
    }
    return corelist;
  }

  /**
   * Scan one directory for core files changed within the last report interval.
   *
   * @param {string} dir - Directory to scan.
   * @returns {Promise<Array<{path: string, size: number, ctime: Date}>>}
   *   Matching files; empty when the directory cannot be read.
   */
  async statDir(dir) {
    const results = [];

    let files;
    try {
      files = await fsx.readdir(dir);
    } catch (err) {
      // One missing or unreadable directory must not abort the whole scan.
      return results;
    }

    // Only files whose ctime falls inside the last report interval are
    // reported; older core files are skipped.
    const cutoff = Date.now() - REPORT_INTERVAL;

    for (const name of files) {
      if (!check(this.coreFileNamePrefix, name)) {
        continue;
      }

      const file = path.join(dir, name);
      let stat;
      try {
        stat = await fsx.stat(file);
      } catch (err) {
        // File disappeared between readdir() and stat().
        continue;
      }

      // Skip directories and other non-regular entries.
      if (!stat.isFile()) {
        continue;
      }

      if (stat.ctimeMs < cutoff) {
        continue;
      }

      results.push({
        path: file,
        size: stat.size,
        ctime: stat.ctime
      });
    }

    return results;
  }

  /**
   * Run the job once.
   *
   * @returns {Promise<{type: string, metrics: object}|null>} A 'coredump'
   *   report, or null on non-Linux platforms or when there is no directory
   *   to scan.
   */
  async run() {
    // Non-Linux platforms are neither processed nor reported.
    if (os.platform() !== 'linux') {
      return null;
    }

    // Pids of the Node processes that are currently running.
    const pids = await this.getNodePids();

    const pwds = [];
    for (const pid of pids) {
      // Resolve each pid to its PWD; processes that cannot be inspected
      // yield null and are left out.
      const pwd = await this.getNodePWD(pid);
      if (pwd) {
        pwds.push(pwd);
      }
    }

    // Merge configured and discovered directories, removing duplicates.
    const dirs = Array.from(new Set([...this.coredir, ...pwds]));
    if (dirs.length === 0) {
      return null;
    }

    // Look for matching core files in those directories.
    const cores = await this.findCores(dirs);
    return {
      type: 'coredump',
      metrics: cores
    };
  }

  static reportInterval = REPORT_INTERVAL; // 1 min
}
| 180 | + |
// The job class is this module's only export.
module.exports = ListCoreJob;
0 commit comments