From 685dae7ef0f1e5a32a1244a00345367ea754897a Mon Sep 17 00:00:00 2001 From: rlaope Date: Sun, 16 Nov 2025 18:38:19 +0900 Subject: [PATCH 1/3] another metrics --- README.md | 30 ++++++++++- docs/jvm/metrics.md | 10 ++++ docs/kafka/metrics.md | 11 ++++ docs/linux/metrics.md | 14 ++++++ jvm/collector.py | 33 ++++++++++++ kafka/collector.py | 36 +++++++++++++ linux/collector.py | 114 ++++++++++++++++++++++++++++++++++++++++++ monitor.py | 30 ++++++++--- 8 files changed, 269 insertions(+), 9 deletions(-) create mode 100644 docs/jvm/metrics.md create mode 100644 docs/kafka/metrics.md create mode 100644 docs/linux/metrics.md create mode 100644 jvm/collector.py create mode 100644 kafka/collector.py create mode 100644 linux/collector.py diff --git a/README.md b/README.md index fa2af9b..6f6e03b 100644 --- a/README.md +++ b/README.md @@ -1 +1,29 @@ -# Systools +# systools + +## Install + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +## Run + +```bash +# redis +python monitor.py --target redis --redis-url redis://localhost:6379/0 --interval 5 --output pretty + +# linux +python monitor.py --target linux --interval 5 --output json +``` + +- `--target`: redis | linux | kafka | jvm +- `--redis-url`: Redis 연결 URL (예: `redis://:password@host:6379/0`) +- `--interval`: 수집 주기(초), 0이면 1회만 수집 +- `--output`: pretty | json + +use config: +```bash +python monitor.py --target redis --config config.yaml +``` diff --git a/docs/jvm/metrics.md b/docs/jvm/metrics.md new file mode 100644 index 0000000..74c4b5d --- /dev/null +++ b/docs/jvm/metrics.md @@ -0,0 +1,10 @@ +# JVM 모니터링 지표(스켈레톤) + +현재는 구조만 제공되며, 실제 수집은 JMX 연동(jolokia/pyjmx 등)을 통해 추가 예정입니다. + +- memory: heap_used_bytes, heap_max_bytes, non_heap_used_bytes +- gc: gc_count, gc_time_ms +- threads: thread_count, daemon_thread_count + +요청 시 JMX 접근 방식(보안/SSL/자격증명 포함)을 정의하고 구현합니다. + diff --git a/docs/kafka/metrics.md b/docs/kafka/metrics.md new file mode 100644 index 0000000..9d103b4 --- /dev/null +++ b/docs/kafka/metrics.md @@ -0,0 +1,11 @@ +# Kafka 모니터링 지표(스켈레톤) + +현재는 구조만 제공됩니다. 실제 연결/수집은 향후 `kafka-python` 또는 관리 API/JMX 연동으로 추가됩니다. + +- broker: cluster_id, num_brokers, controller_id +- topics: num_topics, num_partitions +- throughput: bytes_in_per_sec, bytes_out_per_sec +- lag: consumer_lag_total + +요청 시 Brokers/JMX 연결 방식과 인증 옵션을 설계하여 구현을 진행합니다. + diff --git a/docs/linux/metrics.md b/docs/linux/metrics.md new file mode 100644 index 0000000..a4fcd3e --- /dev/null +++ b/docs/linux/metrics.md @@ -0,0 +1,14 @@ +# Linux 모니터링 지표(초안) + +본 모듈은 리눅스 호스트의 핵심 시스템 지표를 경량 수집합니다. + +- system: uptime_seconds, cpu_count, loadavg_1/5/15 +- memory: MemTotal/Available(kB), SwapTotal/Free(kB) — `/proc/meminfo` +- disk: 루트(`/`) 디스크 total/used/free(bytes) +- network: 총 수신/송신 바이트 — `/proc/net/dev` 합계 + +주의 +- 리눅스 `/proc` 의존. 일부 환경(컨테이너/특수 커널)에서 값이 제한될 수 있음. +- 상세 지표 확대(퍼센트, per-interface, per-mount)는 차기 버전 예정. + + diff --git a/jvm/collector.py b/jvm/collector.py new file mode 100644 index 0000000..2c29156 --- /dev/null +++ b/jvm/collector.py @@ -0,0 +1,33 @@ +import time +from typing import Any, Dict + + +class JvmMetricsCollector: + def __init__(self, jmx_url: str | None = None): + # 실제 구현은 JMX 접속 라이브러리 필요(jolokia/pyjmx 등) + self.jmx_url = jmx_url + + def collect_all(self) -> Dict[str, Dict[str, Any]]: + # 스켈레톤: 구조만 반환 + return { + "memory": { + "heap_used_bytes": None, + "heap_max_bytes": None, + "non_heap_used_bytes": None, + }, + "gc": { + "gc_count": None, + "gc_time_ms": None, + }, + "threads": { + "thread_count": None, + "daemon_thread_count": None, + }, + "meta": { + "timestamp": int(time.time()), + "jmx_url": self.jmx_url, + "not_implemented": True, + }, + } + + diff --git a/kafka/collector.py b/kafka/collector.py new file mode 100644 index 0000000..d8b4b05 --- /dev/null +++ b/kafka/collector.py @@ -0,0 +1,36 @@ +import time +from typing import Any, Dict + + +class KafkaMetricsCollector: + def __init__(self, bootstrap_servers: str | None = None): + # 실제 구현은 kafka-python/관리 API/JMX 등이 필요 + self.bootstrap_servers = bootstrap_servers + + def collect_all(self) -> Dict[str, Dict[str, Any]]: + # 스켈레톤: 실제 브로커 연결 없이 구조만 반환 + return { + "broker": { + "cluster_id": None, + "num_brokers": None, + "controller_id": None, + }, + "topics": { + "num_topics": None, + "num_partitions": None, + }, + "throughput": { + "bytes_in_per_sec": None, + "bytes_out_per_sec": None, + }, + "lag": { + "consumer_lag_total": None, + }, + "meta": { + "timestamp": int(time.time()), + "bootstrap_servers": self.bootstrap_servers, + "not_implemented": True, + }, + } + + diff --git a/linux/collector.py b/linux/collector.py new file mode 100644 index 0000000..26f0335 --- /dev/null +++ b/linux/collector.py @@ -0,0 +1,114 @@ +import os +import shutil +from typing import Dict, Any, Tuple +import time + + +class LinuxMetricsCollector: + def __init__(self): + if os.name != "posix": + # 리눅스 전용(일부 macOS에서도 동작하지만 /proc 의존 기능은 제한) + pass + + def _read_proc_meminfo(self) -> Dict[str, int]: + result: Dict[str, int] = {} + try: + with open("/proc/meminfo", "r", encoding="utf-8") as f: + for line in f: + parts = line.split(":") + if len(parts) < 2: + continue + key = parts[0].strip() + value_part = parts[1].strip().split()[0] + try: + result[key] = int(value_part) # kB 단위 + except Exception: + continue + except Exception: + pass + return result + + def _read_uptime(self) -> float: + try: + with open("/proc/uptime", "r", encoding="utf-8") as f: + return float(f.read().split()[0]) + except Exception: + return 0.0 + + def _read_net_dev_bytes(self) -> Tuple[int, int]: + rx = 0 + tx = 0 + try: + with open("/proc/net/dev", "r", encoding="utf-8") as f: + for line in f: + if ":" not in line: + continue + iface, data = line.split(":", 1) + fields = data.split() + if len(fields) >= 16: + rx += int(fields[0]) + tx += int(fields[8]) + except Exception: + pass + return rx, tx + + def collect_all(self) -> Dict[str, Dict[str, Any]]: + # 로드 평균 + try: + load1, load5, load15 = os.getloadavg() + except Exception: + load1 = load5 = load15 = None + + # 메모리 + meminfo = self._read_proc_meminfo() + mem_total_kb = meminfo.get("MemTotal") + mem_available_kb = meminfo.get("MemAvailable") + swap_total_kb = meminfo.get("SwapTotal") + swap_free_kb = meminfo.get("SwapFree") + + # 디스크(root) + try: + du = shutil.disk_usage("/") + disk_total = du.total + disk_used = du.used + disk_free = du.free + except Exception: + disk_total = disk_used = disk_free = None + + # 네트워크 총계 + rx_bytes, tx_bytes = self._read_net_dev_bytes() + + # 기타 + uptime_seconds = self._read_uptime() + cpu_count = os.cpu_count() + + return { + "system": { + "uptime_seconds": uptime_seconds, + "cpu_count": cpu_count, + "loadavg_1": load1, + "loadavg_5": load5, + "loadavg_15": load15, + }, + "memory": { + "mem_total_kb": mem_total_kb, + "mem_available_kb": mem_available_kb, + "swap_total_kb": swap_total_kb, + "swap_free_kb": swap_free_kb, + }, + "disk": { + "root_total_bytes": disk_total, + "root_used_bytes": disk_used, + "root_free_bytes": disk_free, + }, + "network": { + "rx_bytes_total": rx_bytes, + "tx_bytes_total": tx_bytes, + }, + "meta": { + "timestamp": int(time.time()), + "node": os.uname().nodename if hasattr(os, "uname") else None, + }, + } + + diff --git a/monitor.py b/monitor.py index 8251fec..f558b69 100644 --- a/monitor.py +++ b/monitor.py @@ -8,6 +8,9 @@ from tabulate import tabulate from redis.collector import RedisMetricsCollector +from linux.collector import LinuxMetricsCollector +from kafka.collector import KafkaMetricsCollector +from jvm.collector import JvmMetricsCollector def load_config(args) -> dict: @@ -57,21 +60,32 @@ def sec(title: str, items: dict): def main(): - parser = argparse.ArgumentParser(description="Redis Monitoring (핵심 33선)") + parser = argparse.ArgumentParser(description="System Monitoring CLI") parser.add_argument("--config", type=str, help="설정 파일 경로 (YAML)") + parser.add_argument("--target", type=str, choices=["redis", "linux", "kafka", "jvm"], default="redis", help="모니터링 대상") parser.add_argument("--redis-url", type=str, help="Redis URL, 예: redis://localhost:6379/0") parser.add_argument("--interval", type=int, help="수집 주기(초). 0이면 1회 수집") parser.add_argument("--output", type=str, choices=["pretty", "json"], help="출력 형식") - parser.add_argument("--ping-samples", type=int, help="핑 지연 샘플 수") - parser.add_argument("--ping-timeout-ms", type=int, help="핑 타임아웃(ms)") + parser.add_argument("--ping-samples", type=int, help="핑 지연 샘플 수 (redis)") + parser.add_argument("--ping-timeout-ms", type=int, help="핑 타임아웃(ms) (redis)") args = parser.parse_args() config = load_config(args) - collector = RedisMetricsCollector( - redis_url=config["redis_url"], - ping_samples=config["ping_samples"], - ping_timeout_ms=config["ping_timeout_ms"], - ) + target = args.target or "redis" + if target == "redis": + collector = RedisMetricsCollector( + redis_url=config["redis_url"], + ping_samples=config["ping_samples"], + ping_timeout_ms=config["ping_timeout_ms"], + ) + elif target == "linux": + collector = LinuxMetricsCollector() + elif target == "kafka": + collector = KafkaMetricsCollector() + elif target == "jvm": + collector = JvmMetricsCollector() + else: + raise ValueError(f"unknown target: {target}") interval = int(config["interval"]) while True: From b0701660202a0a25d7d28a44d66b9200192b2179 Mon Sep 17 00:00:00 2001 From: rlaope Date: Sun, 16 Nov 2025 18:43:47 +0900 Subject: [PATCH 2/3] linux --- docker-compose.yml | 2 +- docs/jvm/.gitkeep | 2 - docs/jvm/metrics.md | 52 +++++++++++++-- docs/kafka/.gitkeep | 2 - docs/kafka/metrics.md | 33 ++++++++-- docs/linux/.gitkeep | 2 - docs/linux/metrics.md | 37 ++++++++--- docs/redis/.gitkeep | 2 - jvm/.gitkeep | 2 - jvm/collector.py | 59 ++++++++++++----- kafka/.gitkeep | 2 - kafka/collector.py | 114 +++++++++++++++++++++++++++----- linux/.gitkeep | 2 - linux/collector.py | 150 ++++++++++++++++++++++++++++++++++++++---- monitor.py | 7 +- requirements.txt | 1 + 16 files changed, 389 insertions(+), 80 deletions(-) delete mode 100644 docs/jvm/.gitkeep delete mode 100644 docs/kafka/.gitkeep delete mode 100644 docs/linux/.gitkeep delete mode 100644 docs/redis/.gitkeep delete mode 100644 jvm/.gitkeep delete mode 100644 kafka/.gitkeep delete mode 100644 linux/.gitkeep diff --git a/docker-compose.yml b/docker-compose.yml index a0295f3..bfde990 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: "3.8" +gversion: "3.8" services: redis: diff --git a/docs/jvm/.gitkeep b/docs/jvm/.gitkeep deleted file mode 100644 index 139597f..0000000 --- a/docs/jvm/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/docs/jvm/metrics.md b/docs/jvm/metrics.md index 74c4b5d..79dae7c 100644 --- a/docs/jvm/metrics.md +++ b/docs/jvm/metrics.md @@ -1,10 +1,50 @@ -# JVM 모니터링 지표(스켈레톤) +# JVM 모니터링 지표(24선, 구조 확정) -현재는 구조만 제공되며, 실제 수집은 JMX 연동(jolokia/pyjmx 등)을 통해 추가 예정입니다. +현재 문서는 JVM 메트릭 24개 항목의 구조/의미를 정의합니다. 실제 값은 추후 JMX 연동(jolokia/pyjmx 등)으로 채웁니다. -- memory: heap_used_bytes, heap_max_bytes, non_heap_used_bytes -- gc: gc_count, gc_time_ms -- threads: thread_count, daemon_thread_count +## Memory +1) heap_used_bytes +2) heap_committed_bytes +3) heap_max_bytes +4) non_heap_used_bytes +5) non_heap_committed_bytes +6) metaspace_used_bytes +7) metaspace_committed_bytes +- 의미: JVM 메모리 사용/커밋/최대, 메타스페이스 사용량 -요청 시 JMX 접근 방식(보안/SSL/자격증명 포함)을 정의하고 구현합니다. +## GC +8) young_gc_count +9) young_gc_time_ms +10) old_gc_count +11) old_gc_time_ms +- 의미: Young/Old(G1/Parallel/CMS 등) 컬렉션 횟수/시간 + +## Threads +12) thread_count +13) daemon_thread_count +14) peak_thread_count +15) deadlocked_thread_count +- 의미: 스레드 수/피크/교착상태 스레드 수 + +## ClassLoading +16) loaded_class_count +17) total_loaded_class_count +18) unloaded_class_count +- 의미: 클래스 로딩/언로딩 통계 + +## CPU +19) process_cpu_load +20) system_cpu_load +21) process_cpu_time_ns +- 의미: 프로세스/시스템 CPU 부하 및 프로세스 CPU 누적 시간 + +## Runtime +22) uptime_ms +23) compiler_total_time_ms +24) safepoint_count +- 의미: JVM 업타임, JIT 컴파일 누적, 세이프포인트 진입 횟수 + +주의 +- HotSpot 기반 MXBean/JFR/Jolokia에서 제공되는 표준/벤더 지표를 우선 사용합니다. +- 실제 수집은 JMX URL, 인증, SSL 등 연결 설정이 필요합니다. diff --git a/docs/kafka/.gitkeep b/docs/kafka/.gitkeep deleted file mode 100644 index 139597f..0000000 --- a/docs/kafka/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/docs/kafka/metrics.md b/docs/kafka/metrics.md index 9d103b4..f77adf6 100644 --- a/docs/kafka/metrics.md +++ b/docs/kafka/metrics.md @@ -1,11 +1,30 @@ -# Kafka 모니터링 지표(스켈레톤) +# Kafka 모니터링 지표(초안) -현재는 구조만 제공됩니다. 실제 연결/수집은 향후 `kafka-python` 또는 관리 API/JMX 연동으로 추가됩니다. +본 모듈은 `kafka-python`을 이용해 경량 메트릭을 수집합니다. -- broker: cluster_id, num_brokers, controller_id -- topics: num_topics, num_partitions -- throughput: bytes_in_per_sec, bytes_out_per_sec -- lag: consumer_lag_total +## 수집 항목 +- broker + - num_brokers: 메타데이터 상 브로커 수(추정) +- topics + - num_topics: 토픽 수 + - num_partitions: 전체 파티션 수 합 +- lag (옵션) + - consumer_group_id: CLI로 전달된 그룹 ID + - consumer_lag_total: 각 파티션의 end_offset - committed_offset 합(음수는 0 처리) +- meta + - timestamp, bootstrap_servers -요청 시 Brokers/JMX 연결 방식과 인증 옵션을 설계하여 구현을 진행합니다. +주의 +- 그룹 랙 계산은 `--kafka-group` 제공 시에만 작동합니다. +- 브로커 수/메타데이터는 클라이언트 내부 메타데이터 기반이므로 일시적으로 부정확할 수 있습니다. +- Throughput(초당 in/out 바이트)은 브로커 JMX/관리 API 연동 시 확장 예정입니다. + +## 실행 예시 +```bash +# 토픽/파티션/브로커 수만 +python monitor.py --target kafka --kafka-bootstrap localhost:9092 --output json + +# 그룹 랙 포함 +python monitor.py --target kafka --kafka-bootstrap localhost:9092 --kafka-group my-consumer --output json +``` diff --git a/docs/linux/.gitkeep b/docs/linux/.gitkeep deleted file mode 100644 index 139597f..0000000 --- a/docs/linux/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/docs/linux/metrics.md b/docs/linux/metrics.md index a4fcd3e..8a18708 100644 --- a/docs/linux/metrics.md +++ b/docs/linux/metrics.md @@ -1,14 +1,35 @@ -# Linux 모니터링 지표(초안) +# Linux 모니터링 지표 -본 모듈은 리눅스 호스트의 핵심 시스템 지표를 경량 수집합니다. +리눅스 호스트의 핵심 시스템 지표를 `/proc` 기반으로 경량 수집합니다. -- system: uptime_seconds, cpu_count, loadavg_1/5/15 -- memory: MemTotal/Available(kB), SwapTotal/Free(kB) — `/proc/meminfo` -- disk: 루트(`/`) 디스크 total/used/free(bytes) -- network: 총 수신/송신 바이트 — `/proc/net/dev` 합계 +## System +- uptime_seconds: 호스트 업타임(초) — `/proc/uptime` +- cpu_count: 논리 코어 수 +- loadavg_1/5/15: 시스템 로드 평균 — `os.getloadavg()` +- cpu_usage_percent: 짧은 샘플(기본 200ms)로 계산한 CPU 사용률 — `/proc/stat` +- process_count: 현재 프로세스 개수 — `/proc` 디렉터리 내 PID 개수 + +## Memory +- mem_total_kb, mem_available_kb — `/proc/meminfo` +- mem_used_kb, mem_used_percent +- swap_total_kb, swap_free_kb, swap_used_kb, swap_used_percent + +## Disk +- 마운트별 사용량(물리 FS 위주: ext2/3/4, xfs, btrfs, zfs) + - {mount}.total_bytes, used_bytes, free_bytes, used_percent — `shutil.disk_usage` + +## Network +- rx_bytes_total, tx_bytes_total — `/proc/net/dev` 합계 +- interfaces: 인터페이스별 {rx_bytes, rx_packets, tx_bytes, tx_packets} + +## File system +- fd_allocated, fd_max — `/proc/sys/fs/file-nr` + +## Meta +- timestamp, node(hostname), kernel(uname 전체 문자열) 주의 -- 리눅스 `/proc` 의존. 일부 환경(컨테이너/특수 커널)에서 값이 제한될 수 있음. -- 상세 지표 확대(퍼센트, per-interface, per-mount)는 차기 버전 예정. +- `/proc` 의존으로 일부 컨테이너/보안 환경에서 접근이 제한될 수 있습니다. +- CPU%는 짧은 샘플링 기반으로 약간의 지연(기본 ~200ms)을 유발합니다. diff --git a/docs/redis/.gitkeep b/docs/redis/.gitkeep deleted file mode 100644 index 139597f..0000000 --- a/docs/redis/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/jvm/.gitkeep b/jvm/.gitkeep deleted file mode 100644 index 139597f..0000000 --- a/jvm/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/jvm/collector.py b/jvm/collector.py index 2c29156..abfb1cf 100644 --- a/jvm/collector.py +++ b/jvm/collector.py @@ -4,25 +4,54 @@ class JvmMetricsCollector: def __init__(self, jmx_url: str | None = None): - # 실제 구현은 JMX 접속 라이브러리 필요(jolokia/pyjmx 등) + # 실제 구현은 JMX 연동(jolokia/pyjmx 등)을 통해 값 채움 self.jmx_url = jmx_url def collect_all(self) -> Dict[str, Dict[str, Any]]: - # 스켈레톤: 구조만 반환 + # 24개 대표 메트릭 필드(기본 None) - 구조 확정 + memory = { + "heap_used_bytes": None, # 1 + "heap_committed_bytes": None, # 2 + "heap_max_bytes": None, # 3 + "non_heap_used_bytes": None, # 4 + "non_heap_committed_bytes": None, # 5 + "metaspace_used_bytes": None, # 6 + "metaspace_committed_bytes": None, # 7 + } + gc = { + "young_gc_count": None, # 8 + "young_gc_time_ms": None, # 9 + "old_gc_count": None, # 10 + "old_gc_time_ms": None, # 11 + } + threads = { + "thread_count": None, # 12 + "daemon_thread_count": None, # 13 + "peak_thread_count": None, # 14 + "deadlocked_thread_count": None, # 15 + } + classloading = { + "loaded_class_count": None, # 16 + "total_loaded_class_count": None, # 17 + "unloaded_class_count": None, # 18 + } + cpu = { + "process_cpu_load": None, # 19 + "system_cpu_load": None, # 20 + "process_cpu_time_ns": None, # 21 + } + runtime = { + "uptime_ms": None, # 22 + "compiler_total_time_ms": None, # 23 + "safepoint_count": None, # 24 + } return { - "memory": { - "heap_used_bytes": None, - "heap_max_bytes": None, - "non_heap_used_bytes": None, - }, - "gc": { - "gc_count": None, - "gc_time_ms": None, - }, - "threads": { - "thread_count": None, - "daemon_thread_count": None, - }, + "memory": memory, + "gc": gc, + "threads": threads, + "classloading": classloading, + "cpu": cpu, + "runtime": runtime, "meta": { "timestamp": int(time.time()), "jmx_url": self.jmx_url, diff --git a/kafka/.gitkeep b/kafka/.gitkeep deleted file mode 100644 index 139597f..0000000 --- a/kafka/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/kafka/collector.py b/kafka/collector.py index d8b4b05..8a38588 100644 --- a/kafka/collector.py +++ b/kafka/collector.py @@ -1,35 +1,115 @@ import time -from typing import Any, Dict +from typing import Any, Dict, List + +try: + from kafka import KafkaConsumer, TopicPartition + from kafka.errors import KafkaError +except Exception: # pragma: no cover + KafkaConsumer = None + TopicPartition = None + KafkaError = Exception class KafkaMetricsCollector: - def __init__(self, bootstrap_servers: str | None = None): - # 실제 구현은 kafka-python/관리 API/JMX 등이 필요 + def __init__(self, bootstrap_servers: str | None = None, group_id: str | None = None, timeout_ms: int = 3000): self.bootstrap_servers = bootstrap_servers + self.group_id = group_id + self.timeout_ms = timeout_ms + + def _build_consumer(self) -> KafkaConsumer | None: + if KafkaConsumer is None or not self.bootstrap_servers: + return None + try: + consumer = KafkaConsumer( + bootstrap_servers=self.bootstrap_servers, + group_id=self.group_id if self.group_id else None, + client_id="systools-kafka", + enable_auto_commit=False, + consumer_timeout_ms=self.timeout_ms, + request_timeout_ms=max(self.timeout_ms, 5000), + metadata_max_age_ms=5000, + api_version_auto_timeout_ms=3000, + ) + return consumer + except Exception: + return None + + def _compute_topics_partitions(self, consumer: KafkaConsumer) -> Dict[str, int]: + num_topics = 0 + num_partitions = 0 + try: + topics = consumer.topics() + num_topics = len(topics or []) + for t in topics or []: + parts = consumer.partitions_for_topic(t) + if parts: + num_partitions += len(parts) + except Exception: + pass + return {"num_topics": num_topics, "num_partitions": num_partitions} + + def _num_brokers(self, consumer: KafkaConsumer) -> int | None: + try: + cluster = consumer._client.cluster # 내부 속성 사용(없으면 None) + if cluster: + return len(cluster.brokers()) + except Exception: + return None + return None + + def _group_lag(self, consumer: KafkaConsumer) -> int | None: + if not self.group_id: + return None + try: + topics = list(consumer.topics() or []) + tps: List[TopicPartition] = [] + for t in topics: + parts = consumer.partitions_for_topic(t) or [] + for p in parts: + tps.append(TopicPartition(t, p)) + if not tps: + return 0 + end_offsets = consumer.end_offsets(tps) + total_lag = 0 + for tp in tps: + committed = consumer.committed(tp) + end = end_offsets.get(tp, None) + if committed is None or end is None: + continue + lag = max(end - committed, 0) + total_lag += lag + return total_lag + except Exception: + return None def collect_all(self) -> Dict[str, Dict[str, Any]]: - # 스켈레톤: 실제 브로커 연결 없이 구조만 반환 + consumer = self._build_consumer() + + num_brokers = None + topics_info = {"num_topics": None, "num_partitions": None} + group_lag_total = None + + if consumer: + num_brokers = self._num_brokers(consumer) + topics_info = self._compute_topics_partitions(consumer) + group_lag_total = self._group_lag(consumer) + try: + consumer.close() + except Exception: + pass + return { "broker": { - "cluster_id": None, - "num_brokers": None, - "controller_id": None, - }, - "topics": { - "num_topics": None, - "num_partitions": None, - }, - "throughput": { - "bytes_in_per_sec": None, - "bytes_out_per_sec": None, + "num_brokers": num_brokers, }, + "topics": topics_info, "lag": { - "consumer_lag_total": None, + "consumer_group_id": self.group_id, + "consumer_lag_total": group_lag_total, }, "meta": { "timestamp": int(time.time()), "bootstrap_servers": self.bootstrap_servers, - "not_implemented": True, }, } diff --git a/linux/.gitkeep b/linux/.gitkeep deleted file mode 100644 index 139597f..0000000 --- a/linux/.gitkeep +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/linux/collector.py b/linux/collector.py index 26f0335..c0b17d9 100644 --- a/linux/collector.py +++ b/linux/collector.py @@ -1,6 +1,6 @@ import os import shutil -from typing import Dict, Any, Tuple +from typing import Dict, Any, Tuple, List import time @@ -35,6 +35,34 @@ def _read_uptime(self) -> float: except Exception: return 0.0 + def _read_proc_stat_cpu(self) -> Tuple[int, int]: + # returns (idle, total) jiffies + try: + with open("/proc/stat", "r", encoding="utf-8") as f: + line = f.readline() + if not line.startswith("cpu "): + return 0, 0 + parts = line.strip().split() + values = list(map(int, parts[1:])) # user nice system idle iowait irq softirq steal guest guest_nice + idle = values[3] + (values[4] if len(values) > 4 else 0) + total = sum(values) + return idle, total + except Exception: + return 0, 0 + + def _cpu_usage_percent(self, sample_ms: int = 200) -> float | None: + idle1, total1 = self._read_proc_stat_cpu() + if total1 == 0: + return None + time.sleep(max(sample_ms, 1) / 1000.0) + idle2, total2 = self._read_proc_stat_cpu() + delta_total = total2 - total1 + delta_idle = idle2 - idle1 + if delta_total <= 0: + return None + usage = 100.0 * (delta_total - delta_idle) / delta_total + return round(usage, 2) + def _read_net_dev_bytes(self) -> Tuple[int, int]: rx = 0 tx = 0 @@ -52,6 +80,54 @@ def _read_net_dev_bytes(self) -> Tuple[int, int]: pass return rx, tx + def _read_net_dev_per_iface(self) -> Dict[str, Dict[str, int]]: + stats: Dict[str, Dict[str, int]] = {} + try: + with open("/proc/net/dev", "r", encoding="utf-8") as f: + for line in f: + if ":" not in line: + continue + iface, data = line.split(":", 1) + iface = iface.strip() + fields = data.split() + if len(fields) >= 16: + stats[iface] = { + "rx_bytes": int(fields[0]), + "rx_packets": int(fields[1]), + "tx_bytes": int(fields[8]), + "tx_packets": int(fields[9]), + } + except Exception: + pass + return stats + + def _list_mountpoints(self) -> List[Tuple[str, str]]: + mounts: List[Tuple[str, str]] = [] + try: + with open("/proc/mounts", "r", encoding="utf-8") as f: + for line in f: + parts = line.split() + if len(parts) < 3: + continue + device, mnt, fstype = parts[0], parts[1], parts[2] + # 물리 볼륨 위주 필터 + if fstype in ("ext2", "ext3", "ext4", "xfs", "btrfs", "zfs"): + mounts.append((mnt, fstype)) + except Exception: + pass + # 루트는 항상 포함 + if ("/", "unknown") not in mounts and not any(m == "/" for m, _ in mounts): + mounts.append(("/", "unknown")) + # 중복 제거 + seen = set() + unique: List[Tuple[str, str]] = [] + for m, t in mounts: + if m in seen: + continue + seen.add(m) + unique.append((m, t)) + return unique + def collect_all(self) -> Dict[str, Dict[str, Any]]: # 로드 평균 try: @@ -65,22 +141,66 @@ def collect_all(self) -> Dict[str, Dict[str, Any]]: mem_available_kb = meminfo.get("MemAvailable") swap_total_kb = meminfo.get("SwapTotal") swap_free_kb = meminfo.get("SwapFree") + mem_used_kb = None + mem_used_percent = None + swap_used_kb = None + swap_used_percent = None + try: + if mem_total_kb is not None and mem_available_kb is not None: + mem_used_kb = max(mem_total_kb - mem_available_kb, 0) + mem_used_percent = round(100.0 * mem_used_kb / mem_total_kb, 2) if mem_total_kb > 0 else None + if swap_total_kb is not None and swap_free_kb is not None: + swap_used_kb = max(swap_total_kb - swap_free_kb, 0) + swap_used_percent = round(100.0 * swap_used_kb / swap_total_kb, 2) if swap_total_kb > 0 else None + except Exception: + pass # 디스크(root) + per_mount: Dict[str, Dict[str, int | float | None]] = {} try: - du = shutil.disk_usage("/") - disk_total = du.total - disk_used = du.used - disk_free = du.free + for mount, fstype in self._list_mountpoints(): + try: + du = shutil.disk_usage(mount) + used_percent = round(100.0 * du.used / du.total, 2) if du.total > 0 else None + per_mount[mount] = { + "fstype": fstype, + "total_bytes": du.total, + "used_bytes": du.used, + "free_bytes": du.free, + "used_percent": used_percent, + } + except Exception: + continue except Exception: - disk_total = disk_used = disk_free = None + pass # 네트워크 총계 rx_bytes, tx_bytes = self._read_net_dev_bytes() + per_iface = self._read_net_dev_per_iface() # 기타 uptime_seconds = self._read_uptime() cpu_count = os.cpu_count() + cpu_usage_percent = self._cpu_usage_percent() + + # 프로세스 수 + process_count = None + try: + process_count = sum(1 for name in os.listdir("/proc") if name.isdigit()) + except Exception: + pass + + # 파일 디스크립터 + fd_allocated = fd_max = None + try: + with open("/proc/sys/fs/file-nr", "r", encoding="utf-8") as f: + parts = f.read().split() + # file-nr: allocated unused max + if len(parts) >= 3: + fd_allocated = int(parts[0]) + fd_max = int(parts[2]) + except Exception: + pass return { "system": { @@ -89,25 +209,33 @@ def collect_all(self) -> Dict[str, Dict[str, Any]]: "loadavg_1": load1, "loadavg_5": load5, "loadavg_15": load15, + "cpu_usage_percent": cpu_usage_percent, + "process_count": process_count, }, "memory": { "mem_total_kb": mem_total_kb, "mem_available_kb": mem_available_kb, + "mem_used_kb": mem_used_kb, + "mem_used_percent": mem_used_percent, "swap_total_kb": swap_total_kb, "swap_free_kb": swap_free_kb, + "swap_used_kb": swap_used_kb, + "swap_used_percent": swap_used_percent, }, - "disk": { - "root_total_bytes": disk_total, - "root_used_bytes": disk_used, - "root_free_bytes": disk_free, - }, + "disk": per_mount, "network": { "rx_bytes_total": rx_bytes, "tx_bytes_total": tx_bytes, + "interfaces": per_iface, + }, + "fs": { + "fd_allocated": fd_allocated, + "fd_max": fd_max, }, "meta": { "timestamp": int(time.time()), "node": os.uname().nodename if hasattr(os, "uname") else None, + "kernel": " ".join(os.uname()) if hasattr(os, "uname") else None, }, } diff --git a/monitor.py b/monitor.py index f558b69..ad606ad 100644 --- a/monitor.py +++ b/monitor.py @@ -64,6 +64,8 @@ def main(): parser.add_argument("--config", type=str, help="설정 파일 경로 (YAML)") parser.add_argument("--target", type=str, choices=["redis", "linux", "kafka", "jvm"], default="redis", help="모니터링 대상") parser.add_argument("--redis-url", type=str, help="Redis URL, 예: redis://localhost:6379/0") + parser.add_argument("--kafka-bootstrap", type=str, help="Kafka bootstrap servers, 예: localhost:9092") + parser.add_argument("--kafka-group", type=str, help="Kafka consumer group(선택, 제공 시 그룹 랙 추정)") parser.add_argument("--interval", type=int, help="수집 주기(초). 0이면 1회 수집") parser.add_argument("--output", type=str, choices=["pretty", "json"], help="출력 형식") parser.add_argument("--ping-samples", type=int, help="핑 지연 샘플 수 (redis)") @@ -81,7 +83,10 @@ def main(): elif target == "linux": collector = LinuxMetricsCollector() elif target == "kafka": - collector = KafkaMetricsCollector() + collector = KafkaMetricsCollector( + bootstrap_servers=args.kafka_bootstrap, + group_id=args.kafka_group, + ) elif target == "jvm": collector = JvmMetricsCollector() else: diff --git a/requirements.txt b/requirements.txt index c7b69a2..6cff739 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ redis==5.0.1 PyYAML==6.0.2 tabulate==0.9.0 +kafka-python==2.0.2 From c73e8a7a6c803224709758f9566c463c7371b7e4 Mon Sep 17 00:00:00 2001 From: rlaope Date: Sun, 16 Nov 2025 18:47:48 +0900 Subject: [PATCH 3/3] attribute --- .gitattributes | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4a49fed --- /dev/null +++ b/.gitattributes @@ -0,0 +1,22 @@ +# 기본: 모든 텍스트 파일은 LF 정규화 +* text=auto eol=lf + +# 예외: HTML/CSS/JS는 라인엔드 변환 제외 +*.html -text +*.css -text +*.js -text + +# 공통 바이너리 +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.webp binary +*.ico binary +*.pdf binary +*.zip binary +*.tar binary +*.gz binary +*.bz2 binary +*.xz binary +