From 78175de6650b186c512a297c751cb5670a72ef9b Mon Sep 17 00:00:00 2001 From: Tanel Andreson Date: Thu, 12 Mar 2026 17:34:01 +0200 Subject: [PATCH] compression for tarball, claude.md, sleep faster during vm startup --- CLAUDE.md | 174 ++++++++++++++++++++++++++++++++++++++++++++ vmtool/aws.py | 22 ++++-- vmtool/tarball.py | 26 ++++++- vmtool/tarfilter.py | 4 +- 4 files changed, 217 insertions(+), 9 deletions(-) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..a323008 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,174 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +vmtool is an orchestration tool for managing stateful cloud VMs on AWS. It's designed for database and other stateful workloads, providing comprehensive VM lifecycle management, backup handling, failover capabilities, and cost tracking. + +## Development Commands + +### Testing +```bash +# Run all tests with coverage +tox + +# Run tests with pytest directly (after installing deps) +pytest --cov + +# Run linting +tox -e lint +# or +pylint vmtool +``` + +### Running vmtool + +The tool can be run in two ways: + +1. **Via wrapper script** (auto-manages virtualenv): +```bash +./run_vmtool.sh --env=ENV_NAME COMMAND [args...] +``` + +2. **Direct invocation** (after pip install): +```bash +vmtool --env=ENV_NAME COMMAND [args...] +``` + +### Installation +```bash +# Install dependencies +pip install -r requirements.txt + +# Install in development mode +pip install -e . +``` + +## Architecture + +### Command Execution Flow + +1. **Entry**: `vmtool/run.py` - Parses CLI args, loads environment config +2. **Config Loading**: `vmtool/envconfig.py` - Loads `config_${env}.ini` from conf directory +3. **Command Dispatch**: `vmtool/scripting.py` - EnvScript base class dispatches to `cmd_*` methods +4. 
**Implementation**: `vmtool/aws.py` - VmTool class with 70+ command implementations + +### Key Components + +- **vmtool/aws.py**: Main VmTool class inheriting from EnvScript. Contains all VM management commands as `cmd_*` methods (e.g., `cmd_create`, `cmd_start`, `cmd_ssh`) +- **vmtool/config.py**: Advanced config parser with variable interpolation, supports `${var}` syntax and custom functions like `${FILE!path}`, `${TF!var}` +- **vmtool/envconfig.py**: Environment-specific config loading. Looks for configs in `$git_dir/conf/config_${env}.ini` +- **vmtool/scripting.py**: EnvScript base class providing command dispatch, logging, and config management +- **vmtool/terra.py**: Terraform state file parsing for reading output variables +- **vmtool/certs.py**: Certificate management integration with sysca +- **vmtool/gpg.py**: GPG file decryption utilities +- **vmtool/util.py**: Common utilities (SSH, formatting, subprocess helpers) + +### Configuration System + +vmtool uses a sophisticated INI-based config system: + +- **Location**: `${VMTOOL_CONFIG_DIR}/config_${env}.ini` (default: `$gittop/conf/`) +- **Main section**: `[vm-config]` +- **Interpolation**: Supports recursive variable expansion with `${var}` and `${section:var}` +- **Functions**: Custom interpolation functions via `${FUNC!arg}` syntax: + - `FILE`: Read file contents + - `KEY`: Read SSH key + - `TF`: Read Terraform output variable + - `TFAZ`: Read Terraform AZ variable + - `PRIMARY_VM`: Get primary VM identifier +- **Dependencies**: Can include other configs via `config_depends = file1.ini, file2.ini` +- **Aliases**: Support command and role aliases via `[alias.command_name]` sections + +### Environment Variables + +- `VMTOOL_ENV_NAME`: Environment name (can be overridden with `--env`) +- `VMTOOL_CONFIG_DIR`: Config directory (default: `$gittop/vmconf` or `$gittop/conf`) +- `VMTOOL_KEY_DIR`: SSH keys directory (default: `$gittop/keys`) +- `VMTOOL_CA_LOG_DIR`: CA log directory (required) +- 
`VMTOOL_GIT_DIR`: Git repository root (auto-detected or can be set) +- `VMTOOL_USERNAME`: Username for VM access (fallback to `USER` or `LOGNAME`) + +### Command Pattern + +New commands are added as methods in `vmtool/aws.py`: +```python +def cmd_commandname(self, *args): + """Command description.""" + # Implementation +``` + +The method name determines the command: `cmd_show_vms` → `vmtool show-vms` (dashes converted to underscores). + +## Project Structure + +``` +vmtool/ +├── vmtool/ # Main package +│ ├── run.py # CLI entry point and command routing +│ ├── aws.py # VmTool class with all commands (~4500 lines) +│ ├── config.py # Advanced config parser +│ ├── envconfig.py # Environment config loader +│ ├── scripting.py # EnvScript base class +│ ├── terra.py # Terraform integration +│ ├── certs.py # Certificate management +│ ├── gpg.py # GPG utilities +│ ├── util.py # Common utilities +│ └── xglob.py # Extended glob matching +├── tests/ # Unit tests +├── pricing/ # AWS pricing analysis scripts +├── requirements.txt # Python dependencies +├── setup.py # Package setup +├── tox.ini # Test configuration +└── run_vmtool.sh # Wrapper script with auto-venv +``` + +## Pricing Scripts + +The `pricing/` directory contains AWS pricing analysis tools: +```bash +# Fetch pricing data +cd pricing +./fetch_cache.py + +# Query instance prices +./list_vms.py --region='eu-west-*' m5.large +``` + +## Key Operations + +### VM Lifecycle +- Create: `create`, `create_primary`, `create_secondary` +- Control: `start`, `stop`, `terminate` +- Access: `ssh`, `ssh_admin`, `rsync` +- Info: `show_vms`, `show_primary`, `get_output` + +### High Availability +- `failover`: Promote secondary to primary +- `takeover`: Coordinated primary/secondary switch +- `drop_node`: Remove node from cluster + +### Backup Management +- `show_backups`, `ls_backups`: List backups +- `get_backup`: Restore from backup +- `clean_backups`: Remove old backups + +### Cost Tracking +- `show_vmcost`: VM costs +- 
`show_ebscost`: EBS volume costs +- `show_s3cost`: S3 bucket costs + +## Testing Notes + +- Unit tests in `tests/` cover config parsing and utility functions +- Test individual modules with: `pytest tests/test_module.py` +- The main aws.py module has extensive manual testing requirements due to AWS API dependencies +- Use `--cov` flag for coverage reports + +## Code Style + +- Python 3 codebase +- Uses pylint for linting (config in `.pylintrc`) +- Line length and other style rules defined in pylintrc +- Prefer explicit over implicit, especially for AWS operations diff --git a/vmtool/aws.py b/vmtool/aws.py index ebe3de7..0c05541 100644 --- a/vmtool/aws.py +++ b/vmtool/aws.py @@ -2255,7 +2255,7 @@ def vm_create_start(self): # actual launch res = client.run_instances(**args) - time.sleep(20) # FIXME + time.sleep(10) # FIXME # collect ids ids = [] @@ -2570,11 +2570,11 @@ def make_user_creation(self): script.append(mk_sshuser_script(user, auth_groups, pubkey)) return '\n'.join(script) - def make_tar_filter(self, extra_defs=None): + def make_tar_filter(self, extra_defs=None, comp='xz', compresslevel=9): defs = {} if extra_defs: defs.update(extra_defs) - tb = TarFilter(self.filter_key_lookup, defs) + tb = TarFilter(self.filter_key_lookup, defs, comp=comp, compresslevel=compresslevel) tb.set_live(self.is_live) return tb @@ -2902,7 +2902,18 @@ def modcmd_build_tgz(self, cmd_name, globs, cmd_cf=None): if not mods_ok: sys.exit(1) - dst = self.make_tar_filter(defs) + # allow compression algorithm to be configured; default is xz (best for text) + # options: 'gz' (gzip), 'bz2' (bzip2), 'xz' (lzma), or '' (no compression) + comp = self.cf.get('tgz_compression', 'xz') + # compression level (1-9, where 9 is highest); default is 9 for maximum compression + compresslevel = 9 + try: + compresslevel = self.cf.getint('tgz_compresslevel') + except (NoOptionError, ValueError): + pass + # start timer for building/compressing the archive + start_time = time.time() + dst = 
self.make_tar_filter(defs, comp=comp, compresslevel=compresslevel)
 
         for tmp in globs:
             subdir = '.'
@@ -2965,8 +2976,9 @@ def modcmd_build_tgz(self, cmd_name, globs, cmd_cf=None):
         # finish
         dst.close()
         tgz = dst.getvalue()
+        elapsed = time.time() - start_time
         self._PREP_TGZ_CACHE[cmd_name] = tgz
-        time_printf("%s: tgz bytes: %s", cmd_name, len(tgz))
+        time_printf("%s: tgz bytes: %s elapsed=%.2fs", cmd_name, len(tgz), elapsed)
 
     def load_ca_keypair(self, ca_name):
         intca_dir = self.cf.get(ca_name + '_dir', '')
diff --git a/vmtool/tarball.py b/vmtool/tarball.py
index 148abf6..e6ef40d 100644
--- a/vmtool/tarball.py
+++ b/vmtool/tarball.py
@@ -21,9 +21,31 @@
 
 
 class TarBall(object):
-    def __init__(self):
+    def __init__(self, comp='xz', compresslevel=9):
+        """Initialize TarBall with configurable compression.
+
+        Args:
+            comp: Compression algorithm ('gz', 'bz2', 'xz', or '' for none)
+            compresslevel: Compression level (1-9, where 9 is highest compression);
+                None keeps the compressor's own default level
+        """
         self.buf = io.BytesIO()
-        self.tf = tarfile.open('buf.tgz', 'w|gz', self.buf, format=tarfile.PAX_FORMAT)
+        self.comp = comp = comp or ''
+        self.compresslevel = compresslevel
+
+        # The level keyword differs per codec: gz/bz2 take 'compresslevel',
+        # xz takes 'preset'.  Use 'w:' modes because 'w|' stream modes only
+        # accept a level on Python 3.12+ (3.14+ for xz) and raise TypeError
+        # on older versions.
+        level_args = {}
+        if compresslevel is not None:
+            if comp in ('gz', 'bz2'):
+                level_args['compresslevel'] = compresslevel
+            elif comp == 'xz':
+                level_args['preset'] = compresslevel
+        name = 'buf.tar.' + comp if comp else 'buf.tar'
+        self.tf = tarfile.open(name, 'w:' + comp, self.buf,
+                               format=tarfile.PAX_FORMAT, **level_args)
 
     def filter_data(self, fname, data):
         """Overridable function."""
diff --git a/vmtool/tarfilter.py b/vmtool/tarfilter.py
index a582501..4f8cd46 100644
--- a/vmtool/tarfilter.py
+++ b/vmtool/tarfilter.py
@@ -26,8 +26,8 @@
 class TarFilter(TarBall):
     _password_master = None
 
-    def __init__(self, key_lookup_func, key_lookup_arg):
-        super(TarFilter, self).__init__()
+    def __init__(self, key_lookup_func, key_lookup_arg, comp='xz', compresslevel=9):
+        super(TarFilter, self).__init__(comp=comp, compresslevel=compresslevel)
         self.live = 0
         self.key_lookup_func = key_lookup_func
         self.key_lookup_arg = key_lookup_arg