From 7f76eeadef7fd9f5db78fdc7aea30511c161dcfc Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 15:38:01 -0500 Subject: [PATCH 01/54] Add voice transcription session commands --- poopbot.py | 549 +++++++++++++++++++++++------------------------------ 1 file changed, 238 insertions(+), 311 deletions(-) diff --git a/poopbot.py b/poopbot.py index 3b82b6d..d63912a 100644 --- a/poopbot.py +++ b/poopbot.py @@ -8,51 +8,44 @@ import urllib.request from urllib.parse import urlparse import json +import tempfile +import shutil +import importlib.util import xml.etree.ElementTree as ET from collections import deque from dataclasses import dataclass from datetime import datetime, timezone, date, time as dtime +from pathlib import Path import time - import discord from discord import app_commands from discord.ext import commands, tasks - try: from zoneinfo import ZoneInfo from zoneinfo import ZoneInfoNotFoundError except ImportError: raise RuntimeError("Python 3.9+ required for zoneinfo") - # ========================= # CONFIG # ========================= load_dotenv() # loads variables from .env into the process environment - TOKEN = os.getenv("DISCORD_TOKEN") if not TOKEN: raise RuntimeError("DISCORD_TOKEN not found. 
Check your .env file and WorkingDirectory.") - DB_DIR = "db" CONFIG_DB_PATH = os.path.join(DB_DIR, "poopbot_config.db") CLEANUP_DB_PATH = os.path.join(DB_DIR, "poopbot_cleanup.db") - POOP_EMOJI = "πŸ’©" UNDO_EMOJI = "🧻" - # Deletes ANY non-bot message posted in this channel CLEANUP_CHANNEL_ID = 1419130398683959398 - # Ticketing configuration TICKET_DEV_USER_ID = os.getenv("TICKET_DEV_USER_ID") TICKET_ARCHIVE_CHANNEL_ID = os.getenv("TICKET_ARCHIVE_CHANNEL_ID") - # Daily post time (12:00am Pacific) TZ_NAME = "America/Los_Angeles" - # Rotate button message every N poops per guild ROTATE_EVERY = 10 - WESROTH_HANDLE_URL = "https://www.youtube.com/@WesRoth" WESROTH_CHANNEL_ID = os.getenv("WESROTH_CHANNEL_ID") WESROTH_ALERT_CHANNEL_ID = 1350269523902857369 @@ -73,12 +66,10 @@ "UH OH.", "THIS IS IT.", ] - FETCH_TRACK_INFO_TIMEOUT_SECONDS = 25 FETCH_TRACK_INFO_TIMEOUT_MESSAGE = ( "Timed out while fetching track info from YouTube. Please try again in a moment." ) - # ========================= # MESSAGES # ========================= @@ -139,7 +130,6 @@ "All clear. Nice work, {user}.", "System reset complete, {user}.", "Poop event recorded. Excellent, {user}.", - "Mission accomplished, but at what cost, {user}.", "Payload delivered… with collateral damage, {user}.", "Successful deployment; splash radius exceeded expectations, {user}.", @@ -158,8 +148,6 @@ "Output achieved; the situation got spicy, {user}.", "Task finished. The final seconds were a gamble, {user}.", "All done, {user}. 
Nobody’s calling that a clean run.", - - # Dota 2 "Space created, {user}.", @@ -204,22 +192,18 @@ "{user} β€” POOPTASTROPHE!", "{user} β€” POOPOCALYPSE!", "{user} β€” POOPIONAIRE!", - ] - UNDO_MSGS = [ "Rollback complete, {user}.", "Okay {user}, I removed your last poop.", "Wiped from history, {user}.", "Deleted one (1) poop from the timeline, {user}.", ] - WESROTH_NS = { "atom": "http://www.w3.org/2005/Atom", "yt": "http://www.youtube.com/xml/schemas/2015", "media": "http://search.yahoo.com/mrss/", } - # ========================= # TIMEZONE # ========================= @@ -231,92 +215,171 @@ f" python -m pip install tzdata\n" f"Then restart." ) from e - - def current_year_local() -> int: return datetime.now(LOCAL_TZ).year - - # ========================= # DISCORD SETUP # ========================= intents = discord.Intents.default() intents.reactions = True intents.message_content = True # needed for cleanup logging - bot = commands.Bot(command_prefix="!", intents=intents) - # serialize DB writes to avoid sqlite "database is locked" db_write_lock = asyncio.Lock() - - - @dataclass class QueueTrack: title: str source_url: str duration_seconds: int requested_by: int - - class GuildMusicState: def __init__(self): self.queue: deque[QueueTrack] = deque() self.current_track: QueueTrack | None = None self.track_started_at: datetime | None = None self.lock = asyncio.Lock() - - music_states: dict[int, GuildMusicState] = {} - - +class GuildTranscriptionSession: + def __init__(self, guild_id: int, voice_channel_id: int): + self.guild_id = guild_id + self.voice_channel_id = voice_channel_id + self.started_at = datetime.now(timezone.utc) + self.temp_dir = Path(tempfile.mkdtemp(prefix=f"gokibot_transcribe_{guild_id}_")) + self.voice_paths_by_user: dict[int, Path] = {} + self.aliases_by_user: dict[int, str] = {} +transcription_sessions: dict[int, GuildTranscriptionSession] = {} def get_music_state(guild_id: int) -> GuildMusicState: state = music_states.get(guild_id) if state is 
None: state = GuildMusicState() music_states[guild_id] = state return state - - +def get_transcription_session(guild_id: int) -> GuildTranscriptionSession | None: + return transcription_sessions.get(guild_id) +def remove_transcription_session(guild_id: int): + session = transcription_sessions.pop(guild_id, None) + if session is None: + return + shutil.rmtree(session.temp_dir, ignore_errors=True) +def resolve_display_name(guild: discord.Guild | None, user_id: int, aliases_by_user: dict[int, str]) -> str: + alias = aliases_by_user.get(user_id) + if alias: + return alias + if guild is not None: + member = guild.get_member(user_id) + if member is not None: + return member.display_name + return str(user_id) +def can_record_voice() -> bool: + return hasattr(discord, "sinks") and hasattr(discord.sinks, "WaveSink") +def get_whisper_transcriber() -> tuple[str | None, object | None]: + if importlib.util.find_spec("faster_whisper") is not None: + from faster_whisper import WhisperModel + model_name = os.getenv("WHISPER_MODEL", "base") + model = WhisperModel(model_name, device="cpu", compute_type="int8") + return "faster_whisper", model + if importlib.util.find_spec("whisper") is not None: + import whisper + model_name = os.getenv("WHISPER_MODEL", "base") + model = whisper.load_model(model_name) + return "whisper", model + return None, None +def transcribe_audio_file(engine_name: str, engine: object, file_path: Path) -> str: + if engine_name == "faster_whisper": + segments, _ = engine.transcribe(str(file_path), vad_filter=True) + return " ".join(seg.text.strip() for seg in segments if seg.text.strip()) + if engine_name == "whisper": + result = engine.transcribe(str(file_path), fp16=False) + return str(result.get("text") or "").strip() + return "" +def copy_recorded_audio_to_session( + sink: object, + guild: discord.Guild, + session: GuildTranscriptionSession, +) -> dict[int, Path]: + copied_files: dict[int, Path] = {} + sink_audio_data = getattr(sink, "audio_data", None) + if 
not isinstance(sink_audio_data, dict): + return copied_files + for user_id, audio_obj in sink_audio_data.items(): + if not isinstance(user_id, int): + continue + file_path = getattr(audio_obj, "file", None) + if file_path is None: + continue + temp_output = session.temp_dir / f"{user_id}.wav" + try: + if hasattr(file_path, "seek"): + file_path.seek(0) + if hasattr(file_path, "read"): + temp_output.write_bytes(file_path.read()) + else: + shutil.copy(str(file_path), temp_output) + except OSError: + continue + copied_files[user_id] = temp_output + session.voice_paths_by_user = copied_files + return copied_files +async def finalize_transcription_session( + interaction: discord.Interaction, + session: GuildTranscriptionSession, +) -> tuple[Path | None, str]: + guild = interaction.guild + if guild is None: + return None, "This command only works in a server." + if not session.voice_paths_by_user: + return None, "No captured audio was found for this session." + engine_name, engine = get_whisper_transcriber() + if engine is None or engine_name is None: + return None, ( + "No local transcription engine was found. Install `faster-whisper` (recommended) " + "or `openai-whisper` on the host." 
+ ) + transcript_path = session.temp_dir / f"transcript-{guild.id}-{int(time.time())}.txt" + lines = [ + f"GokiBot transcription session for guild {guild.id}", + f"Started UTC: {session.started_at.isoformat()}", + f"Ended UTC: {datetime.now(timezone.utc).isoformat()}", + "", + ] + for user_id, audio_path in sorted(session.voice_paths_by_user.items(), key=lambda item: item[0]): + speaker_name = resolve_display_name(guild, user_id, session.aliases_by_user) + transcript = transcribe_audio_file(engine_name, engine, audio_path) + if not transcript: + transcript = "[No speech detected]" + lines.append(f"[{speaker_name} | {user_id}]") + lines.append(transcript) + lines.append("") + transcript_path.write_text("\n".join(lines), encoding="utf-8") + return transcript_path, "" def format_duration(duration_seconds: int) -> str: mins, secs = divmod(max(duration_seconds, 0), 60) hours, mins = divmod(mins, 60) if hours: return f"{hours:d}:{mins:02d}:{secs:02d}" return f"{mins:d}:{secs:02d}" - - def parse_duration_seconds(value: object) -> int: if isinstance(value, (int, float)): return max(int(value), 0) - if not isinstance(value, str): return 0 - text = value.strip() if not text: return 0 - if text.isdigit(): return int(text) - parts = text.split(":") if not all(part.isdigit() for part in parts): return 0 - total = 0 for part in parts: total = (total * 60) + int(part) return total - - def log_music_timing(step: str, phase: str, started_at: float, **fields: object): elapsed = time.perf_counter() - started_at details = " ".join(f"{key}={value!r}" for key, value in fields.items()) suffix = f" {details}" if details else "" print(f"[music] {step} {phase} elapsed={elapsed:.2f}s{suffix}") - - def pick_track_info(info: dict[str, object]) -> dict[str, object]: entries = info.get("entries") if isinstance(entries, list): @@ -325,13 +388,10 @@ def pick_track_info(info: dict[str, object]) -> dict[str, object]: return entry raise RuntimeError("No playable track found for that query.") return 
info - - def extract_stream_url(info: dict[str, object]) -> str: direct_url = str(info.get("url") or "").strip() if direct_url: return direct_url - requested_formats = info.get("requested_formats") if isinstance(requested_formats, list): for fmt in requested_formats: @@ -342,15 +402,12 @@ def extract_stream_url(info: dict[str, object]) -> str: continue if str(fmt.get("vcodec") or "") == "none": return format_url - formats = info.get("formats") if not isinstance(formats, list): raise RuntimeError("yt-dlp did not provide an audio stream URL.") - def _is_hls_protocol(fmt: dict[str, object]) -> bool: protocol = str(fmt.get("protocol") or "").lower() return "m3u8" in protocol or protocol == "http_dash_segments" - best_audio_url = "" best_audio_score = -1.0 best_hls_audio_url = "" @@ -365,21 +422,17 @@ def _is_hls_protocol(fmt: dict[str, object]) -> bool: continue if not fallback_url: fallback_url = format_url - is_hls = _is_hls_protocol(fmt) if not is_hls and not fallback_non_hls_url: fallback_non_hls_url = format_url - is_audio_only = str(fmt.get("vcodec") or "") == "none" if not is_audio_only: continue - bitrate = fmt.get("abr") or fmt.get("tbr") or 0 try: score = float(bitrate) except (TypeError, ValueError): score = 0.0 - if is_hls: if score >= best_hls_audio_score: best_hls_audio_score = score @@ -388,7 +441,6 @@ def _is_hls_protocol(fmt: dict[str, object]) -> bool: if score >= best_audio_score: best_audio_score = score best_audio_url = format_url - if best_audio_url: return best_audio_url if fallback_non_hls_url: @@ -398,8 +450,6 @@ def _is_hls_protocol(fmt: dict[str, object]) -> bool: if fallback_url: return fallback_url raise RuntimeError("yt-dlp returned an empty stream URL.") - - def parse_tracks_from_info(info: dict[str, object], source: str) -> list[QueueTrack]: entries = info.get("entries") if isinstance(entries, list): @@ -407,12 +457,10 @@ def parse_tracks_from_info(info: dict[str, object], source: str) -> list[QueueTr for entry in entries: if not 
isinstance(entry, dict): continue - title = str(entry.get("title") or "Unknown title") duration_seconds = parse_duration_seconds(entry.get("duration")) if duration_seconds <= 0: duration_seconds = parse_duration_seconds(entry.get("duration_string")) - webpage_url = str(entry.get("webpage_url") or entry.get("url") or "").strip() if not webpage_url: entry_id = str(entry.get("id") or "").strip() @@ -420,7 +468,6 @@ def parse_tracks_from_info(info: dict[str, object], source: str) -> list[QueueTr webpage_url = f"https://www.youtube.com/watch?v={entry_id}" else: webpage_url = source - tracks.append( QueueTrack( title=title, @@ -429,11 +476,9 @@ def parse_tracks_from_info(info: dict[str, object], source: str) -> list[QueueTr requested_by=0, ) ) - if tracks: return tracks raise RuntimeError("No playable tracks found for that playlist.") - track_info = pick_track_info(info) title = str(track_info.get("title") or "Unknown title") duration_seconds = parse_duration_seconds(track_info.get("duration")) @@ -441,9 +486,7 @@ def parse_tracks_from_info(info: dict[str, object], source: str) -> list[QueueTr duration_seconds = parse_duration_seconds(track_info.get("duration_string")) if duration_seconds <= 0: duration_seconds = parse_duration_seconds(info.get("duration_string")) - webpage_url = str(track_info.get("webpage_url") or info.get("webpage_url") or source) - return [ QueueTrack( title=title, @@ -452,8 +495,6 @@ def parse_tracks_from_info(info: dict[str, object], source: str) -> list[QueueTr requested_by=0, ) ] - - async def fetch_tracks(source: str) -> list[QueueTrack]: info_proc = await asyncio.create_subprocess_exec( "yt-dlp", @@ -468,15 +509,11 @@ async def fetch_tracks(source: str) -> list[QueueTrack]: if info_proc.returncode != 0: err = info_stderr.decode("utf-8", errors="ignore").strip() or "yt-dlp failed" raise RuntimeError(err) - try: info = json.loads(info_stdout.decode("utf-8", errors="ignore")) except Exception as exc: raise RuntimeError("Unable to read track 
metadata.") from exc - return parse_tracks_from_info(info, source) - - async def resolve_stream_url(source_url: str) -> str: stream_proc = await asyncio.create_subprocess_exec( "yt-dlp", @@ -492,24 +529,18 @@ async def resolve_stream_url(source_url: str) -> str: if stream_proc.returncode != 0: err = stream_stderr.decode("utf-8", errors="ignore").strip() or "yt-dlp failed" raise RuntimeError(err) - try: info = json.loads(stream_stdout.decode("utf-8", errors="ignore")) except Exception as exc: raise RuntimeError("Unable to read playback stream URL.") from exc - return extract_stream_url(pick_track_info(info)) - - def is_youtube_url(value: str) -> bool: try: parsed = urlparse(value) except ValueError: return False - if parsed.scheme not in {"http", "https"}: return False - hostname = (parsed.hostname or "").lower() youtube_hosts = { "youtube.com", @@ -520,8 +551,6 @@ def is_youtube_url(value: str) -> bool: "www.youtu.be", } return hostname in youtube_hosts - - async def ensure_voice_channel(interaction: discord.Interaction) -> discord.VoiceChannel | None: if interaction.guild is None: return None @@ -531,35 +560,28 @@ async def ensure_voice_channel(interaction: discord.Interaction) -> discord.Voic if not isinstance(member.voice.channel, discord.VoiceChannel): return None return member.voice.channel - - async def play_next_track(guild: discord.Guild): voice_client = guild.voice_client if voice_client is None: return - state = get_music_state(guild.id) async with state.lock: if voice_client.is_playing() or voice_client.is_paused(): return - if not state.queue: state.current_track = None state.track_started_at = None await voice_client.disconnect(force=True) return - next_track = state.queue.popleft() state.current_track = next_track state.track_started_at = datetime.now(timezone.utc) - try: stream_url = await resolve_stream_url(next_track.source_url) except RuntimeError as exc: print(f"Failed to resolve stream URL for '{next_track.title}': {exc}") await 
play_next_track(guild) return - ffmpeg_source = discord.FFmpegPCMAudio( stream_url, before_options=( @@ -571,7 +593,6 @@ async def play_next_track(guild: discord.Guild): ), options="-vn -loglevel warning -af aresample=async=1:min_hard_comp=0.100:first_pts=0", ) - def _after_playback(play_error: Exception | None): if play_error: print(f"Playback error: {play_error}") @@ -580,11 +601,8 @@ def _after_playback(play_error: Exception | None): fut.result() except Exception as exc: print(f"Failed to start next track: {exc}") - print(f"[music] voice_client.play start track='{next_track.title}'") voice_client.play(ffmpeg_source, after=_after_playback) - - # ========================= # DATABASE HELPERS # ========================= @@ -592,37 +610,27 @@ def _apply_sqlite_pragmas(conn: sqlite3.Connection): conn.execute("PRAGMA journal_mode=WAL;") conn.execute("PRAGMA synchronous=NORMAL;") conn.execute("PRAGMA busy_timeout=10000;") # ms - - def db_config() -> sqlite3.Connection: os.makedirs(DB_DIR, exist_ok=True) conn = sqlite3.connect(CONFIG_DB_PATH, timeout=10) conn.row_factory = sqlite3.Row _apply_sqlite_pragmas(conn) return conn - - def db_cleanup() -> sqlite3.Connection: os.makedirs(DB_DIR, exist_ok=True) conn = sqlite3.connect(CLEANUP_DB_PATH, timeout=10) conn.row_factory = sqlite3.Row _apply_sqlite_pragmas(conn) return conn - - def db_path_for_year(year: int) -> str: os.makedirs(DB_DIR, exist_ok=True) return os.path.join(DB_DIR, f"poopbot_{year}.db") - - def db_year(year: int) -> sqlite3.Connection: path = db_path_for_year(year) conn = sqlite3.connect(path, timeout=10) conn.row_factory = sqlite3.Row _apply_sqlite_pragmas(conn) return conn - - def init_config_db(): with db_config() as conn: conn.execute(""" @@ -672,8 +680,6 @@ def init_config_db(): conn.execute("ALTER TABLE tickets ADD COLUMN archive_thread_id INTEGER;") if "closed_at_utc" not in columns: conn.execute("ALTER TABLE tickets ADD COLUMN closed_at_utc TEXT;") - - def init_cleanup_db(): with db_cleanup() as conn: 
conn.execute(""" @@ -688,8 +694,6 @@ def init_cleanup_db(): created_at_utc TEXT NOT NULL ); """) - - def init_year_db(year: int): with db_year(year) as conn: conn.execute(""" @@ -711,8 +715,6 @@ def init_year_db(year: int): """) conn.execute("CREATE INDEX IF NOT EXISTS idx_events_user_time ON events(user_id, timestamp_utc);") conn.execute("CREATE INDEX IF NOT EXISTS idx_events_type ON events(event_type);") - - # ---- config/state (persistent) ---- def gset(guild_id: int, key: str, value: str): with db_config() as conn: @@ -720,40 +722,28 @@ def gset(guild_id: int, key: str, value: str): INSERT INTO guild_state(guild_id, key, value) VALUES(?, ?, ?) ON CONFLICT(guild_id, key) DO UPDATE SET value=excluded.value """, (guild_id, key, value)) - - def gget(guild_id: int, key: str) -> str | None: with db_config() as conn: row = conn.execute(""" SELECT value FROM guild_state WHERE guild_id=? AND key=? """, (guild_id, key)).fetchone() return row["value"] if row else None - - def gget_int(guild_id: int, key: str, default: int = 0) -> int: v = gget(guild_id, key) try: return int(v) if v is not None else default except ValueError: return default - - def gset_int(guild_id: int, key: str, value: int): gset(guild_id, key, str(value)) - - def set_ticket_target(guild_id: int, user_id: int, channel_id: int): gset(guild_id, f"ticket_target_{user_id}", str(channel_id)) - - def get_ticket_target(guild_id: int, user_id: int) -> int | None: value = gget(guild_id, f"ticket_target_{user_id}") try: return int(value) if value else None except ValueError: return None - - def get_ticket_dev_user_id() -> int | None: if not TICKET_DEV_USER_ID: return None @@ -761,8 +751,6 @@ def get_ticket_dev_user_id() -> int | None: return int(TICKET_DEV_USER_ID) except ValueError: return None - - def get_ticket_archive_channel_id() -> int | None: if not TICKET_ARCHIVE_CHANNEL_ID: return None @@ -770,8 +758,6 @@ def get_ticket_archive_channel_id() -> int | None: return int(TICKET_ARCHIVE_CHANNEL_ID) except 
ValueError: return None - - async def create_ticket_request(guild_id: int, requester_id: int, requester_name: str) -> int: created_at = datetime.now(timezone.utc).isoformat() async with db_write_lock: @@ -784,8 +770,6 @@ async def create_ticket_request(guild_id: int, requester_id: int, requester_name VALUES (?, ?, ?, NULL, NULL, ?, 'open') """, (guild_id, requester_id, requester_name, created_at)) return int(cur.lastrowid) - - async def update_ticket_request(ticket_id: int, channel_id: int, thread_id: int): async with db_write_lock: with db_config() as conn: @@ -794,8 +778,6 @@ async def update_ticket_request(ticket_id: int, channel_id: int, thread_id: int) SET channel_id=?, thread_id=? WHERE ticket_id=? """, (channel_id, thread_id, ticket_id)) - - async def close_ticket_request(ticket_id: int, archive_thread_id: int): closed_at = datetime.now(timezone.utc).isoformat() async with db_write_lock: @@ -805,8 +787,6 @@ async def close_ticket_request(ticket_id: int, archive_thread_id: int): SET archive_thread_id=?, closed_at_utc=?, status='closed' WHERE ticket_id=? 
""", (archive_thread_id, closed_at, ticket_id)) - - async def add_ticket_collaborator(ticket_id: int, user_id: int, added_by_id: int): added_at = datetime.now(timezone.utc).isoformat() async with db_write_lock: @@ -820,8 +800,6 @@ async def add_ticket_collaborator(ticket_id: int, user_id: int, added_by_id: int added_by_id=excluded.added_by_id, added_at_utc=excluded.added_at_utc """, (ticket_id, user_id, added_by_id, added_at)) - - def set_guild_channel(guild_id: int, channel_id: int): with db_config() as conn: conn.execute(""" @@ -829,13 +807,9 @@ def set_guild_channel(guild_id: int, channel_id: int): VALUES(?, ?, 1) ON CONFLICT(guild_id) DO UPDATE SET channel_id=excluded.channel_id, enabled=1 """, (guild_id, channel_id)) - - def disable_guild(guild_id: int): with db_config() as conn: conn.execute("UPDATE guild_config SET enabled=0 WHERE guild_id=?", (guild_id,)) - - def get_enabled_guilds(): with db_config() as conn: return conn.execute(""" @@ -843,8 +817,6 @@ def get_enabled_guilds(): FROM guild_config WHERE enabled=1 """).fetchall() - - def get_guild_config(guild_id: int): with db_config() as conn: return conn.execute(""" @@ -852,8 +824,6 @@ def get_guild_config(guild_id: int): FROM guild_config WHERE enabled=1 AND guild_id=? """, (guild_id,)).fetchone() - - def get_ticket_by_thread_id(thread_id: int): with db_config() as conn: return conn.execute(""" @@ -862,8 +832,6 @@ def get_ticket_by_thread_id(thread_id: int): FROM tickets WHERE thread_id=? 
""", (thread_id,)).fetchone() - - # ========================= # EVENT LOGGING (yearly) # ========================= @@ -871,8 +839,6 @@ def now_utc_local(): utc = datetime.now(timezone.utc) local = utc.astimezone(LOCAL_TZ) return utc, local - - def _fetch_url_text(url: str) -> str: req = urllib.request.Request( url, @@ -885,8 +851,6 @@ def _fetch_url_text(url: str) -> str: ) with urllib.request.urlopen(req, timeout=20) as response: return response.read().decode("utf-8") - - def _extract_channel_id_from_handle_page(html: str) -> str | None: marker = '"channelId":"' idx = html.find(marker) @@ -897,8 +861,6 @@ def _extract_channel_id_from_handle_page(html: str) -> str | None: if end == -1: return None return html[start:end] - - async def resolve_wesroth_channel_id() -> str | None: if WESROTH_CHANNEL_ID: return WESROTH_CHANNEL_ID @@ -911,8 +873,6 @@ async def resolve_wesroth_channel_id() -> str | None: if not channel_id: print("Failed to parse WesRoth channel id from handle page.") return channel_id - - def _parse_wesroth_feed(xml_text: str) -> list[dict]: root = ET.fromstring(xml_text) entries = [] @@ -939,8 +899,6 @@ def _parse_wesroth_feed(xml_text: str) -> list[dict]: "duration_seconds": duration_seconds, }) return entries - - async def fetch_wesroth_latest_today() -> dict | None: channel_id = await resolve_wesroth_channel_id() if not channel_id: @@ -954,7 +912,6 @@ async def fetch_wesroth_latest_today() -> dict | None: entries = _parse_wesroth_feed(xml_text) if not entries: return None - today_local = datetime.now(LOCAL_TZ).date() todays_entries = [] for entry in entries: @@ -969,13 +926,9 @@ async def fetch_wesroth_latest_today() -> dict | None: continue entry["published_dt"] = published_dt todays_entries.append(entry) - if not todays_entries: return None - return max(todays_entries, key=lambda item: item["published_dt"]) - - async def log_event( event_type: str, user_id: int, @@ -989,9 +942,7 @@ async def log_event( utc, local = now_utc_local() event_year = 
local.year init_year_db(event_year) - event_id = str(uuid.uuid4()) - async with db_write_lock: with db_year(event_year) as conn: conn.execute(""" @@ -1012,10 +963,7 @@ async def log_event( guild_id, channel_id, message_id, target_event_id, note )) - return event_id - - async def log_cleanup_message(message: discord.Message): created_at = message.created_at.astimezone(timezone.utc) async with db_write_lock: @@ -1035,7 +983,6 @@ async def log_cleanup_message(message: discord.Message): message.content, created_at.isoformat() )) - print( "Cleanup message stored:", f"user={message.author} ({message.author.id})", @@ -1043,15 +990,11 @@ async def log_cleanup_message(message: discord.Message): f"time={created_at.isoformat()}", f"text={message.content!r}" ) - - def find_last_active_poop_event_id(user_id: int, year: int) -> str | None: """Most recent POOP in the given year that has NOT been undone by that same user.""" init_year_db(year) - start_local = datetime(year, 1, 1, 0, 0, 0, tzinfo=LOCAL_TZ) end_local = datetime(year + 1, 1, 1, 0, 0, 0, tzinfo=LOCAL_TZ) - with db_year(year) as conn: poops = conn.execute(""" SELECT event_id @@ -1063,12 +1006,9 @@ def find_last_active_poop_event_id(user_id: int, year: int) -> str | None: ORDER BY timestamp_local DESC LIMIT 500 """, (user_id, start_local.isoformat(), end_local.isoformat())).fetchall() - if not poops: return None - poop_ids = [r["event_id"] for r in poops] - undone = conn.execute(f""" SELECT target_event_id FROM events @@ -1076,22 +1016,16 @@ def find_last_active_poop_event_id(user_id: int, year: int) -> str | None: AND user_id=? AND target_event_id IN ({",".join("?" 
* len(poop_ids))}) """, (user_id, *poop_ids)).fetchall() - undone_set = {r["target_event_id"] for r in undone} - for pid in poop_ids: if pid not in undone_set: return pid - return None - - # ========================= # BUTTON POSTING (per guild) # ========================= async def post_button_for_guild(guild_id: int, channel_id: int): channel = await bot.fetch_channel(channel_id) - # delete previous active button message old_message_id = gget(guild_id, "active_message_id") if old_message_id: @@ -1101,7 +1035,6 @@ async def post_button_for_guild(guild_id: int, channel_id: int): await old_msg.delete() except (discord.NotFound, discord.Forbidden, discord.HTTPException): pass - local_now = datetime.now(LOCAL_TZ) msg = await channel.send( f"πŸ’© **Click here to log a poop** β€” {local_now.strftime('%Y-%m-%d')} (Pacific)\n" @@ -1112,12 +1045,9 @@ async def post_button_for_guild(guild_id: int, channel_id: int): ) await msg.add_reaction(POOP_EMOJI) await msg.add_reaction(UNDO_EMOJI) - gset(guild_id, "active_message_id", str(msg.id)) gset(guild_id, "active_date_local", local_now.date().isoformat()) gset_int(guild_id, "poops_since_post", 0) - - @tasks.loop(time=dtime(hour=0, minute=0, tzinfo=LOCAL_TZ)) async def daily_midnight_pacific(): today_local = datetime.now(LOCAL_TZ).date().isoformat() @@ -1131,32 +1061,24 @@ async def daily_midnight_pacific(): await post_button_for_guild(gid, cid) except (discord.Forbidden, discord.NotFound, discord.HTTPException): continue - - @tasks.loop(minutes=WESROTH_POLL_MINUTES) async def wesroth_upload_watch(): latest = await fetch_wesroth_latest_today() if not latest: return - last_video_id = gget(0, "wesroth_last_video_id") if last_video_id == latest["video_id"]: return - channel = bot.get_channel(WESROTH_ALERT_CHANNEL_ID) if channel is None: try: channel = await bot.fetch_channel(WESROTH_ALERT_CHANNEL_ID) except (discord.Forbidden, discord.NotFound, discord.HTTPException): return - caption = random.choice(WESROTH_CAPTIONS) await 
channel.send(f"{caption}\n{latest['link']}") - gset(0, "wesroth_last_video_id", latest["video_id"]) gset(0, "wesroth_last_post_date_local", datetime.now(LOCAL_TZ).date().isoformat()) - - # ========================= # MESSAGE CLEANUP CHANNEL # ========================= @@ -1171,10 +1093,7 @@ async def on_message(message: discord.Message): pass # still allow commands processing elsewhere; this message is gone anyway return - await bot.process_commands(message) - - # ========================= # REACTIONS # ========================= @@ -1184,18 +1103,14 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): return if payload.guild_id is None: return - active_message_id = gget(payload.guild_id, "active_message_id") if not active_message_id or str(payload.message_id) != active_message_id: return - emoji = str(payload.emoji) - channel = await bot.fetch_channel(payload.channel_id) message = await channel.fetch_message(payload.message_id) user = await bot.fetch_user(payload.user_id) mention = f"<@{payload.user_id}>" - if emoji == POOP_EMOJI: await log_event( event_type="POOP", @@ -1205,25 +1120,19 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): channel_id=payload.channel_id, message_id=payload.message_id ) - await channel.send(random.choice(CONGRATS).format(user=mention)) - # remove reaction so they can click again try: await message.remove_reaction(payload.emoji, user) except discord.Forbidden: pass - count = gget_int(payload.guild_id, "poops_since_post", 0) + 1 gset_int(payload.guild_id, "poops_since_post", count) - if count >= ROTATE_EVERY: cfg = get_guild_config(payload.guild_id) if cfg: await post_button_for_guild(payload.guild_id, int(cfg["channel_id"])) - return - if emoji == UNDO_EMOJI: year = current_year_local() target = find_last_active_poop_event_id(payload.user_id, year) @@ -1240,15 +1149,11 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): target_event_id=target ) await 
channel.send(random.choice(UNDO_MSGS).format(user=mention)) - try: await message.remove_reaction(payload.emoji, user) except discord.Forbidden: pass - return - - # ========================= # STATS HELPERS # ========================= @@ -1262,22 +1167,16 @@ def circular_mean_time(minutes_list: list[float]) -> float | None: if mean_angle < 0: mean_angle += 2 * math.pi return mean_angle * 1440.0 / (2 * math.pi) - - def fmt_minutes_as_time(mins: float) -> str: mins = mins % 1440.0 h = int(mins // 60) m = int(mins % 60) return f"{h:02d}:{m:02d}" - - def _net_poop_rows_for_year(user_id: int, year: int): """Return list of (event_id, date_local, time_local, timestamp_local) for POOP not undone.""" init_year_db(year) - start_local = datetime(year, 1, 1, 0, 0, 0, tzinfo=LOCAL_TZ) end_local = datetime(year + 1, 1, 1, 0, 0, 0, tzinfo=LOCAL_TZ) - with db_year(year) as conn: poops = conn.execute(""" SELECT event_id, date_local, time_local, timestamp_local @@ -1287,12 +1186,9 @@ def _net_poop_rows_for_year(user_id: int, year: int): AND timestamp_local >= ? AND timestamp_local < ? """, (user_id, start_local.isoformat(), end_local.isoformat())).fetchall() - if not poops: return [] - poop_ids = [r["event_id"] for r in poops] - undone = conn.execute(f""" SELECT target_event_id FROM events @@ -1300,55 +1196,39 @@ def _net_poop_rows_for_year(user_id: int, year: int): AND user_id=? AND target_event_id IN ({",".join("?" 
* len(poop_ids))}) """, (user_id, *poop_ids)).fetchall() - undone_set = {r["target_event_id"] for r in undone} - net = [r for r in poops if r["event_id"] not in undone_set] return net - - def get_user_year_stats(user_id: int, year: int): net = _net_poop_rows_for_year(user_id, year) if not net: return 0, [], 0 - active_count = len(net) - first_active_date = min(date.fromisoformat(r["date_local"]) for r in net) today_local = datetime.now(LOCAL_TZ).date() days_elapsed = (today_local - first_active_date).days + 1 # inclusive - times = [] for r in net: hh, mm, ss = r["time_local"].split(":") minutes = int(hh) * 60 + int(mm) + (int(ss) / 60.0) times.append(minutes) - return active_count, times, days_elapsed - - def get_latest_poop(user_id: int, year: int) -> str | None: net = _net_poop_rows_for_year(user_id, year) if not net: return None latest = max(net, key=lambda r: r["timestamp_local"]) return latest["timestamp_local"] - - def get_max_poops_in_one_day(user_id: int, year: int) -> tuple[int, str | None]: net = _net_poop_rows_for_year(user_id, year) if not net: return 0, None - counts: dict[str, int] = {} for r in net: d = r["date_local"] counts[d] = counts.get(d, 0) + 1 - best_date = max(counts, key=lambda d: counts[d]) return counts[best_date], best_date - - # ========================= # COMMANDS (slash) # ========================= @@ -1362,8 +1242,6 @@ async def setpoopchannel(interaction: discord.Interaction): await interaction.response.send_message( f"βœ… Poop channel set to {interaction.channel.mention} for this server." 
) - - @bot.tree.command(name="disablepoop", description="Disable poop posting for this server.") @app_commands.checks.has_permissions(administrator=True) @app_commands.guild_only() @@ -1372,8 +1250,6 @@ async def disablepoop(interaction: discord.Interaction): return disable_guild(interaction.guild.id) await interaction.response.send_message("πŸ›‘ Poop posting disabled for this server.") - - @bot.tree.command(name="debugpoop", description="Force-create a new poop button message.") @app_commands.checks.has_permissions(administrator=True) @app_commands.guild_only() @@ -1389,29 +1265,22 @@ async def debugpoop(interaction: discord.Interaction): return await post_button_for_guild(interaction.guild.id, int(cfg["channel_id"])) await interaction.response.send_message("πŸ§ͺ Debug: recreated poop button.") - - @bot.tree.command(name="poopstats", description="Show your poop stats for the current year.") @app_commands.guild_only() async def poopstats(interaction: discord.Interaction): user_id = interaction.user.id year = current_year_local() - total, times, days_elapsed = get_user_year_stats(user_id, year) avg_per_day = (total / days_elapsed) if days_elapsed else 0.0 - mean_minutes = circular_mean_time(times) mean_time_str = fmt_minutes_as_time(mean_minutes) if mean_minutes is not None else "N/A" - latest = get_latest_poop(user_id, year) max_day_count, max_day_date = get_max_poops_in_one_day(user_id, year) - latest_str = ( datetime.fromisoformat(latest).strftime("%Y-%m-%d %H:%M") if latest else "N/A" ) max_day_str = f"{max_day_count} on {max_day_date}" if max_day_date else "N/A" - await interaction.response.send_message( f"**{interaction.user.mention} β€” {year} Poop Stats**\n" f"- Total poops: **{total}**\n" @@ -1420,8 +1289,6 @@ async def poopstats(interaction: discord.Interaction): f"- Latest poop: **{latest_str}**\n" f"- Most poops in one day: **{max_day_str}**" ) - - @bot.tree.command(name="featurerequest", description="Start a feature request ticket.") 
@app_commands.guild_only() async def featurerequest(interaction: discord.Interaction): @@ -1431,10 +1298,8 @@ async def featurerequest(interaction: discord.Interaction): ephemeral=True ) return - dev_user_id = get_ticket_dev_user_id() dev_member = interaction.guild.get_member(dev_user_id) if dev_user_id else None - ticket_id = await create_ticket_request( guild_id=interaction.guild.id, requester_id=interaction.user.id, @@ -1448,10 +1313,8 @@ async def featurerequest(interaction: discord.Interaction): await ticket_target.add_user(interaction.user) if dev_member: await ticket_target.add_user(dev_member) - await update_ticket_request(ticket_id, interaction.channel.id, ticket_target.id) set_ticket_target(interaction.guild.id, interaction.user.id, ticket_target.id) - dev_mention = dev_member.mention if dev_member else "" mention_line = " ".join(part for part in [interaction.user.mention, dev_mention] if part) prompt_lines = [ @@ -1468,8 +1331,6 @@ async def featurerequest(interaction: discord.Interaction): f"βœ… Created ticket #{ticket_id} in {ticket_target.mention}.", ephemeral=True ) - - @bot.tree.command(name="collab", description="Add a collaborator to the current ticket thread.") @app_commands.guild_only() @app_commands.describe(user="User to add to the ticket thread.") @@ -1480,14 +1341,12 @@ async def collab(interaction: discord.Interaction, user: discord.Member): ephemeral=True ) return - if not isinstance(interaction.channel, discord.Thread): await interaction.response.send_message( "Please use this command inside a ticket thread.", ephemeral=True ) return - ticket = get_ticket_by_thread_id(interaction.channel.id) if not ticket: await interaction.response.send_message( @@ -1495,7 +1354,6 @@ async def collab(interaction: discord.Interaction, user: discord.Member): ephemeral=True ) return - try: await interaction.channel.add_user(user) except (discord.Forbidden, discord.HTTPException): @@ -1504,23 +1362,18 @@ async def collab(interaction: discord.Interaction, 
user: discord.Member): ephemeral=True ) return - await add_ticket_collaborator(ticket["ticket_id"], user.id, interaction.user.id) await interaction.response.send_message( f"βœ… Added {user.mention} to this ticket thread." ) - - @bot.tree.command(name="closeticket", description="Close the current ticket thread.") @app_commands.guild_only() async def closeticket(interaction: discord.Interaction): if interaction.guild is None: return - dev_user_id = get_ticket_dev_user_id() if dev_user_id is None or interaction.user.id != dev_user_id: return - ticket = get_ticket_by_thread_id(interaction.channel.id) if not ticket: await interaction.response.send_message( @@ -1534,7 +1387,6 @@ async def closeticket(interaction: discord.Interaction): ephemeral=True ) return - archive_channel_id = get_ticket_archive_channel_id() if archive_channel_id is None: await interaction.response.send_message( @@ -1542,24 +1394,19 @@ async def closeticket(interaction: discord.Interaction): ephemeral=True ) return - await interaction.response.send_message( "πŸ”’ This ticket has been closed and will be archived and deleted in 24h." 
) - archive_channel = await bot.fetch_channel(archive_channel_id) thread_name = f"ticket-{ticket['ticket_id']}-archive" archive_thread = await archive_channel.create_thread( name=thread_name, type=discord.ChannelType.private_thread ) - dev_member = archive_channel.guild.get_member(dev_user_id) if dev_member: await archive_thread.add_user(dev_member) - await archive_thread.send(f"**Ticket #{ticket['ticket_id']} archive**") - allowed_ids = {ticket["requester_id"], dev_user_id} async for message in interaction.channel.history(oldest_first=True, limit=None): if message.author.id not in allowed_ids: @@ -1573,17 +1420,10 @@ async def closeticket(interaction: discord.Interaction): if not content: continue await archive_thread.send(f"**{message.author.display_name}:** {content}") - await close_ticket_request(ticket["ticket_id"], archive_thread.id) - - - - def is_dev_user(user_id: int) -> bool: dev_user_id = get_ticket_dev_user_id() return bool(dev_user_id and user_id == dev_user_id) - - @app_commands.guild_only() @bot.tree.command(name="gplay", description="Queue and play audio from a YouTube link or search term.") @app_commands.describe(youtube_link="A YouTube URL or search text.") @@ -1591,7 +1431,6 @@ async def gplay(interaction: discord.Interaction, youtube_link: str): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return - voice_channel = await ensure_voice_channel(interaction) if voice_channel is None: await interaction.response.send_message( @@ -1599,31 +1438,25 @@ async def gplay(interaction: discord.Interaction, youtube_link: str): ephemeral=True ) return - await interaction.response.defer(ephemeral=True, thinking=True) - if interaction.guild.voice_client and interaction.guild.voice_client.channel != voice_channel: await interaction.followup.send( f"You must be in {interaction.guild.voice_client.channel.mention} to control playback.", ephemeral=True ) return - source = 
youtube_link.strip() if not source: await interaction.followup.send("Please provide a YouTube link or search query.", ephemeral=True) return - if not is_youtube_url(source): source = f"ytsearch1:{source}" - if interaction.guild.voice_client is None: try: await voice_channel.connect() except discord.DiscordException as exc: await interaction.followup.send(f"Could not join voice channel: {exc}", ephemeral=True) return - fetch_started_at = time.perf_counter() try: tracks = await asyncio.wait_for( @@ -1638,18 +1471,14 @@ async def gplay(interaction: discord.Interaction, youtube_link: str): except RuntimeError as exc: await interaction.followup.send(f"Could not fetch audio: {exc}", ephemeral=True) return - for track in tracks: track.requested_by = interaction.user.id - state = get_music_state(interaction.guild.id) async with state.lock: starting_queue_size = len(state.queue) state.queue.extend(tracks) first_queue_position = starting_queue_size + 1 - await play_next_track(interaction.guild) - if len(tracks) == 1: track = tracks[0] await interaction.followup.send( @@ -1660,7 +1489,6 @@ async def gplay(interaction: discord.Interaction, youtube_link: str): ephemeral=True ) return - total_seconds = sum(track.duration_seconds for track in tracks) first_track = tracks[0] await interaction.followup.send( @@ -1672,15 +1500,12 @@ async def gplay(interaction: discord.Interaction, youtube_link: str): ), ephemeral=True ) - - @app_commands.guild_only() @bot.tree.command(name="gqueue", description="Show the current playback queue.") async def gqueue(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return - voice_channel = await ensure_voice_channel(interaction) if voice_channel is None: await interaction.response.send_message( @@ -1688,7 +1513,6 @@ async def gqueue(interaction: discord.Interaction): ephemeral=True ) return - vc = interaction.guild.voice_client if vc is not 
None and vc.channel != voice_channel: await interaction.response.send_message( @@ -1696,13 +1520,11 @@ async def gqueue(interaction: discord.Interaction): ephemeral=True ) return - state = get_music_state(interaction.guild.id) async with state.lock: current_track = state.current_track started_at = state.track_started_at queued_tracks = list(state.queue) - lines = ["**Goki Queue**"] if current_track: elapsed = 0 @@ -1717,7 +1539,6 @@ async def gqueue(interaction: discord.Interaction): ) else: lines.append("Now playing: *(nothing)*") - if queued_tracks: lines.append("\n**Up next:**") for i, track in enumerate(queued_tracks, start=1): @@ -1726,17 +1547,13 @@ async def gqueue(interaction: discord.Interaction): ) else: lines.append("\nQueue is empty.") - await interaction.response.send_message("\n".join(lines), ephemeral=True) - - @app_commands.guild_only() @bot.tree.command(name="gskip", description="Skip the currently playing track.") async def gskip(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return - voice_channel = await ensure_voice_channel(interaction) if voice_channel is None: await interaction.response.send_message( @@ -1744,26 +1561,144 @@ async def gskip(interaction: discord.Interaction): ephemeral=True ) return - vc = interaction.guild.voice_client if vc is None or not vc.is_connected(): await interaction.response.send_message("Nothing is playing right now.", ephemeral=True) return - if vc.channel != voice_channel: await interaction.response.send_message( f"You must be in {vc.channel.mention} to skip tracks.", ephemeral=True ) return - if not vc.is_playing() and not vc.is_paused(): await interaction.response.send_message("Nothing is currently playing.", ephemeral=True) return - vc.stop() await interaction.response.send_message("⏭️ Skipped current track.", ephemeral=True) - +@app_commands.guild_only() +@bot.tree.command(name="gtranscribe", 
description="Join your voice channel and start recording speakers for transcription.") +async def gtranscribe(interaction: discord.Interaction): + if interaction.guild is None: + await interaction.response.send_message("This command only works in a server.", ephemeral=True) + return + if not can_record_voice(): + await interaction.response.send_message( + "This bot runtime does not support Discord voice recording (missing `discord.sinks`).", + ephemeral=True, + ) + return + voice_channel = await ensure_voice_channel(interaction) + if voice_channel is None: + await interaction.response.send_message( + "You must be in a voice channel to start transcription.", + ephemeral=True, + ) + return + existing = get_transcription_session(interaction.guild.id) + if existing is not None: + await interaction.response.send_message( + "A transcription session is already active in this server. Use `/gendsession` first.", + ephemeral=True, + ) + return + vc = interaction.guild.voice_client + if vc is not None and vc.channel != voice_channel: + await interaction.response.send_message( + f"I am already connected to {vc.channel.mention}. 
Disconnect or move me first.", + ephemeral=True, + ) + return + if vc is None or not vc.is_connected(): + try: + vc = await voice_channel.connect() + except (discord.ClientException, discord.HTTPException): + await interaction.response.send_message("I couldn't join your voice channel.", ephemeral=True) + return + session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id) + transcription_sessions[interaction.guild.id] = session + def _recording_finished(sink: object, channel: object, *_: object): + guild = interaction.guild + if guild is None: + return + active_session = get_transcription_session(guild.id) + if active_session is None: + return + copy_recorded_audio_to_session(sink, guild, active_session) + sink = discord.sinks.WaveSink() + vc.start_recording( + sink, + _recording_finished, + interaction.channel, + ) + await interaction.response.send_message( + f"πŸŽ™οΈ Started transcription capture in {voice_channel.mention}. Use `/gendsession` when you're done.", + ephemeral=True, + ) +@app_commands.guild_only() +@bot.tree.command(name="gsetuser", description="Set a display alias for a Discord user id in the active transcription session.") +@app_commands.describe(user="User to alias", name="Alias to write in the transcript") +async def gsetuser(interaction: discord.Interaction, user: discord.Member, name: str): + if interaction.guild is None: + await interaction.response.send_message("This command only works in a server.", ephemeral=True) + return + session = get_transcription_session(interaction.guild.id) + if session is None: + await interaction.response.send_message( + "No active transcription session. 
Start one with `/gtranscribe`.", + ephemeral=True, + ) + return + alias = name.strip() + if not alias: + await interaction.response.send_message("Alias cannot be empty.", ephemeral=True) + return + session.aliases_by_user[user.id] = alias + await interaction.response.send_message( + f"βœ… Transcript alias set: `{user.id}` β†’ **{alias}**", + ephemeral=True, + ) +@app_commands.guild_only() +@bot.tree.command(name="gendsession", description="Stop recording and export the transcript text file.") +async def gendsession(interaction: discord.Interaction): + if interaction.guild is None: + await interaction.response.send_message("This command only works in a server.", ephemeral=True) + return + session = get_transcription_session(interaction.guild.id) + if session is None: + await interaction.response.send_message( + "No active transcription session in this server.", + ephemeral=True, + ) + return + vc = interaction.guild.voice_client + if vc is None or not vc.is_connected(): + remove_transcription_session(interaction.guild.id) + await interaction.response.send_message( + "The voice session was already disconnected, so no recording could be finalized.", + ephemeral=True, + ) + return + await interaction.response.defer(ephemeral=True, thinking=True) + if getattr(vc, "recording", False): + vc.stop_recording() + await asyncio.sleep(1) + transcript_path, error_message = await finalize_transcription_session(interaction, session) + try: + await vc.disconnect(force=True) + except (discord.HTTPException, discord.ClientException): + pass + if transcript_path is None: + remove_transcription_session(interaction.guild.id) + await interaction.followup.send(f"⚠️ Session ended, but transcript export failed: {error_message}", ephemeral=True) + return + transcript_file = discord.File(str(transcript_path), filename=transcript_path.name) + await interaction.followup.send( + content="πŸ“ Transcription session ended. 
Here is your transcript file.", + file=transcript_file, + ephemeral=True, + ) + remove_transcription_session(interaction.guild.id) @bot.tree.command(name="gokibothelp", description="Show all available GokiBot commands.") async def gokibothelp(interaction: discord.Interaction): command_lines = [ @@ -1774,9 +1709,11 @@ async def gokibothelp(interaction: discord.Interaction): "- `/gplay ` β€” Queue and play YouTube audio.", "- `/gqueue` β€” Show the current playback queue.", "- `/gskip` β€” Skip the currently playing track.", + "- `/gtranscribe` β€” Start recording and isolate speakers for transcription.", + "- `/gsetuser ` β€” Alias a Discord user in transcript output.", + "- `/gendsession` β€” Stop recording and export transcript text.", "- `/gokibothelp` β€” Show this help message." ] - if is_dev_user(interaction.user.id): command_lines.extend([ "", @@ -1786,10 +1723,7 @@ async def gokibothelp(interaction: discord.Interaction): "- `/debugpoop` β€” Force-create a new poop button (admin only).", "- `/closeticket` β€” Close the current ticket thread (dev only)." 
]) - await interaction.response.send_message("\n".join(command_lines), ephemeral=True) - - # ========================= # STARTUP # ========================= @@ -1798,17 +1732,14 @@ async def on_ready(): init_config_db() init_year_db(current_year_local()) init_cleanup_db() - try: await bot.tree.sync() except (discord.HTTPException, discord.Forbidden): pass - if not daily_midnight_pacific.is_running(): daily_midnight_pacific.start() if not wesroth_upload_watch.is_running(): wesroth_upload_watch.start() - # If configured guilds haven't posted today, post immediately today_local = datetime.now(LOCAL_TZ).date().isoformat() for row in get_enabled_guilds(): @@ -1820,11 +1751,7 @@ async def on_ready(): await post_button_for_guild(gid, cid) except (discord.Forbidden, discord.NotFound, discord.HTTPException): continue - print(f"Logged in as {bot.user} (id={bot.user.id})") - - if not TOKEN or TOKEN == "PUT_TOKEN_HERE_FOR_TESTING": raise RuntimeError("Set DISCORD_TOKEN_POOPBOT env var or paste token into TOKEN.") - -bot.run(TOKEN) +bot.run(TOKEN) \ No newline at end of file From 38b40e86d90d0856b15532402ea2a33475183331 Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 20:16:28 -0500 Subject: [PATCH 02/54] Swap to Pycord slash API and document voice sink dependency --- DEPLOYMENT.md | 36 ++++++++++++++++++++++ poopbot.py | 79 +++++++++++++++++++++++++----------------------- requirements.txt | 3 ++ 3 files changed, 80 insertions(+), 38 deletions(-) create mode 100644 DEPLOYMENT.md create mode 100644 requirements.txt diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..d2eaaf7 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,36 @@ +# GokiBot deployment notes + +## Python dependencies +Install dependencies from `requirements.txt`: + +```bash +python -m pip install -r requirements.txt +``` + +This bot is pinned to **Pycord** so Discord voice recording sinks are available: + +- `py-cord==2.6.1` (provides `discord.sinks`) + +Additional runtime 
dependencies used by this bot: + +- `PyNaCl==1.6.1` (voice transport encryption support) +- `python-dotenv==1.2.1` (loads `.env` values) + +## Raspberry Pi / Linux system packages +Voice/music features require FFmpeg on the host: + +```bash +sudo apt update +sudo apt install -y ffmpeg +``` + +## Environment variables +At minimum, configure: + +- `DISCORD_TOKEN` + +Optional feature variables used by ticketing and alerts: + +- `TICKET_DEV_USER_ID` +- `TICKET_ARCHIVE_CHANNEL_ID` +- `WESROTH_CHANNEL_ID` diff --git a/poopbot.py b/poopbot.py index d63912a..3996bf2 100644 --- a/poopbot.py +++ b/poopbot.py @@ -18,7 +18,6 @@ from pathlib import Path import time import discord -from discord import app_commands from discord.ext import commands, tasks try: from zoneinfo import ZoneInfo @@ -1232,9 +1231,9 @@ def get_max_poops_in_one_day(user_id: int, year: int) -> tuple[int, str | None]: # ========================= # COMMANDS (slash) # ========================= -@bot.tree.command(name="setpoopchannel", description="Set the poop logging channel for this server.") -@app_commands.checks.has_permissions(administrator=True) -@app_commands.guild_only() +@bot.slash_command(name="setpoopchannel", description="Set the poop logging channel for this server.") +@discord.default_permissions(administrator=True) +@discord.guild_only() async def setpoopchannel(interaction: discord.Interaction): if interaction.guild is None or interaction.channel is None: return @@ -1242,17 +1241,17 @@ async def setpoopchannel(interaction: discord.Interaction): await interaction.response.send_message( f"βœ… Poop channel set to {interaction.channel.mention} for this server." 
) -@bot.tree.command(name="disablepoop", description="Disable poop posting for this server.") -@app_commands.checks.has_permissions(administrator=True) -@app_commands.guild_only() +@bot.slash_command(name="disablepoop", description="Disable poop posting for this server.") +@discord.default_permissions(administrator=True) +@discord.guild_only() async def disablepoop(interaction: discord.Interaction): if interaction.guild is None: return disable_guild(interaction.guild.id) await interaction.response.send_message("πŸ›‘ Poop posting disabled for this server.") -@bot.tree.command(name="debugpoop", description="Force-create a new poop button message.") -@app_commands.checks.has_permissions(administrator=True) -@app_commands.guild_only() +@bot.slash_command(name="debugpoop", description="Force-create a new poop button message.") +@discord.default_permissions(administrator=True) +@discord.guild_only() async def debugpoop(interaction: discord.Interaction): """Force-create a new poop button message in this guild's configured channel.""" if interaction.guild is None: @@ -1265,8 +1264,8 @@ async def debugpoop(interaction: discord.Interaction): return await post_button_for_guild(interaction.guild.id, int(cfg["channel_id"])) await interaction.response.send_message("πŸ§ͺ Debug: recreated poop button.") -@bot.tree.command(name="poopstats", description="Show your poop stats for the current year.") -@app_commands.guild_only() +@bot.slash_command(name="poopstats", description="Show your poop stats for the current year.") +@discord.guild_only() async def poopstats(interaction: discord.Interaction): user_id = interaction.user.id year = current_year_local() @@ -1289,8 +1288,8 @@ async def poopstats(interaction: discord.Interaction): f"- Latest poop: **{latest_str}**\n" f"- Most poops in one day: **{max_day_str}**" ) -@bot.tree.command(name="featurerequest", description="Start a feature request ticket.") -@app_commands.guild_only() +@bot.slash_command(name="featurerequest", 
description="Start a feature request ticket.") +@discord.guild_only() async def featurerequest(interaction: discord.Interaction): if interaction.guild is None or interaction.channel is None: await interaction.response.send_message( @@ -1331,10 +1330,10 @@ async def featurerequest(interaction: discord.Interaction): f"βœ… Created ticket #{ticket_id} in {ticket_target.mention}.", ephemeral=True ) -@bot.tree.command(name="collab", description="Add a collaborator to the current ticket thread.") -@app_commands.guild_only() -@app_commands.describe(user="User to add to the ticket thread.") -async def collab(interaction: discord.Interaction, user: discord.Member): +@bot.slash_command(name="collab", description="Add a collaborator to the current ticket thread.") +@discord.guild_only() + +async def collab(interaction: discord.Interaction, user: discord.Option(discord.Member, "User to add to the ticket thread.")): if interaction.guild is None: await interaction.response.send_message( "This command can only be used in a server.", @@ -1366,8 +1365,8 @@ async def collab(interaction: discord.Interaction, user: discord.Member): await interaction.response.send_message( f"βœ… Added {user.mention} to this ticket thread." 
) -@bot.tree.command(name="closeticket", description="Close the current ticket thread.") -@app_commands.guild_only() +@bot.slash_command(name="closeticket", description="Close the current ticket thread.") +@discord.guild_only() async def closeticket(interaction: discord.Interaction): if interaction.guild is None: return @@ -1424,10 +1423,10 @@ async def closeticket(interaction: discord.Interaction): def is_dev_user(user_id: int) -> bool: dev_user_id = get_ticket_dev_user_id() return bool(dev_user_id and user_id == dev_user_id) -@app_commands.guild_only() -@bot.tree.command(name="gplay", description="Queue and play audio from a YouTube link or search term.") -@app_commands.describe(youtube_link="A YouTube URL or search text.") -async def gplay(interaction: discord.Interaction, youtube_link: str): +@discord.guild_only() +@bot.slash_command(name="gplay", description="Queue and play audio from a YouTube link or search term.") + +async def gplay(interaction: discord.Interaction, youtube_link: discord.Option(str, "A YouTube URL or search text.")): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return @@ -1500,8 +1499,8 @@ async def gplay(interaction: discord.Interaction, youtube_link: str): ), ephemeral=True ) -@app_commands.guild_only() -@bot.tree.command(name="gqueue", description="Show the current playback queue.") +@discord.guild_only() +@bot.slash_command(name="gqueue", description="Show the current playback queue.") async def gqueue(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1548,8 +1547,8 @@ async def gqueue(interaction: discord.Interaction): else: lines.append("\nQueue is empty.") await interaction.response.send_message("\n".join(lines), ephemeral=True) -@app_commands.guild_only() -@bot.tree.command(name="gskip", description="Skip the currently playing track.") 
+@discord.guild_only() +@bot.slash_command(name="gskip", description="Skip the currently playing track.") async def gskip(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1576,8 +1575,8 @@ async def gskip(interaction: discord.Interaction): return vc.stop() await interaction.response.send_message("⏭️ Skipped current track.", ephemeral=True) -@app_commands.guild_only() -@bot.tree.command(name="gtranscribe", description="Join your voice channel and start recording speakers for transcription.") +@discord.guild_only() +@bot.slash_command(name="gtranscribe", description="Join your voice channel and start recording speakers for transcription.") async def gtranscribe(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1635,10 +1634,14 @@ def _recording_finished(sink: object, channel: object, *_: object): f"πŸŽ™οΈ Started transcription capture in {voice_channel.mention}. 
Use `/gendsession` when you're done.", ephemeral=True, ) -@app_commands.guild_only() -@bot.tree.command(name="gsetuser", description="Set a display alias for a Discord user id in the active transcription session.") -@app_commands.describe(user="User to alias", name="Alias to write in the transcript") -async def gsetuser(interaction: discord.Interaction, user: discord.Member, name: str): +@discord.guild_only() +@bot.slash_command(name="gsetuser", description="Set a display alias for a Discord user id in the active transcription session.") + +async def gsetuser( + interaction: discord.Interaction, + user: discord.Option(discord.Member, "User to alias"), + name: discord.Option(str, "Alias to write in the transcript"), +): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return @@ -1658,8 +1661,8 @@ async def gsetuser(interaction: discord.Interaction, user: discord.Member, name: f"βœ… Transcript alias set: `{user.id}` β†’ **{alias}**", ephemeral=True, ) -@app_commands.guild_only() -@bot.tree.command(name="gendsession", description="Stop recording and export the transcript text file.") +@discord.guild_only() +@bot.slash_command(name="gendsession", description="Stop recording and export the transcript text file.") async def gendsession(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1699,7 +1702,7 @@ async def gendsession(interaction: discord.Interaction): ephemeral=True, ) remove_transcription_session(interaction.guild.id) -@bot.tree.command(name="gokibothelp", description="Show all available GokiBot commands.") +@bot.slash_command(name="gokibothelp", description="Show all available GokiBot commands.") async def gokibothelp(interaction: discord.Interaction): command_lines = [ "**GokiBot Commands**", @@ -1733,7 +1736,7 @@ async def on_ready(): init_year_db(current_year_local()) 
init_cleanup_db() try: - await bot.tree.sync() + await bot.sync_commands() except (discord.HTTPException, discord.Forbidden): pass if not daily_midnight_pacific.is_running(): diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f138a47 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +py-cord==2.6.1 +python-dotenv==1.2.1 +PyNaCl==1.6.1 From 1863d35c71247f4224853d3fd5a031466eb24582 Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Wed, 18 Feb 2026 20:26:19 -0500 Subject: [PATCH 03/54] added req --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f138a47..73ed8bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ py-cord==2.6.1 python-dotenv==1.2.1 PyNaCl==1.6.1 +audioop-lts From 5aa009de2f9a62737ba855f7df17fa5c29dcebdc Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 20:35:18 -0500 Subject: [PATCH 04/54] Fix gtranscribe timeout and voice-start failure handling --- DEPLOYMENT.md | 36 +++++++++++++ poopbot.py | 130 +++++++++++++++++++++++++++++------------------ requirements.txt | 3 ++ 3 files changed, 119 insertions(+), 50 deletions(-) create mode 100644 DEPLOYMENT.md create mode 100644 requirements.txt diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..d2eaaf7 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,36 @@ +# GokiBot deployment notes + +## Python dependencies +Install dependencies from `requirements.txt`: + +```bash +python -m pip install -r requirements.txt +``` + +This bot is pinned to **Pycord** so Discord voice recording sinks are available: + +- `py-cord==2.6.1` (provides `discord.sinks`) + +Additional runtime dependencies used by this bot: + +- `PyNaCl==1.6.1` (voice transport encryption support) +- `python-dotenv==1.2.1` (loads `.env` values) + +## Raspberry Pi / Linux system packages +Voice/music features require FFmpeg on the host: + +```bash +sudo apt update +sudo apt install -y ffmpeg +``` + +## 
Environment variables +At minimum, configure: + +- `DISCORD_TOKEN` + +Optional feature variables used by ticketing and alerts: + +- `TICKET_DEV_USER_ID` +- `TICKET_ARCHIVE_CHANNEL_ID` +- `WESROTH_CHANNEL_ID` diff --git a/poopbot.py b/poopbot.py index d63912a..076c130 100644 --- a/poopbot.py +++ b/poopbot.py @@ -18,7 +18,6 @@ from pathlib import Path import time import discord -from discord import app_commands from discord.ext import commands, tasks try: from zoneinfo import ZoneInfo @@ -1232,9 +1231,9 @@ def get_max_poops_in_one_day(user_id: int, year: int) -> tuple[int, str | None]: # ========================= # COMMANDS (slash) # ========================= -@bot.tree.command(name="setpoopchannel", description="Set the poop logging channel for this server.") -@app_commands.checks.has_permissions(administrator=True) -@app_commands.guild_only() +@bot.slash_command(name="setpoopchannel", description="Set the poop logging channel for this server.") +@discord.default_permissions(administrator=True) +@discord.guild_only() async def setpoopchannel(interaction: discord.Interaction): if interaction.guild is None or interaction.channel is None: return @@ -1242,17 +1241,17 @@ async def setpoopchannel(interaction: discord.Interaction): await interaction.response.send_message( f"βœ… Poop channel set to {interaction.channel.mention} for this server." 
) -@bot.tree.command(name="disablepoop", description="Disable poop posting for this server.") -@app_commands.checks.has_permissions(administrator=True) -@app_commands.guild_only() +@bot.slash_command(name="disablepoop", description="Disable poop posting for this server.") +@discord.default_permissions(administrator=True) +@discord.guild_only() async def disablepoop(interaction: discord.Interaction): if interaction.guild is None: return disable_guild(interaction.guild.id) await interaction.response.send_message("πŸ›‘ Poop posting disabled for this server.") -@bot.tree.command(name="debugpoop", description="Force-create a new poop button message.") -@app_commands.checks.has_permissions(administrator=True) -@app_commands.guild_only() +@bot.slash_command(name="debugpoop", description="Force-create a new poop button message.") +@discord.default_permissions(administrator=True) +@discord.guild_only() async def debugpoop(interaction: discord.Interaction): """Force-create a new poop button message in this guild's configured channel.""" if interaction.guild is None: @@ -1265,8 +1264,8 @@ async def debugpoop(interaction: discord.Interaction): return await post_button_for_guild(interaction.guild.id, int(cfg["channel_id"])) await interaction.response.send_message("πŸ§ͺ Debug: recreated poop button.") -@bot.tree.command(name="poopstats", description="Show your poop stats for the current year.") -@app_commands.guild_only() +@bot.slash_command(name="poopstats", description="Show your poop stats for the current year.") +@discord.guild_only() async def poopstats(interaction: discord.Interaction): user_id = interaction.user.id year = current_year_local() @@ -1289,8 +1288,8 @@ async def poopstats(interaction: discord.Interaction): f"- Latest poop: **{latest_str}**\n" f"- Most poops in one day: **{max_day_str}**" ) -@bot.tree.command(name="featurerequest", description="Start a feature request ticket.") -@app_commands.guild_only() +@bot.slash_command(name="featurerequest", 
description="Start a feature request ticket.") +@discord.guild_only() async def featurerequest(interaction: discord.Interaction): if interaction.guild is None or interaction.channel is None: await interaction.response.send_message( @@ -1331,10 +1330,10 @@ async def featurerequest(interaction: discord.Interaction): f"βœ… Created ticket #{ticket_id} in {ticket_target.mention}.", ephemeral=True ) -@bot.tree.command(name="collab", description="Add a collaborator to the current ticket thread.") -@app_commands.guild_only() -@app_commands.describe(user="User to add to the ticket thread.") -async def collab(interaction: discord.Interaction, user: discord.Member): +@bot.slash_command(name="collab", description="Add a collaborator to the current ticket thread.") +@discord.guild_only() + +async def collab(interaction: discord.Interaction, user: discord.Option(discord.Member, "User to add to the ticket thread.")): if interaction.guild is None: await interaction.response.send_message( "This command can only be used in a server.", @@ -1366,8 +1365,8 @@ async def collab(interaction: discord.Interaction, user: discord.Member): await interaction.response.send_message( f"βœ… Added {user.mention} to this ticket thread." 
) -@bot.tree.command(name="closeticket", description="Close the current ticket thread.") -@app_commands.guild_only() +@bot.slash_command(name="closeticket", description="Close the current ticket thread.") +@discord.guild_only() async def closeticket(interaction: discord.Interaction): if interaction.guild is None: return @@ -1424,10 +1423,10 @@ async def closeticket(interaction: discord.Interaction): def is_dev_user(user_id: int) -> bool: dev_user_id = get_ticket_dev_user_id() return bool(dev_user_id and user_id == dev_user_id) -@app_commands.guild_only() -@bot.tree.command(name="gplay", description="Queue and play audio from a YouTube link or search term.") -@app_commands.describe(youtube_link="A YouTube URL or search text.") -async def gplay(interaction: discord.Interaction, youtube_link: str): +@discord.guild_only() +@bot.slash_command(name="gplay", description="Queue and play audio from a YouTube link or search term.") + +async def gplay(interaction: discord.Interaction, youtube_link: discord.Option(str, "A YouTube URL or search text.")): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return @@ -1500,8 +1499,8 @@ async def gplay(interaction: discord.Interaction, youtube_link: str): ), ephemeral=True ) -@app_commands.guild_only() -@bot.tree.command(name="gqueue", description="Show the current playback queue.") +@discord.guild_only() +@bot.slash_command(name="gqueue", description="Show the current playback queue.") async def gqueue(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1548,8 +1547,8 @@ async def gqueue(interaction: discord.Interaction): else: lines.append("\nQueue is empty.") await interaction.response.send_message("\n".join(lines), ephemeral=True) -@app_commands.guild_only() -@bot.tree.command(name="gskip", description="Skip the currently playing track.") 
+@discord.guild_only() +@bot.slash_command(name="gskip", description="Skip the currently playing track.") async def gskip(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1576,8 +1575,8 @@ async def gskip(interaction: discord.Interaction): return vc.stop() await interaction.response.send_message("⏭️ Skipped current track.", ephemeral=True) -@app_commands.guild_only() -@bot.tree.command(name="gtranscribe", description="Join your voice channel and start recording speakers for transcription.") +@discord.guild_only() +@bot.slash_command(name="gtranscribe", description="Join your voice channel and start recording speakers for transcription.") async def gtranscribe(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1602,21 +1601,33 @@ async def gtranscribe(interaction: discord.Interaction): ephemeral=True, ) return + await interaction.response.defer(ephemeral=True, thinking=True) vc = interaction.guild.voice_client + connected_here = False if vc is not None and vc.channel != voice_channel: - await interaction.response.send_message( + await interaction.followup.send( f"I am already connected to {vc.channel.mention}. Disconnect or move me first.", ephemeral=True, ) return if vc is None or not vc.is_connected(): try: - vc = await voice_channel.connect() - except (discord.ClientException, discord.HTTPException): - await interaction.response.send_message("I couldn't join your voice channel.", ephemeral=True) + vc = await voice_channel.connect(timeout=15.0, reconnect=True) + connected_here = True + except (discord.ClientException, discord.HTTPException, asyncio.TimeoutError) as exc: + print(f"[transcribe] voice connect failed: {type(exc).__name__}: {exc}") + await interaction.followup.send( + "I couldn't join your voice channel. 
Confirm I have **Connect/Speak** permissions and that PyNaCl is installed (`pip install -r requirements.txt`).", + ephemeral=True, + ) return + if vc is None: + await interaction.followup.send("I couldn't initialize a voice client.", ephemeral=True) + return + if getattr(vc, "recording", False): + await interaction.followup.send("I am already recording in this server.", ephemeral=True) + return session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id) - transcription_sessions[interaction.guild.id] = session def _recording_finished(sink: object, channel: object, *_: object): guild = interaction.guild if guild is None: @@ -1625,20 +1636,39 @@ def _recording_finished(sink: object, channel: object, *_: object): if active_session is None: return copy_recorded_audio_to_session(sink, guild, active_session) - sink = discord.sinks.WaveSink() - vc.start_recording( - sink, - _recording_finished, - interaction.channel, - ) - await interaction.response.send_message( + try: + sink = discord.sinks.WaveSink() + vc.start_recording( + sink, + _recording_finished, + interaction.channel, + ) + except Exception as exc: + print(f"[transcribe] start_recording failed: {type(exc).__name__}: {exc}") + shutil.rmtree(session.temp_dir, ignore_errors=True) + if connected_here: + try: + await vc.disconnect(force=True) + except (discord.HTTPException, discord.ClientException): + pass + await interaction.followup.send( + f"I joined voice but couldn't start recording: `{type(exc).__name__}`. Ensure Opus/PyNaCl are available on the host.", + ephemeral=True, + ) + return + transcription_sessions[interaction.guild.id] = session + await interaction.followup.send( f"πŸŽ™οΈ Started transcription capture in {voice_channel.mention}. 
Use `/gendsession` when you're done.", ephemeral=True, ) -@app_commands.guild_only() -@bot.tree.command(name="gsetuser", description="Set a display alias for a Discord user id in the active transcription session.") -@app_commands.describe(user="User to alias", name="Alias to write in the transcript") -async def gsetuser(interaction: discord.Interaction, user: discord.Member, name: str): +@discord.guild_only() +@bot.slash_command(name="gsetuser", description="Set a display alias for a Discord user id in the active transcription session.") + +async def gsetuser( + interaction: discord.Interaction, + user: discord.Option(discord.Member, "User to alias"), + name: discord.Option(str, "Alias to write in the transcript"), +): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return @@ -1658,8 +1688,8 @@ async def gsetuser(interaction: discord.Interaction, user: discord.Member, name: f"βœ… Transcript alias set: `{user.id}` β†’ **{alias}**", ephemeral=True, ) -@app_commands.guild_only() -@bot.tree.command(name="gendsession", description="Stop recording and export the transcript text file.") +@discord.guild_only() +@bot.slash_command(name="gendsession", description="Stop recording and export the transcript text file.") async def gendsession(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -1699,7 +1729,7 @@ async def gendsession(interaction: discord.Interaction): ephemeral=True, ) remove_transcription_session(interaction.guild.id) -@bot.tree.command(name="gokibothelp", description="Show all available GokiBot commands.") +@bot.slash_command(name="gokibothelp", description="Show all available GokiBot commands.") async def gokibothelp(interaction: discord.Interaction): command_lines = [ "**GokiBot Commands**", @@ -1733,7 +1763,7 @@ async def on_ready(): init_year_db(current_year_local()) 
init_cleanup_db() try: - await bot.tree.sync() + await bot.sync_commands() except (discord.HTTPException, discord.Forbidden): pass if not daily_midnight_pacific.is_running(): diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f138a47 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +py-cord==2.6.1 +python-dotenv==1.2.1 +PyNaCl==1.6.1 From 91e264d88ce27b212e4c09cca67848c4bd1e7d3b Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Wed, 18 Feb 2026 20:43:10 -0500 Subject: [PATCH 05/54] Removed thinking from await interaction.response.defer --- poopbot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/poopbot.py b/poopbot.py index 3996bf2..0aa4dbe 100644 --- a/poopbot.py +++ b/poopbot.py @@ -1437,7 +1437,8 @@ async def gplay(interaction: discord.Interaction, youtube_link: discord.Option(s ephemeral=True ) return - await interaction.response.defer(ephemeral=True, thinking=True) + await interaction.response.defer(ephemeral=True) + await interaction.followup.send("Working…", ephemeral=True) if interaction.guild.voice_client and interaction.guild.voice_client.channel != voice_channel: await interaction.followup.send( f"You must be in {interaction.guild.voice_client.channel.mention} to control playback.", @@ -1682,7 +1683,8 @@ async def gendsession(interaction: discord.Interaction): ephemeral=True, ) return - await interaction.response.defer(ephemeral=True, thinking=True) + await interaction.response.defer(ephemeral=True) + await interaction.followup.send("Working…", ephemeral=True) if getattr(vc, "recording", False): vc.stop_recording() await asyncio.sleep(1) From d5ccfac132ee20e071ee0de579e39950a1155130 Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Wed, 18 Feb 2026 20:44:25 -0500 Subject: [PATCH 06/54] await removed thinking --- poopbot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index 23b64c9..d984fc6 100644 --- a/poopbot.py +++ b/poopbot.py @@ 
-1602,7 +1602,8 @@ async def gtranscribe(interaction: discord.Interaction): ephemeral=True, ) return - await interaction.response.defer(ephemeral=True, thinking=True) + await interaction.response.defer(ephemeral=True) + await interaction.followup.send("Working…", ephemeral=True) vc = interaction.guild.voice_client connected_here = False if vc is not None and vc.channel != voice_channel: From c15d28a59b3de84ba7c35f5d81d2b4544fa41581 Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Wed, 18 Feb 2026 20:50:11 -0500 Subject: [PATCH 07/54] forceload opus --- poopbot.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/poopbot.py b/poopbot.py index d984fc6..a81bfee 100644 --- a/poopbot.py +++ b/poopbot.py @@ -24,6 +24,13 @@ from zoneinfo import ZoneInfoNotFoundError except ImportError: raise RuntimeError("Python 3.9+ required for zoneinfo") +# Ensure Opus is available for voice/recording on Linux +try: + if not discord.opus.is_loaded(): + discord.opus.load_opus("libopus.so.0") +except Exception as e: + print(f"[voice] Failed to load Opus: {e}") + # ========================= # CONFIG # ========================= From 929625c88c751e72523a78295fc3b261b53c4a61 Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 20:57:27 -0500 Subject: [PATCH 08/54] Improve voice recording dependency checks and Opus loading --- poopbot.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/poopbot.py b/poopbot.py index a81bfee..e822986 100644 --- a/poopbot.py +++ b/poopbot.py @@ -11,6 +11,7 @@ import tempfile import shutil import importlib.util +import ctypes.util import xml.etree.ElementTree as ET from collections import deque from dataclasses import dataclass @@ -25,11 +26,18 @@ except ImportError: raise RuntimeError("Python 3.9+ required for zoneinfo") # Ensure Opus is available for voice/recording on Linux -try: - if not discord.opus.is_loaded(): - discord.opus.load_opus("libopus.so.0") -except Exception as e: - 
print(f"[voice] Failed to load Opus: {e}") +_OPUS_LOAD_ERROR: str | None = None +if not discord.opus.is_loaded(): + for opus_lib_name in ("libopus.so.0", "libopus.so", ctypes.util.find_library("opus")): + if not opus_lib_name: + continue + try: + discord.opus.load_opus(opus_lib_name) + break + except Exception as e: + _OPUS_LOAD_ERROR = str(e) +if not discord.opus.is_loaded() and _OPUS_LOAD_ERROR: + print(f"[voice] Failed to load Opus: {_OPUS_LOAD_ERROR}") # ========================= # CONFIG @@ -276,8 +284,14 @@ def resolve_display_name(guild: discord.Guild | None, user_id: int, aliases_by_u if member is not None: return member.display_name return str(user_id) -def can_record_voice() -> bool: - return hasattr(discord, "sinks") and hasattr(discord.sinks, "WaveSink") +def can_record_voice() -> tuple[bool, str]: + if not hasattr(discord, "sinks") or not hasattr(discord.sinks, "WaveSink"): + return False, "This runtime is missing discord voice sinks support." + if importlib.util.find_spec("nacl") is None: + return False, "PyNaCl is not installed in this Python environment." + if not discord.opus.is_loaded(): + return False, "Opus is not loaded (missing libopus on host)." 
+ return True, "" def get_whisper_transcriber() -> tuple[str | None, object | None]: if importlib.util.find_spec("faster_whisper") is not None: from faster_whisper import WhisperModel @@ -1589,9 +1603,10 @@ async def gtranscribe(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return - if not can_record_voice(): + can_record, record_error = can_record_voice() + if not can_record: await interaction.response.send_message( - "This bot runtime does not support Discord voice recording (missing `discord.sinks`).", + f"Voice recording is unavailable: {record_error} Install dependencies with `pip install -r requirements.txt` and ensure system Opus is installed (for Debian/Ubuntu: `sudo apt install libopus0`).", ephemeral=True, ) return @@ -1661,7 +1676,7 @@ def _recording_finished(sink: object, channel: object, *_: object): except (discord.HTTPException, discord.ClientException): pass await interaction.followup.send( - f"I joined voice but couldn't start recording: `{type(exc).__name__}`. Ensure Opus/PyNaCl are available on the host.", + f"I joined voice but couldn't start recording: `{type(exc).__name__}`. 
Ensure `PyNaCl` is installed in the bot venv and system Opus is available (`libopus0`/`libopus.so`).", ephemeral=True, ) return @@ -1794,4 +1809,4 @@ async def on_ready(): print(f"Logged in as {bot.user} (id={bot.user.id})") if not TOKEN or TOKEN == "PUT_TOKEN_HERE_FOR_TESTING": raise RuntimeError("Set DISCORD_TOKEN_POOPBOT env var or paste token into TOKEN.") -bot.run(TOKEN) \ No newline at end of file +bot.run(TOKEN) From 386a2fbfb7d9f43b96232c264a31498f2fad831d Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 21:15:02 -0500 Subject: [PATCH 09/54] Harden gtranscribe against voice websocket race --- poopbot.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/poopbot.py b/poopbot.py index e822986..6039cb3 100644 --- a/poopbot.py +++ b/poopbot.py @@ -580,6 +580,19 @@ async def ensure_voice_channel(interaction: discord.Interaction) -> discord.Voic if not isinstance(member.voice.channel, discord.VoiceChannel): return None return member.voice.channel + + +async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: float = 5.0) -> bool: + """Wait for the voice websocket handshake to be ready for recording/playback.""" + deadline = time.monotonic() + timeout_seconds + while time.monotonic() < deadline: + if not vc.is_connected(): + return False + ws = getattr(vc, "ws", None) + if ws and hasattr(ws, "poll_event"): + return True + await asyncio.sleep(0.1) + return False async def play_next_track(guild: discord.Guild): voice_client = guild.voice_client if voice_client is None: @@ -1648,6 +1661,17 @@ async def gtranscribe(interaction: discord.Interaction): if vc is None: await interaction.followup.send("I couldn't initialize a voice client.", ephemeral=True) return + if not await wait_for_voice_client_ready(vc): + if connected_here: + try: + await vc.disconnect(force=True) + except (discord.HTTPException, discord.ClientException): + pass + await interaction.followup.send( + "I connected to voice, but Discord voice 
wasn't ready in time. Please try `/gtranscribe` again.", + ephemeral=True, + ) + return if getattr(vc, "recording", False): await interaction.followup.send("I am already recording in this server.", ephemeral=True) return From f863737e65d07294236daa4c529b52543f68d3e9 Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 21:22:07 -0500 Subject: [PATCH 10/54] Increase voice readiness timeout for transcription --- poopbot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poopbot.py b/poopbot.py index 6039cb3..d660d13 100644 --- a/poopbot.py +++ b/poopbot.py @@ -582,7 +582,7 @@ async def ensure_voice_channel(interaction: discord.Interaction) -> discord.Voic return member.voice.channel -async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: float = 5.0) -> bool: +async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: float = 12.0) -> bool: """Wait for the voice websocket handshake to be ready for recording/playback.""" deadline = time.monotonic() + timeout_seconds while time.monotonic() < deadline: @@ -1668,7 +1668,7 @@ async def gtranscribe(interaction: discord.Interaction): except (discord.HTTPException, discord.ClientException): pass await interaction.followup.send( - "I connected to voice, but Discord voice wasn't ready in time. Please try `/gtranscribe` again.", + "I connected to voice, but Discord voice setup took too long. 
Please try `/gtranscribe` again.", ephemeral=True, ) return From 48b5be2d00ae5705f8f42032220cc0b868ab564f Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Wed, 18 Feb 2026 21:24:48 -0500 Subject: [PATCH 11/54] 120 --- poopbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index d660d13..c1fefd9 100644 --- a/poopbot.py +++ b/poopbot.py @@ -582,7 +582,7 @@ async def ensure_voice_channel(interaction: discord.Interaction) -> discord.Voic return member.voice.channel -async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: float = 12.0) -> bool: +async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: float = 120.0) -> bool: """Wait for the voice websocket handshake to be ready for recording/playback.""" deadline = time.monotonic() + timeout_seconds while time.monotonic() < deadline: From d379595977c5449419d4c90e5120985579f01ad9 Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 21:32:32 -0500 Subject: [PATCH 12/54] Improve voice readiness checks for transcription startup --- poopbot.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index c1fefd9..33496ee 100644 --- a/poopbot.py +++ b/poopbot.py @@ -588,11 +588,39 @@ async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: while time.monotonic() < deadline: if not vc.is_connected(): return False + + connected_event = getattr(vc, "_connected", None) + if connected_event is not None and hasattr(connected_event, "is_set") and connected_event.is_set(): + return True + ws = getattr(vc, "ws", None) - if ws and hasattr(ws, "poll_event"): + if ws is not None: return True + + if getattr(vc, "channel", None) is not None and getattr(vc, "guild", None) is not None: + guild_voice_client = vc.guild.voice_client + if guild_voice_client is vc: + return True + await asyncio.sleep(0.1) return False + + +def 
describe_voice_client_state(vc: discord.VoiceClient) -> str: + connected_event = getattr(vc, "_connected", None) + connected_event_set = ( + connected_event.is_set() if connected_event is not None and hasattr(connected_event, "is_set") else None + ) + ws = getattr(vc, "ws", None) + channel = getattr(vc, "channel", None) + return ( + f"connected={vc.is_connected()} " + f"event_set={connected_event_set} " + f"ws={type(ws).__name__ if ws is not None else None} " + f"channel_id={getattr(channel, 'id', None)}" + ) + + async def play_next_track(guild: discord.Guild): voice_client = guild.voice_client if voice_client is None: @@ -1662,6 +1690,7 @@ async def gtranscribe(interaction: discord.Interaction): await interaction.followup.send("I couldn't initialize a voice client.", ephemeral=True) return if not await wait_for_voice_client_ready(vc): + print(f"[transcribe] voice client not ready after wait: {describe_voice_client_state(vc)}") if connected_here: try: await vc.disconnect(force=True) From b60744b1ec77570351083820dac89a9f8e04f821 Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 21:35:21 -0500 Subject: [PATCH 13/54] Harden voice readiness check against sentinel ws values --- poopbot.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index c1fefd9..2f0e572 100644 --- a/poopbot.py +++ b/poopbot.py @@ -588,11 +588,39 @@ async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: while time.monotonic() < deadline: if not vc.is_connected(): return False + ws = getattr(vc, "ws", None) - if ws and hasattr(ws, "poll_event"): + if ws is not None and hasattr(ws, "poll_event") and callable(getattr(ws, "poll_event")): return True + + connected_event = getattr(vc, "_connected", None) + if connected_event is not None and hasattr(connected_event, "is_set") and connected_event.is_set(): + # py-cord may expose the connected event before ws assignment; keep waiting briefly + # 
unless ws is already a valid websocket object with poll_event. + await asyncio.sleep(0.1) + continue + await asyncio.sleep(0.1) return False + + +def describe_voice_client_state(vc: discord.VoiceClient) -> str: + connected_event = getattr(vc, "_connected", None) + connected_event_set = ( + connected_event.is_set() if connected_event is not None and hasattr(connected_event, "is_set") else None + ) + ws = getattr(vc, "ws", None) + ws_has_poll_event = hasattr(ws, "poll_event") and callable(getattr(ws, "poll_event", None)) + channel = getattr(vc, "channel", None) + return ( + f"connected={vc.is_connected()} " + f"event_set={connected_event_set} " + f"ws={type(ws).__name__ if ws is not None else None} " + f"ws_ready={ws_has_poll_event} " + f"channel_id={getattr(channel, 'id', None)}" + ) + + async def play_next_track(guild: discord.Guild): voice_client = guild.voice_client if voice_client is None: @@ -1662,6 +1690,7 @@ async def gtranscribe(interaction: discord.Interaction): await interaction.followup.send("I couldn't initialize a voice client.", ephemeral=True) return if not await wait_for_voice_client_ready(vc): + print(f"[transcribe] voice client not ready after wait: {describe_voice_client_state(vc)}") if connected_here: try: await vc.disconnect(force=True) From 8aeb4bd7ba73d5248a063651cff27faf2e2d24bb Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 21:42:14 -0500 Subject: [PATCH 14/54] Harden voice websocket readiness checks --- poopbot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index 33496ee..621fb0d 100644 --- a/poopbot.py +++ b/poopbot.py @@ -594,7 +594,7 @@ async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: return True ws = getattr(vc, "ws", None) - if ws is not None: + if ws is not None and callable(getattr(ws, "poll_event", None)): return True if getattr(vc, "channel", None) is not None and getattr(vc, "guild", None) is not None: @@ -612,11 +612,13 @@ def 
describe_voice_client_state(vc: discord.VoiceClient) -> str: connected_event.is_set() if connected_event is not None and hasattr(connected_event, "is_set") else None ) ws = getattr(vc, "ws", None) + ws_ready = ws is not None and callable(getattr(ws, "poll_event", None)) channel = getattr(vc, "channel", None) return ( f"connected={vc.is_connected()} " f"event_set={connected_event_set} " f"ws={type(ws).__name__ if ws is not None else None} " + f"ws_ready={ws_ready} " f"channel_id={getattr(channel, 'id', None)}" ) From ff95943a901125f42beb0d7ab2c9aed89fb3143f Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 21:55:00 -0500 Subject: [PATCH 15/54] Add yt-dlp and faster-whisper deployment/transcription guidance --- DEPLOYMENT.md | 10 ++++++++-- poopbot.py | 8 ++++---- requirements.txt | 2 ++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index d2eaaf7..44ac43a 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -16,12 +16,18 @@ Additional runtime dependencies used by this bot: - `PyNaCl==1.6.1` (voice transport encryption support) - `python-dotenv==1.2.1` (loads `.env` values) +## Required for music +Music playback and URL resolution commands depend on `yt-dlp` (installed from `requirements.txt`) and system `ffmpeg`. + +## Required for transcription export +Transcript export uses a local Whisper backend. This project installs `faster-whisper` from `requirements.txt` (recommended backend). 
+ ## Raspberry Pi / Linux system packages -Voice/music features require FFmpeg on the host: +Voice/music features require FFmpeg and Opus on the host: ```bash sudo apt update -sudo apt install -y ffmpeg +sudo apt install -y ffmpeg libopus0 ``` ## Environment variables diff --git a/poopbot.py b/poopbot.py index 621fb0d..a6c71c0 100644 --- a/poopbot.py +++ b/poopbot.py @@ -352,8 +352,8 @@ async def finalize_transcription_session( engine_name, engine = get_whisper_transcriber() if engine is None or engine_name is None: return None, ( - "No local transcription engine was found. Install `faster-whisper` (recommended) " - "or `openai-whisper` on the host." + "No local transcription engine was found. Install dependencies with " + "`pip install -r requirements.txt` (includes `faster-whisper`)." ) transcript_path = session.temp_dir / f"transcript-{guild.id}-{int(time.time())}.txt" lines = [ @@ -1684,7 +1684,7 @@ async def gtranscribe(interaction: discord.Interaction): except (discord.ClientException, discord.HTTPException, asyncio.TimeoutError) as exc: print(f"[transcribe] voice connect failed: {type(exc).__name__}: {exc}") await interaction.followup.send( - "I couldn't join your voice channel. Confirm I have **Connect/Speak** permissions and that PyNaCl is installed (`pip install -r requirements.txt`).", + "I couldn't join your voice channel. Confirm I have **Connect/Speak** permissions and that dependencies are installed (`pip install -r requirements.txt`).", ephemeral=True, ) return @@ -1731,7 +1731,7 @@ def _recording_finished(sink: object, channel: object, *_: object): except (discord.HTTPException, discord.ClientException): pass await interaction.followup.send( - f"I joined voice but couldn't start recording: `{type(exc).__name__}`. Ensure `PyNaCl` is installed in the bot venv and system Opus is available (`libopus0`/`libopus.so`).", + f"I joined voice but couldn't start recording: `{type(exc).__name__}`. 
Ensure dependencies are installed (`pip install -r requirements.txt`) and system Opus is available (`libopus0`/`libopus.so`).", ephemeral=True, ) return diff --git a/requirements.txt b/requirements.txt index 73ed8bc..eb4a55b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ py-cord==2.6.1 python-dotenv==1.2.1 PyNaCl==1.6.1 +yt-dlp +faster-whisper audioop-lts From 0b5a5e2bb350e4b571c370911a7f929b0961feef Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 21:57:53 -0500 Subject: [PATCH 16/54] Harden voice client readiness checks for gtranscribe --- poopbot.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/poopbot.py b/poopbot.py index a6c71c0..c1574fb 100644 --- a/poopbot.py +++ b/poopbot.py @@ -586,21 +586,20 @@ async def wait_for_voice_client_ready(vc: discord.VoiceClient, timeout_seconds: """Wait for the voice websocket handshake to be ready for recording/playback.""" deadline = time.monotonic() + timeout_seconds while time.monotonic() < deadline: - if not vc.is_connected(): - return False - + connected = vc.is_connected() connected_event = getattr(vc, "_connected", None) - if connected_event is not None and hasattr(connected_event, "is_set") and connected_event.is_set(): - return True + connected_event_set = ( + connected_event is not None and hasattr(connected_event, "is_set") and connected_event.is_set() + ) ws = getattr(vc, "ws", None) - if ws is not None and callable(getattr(ws, "poll_event", None)): + ws_poll_ready = ws is not None and callable(getattr(ws, "poll_event", None)) + + if connected and (ws_poll_ready or connected_event_set): return True - if getattr(vc, "channel", None) is not None and getattr(vc, "guild", None) is not None: - guild_voice_client = vc.guild.voice_client - if guild_voice_client is vc: - return True + if not connected: + return False await asyncio.sleep(0.1) return False @@ -1691,7 +1690,20 @@ async def gtranscribe(interaction: discord.Interaction): 
if vc is None: await interaction.followup.send("I couldn't initialize a voice client.", ephemeral=True) return - if not await wait_for_voice_client_ready(vc): + ready_check_started = time.monotonic() + ready = await wait_for_voice_client_ready(vc) + readiness_elapsed = time.monotonic() - ready_check_started + ws = getattr(vc, "ws", None) + print( + "[transcribe] voice readiness " + f"guild_id={interaction.guild.id} " + f"channel_id={voice_channel.id} " + f"ws_type={type(ws).__name__ if ws is not None else None} " + f"elapsed_s={readiness_elapsed:.3f} " + f"used_reconnect_path={connected_here} " + f"ready={ready}" + ) + if not ready: print(f"[transcribe] voice client not ready after wait: {describe_voice_client_state(vc)}") if connected_here: try: From 66a06855841394d93c351bdaad6fab2f02a7888f Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 22:11:52 -0500 Subject: [PATCH 17/54] Replace poopbot prints with structured logging --- poopbot.py | 187 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 149 insertions(+), 38 deletions(-) diff --git a/poopbot.py b/poopbot.py index c1574fb..6a3f505 100644 --- a/poopbot.py +++ b/poopbot.py @@ -5,6 +5,8 @@ import random import sqlite3 import asyncio +import logging +import logging.handlers import urllib.request from urllib.parse import urlparse import json @@ -20,6 +22,70 @@ import time import discord from discord.ext import commands, tasks + +load_dotenv() # loads variables from .env into the process environment + +logger = logging.getLogger("gokibot") + + +def configure_logging() -> None: + log_level_name = os.getenv("LOG_LEVEL", "INFO").strip().upper() or "INFO" + log_level = getattr(logging, log_level_name, logging.INFO) + log_dir = Path("logs") + log_dir.mkdir(parents=True, exist_ok=True) + formatter = logging.Formatter( + "%(asctime)s %(levelname)s %(name)s %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S%z", + ) + stdout_handler = logging.StreamHandler() + stdout_handler.setFormatter(formatter) + 
file_handler = logging.handlers.RotatingFileHandler( + log_dir / "gokibot.log", + maxBytes=5 * 1024 * 1024, + backupCount=5, + encoding="utf-8", + ) + file_handler.setFormatter(formatter) + root_logger = logging.getLogger() + root_logger.handlers.clear() + root_logger.setLevel(log_level) + root_logger.addHandler(stdout_handler) + root_logger.addHandler(file_handler) + logger.info("logging_configured level=%s", logging.getLevelName(log_level)) + + +def interaction_log_context(interaction: discord.Interaction) -> dict[str, object]: + return { + "guild_id": getattr(interaction.guild, "id", None), + "channel_id": getattr(interaction.channel, "id", None), + "user_id": getattr(interaction.user, "id", None), + "interaction": getattr(getattr(interaction, "command", None), "qualified_name", None), + } + + +def register_loop_exception_handler(loop: asyncio.AbstractEventLoop) -> None: + if getattr(loop, "_gokibot_exception_handler_installed", False): + return + default_handler = loop.get_exception_handler() + + def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dict[str, object]) -> None: + message = context.get("message", "Unhandled asyncio loop exception") + exception = context.get("exception") + if exception is not None: + logger.exception("loop_exception message=%s context=%r", message, context, exc_info=exception) + else: + logger.error("loop_exception message=%s context=%r", message, context) + if default_handler is not None: + default_handler(active_loop, context) + else: + active_loop.default_exception_handler(context) + + loop.set_exception_handler(_loop_exception_handler) + setattr(loop, "_gokibot_exception_handler_installed", True) + logger.info("loop_exception_handler_registered") + + +configure_logging() try: from zoneinfo import ZoneInfo from zoneinfo import ZoneInfoNotFoundError @@ -37,12 +103,11 @@ except Exception as e: _OPUS_LOAD_ERROR = str(e) if not discord.opus.is_loaded() and _OPUS_LOAD_ERROR: - print(f"[voice] Failed to load 
Opus: {_OPUS_LOAD_ERROR}") + logger.warning("voice opus_load_failed error=%s", _OPUS_LOAD_ERROR) # ========================= # CONFIG # ========================= -load_dotenv() # loads variables from .env into the process environment TOKEN = os.getenv("DISCORD_TOKEN") if not TOKEN: raise RuntimeError("DISCORD_TOKEN not found. Check your .env file and WorkingDirectory.") @@ -339,6 +404,13 @@ def copy_recorded_audio_to_session( continue copied_files[user_id] = temp_output session.voice_paths_by_user = copied_files + logger.info( + "transcribe_audio_copied guild_id=%s voice_channel_id=%s temp_dir=%s captured_files=%s", + session.guild_id, + session.voice_channel_id, + session.temp_dir, + len(copied_files), + ) return copied_files async def finalize_transcription_session( interaction: discord.Interaction, @@ -397,9 +469,7 @@ def parse_duration_seconds(value: object) -> int: return total def log_music_timing(step: str, phase: str, started_at: float, **fields: object): elapsed = time.perf_counter() - started_at - details = " ".join(f"{key}={value!r}" for key, value in fields.items()) - suffix = f" {details}" if details else "" - print(f"[music] {step} {phase} elapsed={elapsed:.2f}s{suffix}") + logger.info("music_timing step=%s phase=%s elapsed_s=%.2f details=%r", step, phase, elapsed, fields) def pick_track_info(info: dict[str, object]) -> dict[str, object]: entries = info.get("entries") if isinstance(entries, list): @@ -621,6 +691,25 @@ def describe_voice_client_state(vc: discord.VoiceClient) -> str: f"channel_id={getattr(channel, 'id', None)}" ) +def describe_transcription_session_state(session: GuildTranscriptionSession | None) -> str: + if session is None: + return "session=none" + file_count = len(session.voice_paths_by_user) + return f"temp_dir={session.temp_dir} captured_files={file_count}" + + +def build_interaction_log_context( + interaction: discord.Interaction, + vc: discord.VoiceClient | None = None, + session: GuildTranscriptionSession | None = None, +) -> 
dict[str, object]: + context = interaction_log_context(interaction) + context["voice_state"] = describe_voice_client_state(vc) if vc is not None else "voice_client=none" + if session is not None: + context["transcription_session"] = describe_transcription_session_state(session) + return context + + async def play_next_track(guild: discord.Guild): voice_client = guild.voice_client @@ -638,10 +727,17 @@ async def play_next_track(guild: discord.Guild): next_track = state.queue.popleft() state.current_track = next_track state.track_started_at = datetime.now(timezone.utc) + music_context = { + "guild_id": guild.id, + "channel_id": getattr(getattr(voice_client, "channel", None), "id", None), + "user_id": next_track.requested_by, + "interaction": "music_playback", + "voice_state": describe_voice_client_state(voice_client), + } try: stream_url = await resolve_stream_url(next_track.source_url) - except RuntimeError as exc: - print(f"Failed to resolve stream URL for '{next_track.title}': {exc}") + except RuntimeError: + logger.exception("music_stream_resolve_failed track=%s context=%r", next_track.title, music_context) await play_next_track(guild) return ffmpeg_source = discord.FFmpegPCMAudio( @@ -657,13 +753,13 @@ async def play_next_track(guild: discord.Guild): ) def _after_playback(play_error: Exception | None): if play_error: - print(f"Playback error: {play_error}") + logger.exception("music_playback_error context=%r", music_context, exc_info=play_error) fut = asyncio.run_coroutine_threadsafe(play_next_track(guild), bot.loop) try: fut.result() - except Exception as exc: - print(f"Failed to start next track: {exc}") - print(f"[music] voice_client.play start track='{next_track.title}'") + except Exception: + logger.exception("music_next_track_start_failed context=%r", music_context) + logger.info("music_play_start track=%s context=%r", next_track.title, music_context) voice_client.play(ffmpeg_source, after=_after_playback) # ========================= # DATABASE HELPERS @@ 
-929,11 +1025,11 @@ async def resolve_wesroth_channel_id() -> str | None: try: html = await asyncio.to_thread(_fetch_url_text, WESROTH_HANDLE_URL) except OSError as exc: - print(f"Failed to fetch WesRoth channel page: {exc}") + logger.exception("wesroth_channel_fetch_failed") return None channel_id = _extract_channel_id_from_handle_page(html) if not channel_id: - print("Failed to parse WesRoth channel id from handle page.") + logger.warning("wesroth_channel_id_parse_failed") return channel_id def _parse_wesroth_feed(xml_text: str) -> list[dict]: root = ET.fromstring(xml_text) @@ -969,7 +1065,7 @@ async def fetch_wesroth_latest_today() -> dict | None: try: xml_text = await asyncio.to_thread(_fetch_url_text, feed_url) except OSError as exc: - print(f"Failed to fetch WesRoth feed: {exc}") + logger.exception("wesroth_feed_fetch_failed") return None entries = _parse_wesroth_feed(xml_text) if not entries: @@ -1045,12 +1141,13 @@ async def log_cleanup_message(message: discord.Message): message.content, created_at.isoformat() )) - print( - "Cleanup message stored:", - f"user={message.author} ({message.author.id})", - f"channel={message.channel.id}", - f"time={created_at.isoformat()}", - f"text={message.content!r}" + logger.info( + "cleanup_message_stored user=%s user_id=%s channel_id=%s created_at=%s text=%r", + message.author, + message.author.id, + message.channel.id, + created_at.isoformat(), + message.content, ) def find_last_active_poop_event_id(user_id: int, year: int) -> str | None: """Most recent POOP in the given year that has NOT been undone by that same user.""" @@ -1494,6 +1591,9 @@ async def gplay(interaction: discord.Interaction, youtube_link: discord.Option(s await interaction.response.send_message("This command only works in a server.", ephemeral=True) return voice_channel = await ensure_voice_channel(interaction) + vc = interaction.guild.voice_client + cmd_context = build_interaction_log_context(interaction, vc=vc) + logger.info("music_command_start 
name=gplay context=%r", cmd_context) if voice_channel is None: await interaction.response.send_message( "You must be in a voice channel to use this command.", @@ -1517,8 +1617,9 @@ async def gplay(interaction: discord.Interaction, youtube_link: discord.Option(s if interaction.guild.voice_client is None: try: await voice_channel.connect() - except discord.DiscordException as exc: - await interaction.followup.send(f"Could not join voice channel: {exc}", ephemeral=True) + except discord.DiscordException: + logger.exception("music_voice_connect_failed context=%r", build_interaction_log_context(interaction, vc=interaction.guild.voice_client)) + await interaction.followup.send("Could not join voice channel.", ephemeral=True) return fetch_started_at = time.perf_counter() try: @@ -1531,8 +1632,9 @@ async def gplay(interaction: discord.Interaction, youtube_link: discord.Option(s log_music_timing("fetch_track_info", "timeout", fetch_started_at, source=source) await interaction.followup.send(FETCH_TRACK_INFO_TIMEOUT_MESSAGE, ephemeral=True) return - except RuntimeError as exc: - await interaction.followup.send(f"Could not fetch audio: {exc}", ephemeral=True) + except RuntimeError: + logger.exception("music_fetch_tracks_failed context=%r source=%r", build_interaction_log_context(interaction, vc=interaction.guild.voice_client), source) + await interaction.followup.send("Could not fetch audio.", ephemeral=True) return for track in tracks: track.requested_by = interaction.user.id @@ -1541,6 +1643,7 @@ async def gplay(interaction: discord.Interaction, youtube_link: discord.Option(s starting_queue_size = len(state.queue) state.queue.extend(tracks) first_queue_position = starting_queue_size + 1 + logger.info("music_tracks_queued count=%s first_position=%s context=%r", len(tracks), first_queue_position, build_interaction_log_context(interaction, vc=interaction.guild.voice_client)) await play_next_track(interaction.guild) if len(tracks) == 1: track = tracks[0] @@ -1577,6 +1680,7 @@ 
async def gqueue(interaction: discord.Interaction): ) return vc = interaction.guild.voice_client + logger.info("music_command_start name=gqueue context=%r", build_interaction_log_context(interaction, vc=vc)) if vc is not None and vc.channel != voice_channel: await interaction.response.send_message( f"You must be in {vc.channel.mention} to view this queue.", @@ -1625,6 +1729,7 @@ async def gskip(interaction: discord.Interaction): ) return vc = interaction.guild.voice_client + logger.info("music_command_start name=gskip context=%r", build_interaction_log_context(interaction, vc=vc)) if vc is None or not vc.is_connected(): await interaction.response.send_message("Nothing is playing right now.", ephemeral=True) return @@ -1637,11 +1742,13 @@ async def gskip(interaction: discord.Interaction): if not vc.is_playing() and not vc.is_paused(): await interaction.response.send_message("Nothing is currently playing.", ephemeral=True) return + logger.info("music_command_execute name=gskip context=%r", build_interaction_log_context(interaction, vc=vc)) vc.stop() await interaction.response.send_message("⏭️ Skipped current track.", ephemeral=True) @discord.guild_only() @bot.slash_command(name="gtranscribe", description="Join your voice channel and start recording speakers for transcription.") async def gtranscribe(interaction: discord.Interaction): + logger.info("transcribe_command_start context=%r", build_interaction_log_context(interaction, vc=getattr(getattr(interaction, "guild", None), "voice_client", None))) if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return @@ -1680,8 +1787,8 @@ async def gtranscribe(interaction: discord.Interaction): try: vc = await voice_channel.connect(timeout=15.0, reconnect=True) connected_here = True - except (discord.ClientException, discord.HTTPException, asyncio.TimeoutError) as exc: - print(f"[transcribe] voice connect failed: {type(exc).__name__}: {exc}") + except 
(discord.ClientException, discord.HTTPException, asyncio.TimeoutError): + logger.exception("transcribe_voice_connect_failed context=%r", build_interaction_log_context(interaction, vc=interaction.guild.voice_client)) await interaction.followup.send( "I couldn't join your voice channel. Confirm I have **Connect/Speak** permissions and that dependencies are installed (`pip install -r requirements.txt`).", ephemeral=True, @@ -1694,17 +1801,16 @@ async def gtranscribe(interaction: discord.Interaction): ready = await wait_for_voice_client_ready(vc) readiness_elapsed = time.monotonic() - ready_check_started ws = getattr(vc, "ws", None) - print( - "[transcribe] voice readiness " - f"guild_id={interaction.guild.id} " - f"channel_id={voice_channel.id} " - f"ws_type={type(ws).__name__ if ws is not None else None} " - f"elapsed_s={readiness_elapsed:.3f} " - f"used_reconnect_path={connected_here} " - f"ready={ready}" + logger.info( + "transcribe_voice_readiness elapsed_s=%.3f used_reconnect_path=%s ready=%s ws_type=%s context=%r", + readiness_elapsed, + connected_here, + ready, + type(ws).__name__ if ws is not None else None, + build_interaction_log_context(interaction, vc=vc), ) if not ready: - print(f"[transcribe] voice client not ready after wait: {describe_voice_client_state(vc)}") + logger.warning("transcribe_voice_not_ready context=%r", build_interaction_log_context(interaction, vc=vc)) if connected_here: try: await vc.disconnect(force=True) @@ -1735,7 +1841,7 @@ def _recording_finished(sink: object, channel: object, *_: object): interaction.channel, ) except Exception as exc: - print(f"[transcribe] start_recording failed: {type(exc).__name__}: {exc}") + logger.exception("transcribe_start_recording_failed error_type=%s context=%r", type(exc).__name__, build_interaction_log_context(interaction, vc=vc, session=session)) shutil.rmtree(session.temp_dir, ignore_errors=True) if connected_here: try: @@ -1748,6 +1854,7 @@ def _recording_finished(sink: object, channel: object, *_: 
object): ) return transcription_sessions[interaction.guild.id] = session + logger.info("transcribe_session_started context=%r", build_interaction_log_context(interaction, vc=vc, session=session)) await interaction.followup.send( f"πŸŽ™οΈ Started transcription capture in {voice_channel.mention}. Use `/gendsession` when you're done.", ephemeral=True, @@ -1786,6 +1893,7 @@ async def gendsession(interaction: discord.Interaction): await interaction.response.send_message("This command only works in a server.", ephemeral=True) return session = get_transcription_session(interaction.guild.id) + logger.info("transcribe_end_command_start context=%r", build_interaction_log_context(interaction, vc=interaction.guild.voice_client, session=session)) if session is None: await interaction.response.send_message( "No active transcription session in this server.", @@ -1811,10 +1919,12 @@ async def gendsession(interaction: discord.Interaction): except (discord.HTTPException, discord.ClientException): pass if transcript_path is None: + logger.error("transcribe_finalize_failed error=%s context=%r", error_message, build_interaction_log_context(interaction, vc=vc, session=session)) remove_transcription_session(interaction.guild.id) - await interaction.followup.send(f"⚠️ Session ended, but transcript export failed: {error_message}", ephemeral=True) + await interaction.followup.send("⚠️ Session ended, but transcript export failed.", ephemeral=True) return transcript_file = discord.File(str(transcript_path), filename=transcript_path.name) + logger.info("transcribe_session_finished transcript=%s context=%r", transcript_path, build_interaction_log_context(interaction, vc=vc, session=session)) await interaction.followup.send( content="πŸ“ Transcription session ended. 
Here is your transcript file.", file=transcript_file, @@ -1851,6 +1961,7 @@ async def gokibothelp(interaction: discord.Interaction): # ========================= @bot.event async def on_ready(): + register_loop_exception_handler(asyncio.get_running_loop()) init_config_db() init_year_db(current_year_local()) init_cleanup_db() @@ -1873,7 +1984,7 @@ async def on_ready(): await post_button_for_guild(gid, cid) except (discord.Forbidden, discord.NotFound, discord.HTTPException): continue - print(f"Logged in as {bot.user} (id={bot.user.id})") + logger.info("bot_ready user=%s user_id=%s", bot.user, bot.user.id) if not TOKEN or TOKEN == "PUT_TOKEN_HERE_FOR_TESTING": raise RuntimeError("Set DISCORD_TOKEN_POOPBOT env var or paste token into TOKEN.") bot.run(TOKEN) From e3e52037ee8dd845e3c05553fd05afd378424568 Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Wed, 18 Feb 2026 22:42:29 -0500 Subject: [PATCH 18/54] reqs --- requirements.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 73ed8bc..2620a35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -py-cord==2.6.1 -python-dotenv==1.2.1 -PyNaCl==1.6.1 -audioop-lts +py-cord[voice]>=2.6.1 +python-dotenv>=1.2.1 +PyNaCl>=1.5.0 +yt-dlp>=2024.0.0 +faster-whisper \ No newline at end of file From d3e6b098a9d0a6e84bf798fd4c97ec704620fc29 Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Wed, 18 Feb 2026 22:55:26 -0500 Subject: [PATCH 19/54] reqs --- requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2620a35 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +py-cord[voice]>=2.6.1 +python-dotenv>=1.2.1 +PyNaCl>=1.5.0 +yt-dlp>=2024.0.0 +faster-whisper \ No newline at end of file From 6c59bc1e9ac8bee929b0c772c5ca2c866c38d6f3 Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 22:29:35 -0500 Subject: [PATCH 
20/54] updates reqs --- requirements.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2620a35..19750af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,9 @@ py-cord[voice]>=2.6.1 python-dotenv>=1.2.1 PyNaCl>=1.5.0 yt-dlp>=2024.0.0 -faster-whisper \ No newline at end of file +<<<<<<< HEAD +faster-whisper +======= +faster-whisper + +>>>>>>> 33219f1 (updates reqs) From 7cdf616488eaa6cdfbbac22103ca8fdbbd99d87f Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 18 Feb 2026 23:34:45 -0500 Subject: [PATCH 21/54] reqs again --- requirements.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19750af..7e26d20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,9 +2,4 @@ py-cord[voice]>=2.6.1 python-dotenv>=1.2.1 PyNaCl>=1.5.0 yt-dlp>=2024.0.0 -<<<<<<< HEAD faster-whisper -======= -faster-whisper - ->>>>>>> 33219f1 (updates reqs) From 2893f09c120b9da065a2dd02319c9bf2c4d8bdf4 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 13:14:13 -0500 Subject: [PATCH 22/54] Add live consent-gated transcription thread workflow --- poopbot.py | 491 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 348 insertions(+), 143 deletions(-) diff --git a/poopbot.py b/poopbot.py index 6a3f505..7a4fef8 100644 --- a/poopbot.py +++ b/poopbot.py @@ -17,7 +17,7 @@ import xml.etree.ElementTree as ET from collections import deque from dataclasses import dataclass -from datetime import datetime, timezone, date, time as dtime +from datetime import datetime, timezone, date, time as dtime, timedelta from pathlib import Path import time import discord @@ -149,6 +149,9 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic FETCH_TRACK_INFO_TIMEOUT_MESSAGE = ( "Timed out while fetching track info from YouTube. Please try again in a moment." 
) +TRANSCRIBE_SLICE_SECONDS = max(int(os.getenv("TRANSCRIBE_SLICE_SECONDS", "12")), 5) +TRANSCRIBE_CONSENT_EMOJI = "βœ…" +TRANSCRIBE_CONSENT_VALID_DAYS = 180 # ========================= # MESSAGES # ========================= @@ -319,14 +322,24 @@ def __init__(self): self.lock = asyncio.Lock() music_states: dict[int, GuildMusicState] = {} class GuildTranscriptionSession: - def __init__(self, guild_id: int, voice_channel_id: int): + def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: int, consent_message_id: int): self.guild_id = guild_id self.voice_channel_id = voice_channel_id + self.transcript_thread_id = transcript_thread_id + self.consent_message_id = consent_message_id self.started_at = datetime.now(timezone.utc) self.temp_dir = Path(tempfile.mkdtemp(prefix=f"gokibot_transcribe_{guild_id}_")) - self.voice_paths_by_user: dict[int, Path] = {} self.aliases_by_user: dict[int, str] = {} + self.consented_user_ids: set[int] = set() + self.slice_number = 0 + self.closed = False + self.loop_task: asyncio.Task | None = None + self.active_sink: object | None = None + self.active_slice_done = asyncio.Event() + + transcription_sessions: dict[int, GuildTranscriptionSession] = {} +pending_transcription_name_prompts: dict[tuple[int, int], int] = {} def get_music_state(guild_id: int) -> GuildMusicState: state = music_states.get(guild_id) if state is None: @@ -369,81 +382,220 @@ def get_whisper_transcriber() -> tuple[str | None, object | None]: model = whisper.load_model(model_name) return "whisper", model return None, None -def transcribe_audio_file(engine_name: str, engine: object, file_path: Path) -> str: +def transcribe_audio_file(engine_name: str, engine: object, file_path: Path) -> list[dict[str, object]]: + utterances: list[dict[str, object]] = [] if engine_name == "faster_whisper": segments, _ = engine.transcribe(str(file_path), vad_filter=True) - return " ".join(seg.text.strip() for seg in segments if seg.text.strip()) + for seg in segments: + 
phrase = (seg.text or "").strip() + if not phrase: + continue + utterances.append({ + "start": float(getattr(seg, "start", 0.0) or 0.0), + "text": phrase, + }) + return utterances if engine_name == "whisper": result = engine.transcribe(str(file_path), fp16=False) - return str(result.get("text") or "").strip() - return "" -def copy_recorded_audio_to_session( - sink: object, - guild: discord.Guild, - session: GuildTranscriptionSession, -) -> dict[int, Path]: + segments = result.get("segments") if isinstance(result, dict) else None + if isinstance(segments, list): + for seg in segments: + if not isinstance(seg, dict): + continue + phrase = str(seg.get("text") or "").strip() + if not phrase: + continue + utterances.append({ + "start": float(seg.get("start") or 0.0), + "text": phrase, + }) + else: + text_value = str((result or {}).get("text") or "").strip() if isinstance(result, dict) else "" + if text_value: + utterances.append({"start": 0.0, "text": text_value}) + return utterances + + +def normalize_transcript_display_name(name: str) -> str: + compact = " ".join(name.split()).strip() + return compact[:64] + + +def transcription_consent_is_active(consented_at: str | None, expires_at: str | None) -> bool: + if not consented_at or not expires_at: + return False + try: + consented_dt = datetime.fromisoformat(consented_at) + expires_dt = datetime.fromisoformat(expires_at) + except ValueError: + return False + if consented_dt.tzinfo is None: + consented_dt = consented_dt.replace(tzinfo=timezone.utc) + if expires_dt.tzinfo is None: + expires_dt = expires_dt.replace(tzinfo=timezone.utc) + return datetime.now(timezone.utc) <= expires_dt + + +def get_active_transcription_consent(guild_id: int, user_id: int) -> tuple[str | None, str | None, str | None]: + with db_config() as conn: + row = conn.execute( + """ + SELECT display_name, consented_at_utc, expires_at_utc + FROM transcription_consent + WHERE guild_id=? AND user_id=? 
+ """, + (guild_id, user_id), + ).fetchone() + if not row: + return None, None, None + display_name = normalize_transcript_display_name(str(row["display_name"] or "")) + consented_at = row["consented_at_utc"] + expires_at = row["expires_at_utc"] + if not transcription_consent_is_active(consented_at, expires_at): + return None, consented_at, expires_at + if not display_name: + return None, consented_at, expires_at + return display_name, consented_at, expires_at + + +async def upsert_transcription_consent(guild_id: int, user_id: int, display_name: str): + clean_name = normalize_transcript_display_name(display_name) + now_utc = datetime.now(timezone.utc) + expires = now_utc + timedelta(days=TRANSCRIBE_CONSENT_VALID_DAYS) + async with db_write_lock: + with db_config() as conn: + conn.execute( + """ + INSERT INTO transcription_consent( + guild_id, user_id, display_name, consented_at_utc, expires_at_utc + ) VALUES (?, ?, ?, ?, ?) + ON CONFLICT(guild_id, user_id) DO UPDATE SET + display_name=excluded.display_name, + consented_at_utc=excluded.consented_at_utc, + expires_at_utc=excluded.expires_at_utc + """, + ( + guild_id, + user_id, + clean_name, + now_utc.isoformat(), + expires.isoformat(), + ), + ) + + +def slice_timestamp_label(started_at: datetime, seconds_offset: float) -> str: + ts = started_at + timedelta(seconds=max(seconds_offset, 0.0)) + return ts.astimezone(LOCAL_TZ).strftime("%H:%M:%S") + + +def copy_recorded_audio_slice(sink: object, session: GuildTranscriptionSession) -> dict[int, Path]: copied_files: dict[int, Path] = {} sink_audio_data = getattr(sink, "audio_data", None) if not isinstance(sink_audio_data, dict): return copied_files + session.slice_number += 1 + slice_dir = session.temp_dir / f"slice_{session.slice_number:05d}" + slice_dir.mkdir(parents=True, exist_ok=True) for user_id, audio_obj in sink_audio_data.items(): - if not isinstance(user_id, int): + if not isinstance(user_id, int) or user_id not in session.consented_user_ids: continue file_path = 
getattr(audio_obj, "file", None) if file_path is None: continue - temp_output = session.temp_dir / f"{user_id}.wav" + output_file = slice_dir / f"{user_id}.wav" try: if hasattr(file_path, "seek"): file_path.seek(0) if hasattr(file_path, "read"): - temp_output.write_bytes(file_path.read()) + output_file.write_bytes(file_path.read()) else: - shutil.copy(str(file_path), temp_output) + shutil.copy(str(file_path), output_file) except OSError: continue - copied_files[user_id] = temp_output - session.voice_paths_by_user = copied_files - logger.info( - "transcribe_audio_copied guild_id=%s voice_channel_id=%s temp_dir=%s captured_files=%s", - session.guild_id, - session.voice_channel_id, - session.temp_dir, - len(copied_files), - ) + copied_files[user_id] = output_file return copied_files -async def finalize_transcription_session( - interaction: discord.Interaction, - session: GuildTranscriptionSession, -) -> tuple[Path | None, str]: - guild = interaction.guild - if guild is None: - return None, "This command only works in a server." - if not session.voice_paths_by_user: - return None, "No captured audio was found for this session." + + +async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTranscriptionSession, copied_files: dict[int, Path]): + if not copied_files: + return engine_name, engine = get_whisper_transcriber() if engine is None or engine_name is None: - return None, ( - "No local transcription engine was found. Install dependencies with " - "`pip install -r requirements.txt` (includes `faster-whisper`)." 
- ) - transcript_path = session.temp_dir / f"transcript-{guild.id}-{int(time.time())}.txt" - lines = [ - f"GokiBot transcription session for guild {guild.id}", - f"Started UTC: {session.started_at.isoformat()}", - f"Ended UTC: {datetime.now(timezone.utc).isoformat()}", - "", - ] - for user_id, audio_path in sorted(session.voice_paths_by_user.items(), key=lambda item: item[0]): + return + thread = guild.get_thread(session.transcript_thread_id) + if thread is None: + fetched = guild.get_channel(session.transcript_thread_id) + if isinstance(fetched, discord.Thread): + thread = fetched + if thread is None: + return + ordered_lines: list[tuple[float, str]] = [] + for user_id, audio_path in copied_files.items(): speaker_name = resolve_display_name(guild, user_id, session.aliases_by_user) - transcript = transcribe_audio_file(engine_name, engine, audio_path) - if not transcript: - transcript = "[No speech detected]" - lines.append(f"[{speaker_name} | {user_id}]") - lines.append(transcript) - lines.append("") - transcript_path.write_text("\n".join(lines), encoding="utf-8") - return transcript_path, "" + utterances = transcribe_audio_file(engine_name, engine, audio_path) + for utterance in utterances: + phrase = str(utterance.get("text") or "").strip() + if not phrase: + continue + start_sec = float(utterance.get("start") or 0.0) + stamp = slice_timestamp_label(session.started_at, ((session.slice_number - 1) * TRANSCRIBE_SLICE_SECONDS) + start_sec) + ordered_lines.append((start_sec, f"[{stamp}] [{speaker_name}] {phrase}")) + ordered_lines.sort(key=lambda item: item[0]) + if not ordered_lines: + return + for _, line in ordered_lines: + await thread.send(line) + + +async def finalize_recording_slice(vc: discord.VoiceClient, guild: discord.Guild, session: GuildTranscriptionSession): + if not getattr(vc, "recording", False): + return + done_event = asyncio.Event() + session.active_slice_done = done_event + + def _slice_finished(sink: object, channel: object, *_: object): + 
copied = copy_recorded_audio_slice(sink, session) + fut = asyncio.run_coroutine_threadsafe(post_transcription_slice_lines(guild, session, copied), bot.loop) + try: + fut.result(timeout=90) + except Exception: + logger.exception("transcribe_slice_post_failed guild_id=%s", guild.id) + done_event.set() + + try: + vc.stop_recording() + except Exception: + logger.exception("transcribe_slice_stop_failed guild_id=%s", guild.id) + done_event.set() + await asyncio.wait_for(done_event.wait(), timeout=120) + if session.closed: + return + try: + new_sink = discord.sinks.WaveSink() + session.active_sink = new_sink + vc.start_recording(new_sink, _slice_finished, None) + except Exception: + logger.exception("transcribe_slice_restart_failed guild_id=%s", guild.id) + + +async def transcription_live_loop(guild_id: int): + while True: + await asyncio.sleep(TRANSCRIBE_SLICE_SECONDS) + session = get_transcription_session(guild_id) + if session is None or session.closed: + return + guild = bot.get_guild(guild_id) + if guild is None: + continue + vc = guild.voice_client + if vc is None or not vc.is_connected() or not getattr(vc, "recording", False): + continue + try: + await finalize_recording_slice(vc, guild, session) + except asyncio.TimeoutError: + logger.warning("transcribe_slice_timeout guild_id=%s", guild_id) + def format_duration(duration_seconds: int) -> str: mins, secs = divmod(max(duration_seconds, 0), 60) hours, mins = divmod(mins, 60) @@ -694,8 +846,12 @@ def describe_voice_client_state(vc: discord.VoiceClient) -> str: def describe_transcription_session_state(session: GuildTranscriptionSession | None) -> str: if session is None: return "session=none" - file_count = len(session.voice_paths_by_user) - return f"temp_dir={session.temp_dir} captured_files={file_count}" + return ( + f"temp_dir={session.temp_dir} " + f"slice={session.slice_number} " + f"consented={len(session.consented_user_ids)} " + f"thread_id={session.transcript_thread_id}" + ) def build_interaction_log_context( 
@@ -830,6 +986,16 @@ def init_config_db(): PRIMARY KEY (ticket_id, user_id) ); """) + conn.execute(""" + CREATE TABLE IF NOT EXISTS transcription_consent ( + guild_id INTEGER NOT NULL, + user_id INTEGER NOT NULL, + display_name TEXT NOT NULL, + consented_at_utc TEXT NOT NULL, + expires_at_utc TEXT NOT NULL, + PRIMARY KEY (guild_id, user_id) + ); + """) columns = { row["name"] for row in conn.execute("PRAGMA table_info(tickets)").fetchall() @@ -1243,8 +1409,27 @@ async def wesroth_upload_watch(): # ========================= @bot.event async def on_message(message: discord.Message): + if message.author.bot: + await bot.process_commands(message) + return + if isinstance(message.channel, discord.DMChannel): + prompt_guild_id = pending_transcription_name_prompts.pop((message.author.id, message.channel.id), None) + if prompt_guild_id is not None: + chosen_name = normalize_transcript_display_name(message.content) + if not chosen_name: + await message.channel.send("Display name cannot be empty. React again in the consent thread to retry.") + return + await upsert_transcription_consent(prompt_guild_id, message.author.id, chosen_name) + session = get_transcription_session(prompt_guild_id) + if session is not None: + session.consented_user_ids.add(message.author.id) + session.aliases_by_user[message.author.id] = chosen_name + await message.channel.send( + f"βœ… Consent recorded for this server. Your transcript display name is **{chosen_name}** for the next {TRANSCRIBE_CONSENT_VALID_DAYS} days." 
+ ) + return # delete any non-bot message in the cleanup channel - if message.channel.id == CLEANUP_CHANNEL_ID and not message.author.bot: + if message.channel.id == CLEANUP_CHANNEL_ID: try: await log_cleanup_message(message) await message.delete() @@ -1262,10 +1447,24 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): return if payload.guild_id is None: return + emoji = str(payload.emoji) + session = get_transcription_session(payload.guild_id) + if session is not None and emoji == TRANSCRIBE_CONSENT_EMOJI and payload.message_id == session.consent_message_id: + guild = bot.get_guild(payload.guild_id) + if guild is None: + return + member = guild.get_member(payload.user_id) + if member is None or member.bot: + return + dm_channel = member.dm_channel or await member.create_dm() + pending_transcription_name_prompts[(member.id, dm_channel.id)] = payload.guild_id + await dm_channel.send( + "You reacted to transcription consent. Please reply with the display name you want shown in transcripts." 
+ ) + return active_message_id = gget(payload.guild_id, "active_message_id") if not active_message_id or str(payload.message_id) != active_message_id: return - emoji = str(payload.emoji) channel = await bot.fetch_channel(payload.channel_id) message = await channel.fetch_message(payload.message_id) user = await bot.fetch_user(payload.user_id) @@ -1746,10 +1945,10 @@ async def gskip(interaction: discord.Interaction): vc.stop() await interaction.response.send_message("⏭️ Skipped current track.", ephemeral=True) @discord.guild_only() -@bot.slash_command(name="gtranscribe", description="Join your voice channel and start recording speakers for transcription.") +@bot.slash_command(name="gtranscribe", description="Join your voice channel and start live transcription in a consent thread.") async def gtranscribe(interaction: discord.Interaction): logger.info("transcribe_command_start context=%r", build_interaction_log_context(interaction, vc=getattr(getattr(interaction, "guild", None), "voice_client", None))) - if interaction.guild is None: + if interaction.guild is None or interaction.channel is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return can_record, record_error = can_record_voice() @@ -1766,15 +1965,36 @@ async def gtranscribe(interaction: discord.Interaction): ephemeral=True, ) return - existing = get_transcription_session(interaction.guild.id) - if existing is not None: + if get_transcription_session(interaction.guild.id) is not None: await interaction.response.send_message( "A transcription session is already active in this server. 
Use `/gendsession` first.", ephemeral=True, ) return await interaction.response.defer(ephemeral=True) - await interaction.followup.send("Working…", ephemeral=True) + + start_label = datetime.now(LOCAL_TZ).strftime("%Y-%m-%d %H:%M") + transcript_thread = await interaction.channel.create_thread( + name=f"transcript-{start_label}", + type=discord.ChannelType.private_thread, + ) + for member in voice_channel.members: + if member.bot: + continue + try: + await transcript_thread.add_user(member) + except (discord.Forbidden, discord.HTTPException): + continue + consent_message = await transcript_thread.send( + ( + f"πŸŽ™οΈ **Transcription session started** for {voice_channel.mention}.\n" + f"React with {TRANSCRIBE_CONSENT_EMOJI} to consent to recording/transcription.\n" + "After reacting, I will DM you to collect your transcript display name.\n" + f"Consent expires after {TRANSCRIBE_CONSENT_VALID_DAYS} days." + ) + ) + await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) + vc = interaction.guild.voice_client connected_here = False if vc is not None and vc.channel != voice_channel: @@ -1797,20 +2017,8 @@ async def gtranscribe(interaction: discord.Interaction): if vc is None: await interaction.followup.send("I couldn't initialize a voice client.", ephemeral=True) return - ready_check_started = time.monotonic() ready = await wait_for_voice_client_ready(vc) - readiness_elapsed = time.monotonic() - ready_check_started - ws = getattr(vc, "ws", None) - logger.info( - "transcribe_voice_readiness elapsed_s=%.3f used_reconnect_path=%s ready=%s ws_type=%s context=%r", - readiness_elapsed, - connected_here, - ready, - type(ws).__name__ if ws is not None else None, - build_interaction_log_context(interaction, vc=vc), - ) if not ready: - logger.warning("transcribe_voice_not_ready context=%r", build_interaction_log_context(interaction, vc=vc)) if connected_here: try: await vc.disconnect(force=True) @@ -1824,25 +2032,33 @@ async def gtranscribe(interaction: discord.Interaction): 
if getattr(vc, "recording", False): await interaction.followup.send("I am already recording in this server.", ephemeral=True) return - session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id) - def _recording_finished(sink: object, channel: object, *_: object): - guild = interaction.guild - if guild is None: - return - active_session = get_transcription_session(guild.id) - if active_session is None: - return - copy_recorded_audio_to_session(sink, guild, active_session) + + session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id, consent_message.id) + for member in voice_channel.members: + if member.bot: + continue + display_name, _, _ = get_active_transcription_consent(interaction.guild.id, member.id) + if display_name: + session.consented_user_ids.add(member.id) + session.aliases_by_user[member.id] = display_name + transcription_sessions[interaction.guild.id] = session + + def _slice_finished(sink: object, channel: object, *_: object): + copied = copy_recorded_audio_slice(sink, session) + fut = asyncio.run_coroutine_threadsafe(post_transcription_slice_lines(interaction.guild, session, copied), bot.loop) + try: + fut.result(timeout=90) + except Exception: + logger.exception("transcribe_slice_post_failed guild_id=%s", interaction.guild.id) + session.active_slice_done.set() + try: sink = discord.sinks.WaveSink() - vc.start_recording( - sink, - _recording_finished, - interaction.channel, - ) + session.active_sink = sink + vc.start_recording(sink, _slice_finished, None) except Exception as exc: logger.exception("transcribe_start_recording_failed error_type=%s context=%r", type(exc).__name__, build_interaction_log_context(interaction, vc=vc, session=session)) - shutil.rmtree(session.temp_dir, ignore_errors=True) + remove_transcription_session(interaction.guild.id) if connected_here: try: await vc.disconnect(force=True) @@ -1853,47 +2069,45 @@ def _recording_finished(sink: object, channel: object, *_: object): 
ephemeral=True, ) return - transcription_sessions[interaction.guild.id] = session - logger.info("transcribe_session_started context=%r", build_interaction_log_context(interaction, vc=vc, session=session)) + + session.loop_task = asyncio.create_task(transcription_live_loop(interaction.guild.id)) await interaction.followup.send( - f"πŸŽ™οΈ Started transcription capture in {voice_channel.mention}. Use `/gendsession` when you're done.", + f"πŸŽ™οΈ Live transcription started in {voice_channel.mention}. Updates will be posted in {transcript_thread.mention}.", ephemeral=True, ) -@discord.guild_only() -@bot.slash_command(name="gsetuser", description="Set a display alias for a Discord user id in the active transcription session.") -async def gsetuser( + +@discord.guild_only() +@bot.slash_command(name="gsetname", description="Set your transcription display name and renew consent.") +async def gsetname( interaction: discord.Interaction, - user: discord.Option(discord.Member, "User to alias"), - name: discord.Option(str, "Alias to write in the transcript"), + name: discord.Option(str, "Display name to use in transcripts"), ): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return - session = get_transcription_session(interaction.guild.id) - if session is None: - await interaction.response.send_message( - "No active transcription session. 
Start one with `/gtranscribe`.", - ephemeral=True, - ) - return - alias = name.strip() - if not alias: - await interaction.response.send_message("Alias cannot be empty.", ephemeral=True) + clean_name = normalize_transcript_display_name(name) + if not clean_name: + await interaction.response.send_message("Display name cannot be empty.", ephemeral=True) return - session.aliases_by_user[user.id] = alias + await upsert_transcription_consent(interaction.guild.id, interaction.user.id, clean_name) + session = get_transcription_session(interaction.guild.id) + if session is not None: + session.consented_user_ids.add(interaction.user.id) + session.aliases_by_user[interaction.user.id] = clean_name await interaction.response.send_message( - f"βœ… Transcript alias set: `{user.id}` β†’ **{alias}**", + f"βœ… Saved transcript display name **{clean_name}** and renewed consent for {TRANSCRIBE_CONSENT_VALID_DAYS} days.", ephemeral=True, ) + + @discord.guild_only() -@bot.slash_command(name="gendsession", description="Stop recording and export the transcript text file.") +@bot.slash_command(name="gendsession", description="Stop live transcription and disconnect from voice.") async def gendsession(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) return session = get_transcription_session(interaction.guild.id) - logger.info("transcribe_end_command_start context=%r", build_interaction_log_context(interaction, vc=interaction.guild.voice_client, session=session)) if session is None: await interaction.response.send_message( "No active transcription session in this server.", @@ -1901,36 +2115,27 @@ async def gendsession(interaction: discord.Interaction): ) return vc = interaction.guild.voice_client - if vc is None or not vc.is_connected(): - remove_transcription_session(interaction.guild.id) - await interaction.response.send_message( - "The voice session was already disconnected, so 
no recording could be finalized.", - ephemeral=True, - ) - return await interaction.response.defer(ephemeral=True) - await interaction.followup.send("Working…", ephemeral=True) - if getattr(vc, "recording", False): - vc.stop_recording() - await asyncio.sleep(1) - transcript_path, error_message = await finalize_transcription_session(interaction, session) - try: - await vc.disconnect(force=True) - except (discord.HTTPException, discord.ClientException): - pass - if transcript_path is None: - logger.error("transcribe_finalize_failed error=%s context=%r", error_message, build_interaction_log_context(interaction, vc=vc, session=session)) - remove_transcription_session(interaction.guild.id) - await interaction.followup.send("⚠️ Session ended, but transcript export failed.", ephemeral=True) - return - transcript_file = discord.File(str(transcript_path), filename=transcript_path.name) - logger.info("transcribe_session_finished transcript=%s context=%r", transcript_path, build_interaction_log_context(interaction, vc=vc, session=session)) - await interaction.followup.send( - content="πŸ“ Transcription session ended. Here is your transcript file.", - file=transcript_file, - ephemeral=True, - ) + if vc is not None and vc.is_connected() and getattr(vc, "recording", False): + try: + session.closed = True + await finalize_recording_slice(vc, interaction.guild, session) + except asyncio.TimeoutError: + logger.warning("transcribe_final_slice_timeout guild_id=%s", interaction.guild.id) + if session.loop_task is not None: + session.loop_task.cancel() + if vc is not None and vc.is_connected(): + try: + await vc.disconnect(force=True) + except (discord.HTTPException, discord.ClientException): + pass + thread = interaction.guild.get_thread(session.transcript_thread_id) + if thread is not None: + await thread.send("πŸ›‘ Transcription session ended. 
Bot disconnected from voice.") remove_transcription_session(interaction.guild.id) + await interaction.followup.send("βœ… Transcription session ended and bot disconnected.", ephemeral=True) + + @bot.slash_command(name="gokibothelp", description="Show all available GokiBot commands.") async def gokibothelp(interaction: discord.Interaction): command_lines = [ @@ -1941,9 +2146,9 @@ async def gokibothelp(interaction: discord.Interaction): "- `/gplay ` β€” Queue and play YouTube audio.", "- `/gqueue` β€” Show the current playback queue.", "- `/gskip` β€” Skip the currently playing track.", - "- `/gtranscribe` β€” Start recording and isolate speakers for transcription.", - "- `/gsetuser ` β€” Alias a Discord user in transcript output.", - "- `/gendsession` β€” Stop recording and export transcript text.", + "- `/gtranscribe` β€” Start live transcription in a timestamped consent thread.", + "- `/gsetname ` β€” Set your transcript display name and renew consent.", + "- `/gendsession` β€” Stop live transcription and disconnect.", "- `/gokibothelp` β€” Show this help message." 
] if is_dev_user(interaction.user.id): From 04b5988af20d2af14bd4f16263932003f3e44cf2 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 15:55:07 -0500 Subject: [PATCH 23/54] docs: document required Discord permissions and intent --- DEPLOYMENT.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 44ac43a..03a597a 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -40,3 +40,29 @@ Optional feature variables used by ticketing and alerts: - `TICKET_DEV_USER_ID` - `TICKET_ARCHIVE_CHANNEL_ID` - `WESROTH_CHANNEL_ID` + +## Discord bot permissions (for all current features) +When inviting the bot, grant the following **OAuth2 bot permissions** so every command works as implemented today: + +- `View Channels` +- `Read Message History` +- `Send Messages` +- `Send Messages in Threads` +- `Create Private Threads` +- `Manage Threads` +- `Add Reactions` +- `Manage Messages` +- `Connect` +- `Speak` + +### Why these are needed +- Poop logging + consent flows use reactions, reaction removal, and message cleanup. +- Ticketing + transcription create private threads, add collaborators, and post in thread channels. +- Ticket archive export reads prior thread history. +- Music playback and live transcription require voice connection and speaking permissions. + +## Discord privileged intent configuration +Enable **Message Content Intent** in the Discord Developer Portal for this bot. The runtime explicitly sets `intents.message_content = True`. + +## Maintenance note +If new bot features require additional Discord permissions or intents, update this document at the same time as the feature change. 
From 3bb01029027a43ac1cac451f502ba0fdee41d31c Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 16:29:35 -0500 Subject: [PATCH 24/54] Simplify transcription consent DM wording --- poopbot.py | 89 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/poopbot.py b/poopbot.py index 7a4fef8..751533b 100644 --- a/poopbot.py +++ b/poopbot.py @@ -150,7 +150,6 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic "Timed out while fetching track info from YouTube. Please try again in a moment." ) TRANSCRIBE_SLICE_SECONDS = max(int(os.getenv("TRANSCRIBE_SLICE_SECONDS", "12")), 5) -TRANSCRIBE_CONSENT_EMOJI = "βœ…" TRANSCRIBE_CONSENT_VALID_DAYS = 180 # ========================= # MESSAGES @@ -322,15 +321,15 @@ def __init__(self): self.lock = asyncio.Lock() music_states: dict[int, GuildMusicState] = {} class GuildTranscriptionSession: - def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: int, consent_message_id: int): + def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: int): self.guild_id = guild_id self.voice_channel_id = voice_channel_id self.transcript_thread_id = transcript_thread_id - self.consent_message_id = consent_message_id self.started_at = datetime.now(timezone.utc) self.temp_dir = Path(tempfile.mkdtemp(prefix=f"gokibot_transcribe_{guild_id}_")) self.aliases_by_user: dict[int, str] = {} self.consented_user_ids: set[int] = set() + self.dm_prompted_user_ids: set[int] = set() self.slice_number = 0 self.closed = False self.loop_task: asyncio.Task | None = None @@ -339,7 +338,6 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i transcription_sessions: dict[int, GuildTranscriptionSession] = {} -pending_transcription_name_prompts: dict[tuple[int, int], int] = {} def get_music_state(guild_id: int) -> GuildMusicState: state = music_states.get(guild_id) if state is None: @@ -484,6 
+482,13 @@ async def upsert_transcription_consent(guild_id: int, user_id: int, display_name ) +async def send_transcription_consent_dm(guild: discord.Guild, member: discord.Member): + dm_channel = member.dm_channel or await member.create_dm() + await dm_channel.send( + f"πŸŽ™οΈ Live transcription is active in **{guild.name}**. To change your display name, run `/gsetname ` in the server." + ) + + def slice_timestamp_label(started_at: datetime, seconds_offset: float) -> str: ts = started_at + timedelta(seconds=max(seconds_offset, 0.0)) return ts.astimezone(LOCAL_TZ).strftime("%H:%M:%S") @@ -1412,22 +1417,6 @@ async def on_message(message: discord.Message): if message.author.bot: await bot.process_commands(message) return - if isinstance(message.channel, discord.DMChannel): - prompt_guild_id = pending_transcription_name_prompts.pop((message.author.id, message.channel.id), None) - if prompt_guild_id is not None: - chosen_name = normalize_transcript_display_name(message.content) - if not chosen_name: - await message.channel.send("Display name cannot be empty. React again in the consent thread to retry.") - return - await upsert_transcription_consent(prompt_guild_id, message.author.id, chosen_name) - session = get_transcription_session(prompt_guild_id) - if session is not None: - session.consented_user_ids.add(message.author.id) - session.aliases_by_user[message.author.id] = chosen_name - await message.channel.send( - f"βœ… Consent recorded for this server. Your transcript display name is **{chosen_name}** for the next {TRANSCRIBE_CONSENT_VALID_DAYS} days." 
- ) - return # delete any non-bot message in the cleanup channel if message.channel.id == CLEANUP_CHANNEL_ID: try: @@ -1438,6 +1427,30 @@ async def on_message(message: discord.Message): # still allow commands processing elsewhere; this message is gone anyway return await bot.process_commands(message) +@bot.event +async def on_voice_state_update(member: discord.Member, before: discord.VoiceState, after: discord.VoiceState): + if member.bot or member.guild is None: + return + session = get_transcription_session(member.guild.id) + if session is None or session.closed: + return + if after.channel is None or after.channel.id != session.voice_channel_id: + return + if before.channel is not None and before.channel.id == session.voice_channel_id: + return + display_name, _, _ = get_active_transcription_consent(member.guild.id, member.id) + if display_name: + session.consented_user_ids.add(member.id) + session.aliases_by_user[member.id] = display_name + return + if member.id in session.dm_prompted_user_ids: + return + try: + await send_transcription_consent_dm(member.guild, member) + session.dm_prompted_user_ids.add(member.id) + except (discord.Forbidden, discord.HTTPException): + return + # ========================= # REACTIONS # ========================= @@ -1448,20 +1461,6 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): if payload.guild_id is None: return emoji = str(payload.emoji) - session = get_transcription_session(payload.guild_id) - if session is not None and emoji == TRANSCRIBE_CONSENT_EMOJI and payload.message_id == session.consent_message_id: - guild = bot.get_guild(payload.guild_id) - if guild is None: - return - member = guild.get_member(payload.user_id) - if member is None or member.bot: - return - dm_channel = member.dm_channel or await member.create_dm() - pending_transcription_name_prompts[(member.id, dm_channel.id)] = payload.guild_id - await dm_channel.send( - "You reacted to transcription consent. 
Please reply with the display name you want shown in transcripts." - ) - return active_message_id = gget(payload.guild_id, "active_message_id") if not active_message_id or str(payload.message_id) != active_message_id: return @@ -1945,7 +1944,7 @@ async def gskip(interaction: discord.Interaction): vc.stop() await interaction.response.send_message("⏭️ Skipped current track.", ephemeral=True) @discord.guild_only() -@bot.slash_command(name="gtranscribe", description="Join your voice channel and start live transcription in a consent thread.") +@bot.slash_command(name="gtranscribe", description="Join your voice channel and start live transcription in a transcript thread.") async def gtranscribe(interaction: discord.Interaction): logger.info("transcribe_command_start context=%r", build_interaction_log_context(interaction, vc=getattr(getattr(interaction, "guild", None), "voice_client", None))) if interaction.guild is None or interaction.channel is None: @@ -1985,15 +1984,13 @@ async def gtranscribe(interaction: discord.Interaction): await transcript_thread.add_user(member) except (discord.Forbidden, discord.HTTPException): continue - consent_message = await transcript_thread.send( + await transcript_thread.send( ( - f"πŸŽ™οΈ **Transcription session started** for {voice_channel.mention}.\n" - f"React with {TRANSCRIBE_CONSENT_EMOJI} to consent to recording/transcription.\n" - "After reacting, I will DM you to collect your transcript display name.\n" - f"Consent expires after {TRANSCRIBE_CONSENT_VALID_DAYS} days." + f"πŸŽ™οΈ **Transcription session has begun** for {voice_channel.mention}.\n" + "If you have not already consented, check your DMs and run `/gsetname `." 
) ) - await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) + vc = interaction.guild.voice_client connected_here = False @@ -2033,7 +2030,7 @@ async def gtranscribe(interaction: discord.Interaction): await interaction.followup.send("I am already recording in this server.", ephemeral=True) return - session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id, consent_message.id) + session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id) for member in voice_channel.members: if member.bot: continue @@ -2041,6 +2038,12 @@ async def gtranscribe(interaction: discord.Interaction): if display_name: session.consented_user_ids.add(member.id) session.aliases_by_user[member.id] = display_name + continue + try: + await send_transcription_consent_dm(interaction.guild, member) + session.dm_prompted_user_ids.add(member.id) + except (discord.Forbidden, discord.HTTPException): + continue transcription_sessions[interaction.guild.id] = session def _slice_finished(sink: object, channel: object, *_: object): @@ -2146,7 +2149,7 @@ async def gokibothelp(interaction: discord.Interaction): "- `/gplay ` β€” Queue and play YouTube audio.", "- `/gqueue` β€” Show the current playback queue.", "- `/gskip` β€” Skip the currently playing track.", - "- `/gtranscribe` β€” Start live transcription in a timestamped consent thread.", + "- `/gtranscribe` β€” Start live transcription in a timestamped transcript thread.", "- `/gsetname ` β€” Set your transcript display name and renew consent.", "- `/gendsession` β€” Stop live transcription and disconnect.", "- `/gokibothelp` β€” Show this help message." 
From 1527fd1dd23eb7b1c94f6269cf97a313abe9f509 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 19:24:27 -0500 Subject: [PATCH 25/54] Fix transcription slice callback coroutine handling --- poopbot.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/poopbot.py b/poopbot.py index 751533b..0986657 100644 --- a/poopbot.py +++ b/poopbot.py @@ -559,11 +559,10 @@ async def finalize_recording_slice(vc: discord.VoiceClient, guild: discord.Guild done_event = asyncio.Event() session.active_slice_done = done_event - def _slice_finished(sink: object, channel: object, *_: object): + async def _slice_finished(sink: object, channel: object, *_: object): copied = copy_recorded_audio_slice(sink, session) - fut = asyncio.run_coroutine_threadsafe(post_transcription_slice_lines(guild, session, copied), bot.loop) try: - fut.result(timeout=90) + await asyncio.wait_for(post_transcription_slice_lines(guild, session, copied), timeout=90) except Exception: logger.exception("transcribe_slice_post_failed guild_id=%s", guild.id) done_event.set() @@ -2046,11 +2045,10 @@ async def gtranscribe(interaction: discord.Interaction): continue transcription_sessions[interaction.guild.id] = session - def _slice_finished(sink: object, channel: object, *_: object): + async def _slice_finished(sink: object, channel: object, *_: object): copied = copy_recorded_audio_slice(sink, session) - fut = asyncio.run_coroutine_threadsafe(post_transcription_slice_lines(interaction.guild, session, copied), bot.loop) try: - fut.result(timeout=90) + await asyncio.wait_for(post_transcription_slice_lines(interaction.guild, session, copied), timeout=90) except Exception: logger.exception("transcribe_slice_post_failed guild_id=%s", interaction.guild.id) session.active_slice_done.set() From 9b1d4c26c335eeabe31410d0b08671603025ae0a Mon Sep 17 00:00:00 2001 From: Matthew Matthew Date: Thu, 19 Feb 2026 19:37:27 -0500 Subject: [PATCH 26/54] reworded some consent stuff --- poopbot.py | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poopbot.py b/poopbot.py index 0986657..780a8e2 100644 --- a/poopbot.py +++ b/poopbot.py @@ -1986,7 +1986,7 @@ async def gtranscribe(interaction: discord.Interaction): await transcript_thread.send( ( f"πŸŽ™οΈ **Transcription session has begun** for {voice_channel.mention}.\n" - "If you have not already consented, check your DMs and run `/gsetname `." + "To change your display name `/gsetname `." ) ) @@ -2079,7 +2079,7 @@ async def _slice_finished(sink: object, channel: object, *_: object): @discord.guild_only() -@bot.slash_command(name="gsetname", description="Set your transcription display name and renew consent.") +@bot.slash_command(name="gsetname", description="Set your transcription display name") async def gsetname( interaction: discord.Interaction, name: discord.Option(str, "Display name to use in transcripts"), @@ -2097,7 +2097,7 @@ async def gsetname( session.consented_user_ids.add(interaction.user.id) session.aliases_by_user[interaction.user.id] = clean_name await interaction.response.send_message( - f"βœ… Saved transcript display name **{clean_name}** and renewed consent for {TRANSCRIBE_CONSENT_VALID_DAYS} days.", + f"βœ… Saved transcript display name **{clean_name}**.", ephemeral=True, ) @@ -2148,7 +2148,7 @@ async def gokibothelp(interaction: discord.Interaction): "- `/gqueue` β€” Show the current playback queue.", "- `/gskip` β€” Skip the currently playing track.", "- `/gtranscribe` β€” Start live transcription in a timestamped transcript thread.", - "- `/gsetname ` β€” Set your transcript display name and renew consent.", + "- `/gsetname ` β€” Set your transcript display name", "- `/gendsession` β€” Stop live transcription and disconnect.", "- `/gokibothelp` β€” Show this help message." 
] From 20c918bd2e39430aa651d5d64946a82cc3cb2010 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 19:44:36 -0500 Subject: [PATCH 27/54] Improve transcription consent flow for all voice participants --- poopbot.py | 148 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 115 insertions(+), 33 deletions(-) diff --git a/poopbot.py b/poopbot.py index 780a8e2..e2a2cfa 100644 --- a/poopbot.py +++ b/poopbot.py @@ -151,6 +151,7 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic ) TRANSCRIBE_SLICE_SECONDS = max(int(os.getenv("TRANSCRIBE_SLICE_SECONDS", "12")), 5) TRANSCRIBE_CONSENT_VALID_DAYS = 180 +TRANSCRIBE_CONSENT_EMOJI = "βœ…" # ========================= # MESSAGES # ========================= @@ -338,6 +339,7 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i transcription_sessions: dict[int, GuildTranscriptionSession] = {} +transcription_consent_prompts: dict[int, tuple[int, int]] = {} def get_music_state(guild_id: int) -> GuildMusicState: state = music_states.get(guild_id) if state is None: @@ -350,6 +352,13 @@ def remove_transcription_session(guild_id: int): session = transcription_sessions.pop(guild_id, None) if session is None: return + stale_prompt_ids = [ + message_id + for message_id, (prompt_guild_id, prompt_thread_id) in transcription_consent_prompts.items() + if prompt_guild_id == guild_id and prompt_thread_id == session.transcript_thread_id + ] + for message_id in stale_prompt_ids: + transcription_consent_prompts.pop(message_id, None) shutil.rmtree(session.temp_dir, ignore_errors=True) def resolve_display_name(guild: discord.Guild | None, user_id: int, aliases_by_user: dict[int, str]) -> str: alias = aliases_by_user.get(user_id) @@ -485,10 +494,72 @@ async def upsert_transcription_consent(guild_id: int, user_id: int, display_name async def send_transcription_consent_dm(guild: discord.Guild, member: discord.Member): dm_channel = member.dm_channel or await 
member.create_dm() await dm_channel.send( - f"πŸŽ™οΈ Live transcription is active in **{guild.name}**. To change your display name, run `/gsetname ` in the server." + f"πŸŽ™οΈ Live transcription is active in **{guild.name}**. " + f"React with {TRANSCRIBE_CONSENT_EMOJI} in the consent thread message to opt in. " + "To change your display name, run `/gsetname ` in the server." ) +def find_active_transcription_thread(guild: discord.Guild, session: GuildTranscriptionSession) -> discord.Thread | None: + thread = guild.get_thread(session.transcript_thread_id) + if thread is not None: + return thread + fetched = guild.get_channel(session.transcript_thread_id) + if isinstance(fetched, discord.Thread): + return fetched + return None + + +async def prompt_transcription_consent( + guild: discord.Guild, + session: GuildTranscriptionSession, + transcript_thread: discord.Thread, + members: list[discord.Member], +): + non_consented = [member for member in members if member.id not in session.consented_user_ids] + if not non_consented: + return + mentions = " ".join(member.mention for member in non_consented) + consent_message = await transcript_thread.send( + ( + f"{mentions}\n" + f"React with {TRANSCRIBE_CONSENT_EMOJI} to opt into transcription for this session. " + "Only consented users will be transcribed." 
+ ) + ) + await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) + transcription_consent_prompts[consent_message.id] = (guild.id, session.transcript_thread_id) + + +async def sync_voice_channel_members_for_transcription( + guild: discord.Guild, + voice_channel: discord.VoiceChannel, + session: GuildTranscriptionSession, + transcript_thread: discord.Thread, +): + members: list[discord.Member] = [] + for member in voice_channel.members: + if member.bot: + continue + members.append(member) + try: + await transcript_thread.add_user(member) + except (discord.Forbidden, discord.HTTPException): + pass + display_name, _, _ = get_active_transcription_consent(guild.id, member.id) + if display_name: + session.consented_user_ids.add(member.id) + session.aliases_by_user[member.id] = display_name + continue + if member.id not in session.dm_prompted_user_ids: + try: + await send_transcription_consent_dm(guild, member) + session.dm_prompted_user_ids.add(member.id) + except (discord.Forbidden, discord.HTTPException): + pass + await prompt_transcription_consent(guild, session, transcript_thread, members) + + def slice_timestamp_label(started_at: datetime, seconds_offset: float) -> str: ts = started_at + timedelta(seconds=max(seconds_offset, 0.0)) return ts.astimezone(LOCAL_TZ).strftime("%H:%M:%S") @@ -528,11 +599,7 @@ async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTra engine_name, engine = get_whisper_transcriber() if engine is None or engine_name is None: return - thread = guild.get_thread(session.transcript_thread_id) - if thread is None: - fetched = guild.get_channel(session.transcript_thread_id) - if isinstance(fetched, discord.Thread): - thread = fetched + thread = find_active_transcription_thread(guild, session) if thread is None: return ordered_lines: list[tuple[float, str]] = [] @@ -1437,18 +1504,25 @@ async def on_voice_state_update(member: discord.Member, before: discord.VoiceSta return if before.channel is not None and before.channel.id 
== session.voice_channel_id: return + thread = find_active_transcription_thread(member.guild, session) + if thread is not None: + try: + await thread.add_user(member) + except (discord.Forbidden, discord.HTTPException): + pass display_name, _, _ = get_active_transcription_consent(member.guild.id, member.id) if display_name: session.consented_user_ids.add(member.id) session.aliases_by_user[member.id] = display_name return - if member.id in session.dm_prompted_user_ids: - return - try: - await send_transcription_consent_dm(member.guild, member) - session.dm_prompted_user_ids.add(member.id) - except (discord.Forbidden, discord.HTTPException): - return + if member.id not in session.dm_prompted_user_ids: + try: + await send_transcription_consent_dm(member.guild, member) + session.dm_prompted_user_ids.add(member.id) + except (discord.Forbidden, discord.HTTPException): + pass + if thread is not None: + await prompt_transcription_consent(member.guild, session, thread, [member]) # ========================= # REACTIONS @@ -1460,6 +1534,31 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): if payload.guild_id is None: return emoji = str(payload.emoji) + consent_prompt = transcription_consent_prompts.get(payload.message_id) + if consent_prompt is not None and emoji == TRANSCRIBE_CONSENT_EMOJI: + guild_id, _ = consent_prompt + if guild_id != payload.guild_id: + return + guild = bot.get_guild(payload.guild_id) + member = guild.get_member(payload.user_id) if guild is not None else None + if member is None or member.bot: + return + clean_name = normalize_transcript_display_name(member.display_name) + if clean_name: + await upsert_transcription_consent(payload.guild_id, payload.user_id, clean_name) + session = get_transcription_session(payload.guild_id) + if session is not None and not session.closed: + session.consented_user_ids.add(payload.user_id) + if clean_name: + session.aliases_by_user[payload.user_id] = clean_name + thread = 
find_active_transcription_thread(guild, session) + if thread is not None: + try: + await thread.add_user(member) + except (discord.Forbidden, discord.HTTPException): + pass + await thread.send(f"✅ {member.mention} opted into transcription.") + return active_message_id = gget(payload.guild_id, "active_message_id") if not active_message_id or str(payload.message_id) != active_message_id: return @@ -1976,21 +2075,16 @@ async def gtranscribe(interaction: discord.Interaction): name=f"transcript-{start_label}", type=discord.ChannelType.private_thread, ) - for member in voice_channel.members: - if member.bot: - continue - try: - await transcript_thread.add_user(member) - except (discord.Forbidden, discord.HTTPException): - continue await transcript_thread.send( ( f"🎙️ **Transcription session has begun** for {voice_channel.mention}.\n" + f"React with {TRANSCRIBE_CONSENT_EMOJI} on the consent prompt to opt in.\n" "To change your display name `/gsetname `." ) ) + vc = interaction.guild.voice_client connected_here = False if vc is not None and vc.channel != voice_channel: @@ -2030,20 +2124,8 @@ async def gtranscribe(interaction: discord.Interaction): return session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id) - for member in voice_channel.members: - if member.bot: - continue - display_name, _, _ = get_active_transcription_consent(interaction.guild.id, member.id) - if display_name: - session.consented_user_ids.add(member.id) - session.aliases_by_user[member.id] = display_name - continue - try: - await send_transcription_consent_dm(interaction.guild, member) - session.dm_prompted_user_ids.add(member.id) - except (discord.Forbidden, discord.HTTPException): - continue transcription_sessions[interaction.guild.id] = session + await sync_voice_channel_members_for_transcription(interaction.guild, voice_channel, session, transcript_thread) async def _slice_finished(sink: object, channel: object, *_: object): copied =
copy_recorded_audio_slice(sink, session) From d70abff6e6c78ef20da6a8a3c73e9acad81315f0 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 19:59:47 -0500 Subject: [PATCH 28/54] Fix transcription member sync visibility and add diagnostics --- poopbot.py | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/poopbot.py b/poopbot.py index e2a2cfa..7ee0b7d 100644 --- a/poopbot.py +++ b/poopbot.py @@ -305,6 +305,8 @@ def current_year_local() -> int: intents = discord.Intents.default() intents.reactions = True intents.message_content = True # needed for cleanup logging +intents.voice_states = True # needed to track voice-channel joins/leaves during transcription +intents.members = True # needed to discover/add members to transcript thread and DM consent prompts bot = commands.Bot(command_prefix="!", intents=intents) # serialize DB writes to avoid sqlite "database is locked" db_write_lock = asyncio.Lock() @@ -498,6 +500,7 @@ async def send_transcription_consent_dm(guild: discord.Guild, member: discord.Me f"React with {TRANSCRIBE_CONSENT_EMOJI} in the consent thread message to opt in. " "To change your display name, run `/gsetname ` in the server." 
) + logger.info("transcribe_consent_dm_sent guild_id=%s user_id=%s", guild.id, member.id) def find_active_transcription_thread(guild: discord.Guild, session: GuildTranscriptionSession) -> discord.Thread | None: @@ -538,25 +541,39 @@ async def sync_voice_channel_members_for_transcription( transcript_thread: discord.Thread, ): members: list[discord.Member] = [] + logger.info( + "transcribe_sync_members_start guild_id=%s voice_channel_id=%s channel_member_count=%s", + guild.id, + voice_channel.id, + len(getattr(voice_channel, "members", []) or []), + ) for member in voice_channel.members: if member.bot: continue members.append(member) try: await transcript_thread.add_user(member) + logger.info("transcribe_thread_add_user_ok guild_id=%s thread_id=%s user_id=%s", guild.id, transcript_thread.id, member.id) except (discord.Forbidden, discord.HTTPException): - pass + logger.exception("transcribe_thread_add_user_failed guild_id=%s thread_id=%s user_id=%s", guild.id, transcript_thread.id, member.id) display_name, _, _ = get_active_transcription_consent(guild.id, member.id) if display_name: session.consented_user_ids.add(member.id) session.aliases_by_user[member.id] = display_name + logger.info("transcribe_member_has_active_consent guild_id=%s user_id=%s display_name=%r", guild.id, member.id, display_name) continue if member.id not in session.dm_prompted_user_ids: try: await send_transcription_consent_dm(guild, member) session.dm_prompted_user_ids.add(member.id) except (discord.Forbidden, discord.HTTPException): - pass + logger.exception("transcribe_consent_dm_failed guild_id=%s user_id=%s", guild.id, member.id) + logger.info( + "transcribe_sync_members_done guild_id=%s discovered_members=%s consented_members=%s", + guild.id, + [member.id for member in members], + sorted(session.consented_user_ids), + ) await prompt_transcription_consent(guild, session, transcript_thread, members) @@ -590,17 +607,28 @@ def copy_recorded_audio_slice(sink: object, session: 
GuildTranscriptionSession) except OSError: continue copied_files[user_id] = output_file + logger.info( + "transcribe_slice_copied session_guild_id=%s slice=%s sink_users=%s consented_users=%s copied_users=%s", + session.guild_id, + session.slice_number, + sorted([uid for uid in sink_audio_data.keys() if isinstance(uid, int)]), + sorted(session.consented_user_ids), + sorted(copied_files.keys()), + ) return copied_files async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTranscriptionSession, copied_files: dict[int, Path]): if not copied_files: + logger.info("transcribe_slice_skip_empty guild_id=%s slice=%s", guild.id, session.slice_number) return engine_name, engine = get_whisper_transcriber() if engine is None or engine_name is None: + logger.warning("transcribe_engine_unavailable guild_id=%s slice=%s", guild.id, session.slice_number) return thread = find_active_transcription_thread(guild, session) if thread is None: + logger.warning("transcribe_thread_missing guild_id=%s thread_id=%s", guild.id, session.transcript_thread_id) return ordered_lines: list[tuple[float, str]] = [] for user_id, audio_path in copied_files.items(): @@ -615,7 +643,9 @@ async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTra ordered_lines.append((start_sec, f"[{stamp}] [{speaker_name}] {phrase}")) ordered_lines.sort(key=lambda item: item[0]) if not ordered_lines: + logger.info("transcribe_slice_no_utterances guild_id=%s slice=%s copied_users=%s", guild.id, session.slice_number, sorted(copied_files.keys())) return + logger.info("transcribe_slice_posting guild_id=%s slice=%s lines=%s", guild.id, session.slice_number, len(ordered_lines)) for _, line in ordered_lines: await thread.send(line) @@ -2069,6 +2099,12 @@ async def gtranscribe(interaction: discord.Interaction): ) return await interaction.response.defer(ephemeral=True) + logger.info( + "transcribe_start_intents guild_id=%s members=%s voice_states=%s", + interaction.guild.id, + 
bot.intents.members, + bot.intents.voice_states, + ) start_label = datetime.now(LOCAL_TZ).strftime("%Y-%m-%d %H:%M") transcript_thread = await interaction.channel.create_thread( @@ -2126,6 +2162,13 @@ async def gtranscribe(interaction: discord.Interaction): session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id) transcription_sessions[interaction.guild.id] = session await sync_voice_channel_members_for_transcription(interaction.guild, voice_channel, session, transcript_thread) + logger.info( + "transcribe_session_initialized guild_id=%s voice_channel_id=%s thread_id=%s consented_users=%s", + interaction.guild.id, + voice_channel.id, + transcript_thread.id, + sorted(session.consented_user_ids), + ) async def _slice_finished(sink: object, channel: object, *_: object): copied = copy_recorded_audio_slice(sink, session) From d7d54daba67e452778261e594508aff159dfd1d8 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 20:47:40 -0500 Subject: [PATCH 29/54] Add detailed transcription lifecycle logging --- poopbot.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/poopbot.py b/poopbot.py index 7ee0b7d..c97572d 100644 --- a/poopbot.py +++ b/poopbot.py @@ -652,47 +652,85 @@ async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTra async def finalize_recording_slice(vc: discord.VoiceClient, guild: discord.Guild, session: GuildTranscriptionSession): if not getattr(vc, "recording", False): + logger.warning( + "transcribe_finalize_skipped_not_recording guild_id=%s voice_connected=%s", + guild.id, + vc.is_connected() if vc is not None else None, + ) return + logger.info( + "transcribe_finalize_start guild_id=%s slice=%s voice_connected=%s", + guild.id, + session.slice_number, + vc.is_connected(), + ) done_event = asyncio.Event() session.active_slice_done = done_event async def _slice_finished(sink: object, channel: object, *_: object): + logger.info( + 
"transcribe_finalize_callback_start guild_id=%s slice=%s sink_type=%s", + guild.id, + session.slice_number, + type(sink).__name__, + ) copied = copy_recorded_audio_slice(sink, session) try: await asyncio.wait_for(post_transcription_slice_lines(guild, session, copied), timeout=90) except Exception: logger.exception("transcribe_slice_post_failed guild_id=%s", guild.id) + logger.info( + "transcribe_finalize_callback_done guild_id=%s slice=%s copied_users=%s", + guild.id, + session.slice_number, + sorted(copied.keys()), + ) done_event.set() try: vc.stop_recording() + logger.info("transcribe_finalize_stop_recording_called guild_id=%s slice=%s", guild.id, session.slice_number) except Exception: logger.exception("transcribe_slice_stop_failed guild_id=%s", guild.id) done_event.set() await asyncio.wait_for(done_event.wait(), timeout=120) + logger.info("transcribe_finalize_done_event_received guild_id=%s slice=%s", guild.id, session.slice_number) if session.closed: + logger.info("transcribe_finalize_exit_session_closed guild_id=%s slice=%s", guild.id, session.slice_number) return try: new_sink = discord.sinks.WaveSink() session.active_sink = new_sink vc.start_recording(new_sink, _slice_finished, None) + logger.info("transcribe_finalize_restart_recording_ok guild_id=%s next_slice=%s", guild.id, session.slice_number + 1) except Exception: logger.exception("transcribe_slice_restart_failed guild_id=%s", guild.id) async def transcription_live_loop(guild_id: int): + logger.info("transcribe_live_loop_started guild_id=%s interval_seconds=%s", guild_id, TRANSCRIBE_SLICE_SECONDS) while True: await asyncio.sleep(TRANSCRIBE_SLICE_SECONDS) session = get_transcription_session(guild_id) if session is None or session.closed: + logger.info("transcribe_live_loop_exit guild_id=%s reason=%s", guild_id, "session_missing" if session is None else "session_closed") return guild = bot.get_guild(guild_id) if guild is None: + logger.warning("transcribe_live_loop_skip guild_id=%s 
reason=guild_not_found", guild_id) continue vc = guild.voice_client if vc is None or not vc.is_connected() or not getattr(vc, "recording", False): + logger.warning( + "transcribe_live_loop_skip guild_id=%s reason=voice_not_recording vc_present=%s vc_connected=%s vc_recording=%s", + guild_id, + vc is not None, + vc.is_connected() if vc is not None else None, + getattr(vc, "recording", False) if vc is not None else None, + ) continue try: + logger.info("transcribe_live_loop_finalize guild_id=%s current_slice=%s", guild_id, session.slice_number) await finalize_recording_slice(vc, guild, session) except asyncio.TimeoutError: logger.warning("transcribe_slice_timeout guild_id=%s", guild_id) @@ -2171,17 +2209,35 @@ async def gtranscribe(interaction: discord.Interaction): ) async def _slice_finished(sink: object, channel: object, *_: object): + logger.info( + "transcribe_initial_callback_start guild_id=%s slice=%s sink_type=%s", + interaction.guild.id, + session.slice_number, + type(sink).__name__, + ) copied = copy_recorded_audio_slice(sink, session) try: await asyncio.wait_for(post_transcription_slice_lines(interaction.guild, session, copied), timeout=90) except Exception: logger.exception("transcribe_slice_post_failed guild_id=%s", interaction.guild.id) + logger.info( + "transcribe_initial_callback_done guild_id=%s slice=%s copied_users=%s", + interaction.guild.id, + session.slice_number, + sorted(copied.keys()), + ) session.active_slice_done.set() try: sink = discord.sinks.WaveSink() session.active_sink = sink vc.start_recording(sink, _slice_finished, None) + logger.info( + "transcribe_start_recording_ok guild_id=%s voice_channel_id=%s thread_id=%s", + interaction.guild.id, + voice_channel.id, + transcript_thread.id, + ) except Exception as exc: logger.exception("transcribe_start_recording_failed error_type=%s context=%r", type(exc).__name__, build_interaction_log_context(interaction, vc=vc, session=session)) remove_transcription_session(interaction.guild.id) From 
27b8eb0453df9dd06292274485ec46c63410cc26 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 21:11:31 -0500 Subject: [PATCH 30/54] Recover recording state after transcription slice timeout --- poopbot.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/poopbot.py b/poopbot.py index c97572d..529a3e6 100644 --- a/poopbot.py +++ b/poopbot.py @@ -734,6 +734,61 @@ async def transcription_live_loop(guild_id: int): await finalize_recording_slice(vc, guild, session) except asyncio.TimeoutError: logger.warning("transcribe_slice_timeout guild_id=%s", guild_id) + recovery_session = get_transcription_session(guild_id) + recovery_guild = bot.get_guild(guild_id) + recovery_vc = recovery_guild.voice_client if recovery_guild is not None else None + next_slice = (recovery_session.slice_number + 1) if recovery_session is not None else None + try: + should_recover = ( + recovery_session is not None + and not recovery_session.closed + and recovery_vc is not None + and recovery_vc.is_connected() + and not getattr(recovery_vc, "recording", False) + ) + logger.info( + "transcribe_timeout_recovery_check guild_id=%s next_slice=%s session_open=%s vc_present=%s vc_connected=%s vc_recording=%s", + guild_id, + next_slice, + recovery_session is not None and not recovery_session.closed, + recovery_vc is not None, + recovery_vc.is_connected() if recovery_vc is not None else None, + getattr(recovery_vc, "recording", False) if recovery_vc is not None else None, + ) + if not should_recover: + logger.info("transcribe_timeout_recovery_skipped guild_id=%s next_slice=%s", guild_id, next_slice) + continue + + done_event = asyncio.Event() + recovery_session.active_slice_done = done_event + + async def _slice_finished(sink: object, channel: object, *_: object): + logger.info( + "transcribe_timeout_recovery_callback_start guild_id=%s slice=%s sink_type=%s", + recovery_guild.id, + recovery_session.slice_number, + type(sink).__name__, + ) + 
copied = copy_recorded_audio_slice(sink, recovery_session) + try: + await asyncio.wait_for(post_transcription_slice_lines(recovery_guild, recovery_session, copied), timeout=90) + except Exception: + logger.exception("transcribe_slice_post_failed guild_id=%s", recovery_guild.id) + logger.info( + "transcribe_timeout_recovery_callback_done guild_id=%s slice=%s copied_users=%s", + recovery_guild.id, + recovery_session.slice_number, + sorted(copied.keys()), + ) + done_event.set() + + logger.info("transcribe_timeout_recovery_start guild_id=%s next_slice=%s", guild_id, next_slice) + recovery_sink = discord.sinks.WaveSink() + recovery_session.active_sink = recovery_sink + recovery_vc.start_recording(recovery_sink, _slice_finished, None) + logger.info("transcribe_timeout_recovery_success guild_id=%s next_slice=%s", guild_id, next_slice) + except Exception: + logger.exception("transcribe_timeout_recovery_failed guild_id=%s next_slice=%s", guild_id, next_slice) def format_duration(duration_seconds: int) -> str: mins, secs = divmod(max(duration_seconds, 0), 60) From 1757957fa062158a870fc102e7dd58f4f772e893 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 21:19:39 -0500 Subject: [PATCH 31/54] Add teardown fallback for repeated transcription failures --- poopbot.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 3 deletions(-) diff --git a/poopbot.py b/poopbot.py index 529a3e6..e56db70 100644 --- a/poopbot.py +++ b/poopbot.py @@ -150,6 +150,7 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic "Timed out while fetching track info from YouTube. Please try again in a moment." 
) TRANSCRIBE_SLICE_SECONDS = max(int(os.getenv("TRANSCRIBE_SLICE_SECONDS", "12")), 5) +TRANSCRIBE_MAX_FAILURES = max(int(os.getenv("TRANSCRIBE_MAX_FAILURES", "3")), 1) TRANSCRIBE_CONSENT_VALID_DAYS = 180 TRANSCRIBE_CONSENT_EMOJI = "✅" # ========================= @@ -338,6 +339,7 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.loop_task: asyncio.Task | None = None self.active_sink: object | None = None self.active_slice_done = asyncio.Event() + self.recording_failure_count = 0 transcription_sessions: dict[int, GuildTranscriptionSession] = {} @@ -702,9 +704,46 @@ async def _slice_finished(sink: object, channel: object, *_: object): new_sink = discord.sinks.WaveSink() session.active_sink = new_sink vc.start_recording(new_sink, _slice_finished, None) + session.recording_failure_count = 0 logger.info("transcribe_finalize_restart_recording_ok guild_id=%s next_slice=%s", guild.id, session.slice_number + 1) except Exception: - logger.exception("transcribe_slice_restart_failed guild_id=%s", guild.id) + session.recording_failure_count += 1 + logger.exception( + "transcribe_slice_restart_failed guild_id=%s failure_count=%s", + guild.id, + session.recording_failure_count, + ) + if session.recording_failure_count >= TRANSCRIBE_MAX_FAILURES: + await teardown_transcription_session_for_recording_failure(guild, session, "restart_failed") + + +async def teardown_transcription_session_for_recording_failure( + guild: discord.Guild | None, + session: GuildTranscriptionSession, + failure_reason: str, +): + guild_id = session.guild_id + session.closed = True + if guild is not None: + vc = guild.voice_client + if vc is not None and vc.is_connected(): + try: + await vc.disconnect(force=True) + except (discord.HTTPException, discord.ClientException): + logger.exception("transcribe_teardown_disconnect_failed guild_id=%s", guild_id) + thread = find_active_transcription_thread(guild, session) + if thread is not None: + try: + await thread.send("🛑
Transcription session ended due to recording failure. Please start `/gtranscribe` again.") + except Exception: + logger.exception("transcribe_teardown_thread_notify_failed guild_id=%s", guild_id) + remove_transcription_session(guild_id) + logger.error( + "transcribe_recording_failure guild_id=%s failure_count=%s action=torn_down reason=%s", + guild_id, + session.recording_failure_count, + failure_reason, + ) async def transcription_live_loop(guild_id: int): @@ -733,9 +772,24 @@ async def transcription_live_loop(guild_id: int): logger.info("transcribe_live_loop_finalize guild_id=%s current_slice=%s", guild_id, session.slice_number) await finalize_recording_slice(vc, guild, session) except asyncio.TimeoutError: - logger.warning("transcribe_slice_timeout guild_id=%s", guild_id) recovery_session = get_transcription_session(guild_id) + if recovery_session is None or recovery_session.closed: + logger.warning("transcribe_slice_timeout guild_id=%s failure_count=%s", guild_id, None) + continue + recovery_session.recording_failure_count += 1 + logger.warning( + "transcribe_slice_timeout guild_id=%s failure_count=%s", + guild_id, + recovery_session.recording_failure_count, + ) recovery_guild = bot.get_guild(guild_id) + if recovery_session.recording_failure_count >= TRANSCRIBE_MAX_FAILURES: + await teardown_transcription_session_for_recording_failure( + recovery_guild, + recovery_session, + "finalize_timeout", + ) + continue recovery_vc = recovery_guild.voice_client if recovery_guild is not None else None next_slice = (recovery_session.slice_number + 1) if recovery_session is not None else None try: @@ -786,9 +840,22 @@ async def _slice_finished(sink: object, channel: object, *_: object): recovery_sink = discord.sinks.WaveSink() recovery_session.active_sink = recovery_sink recovery_vc.start_recording(recovery_sink, _slice_finished, None) + recovery_session.recording_failure_count = 0 logger.info("transcribe_timeout_recovery_success guild_id=%s next_slice=%s", guild_id, 
next_slice) except Exception: - logger.exception("transcribe_timeout_recovery_failed guild_id=%s next_slice=%s", guild_id, next_slice) + recovery_session.recording_failure_count += 1 + logger.exception( + "transcribe_timeout_recovery_failed guild_id=%s next_slice=%s failure_count=%s", + guild_id, + next_slice, + recovery_session.recording_failure_count, + ) + if recovery_session.recording_failure_count >= TRANSCRIBE_MAX_FAILURES: + await teardown_transcription_session_for_recording_failure( + recovery_guild, + recovery_session, + "timeout_recovery_restart_failed", + ) def format_duration(duration_seconds: int) -> str: mins, secs = divmod(max(duration_seconds, 0), 60) From b96c9e0a9bee522adf6d2a200faa29652b0e0636 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 21:31:54 -0500 Subject: [PATCH 32/54] Avoid repeated transcription consent prompts per user --- poopbot.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index e56db70..c91642a 100644 --- a/poopbot.py +++ b/poopbot.py @@ -495,6 +495,39 @@ async def upsert_transcription_consent(guild_id: int, user_id: int, display_name ) +def has_transcription_consent_prompt_been_sent(guild_id: int, user_id: int) -> bool: + with db_config() as conn: + row = conn.execute( + """ + SELECT 1 + FROM transcription_consent_prompts_sent + WHERE guild_id=? AND user_id=? + """, + (guild_id, user_id), + ).fetchone() + return row is not None + + +async def mark_transcription_consent_prompt_sent(guild_id: int, user_id: int): + now_utc = datetime.now(timezone.utc) + async with db_write_lock: + with db_config() as conn: + conn.execute( + """ + INSERT INTO transcription_consent_prompts_sent( + guild_id, user_id, prompted_at_utc + ) VALUES (?, ?, ?) 
+ ON CONFLICT(guild_id, user_id) DO UPDATE SET + prompted_at_utc=excluded.prompted_at_utc + """, + ( + guild_id, + user_id, + now_utc.isoformat(), + ), + ) + + async def send_transcription_consent_dm(guild: discord.Guild, member: discord.Member): dm_channel = member.dm_channel or await member.create_dm() await dm_channel.send( @@ -521,7 +554,13 @@ async def prompt_transcription_consent( transcript_thread: discord.Thread, members: list[discord.Member], ): - non_consented = [member for member in members if member.id not in session.consented_user_ids] + non_consented: list[discord.Member] = [] + for member in members: + if member.id in session.consented_user_ids: + continue + if has_transcription_consent_prompt_been_sent(guild.id, member.id): + continue + non_consented.append(member) if not non_consented: return mentions = " ".join(member.mention for member in non_consented) @@ -534,6 +573,8 @@ async def prompt_transcription_consent( ) await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) transcription_consent_prompts[consent_message.id] = (guild.id, session.transcript_thread_id) + for member in non_consented: + await mark_transcription_consent_prompt_sent(guild.id, member.id) async def sync_voice_channel_members_for_transcription( @@ -564,10 +605,14 @@ async def sync_voice_channel_members_for_transcription( session.aliases_by_user[member.id] = display_name logger.info("transcribe_member_has_active_consent guild_id=%s user_id=%s display_name=%r", guild.id, member.id, display_name) continue + already_prompted = has_transcription_consent_prompt_been_sent(guild.id, member.id) + if already_prompted: + continue if member.id not in session.dm_prompted_user_ids: try: await send_transcription_consent_dm(guild, member) session.dm_prompted_user_ids.add(member.id) + await mark_transcription_consent_prompt_sent(guild.id, member.id) except (discord.Forbidden, discord.HTTPException): logger.exception("transcribe_consent_dm_failed guild_id=%s user_id=%s", guild.id, member.id) 
logger.info( @@ -1257,6 +1302,14 @@ def init_config_db(): PRIMARY KEY (guild_id, user_id) ); """) + conn.execute(""" + CREATE TABLE IF NOT EXISTS transcription_consent_prompts_sent ( + guild_id INTEGER NOT NULL, + user_id INTEGER NOT NULL, + prompted_at_utc TEXT NOT NULL, + PRIMARY KEY (guild_id, user_id) + ); + """) columns = { row["name"] for row in conn.execute("PRAGMA table_info(tickets)").fetchall() @@ -1705,10 +1758,13 @@ async def on_voice_state_update(member: discord.Member, before: discord.VoiceSta session.consented_user_ids.add(member.id) session.aliases_by_user[member.id] = display_name return + if has_transcription_consent_prompt_been_sent(member.guild.id, member.id): + return if member.id not in session.dm_prompted_user_ids: try: await send_transcription_consent_dm(member.guild, member) session.dm_prompted_user_ids.add(member.id) + await mark_transcription_consent_prompt_sent(member.guild.id, member.id) except (discord.Forbidden, discord.HTTPException): pass if thread is not None: From 7bb8585a694714372ea14148c863fa0b42139b11 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 22:18:33 -0500 Subject: [PATCH 33/54] Scope transcription consent prompts to active session --- poopbot.py | 101 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 32 deletions(-) diff --git a/poopbot.py b/poopbot.py index c91642a..23f560b 100644 --- a/poopbot.py +++ b/poopbot.py @@ -333,7 +333,7 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.temp_dir = Path(tempfile.mkdtemp(prefix=f"gokibot_transcribe_{guild_id}_")) self.aliases_by_user: dict[int, str] = {} self.consented_user_ids: set[int] = set() - self.dm_prompted_user_ids: set[int] = set() + self.prompted_user_ids: set[int] = set() self.slice_number = 0 self.closed = False self.loop_task: asyncio.Task | None = None @@ -495,19 +495,6 @@ async def upsert_transcription_consent(guild_id: int, user_id: int, display_name ) -def 
has_transcription_consent_prompt_been_sent(guild_id: int, user_id: int) -> bool: - with db_config() as conn: - row = conn.execute( - """ - SELECT 1 - FROM transcription_consent_prompts_sent - WHERE guild_id=? AND user_id=? - """, - (guild_id, user_id), - ).fetchone() - return row is not None - - async def mark_transcription_consent_prompt_sent(guild_id: int, user_id: int): now_utc = datetime.now(timezone.utc) async with db_write_lock: @@ -557,11 +544,28 @@ async def prompt_transcription_consent( non_consented: list[discord.Member] = [] for member in members: if member.id in session.consented_user_ids: + logger.info( + "transcribe_consent_prompt_skip guild_id=%s thread_id=%s user_id=%s prompt_reason=already_consented", + guild.id, + transcript_thread.id, + member.id, + ) continue - if has_transcription_consent_prompt_been_sent(guild.id, member.id): + if member.id in session.prompted_user_ids: + logger.info( + "transcribe_consent_prompt_skip guild_id=%s thread_id=%s user_id=%s prompt_reason=session_already_prompted", + guild.id, + transcript_thread.id, + member.id, + ) continue non_consented.append(member) if not non_consented: + logger.info( + "transcribe_consent_prompt_none guild_id=%s thread_id=%s prompt_reason=session_no_eligible_members", + guild.id, + transcript_thread.id, + ) return mentions = " ".join(member.mention for member in non_consented) consent_message = await transcript_thread.send( @@ -574,6 +578,13 @@ async def prompt_transcription_consent( await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) transcription_consent_prompts[consent_message.id] = (guild.id, session.transcript_thread_id) for member in non_consented: + session.prompted_user_ids.add(member.id) + logger.info( + "transcribe_consent_prompt_thread_sent guild_id=%s thread_id=%s user_id=%s prompt_reason=session_new", + guild.id, + transcript_thread.id, + member.id, + ) await mark_transcription_consent_prompt_sent(guild.id, member.id) @@ -605,16 +616,24 @@ async def 
sync_voice_channel_members_for_transcription( session.aliases_by_user[member.id] = display_name logger.info("transcribe_member_has_active_consent guild_id=%s user_id=%s display_name=%r", guild.id, member.id, display_name) continue - already_prompted = has_transcription_consent_prompt_been_sent(guild.id, member.id) - if already_prompted: + if member.id in session.prompted_user_ids: + logger.info( + "transcribe_consent_dm_skip guild_id=%s user_id=%s prompt_reason=session_already_prompted", + guild.id, + member.id, + ) continue - if member.id not in session.dm_prompted_user_ids: - try: - await send_transcription_consent_dm(guild, member) - session.dm_prompted_user_ids.add(member.id) - await mark_transcription_consent_prompt_sent(guild.id, member.id) - except (discord.Forbidden, discord.HTTPException): - logger.exception("transcribe_consent_dm_failed guild_id=%s user_id=%s", guild.id, member.id) + try: + await send_transcription_consent_dm(guild, member) + session.prompted_user_ids.add(member.id) + logger.info( + "transcribe_consent_dm_sent guild_id=%s user_id=%s prompt_reason=session_new", + guild.id, + member.id, + ) + await mark_transcription_consent_prompt_sent(guild.id, member.id) + except (discord.Forbidden, discord.HTTPException): + logger.exception("transcribe_consent_dm_failed guild_id=%s user_id=%s prompt_reason=send_failed", guild.id, member.id) logger.info( "transcribe_sync_members_done guild_id=%s discovered_members=%s consented_members=%s", guild.id, @@ -1757,16 +1776,34 @@ async def on_voice_state_update(member: discord.Member, before: discord.VoiceSta if display_name: session.consented_user_ids.add(member.id) session.aliases_by_user[member.id] = display_name + logger.info( + "transcribe_voice_join_skip_prompt guild_id=%s user_id=%s prompt_reason=already_consented", + member.guild.id, + member.id, + ) return - if has_transcription_consent_prompt_been_sent(member.guild.id, member.id): + if member.id in session.prompted_user_ids: + logger.info( + 
"transcribe_voice_join_skip_prompt guild_id=%s user_id=%s prompt_reason=session_already_prompted", + member.guild.id, + member.id, + ) return - if member.id not in session.dm_prompted_user_ids: - try: - await send_transcription_consent_dm(member.guild, member) - session.dm_prompted_user_ids.add(member.id) - await mark_transcription_consent_prompt_sent(member.guild.id, member.id) - except (discord.Forbidden, discord.HTTPException): - pass + try: + await send_transcription_consent_dm(member.guild, member) + session.prompted_user_ids.add(member.id) + logger.info( + "transcribe_voice_join_dm_sent guild_id=%s user_id=%s prompt_reason=session_new", + member.guild.id, + member.id, + ) + await mark_transcription_consent_prompt_sent(member.guild.id, member.id) + except (discord.Forbidden, discord.HTTPException): + logger.exception( + "transcribe_voice_join_dm_failed guild_id=%s user_id=%s prompt_reason=send_failed", + member.guild.id, + member.id, + ) if thread is not None: await prompt_transcription_consent(member.guild, session, thread, [member]) From 5c0883fdb19d87b883771fffe6e79f5e94a8a21f Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 22:24:29 -0500 Subject: [PATCH 34/54] Refactor transcription session to reuse whisper engine --- poopbot.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index 23f560b..6239297 100644 --- a/poopbot.py +++ b/poopbot.py @@ -340,6 +340,8 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.active_sink: object | None = None self.active_slice_done = asyncio.Event() self.recording_failure_count = 0 + self.engine_name: str | None = None + self.engine_instance: object | None = None transcription_sessions: dict[int, GuildTranscriptionSession] = {} @@ -363,6 +365,8 @@ def remove_transcription_session(guild_id: int): ] for message_id in stale_prompt_ids: transcription_consent_prompts.pop(message_id, None) + 
session.engine_name = None + session.engine_instance = None shutil.rmtree(session.temp_dir, ignore_errors=True) def resolve_display_name(guild: discord.Guild | None, user_id: int, aliases_by_user: dict[int, str]) -> str: alias = aliases_by_user.get(user_id) @@ -688,7 +692,8 @@ async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTra if not copied_files: logger.info("transcribe_slice_skip_empty guild_id=%s slice=%s", guild.id, session.slice_number) return - engine_name, engine = get_whisper_transcriber() + engine_name = session.engine_name + engine = session.engine_instance if engine is None or engine_name is None: logger.warning("transcribe_engine_unavailable guild_id=%s slice=%s", guild.id, session.slice_number) return @@ -697,6 +702,7 @@ async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTra logger.warning("transcribe_thread_missing guild_id=%s thread_id=%s", guild.id, session.transcript_thread_id) return ordered_lines: list[tuple[float, str]] = [] + transcribe_started = time.monotonic() for user_id, audio_path in copied_files.items(): speaker_name = resolve_display_name(guild, user_id, session.aliases_by_user) utterances = transcribe_audio_file(engine_name, engine, audio_path) @@ -707,6 +713,13 @@ async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTra start_sec = float(utterance.get("start") or 0.0) stamp = slice_timestamp_label(session.started_at, ((session.slice_number - 1) * TRANSCRIBE_SLICE_SECONDS) + start_sec) ordered_lines.append((start_sec, f"[{stamp}] [{speaker_name}] {phrase}")) + logger.info( + "transcribe_slice_transcription_done guild_id=%s slice=%s duration_ms=%.2f copied_users=%s", + guild.id, + session.slice_number, + (time.monotonic() - transcribe_started) * 1000, + sorted(copied_files.keys()), + ) ordered_lines.sort(key=lambda item: item[0]) if not ordered_lines: logger.info("transcribe_slice_no_utterances guild_id=%s slice=%s copied_users=%s", guild.id, 
session.slice_number, sorted(copied_files.keys())) @@ -2351,6 +2364,20 @@ async def gtranscribe(interaction: discord.Interaction): ephemeral=True, ) return + engine_init_started = time.monotonic() + engine_name, engine_instance = get_whisper_transcriber() + if engine_name is None or engine_instance is None: + await interaction.response.send_message( + "Whisper transcription engine is unavailable. Install one of: `pip install faster-whisper` (recommended) or `pip install openai-whisper`, then retry `/gtranscribe`.", + ephemeral=True, + ) + return + logger.info( + "transcribe_engine_initialized guild_id=%s engine=%s duration_ms=%.2f", + interaction.guild.id, + engine_name, + (time.monotonic() - engine_init_started) * 1000, + ) await interaction.response.defer(ephemeral=True) logger.info( "transcribe_start_intents guild_id=%s members=%s voice_states=%s", @@ -2413,6 +2440,8 @@ async def gtranscribe(interaction: discord.Interaction): return session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id) + session.engine_name = engine_name + session.engine_instance = engine_instance transcription_sessions[interaction.guild.id] = session await sync_voice_channel_members_for_transcription(interaction.guild, voice_channel, session, transcript_thread) logger.info( From 7f4e82990bfeef0c792542e1edd9f5e4de513669 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 19 Feb 2026 22:30:04 -0500 Subject: [PATCH 35/54] Rework transcription to durable capture with deferred finalization --- poopbot.py | 514 +++++++++++++++++++++++++---------------------------- 1 file changed, 247 insertions(+), 267 deletions(-) diff --git a/poopbot.py b/poopbot.py index 6239297..6537604 100644 --- a/poopbot.py +++ b/poopbot.py @@ -13,6 +13,8 @@ import tempfile import shutil import importlib.util +import io +import wave import ctypes.util import xml.etree.ElementTree as ET from collections import deque @@ -334,14 +336,24 @@ def __init__(self, guild_id: int, 
voice_channel_id: int, transcript_thread_id: i self.aliases_by_user: dict[int, str] = {} self.consented_user_ids: set[int] = set() self.prompted_user_ids: set[int] = set() - self.slice_number = 0 self.closed = False self.loop_task: asyncio.Task | None = None + self.worker_task: asyncio.Task | None = None self.active_sink: object | None = None - self.active_slice_done = asyncio.Event() + self.capture_done = asyncio.Event() self.recording_failure_count = 0 self.engine_name: str | None = None self.engine_instance: object | None = None + self.capture_index = 0 + self.chunk_index = 0 + self.chunk_queue: asyncio.Queue[int] = asyncio.Queue(maxsize=128) + self.chunk_meta: list[dict[str, object]] = [] + self.chunk_meta_by_id: dict[int, dict[str, object]] = {} + self.chunk_transcripts: dict[int, list[dict[str, object]]] = {} + self.pending_live_lines: list[str] = [] + self.user_last_frame: dict[int, int] = {} + self.chunk_meta_path = self.temp_dir / "chunk_metadata.jsonl" + self.finalized = False transcription_sessions: dict[int, GuildTranscriptionSession] = {} @@ -367,7 +379,8 @@ def remove_transcription_session(guild_id: int): transcription_consent_prompts.pop(message_id, None) session.engine_name = None session.engine_instance = None - shutil.rmtree(session.temp_dir, ignore_errors=True) + if session.finalized: + shutil.rmtree(session.temp_dir, ignore_errors=True) def resolve_display_name(guild: discord.Guild | None, user_id: int, aliases_by_user: dict[int, str]) -> str: alias = aliases_by_user.get(user_id) if alias: @@ -652,146 +665,238 @@ def slice_timestamp_label(started_at: datetime, seconds_offset: float) -> str: return ts.astimezone(LOCAL_TZ).strftime("%H:%M:%S") -def copy_recorded_audio_slice(sink: object, session: GuildTranscriptionSession) -> dict[int, Path]: - copied_files: dict[int, Path] = {} +def persist_chunk_metadata(session: GuildTranscriptionSession, chunk: dict[str, object]) -> None: + with session.chunk_meta_path.open("a", encoding="utf-8") as handle: + 
handle.write(json.dumps(chunk, ensure_ascii=False) + "\n") + + +def capture_chunk_from_sink_audio( + session: GuildTranscriptionSession, + user_id: int, + audio_obj: object, + capture_dir: Path, + capture_offset_seconds: float, +) -> dict[str, object] | None: + file_obj = getattr(audio_obj, "file", None) + if file_obj is None: + return None + try: + prev_pos = file_obj.tell() if hasattr(file_obj, "tell") else None + if hasattr(file_obj, "seek"): + file_obj.seek(0) + payload = file_obj.read() if hasattr(file_obj, "read") else None + if prev_pos is not None and hasattr(file_obj, "seek"): + file_obj.seek(prev_pos) + except Exception: + return None + if not isinstance(payload, (bytes, bytearray)): + return None + try: + wav_in = wave.open(io.BytesIO(payload), "rb") + except wave.Error: + return None + with wav_in: + framerate = wav_in.getframerate() or 1 + total_frames = wav_in.getnframes() + last_frame = session.user_last_frame.get(user_id, 0) + if total_frames <= last_frame: + return None + wav_in.setpos(last_frame) + delta_frames = total_frames - last_frame + frames = wav_in.readframes(delta_frames) + params = wav_in.getparams() + session.user_last_frame[user_id] = total_frames + chunk_start = capture_offset_seconds + (last_frame / max(framerate, 1)) + session.chunk_index += 1 + chunk_file = capture_dir / f"chunk_{session.chunk_index:06d}_u{user_id}.wav" + with wave.open(str(chunk_file), "wb") as wav_out: + wav_out.setparams(params) + wav_out.writeframes(frames) + chunk = { + "chunk_id": session.chunk_index, + "user_id": user_id, + "start_offset": round(chunk_start, 3), + "file_path": str(chunk_file), + "queued": False, + "transcribed": False, + } + session.chunk_meta.append(chunk) + session.chunk_meta_by_id[session.chunk_index] = chunk + persist_chunk_metadata(session, chunk) + return chunk + + +async def flush_active_recording_buffers(session: GuildTranscriptionSession, guild_id: int) -> int: + sink = session.active_sink sink_audio_data = getattr(sink, 
"audio_data", None) if not isinstance(sink_audio_data, dict): - return copied_files - session.slice_number += 1 - slice_dir = session.temp_dir / f"slice_{session.slice_number:05d}" - slice_dir.mkdir(parents=True, exist_ok=True) + return 0 + session.capture_index += 1 + capture_dir = session.temp_dir / f"capture_{session.capture_index:05d}" + capture_dir.mkdir(parents=True, exist_ok=True) + capture_offset_seconds = (datetime.now(timezone.utc) - session.started_at).total_seconds() + produced = 0 for user_id, audio_obj in sink_audio_data.items(): if not isinstance(user_id, int) or user_id not in session.consented_user_ids: continue - file_path = getattr(audio_obj, "file", None) - if file_path is None: + chunk = capture_chunk_from_sink_audio(session, user_id, audio_obj, capture_dir, capture_offset_seconds) + if chunk is None: continue - output_file = slice_dir / f"{user_id}.wav" + produced += 1 try: - if hasattr(file_path, "seek"): - file_path.seek(0) - if hasattr(file_path, "read"): - output_file.write_bytes(file_path.read()) - else: - shutil.copy(str(file_path), output_file) - except OSError: - continue - copied_files[user_id] = output_file + session.chunk_queue.put_nowait(int(chunk["chunk_id"])) + chunk["queued"] = True + except asyncio.QueueFull: + logger.warning("transcribe_queue_backpressure guild_id=%s chunk_id=%s", guild_id, chunk["chunk_id"]) logger.info( - "transcribe_slice_copied session_guild_id=%s slice=%s sink_users=%s consented_users=%s copied_users=%s", - session.guild_id, - session.slice_number, - sorted([uid for uid in sink_audio_data.keys() if isinstance(uid, int)]), - sorted(session.consented_user_ids), - sorted(copied_files.keys()), + "transcribe_capture_flushed guild_id=%s capture=%s produced_chunks=%s queue_size=%s", + guild_id, + session.capture_index, + produced, + session.chunk_queue.qsize(), ) - return copied_files + return produced -async def post_transcription_slice_lines(guild: discord.Guild, session: GuildTranscriptionSession, 
copied_files: dict[int, Path]): - if not copied_files: - logger.info("transcribe_slice_skip_empty guild_id=%s slice=%s", guild.id, session.slice_number) - return - engine_name = session.engine_name - engine = session.engine_instance - if engine is None or engine_name is None: - logger.warning("transcribe_engine_unavailable guild_id=%s slice=%s", guild.id, session.slice_number) - return +def build_transcript_lines_for_chunk( + guild: discord.Guild | None, + session: GuildTranscriptionSession, + chunk: dict[str, object], + utterances: list[dict[str, object]], +) -> list[dict[str, object]]: + user_id = int(chunk["user_id"]) + speaker_name = resolve_display_name(guild, user_id, session.aliases_by_user) + base_offset = float(chunk["start_offset"]) + lines: list[dict[str, object]] = [] + for utterance in utterances: + phrase = str(utterance.get("text") or "").strip() + if not phrase: + continue + rel_start = float(utterance.get("start") or 0.0) + absolute_start = base_offset + rel_start + stamp = slice_timestamp_label(session.started_at, absolute_start) + lines.append({ + "absolute_start": absolute_start, + "line": f"[{stamp}] [{speaker_name}] {phrase}", + }) + lines.sort(key=lambda item: item["absolute_start"]) + return lines + + +async def try_post_live_lines(guild: discord.Guild | None, session: GuildTranscriptionSession, lines: list[str]) -> bool: + if not lines: + return True + if guild is None: + session.pending_live_lines.extend(lines) + return False thread = find_active_transcription_thread(guild, session) if thread is None: - logger.warning("transcribe_thread_missing guild_id=%s thread_id=%s", guild.id, session.transcript_thread_id) - return - ordered_lines: list[tuple[float, str]] = [] - transcribe_started = time.monotonic() - for user_id, audio_path in copied_files.items(): - speaker_name = resolve_display_name(guild, user_id, session.aliases_by_user) - utterances = transcribe_audio_file(engine_name, engine, audio_path) - for utterance in utterances: - phrase 
= str(utterance.get("text") or "").strip() - if not phrase: - continue - start_sec = float(utterance.get("start") or 0.0) - stamp = slice_timestamp_label(session.started_at, ((session.slice_number - 1) * TRANSCRIBE_SLICE_SECONDS) + start_sec) - ordered_lines.append((start_sec, f"[{stamp}] [{speaker_name}] {phrase}")) - logger.info( - "transcribe_slice_transcription_done guild_id=%s slice=%s duration_ms=%.2f copied_users=%s", - guild.id, - session.slice_number, - (time.monotonic() - transcribe_started) * 1000, - sorted(copied_files.keys()), - ) - ordered_lines.sort(key=lambda item: item[0]) - if not ordered_lines: - logger.info("transcribe_slice_no_utterances guild_id=%s slice=%s copied_users=%s", guild.id, session.slice_number, sorted(copied_files.keys())) - return - logger.info("transcribe_slice_posting guild_id=%s slice=%s lines=%s", guild.id, session.slice_number, len(ordered_lines)) - for _, line in ordered_lines: - await thread.send(line) + session.pending_live_lines.extend(lines) + return False + try: + for line in lines: + await thread.send(line) + return True + except Exception: + logger.exception("transcribe_live_post_failed guild_id=%s", session.guild_id) + session.pending_live_lines.extend(lines) + return False -async def finalize_recording_slice(vc: discord.VoiceClient, guild: discord.Guild, session: GuildTranscriptionSession): - if not getattr(vc, "recording", False): - logger.warning( - "transcribe_finalize_skipped_not_recording guild_id=%s voice_connected=%s", - guild.id, - vc.is_connected() if vc is not None else None, - ) - return - logger.info( - "transcribe_finalize_start guild_id=%s slice=%s voice_connected=%s", - guild.id, - session.slice_number, - vc.is_connected(), - ) - done_event = asyncio.Event() - session.active_slice_done = done_event +async def transcription_worker_loop(guild_id: int): + logger.info("transcribe_worker_started guild_id=%s", guild_id) + while True: + session = get_transcription_session(guild_id) + if session is None: + 
logger.info("transcribe_worker_exit guild_id=%s reason=session_missing", guild_id) + return + if session.closed and session.chunk_queue.empty(): + logger.info("transcribe_worker_exit guild_id=%s reason=session_closed", guild_id) + return + try: + chunk_id = await asyncio.wait_for(session.chunk_queue.get(), timeout=1.0) + except asyncio.TimeoutError: + continue + chunk = session.chunk_meta_by_id.get(chunk_id) + if chunk is None or chunk.get("transcribed"): + session.chunk_queue.task_done() + continue + engine_name = session.engine_name + engine = session.engine_instance + if engine_name is None or engine is None: + session.chunk_queue.task_done() + continue + try: + utterances = transcribe_audio_file(engine_name, engine, Path(str(chunk["file_path"]))) + lines = build_transcript_lines_for_chunk(bot.get_guild(guild_id), session, chunk, utterances) + session.chunk_transcripts[chunk_id] = lines + chunk["transcribed"] = True + await try_post_live_lines(bot.get_guild(guild_id), session, [item["line"] for item in lines]) + except Exception: + logger.exception("transcribe_chunk_failed guild_id=%s chunk_id=%s", guild_id, chunk_id) + finally: + session.chunk_queue.task_done() - async def _slice_finished(sink: object, channel: object, *_: object): - logger.info( - "transcribe_finalize_callback_start guild_id=%s slice=%s sink_type=%s", - guild.id, - session.slice_number, - type(sink).__name__, - ) - copied = copy_recorded_audio_slice(sink, session) + +async def transcription_live_loop(guild_id: int): + logger.info("transcribe_live_loop_started guild_id=%s interval_seconds=%s", guild_id, TRANSCRIBE_SLICE_SECONDS) + while True: + await asyncio.sleep(TRANSCRIBE_SLICE_SECONDS) + session = get_transcription_session(guild_id) + if session is None or session.closed: + logger.info("transcribe_live_loop_exit guild_id=%s", guild_id) + return + guild = bot.get_guild(guild_id) + vc = guild.voice_client if guild is not None else None + if vc is None or not vc.is_connected() or not 
getattr(vc, "recording", False): + continue + await flush_active_recording_buffers(session, guild_id) + if session.pending_live_lines: + pending = list(session.pending_live_lines) + session.pending_live_lines.clear() + await try_post_live_lines(guild, session, pending) + + +async def finalize_transcription_session(guild: discord.Guild, session: GuildTranscriptionSession, vc: discord.VoiceClient | None) -> Path: + session.closed = True + if vc is not None and vc.is_connected() and getattr(vc, "recording", False): try: - await asyncio.wait_for(post_transcription_slice_lines(guild, session, copied), timeout=90) + vc.stop_recording() except Exception: - logger.exception("transcribe_slice_post_failed guild_id=%s", guild.id) - logger.info( - "transcribe_finalize_callback_done guild_id=%s slice=%s copied_users=%s", - guild.id, - session.slice_number, - sorted(copied.keys()), - ) - done_event.set() + logger.exception("transcribe_stop_recording_failed guild_id=%s", guild.id) + await asyncio.wait_for(session.capture_done.wait(), timeout=60) + await flush_active_recording_buffers(session, guild.id) + if not session.chunk_queue.empty(): + await session.chunk_queue.join() - try: - vc.stop_recording() - logger.info("transcribe_finalize_stop_recording_called guild_id=%s slice=%s", guild.id, session.slice_number) - except Exception: - logger.exception("transcribe_slice_stop_failed guild_id=%s", guild.id) - done_event.set() - await asyncio.wait_for(done_event.wait(), timeout=120) - logger.info("transcribe_finalize_done_event_received guild_id=%s slice=%s", guild.id, session.slice_number) - if session.closed: - logger.info("transcribe_finalize_exit_session_closed guild_id=%s slice=%s", guild.id, session.slice_number) - return - try: - new_sink = discord.sinks.WaveSink() - session.active_sink = new_sink - vc.start_recording(new_sink, _slice_finished, None) - session.recording_failure_count = 0 - logger.info("transcribe_finalize_restart_recording_ok guild_id=%s next_slice=%s", 
guild.id, session.slice_number + 1) - except Exception: - session.recording_failure_count += 1 - logger.exception( - "transcribe_slice_restart_failed guild_id=%s failure_count=%s", - guild.id, - session.recording_failure_count, - ) - if session.recording_failure_count >= TRANSCRIBE_MAX_FAILURES: - await teardown_transcription_session_for_recording_failure(guild, session, "restart_failed") + engine_name = session.engine_name + engine = session.engine_instance + if engine_name is not None and engine is not None: + for chunk in session.chunk_meta: + if chunk.get("transcribed"): + continue + try: + utterances = transcribe_audio_file(engine_name, engine, Path(str(chunk["file_path"]))) + lines = build_transcript_lines_for_chunk(guild, session, chunk, utterances) + session.chunk_transcripts[int(chunk["chunk_id"])] = lines + chunk["transcribed"] = True + except Exception: + logger.exception("transcribe_final_pass_failed guild_id=%s chunk_id=%s", guild.id, chunk.get("chunk_id")) + + all_lines: list[dict[str, object]] = [] + for chunk in session.chunk_meta: + all_lines.extend(session.chunk_transcripts.get(int(chunk["chunk_id"]), [])) + all_lines.sort(key=lambda item: float(item.get("absolute_start", 0.0))) + transcript_file = session.temp_dir / "final_transcript.txt" + transcript_text = "\n".join(item["line"] for item in all_lines) if all_lines else "(no transcribed speech)" + transcript_file.write_text(transcript_text, encoding="utf-8") + + thread = find_active_transcription_thread(guild, session) + if thread is not None: + await thread.send("βœ… Final transcript complete.", file=discord.File(str(transcript_file), filename="final_transcript.txt")) + + session.finalized = True + return transcript_file async def teardown_transcription_session_for_recording_failure( @@ -823,117 +928,6 @@ async def teardown_transcription_session_for_recording_failure( ) -async def transcription_live_loop(guild_id: int): - logger.info("transcribe_live_loop_started guild_id=%s 
interval_seconds=%s", guild_id, TRANSCRIBE_SLICE_SECONDS) - while True: - await asyncio.sleep(TRANSCRIBE_SLICE_SECONDS) - session = get_transcription_session(guild_id) - if session is None or session.closed: - logger.info("transcribe_live_loop_exit guild_id=%s reason=%s", guild_id, "session_missing" if session is None else "session_closed") - return - guild = bot.get_guild(guild_id) - if guild is None: - logger.warning("transcribe_live_loop_skip guild_id=%s reason=guild_not_found", guild_id) - continue - vc = guild.voice_client - if vc is None or not vc.is_connected() or not getattr(vc, "recording", False): - logger.warning( - "transcribe_live_loop_skip guild_id=%s reason=voice_not_recording vc_present=%s vc_connected=%s vc_recording=%s", - guild_id, - vc is not None, - vc.is_connected() if vc is not None else None, - getattr(vc, "recording", False) if vc is not None else None, - ) - continue - try: - logger.info("transcribe_live_loop_finalize guild_id=%s current_slice=%s", guild_id, session.slice_number) - await finalize_recording_slice(vc, guild, session) - except asyncio.TimeoutError: - recovery_session = get_transcription_session(guild_id) - if recovery_session is None or recovery_session.closed: - logger.warning("transcribe_slice_timeout guild_id=%s failure_count=%s", guild_id, None) - continue - recovery_session.recording_failure_count += 1 - logger.warning( - "transcribe_slice_timeout guild_id=%s failure_count=%s", - guild_id, - recovery_session.recording_failure_count, - ) - recovery_guild = bot.get_guild(guild_id) - if recovery_session.recording_failure_count >= TRANSCRIBE_MAX_FAILURES: - await teardown_transcription_session_for_recording_failure( - recovery_guild, - recovery_session, - "finalize_timeout", - ) - continue - recovery_vc = recovery_guild.voice_client if recovery_guild is not None else None - next_slice = (recovery_session.slice_number + 1) if recovery_session is not None else None - try: - should_recover = ( - recovery_session is not None - 
and not recovery_session.closed - and recovery_vc is not None - and recovery_vc.is_connected() - and not getattr(recovery_vc, "recording", False) - ) - logger.info( - "transcribe_timeout_recovery_check guild_id=%s next_slice=%s session_open=%s vc_present=%s vc_connected=%s vc_recording=%s", - guild_id, - next_slice, - recovery_session is not None and not recovery_session.closed, - recovery_vc is not None, - recovery_vc.is_connected() if recovery_vc is not None else None, - getattr(recovery_vc, "recording", False) if recovery_vc is not None else None, - ) - if not should_recover: - logger.info("transcribe_timeout_recovery_skipped guild_id=%s next_slice=%s", guild_id, next_slice) - continue - - done_event = asyncio.Event() - recovery_session.active_slice_done = done_event - - async def _slice_finished(sink: object, channel: object, *_: object): - logger.info( - "transcribe_timeout_recovery_callback_start guild_id=%s slice=%s sink_type=%s", - recovery_guild.id, - recovery_session.slice_number, - type(sink).__name__, - ) - copied = copy_recorded_audio_slice(sink, recovery_session) - try: - await asyncio.wait_for(post_transcription_slice_lines(recovery_guild, recovery_session, copied), timeout=90) - except Exception: - logger.exception("transcribe_slice_post_failed guild_id=%s", recovery_guild.id) - logger.info( - "transcribe_timeout_recovery_callback_done guild_id=%s slice=%s copied_users=%s", - recovery_guild.id, - recovery_session.slice_number, - sorted(copied.keys()), - ) - done_event.set() - - logger.info("transcribe_timeout_recovery_start guild_id=%s next_slice=%s", guild_id, next_slice) - recovery_sink = discord.sinks.WaveSink() - recovery_session.active_sink = recovery_sink - recovery_vc.start_recording(recovery_sink, _slice_finished, None) - recovery_session.recording_failure_count = 0 - logger.info("transcribe_timeout_recovery_success guild_id=%s next_slice=%s", guild_id, next_slice) - except Exception: - recovery_session.recording_failure_count += 1 - 
logger.exception( - "transcribe_timeout_recovery_failed guild_id=%s next_slice=%s failure_count=%s", - guild_id, - next_slice, - recovery_session.recording_failure_count, - ) - if recovery_session.recording_failure_count >= TRANSCRIBE_MAX_FAILURES: - await teardown_transcription_session_for_recording_failure( - recovery_guild, - recovery_session, - "timeout_recovery_restart_failed", - ) - def format_duration(duration_seconds: int) -> str: mins, secs = divmod(max(duration_seconds, 0), 60) hours, mins = divmod(mins, 60) @@ -1186,7 +1180,7 @@ def describe_transcription_session_state(session: GuildTranscriptionSession | No return "session=none" return ( f"temp_dir={session.temp_dir} " - f"slice={session.slice_number} " + f"captures={session.capture_index} " f"consented={len(session.consented_user_ids)} " f"thread_id={session.transcript_thread_id}" ) @@ -2338,7 +2332,7 @@ async def gskip(interaction: discord.Interaction): vc.stop() await interaction.response.send_message("⏭️ Skipped current track.", ephemeral=True) @discord.guild_only() -@bot.slash_command(name="gtranscribe", description="Join your voice channel and start live transcription in a transcript thread.") +@bot.slash_command(name="gtranscribe", description="Capture voice now, with deferred transcription and guaranteed final transcript at session end.") async def gtranscribe(interaction: discord.Interaction): logger.info("transcribe_command_start context=%r", build_interaction_log_context(interaction, vc=getattr(getattr(interaction, "guild", None), "voice_client", None))) if interaction.guild is None or interaction.channel is None: @@ -2452,30 +2446,18 @@ async def gtranscribe(interaction: discord.Interaction): sorted(session.consented_user_ids), ) - async def _slice_finished(sink: object, channel: object, *_: object): + async def _capture_finished(sink: object, channel: object, *_: object): logger.info( - "transcribe_initial_callback_start guild_id=%s slice=%s sink_type=%s", + 
"transcribe_capture_callback_start guild_id=%s sink_type=%s", interaction.guild.id, - session.slice_number, type(sink).__name__, ) - copied = copy_recorded_audio_slice(sink, session) - try: - await asyncio.wait_for(post_transcription_slice_lines(interaction.guild, session, copied), timeout=90) - except Exception: - logger.exception("transcribe_slice_post_failed guild_id=%s", interaction.guild.id) - logger.info( - "transcribe_initial_callback_done guild_id=%s slice=%s copied_users=%s", - interaction.guild.id, - session.slice_number, - sorted(copied.keys()), - ) - session.active_slice_done.set() + session.capture_done.set() try: sink = discord.sinks.WaveSink() session.active_sink = sink - vc.start_recording(sink, _slice_finished, None) + vc.start_recording(sink, _capture_finished, None) logger.info( "transcribe_start_recording_ok guild_id=%s voice_channel_id=%s thread_id=%s", interaction.guild.id, @@ -2497,8 +2479,9 @@ async def _slice_finished(sink: object, channel: object, *_: object): return session.loop_task = asyncio.create_task(transcription_live_loop(interaction.guild.id)) + session.worker_task = asyncio.create_task(transcription_worker_loop(interaction.guild.id)) await interaction.followup.send( - f"πŸŽ™οΈ Live transcription started in {voice_channel.mention}. Updates will be posted in {transcript_thread.mention}.", + f"πŸŽ™οΈ Transcription capture started in {voice_channel.mention}. 
Live posts may lag; a complete transcript is guaranteed when `/gendsession` is used.", ephemeral=True, ) @@ -2528,7 +2511,7 @@ async def gsetname( @discord.guild_only() -@bot.slash_command(name="gendsession", description="Stop live transcription and disconnect from voice.") +@bot.slash_command(name="gendsession", description="Stop capture, finalize deferred transcription, and disconnect from voice.") async def gendsession(interaction: discord.Interaction): if interaction.guild is None: await interaction.response.send_message("This command only works in a server.", ephemeral=True) @@ -2542,24 +2525,21 @@ async def gendsession(interaction: discord.Interaction): return vc = interaction.guild.voice_client await interaction.response.defer(ephemeral=True) - if vc is not None and vc.is_connected() and getattr(vc, "recording", False): - try: - session.closed = True - await finalize_recording_slice(vc, interaction.guild, session) - except asyncio.TimeoutError: - logger.warning("transcribe_final_slice_timeout guild_id=%s", interaction.guild.id) + try: + await finalize_transcription_session(interaction.guild, session, vc) + except asyncio.TimeoutError: + logger.warning("transcribe_finalization_timeout guild_id=%s", interaction.guild.id) if session.loop_task is not None: session.loop_task.cancel() + if session.worker_task is not None: + session.worker_task.cancel() if vc is not None and vc.is_connected(): try: await vc.disconnect(force=True) except (discord.HTTPException, discord.ClientException): pass - thread = interaction.guild.get_thread(session.transcript_thread_id) - if thread is not None: - await thread.send("πŸ›‘ Transcription session ended. Bot disconnected from voice.") remove_transcription_session(interaction.guild.id) - await interaction.followup.send("βœ… Transcription session ended and bot disconnected.", ephemeral=True) + await interaction.followup.send("βœ… Transcription capture ended. 
Final transcript generated.", ephemeral=True) @bot.slash_command(name="gokibothelp", description="Show all available GokiBot commands.") @@ -2572,9 +2552,9 @@ async def gokibothelp(interaction: discord.Interaction): "- `/gplay ` β€” Queue and play YouTube audio.", "- `/gqueue` β€” Show the current playback queue.", "- `/gskip` β€” Skip the currently playing track.", - "- `/gtranscribe` β€” Start live transcription in a timestamped transcript thread.", + "- `/gtranscribe` β€” Start durable voice capture (live transcript posts may lag).", "- `/gsetname ` β€” Set your transcript display name", - "- `/gendsession` β€” Stop live transcription and disconnect.", + "- `/gendsession` β€” Stop capture, finalize, and post the complete transcript.", "- `/gokibothelp` β€” Show this help message." ] if is_dev_user(interaction.user.id): From 747c976b0b14061ab15087b1a37d64f85edc801c Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 11:31:39 -0500 Subject: [PATCH 36/54] Make transcription consent session-scoped --- poopbot.py | 175 ++++++----------------------------------------------- 1 file changed, 17 insertions(+), 158 deletions(-) diff --git a/poopbot.py b/poopbot.py index 6537604..4f31d19 100644 --- a/poopbot.py +++ b/poopbot.py @@ -153,7 +153,6 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic ) TRANSCRIBE_SLICE_SECONDS = max(int(os.getenv("TRANSCRIBE_SLICE_SECONDS", "12")), 5) TRANSCRIBE_MAX_FAILURES = max(int(os.getenv("TRANSCRIBE_MAX_FAILURES", "3")), 1) -TRANSCRIBE_CONSENT_VALID_DAYS = 180 TRANSCRIBE_CONSENT_EMOJI = "βœ…" # ========================= # MESSAGES @@ -449,99 +448,6 @@ def normalize_transcript_display_name(name: str) -> str: return compact[:64] -def transcription_consent_is_active(consented_at: str | None, expires_at: str | None) -> bool: - if not consented_at or not expires_at: - return False - try: - consented_dt = datetime.fromisoformat(consented_at) - expires_dt = datetime.fromisoformat(expires_at) - except 
ValueError: - return False - if consented_dt.tzinfo is None: - consented_dt = consented_dt.replace(tzinfo=timezone.utc) - if expires_dt.tzinfo is None: - expires_dt = expires_dt.replace(tzinfo=timezone.utc) - return datetime.now(timezone.utc) <= expires_dt - - -def get_active_transcription_consent(guild_id: int, user_id: int) -> tuple[str | None, str | None, str | None]: - with db_config() as conn: - row = conn.execute( - """ - SELECT display_name, consented_at_utc, expires_at_utc - FROM transcription_consent - WHERE guild_id=? AND user_id=? - """, - (guild_id, user_id), - ).fetchone() - if not row: - return None, None, None - display_name = normalize_transcript_display_name(str(row["display_name"] or "")) - consented_at = row["consented_at_utc"] - expires_at = row["expires_at_utc"] - if not transcription_consent_is_active(consented_at, expires_at): - return None, consented_at, expires_at - if not display_name: - return None, consented_at, expires_at - return display_name, consented_at, expires_at - - -async def upsert_transcription_consent(guild_id: int, user_id: int, display_name: str): - clean_name = normalize_transcript_display_name(display_name) - now_utc = datetime.now(timezone.utc) - expires = now_utc + timedelta(days=TRANSCRIBE_CONSENT_VALID_DAYS) - async with db_write_lock: - with db_config() as conn: - conn.execute( - """ - INSERT INTO transcription_consent( - guild_id, user_id, display_name, consented_at_utc, expires_at_utc - ) VALUES (?, ?, ?, ?, ?) 
- ON CONFLICT(guild_id, user_id) DO UPDATE SET - display_name=excluded.display_name, - consented_at_utc=excluded.consented_at_utc, - expires_at_utc=excluded.expires_at_utc - """, - ( - guild_id, - user_id, - clean_name, - now_utc.isoformat(), - expires.isoformat(), - ), - ) - - -async def mark_transcription_consent_prompt_sent(guild_id: int, user_id: int): - now_utc = datetime.now(timezone.utc) - async with db_write_lock: - with db_config() as conn: - conn.execute( - """ - INSERT INTO transcription_consent_prompts_sent( - guild_id, user_id, prompted_at_utc - ) VALUES (?, ?, ?) - ON CONFLICT(guild_id, user_id) DO UPDATE SET - prompted_at_utc=excluded.prompted_at_utc - """, - ( - guild_id, - user_id, - now_utc.isoformat(), - ), - ) - - -async def send_transcription_consent_dm(guild: discord.Guild, member: discord.Member): - dm_channel = member.dm_channel or await member.create_dm() - await dm_channel.send( - f"πŸŽ™οΈ Live transcription is active in **{guild.name}**. " - f"React with {TRANSCRIBE_CONSENT_EMOJI} in the consent thread message to opt in. " - "To change your display name, run `/gsetname ` in the server." - ) - logger.info("transcribe_consent_dm_sent guild_id=%s user_id=%s", guild.id, member.id) - - def find_active_transcription_thread(guild: discord.Guild, session: GuildTranscriptionSession) -> discord.Thread | None: thread = guild.get_thread(session.transcript_thread_id) if thread is not None: @@ -589,7 +495,7 @@ async def prompt_transcription_consent( ( f"{mentions}\n" f"React with {TRANSCRIBE_CONSENT_EMOJI} to opt into transcription for this session. " - "Only consented users will be transcribed." + "Only users who react in this thread are transcribed for this session." 
) ) await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) @@ -602,7 +508,6 @@ async def prompt_transcription_consent( transcript_thread.id, member.id, ) - await mark_transcription_consent_prompt_sent(guild.id, member.id) async def sync_voice_channel_members_for_transcription( @@ -627,12 +532,6 @@ async def sync_voice_channel_members_for_transcription( logger.info("transcribe_thread_add_user_ok guild_id=%s thread_id=%s user_id=%s", guild.id, transcript_thread.id, member.id) except (discord.Forbidden, discord.HTTPException): logger.exception("transcribe_thread_add_user_failed guild_id=%s thread_id=%s user_id=%s", guild.id, transcript_thread.id, member.id) - display_name, _, _ = get_active_transcription_consent(guild.id, member.id) - if display_name: - session.consented_user_ids.add(member.id) - session.aliases_by_user[member.id] = display_name - logger.info("transcribe_member_has_active_consent guild_id=%s user_id=%s display_name=%r", guild.id, member.id, display_name) - continue if member.id in session.prompted_user_ids: logger.info( "transcribe_consent_dm_skip guild_id=%s user_id=%s prompt_reason=session_already_prompted", @@ -640,17 +539,7 @@ async def sync_voice_channel_members_for_transcription( member.id, ) continue - try: - await send_transcription_consent_dm(guild, member) - session.prompted_user_ids.add(member.id) - logger.info( - "transcribe_consent_dm_sent guild_id=%s user_id=%s prompt_reason=session_new", - guild.id, - member.id, - ) - await mark_transcription_consent_prompt_sent(guild.id, member.id) - except (discord.Forbidden, discord.HTTPException): - logger.exception("transcribe_consent_dm_failed guild_id=%s user_id=%s prompt_reason=send_failed", guild.id, member.id) + session.prompted_user_ids.add(member.id) logger.info( "transcribe_sync_members_done guild_id=%s discovered_members=%s consented_members=%s", guild.id, @@ -1328,14 +1217,6 @@ def init_config_db(): PRIMARY KEY (guild_id, user_id) ); """) - conn.execute(""" - CREATE TABLE IF NOT 
EXISTS transcription_consent_prompts_sent ( - guild_id INTEGER NOT NULL, - user_id INTEGER NOT NULL, - prompted_at_utc TEXT NOT NULL, - PRIMARY KEY (guild_id, user_id) - ); - """) columns = { row["name"] for row in conn.execute("PRAGMA table_info(tickets)").fetchall() @@ -1779,16 +1660,6 @@ async def on_voice_state_update(member: discord.Member, before: discord.VoiceSta await thread.add_user(member) except (discord.Forbidden, discord.HTTPException): pass - display_name, _, _ = get_active_transcription_consent(member.guild.id, member.id) - if display_name: - session.consented_user_ids.add(member.id) - session.aliases_by_user[member.id] = display_name - logger.info( - "transcribe_voice_join_skip_prompt guild_id=%s user_id=%s prompt_reason=already_consented", - member.guild.id, - member.id, - ) - return if member.id in session.prompted_user_ids: logger.info( "transcribe_voice_join_skip_prompt guild_id=%s user_id=%s prompt_reason=session_already_prompted", @@ -1796,20 +1667,10 @@ async def on_voice_state_update(member: discord.Member, before: discord.VoiceSta member.id, ) return - try: - await send_transcription_consent_dm(member.guild, member) - session.prompted_user_ids.add(member.id) - logger.info( - "transcribe_voice_join_dm_sent guild_id=%s user_id=%s prompt_reason=session_new", - member.guild.id, - member.id, - ) - await mark_transcription_consent_prompt_sent(member.guild.id, member.id) - except (discord.Forbidden, discord.HTTPException): - logger.exception( - "transcribe_voice_join_dm_failed guild_id=%s user_id=%s prompt_reason=send_failed", - member.guild.id, - member.id, + session.prompted_user_ids.add(member.id) + if thread is not None: + await thread.send( + f"{member.mention} transcription in progress. React with {TRANSCRIBE_CONSENT_EMOJI} to be included in this session." 
) if thread is not None: await prompt_transcription_consent(member.guild, session, thread, [member]) @@ -1834,8 +1695,6 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): if member is None or member.bot: return clean_name = normalize_transcript_display_name(member.display_name) - if clean_name: - await upsert_transcription_consent(payload.guild_id, payload.user_id, clean_name) session = get_transcription_session(payload.guild_id) if session is not None and not session.closed: session.consented_user_ids.add(payload.user_id) @@ -1847,7 +1706,7 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): await thread.add_user(member) except (discord.Forbidden, discord.HTTPException): pass - await thread.send(f"βœ… {member.mention} opted into transcription.") + await thread.send(f"{member.mention} included for this session.") return active_message_id = gget(payload.guild_id, "active_message_id") if not active_message_id or str(payload.message_id) != active_message_id: @@ -2387,9 +2246,9 @@ async def gtranscribe(interaction: discord.Interaction): ) await transcript_thread.send( ( - f"πŸŽ™οΈ **Transcription session has begun** for {voice_channel.mention}.\n" - f"React with {TRANSCRIBE_CONSENT_EMOJI} on the consent prompt to opt in.\n" - "To change your display name `/gsetname `." + f"Transcription started for {voice_channel.mention}.\n" + f"React with {TRANSCRIBE_CONSENT_EMOJI} on the consent message to be included in this session.\n" + "Use /gsetname to set your name for this session." 
) ) @@ -2487,7 +2346,7 @@ async def _capture_finished(sink: object, channel: object, *_: object): @discord.guild_only() -@bot.slash_command(name="gsetname", description="Set your transcription display name") +@bot.slash_command(name="gsetname", description="Set your transcription display name for the current session") async def gsetname( interaction: discord.Interaction, name: discord.Option(str, "Display name to use in transcripts"), @@ -2499,13 +2358,13 @@ async def gsetname( if not clean_name: await interaction.response.send_message("Display name cannot be empty.", ephemeral=True) return - await upsert_transcription_consent(interaction.guild.id, interaction.user.id, clean_name) session = get_transcription_session(interaction.guild.id) - if session is not None: - session.consented_user_ids.add(interaction.user.id) - session.aliases_by_user[interaction.user.id] = clean_name + if session is None: + await interaction.response.send_message("No active transcription session in this server.", ephemeral=True) + return + session.aliases_by_user[interaction.user.id] = clean_name await interaction.response.send_message( - f"βœ… Saved transcript display name **{clean_name}**.", + f"Saved transcript display name: **{clean_name}**.", ephemeral=True, ) @@ -2553,7 +2412,7 @@ async def gokibothelp(interaction: discord.Interaction): "- `/gqueue` β€” Show the current playback queue.", "- `/gskip` β€” Skip the currently playing track.", "- `/gtranscribe` β€” Start durable voice capture (live transcript posts may lag).", - "- `/gsetname ` β€” Set your transcript display name", + "- `/gsetname ` β€” Set your transcript display name for the current session", "- `/gendsession` β€” Stop capture, finalize, and post the complete transcript.", "- `/gokibothelp` β€” Show this help message." 
] From cb943a32bebc998d67b044049d9927ca1a96e95c Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 11:59:03 -0500 Subject: [PATCH 37/54] Fix transcription consent prompting and thread reaction handling --- poopbot.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/poopbot.py b/poopbot.py index 4f31d19..1fc2301 100644 --- a/poopbot.py +++ b/poopbot.py @@ -539,7 +539,6 @@ async def sync_voice_channel_members_for_transcription( member.id, ) continue - session.prompted_user_ids.add(member.id) logger.info( "transcribe_sync_members_done guild_id=%s discovered_members=%s consented_members=%s", guild.id, @@ -1667,11 +1666,6 @@ async def on_voice_state_update(member: discord.Member, before: discord.VoiceSta member.id, ) return - session.prompted_user_ids.add(member.id) - if thread is not None: - await thread.send( - f"{member.mention} transcription in progress. React with {TRANSCRIBE_CONSENT_EMOJI} to be included in this session." 
- ) if thread is not None: await prompt_transcription_consent(member.guild, session, thread, [member]) @@ -1686,27 +1680,32 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): return emoji = str(payload.emoji) consent_prompt = transcription_consent_prompts.get(payload.message_id) - if consent_prompt is not None and emoji == TRANSCRIBE_CONSENT_EMOJI: - guild_id, _ = consent_prompt - if guild_id != payload.guild_id: + if emoji == TRANSCRIBE_CONSENT_EMOJI: + session = get_transcription_session(payload.guild_id) + consent_via_prompt = consent_prompt is not None + if session is None or session.closed: + return + if consent_via_prompt: + guild_id, _ = consent_prompt + if guild_id != payload.guild_id: + return + elif payload.channel_id != session.transcript_thread_id: return guild = bot.get_guild(payload.guild_id) member = guild.get_member(payload.user_id) if guild is not None else None if member is None or member.bot: return clean_name = normalize_transcript_display_name(member.display_name) - session = get_transcription_session(payload.guild_id) - if session is not None and not session.closed: - session.consented_user_ids.add(payload.user_id) - if clean_name: - session.aliases_by_user[payload.user_id] = clean_name - thread = find_active_transcription_thread(guild, session) - if thread is not None: - try: - await thread.add_user(member) - except (discord.Forbidden, discord.HTTPException): - pass - await thread.send(f"{member.mention} included for this session.") + session.consented_user_ids.add(payload.user_id) + if clean_name: + session.aliases_by_user[payload.user_id] = clean_name + thread = find_active_transcription_thread(guild, session) + if thread is not None: + try: + await thread.add_user(member) + except (discord.Forbidden, discord.HTTPException): + pass + await thread.send(f"{member.mention} included for this session.") return active_message_id = gget(payload.guild_id, "active_message_id") if not active_message_id or 
str(payload.message_id) != active_message_id: From da72bc7f76b8465cb8c10a0a0e7fd2a77a1d9fd7 Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 12:07:22 -0500 Subject: [PATCH 38/54] Improve gtranscribe consent UX and add status debug command --- poopbot.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/poopbot.py b/poopbot.py index 1fc2301..41da835 100644 --- a/poopbot.py +++ b/poopbot.py @@ -491,11 +491,13 @@ async def prompt_transcription_consent( ) return mentions = " ".join(member.mention for member in non_consented) - consent_message = await transcript_thread.send( - ( + consent_message = await transcript_thread.send(f"{mentions}\nPreparing consent instructions…") + await consent_message.edit( + content=( f"{mentions}\n" - f"React with {TRANSCRIBE_CONSENT_EMOJI} to opt into transcription for this session. " - "Only users who react in this thread are transcribed for this session." + f"React with {TRANSCRIBE_CONSENT_EMOJI} on this exact message to opt into transcription for this session.\n" + f"Required reaction location: {transcript_thread.mention} β†’ {consent_message.jump_url}\n" + "Only users who react on this message are transcribed for this session." ) ) await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) @@ -2247,6 +2249,7 @@ async def gtranscribe(interaction: discord.Interaction): ( f"Transcription started for {voice_channel.mention}.\n" f"React with {TRANSCRIBE_CONSENT_EMOJI} on the consent message to be included in this session.\n" + "React on the consent message in the transcript thread, not on the slash-command response.\n" "Use /gsetname to set your name for this session." 
) ) @@ -2294,7 +2297,16 @@ async def gtranscribe(interaction: discord.Interaction): session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id) session.engine_name = engine_name session.engine_instance = engine_instance + if isinstance(interaction.user, discord.Member): + session.consented_user_ids.add(interaction.user.id) + clean_name = normalize_transcript_display_name(interaction.user.display_name) + if clean_name: + session.aliases_by_user[interaction.user.id] = clean_name transcription_sessions[interaction.guild.id] = session + await transcript_thread.send( + f"ℹ️ Auto-consented command invoker {interaction.user.mention} for this session. " + "Everyone else must react on the consent message." + ) await sync_voice_channel_members_for_transcription(interaction.guild, voice_channel, session, transcript_thread) logger.info( "transcribe_session_initialized guild_id=%s voice_channel_id=%s thread_id=%s consented_users=%s", @@ -2339,11 +2351,47 @@ async def _capture_finished(sink: object, channel: object, *_: object): session.loop_task = asyncio.create_task(transcription_live_loop(interaction.guild.id)) session.worker_task = asyncio.create_task(transcription_worker_loop(interaction.guild.id)) await interaction.followup.send( - f"πŸŽ™οΈ Transcription capture started in {voice_channel.mention}. Live posts may lag; a complete transcript is guaranteed when `/gendsession` is used.", + ( + f"πŸŽ™οΈ Transcription capture started in {voice_channel.mention}. " + "I auto-consented you for this session; other users must react on the consent message in the transcript thread. " + "Live posts may lag; a complete transcript is guaranteed when `/gendsession` is used." 
+ ), ephemeral=True, ) +@discord.guild_only() +@bot.slash_command(name="gtranscribe_status", description="Debug current transcription consent state") +async def gtranscribe_status(interaction: discord.Interaction): + if interaction.guild is None: + await interaction.response.send_message("This command only works in a server.", ephemeral=True) + return + session = get_transcription_session(interaction.guild.id) + if session is None or session.closed: + await interaction.response.send_message("No active transcription session in this server.", ephemeral=True) + return + voice_channel = interaction.guild.get_channel(session.voice_channel_id) + if not isinstance(voice_channel, discord.VoiceChannel): + await interaction.response.send_message( + f"Active session found, but voice channel `{session.voice_channel_id}` is unavailable.", + ephemeral=True, + ) + return + consented_ids = sorted(session.consented_user_ids) + lines = [ + f"Thread: <#{session.transcript_thread_id}>", + f"Consented user IDs: `{consented_ids}`", + "Current voice members:", + ] + members = [member for member in voice_channel.members if not member.bot] + if not members: + lines.append("- *(none)*") + for member in members: + opted_in = "yes" if member.id in session.consented_user_ids else "no" + lines.append(f"- {member.mention} (`{member.id}`): opted_in={opted_in}") + await interaction.response.send_message("\n".join(lines), ephemeral=True) + + @discord.guild_only() @bot.slash_command(name="gsetname", description="Set your transcription display name for the current session") async def gsetname( From 7b7edf57514ab56ca00051035507226f227e89ed Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 12:23:34 -0500 Subject: [PATCH 39/54] Add consent reaction flow logging for transcription --- poopbot.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/poopbot.py b/poopbot.py index 41da835..cdc6644 100644 --- a/poopbot.py +++ b/poopbot.py @@ -1685,20 
+1685,64 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): if emoji == TRANSCRIBE_CONSENT_EMOJI: session = get_transcription_session(payload.guild_id) consent_via_prompt = consent_prompt is not None + logger.info( + "transcribe_consent_reaction_received guild_id=%s message_id=%s channel_id=%s user_id=%s session_present=%s", + payload.guild_id, + payload.message_id, + payload.channel_id, + payload.user_id, + session is not None, + ) if session is None or session.closed: + logger.info( + "transcribe_consent_reaction_rejected_session_missing_or_closed guild_id=%s message_id=%s channel_id=%s user_id=%s session_present=%s session_closed=%s", + payload.guild_id, + payload.message_id, + payload.channel_id, + payload.user_id, + session is not None, + session.closed if session is not None else None, + ) return if consent_via_prompt: guild_id, _ = consent_prompt if guild_id != payload.guild_id: return elif payload.channel_id != session.transcript_thread_id: + logger.info( + "transcribe_consent_reaction_rejected_thread_mismatch guild_id=%s message_id=%s channel_id=%s user_id=%s consent_via_prompt=%s expected_thread_id=%s", + payload.guild_id, + payload.message_id, + payload.channel_id, + payload.user_id, + consent_via_prompt, + session.transcript_thread_id, + ) return guild = bot.get_guild(payload.guild_id) member = guild.get_member(payload.user_id) if guild is not None else None if member is None or member.bot: + logger.info( + "transcribe_consent_reaction_rejected_member_invalid guild_id=%s message_id=%s channel_id=%s user_id=%s guild_present=%s member_present=%s member_is_bot=%s", + payload.guild_id, + payload.message_id, + payload.channel_id, + payload.user_id, + guild is not None, + member is not None, + member.bot if member is not None else None, + ) return clean_name = normalize_transcript_display_name(member.display_name) session.consented_user_ids.add(payload.user_id) + logger.info( + "transcribe_consent_reaction_accepted guild_id=%s 
message_id=%s channel_id=%s user_id=%s consented_count=%s", + payload.guild_id, + payload.message_id, + payload.channel_id, + payload.user_id, + len(session.consented_user_ids), + ) if clean_name: session.aliases_by_user[payload.user_id] = clean_name thread = find_active_transcription_thread(guild, session) From 17ea647b56fdd4f78adaf84301e4f2f597721439 Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 12:26:55 -0500 Subject: [PATCH 40/54] Add consent-aware capture flush logging --- poopbot.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index cdc6644..22d7c8b 100644 --- a/poopbot.py +++ b/poopbot.py @@ -353,6 +353,7 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.user_last_frame: dict[int, int] = {} self.chunk_meta_path = self.temp_dir / "chunk_metadata.jsonl" self.finalized = False + self.no_consented_users_warning_interval = 10 transcription_sessions: dict[int, GuildTranscriptionSession] = {} @@ -626,6 +627,20 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui capture_dir.mkdir(parents=True, exist_ok=True) capture_offset_seconds = (datetime.now(timezone.utc) - session.started_at).total_seconds() produced = 0 + if not session.consented_user_ids: + guild = bot.get_guild(guild_id) + voice_member_count = 0 + if guild is not None: + voice_channel = guild.get_channel(session.voice_channel_id) + voice_member_count = len(getattr(voice_channel, "members", []) or []) + warning_interval = max(1, session.no_consented_users_warning_interval) + if session.capture_index % warning_interval == 0: + logger.warning( + "transcribe_capture_skipped_no_consented_users guild_id=%s capture=%s voice_member_count=%s", + guild_id, + session.capture_index, + voice_member_count, + ) for user_id, audio_obj in sink_audio_data.items(): if not isinstance(user_id, int) or user_id not in session.consented_user_ids: continue @@ -639,11 +654,12 @@ async 
def flush_active_recording_buffers(session: GuildTranscriptionSession, gui except asyncio.QueueFull: logger.warning("transcribe_queue_backpressure guild_id=%s chunk_id=%s", guild_id, chunk["chunk_id"]) logger.info( - "transcribe_capture_flushed guild_id=%s capture=%s produced_chunks=%s queue_size=%s", + "transcribe_capture_flushed guild_id=%s capture=%s produced_chunks=%s queue_size=%s consented_count=%s", guild_id, session.capture_index, produced, session.chunk_queue.qsize(), + len(session.consented_user_ids), ) return produced From ac7009b67a0ce8e8884c6e5101d3fcd4a7ac8984 Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 14:18:35 -0500 Subject: [PATCH 41/54] Fix chunk start offsets to use audio timeline --- poopbot.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/poopbot.py b/poopbot.py index 22d7c8b..b747366 100644 --- a/poopbot.py +++ b/poopbot.py @@ -566,7 +566,6 @@ def capture_chunk_from_sink_audio( user_id: int, audio_obj: object, capture_dir: Path, - capture_offset_seconds: float, ) -> dict[str, object] | None: file_obj = getattr(audio_obj, "file", None) if file_obj is None: @@ -597,7 +596,7 @@ def capture_chunk_from_sink_audio( frames = wav_in.readframes(delta_frames) params = wav_in.getparams() session.user_last_frame[user_id] = total_frames - chunk_start = capture_offset_seconds + (last_frame / max(framerate, 1)) + chunk_start = last_frame / max(framerate, 1) session.chunk_index += 1 chunk_file = capture_dir / f"chunk_{session.chunk_index:06d}_u{user_id}.wav" with wave.open(str(chunk_file), "wb") as wav_out: @@ -625,7 +624,6 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui session.capture_index += 1 capture_dir = session.temp_dir / f"capture_{session.capture_index:05d}" capture_dir.mkdir(parents=True, exist_ok=True) - capture_offset_seconds = (datetime.now(timezone.utc) - session.started_at).total_seconds() produced = 0 if not session.consented_user_ids: guild = 
bot.get_guild(guild_id) @@ -644,7 +642,7 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui for user_id, audio_obj in sink_audio_data.items(): if not isinstance(user_id, int) or user_id not in session.consented_user_ids: continue - chunk = capture_chunk_from_sink_audio(session, user_id, audio_obj, capture_dir, capture_offset_seconds) + chunk = capture_chunk_from_sink_audio(session, user_id, audio_obj, capture_dir) if chunk is None: continue produced += 1 From a6d60d552f661922817657d2cba49fcd84921577 Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 14:26:11 -0500 Subject: [PATCH 42/54] Improve transcription consent visibility and reminders --- poopbot.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/poopbot.py b/poopbot.py index b747366..527d7df 100644 --- a/poopbot.py +++ b/poopbot.py @@ -354,6 +354,10 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.chunk_meta_path = self.temp_dir / "chunk_metadata.jsonl" self.finalized = False self.no_consented_users_warning_interval = 10 + self.latest_consent_message_id: int | None = None + self.latest_consent_jump_url: str | None = None + self.last_unconsented_reminder_at: datetime | None = None + self.unconsented_reminder_interval_seconds = 45 transcription_sessions: dict[int, GuildTranscriptionSession] = {} @@ -502,6 +506,8 @@ async def prompt_transcription_consent( ) ) await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) + session.latest_consent_message_id = consent_message.id + session.latest_consent_jump_url = consent_message.jump_url transcription_consent_prompts[consent_message.id] = (guild.id, session.transcript_thread_id) for member in non_consented: session.prompted_user_ids.add(member.id) @@ -621,15 +627,38 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui sink_audio_data = getattr(sink, "audio_data", None) if not 
isinstance(sink_audio_data, dict): return 0 + + guild = bot.get_guild(guild_id) + voice_channel = guild.get_channel(session.voice_channel_id) if guild is not None else None + voice_members = getattr(voice_channel, "members", []) if voice_channel is not None else [] + voice_member_ids = {member.id for member in voice_members if isinstance(member, discord.Member) and not member.bot} + active_speaker_ids = {user_id for user_id in sink_audio_data if isinstance(user_id, int) and user_id in voice_member_ids} + active_unconsented_ids = sorted(user_id for user_id in active_speaker_ids if user_id not in session.consented_user_ids) + if active_unconsented_ids and guild is not None: + now = datetime.now(timezone.utc) + last_sent = session.last_unconsented_reminder_at + min_interval = timedelta(seconds=max(1, session.unconsented_reminder_interval_seconds)) + if last_sent is None or (now - last_sent) >= min_interval: + thread = find_active_transcription_thread(guild, session) + if thread is not None: + speaker_mentions = " ".join(f"<@{user_id}>" for user_id in active_unconsented_ids) + consent_target = session.latest_consent_jump_url or "(consent prompt not posted yet)" + await thread.send( + ( + f"⚠️ Active speakers detected without consent: {speaker_mentions}.\n" + f"To be included in this session, react with {TRANSCRIBE_CONSENT_EMOJI} to this exact message: {consent_target}\n" + "Only reactions on that exact consent message opt users into transcription." 
+ ) + ) + session.last_unconsented_reminder_at = now + session.capture_index += 1 capture_dir = session.temp_dir / f"capture_{session.capture_index:05d}" capture_dir.mkdir(parents=True, exist_ok=True) produced = 0 if not session.consented_user_ids: - guild = bot.get_guild(guild_id) voice_member_count = 0 if guild is not None: - voice_channel = guild.get_channel(session.voice_channel_id) voice_member_count = len(getattr(voice_channel, "members", []) or []) warning_interval = max(1, session.no_consented_users_warning_interval) if session.capture_index % warning_interval == 0: @@ -2366,6 +2395,17 @@ async def gtranscribe(interaction: discord.Interaction): "Everyone else must react on the consent message." ) await sync_voice_channel_members_for_transcription(interaction.guild, voice_channel, session, transcript_thread) + startup_members = [member for member in voice_channel.members if not member.bot] + startup_consented = [member.mention for member in startup_members if member.id in session.consented_user_ids] + startup_unconsented = [member.mention for member in startup_members if member.id not in session.consented_user_ids] + await transcript_thread.send( + "\n".join([ + "πŸ“‹ Startup consent snapshot:", + f"- Consented: {', '.join(startup_consented) if startup_consented else '(none)'}", + f"- Not consented: {', '.join(startup_unconsented) if startup_unconsented else '(none)'}", + f"- Consent message: {session.latest_consent_jump_url or '(not available yet)'}", + ]) + ) logger.info( "transcribe_session_initialized guild_id=%s voice_channel_id=%s thread_id=%s consented_users=%s", interaction.guild.id, @@ -2438,6 +2478,7 @@ async def gtranscribe_status(interaction: discord.Interaction): consented_ids = sorted(session.consented_user_ids) lines = [ f"Thread: <#{session.transcript_thread_id}>", + f"Consent message jump URL: {session.latest_consent_jump_url or '(not available yet)'}", f"Consented user IDs: `{consented_ids}`", "Current voice members:", ] @@ -2445,8 +2486,11 
@@ async def gtranscribe_status(interaction: discord.Interaction): if not members: lines.append("- *(none)*") for member in members: - opted_in = "yes" if member.id in session.consented_user_ids else "no" - lines.append(f"- {member.mention} (`{member.id}`): opted_in={opted_in}") + prompted = "yes" if member.id in session.prompted_user_ids else "no" + consented = "yes" if member.id in session.consented_user_ids else "no" + lines.append( + f"- {member.mention} (`{member.id}`): prompted={prompted}, consented={consented}" + ) await interaction.response.send_message("\n".join(lines), ephemeral=True) From 9767ccb0702ff342eedd09585d64d1b400a27fe4 Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 14:32:59 -0500 Subject: [PATCH 43/54] Add gtranscribe permission prechecks and thread error handling --- poopbot.py | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 5 deletions(-) diff --git a/poopbot.py b/poopbot.py index 527d7df..df3e6ac 100644 --- a/poopbot.py +++ b/poopbot.py @@ -505,7 +505,20 @@ async def prompt_transcription_consent( "Only users who react on this message are transcribed for this session." ) ) - await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) + try: + await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) + except (discord.Forbidden, discord.HTTPException): + logger.exception( + "transcribe_consent_reaction_failed guild_id=%s thread_id=%s message_id=%s", + guild.id, + transcript_thread.id, + consent_message.id, + ) + await transcript_thread.send( + "⚠️ I couldn't add the consent reaction automatically. " + f"Please grant **Add Reactions** in this text channel, then manually react with {TRANSCRIBE_CONSENT_EMOJI} " + "to the consent message to opt in." 
def build_transcribe_permission_error(
    interaction: discord.Interaction,
    voice_channel: discord.VoiceChannel,
) -> str | None:
    """Return a user-facing error when the bot lacks permissions for `/gtranscribe`.

    The bot member's permissions are inspected in the text channel the command
    was invoked from and in ``voice_channel``.

    Args:
        interaction: The slash-command interaction; its guild and channel are
            the ones checked.
        voice_channel: The voice channel the transcription session would use.

    Returns:
        ``None`` when no required permission is missing, and also when the
        guild, the channel or the bot user is unavailable (the pre-check is
        skipped in that case). Otherwise a multi-line message naming every
        missing permission, grouped by text and voice channel.
    """
    guild = interaction.guild
    channel = interaction.channel
    if guild is None or channel is None or bot.user is None:
        return None
    bot_member = guild.get_member(bot.user.id)
    if bot_member is None:
        return (
            "❌ I couldn't verify my server permissions. Ensure I am a member of this server and retry `/gtranscribe`."
        )

    text_requirements = [
        ("view_channel", "View Channel"),
        ("send_messages", "Send Messages"),
        ("create_private_threads", "Create Private Threads"),
        ("send_messages_in_threads", "Send Messages in Threads"),
        ("manage_threads", "Manage Threads (needed to add users to the transcript thread)"),
        ("add_reactions", "Add Reactions (needed for consent flow)"),
        ("read_message_history", "Read Message History (needed for reactions)"),
    ]
    voice_requirements = [
        ("connect", "Connect"),
        ("speak", "Speak"),
        ("use_voice_activation", "Use Voice Activity (needed to stay connected)"),
    ]

    def missing_labels(perms: object, requirements: list[tuple[str, str]]) -> list[str]:
        # getattr(..., False): a flag the permissions object does not expose is
        # reported as missing instead of raising AttributeError.
        return [label for attr_name, label in requirements if not getattr(perms, attr_name, False)]

    missing_text = missing_labels(channel.permissions_for(bot_member), text_requirements)
    missing_voice = missing_labels(voice_channel.permissions_for(bot_member), voice_requirements)
    if not missing_text and not missing_voice:
        return None

    lines = ["❌ Cannot start `/gtranscribe` because required permissions are missing."]
    if missing_text:
        lines.append(f"- **Grant in text channel {channel.mention}:** {', '.join(missing_text)}")
    if missing_voice:
        lines.append(f"- **Grant in voice channel {voice_channel.mention}:** {', '.join(missing_voice)}")
    return "\n".join(lines)
get_whisper_transcriber() if engine_name is None or engine_instance is None: @@ -2328,10 +2400,18 @@ async def gtranscribe(interaction: discord.Interaction): ) start_label = datetime.now(LOCAL_TZ).strftime("%Y-%m-%d %H:%M") - transcript_thread = await interaction.channel.create_thread( - name=f"transcript-{start_label}", - type=discord.ChannelType.private_thread, - ) + try: + transcript_thread = await interaction.channel.create_thread( + name=f"transcript-{start_label}", + type=discord.ChannelType.private_thread, + ) + except (discord.Forbidden, discord.HTTPException): + logger.exception("transcribe_create_thread_failed context=%r", build_interaction_log_context(interaction, vc=getattr(interaction.guild, "voice_client", None))) + await interaction.followup.send( + "I couldn't create the transcript thread. Grant **Create Private Threads**, **Send Messages in Threads**, and **Manage Threads** in this text channel, then retry `/gtranscribe`.", + ephemeral=True, + ) + return await transcript_thread.send( ( f"Transcription started for {voice_channel.mention}.\n" From 331bdd5e235dec3894f2de82349ca9b642646b6b Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 14:38:27 -0500 Subject: [PATCH 44/54] Improve transcription with rolling windows and overlap --- poopbot.py | 122 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 106 insertions(+), 16 deletions(-) diff --git a/poopbot.py b/poopbot.py index df3e6ac..ae92f82 100644 --- a/poopbot.py +++ b/poopbot.py @@ -151,9 +151,20 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic FETCH_TRACK_INFO_TIMEOUT_MESSAGE = ( "Timed out while fetching track info from YouTube. Please try again in a moment." 
) -TRANSCRIBE_SLICE_SECONDS = max(int(os.getenv("TRANSCRIBE_SLICE_SECONDS", "12")), 5) +TRANSCRIBE_WINDOW_SECONDS = max(float(os.getenv("TRANSCRIBE_WINDOW_SECONDS", "2.0")), 0.5) +TRANSCRIBE_OVERLAP_SECONDS = max(float(os.getenv("TRANSCRIBE_OVERLAP_SECONDS", "0.5")), 0.0) +if TRANSCRIBE_OVERLAP_SECONDS >= TRANSCRIBE_WINDOW_SECONDS: + TRANSCRIBE_OVERLAP_SECONDS = max(0.0, TRANSCRIBE_WINDOW_SECONDS - 0.1) +TRANSCRIBE_EMIT_INTERVAL_SECONDS = max( + float(os.getenv("TRANSCRIBE_EMIT_INTERVAL_SECONDS", str(max(0.5, TRANSCRIBE_WINDOW_SECONDS - TRANSCRIBE_OVERLAP_SECONDS)))), + 0.25, +) +TRANSCRIBE_MAX_QUEUE_DEPTH = max(int(os.getenv("TRANSCRIBE_MAX_QUEUE_DEPTH", "128")), 1) TRANSCRIBE_MAX_FAILURES = max(int(os.getenv("TRANSCRIBE_MAX_FAILURES", "3")), 1) TRANSCRIBE_CONSENT_EMOJI = "βœ…" +TRANSCRIBE_MODEL_SIZE = (os.getenv("TRANSCRIBE_MODEL_SIZE") or os.getenv("WHISPER_MODEL") or "base").strip().lower() or "base" +if TRANSCRIBE_MODEL_SIZE not in {"tiny", "base", "small"}: + TRANSCRIBE_MODEL_SIZE = "base" # ========================= # MESSAGES # ========================= @@ -345,12 +356,14 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.engine_instance: object | None = None self.capture_index = 0 self.chunk_index = 0 - self.chunk_queue: asyncio.Queue[int] = asyncio.Queue(maxsize=128) + self.chunk_queue: asyncio.Queue[int] = asyncio.Queue(maxsize=TRANSCRIBE_MAX_QUEUE_DEPTH) self.chunk_meta: list[dict[str, object]] = [] self.chunk_meta_by_id: dict[int, dict[str, object]] = {} self.chunk_transcripts: dict[int, list[dict[str, object]]] = {} self.pending_live_lines: list[str] = [] + self.last_phrase_by_user: dict[int, str] = {} self.user_last_frame: dict[int, int] = {} + self.user_window_start_frame: dict[int, int] = {} self.chunk_meta_path = self.temp_dir / "chunk_metadata.jsonl" self.finalized = False self.no_consented_users_warning_interval = 10 @@ -405,12 +418,12 @@ def can_record_voice() -> tuple[bool, str]: def 
get_whisper_transcriber() -> tuple[str | None, object | None]: if importlib.util.find_spec("faster_whisper") is not None: from faster_whisper import WhisperModel - model_name = os.getenv("WHISPER_MODEL", "base") + model_name = TRANSCRIBE_MODEL_SIZE model = WhisperModel(model_name, device="cpu", compute_type="int8") return "faster_whisper", model if importlib.util.find_spec("whisper") is not None: import whisper - model_name = os.getenv("WHISPER_MODEL", "base") + model_name = TRANSCRIBE_MODEL_SIZE model = whisper.load_model(model_name) return "whisper", model return None, None @@ -422,9 +435,11 @@ def transcribe_audio_file(engine_name: str, engine: object, file_path: Path) -> phrase = (seg.text or "").strip() if not phrase: continue + confidence = getattr(seg, "avg_logprob", None) utterances.append({ "start": float(getattr(seg, "start", 0.0) or 0.0), "text": phrase, + "confidence": float(confidence) if isinstance(confidence, (int, float)) else None, }) return utterances if engine_name == "whisper": @@ -437,17 +452,54 @@ def transcribe_audio_file(engine_name: str, engine: object, file_path: Path) -> phrase = str(seg.get("text") or "").strip() if not phrase: continue + confidence = seg.get("avg_logprob") utterances.append({ "start": float(seg.get("start") or 0.0), "text": phrase, + "confidence": float(confidence) if isinstance(confidence, (int, float)) else None, }) else: text_value = str((result or {}).get("text") or "").strip() if isinstance(result, dict) else "" if text_value: - utterances.append({"start": 0.0, "text": text_value}) + utterances.append({"start": 0.0, "text": text_value, "confidence": None}) return utterances +def normalize_phrase_for_overlap(value: str) -> str: + normalized = " ".join(value.lower().split()) + return "".join(ch for ch in normalized if ch.isalnum() or ch.isspace()).strip() + + +def remove_overlap_duplicate_text(previous_text: str | None, new_text: str) -> str: + if not new_text: + return "" + candidate = new_text.strip() + if not 
candidate: + return "" + previous = normalize_phrase_for_overlap(previous_text or "") + current = normalize_phrase_for_overlap(candidate) + if not previous or not current: + return candidate + if current == previous or current in previous: + return "" + if previous in current: + return candidate + prev_tokens = previous.split() + curr_tokens = current.split() + max_overlap = min(len(prev_tokens), len(curr_tokens)) + overlap_tokens = 0 + for length in range(max_overlap, 0, -1): + if prev_tokens[-length:] == curr_tokens[:length]: + overlap_tokens = length + break + if overlap_tokens <= 0: + return candidate + original_tokens = candidate.split() + if overlap_tokens >= len(original_tokens): + return "" + return " ".join(original_tokens[overlap_tokens:]).strip() + + def normalize_transcript_display_name(name: str) -> str: compact = " ".join(name.split()).strip() return compact[:64] @@ -611,15 +663,23 @@ def capture_chunk_from_sink_audio( with wav_in: framerate = wav_in.getframerate() or 1 total_frames = wav_in.getnframes() - last_frame = session.user_last_frame.get(user_id, 0) - if total_frames <= last_frame: - return None - wav_in.setpos(last_frame) - delta_frames = total_frames - last_frame - frames = wav_in.readframes(delta_frames) params = wav_in.getparams() + overlap_frames = max(int(TRANSCRIBE_OVERLAP_SECONDS * framerate), 0) + window_frames = max(int(TRANSCRIBE_WINDOW_SECONDS * framerate), 1) + last_window_start = session.user_window_start_frame.get(user_id) + if last_window_start is None: + next_start = 0 + else: + step_frames = max(window_frames - overlap_frames, 1) + next_start = max(last_window_start + step_frames, 0) + if total_frames - next_start < window_frames: + return None + wav_in.setpos(next_start) + frames = wav_in.readframes(window_frames) session.user_last_frame[user_id] = total_frames - chunk_start = last_frame / max(framerate, 1) + session.user_window_start_frame[user_id] = next_start + chunk_start = next_start / max(framerate, 1) + chunk_end = 
(next_start + window_frames) / max(framerate, 1) session.chunk_index += 1 chunk_file = capture_dir / f"chunk_{session.chunk_index:06d}_u{user_id}.wav" with wave.open(str(chunk_file), "wb") as wav_out: @@ -629,6 +689,8 @@ def capture_chunk_from_sink_audio( "chunk_id": session.chunk_index, "user_id": user_id, "start_offset": round(chunk_start, 3), + "end_offset": round(chunk_end, 3), + "capture_emitted_at": time.perf_counter(), "file_path": str(chunk_file), "queued": False, "transcribed": False, @@ -696,7 +758,24 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui session.chunk_queue.put_nowait(int(chunk["chunk_id"])) chunk["queued"] = True except asyncio.QueueFull: - logger.warning("transcribe_queue_backpressure guild_id=%s chunk_id=%s", guild_id, chunk["chunk_id"]) + dropped_chunk_id: int | None = None + try: + dropped_chunk_id = session.chunk_queue.get_nowait() + session.chunk_queue.task_done() + except asyncio.QueueEmpty: + pass + try: + session.chunk_queue.put_nowait(int(chunk["chunk_id"])) + chunk["queued"] = True + except asyncio.QueueFull: + pass + logger.warning( + "transcribe_queue_backpressure guild_id=%s chunk_id=%s dropped_chunk_id=%s queue_depth=%s", + guild_id, + chunk["chunk_id"], + dropped_chunk_id, + session.chunk_queue.qsize(), + ) logger.info( "transcribe_capture_flushed guild_id=%s capture=%s produced_chunks=%s queue_size=%s consented_count=%s", guild_id, @@ -717,17 +796,28 @@ def build_transcript_lines_for_chunk( user_id = int(chunk["user_id"]) speaker_name = resolve_display_name(guild, user_id, session.aliases_by_user) base_offset = float(chunk["start_offset"]) + emitted_at = float(chunk.get("capture_emitted_at") or 0.0) lines: list[dict[str, object]] = [] for utterance in utterances: phrase = str(utterance.get("text") or "").strip() if not phrase: continue + deduped_phrase = remove_overlap_duplicate_text(session.last_phrase_by_user.get(user_id), phrase) + if not deduped_phrase: + continue + 
session.last_phrase_by_user[user_id] = phrase rel_start = float(utterance.get("start") or 0.0) absolute_start = base_offset + rel_start stamp = slice_timestamp_label(session.started_at, absolute_start) + confidence = utterance.get("confidence") + latency_ms = max((time.perf_counter() - emitted_at) * 1000, 0.0) if emitted_at else 0.0 + confidence_str = f"{float(confidence):.2f}" if isinstance(confidence, (int, float)) else "n/a" lines.append({ "absolute_start": absolute_start, - "line": f"[{stamp}] [{speaker_name}] {phrase}", + "line": ( + f"[{stamp}] [{speaker_name}] {deduped_phrase} " + f"(conf={confidence_str}, latency={latency_ms:.0f}ms)" + ), }) lines.sort(key=lambda item: item["absolute_start"]) return lines @@ -789,9 +879,9 @@ async def transcription_worker_loop(guild_id: int): async def transcription_live_loop(guild_id: int): - logger.info("transcribe_live_loop_started guild_id=%s interval_seconds=%s", guild_id, TRANSCRIBE_SLICE_SECONDS) + logger.info("transcribe_live_loop_started guild_id=%s interval_seconds=%s", guild_id, TRANSCRIBE_EMIT_INTERVAL_SECONDS) while True: - await asyncio.sleep(TRANSCRIBE_SLICE_SECONDS) + await asyncio.sleep(TRANSCRIBE_EMIT_INTERVAL_SECONDS) session = get_transcription_session(guild_id) if session is None or session.closed: logger.info("transcribe_live_loop_exit guild_id=%s", guild_id) From 8188276ff4e4be86fe2cc8994ab6552ae2b243ee Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 15:30:18 -0500 Subject: [PATCH 45/54] Improve live transcription chunk diagnostics and key handling --- poopbot.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/poopbot.py b/poopbot.py index ae92f82..11fad4e 100644 --- a/poopbot.py +++ b/poopbot.py @@ -151,7 +151,7 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic FETCH_TRACK_INFO_TIMEOUT_MESSAGE = ( "Timed out while fetching track info from YouTube. Please try again in a moment." 
) -TRANSCRIBE_WINDOW_SECONDS = max(float(os.getenv("TRANSCRIBE_WINDOW_SECONDS", "2.0")), 0.5) +TRANSCRIBE_WINDOW_SECONDS = max(float(os.getenv("TRANSCRIBE_WINDOW_SECONDS", "0.5")), 0.5) TRANSCRIBE_OVERLAP_SECONDS = max(float(os.getenv("TRANSCRIBE_OVERLAP_SECONDS", "0.5")), 0.0) if TRANSCRIBE_OVERLAP_SECONDS >= TRANSCRIBE_WINDOW_SECONDS: TRANSCRIBE_OVERLAP_SECONDS = max(0.0, TRANSCRIBE_WINDOW_SECONDS - 0.1) @@ -656,9 +656,22 @@ def capture_chunk_from_sink_audio( return None if not isinstance(payload, (bytes, bytearray)): return None + logger.info( + "transcribe_chunk_debug user_id=%s payload_type=%s payload_len=%s head=%s", + user_id, + type(payload).__name__, + len(payload), + bytes(payload[:8]).hex() if len(payload) >= 8 else None, + ) try: wav_in = wave.open(io.BytesIO(payload), "rb") - except wave.Error: + except wave.Error as error: + logger.warning( + "transcribe_chunk_wave_open_failed user_id=%s payload_len=%s error=%s", + user_id, + len(payload), + error, + ) return None with wav_in: framerate = wav_in.getframerate() or 1 @@ -673,6 +686,14 @@ def capture_chunk_from_sink_audio( step_frames = max(window_frames - overlap_frames, 1) next_start = max(last_window_start + step_frames, 0) if total_frames - next_start < window_frames: + logger.info( + "transcribe_chunk_too_short user_id=%s total_frames=%s next_start=%s needed=%s framerate=%s", + user_id, + total_frames, + next_start, + window_frames, + framerate, + ) return None wav_in.setpos(next_start) frames = wav_in.readframes(window_frames) @@ -707,6 +728,13 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui if not isinstance(sink_audio_data, dict): return 0 + logger.info( + "transcribe_sink_keys key_types=%s keys=%s consented=%s", + [type(key).__name__ for key in sink_audio_data.keys()], + list(sink_audio_data.keys()), + sorted(session.consented_user_ids), + ) + guild = bot.get_guild(guild_id) voice_channel = guild.get_channel(session.voice_channel_id) if guild is not None else 
None voice_members = getattr(voice_channel, "members", []) if voice_channel is not None else [] @@ -748,9 +776,17 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui voice_member_count, ) for user_id, audio_obj in sink_audio_data.items(): - if not isinstance(user_id, int) or user_id not in session.consented_user_ids: + normalized_user_id = None + if isinstance(user_id, int): + normalized_user_id = user_id + elif isinstance(user_id, str) and user_id.isdigit(): + normalized_user_id = int(user_id) + elif hasattr(user_id, "id") and isinstance(getattr(user_id, "id"), int): + normalized_user_id = int(getattr(user_id, "id")) + + if normalized_user_id is None or normalized_user_id not in session.consented_user_ids: continue - chunk = capture_chunk_from_sink_audio(session, user_id, audio_obj, capture_dir) + chunk = capture_chunk_from_sink_audio(session, normalized_user_id, audio_obj, capture_dir) if chunk is None: continue produced += 1 From 0f8f14d13418abb2a978409425e91a50aa689985 Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 15:34:52 -0500 Subject: [PATCH 46/54] Handle raw PCM sink payloads in transcription chunks --- poopbot.py | 66 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/poopbot.py b/poopbot.py index 11fad4e..360bbb4 100644 --- a/poopbot.py +++ b/poopbot.py @@ -641,6 +641,10 @@ def capture_chunk_from_sink_audio( user_id: int, audio_obj: object, capture_dir: Path, + *, + sample_rate: int, + sample_width: int, + channels: int, ) -> dict[str, object] | None: file_obj = getattr(audio_obj, "file", None) if file_obj is None: @@ -666,17 +670,19 @@ def capture_chunk_from_sink_audio( try: wav_in = wave.open(io.BytesIO(payload), "rb") except wave.Error as error: - logger.warning( - "transcribe_chunk_wave_open_failed user_id=%s payload_len=%s error=%s", + logger.info( + "transcribe_chunk_wave_open_fallback_pcm user_id=%s payload_len=%s error=%s 
sample_rate=%s channels=%s sample_width=%s", user_id, len(payload), error, + sample_rate, + channels, + sample_width, ) - return None - with wav_in: - framerate = wav_in.getframerate() or 1 - total_frames = wav_in.getnframes() - params = wav_in.getparams() + frame_size = max(sample_width * channels, 1) + framerate = max(sample_rate, 1) + total_frames = len(payload) // frame_size + params = (channels, sample_width, framerate, 0, "NONE", "not compressed") overlap_frames = max(int(TRANSCRIBE_OVERLAP_SECONDS * framerate), 0) window_frames = max(int(TRANSCRIBE_WINDOW_SECONDS * framerate), 1) last_window_start = session.user_window_start_frame.get(user_id) @@ -695,8 +701,34 @@ def capture_chunk_from_sink_audio( framerate, ) return None - wav_in.setpos(next_start) - frames = wav_in.readframes(window_frames) + start_byte = next_start * frame_size + end_byte = (next_start + window_frames) * frame_size + frames = bytes(payload[start_byte:end_byte]) + else: + with wav_in: + framerate = wav_in.getframerate() or 1 + total_frames = wav_in.getnframes() + params = wav_in.getparams() + overlap_frames = max(int(TRANSCRIBE_OVERLAP_SECONDS * framerate), 0) + window_frames = max(int(TRANSCRIBE_WINDOW_SECONDS * framerate), 1) + last_window_start = session.user_window_start_frame.get(user_id) + if last_window_start is None: + next_start = 0 + else: + step_frames = max(window_frames - overlap_frames, 1) + next_start = max(last_window_start + step_frames, 0) + if total_frames - next_start < window_frames: + logger.info( + "transcribe_chunk_too_short user_id=%s total_frames=%s next_start=%s needed=%s framerate=%s", + user_id, + total_frames, + next_start, + window_frames, + framerate, + ) + return None + wav_in.setpos(next_start) + frames = wav_in.readframes(window_frames) session.user_last_frame[user_id] = total_frames session.user_window_start_frame[user_id] = next_start chunk_start = next_start / max(framerate, 1) @@ -736,6 +768,12 @@ async def flush_active_recording_buffers(session: 
GuildTranscriptionSession, gui ) guild = bot.get_guild(guild_id) + vc = guild.voice_client if guild is not None else None + decoder = getattr(vc, "decoder", None) + sample_rate = int(getattr(decoder, "SAMPLING_RATE", 48000) or 48000) + channels = int(getattr(decoder, "CHANNELS", 2) or 2) + sample_size = int(getattr(decoder, "SAMPLE_SIZE", channels * 2) or channels * 2) + sample_width = max(sample_size // max(channels, 1), 1) voice_channel = guild.get_channel(session.voice_channel_id) if guild is not None else None voice_members = getattr(voice_channel, "members", []) if voice_channel is not None else [] voice_member_ids = {member.id for member in voice_members if isinstance(member, discord.Member) and not member.bot} @@ -786,7 +824,15 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui if normalized_user_id is None or normalized_user_id not in session.consented_user_ids: continue - chunk = capture_chunk_from_sink_audio(session, normalized_user_id, audio_obj, capture_dir) + chunk = capture_chunk_from_sink_audio( + session, + normalized_user_id, + audio_obj, + capture_dir, + sample_rate=sample_rate, + sample_width=sample_width, + channels=channels, + ) if chunk is None: continue produced += 1 From f9d6e9a56727a7393eca7d2ccbdab838b7cb596d Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 16:27:50 -0500 Subject: [PATCH 47/54] Remove transcript debug suffix and handle expired gendsession interactions --- poopbot.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/poopbot.py b/poopbot.py index 360bbb4..80726f2 100644 --- a/poopbot.py +++ b/poopbot.py @@ -891,15 +891,9 @@ def build_transcript_lines_for_chunk( rel_start = float(utterance.get("start") or 0.0) absolute_start = base_offset + rel_start stamp = slice_timestamp_label(session.started_at, absolute_start) - confidence = utterance.get("confidence") - latency_ms = max((time.perf_counter() - emitted_at) * 1000, 0.0) if emitted_at else 0.0 - 
confidence_str = f"{float(confidence):.2f}" if isinstance(confidence, (int, float)) else "n/a" lines.append({ "absolute_start": absolute_start, - "line": ( - f"[{stamp}] [{speaker_name}] {deduped_phrase} " - f"(conf={confidence_str}, latency={latency_ms:.0f}ms)" - ), + "line": f"[{stamp}] [{speaker_name}] {deduped_phrase}", }) lines.sort(key=lambda item: item["absolute_start"]) return lines @@ -2784,7 +2778,12 @@ async def gendsession(interaction: discord.Interaction): ) return vc = interaction.guild.voice_client - await interaction.response.defer(ephemeral=True) + response_open = True + try: + await interaction.response.defer(ephemeral=True) + except discord.NotFound: + response_open = False + logger.warning("gendsession_interaction_expired guild_id=%s", interaction.guild.id) try: await finalize_transcription_session(interaction.guild, session, vc) except asyncio.TimeoutError: @@ -2799,7 +2798,8 @@ async def gendsession(interaction: discord.Interaction): except (discord.HTTPException, discord.ClientException): pass remove_transcription_session(interaction.guild.id) - await interaction.followup.send("βœ… Transcription capture ended. Final transcript generated.", ephemeral=True) + if response_open: + await interaction.followup.send("βœ… Transcription capture ended. 
Final transcript generated.", ephemeral=True) @bot.slash_command(name="gokibothelp", description="Show all available GokiBot commands.") From 7b81f26f226695fde11a67537cb53f2851b2970c Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 17:09:08 -0500 Subject: [PATCH 48/54] Refine transcription consent flow to DM opt-in only --- poopbot.py | 314 ++++++++++++++++++++++------------------------------- 1 file changed, 127 insertions(+), 187 deletions(-) diff --git a/poopbot.py b/poopbot.py index 80726f2..2697b09 100644 --- a/poopbot.py +++ b/poopbot.py @@ -369,12 +369,10 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.no_consented_users_warning_interval = 10 self.latest_consent_message_id: int | None = None self.latest_consent_jump_url: str | None = None - self.last_unconsented_reminder_at: datetime | None = None - self.unconsented_reminder_interval_seconds = 45 transcription_sessions: dict[int, GuildTranscriptionSession] = {} -transcription_consent_prompts: dict[int, tuple[int, int]] = {} +transcription_consent_prompts: dict[int, tuple[int, int, int]] = {} def get_music_state(guild_id: int) -> GuildMusicState: state = music_states.get(guild_id) if state is None: @@ -389,7 +387,7 @@ def remove_transcription_session(guild_id: int): return stale_prompt_ids = [ message_id - for message_id, (prompt_guild_id, prompt_thread_id) in transcription_consent_prompts.items() + for message_id, (prompt_guild_id, prompt_thread_id, _prompt_user_id) in transcription_consent_prompts.items() if prompt_guild_id == guild_id and prompt_thread_id == session.transcript_thread_id ] for message_id in stale_prompt_ids: @@ -547,37 +545,27 @@ async def prompt_transcription_consent( transcript_thread.id, ) return - mentions = " ".join(member.mention for member in non_consented) - consent_message = await transcript_thread.send(f"{mentions}\nPreparing consent instructions…") - await consent_message.edit( - content=( - f"{mentions}\n" - f"React with 
{TRANSCRIBE_CONSENT_EMOJI} on this exact message to opt into transcription for this session.\n" - f"Required reaction location: {transcript_thread.mention} β†’ {consent_message.jump_url}\n" - "Only users who react on this message are transcribed for this session." - ) - ) - try: - await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) - except (discord.Forbidden, discord.HTTPException): - logger.exception( - "transcribe_consent_reaction_failed guild_id=%s thread_id=%s message_id=%s", - guild.id, - transcript_thread.id, - consent_message.id, - ) - await transcript_thread.send( - "⚠️ I couldn't add the consent reaction automatically. " - f"Please grant **Add Reactions** in this text channel, then manually react with {TRANSCRIBE_CONSENT_EMOJI} " - "to the consent message to opt in." - ) - session.latest_consent_message_id = consent_message.id - session.latest_consent_jump_url = consent_message.jump_url - transcription_consent_prompts[consent_message.id] = (guild.id, session.transcript_thread_id) for member in non_consented: + try: + dm_channel = member.dm_channel or await member.create_dm() + consent_message = await dm_channel.send( + "Transcription is running in this channel, Are you ok with being transcribed for this session only?" 
+ ) + await consent_message.add_reaction(TRANSCRIBE_CONSENT_EMOJI) + except (discord.Forbidden, discord.HTTPException): + logger.exception( + "transcribe_consent_dm_failed guild_id=%s thread_id=%s user_id=%s", + guild.id, + transcript_thread.id, + member.id, + ) + continue session.prompted_user_ids.add(member.id) + session.latest_consent_message_id = consent_message.id + session.latest_consent_jump_url = consent_message.jump_url + transcription_consent_prompts[consent_message.id] = (guild.id, session.transcript_thread_id, member.id) logger.info( - "transcribe_consent_prompt_thread_sent guild_id=%s thread_id=%s user_id=%s prompt_reason=session_new", + "transcribe_consent_prompt_dm_sent guild_id=%s thread_id=%s user_id=%s prompt_reason=session_new", guild.id, transcript_thread.id, member.id, @@ -754,6 +742,93 @@ def capture_chunk_from_sink_audio( return chunk +def current_total_frames_for_user_audio(audio_obj: object, *, sample_rate: int, sample_width: int, channels: int) -> int | None: + file_obj = getattr(audio_obj, "file", None) + if file_obj is None: + return None + try: + prev_pos = file_obj.tell() if hasattr(file_obj, "tell") else None + if hasattr(file_obj, "seek"): + file_obj.seek(0) + payload = file_obj.read() if hasattr(file_obj, "read") else None + if prev_pos is not None and hasattr(file_obj, "seek"): + file_obj.seek(prev_pos) + except Exception: + return None + if not isinstance(payload, (bytes, bytearray)): + return None + try: + with wave.open(io.BytesIO(payload), "rb") as wav_in: + return int(wav_in.getnframes() or 0) + except wave.Error: + frame_size = max(sample_width * channels, 1) + return len(payload) // frame_size + + +def initialize_user_capture_cursor(session: GuildTranscriptionSession, guild: discord.Guild, user_id: int) -> None: + sink = session.active_sink + sink_audio_data = getattr(sink, "audio_data", None) + if not isinstance(sink_audio_data, dict): + return + vc = guild.voice_client + decoder = getattr(vc, "decoder", None) if vc is not 
def initialize_user_capture_cursor(session: GuildTranscriptionSession, guild: discord.Guild, user_id: int) -> None:
    """Point *user_id*'s capture cursor at the current end of their sink buffer.

    Sets both ``session.user_last_frame`` and
    ``session.user_window_start_frame`` to the number of frames already
    buffered for the user. Does nothing when the active sink exposes no
    ``audio_data`` dict, the user has no buffer, or the frame count cannot be
    determined.
    """
    sink_audio_data = getattr(session.active_sink, "audio_data", None)
    if not isinstance(sink_audio_data, dict):
        return
    vc = guild.voice_client
    decoder = getattr(vc, "decoder", None) if vc is not None else None
    # Fall back to 48 kHz / stereo / 16-bit when the decoder does not say otherwise.
    sample_rate = int(getattr(decoder, "SAMPLING_RATE", 48000) or 48000)
    channels = int(getattr(decoder, "CHANNELS", 2) or 2)
    sample_size = int(getattr(decoder, "SAMPLE_SIZE", channels * 2) or channels * 2)
    sample_width = max(sample_size // max(channels, 1), 1)
    for source_user_id, audio_obj in sink_audio_data.items():
        # Sink keys may be ints, digit strings, or objects carrying an int ``id``.
        normalized_user_id = None
        if isinstance(source_user_id, int):
            normalized_user_id = source_user_id
        elif isinstance(source_user_id, str) and source_user_id.isdigit():
            normalized_user_id = int(source_user_id)
        elif hasattr(source_user_id, "id") and isinstance(getattr(source_user_id, "id"), int):
            normalized_user_id = int(getattr(source_user_id, "id"))
        if normalized_user_id != user_id:
            continue
        total_frames = current_total_frames_for_user_audio(
            audio_obj,
            sample_rate=sample_rate,
            sample_width=sample_width,
            channels=channels,
        )
        if total_frames is None:
            return
        session.user_last_frame[user_id] = total_frames
        session.user_window_start_frame[user_id] = total_frames
        return


async def apply_transcription_consent(
    *,
    guild: discord.Guild,
    session: GuildTranscriptionSession,
    member: discord.Member,
    message_id: int,
    channel_id: int,
) -> None:
    """Record *member*'s consent for *session* and announce it in the transcript thread.

    Adds the member to ``session.consented_user_ids``, stores their normalised
    display name as alias, resets their capture cursor, then tries to add them
    to the active transcript thread and post a confirmation there.

    Thread membership and the confirmation message are best-effort: Discord
    errors are logged and never raised, because consent is already recorded by
    the time they can occur.

    Args:
        guild: Guild the session belongs to.
        session: The running transcription session.
        member: The member who opted in.
        message_id: ID of the consent message that was reacted to (logged only).
        channel_id: ID of the channel holding that message (logged only).
    """
    clean_name = normalize_transcript_display_name(member.display_name)
    session.consented_user_ids.add(member.id)
    if clean_name:
        session.aliases_by_user[member.id] = clean_name
    initialize_user_capture_cursor(session, guild, member.id)
    logger.info(
        "transcribe_consent_reaction_accepted guild_id=%s message_id=%s channel_id=%s user_id=%s consented_count=%s",
        guild.id,
        message_id,
        channel_id,
        member.id,
        len(session.consented_user_ids),
    )
    thread = find_active_transcription_thread(guild, session)
    if thread is None:
        return
    try:
        await thread.add_user(member)
    except (discord.Forbidden, discord.HTTPException):
        # Still non-fatal, but leave a trace instead of swallowing silently.
        logger.warning(
            "transcribe_consent_thread_add_user_failed guild_id=%s thread_id=%s user_id=%s",
            guild.id,
            session.transcript_thread_id,
            member.id,
            exc_info=True,
        )
    try:
        await thread.send(f"Activated transcription for {member.mention}")
    except (discord.Forbidden, discord.HTTPException):
        logger.warning(
            "transcribe_consent_thread_notice_failed guild_id=%s thread_id=%s user_id=%s",
            guild.id,
            session.transcript_thread_id,
            member.id,
            exc_info=True,
        )
flush_active_recording_buffers(session: GuildTranscriptionSession, guild_id: int) -> int: sink = session.active_sink sink_audio_data = getattr(sink, "audio_data", None) @@ -775,27 +850,6 @@ async def flush_active_recording_buffers(session: GuildTranscriptionSession, gui sample_size = int(getattr(decoder, "SAMPLE_SIZE", channels * 2) or channels * 2) sample_width = max(sample_size // max(channels, 1), 1) voice_channel = guild.get_channel(session.voice_channel_id) if guild is not None else None - voice_members = getattr(voice_channel, "members", []) if voice_channel is not None else [] - voice_member_ids = {member.id for member in voice_members if isinstance(member, discord.Member) and not member.bot} - active_speaker_ids = {user_id for user_id in sink_audio_data if isinstance(user_id, int) and user_id in voice_member_ids} - active_unconsented_ids = sorted(user_id for user_id in active_speaker_ids if user_id not in session.consented_user_ids) - if active_unconsented_ids and guild is not None: - now = datetime.now(timezone.utc) - last_sent = session.last_unconsented_reminder_at - min_interval = timedelta(seconds=max(1, session.unconsented_reminder_interval_seconds)) - if last_sent is None or (now - last_sent) >= min_interval: - thread = find_active_transcription_thread(guild, session) - if thread is not None: - speaker_mentions = " ".join(f"<@{user_id}>" for user_id in active_unconsented_ids) - consent_target = session.latest_consent_jump_url or "(consent prompt not posted yet)" - await thread.send( - ( - f"⚠️ Active speakers detected without consent: {speaker_mentions}.\n" - f"To be included in this session, react with {TRANSCRIBE_CONSENT_EMOJI} to this exact message: {consent_target}\n" - "Only reactions on that exact consent message opt users into transcription." 
- ) - ) - session.last_unconsented_reminder_at = now session.capture_index += 1 capture_dir = session.temp_dir / f"capture_{session.capture_index:05d}" @@ -1955,16 +2009,16 @@ async def on_voice_state_update(member: discord.Member, before: discord.VoiceSta async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): if payload.user_id == bot.user.id: return - if payload.guild_id is None: - return emoji = str(payload.emoji) consent_prompt = transcription_consent_prompts.get(payload.message_id) - if emoji == TRANSCRIBE_CONSENT_EMOJI: - session = get_transcription_session(payload.guild_id) - consent_via_prompt = consent_prompt is not None + if emoji == TRANSCRIBE_CONSENT_EMOJI and consent_prompt is not None: + prompt_guild_id, _prompt_thread_id, prompt_user_id = consent_prompt + if payload.user_id != prompt_user_id: + return + session = get_transcription_session(prompt_guild_id) logger.info( "transcribe_consent_reaction_received guild_id=%s message_id=%s channel_id=%s user_id=%s session_present=%s", - payload.guild_id, + prompt_guild_id, payload.message_id, payload.channel_id, payload.user_id, @@ -1973,7 +2027,7 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): if session is None or session.closed: logger.info( "transcribe_consent_reaction_rejected_session_missing_or_closed guild_id=%s message_id=%s channel_id=%s user_id=%s session_present=%s session_closed=%s", - payload.guild_id, + prompt_guild_id, payload.message_id, payload.channel_id, payload.user_id, @@ -1981,27 +2035,12 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): session.closed if session is not None else None, ) return - if consent_via_prompt: - guild_id, _ = consent_prompt - if guild_id != payload.guild_id: - return - elif payload.channel_id != session.transcript_thread_id: - logger.info( - "transcribe_consent_reaction_rejected_thread_mismatch guild_id=%s message_id=%s channel_id=%s user_id=%s consent_via_prompt=%s expected_thread_id=%s", - 
payload.guild_id, - payload.message_id, - payload.channel_id, - payload.user_id, - consent_via_prompt, - session.transcript_thread_id, - ) - return - guild = bot.get_guild(payload.guild_id) + guild = bot.get_guild(prompt_guild_id) member = guild.get_member(payload.user_id) if guild is not None else None - if member is None or member.bot: + if guild is None or member is None or member.bot: logger.info( "transcribe_consent_reaction_rejected_member_invalid guild_id=%s message_id=%s channel_id=%s user_id=%s guild_present=%s member_present=%s member_is_bot=%s", - payload.guild_id, + prompt_guild_id, payload.message_id, payload.channel_id, payload.user_id, @@ -2010,25 +2049,15 @@ async def on_raw_reaction_add(payload: discord.RawReactionActionEvent): member.bot if member is not None else None, ) return - clean_name = normalize_transcript_display_name(member.display_name) - session.consented_user_ids.add(payload.user_id) - logger.info( - "transcribe_consent_reaction_accepted guild_id=%s message_id=%s channel_id=%s user_id=%s consented_count=%s", - payload.guild_id, - payload.message_id, - payload.channel_id, - payload.user_id, - len(session.consented_user_ids), + await apply_transcription_consent( + guild=guild, + session=session, + member=member, + message_id=payload.message_id, + channel_id=payload.channel_id, ) - if clean_name: - session.aliases_by_user[payload.user_id] = clean_name - thread = find_active_transcription_thread(guild, session) - if thread is not None: - try: - await thread.add_user(member) - except (discord.Forbidden, discord.HTTPException): - pass - await thread.send(f"{member.mention} included for this session.") + return + if payload.guild_id is None: return active_message_id = gget(payload.guild_id, "active_message_id") if not active_message_id or str(payload.message_id) != active_message_id: @@ -2558,12 +2587,6 @@ async def gtranscribe(interaction: discord.Interaction): (time.monotonic() - engine_init_started) * 1000, ) await 
interaction.response.defer(ephemeral=True) - logger.info( - "transcribe_start_intents guild_id=%s members=%s voice_states=%s", - interaction.guild.id, - bot.intents.members, - bot.intents.voice_states, - ) start_label = datetime.now(LOCAL_TZ).strftime("%Y-%m-%d %H:%M") try: @@ -2581,14 +2604,11 @@ async def gtranscribe(interaction: discord.Interaction): await transcript_thread.send( ( f"Transcription started for {voice_channel.mention}.\n" - f"React with {TRANSCRIBE_CONSENT_EMOJI} on the consent message to be included in this session.\n" - "React on the consent message in the transcript thread, not on the slash-command response.\n" + "I will DM each participant a consent prompt for this session.\n" "Use /gsetname to set your name for this session." ) ) - - vc = interaction.guild.voice_client connected_here = False if vc is not None and vc.channel != voice_channel: @@ -2630,28 +2650,8 @@ async def gtranscribe(interaction: discord.Interaction): session = GuildTranscriptionSession(interaction.guild.id, voice_channel.id, transcript_thread.id) session.engine_name = engine_name session.engine_instance = engine_instance - if isinstance(interaction.user, discord.Member): - session.consented_user_ids.add(interaction.user.id) - clean_name = normalize_transcript_display_name(interaction.user.display_name) - if clean_name: - session.aliases_by_user[interaction.user.id] = clean_name transcription_sessions[interaction.guild.id] = session - await transcript_thread.send( - f"ℹ️ Auto-consented command invoker {interaction.user.mention} for this session. " - "Everyone else must react on the consent message." 
- ) await sync_voice_channel_members_for_transcription(interaction.guild, voice_channel, session, transcript_thread) - startup_members = [member for member in voice_channel.members if not member.bot] - startup_consented = [member.mention for member in startup_members if member.id in session.consented_user_ids] - startup_unconsented = [member.mention for member in startup_members if member.id not in session.consented_user_ids] - await transcript_thread.send( - "\n".join([ - "πŸ“‹ Startup consent snapshot:", - f"- Consented: {', '.join(startup_consented) if startup_consented else '(none)'}", - f"- Not consented: {', '.join(startup_unconsented) if startup_unconsented else '(none)'}", - f"- Consent message: {session.latest_consent_jump_url or '(not available yet)'}", - ]) - ) logger.info( "transcribe_session_initialized guild_id=%s voice_channel_id=%s thread_id=%s consented_users=%s", interaction.guild.id, @@ -2697,73 +2697,13 @@ async def _capture_finished(sink: object, channel: object, *_: object): await interaction.followup.send( ( f"πŸŽ™οΈ Transcription capture started in {voice_channel.mention}. " - "I auto-consented you for this session; other users must react on the consent message in the transcript thread. " + "I will DM participants for session consent. " "Live posts may lag; a complete transcript is guaranteed when `/gendsession` is used." 
), ephemeral=True, ) -@discord.guild_only() -@bot.slash_command(name="gtranscribe_status", description="Debug current transcription consent state") -async def gtranscribe_status(interaction: discord.Interaction): - if interaction.guild is None: - await interaction.response.send_message("This command only works in a server.", ephemeral=True) - return - session = get_transcription_session(interaction.guild.id) - if session is None or session.closed: - await interaction.response.send_message("No active transcription session in this server.", ephemeral=True) - return - voice_channel = interaction.guild.get_channel(session.voice_channel_id) - if not isinstance(voice_channel, discord.VoiceChannel): - await interaction.response.send_message( - f"Active session found, but voice channel `{session.voice_channel_id}` is unavailable.", - ephemeral=True, - ) - return - consented_ids = sorted(session.consented_user_ids) - lines = [ - f"Thread: <#{session.transcript_thread_id}>", - f"Consent message jump URL: {session.latest_consent_jump_url or '(not available yet)'}", - f"Consented user IDs: `{consented_ids}`", - "Current voice members:", - ] - members = [member for member in voice_channel.members if not member.bot] - if not members: - lines.append("- *(none)*") - for member in members: - prompted = "yes" if member.id in session.prompted_user_ids else "no" - consented = "yes" if member.id in session.consented_user_ids else "no" - lines.append( - f"- {member.mention} (`{member.id}`): prompted={prompted}, consented={consented}" - ) - await interaction.response.send_message("\n".join(lines), ephemeral=True) - - -@discord.guild_only() -@bot.slash_command(name="gsetname", description="Set your transcription display name for the current session") -async def gsetname( - interaction: discord.Interaction, - name: discord.Option(str, "Display name to use in transcripts"), -): - if interaction.guild is None: - await interaction.response.send_message("This command only works in a server.", 
ephemeral=True) - return - clean_name = normalize_transcript_display_name(name) - if not clean_name: - await interaction.response.send_message("Display name cannot be empty.", ephemeral=True) - return - session = get_transcription_session(interaction.guild.id) - if session is None: - await interaction.response.send_message("No active transcription session in this server.", ephemeral=True) - return - session.aliases_by_user[interaction.user.id] = clean_name - await interaction.response.send_message( - f"Saved transcript display name: **{clean_name}**.", - ephemeral=True, - ) - - @discord.guild_only() @bot.slash_command(name="gendsession", description="Stop capture, finalize deferred transcription, and disconnect from voice.") async def gendsession(interaction: discord.Interaction): From 1f44af71259df28e84575036f20535cb9514b157 Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 24 Feb 2026 17:13:46 -0500 Subject: [PATCH 49/54] Improve transcription accuracy with English bias and fallback --- poopbot.py | 61 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/poopbot.py b/poopbot.py index 2697b09..c527341 100644 --- a/poopbot.py +++ b/poopbot.py @@ -165,6 +165,9 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic TRANSCRIBE_MODEL_SIZE = (os.getenv("TRANSCRIBE_MODEL_SIZE") or os.getenv("WHISPER_MODEL") or "base").strip().lower() or "base" if TRANSCRIBE_MODEL_SIZE not in {"tiny", "base", "small"}: TRANSCRIBE_MODEL_SIZE = "base" +TRANSCRIBE_LANGUAGE = (os.getenv("TRANSCRIBE_LANGUAGE") or "en").strip().lower() or "en" +TRANSCRIBE_FALLBACK_AVG_LOGPROB = float(os.getenv("TRANSCRIBE_FALLBACK_AVG_LOGPROB", "-1.2")) +TRANSCRIBE_BEAM_SIZE = max(int(os.getenv("TRANSCRIBE_BEAM_SIZE", "5")), 1) # ========================= # MESSAGES # ========================= @@ -428,20 +431,54 @@ def get_whisper_transcriber() -> tuple[str | None, object | None]: def transcribe_audio_file(engine_name: 
str, engine: object, file_path: Path) -> list[dict[str, object]]: utterances: list[dict[str, object]] = [] if engine_name == "faster_whisper": - segments, _ = engine.transcribe(str(file_path), vad_filter=True) - for seg in segments: - phrase = (seg.text or "").strip() - if not phrase: - continue - confidence = getattr(seg, "avg_logprob", None) - utterances.append({ - "start": float(getattr(seg, "start", 0.0) or 0.0), - "text": phrase, - "confidence": float(confidence) if isinstance(confidence, (int, float)) else None, - }) + def _collect_segments(segments_iterable: object) -> list[dict[str, object]]: + collected: list[dict[str, object]] = [] + for seg in segments_iterable: + phrase = (seg.text or "").strip() + if not phrase: + continue + confidence = getattr(seg, "avg_logprob", None) + collected.append({ + "start": float(getattr(seg, "start", 0.0) or 0.0), + "text": phrase, + "confidence": float(confidence) if isinstance(confidence, (int, float)) else None, + }) + return collected + transcribe_kwargs = { + "vad_filter": True, + "task": "transcribe", + "condition_on_previous_text": True, + "beam_size": TRANSCRIBE_BEAM_SIZE, + "vad_parameters": {"min_silence_duration_ms": 500}, + } + segments, info = engine.transcribe(str(file_path), language=TRANSCRIBE_LANGUAGE, **transcribe_kwargs) + utterances = _collect_segments(segments) + confidence_values = [item["confidence"] for item in utterances if isinstance(item.get("confidence"), float)] + avg_logprob = (sum(confidence_values) / len(confidence_values)) if confidence_values else None + if avg_logprob is not None and avg_logprob < TRANSCRIBE_FALLBACK_AVG_LOGPROB: + logger.info( + "transcribe_fallback_to_auto_language file=%s avg_logprob=%.3f threshold=%.3f detected_language=%s", + file_path, + avg_logprob, + TRANSCRIBE_FALLBACK_AVG_LOGPROB, + getattr(info, "language", None), + ) + retry_segments, retry_info = engine.transcribe(str(file_path), language=None, **transcribe_kwargs) + retry_utterances = 
_collect_segments(retry_segments) + retry_confidence_values = [item["confidence"] for item in retry_utterances if isinstance(item.get("confidence"), float)] + retry_avg_logprob = (sum(retry_confidence_values) / len(retry_confidence_values)) if retry_confidence_values else None + if retry_avg_logprob is not None and (avg_logprob is None or retry_avg_logprob > avg_logprob): + logger.info( + "transcribe_fallback_selected_auto_language file=%s previous_avg=%.3f fallback_avg=%.3f fallback_language=%s", + file_path, + avg_logprob, + retry_avg_logprob, + getattr(retry_info, "language", None), + ) + return retry_utterances return utterances if engine_name == "whisper": - result = engine.transcribe(str(file_path), fp16=False) + result = engine.transcribe(str(file_path), fp16=False, language=TRANSCRIBE_LANGUAGE, task="transcribe") segments = result.get("segments") if isinstance(result, dict) else None if isinstance(segments, list): for seg in segments: From ea7a2f8e1cdb816ebf2e8145b1ba76d6cb11805e Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 25 Feb 2026 23:53:07 -0500 Subject: [PATCH 50/54] Order live transcription posts by timestamp --- poopbot.py | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/poopbot.py b/poopbot.py index c527341..d2973c7 100644 --- a/poopbot.py +++ b/poopbot.py @@ -363,7 +363,8 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.chunk_meta: list[dict[str, object]] = [] self.chunk_meta_by_id: dict[int, dict[str, object]] = {} self.chunk_transcripts: dict[int, list[dict[str, object]]] = {} - self.pending_live_lines: list[str] = [] + self.pending_live_entries: list[dict[str, object]] = [] + self.pending_live_lock = asyncio.Lock() self.last_phrase_by_user: dict[int, str] = {} self.user_last_frame: dict[int, int] = {} self.user_window_start_frame: dict[int, int] = {} @@ -984,29 +985,37 @@ def build_transcript_lines_for_chunk( stamp = 
slice_timestamp_label(session.started_at, absolute_start) lines.append({ "absolute_start": absolute_start, + "chunk_id": int(chunk["chunk_id"]), "line": f"[{stamp}] [{speaker_name}] {deduped_phrase}", }) lines.sort(key=lambda item: item["absolute_start"]) return lines -async def try_post_live_lines(guild: discord.Guild | None, session: GuildTranscriptionSession, lines: list[str]) -> bool: - if not lines: +async def try_post_live_lines( + guild: discord.Guild | None, + session: GuildTranscriptionSession, + entries: list[dict[str, object]], +) -> bool: + if not entries: return True if guild is None: - session.pending_live_lines.extend(lines) + async with session.pending_live_lock: + session.pending_live_entries.extend(entries) return False thread = find_active_transcription_thread(guild, session) if thread is None: - session.pending_live_lines.extend(lines) + async with session.pending_live_lock: + session.pending_live_entries.extend(entries) return False try: - for line in lines: - await thread.send(line) + for entry in entries: + await thread.send(str(entry["line"])) return True except Exception: logger.exception("transcribe_live_post_failed guild_id=%s", session.guild_id) - session.pending_live_lines.extend(lines) + async with session.pending_live_lock: + session.pending_live_entries.extend(entries) return False @@ -1038,7 +1047,9 @@ async def transcription_worker_loop(guild_id: int): lines = build_transcript_lines_for_chunk(bot.get_guild(guild_id), session, chunk, utterances) session.chunk_transcripts[chunk_id] = lines chunk["transcribed"] = True - await try_post_live_lines(bot.get_guild(guild_id), session, [item["line"] for item in lines]) + if lines: + async with session.pending_live_lock: + session.pending_live_entries.extend(lines) except Exception: logger.exception("transcribe_chunk_failed guild_id=%s chunk_id=%s", guild_id, chunk_id) finally: @@ -1058,10 +1069,22 @@ async def transcription_live_loop(guild_id: int): if vc is None or not vc.is_connected() or 
not getattr(vc, "recording", False): continue await flush_active_recording_buffers(session, guild_id) - if session.pending_live_lines: - pending = list(session.pending_live_lines) - session.pending_live_lines.clear() - await try_post_live_lines(guild, session, pending) + emit_cutoff = max( + 0.0, + (datetime.now(timezone.utc) - session.started_at).total_seconds() - TRANSCRIBE_EMIT_INTERVAL_SECONDS, + ) + pending_to_send: list[dict[str, object]] = [] + async with session.pending_live_lock: + still_pending: list[dict[str, object]] = [] + for entry in session.pending_live_entries: + if float(entry.get("absolute_start") or 0.0) <= emit_cutoff: + pending_to_send.append(entry) + else: + still_pending.append(entry) + session.pending_live_entries = still_pending + if pending_to_send: + pending_to_send.sort(key=lambda item: (float(item.get("absolute_start") or 0.0), int(item.get("chunk_id") or 0))) + await try_post_live_lines(guild, session, pending_to_send) async def finalize_transcription_session(guild: discord.Guild, session: GuildTranscriptionSession, vc: discord.VoiceClient | None) -> Path: From 19f30be9aaa7a261468ab3347ecae81ef70beefe Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 26 Feb 2026 00:19:20 -0500 Subject: [PATCH 51/54] Refactor transcription inference into worker threads --- poopbot.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/poopbot.py b/poopbot.py index d2973c7..b07ebf9 100644 --- a/poopbot.py +++ b/poopbot.py @@ -360,6 +360,7 @@ def __init__(self, guild_id: int, voice_channel_id: int, transcript_thread_id: i self.capture_index = 0 self.chunk_index = 0 self.chunk_queue: asyncio.Queue[int] = asyncio.Queue(maxsize=TRANSCRIBE_MAX_QUEUE_DEPTH) + self.transcription_inference_lock = asyncio.Semaphore(1) self.chunk_meta: list[dict[str, object]] = [] self.chunk_meta_by_id: dict[int, dict[str, object]] = {} self.chunk_transcripts: dict[int, list[dict[str, object]]] = {} @@ -1043,7 
+1044,31 @@ async def transcription_worker_loop(guild_id: int): session.chunk_queue.task_done() continue try: - utterances = transcribe_audio_file(engine_name, engine, Path(str(chunk["file_path"]))) + inference_queue_size_start = session.chunk_queue.qsize() + inference_started_at = time.perf_counter() + logger.info( + "transcribe_inference_started guild_id=%s chunk_id=%s queue_size=%s", + guild_id, + chunk_id, + inference_queue_size_start, + ) + async with session.transcription_inference_lock: + utterances = await asyncio.to_thread( + transcribe_audio_file, + engine_name, + engine, + Path(str(chunk["file_path"])), + ) + inference_duration_seconds = time.perf_counter() - inference_started_at + inference_queue_size_end = session.chunk_queue.qsize() + logger.info( + "transcribe_inference_finished guild_id=%s chunk_id=%s duration_seconds=%.3f queue_size=%s backlog_growth=%s", + guild_id, + chunk_id, + inference_duration_seconds, + inference_queue_size_end, + inference_queue_size_end - inference_queue_size_start, + ) lines = build_transcript_lines_for_chunk(bot.get_guild(guild_id), session, chunk, utterances) session.chunk_transcripts[chunk_id] = lines chunk["transcribed"] = True @@ -1106,9 +1131,34 @@ async def finalize_transcription_session(guild: discord.Guild, session: GuildTra if chunk.get("transcribed"): continue try: - utterances = transcribe_audio_file(engine_name, engine, Path(str(chunk["file_path"]))) + chunk_id = int(chunk["chunk_id"]) + inference_queue_size_start = session.chunk_queue.qsize() + inference_started_at = time.perf_counter() + logger.info( + "transcribe_final_inference_started guild_id=%s chunk_id=%s queue_size=%s", + guild.id, + chunk_id, + inference_queue_size_start, + ) + async with session.transcription_inference_lock: + utterances = await asyncio.to_thread( + transcribe_audio_file, + engine_name, + engine, + Path(str(chunk["file_path"])), + ) + inference_duration_seconds = time.perf_counter() - inference_started_at + inference_queue_size_end 
= session.chunk_queue.qsize() + logger.info( + "transcribe_final_inference_finished guild_id=%s chunk_id=%s duration_seconds=%.3f queue_size=%s backlog_growth=%s", + guild.id, + chunk_id, + inference_duration_seconds, + inference_queue_size_end, + inference_queue_size_end - inference_queue_size_start, + ) lines = build_transcript_lines_for_chunk(guild, session, chunk, utterances) - session.chunk_transcripts[int(chunk["chunk_id"])] = lines + session.chunk_transcripts[chunk_id] = lines chunk["transcribed"] = True except Exception: logger.exception("transcribe_final_pass_failed guild_id=%s chunk_id=%s", guild.id, chunk.get("chunk_id")) From 79ed95f118ff561047fd35f5c77992b509e5a9e4 Mon Sep 17 00:00:00 2001 From: Gokias Date: Thu, 26 Feb 2026 00:27:15 -0500 Subject: [PATCH 52/54] Add recording health monitoring and teardown safeguards --- poopbot.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index b07ebf9..07f5cd1 100644 --- a/poopbot.py +++ b/poopbot.py @@ -161,6 +161,10 @@ def _loop_exception_handler(active_loop: asyncio.AbstractEventLoop, context: dic ) TRANSCRIBE_MAX_QUEUE_DEPTH = max(int(os.getenv("TRANSCRIBE_MAX_QUEUE_DEPTH", "128")), 1) TRANSCRIBE_MAX_FAILURES = max(int(os.getenv("TRANSCRIBE_MAX_FAILURES", "3")), 1) +TRANSCRIBE_RECORDING_FAILURE_GRACE_INTERVALS = max( + int(os.getenv("TRANSCRIBE_RECORDING_FAILURE_GRACE_INTERVALS", "2")), + 1, +) TRANSCRIBE_CONSENT_EMOJI = "βœ…" TRANSCRIBE_MODEL_SIZE = (os.getenv("TRANSCRIBE_MODEL_SIZE") or os.getenv("WHISPER_MODEL") or "base").strip().lower() or "base" if TRANSCRIBE_MODEL_SIZE not in {"tiny", "base", "small"}: @@ -1091,8 +1095,46 @@ async def transcription_live_loop(guild_id: int): return guild = bot.get_guild(guild_id) vc = guild.voice_client if guild is not None else None - if vc is None or not vc.is_connected() or not getattr(vc, "recording", False): + if vc is None or not vc.is_connected(): + continue + if 
not getattr(vc, "recording", False): + session.recording_failure_count += 1 + voice_state = { + "connected": vc.is_connected(), + "recording": bool(getattr(vc, "recording", False)), + "channel_id": getattr(getattr(vc, "channel", None), "id", None), + } + logger.warning( + "transcribe_recording_inactive guild_id=%s voice_state=%s recording_failure_count=%s queue_depth=%s grace_intervals=%s max_failures=%s", + guild_id, + voice_state, + session.recording_failure_count, + session.chunk_queue.qsize(), + TRANSCRIBE_RECORDING_FAILURE_GRACE_INTERVALS, + TRANSCRIBE_MAX_FAILURES, + ) + if session.recording_failure_count <= TRANSCRIBE_RECORDING_FAILURE_GRACE_INTERVALS: + continue + if session.recording_failure_count > TRANSCRIBE_MAX_FAILURES: + await teardown_transcription_session_for_recording_failure( + guild, + session, + ( + "voice_client_connected_but_not_recording " + f"after_consecutive_failures={session.recording_failure_count} " + f"queue_depth={session.chunk_queue.qsize()}" + ), + ) + return continue + if session.recording_failure_count > 0: + logger.info( + "transcribe_recording_recovered guild_id=%s recording_failure_count=%s queue_depth=%s", + guild_id, + session.recording_failure_count, + session.chunk_queue.qsize(), + ) + session.recording_failure_count = 0 await flush_active_recording_buffers(session, guild_id) emit_cutoff = max( 0.0, @@ -1186,6 +1228,17 @@ async def teardown_transcription_session_for_recording_failure( ): guild_id = session.guild_id session.closed = True + current_task = asyncio.current_task() + for task_name, task in (("loop_task", session.loop_task), ("worker_task", session.worker_task)): + if task is None or task.done() or task is current_task: + continue + task.cancel() + try: + await task + except asyncio.CancelledError: + logger.info("transcribe_teardown_task_cancelled guild_id=%s task=%s", guild_id, task_name) + except Exception: + logger.exception("transcribe_teardown_task_cancel_failed guild_id=%s task=%s", guild_id, task_name) if 
guild is not None: vc = guild.voice_client if vc is not None and vc.is_connected(): From 4873c99e6bb4509212cc7e52a5ea8ef2c5fc3f4f Mon Sep 17 00:00:00 2001 From: Gokias Date: Tue, 3 Mar 2026 23:48:58 -0500 Subject: [PATCH 53/54] Handle gplay voice reconnect/readiness before playback --- poopbot.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/poopbot.py b/poopbot.py index 07f5cd1..e2133a9 100644 --- a/poopbot.py +++ b/poopbot.py @@ -1587,6 +1587,31 @@ async def play_next_track(guild: discord.Guild): voice_client = guild.voice_client if voice_client is None: return + if not voice_client.is_connected(): + channel = getattr(voice_client, "channel", None) + if isinstance(channel, discord.VoiceChannel): + try: + await voice_client.disconnect(force=True) + except (discord.ClientException, discord.HTTPException): + pass + try: + voice_client = await channel.connect(timeout=15.0, reconnect=True) + except (discord.ClientException, discord.HTTPException, asyncio.TimeoutError): + logger.exception( + "music_voice_reconnect_failed guild_id=%s channel_id=%s state=%s", + guild.id, + channel.id, + describe_voice_client_state(voice_client), + ) + return + else: + return + + ready = await wait_for_voice_client_ready(voice_client, timeout_seconds=15.0) + if not ready: + logger.warning("music_voice_not_ready guild_id=%s state=%s", guild.id, describe_voice_client_state(voice_client)) + return + state = get_music_state(guild.id) async with state.lock: if voice_client.is_playing() or voice_client.is_paused(): @@ -1632,7 +1657,12 @@ def _after_playback(play_error: Exception | None): except Exception: logger.exception("music_next_track_start_failed context=%r", music_context) logger.info("music_play_start track=%s context=%r", next_track.title, music_context) - voice_client.play(ffmpeg_source, after=_after_playback) + try: + voice_client.play(ffmpeg_source, after=_after_playback) + except discord.ClientException: + 
logger.exception("music_play_start_failed context=%r", music_context) + await asyncio.sleep(0.5) + await play_next_track(guild) # ========================= # DATABASE HELPERS # ========================= From 02ef642b289d2792f91043d4b6b9e2bb9e51def8 Mon Sep 17 00:00:00 2001 From: Gokias Date: Wed, 4 Mar 2026 00:18:17 -0500 Subject: [PATCH 54/54] Pin voice dependencies and log runtime voice stack --- poopbot.py | 7 +++++++ requirements.txt | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/poopbot.py b/poopbot.py index e2133a9..dd2001a 100644 --- a/poopbot.py +++ b/poopbot.py @@ -22,6 +22,7 @@ from datetime import datetime, timezone, date, time as dtime, timedelta from pathlib import Path import time +import sys import discord from discord.ext import commands, tasks @@ -2988,6 +2989,12 @@ async def on_ready(): await post_button_for_guild(gid, cid) except (discord.Forbidden, discord.NotFound, discord.HTTPException): continue + logger.info( + "discord_runtime discord_version=%s python_version=%s opus_loaded=%s", + discord.__version__, + sys.version.split()[0], + discord.opus.is_loaded(), + ) logger.info("bot_ready user=%s user_id=%s", bot.user, bot.user.id) if not TOKEN or TOKEN == "PUT_TOKEN_HERE_FOR_TESTING": raise RuntimeError("Set DISCORD_TOKEN_POOPBOT env var or paste token into TOKEN.") diff --git a/requirements.txt b/requirements.txt index 7e26d20..890f002 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -py-cord[voice]>=2.6.1 +py-cord[voice]==2.6.1 python-dotenv>=1.2.1 -PyNaCl>=1.5.0 +PyNaCl==1.6.1 yt-dlp>=2024.0.0 faster-whisper