|
2 | 2 |
|
3 | 3 | import hashlib |
4 | 4 | import io |
| 5 | +import json |
5 | 6 | import logging |
6 | 7 | import os |
7 | 8 | import secrets |
|
16 | 17 | from litestar import WebSocket |
17 | 18 | from minio import Minio, S3Error |
18 | 19 | from minio.datatypes import Object as BlobStat |
| 20 | +from pydantic import ValidationError |
19 | 21 | from sqlalchemy import Engine |
20 | 22 | from sqlalchemy.exc import NoResultFound |
21 | 23 | from sqlalchemy.ext.asyncio import AsyncEngine |
22 | 24 |
|
23 | 25 | from masterbase.anomaly import DetectionState |
| 26 | +from masterbase.models import Analysis |
24 | 27 |
|
25 | 28 | logger = logging.getLogger(__name__) |
26 | 29 |
|
@@ -304,6 +307,131 @@ async def check_analyst(engine: AsyncEngine, steam_id: str) -> bool: |
304 | 307 | return analyst |
305 | 308 |
|
306 | 309 |
|
def get_uningested_demos(engine: Engine, limit: int) -> list[str]:
    """Return up to `limit` session IDs of demos awaiting ingestion.

    A demo is considered uningested when its session is closed (not active,
    not open), has never been ingested, has a non-empty recording, and has a
    blob name on record. Oldest sessions are returned first.
    """
    query = sa.text(
        """
    SELECT
        session_id
    FROM
        demo_sessions
    WHERE
        active = false
        AND open = false
        AND ingested = false
        AND demo_size > 0
        AND blob_name IS NOT NULL
    ORDER BY
        created_at ASC
    LIMIT :limit;
    """
    )

    with engine.connect() as conn:
        rows = conn.execute(query, {"limit": limit}).all()

    # Each row holds a single column: the session_id.
    return [session_id for (session_id,) in rows]
| 339 | + |
| 340 | + |
def ingest_demo(minio_client: Minio, engine: Engine, session_id: str):
    """Ingest a demo analysis from an analysis client.

    Reads `{session_id}.json` from the `jsonblobs` bucket, validates it
    against the `Analysis` model, and records per-(player, algorithm)
    detection counts in the `analysis` table, then marks the session as
    ingested. Any pre-existing analysis/review rows for the session are
    wiped first so a demo can be re-ingested by manually resetting the
    `ingested` flag.

    Returns a human-readable error string on failure, or None on success.
    """
    blob_name = f"{session_id}.json"
    response = None
    try:
        response = minio_client.get_object("jsonblobs", blob_name)
        raw_data = response.read()
    except S3Error as err:
        if err.code == "NoSuchKey":
            return "no analysis data found."
        else:
            return "unknown S3 error while looking up analysis data."
    finally:
        # MinIO requires the caller to close the response and release the
        # underlying HTTP connection after get_object; the original leaked it.
        if response is not None:
            response.close()
            response.release_conn()

    try:
        json_data = json.loads(raw_data.decode("utf-8"))
        data = Analysis.parse_obj(json_data)
    except (UnicodeDecodeError, json.JSONDecodeError, ValidationError):
        # Fix: a blob that is not valid UTF-8/JSON previously raised an
        # unhandled exception because only ValidationError was caught.
        return "malformed analysis data."

    # Data preprocessing: count detections per (player, algorithm) pair.
    algorithm_counts: dict[tuple, int] = {}
    for detection in data.detections:
        key = (detection.player, detection.algorithm)
        algorithm_counts[key] = algorithm_counts.get(key, 0) + 1

    # ensure the demo session is not already ingested
    is_ingested_sql = "SELECT ingested, active, open FROM demo_sessions WHERE session_id = :session_id;"

    # Wipe existing analysis data
    # (we want to be able to reingest a demo if necessary by manually setting ingested = false)
    wipe_analysis_sql = "DELETE FROM analysis WHERE session_id = :session_id;"
    wipe_reviews_sql = "DELETE FROM reviews WHERE session_id = :session_id;"

    # Insert the analysis data
    insert_sql = """\
    INSERT INTO analysis (
        session_id, target_steam_id, algorithm_type, detection_count, created_at
    ) VALUES (
        :session_id, :target_steam_id, :algorithm, :count, :created_at
    );
    """

    # Mark the demo as ingested
    mark_ingested_sql = "UPDATE demo_sessions SET ingested = true WHERE session_id = :session_id;"
    # datetime.now(timezone.utc) is the direct spelling of an aware UTC "now"
    # (equivalent to the old naive-now + astimezone round trip).
    created_at = datetime.now(timezone.utc).isoformat()

    with engine.connect() as conn:
        with conn.begin():
            command = conn.execute(
                sa.text(is_ingested_sql),
                {"session_id": session_id},
            )

            # Guard clauses: only closed, never-ingested sessions may proceed.
            result = command.one_or_none()
            if result is None:
                conn.rollback()
                return "demo not found"
            if result.ingested is True:
                conn.rollback()
                return "demo already ingested"
            if result.active is True:
                conn.rollback()
                return "session is still active"
            if result.open is True:
                conn.rollback()
                return "session is still open"

            conn.execute(
                sa.text(wipe_analysis_sql),
                {"session_id": session_id},
            )
            conn.execute(
                sa.text(wipe_reviews_sql),
                {"session_id": session_id},
            )

            for (target_steam_id, algorithm), count in algorithm_counts.items():
                conn.execute(
                    sa.text(insert_sql),
                    {
                        "session_id": session_id,
                        "target_steam_id": target_steam_id,
                        "algorithm": algorithm,
                        "count": count,
                        "created_at": created_at,
                    },
                )

            conn.execute(
                sa.text(mark_ingested_sql),
                {"session_id": session_id},
            )
    return None
| 433 | + |
| 434 | + |
307 | 435 | async def session_closed(engine: AsyncEngine, session_id: str) -> bool: |
308 | 436 | """Determine if a session is active.""" |
309 | 437 | sql = "SELECT active FROM demo_sessions WHERE session_id = :session_id;" |
|
0 commit comments