diff --git a/docker-compose.yml b/docker-compose.yml
index f281609..0e5da17 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,78 +3,58 @@ version: '3.8'
 services:
   elasticsearch:
-    build:
-      context: ./src/elasticsearch
-    container_name: elasticsearch
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
+    container_name: copilot-elasticsearch
+    environment:
+      - discovery.type=single-node
+      - xpack.security.enabled=true
+      - ELASTIC_PASSWORD=hiFc1SbV
+      - xpack.security.http.ssl.enabled=false
+      - xpack.security.transport.ssl.enabled=false
     ports:
       - "9200:9200"
+      - "9300:9300"
     volumes:
-      - data:/usr/share/elasticsearch/data
-      - logs:/usr/share/elasticsearch/logs
-    mem_limit: 1g
-    cpus: 1
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health/"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
+      - elasticsearch_data:/usr/share/elasticsearch/data
+    networks:
+      - copilot-network
 
   grafana:
-    build:
-      context: ./src/grafana
-    container_name: grafana
-    mem_limit: 1g
-    cpus: 0.5
+    image: grafana/grafana:latest
+    container_name: copilot-grafana
+    environment:
+      - GF_SECURITY_ADMIN_PASSWORD=admin
+      - GF_USERS_ALLOW_SIGN_UP=false
     ports:
-      - "3000:80"
-    depends_on:
-      - elasticsearch
+      - "3000:3000"
     volumes:
-      - grafana:/var/lib/grafana
-    environment:
-      GF_LOG_LEVEL: debug
-      GF_SERVER_HTTP_PORT: 80
-      GF_SECURITY_ADMIN_USER: admin
-      GF_SECURITY_ADMIN_PASSWORD: copilot
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:80/api/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-
-  init-grafana:
-    mem_limit: 0.5g
-    cpus: 0.25
-    build:
-      context: ./src/cpuad-updater/grafana
-    container_name: init-grafana
+      - grafana_data:/var/lib/grafana
+      - ./grafana/provisioning:/etc/grafana/provisioning
     depends_on:
-      - grafana
       - elasticsearch
-      - cpuad-updater
-    environment:
-      ELASTICSEARCH_URL: http://elasticsearch:9200
-      GRAFANA_URL: http://grafana:80
-      GRAFANA_USERNAME: admin
-      GRAFANA_PASSWORD: copilot
-    restart: "no"
+    networks:
+      - copilot-network
 
-  cpuad-updater:
-    mem_limit: 0.5g
-    cpus: 0.25
+  python-updater:
     build:
       context: ./src/cpuad-updater
-    container_name: cpuad-updater
+      dockerfile: Dockerfile
+    container_name: copilot-updater
+    environment:
+      - ELASTICSEARCH_URL=http://elasticsearch:9200
+      - ELASTICSEARCH_USER=elastic
+      - ELASTICSEARCH_PASS=hiFc1SbV
+      - GITHUB_PAT=${GITHUB_PAT}
+      - ORGANIZATION_SLUGS=${ORGANIZATION_SLUGS}
     depends_on:
       - elasticsearch
-    environment:
-      GITHUB_PAT: ${GITHUB_PAT}
-      ORGANIZATION_SLUGS: cody-test-org
-      ELASTICSEARCH_URL: http://elasticsearch:9200
-      # Add other environment variables as needed
-    restart: "no"
+    networks:
+      - copilot-network
 
 volumes:
-  data:
-  logs:
-  grafana:
\ No newline at end of file
+  elasticsearch_data:
+  grafana_data:
+
+networks:
+  copilot-network:
+    driver: bridge
\ No newline at end of file
diff --git a/elastic-start-local/config/telemetry.yml b/elastic-start-local/config/telemetry.yml
new file mode 100644
index 0000000..d5e0258
--- /dev/null
+++ b/elastic-start-local/config/telemetry.yml
@@ -0,0 +1,2 @@
+start-local:
+  version: 0.11.0
diff --git a/elastic-start-local/docker-compose.yml b/elastic-start-local/docker-compose.yml
new file mode 100644
index 0000000..7f924d0
--- /dev/null
+++ b/elastic-start-local/docker-compose.yml
@@ -0,0 +1,85 @@
+services:
+  elasticsearch:
+    image: docker.elastic.co/elasticsearch/elasticsearch:${ES_LOCAL_VERSION}
+    container_name: ${ES_LOCAL_CONTAINER_NAME}
+    volumes:
+      - dev-elasticsearch:/usr/share/elasticsearch/data
+    ports:
+      - 127.0.0.1:${ES_LOCAL_PORT}:9200
+    environment:
+      - discovery.type=single-node
+      - ELASTIC_PASSWORD=${ES_LOCAL_PASSWORD}
+      - xpack.security.enabled=true
+      - xpack.security.http.ssl.enabled=false
+      - xpack.license.self_generated.type=trial
+      - xpack.ml.use_auto_machine_memory_percent=true
+      - ES_JAVA_OPTS=${ES_LOCAL_JAVA_OPTS}
+      - cluster.routing.allocation.disk.watermark.low=${ES_LOCAL_DISK_SPACE_REQUIRED}
+      - cluster.routing.allocation.disk.watermark.high=${ES_LOCAL_DISK_SPACE_REQUIRED}
+      - cluster.routing.allocation.disk.watermark.flood_stage=${ES_LOCAL_DISK_SPACE_REQUIRED}
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "curl --output /dev/null --silent --head --fail -u elastic:${ES_LOCAL_PASSWORD} http://elasticsearch:9200",
+        ]
+      interval: 10s
+      timeout: 10s
+      retries: 30
+
+  kibana_settings:
+    depends_on:
+      elasticsearch:
+        condition: service_healthy
+    image: docker.elastic.co/elasticsearch/elasticsearch:${ES_LOCAL_VERSION}
+    container_name: ${KIBANA_LOCAL_SETTINGS_CONTAINER_NAME}
+    restart: 'no'
+    command: >
+      bash -c '
+        echo "Setup the kibana_system password";
+        start_time=$$(date +%s);
+        timeout=60;
+        until curl -s -u "elastic:${ES_LOCAL_PASSWORD}" -X POST http://elasticsearch:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_LOCAL_PASSWORD}\"}" -H "Content-Type: application/json" | grep -q "^{}"; do
+          if [ $$(($$(date +%s) - $$start_time)) -ge $$timeout ]; then
+            echo "Error: Elasticsearch timeout";
+            exit 1;
+          fi;
+          sleep 2;
+        done;
+      '
+
+  kibana:
+    depends_on:
+      kibana_settings:
+        condition: service_completed_successfully
+    image: docker.elastic.co/kibana/kibana:${ES_LOCAL_VERSION}
+    container_name: ${KIBANA_LOCAL_CONTAINER_NAME}
+    volumes:
+      - dev-kibana:/usr/share/kibana/data
+      - ./config/telemetry.yml:/usr/share/kibana/config/telemetry.yml
+    ports:
+      - 127.0.0.1:${KIBANA_LOCAL_PORT}:5601
+    environment:
+      - SERVER_NAME=kibana
+      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
+      - ELASTICSEARCH_USERNAME=kibana_system
+      - ELASTICSEARCH_PASSWORD=${KIBANA_LOCAL_PASSWORD}
+      - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=${KIBANA_ENCRYPTION_KEY}
+      - ELASTICSEARCH_PUBLICBASEURL=http://localhost:${ES_LOCAL_PORT}
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "curl -s -I http://kibana:5601 | grep -q 'HTTP/1.1 302 Found'",
+        ]
+      interval: 10s
+      timeout: 10s
+      retries: 30
+
+volumes:
+  dev-elasticsearch:
+  dev-kibana:
diff --git a/elastic-start-local/start.sh b/elastic-start-local/start.sh
new file mode 100644
index 0000000..7f918a9
--- /dev/null
+++ b/elastic-start-local/start.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+# Start script for start-local
+# More information: https://github.com/elastic/start-local
+set -eu
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+cd "${SCRIPT_DIR}"
+today=$(date +%s)
+. ./.env
+# Check disk space
+available_gb=$(($(df -k / | awk 'NR==2 {print $4}') / 1024 / 1024))
+required=$(echo "${ES_LOCAL_DISK_SPACE_REQUIRED}" | grep -Eo '[0-9]+')
+if [ "$available_gb" -lt "$required" ]; then
+  echo "----------------------------------------------------------------------------"
+  echo "WARNING: Disk space is below the ${required} GB limit. Elasticsearch will be"
+  echo "executed in read-only mode. Please free up disk space to resolve this issue."
+  echo "----------------------------------------------------------------------------"
+  echo "Press ENTER to confirm."
+  # shellcheck disable=SC2034
+  read -r line
+fi
+if [ -z "${ES_LOCAL_LICENSE:-}" ] && [ "$today" -gt 1761894959 ]; then
+  echo "---------------------------------------------------------------------"
+  echo "The one-month trial period has expired. You can continue using the"
+  echo "Free and open Basic license or request to extend the trial for"
+  echo "another 30 days using this form:"
+  echo "https://www.elastic.co/trialextension"
+  echo "---------------------------------------------------------------------"
+  echo "For more info about the license: https://www.elastic.co/subscriptions"
+  echo
+  echo "Updating the license..."
+  docker compose up --wait elasticsearch >/dev/null 2>&1
+  result=$(curl -s -X POST "${ES_LOCAL_URL}/_license/start_basic?acknowledge=true" -H "Authorization: ApiKey ${ES_LOCAL_API_KEY}" -o /dev/null -w '%{http_code}\n')
+  if [ "$result" = "200" ]; then
+    echo "✅ Basic license successfully installed"
+    echo "ES_LOCAL_LICENSE=basic" >> .env
+  else
+    echo "Error: I cannot update the license"
+    result=$(curl -s -X GET "${ES_LOCAL_URL}" -H "Authorization: ApiKey ${ES_LOCAL_API_KEY}" -o /dev/null -w '%{http_code}\n')
+    if [ "$result" != "200" ]; then
+      echo "Elasticsearch is not running."
+    fi
+    exit 1
+  fi
+  echo
+fi
+docker compose up --wait
diff --git a/elastic-start-local/stop.sh b/elastic-start-local/stop.sh
new file mode 100644
index 0000000..d091861
--- /dev/null
+++ b/elastic-start-local/stop.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Stop script for start-local
+# More information: https://github.com/elastic/start-local
+set -eu
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+cd "${SCRIPT_DIR}"
+docker compose stop
diff --git a/elastic-start-local/uninstall.sh b/elastic-start-local/uninstall.sh
new file mode 100644
index 0000000..dc96994
--- /dev/null
+++ b/elastic-start-local/uninstall.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+# Uninstall script for start-local
+# More information: https://github.com/elastic/start-local
+set -eu
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+ask_confirmation() {
+  echo "Do you confirm? (yes/no)"
+  read -r answer
+  case "$answer" in
+    yes|y|Y|Yes|YES)
+      return 0 # true
+      ;;
+    no|n|N|No|NO)
+      return 1 # false
+      ;;
+    *)
+      echo "Please answer yes or no."
+      ask_confirmation # Ask again if the input is invalid
+      ;;
+  esac
+}
+
+cd "${SCRIPT_DIR}"
+if [ ! -e "docker-compose.yml" ]; then
+  echo "Error: I cannot find the docker-compose.yml file"
+  echo "I cannot uninstall start-local."
+fi
+if [ ! -e ".env" ]; then
+  echo "Error: I cannot find the .env file"
+  echo "I cannot uninstall start-local."
+fi
+echo "This script will uninstall start-local."
+echo "All data will be deleted and cannot be recovered."
+if ask_confirmation; then
+  docker compose rm -fsv
+  docker compose down -v
+  rm docker-compose.yml .env uninstall.sh start.sh stop.sh config/telemetry.yml
+  if [ -z "$(ls -A config)" ]; then
+    rm -d config
+  fi
+  echo
+  echo "Do you want to remove the following Docker images?"
+  echo "- docker.elastic.co/elasticsearch/elasticsearch:9.1.4"
+  echo "- docker.elastic.co/kibana/kibana:9.1.4"
+  if ask_confirmation; then
+    if docker rmi "docker.elastic.co/elasticsearch/elasticsearch:9.1.4" >/dev/null 2>&1; then
+      echo "Image docker.elastic.co/elasticsearch/elasticsearch:9.1.4 removed successfully"
+    else
+      echo "Failed to remove image docker.elastic.co/elasticsearch/elasticsearch:9.1.4. It might be in use."
+    fi
+    if docker rmi docker.elastic.co/kibana/kibana:9.1.4 >/dev/null 2>&1; then
+      echo "Image docker.elastic.co/kibana/kibana:9.1.4 removed successfully"
+    else
+      echo "Failed to remove image docker.elastic.co/kibana/kibana:9.1.4. It might be in use."
+    fi
+  fi
+  echo "Start-local successfully removed"
+fi
diff --git a/grafana/provisioning/dashboards/copilot-dashboard.json b/grafana/provisioning/dashboards/copilot-dashboard.json
new file mode 100644
index 0000000..75b7c72
--- /dev/null
+++ b/grafana/provisioning/dashboards/copilot-dashboard.json
@@ -0,0 +1,146 @@
+{
+  "dashboard": {
+    "id": null,
+    "title": "GitHub Copilot Usage Dashboard",
+    "tags": ["copilot", "github"],
+    "timezone": "browser",
+    "panels": [
+      {
+        "id": 1,
+        "title": "Total Suggestions Count",
+        "type": "stat",
+        "targets": [
+          {
+            "refId": "A",
+            "datasource": {
+              "type": "elasticsearch",
+              "uid": "copilot-elasticsearch"
+            },
+            "query": "*",
+            "alias": "",
+            "metrics": [
+              {
+                "id": "1",
+                "type": "sum",
+                "field": "total_suggestions_count"
+              }
+            ],
+            "bucketAggs": [],
+            "timeField": "@timestamp"
+          }
+        ],
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 0,
+          "y": 0
+        },
+        "options": {
+          "reduceOptions": {
+            "values": false,
+            "calcs": ["lastNotNull"],
+            "fields": ""
+          }
+        }
+      },
+      {
+        "id": 2,
+        "title": "Copilot Usage Over Time",
+        "type": "timeseries",
+        "targets": [
+          {
+            "refId": "A",
+            "datasource": {
+              "type": "elasticsearch",
+              "uid": "copilot-elasticsearch"
+            },
+            "query": "*",
+            "alias": "",
+            "metrics": [
+              {
+                "id": "1",
+                "type": "sum",
+                "field": "total_suggestions_count"
+              }
+            ],
+            "bucketAggs": [
+              {
+                "id": "2",
+                "type": "date_histogram",
+                "field": "@timestamp",
+                "settings": {
+                  "interval": "1d",
+                  "min_doc_count": 0,
+                  "trimEdges": 0
+                }
+              }
+            ],
+            "timeField": "@timestamp"
+          }
+        ],
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 12,
+          "y": 0
+        }
+      },
+      {
+        "id": 3,
+        "title": "Active vs Inactive Seats",
+        "type": "piechart",
+        "targets": [
+          {
+            "refId": "A",
+            "datasource": {
+              "type": "elasticsearch",
+              "uid": "copilot-elasticsearch"
+            },
+            "query": "*",
+            "alias": "Active Seats",
+            "metrics": [
+              {
+                "id": "1",
+                "type": "max",
+                "field": "seat_active_this_cycle"
+              }
+            ],
+            "bucketAggs": [],
+            "timeField": "@timestamp"
+          },
+          {
+            "refId": "B",
+            "datasource": {
+              "type": "elasticsearch",
+              "uid": "copilot-elasticsearch"
+            },
+            "query": "*",
+            "alias": "Inactive Seats",
+            "metrics": [
+              {
+                "id": "1",
+                "type": "max",
+                "field": "seat_inactive_this_cycle"
+              }
+            ],
+            "bucketAggs": [],
+            "timeField": "@timestamp"
+          }
+        ],
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 0,
+          "y": 8
+        }
+      }
+    ],
+    "time": {
+      "from": "now-30d",
+      "to": "now"
+    },
+    "refresh": "5m",
+    "schemaVersion": 37,
+    "version": 1
+  }
+}
\ No newline at end of file
diff --git a/grafana/provisioning/dashboards/dashboard.yml b/grafana/provisioning/dashboards/dashboard.yml
new file mode 100644
index 0000000..b83bea8
--- /dev/null
+++ b/grafana/provisioning/dashboards/dashboard.yml
@@ -0,0 +1,10 @@
+apiVersion: 1
+
+providers:
+  - name: 'Copilot Dashboards'
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
\ No newline at end of file
diff --git a/grafana/provisioning/datasources/elasticsearch.yml b/grafana/provisioning/datasources/elasticsearch.yml
new file mode 100644
index 0000000..1f71d24
--- /dev/null
+++ b/grafana/provisioning/datasources/elasticsearch.yml
@@ -0,0 +1,23 @@
+apiVersion: 1
+
+datasources:
+  - name: Copilot-Elasticsearch
+    type: elasticsearch
+    access: proxy
+    url: http://elasticsearch:9200
+    database: "copilot_*"
+    basicAuth: true
+    basicAuthUser: elastic
+    secureJsonData:
+      basicAuthPassword: hiFc1SbV
+    jsonData:
+      interval: Daily
+      timeField: "@timestamp"
+      esVersion: "8.0.0"
+      maxConcurrentShardRequests: 5
+      logMessageField: message
+      logLevelField: level
+      includeFrozen: false
+      xpack: true
+    editable: true
+    uid: copilot-elasticsearch
\ No newline at end of file
diff --git a/src/cpuad-updater/Dockerfile b/src/cpuad-updater/Dockerfile
index 98b2ec2..92077f1 100644
--- a/src/cpuad-updater/Dockerfile
+++ b/src/cpuad-updater/Dockerfile
@@ -1,5 +1,5 @@
-# Use the official Python 3.10 base image
-FROM python:3.13.2-slim
+# Use the official Python 3.11 base image
+FROM python:3.11-slim
 
 # Set environment variables
 # PYTHONDONTWRITEBYTECODE: Prevents Python from writing .pyc files to disk
@@ -23,7 +23,7 @@ RUN groupadd -g 1000 cpuadupdater && \
 
 # Copy requirements.txt and install Python packages
 COPY requirements.txt /app/
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy mapping folder to the working directory
 COPY mapping /app/mapping
@@ -40,5 +40,8 @@ RUN chown -R cpuadupdater:cpuadupdater /app
 # Switch to the new user
 USER cpuadupdater
 
+# Create logs directory
+RUN mkdir -p logs
+
 # Run the command one time
-CMD ["python3", "main.py"]
+CMD ["python", "main.py"]
diff --git a/src/cpuad-updater/grafana/add_grafana_data_sources.sh b/src/cpuad-updater/grafana/add_grafana_data_sources.sh
index 77df3c1..e7721d4 100644
--- a/src/cpuad-updater/grafana/add_grafana_data_sources.sh
+++ b/src/cpuad-updater/grafana/add_grafana_data_sources.sh
@@ -1,11 +1,11 @@
-curl -X POST http://$GRAFANA_URL/api/datasources \
+curl -X POST $GRAFANA_URL/api/datasources \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer $GRAFANA_TOKEN" \
   -d '{
     "name": "elasticsearch-breakdown",
     "type": "elasticsearch",
     "access": "proxy",
-    "url": "http://$ELASTICSEARCH_URL",
+    "url": "$ELASTICSEARCH_URL",
    "basicAuth": false,
    "withCredentials": false,
    "isDefault": false,
@@ -21,14 +21,14 @@ curl -X POST http://$GRAFANA_URL/api/datasources \
 }'
 
 
-curl -X POST http://$GRAFANA_URL/api/datasources \
+curl -X POST $GRAFANA_URL/api/datasources \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer $GRAFANA_TOKEN" \
   -d '{
    "name": "elasticsearch-breakdown-chat",
    "type": "elasticsearch",
    "access": "proxy",
-    "url": "http://$ELASTICSEARCH_URL",
+    "url": "$ELASTICSEARCH_URL",
    "basicAuth": false,
    "withCredentials": false,
    "isDefault": false,
@@ -44,14 +44,14 @@ curl -X POST http://$GRAFANA_URL/api/datasources \
 }'
 
 
-curl -X POST http://$GRAFANA_URL/api/datasources \
+curl -X POST $GRAFANA_URL/api/datasources \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer $GRAFANA_TOKEN" \
   -d '{
    "name": "elasticsearch-total",
    "type": "elasticsearch",
    "access": "proxy",
-    "url": "http://$ELASTICSEARCH_URL",
+    "url": "$ELASTICSEARCH_URL",
    "basicAuth": false,
    "withCredentials": false,
    "isDefault": false,
@@ -67,14 +67,14 @@ curl -X POST http://$GRAFANA_URL/api/datasources \
 }'
 
 
-curl -X POST http://$GRAFANA_URL/api/datasources \
+curl -X POST $GRAFANA_URL/api/datasources \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer $GRAFANA_TOKEN" \
   -d '{
    "name": "elasticsearch-seat-info-settings",
    "type": "elasticsearch",
    "access": "proxy",
-    "url": "http://$ELASTICSEARCH_URL",
+    "url": "$ELASTICSEARCH_URL",
    "basicAuth": false,
    "withCredentials": false,
    "isDefault": false,
@@ -89,14 +89,14 @@ curl -X POST http://$GRAFANA_URL/api/datasources \
    }
 }'
 
-curl -X POST http://$GRAFANA_URL/api/datasources \
+curl -X POST $GRAFANA_URL/api/datasources \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer $GRAFANA_TOKEN" \
   -d '{
    "name": "elasticsearch-seat-assignments",
    "type": "elasticsearch",
    "access": "proxy",
-    "url": "http://$ELASTICSEARCH_URL",
+    "url": "$ELASTICSEARCH_URL",
    "basicAuth": false,
    "withCredentials": false,
    "isDefault": false,
diff --git a/src/cpuad-updater/grafana/gen_grafana_model.py b/src/cpuad-updater/grafana/gen_grafana_model.py
index ada17a4..99aee99 100644
--- a/src/cpuad-updater/grafana/gen_grafana_model.py
+++ b/src/cpuad-updater/grafana/gen_grafana_model.py
@@ -3,6 +3,8 @@
 import os
 import argparse
 from datetime import datetime
+from dotenv import load_dotenv
+load_dotenv()
 
 
 data_source_names = [
diff --git a/src/cpuad-updater/main.py b/src/cpuad-updater/main.py
index 099bbb6..7150b34 100644
--- a/src/cpuad-updater/main.py
+++ b/src/cpuad-updater/main.py
@@ -9,6 +9,8 @@ from metrics_2_usage_convertor import convert_metrics_to_usage
 import traceback
 from zoneinfo import ZoneInfo
 
+from dotenv import load_dotenv
+load_dotenv()
 
 
 def get_utc_offset():
@@ -111,7 +113,7 @@ def dict_save_to_json_file(
 
 
 def generate_unique_hash(data, key_properties=[]):
-    key_string = "-".join([data.get(key_propertie) for key_propertie in key_properties])
+    key_string = "-".join([str(data.get(key_propertie, "")) for key_propertie in key_properties])
     unique_hash = hashlib.sha256(key_string.encode()).hexdigest()
     return unique_hash
 
@@ -262,13 +264,7 @@ def get_copilot_usages(
         position_in_tree="leaf_team",
         usage_or_metrics="metrics",
     ):
-        urls = {
-            self.organization_slug,
-            (
-                position_in_tree,
-                f"https://api.github.com/{self.api_type}/{self.organization_slug}/copilot/{usage_or_metrics}",
-            ),
-        }
+        urls = {}
         if team_slug:
             if team_slug != "all":
                 urls = {
@@ -776,8 +772,43 @@ def check_and_create_indexes(self):
 
     def write_to_es(self, index_name, data, update_condition=None):
         last_updated_at = current_time()
         data["last_updated_at"] = last_updated_at
+
+        # Add @timestamp field for Grafana time-series support
+        if "day" in data:
+            # Convert day field to proper timestamp
+            day_str = data["day"]
+            if isinstance(day_str, str) and len(day_str) >= 10:
+                data["@timestamp"] = f"{day_str[:10]}T00:00:00.000Z"
+            else:
+                data["@timestamp"] = last_updated_at
+        else:
+            data["@timestamp"] = last_updated_at
+
         doc_id = data.get(self.primary_key)
-        logger.info(f"Writing data to Elasticsearch index: {index_name}")
+        # doc_id = "NFZyUG5wa0JzUDAyUXVVWHEyeGk6X3kwTThvV3dFYkxPdTRsMktCdDBpZw==" # Static ID for testing - FIXED
+
+        # Fallback: generate unique_hash if missing
+        if not doc_id:
+            logger.warning(f"Primary key '{self.primary_key}' not found, generating fallback ID")
+            # Generate a fallback unique hash based on available data
+            fallback_keys = ['organization_slug', 'team_slug', 'day', 'assignee_login']
+            available_keys = [key for key in fallback_keys if key in data]
+            if available_keys:
+                doc_id = generate_unique_hash(data, available_keys)
+                data[self.primary_key] = doc_id
+                logger.info(f"Generated fallback document ID: {doc_id} using keys: {available_keys}")
+            else:
+                # Last resort: use timestamp-based ID
+                doc_id = hashlib.sha256(f"{index_name}-{current_time()}".encode()).hexdigest()
+                data[self.primary_key] = doc_id
+                logger.info(f"Generated timestamp-based document ID: {doc_id}")
+
+        if not doc_id:
+            logger.error(f"No document ID found for primary key '{self.primary_key}' in data. Available keys: {list(data.keys())}")
+            logger.error(f"Data sample: {str(data)[:200]}...")
+            return
+
+        logger.info(f"Writing data to Elasticsearch index: {index_name} with ID: {doc_id}")
         try:
             # Get existing document
             existing_doc = self.es.get(index=index_name, id=doc_id)
@@ -804,10 +835,10 @@ def write_to_es(self, index_name, data, update_condition=None):
 
             # Always update document, possibly with some preserved fields
             self.es.update(index=index_name, id=doc_id, doc=data)
-            logger.info(f"[updated] to [{index_name}]: {data}")
+            logger.info(f"[updated] to [{index_name}]: {doc_id}")
         except NotFoundError:
             self.es.index(index=index_name, id=doc_id, document=data)
-            logger.info(f"[created] to [{index_name}]: {data}")
+            logger.info(f"[created] to [{index_name}]: {doc_id}")
 
 
 def main(organization_slug):
@@ -825,7 +856,7 @@ def main(organization_slug):
     logger.info(f"Starting data processing for {slug_type}: {organization_slug}")
 
     github_org_manager = GitHubOrganizationManager(
-        organization_slug, is_standalone=is_standalone
+        organization_slug, save_to_json=True, is_standalone=is_standalone
    )
    es_manager = ElasticsearchManager()
 
@@ -843,8 +874,14 @@ def main(organization_slug):
            f"No Copilot seat info & settings found for {slug_type}: {organization_slug}"
        )
    else:
+        # Ensure unique_hash is present
+        if 'unique_hash' not in data_seat_info_settings:
+            logger.warning("unique_hash missing from seat info settings, generating one")
+            data_seat_info_settings['unique_hash'] = generate_unique_hash(
+                data_seat_info_settings, key_properties=["organization_slug", "day"]
+            )
         es_manager.write_to_es(Indexes.index_seat_info, data_seat_info_settings)
-    logger.info(f"Data processing completed for {slug_type}: {organization_slug}")
+    logger.info(f"Seat info data processing completed for {slug_type}: {organization_slug}")
 
     # Process seat assignments
     logger.info(
diff --git a/src/cpuad-updater/mapping/copilot_seat_info_settings_v2_mapping.json b/src/cpuad-updater/mapping/copilot_seat_info_settings_v2_mapping.json
new file mode 100644
index 0000000..32e6bf7
--- /dev/null
+++ b/src/cpuad-updater/mapping/copilot_seat_info_settings_v2_mapping.json
@@ -0,0 +1,58 @@
+{
+  "mappings" : {
+    "properties" : {
+      "day" : {
+        "type" : "date"
+      },
+      "last_updated_at" : {
+        "type": "date",
+        "format": "yyyy-MM-dd HH:mm:ss.SSS"
+      },
+      "organization_slug" : {
+        "type" : "keyword"
+      },
+      "seat_management_setting" : {
+        "type" : "keyword"
+      },
+      "public_code_suggestions" : {
+        "type" : "keyword"
+      },
+      "ide_chat" : {
+        "type" : "keyword"
+      },
+      "cli" : {
+        "type" : "keyword"
+      },
+      "plan_type" : {
+        "type" : "keyword"
+      },
+      "seat_total" : {
+        "type" : "long"
+      },
+      "seat_added_this_cycle" : {
+        "type" : "long"
+      },
+      "seat_pending_invitation" : {
+        "type" : "long"
+      },
+      "seat_pending_cancellation" : {
+        "type" : "long"
+      },
+      "seat_active_this_cycle" : {
+        "type" : "long"
+      },
+      "seat_inactive_this_cycle" : {
+        "type" : "long"
+      },
+      "unique_hash" : {
+        "type" : "keyword"
+      }
+    }
+  },
+  "settings": {
+    "index": {
+      "number_of_shards": 1,
+      "number_of_replicas": 0
+    }
+  }
+}
diff --git a/src/cpuad-updater/requirements.txt b/src/cpuad-updater/requirements.txt
index 9157546..40c61a9 100644
--- a/src/cpuad-updater/requirements.txt
+++ b/src/cpuad-updater/requirements.txt
@@ -1,4 +1,3 @@
-elasticsearch==8.17.2
-requests==2.32.3
-tzlocal==5.3.1
-tzdata==2025.2
\ No newline at end of file
+requests==2.31.0
+elasticsearch==8.11.0
+python-dotenv==1.0.0
\ No newline at end of file