From fe555af45cc1f2cf7f0c78fbdea86b4bea22aa92 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 15 Oct 2025 12:57:37 +0800 Subject: [PATCH 01/20] feat(demos): add eks benchmark demo --- .../aws/eks-benchmark/README.md | 229 +++++++ .../helm-chart/automq-benchmark/Chart.yaml | 17 + .../helm-chart/automq-benchmark/README.md | 119 ++++ .../automq-benchmark/templates/job.yaml | 114 ++++ .../helm-chart/automq-benchmark/values.yaml | 77 +++ .../eks-benchmark/monitoring/prometheus.yaml | 606 ++++++++++++++++++ .../eks-benchmark/terraform/automq/main.tf | 122 ++++ .../terraform/automq/terraform.tfvars.example | 15 + .../terraform/benchmark-node/main.tf | 87 +++ .../terraform/benchmark-node/outputs.tf | 77 +++ .../terraform/benchmark-node/provider.tf | 14 + .../benchmark-node/terraform.tfvars.example | 34 + .../terraform/benchmark-node/variables.tf | 110 ++++ 13 files changed, 1621 insertions(+) create mode 100644 cloudservice-setup/aws/eks-benchmark/README.md create mode 100644 cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/Chart.yaml create mode 100644 cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md create mode 100644 cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml create mode 100644 cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/values.yaml create mode 100644 cloudservice-setup/aws/eks-benchmark/monitoring/prometheus.yaml create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf create mode 100644 
cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md new file mode 100644 index 0000000..c89f941 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -0,0 +1,229 @@ +# AutoMQ EKS Benchmark & Observability + +This project extends the existing AutoMQ EKS demo by adding comprehensive observability and performance benchmarking capabilities. It enables you to deploy monitoring infrastructure and conduct performance testing on existing AutoMQ clusters running on Amazon EKS, with real-time visualization through observability dashboards. + +## Overview + +The project provides: + +- **Infrastructure Setup**: Terraform modules for deploying dedicated benchmark nodes on existing EKS clusters +- **AutoMQ Integration**: Automated deployment and configuration of AutoMQ instances with monitoring integration +- **Observability Stack**: Prometheus and Grafana deployment for comprehensive monitoring and visualization +- **Benchmark Tools**: Helm charts for running performance tests against AutoMQ clusters +- **Dashboard Visualization**: Pre-configured Grafana dashboards to visualize benchmark results and cluster metrics + +## Architecture + +``` +┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ +│ EKS Cluster │ │ Benchmark Nodes │ │ Observability │ +│ │ │ │ │ │ +│ ┌─────────────┐ │ │ ┌──────────────┐ │ │ ┌─────────────┐ │ +│ │ AutoMQ │ │ │ │ Benchmark │ │ │ │ Prometheus │ │ +│ │ Console │ │ │ │ Workloads │ │ │ │ │ │ +│ └─────────────┘ │ │ └──────┬───────┘ │ │ └─────────────┘ │ +│ │ │ │ │ │ │ +│ ┌─────────────┐ │ │ │ │ │ ┌─────────────┐ │ +│ │ AutoMQ │ │◄────────────┘ │ │ │ Grafana │ │ +│ │ Cluster │ │ │ │ │ │ Dashboards │ │ +│ └─────────────┘ │ │ │ │ └─────────────┘ │ +└─────────────────┘ └──────────────────┘ 
└─────────────────┘ +``` + +## Prerequisites + +Before using this project, ensure you have: + +### Required Infrastructure +- **Existing EKS Cluster**: A running Amazon EKS cluster +- **AutoMQ Console**: AutoMQ Console already installed and configured +- **AutoMQ Cluster**: At least one AutoMQ cluster deployed and operational + +> **Note**: You can refer to [`/cloudservice-setup/aws/eks/`](../eks/) for instructions on setting up a complete EKS cluster with AutoMQ Console using Terraform. + + +### Required Tools +- **Terraform** (>= 1.0) +- **kubectl** configured for your EKS cluster +- **Helm** (>= 3.0) +- **AWS CLI** configured with appropriate permissions + +### Required Permissions +- EKS cluster management permissions +- EC2 instance and networking permissions +- IAM role management permissions +- S3 bucket access (for AutoMQ data storage) + +## Project Structure + +``` +eks-benchmark/ +├── terraform/ +│ ├── benchmark-node/ # Terraform module for benchmark nodes +│ │ ├── main.tf # Node group configuration +│ │ ├── variables.tf # Input variables +│ │ ├── outputs.tf # Output values +│ │ └── terraform.tfvars.example +│ └── automq/ # AutoMQ deployment configuration +│ ├── main.tf # AutoMQ instance setup +│ └── terraform.tfvars.example +├── helm-chart/ +│ └── automq-benchmark/ # Helm chart for benchmark workloads +│ ├── Chart.yaml +│ ├── values.yaml # Benchmark configuration +│ └── templates/ +├── monitoring/ +│ └── prometheus.yaml # Prometheus & Grafana configuration +└── README.md +``` + +## Quick Start + +### Step 1: Deploy Benchmark Infrastructure + +This step creates dedicated EKS node groups optimized for running benchmark workloads. These nodes are configured with appropriate instance types (4c8g minimum) and can be optionally tainted to ensure benchmark workloads run in isolation from other cluster workloads. 
+ +**Expected Result**: A new EKS node group will be created and ready to host benchmark pods, providing the computational resources needed for performance testing. + +1. **Configure benchmark nodes**: + ```bash + cd terraform/benchmark-node + cp terraform.tfvars.example terraform.tfvars + # Edit terraform.tfvars with your cluster details + ``` + +2. **Deploy benchmark nodes**: + ```bash + terraform init + terraform plan + terraform apply + ``` + +### Step 2: Deploy AutoMQ Instance (Optional) + +This optional step allows you to deploy additional AutoMQ instances if needed for your testing scenario. It uses the AutoMQ BYOC provider to create and configure AutoMQ clusters with integrated monitoring capabilities, including Prometheus remote write endpoints for metrics collection. + +**Expected Result**: A new AutoMQ instance will be deployed and configured with monitoring integration, ready to handle Kafka workloads and export metrics to your observability stack. + +If you need to deploy additional AutoMQ instances: + +1. **Configure AutoMQ deployment**: + ```bash + cd terraform/automq + cp terraform.tfvars.example terraform.tfvars + # Edit terraform.tfvars with your AutoMQ BYOC credentials + ``` + +2. **Deploy AutoMQ instance**: + ```bash + terraform init + terraform plan + terraform apply + ``` + +### Step 3: Deploy Observability Stack + +This step deploys a comprehensive monitoring solution including Prometheus and Grafana to collect, store, and visualize metrics from your AutoMQ cluster and benchmark workloads. The stack is configured with remote write capabilities and pre-configured dashboards for AutoMQ monitoring. + +**Expected Result**: Prometheus and Grafana will be deployed and accessible via LoadBalancer services. Prometheus will start collecting metrics from AutoMQ instances, and Grafana will be ready to display performance dashboards. + +1. 
**Install Prometheus and Grafana**: + ```bash + # Add Prometheus community Helm repository + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + helm repo update + + # Create monitoring namespace + kubectl create namespace prometheus + + # Deploy Prometheus and Grafana + helm install prometheus prometheus-community/kube-prometheus-stack \ + -n prometheus \ + -f monitoring/prometheus.yaml + ``` + +2. **Access Grafana Dashboard**: + ```bash + # Get Grafana LoadBalancer URL + kubectl get svc -n prometheus prometheus-grafana + + # Default credentials: + # Username: admin + # Password: AutoMQ@Grafana + ``` + +### Step 4: Run Benchmark Tests + +This step executes performance tests against your AutoMQ cluster using configurable workloads. The benchmark simulates real-world Kafka usage patterns with customizable parameters for throughput, message size, topic configuration, and test duration. The tests generate comprehensive metrics that are automatically collected by your monitoring stack. + +**Expected Result**: Benchmark jobs will run and generate load against the AutoMQ cluster. Performance metrics including throughput, latency, and resource utilization will be collected and visible in Grafana dashboards. You should see data flowing through the system and performance characteristics of your AutoMQ deployment. + +1. **Configure benchmark parameters**: + ```bash + cd helm-chart/automq-benchmark + # Edit values.yaml to configure: + # - AutoMQ connection details + # - Test parameters (topics, partitions, message size, etc.) + # - Resource requirements + ``` + +2. **Deploy benchmark workload**: + ```bash + helm install automq-benchmark . \ + --namespace default \ + --values values.yaml + ``` + +3. **Monitor benchmark progress**: + ```bash + # Watch job status + kubectl get jobs -w + + # View benchmark logs + kubectl logs -f job/automq-benchmark + ``` + +4. 
**View results in Grafana**: + - Access your Grafana dashboard + - Navigate to AutoMQ performance dashboards + - Observe real-time metrics during the test execution + +> **Note**: For comprehensive dashboard configurations and additional monitoring templates, you can contact the AutoMQ team to obtain pre-configured Grafana dashboards that will help you visualize detailed performance metrics and system health indicators. + + + +## Cleanup + +To remove all deployed resources: + +```bash +# Remove benchmark workload +helm uninstall automq-benchmark + +# Remove monitoring stack +helm uninstall prometheus -n prometheus +kubectl delete namespace prometheus + +# Remove AutoMQ instance (if deployed) +cd terraform/automq +terraform destroy + +# Remove benchmark nodes +cd ../benchmark-node +terraform destroy +``` + +## Contributing + +When contributing to this project: +1. Test changes in a development environment +2. Update documentation for any configuration changes +3. Ensure Terraform modules follow best practices +4. 
Validate Helm charts with different configurations + +## Support + +For issues and questions: +- Check the [AutoMQ Documentation](https://docs.automq.com) +- Review existing issues in the repository +- Contact the AutoMQ team for enterprise support, or join our [Slack community](https://go.automq.com/slack) \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/Chart.yaml b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/Chart.yaml new file mode 100644 index 0000000..e64bb9a --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/Chart.yaml @@ -0,0 +1,17 @@ +apiVersion: v2 +name: automq-benchmark +description: A Helm chart for AutoMQ benchmark testing +type: application +version: 0.1.0 +appVersion: "latest" +keywords: + - automq + - kafka + - benchmark + - performance +home: https://github.com/AutoMQ/automq-labs +sources: + - https://github.com/AutoMQ/automq-labs +maintainers: + - name: AutoMQ Team + email: support@automq.com \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md new file mode 100644 index 0000000..ba1c048 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md @@ -0,0 +1,119 @@ +# AutoMQ Benchmark Helm Chart + +This Helm chart deploys an AutoMQ benchmark job on a Kubernetes cluster. 
+ +## Prerequisites + +- Kubernetes 1.16+ +- Helm 3.0+ +- An AutoMQ cluster running in the same Kubernetes cluster + +## Installing the Chart + +To install the chart with the release name `my-benchmark`: + +```bash +helm install my-benchmark ./automq-benchmark +``` + +To install with custom values: + +```bash +helm install my-benchmark ./automq-benchmark -f custom-values.yaml +``` + +## Uninstalling the Chart + +To uninstall/delete the `my-benchmark` deployment: + +```bash +helm uninstall my-benchmark +``` + +## Configuration + +The following table lists the configurable parameters of the AutoMQ benchmark chart and their default values. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `job.name` | Name of the benchmark job | `automq-benchmark` | +| `job.completions` | Number of successful completions | `1` | +| `job.parallelism` | Number of parallel pods | `1` | +| `job.backoffLimit` | Number of retries before marking job as failed | `3` | +| `job.restartPolicy` | Restart policy for the job | `Never` | +| `image.repository` | AutoMQ image repository | `automqinc/automq` | +| `image.tag` | AutoMQ image tag | `latest` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `automq.username` | AutoMQ username | `user1` | +| `automq.password` | AutoMQ password | `MrCrSQTVoB` | +| `automq.bootstrapServer` | AutoMQ bootstrap server | `automq-release-kafka.automq.svc.cluster.local:9092` | +| `automq.securityProtocol` | Security protocol | `SASL_PLAINTEXT` | +| `automq.saslMechanism` | SASL mechanism | `PLAIN` | +| `benchmark.kafkaHeapOpts` | Kafka heap options | `-Xmx1g -Xms1g` | +| `benchmark.producerConfigs` | Producer configurations | `batch.size=0` | +| `benchmark.consumerConfigs` | Consumer configurations | `fetch.max.wait.ms=1000` | +| `benchmark.topics` | Number of topics | `10` | +| `benchmark.partitionsPerTopic` | Partitions per topic | `128` | +| `benchmark.producersPerTopic` | Producers per topic | `1` | +| 
`benchmark.groupsPerTopic` | Consumer groups per topic | `1` | +| `benchmark.consumersPerGroup` | Consumers per group | `1` | +| `benchmark.recordSize` | Record size in bytes | `52224` | +| `benchmark.sendRate` | Send rate (messages/sec) | `160` | +| `benchmark.warmupDuration` | Warmup duration in minutes | `3` | +| `benchmark.testDuration` | Test duration in minutes | `3` | +| `resources.requests.cpu` | CPU request | `500m` | +| `resources.requests.memory` | Memory request | `2Gi` | +| `resources.limits.cpu` | CPU limit | `2` | +| `resources.limits.memory` | Memory limit | `4Gi` | + +## Example Custom Values + +```yaml +# custom-values.yaml +benchmark: + topics: 20 + partitionsPerTopic: 256 + recordSize: 1024 + sendRate: 1000 + testDuration: 10 + +resources: + requests: + cpu: "1" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" + +automq: + bootstrapServer: "my-automq-cluster:9092" + username: "my-user" + password: "my-password" +``` + +## Monitoring + +After the job completes, you can check the results by viewing the job logs: + +```bash +kubectl logs job/automq-benchmark +``` + +To check the job status: + +```bash +kubectl get jobs +kubectl describe job automq-benchmark +``` + +## Troubleshooting + +1. **Job fails to start**: Check if the AutoMQ cluster is accessible and credentials are correct. +2. **Pod crashes**: Check resource limits and AutoMQ cluster capacity. +3. **Authentication errors**: Verify username, password, and security settings. 
+ +For more information, check the pod logs: + +```bash +kubectl logs -l app=automq-benchmark +``` \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml new file mode 100644 index 0000000..52c809a --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml @@ -0,0 +1,114 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Values.job.name }} + labels: + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value }} + {{- end }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + completions: {{ .Values.job.completions }} + parallelism: {{ .Values.job.parallelism }} + backoffLimit: {{ .Values.job.backoffLimit }} + template: + metadata: + labels: + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value }} + {{- end }} + spec: + restartPolicy: {{ .Values.job.restartPolicy }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + containers: + - name: {{ .Values.job.name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + {{- if .Values.automq.username }} + - name: USERNAME + value: "{{ .Values.automq.username }}" + - name: PASSWORD + value: "{{ .Values.automq.password }}" + {{- end }} + - name: SECURITY_PROTOCOL + value: "{{ .Values.automq.securityProtocol }}" + - name: KAFKA_HEAP_OPTS + value: "{{ .Values.benchmark.kafkaHeapOpts }}" + - name: SASL_MECHANISM + value: "{{ .Values.automq.saslMechanism }}" + - name: BOOTSTRAP_SERVER + value: "{{ .Values.automq.bootstrapServer }}" + - name: PRODUCER_CONFIGS + value: "{{ .Values.benchmark.producerConfigs }}" + - name: CONSUMER_CONFIGS + value: "{{ .Values.benchmark.consumerConfigs }}" + - name: TOPICS + value: "{{ .Values.benchmark.topics }}" + - name: PARTITIONS_PER_TOPIC + value: "{{ .Values.benchmark.partitionsPerTopic }}" + - name: PRODUCERS_PER_TOPIC + value: "{{ .Values.benchmark.producersPerTopic }}" + - name: GROUPS_PER_TOPIC + value: "{{ .Values.benchmark.groupsPerTopic }}" + - name: CONSUMERS_PER_GROUP + value: "{{ .Values.benchmark.consumersPerGroup }}" + - name: RECORD_SIZE + value: "{{ .Values.benchmark.recordSize }}" + - name: SEND_RATE + value: "{{ .Values.benchmark.sendRate }}" + - name: WARMUP_DURATION + value: "{{ .Values.benchmark.warmupDuration }}" + - name: TEST_DURATION + value: "{{ .Values.benchmark.testDuration }}" + command: ["/bin/sh", "-lc"] + args: + - | + set -eu + + AUTOMQ_BIN="/opt/kafka/kafka/bin/automq-perf-test.sh" + + {{- if .Values.automq.username }} + SASL_CONFIG="/tmp/client-sasl.properties" + cat > "$SASL_CONFIG" < + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. 
(gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + storageClass: "gp2" + + ## Prometheus server data Persistent Volume Binding Mode + ## If defined, volumeBindingMode: + ## If undefined (the default) or set to null, no volumeBindingMode spec is + ## set, choosing the default mode. + ## + # volumeBindingMode: "" + + ## Subdirectory of Prometheus server data Persistent Volume to mount + ## Useful if the volume's root directory is not empty + ## + subPath: "" + + ## Persistent Volume Claim Selector + ## Useful if Persistent Volumes have been provisioned in advance + ## Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#selector + ## + # selector: + # matchLabels: + # release: "stable" + # matchExpressions: + # - { key: environment, operator: In, values: [ dev ] } + + ## Persistent Volume Name + ## Useful if Persistent Volumes have been provisioned in advance and you want to use a specific one + ## + # volumeName: "" + + emptyDir: + ## Prometheus server emptyDir volume size limit + ## + sizeLimit: "" + + ## Annotations to be added to Prometheus server pods + ## + podAnnotations: {} + # iam.amazonaws.com/role: prometheus + + ## Labels to be added to Prometheus server pods + ## + podLabels: {} + + ## Prometheus AlertManager configuration + ## + alertmanagers: [] + + ## Specify if a Pod Security Policy for node-exporter must be created + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/ + ## + podSecurityPolicy: + annotations: {} + ## Specify pod annotations + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl + ## + # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' + # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' + # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' + 
+ ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below) + ## + replicaCount: 1 + + ## Number of old revisions to retain to allow rollback + ## Default Kubernetes value is set to 10 + ## + revisionHistoryLimit: 10 + + ## Annotations to be added to ConfigMap + ## + configMapAnnotations: {} + + ## Annotations to be added to deployment + ## + deploymentAnnotations: {} + + statefulSet: + ## If true, use a statefulset instead of a deployment for pod management. + ## This allows scaling replicas to more than 1 pod + ## + enabled: true + + annotations: { } + labels: { } + podManagementPolicy: OrderedReady + + ## Alertmanager headless service to use for the statefulset + ## + headless: + annotations: { } + labels: { } + servicePort: 80 + ## Enable gRPC port on service to allow auto discovery with thanos-querier + gRPC: + enabled: false + servicePort: 10901 + # nodePort: 10901 + + ## Statefulset's persistent volume claim retention policy + ## pvcDeleteOnStsDelete and pvcDeleteOnStsScale determine whether + ## statefulset's PVCs are deleted (true) or retained (false) on deleting + ## the statefulset and scaling down, respectively. Requires 1.27.0+. 
+ ## Ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + ## + pvcDeleteOnStsDelete: false + pvcDeleteOnStsScale: false + + ## Prometheus server resource requests and limits + ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + requests: + cpu: 500m + memory: 2Gi + # limits: + # cpu: 500m + # memory: 512Mi + # requests: + # cpu: 500m + # memory: 512Mi + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + ## + hostNetwork: false + + # When hostNetwork is enabled, this will be set to ClusterFirstWithHostNet automatically + dnsPolicy: ClusterFirst + + # Use hostPort + # hostPort: 9090 + + # Use portName + portName: "" + + ## Vertical Pod Autoscaler config + ## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler + verticalAutoscaler: + ## If true a VPA object will be created for the controller (either StatefulSet or Deployment, based on above configs) + enabled: false + # updateMode: "Auto" + # containerPolicies: + # - containerName: 'prometheus-server' + securityContext: + runAsUser: 65534 + runAsNonRoot: true + runAsGroup: 65534 + fsGroup: 65534 + + ## Security context to be added to server container + ## + containerSecurityContext: {} + + service: + ## If false, no Service will be created for the Prometheus server + ## + enabled: true + + annotations: {} + labels: {} + clusterIP: "" + + ## List of IP addresses at which the Prometheus server service is available + ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips + ## + externalIPs: [] + + loadBalancerIP: "" + loadBalancerSourceRanges: [] + servicePort: 9090 + sessionAffinity: None + type: ClusterIP + + ## Enable gRPC port on service to allow auto discovery with thanos-querier + gRPC: + enabled: 
false + servicePort: 10901 + # nodePort: 10901 + + ## If using a statefulSet (statefulSet.enabled=true), configure the + ## service to connect to a specific replica to have a consistent view + ## of the data. + statefulsetReplica: + enabled: false + replica: 0 + + ## Additional port to define in the Service + additionalPorts: [] + # additionalPorts: + # - name: authenticated + # port: 8081 + # targetPort: 8081 + + ## Prometheus server pod termination grace period + ## + terminationGracePeriodSeconds: 300 + + ## Prometheus data retention period (default if not specified is 15 days) + ## + retention: "30d" + + ## Prometheus' data retention size. Supported units: B, KB, MB, GB, TB, PB, EB. + ## + retentionSize: "100GB" + +## Prometheus server ConfigMap entries for rule files (allow prometheus labels interpolation) +ruleFiles: {} + +## Prometheus server ConfigMap entries for scrape_config_files +## (allows scrape configs defined in additional files) +## +scrapeConfigFiles: [] + +## Prometheus server ConfigMap entries +## +serverFiles: + ## Alerts configuration + ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ + alerting_rules.yml: {} + # groups: + # - name: Instances + # rules: + # - alert: InstanceDown + # expr: up == 0 + # for: 5m + # labels: + # severity: page + # annotations: + # description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.' 
+ # summary: 'Instance {{ $labels.instance }} down' + ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml + alerts: {} + + ## Records configuration + ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ + recording_rules.yml: {} + ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml + rules: {} + + prometheus.yml: + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + +# adds additional scrape configs to prometheus.yml +# must be a string so you have to add a | after extraScrapeConfigs: +# example adds prometheus-blackbox-exporter scrape config +extraScrapeConfigs: "" + # - job_name: 'prometheus-blackbox-exporter' + # metrics_path: /probe + # params: + # module: [http_2xx] + # static_configs: + # - targets: + # - https://example.com + # relabel_configs: + # - source_labels: [__address__] + # target_label: __param_target + # - source_labels: [__param_target] + # target_label: instance + # - target_label: __address__ +# replacement: prometheus-blackbox-exporter:9115 + +# Adds option to add alert_relabel_configs to avoid duplicate alerts in alertmanager +# useful in H/A prometheus with different external labels but the same alerts +alertRelabelConfigs: {} + # alert_relabel_configs: + # - source_labels: [dc] + # regex: (.+)\d+ +# target_label: dc + +networkPolicy: + ## Enable creation of NetworkPolicy resources. 
+ ## + enabled: false + +# Force namespace of namespaced resources +forceNamespace: "prometheus" + +# Extra manifests to deploy as an array +extraManifests: [] +prometheus-node-exporter: + ## If false, node-exporter will not be installed + ## + enabled: false + +## alertmanager sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager +## +alertmanager: + ## If false, alertmanager will not be installed + ## + enabled: false + +## kube-state-metrics sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics +## +kube-state-metrics: + ## If false, kube-state-metrics sub-chart will not be installed + ## + enabled: false + +## prometheus-pushgateway sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway +## +prometheus-pushgateway: + ## If false, pushgateway will not be installed + ## + enabled: false + diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf b/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf new file mode 100644 index 0000000..9ec7048 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf @@ -0,0 +1,122 @@ +terraform { + required_providers { + automq = { + source = "automq/automq" + } + aws = { + source = "hashicorp/aws" + } + } +} + +data "aws_subnets" "aws_subnets_example" { + provider = aws + filter { + name = "vpc-id" + values = [var.vpc_id] + } + filter { + name = "availability-zone" + values = [var.az] + } +} + + +resource "automq_integration" "prometheus_remote_write_example_1" { + environment_id = var.automq_environment_id + name = "example-1" + type = "prometheusRemoteWrite" + endpoint = "http://prometheus.prometheus:9090/api/v1/write" + deploy_profile = "eks" + + prometheus_remote_write_config = { + auth_type = "noauth" + } +} + +provider "automq" { + 
automq_byoc_endpoint = var.automq_byoc_endpoint + automq_byoc_access_key_id = var.automq_byoc_access_key_id + automq_byoc_secret_key = var.automq_byoc_secret_key +} + +data "automq_deploy_profile" "test" { + environment_id = var.automq_environment_id + name = "default" +} + +data "automq_data_bucket_profiles" "test" { + environment_id = var.automq_environment_id + profile_name = data.automq_deploy_profile.test.name +} + +resource "automq_kafka_instance" "example" { + environment_id = var.automq_environment_id + name = "automq-example-vm" + description = "example" + version = "1.4.1" + deploy_profile = data.automq_deploy_profile.test.name + + compute_specs = { + reserved_aku = 3 + networks = [ + { + zone = var.az + subnets = [data.aws_subnets.aws_subnets_example.ids[0]] + } + ] + bucket_profiles = [ + { + id = data.automq_data_bucket_profiles.test.data_buckets[0].id + } + ] + } + + features = { + wal_mode = "EBSWAL" + security = { + authentication_methods = ["anonymous"] + transit_encryption_modes = ["plaintext"] + } + instance_configs = { + "auto.create.topics.enable" = "false" + "log.retention.ms" = "3600000" + } + integrations = [ + automq_integration.prometheus_remote_write_example_1.id, + ] + } +} + + +variable "vpc_id" { + type = string +} + +variable "region" { + type = string +} + +variable "az" { + type = string +} + +variable "automq_byoc_endpoint" { + type = string +} + +variable "automq_byoc_access_key_id" { + type = string +} + +variable "automq_byoc_secret_key" { + type = string +} + +variable "automq_environment_id" { + type = string +} + +provider "aws" { + region = var.region +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example b/cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example new file mode 100644 index 0000000..5d8f2b2 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example @@ -0,0 +1,15 @@ +# Fill in your 
environment-specific values here +# DO NOT COMMIT real secrets to VCS. Keep this file local. + +# AWS networking +vpc_id = "vpc-id" +region = "us-east-1" +az = "us-east-1a" + +# AutoMQ BYOC endpoint and credentials +automq_byoc_endpoint = "http://example.com" +automq_byoc_access_key_id = "access-key" +automq_byoc_secret_key = "secretkey" + +# AutoMQ environment id +automq_environment_id = "automqlab-id" \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf new file mode 100644 index 0000000..3054cc3 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf @@ -0,0 +1,87 @@ +# Data sources to reference existing EKS cluster +data "aws_eks_cluster" "existing" { + name = var.cluster_name +} + +data "aws_eks_cluster_auth" "existing" { + name = var.cluster_name +} + +# Data source to reference existing IAM role +data "aws_iam_role" "existing_node_role" { + name = var.existing_node_role_name +} + +# Create the new node group for load testing +resource "aws_eks_node_group" "benchmark_nodes" { + cluster_name = data.aws_eks_cluster.existing.name + node_group_name = "benchmark-${var.resource_suffix}" + node_role_arn = data.aws_iam_role.existing_node_role.arn + subnet_ids = var.subnet_ids + + # Scaling configuration - at least 1 node with 4c8g + scaling_config { + desired_size = var.desired_size + max_size = var.max_size + min_size = var.min_size + } + + # Update configuration + update_config { + max_unavailable = 1 + } + + # Instance configuration - 4c8g instances + capacity_type = var.capacity_type + instance_types = var.instance_types + ami_type = var.ami_type + disk_size = var.disk_size + + # Labels for the node group + labels = merge( + { + "node.kubernetes.io/node-group" = "benchmark-${var.resource_suffix}" + "infrastructure.eks.amazonaws.com/managed-by" = "terraform" + "node.kubernetes.io/capacity-type" = 
lower(var.capacity_type) + "workload-type" = "benchmark" + "environment" = var.environment + }, + var.additional_labels + ) + + # Optional taints for dedicated nodes + dynamic "taint" { + for_each = var.enable_dedicated_nodes ? [1] : [] + content { + key = "workload-type" + value = "benchmark" + effect = "NO_SCHEDULE" + } + } + + # Remote access configuration (optional) + dynamic "remote_access" { + for_each = var.enable_remote_access ? [1] : [] + content { + ec2_ssh_key = var.ec2_ssh_key + source_security_group_ids = var.source_security_group_ids + } + } + + # Tags + tags = merge( + { + Name = "benchmark-${var.resource_suffix}" + Environment = var.environment + ManagedBy = "terraform" + Purpose = "benchmark" + }, + var.additional_tags + ) + + # Ensure proper ordering + depends_on = [ + data.aws_eks_cluster.existing, + data.aws_iam_role.existing_node_role + ] +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf new file mode 100644 index 0000000..edf72d7 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf @@ -0,0 +1,77 @@ +# Node group outputs +output "node_group_name" { + description = "Name of the created node group" + value = aws_eks_node_group.benchmark_nodes.node_group_name +} + +output "node_group_arn" { + description = "ARN of the created node group" + value = aws_eks_node_group.benchmark_nodes.arn +} + +output "node_group_status" { + description = "Status of the node group" + value = aws_eks_node_group.benchmark_nodes.status +} + +output "node_group_capacity_type" { + description = "Capacity type of the node group" + value = aws_eks_node_group.benchmark_nodes.capacity_type +} + +output "node_group_instance_types" { + description = "Instance types used by the node group" + value = aws_eks_node_group.benchmark_nodes.instance_types +} + +output "node_group_scaling_config" { + 
description = "Scaling configuration of the node group" + value = { + desired_size = aws_eks_node_group.benchmark_nodes.scaling_config[0].desired_size + max_size = aws_eks_node_group.benchmark_nodes.scaling_config[0].max_size + min_size = aws_eks_node_group.benchmark_nodes.scaling_config[0].min_size + } +} + +output "node_group_labels" { + description = "Labels applied to the node group" + value = aws_eks_node_group.benchmark_nodes.labels +} + +# Cluster information +output "cluster_name" { + description = "Name of the EKS cluster" + value = data.aws_eks_cluster.existing.name +} + +output "cluster_endpoint" { + description = "Endpoint of the EKS cluster" + value = data.aws_eks_cluster.existing.endpoint +} + +output "cluster_version" { + description = "Version of the EKS cluster" + value = data.aws_eks_cluster.existing.version +} + +# Node selector and tolerations for workload scheduling +output "node_selector_labels" { + description = "Labels to use for node selection in pod specs" + value = { + "node.kubernetes.io/node-group" = aws_eks_node_group.benchmark_nodes.node_group_name + "workload-type" = "benchmark" + "environment" = var.environment + } +} + +output "tolerations" { + description = "Tolerations to use in pod specs if dedicated nodes are enabled" + value = var.enable_dedicated_nodes ? 
[ + { + key = "workload-type" + operator = "Equal" + value = "benchmark" + effect = "NoSchedule" + } + ] : [] +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf new file mode 100644 index 0000000..281185a --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.0.0" + } + } +} + +provider "aws" { + region = var.aws_region +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example new file mode 100644 index 0000000..150c861 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example @@ -0,0 +1,34 @@ +# EKS Cluster Configuration +cluster_name = "cluster-name" +existing_node_role_name = "node-group-role" +aws_region = "us-east-1" +environment = "dev" + +# Subnet Configuration (only us-east-1a for new node group) +subnet_ids = [ + "subnet-id" # Replace with your own subnet ID; a single subnet in us-east-1a is recommended. 
+] + +# Node Group Configuration +resource_suffix = "observability" +capacity_type = "ON_DEMAND" +instance_types = ["c5.xlarge", "c5a.xlarge", "c5n.xlarge", "m5.xlarge", "m5a.xlarge"] +desired_size = 1 +max_size = 3 +min_size = 1 +ami_type = "AL2023_x86_64_STANDARD" +disk_size = 50 + +# Optional configurations +enable_dedicated_nodes = false +enable_remote_access = false + +# Additional labels and tags +additional_labels = { + "team" = "platform" +} + +additional_tags = { + "Project" = "AutoMQ" + "Team" = "Platform" +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf new file mode 100644 index 0000000..01cdd7e --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf @@ -0,0 +1,110 @@ +# Required variables - no defaults; set them in terraform.tfvars (see terraform.tfvars.example) +variable "cluster_name" { + description = "Name of the existing EKS cluster" + type = string +} + +variable "existing_node_role_name" { + description = "Name of the existing node group IAM role" + type = string +} + +variable "subnet_ids" { + description = "List of subnet IDs where the node group will be deployed" + type = list(string) +} + +variable "resource_suffix" { + description = "Suffix to append to resource names for uniqueness" + type = string +} + +variable "aws_region" { + description = "AWS region" + type = string +} + +variable "environment" { + description = "Environment name" + type = string +} + +# Node group configuration +variable "capacity_type" { + description = "Type of capacity associated with the EKS Node Group. 
Valid values: ON_DEMAND, SPOT" + type = string + default = "ON_DEMAND" +} + +variable "instance_types" { + description = "List of instance types for the node group - configured for at least 4c8g" + type = list(string) + default = ["c5.xlarge", "c5a.xlarge", "c5n.xlarge", "m5.xlarge", "m5a.xlarge"] +} + +variable "desired_size" { + description = "Desired number of nodes" + type = number + default = 2 +} + +variable "max_size" { + description = "Maximum number of nodes" + type = number + default = 3 +} + +variable "min_size" { + description = "Minimum number of nodes" + type = number + default = 1 +} + +variable "ami_type" { + description = "Type of Amazon Machine Image (AMI) associated with the EKS Node Group" + type = string + default = "AL2023_x86_64_STANDARD" +} + +variable "disk_size" { + description = "Disk size in GiB for worker nodes" + type = number + default = 50 +} + +# Optional configurations +variable "enable_dedicated_nodes" { + description = "Whether to add taints to make nodes dedicated for load testing" + type = bool + default = false +} + +variable "enable_remote_access" { + description = "Whether to enable remote access to the nodes" + type = bool + default = false +} + +variable "ec2_ssh_key" { + description = "EC2 Key Pair name for SSH access" + type = string + default = null +} + +variable "source_security_group_ids" { + description = "Security group IDs allowed for remote access" + type = list(string) + default = [] +} + +variable "additional_labels" { + description = "Additional labels to apply to the node group" + type = map(string) + default = {} +} + +variable "additional_tags" { + description = "Additional tags to apply to resources" + type = map(string) + default = {} +} \ No newline at end of file From 4b39af4047344aa494d51d8ea84c079777eb3975 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 15 Oct 2025 13:00:00 +0800 Subject: [PATCH 02/20] remove extra comment --- 
.../eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml index 52c809a..7bb7b79 100644 --- a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml +++ b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml @@ -94,7 +94,6 @@ spec: COMMON_CONFIG_ARG="" {{- end }} - # 执行性能测试 exec "$AUTOMQ_BIN" \ --bootstrap-server "$BOOTSTRAP_SERVER" \ --producer-configs "$PRODUCER_CONFIGS" \ From 33eb3bc327503a7da1128901583d30a2a791f03c Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Thu, 16 Oct 2025 20:43:40 +0800 Subject: [PATCH 03/20] use eks profile --- .../aws/eks-benchmark/terraform/automq/main.tf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf b/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf index 9ec7048..d1dbbd5 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf @@ -42,7 +42,7 @@ provider "automq" { data "automq_deploy_profile" "test" { environment_id = var.automq_environment_id - name = "default" + name = "eks" } data "automq_data_bucket_profiles" "test" { @@ -65,6 +65,9 @@ resource "automq_kafka_instance" "example" { subnets = [data.aws_subnets.aws_subnets_example.ids[0]] } ] + kubernetes_node_groups = [{ + id = "automq-node-group" + }] bucket_profiles = [ { id = data.automq_data_bucket_profiles.test.data_buckets[0].id From 5cc8300e958fcf867732abc986c70d46bd0f0fb3 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Tue, 21 Oct 2025 16:38:01 +0800 Subject: [PATCH 04/20] improve automq eks benchmark docs --- .../aws/eks-benchmark/README.md | 120 ++++++++++++++---- 
.../aws/eks-benchmark/architecture.png | Bin 0 -> 40213 bytes .../helm-chart/automq-benchmark/README.md | 12 +- 3 files changed, 99 insertions(+), 33 deletions(-) create mode 100644 cloudservice-setup/aws/eks-benchmark/architecture.png diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index c89f941..aa6008b 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -1,6 +1,10 @@ -# AutoMQ EKS Benchmark & Observability +# AutoMQ Quick Setup & Benchmark -This project extends the existing AutoMQ EKS demo by adding comprehensive observability and performance benchmarking capabilities. It enables you to deploy monitoring infrastructure and conduct performance testing on existing AutoMQ clusters running on Amazon EKS, with real-time visualization through observability dashboards. +Deploying a complete AutoMQ cluster on AWS traditionally involves multiple, complex steps, from setting up the control and data planes to manually configuring a separate observability environment and benchmarking tools. + +This project eliminates that complexity. It is designed to provide a seamless, one-click solution using Terraform to automatically provision an entire AutoMQ ecosystem on AWS. + +The primary goal is to empower users to effortlessly spin up a fully operational, observable, and testable AutoMQ cluster, drastically reducing setup time and manual configuration. 
## Overview @@ -14,21 +18,7 @@ The project provides: ## Architecture -``` -┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ -│ EKS Cluster │ │ Benchmark Nodes │ │ Observability │ -│ │ │ │ │ │ -│ ┌─────────────┐ │ │ ┌──────────────┐ │ │ ┌─────────────┐ │ -│ │ AutoMQ │ │ │ │ Benchmark │ │ │ │ Prometheus │ │ -│ │ Console │ │ │ │ Workloads │ │ │ │ │ │ -│ └─────────────┘ │ │ └──────┬───────┘ │ │ └─────────────┘ │ -│ │ │ │ │ │ │ -│ ┌─────────────┐ │ │ │ │ │ ┌─────────────┐ │ -│ │ AutoMQ │ │◄────────────┘ │ │ │ Grafana │ │ -│ │ Cluster │ │ │ │ │ │ Dashboards │ │ -│ └─────────────┘ │ │ │ │ └─────────────┘ │ -└─────────────────┘ └──────────────────┘ └─────────────────┘ -``` +![architecture](./architecture.png) ## Prerequisites @@ -92,6 +82,39 @@ This step creates dedicated EKS node groups optimized for running benchmark work # Edit terraform.tfvars with your cluster details ``` + **Required Configuration Parameters**: + + Set the following variables in a `terraform.tfvars` file or with the `-var` command-line argument: + + - **`cluster_name`** + - **Description**: The name of your existing EKS cluster where benchmark nodes will be deployed. + - **Type**: `string` + - **Required**: Yes + - **How to find**: Use `kubectl config current-context` or check the AWS EKS console + + - **`existing_node_role_name`** + - **Description**: The IAM role name used by existing EKS node groups in your cluster. + - **Type**: `string` + - **Required**: Yes + - **How to find**: Check your existing node group's IAM role in the AWS EKS console + + - **`aws_region`** + - **Description**: The AWS region where your EKS cluster is located and resources will be deployed. + - **Required**: Yes + - **Type**: `string` + - **Example**: `"us-east-1"` + + - **`environment`** + - **Description**: Environment tag used for resource identification and organization. 
+ - **Required**: Yes + - **Type**: `string` + - **Example**: `"dev"` + + - **`subnet_ids`** + - **Description**: List of subnet IDs where benchmark nodes will be deployed. We recommend using only one subnet in us-east-1a for optimal performance. + - **Type**: `list(string)` + - **Required**: Yes + 2. **Deploy benchmark nodes**: ```bash terraform init @@ -99,13 +122,9 @@ This step creates dedicated EKS node groups optimized for running benchmark work terraform apply ``` -### Step 2: Deploy AutoMQ Instance (Optional) - -This optional step allows you to deploy additional AutoMQ instances if needed for your testing scenario. It uses the AutoMQ BYOC provider to create and configure AutoMQ clusters with integrated monitoring capabilities, including Prometheus remote write endpoints for metrics collection. - -**Expected Result**: A new AutoMQ instance will be deployed and configured with monitoring integration, ready to handle Kafka workloads and export metrics to your observability stack. +### Step 2: Deploy AutoMQ Instance -If you need to deploy additional AutoMQ instances: +Log in to the AutoMQ control plane obtained in the previous step and create an access key/secret key (AK/SK) pair. In the current version, you also need to create an EKS profile for further access to the cluster, and then fill these values into the Terraform variables. Future releases of AutoMQ will allow profile creation through Terraform. 1. **Configure AutoMQ deployment**: ```bash @@ -114,6 +133,50 @@ If you need to deploy additional AutoMQ instances: # Edit terraform.tfvars with your AutoMQ BYOC credentials ``` + **Required Configuration Parameters**: + + Set the following variables in a `terraform.tfvars` file or with the `-var` command-line argument: + + - **`vpc_id`** + - **Description**: The VPC ID where your EKS cluster is deployed and AutoMQ resources will be created. 
+ - **Type**: `string` + - **Required**: Yes + - **How to find**: Check the AWS VPC console or use the `aws ec2 describe-vpcs` command + + - **`region`** + - **Description**: The AWS region where AutoMQ resources will be deployed. + - **Type**: `string` + - **Required**: Yes (example: `"us-east-1"`) + + - **`az`** + - **Description**: The availability zone where AutoMQ resources will be deployed. + - **Type**: `string` + - **Required**: Yes (example: `"us-east-1a"`) + + - **`automq_byoc_endpoint`** + - **Description**: The AutoMQ BYOC (Bring Your Own Cloud) endpoint URL for API access. + - **Type**: `string` + - **Required**: Yes + - **How to find**: Obtain from AutoMQ Console after setting up your BYOC environment + + - **`automq_byoc_access_key_id`** + - **Description**: Access key ID for AutoMQ BYOC authentication. + - **Type**: `string` + - **Required**: Yes + - **How to find**: Generate from AutoMQ Console credentials section + + - **`automq_byoc_secret_key`** + - **Description**: Secret access key for AutoMQ BYOC authentication. + - **Type**: `string` + - **Required**: Yes + - **How to find**: Generate from AutoMQ Console credentials section (keep secure) + + - **`automq_environment_id`** + - **Description**: The AutoMQ environment identifier for resource organization. + - **Type**: `string` + - **Required**: Yes + - **How to find**: Available in AutoMQ Console environment settings + 2. **Deploy AutoMQ instance**: ```bash terraform init @@ -143,21 +206,26 @@ This step deploys a comprehensive monitoring solution including Prometheus and G ``` 2. **Access Grafana Dashboard**: + + Use the command below to find the Grafana service address. To obtain the [configuration file](https://www.automq.com/docs/automq/observability/dashboard-configuration) for the observability dashboard, contact the AutoMQ team. 
+ ```bash - # Get Grafana LoadBalancer URL kubectl get svc -n prometheus prometheus-grafana # Default credentials: - # Username: admin - # Password: AutoMQ@Grafana + ``` ### Step 4: Run Benchmark Tests This step executes performance tests against your AutoMQ cluster using configurable workloads. The benchmark simulates real-world Kafka usage patterns with customizable parameters for throughput, message size, topic configuration, and test duration. The tests generate comprehensive metrics that are automatically collected by your monitoring stack. +For specific configurations of helm values, you can refer to the [README](./helm-chart/automq-benchmark/README.md) in the automq-benchmark folder for further details. + **Expected Result**: Benchmark jobs will run and generate load against the AutoMQ cluster. Performance metrics including throughput, latency, and resource utilization will be collected and visible in Grafana dashboards. You should see data flowing through the system and performance characteristics of your AutoMQ deployment. + + 1. 
**Configure benchmark parameters**: ```bash cd helm-chart/automq-benchmark diff --git a/cloudservice-setup/aws/eks-benchmark/architecture.png b/cloudservice-setup/aws/eks-benchmark/architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..4a93d9ea8366636eb5a14dbe30942f92fca9b7b9 GIT binary patch literal 40213 zcmd>l_g7PE*DdHlkZPex6+KEX9;8VJ#Y&T2BcSvUdM8mSQHt0Q4IQNSl2AgC1Vlla z^gsxqw}eiBKytU|d%yeLasPrl#{B^Z*?T{2t^GV}%{kX=6QjEvtOBeI3=ACi@7;dP zz;Fr<{Is#00lpE`bi2a9@SNfPZC&$F$94ScHuI5;jguwO(99*HC^$JHfT~rGhD_Fr zN7O*-?@!*q3tSxL&%J(IkNx|-+llTsXU~bn2bi=oDzT_KnY@rtJ^wfRsoCcOx{F~K zo}SV5)m%cqX^XNfH^Wxk3R*QU-wA7TTw`+Ea6ku>a7ar1Xw@3s9~j3sH(F)rDd0o6 zJVp%o^G}RE@cStXFj9se=QtS|7~YBr0oO0Hnt(bUxr( z=&vRNW-ij{##(BND6sa-8)52ATh^j~7tV0e-HstLpi?dc-$23F2B=Uy)>PJmQth>d zH0i%RMY&&)^?()3zAwQ|9zUr@)2&MJ6IUHWKxk-=AO&+v8`4?gKps|vt{7=KW(3#J7D1QFI}!_UFly;Yo0gudXF4#>q}WW6w*fEpRDv$%@}Jf+jfKUN<&{U*}?SNVkhx0Ch3XB8V9Y*80$V)fYK+#Dx) zISZMIn_j~0n?0cV?L_tR4Mm@U0jsIBhp5JAW^KQ$$M0UrUH;R^6V}+a&()(*OP^G5 zUaZ<|F~T{U$g;$=*6-Q|L;LN$KjJM=vvLYpqgw zp!B|adqNg!j62Toho6GR4&pI!v-U}m$pJ@-8{WgMKD9D^r%as^F@zV+Bly~(ge9gJ zE;Ik}PyG4-F^`ZJP1lTDUiYtmGU4PKa`cuo2= z^dL8FtJ}6dqQC2=(p8mzTbTclm(>(vN%9LZa$GsW*Qt8xaXu_IS?;Mi^qEZ{<)v6v z;z_N!=d8b?fZ2f5#=6ZzzSkEvW*vKHrYS-4)B}r7;&3`@fFX5y7k_=3Bezn0C_lwx z-Z(?i!H|eHOjUsbdo1fqrktvB_OvB^C&C&ifTGcYA&uj;SZydb(=~z`)t8Q!9*%Oy zSA@={%yGeO}<5G?2qb$-wrG&nAbRAt;JzbCZYSrd=1hY zurAqSk`c^o{VgKSs$zm*Y(ntZv(*a^J+ zb&1b>%9o? 
zLM@mAAsvSa%IGzNz7P=71Xz{PpFh+zMY+2Aw}0;uiS#zB3j2BBV;p2K1Qm_SlX$UwV*cFz(WRFs9&;# zs>&HZIG1RUwAIFTULsoo@Xn7`5a#a6!J{9de6(lC5%a>7qzyxP_PDHAJ&ah%zZ3Ln zJK2|6FWUdHPG}=I__i~(ev(Npt!V15oSXk(Ecdz9wO1fy;#q1CYs{({rQ@RU0is$1 zX;I>Urc6iRlT@ac6Z9;ajii^dV9<6K9$xTtYm~4PaV}Xq_~dA>XE7sa)TTWzJxv9Y zsZL$5Wg9JjuIl=WUnO|my8cP~%a;q%6SZ#1TraowOC+Ahd8@^e4XS)q!MM6mOSAB~ z=za{b3UmFbG@;SYRC(s_I=>+;3O>Qnw~f|&v#BB7f1dLl8}XdZkQq4Bb4Vt;=;yG;&M(D@UFY6RvEF)HXL10JplhZgVt0bICHWd1*p6T65Ma9Ju(cE z|7AZ%x4gI=AYTz2)v7ka(H5jT*!5!>p2|JS3*}{P`)%2ZYx7q#>LKLq9rH=({JpLlIld=@6?kyheQN7u^RqPSUg-|7%eKyOqJFG_ zxiMnT3knN*6&NI0_+vHF93_RCH!*BbwOH+u@2}4^jL%u56+L?+B&p4xEi}?Jal}2v z?HIpjF!SBFPilO;nHi7)^uE>_|B-~QoLm6N`*!wcIXbF|%S6aT*dyJBA?Edw;wT1Dtc0_v6B+Tu`(im-QS#nbxTp`L=G_r5U;A*l^b}=1y6c3iBJ!)bMV{v_nsk%HYD&lhuhcALthGL@|#!^yrARS>W8TNq+<8A?Ig zs#jK&lWH6=Hg&Uq?{M@cWiG34Ww@(~KUUvMkIVumwO92&-zNGF&er4kj5R0W9T(d| z3nHKf5r*s2Ez~Ir^U1iw)`zS{8bNTDHkJizKRF~of8xW&tj8#6gb>(f8-4I(Pdj~e zQQmr8ODl35AJ%V8S%U3c(>`fvI~EV=HHLsE_fLG5Q?Qets5SDiJ($tn!XcmQ5yN3G zEqY4yWMV1GR0m(zcoJ+8Esq}MxD=j2o#u)|X>KgxMd_W|ja1Q9{Au7LN?IvE`l&-euM*Rm z)j%=o3-VJ0FK*}3UdS+kwOx!U+>ncEW@}gxj|nYC++qriz0-2HB*qATb>>3{Ize3NS4}`JWRNX$Exex|9LO0uL$`}tsOxrY7~6Cd}Ae&srj|B zY|v5c#|M$UPC~2!aoBhD({s?k&OKTvdRF}El*QQeJmn-4jC%{O~8b}OTzYWH89X2yR@kP8uq9j2qQRgkig zt;AO9iD)A|;P=Xen;y}meS6_co1Y>mW6iZUa|z`QZY%xP(Is9V+v7C_)^6(;J}MJ@ zy}YykYxt5+#}8AV)d2aX1!>ER+*g4Z&2R-Z>9fPdX6+KGv>V92lIhPVPT4p26C868fYU>zv=b>p* zE|cy^T)Z5Omtll!I7ZiAsddlC*1mcDYKSX0OH^Y`%SSdiL6sgx>G6=BD$nCXL-|^k zUHewt`{bYP%1=wOM3d-7AyKXBc)FdGkkqS_9*?D?bHVH5gr2J^nXYjf`vcZgyn%Oh;F|WaG&^+iUtKBS511{^IG^IMph1Q89LM3MuxxAz}y-trzm3k0c zB=(NLcHH**t7Zb6WrJF5ck{Z+UfUS%<`-EGjuujmT+(Z&q4Rd@=uGqs>a&X3ovmLAtU4%ez>P4 zB=x!U(&0gyc4ySd6x-C2F%1E6v{W^holk?bta;H2XsdVHGAXHYl>WVhv%>gunc<6S zlzuvHtlYMxLQ{y-gA0DNJ{?9GHzHQq*KSXRB>(}$)T)R8<@SWYK{c25sz}z&YnfAC z=Q$Z~@{E*CbT*lrelu~u860tuVM_l3Yo+k?oTF zk~mMzxv}D0Age4pzXwNev{EC%PB)s~+8WwSN1Up#uD%lCSdM=0`xI4uk}vRR5C!k; za(a6cXW5eJ`*%&!?i<5Qjfs9MQk6{`v_<>+dJt~M<7>!xvPrw5hy@tWv3+LNvv{$} 
zT%nAglNklqw`mQp7mVzMOi^IRC3f4bXTolC9VBwWS>sy&YTt}ZxdbLwRXvPzfAR`L zScz&|JDuXWe=}6yX|ohWeKScTzL_GLM4QlZWRE?B!#A?RkF3bXwC%rD+-O$_g+seBQ*RR-Ddb7$`be#mL(@gbbKnrjxUB08VOVLu8*4&@3CEt6! z=%RF=?i}f)Ju&@fulI20_KqyQ5UYvrHn?h89w*tFpTKlMy2PU9x;tBfEo7=vqD<*A z>Ag>CuB|c;{_(S^20t6{G!U`aY>q!3Rx#?4emm{oBMY_TemeZLm^2pbo=L0pC^^rg znFUT5&ADz#r2;u)a%O(sS=+V87fTKJq^D#Znuf=|uQZajQV_rOEqh9G1twO83aJQX zB}f~PxzK{ulx=N#_fTadJCfo_`B>pVIit#v9VM_X%0nrTm5aFitX%(eGR8$GieG;; zRpA)PJL!5NJ-Re>=3~)f&9J*?thH=u#Qk8n%5a%?`(~}q=M?GgSZowoknh(IOb%wI z1Q~(D5M@Gs8ZR}KtH*}~G(ct}NI3vl;hr<_pc;X_w$Z$Cdh9e;$nf{ImP;yIl9ao! z;M_E&2xx_GsIHyrR83GC=2( z&ytPlM+vR4nQN&IYj3oPpeV!4H}D;puT7_K(VpaAC0}fx{X~;ax6251XSPJ(qg=YE zG`Il6PIsA_&q4_jy4mZ+M)1{c-u0yHzu%@jUpM@-(Y!qMWn^<@w&Lv$kmT~nxH!p` zDP8d5v@j*T^<>4+yro!qZg5zhO_j9Ze&4NhKX%xOKRDXNK- za5k;Ehi8wm?31JZIHcOzG<2X7M?*zikviz?onx&jA=WY88J+ibU>F{|Q8qZVHa2rP zB2rZ0{c*r6^=y%gz>!i*#wBZ{ck!fKdHjO6o2rsB( z`I=7O!4QuOsZ))7E%S~D{1%(!c3({f8Ew_PD%a}NN|iYB(445HRG}B9>!;Mf$w|?~ z6@nKVylp*FU6ez}R9l+~{nnEVo#y?%_{(7#dbnFT^`ULOC=6C38>*>t@+_o*6p({c zjn!7-DU~No9VXyj&Ww(MDtVePiH7caNT5fAazC>M|K3AK@;228q2 zev?ThN;3;q{cY>oL$?+eWkd0J%D01c#awy0)v==1SC-r|ccPyJP735Lg@tzq($fX_ z^8~s!*o9^{LK^%R5Z-D42y^o+`Xs?K=0Va0?}rMwy|`;2ZG zX*-(fX!G$;@hn%Rx1yq2tBw&arGi?X)zmny*FbW(`$C?Z1FrLLR9O^U@f++iTvp5$ zW*;EI`k<5iu!t(Cj6mu4erDBPa5R2|g!n0pLS($=EA;{*Pp=qA@w~N2E5@OlsWwE- zvh$WnexZsRRTAGO+~?ks=iG_LMC7<^M4e{-XsN)X5gmb(bf+o4YR75oBO4$q%8}Q% za79Bn)0TWfmo@919hTBUm#WaeK<16h-D7qdP_ zY#RV(-6iw&SZ&jeZc0@ZC(kv9?L=0|Q5;KM6*+%*x~q+3mlWIU-Xw?M@BrAu!!DoZ zU~+_t{c*O=km=PZ) zA5YtsQmhuctpaVrC+x0%mtl{c70_ zbFkAplX15nWpDg{%7lD>YlY9a2wj^7AtyEWe%pqSlVLkH7yX?Kfz-Awd{A1L?zmec zuPuD?X%bddZMf?000gxRP47Gb&uW}Z3Xl9kT^tEeXTm%Na;s}?`wP3MZ4Rb~Y_S=h zKnf6T87Tow0VJp~M@PQLes;Cd$J&)7~c^D07QCs&}V{!WD^2P6Ft_*tVk za#H-2Vx1(8+ue|{+MyFs0n4T2ST(_u=iJ!>fBF`k) z$}6aXOy2ULccRIpe04nW>P@k{;6HtY|1gd1NTN;!xlH?T#W-E`FNVh2NUXEO@!=H+ zvNRHBNQ}v=TmNG6SgphP&uK{aQo-=aHL{G75}ffynJhcvtucfxk^aQ{XmGiY4%Gdr@ZOwS7xRH)I>Qu9$HB83*DO 
zX8LXfCmJmpMaT%-`t{OeypTl5nra*ek2eu0c7HXU1_-3**T2$UL5_prXj-NUUPT@@ z;t)QhM>#IzAkT>TxF(VB{NYPplO{qsWNr4}O*YM4HvHkv^8Xs?gy`_9Y%$2Wc}I7{ zK=rS1H8lV6@sf6kTUJzXVeU%jgJyf<)cz=P05@yw?88Xt^3Mxd>g6{BSj2@+wImwd z3;rE%Tm)Rx^N2hf{Zui=03$ZQW_>x)_+F=x(L(FW_iH}!p82P;{bu_?5Q(kBf8|}Mh63fra1eDt;1XF2}uCWbirJP+m`E(|B@w+;kosA$5hI5pjTUy@pN{E=Vwnlm!~kt=x2ADc(E`@h&LMz zzXaOm_vBz?NU^44ZKck$z;)PiM$a+4y*u7)U;c>;9=^3F(KMj!^18m(k*QqrI81|q zp}QElUp0x%dHG8DfuMC+jN#LdqY~i+ndV%-U7H%p^C&KcqQ9Gc%*}3?Q+)a3+EvZJ zTF+KZdS9EKCQHSnkPD-{sb~b38bdavWTDA)%=fcOqP6eYG{c?vw7aFUKa8o;-m`r> zv#-d=sI zI(+d{so|A#`<#|o{xrx;3j;;_wM$`*b3wQTH*CYD=f-`;y$oV>a7U5kmq zJP%i}fWSBvKe=Zp_K5$&Sw#mvvM~cg$>3XGez+cwo|rSx%pYFP(*oRvEx|fXdEWzz zb)Thv^-mwJz(Jyl2X44TrIn9AkzkGetsbAj+z=ONs5CYU+$zNW!XE>X7LrG<84?FD zPeLnlOeDCaiK(~e-Kdphq6y^o!6HMLNyK4UM&aNRCQR!^DxqH zqxb^Eij^rJlID76$HZ1??A;VmAyR+4jGrZz4`viNQo5**_l(b2k9f%cmkHW=>5X~+ zhrK6M>3-TzVNR?tm-$1!qfJcms`*Zi&$X?e873^*wp(E_YmfYbh5KqD{;}9iOeU`L zY|?oV{mAwr$9STGR!-qAj^UZ3+Q4FNdx-)lI(x7}-u1NNBb zKlTb7@K~`G!95HdsVo_b{piT{e36?HAl??}qWV`;cWdv0n(ClfiVlym_-UOG)_xWH z02a8rvy+APHqZ;|XpJEp9ZgMcEbX`5h>MD9K^JVCsrTN>2|XVI;1lG69p$EXCn}9; z%-;TrBGWwbze(%idA}#UxUjh42S7^_o^L!qJSeK6w2pw72PQcVdQ!;4u-f5_{f$pe zl{9L;&tL+Tby4(W-i$JY8aU`BfQYRp^m5;&lYSm@PAQ5gGv}i-8{D!>sV)PwmJQv*+NhOPx2NI}r z7CtjsW&U%Oc*m+odE;T?#sQw)C2GtUIR|}(j7J`d2|e~=uWNPbN=K7k_Ev0cM9uUa zLZqh}v+T&NdsOMEG(|Qnai+~~PVRVU1pF)oz1(kwPDlpLiJYv!nR;%rM&T#Va}Exx zW|pMtsn$_iQb4B%CR^w@UC}cm=2LVo%O*xx-R}%~SLxJy8|Ip4j#19ZR3fM%IGXO6 zB)F~OHI+;YP7(~q*s83!slQZG;sibVx5f3)RZ>QD=^rkPJ??l8v0pGdXW7U`dzw-y zY~DMAu2shnN5m^BPMh0pprwjqco4bd;#gy8Th+8?Ku_Nj!=#IGxzmw&F(H%<%KM9; zn*Z0ynr9C+*fbNlJoX;5!kagAyaZ0LvaMeZm|>`Hj-v!6^6Z=y`XS9T3B3y$Z6Nfi zEo+?&H!j_U%~u?Cp%e}dgM7yMMjO{?VJ&FdJO?>W9_%~hLzi%ThV`Y1359ow!kP(- z%p#$eGc8huL0W0nx8sDdMY7_bca$)~AIhq1QJ?o2vCujcdjeGhWPHk+6g`--Y0iF156&J84dF0ioQw_MzLvkC?A}JwMcjxi`B%~i@qXctsBKqR$JhsZ3__ZQKQRN$Lbg1$P03mM<9Z@z! 
zW$<6XAp0|1BJ2C>6Bp&Dxot_Io@dHMIKk8is=IDBkMMX8y>8ke;&(Xc zs=JRyP##xwedMzb=<@#SAzhc!rZU`of(55fRP9^`zebU15Ffvt@1))WrS}dY;Xq;` z29PE$U7P@K-1Tg-4AEFowHdV0LTTzvQ$gQJ;B7qISwgE2B5Qkk+x^LnORke8CeoAj zkp6<=kDk9RH<8aOT8}&_?aZPF0{7B_w>tT{xI9=-PAs72L zCL)CQgTpBL{zQ|L)8GgT?%9%jh5gwC`KG%fVI3Kvv#*)f*m{0_QJjNY_8u9OqzB_X zpok;Rxc-2HP2#d?pXSkW-b7?9aWkmo%Yn6XsYE*0q>kOH{Rbt%CDMYZRBIHxVrEL&pi@zc%JZ@wqG6<{1XumgDzfIImGpip{l`7n>jnE zCeSWnQRI-UTz@n0~<{jIPMb^AL-y>872k{Y7@_eD-6 zU+gL3M@h39^;|e{6U(I?2=L?qf9RvlANzx?ww60jF)?Q+yimE{f;XRZF^Ud-dn*WO zuFk%0f_!6T?-x5+=V8{|P-v?u8$vr=a#Q-)nW@eWLx*hiA$mtDH_hoiL)tNk$V5nv`)IYa;};G^mr2HhZjluWzhQQx_K!s2yKV5fuobSH-#;q(R< zRwf<>HxvW;VOx`TVFYbE!)L!7op(1x`%FjB547le7I?{}HNZo8Ki}3hhOI&2iz|g$ z`$J`iqY^rvO?s&kzOwcW17ovE0!rQe5u5GIY$6xK0erfK@oy>CS0zq$ZAixv&+lhA zg_f5}4{>rdOQTyWUuFKPu&KQsk$C3&__4Fo-r-VKh+VzU9=#rHQtn=RS!fi{n`IAj zlGPTc>3h2M=FdiT>%D`Qyxx?hNBk&9g#0M?kwz;A)|nUg6OWig(R*y#!GgR|>&mUr zap7rDvtFM1L=c?OuSG^^I(QNcrJ7S+4O@b5*djeGQ0Ym|t?M;CH6FPF+J;s|36c#S znLg5H-BnK5ny9Z!Rj&3U+xUQf&534(%O%Q#1=8P2?oZMzFR2H6^ZH`#hQ_@bXh&ts}M2Y1GTT)5rab@~kZk55R~g|{5*1Le-Yj`r40!b#I` z-c|0Y`09vDwjV;Qy^1zH-UzA9dP$J>)jwP}-0oQpzV+P)QGS6?aJTm-5gchgOt1NJ zj?)X!2R_H0<5VBttAAgDaZ+^nJnSt%a5X^2G^HdN2$p_-G=u?(TvG8#&ngIG{lvQ} z2gnR$%sZp+4u36OA@&xDhA)e08`9(V9uCW=2;CaiiUi%fcSm;^wXrt_q8Rp}z*3&! 
za?kKhK@9>E-;iz3xqM=$jO0I0nY%>A-;pL}Oh`o&jm=EL+^4KI&|McetZOr&+wR0zH zTjI--a^uPT#b?4X#DJCQ*xV}DA@dIa6+BbbHXE_gGPV-*QL5KI;0Lpk_MZ+5M*6e% zo{kY7*#09V73mRSo>n+H!0poJeF2m>EN=SZan8f2UGL2wWj3|P+W?4W*1l~EtG>+3 zWnxqHL|54v7cUaB)2l+= ze>$0CGARSZtsZ+H1c~RC)tZEHqQ{lm0dz{+iE&2v^9=`n`RSiT?A@A$Ql*gTW21hX zvR7w5W=>Gjjdo~D^rN9%%1&?T|1suO@ugw=h~4GEP)h@(7jCg?vFZYc(QB{Eck1EQ zhzEFR!IXSnk&O+-ei-cg;AGuFDNs;wl0K4n%M$j0isWbNN|Cnup0LXTIH2vx{PzgX z-E@FT_R@Na5AG^@xvfrL2*3+))_YH^itg~if?lE*Q0fl!4aXT80Y(P_Eb~v$!O~cr zwg$Psc8})1oP0tzKIy(Rq0C92xTj6(0cj2itNJR0$3@zn9NQS*hhv*p&-W!Fq92Yt z4pw2hjelG?-oKoZ2Szcfu-2D=*FFC1=tn=3C_>9>tX6HIUtKoy4lCQ}a0@tg4SEZc zs?iGFuRnGnH@PL!sl^~I5{tPoJf0;ewO2w!X?gBW|Kk!)BnA0GP=@2#7~G zT25p(b)q^Cprm&N$BO`x=}QrMEmcd}1097`4$B&>?pej`nBYQ=_(snV7ottW z==yZ}z*kh)dCs+H-&X}6{Z;EOzHL!HDSw>&Z5-&p9O!^Y!un*6&&Y=j29lMa?`t3X zbow4_(1&2sEbvNM>-mI7k?XHfhxC!Cm5-GN9sx@&i>*P^A+=R^RlT)h^lz4%N9*x( zCZs4RSeA|$`xLh>=}5ow+Q&SW6r!Q&2cU=hYRxBorchj5l5qWVv{Q#66!%IH;)C#Y zKRs)^2iPH4t|G?wRop{OiDgAINeSGN%YfG{z$w^I3qYRPgTfYFY$# zTNFm2tX<$#flMF_uL>!vsPkFrsouu!p|Z34lQPmD@W`4~9De!p_z7=|4?X1;Q>jK2 zXIo&m)x$U}z0h8Ds`*;l=y^^^v7Kt9|L><5r;PE&h6@-mfeIDo*k;`u2CD1AfAli2 zkP|>@mFSk!ml;7;gFUhgK-XHPhzm*mRRSZf69SKCUW!m4H0;LFIk%{@;@XOxr18cf z((!Vhj1a9(_8b|9oFw7V{TY6vYET^JPURRkCv2(hI7DS_WP+w<&fC&DezE|fYZ9xB z*6r=j607bkqi&9YU@dL0LrUjDOQv?cPcCv0t3n!fe?L5dvcf8Ad7JvznE32xv;YB5 zs3m@!izP4?N_s$fN9x-_plZiat8RCZm2ggB0YJ~ln%6|})`V1lvdZ*5_}!y;;!h{R zj^9yB+fJB4yY)=<5gVdwbhrCAz(KH7tv+HiXDb&Mp z<-g1&2# z#=%T9Q&*E7!qW)-fYKSUlAjuRZb<&wO1h1V_oUYER6yznB1w%!eN~s>&r*4kJz3@C zqS51^y8K?I;b+I)tj(V9oySLGm99h3P3*&(6=F?Da;rSsoC-cNEo&c>K++afrz4rP z5^+%%f_{M_PBvdInUGwLk5lc#8!M}a>4lFnKFOx>5v!u3rlo)k?Nzy5{Sie+n>x0t zn)yJQ=Yrkgyn*VkyKPxM)D?tR#cMaK&X@hbp+9RAlmmKR1GZScaU>g(=2+oz4DgXF zfr+KdW9jQ5%U;!P{UV}&&Fz(Rv?&D9PbkzCKw#zeufz|?IaaY8O$8~y5#X8yfAF3Y zo{zX_JBUMiokV0KVlDYZjBmB>w&!zyI>3yUWa$DJO94)9q+#3Ro;+OVCQ|CQCmJ}dQw>Y=zu2^0 z&d;(OndAA#LQxHReBhno2VTdA5J-jEy94@6L+FH9?4QGmZeF@iJU-kxSuo zj*DOF53lXlzji+s+uVD$+`!cZAN%qrw4(tfuyn-HN+H{{ 
z+ewM3EO(7a)b8MWq?^>$yKfRjEs=$p;U!Np_9J!*)BPVPms@{89qE)v;y#`A5+3Pr zkCOWn=F-=UM)(z76Jg%RcH5Mx0OS40RWz8~FvqEFSeakSDWb9L2s;UamBDq5D_1)C zK)c$o63eWEmRA(fCT75v!T~8j99`;RLFmAGb^ls`!2?_+zIr4@CotU)`NgAzJ|H>OF~jnbFfBqD)Gc*|QQeAgXt zm8C<5@R@InqB?gI`H3PC>(ws_*A14uj3BOEplkPIu%2mv$6#XFPQ26rIpLg*Y~d(Z z$(v5wgv?U)2U9}!hOgy$^W#5_1dP=5BXmPK2fcKewXL&Brz)cYMl7tJ8D{vYHUklD z{JwtO+`T0orOxkNZ#-9|@bfks#^b+Ab9Yw0-v2OrN0E2?=4XBS>IRzJRQg01$U_6# zT`-3Nh7R!~43epR;HqEAa(zWkmY3hfJ8n* zz|$#XL@;PFmOPxMUBqkoFjn8N{+V73_U6i&UpRSID^|G@j^Bim8~*^(y`Fv0ILHL= z?tN7AtmpV9Q^!cD6}8nHU_E=j`UIC{Ul4(@eh^Rh)!V1M71NI_H(v%Ee{UzS)q-Ss z5X!?QyVclz>RGj`J$UC&(B<{J&DKcCKg?q$N#uju$GX9C=L3)%V zN?4c`c1g*2G9-Im0w4N20-7;ze7Hl&a+T>i&cH?K6RK@*S`$zcw{~pUK*rK4)o!Sx zq&yCNtse&H6(I^fzP!}) zqQehf4CpyY#$b1UWooKV%Cp)#E5V)0Goo%JGUvsL=@F&ABGe46o=5-+eA$UPo+)!d z*FY^(+0To`mZoA|%LRi7AK8CAD^PpO_TH1FG`N9N=O@YF%vyvzlh`CPYe5)?h{m|~ zb>oZG)uY!1k019!IqeGSHn`oTO=~MZ$&W3WE%~s$EaBjBNRwgkJeM#?s|OFHKF@4` zIhQUcjioe=b|b9FtbH3r>$c~0d{W@?5PmG4rswYXRwV`r>h$Y8IoFAW)UP;){KeCuXu4W=dX#a1*a}QYno?`tP#-r_Bz_WhxEK zz%bEH8_c5x=*6lh{KL`&Zr6We$ubkaOARh^CVWeZGULcH7H2KZ8Gb$YT0HYdt1bxe zlnMXd0+mRUo>)xt3x(UNkKlSX{8A5i@Y}b!3wA?7Pe*b*W=XZ+XP+pq#87jkmY3P8 z_KM9ww<98}ZN>3Eor`wQYjkpVEXHxU@@*yO&9cp9^I}^zKi%a0KHT+>&;D?E1ZFIc z#0i_LzW~nC+;63l6WsWoZM*=x$Wm>nhB^NTy)N4cwZ^)wvq296qK@8YuRX_82P(vF z!Qdw>NW(3iD{IOYhL_3Z;aDYcdG&&SJr^9HCu=Bqv|r%Cd)q?hV=*DVxS&|fKJ`d= z;R6c6Fa_B9Q+Mx^9`XBc;4Mo5@wPRi?KcJB$47f9zO{(W{vYRY{R_p zS;<{AN2fdtb*#Sb%yUBUxtiByGgRcGCCo}ag=_3IP_)y&jl4Df&sI|52v>h!ERMGv z+k`4wPPcdi(2%-@j6XywH;ZT~wuO$#9NpEq(D{+yC#<9`ocvIsF$I(zly#S+g5MHb zcYF%^PO7!(g^nV18K>Oo?vDHyf+BL?W~5@9;3$Dr;Dp z*msYmdroZG+_xZBmGU)^P5x{0TR-i^&`-sUnyWP`x9_nw%QqZL6$hsTyco|}06r&W*`WjXCV4QAOkEsq? 
z#pR94sl)Ah%RJe^UpC#&-dP)c&M(OAQvdTTH8{7yyiyHWwgN73u?OoKEo@X{S| zF6DX{XntIUKuUgiZ*i<}NJ?F#>PJt+#S-v?^HdY5SFz#`OHXR|%!h`b)GH1AytSTI zM5BrYuGLtF1j7jJ2{8dg+dC-EjEc#b{!0!w65wFds)?l^3UO zQ);Y!ae-P6qJX>Fm!JN&S&4DtY#E~9OsoafPMmFo)=9X9-SlJ}_q>3O@aLgkuY{0q z`3jrb{&*w=CaRaUHCXlqs&b?z9K|-rP-VYz~5WpG1yh4zbd!7 zh+PnMFw}F;lq94%@jES*M4&wxGiq#hr%FX=m zJAw8Ay3H0CJD{J*R*u1P0tvJ1o5KJ4H!yi^K$ckbyZgDu!CT8@KpTRbYZ{XKx3ips zGgg9sx_`4~gl-P2DDnOATG-(ZAsIlJRlNa(c>o6p`&W56u?P{l7J+N(5+r)kV zC*#0H;0n;~gK`o7@CT(EDxQgqyMJSx&^~A3>(+rhQT+hAXFaAKQ|BoY2k%99~8fL+_SRfRDu``E)+gC44~r^T?ml7K(421&;1 z6OS3Q(WYMGHuW+3XVruKU*5gpM<3Q;zS8}N-o=}%Dl-H!t9O0%pDu$pFW~`RJoGWw z=_?Qh0CnsQIG0;9jLC^1teS;8Fob`qP&0SrNu@N|2dRGaXEdcR-6o)yLs{f^oru;& z$=ukSbLSX`&Ft^DhvmN|r2^J@EX5uJVSzlrVIXdutQ)YCuL7LcG*UNZMRm$&?cdKH z)|7e^0_}j-lnk7zaUBl?qZi#a!WvDA!kWkSQv6jv9%x_=Pw6;r9Y%YZc%YJ{NLg~R z`Yy;Azk3o^G77eXalF#Re?F3>x1e15PZ}@tw-u&vsty}$#O_zq48OGWRgK?3T#0Cj z^m%@~E?}{a1BBsWs4S_BfYl3}Nf{k;Dhcm{#khmprY~?B7Vq3U$p0tSrJ}y2Qb(zz zxNPRu$7E9F18PYHQQ;}1pe1K3ZT!at(c2#@+L-rW+;Q0*J@;;j%`MVfxn_KS^H+HgZ;n|I-7$2%T*appG<~=^W>fzUpnI2TGCw>^Xn}?6Mfpny=GcC5$h>vOJt# z8Q9LeEh7D&TQ7oU0C|S|MIzyr%6Rqdtf%IGR6KgBkGP2E!a9+0k7b1Xg>nFp>)(;G zz<)@y!!6bMX~Hxh0Zz~guW#-;`@cv0E>&1uYU?Ij!m!8{KuPxBk6bO|VW)C`AHpm= z2lNg)dMxl2F`+BT&W7EOeQfjDw8JZ#yIB6Gk-MSkG4Zkbh)8*Mhw}fS#p<9!xVlpBCS!2FQ3@hJSncXjV*r(&+2HqYAnVNUHv`3=89Kt=8o^ zK-q^=m5j1zi`DepAIT5{U^z_tnHc>JnPUr9c*x|+Vf$s?` zKnk4fc1>(=Z0y&+tCkC3t3^!2D&};6DTNk7rkm9=Q(}feAjR6t?Xj1IWAp==P8*pd z6tKXVxkM7hl{x?#X-Tf_*dGvhREJvD-vV%g6C+TqtD%PU+2XcVSA%~|?EpBEZ1~?o zQtVp$<1T;7gNuyP%bYA zKzMwT?K%1ydxvrB$GfXrv-oZdvCd;?)&}40xRav~pS8LFe@%37z_-C;sG#iU7Et{g z3^1a>V-6t>sPh-ajS@wFmjm1x03fx(rgHBjOaTZIJ`K{k0_X=ngl;trgqvyoETkPF zgaOm}sg?!SoY9NXAHK42SRhBN0<}SDKl@bY5?{V6Q$Cpr=;!!E7Fh)WMW6o?&3_B3 znwI@aS5t(#I~3h_ShKokWV%{eIRRvez`J7wn2JrEVBYj8P)!I(bwy|MPzC>V|EwW@)pe2gB-9Xga8WY%U*&*ByZ)44YXU3nvoYNQJ+!HH zZ5%RE?;Q)5vi3aJ0}yFbRK34Iczt@(fzk0Dqg$96`Td-0!yr&s3(Uq6a z?oc;@qScK$I_9nY+M0e8uw}VuLtJBhUwQt!^sZ74|BJP^j;gY0+lMjWRuE7rX|X5) 
zr5mI}O1h=HOBxm20urKhcX#JjknY^TCZxMd*lhS_Bey=!`~JSQe&1T(A1>EoU)RiB z=bSThoW~g>T|ic{_CjpDQBhzo zdSL_-A$BaMBm0E9{-N~vnb|CpV9@f3mPfELCd#I2=EED6ML5` zjQ+%8lY^fU(5w718`;mj%kFfzH9ai{Z}4>}ysVIPBH{ z93ox~Vp>G0B>e7mrub!Nj4EH$1K0|430WJ6()`$wB>w0M!`DfMbIe17Hi!rgu<)@f zip?}a{46y?tXUq})E`8gYa%#q{dD_u^$q8*j0hM6L`Gihf;XC|T5)zuE_FdZ`N=v* zrbZEpPG3}IqoNO`@y)uaG{n&p;R(_rutw6y#Vs(DZxp{ECj5%8-}sgU#O#RoAE=dP zHKz98$CVj`rYx0g(6q=-fV|z;AKuM_ouxE}cWuv<&0=^$MNp=!0^4nJWTMt?uEp{M zY?P6He14ii>my#}&oW~){lt*EZ145oYKv0AvNGfcWw{pc&yPUd|LL^(s}V2d^CrM) z5|Q6h{C^R)D4WlSp?Z!lo9v7aJ~}^kgS=p|S&k%@w_j6Z@;l=;^R2umT{Lm#H58do zMzR$%jn|6CC2G+_0uIWtXs>^Or=7WA#u8#GH}VIKc+2lePi{VOMlsQ?GfdEO&K*fxg?FpLSdrlP1kG|4p7-;a3sB_YBH-;a5iStaFlnpY5=oh^1Ypo85ZibpIx5TA?3Js_!WO)Suo{i5h_;VlpLraPUHnH@M1p! zIR8>;8};NAz1!;EW>c$`mYCDTzQwqfZLB~Y^J|2ES^e@s0G;Z5>UJqufG6A7gdT%*YJfoU*dsn6SIj*Ko*Q6!3H$= z@?m(B?(-`FU@`fFUhd#11lBD9OSbRo4T#wTvJ{oyr#pztK|!w=fP)A?6K(DdATAjo z_^2lUIDtYh3>U2lV%`7UjNgnrdfUUT4lv?N1OMA*Pd^OT&q*c#+FE_|Ymb`vtJsCMIsJlzG~T)LKVB68t2&Raqt zc)Xb+Zr7{H!`!rj8vYi9^x;()xw((0S~lr+_MA=pkJQ;7d)RakxHHYdvRATXraVzG z?LPc%uKazb3^RUm=i+vln0A66h409On3cYklvt0cLj?a3R(SJUAHmVu=GJSif=<6d zI1p+?Mj*dT0L>F#_Fof9)LTmczg`MAy*W8FM##6Z0K>z17uZNB3&h`6O>4rj%`QYA z#3Bg80jn8;iZkAp(a;DcD+QROu6gi-#6@iM+mLVEdBt=zjM zb02!;ja&tEFVVUP=hLr8jp)A1?oql^z4v~b0<3p%u*MqDazv+_JpK&WImzs{wq7}e zOkm*-&Rbt_Mi_3<6O{)ibv+N(7&GzDSQyy#>bV>hm<+mlVj^e9aCFMx=)u!vzU}iHW~roUJ)_^K9Snkp)t-{Zc4MlZCt<>|Oh7D{j9AWE z10YmKmq{m8-fF!@&G978-_{P$N^+8FTom3u*RmMN^EY85urdHhX|7#ntr3f9AMUaJ z^ZOf>rcpZ)Xw&T95t+{O1F*_+>cF7ez?(3vR2BrHZasLb&9oi+MZIDHc)!zYeh(x4 ztN0(`3O_vh4l4c(%~3tUg;N7`8fe0n{^C8x@rE|oI(c*TI8`H1PdDCV0+PoGn)v4e zy%(g4hqQpeNGlUL_Cb|76e@uudU&>el#)EL`L&Y2yGi+H%{iM6@&5wI{B*BO$YJ9q zb==R&asrIrHdGy+N*9ngzmxZ&oz%-Mw+;a`{8Do^NL;bPtl$;LORIo3=gnP-Lj{~^ zR@zKc^qNbdLQkis+P_e@mXfsaOUKa%{!y;cg3mMZM>sQb?T`$=5_q%kl2`dVdPIYWA{F zT?RhG4NSEyet`KcqXFdZl&f?KXdFr4b1#8P_uK1@Dg~`*+SC&~?SFAi2Sku+Qy${a zUPRhZEdmuI{u_`oCo;P*QI1ugN{Yj_N+_Y(-jC`^^A@fe_EBZ?| 
zz~2@nu5D2KXRp@@#PQbb%(WuC>}$4WqMjQ}#Bn)z+`z2g?Vuk+uvm^hJHLnJG?S$@ zOlLlh%GWpUP0-$MzyXd{wbZ?GUv{~*Ral|!;SnlMbiuNQuNR<%l2C59{bfro^(tFE z!eu4JOe4n-5p)qYY50u|#1<6#(~e}_9^hDf%ffIF{A$vfO{g@ue@dP3CgmLZF70qB zkf~wW;yMGFxh^UI61~uFbtoJCdu8pY1N*F+x#*wu1!rR>L)mob2wuh<;!@2C8cy}w z9w3kw$?C_Di>+B^s!*YW<s~`^J{=Cw|X<;aLT|KZ~$0VQoN~$>|>)vhBgG>Lm&R znePKxE1H9j0w`0*X?z6aT{CH_fMc6YSyNXu{9p}c@>cLM*~irW@uhf|gv5exnMswd zdy9E1+VFgN)5dW{w!+q@WFHoQk?2eb0`}@mD{lzM1#w1bfP|w974dCy4!Q*a_OERP zRM{Zo%npSX%3TSs15AtoT?a*&q8M^xI3{zN&Aaz))bI!yb5q8J&?SfES8dpM0?jE7 zj%YLEW^u^5zw+tK@7MbSz0C(Uu>(E zrv?>2FdLdynKSjyU74^PElU1btT+7(p?f*U^-iP6vRjoWUud%AC{NWaHSbYd6g4L0cOPQJx zbnkiNIeEiU69o5&YA_~F84tvu^yI1ZG)6I4D9xD;mdo7zAdEintjqj%uqpY8Ci47; z`F&J{E2ykYB}q#WWeS_8rkcu)r;8b}*1LPkd-wQts8o;m*EbQAcX8n;CW4?zrre)CQ`M(=6%ZyjqZfISI(6rv^)L^MJ ze=zr*j1VIc#L&KjDzCYMlOpe?nkR7(MzCoN#>~gzuPvf7FwDFW-?Qb^kduXc+%)vb zs9}*De{BSpWkQ9%UIh;bXZi2pu=a_Ro9^TrDj_P}4puF>(1kR`t|(N#QLt6Sju|c* zL&H57QyjD}POMR+6vKj~7_lZjZ0HUQ(i%a-FCMSduWVGH6zi^CE2ur4CfrwIq2rDi zIwd`0EEP(9S9HQ0fDyaQ<_AwYQ14DRKVg+g4Qs~e9(8D8eW$T!g%Dl4p=;uDHjedI zi2(mkLXHz;Ub)Zn&0Ht=AIQBR#)`L8b_$`Ib#LrWLuDOfBcG);fNv-sh{Ht8N%nle)LE<)y$f`ZPAnhu)(nCV=Gx zOy+-=6CVLiZn96B`%QBnKl7G=pb^nt5V3Jdz5xh9-paB1)_9=MZjWjm1fCSuN8(^&eUMY+TD-X!*JyQ8^ zK*CW7!gl8+2-uo?*Nk4zT~iqIO z0pcc9?BSmeRK17ezJ`znfp9bO&{i#fWJ3wE!9fnhS-T#Nnaqu0zqtHR+bG6Hs+ngO zO3O4PqUrCYs@|F4I3Nj5;K5$ZaXVg6gzAm6@LvrJw3XnzWi?v#eJgOUS3n?PxE)2F zMe?oXmCmYTyBtlrn6&PEB4(NW-<_Z+9VQT3j@P?tIMe7?{oc?8#Oth;%vEKc^!!~? 
z7d>piD|@#Bd674kKzYeXa&AXc-rSioY-h41VzWu|#>CWlH9CMoQPekZbG7bF4ifFl z2_RV$l!id@EG>|rGYKjX;`i=$H}W{If21}s?v7cI<=J4ZG#h*ZV9#?9vejFC=b>0t zu|aDMY7d{;u9^#`e>2e%L^=s*Mo)&B+2%kb>)H~B$FJ>oM%|#0qo@3!#Ox+2dBp@i z`EiN<$t_6zL8;OuoviI#ve~_5!{RG^Fs*16ze%jdI6>2w9in@$1JDOs)^$5#=WpDZ zoZO@73#u9+-$LIdE#~S@p@QKTltCTJ5n8fUF&K&tQIlN@1yBY76%QgI;P=CotO!zZ zJpT@kAA+YuuN*4gFpp9IOGrl+RN{(KzS*M#mDqzUesIHuH>x>74>=-M$$N^Cqlwf!0)B=B=buyxV zcdjL9?}c1)XJc}})iqGKAP$+|mvP^?C4_P-Gj~KGn1XM$p#E^&(v@KN)dx6-VOyvy zM>aqv&fg94_BY_Lf2RAudC$#|ssH^A>WuE>xcg^EsJQ{@zJt@`lV+Qhlpe#2PM`YI zn5yk+bVPCMO3SdyT2>5-oTQF5xw3;|u`-zvQPL=3Fr5aT+r2FOr%s8@DCVj>=HgJrnRL|DgN+o%NcgRSs*rS=;??8r zVnrLv`HATuQ1rre@PLe6oKs2Qg*zoaP7~tLYN~FGDV0=lZL4VIIGAU1dr&Iqp4ZQ} z|D1aGG2Db90MRHe&EvA%mu%S}Rg7i^NJDmL7bqCHc|PI{pNn{^ewSgAf|p?eg);fk zG)GbYmo7AV-Z^7UtS9u7Mwf6n7*;juT+2Ln3$qfpK|WQ!bdg+4RkE7xz!s?{Lu?1g zhFe+eiH}9r>o*6iKOceEf{6C-G2B_YDT}xuB zN!`{=vM-%7V2b=?V)@!nuH+oj-{LVBD*tBuqDzfZXgDK#q&KTDAtu3!{LsD028whZ zgXP>d-81Qliw4|~2WIx_pvikJ>-qx~fh|zPb=P~1%jy^;@o{P8d+T7;VY=oV54D*g z^D`U{5|v8J%mp{t=vi0KYrdC{@>6ZiibQ8ymWhJ&%_TSl_m zDI|5joH-5ypU`325{5O7L!V{&u~~s)(3m4bJzkn)hkjlUn8@T)QjE1+0fl@ztwK)= z7$-mNpa(d*?=hm)J}O6jvY=IFmC@_Rm|k|GeQ7RI!QjwAn~BdGq7&GddxjI6Ldx)8 zwhP$f+2uHgXmpUW#TCMyH$|x=2Zv0GhLBinqr%CZI`0Fnj(`yD^(F&}YsybUn;*Uv zrIOxbV;)A3&%lVm^vAJErO~>6cH2u6mCgdtep;o?@<3p#R zQu(dlQASaqne*l92;0b~yafXM@|VTN8)9w$Wfavk+Kvu-Zn{a8`U)!y=7__D?lByG>zJ1tyvSC&~uBxXiiu6s0fdndK4IScZs!G6fNNu+!YdI1Y z{3;vzJ&DS(0i=`qZsqjps(v=&A-mAj_Mqm%q`;*)Ey`&LdpdJF|9{D(qSP}7VS5MO;dH0@;v5= z{{o>v+tKfgT0^HvgmKgtf161|4GQuMIO;!{+pQ~ILILphHJQ#VbT8Am)a`&GHY&mlOdAG=G}MXPB52x7~Stsr3RV02c~ipV%nL zZ+y)1eT@p(xOjioSg9=VC=gSo>)B8h43I{sgR6k^da|%qZifllNu6(bhlg>_CB*>- zRi}TYn8TCM(PJpLjM>VY4D-Mx7g_+0_zT8Fnur4+i3K5jm+QWqYJQ?16K1(lGy4a8 zy6f?jGyTq5#oevUf+DA{<0f%f76ypY-~Q-tjGw{*t;6Trbo$h%`32R}g~mWWe#RYc znn^`E^e)~l$Reul2FkMwX3OFWHO;F58n?tX1yWERG!*|r4%({O)s@jw6Z;y4cLn4y zMGLVGKv|2sDY*6txU3o>j6R@VMi9y`3aA<)kPn**cqkN~0v_t4`P23lR?Od64Zs?- 
zivkb-<8d%OeK&qTUgkFw>Rx+g8I)Q^jjRkruCQMH{kGMAbOv&U1Rq{@_MdP6|MPfk ze9=t^S`ARGtahelB0FB zyEFbKD@VuIeYJ8H3$;LUd4pEf_1_--8<2dNkQb6|5l7O=pHIV$?t2)Kw zP4+WILyl-!gui<{2+zIp6jVVV5ub8L(?(6L5Qqs}h>4K)YqEcP7w~4k=TC&Y2>y)p!5PU6N3Du$h#HENunBCq%qbhztWvabrX~>*pLSq#kFd#BZRI)+8sLjBnx{TH0O5S&=U0+I@Y99ZOKz*>pb7F_ zE?)eRyDjMh?WBJ`NO81`j0i0YR7k&Av0%U|+;-Ol87_Au%zuC;UvZ@V_h&W+Pzm9( zF9u7EMLvu^?aPh=EpV?S0i*0f?}-liu3runr1887`L_uww4~hM&!hPi{3c(nqSc_H zIUa#}XLoAo+^qEaEz{6k)?R6FkoF3`)$_yHG7-v4md8>dENw%3rMx#!wU^|uj9 z8*i<$rvar%RWnK;6v8CjlK3B*q9Ax@B_=UvyKyA`aYNE862;l0&TD2OoU#e zy74Fhj@~R%ZqzHrb6$OWTdu{Ueozt1HiaY&@rH6lbqHvN&>d^Rn*U|M>3R#|ICXd)q!; zP4T&Z|3T7K98U4(G4StswV1(vODw0h%%|ru6OY93@S1PpAHtxMo4fl3IR!ZsIdU6r z3FTAD%7|O#YmNt8a`o<GGJS~zEd@a2OM!X+Qj z@g7oUVD~s9+kXScYr6r;`49xqumaFraeT5?h!hnKJo7?XT7gs_P?=`s^DWI_NJwQW z^!}Af=J-k>bkyVki3n4kvq|32UiY45_dfWvK3&@ivc2AYKTG*U)0uJ-4Ct;2Kua&C zhVbvI-u%EZ^`^-?tX_3`j}qGpg*JN{{^$XITK;%tZz`_sQt@^E!}DK^-U{(A6Zz{$ zn!o$c0%Lo!O<*@jXD}b8w+|G0dZ4@%r9MTrfL7k9XR!{?T1; zwo)6t@MHZYEohZ2>#vlOl9Cbh2P8G4(;%&U5fH-L91NUsWFYgtMOO*Sg;(fTmrPKa zEfA~ZG$|3VadV-Psl|>fuMiY5kI`2DxtJZ%2F9_F3)qB2=Id*DihZs0bt1Wp2r9#H;Wq*|!k(fb@Rjg-N{q=qPF%9bZC zs6tP|T|p&-?oKACC5m_c;RbG0C|=7RV`eR=UL=8+HP}q^dn?FhwI6;UwU+P@cr*mA z6R7Y+W2&CNTMAf-ST@U!hCE>Z7jGdub`kVBWCYgb?%8)LK9y?vp3LCedAY>GMSpnk)r^>o+cCd#wdh@q8Ysw#tRkhXUm zSRyuf`V_d1?A9N>0~BR6RO8Fs{4+($R}#3tg&4Q+ES7sivp++HY+SeY=H54!TB~Cd zdiV&Pi-+sltOh*xJsWXMp|0x?*-5KzUHM=tR&MRHG!QZc6mX0kyT)k~)H9){x!VFR z9vClpcsyR?l)KRxNf(&N0@noYZO>OSW}p4pkc$wYOY4%@M$a^ z&*K>-7UMD;OlpQz#TBM6Nm<9dn@u339|_dX#t&lZ@*&ZGKp(Nr_Rts~DB3Xse0 z4$TCrYW-i(qy$r2Up~m%7_!p){tmsIU$=}hs{(c;l^U#Cpk)Ta5a@s_>FkF+c0_=6 zt=ob59w@!9&LwwnO?>$tFQC#6d2*2jy3nb!tet*bstsugp~>?yFOjqdaAXU?#VHbC zDf%?gP%*Q&QYmoeBrI)PQdK9gkU!Ufa?Exvh-0LN^A}Zglz?|by_z=&V{s;RdGR@~ z>R{+8qE8ritSgz>8t=)=ta3`Dxs;M@%g>#Ur0P)aP4vSqHh;6&*=j~rZ1#4@hb3972-`nah@<$qg z+Bp>y*i-Vl5|sjMb{vL6Ce((}59k_KJ_a0u6|ZP92FSoKhuM80*QgIU=;qr!tKB|ulty(F^_#slMEY@a3t8L0C200O~ zvFFH4i21G^?Ty7-*^pMj7?-zlbt^;KU(E5W!E!ChV5K8PX4KrvdBKe-d5$3duGpBC 
zQSquU_c3f*koZ-5=KT4foWKExOrvCB#op%$#MVq>dr=HLb!*GSCfv!f^I+~pt7L2U zP54?NG(k-jUUSZIGp1HpYTq;5Y6zF$$n(p-G@9d6R33|F2Gj|kn}4t$=-a23UVDc>c~nHik4)Eq`65AVN7|KC6A%dn6%XY>64}^av%l1 z+!&wZjVkhC5-DzzpV9l^CfGQvQo6QqGhc8v`b#+?EZ6Rv*>`GHe=#I7uQ~cWzq}_7 z8~mB*s6P3!D&y5rwEcHBtTl&>FG>f;gAJ3#9|L}QBVpc;D~=EtI_?|g&e0i7b`tE` zMA!Tpa{J#>f#Uk*400UV)g=|y@MRkJ1J_R!a%>h@$WZ@?T7&t9tq%DS6Q~`=ZxFzE z$AXc0c;%H0rJ zH(V6@d_uHr;n|ciA+Dgv9rqEg0c*Vl3haDlvq#7)$u7!%D|vtLmW_iQjL2i3xG4(;h7D6q3A3 zKJM*!`@>=3r*xp8P8n{AFRQvDV%|D58};FbHSlcJ4IUDFb(D^KobN@+i(O&2JmOq1 z(Ru(^avm@`pJ>Q<867st$Q{)_sj9A@fXNnKyYZDa+*0hH(yEtPXHySi>Xmf6X&C2n z7T2|oNKJm9Hq8EN4xv`@I9|U3j(0yah0V1Ly~{E)$JvlhD45Hw%>felIo5n;Zq_f_ zt&P7~LthEstlmg)Tc!2>Opt7P&ZPZ_4XWcP6|r3RPJwGEi4b<0b&yI-S1HE^ll&2< zC3zot=67Ow_tUZ_^xssFP37V?n09EyrnSl zNa8QAX%wG){=}{+#UIZ)EWZ5}TTwAfB%hy#bkQ3_VB)#5h&OMRD?D5pEmh>784+Qb zWA0$tPvOH(RaclbsL8XfakKlTbrpYB!B)lBP6k%Deja^y>^`@Ca#|Qdf@vWjMELs98YhkS!KQgxTo8KkLI+t4b$y%Vo79p2Ai*CY)Dfv(6FF z^VlKY+&+fh(<@J{``%uEMI)T~{pOiScgKJ!r8R!>8ayjBjTW97Sk<{pbDFLDN_(i> zOX=GqIXby;0wnsi}{Zpkgp0xrKsi7@Hy;>0$+}xZg#pm6~pnBx`_7 zVwd%`a9B=nUT@9|hK>YUM22PY>Qamz`H3Z?dkozg@kZuyaBMAo2=iBNYlXMY_ByJR zuY!C+g1IAXdA{R;f=Ss7F7IVnF*V;OdBkm3ulU>l*#O#aHyt5?QbaWcB zx+xc%8Y)85xs)Mc2Z~NWWeqF7+6{Q7rgm2fZ7INp7-Q~I(-l;YP!YKGF7t=`eSKQZ zpX?=Jzm{X*kdhbsL()^)bEvYsfbzkKrbG-9>fa|waqb1{c#ui5KcBj#TQiqV`EbnR z=+rAL<@q6eLIkQBeVtYWWlv{?d}Lz-&bBPocpTo5=5^@S^kBPj>g>aa<*vOHSrYf;kVC87f~$*&Um*=C>s1IfH{q0#wmq6&pIAq7rg zZmr33XRXuZXo4fZ?LOZ}kbfx7Bi)+63wU!-v7!lje(cbDeqn*i+y9z7kHpu7^3!H_ z;Q|Zy(gX}6^V(Z{wAbx@loLF4mU11!Ujy?%PMpj9;DD|#6<$RPWoe@VHux>OufiM* zB437A`cxNx@|1SLn2?m-H_-&^1yMWoYZ@u8>u9Ei6V>>rjSG|F!Z`jtDquXKVN?I7 zEes5Id=fj}OS5+NjgTLVL9%E>f=RG-wwl{@nZxt~^9s z&>tqjcF=|j65TrnEa)q@Rk@xXp7Ef`b5`vsKTj-0O(bZG`3^fEDK5rCWTrD&8O-%awmsoF)vvVweC0V?^$UCTaaH#`t>m(_Sa4T zFrq~#7D9EG&~9^_E*kbogPu_|4LY?x+DELokXP?fqytr3@^-8(C)G8W})ENG5!ZUx`$VGpmu3jz4)ookGVh)VF?%PdZR*Gu&HCyno{XiR{%>hFj6|7_a7n z%`)a7s5wb)(=(Xka2+A&f3p5^#zb}%^IS*_U-#Fvf6J}&+Oc~x=Qgayf^~M&V=ukw 
z=Wi#W^@%oW``))a4P^N2jwX2V1wedgNy(^5cngRd@C`tmfYrHB|Et%p10_SlMiB`p z4MHQi;F^GHp|Rqlgaxz&&6EZu9+U?6$)iN_f8Suy4bERdx>wQqF36L&WFLABIP#y1 zSI|5cCsWqf2#*+RH6> zJ=AYSG}$0OyZulj+8bHf)OCbwp?8K92!3|HQ~$7A4^mkYLZ|Ang%FSSMjSQa748w_ z!<8Nl!$K02kuqR|JZq8X{VjwKy{l|%VT`jP9&<$xHCmX1%|F*C)%wg{xe&rLj~HPb z_k&fV@ye}GCD|W5U#)^#To%}(CmfTVg6Yf>@&B}gYcC;UfNWFAaFX088DJ;KFp|_9 zEOwA1Z4&=f9AfR7*dQL8Gj*+7l3MN=xJ{5PzD6nfGj*_ZUZE34`~fM8@#I!l*TzPW+!Hv!e_-vVx0nnF;>YR&gXadwuO!KMz8v8=Aa#GJoIAGw--62zG!<&ZHo z%AwO3rVH#yPP17pWBD>)BnM5EsiZ)5HHt1p83dCX1yk795!;=%m(OqiFsIV0L4+8? zCn=*`d8Uq_0k@z1p4FMs*=v0EH|L9r8rg6wO@}j7 zsuuF4;Ex&hVD#B!aepNs=fFlE8Ls>&=*{B*`lL6gm3o!yR$>;JdCna+${JdueXSXj z01}cms^q`&O|h?X>o#`S9cAhIAz9j8G$&hT6i8TD67o3AMRzS#LW{|pa&?G+!g4{{D1^hcX4qY>HD8r@~o>5C=gF!)(; zKYz$H&pOQ>vPmO`kO8hn%T?mW#~1N60**A*Uv~T=V1W>)1Q)$!t=dSzzrv6D8|ZnQ zOFPSCl?&ghykSjtG=f+gxAoz5UBf*a{i^C%(Fxq8#ymS^?o9caFgJdS!@H#xFx+*7 z&3IW;#L@cL^o0A?7fC0x^}Zcb)sg;@QW-dAsbRZohBQp4;$@<^J}lV)7vVNuc5K`; zVKiJ=8!Jm9Igy>o0JF%?UFu&Rsy43O%N|Db)mV+Ij8|hTS2)0e7Go@| zIq;rzNeqd$%EP;O_MU2vG@L-1CtOX&4kpf$|4dX>n_foN=O#jxD#j_d@JVNQ#pGfG zA|c1iGrfK7hZ9dRJSm0lkrDr}L%2M&=7)cTHK`CVJ$l?%$HD%2Qaofca zG=gB4v0Ee_an;B}%4oYxndiVTh$Vhz8wKtLss&M?>D6Z^wd+fA)DXa>#d2&hx85RE zmd1JfUUg&2K$gcUXJsP2qhYFK0h7nxbo^imS6`JB_`X*5zWCqvKj1&#$c4 zo=i%G#NMHfv&?)%>~$LT!`^7B1MX5lnAJbLMrKlK8(*`}ip028UfE{RpJP@2(ve-e z=73o^gzNh)9%rR|r1*mT0M$w-e!u3A1(LId(`^`2THnT=Mwz9BaLHl`j_K&{Mayu1Bjms4v6cc&9 z;miCV=l?D)53Vk8LwMfl)^zU59IQqgqS0THmXWAevkl&B@P0EnChUz=N;X#U=1JN7 z&)1X|<-6OgI=Q2!JpY07=w?C3cNf;WqP|d%+6QSJF3?17FUXIH9>^&U9^_Z@8CusI z^OwG!>F_11$X(W7OmM9re!i=;*W{kanlA;7#SlLjNZo9k>DZ^hqwX%b_87##%&hjZ zw3vo|IDsR#A#zbz99yjPDg~8;dvTm6tn?6bp$(Xggg)h}#l1=0NbCCU=)Fj|f&Hr4 z7pugYwF2lM1*H`|ml9=j_TVC0*3G)8s{#L998pc%I}7s3FVI&lmLoY-e%UWGxy;OO zw?$Z4G|z}fJnbY^>d5 zNQBz@#hch1!~}NbIB>-1cw&t04)#XrQTB4X2Tei~5*>F}jFBf+l87EQjS6!s4D1|N z%3+wU*B--BZ1!LeD=E8y~XvaRhOAXa^sya zhcAcaZU@GthAY+n{PFR%Dxi9o+f;^j`oZH5 zpkAxdVz&UiCpYW(Zh4Rx3q73OarEY5)g9_+;!?tB3T33Fn05C!gYn5HbmOC8_bx1^ 
z(-7_RIAzkVaMekD3@;v41J8%yWSenbA!A${6o}IZKHMmU)Yh@QwPDzVwGFPz;=6?m zCwRarcOf&Jsk&u zQ(=Lojr&sg)U>e!i{BG<$R7v*jws79bA*~h$XrZGx^!=;y}eQ=yMM{@?um%9GC_3H zlYo}cCXc`Rf*0P@EUhy3DcIVZ!({W7rt^s}@!`lDlb3O3rEgeF7?}f!3Z-iLG6PF{ zbYhiMBFSpt``O;{(8+$`kV@o^V=Ljocg1LC%}rUx4%VDr!`_^;3>#?bhJ#VL<55IO zK=3d;SZ3TL!&HF*0um8Qdlhd+uHN;WOg7+%T~MUy%(hH~$|{O!+h;Wd^uxGy`eB=! zoc(<}feo7i+xHMzwR2S^CueGZ7ko^r`dDPsKnJ7oa>qeNolSQf#5`iyGv57>kjeou_e5oDD^>nUYF?T=MKryyha69ha>lqt8~B*N!BMR@@eGB ziuBl`+m*(us{FvUs>QcgG5>4;A`9E|iaip%s*1@Oru};W&n!oy^uTDcQ&;w$qDXYJiWN63MDc*U#?)xn=kGwIbW8AP2 zn;u*HU#a3==IlCeqREF+4RV>W!}?PN+_? zs!CXe-CVk{e)+eR2q*D!ODh}PCEfK>Y+W26zaXKumjC|dHO5iXQYjg71zJIzSbogp zf3Iv)zwUSL{dk6>d2B{aIg(a63_M&!?8~3m_Hc^puG%m{% z)8Lv1kWieRJ^8Kp|acG&9fe5VOVOYn2Xl=0PY2kzGxBl&A!M0<_^^nl@ z?#E8X+McKCZUxozVHRP`-q=94$|GR&rnOR>THYFt`Yc=35csP-xfVSogObGSv^^u| zNO1o4F3x3BwveyuskaxNDgO|{;n_d@4=Deh%$PFA##NLWaf;e=fjhwB!BR^tBV)QsJ4^0MZ6|DCiz};n9<#57x4l>7H;80DYaNEqh zEyylAdvL=yi|lYs;KzPGj&4AT!mx$lEA@hz&SWTWLGnoj{g$b5w~jtz&9c2iOWGI! z%IU@@UFg%sDs$A3?5kY99w<8;wKNsDv?{%Ft8MpW9ahp|s1QIJ#p#AV(Q&Pxh|9O`@0YFvL`kmJpRUQo^G+>HZJ=htabpC|_(g5G+Avk@gVf8T z)soE-fCR_8)wNmrR){U1{39kGghoR&Q8Gybk$ z>CQW{9^I-&-rY92ZW)eg6MzCE)SV;5Y#&0r)N2HB0kQlpE<#*fV=Y`)>HHA zS1a0icW*r8todoIYZb$q!f-E6W8a~6jmj8LVH~q_iuOpkyA6WVbpfoCi%}T zMsy5S^0X2k5o7zy#4<{U346zLigv2xD|RHG=&vfg%V~}A@=wnY?lp*ENJ;HdY)=L% zwTpeZ>hY)kEDzTXF zS7~(iXcbMA(J`>JVhgkz_sHIJmC9dm66NN4NxsuV22}R<_Jh*4CtQ{R5@Ah*=^0M0 zst!uIBr{*Wg)4Q&b^1zT9;oSP1Gw#u&>C!AE-sOg9xO5H_LA0weWG(-t5s+rQA)L% z+-J#$dvP4;W`vUAkg&G#7yh4?Q$%1Tb=nK${yxpy0mF83o`WKnp?e(GB?JX6QDn6y zg;(0`qx9>mM4oWfnRXH^t}iUbVlhpah4gOYVQ^@QtS!KlooaV;G%YE6z6&EBTfy%= zH}5J+D{<_15o9{gy>aE=eSzcbwVcrX4>+V|Zb#X7lHa}$DVHfXpW5gzO|%La_6fIK z!q21k;IdR_V2o78e$FS=+5PQBYfQbn?z0fSLlz$Iq!Cqz;(4*bwPm~3o;52K`6ITG z-78;QVmiF$EVW^IlllqXXHG1hEmvz_zL$5z`ZlRLZ7vo}Vi8fOWdo8ya2%$Qa@520 z%7$T0l3z?>KT{{fC{0a#&M8pL=w_i#v=@8jY9NYLm@S|v_yr8j-#vqqzuMcjYE$k5Hg64 z0?E$Qe(#^pjVH_$^FA{1SU5THna3Qk{DtoC$wtV77$C<1LRs{G>_33#i!)_Q9ImS< 
zITv?VSoBWotHjs#e*&BXm)^V@&#PaJ-8|hN@nkDx8p6qImBlC3)0Kc3b+qopmL$q&$5l{IZh5(_+KqXp?VvdUCBBn`6pq z>yR0o!I}kTZX*fa#nF5w{{8pEP}buI3Ws+qgN4peKG8sPE;3HrB+mq?DnPKjS}V7} zbeUmxbrLm##Pntx6pBwq^y*T>B$Q!MvPKkbQ0noZrL4EHoF`miTZlH?SG{2O-c>J3j z6p&YU?Ln_NcG~W5+9)Z;avpC5CXK}+FjIRp>&GldZL^7a3d9DGHS>213MR$(%MOP3 z)JuspZ$=DBpAa%}!6FLdT%y!Vm!CP9J591a|Fs{D1uk5V)aW06O`4xtb10zqA)3=H zqJFJ|+=cfQ15A43Te{M#XqmrSVHesIv7s4j9mo?kFlDvw;R`anp<~r+Eg~aQ57Rs8 z%uBTRa!+FOHCU5`qHYKY(uyWrn3lOCxP`)2b;`(!~+_nl?kpKEvxWG9_-B!2g` z82=OW$|>QP^y*l~-mE=bHVCF*Zgkw3YE?R*FePN*ZvDjN`fdg0Z7STRu3yP`zMAH%+8@H^Qm958WuUtrgB6J7G<5-P721NNI)rxno(nB_w z4ROSSC?b_cCKM7a<1)r}I(P5%*VKIHwZr;j6=ha4LpH|4VYCptPbY194x;}#6)$i~ zLxgTK68#S|s^+28{_Msyd1`SA?fkPyGv`%!V!>_SPEm)U^5IHP(NgR`&9_256EI}I zR&FUd6f;?wWLDU9ve`MpOK>EKjyc zsk8T!H7ZX7J0n8roE%wFb2Z?7;`!+kpo}EI`GyBEHj>l>#8p@J+>=-tqRnTyJ(GsP zl&%3QYT5}_hs=!jqp5ip_Cm<+W@et|QF+d?nH2ZF**?u0J5Bae?Xss^i-4U$Ia$65 z;xuJ)%@=`!16*wMOi}hCEoz(xZD39A%R=I7m7)Pv7Hc-R6#HG=n}b@%;K!WYDX$4v*OT3NE;8qJn$n{kqL-~b zd@^QmOqTtq&lM9y6&_+WH79S$g!oYa;u;|HISp}g-Eg0=m@qIeqnlD<~OwLPR$n?v%x7VMRTASSbTQ zkU7^7U-7|cLEZc6(}38nJ_A{B?iK>r{}y%bD&Z`3X`7E%bjrrre||3D!L=U=qqfhM zd)}^>1D-<#3ST~lGS+OqWNO07pPHF$IppnnAt57SSpK8_sd5(wO4u7iOe2iXO&R`{ zYXB1c{UTQS++ID&HGBGCkd>dz<^0Q(r%!mT!gQG3&U>jw$bLC8J+NXJ{5MQcFx6n2 zz2g33$Dpf>c(2>lf*sDRJGx8UUsGKp8_(X$Z{E{EKo5gg(*dYM4OHPZN)`SAqo(I} z%4X)vIe8qO8MxR+W^Uc0l5Uidmtv68g_3ekO@#8$OVfhyH1Ropn%5LjQ;B+gIgyy_ zR;-eJmD5`IV%qaeRVRjISCt~dOQ5LKavJ6br7{*5apWe52)Q(IMpGEVFs@Tc!j71jcE)97 z$o+bhjd7jxEc>(1U+2H``JBJTjP+jDTJN*o^}f&V`943u8V7Z4uY)yCPJauQ2YQ{7-2r#A*A@kqtMzK9q+gH$!A&xQC!D??cm*!M^Fn z(R;y-y*#TUpT@@oIh5Sj=@o+iGLEKCv#@tnyVhP6gFEWKst{ z-~1Ds>7$cS)@Y)mqk|vg`;0xTao#B;4L>UshSEGjVm+OVC*lme=kEsuf;u=NIH55R zMw%8vn$N!T_ELw_k4;LL?0zh8CVaB&=^nq+&3Sm*0WO|qR9S84HhLdS$IyTF)ibvD zTfV6^C%U1v=z7q!b=Hdy>E$hHRaX)pnOvhj3K?{Y*;cHWkJmY-c+%a#2t_?q#mcI2 zoR5N4N~)z$JP`!h?qbERjD?Z7HZ;*{ErM7URCw8#NNojgyW|_LeP)Y4f`oeeg*%{2 zKv-p%h})r5EK`LDyUoYd`a!>oAa8mLV8vy5v45d?GlIx+m5Gm?!GX3hVmL!#K@~i; 
zCjFhl<7gv?$sx-Uh@~IKC-5*B<<9+Dk>lW@y~HqgvVsMLf-En~jScSEf4yFgKXyH}hL zkYKY*4#_CA)r_1}5vIbaOE54tI=0^T5~1BadLz#Yi)is=^zj%kleIsm0<+1k8ESdk zaM5LGGV6dZ#7$U-d7UL(V}GqlPj#-Cpnj1NFIvj(xGP@v9iI{!D6eqZEeuRE74}0V z>UG$IFEL3JdG!rpYBbX{=AhMvr_ktw2m&rdR296{Z0SO?o!kmF1-$bqQ_b2rZ!O9P zjO6+d5_7iNczT*XAY1FrlAU0??1q>lc)cuYi@@Ulv@&GO#eEffl%6+G@_adY*@a+zs zwxh^dI;?gLCxRS)cF#}eHv#5l$r`an``?0Jnx>2Q!0Q6%20$8{fWR?faEJH>a3qDK zy;plI|1Q1UsXSu0$Vu?40a zghpLnoO_J&19n8|gFwy|mDVr5#;#3DN}46V7Jb=7kvlaF3iOUXV?Zw9>5u9ZgDwdjc)cg7T%@NI8!LU^{Vj!ut5f#vEnSz)F z{`=EpNM$w%e-fg)0JYgfU-@&l@AGj``r9|5fEFVwr5FHqwhK#!uLC$|yy(p9U{2MB z3@vm-xV{0M;BS#=PQ=ppSccOhE4?wu>^Q$D6rcCHQHEyBuwCPVr|oHQCN*dd#vk2Q zbfWX2%Km_QqU~c+Ps{=d0GHVzev&aVZSJi$es1{X<|R>IoS=^ll)keANFCTGA}iH> zy${kr_|T#k3*kTN-=xzF=7_I1tju0tq$LT&v}Pj&bp za39OnGs$`9D?4M^1>ei}0Q6^f_XYY4WP+G-JoaG3SUN_xo8zCI9&)7X#!$M6AlC*< zEghBhVLQrfHOb?jIt=^)mR?(k#67vyD_?O)*3+C&2RXxu>zS9+0m#VWvwvj0GO}G# z;5D@k(J5CJKYB!btMKa^{{{|WEIZ{=$t2)~5@w%YA1`{h)H;S+JTdr(FwPam)XJz6 zd6ysLUhE{F2#Z!>AJy?4P>5PwoaBEfK=vXG?J%evNwN?QCF|=(Dk_ z>j|V5*?m`@8NmFW!KZd}P&SoK{ew$Kr0^PH1uOti$?zNp){`9Pu; zfV^%ox_W7;t>yZv1f1p_6n4M}23ix9JY>ZE02qaF6Hj&w``4Ov=O$@?_%GwZ7Re0f zC*DtJQLWcbvRcP=A8H1U*(hs9@pfIonwphRl0HLl4Am&q71aSnNq7bYGQP}>B#ztk zI}^Qu$56J^>BTB#B`uRKTfEnOLDt47asMeD^R|_JciT-Zhb&#jtoWV93hy=l*5z~v z&e&fRo6K3(At`ak7UoALbsf4C%n8?_D-cZJ$8fg$<{6@>|KkX)D(Utougy(g=LX!5 zrz45`B3waughWXnOVKYqp&swBZUR^mCE@A}@{EQ9p(kyN_PN!V(en)Ox;Tm^f$pZ^RWKd;8C)jLAzC4ozFT0`H_SoJm<*uE#TVDVo;G3d^!dg^Y zXZ(bkkL@ju7-mH4|GbaU9$AZ5(vwcK`qIzJe$cKh2m(v)Y}@n?-Kba3i=-@16%c>H zy1%U6E^lw@+2+={bf)}DI=7QkZCL&}=v+QIyT)g`s`mKoljGCu`WfR92NnyWI)(*K z^8x?by{D&kVhj?3vPugcXuTkMw^^K6-tw2hWIVbWuKlqNup<+dDRA?7B}y*@zEzcc z6R@BlSACq@M~&c>H}?Z1(X^PFeBMQj-8qa)0iKq46kC^zKm1sF^GeH$+-Rd9{bM8V zHG+6D=k@8f%R4mNawai)y#XXT*5IZ}DGTzH@e>Xy%KO>*^zj{wONEMZ8E4SP;XMVn zoyGAle}9uwGl=^rLLc<^^DZ6O%3Gra`|hAdBXzkPL)2E#p0)H>jQm#rQKEJ`I(gDm zOb6~ymPjIB(Rnv?*U|8U8h4cqW&Zv?1O$%Q)(iy%0+e6|zHulK|I(}wu?$A1o&RlK bt`@Jq`t)LLU$TKII(yq%6L1BGFNXaa2)}l9 literal 0 HcmV?d00001 diff 
--git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md index ba1c048..560f749 100644 --- a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md @@ -10,24 +10,24 @@ This Helm chart deploys an AutoMQ benchmark job on a Kubernetes cluster. ## Installing the Chart -To install the chart with the release name `my-benchmark`: +To install the chart with the release name `automq-benchmark`: ```bash -helm install my-benchmark ./automq-benchmark +helm install automq-benchmark ./automq-benchmark ``` To install with custom values: ```bash -helm install my-benchmark ./automq-benchmark -f custom-values.yaml +helm install automq-benchmark ./automq-benchmark -f custom-values.yaml ``` ## Uninstalling the Chart -To uninstall/delete the `my-benchmark` deployment: +To uninstall/delete the `automq-benchmark` deployment: ```bash -helm uninstall my-benchmark +helm uninstall automq-benchmark ``` ## Configuration @@ -87,8 +87,6 @@ resources: automq: bootstrapServer: "my-automq-cluster:9092" - username: "my-user" - password: "my-password" ``` ## Monitoring From 754fbe86fbe2d013b5a30cc9267863c5e431bde7 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Tue, 21 Oct 2025 16:44:27 +0800 Subject: [PATCH 05/20] add conclusion for benchmark --- cloudservice-setup/aws/eks-benchmark/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index aa6008b..7779423 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -256,7 +256,7 @@ For specific configurations of helm values, you can refer to the [README](./helm - Navigate to AutoMQ performance dashboards - Observe real-time metrics during the test execution -> 
**Note**: For comprehensive dashboard configurations and additional monitoring templates, you can contact the AutoMQ team to obtain pre-configured Grafana dashboards that will help you visualize detailed performance metrics and system health indicators. +After completing the above steps, you can see the corresponding metrics on the Grafana dashboard. Adjust the stress test parameters according to the corresponding specifications to further understand the specifications and performance related to AutoMQ. From 5d8c58c824cba644073a9e2c5714330c739274b3 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Tue, 28 Oct 2025 19:15:39 +0800 Subject: [PATCH 06/20] refactor: simplify and consolidate the TF steps. --- .../Chart.yaml | 0 .../README.md | 0 .../templates/job.yaml | 0 .../values.yaml | 0 .../aws/eks-benchmark/automq/main.tf | 222 +++++++ .../automq/terraform.tfvars.example | 42 ++ .../eks-benchmark/monitoring/prometheus.yaml | 606 ------------------ .../eks-benchmark/terraform/automq/main.tf | 125 ---- .../terraform/automq/terraform.tfvars.example | 15 - .../terraform/benchmark-node/main.tf | 87 --- .../terraform/benchmark-node/outputs.tf | 77 --- .../terraform/benchmark-node/provider.tf | 14 - .../benchmark-node/terraform.tfvars.example | 34 - .../terraform/benchmark-node/variables.tf | 110 ---- .../aws/eks-benchmark/terraform/benchmark.tf | 55 ++ .../aws/eks-benchmark/terraform/eks.tf | 37 ++ .../aws/eks-benchmark/terraform/main.tf | 12 + .../terraform/monitoring/prometheus.yaml | 114 ++++ .../aws/eks-benchmark/terraform/outputs.tf | 139 ++++ .../aws/eks-benchmark/terraform/prometheus.tf | 40 ++ .../aws/eks-benchmark/terraform/providers.tf | 22 + .../aws/eks-benchmark/terraform/variables.tf | 163 +++++ 22 files changed, 846 insertions(+), 1068 deletions(-) rename cloudservice-setup/aws/eks-benchmark/{helm-chart/automq-benchmark => automq-benchmark-chart}/Chart.yaml (100%) rename cloudservice-setup/aws/eks-benchmark/{helm-chart/automq-benchmark =>
automq-benchmark-chart}/README.md (100%) rename cloudservice-setup/aws/eks-benchmark/{helm-chart/automq-benchmark => automq-benchmark-chart}/templates/job.yaml (100%) rename cloudservice-setup/aws/eks-benchmark/{helm-chart/automq-benchmark => automq-benchmark-chart}/values.yaml (100%) create mode 100644 cloudservice-setup/aws/eks-benchmark/automq/main.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars.example delete mode 100644 cloudservice-setup/aws/eks-benchmark/monitoring/prometheus.yaml delete mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf delete mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example delete mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf delete mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf delete mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf delete mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example delete mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/eks.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/main.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/providers.tf create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/variables.tf diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/Chart.yaml b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/Chart.yaml similarity index 100% 
rename from cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/Chart.yaml rename to cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/Chart.yaml diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/README.md similarity index 100% rename from cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/README.md rename to cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/README.md diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/templates/job.yaml similarity index 100% rename from cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/templates/job.yaml rename to cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/templates/job.yaml diff --git a/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/values.yaml b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/values.yaml similarity index 100% rename from cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark/values.yaml rename to cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/values.yaml diff --git a/cloudservice-setup/aws/eks-benchmark/automq/main.tf b/cloudservice-setup/aws/eks-benchmark/automq/main.tf new file mode 100644 index 0000000..13e2be9 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/automq/main.tf @@ -0,0 +1,222 @@ +terraform { + required_providers { + automq = { + source = "automq/automq" + } + aws = { + source = "hashicorp/aws" + } + } +} + +data "aws_subnets" "aws_subnets_example" { + provider = aws + filter { + name = "vpc-id" + values = [var.vpc_id] + } + filter { + name = "availability-zone" + values = [var.az] + } +} + + +resource "automq_integration" "prometheus_remote_write_integration" { + environment_id = var.automq_environment_id + name = var.prometheus_integration_name + type = 
var.prometheus_integration_type + endpoint = var.prometheus_remote_write_endpoint + deploy_profile = var.automq_deploy_profile_name + + prometheus_remote_write_config = { + auth_type = var.prometheus_auth_type + } +} + +provider "automq" { + automq_byoc_endpoint = var.automq_byoc_endpoint + automq_byoc_access_key_id = var.automq_byoc_access_key_id + automq_byoc_secret_key = var.automq_byoc_secret_key +} + +data "automq_deploy_profile" "automq_deploy_profile" { + environment_id = var.automq_environment_id + name = var.automq_deploy_profile_name +} + +data "automq_data_bucket_profiles" "automq_data_bucket_profiles" { + environment_id = var.automq_environment_id + profile_name = data.automq_deploy_profile.automq_deploy_profile.name +} + +resource "automq_kafka_instance" "automq_kafka_instance" { + environment_id = var.automq_environment_id + name = var.kafka_instance_name + description = var.kafka_instance_description + version = var.kafka_version + deploy_profile = data.automq_deploy_profile.automq_deploy_profile.name + + compute_specs = { + reserved_aku = var.kafka_reserved_aku + networks = [ + { + zone = var.az + subnets = [data.aws_subnets.aws_subnets_example.ids[0]] + } + ] + kubernetes_node_groups = [{ + id = var.kubernetes_node_group_id + }] + bucket_profiles = [ + { + id = data.automq_data_bucket_profiles.automq_data_bucket_profiles.data_buckets[0].id + } + ] + } + + features = { + wal_mode = var.kafka_wal_mode + security = { + authentication_methods = var.kafka_authentication_methods + transit_encryption_modes = var.kafka_transit_encryption_modes + } + instance_configs = var.kafka_instance_configs + integrations = [ + automq_integration.prometheus_remote_write_integration.id, + ] + } +} + + +# Prometheus Integration Configuration +variable "prometheus_integration_name" { + description = "Name of the Prometheus remote write integration" + type = string + default = "prometheus-remote-write" +} + +variable "prometheus_integration_type" { + description = "Type of 
the Prometheus integration" + type = string + default = "prometheusRemoteWrite" +} + +variable "prometheus_remote_write_endpoint" { + description = "Prometheus remote write endpoint URL" + type = string + default = "http://prometheus-prometheus-server.monitoring:9090/api/v1/write" +} + +variable "prometheus_auth_type" { + description = "Authentication type for Prometheus remote write" + type = string + default = "noauth" +} + +# AutoMQ Deploy Profile Configuration +variable "automq_deploy_profile_name" { + description = "Name of the AutoMQ deploy profile" + type = string + default = "eks" +} + +# Kafka Instance Configuration +variable "kafka_instance_name" { + description = "Name of the AutoMQ Kafka instance" + type = string + default = "automq-kafka-benchmark" +} + +variable "kafka_instance_description" { + description = "Description of the AutoMQ Kafka instance" + type = string + default = "AutoMQ Kafka instance for benchmark testing" +} + +variable "kafka_version" { + description = "Version of the AutoMQ Kafka instance" + type = string + default = "1.4.1" +} + +variable "kafka_reserved_aku" { + description = "Reserved AKU (AutoMQ Kafka Units) for the instance" + type = number + default = 3 +} + +variable "kubernetes_node_group_id" { + description = "ID of the Kubernetes node group for AutoMQ deployment" + type = string + default = "automq-node-group" +} + +variable "kafka_wal_mode" { + description = "WAL (Write-Ahead Log) mode for Kafka" + type = string + default = "EBSWAL" +} + +variable "kafka_authentication_methods" { + description = "Authentication methods for Kafka" + type = list(string) + default = ["anonymous"] +} + +variable "kafka_transit_encryption_modes" { + description = "Transit encryption modes for Kafka" + type = list(string) + default = ["plaintext"] +} + +variable "kafka_instance_configs" { + description = "Instance configuration parameters for Kafka" + type = map(string) + default = { + "auto.create.topics.enable" = "false" + "log.retention.ms" 
= "3600000" + } +} + +# Existing variables +variable "vpc_id" { + description = "VPC ID where AutoMQ resources will be deployed" + type = string +} + +variable "region" { + description = "AWS region for deployment" + type = string +} + +variable "az" { + description = "Availability zone for deployment" + type = string +} + +variable "automq_byoc_endpoint" { + description = "AutoMQ BYOC endpoint URL" + type = string +} + +variable "automq_byoc_access_key_id" { + description = "AutoMQ BYOC access key ID" + type = string + sensitive = true +} + +variable "automq_byoc_secret_key" { + description = "AutoMQ BYOC secret key" + type = string + sensitive = true +} + +variable "automq_environment_id" { + description = "AutoMQ environment ID" + type = string +} + +provider "aws" { + region = var.region +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars.example b/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars.example new file mode 100644 index 0000000..37ad5fd --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars.example @@ -0,0 +1,42 @@ +# Fill in your environment-specific values here +# DO NOT COMMIT real secrets to VCS. Keep this file local. 
+ +# AWS networking +vpc_id = "vpc-id" +region = "us-east-1" +az = "us-east-1a" + +# AutoMQ BYOC endpoint and credentials +automq_byoc_endpoint = "http://example.com" +automq_byoc_access_key_id = "access-key" +automq_byoc_secret_key = "secretkey" + +# AutoMQ environment id +automq_environment_id = "automqlab-id" + +# Prometheus Integration Configuration +prometheus_integration_name = "prometheus-remote-write" +prometheus_integration_type = "prometheusRemoteWrite" +prometheus_remote_write_endpoint = "http://prometheus-prometheus-server.monitoring:9090/api/v1/write" +prometheus_auth_type = "noauth" + +# AutoMQ Deploy Profile Configuration +automq_deploy_profile_name = "eks" + +# Kafka Instance Configuration +kafka_instance_name = "automq-kafka-benchmark" +kafka_instance_description = "AutoMQ Kafka instance for benchmark testing" +kafka_version = "1.4.1" +kafka_reserved_aku = 3 +kubernetes_node_group_id = "automq-node-group" +kafka_wal_mode = "EBSWAL" + +# Kafka Authentication and Encryption +kafka_authentication_methods = ["anonymous"] +kafka_transit_encryption_modes = ["plaintext"] + +# Kafka Instance Configuration Parameters +kafka_instance_configs = { + "auto.create.topics.enable" = "false" + "log.retention.ms" = "3600000" +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/monitoring/prometheus.yaml b/cloudservice-setup/aws/eks-benchmark/monitoring/prometheus.yaml deleted file mode 100644 index 616c1fd..0000000 --- a/cloudservice-setup/aws/eks-benchmark/monitoring/prometheus.yaml +++ /dev/null @@ -1,606 +0,0 @@ -rbac: - create: false - -podSecurityPolicy: - enabled: false - -grafana: - adminPassword: "AutoMQ@Grafana" - persistence: - enabled: false - - # Grafana service configuration for LoadBalancer - service: - enabled: true - type: LoadBalancer - port: 80 - targetPort: 3000 - annotations: - service.beta.kubernetes.io/aws-load-balancer-type: "nlb" - service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" - 
service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true" - labels: {} - portName: service - - # Node selector for Grafana - nodeSelector: - workload-type: benchmark - - -prometheus: - prometheusSpec: - enableRemoteWriteReceiver: true # 启用远程写入接收器 - -server: - ## Prometheus server container name - ## - name: prometheus-server - - ## Prometheus server command - ## - command: [] - - ## prometheus server priorityClassName - ## - priorityClassName: "" - - ## EnableServiceLinks indicates whether information about services should be injected - ## into pod's environment variables, matching the syntax of Docker links. - ## WARNING: the field is unsupported and will be skipped in K8s prior to v1.13.0. - ## - enableServiceLinks: true - - ## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug - ## so that the various internal URLs are still able to access as they are in the default case. - ## (Optional) - prefixURL: "" - - ## External URL which can access prometheus - ## Maybe same with Ingress host name - baseURL: "" - - ## Additional server container environment variables - ## - ## You specify this manually like you would a raw deployment manifest. - ## This means you can bind in environment variables from secrets. - ## - ## e.g. static environment variable: - ## - name: DEMO_GREETING - ## value: "Hello from the environment" - ## - ## e.g. 
secret environment variable: - ## - name: USERNAME - ## valueFrom: - ## secretKeyRef: - ## name: mysecret - ## key: username - env: [] - defaultFlagsOverride: - - --enable-feature=exemplar-storage,remote-write-receiver,otlp-write-receiver - - --config.file=/etc/config/prometheus.yml - - extraFlags: - - web.enable-lifecycle - - storage.tsdb.wal-compression - - - ## Path to a configuration file on prometheus server container FS - configPath: /etc/config/prometheus.yml - - ### The data directory used by prometheus to set --storage.tsdb.path - ### When empty server.persistentVolume.mountPath is used instead - storagePath: "/prometheus" - - global: - ## How frequently to scrape targets by default - ## - scrape_interval: 1m - ## How long until a scrape request times out - ## - scrape_timeout: 10s - ## How frequently to evaluate rules - ## - evaluation_interval: 1m - ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write - ## - remoteWrite: [] - ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read - ## - remoteRead: [] - - ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb - ## - tsdb: - out_of_order_time_window: 60s - # out_of_order_time_window: 0s - - ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#exemplars - ## Must be enabled via --enable-feature=exemplar-storage - ## - exemplars: {} - # max_exemplars: 100000 - - ## Custom HTTP headers for Liveness/Readiness/Startup Probe - ## - ## Useful for providing HTTP Basic Auth to healthchecks - probeHeaders: [] - # - name: "Authorization" - # value: "Bearer ABCDEabcde12345" - - ## Additional Prometheus server container arguments - ## Set to null for argumentless flags - ## - extraArgs: - web.enable-remote-write-receiver: "" - - ## Additional InitContainers to initialize the pod - ## - extraInitContainers: [] - - ## Additional Prometheus server Volume mounts - ## - extraVolumeMounts: [] - - ## 
Additional Prometheus server Volumes - ## - extraVolumes: [] - - ## Additional Prometheus server hostPath mounts - ## - extraHostPathMounts: [] - # - name: certs-dir - # mountPath: /etc/kubernetes/certs - # subPath: "" - # hostPath: /etc/kubernetes/certs - # readOnly: true - - extraConfigmapMounts: [] - # - name: certs-configmap - # mountPath: /prometheus - # subPath: "" - # configMap: certs-configmap - # readOnly: true - - ## Additional Prometheus server Secret mounts - # Defines additional mounts with secrets. Secrets must be manually created in the namespace. - extraSecretMounts: [] - # - name: secret-files - # mountPath: /etc/secrets - # subPath: "" - # secretName: prom-secret-files - # readOnly: true - - ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}} - ## Defining configMapOverrideName will cause templates/server-configmap.yaml - ## to NOT generate a ConfigMap resource - ## - configMapOverrideName: "" - - ## Extra labels for Prometheus server ConfigMap (ConfigMap that holds serverFiles) - extraConfigmapLabels: {} - - ## Override the prometheus.server.fullname for all objects related to the Prometheus server - fullnameOverride: "" - - ## Server Deployment Strategy type - strategy: - type: Recreate - - ## hostAliases allows adding entries to /etc/hosts inside the containers - hostAliases: [] - # - ip: "127.0.0.1" - # hostnames: - # - "example.com" - - ## Node tolerations for server scheduling to nodes with taints - ## Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ - ## - tolerations: - # - key: "dedicated" - # operator: "Equal" - # value: "loki" - # effect: "NoSchedule" - - ## Node labels for Prometheus server pod assignment - ## Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ - ## - nodeSelector: - workload-type: benchmark - - ## Pod affinity - ## - affinity: {} - - ## Pod anti-affinity can prevent the scheduler from placing Prometheus server 
replicas on the same node. - ## The value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. - ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node. - ## The default value "" will disable pod anti-affinity so that no anti-affinity rules will be configured (unless set in `server.affinity`). - ## - podAntiAffinity: "" - - ## If anti-affinity is enabled sets the topologyKey to use for anti-affinity. - ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone - ## - podAntiAffinityTopologyKey: kubernetes.io/hostname - - ## Pod topology spread constraints - ## ref. https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ - topologySpreadConstraints: [] - - ## PodDisruptionBudget settings - ## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ - ## - podDisruptionBudget: - enabled: false - # maxUnavailable: 1 - # minAvailable: 1 - ## unhealthyPodEvictionPolicy is available since 1.27.0 (beta) - ## https://kubernetes.io/docs/tasks/run-application/configure-pdb/#unhealthy-pod-eviction-policy - # unhealthyPodEvictionPolicy: IfHealthyBudget - - ## Use an alternate scheduler, e.g. "stork". - ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ - ## - # schedulerName: - - persistentVolume: - ## If true, Prometheus server will create/use a Persistent Volume Claim - ## If false, use emptyDir - ## - enabled: true - - ## If set it will override the name of the created persistent volume claim - ## generated by the stateful set. 
- ## - statefulSetNameOverride: "" - - ## Prometheus server data Persistent Volume access modes - ## Must match those of existing PV or dynamic provisioner - ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ - ## - accessModes: - - ReadWriteOnce - - ## Prometheus server data Persistent Volume labels - ## - labels: {} - - ## Prometheus server data Persistent Volume annotations - ## - annotations: {} - - ## Prometheus server data Persistent Volume existing claim name - ## Requires server.persistentVolume.enabled: true - ## If defined, PVC must be created manually before volume will be bound - existingClaim: "" - - ## Prometheus server data Persistent Volume mount root path - ## - mountPath: /data - - ## Prometheus server data Persistent Volume size - ## - size: 30Gi - - ## Prometheus server data Persistent Volume Storage Class - ## If defined, storageClassName: - ## If set to "-", storageClassName: "", which disables dynamic provisioning - ## If undefined (the default) or set to null, no storageClassName spec is - ## set, choosing the default provisioner. (gp2 on AWS, standard on - ## GKE, AWS & OpenStack) - ## - storageClass: "gp2" - - ## Prometheus server data Persistent Volume Binding Mode - ## If defined, volumeBindingMode: - ## If undefined (the default) or set to null, no volumeBindingMode spec is - ## set, choosing the default mode. 
- ## - # volumeBindingMode: "" - - ## Subdirectory of Prometheus server data Persistent Volume to mount - ## Useful if the volume's root directory is not empty - ## - subPath: "" - - ## Persistent Volume Claim Selector - ## Useful if Persistent Volumes have been provisioned in advance - ## Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#selector - ## - # selector: - # matchLabels: - # release: "stable" - # matchExpressions: - # - { key: environment, operator: In, values: [ dev ] } - - ## Persistent Volume Name - ## Useful if Persistent Volumes have been provisioned in advance and you want to use a specific one - ## - # volumeName: "" - - emptyDir: - ## Prometheus server emptyDir volume size limit - ## - sizeLimit: "" - - ## Annotations to be added to Prometheus server pods - ## - podAnnotations: {} - # iam.amazonaws.com/role: prometheus - - ## Labels to be added to Prometheus server pods - ## - podLabels: {} - - ## Prometheus AlertManager configuration - ## - alertmanagers: [] - - ## Specify if a Pod Security Policy for node-exporter must be created - ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/ - ## - podSecurityPolicy: - annotations: {} - ## Specify pod annotations - ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor - ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp - ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl - ## - # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' - # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' - # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' - - ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below) - ## - replicaCount: 1 - - ## Number of old history to retain to allow rollback - ## Default Kubernetes value is set to 10 - ## - revisionHistoryLimit: 10 - - ## Annotations to be added to ConfigMap - ## - 
configMapAnnotations: {} - - ## Annotations to be added to deployment - ## - deploymentAnnotations: {} - - statefulSet: - ## If true, use a statefulset instead of a deployment for pod management. - ## This allows to scale replicas to more than 1 pod - ## - enabled: true - - annotations: { } - labels: { } - podManagementPolicy: OrderedReady - - ## Alertmanager headless service to use for the statefulset - ## - headless: - annotations: { } - labels: { } - servicePort: 80 - ## Enable gRPC port on service to allow auto discovery with thanos-querier - gRPC: - enabled: false - servicePort: 10901 - # nodePort: 10901 - - ## Statefulset's persistent volume claim retention policy - ## pvcDeleteOnStsDelete and pvcDeleteOnStsScale determine whether - ## statefulset's PVCs are deleted (true) or retained (false) on scaling down - ## and deleting statefulset, respectively. Requires 1.27.0+. - ## Ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention - ## - pvcDeleteOnStsDelete: false - pvcDeleteOnStsScale: false - - ## Prometheus server resource requests and limits - ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ - ## - resources: - requests: - cpu: 500m - memory: 2Gi - # limits: - # cpu: 500m - # memory: 512Mi - # requests: - # cpu: 500m - # memory: 512Mi - - # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), - # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working - ## - hostNetwork: false - - # When hostNetwork is enabled, this will set to ClusterFirstWithHostNet automatically - dnsPolicy: ClusterFirst - - # Use hostPort - # hostPort: 9090 - - # Use portName - portName: "" - - ## Vertical Pod Autoscaler config - ## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler - verticalAutoscaler: - ## If true a VPA object will be created for the controller (either StatefulSet 
or Deployemnt, based on above configs) - enabled: false - # updateMode: "Auto" - # containerPolicies: - # - containerName: 'prometheus-server' - securityContext: - runAsUser: 65534 - runAsNonRoot: true - runAsGroup: 65534 - fsGroup: 65534 - - ## Security context to be added to server container - ## - containerSecurityContext: {} - - service: - ## If false, no Service will be created for the Prometheus server - ## - enabled: true - - annotations: {} - labels: {} - clusterIP: "" - - ## List of IP addresses at which the Prometheus server service is available - ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips - ## - externalIPs: [] - - loadBalancerIP: "" - loadBalancerSourceRanges: [] - servicePort: 9090 - sessionAffinity: None - type: ClusterIP - - ## Enable gRPC port on service to allow auto discovery with thanos-querier - gRPC: - enabled: false - servicePort: 10901 - # nodePort: 10901 - - ## If using a statefulSet (statefulSet.enabled=true), configure the - ## service to connect to a specific replica to have a consistent view - ## of the data. - statefulsetReplica: - enabled: false - replica: 0 - - ## Additional port to define in the Service - additionalPorts: [] - # additionalPorts: - # - name: authenticated - # port: 8081 - # targetPort: 8081 - - ## Prometheus server pod termination grace period - ## - terminationGracePeriodSeconds: 300 - - ## Prometheus data retention period (default if not specified is 15 days) - ## - retention: "30d" - - ## Prometheus' data retention size. Supported units: B, KB, MB, GB, TB, PB, EB. 
- ## - retentionSize: "100GB" - -## Prometheus server ConfigMap entries for rule files (allow prometheus labels interpolation) -ruleFiles: {} - -## Prometheus server ConfigMap entries for scrape_config_files -## (allows scrape configs defined in additional files) -## -scrapeConfigFiles: [] - -## Prometheus server ConfigMap entries -## -serverFiles: - ## Alerts configuration - ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ - alerting_rules.yml: {} - # groups: - # - name: Instances - # rules: - # - alert: InstanceDown - # expr: up == 0 - # for: 5m - # labels: - # severity: page - # annotations: - # description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.' - # summary: 'Instance {{ $labels.instance }} down' - ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml - alerts: {} - - ## Records configuration - ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - recording_rules.yml: {} - ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml - rules: {} - - prometheus.yml: - scrape_configs: - - job_name: prometheus - static_configs: - - targets: - - localhost:9090 - - -# adds additional scrape configs to prometheus.yml -# must be a string so you have to add a | after extraScrapeConfigs: -# example adds prometheus-blackbox-exporter scrape config -extraScrapeConfigs: "" - # - job_name: 'prometheus-blackbox-exporter' - # metrics_path: /probe - # params: - # module: [http_2xx] - # static_configs: - # - targets: - # - https://example.com - # relabel_configs: - # - source_labels: [__address__] - # target_label: __param_target - # - source_labels: [__param_target] - # target_label: instance - # - target_label: __address__ -# replacement: prometheus-blackbox-exporter:9115 - -# Adds option to add alert_relabel_configs to avoid duplicate alerts in alertmanager -# useful in H/A prometheus 
with different external labels but the same alerts -alertRelabelConfigs: {} - # alert_relabel_configs: - # - source_labels: [dc] - # regex: (.+)\d+ -# target_label: dc - -networkPolicy: - ## Enable creation of NetworkPolicy resources. - ## - enabled: false - -# Force namespace of namespaced resources -forceNamespace: "prometheus" - -# Extra manifests to deploy as an array -extraManifests: [] -prometheus-node-exporter: - ## If false, node-exporter will not be installed - ## - enabled: false - -## alertmanager sub-chart configurable values -## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager -## -alertmanager: - ## If false, alertmanager will not be installed - ## - enabled: false - -## kube-state-metrics sub-chart configurable values -## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics -## -kube-state-metrics: - ## If false, kube-state-metrics sub-chart will not be installed - ## - enabled: false - -## prometheus-pushgateway sub-chart configurable values -## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway -## -prometheus-pushgateway: - ## If false, pushgateway will not be installed - ## - enabled: false - diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf b/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf deleted file mode 100644 index d1dbbd5..0000000 --- a/cloudservice-setup/aws/eks-benchmark/terraform/automq/main.tf +++ /dev/null @@ -1,125 +0,0 @@ -terraform { - required_providers { - automq = { - source = "automq/automq" - } - aws = { - source = "hashicorp/aws" - } - } -} - -data "aws_subnets" "aws_subnets_example" { - provider = aws - filter { - name = "vpc-id" - values = [var.vpc_id] - } - filter { - name = "availability-zone" - values = [var.az] - } -} - - -resource "automq_integration" "prometheus_remote_write_example_1" { - environment_id = var.automq_environment_id - name = 
"example-1" - type = "prometheusRemoteWrite" - endpoint = "http://prometheus.prometheus:9090/api/v1/write" - deploy_profile = "eks" - - prometheus_remote_write_config = { - auth_type = "noauth" - } -} - -provider "automq" { - automq_byoc_endpoint = var.automq_byoc_endpoint - automq_byoc_access_key_id = var.automq_byoc_access_key_id - automq_byoc_secret_key = var.automq_byoc_secret_key -} - -data "automq_deploy_profile" "test" { - environment_id = var.automq_environment_id - name = "eks" -} - -data "automq_data_bucket_profiles" "test" { - environment_id = var.automq_environment_id - profile_name = data.automq_deploy_profile.test.name -} - -resource "automq_kafka_instance" "example" { - environment_id = var.automq_environment_id - name = "automq-example-vm" - description = "example" - version = "1.4.1" - deploy_profile = data.automq_deploy_profile.test.name - - compute_specs = { - reserved_aku = 3 - networks = [ - { - zone = var.az - subnets = [data.aws_subnets.aws_subnets_example.ids[0]] - } - ] - kubernetes_node_groups = [{ - id = "automq-node-group" - }] - bucket_profiles = [ - { - id = data.automq_data_bucket_profiles.test.data_buckets[0].id - } - ] - } - - features = { - wal_mode = "EBSWAL" - security = { - authentication_methods = ["anonymous"] - transit_encryption_modes = ["plaintext"] - } - instance_configs = { - "auto.create.topics.enable" = "false" - "log.retention.ms" = "3600000" - } - integrations = [ - automq_integration.prometheus_remote_write_example_1.id, - ] - } -} - - -variable "vpc_id" { - type = string -} - -variable "region" { - type = string -} - -variable "az" { - type = string -} - -variable "automq_byoc_endpoint" { - type = string -} - -variable "automq_byoc_access_key_id" { - type = string -} - -variable "automq_byoc_secret_key" { - type = string -} - -variable "automq_environment_id" { - type = string -} - -provider "aws" { - region = var.region -} \ No newline at end of file diff --git 
a/cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example b/cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example deleted file mode 100644 index 5d8f2b2..0000000 --- a/cloudservice-setup/aws/eks-benchmark/terraform/automq/terraform.tfvars.example +++ /dev/null @@ -1,15 +0,0 @@ -# Fill in your environment-specific values here -# DO NOT COMMIT real secrets to VCS. Keep this file local. - -# AWS networking -vpc_id = "vpc-id" -region = "us-east-1" -az = "us-east-1a" - -# AutoMQ BYOC endpoint and credentials -automq_byoc_endpoint = "http://example.com" -automq_byoc_access_key_id = "access-key" -automq_byoc_secret_key = "secretkey" - -# AutoMQ environment id -automq_environment_id = "automqlab-id" \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf deleted file mode 100644 index 3054cc3..0000000 --- a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/main.tf +++ /dev/null @@ -1,87 +0,0 @@ -# Data sources to reference existing EKS cluster -data "aws_eks_cluster" "existing" { - name = var.cluster_name -} - -data "aws_eks_cluster_auth" "existing" { - name = var.cluster_name -} - -# Data source to reference existing IAM role -data "aws_iam_role" "existing_node_role" { - name = var.existing_node_role_name -} - -# Create the new node group for load testing -resource "aws_eks_node_group" "benchmark_nodes" { - cluster_name = data.aws_eks_cluster.existing.name - node_group_name = "benchmark-${var.resource_suffix}" - node_role_arn = data.aws_iam_role.existing_node_role.arn - subnet_ids = var.subnet_ids - - # Scaling configuration - at least 1 node with 4c8g - scaling_config { - desired_size = var.desired_size - max_size = var.max_size - min_size = var.min_size - } - - # Update configuration - update_config { - max_unavailable = 1 - } - - # Instance configuration - 4c8g instances - 
capacity_type = var.capacity_type - instance_types = var.instance_types - ami_type = var.ami_type - disk_size = var.disk_size - - # Labels for the node group - labels = merge( - { - "node.kubernetes.io/node-group" = "benchmark-${var.resource_suffix}" - "infrastructure.eks.amazonaws.com/managed-by" = "terraform" - "node.kubernetes.io/capacity-type" = lower(var.capacity_type) - "workload-type" = "benchmark" - "environment" = var.environment - }, - var.additional_labels - ) - - # Optional taints for dedicated nodes - dynamic "taint" { - for_each = var.enable_dedicated_nodes ? [1] : [] - content { - key = "workload-type" - value = "benchmark" - effect = "NO_SCHEDULE" - } - } - - # Remote access configuration (optional) - dynamic "remote_access" { - for_each = var.enable_remote_access ? [1] : [] - content { - ec2_ssh_key = var.ec2_ssh_key - source_security_group_ids = var.source_security_group_ids - } - } - - # Tags - tags = merge( - { - Name = "benchmark-${var.resource_suffix}" - Environment = var.environment - ManagedBy = "terraform" - Purpose = "benchmark" - }, - var.additional_tags - ) - - # Ensure proper ordering - depends_on = [ - data.aws_eks_cluster.existing, - data.aws_iam_role.existing_node_role - ] -} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf deleted file mode 100644 index edf72d7..0000000 --- a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/outputs.tf +++ /dev/null @@ -1,77 +0,0 @@ -# Node group outputs -output "node_group_name" { - description = "Name of the created node group" - value = aws_eks_node_group.benchmark_nodes.node_group_name -} - -output "node_group_arn" { - description = "ARN of the created node group" - value = aws_eks_node_group.benchmark_nodes.arn -} - -output "node_group_status" { - description = "Status of the node group" - value = aws_eks_node_group.benchmark_nodes.status -} - 
-output "node_group_capacity_type" { - description = "Capacity type of the node group" - value = aws_eks_node_group.benchmark_nodes.capacity_type -} - -output "node_group_instance_types" { - description = "Instance types used by the node group" - value = aws_eks_node_group.benchmark_nodes.instance_types -} - -output "node_group_scaling_config" { - description = "Scaling configuration of the node group" - value = { - desired_size = aws_eks_node_group.benchmark_nodes.scaling_config[0].desired_size - max_size = aws_eks_node_group.benchmark_nodes.scaling_config[0].max_size - min_size = aws_eks_node_group.benchmark_nodes.scaling_config[0].min_size - } -} - -output "node_group_labels" { - description = "Labels applied to the node group" - value = aws_eks_node_group.benchmark_nodes.labels -} - -# Cluster information -output "cluster_name" { - description = "Name of the EKS cluster" - value = data.aws_eks_cluster.existing.name -} - -output "cluster_endpoint" { - description = "Endpoint of the EKS cluster" - value = data.aws_eks_cluster.existing.endpoint -} - -output "cluster_version" { - description = "Version of the EKS cluster" - value = data.aws_eks_cluster.existing.version -} - -# Node selector and tolerations for workload scheduling -output "node_selector_labels" { - description = "Labels to use for node selection in pod specs" - value = { - "node.kubernetes.io/node-group" = aws_eks_node_group.benchmark_nodes.node_group_name - "workload-type" = "benchmark" - "environment" = var.environment - } -} - -output "tolerations" { - description = "Tolerations to use in pod specs if dedicated nodes are enabled" - value = var.enable_dedicated_nodes ? 
[ - { - key = "workload-type" - operator = "Equal" - value = "benchmark" - effect = "NoSchedule" - } - ] : [] -} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf deleted file mode 100644 index 281185a..0000000 --- a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/provider.tf +++ /dev/null @@ -1,14 +0,0 @@ -terraform { - required_version = ">= 1.0" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">= 5.0.0" - } - } -} - -provider "aws" { - region = var.aws_region -} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example deleted file mode 100644 index 150c861..0000000 --- a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/terraform.tfvars.example +++ /dev/null @@ -1,34 +0,0 @@ -# EKS Cluster Configuration -cluster_name = "cluster-name" -existing_node_role_name = "node-group-role" -aws_region = "us-east-1" -environment = "dev" - -# Subnet Configuration (only us-east-1a for new node group) -subnet_ids = [ - "subnet-id" # Recommend us-east-1a only, use your own vpc id. 
-] - -# Node Group Configuration -resource_suffix = "observability" -capacity_type = "ON_DEMAND" -instance_types = ["c5.xlarge", "c5a.xlarge", "c5n.xlarge", "m5.xlarge", "m5a.xlarge"] -desired_size = 1 -max_size = 3 -min_size = 1 -ami_type = "AL2023_x86_64_STANDARD" -disk_size = 50 - -# Optional configurations -enable_dedicated_nodes = false -enable_remote_access = false - -# Additional labels and tags -additional_labels = { - "team" = "platform" -} - -additional_tags = { - "Project" = "AutoMQ" - "Team" = "Platform" -} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf deleted file mode 100644 index 01cdd7e..0000000 --- a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark-node/variables.tf +++ /dev/null @@ -1,110 +0,0 @@ -# Required variables - must be specified in terraform.tfvars.example -variable "cluster_name" { - description = "Name of the existing EKS cluster" - type = string -} - -variable "existing_node_role_name" { - description = "Name of the existing node group IAM role" - type = string -} - -variable "subnet_ids" { - description = "List of subnet IDs where the node group will be deployed" - type = list(string) -} - -variable "resource_suffix" { - description = "Suffix to append to resource names for uniqueness" - type = string -} - -variable "aws_region" { - description = "AWS region" - type = string -} - -variable "environment" { - description = "Environment name" - type = string -} - -# Node group configuration -variable "capacity_type" { - description = "Type of capacity associated with the EKS Node Group. 
Valid values: ON_DEMAND, SPOT" - type = string - default = "ON_DEMAND" -} - -variable "instance_types" { - description = "List of instance types for the node group - configured for at least 4c8g" - type = list(string) - default = ["c5.xlarge", "c5a.xlarge", "c5n.xlarge", "m5.xlarge", "m5a.xlarge"] -} - -variable "desired_size" { - description = "Desired number of nodes" - type = number - default = 2 -} - -variable "max_size" { - description = "Maximum number of nodes" - type = number - default = 3 -} - -variable "min_size" { - description = "Minimum number of nodes" - type = number - default = 1 -} - -variable "ami_type" { - description = "Type of Amazon Machine Image (AMI) associated with the EKS Node Group" - type = string - default = "AL2023_x86_64_STANDARD" -} - -variable "disk_size" { - description = "Disk size in GiB for worker nodes" - type = number - default = 50 -} - -# Optional configurations -variable "enable_dedicated_nodes" { - description = "Whether to add taints to make nodes dedicated for load testing" - type = bool - default = false -} - -variable "enable_remote_access" { - description = "Whether to enable remote access to the nodes" - type = bool - default = false -} - -variable "ec2_ssh_key" { - description = "EC2 Key Pair name for SSH access" - type = string - default = null -} - -variable "source_security_group_ids" { - description = "Security group IDs allowed for remote access" - type = list(string) - default = [] -} - -variable "additional_labels" { - description = "Additional labels to apply to the node group" - type = map(string) - default = {} -} - -variable "additional_tags" { - description = "Additional tags to apply to resources" - type = map(string) - default = {} -} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf new file mode 100644 index 0000000..4d5c243 --- /dev/null +++ 
b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf @@ -0,0 +1,55 @@ +# Data source to get the existing node group IAM role +data "aws_iam_role" "benchmark_node_group_role" { + count = var.enable_benchmark_nodes ? 1 : 0 + name = var.benchmark_node_role_name +} + +# Benchmark Node Group +resource "aws_eks_node_group" "benchmark_node_group" { + count = var.enable_benchmark_nodes ? 1 : 0 + cluster_name = module.eks-env.cluster_name + node_group_name = "benchmark-node-group-${var.resource_suffix}" + node_role_arn = data.aws_iam_role.benchmark_node_group_role[0].arn + + # Use the same subnet as the default node group (single AZ for cost optimization) + subnet_ids = slice(module.eks-env.private_subnets, 0, 1) + + scaling_config { + desired_size = var.benchmark_desired_size + max_size = var.benchmark_max_size + min_size = var.benchmark_min_size + } + + capacity_type = var.benchmark_capacity_type + instance_types = var.benchmark_instance_types + ami_type = var.benchmark_ami_type + disk_size = var.benchmark_disk_size + + labels = merge( + { + "node-type" = "benchmark" + "workload-type" = "benchmark" + } + ) + + # Add taints for dedicated benchmark nodes if enabled + dynamic "taint" { + for_each = var.benchmark_enable_dedicated_nodes ? [1] : [] + content { + key = "benchmark-node" + value = "true" + effect = "NO_SCHEDULE" + } + } + + tags = merge( + { + Name = "benchmark-node-group-${var.resource_suffix}" + } + ) + + # Ensure that IAM Role permissions are created before and deleted after EKS Node Group handling. 
+ depends_on = [ + module.eks-env + ] +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/eks.tf b/cloudservice-setup/aws/eks-benchmark/terraform/eks.tf new file mode 100644 index 0000000..8445f3d --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/eks.tf @@ -0,0 +1,37 @@ +module "eks-env" { + source = "../../../../kubernetes/aws/terraform" + + region = var.region + resource_suffix = var.resource_suffix + + node_group = var.node_group +} + +resource "aws_vpc_security_group_ingress_rule" "automq_console_ingress_rule" { + description = "Allow inbound traffic from security group of AutoMQ Console" + from_port = 0 + to_port = 65535 + ip_protocol = "tcp" + security_group_id = module.eks-env.eks_cluster_security_group + referenced_security_group_id = module.automq-byoc.automq_byoc_security_group_id + + depends_on = [module.automq-byoc, module.eks-env] +} + +resource "aws_eks_access_entry" "cluster_admins" { + cluster_name = module.eks-env.cluster_name + principal_arn = module.automq-byoc.automq_byoc_console_role_arn + kubernetes_groups = [] + type = "STANDARD" + depends_on = [module.automq-byoc, module.eks-env] +} + +resource "aws_eks_access_policy_association" "cluster_admins" { + cluster_name = module.eks-env.cluster_name + policy_arn = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy" + principal_arn = module.automq-byoc.automq_byoc_console_role_arn + access_scope { + type = "cluster" + } + depends_on = [module.automq-byoc, module.eks-env] +} diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/main.tf b/cloudservice-setup/aws/eks-benchmark/terraform/main.tf new file mode 100644 index 0000000..dd812e9 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/main.tf @@ -0,0 +1,12 @@ +module "automq-byoc" { + source = "AutoMQ/automq-byoc-environment/aws" + version = "0.3.2" + + cloud_provider_region = var.region + automq_byoc_env_id = var.resource_suffix + + create_new_vpc = false 
+ automq_byoc_vpc_id = module.eks-env.vpc_id + automq_byoc_env_console_public_subnet_id = module.eks-env.public_subnets[0] +} + diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml new file mode 100644 index 0000000..1ccf67a --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml @@ -0,0 +1,114 @@ +rbac: + create: false + +podSecurityPolicy: + enabled: false + +grafana: + adminPassword: "AutoMQ@Grafana" + persistence: + enabled: false + + service: + enabled: true + type: LoadBalancer + port: 80 + targetPort: 3000 + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: "nlb" + service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" + service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true" + + nodeSelector: + workload-type: benchmark + +prometheus: + prometheusSpec: + enableRemoteWriteReceiver: true + +server: + name: prometheus-server + + defaultFlagsOverride: + - --enable-feature=exemplar-storage,remote-write-receiver,otlp-write-receiver + - --config.file=/etc/config/prometheus.yml + + extraFlags: + - web.enable-lifecycle + - storage.tsdb.wal-compression + + configPath: /etc/config/prometheus.yml + storagePath: "/prometheus" + + global: + scrape_interval: 1m + scrape_timeout: 10s + evaluation_interval: 1m + + tsdb: + out_of_order_time_window: 60s + + extraArgs: + web.enable-remote-write-receiver: "" + + tolerations: [] + + nodeSelector: + workload-type: benchmark + + persistentVolume: + enabled: true + accessModes: + - ReadWriteOnce + mountPath: /data + size: 30Gi + storageClass: "${STORAGE_CLASS_NAME}" + + replicaCount: 1 + + statefulSet: + enabled: true + podManagementPolicy: OrderedReady + pvcDeleteOnStsDelete: false + pvcDeleteOnStsScale: false + + resources: + requests: + cpu: 500m + memory: 2Gi + + securityContext: + runAsUser: 65534 + runAsNonRoot: true + 
runAsGroup: 65534 + fsGroup: 65534 + + service: + enabled: true + servicePort: 9090 + type: ClusterIP + + terminationGracePeriodSeconds: 300 + retention: "30d" + retentionSize: "100GB" + +serverFiles: + prometheus.yml: + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + +prometheus-node-exporter: + enabled: false + +alertmanager: + enabled: false + +kube-state-metrics: + enabled: false + +prometheus-pushgateway: + enabled: false + diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf new file mode 100644 index 0000000..c27bca7 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf @@ -0,0 +1,139 @@ +output "console_endpoint" { + description = "Console endpoint for the AutoMQ BYOC environment" + value = module.automq-byoc.automq_byoc_endpoint +} + +output "initial_username" { + description = "Initial username for the AutoMQ BYOC environment" + value = module.automq-byoc.automq_byoc_initial_username +} + +output "initial_password" { + description = "Initial password for the AutoMQ BYOC environment" + value = module.automq-byoc.automq_byoc_initial_password +} + +output "dns_zone_id" { + description = "Route53 DNS Zone ID for the AutoMQ BYOC environment" + value = module.automq-byoc.automq_byoc_vpc_route53_zone_id +} + +output "cluster_name" { + description = "Name of the EKS cluster" + value = module.eks-env.cluster_name +} + +output "cluster_endpoint" { + description = "Endpoint for EKS control plane" + value = module.eks-env.cluster_endpoint +} + +output "cluster_security_group_id" { + description = "Security group ids attached to the cluster control plane" + value = module.eks-env.cluster_security_group_id +} + +output "cluster_iam_role_name" { + description = "IAM role name associated with EKS cluster" + value = module.eks-env.cluster_iam_role_name +} + +output "cluster_iam_role_arn" { + description = "IAM role ARN 
associated with EKS cluster" + value = module.eks-env.cluster_iam_role_arn +} + +output "cluster_certificate_authority_data" { + description = "Base64 encoded certificate data required to communicate with the cluster" + value = module.eks-env.cluster_certificate_authority_data +} + +output "cluster_version" { + description = "The Kubernetes version for the EKS cluster" + value = module.eks-env.cluster_version +} + +output "node_groups" { + description = "EKS node groups" + value = module.eks-env.node_groups +} + +output "fargate_profiles" { + description = "EKS Fargate profiles" + value = module.eks-env.fargate_profiles +} + +output "oidc_provider_arn" { + description = "The ARN of the OIDC Provider if enabled" + value = module.eks-env.oidc_provider_arn +} + +# Benchmark Node Group Outputs +output "benchmark_node_group_name" { + description = "Name of the benchmark node group" + value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].node_group_name : null +} + +output "benchmark_node_group_arn" { + description = "ARN of the benchmark node group" + value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].arn : null +} + +output "benchmark_node_group_status" { + description = "Status of the benchmark node group" + value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].status : null +} + +output "benchmark_node_group_capacity_type" { + description = "Type of capacity associated with the benchmark EKS Node Group" + value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].capacity_type : null +} + +output "benchmark_node_group_instance_types" { + description = "Set of instance types associated with the benchmark EKS Node Group" + value = var.enable_benchmark_nodes ? 
aws_eks_node_group.benchmark_node_group[0].instance_types : null +} + +output "benchmark_node_group_scaling_config" { + description = "Configuration block with scaling settings for the benchmark node group" + value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].scaling_config : null +} + +output "benchmark_node_group_labels" { + description = "Key-value map of Kubernetes labels applied to the benchmark nodes" + value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].labels : null +} + +output "benchmark_tolerations" { + description = "Tolerations to use when scheduling workloads on dedicated benchmark nodes" + value = var.enable_benchmark_nodes && var.benchmark_enable_dedicated_nodes ? [ + { + key = "benchmark-node" + operator = "Equal" + value = "true" + effect = "NoSchedule" + } + ] : null +} + +output "node_group_instance_profile_arn" { + description = "ARN of the EKS Node Group" + value = module.eks-env.node_group_instance_profile_arn +} + +# Prometheus Outputs +output "prometheus_namespace" { + description = "Kubernetes namespace where Prometheus is deployed" + value = var.enable_prometheus ? helm_release.prometheus[0].namespace : null +} + +output "prometheus_release_name" { + description = "Helm release name for Prometheus" + value = var.enable_prometheus ? helm_release.prometheus[0].name : null +} + +output "prometheus_chart_version" { + description = "Version of the Prometheus Helm chart deployed" + value = var.enable_prometheus ? 
helm_release.prometheus[0].version : null +} + diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf new file mode 100644 index 0000000..955ca99 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf @@ -0,0 +1,40 @@ + +resource "kubernetes_namespace_v1" "monitoring" { + metadata { + name = "monitoring" + + labels = { + "app" = "automq" + "managed-by" = "terraform" + "purpose" = "monitoring" + } + + annotations = { + "description" = "Namespace for Monitoring" + } + } + + depends_on = [module.eks-env] +} + +resource "helm_release" "prometheus" { + chart = "kube-prometheus-stack" + repository = "https://prometheus-community.github.io/helm-charts" + name = "prometheus" + namespace = kubernetes_namespace_v1.monitoring.metadata[0].name + version = "45.7.1" + + timeout = 600 # 不加会超时 + wait = true + + create_namespace = true + + values = [ + templatefile("${path.module}/monitoring/prometheus.yaml") + ] + + depends_on = [ + module.eks-env, + kubernetes_namespace_v1.monitoring + ] +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf b/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf new file mode 100644 index 0000000..b46c6a6 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf @@ -0,0 +1,22 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + helm = { + source = "hashicorp/helm" + version = "~> 2.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.0" + } + } +} + +# Configure AWS Provider +provider "aws" { + region = var.region +} + diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf new file mode 100644 index 0000000..1ec8ef5 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf @@ -0,0 +1,163 @@ + 
+variable "region" { + description = "AWS region" + type = string + default = "us-east-1" +} + +variable "resource_suffix" { + description = "Suffix for resource names" + type = string + default = "automqlab-beihai-toge-test" +} + +variable "node_group" { + description = "Configuration for EKS node group" + type = object({ + name = string + ami_type = string + instance_type = string + desired_size = number + max_size = number + min_size = number + }) + default = { + name = "automq-node-group" + desired_size = 4 # Desired number of nodes + max_size = 10 # Maximum number of nodes + min_size = 3 # Minimum number of nodes + instance_type = "c6g.2xlarge" # Compute-optimized instance with AWS Graviton2 processor + ami_type = "AL2_ARM_64" # Amazon Linux 2 AMI type, can use AL2_ARM_64 for ARM architecture + } +} + +# Benchmark Node Group Configuration +variable "enable_benchmark_nodes" { + description = "Whether to create benchmark node group" + type = bool + default = true +} + +variable "benchmark_node_role_name" { + description = "Name of the existing node group IAM role for benchmark nodes" + type = string + default = "" +} + +variable "benchmark_subnet_ids" { + description = "List of subnet IDs where the benchmark node group will be deployed" + type = list(string) + default = [] +} + +# Benchmark node group scaling configuration +variable "benchmark_capacity_type" { + description = "Type of capacity associated with the benchmark EKS Node Group. 
Valid values: ON_DEMAND, SPOT" + type = string + default = "ON_DEMAND" +} + +variable "benchmark_instance_types" { + description = "List of instance types for the benchmark node group - configured for at least 4c8g" + type = list(string) + default = ["c5.xlarge", "c5a.xlarge", "c5n.xlarge", "m5.xlarge", "m5a.xlarge"] +} + +variable "benchmark_desired_size" { + description = "Desired number of benchmark nodes" + type = number + default = 2 +} + +variable "benchmark_max_size" { + description = "Maximum number of benchmark nodes" + type = number + default = 3 +} + +variable "benchmark_min_size" { + description = "Minimum number of benchmark nodes" + type = number + default = 1 +} + +variable "benchmark_ami_type" { + description = "Type of Amazon Machine Image (AMI) associated with the benchmark EKS Node Group" + type = string + default = "AL2023_x86_64_STANDARD" +} + +variable "benchmark_disk_size" { + description = "Disk size in GiB for benchmark worker nodes" + type = number + default = 50 +} + +# Optional benchmark configurations +variable "benchmark_enable_dedicated_nodes" { + description = "Whether to add taints to make benchmark nodes dedicated for load testing" + type = bool + default = true +} + +# Prometheus Configuration +variable "enable_prometheus" { + description = "Whether to deploy Prometheus monitoring stack" + type = bool + default = true +} + +variable "prometheus_namespace" { + description = "Kubernetes namespace for Prometheus deployment" + type = string + default = "monitoring" +} + +variable "prometheus_chart_version" { + description = "Version of the kube-prometheus-stack Helm chart" + type = string + default = "61.3.0" +} + +variable "grafana_admin_password" { + description = "Admin password for Grafana" + type = string + default = "AutoMQ@Grafana" + sensitive = true +} + +variable "prometheus_storage_size" { + description = "Storage size for Prometheus persistent volume" + type = string + default = "30Gi" +} + +variable 
"prometheus_storage_class" { + description = "Storage class for Prometheus persistent volume" + type = string + default = "gp2" +} + +variable "prometheus_cpu_request" { + description = "CPU request for Prometheus server" + type = string + default = "500m" +} + +variable "prometheus_memory_request" { + description = "Memory request for Prometheus server" + type = string + default = "2Gi" +} + +variable "prometheus_retention" { + description = "Data retention period for Prometheus" + type = string + default = "30d" +} + +variable "prometheus_retention_size" { + description = "Maximum size of data retention for Prometheus" + type = string + default = "100GB" +} \ No newline at end of file From 07a056764a970cf20da9d670c1f0008c140e299a Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 29 Oct 2025 12:58:39 +0800 Subject: [PATCH 07/20] Validate and adjust the Terraform code --- .../aws/eks-benchmark/terraform/benchmark.tf | 30 +++------- .../aws/eks-benchmark/terraform/outputs.tf | 60 ------------------- .../aws/eks-benchmark/terraform/prometheus.tf | 13 ++-- .../aws/eks-benchmark/terraform/providers.tf | 38 +++++++++++- .../aws/eks-benchmark/terraform/variables.tf | 41 +------------ kubernetes/aws/terraform/outputs.tf | 14 +++++ 6 files changed, 68 insertions(+), 128 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf index 4d5c243..c701ec6 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf @@ -1,16 +1,10 @@ -# Data source to get the existing node group IAM role -data "aws_iam_role" "benchmark_node_group_role" { - count = var.enable_benchmark_nodes ? 1 : 0 - name = var.benchmark_node_role_name -} - # Benchmark Node Group resource "aws_eks_node_group" "benchmark_node_group" { count = var.enable_benchmark_nodes ? 
1 : 0 cluster_name = module.eks-env.cluster_name node_group_name = "benchmark-node-group-${var.resource_suffix}" - node_role_arn = data.aws_iam_role.benchmark_node_group_role[0].arn - + node_role_arn = module.eks-env.node_role_arn + # Use the same subnet as the default node group (single AZ for cost optimization) subnet_ids = slice(module.eks-env.private_subnets, 0, 1) @@ -20,10 +14,10 @@ resource "aws_eks_node_group" "benchmark_node_group" { min_size = var.benchmark_min_size } - capacity_type = var.benchmark_capacity_type - instance_types = var.benchmark_instance_types - ami_type = var.benchmark_ami_type - disk_size = var.benchmark_disk_size + capacity_type = var.benchmark_capacity_type + instance_types = var.benchmark_instance_types + ami_type = var.benchmark_ami_type + disk_size = var.benchmark_disk_size labels = merge( { @@ -32,19 +26,9 @@ resource "aws_eks_node_group" "benchmark_node_group" { } ) - # Add taints for dedicated benchmark nodes if enabled - dynamic "taint" { - for_each = var.benchmark_enable_dedicated_nodes ? 
[1] : [] - content { - key = "benchmark-node" - value = "true" - effect = "NO_SCHEDULE" - } - } - tags = merge( { - Name = "benchmark-node-group-${var.resource_suffix}" + Name = "benchmark-node-group-${var.resource_suffix}" } ) diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf index c27bca7..ab349e6 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf @@ -23,51 +23,6 @@ output "cluster_name" { value = module.eks-env.cluster_name } -output "cluster_endpoint" { - description = "Endpoint for EKS control plane" - value = module.eks-env.cluster_endpoint -} - -output "cluster_security_group_id" { - description = "Security group ids attached to the cluster control plane" - value = module.eks-env.cluster_security_group_id -} - -output "cluster_iam_role_name" { - description = "IAM role name associated with EKS cluster" - value = module.eks-env.cluster_iam_role_name -} - -output "cluster_iam_role_arn" { - description = "IAM role ARN associated with EKS cluster" - value = module.eks-env.cluster_iam_role_arn -} - -output "cluster_certificate_authority_data" { - description = "Base64 encoded certificate data required to communicate with the cluster" - value = module.eks-env.cluster_certificate_authority_data -} - -output "cluster_version" { - description = "The Kubernetes version for the EKS cluster" - value = module.eks-env.cluster_version -} - -output "node_groups" { - description = "EKS node groups" - value = module.eks-env.node_groups -} - -output "fargate_profiles" { - description = "EKS Fargate profiles" - value = module.eks-env.fargate_profiles -} - -output "oidc_provider_arn" { - description = "The ARN of the OIDC Provider if enabled" - value = module.eks-env.oidc_provider_arn -} - # Benchmark Node Group Outputs output "benchmark_node_group_name" { description = "Name of the benchmark node group" @@ -121,19 
+76,4 @@ output "node_group_instance_profile_arn" { value = module.eks-env.node_group_instance_profile_arn } -# Prometheus Outputs -output "prometheus_namespace" { - description = "Kubernetes namespace where Prometheus is deployed" - value = var.enable_prometheus ? helm_release.prometheus[0].namespace : null -} - -output "prometheus_release_name" { - description = "Helm release name for Prometheus" - value = var.enable_prometheus ? helm_release.prometheus[0].name : null -} - -output "prometheus_chart_version" { - description = "Version of the Prometheus Helm chart deployed" - value = var.enable_prometheus ? helm_release.prometheus[0].version : null -} diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf index 955ca99..d4d1544 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf @@ -1,7 +1,7 @@ resource "kubernetes_namespace_v1" "monitoring" { metadata { - name = "monitoring" + name = var.prometheus_namespace labels = { "app" = "automq" @@ -22,19 +22,22 @@ resource "helm_release" "prometheus" { repository = "https://prometheus-community.github.io/helm-charts" name = "prometheus" namespace = kubernetes_namespace_v1.monitoring.metadata[0].name - version = "45.7.1" + version = var.prometheus_chart_version - timeout = 600 # 不加会超时 + timeout = 600 # Without this, the Helm install times out wait = true create_namespace = true values = [ - templatefile("${path.module}/monitoring/prometheus.yaml") + templatefile("${path.module}/monitoring/prometheus.yaml", { + STORAGE_CLASS_NAME = var.prometheus_storage_class + }) ] depends_on = [ module.eks-env, - kubernetes_namespace_v1.monitoring + kubernetes_namespace_v1.monitoring, + aws_eks_node_group.benchmark_node_group ] } \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf b/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf index
b46c6a6..034ebe1 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf @@ -2,7 +2,7 @@ terraform { required_providers { aws = { source = "hashicorp/aws" - version = "~> 5.0" + version = " > 5.0.0" } helm = { source = "hashicorp/helm" @@ -20,3 +20,39 @@ provider "aws" { region = var.region } +# Configure Kubernetes provider to connect to the EKS cluster +provider "kubernetes" { + host = module.eks-env.eks_cluster_endpoint + cluster_ca_certificate = base64decode(module.eks-env.eks_cluster_ca_certificate) + + # Use AWS CLI to obtain EKS token dynamically + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + args = [ + "eks", + "get-token", + "--cluster-name", + module.eks-env.cluster_name, + ] + } +} + +# Configure Helm provider using the same Kubernetes connection +provider "helm" { + kubernetes { + host = module.eks-env.eks_cluster_endpoint + cluster_ca_certificate = base64decode(module.eks-env.eks_cluster_ca_certificate) + + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + args = [ + "eks", + "get-token", + "--cluster-name", + module.eks-env.cluster_name, + ] + } + } +} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf index 1ec8ef5..0d42a11 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf @@ -8,7 +8,7 @@ variable "region" { variable "resource_suffix" { description = "Suffix for resource names" type = string - default = "automqlab-beihai-toge-test" + default = "automqlab-bt" } variable "node_group" { @@ -119,45 +119,8 @@ variable "prometheus_chart_version" { default = "61.3.0" } -variable "grafana_admin_password" { - description = "Admin password for Grafana" - type = string - default = "AutoMQ@Grafana" - sensitive = 
true -} - -variable "prometheus_storage_size" { - description = "Storage size for Prometheus persistent volume" - type = string - default = "30Gi" -} - variable "prometheus_storage_class" { - description = "Storage class for Prometheus persistent volume" + description = "StorageClass name used for Prometheus PVCs" type = string default = "gp2" -} - -variable "prometheus_cpu_request" { - description = "CPU request for Prometheus server" - type = string - default = "500m" -} - -variable "prometheus_memory_request" { - description = "Memory request for Prometheus server" - type = string - default = "2Gi" -} - -variable "prometheus_retention" { - description = "Data retention period for Prometheus" - type = string - default = "30d" -} - -variable "prometheus_retention_size" { - description = "Maximum size of data retention for Prometheus" - type = string - default = "100GB" } \ No newline at end of file diff --git a/kubernetes/aws/terraform/outputs.tf b/kubernetes/aws/terraform/outputs.tf index 81d2282..d5f749d 100644 --- a/kubernetes/aws/terraform/outputs.tf +++ b/kubernetes/aws/terraform/outputs.tf @@ -20,6 +20,20 @@ output "node_group_name" { value = aws_eks_node_group.automq-node-groups.node_group_name } +output "node_role_arn" { + description = "EKS Node Role ARN" + value = aws_eks_node_group.automq-node-groups.node_role_arn +} + +output "eks_cluster_endpoint" { + value = module.eks.eks_cluster_endpoint +} + +output "eks_cluster_ca_certificate" { + value = module.eks.eks_cluster_ca_certificate +} + + output "node_group_instance_profile_arn" { description = "ARN of the EKS Node Group IAM Role" value = module.cluster-iam.node_group_instance_profile_arn From bd42dfb33725b4e57ed625213cf56eb4bc23b14d Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 29 Oct 2025 15:42:58 +0800 Subject: [PATCH 08/20] Complete and enhance the README. 
--- .../aws/eks-benchmark/README.md | 308 +++++------------- .../aws/eks-benchmark/terraform/variables.tf | 96 +----- 2 files changed, 78 insertions(+), 326 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index 7779423..0a0ce45 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -7,14 +7,16 @@ This project eliminates that complexity. It is designed to provide a seamless, o The primary goal is to empower users to effortlessly spin up a fully operational, observable, and testable AutoMQ cluster, drastically reducing setup time and manual configuration. ## Overview + +This project follows a simple, three-step end-to-end flow to go from infrastructure to benchmarking with minimal manual work: + +1) Provision with Terraform: bring up the required components — `EKS`, `AutoMQ Console` (BYOC control plane), and the observability stack (Prometheus/Grafana). After this step, the Kubernetes cluster and monitoring environment are ready. + +2) Configure in AutoMQ Console and create the cluster: create the required `Profile` and credentials in the Console, then create or connect your `AutoMQ Cluster` (BYOC). Use these values in the subsequent Terraform/Helm configuration to enable connectivity. + +3) Run benchmarks via the provided Helm chart: go to `helm-chart/automq-benchmark`, set connection details and workload parameters (topics, partitions, message size, concurrency, etc.), deploy the benchmark Job, and observe throughput and latency in Grafana. 
-The project provides: -- **Infrastructure Setup**: Terraform modules for deploying dedicated benchmark nodes on existing EKS clusters -- **AutoMQ Integration**: Automated deployment and configuration of AutoMQ instances with monitoring integration -- **Observability Stack**: Prometheus and Grafana deployment for comprehensive monitoring and visualization -- **Benchmark Tools**: Helm charts for running performance tests against AutoMQ clusters -- **Dashboard Visualization**: Pre-configured Grafana dashboards to visualize benchmark results and cluster metrics ## Architecture @@ -24,14 +26,6 @@ The project provides: Before using this project, ensure you have: -### Required Infrastructure -- **Existing EKS Cluster**: A running Amazon EKS cluster -- **AutoMQ Console**: AutoMQ Console already installed and configured -- **AutoMQ Cluster**: At least one AutoMQ cluster deployed and operational - -> **Note**: You can refer to [`/cloudservice-setup/aws/eks/`](../eks/) for instructions on setting up a complete EKS cluster with AutoMQ Console using Terraform. 
- - ### Required Tools - **Terraform** (>= 1.0) - **kubectl** configured for your EKS cluster @@ -39,219 +33,90 @@ Before using this project, ensure you have: - **AWS CLI** configured with appropriate permissions ### Required Permissions + - EKS cluster management permissions - EC2 instance and networking permissions - IAM role management permissions - S3 bucket access (for AutoMQ data storage) -## Project Structure +## Quick Start -``` -eks-benchmark/ -├── terraform/ -│ ├── benchmark-node/ # Terraform module for benchmark nodes -│ │ ├── main.tf # Node group configuration -│ │ ├── variables.tf # Input variables -│ │ ├── outputs.tf # Output values -│ │ └── terraform.tfvars.example -│ └── automq/ # AutoMQ deployment configuration -│ ├── main.tf # AutoMQ instance setup -│ └── terraform.tfvars.example -├── helm-chart/ -│ └── automq-benchmark/ # Helm chart for benchmark workloads -│ ├── Chart.yaml -│ ├── values.yaml # Benchmark configuration -│ └── templates/ -├── monitoring/ -│ └── prometheus.yaml # Prometheus & Grafana configuration -└── README.md +### Step 1: Deploy Benchmark Infrastructure + +This step provisions and integrates everything via Terraform in `./eks-benchmark/terraform`: + +- EKS cluster (creating and configuring required `VPC`, subnets, `Security Group`, `IAM`, and related networking/permission resources) +- AutoMQ BYOC Console (deployed in the same VPC public subnet, with access and security integrated to the EKS cluster) +- Observability stack (Prometheus/Grafana) installed via Helm `kube-prometheus-stack` for collecting and visualizing benchmark metrics + +All necessary cloud resources (including networking and object storage such as `S3`) will be newly created and wired up in this step. + +1. Plan the Deployment: from the `terraform` directory, run `terraform init` once to download the required providers and modules, then run `terraform plan` to preview the resources that will be created. + +Tip: To control resource naming and avoid conflicts, set `resource_suffix` in `terraform/variables.tf`. + +```bash +terraform init && terraform plan ``` -## Quick Start +2.
Apply the Deployment After reviewing the plan, execute terraform apply to begin the deployment. This process may take 25-30 minutes. -### Step 1: Deploy Benchmark Infrastructure +```bash +terraform apply +``` + +Enter yes at the prompt to confirm. -This step creates dedicated EKS node groups optimized for running benchmark workloads. These nodes are configured with appropriate instance types (4c8g minimum) and can be optionally tainted to ensure benchmark workloads run in isolation from other cluster workloads. - -**Expected Result**: A new EKS node group will be created and ready to host benchmark pods, providing the computational resources needed for performance testing. - -1. **Configure benchmark nodes**: - ```bash - cd terraform/benchmark-node - cp terraform.tfvars.example terraform.tfvars - # Edit terraform.tfvars with your cluster details - ``` - - **Required Configuration Parameters**: - - You can override the following variables by creating a `terraform.tfvars` file or by using the `-var` command-line argument: - - - **`cluster_name`** - - **Description**: The name of your existing EKS cluster where benchmark nodes will be deployed. - - **Type**: `string` - - **Required**: Yes - - **How to find**: Use `kubectl config current-context` or check AWS EKS console - - - **`existing_node_role_name`** - - **Description**: The IAM role name used by existing EKS node groups in your cluster. - - **Type**: `string` - - **Required**: Yes - - **How to find**: Check your existing node group's IAM role in AWS EKS console - - - **`aws_region`** - - **Description**: The AWS region where your EKS cluster is located and resources will be deployed. - - **Required**: Yes - - **Type**: `string` - - **Default**: `"us-east-1"` - - - **`environment`** - - **Description**: Environment tag used for resource identification and organization. 
- - **Required**: Yes - - **Type**: `string` - - **Default**: `"dev"` - - - **`subnet_ids`** - - **Description**: List of subnet IDs where benchmark nodes will be deployed. Recommend using only one subnet in us-east-1a for optimal performance. - - **Type**: `list(string)` - - **Required**: Yes - -2. **Deploy benchmark nodes**: - ```bash - terraform init - terraform plan - terraform apply - ``` ### Step 2: Deploy AutoMQ Instance -Access the AutoMQ control plane obtained in the previous step to create access credentials aksk. In the current version, you also need to create an eks profile for further access to the cluster, which needs to be filled into the terraform variables. Future releases of AutoMQ will allow profile creation through terraform. - -1. **Configure AutoMQ deployment**: - ```bash - cd terraform/automq - cp terraform.tfvars.example terraform.tfvars - # Edit terraform.tfvars with your AutoMQ BYOC credentials - ``` - - **Required Configuration Parameters**: - - You can override the following variables by creating a `terraform.tfvars` file or by using the `-var` command-line argument: - - - **`vpc_id`** - - **Description**: The VPC ID where your EKS cluster is deployed and AutoMQ resources will be created. - - **Type**: `string` - - **Required**: Yes - - **How to find**: Check AWS VPC console or use `aws ec2 describe-vpcs` command - - - **`region`** - - **Description**: The AWS region where AutoMQ resources will be deployed. - - **Type**: `string` - - **Default**: `"us-east-1"` - - - **`az`** - - **Description**: The availability zone where AutoMQ resources will be deployed. - - **Type**: `string` - - **Default**: `"us-east-1a"` - - - **`automq_byoc_endpoint`** - - **Description**: The AutoMQ BYOC (Bring Your Own Cloud) endpoint URL for API access. 
- - **Type**: `string` - - **Required**: Yes - - **How to find**: Obtain from AutoMQ Console after setting up your BYOC environment - - - **`automq_byoc_access_key_id`** - - **Description**: Access key ID for AutoMQ BYOC authentication. - - **Type**: `string` - - **Required**: Yes - - **How to find**: Generate from AutoMQ Console credentials section - - - **`automq_byoc_secret_key`** - - **Description**: Secret access key for AutoMQ BYOC authentication. - - **Type**: `string` - - **Required**: Yes - - **How to find**: Generate from AutoMQ Console credentials section (keep secure) - - - **`automq_environment_id`** - - **Description**: The AutoMQ environment identifier for resource organization. - - **Type**: `string` - - **Required**: Yes - - **How to find**: Available in AutoMQ Console environment settings - -2. **Deploy AutoMQ instance**: - ```bash - terraform init - terraform plan - terraform apply - ``` - -### Step 3: Deploy Observability Stack - -This step deploys a comprehensive monitoring solution including Prometheus and Grafana to collect, store, and visualize metrics from your AutoMQ cluster and benchmark workloads. The stack is configured with remote write capabilities and pre-configured dashboards for AutoMQ monitoring. - -**Expected Result**: Prometheus and Grafana will be deployed and accessible via LoadBalancer services. Prometheus will start collecting metrics from AutoMQ instances, and Grafana will be ready to display performance dashboards. - -1. **Install Prometheus and Grafana**: - ```bash - # Add Prometheus community Helm repository - helm repo add prometheus-community https://prometheus-community.github.io/helm-charts - helm repo update - - # Create monitoring namespace - kubectl create namespace prometheus - - # Deploy Prometheus and Grafana - helm install prometheus prometheus-community/kube-prometheus-stack \ - -n prometheus \ - -f monitoring/prometheus.yaml - ``` - -2. 
**Access Grafana Dashboard**: - - You can access the Grafana dashboard in this way, and contact the AutoMQ team to obtain the [configuration file](https://www.automq.com/docs/automq/observability/dashboard-configuration) for the observability dashboard. - - ```bash - kubectl get svc -n prometheus prometheus-grafana - - # Default credentials: - - ``` - -### Step 4: Run Benchmark Tests - -This step executes performance tests against your AutoMQ cluster using configurable workloads. The benchmark simulates real-world Kafka usage patterns with customizable parameters for throughput, message size, topic configuration, and test duration. The tests generate comprehensive metrics that are automatically collected by your monitoring stack. - -For specific configurations of helm values, you can refer to the [README](./helm-chart/automq-benchmark/README.md) in the automq-benchmark folder for further details. +1. Follow [Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) to create a Service Account and obtain the `Client ID` and `Client Secret` (used as `automq_byoc_access_key_id` and `automq_byoc_secret_key`). -**Expected Result**: Benchmark jobs will run and generate load against the AutoMQ cluster. Performance metrics including throughput, latency, and resource utilization will be collected and visible in Grafana dashboards. You should see data flowing through the system and performance characteristics of your AutoMQ deployment. +2. In the AutoMQ Console, create a Deploy Profile for the EKS environment (e.g., named `eks`). Reference: [Create a Deploy Profile](https://www.automq.com/docs/automq-cloud/deploy-automq-on-kubernetes/deploy-to-aws-eks#step-12%3A-access-the-environment-console-and-create-deployment-configuration). +3. Fill variables `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability integration. 
You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. +```bash +terraform init +terraform apply +``` + +#### AutoMQ tfvars Parameters + +Use the following variables in `cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars` to connect Terraform to your AutoMQ Console and environment: + +| Parameter | Description | Notes | +| - | - | - | +| `automq_byoc_endpoint` | AutoMQ BYOC Console API endpoint | Get from output of step1 | +| `automq_byoc_access_key_id` | BYOC API Access Key (Client ID) | Paired with `automq_byoc_secret_key`; do not commit secrets | +| `automq_byoc_secret_key` | BYOC API Secret Key (Client Secret) | Keep locally and secure; avoid plaintext leaks | +| `automq_deploy_profile_name` | Deploy Profile name created in Console | Must exactly match the name created in Console | +| `automq_environment_id` | AutoMQ Environment ID | Get from the AutoMQ Console env page | + + +### Step 3: Run Benchmark Tests + +This step executes performance tests against your AutoMQ cluster using configurable workloads. The benchmark simulates Kafka usage patterns with customizable parameters for throughput, message size, topic configuration, and test duration. The tests generate comprehensive metrics that are automatically collected by your monitoring stack. + +For specific configurations of helm values, you can refer to the [README](./automq-benchmark-chart/README.md) in the automq-benchmark folder for further details. + +**Expected Result**: Benchmark jobs will run and generate load against the AutoMQ cluster. Performance metrics including throughput, latency, and resource utilization will be collected and visible in Grafana dashboards. You should see data flowing through the system and performance characteristics of your AutoMQ deployment. 1. 
**Configure benchmark parameters**: - ```bash - cd helm-chart/automq-benchmark - # Edit values.yaml to configure: - # - AutoMQ connection details - # - Test parameters (topics, partitions, message size, etc.) - # - Resource requirements - ``` + +```bash +cd helm-chart/automq-benchmark +``` 2. **Deploy benchmark workload**: - ```bash - helm install automq-benchmark . \ - --namespace default \ - --values values.yaml - ``` - -3. **Monitor benchmark progress**: - ```bash - # Watch job status - kubectl get jobs -w - - # View benchmark logs - kubectl logs -f job/automq-benchmark - ``` - -4. **View results in Grafana**: + +```bash +helm install automq-benchmark . \ + --namespace default \ + --values values.yaml +``` + +3. **View results in Grafana**: - Access your Grafana dashboard - Navigate to AutoMQ performance dashboards - Observe real-time metrics during the test execution @@ -268,30 +133,11 @@ To remove all deployed resources: # Remove benchmark workload helm uninstall automq-benchmark -# Remove monitoring stack -helm uninstall prometheus -n prometheus -kubectl delete namespace prometheus - -# Remove AutoMQ instance (if deployed) -cd terraform/automq +# Remove AutoMQ instance +cd automq terraform destroy -# Remove benchmark nodes -cd terraform/benchmark-node +# Remove EKS and AutoMQ Console +cd terraform terraform destroy -``` - -## Contributing - -When contributing to this project: -1. Test changes in a development environment -2. Update documentation for any configuration changes -3. Ensure Terraform modules follow best practices -4. 
Validate Helm charts with different configurations - -## Support - -For issues and questions: -- Check the [AutoMQ Documentation](https://docs.automq.com) -- Review existing issues in the repository -- Contact the AutoMQ team for enterprise support, welcome to join our [Slack community](https://go.automq.com/slack) \ No newline at end of file +``` \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf index 0d42a11..b5171ec 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf @@ -8,7 +8,7 @@ variable "region" { variable "resource_suffix" { description = "Suffix for resource names" type = string - default = "automqlab-bt" + default = "automqlab" } variable "node_group" { @@ -29,98 +29,4 @@ variable "node_group" { instance_type = "c6g.2xlarge" # Compute-optimized instance with AWS Graviton2 processor ami_type = "AL2_ARM_64" # Amazon Linux 2 AMI type, can use AL2_ARM_64 for ARM architecture } -} - -# Benchmark Node Group Configuration -variable "enable_benchmark_nodes" { - description = "Whether to create benchmark node group" - type = bool - default = true -} - -variable "benchmark_node_role_name" { - description = "Name of the existing node group IAM role for benchmark nodes" - type = string - default = "" -} - -variable "benchmark_subnet_ids" { - description = "List of subnet IDs where the benchmark node group will be deployed" - type = list(string) - default = [] -} - -# Benchmark node group scaling configuration -variable "benchmark_capacity_type" { - description = "Type of capacity associated with the benchmark EKS Node Group. 
Valid values: ON_DEMAND, SPOT" - type = string - default = "ON_DEMAND" -} - -variable "benchmark_instance_types" { - description = "List of instance types for the benchmark node group - configured for at least 4c8g" - type = list(string) - default = ["c5.xlarge", "c5a.xlarge", "c5n.xlarge", "m5.xlarge", "m5a.xlarge"] -} - -variable "benchmark_desired_size" { - description = "Desired number of benchmark nodes" - type = number - default = 2 -} - -variable "benchmark_max_size" { - description = "Maximum number of benchmark nodes" - type = number - default = 3 -} - -variable "benchmark_min_size" { - description = "Minimum number of benchmark nodes" - type = number - default = 1 -} - -variable "benchmark_ami_type" { - description = "Type of Amazon Machine Image (AMI) associated with the benchmark EKS Node Group" - type = string - default = "AL2023_x86_64_STANDARD" -} - -variable "benchmark_disk_size" { - description = "Disk size in GiB for benchmark worker nodes" - type = number - default = 50 -} - -# Optional benchmark configurations -variable "benchmark_enable_dedicated_nodes" { - description = "Whether to add taints to make benchmark nodes dedicated for load testing" - type = bool - default = true -} - -# Prometheus Configuration -variable "enable_prometheus" { - description = "Whether to deploy Prometheus monitoring stack" - type = bool - default = true -} - -variable "prometheus_namespace" { - description = "Kubernetes namespace for Prometheus deployment" - type = string - default = "monitoring" -} - -variable "prometheus_chart_version" { - description = "Version of the kube-prometheus-stack Helm chart" - type = string - default = "61.3.0" -} - -variable "prometheus_storage_class" { - description = "StorageClass name used for Prometheus PVCs" - type = string - default = "gp2" } \ No newline at end of file From 87cb3f6a9f7942cceeea8dd5a7bac078a8cc1ecf Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 29 Oct 2025 17:23:31 +0800 Subject: [PATCH 
09/20] Restore the mistakenly deleted code --- .../aws/eks-benchmark/terraform/variables.tf | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf index b5171ec..32e99e8 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf @@ -29,4 +29,99 @@ variable "node_group" { instance_type = "c6g.2xlarge" # Compute-optimized instance with AWS Graviton2 processor ami_type = "AL2_ARM_64" # Amazon Linux 2 AMI type, can use AL2_ARM_64 for ARM architecture } +} + + +# Benchmark Node Group Configuration +variable "enable_benchmark_nodes" { + description = "Whether to create benchmark node group" + type = bool + default = true +} + +variable "benchmark_node_role_name" { + description = "Name of the existing node group IAM role for benchmark nodes" + type = string + default = "" +} + +variable "benchmark_subnet_ids" { + description = "List of subnet IDs where the benchmark node group will be deployed" + type = list(string) + default = [] +} + +# Benchmark node group scaling configuration +variable "benchmark_capacity_type" { + description = "Type of capacity associated with the benchmark EKS Node Group. 
Valid values: ON_DEMAND, SPOT" + type = string + default = "ON_DEMAND" +} + +variable "benchmark_instance_types" { + description = "List of instance types for the benchmark node group - configured for at least 4c8g" + type = list(string) + default = ["c5.xlarge", "c5a.xlarge", "c5n.xlarge", "m5.xlarge", "m5a.xlarge"] +} + +variable "benchmark_desired_size" { + description = "Desired number of benchmark nodes" + type = number + default = 2 +} + +variable "benchmark_max_size" { + description = "Maximum number of benchmark nodes" + type = number + default = 3 +} + +variable "benchmark_min_size" { + description = "Minimum number of benchmark nodes" + type = number + default = 1 +} + +variable "benchmark_ami_type" { + description = "Type of Amazon Machine Image (AMI) associated with the benchmark EKS Node Group" + type = string + default = "AL2023_x86_64_STANDARD" +} + +variable "benchmark_disk_size" { + description = "Disk size in GiB for benchmark worker nodes" + type = number + default = 50 +} + +# Optional benchmark configurations +variable "benchmark_enable_dedicated_nodes" { + description = "Whether to add taints to make benchmark nodes dedicated for load testing" + type = bool + default = true +} + +# Prometheus Configuration +variable "enable_prometheus" { + description = "Whether to deploy Prometheus monitoring stack" + type = bool + default = true +} + +variable "prometheus_namespace" { + description = "Kubernetes namespace for Prometheus deployment" + type = string + default = "monitoring" +} + +variable "prometheus_chart_version" { + description = "Version of the kube-prometheus-stack Helm chart" + type = string + default = "61.3.0" +} + +variable "prometheus_storage_class" { + description = "StorageClass name used for Prometheus PVCs" + type = string + default = "gp2" } \ No newline at end of file From 2fe0e26db2c138038fa2053c93c48a746c556b5b Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 29 Oct 2025 17:51:31 +0800 Subject: [PATCH 
10/20] Optimize the document following the suggestions. --- .../aws/eks-benchmark/README.md | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index 0a0ce45..196a1ba 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -10,11 +10,13 @@ The primary goal is to empower users to effortlessly spin up a fully operational This project follows a simple, three-step end-to-end flow to go from infrastructure to benchmarking with minimal manual work: +The resources that need to be installed this time include EKS, along with three corresponding node groups, about 10 EC2 instance, and their AutoMQ Console nodes. + 1) Provision with Terraform: bring up the required components — `EKS`, `AutoMQ Console` (BYOC control plane), and the observability stack (Prometheus/Grafana). After this step, the Kubernetes cluster and monitoring environment are ready. 2) Configure in AutoMQ Console and create the cluster: create the required `Profile` and credentials in the Console, then create or connect your `AutoMQ Cluster` (BYOC). Use these values in the subsequent Terraform/Helm configuration to enable connectivity. -3) Run benchmarks via the provided Helm chart: go to `helm-chart/automq-benchmark`, set connection details and workload parameters (topics, partitions, message size, concurrency, etc.), deploy the benchmark Job, and observe throughput and latency in Grafana. +3) Run benchmarks via the provided Helm chart: go to `automq-benchmark-chart`, set connection details and workload parameters (topics, partitions, message size, concurrency, etc.), deploy the benchmark Job, and observe throughput and latency in Grafana. 
@@ -43,7 +45,7 @@ Before using this project, ensure you have: ### Step 1: Deploy Benchmark Infrastructure -This step provisions and integrates everything via Terraform in `./eks-benchmark/terraform`: +This step provisions and integrates everything via Terraform in `./terraform`: - EKS cluster (creating and configuring required `VPC`, subnets, `Security Group`, `IAM`, and related networking/permission resources) - AutoMQ BYOC Console (deployed in the same VPC public subnet, with access and security integrated to the EKS cluster) @@ -56,6 +58,8 @@ All necessary cloud resources (including networking and object storage such as ` Tip: To control resource naming and avoid conflicts, set `resource_suffix` in `terraform/variables.tf`. ```bash +cd ./terraform +terraform init terraform plan ``` @@ -67,6 +71,20 @@ terraform apply Enter yes at the prompt to confirm. +Upon successful deployment, Terraform will display the following outputs. You can also retrieve them at any time using the `terraform output` command: + +| Name | Description | +| ------------------------------- | ------------------------------------------------------- | +| `console_endpoint` | The endpoint URL for the AutoMQ BYOC Console. | +| `initial_username` | The initial username for logging into the Console. | +| `initial_password` | The initial password for logging into the Console. | +| `cluster_name` | The name of the created EKS cluster. | +| `node_group_instance_profile_arn` | The IAM Instance Profile ARN used by the EKS node group. | +| `dns_zone_id` | The Route 53 DNS Zone ID created for the BYOC environment. | + +This time, Terraform will initiate the corresponding EKS-related nodes and the AutoMQ control plane, and create an AutoMQ cluster within EKS. + +You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. ### Step 2: Deploy AutoMQ Instance @@ -77,7 +95,9 @@ Enter yes at the prompt to confirm. 3. 
Fill variables `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability integration. You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. ```bash +cd ./automq terraform init +terraform plan terraform apply ``` From 24a41b7693c7eaac8bc3b98b991832e45c10078d Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 29 Oct 2025 18:57:44 +0800 Subject: [PATCH 11/20] Add a description of how to access the observability system. --- cloudservice-setup/aws/eks-benchmark/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index 196a1ba..faefe60 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -82,9 +82,13 @@ Upon successful deployment, Terraform will display the following outputs. You ca | `node_group_instance_profile_arn` | The IAM Instance Profile ARN used by the EKS node group. | | `dns_zone_id` | The Route 53 DNS Zone ID created for the BYOC environment. | -This time, Terraform will initiate the corresponding EKS-related nodes and the AutoMQ control plane, and create an AutoMQ cluster within EKS. +Terraform will initiate the corresponding EKS-related nodes and the AutoMQ control plane, and create an AutoMQ cluster within EKS. You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. -You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. +To visit the observability stack, use the following command to obtain the public endpoint of Grafana. 
+ +```bash +kubectl get service prometheus-grafana -n monitoring +``` ### Step 2: Deploy AutoMQ Instance From d2e0a0435a0c88090cc810652e71385af4718bcd Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 29 Oct 2025 19:07:25 +0800 Subject: [PATCH 12/20] add eks access method in readme --- cloudservice-setup/aws/eks-benchmark/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index faefe60..560431d 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -84,12 +84,20 @@ Upon successful deployment, Terraform will display the following outputs. You ca Terraform will initiate the corresponding EKS-related nodes and the AutoMQ control plane, and create an AutoMQ cluster within EKS. You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. +To access the EKS cluster using this command, and the placeholders in the command can be replaced with the actual values obtained from the output above. + +```bash +aws eks update-kubeconfig --region [your-region] --name [your-cluster-name] +``` + To visit the observability stack, use the following command to obtain the public endpoint of Grafana. ```bash kubectl get service prometheus-grafana -n monitoring ``` + + ### Step 2: Deploy AutoMQ Instance 1. Follow [Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) to create a Service Account and obtain the `Client ID` and `Client Secret` (used as `automq_byoc_access_key_id` and `automq_byoc_secret_key`). 
From 5729cf49d5e389ae231e63e2e070573815fb5700 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 29 Oct 2025 19:59:42 +0800 Subject: [PATCH 13/20] add random resource_suffix --- .../aws/eks-benchmark/terraform/benchmark.tf | 4 ++-- .../aws/eks-benchmark/terraform/eks.tf | 2 +- .../aws/eks-benchmark/terraform/main.tf | 2 +- .../aws/eks-benchmark/terraform/providers.tf | 4 ++++ .../aws/eks-benchmark/terraform/variables.tf | 13 +++++++++++++ 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf index c701ec6..72e805c 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/benchmark.tf @@ -2,7 +2,7 @@ resource "aws_eks_node_group" "benchmark_node_group" { count = var.enable_benchmark_nodes ? 1 : 0 cluster_name = module.eks-env.cluster_name - node_group_name = "benchmark-node-group-${var.resource_suffix}" + node_group_name = "benchmark-node-group-${local.resource_suffix}" node_role_arn = module.eks-env.node_role_arn # Use the same subnet as the default node group (single AZ for cost optimization) @@ -28,7 +28,7 @@ resource "aws_eks_node_group" "benchmark_node_group" { tags = merge( { - Name = "benchmark-node-group-${var.resource_suffix}" + Name = "benchmark-node-group-${local.resource_suffix}" } ) diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/eks.tf b/cloudservice-setup/aws/eks-benchmark/terraform/eks.tf index 8445f3d..62a0e45 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/eks.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/eks.tf @@ -2,7 +2,7 @@ module "eks-env" { source = "../../../../kubernetes/aws/terraform" region = var.region - resource_suffix = var.resource_suffix + resource_suffix = local.resource_suffix node_group = var.node_group } diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/main.tf 
b/cloudservice-setup/aws/eks-benchmark/terraform/main.tf index dd812e9..ae007aa 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/main.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/main.tf @@ -3,7 +3,7 @@ module "automq-byoc" { version = "0.3.2" cloud_provider_region = var.region - automq_byoc_env_id = var.resource_suffix + automq_byoc_env_id = local.resource_suffix create_new_vpc = false automq_byoc_vpc_id = module.eks-env.vpc_id diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf b/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf index 034ebe1..4aa29dd 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/providers.tf @@ -4,6 +4,10 @@ terraform { source = "hashicorp/aws" version = " > 5.0.0" } + random = { + source = "hashicorp/random" + version = ">= 3.5.0" + } helm = { source = "hashicorp/helm" version = "~> 2.0" diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf index 32e99e8..c785c77 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/variables.tf @@ -11,6 +11,19 @@ variable "resource_suffix" { default = "automqlab" } +resource "random_string" "resource_suffix" { + length = 4 + upper = false + lower = true + numeric = true + special = false +} + +locals { + # Append a dash and a 4-char random tail to the configured suffix + resource_suffix = "${var.resource_suffix}-${random_string.resource_suffix.result}" +} + variable "node_group" { description = "Configuration for EKS node group" type = object({ From 9445a86bb133b2d621913db72b499f95bf864806 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Fri, 31 Oct 2025 16:23:34 +0800 Subject: [PATCH 14/20] Optimize the Readme based on actual deployment experience. 
--- .../aws/eks-benchmark/README.md | 140 ++++++++++++------ .../aws/eks-benchmark/automq/main.tf | 2 + ...raform.tfvars.example => terraform.tfvars} | 24 +-- .../aws/eks-benchmark/terraform/outputs.tf | 10 ++ cloudservice-setup/aws/eks-existing/README.md | 2 +- 5 files changed, 117 insertions(+), 61 deletions(-) rename cloudservice-setup/aws/eks-benchmark/automq/{terraform.tfvars.example => terraform.tfvars} (57%) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index 560431d..8d3c2ac 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -1,24 +1,33 @@ # AutoMQ Quick Setup & Benchmark -Deploying a complete AutoMQ cluster on AWS traditionally involves multiple, complex steps, from setting up the control and data planes to manually configuring a separate observability environment and benchmarking tools. +Deploying a complete AutoMQ cluster on AWS traditionally involves multiple, complex steps, from setting up the control +and data planes to manually configuring a separate observability environment and benchmarking tools. -This project eliminates that complexity. It is designed to provide a seamless, one-click solution using Terraform to automatically provision an entire AutoMQ ecosystem on AWS. +This project eliminates that complexity. It is designed to provide a seamless, one-click solution using Terraform to +automatically provision an entire AutoMQ ecosystem on AWS. -The primary goal is to empower users to effortlessly spin up a fully operational, observable, and testable AutoMQ cluster, drastically reducing setup time and manual configuration. +The primary goal is to empower users to effortlessly spin up a fully operational, observable, and testable AutoMQ +cluster, drastically reducing setup time and manual configuration. 
## Overview - -This project follows a simple, three-step end-to-end flow to go from infrastructure to benchmarking with minimal manual work: -The resources that need to be installed this time include EKS, along with three corresponding node groups, about 10 EC2 instance, and their AutoMQ Console nodes. +This project follows a simple, three-step end-to-end flow to go from infrastructure to benchmarking with minimal manual +work: -1) Provision with Terraform: bring up the required components — `EKS`, `AutoMQ Console` (BYOC control plane), and the observability stack (Prometheus/Grafana). After this step, the Kubernetes cluster and monitoring environment are ready. +The resources to be provisioned include an EKS cluster with three corresponding node groups, about 10 EC2 +instances, and the AutoMQ Console nodes. -2) Configure in AutoMQ Console and create the cluster: create the required `Profile` and credentials in the Console, then create or connect your `AutoMQ Cluster` (BYOC). Use these values in the subsequent Terraform/Helm configuration to enable connectivity. - -3) Run benchmarks via the provided Helm chart: go to `automq-benchmark-chart`, set connection details and workload parameters (topics, partitions, message size, concurrency, etc.), deploy the benchmark Job, and observe throughput and latency in Grafana. +1) Provision with Terraform: bring up the required components — `EKS`, `AutoMQ Console` (BYOC control plane), and the + observability stack (Prometheus/Grafana). After this step, the Kubernetes cluster and monitoring environment are + ready. +2) Configure in AutoMQ Console and create the cluster: create the required `Profile` and credentials in the Console, + then create or connect your `AutoMQ Cluster` (BYOC). Use these values in the subsequent Terraform/Helm configuration + to enable connectivity.
+3) Run benchmarks via the provided Helm chart: go to `automq-benchmark-chart`, set connection details and workload + parameters (topics, partitions, message size, concurrency, etc.), deploy the benchmark Job, and observe throughput + and latency in Grafana. ## Architecture @@ -29,6 +38,7 @@ The resources that need to be installed this time include EKS, along with three Before using this project, ensure you have: ### Required Tools + - **Terraform** (>= 1.0) - **kubectl** configured for your EKS cluster - **Helm** (>= 3.0) @@ -47,15 +57,18 @@ Before using this project, ensure you have: This step provisions and integrates everything via Terraform in `./terraform`: -- EKS cluster (creating and configuring required `VPC`, subnets, `Security Group`, `IAM`, and related networking/permission resources) +- EKS cluster (creating and configuring required `VPC`, subnets, `Security Group`, `IAM`, and related + networking/permission resources) - AutoMQ BYOC Console (deployed in the same VPC public subnet, with access and security integrated to the EKS cluster) -- Observability stack (Prometheus/Grafana) installed via Helm `kube-prometheus-stack` for collecting and visualizing benchmark metrics +- Observability stack (Prometheus/Grafana) installed via Helm `kube-prometheus-stack` for collecting and visualizing + benchmark metrics -All necessary cloud resources (including networking and object storage such as `S3`) will be newly created and wired up in this step. +All necessary cloud resources (including networking and object storage such as `S3`) will be newly created and wired up +in this step. 1. Plan the Deployment Run terraform plan to preview the resources that will be created. -Tip: To control resource naming and avoid conflicts, set `resource_suffix` in `terraform/variables.tf`. +Tip: To control resource naming and avoid conflicts, set `resource_suffix` in `./terraform/variables.tf`. ```bash cd ./terraform @@ -63,7 +76,8 @@ terraform init terraform plan ``` -2. 
Apply the Deployment After reviewing the plan, execute terraform apply to begin the deployment. This process may take 25-30 minutes. +2. Apply the Deployment After reviewing the plan, execute terraform apply to begin the deployment. This process may take + 25-30 minutes. ```bash terraform apply @@ -71,40 +85,71 @@ terraform apply Enter yes at the prompt to confirm. -Upon successful deployment, Terraform will display the following outputs. You can also retrieve them at any time using the `terraform output` command: +Upon successful deployment, Terraform will display the following outputs. You can also retrieve them at any time using +the `terraform output` command: -| Name | Description | -| ------------------------------- | ------------------------------------------------------- | -| `console_endpoint` | The endpoint URL for the AutoMQ BYOC Console. | -| `initial_username` | The initial username for logging into the Console. | -| `initial_password` | The initial password for logging into the Console. | -| `cluster_name` | The name of the created EKS cluster. | -| `node_group_instance_profile_arn` | The IAM Instance Profile ARN used by the EKS node group. | -| `dns_zone_id` | The Route 53 DNS Zone ID created for the BYOC environment. | +| Name | Description | +|-----------------------------------|------------------------------------------------------------| +| `console_endpoint` | The endpoint URL for the AutoMQ BYOC Console. | +| `initial_username` | The initial username for logging into the Console. | +| `initial_password` | The initial password for logging into the Console. | +| `cluster_name` | The name of the created EKS cluster. | +| `node_group_instance_profile_arn` | The IAM Instance Profile ARN used by the EKS node group. | +| `dns_zone_id` | The Route 53 DNS Zone ID created for the BYOC environment. | +| `vpc_id` | The ID of the VPC created for the environment. | +| `automq_control_panel_env_id` | The ID of the AutoMQ environment. 
| -Terraform will initiate the corresponding EKS-related nodes and the AutoMQ control plane, and create an AutoMQ cluster within EKS. You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. +Terraform will initiate the corresponding EKS-related nodes and the AutoMQ control plane, and create an AutoMQ cluster +within EKS. You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. -To access the EKS cluster using this command, and the placeholders in the command can be replaced with the actual values obtained from the output above. +To access the EKS cluster, run the following command, replacing the placeholders with the actual values +obtained from the output above. ```bash aws eks update-kubeconfig --region [your-region] --name [your-cluster-name] ``` To visit the observability stack, use the following command to obtain the public endpoint of Grafana. +The username is admin, and the password can be obtained through the command below. If you wish to change it, you can +configure it in the `./terraform/monitoring/prometheus.yaml` file. + +AutoMQ provides [official Grafana dashboards](https://www.automq.com/docs/automq/observability/dashboard-configuration), +and you can contact the AutoMQ team to obtain the dashboard JSON. Once users export metrics to Prometheus, they can +import these Grafana dashboard templates, configure the Grafana data source to link to the respective Prometheus, and +begin monitoring AutoMQ. ```bash +# Get the public endpoint of Grafana. Please make sure to use the HTTP protocol for access. 
kubectl get service prometheus-grafana -n monitoring + +# Get the Grafana password +kubectl get secret prometheus-grafana -n monitoring -o jsonpath="{.data.admin-password}" | base64 --decode ``` +### Step 2: Deploy AutoMQ Instance +1. Follow [Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) +to create a Service Account and obtain the `Client ID` and `Client Secret` (used as `automq_byoc_access_key_id` and `automq_byoc_secret_key`). It is recommended to use EnvironmentAdmin for convenient management of all resources. -### Step 2: Deploy AutoMQ Instance +2. In the AutoMQ Console, create a Deploy Profile for the EKS environment (e.g., named `eks`). Kubernetes Cluster, DNS ZoneId, Bucket Name, and IAM Role ARN are all obtained from the output of the previous step. + Reference: [Create a Deploy Profile](https://www.automq.com/docs/automq-cloud/deploy-automq-on-kubernetes/deploy-to-aws-eks#step-12%3A-access-the-environment-console-and-create-deployment-configuration). -1. Follow [Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) to create a Service Account and obtain the `Client ID` and `Client Secret` (used as `automq_byoc_access_key_id` and `automq_byoc_secret_key`). +3. Fill in the variables in `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability + integration. You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. -2. In the AutoMQ Console, create a Deploy Profile for the EKS environment (e.g., named `eks`). Reference: [Create a Deploy Profile](https://www.automq.com/docs/automq-cloud/deploy-automq-on-kubernetes/deploy-to-aws-eks#step-12%3A-access-the-environment-console-and-create-deployment-configuration). 
+The following are the parameters you must fill in within the `terraform.tfvars` file; the Notes column explains how to obtain each of them. The remaining parameters can be further configured according to the needs of the cluster. + +| Parameter | Description | Notes | +|------------------------------------|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| +| `automq_byoc_endpoint` | AutoMQ BYOC Console API endpoint | Get from output of step1 | +| `automq_byoc_access_key_id` | BYOC API Access Key (Client ID) | Obtained when creating the Service Account in the previous step. | +| `automq_byoc_secret_key` | BYOC API Secret Key (Client Secret) | | +| `automq_deploy_profile_name` | Deploy Profile name created in Console | Obtained when creating the Deploy Profile in the previous step. | +| `automq_environment_id` | AutoMQ Environment ID | Get from output of step1 | +| `vpc_id` | VPC ID | Get from output of step1 | +| `automq_environment_id` | ENV ID | Get from output of step1 | +| `prometheus_remote_write_endpoint` | Prometheus internal endpoint | Allow the AutoMQ control plane to access Prometheus using the Prometheus service name and namespace.
Use `kubectl get svc -n monitoring` to check. | -3. Fill variables `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability integration. You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. ```bash cd ./automq @@ -115,24 +160,25 @@ terraform apply #### AutoMQ tfvars Parameters -Use the following variables in `cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars` to connect Terraform to your AutoMQ Console and environment: - -| Parameter | Description | Notes | -| - | - | - | -| `automq_byoc_endpoint` | AutoMQ BYOC Console API endpoint | Get from output of step1 | -| `automq_byoc_access_key_id` | BYOC API Access Key (Client ID) | Paired with `automq_byoc_secret_key`; do not commit secrets | -| `automq_byoc_secret_key` | BYOC API Secret Key (Client Secret) | Keep locally and secure; avoid plaintext leaks | -| `automq_deploy_profile_name` | Deploy Profile name created in Console | Must exactly match the name created in Console | -| `automq_environment_id` | AutoMQ Environment ID | Get from the AutoMQ Console env page | - +Use the following variables in `cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars` to connect Terraform to +your AutoMQ Console and environment: ### Step 3: Run Benchmark Tests -This step executes performance tests against your AutoMQ cluster using configurable workloads. The benchmark simulates Kafka usage patterns with customizable parameters for throughput, message size, topic configuration, and test duration. The tests generate comprehensive metrics that are automatically collected by your monitoring stack. +This step executes performance tests against your AutoMQ cluster using configurable workloads. The benchmark simulates +Kafka usage patterns with customizable parameters for throughput, message size, topic configuration, and test duration. +The tests generate comprehensive metrics that are automatically collected by your monitoring stack. 
-For specific configurations of helm values, you can refer to the [README](./automq-benchmark-chart/README.md) in the automq-benchmark folder for further details. +For specific configurations of helm values, you can refer to the [README](./automq-benchmark-chart/README.md) in the +automq-benchmark folder for further details. -**Expected Result**: Benchmark jobs will run and generate load against the AutoMQ cluster. Performance metrics including throughput, latency, and resource utilization will be collected and visible in Grafana dashboards. You should see data flowing through the system and performance characteristics of your AutoMQ deployment. +The current stress testing machine is a single node with a maximum network bandwidth of 10Gbps considering the node instance type. In the `values.yaml` file, the default rate is to write 160 messages per second, each 51 KiB in size (without any batching), with a write speed of 8 MiB/s. +If you need to conduct a larger scale test, you can adjust the parameters or contact the AutoMQ team for further assistance. +More details about stress testing tools can be found in this [blog](https://www.automq.com/blog/how-to-perform-a-performance-test-on-automq). + +**Expected Result**: Benchmark jobs will run and generate load against the AutoMQ cluster. Performance metrics including +throughput, latency, and resource utilization will be collected and visible in Grafana dashboards. You should see data +flowing through the system and performance characteristics of your AutoMQ deployment. 1. **Configure benchmark parameters**: @@ -149,14 +195,12 @@ helm install automq-benchmark . \ ``` 3. 
**View results in Grafana**: - - Access your Grafana dashboard - - Navigate to AutoMQ performance dashboards - - Observe real-time metrics during the test execution + - Access your Grafana dashboard + - Navigate to AutoMQ performance dashboards + - Observe real-time metrics during the test execution After completing the above steps, you can see the corresponding metrics on the Grafana dashboard. Adjust the stress test parameters according to the corresponding specifications to further understand the specifications and performance related to AutoMQ. - - ## Cleanup To remove all deployed resources: diff --git a/cloudservice-setup/aws/eks-benchmark/automq/main.tf b/cloudservice-setup/aws/eks-benchmark/automq/main.tf index 13e2be9..36c0639 100644 --- a/cloudservice-setup/aws/eks-benchmark/automq/main.tf +++ b/cloudservice-setup/aws/eks-benchmark/automq/main.tf @@ -86,6 +86,8 @@ resource "automq_kafka_instance" "automq_kafka_instance" { automq_integration.prometheus_remote_write_integration.id, ] } + + depends_on = [automq_integration.prometheus_remote_write_integration] } diff --git a/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars.example b/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars similarity index 57% rename from cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars.example rename to cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars index 37ad5fd..1e92b07 100644 --- a/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars.example +++ b/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars @@ -7,18 +7,18 @@ region = "us-east-1" az = "us-east-1a" # AutoMQ BYOC endpoint and credentials -automq_byoc_endpoint = "http://example.com" -automq_byoc_access_key_id = "access-key" -automq_byoc_secret_key = "secretkey" +automq_byoc_endpoint = "http://example.com" +automq_byoc_access_key_id = "access-key" +automq_byoc_secret_key = "secretkey" # AutoMQ environment id automq_environment_id = "automqlab-id" # Prometheus 
Integration Configuration -prometheus_integration_name = "prometheus-remote-write" -prometheus_integration_type = "prometheusRemoteWrite" -prometheus_remote_write_endpoint = "http://prometheus-prometheus-server.monitoring:9090/api/v1/write" -prometheus_auth_type = "noauth" +prometheus_integration_name = "prometheus-remote-write" +prometheus_integration_type = "prometheusRemoteWrite" +prometheus_remote_write_endpoint = "http://prometheus-prometheus-server.monitoring:9090/api/v1/write" +prometheus_auth_type = "noauth" # AutoMQ Deploy Profile Configuration automq_deploy_profile_name = "eks" @@ -26,13 +26,13 @@ automq_deploy_profile_name = "eks" # Kafka Instance Configuration kafka_instance_name = "automq-kafka-benchmark" kafka_instance_description = "AutoMQ Kafka instance for benchmark testing" -kafka_version = "1.4.1" -kafka_reserved_aku = 3 -kubernetes_node_group_id = "automq-node-group" -kafka_wal_mode = "EBSWAL" +kafka_version = "1.4.1" +kafka_reserved_aku = 3 +kubernetes_node_group_id = "automq-node-group" +kafka_wal_mode = "EBSWAL" # Kafka Authentication and Encryption -kafka_authentication_methods = ["anonymous"] +kafka_authentication_methods = ["anonymous"] kafka_transit_encryption_modes = ["plaintext"] # Kafka Instance Configuration Parameters diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf index ab349e6..d2cc28b 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf @@ -34,6 +34,16 @@ output "benchmark_node_group_arn" { value = var.enable_benchmark_nodes ? 
aws_eks_node_group.benchmark_node_group[0].arn : null } +output "vpc_id" { + description = "VPC Id of the cluster" + value = module.eks-env.vpc_id +} + +output "automq_control_panel_env_id" { + description = "environment id of control panel" + value = module.automq-byoc.automq_byoc_env_id +} + output "benchmark_node_group_status" { description = "Status of the benchmark node group" value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].status : null diff --git a/cloudservice-setup/aws/eks-existing/README.md b/cloudservice-setup/aws/eks-existing/README.md index ffd6f38..c86bdd0 100644 --- a/cloudservice-setup/aws/eks-existing/README.md +++ b/cloudservice-setup/aws/eks-existing/README.md @@ -54,7 +54,7 @@ Before you begin, ensure you have the following: 1. **Create a `terraform.tfvars` file** Copy the example configuration and update it with your existing infrastructure details: ```bash - cp terraform/terraform.tfvars.example terraform/terraform.tfvars + cp terraform/terraform.tfvars.example terraform/terraform.tfvars ``` Edit the file with your specific values: From f3487a0cb7ce8b865987efaaf0270b3abc275121 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Sat, 1 Nov 2025 09:02:44 +0800 Subject: [PATCH 15/20] Launch the Grafana dashboard with one click using TF. 
--- .../monitoring/dashboard/broker.json | 1 + .../monitoring/dashboard/cluster.json | 1 + .../terraform/monitoring/dashboard/group.json | 1 + .../monitoring/dashboard/topics.json | 1 + .../terraform/monitoring/prometheus.yaml | 12 +++++++++- .../aws/eks-benchmark/terraform/prometheus.tf | 22 ++++++++++++++++++- 6 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/broker.json create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/cluster.json create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/group.json create mode 100644 cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/topics.json diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/broker.json b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/broker.json new file mode 100644 index 0000000..da32095 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/broker.json @@ -0,0 +1 @@ +{"__inputs":[{"name":"prometheus","label":"prometheus","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__elements":{},"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"11.2.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"1.0.0"},{"type":"panel","id":"timeseries","name":"Time series","version":""}],"annotations":{"list":[{"builtIn":1,"datasource":{"type":"grafana","uid":"-- Grafana --"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & 
Alerts","type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":1,"id":null,"links":[],"panels":[{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":0,"y":0},"id":11,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance, error) (rate(kafka_request_error_count_total{job=\"$cluster_id\", instance=~\"$node_id\", error!=\"NONE\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{error}}","range":true,"refId":"A","useBackend":false}],"title":"Error 
Rate","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":8,"x":8,"y":0},"id":1,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance, listener) (kafka_server_connection_count{job=\"$cluster_id\", instance=~\"$node_id\"})","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{listener}}","range":true,"refId":"A","useBackend":false}],"title":"Connection 
Count","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":8,"x":16,"y":0},"id":3,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_partition_count{job=\"$cluster_id\", instance=~\"$node_id\"}","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Partition 
Count","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":8,"x":0,"y":10},"id":15,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_stream_partition_status_statistics{job=\"$cluster_id\", instance=~\"$node_id\"}","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{status}}","range":true,"refId":"A","useBackend":false}],"title":"Partition 
Status","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":8,"y":10},"id":9,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_broker_network_io_bytes_total{job=\"$cluster_id\", direction=\"in\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Bytes 
In","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":16,"y":10},"id":5,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_broker_network_io_bytes_total{job=\"$cluster_id\", direction=\"out\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Bytes 
Out","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"mps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":0,"y":20},"id":2,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_message_count_total{job=\"$cluster_id\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Messages 
In","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":8,"y":20},"id":13,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic, instance) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", direction=\"in\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Bytes In Per 
Topic","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":16,"y":20},"id":14,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic, instance) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", direction=\"out\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Bytes Out Per 
Topic","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"mps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":0,"y":30},"id":12,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic, instance) (rate(kafka_message_count_total{job=\"$cluster_id\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Messages In Per 
Topic","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":8,"y":30},"id":17,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance, topic) (rate(kafka_topic_request_count_total{job=\"$cluster_id\", instance=~\"$node_id\", type=\"produce\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Produce QPS Per 
Topic","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":16,"y":30},"id":16,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance, topic) (rate(kafka_topic_request_count_total{job=\"$cluster_id\", instance=~\"$node_id\", type=\"fetch\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Fetch QPS Per 
Topic","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":0,"y":40},"id":20,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_request_count_total{job=\"$cluster_id\", instance=~\"$node_id\", type=~\"OffsetCommit|Produce|Fetch\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Total Throughput (Broker 
Level)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":8,"y":40},"id":4,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_request_count_total{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Produce\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Produce Throughput (Broker 
Level)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":16,"y":40},"id":7,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_request_count_total{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Fetch\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Fetch Throughput (Broker 
Level)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":50},"id":18,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_topic_request_count_total{job=\"$cluster_id\", instance=~\"$node_id\", type=\"produce\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Produce Throughput (Sum By 
Topic)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":50},"id":19,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (rate(kafka_topic_request_count_total{job=\"$cluster_id\", instance=~\"$node_id\", type=\"fetch\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Fetch Throughput (Sum By 
Topic)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":60},"id":8,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_99p_milliseconds{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Produce\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#P99","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_mean_milliseconds{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Produce\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#Avg","range":true,"refId":"B","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_50p_milliseconds{job=\"$cluster_id\", 
instance=~\"$node_id\", type=\"Produce\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#P50","range":true,"refId":"C","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_max_milliseconds{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Produce\"})","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#Max","range":true,"refId":"D","useBackend":false}],"title":"Produce Latency","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":60},"id":10,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_99p_milliseconds{job=\"$cluster_id\", instance=~\"$node_id\", 
type=\"Fetch\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#P99","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_mean_milliseconds{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Fetch\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#Avg","range":true,"refId":"B","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_50p_milliseconds{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Fetch\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#P50","range":true,"refId":"C","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(instance) (kafka_request_time_max_milliseconds{job=\"$cluster_id\", instance=~\"$node_id\", type=\"Fetch\"})","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"Node-{{instance}}#Max","range":true,"refId":"D","useBackend":false}],"title":"Fetch Latency","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":[],"templating":{"list":[{"current":{},"includeAll":false,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count{instance_id=~\"$cmp_instance_id\", env_id=~\"$cmp_env_id\"},job)","includeAll":false,"label":"Cluster Id","name":"cluster_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count{instance_id=~\"$cmp_instance_id\", 
env_id=~\"$cmp_env_id\"},job)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"","type":"query"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_server_connection_count{job=\"$cluster_id\"},instance)","includeAll":true,"label":"Node Id","multi":true,"name":"node_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_server_connection_count{job=\"$cluster_id\"},instance)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"/(^[0-9]*$)/","sort":3,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","includeAll":true,"label":"CMP Instance Id","name":"cmp_instance_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","sort":1,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count,env_id)","includeAll":true,"label":"CMP Env Id","name":"cmp_env_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count,env_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Broker Metrics","uid":"e97dc219-c0c7-4bbe-9616-cdafb3451a3b","version":6,"weekStart":""} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/cluster.json b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/cluster.json new file mode 100644 index 0000000..ed05bd5 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/cluster.json @@ -0,0 +1 @@ 
+{"__inputs":[{"name":"prometheus","label":"prometheus","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"},{"name":"DS_EXPRESSION","label":"Expression","description":"","type":"datasource","pluginId":"__expr__"}],"__elements":{},"__requires":[{"type":"datasource","id":"__expr__","version":"1.0.0"},{"type":"panel","id":"barchart","name":"Bar chart","version":""},{"type":"panel","id":"bargauge","name":"Bar gauge","version":""},{"type":"grafana","id":"grafana","name":"Grafana","version":"11.2.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"1.0.0"},{"type":"panel","id":"stat","name":"Stat","version":""},{"type":"panel","id":"table","name":"Table","version":""},{"type":"panel","id":"timeseries","name":"Time series","version":""}],"annotations":{"list":[{"builtIn":1,"datasource":{"type":"grafana","uid":"-- Grafana --"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":1,"id":null,"links":[],"panels":[{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"red","value":null},{"color":"green","value":0}]},"unit":"short"},"overrides":[]},"gridPos":{"h":6,"w":3,"x":0,"y":0},"id":1,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"value_and_name","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(job) (kafka_controller_active_count{job=\"$cluster_id\"})","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Active 
Controller","range":true,"refId":"A","useBackend":false}],"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":1}]},"unit":"short"},"overrides":[]},"gridPos":{"h":6,"w":3,"x":3,"y":0},"id":3,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"value_and_name","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_broker_fenced_count{job=\"$cluster_id\", instance=\"$active_controller\"}","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Fenced Broker","range":true,"refId":"A","useBackend":false}],"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"semi-dark-blue","value":null}]},"unit":"short"},"overrides":[]},"gridPos":{"h":6,"w":4,"x":6,"y":0},"id":6,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"value_and_name","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_topic_count{job=\"$cluster_id\", 
instance=\"$active_controller\"}","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Topics","range":true,"refId":"A","useBackend":false}],"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"decimals":3,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":0.1}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":6,"w":3,"x":10,"y":0},"id":13,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(rate(kafka_request_error_count_total{job=\"$cluster_id\", error!=\"NONE\"}[$__rate_interval]))","fullMetaSearch":false,"hide":true,"includeNullMetadata":false,"instant":false,"legendFormat":"__auto","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(rate(kafka_request_count_total{job=\"$cluster_id\"}[$__rate_interval]))","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"__auto","range":true,"refId":"B","useBackend":false},{"datasource":{"type":"__expr__","uid":"${DS_EXPRESSION}"},"expression":"$A / $B","hide":false,"refId":"C","type":"math"}],"title":"Error Ratio 
(Percent)","type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":true,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":30,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[{"matcher":{"id":"byName","options":"In"},"properties":[{"id":"color","value":{"fixedColor":"green","mode":"fixed"}}]},{"matcher":{"id":"byName","options":"Out"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byName","options":"Out"},"properties":[]}]},"gridPos":{"h":12,"w":11,"x":13,"y":0},"id":5,"interval":"60s","options":{"legend":{"calcs":["max","min"],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(rate(kafka_broker_network_io_bytes_total{job=\"$cluster_id\", direction=\"in\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"In","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(rate(kafka_broker_network_io_bytes_total{job=\"$cluster_id\", direction=\"out\"}[$__rate_interval])) * 
-1","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Out","range":true,"refId":"B","useBackend":false}],"title":"Bytes In (+) / Out (-)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","fillOpacity":64,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineWidth":1,"scaleDistribution":{"type":"linear"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]}},"overrides":[]},"gridPos":{"h":6,"w":6,"x":0,"y":6},"id":16,"interval":"120s","options":{"barRadius":0,"barWidth":0.54,"fullHighlight":false,"groupWidth":0.7,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"orientation":"auto","showValue":"never","stacking":"none","tooltip":{"mode":"multi","sort":"desc"},"xTickLabelRotation":0,"xTickLabelSpacing":100},"pluginVersion":"11.2.0","targets":[{"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"max by(job) (kafka_broker_active_count{job=\"$cluster_id\"})","fullMetaSearch":false,"includeNullMetadata":true,"legendFormat":"{{label_name}}","range":true,"refId":"A","useBackend":false,"datasource":{"type":"prometheus","uid":"prometheus"}}],"title":"Broker Active 
Count","type":"barchart"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"semi-dark-blue","value":null}]},"unit":"short"},"overrides":[]},"gridPos":{"h":6,"w":4,"x":6,"y":6},"id":4,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"value_and_name","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_partition_total_count{job=\"$cluster_id\", instance=\"$active_controller\"}","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Partitions","range":true,"refId":"A","useBackend":false}],"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"semi-dark-green","value":null}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":6,"w":3,"x":10,"y":6},"id":7,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(max by(topic, partition) 
(kafka_log_size{job=\"$cluster_id\"}))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Size","range":true,"refId":"A","useBackend":false}],"transparent":true,"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"continuous-YlBl"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]}},"overrides":[]},"gridPos":{"h":12,"w":6,"x":0,"y":12},"id":11,"options":{"displayMode":"gradient","maxVizHeight":300,"minVizHeight":10,"minVizWidth":0,"namePlacement":"auto","orientation":"horizontal","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(kafka_group_count{job=\"$cluster_id\"})","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"total","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(kafka_group_stable_count{job=\"$cluster_id\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"stable","range":true,"refId":"D","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(kafka_group_dead_count{job=\"$cluster_id\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"dead","range":true,"refId":"B","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(kafka_group_empty_count{job=\"$cluster_id\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"empty","range":true,"refId":"C","useBackend":false},{"datasour
ce":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(kafka_group_preparing_rebalance_count{job=\"$cluster_id\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"prepare_rebalance","range":true,"refId":"E","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(kafka_group_completing_rebalance_count{job=\"$cluster_id\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"completing_rebalance","range":true,"refId":"F","useBackend":false}],"title":"Group Count","type":"bargauge"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"max":1,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"percentunit"},"overrides":[{"matcher":{"id":"byName","options":"D 0okXDODKTMOSjDl3o_D9zA"},"properties":[{"id":"displayName","value":"Util"}]}]},"gridPos":{"h":12,"w":7,"x":6,"y":12},"id":15,"options":{"legend":{"calcs":["max"],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"max by(job) 
(rate(jvm_cpu_time_seconds_total{job=\"$cluster_id\"}[$__rate_interval]))","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"__auto","range":true,"refId":"B","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"max by(job) (jvm_cpu_count{job=\"$cluster_id\"})","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"__auto","range":true,"refId":"C","useBackend":false},{"datasource":{"type":"__expr__","uid":"${DS_EXPRESSION}"},"expression":"$B / $C","hide":false,"refId":"D","type":"math"}],"title":"Cluster Max JVM CPU Utilization","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":true,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":30,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"fieldMinMax":false,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[{"matcher":{"id":"byName","options":"In"},"properties":[{"id":"color","value":{"fixedColor":"green","mode":"fixed"}}]},{"matcher":{"id":"byName","options":"Out"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]}]},"gridPos":{"h":12,"w":11,"x":13,"y":12},"id":14,"interval":"60s","options":{"legend":{"calcs":["min","max"],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","ta
rgets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(rate(kafka_topic_request_count_total{job=\"$cluster_id\", type=\"produce\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"In","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(rate(kafka_topic_request_count_total{job=\"$cluster_id\", type=\"fetch\"}[$__rate_interval])) * -1","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Out","range":true,"refId":"B","useBackend":false}],"title":"QPS In (+) / Out (-)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":"center","cellOptions":{"type":"auto"},"inspect":false},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]}},"overrides":[{"matcher":{"id":"byName","options":"Value #A"},"properties":[{"id":"displayName","value":"Commit Offset"},{"id":"custom.hidden","value":true}]},{"matcher":{"id":"byName","options":"Value #B"},"properties":[{"id":"displayName","value":"Log End Offset"},{"id":"custom.hidden","value":true}]},{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"custom.hidden","value":true}]},{"matcher":{"id":"byName","options":"consumer_group"},"properties":[{"id":"displayName","value":"Consumer Group"},{"id":"links","value":[{"targetBlank":true,"title":"Show Group 
Metrics","url":"/d/d90be32f-7d32-488a-b99c-3e21529790e4/group-metrics?var-datasource=${datasource}&var-cluster_id=${cluster_id}&var-group_id=${__value.text}&var-topic=${__data.fields.topic}"}]},{"id":"custom.filterable","value":true}]}]},"gridPos":{"h":13,"w":7,"x":0,"y":24},"id":12,"options":{"cellHeight":"sm","footer":{"countRows":false,"enablePagination":true,"fields":"","reducer":["sum"],"show":false},"frameIndex":1,"showHeader":true,"sortBy":[{"desc":true,"displayName":"Consumer Lag"}]},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(consumer_group, topic) (max by(consumer_group, topic, partition) (kafka_group_commit_offset{job=\"$cluster_id\"}))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"{{consumer_group}}-{{topic}}-{{partition}}","range":false,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(topic) (max by(topic, partition) (kafka_log_end_offset{job=\"$cluster_id\"}))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"{{topic}}-{{partition}}","range":false,"refId":"B","useBackend":false}],"title":"Group Statistics","transformations":[{"id":"merge","options":{}},{"id":"calculateField","options":{"alias":"Consumer Lag","binary":{"left":"Value #B","operator":"-","right":"Value #A"},"mode":"binary","reduce":{"reducer":"sum"},"replaceFields":false}},{"id":"filterByValue","options":{"filters":[{"config":{"id":"isNotNull","options":{}},"fieldName":"consumer_group"}],"match":"any","type":"include"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Value #A":true,"Value #B":true},"indexByName":{"Consumer Lag":5,"Time":1,"Value #A":3,"Value 
#B":4,"consumer_group":0,"topic":2},"renameByName":{}}}],"type":"table"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":"center","cellOptions":{"type":"auto"},"inspect":true},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"topic"},"properties":[{"id":"displayName","value":"Topic"},{"id":"custom.filterable","value":true},{"id":"links","value":[{"targetBlank":true,"title":"","url":"/d/b908cc7a-3592-405d-aafb-fc9225219b0a/topic-metrics?orgId=1&var-datasource=${datasource}&var-cluster_id=${cluster_id}&var-topic=${__value.text}&from=now-1h&to=now"}]}]},{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"custom.hidden","value":true}]},{"matcher":{"id":"byName","options":"Value #A"},"properties":[{"id":"displayName","value":"Bytes In"},{"id":"unit","value":"binBps"}]},{"matcher":{"id":"byName","options":"Value #B"},"properties":[{"id":"displayName","value":"Bytes Out"},{"id":"unit","value":"binBps"}]},{"matcher":{"id":"byName","options":"Value #C"},"properties":[{"id":"displayName","value":"Msgs In"},{"id":"unit","value":"mps"}]},{"matcher":{"id":"byName","options":"Value #D"},"properties":[{"id":"displayName","value":"Size"},{"id":"unit","value":"bytes"}]}]},"gridPos":{"h":13,"w":17,"x":7,"y":24},"id":9,"options":{"cellHeight":"sm","footer":{"countRows":false,"enablePagination":true,"fields":"","reducer":["sum"],"show":false},"showHeader":true,"sortBy":[{"desc":true,"displayName":"Bytes In"}]},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"code","exemplar":false,"expr":"sum by(topic) (max by(topic, partition) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", direction=\"in\"}[$__rate_interval] offset 
2m)))","format":"table","fullMetaSearch":false,"includeNullMetadata":false,"instant":true,"legendFormat":"__auto","range":false,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"code","exemplar":false,"expr":"sum by(topic) (max by(topic, partition) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", direction=\"out\"}[$__rate_interval] offset 2m)))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":false,"instant":true,"legendFormat":"__auto","range":false,"refId":"B","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(topic) (rate(kafka_message_count_total{job=\"$cluster_id\", direction=\"in\"}[$__rate_interval]))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":false,"instant":true,"legendFormat":"__auto","range":false,"refId":"C","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(topic) (kafka_log_size{job=\"$cluster_id\"})","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":false,"instant":true,"legendFormat":"__auto","range":false,"refId":"D","useBackend":false}],"title":"Topic Statistics","transformations":[{"id":"merge","options":{}},{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{"Time":0,"Value #A":2,"Value #B":3,"Value #C":4,"Value #D":5,"topic":1},"renameByName":{"Value #A":"","Value #B":"","Value 
#C":"","topic":""}}}],"type":"table"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":"center","cellOptions":{"type":"auto"},"filterable":false,"inspect":false},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]}},"overrides":[{"matcher":{"id":"byName","options":"Value #A"},"properties":[{"id":"displayName","value":"Partitions"}]},{"matcher":{"id":"byName","options":"Value #B"},"properties":[{"id":"displayName","value":"Connections"}]},{"matcher":{"id":"byName","options":"instance"},"properties":[{"id":"displayName","value":"node id"},{"id":"links","value":[{"targetBlank":true,"title":"","url":"/d/e97dc219-c0c7-4bbe-9616-cdafb3451a3b/broker-metrics?var-cluster_id=$cluster_id&var-node_id=${__value.text}&var-datasource=${datasource}&var-node_type=All"}]},{"id":"custom.filterable","value":true}]},{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"custom.hidden","value":true}]},{"matcher":{"id":"byName","options":"Value #C"},"properties":[{"id":"displayName","value":"Bytes In"},{"id":"unit","value":"binBps"}]},{"matcher":{"id":"byName","options":"Value #D"},"properties":[{"id":"displayName","value":"Bytes Out"},{"id":"unit","value":"binBps"}]},{"matcher":{"id":"byName","options":"Value #E"},"properties":[{"id":"displayName","value":"Msgs In"},{"id":"unit","value":"mps"}]},{"matcher":{"id":"byName","options":"Value #F"},"properties":[{"id":"displayName","value":"Produce"},{"id":"unit","value":"reqps"}]},{"matcher":{"id":"byName","options":"Value #G"},"properties":[{"id":"displayName","value":"Fetch"},{"id":"unit","value":"reqps"}]},{"matcher":{"id":"byName","options":"Value #H"},"properties":[{"id":"displayName","value":"Produce P99"},{"id":"unit","value":"ms"}]},{"matcher":{"id":"byName","options":"Value #I"},"properties":[{"id":"displayName","value":"Fetch P99"},{"id":"unit","value":"ms"}]},{"matcher":{"id":"byName","options":"Value 
#J"},"properties":[{"id":"unit","value":"reqps"},{"id":"thresholds","value":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":0}]}}]},{"matcher":{"id":"byName","options":"Value #K"},"properties":[{"id":"unit","value":"reqps"},{"id":"thresholds","value":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":0}]}}]},{"matcher":{"id":"byName","options":"Value #L"},"properties":[{"id":"unit","value":"reqps"}]}]},"gridPos":{"h":15,"w":24,"x":0,"y":37},"id":8,"options":{"cellHeight":"sm","footer":{"countRows":false,"enablePagination":true,"fields":"","reducer":["sum"],"show":false},"frameIndex":1,"showHeader":true,"sortBy":[{"desc":true,"displayName":"Bytes In"}]},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(instance) (kafka_partition_count{job=\"$cluster_id\"})","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(instance) (kafka_server_connection_count{job=\"$cluster_id\"})","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"B","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"code","exemplar":false,"expr":"sum by(instance) (rate(kafka_broker_network_io_bytes_total{job=\"$cluster_id\", direction=\"in\"}[$__rate_interval] offset 
2m))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"C","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"code","exemplar":false,"expr":"sum by(instance) (rate(kafka_broker_network_io_bytes_total{job=\"$cluster_id\", direction=\"out\"}[$__rate_interval] offset 2m))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"D","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"code","exemplar":false,"expr":"sum by(instance) (rate(kafka_message_count_total{job=\"$cluster_id\", direction=\"in\"}[$__rate_interval] offset 2m))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"E","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"code","exemplar":false,"expr":"sum by(instance) (rate(kafka_request_count_total{job=\"$cluster_id\", type=\"Produce\"}[$__rate_interval] offset 2m))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"F","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"code","exemplar":false,"expr":"sum by(instance) (rate(kafka_request_count_total{job=\"$cluster_id\", type=\"Fetch\"}[$__rate_interval] offset 2m))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"G","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(instance) (kafka_request_time_99p_milliseconds{job=\"$cluster_id\", 
type=\"Produce\"})","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"H","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(instance) (kafka_request_time_99p_milliseconds{job=\"$cluster_id\", type=\"Fetch\"})","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"I","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(instance) (rate(kafka_topic_request_failed_total{job=\"$cluster_id\", type=\"produce\"}[$__rate_interval]))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"J","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(instance) (rate(kafka_topic_request_failed_total{job=\"$cluster_id\", type=\"fetch\"}[$__rate_interval]))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"K","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(instance) (rate(kafka_request_error_count_total{job=\"$cluster_id\", error!=\"NONE\"}[$__rate_interval]))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"","range":false,"refId":"L","useBackend":false}],"title":"Broker Statistics","transformations":[{"id":"merge","options":{}},{"id":"organize","options":{"excludeByName":{"Time":true},"indexByName":{"Time":0,"Value #A":2,"Value #B":3,"Value #C":5,"Value #D":6,"Value #E":7,"Value 
#F":8,"Value #G":9,"Value #H":10,"Value #I":11,"Value #J":12,"Value #K":13,"Value #L":4,"instance":1},"renameByName":{"Value #J":"Failed Produce","Value #K":"Failed Fetch","Value #L":"Error Request"}}}],"type":"table"}],"refresh":"30s","schemaVersion":39,"tags":[],"templating":{"list":[{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count{instance_id=~\"$cmp_instance_id\", env_id=~\"$cmp_env_id\"},job)","description":"The cluster id of a Kafka cluster","includeAll":false,"label":"cluster_id","name":"cluster_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count{instance_id=~\"$cmp_instance_id\", env_id=~\"$cmp_env_id\"},job)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"","type":"query"},{"current":{},"includeAll":false,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"query_result(kafka_controller_active_count{job=\"$cluster_id\"} > 0)","includeAll":false,"label":"Active Controller","name":"active_controller","options":[],"query":{"qryType":3,"query":"query_result(kafka_controller_active_count{job=\"$cluster_id\"} > 0)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"/instance=\"([^\"]+)/g","type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count,env_id)","includeAll":true,"label":"CMP Env Id","name":"cmp_env_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count,env_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","sort":1,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_node_info{job=~\"$cluster_id\", 
instance=\"$active_controller\"},version)","includeAll":false,"label":"Version","name":"version","options":[],"query":{"qryType":1,"query":"label_values(kafka_node_info{job=~\"$cluster_id\", instance=\"$active_controller\"},version)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"","sort":1,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","includeAll":true,"label":"CMP Instance Id","name":"cmp_instance_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","sort":1,"type":"query"}]},"time":{"from":"now-30m","to":"now"},"timepicker":{},"timezone":"","title":"Cluster Overview","uid":"f719833b-0a35-4fb3-9b84-3815726006e7","version":12,"weekStart":""} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/group.json b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/group.json new file mode 100644 index 0000000..09be579 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/group.json @@ -0,0 +1 @@ +{"__inputs":[{"name":"prometheus","label":"prometheus","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"},{"name":"DS_EXPRESSION","label":"Expression","description":"","type":"datasource","pluginId":"__expr__"}],"__elements":{},"__requires":[{"type":"datasource","id":"__expr__","version":"1.0.0"},{"type":"grafana","id":"grafana","name":"Grafana","version":"11.2.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"1.0.0"},{"type":"panel","id":"timeseries","name":"Time series","version":""}],"annotations":{"list":[{"builtIn":1,"datasource":{"type":"grafana","uid":"-- Grafana 
--"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":1,"id":null,"links":[],"panels":[{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"mps"},"overrides":[]},"gridPos":{"h":14,"w":12,"x":0,"y":0},"id":5,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(consumer_group, topic) (max by(consumer_group, topic, partition) (rate(kafka_group_commit_offset{job=\"$cluster_id\", consumer_group=~\"$group_id\", topic=~\"$topic\", partition=~\"$partition\"}[$__rate_interval])))","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{consumer_group}}#{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Consumer 
Throughput","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"min":0,"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"displayName","value":"${__field.labels.consumer_group}#${__field.labels.topic}"}]}]},"gridPos":{"h":14,"w":12,"x":12,"y":0},"id":1,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(consumer_group, topic) (max by(consumer_group, topic, partition) (kafka_group_commit_offset{job=\"$cluster_id\", consumer_group=~\"$group_id\", topic=~\"$topic\", partition=~\"$partition\"}))","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"{{consumer_group}}#{{topic}}","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic) (max by(topic, partition) (kafka_log_end_offset{job=\"$cluster_id\", topic=~\"$topic\", 
partition=~\"$partition\"}))","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"B","useBackend":false},{"datasource":{"type":"__expr__","uid":"${DS_EXPRESSION}"},"expression":"$B - $A","hide":false,"refId":"C","type":"math"}],"title":"Consumer Lag","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"mps"},"overrides":[]},"gridPos":{"h":14,"w":12,"x":0,"y":14},"id":2,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(consumer_group, topic, partition) (rate(kafka_group_commit_offset{job=\"$cluster_id\", consumer_group=~\"$group_id\", topic=~\"$topic\", partition=~\"$partition\"}[$__rate_interval]))","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{consumer_group}}#{{topic}}-{{partition}}","range":true,"refId":"A","useBackend":false}],"title":"Consumer Throughput By 
Partition","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"min":0,"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"displayName","value":"${__field.labels.consumer_group}#${__field.labels.topic}-${__field.labels.partition}"}]}]},"gridPos":{"h":14,"w":12,"x":12,"y":14},"id":6,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"max by(consumer_group, topic, partition) (kafka_group_commit_offset{job=\"$cluster_id\", consumer_group=~\"$group_id\", topic=~\"$topic\", partition=~\"$partition\"})","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"{{consumer_group}}#{{topic}}-{{partition}}","range":true,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"max by(topic, partition) (kafka_log_end_offset{job=\"$cluster_id\", topic=~\"$topic\", 
partition=~\"$partition\"})","fullMetaSearch":false,"hide":true,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}-{{partition}}","range":true,"refId":"B","useBackend":false},{"datasource":{"type":"__expr__","uid":"${DS_EXPRESSION}"},"expression":"$B - $A","hide":false,"refId":"C","type":"math"}],"title":"Consumer Lag By Partition","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"min":0,"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"displayName","value":"${__field.labels.consumer_group}#${__field.labels.topic}-${__field.labels.partition}"}]}]},"gridPos":{"h":14,"w":12,"x":0,"y":28},"id":3,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"max by(consumer_group, topic, partition) (kafka_group_commit_offset{job=\"$cluster_id\", consumer_group=~\"$group_id\", topic=~\"$topic\", 
partition=~\"$partition\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{consumer_group}}#{{topic}}-{{partition}}","range":true,"refId":"A","useBackend":false}],"title":"Consumer Commit Offsets","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"min":0,"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"displayName","value":"${__field.labels.consumer_group}#${__field.labels.topic}-${__field.labels.partition}"}]}]},"gridPos":{"h":14,"w":12,"x":12,"y":28},"id":4,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"max by(topic, partition) (kafka_log_end_offset{job=\"$cluster_id\", topic=~\"$topic\", partition=~\"$partition\"})","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}-{{partition}}","range":true,"refId":"B","useBackend":false}],"title":"Log End Offset","type":"timeseries"}],"refresh":"1m","schemaVersion":39,"tags":[],"templating":{"list":[{"current":{},"includeAll":false,"label":"Data 
Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_server_connection_count{instance_id=~\"$cmp_instance_id\", env_id=~\"$cmp_env_id\"},job)","includeAll":false,"label":"Cluster Id","name":"cluster_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_server_connection_count{instance_id=~\"$cmp_instance_id\", env_id=~\"$cmp_env_id\"},job)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"","type":"query"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_group_commit_offset{topic=~\"$topic\", job=\"$cluster_id\"},consumer_group)","includeAll":true,"label":"Consumer Group","multi":true,"name":"group_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_group_commit_offset{topic=~\"$topic\", job=\"$cluster_id\"},consumer_group)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","sort":1,"type":"query"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_group_commit_offset{job=\"$cluster_id\"},topic)","includeAll":true,"label":"Topic","multi":true,"name":"topic","options":[],"query":{"qryType":1,"query":"label_values(kafka_group_commit_offset{job=\"$cluster_id\"},topic)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","type":"query"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_group_commit_offset{topic=~\"$topic\", job=\"$cluster_id\"},partition)","includeAll":true,"label":"Partition","multi":true,"name":"partition","options":[],"query":{"qryType":1,"query":"label_values(kafka_group_commit_offset{topic=~\"$topic\", 
job=\"$cluster_id\"},partition)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"","sort":3,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count,env_id)","includeAll":true,"label":"CMP Env Id","name":"cmp_env_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count,env_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","sort":1,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","includeAll":true,"label":"CMP Instance Id","name":"cmp_instance_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Group Metrics","uid":"d90be32f-7d32-488a-b99c-3e21529790e4","version":7,"weekStart":""} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/topics.json b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/topics.json new file mode 100644 index 0000000..c5ec9f9 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/dashboard/topics.json @@ -0,0 +1 @@ 
+{"__inputs":[{"name":"prometheus","label":"prometheus","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__elements":{},"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"11.2.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"1.0.0"},{"type":"panel","id":"stat","name":"Stat","version":""},{"type":"panel","id":"table","name":"Table","version":""},{"type":"panel","id":"timeseries","name":"Time series","version":""}],"annotations":{"list":[{"builtIn":1,"datasource":{"type":"grafana","uid":"-- Grafana --"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":1,"id":null,"links":[],"panels":[{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]},"unit":"binBps"},"overrides":[{"matcher":{"id":"byName","options":"Network-in"},"properties":[{"id":"color","value":{"fixedColor":"semi-dark-green","mode":"fixed"}}]},{"matcher":{"id":"byName","options":"Network-out"},"properties":[{"id":"color","value":{"fixedColor":"semi-dark-blue","mode":"fixed"}}]}]},"gridPos":{"h":10,"w":5,"x":0,"y":0},"id":7,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(direction, job) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", topic=~\"$topic\", 
partition=~\"$partition\"}[$__rate_interval]))","fullMetaSearch":false,"hide":false,"includeNullMetadata":false,"instant":false,"legendFormat":"Network-{{direction}}","range":true,"refId":"A","useBackend":false}],"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":5,"y":0},"id":8,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(kafka_log_size{job=\"$cluster_id\", topic=~\"$topic\"})","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Size","range":true,"refId":"A","useBackend":false}],"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":8,"x":8,"y":0},"id":14,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"mult
i","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(node_id, topic) (kafka_stream_topic_partition_count{job=\"$cluster_id\", topic=~\"$topic\", instance=~\"$node_id\"})","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}-Node#{{node_id}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Partition Count Per Broker","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":8,"x":16,"y":0},"id":15,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(rack, topic) (kafka_stream_topic_partition_count{job=\"$cluster_id\", topic=~\"$topic\"})","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}-Rack#{{rack}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Partition Count Per 
Rack","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]},"unit":"short"},"overrides":[]},"gridPos":{"h":5,"w":3,"x":5,"y":5},"id":6,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum(count by(topic) (max by(topic, partition) (kafka_log_size{job=\"$cluster_id\", topic=~\"$topic\"})))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"Partition Count","range":true,"refId":"A","useBackend":false}],"type":"stat"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":0,"y":10},"id":4,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":fa
lse,"editorMode":"builder","expr":"sum by(topic) (max by(topic, partition) (kafka_log_size{job=\"$cluster_id\", topic=~\"$topic\"}))","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Size","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":8,"y":10},"id":10,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_log_size{job=\"$cluster_id\", topic=~\"$topic\", partition=~\"$partition\", instance=~\"$node_id\"}","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}-{{partition}}-Node#{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Partition 
Size","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":10,"w":8,"x":16,"y":10},"id":13,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_log_end_offset{job=\"$cluster_id\", topic=~\"$topic\", partition=~\"$partition\", instance=~\"$node_id\"}","fullMetaSearch":false,"includeNullMetadata":true,"instant":false,"legendFormat":"{{topic}}-{{partition}}-Node#{{instance}}","range":true,"refId":"A","useBackend":false}],"title":"Partition Log End 
Offset","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"mps"},"overrides":[]},"gridPos":{"h":11,"w":8,"x":0,"y":20},"id":3,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic) (rate(kafka_message_count_total{job=\"$cluster_id\", topic=~\"$topic\", direction=\"in\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Messages 
In","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":11,"w":8,"x":8,"y":20},"id":1,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic) (max by(topic, partition) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", topic=~\"$topic\", direction=\"in\", instance=~\"$node_id\"}[$__rate_interval])))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Bytes 
In","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":11,"w":8,"x":16,"y":20},"id":2,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic) (max by(topic, partition) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", topic=~\"$topic\", direction=\"out\", instance=~\"$node_id\"}[$__rate_interval])))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Bytes 
Out","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]},"unit":"mps"},"overrides":[]},"gridPos":{"h":11,"w":8,"x":0,"y":31},"id":19,"interval":"30s","options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic, partition) (rate(kafka_log_end_offset{job=\"$cluster_id\", topic=~\"$topic\", topic!~\"__cluster_metadata\", partition=~\"$partition\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}-{{partition}}","range":true,"refId":"A","useBackend":false}],"title":"Partition Messages 
In","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":11,"w":8,"x":8,"y":31},"id":11,"interval":"30s","options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic, partition) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", topic=~\"$topic\", direction=\"in\", partition=~\"$partition\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}-{{partition}}","range":true,"refId":"A","useBackend":false}],"title":"Partition Bytes 
In","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":11,"w":8,"x":16,"y":31},"id":12,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic, partition) (rate(kafka_network_io_bytes_total{job=\"$cluster_id\", topic=~\"$topic\", direction=\"out\", partition=~\"$partition\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}-{{partition}}","range":true,"refId":"A","useBackend":false}],"title":"Partition Bytes 
Out","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":11,"w":12,"x":0,"y":42},"id":9,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic) (rate(kafka_topic_request_count_total{job=\"$cluster_id\", topic=~\"$topic\", type=\"produce\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Produce 
QPS","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":11,"w":12,"x":12,"y":42},"id":16,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"sum by(topic) (rate(kafka_topic_request_count_total{job=\"$cluster_id\", topic=~\"$topic\", type=\"fetch\", instance=~\"$node_id\"}[$__rate_interval]))","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Topic Fetch 
QPS","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":11,"w":12,"x":0,"y":53},"id":17,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_tabletopic_fps{job=\"$cluster_id\", topic=~\"$topic\"}","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Table Fields Per 
Second","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":15,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"dtdurationms"},"overrides":[]},"gridPos":{"h":11,"w":12,"x":12,"y":53},"id":18,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","expr":"kafka_tabletopic_delay_milliseconds{job=\"$cluster_id\", topic=~\"$topic\"}","fullMetaSearch":false,"includeNullMetadata":false,"instant":false,"legendFormat":"{{topic}}","range":true,"refId":"A","useBackend":false}],"title":"Table Watermark","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":"center","cellOptions":{"type":"auto"},"filterable":true,"inspect":false},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"custom.hidden","value":true}]},{"matcher":{"id":"byName","options":"Value #B"},"properties":[{"id":"displayName","value":"End 
Offset"},{"id":"custom.hidden","value":true}]},{"matcher":{"id":"byName","options":"consumer_group"},"properties":[{"id":"displayName","value":"Consumer Group"},{"id":"links","value":[{"targetBlank":true,"title":"Show Group Metrics","url":"/d/d90be32f-7d32-488a-b99c-3e21529790e4/group-metrics?var-datasource=${datasource}&var-cluster_id=${cluster_id}&var-group_id=${__value.text}&var-topic=${__data.fields.topic}"}]}]},{"matcher":{"id":"byName","options":"topic"},"properties":[{"id":"displayName","value":"Topic"}]},{"matcher":{"id":"byName","options":"Value #A"},"properties":[{"id":"custom.hidden","value":true}]}]},"gridPos":{"h":11,"w":24,"x":0,"y":64},"id":5,"options":{"cellHeight":"sm","footer":{"countRows":false,"enablePagination":true,"fields":"","reducer":["sum"],"show":false},"showHeader":true,"sortBy":[{"desc":true,"displayName":"Consumer Lag"}]},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(topic, consumer_group) (max by(topic, partition, consumer_group) (kafka_group_commit_offset{job=\"$cluster_id\", topic=~\"$topic\", partition=~\"$partition\"}))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"__auto","range":false,"refId":"A","useBackend":false},{"datasource":{"type":"prometheus","uid":"prometheus"},"disableTextWrap":false,"editorMode":"builder","exemplar":false,"expr":"sum by(topic) (max by(topic, partition) (kafka_log_end_offset{job=\"$cluster_id\", topic=~\"$topic\", partition=~\"$partition\"}))","format":"table","fullMetaSearch":false,"hide":false,"includeNullMetadata":true,"instant":true,"legendFormat":"__auto","range":false,"refId":"B","useBackend":false}],"title":"Consume Statistics","transformations":[{"id":"merge","options":{}},{"id":"calculateField","options":{"alias":"Consumer Lag","binary":{"left":"Value #B","operator":"-","right":"Value 
#A"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"filterByValue","options":{"filters":[{"config":{"id":"isNotNull","options":{}},"fieldName":"consumer_group"}],"match":"any","type":"include"}}],"type":"table"}],"refresh":"","schemaVersion":39,"tags":[],"templating":{"list":[{"current":{},"hide":0,"includeAll":false,"label":"Data Source","multi":false,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count{instance_id=~\"$cmp_instance_id\", env_id=~\"$cmp_env_id\"},job)","hide":0,"includeAll":false,"label":"Cluster Id","multi":false,"name":"cluster_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count{instance_id=~\"$cmp_instance_id\", env_id=~\"$cmp_env_id\"},job)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":0,"type":"query"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_log_size{job=\"$cluster_id\"},topic)","hide":0,"includeAll":true,"label":"Topic","multi":true,"name":"topic","options":[],"query":{"qryType":1,"query":"label_values(kafka_log_size{job=\"$cluster_id\"},topic)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":0,"type":"query"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_log_size{topic=~\"$topic\", job=\"$cluster_id\"},partition)","hide":0,"includeAll":true,"label":"Partition","multi":true,"name":"partition","options":[],"query":{"qryType":1,"query":"label_values(kafka_log_size{topic=~\"$topic\", 
job=\"$cluster_id\"},partition)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":3,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","hide":0,"includeAll":true,"label":"CMP Instance Id","multi":false,"name":"cmp_instance_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count{env_id=~\"$cmp_env_id\"},instance_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"allValue":".*","current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_broker_active_count,env_id)","hide":0,"includeAll":true,"label":"CMP Env Id","multi":false,"name":"cmp_env_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_broker_active_count,env_id)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"current":{},"datasource":{"type":"prometheus","uid":"prometheus"},"definition":"label_values(kafka_log_size{job=\"$cluster_id\", topic=~\"$topic\"},instance)","hide":0,"includeAll":true,"label":"Node Id","multi":true,"name":"node_id","options":[],"query":{"qryType":1,"query":"label_values(kafka_log_size{job=\"$cluster_id\", topic=~\"$topic\"},instance)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":3,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Topic Metrics","uid":"b908cc7a-3592-405d-aafb-fc9225219b0a","version":11,"weekStart":""} \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml index 1ccf67a..eb3ac42 100644 --- 
a/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml +++ b/cloudservice-setup/aws/eks-benchmark/terraform/monitoring/prometheus.yaml @@ -1,5 +1,5 @@ rbac: - create: false + create: true podSecurityPolicy: enabled: false @@ -22,8 +22,18 @@ grafana: nodeSelector: workload-type: benchmark + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + searchNamespace: "monitoring" + + prometheus: prometheusSpec: + serviceMonitorSelector: + matchLabels: + prometheus: automq enableRemoteWriteReceiver: true server: diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf index d4d1544..15dde5f 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf @@ -17,6 +17,25 @@ resource "kubernetes_namespace_v1" "monitoring" { depends_on = [module.eks-env] } +resource "kubernetes_config_map" "grafana_dashboard_config" { + metadata { + name = "grafana_dashboard_config" + namespace = kubernetes_namespace_v1.monitoring.metadata[0].name + labels = { + grafana_dashboard = "1" + prometheus = "automq" + } + } + data = { + "cluster.json" = file("${path.module}/monitoring/dashboard/cluster.json") + "broker.json" = file("${path.module}/monitoring/dashboard/broker.json") + "topic.json" = file("${path.module}/monitoring/dashboard/topic.json") + "group.json" = file("${path.module}/monitoring/dashboard/group.json") + } + depends_on = [kubernetes_namespace_v1.monitoring] +} + + resource "helm_release" "prometheus" { chart = "kube-prometheus-stack" repository = "https://prometheus-community.github.io/helm-charts" @@ -38,6 +57,7 @@ resource "helm_release" "prometheus" { depends_on = [ module.eks-env, kubernetes_namespace_v1.monitoring, - aws_eks_node_group.benchmark_node_group + aws_eks_node_group.benchmark_node_group, + kubernetes_config_map.cluster_dashboard_config ] } \ No newline at end of file From 
1a73df42c2dabe6bef1b32b4d638e6041fbe491a Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Sat, 1 Nov 2025 09:05:27 +0800 Subject: [PATCH 16/20] Adjust the README according to the actual creation situation. --- cloudservice-setup/aws/eks-benchmark/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index 8d3c2ac..9744175 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -113,10 +113,8 @@ To visit the observability stack, use the following command to obtain the public The username is admin, and the password can be obtained through the command below. If you wish to change it, you can configure it in the `./terraform/monitoring/prometheus.yaml` file. -AutoMQ provides [grafana official dashboards](https://www.automq.com/docs/automq/observability/dashboard-configuration), -and you can contact the AutoMQ team to obtain the dashboard JSON. Once users export metrics to Prometheus, they can -import these Grafana dashboard templates, configure the Grafana data source to link to the respective Prometheus, and -begin monitoring AutoMQ. +AutoMQ provides [grafana official dashboards](https://www.automq.com/docs/automq/observability/dashboard-configuration). Once users export metrics to Prometheus, they can import these Grafana dashboard templates, configure the Grafana data source to link to the respective Prometheus, and begin monitoring AutoMQ. At this step, Terraform will help you create these dashboards in Grafana. + ```bash # Get the public endpoint of Grafana. Please make sure to use the HTTP protocol for access. 
From a7ea06af9dbb818ad50655564b23126d259f47d2 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Mon, 3 Nov 2025 17:13:45 +0800 Subject: [PATCH 17/20] Modify the Terraform configuration file for automatically creating Grafana dashboards --- .../aws/eks-benchmark/terraform/prometheus.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf index 15dde5f..76320bd 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/prometheus.tf @@ -17,9 +17,9 @@ resource "kubernetes_namespace_v1" "monitoring" { depends_on = [module.eks-env] } -resource "kubernetes_config_map" "grafana_dashboard_config" { +resource "kubernetes_config_map" "grafana-dashboard-config" { metadata { - name = "grafana_dashboard_config" + name = "grafana-dashboard-config" namespace = kubernetes_namespace_v1.monitoring.metadata[0].name labels = { grafana_dashboard = "1" @@ -29,7 +29,7 @@ resource "kubernetes_config_map" "grafana_dashboard_config" { data = { "cluster.json" = file("${path.module}/monitoring/dashboard/cluster.json") "broker.json" = file("${path.module}/monitoring/dashboard/broker.json") - "topic.json" = file("${path.module}/monitoring/dashboard/topic.json") + "topics.json" = file("${path.module}/monitoring/dashboard/topics.json") "group.json" = file("${path.module}/monitoring/dashboard/group.json") } depends_on = [kubernetes_namespace_v1.monitoring] @@ -58,6 +58,6 @@ resource "helm_release" "prometheus" { module.eks-env, kubernetes_namespace_v1.monitoring, aws_eks_node_group.benchmark_node_group, - kubernetes_config_map.cluster_dashboard_config + kubernetes_config_map.grafana-dashboard-config ] } \ No newline at end of file From ecea51ebe6e91c1addd52d45415ab38accbda847 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Mon, 3 Nov 2025 19:34:34 +0800 
Subject: [PATCH 18/20] Add an automation script to modify and create automq configurations. --- .../aws/eks-benchmark/README.md | 7 +- .../aws/eks-benchmark/automq/terraform.tfvars | 2 +- .../eks-benchmark/modify-automq-tf-config.sh | 89 +++++++++++++++++++ .../aws/eks-benchmark/terraform/outputs.tf | 26 +++++- kubernetes/aws/terraform/network/outputs.tf | 5 ++ kubernetes/aws/terraform/outputs.tf | 5 ++ 6 files changed, 125 insertions(+), 9 deletions(-) create mode 100755 cloudservice-setup/aws/eks-benchmark/modify-automq-tf-config.sh diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index 9744175..baff732 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -137,6 +137,8 @@ to create a Service Account and obtain the `Client ID` and `Client Secret` (used The following are the parameters you must fill in within the `terraform.tfvars` file, and the Notes document provides information on how to obtain them along with their explanations. The remaining parameters can be further configured according to the needs of the cluster. +We have prepared a script for you, `modify-automq-tf-config.sh`, which automatically fills in the required variables. You can execute this script, and it will automatically populate the necessary parameter information for you. 
+ | Parameter | Description | Notes | |------------------------------------|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| | `automq_byoc_endpoint` | AutoMQ BYOC Console API endpoint | Get from output of step1 | @@ -150,16 +152,13 @@ The following are the parameters you must fill in within the `terraform.tfvars` ```bash +./modify-automq-tf-config.sh cd ./automq terraform init terraform plan terraform apply ``` -#### AutoMQ tfvars Parameters - -Use the following variables in `cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars` to connect Terraform to -your AutoMQ Console and environment: ### Step 3: Run Benchmark Tests diff --git a/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars b/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars index 1e92b07..2dc7627 100644 --- a/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars +++ b/cloudservice-setup/aws/eks-benchmark/automq/terraform.tfvars @@ -17,7 +17,7 @@ automq_environment_id = "automqlab-id" # Prometheus Integration Configuration prometheus_integration_name = "prometheus-remote-write" prometheus_integration_type = "prometheusRemoteWrite" -prometheus_remote_write_endpoint = "http://prometheus-prometheus-server.monitoring:9090/api/v1/write" +prometheus_remote_write_endpoint = "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/write" prometheus_auth_type = "noauth" # AutoMQ Deploy Profile Configuration diff --git a/cloudservice-setup/aws/eks-benchmark/modify-automq-tf-config.sh b/cloudservice-setup/aws/eks-benchmark/modify-automq-tf-config.sh new file mode 100755 index 0000000..e374602 --- /dev/null +++ b/cloudservice-setup/aws/eks-benchmark/modify-automq-tf-config.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Paths +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +TF_DIR="$SCRIPT_DIR/terraform" 
+TFVARS_FILE="$SCRIPT_DIR/automq/terraform.tfvars" + +# Checks +command -v terraform >/dev/null 2>&1 || { echo "[ERROR] terraform is not installed or not in PATH"; exit 1; } +command -v jq >/dev/null 2>&1 || { echo "[ERROR] jq is required (brew install jq)"; exit 1; } + +if [ ! -d "$TF_DIR" ]; then + echo "[ERROR] Terraform directory not found: $TF_DIR" + exit 1 +fi + +if [ ! -f "$TFVARS_FILE" ]; then + echo "[WARN] tfvars file not found: $TFVARS_FILE" + echo "[INFO] Creating from example if present..." + if [ -f "$SCRIPT_DIR/automq/terraform.tfvars.example" ]; then + cp "$SCRIPT_DIR/automq/terraform.tfvars.example" "$TFVARS_FILE" + echo "[INFO] Created $TFVARS_FILE from example" + else + echo "[ERROR] Missing $TFVARS_FILE and no example found. Please create it."; exit 1 + fi +fi + +echo "[INFO] Reading terraform outputs from: $TF_DIR" +OUT_JSON="$(terraform -chdir="$TF_DIR" output -json)" + +get_output() { + local key="$1" + echo "$OUT_JSON" | jq -r ".[\"$key\"].value" 2>/dev/null || true +} + +# Extract values from outputs +VPC_ID="$(get_output vpc_id)" +REGION="$(get_output region)" +AZ="$(get_output default_az)" +BYOC_ENDPOINT="$(get_output console_endpoint)" +AUTOMQ_ENV_ID="$(get_output automq_environment_id)" + +# Basic validation +for kv in VPC_ID REGION AZ BYOC_ENDPOINT AUTOMQ_ENV_ID; do + if [ -z "${!kv}" ] || [ "${!kv}" = "null" ]; then + echo "[WARN] Output $kv is empty. Ensure you have applied Terraform in $TF_DIR and outputs are defined." + fi +done + +echo "[INFO] Please input AutoMQ BYOC credentials (from Console Service Account)" +read -r -p "automq_byoc_access_key_id: " BYOC_AKID +read -r -s -p "automq_byoc_secret_key (hidden): " BYOC_SK +echo + +if [ -z "$BYOC_AKID" ] || [ -z "$BYOC_SK" ]; then + echo "[ERROR] Both automq_byoc_access_key_id and automq_byoc_secret_key are required." 
+ exit 1 +fi + +# Helper: set key = "value" in a tfvars file (rewrite the existing quoted assignment, or append a new one) +update_tfvar() { + local file="$1" key="$2" value="$3" + local esc_val + esc_val="$(printf '%s' "$value" | sed -e 's/[&|\\]/\\&/g')" + if grep -E "^${key}[[:space:]]*=" "$file" >/dev/null 2>&1; then + # -i.bak plus rm works with both GNU and BSD/macOS sed (the BSD-only -i '' form fails on Linux) + sed -i.bak -E "s|^${key}[[:space:]]*=[[:space:]]*\"[^\"]*\"|${key} = \"${esc_val}\"|" "$file" && rm -f "${file}.bak" + else + printf '%s\n' "${key} = \"${value}\"" >> "$file" + fi +} + +echo "[INFO] Updating $TFVARS_FILE" +update_tfvar "$TFVARS_FILE" vpc_id "$VPC_ID" +update_tfvar "$TFVARS_FILE" region "$REGION" +update_tfvar "$TFVARS_FILE" az "$AZ" +update_tfvar "$TFVARS_FILE" automq_byoc_endpoint "$BYOC_ENDPOINT" +update_tfvar "$TFVARS_FILE" automq_environment_id "$AUTOMQ_ENV_ID" +update_tfvar "$TFVARS_FILE" automq_byoc_access_key_id "$BYOC_AKID" +update_tfvar "$TFVARS_FILE" automq_byoc_secret_key "$BYOC_SK" + +echo "[DONE] Updated $TFVARS_FILE with values from Terraform outputs and provided credentials." +echo " - vpc_id=$VPC_ID" +echo " - region=$REGION" +echo " - az=$AZ" +echo " - automq_byoc_endpoint=$BYOC_ENDPOINT" +echo " - automq_environment_id=$AUTOMQ_ENV_ID" \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf index d2cc28b..64d02d9 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf @@ -23,6 +23,27 @@ output "cluster_name" { value = module.eks-env.cluster_name } +output "region" { + description = "AWS region where resources are deployed" + value = var.region +} + +output "vpc_id" { + description = "VPC ID used by the EKS environment" + value = module.eks-env.vpc_id +} + +output "default_az" { + description = "Selected availability zone (first private subnet AZ)" + value = module.eks-env.azs[0] + +} + +output "automq_environment_id" { + description = "AutoMQ Environment ID used for BYOC" + value = 
module.automq-byoc.automq_byoc_env_id +} + # Benchmark Node Group Outputs output "benchmark_node_group_name" { description = "Name of the benchmark node group" @@ -34,10 +55,7 @@ output "benchmark_node_group_arn" { value = var.enable_benchmark_nodes ? aws_eks_node_group.benchmark_node_group[0].arn : null } -output "vpc_id" { - description = "VPC Id of the cluster" - value = module.eks-env.vpc_id -} + output "automq_control_panel_env_id" { description = "environment id of control panel" diff --git a/kubernetes/aws/terraform/network/outputs.tf b/kubernetes/aws/terraform/network/outputs.tf index 96a011c..d40d511 100644 --- a/kubernetes/aws/terraform/network/outputs.tf +++ b/kubernetes/aws/terraform/network/outputs.tf @@ -9,6 +9,11 @@ output "vpc_id" { depends_on = [module.vpc] } +output "azs" { + description = "Availability Zones" + value = module.vpc.azs +} + output "public_subnets" { description = "Public Subnets" value = module.vpc.public_subnets diff --git a/kubernetes/aws/terraform/outputs.tf b/kubernetes/aws/terraform/outputs.tf index d5f749d..fac7b2f 100644 --- a/kubernetes/aws/terraform/outputs.tf +++ b/kubernetes/aws/terraform/outputs.tf @@ -10,6 +10,11 @@ output "vpc_id" { value = module.network.vpc_id } +output "azs" { + description = "List of Availability Zones" + value = module.network.azs +} + output "cluster_name" { description = "EKS Cluster Name" value = module.eks.eks_cluster_name From 9818f19219b705537f7f024de8c0298f8a93d6db Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 5 Nov 2025 10:55:48 +0800 Subject: [PATCH 19/20] Optimize the readme by changing the automq node group to an ondemand instance. 
--- .../aws/eks-benchmark/README.md | 38 +++++++++---------- kubernetes/aws/terraform/main.tf | 2 +- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index baff732..18c04d2 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -106,14 +106,20 @@ To access the EKS cluster using this command, and the placeholders in the comman obtained from the output above. ```bash -aws eks update-kubeconfig --region [your-region] --name [your-cluster-name] +cd ./terraform +REGION=$(terraform output -raw region) +CLUSTER_NAME=$(terraform output -raw cluster_name) + +aws eks update-kubeconfig --region $REGION --name $CLUSTER_NAME ``` To visit the observability stack, use the following command to obtain the public endpoint of Grafana. The username is admin, and the password can be obtained through the command below. If you wish to change it, you can configure it in the `./terraform/monitoring/prometheus.yaml` file. -AutoMQ provides [grafana official dashboards](https://www.automq.com/docs/automq/observability/dashboard-configuration). Once users export metrics to Prometheus, they can import these Grafana dashboard templates, configure the Grafana data source to link to the respective Prometheus, and begin monitoring AutoMQ. At this step, Terraform will help you create these dashboards in Grafana. +AutoMQ provides [grafana official dashboards](https://www.automq.com/docs/automq/observability/dashboard-configuration). In this example, Grafana dashboards come pre-installed with broker, topic, group, and cluster dashboards. + +Terraform will help you create these dashboards in Grafana. If you need further guidance, please feel free to [contact the AutoMQ team](https://www.automq.com/contact). 
```bash @@ -126,32 +132,24 @@ kubectl get secret prometheus-grafana -n monitoring -o jsonpath="{.data.admin-pa ### Step 2: Deploy AutoMQ Instance -1.Follow [Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) -to create a Service Account and obtain the `Client ID` and `Client Secret` (used as `automq_byoc_access_key_id` and`automq_byoc_secret_key`). It is recommended to use EnvironmentAdmin for convenient management of all resources. +1. Follow [Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) to create a Service Account and obtain the `Client ID` and `Client Secret` (Remember to save these two pieces of information, as you will need to enter them in the subsequent installation script). -2. In the AutoMQ Console, create a Deploy Profile for the EKS environment (e.g., named `eks`). Kubernetes Cluster, DNS ZoneId, Bucket Name, and IAM Role ARN are all obtained from the output of the previous step. - Reference: [Create a Deploy Profile](https://www.automq.com/docs/automq-cloud/deploy-automq-on-kubernetes/deploy-to-aws-eks#step-12%3A-access-the-environment-console-and-create-deployment-configuration). +For this service account, you need to select EnvironmentAdmin to easily create and manage resources. -3. Fill variables `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability - integration. You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. +2. In the AutoMQ Console, create a Deploy Profile named `eks` for the EKS environment. Get the bucket name from the default profile. Kubernetes Cluster, DNS ZoneId and Node pool IAM Role ARN are all obtained from the output of the previous step. 
-The following are the parameters you must fill in within the `terraform.tfvars` file, and the Notes document provides information on how to obtain them along with their explanations. The remaining parameters can be further configured according to the needs of the cluster. +It's worth noting that the EKS node pool role that needs to be created last was already created in the first step; you can simply copy the node_group_instance_profile_arn output from the output. -We have prepared a script for you, `modify-automq-tf-config.sh`, which automatically fills in the required variables. You can execute this script, and it will automatically populate the necessary parameter information for you. +Reference: [Create a Deploy Profile](https://www.automq.com/docs/automq-cloud/deploy-automq-on-kubernetes/deploy-to-aws-eks#step-12%3A-access-the-environment-console-and-create-deployment-configuration). -| Parameter | Description | Notes | -|------------------------------------|----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| -| `automq_byoc_endpoint` | AutoMQ BYOC Console API endpoint | Get from output of step1 | -| `automq_byoc_access_key_id` | BYOC API Access Key (Client ID) | Obtained when creating the Service Account in the previous step. | -| `automq_byoc_secret_key` | BYOC API Secret Key (Client Secret) | | -| `automq_deploy_profile_name` | Deploy Profile name created in Console | Obtained when creating the Deploy Profile in the previous step. | -| `automq_environment_id` | AutoMQ Environment ID | Get from output of step1 | -| `vpc_id` | VPC ID | Get from output of step1 | -| `automq_environment_id` | ENV ID | Get from output of step1 | -| `prometheus_remote_write_endpoint` | prometheus inner endpoint | Allow the AutoMQ control plane to access Prometheus using the Prometheus service name and namespace.
Use `kubectl get svc -n monitoring` to check. | +3. Fill variables `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability integration. You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. +We have prepared a script for you, `modify-automq-tf-config.sh`, which automatically fills in the required variables. The file is located in the root directory of this example. You can execute this script, and it will automatically populate the necessary parameter information for you. + +If you need further configuration, you can also refer to the comments and modify `automq/terraform.tfvars` directly. ```bash + ./modify-automq-tf-config.sh cd ./automq terraform init diff --git a/kubernetes/aws/terraform/main.tf b/kubernetes/aws/terraform/main.tf index da9789a..60efdd4 100644 --- a/kubernetes/aws/terraform/main.tf +++ b/kubernetes/aws/terraform/main.tf @@ -54,7 +54,7 @@ resource "aws_eks_node_group" "automq-node-groups" { subnet_ids = slice(module.network.private_subnets, 0, 1) ami_type = local.node_group.ami_type - capacity_type = "SPOT" # Use On-Demand instances, can switch to "SPOT" for cost savings + capacity_type = "ON_DEMAND" # Use On-Demand instances, can switch to "SPOT" for cost savings instance_types = [local.node_group.instance_type] # Node group auto-scaling configuration From 89b7a4ce3a3eaca9bd7519a91ad8f092d533ab83 Mon Sep 17 00:00:00 2001 From: lyx2000 <1419360299@qq.com> Date: Wed, 5 Nov 2025 18:08:54 +0800 Subject: [PATCH 20/20] Docs(benchmark): Improve testing guide and clarify endpoint usage Refactored the benchmark documentation to enhance clarity and provide better guidance for testers: * Specify mandatory `endpoint` parameter. * Documented the current throughput limit for client expectation setting. * Added steps for traffic repetition. * Outlined required validation checks: Dashboard EKS (Step 1) and subsequent checks (Step 2). * Optimized path instructions in the README. 
--- .../aws/eks-benchmark/README.md | 102 ++++++++++++------ .../automq-benchmark-chart/README.md | 68 ++++++------ .../automq-benchmark-chart/values.yaml | 2 +- .../aws/eks-benchmark/terraform/outputs.tf | 6 +- 4 files changed, 113 insertions(+), 65 deletions(-) diff --git a/cloudservice-setup/aws/eks-benchmark/README.md b/cloudservice-setup/aws/eks-benchmark/README.md index 18c04d2..858409f 100644 --- a/cloudservice-setup/aws/eks-benchmark/README.md +++ b/cloudservice-setup/aws/eks-benchmark/README.md @@ -44,6 +44,15 @@ Before using this project, ensure you have: - **Helm** (>= 3.0) - **AWS CLI** configured with appropriate permissions +### Environment Setup + +To ensure all commands execute correctly, set the `BASE_DIR` environment variable to the root directory of this +repository: + +```bash +export BASE_DIR=$(pwd) +``` + ### Required Permissions - EKS cluster management permissions @@ -71,7 +80,7 @@ in this step. Tip: To control resource naming and avoid conflicts, set `resource_suffix` in `./terraform/variables.tf`. ```bash -cd ./terraform +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/terraform terraform init terraform plan ``` @@ -80,6 +89,7 @@ terraform plan 25-30 minutes. ```bash +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/terraform terraform apply ``` @@ -98,78 +108,111 @@ the `terraform output` command: | `dns_zone_id` | The Route 53 DNS Zone ID created for the BYOC environment. | | `vpc_id` | The ID of the VPC created for the environment. | | `env_id` | The ID of the AutoMQ environment. | +| `data_bucket` | The S3 data bucket of the AutoMQ environment. | Terraform will initiate the corresponding EKS-related nodes and the AutoMQ control plane, and create an AutoMQ cluster -within EKS. You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. +within EKS. + +Please follow the steps below to ensure that all newly created resources can be accessed normally before proceeding to +the next step. 
+ +#### Access Control Panel + +You can use console_endpoint and initial_username/initial_password to log in to the AutoMQ Console. + +#### Access EKS Cluster To access the EKS cluster using this command, and the placeholders in the command can be replaced with the actual values obtained from the output above. ```bash -cd ./terraform +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/terraform REGION=$(terraform output -raw region) CLUSTER_NAME=$(terraform output -raw cluster_name) aws eks update-kubeconfig --region $REGION --name $CLUSTER_NAME ``` +#### Access Grafana Dashboard + To visit the observability stack, use the following command to obtain the public endpoint of Grafana. The username is admin, and the password can be obtained through the command below. If you wish to change it, you can configure it in the `./terraform/monitoring/prometheus.yaml` file. -AutoMQ provides [grafana official dashboards](https://www.automq.com/docs/automq/observability/dashboard-configuration). In this example, Grafana dashboards come pre-installed with broker, topic, group, and cluster dashboards. - -Terraform will help you create these dashboards in Grafana. If you need further guidance, please feel free to [contact the AutoMQ team](https://www.automq.com/contact). +AutoMQ provides [grafana official dashboards](https://www.automq.com/docs/automq/observability/dashboard-configuration). +In this example, Grafana dashboards come pre-installed with broker, topic, group, and cluster dashboards. +Terraform will help you create these dashboards in Grafana. If you need further guidance, please feel free +to [contact the AutoMQ team](https://www.automq.com/contact). ```bash # Get the public endpoint of Grafana. Please make sure to use the HTTP protocol for access. 
kubectl get service prometheus-grafana -n monitoring # Get the Grafana password +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/terraform kubectl get secret prometheus-grafana -n monitoring -o jsonpath="{.data.admin-password}" | base64 --decode ``` ### Step 2: Deploy AutoMQ Instance -1. Follow [Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) to create a Service Account and obtain the `Client ID` and `Client Secret` (Remember to save these two pieces of information, as you will need to enter them in the subsequent installation script). +1. Follow +[Create a Service Account](https://www.automq.com/docs/automq-cloud/manage-identities-and-access/service-accounts#create-a-service-account) +to create a Service Account and obtain the `Client ID` and `Client Secret` +(Remember to save these two pieces of +information, as you will need to enter them in the subsequent installation script). For this service account, you need to select EnvironmentAdmin to easily create and manage resources. -2. In the AutoMQ Console, create a Deploy Profile named `eks` for the EKS environment. Get the bucket name from the default profile. Kubernetes Cluster, DNS ZoneId and Node pool IAM Role ARN are all obtained from the output of the previous step. +2. In the AutoMQ Console, create a Deploy Profile named `eks` for the EKS environment. -It's worth noting that the EKS node pool role that needs to be created last was already created in the first step; you can simply copy the node_group_instance_profile_arn output from the output. +Kubernetes Cluster, bucket name, DNS ZoneId and Node pool IAM Role ARN are all obtained from the output of the previous +step. + +When creating `Deploy Profiles`, in the second step `Configure IAM Authorization`, you do not need to perform the first +and second sub-steps.
You can directly copy the content of `node_group_instance_profile_arn` from the output into the +input box. Reference: [Create a Deploy Profile](https://www.automq.com/docs/automq-cloud/deploy-automq-on-kubernetes/deploy-to-aws-eks#step-12%3A-access-the-environment-console-and-create-deployment-configuration). -3. Fill variables `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability integration. You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. +3. Fill in the variables in `automq/terraform.tfvars` and apply Terraform to create the AutoMQ cluster with observability + integration. You may need to wait approximately 5 to 10 minutes for the cluster to be fully created. -We have prepared a script for you, `modify-automq-tf-config.sh`, which automatically fills in the required variables. The file is located in the root directory of this example. You can execute this script, and it will automatically populate the necessary parameter information for you. +We have prepared a script for you, `modify-automq-tf-config.sh`, which automatically fills in the required variables. +The file is located in the root directory of this example. You can execute this script, and it will automatically +populate the necessary parameter information for you. If you need further configuration, you can also refer to the comments and modify `automq/terraform.tfvars` directly. ```bash - -./modify-automq-tf-config.sh -cd ./automq +$BASE_DIR/cloudservice-setup/aws/eks-benchmark/modify-automq-tf-config.sh +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/automq terraform init terraform plan terraform apply ``` - ### Step 3: Run Benchmark Tests -This step executes performance tests against your AutoMQ cluster using configurable workloads. The benchmark simulates -Kafka usage patterns with customizable parameters for throughput, message size, topic configuration, and test duration. 
-The tests generate comprehensive metrics that are automatically collected by your monitoring stack. +This step involves executing performance tests on your AutoMQ cluster using customizable workloads. The benchmark is +designed to simulate Kafka usage patterns and allows you to adjust parameters like throughput, message size, topic +configuration, and test duration. These tests generate comprehensive metrics that are automatically collected by your +monitoring stack. + +To begin, you need to update the `bootstrapServer` parameter to point to the endpoint of your current cluster, which can +be found in the detailed cluster information from Step 2. In the `values.yaml` file, the default settings write 160 +messages per second, each 51 KiB in size (without batching), resulting in a write speed of approximately 8 MiB/s. -For specific configurations of helm values, you can refer to the [README](./automq-benchmark-chart/README.md) in the -automq-benchmark folder for further details. +For larger scale tests, you can modify the `recordSize` and `sendRate` parameters within the `values.yaml` file. For more +details on Helm configuration options, please refer to the [README](./automq-benchmark-chart/README.md) located in the +automq-benchmark-chart folder. -The current stress testing machine is a single node with a maximum network bandwidth of 10Gbps considering the node instance type. In the `values.yaml` file, the default rate is to write 160 messages per second, each 51 KiB in size (without any batching), with a write speed of 8 MiB/s. -If you need to conduct a larger scale test, you can adjust the parameters or contact the AutoMQ team for further assistance. -More details about stress testing tools can be found in this [blog](https://www.automq.com/blog/how-to-perform-a-performance-test-on-automq).
+With the current instance specifications and JVM parameter configurations, the setup can achieve approximately 200 MB/s +in a 1:1 production-to-consumption scenario, which should meet the performance testing needs of your 3-10 AKU AutoMQ +cluster. If you need to further increase throughput, consider upgrading the machine type of the test node group and +adjusting the JVM parameters. For more information, you can refer to the +AutoMQ [blog](https://www.automq.com/blog/how-to-perform-a-performance-test-on-automq) or consult with AutoMQ product +experts. **Expected Result**: Benchmark jobs will run and generate load against the AutoMQ cluster. Performance metrics including throughput, latency, and resource utilization will be collected and visible in Grafana dashboards. You should see data @@ -178,7 +221,7 @@ flowing through the system and performance characteristics of your AutoMQ deploy 1. **Configure benchmark parameters**: ```bash -cd helm-chart/automq-benchmark +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/helm-chart/automq-benchmark ``` 2. **Deploy benchmark workload**: @@ -190,11 +233,10 @@ helm install automq-benchmark . \ ``` 3. **View results in Grafana**: - - Access your Grafana dashboard - - Navigate to AutoMQ performance dashboards - - Observe real-time metrics during the test execution -After completing the above steps, you can see the corresponding metrics on the Grafana dashboard. Adjust the stress test parameters according to the corresponding specifications to further understand the specifications and performance related to AutoMQ. +After completing the above steps, you can see the corresponding metrics on the Grafana dashboard. Adjust the stress test +parameters to match the specifications of your cluster to further explore the capacity and performance +of AutoMQ.
## Cleanup @@ -205,10 +247,10 @@ To remove all deployed resources: helm uninstall automq-benchmark # Remove AutoMQ instance -cd automq +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/automq terraform destroy # Remove EKS and AutoMQ Console -cd terraform +cd $BASE_DIR/cloudservice-setup/aws/eks-benchmark/terraform terraform destroy ``` \ No newline at end of file diff --git a/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/README.md b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/README.md index 560f749..31e58c9 100644 --- a/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/README.md +++ b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/README.md @@ -13,15 +13,17 @@ This Helm chart deploys an AutoMQ benchmark job on a Kubernetes cluster. To install the chart with the release name `automq-benchmark`: ```bash -helm install automq-benchmark ./automq-benchmark +helm install automq-benchmark ./automq-benchmark-chart ``` To install with custom values: ```bash -helm install automq-benchmark ./automq-benchmark -f custom-values.yaml +helm install automq-benchmark ./automq-benchmark-chart -f custom-values.yaml ``` +**Note:** If you need to re-run the benchmark task, first uninstall the existing deployment using `helm uninstall automq-benchmark`, then reinstall the chart. + ## Uninstalling the Chart To uninstall/delete the `automq-benchmark` deployment: @@ -34,37 +36,37 @@ helm uninstall automq-benchmark The following table lists the configurable parameters of the AutoMQ benchmark chart and their default values. 
-| Parameter | Description | Default | -|-----------|-------------|---------| -| `job.name` | Name of the benchmark job | `automq-benchmark` | -| `job.completions` | Number of successful completions | `1` | -| `job.parallelism` | Number of parallel pods | `1` | -| `job.backoffLimit` | Number of retries before marking job as failed | `3` | -| `job.restartPolicy` | Restart policy for the job | `Never` | -| `image.repository` | AutoMQ image repository | `automqinc/automq` | -| `image.tag` | AutoMQ image tag | `latest` | -| `image.pullPolicy` | Image pull policy | `IfNotPresent` | -| `automq.username` | AutoMQ username | `user1` | -| `automq.password` | AutoMQ password | `MrCrSQTVoB` | -| `automq.bootstrapServer` | AutoMQ bootstrap server | `automq-release-kafka.automq.svc.cluster.local:9092` | -| `automq.securityProtocol` | Security protocol | `SASL_PLAINTEXT` | -| `automq.saslMechanism` | SASL mechanism | `PLAIN` | -| `benchmark.kafkaHeapOpts` | Kafka heap options | `-Xmx1g -Xms1g` | -| `benchmark.producerConfigs` | Producer configurations | `batch.size=0` | -| `benchmark.consumerConfigs` | Consumer configurations | `fetch.max.wait.ms=1000` | -| `benchmark.topics` | Number of topics | `10` | -| `benchmark.partitionsPerTopic` | Partitions per topic | `128` | -| `benchmark.producersPerTopic` | Producers per topic | `1` | -| `benchmark.groupsPerTopic` | Consumer groups per topic | `1` | -| `benchmark.consumersPerGroup` | Consumers per group | `1` | -| `benchmark.recordSize` | Record size in bytes | `52224` | -| `benchmark.sendRate` | Send rate (messages/sec) | `160` | -| `benchmark.warmupDuration` | Warmup duration in minutes | `3` | -| `benchmark.testDuration` | Test duration in minutes | `3` | -| `resources.requests.cpu` | CPU request | `500m` | -| `resources.requests.memory` | Memory request | `2Gi` | -| `resources.limits.cpu` | CPU limit | `2` | -| `resources.limits.memory` | Memory limit | `4Gi` | +| Parameter | Description | Default | 
+|--------------------------------|------------------------------------------------|------------------------------------------------------| +| `job.name` | Name of the benchmark job | `automq-benchmark` | +| `job.completions` | Number of successful completions | `1` | +| `job.parallelism` | Number of parallel pods | `1` | +| `job.backoffLimit` | Number of retries before marking job as failed | `3` | +| `job.restartPolicy` | Restart policy for the job | `Never` | +| `image.repository` | AutoMQ image repository | `automqinc/automq` | +| `image.tag` | AutoMQ image tag | `latest` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `automq.username` | AutoMQ username | `user1` | +| `automq.password` | AutoMQ password | `MrCrSQTVoB` | +| `automq.bootstrapServer` | AutoMQ bootstrap server | `automq-release-kafka.automq.svc.cluster.local:9092` | +| `automq.securityProtocol` | Security protocol | `SASL_PLAINTEXT` | +| `automq.saslMechanism` | SASL mechanism | `PLAIN` | +| `benchmark.kafkaHeapOpts` | Kafka heap options | `-Xmx3g -Xms3g` | +| `benchmark.producerConfigs` | Producer configurations | `batch.size=0` | +| `benchmark.consumerConfigs` | Consumer configurations | `fetch.max.wait.ms=1000` | +| `benchmark.topics` | Number of topics | `10` | +| `benchmark.partitionsPerTopic` | Partitions per topic | `128` | +| `benchmark.producersPerTopic` | Producers per topic | `1` | +| `benchmark.groupsPerTopic` | Consumer groups per topic | `1` | +| `benchmark.consumersPerGroup` | Consumers per group | `1` | +| `benchmark.recordSize` | Record size in bytes | `52224` | +| `benchmark.sendRate` | Send rate (messages/sec) | `160` | +| `benchmark.warmupDuration` | Warmup duration in minutes | `3` | +| `benchmark.testDuration` | Test duration in minutes | `3` | +| `resources.requests.cpu` | CPU request | `500m` | +| `resources.requests.memory` | Memory request | `2Gi` | +| `resources.limits.cpu` | CPU limit | `2` | +| `resources.limits.memory` | Memory limit | `4Gi` | ##
Example Custom Values diff --git a/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/values.yaml b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/values.yaml index 5dff7d6..662d927 100644 --- a/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/values.yaml +++ b/cloudservice-setup/aws/eks-benchmark/automq-benchmark-chart/values.yaml @@ -28,7 +28,7 @@ automq: # Benchmark configuration benchmark: # Kafka settings - kafkaHeapOpts: "-Xmx1g -Xms1g" + kafkaHeapOpts: "-Xmx3g -Xms3g" # Producer settings producerConfigs: "batch.size=0" diff --git a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf index 64d02d9..528fcbc 100644 --- a/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf +++ b/cloudservice-setup/aws/eks-benchmark/terraform/outputs.tf @@ -18,6 +18,11 @@ output "dns_zone_id" { value = module.automq-byoc.automq_byoc_vpc_route53_zone_id } +output "data_bucket" { + description = "Data bucket name for the AutoMQ BYOC environment" + value = "automq-data-${module.automq-byoc.automq_byoc_env_id}" +} + output "cluster_name" { description = "Name of the EKS cluster" value = module.eks-env.cluster_name @@ -56,7 +61,6 @@ output "benchmark_node_group_arn" { } - output "automq_control_panel_env_id" { description = "environment id of control panel" value = module.automq-byoc.automq_byoc_env_id