diff --git a/api/v1alpha1/clickhousecluster_types.go b/api/v1alpha1/clickhousecluster_types.go
index 8ba4ddc..f3a1b11 100644
--- a/api/v1alpha1/clickhousecluster_types.go
+++ b/api/v1alpha1/clickhousecluster_types.go
@@ -49,6 +49,13 @@ type ClickHouseClusterSpec struct {
 	// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Data Volume Claim Spec"
 	DataVolumeClaimSpec *corev1.PersistentVolumeClaimSpec `json:"dataVolumeClaimSpec,omitempty"`
 
+	// Additional persistent volume claims attached to each ClickHouse pod.
+	// For every entry the operator provisions a per-pod PVC named
+	// <name>-<statefulset-name>-<ordinal> and mounts it into the pod.
+	// Use for JBOD / multi-disk storage layouts.
+	// +optional
+	AdditionalDataVolumeClaimSpecs []AdditionalVolumeClaimSpec `json:"additionalDataVolumeClaimSpecs,omitempty"`
+
 	// Additional labels that are added to resources.
 	// +optional
 	Labels map[string]string `json:"labels,omitempty"`
@@ -79,6 +86,22 @@ type ClickHouseClusterSpec struct {
 	UpgradeChannel string `json:"upgradeChannel,omitempty"`
 }
 
+// AdditionalVolumeClaimSpec defines an additional persistent volume claim for a ClickHouse pod.
+type AdditionalVolumeClaimSpec struct {
+	// Name used as the per-pod PVC name prefix and the volume/volumeMount name.
+	// Must be unique and not collide with the primary data volume name.
+	// Must consist of lowercase alphanumeric characters or hyphens, and start and end with an alphanumeric character.
+	// Hyphens are automatically converted to underscores in the ClickHouse disk configuration.
+	// +kubebuilder:validation:Pattern=`^[a-z0-9]([a-z0-9-]*[a-z0-9])?$`
+	Name string `json:"name"`
+	// PVC spec for this additional volume.
+	Spec corev1.PersistentVolumeClaimSpec `json:"spec"`
+	// MountPath inside the ClickHouse container.
+	// If empty, defaults to /var/lib/clickhouse/disks/<name>.
+	// +optional
+	MountPath string `json:"mountPath,omitempty"`
+}
+
 // WithDefaults sets default values for ClickHouseClusterSpec fields.
func (s *ClickHouseClusterSpec) WithDefaults() { defaultSpec := ClickHouseClusterSpec{ @@ -122,6 +145,16 @@ func (s *ClickHouseClusterSpec) WithDefaults() { if s.DataVolumeClaimSpec != nil && len(s.DataVolumeClaimSpec.AccessModes) == 0 { s.DataVolumeClaimSpec.AccessModes = []corev1.PersistentVolumeAccessMode{DefaultAccessMode} } + + for i := range s.AdditionalDataVolumeClaimSpecs { + if len(s.AdditionalDataVolumeClaimSpecs[i].Spec.AccessModes) == 0 { + s.AdditionalDataVolumeClaimSpecs[i].Spec.AccessModes = []corev1.PersistentVolumeAccessMode{DefaultAccessMode} + } + if s.AdditionalDataVolumeClaimSpecs[i].MountPath == "" { + // Keep in sync with internal.AdditionalDiskBasePath. + s.AdditionalDataVolumeClaimSpecs[i].MountPath = "/var/lib/clickhouse/disks/" + s.AdditionalDataVolumeClaimSpecs[i].Name + } + } } // ClickHouseSettings defines ClickHouse server settings options. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 65306d7..6439b99 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -28,6 +28,22 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AdditionalVolumeClaimSpec) DeepCopyInto(out *AdditionalVolumeClaimSpec) { + *out = *in + in.Spec.DeepCopyInto(&out.Spec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdditionalVolumeClaimSpec. +func (in *AdditionalVolumeClaimSpec) DeepCopy() *AdditionalVolumeClaimSpec { + if in == nil { + return nil + } + out := new(AdditionalVolumeClaimSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ClickHouseCluster) DeepCopyInto(out *ClickHouseCluster) { *out = *in @@ -112,6 +128,13 @@ func (in *ClickHouseClusterSpec) DeepCopyInto(out *ClickHouseClusterSpec) { *out = new(v1.PersistentVolumeClaimSpec) (*in).DeepCopyInto(*out) } + if in.AdditionalDataVolumeClaimSpecs != nil { + in, out := &in.AdditionalDataVolumeClaimSpecs, &out.AdditionalDataVolumeClaimSpecs + *out = make([]AdditionalVolumeClaimSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Labels != nil { in, out := &in.Labels, &out.Labels *out = make(map[string]string, len(*in)) diff --git a/config/crd/bases/clickhouse.com_clickhouseclusters.yaml b/config/crd/bases/clickhouse.com_clickhouseclusters.yaml index 770801e..becb784 100644 --- a/config/crd/bases/clickhouse.com_clickhouseclusters.yaml +++ b/config/crd/bases/clickhouse.com_clickhouseclusters.yaml @@ -62,6 +62,229 @@ spec: spec: description: ClickHouseClusterSpec defines the desired state of ClickHouseCluster. properties: + additionalDataVolumeClaimSpecs: + description: |- + Additional persistent volume claims attached to each ClickHouse pod. + Each entry creates a volumeClaimTemplate on the StatefulSet, producing + per-pod PVCs named --. + Use for JBOD / multi-disk storage layouts. + items: + description: AdditionalVolumeClaimSpec defines an additional persistent + volume claim for a ClickHouse pod. + properties: + mountPath: + description: |- + MountPath inside the ClickHouse container. + If empty, defaults to /var/lib/clickhouse/disks/. + type: string + name: + description: |- + Name used as the volumeClaimTemplate name and the volume/volumeMount name. + Must be unique and not collide with the primary data volume name. + Must consist of lowercase alphanumeric characters or hyphens, and start and end with an alphanumeric character. + Hyphens are automatically converted to underscores in the ClickHouse disk configuration. 
+ pattern: ^[a-z0-9]([a-z0-9-]*[a-z0-9])?$ + type: string + spec: + description: PVC spec for this additional volume. + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. 
+ When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + Users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over volumes to consider + for binding. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. 
+ If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference to the + PersistentVolume backing this claim. 
+ type: string + type: object + required: + - name + - spec + type: object + type: array annotations: additionalProperties: type: string diff --git a/examples/multi_disk_jbod.yaml b/examples/multi_disk_jbod.yaml new file mode 100644 index 0000000..2d0e672 --- /dev/null +++ b/examples/multi_disk_jbod.yaml @@ -0,0 +1,30 @@ +apiVersion: clickhouse.com/v1alpha1 +kind: ClickHouseCluster +metadata: + name: clickhouse-jbod +spec: + shards: 2 + replicas: 2 + keeperClusterRef: + name: keeper + dataVolumeClaimSpec: + storageClassName: gp3 + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 100Gi + additionalDataVolumeClaimSpecs: + - name: disk1 + spec: + storageClassName: gp3 + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 100Gi + - name: disk2 + spec: + storageClassName: gp3 + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 100Gi diff --git a/internal/controller/clickhouse/config.go b/internal/controller/clickhouse/config.go index d6eee8d..2959093 100644 --- a/internal/controller/clickhouse/config.go +++ b/internal/controller/clickhouse/config.go @@ -28,6 +28,8 @@ var ( userConfigTemplateStr string //go:embed templates/client.yaml.tmpl clientConfigTemplateStr string + //go:embed templates/storage_jbod.yaml.tmpl + storageJbodConfigTemplateStr string generators []configGenerator ) @@ -104,6 +106,13 @@ func init() { }) } + storageJbodTmpl := template.Must(template.New("").Parse(storageJbodConfigTemplateStr)) + generators = append(generators, &storageJbodConfigGenerator{ + filename: "10-storage-jbod.yaml", + path: path.Join(ConfigPath, ConfigDPath), + template: storageJbodTmpl, + }) + generators = append(generators, &extraConfigGenerator{ Name: ExtraConfigFileName, @@ -161,6 +170,67 @@ func (g *templateConfigGenerator) Generate(r *clickhouseReconciler, id v1.ClickH return data, nil } +type storageJbodConfigGenerator struct { + filename string + path string + template *template.Template +} + +func (g 
*storageJbodConfigGenerator) Filename() string { + return g.filename +} + +func (g *storageJbodConfigGenerator) Path() string { + return g.path +} + +func (g *storageJbodConfigGenerator) ConfigKey() string { + return controllerutil.PathToName(path.Join(g.path, g.filename)) +} + +func (g *storageJbodConfigGenerator) Exists(r *clickhouseReconciler) bool { + return len(r.Cluster.Spec.AdditionalDataVolumeClaimSpecs) > 0 +} + +func (g *storageJbodConfigGenerator) Generate(r *clickhouseReconciler, _ v1.ClickHouseReplicaID) (string, error) { + additionalDisks := make([]struct { + Name string + DiskName string + Path string + }, 0, len(r.Cluster.Spec.AdditionalDataVolumeClaimSpecs)) + for _, addl := range r.Cluster.Spec.AdditionalDataVolumeClaimSpecs { + diskPath := addl.MountPath + if !strings.HasSuffix(diskPath, "/") { + diskPath += "/" + } + additionalDisks = append(additionalDisks, struct { + Name string + DiskName string + Path string + }{ + Name: addl.Name, + DiskName: strings.ReplaceAll(addl.Name, "-", "_"), + Path: diskPath, + }) + } + params := struct { + DefaultDiskPath string + AdditionalDisks []struct { + Name string + DiskName string + Path string + } + }{ + DefaultDiskPath: internal.ClickHouseDataPath + "/", + AdditionalDisks: additionalDisks, + } + builder := strings.Builder{} + if err := g.template.Execute(&builder, params); err != nil { + return "", fmt.Errorf("template storage JBOD config: %w", err) + } + return builder.String(), nil +} + type configGeneratorFunc func(tmpl *template.Template, r *clickhouseReconciler, id v1.ClickHouseReplicaID) (string, error) type baseConfigParams struct { diff --git a/internal/controller/clickhouse/config_test.go b/internal/controller/clickhouse/config_test.go index 7f9008c..733ae34 100644 --- a/internal/controller/clickhouse/config_test.go +++ b/internal/controller/clickhouse/config_test.go @@ -4,6 +4,7 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/utils/ptr" @@ -41,13 +42,70 @@ var _ = Describe("ConfigGenerator", func() { } for _, generator := range generators { - It("should generate config: "+generator.Filename(), func() { - Expect(generator.Exists(&ctx)).To(BeTrue()) - data, err := generator.Generate(&ctx, v1.ClickHouseReplicaID{}) + gen := generator + It("should generate config: "+gen.Filename(), func() { + if !gen.Exists(&ctx) { + Skip("generator does not apply to this cluster spec") + } + data, err := gen.Generate(&ctx, v1.ClickHouseReplicaID{}) Expect(err).ToNot(HaveOccurred()) obj := map[any]any{} Expect(yaml.Unmarshal([]byte(data), &obj)).To(Succeed()) }) } + + It("should generate storage JBOD config when additionalDataVolumeClaimSpecs is set", func() { + ctxJBOD := clickhouseReconciler{ + reconcilerBase: reconcilerBase{ + Cluster: &v1.ClickHouseCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-jbod", + Namespace: "test-namespace", + }, + Spec: v1.ClickHouseClusterSpec{ + Replicas: ptr.To[int32](2), + Shards: ptr.To[int32](1), + KeeperClusterRef: &corev1.LocalObjectReference{Name: "keeper"}, + AdditionalDataVolumeClaimSpecs: []v1.AdditionalVolumeClaimSpec{ + {Name: "disk1", Spec: corev1.PersistentVolumeClaimSpec{}}, + {Name: "disk2", MountPath: "/custom/path", Spec: corev1.PersistentVolumeClaimSpec{}}, + }, + }, + }, + }, + keeper: v1.KeeperCluster{Spec: v1.KeeperClusterSpec{Replicas: ptr.To[int32](3)}}, + } + ctxJBOD.Cluster.Spec.WithDefaults() + + configData, err := generateConfigForSingleReplica(&ctxJBOD, v1.ClickHouseReplicaID{}) + Expect(err).ToNot(HaveOccurred()) + + storageConfig, ok := configData["etc-clickhouse-server-config-d-10-storage-jbod-yaml"] + Expect(ok).To(BeTrue()) + Expect(storageConfig).To(ContainSubstring("storage_configuration")) + Expect(storageConfig).To(ContainSubstring("disk1")) + 
Expect(storageConfig).To(ContainSubstring("disk2")) + Expect(storageConfig).To(ContainSubstring("/var/lib/clickhouse/disks/disk1/")) + Expect(storageConfig).To(ContainSubstring("/custom/path/")) + + // Verify true JBOD: all disks must be listed inside a single "main" volume + // as a YAML list (round-robin distribution), not as separate per-disk volumes. + parsed := map[any]any{} + Expect(yaml.Unmarshal([]byte(storageConfig), &parsed)).To(Succeed()) + disks := parsed["storage_configuration"].(map[any]any)["disks"].(map[any]any) + Expect(disks).NotTo(HaveKey("default"), "default disk must not be explicitly defined in JBOD config") + policies := parsed["storage_configuration"].(map[any]any)["policies"].(map[any]any) + volumes := policies["default"].(map[any]any)["volumes"].(map[any]any) + Expect(volumes).To(HaveLen(1), "true JBOD has exactly one volume containing all disks") + mainVolume := volumes["main"].(map[any]any) + diskList, ok := mainVolume["disk"].([]any) + Expect(ok).To(BeTrue(), "disks under main volume must be a list") + diskNames := make([]string, len(diskList)) + for i, d := range diskList { + diskNames[i] = d.(string) + } + Expect(diskNames).To(ContainElements("default", "disk1", "disk2")) + }) + }) diff --git a/internal/controller/clickhouse/sync.go b/internal/controller/clickhouse/sync.go index 3789958..7ed4cdf 100644 --- a/internal/controller/clickhouse/sync.go +++ b/internal/controller/clickhouse/sync.go @@ -811,16 +811,17 @@ func (r *clickhouseReconciler) updateReplica(ctx context.Context, log ctrlutil.L } replica := r.Replica(id) + additionalPVCs := templateAdditionalPVCs(r, id) result, err := r.ReconcileReplicaResources(ctx, log, id, chctrl.ReplicaUpdateInput{ ExistingSTS: replica.StatefulSet, DesiredConfigMap: configMap, DesiredSTS: statefulSet, + AdditionalPVCs: additionalPVCs, HasError: replica.Error, ConfigurationRevision: r.Cluster.Status.ConfigurationRevision, StatefulSetRevision: r.Cluster.Status.StatefulSetRevision, BreakingSTSVersion: 
breakingStatefulSetVersion, - DataVolumeClaimSpec: r.Cluster.Spec.DataVolumeClaimSpec, }) if err != nil { return nil, fmt.Errorf("reconcile replica %s resources: %w", id, err) diff --git a/internal/controller/clickhouse/templates.go b/internal/controller/clickhouse/templates.go index 59eeb3d..928a8d6 100644 --- a/internal/controller/clickhouse/templates.go +++ b/internal/controller/clickhouse/templates.go @@ -205,12 +205,7 @@ func templateStatefulSet(r *clickhouseReconciler, id v1.ClickHouseReplicaID) (*a return nil, fmt.Errorf("template pod spec: %w", err) } - resourceLabels := controllerutil.MergeMaps(r.Cluster.Spec.Labels, id.Labels(), map[string]string{ - controllerutil.LabelAppKey: r.Cluster.SpecificName(), - controllerutil.LabelInstanceK8sKey: r.Cluster.SpecificName(), - controllerutil.LabelRoleKey: controllerutil.LabelClickHouseValue, - controllerutil.LabelAppK8sKey: controllerutil.LabelClickHouseValue, - }) + resourceLabels := replicaResourceLabels(r.Cluster, id) spec := appsv1.StatefulSetSpec{ Selector: &metav1.LabelSelector{ @@ -612,6 +607,7 @@ func buildProtocols(cr *v1.ClickHouseCluster) map[string]protocol { func buildVolumes(r *clickhouseReconciler, id v1.ClickHouseReplicaID) ([]corev1.Volume, []corev1.VolumeMount, error) { var volumeMounts []corev1.VolumeMount + var volumes []corev1.Volume if r.Cluster.Spec.DataVolumeClaimSpec != nil { volumeMounts = append(volumeMounts, corev1.VolumeMount{ @@ -626,6 +622,20 @@ func buildVolumes(r *clickhouseReconciler, id v1.ClickHouseReplicaID) ([]corev1. 
}, ) } + for _, addl := range r.Cluster.Spec.AdditionalDataVolumeClaimSpecs { + volumes = append(volumes, corev1.Volume{ + Name: addl.Name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: additionalPVCName(r.Cluster, id, addl.Name), + }, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: addl.Name, + MountPath: addl.MountPath, + }) + } defaultConfigMapMode := corev1.ConfigMapVolumeSourceDefaultMode @@ -662,7 +672,6 @@ func buildVolumes(r *clickhouseReconciler, id v1.ClickHouseReplicaID) ([]corev1. configVolumes[generator.Path()] = volume } - var volumes []corev1.Volume for _, volume := range configVolumes { controllerutil.SortKey(volume.ConfigMap.Items, func(item corev1.KeyToPath) string { return item.Key @@ -736,3 +745,34 @@ func buildVolumes(r *clickhouseReconciler, id v1.ClickHouseReplicaID) ([]corev1. return volumes, volumeMounts, nil } + +func replicaResourceLabels(cluster *v1.ClickHouseCluster, id v1.ClickHouseReplicaID) map[string]string { + return controllerutil.MergeMaps(cluster.Spec.Labels, id.Labels(), map[string]string{ + controllerutil.LabelAppKey: cluster.SpecificName(), + controllerutil.LabelInstanceK8sKey: cluster.SpecificName(), + controllerutil.LabelRoleKey: controllerutil.LabelClickHouseValue, + controllerutil.LabelAppK8sKey: controllerutil.LabelClickHouseValue, + }) +} + +func additionalPVCName(cluster *v1.ClickHouseCluster, id v1.ClickHouseReplicaID, volumeName string) string { + return volumeName + "-" + cluster.StatefulSetNameByReplicaID(id) + "-0" +} + +func templateAdditionalPVCs(r *clickhouseReconciler, id v1.ClickHouseReplicaID) []*corev1.PersistentVolumeClaim { + resourceLabels := replicaResourceLabels(r.Cluster, id) + pvcs := make([]*corev1.PersistentVolumeClaim, 0, len(r.Cluster.Spec.AdditionalDataVolumeClaimSpecs)) + for _, addl := range r.Cluster.Spec.AdditionalDataVolumeClaimSpecs { + pvcs = append(pvcs, &corev1.PersistentVolumeClaim{ + 
ObjectMeta: metav1.ObjectMeta{ + Name: additionalPVCName(r.Cluster, id, addl.Name), + Namespace: r.Cluster.Namespace, + Labels: resourceLabels, + Annotations: r.Cluster.Spec.Annotations, + }, + Spec: *addl.Spec.DeepCopy(), + }) + } + + return pvcs +} diff --git a/internal/controller/clickhouse/templates/storage_jbod.yaml.tmpl b/internal/controller/clickhouse/templates/storage_jbod.yaml.tmpl new file mode 100644 index 0000000..1a6a31d --- /dev/null +++ b/internal/controller/clickhouse/templates/storage_jbod.yaml.tmpl @@ -0,0 +1,17 @@ +{{- /* Storage configuration: all disks in a single volume for true JBOD round-robin distribution */}} +{{- /* DiskName is the ClickHouse identifier (hyphens replaced with underscores); Path uses the original name. */}} +storage_configuration: + disks: +{{- range .AdditionalDisks }} + {{ .DiskName }}: + path: {{ .Path }} +{{- end }} + policies: + default: + volumes: + main: + disk: + - default +{{- range .AdditionalDisks }} + - {{ .DiskName }} +{{- end }} diff --git a/internal/controller/clickhouse/templates_test.go b/internal/controller/clickhouse/templates_test.go index 5832c28..aa7abeb 100644 --- a/internal/controller/clickhouse/templates_test.go +++ b/internal/controller/clickhouse/templates_test.go @@ -8,6 +8,7 @@ import ( . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" @@ -61,6 +62,48 @@ var _ = Describe("BuildVolumes", func() { checkVolumeMounts(volumes, mounts) }) + It("should add volume mounts for additionalDataVolumeClaimSpecs", func() { + ctx.Cluster = &v1.ClickHouseCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: v1.ClickHouseClusterSpec{ + DataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{}, + AdditionalDataVolumeClaimSpecs: []v1.AdditionalVolumeClaimSpec{ + { + Name: "disk1", + MountPath: "/var/lib/clickhouse/disks/disk1", + Spec: corev1.PersistentVolumeClaimSpec{}, + }, + { + Name: "disk2", + MountPath: "/var/lib/clickhouse/disks/disk2", + Spec: corev1.PersistentVolumeClaimSpec{}, + }, + }, + }, + } + volumes, mounts, err := buildVolumes(&ctx, v1.ClickHouseReplicaID{}) + Expect(err).To(Not(HaveOccurred())) + Expect(mounts).To(HaveLen(7)) // 5 from data+config + 2 additional + checkVolumeMounts(volumes, mounts) + mountPaths := make(map[string]string) + for _, m := range mounts { + mountPaths[m.MountPath] = m.Name + } + Expect(mountPaths["/var/lib/clickhouse/disks/disk1"]).To(Equal("disk1")) + Expect(mountPaths["/var/lib/clickhouse/disks/disk2"]).To(Equal("disk2")) + + pvcClaimNames := map[string]string{} + for _, v := range volumes { + if v.PersistentVolumeClaim != nil { + pvcClaimNames[v.Name] = v.PersistentVolumeClaim.ClaimName + } + } + Expect(pvcClaimNames).To(HaveKeyWithValue("disk1", "disk1-test-clickhouse-0-0-0")) + Expect(pvcClaimNames).To(HaveKeyWithValue("disk2", "disk2-test-clickhouse-0-0-0")) + }) + It("should add volumes provided by user", func() { ctx.Cluster = &v1.ClickHouseCluster{ ObjectMeta: metav1.ObjectMeta{ @@ -300,10 +343,85 @@ var _ = Describe("PDB", func() { }) }) -func checkVolumeMounts(volumes []corev1.Volume, mounts []corev1.VolumeMount) { +var 
_ = Describe("TemplateStatefulSet", func() { + It("should mount additional JBOD disks from explicit PVC volumes", func() { + r := &clickhouseReconciler{ + reconcilerBase: reconcilerBase{ + Cluster: &v1.ClickHouseCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "jbod", Namespace: "default"}, + Spec: v1.ClickHouseClusterSpec{ + Shards: ptr.To[int32](2), + Replicas: ptr.To[int32](2), + KeeperClusterRef: &corev1.LocalObjectReference{Name: "keeper"}, + DataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, + }, + }, + AdditionalDataVolumeClaimSpecs: []v1.AdditionalVolumeClaimSpec{ + { + Name: "disk1", + MountPath: "/var/lib/clickhouse/disks/disk1", + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, + }, + }, + }, + { + Name: "disk2", + MountPath: "/var/lib/clickhouse/disks/disk2", + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, + }, + }, + }, + }, + }, + }, + }, + keeper: v1.KeeperCluster{ObjectMeta: metav1.ObjectMeta{Name: "keeper"}}, + } + r.Cluster.Spec.WithDefaults() + + sts, err := templateStatefulSet(r, v1.ClickHouseReplicaID{ShardID: 0, Index: 0}) + Expect(err).To(Not(HaveOccurred())) + Expect(sts.Spec.VolumeClaimTemplates).To(HaveLen(1)) // primary only; additional PVCs are reconciled separately + Expect(sts.Spec.VolumeClaimTemplates[0].Name).To(Equal(internal.PersistentVolumeName)) + + podSpec, err := templatePodSpec(r, v1.ClickHouseReplicaID{ShardID: 0, Index: 0}) 
+ Expect(err).To(Not(HaveOccurred())) + mountPaths := make(map[string]string) + for _, c := range podSpec.Containers { + for _, m := range c.VolumeMounts { + mountPaths[m.MountPath] = m.Name + } + } + Expect(mountPaths["/var/lib/clickhouse/disks/disk1"]).To(Equal("disk1")) + Expect(mountPaths["/var/lib/clickhouse/disks/disk2"]).To(Equal("disk2")) + + pvcVolumes := make(map[string]string) + for _, volume := range podSpec.Volumes { + if volume.PersistentVolumeClaim != nil { + pvcVolumes[volume.Name] = volume.PersistentVolumeClaim.ClaimName + } + } + Expect(pvcVolumes).To(HaveKeyWithValue("disk1", "disk1-jbod-clickhouse-0-0-0")) + Expect(pvcVolumes).To(HaveKeyWithValue("disk2", "disk2-jbod-clickhouse-0-0-0")) + }) +}) + +func checkVolumeMounts(volumes []corev1.Volume, mounts []corev1.VolumeMount, vctVolumeNames ...string) { volumeMap := map[string]struct{}{ internal.PersistentVolumeName: {}, } + for _, name := range vctVolumeNames { + volumeMap[name] = struct{}{} + } for _, volume := range volumes { ExpectWithOffset(1, volumeMap).NotTo(HaveKey(volume.Name)) volumeMap[volume.Name] = struct{}{} diff --git a/internal/controller/keeper/sync.go b/internal/controller/keeper/sync.go index b9b5806..6e94ef4 100644 --- a/internal/controller/keeper/sync.go +++ b/internal/controller/keeper/sync.go @@ -753,7 +753,6 @@ func (r *keeperReconciler) updateReplica(ctx context.Context, log ctrlutil.Logge ConfigurationRevision: r.Cluster.Status.ConfigurationRevision, StatefulSetRevision: r.Cluster.Status.StatefulSetRevision, BreakingSTSVersion: breakingStatefulSetVersion, - DataVolumeClaimSpec: r.Cluster.Spec.DataVolumeClaimSpec, }) if err != nil { return nil, fmt.Errorf("reconcile replica %q resources: %w", replicaID, err) diff --git a/internal/controller/resources.go b/internal/controller/resources.go index 0560542..d997438 100644 --- a/internal/controller/resources.go +++ b/internal/controller/resources.go @@ -196,6 +196,86 @@ func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) 
ReconcileConfigMap( return r.reconcileResource(ctx, log, configMap, []string{"Data", "BinaryData"}, action) } +// ReconcilePVC reconciles a Kubernetes PersistentVolumeClaim resource. +// For bound PVCs the spec is largely immutable; only spec.resources.requests.storage +// and metadata (labels) are patched. All other spec fields are left untouched. +func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) ReconcilePVC( + ctx context.Context, + log util.Logger, + pvc *corev1.PersistentVolumeClaim, + action v1.EventAction, +) (bool, error) { + cli := r.GetClient() + const kind = "PersistentVolumeClaim" + log = log.With(kind, pvc.GetName()) + + if err := ctrlruntime.SetControllerReference(r.Cluster, pvc, r.GetScheme()); err != nil { + return false, fmt.Errorf("set %s/%s ctrl reference: %w", kind, pvc.GetName(), err) + } + + existing := &corev1.PersistentVolumeClaim{} + if err := cli.Get(ctx, types.NamespacedName{Namespace: pvc.GetNamespace(), Name: pvc.GetName()}, existing); err != nil { + if !k8serrors.IsNotFound(err) { + return false, fmt.Errorf("get %s/%s: %w", kind, pvc.GetName(), err) + } + log.Info("PVC not found, creating") + return true, r.Create(ctx, pvc, action) + } + + // storageClassName is immutable; warn and skip if it diverges. 
+ desiredClass := pvc.Spec.StorageClassName + existingClass := existing.Spec.StorageClassName + if desiredClass != nil && existingClass != nil && *desiredClass != *existingClass { + log.Warn("PVC storageClassName is immutable and cannot be changed in-place; "+ + "delete and recreate the PVC to switch storage class", + "existing", *existingClass, "desired", *desiredClass) + r.GetRecorder().Eventf(r.Cluster, existing, corev1.EventTypeWarning, v1.EventReasonFailedUpdate, action, + "PVC %s storageClassName is immutable (existing: %s, desired: %s); delete and recreate to change it", + existing.GetName(), *existingClass, *desiredClass) + return false, nil + } + + desiredStorage := pvc.Spec.Resources.Requests[corev1.ResourceStorage] + existingStorage := existing.Spec.Resources.Requests[corev1.ResourceStorage] + storageChanged := desiredStorage.Cmp(existingStorage) != 0 + vacChanged := pvc.Spec.VolumeAttributesClassName != existing.Spec.VolumeAttributesClassName + labelsChanged := !reflect.DeepEqual(pvc.GetLabels(), existing.GetLabels()) + + if !storageChanged && !vacChanged && !labelsChanged { + log.Debug("PVC is up to date") + return false, nil + } + + // Build a merge patch that only touches the mutable fields — leaving every + // immutable spec field (VolumeName, VolumeMode, AccessModes, StorageClassName, + // Selector, DataSource, …) completely untouched. 
+ base := existing.DeepCopy() + existing.SetLabels(pvc.GetLabels()) + if storageChanged { + log.Info("resizing PVC storage", "from", existingStorage.String(), "to", desiredStorage.String()) + if existing.Spec.Resources.Requests == nil { + existing.Spec.Resources.Requests = make(corev1.ResourceList) + } + existing.Spec.Resources.Requests[corev1.ResourceStorage] = desiredStorage + } + if vacChanged { + log.Info("updating PVC volumeAttributesClassName", + "from", existing.Spec.VolumeAttributesClassName, "to", pvc.Spec.VolumeAttributesClassName) + existing.Spec.VolumeAttributesClassName = pvc.Spec.VolumeAttributesClassName + } + + if err := cli.Patch(ctx, existing, client.MergeFrom(base)); err != nil { + recorder := r.GetRecorder() + if util.ShouldEmitEvent(err) { + recorder.Eventf(r.Cluster, existing, corev1.EventTypeWarning, v1.EventReasonFailedUpdate, action, + "Update %s %s failed: %s", kind, existing.GetName(), err.Error()) + } + return false, fmt.Errorf("patch %s/%s: %w", kind, existing.GetName(), err) + } + + return true, nil +} + // Create creates the given Kubernetes resource and emits events on failure. func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) Create(ctx context.Context, resource client.Object, action v1.EventAction) error { recorder := r.GetRecorder() @@ -252,11 +332,14 @@ func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) Delete(ctx context.Con } // UpdatePVC updates the PersistentVolumeClaim for the given replica ID if it exists and differs from the provided spec. +// When targetPVCName is non-empty and multiple PVCs exist (e.g. from additional volumeClaimTemplates), +// the PVC matching that name is updated. targetPVCName should be "--". 
func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) UpdatePVC( ctx context.Context, log util.Logger, id ReplicaID, volumeSpec corev1.PersistentVolumeClaimSpec, + targetPVCName string, action v1.EventAction, ) error { cli := r.GetClient() @@ -280,29 +363,45 @@ func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) UpdatePVC( return nil } - if len(pvcs.Items) > 1 { + var pvc *corev1.PersistentVolumeClaim + if len(pvcs.Items) == 1 { + pvc = &pvcs.Items[0] + } else if targetPVCName != "" { + for i := range pvcs.Items { + if pvcs.Items[i].Name == targetPVCName { + pvc = &pvcs.Items[i] + break + } + } + if pvc == nil { + pvcNames := make([]string, len(pvcs.Items)) + for i, p := range pvcs.Items { + pvcNames[i] = p.Name + } + return fmt.Errorf("target PVC %q not found among replica %v PVCs: %v", targetPVCName, id, pvcNames) + } + } else { pvcNames := make([]string, len(pvcs.Items)) - for i, pvc := range pvcs.Items { - pvcNames[i] = pvc.Name + for i, p := range pvcs.Items { + pvcNames[i] = p.Name } - return fmt.Errorf("found multiple PVCs for replica %v: %v", id, pvcNames) } - if gcmp.Equal(pvcs.Items[0].Spec, volumeSpec) { - log.Debug("replica PVC is up to date", "pvc", pvcs.Items[0].Name) + if gcmp.Equal(pvc.Spec, volumeSpec) { + log.Debug("replica PVC is up to date", "pvc", pvc.Name) return nil } targetSpec := volumeSpec.DeepCopy() - if err := util.ApplyDefault(targetSpec, pvcs.Items[0].Spec); err != nil { + if err := util.ApplyDefault(targetSpec, pvc.Spec); err != nil { return fmt.Errorf("apply patch to replica PVC %v: %w", id, err) } - log.Info("updating replica PVC", "pvc", pvcs.Items[0].Name, "diff", gcmp.Diff(pvcs.Items[0].Spec, targetSpec)) + log.Info("updating replica PVC", "pvc", pvc.Name, "diff", gcmp.Diff(pvc.Spec, targetSpec)) - pvcs.Items[0].Spec = *targetSpec - if err := r.Update(ctx, &pvcs.Items[0], action); err != nil { + pvc.Spec = *targetSpec + if err := r.Update(ctx, pvc, action); err != nil { return fmt.Errorf("update replica PVC %v: %w", id, 
err) } @@ -314,11 +413,11 @@ type ReplicaUpdateInput struct { ExistingSTS *appsv1.StatefulSet DesiredConfigMap *corev1.ConfigMap DesiredSTS *appsv1.StatefulSet + AdditionalPVCs []*corev1.PersistentVolumeClaim HasError bool ConfigurationRevision string StatefulSetRevision string BreakingSTSVersion semver.Version - DataVolumeClaimSpec *corev1.PersistentVolumeClaimSpec } // ReconcileReplicaResources reconciles a replica's ConfigMap, StatefulSet and PVC. @@ -340,6 +439,13 @@ func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) ReconcileReplicaResour return nil, fmt.Errorf("set replica StatefulSet controller reference: %w", err) } + if len(input.AdditionalPVCs) > 0 { + pvcErr := r.reconcileAdditionalPVCs(ctx, log, input.AdditionalPVCs) + if pvcErr != nil { + return nil, fmt.Errorf("reconcile additional PVCs: %w", pvcErr) + } + } + if input.ExistingSTS == nil { log.Info("replica StatefulSet not found, creating", "statefulset", statefulSet.Name) util.AddObjectConfigHash(statefulSet, input.ConfigurationRevision) @@ -420,15 +526,28 @@ func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) ReconcileReplicaResour return nil, nil } - if input.DataVolumeClaimSpec != nil { - if !gcmp.Equal(input.ExistingSTS.Spec.VolumeClaimTemplates[0].Spec, input.DataVolumeClaimSpec) { - if err = r.UpdatePVC(ctx, log, replicaID, *input.DataVolumeClaimSpec, v1.EventActionReconciling); err != nil { + if len(statefulSet.Spec.VolumeClaimTemplates) > 0 && len(input.ExistingSTS.Spec.VolumeClaimTemplates) > 0 { + existingSpecsByTemplateName := make(map[string]corev1.PersistentVolumeClaimSpec, len(input.ExistingSTS.Spec.VolumeClaimTemplates)) + for _, template := range input.ExistingSTS.Spec.VolumeClaimTemplates { + existingSpecsByTemplateName[template.Name] = template.Spec + } + + for _, desiredTemplate := range statefulSet.Spec.VolumeClaimTemplates { + existingSpec, ok := existingSpecsByTemplateName[desiredTemplate.Name] + if !ok || gcmp.Equal(existingSpec, desiredTemplate.Spec) { + continue 
+ } + + // Every replica StatefulSet has a single Pod with ordinal 0. + pvcName := desiredTemplate.Name + "-" + input.ExistingSTS.Name + "-0" + if err = r.UpdatePVC(ctx, log, replicaID, desiredTemplate.Spec, pvcName, v1.EventActionReconciling); err != nil { //nolint:nilerr // Error is logged internally and event sent return nil, nil } - - statefulSet.Spec.VolumeClaimTemplates = input.ExistingSTS.Spec.VolumeClaimTemplates } + + // volumeClaimTemplates are immutable; always keep existing templates in StatefulSet updates. + statefulSet.Spec.VolumeClaimTemplates = input.ExistingSTS.Spec.VolumeClaimTemplates } log.Info("updating replica StatefulSet", "statefulset", statefulSet.Name) @@ -444,3 +563,23 @@ func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) ReconcileReplicaResour return &ctrlruntime.Result{RequeueAfter: RequeueOnRefreshTimeout}, nil } + +func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) reconcileAdditionalPVCs( + ctx context.Context, + log util.Logger, + pvcs []*corev1.PersistentVolumeClaim, +) error { + + for _, desiredPVC := range pvcs { + if desiredPVC == nil { + continue + } + + _, err := r.ReconcilePVC(ctx, log, desiredPVC, v1.EventActionReconciling) + if err != nil { + return err + } + } + + return nil +} diff --git a/internal/controller/versionprobe.go b/internal/controller/versionprobe.go index 3815850..0240285 100644 --- a/internal/controller/versionprobe.go +++ b/internal/controller/versionprobe.go @@ -22,6 +22,8 @@ import ( const ( versionProbeContainerName = "version-probe" + versionProbeNameSuffix = "-version-probe-" + maxLabelValueLength = 63 ) // VersionProbeConfig holds parameters for the version probe Job. 
@@ -221,11 +223,20 @@ func (r *ResourceReconcilerBase[Status, T, ReplicaID, S]) buildVersionProbeJob(c return batchv1.Job{}, fmt.Errorf("hash version probe job spec: %w", err) } - job.Name = fmt.Sprintf("%s-version-probe-%s", r.Cluster.SpecificName(), specHash[:8]) + job.Name = buildVersionProbeJobName(r.Cluster.SpecificName(), specHash[:8]) return job, nil } +func buildVersionProbeJobName(prefix, hash string) string { + maxPrefixLen := maxLabelValueLength - len(versionProbeNameSuffix) - len(hash) + if len(prefix) > maxPrefixLen { + prefix = prefix[:maxPrefixLen] + } + + return prefix + versionProbeNameSuffix + hash +} + func getJobCondition(job *batchv1.Job, conditionType batchv1.JobConditionType) (batchv1.JobCondition, bool) { for _, c := range job.Status.Conditions { if c.Type == conditionType { diff --git a/internal/controller/versionprobe_test.go b/internal/controller/versionprobe_test.go new file mode 100644 index 0000000..de74060 --- /dev/null +++ b/internal/controller/versionprobe_test.go @@ -0,0 +1,35 @@ +package controller + +import "testing" + +func TestBuildVersionProbeJobNameTruncatesLongPrefix(t *testing.T) { + t.Parallel() + + prefix := "very-long-clickhouse-cluster-name-for-test-case-123456" + hash := "c0ed189d" + + got := buildVersionProbeJobName(prefix, hash) + maxPrefixLen := maxLabelValueLength - len(versionProbeNameSuffix) - len(hash) + want := prefix[:maxPrefixLen] + versionProbeNameSuffix + hash + + if got != want { + t.Fatalf("unexpected job name:\nwant: %q\ngot: %q", want, got) + } + if len(got) > maxLabelValueLength { + t.Fatalf("job name exceeds max label length: %d", len(got)) + } +} + +func TestBuildVersionProbeJobNameKeepsShortPrefix(t *testing.T) { + t.Parallel() + + prefix := "short-clickhouse" + hash := "deadbeef" + + got := buildVersionProbeJobName(prefix, hash) + want := "short-clickhouse-version-probe-deadbeef" + + if got != want { + t.Fatalf("unexpected job name:\nwant: %q\ngot: %q", want, got) + } +} diff --git 
a/internal/validation_constants.go b/internal/validation_constants.go index 34bd6f1..873b6f9 100644 --- a/internal/validation_constants.go +++ b/internal/validation_constants.go @@ -10,6 +10,8 @@ const ( KeeperDataPath = "/var/lib/clickhouse" ClickHouseDataPath = "/var/lib/clickhouse" + + AdditionalDiskBasePath = "/var/lib/clickhouse/disks/" ) var ( diff --git a/internal/webhook/v1alpha1/clickhousecluster_webhook.go b/internal/webhook/v1alpha1/clickhousecluster_webhook.go index 8cc7301..d4ae654 100644 --- a/internal/webhook/v1alpha1/clickhousecluster_webhook.go +++ b/internal/webhook/v1alpha1/clickhousecluster_webhook.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "slices" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -73,6 +74,13 @@ func (w *ClickHouseClusterWebhook) ValidateUpdate(_ context.Context, oldCluster, errs = append(errs, err) } + if err := validateAdditionalDataVolumeClaimSpecsChanges( + oldCluster.Spec.AdditionalDataVolumeClaimSpecs, + newCluster.Spec.AdditionalDataVolumeClaimSpecs, + ); err != nil { + errs = append(errs, err) + } + return warns, errors.Join(errs...) } @@ -101,10 +109,17 @@ func (w *ClickHouseClusterWebhook) validateImpl(obj *chv1.ClickHouseCluster) (ad errs = append(errs, err) } + additionalVolumeErrs := validateAdditionalDataVolumeClaimSpecs(obj.Spec.AdditionalDataVolumeClaimSpecs) + errs = append(errs, additionalVolumeErrs...) 
+ + reservedNames := slices.Clone(internal.ReservedClickHouseVolumeNames) + for _, addl := range obj.Spec.AdditionalDataVolumeClaimSpecs { + reservedNames = append(reservedNames, addl.Name) + } volumeWarns, volumeErrs := validateVolumes( obj.Spec.PodTemplate.Volumes, obj.Spec.ContainerTemplate.VolumeMounts, - internal.ReservedClickHouseVolumeNames, + reservedNames, internal.ClickHouseDataPath, obj.Spec.DataVolumeClaimSpec != nil, ) diff --git a/internal/webhook/v1alpha1/common.go b/internal/webhook/v1alpha1/common.go index d3a50f8..fec8a0e 100644 --- a/internal/webhook/v1alpha1/common.go +++ b/internal/webhook/v1alpha1/common.go @@ -4,11 +4,21 @@ import ( "errors" "fmt" "path" + "regexp" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + "github.com/ClickHouse/clickhouse-operator/api/v1alpha1" + "github.com/ClickHouse/clickhouse-operator/internal" ) +// additionalVolumeNameRe matches names that are valid as Kubernetes volume / PVC names +// (DNS label subset: lowercase alphanumeric and hyphens, must start and end with alphanumeric). +// Hyphens are automatically converted to underscores when the name is written into the +// ClickHouse disk configuration, so users only need to follow Kubernetes naming rules here. +var additionalVolumeNameRe = regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?$`) + // validateCustomVolumeMounts validates that the provided volume mounts correspond to defined volumes and // do not use any reserved volume names. It returns a slice of errors for any validation issues found. 
func validateVolumes( @@ -78,3 +88,62 @@ func validateDataVolumeSpecChanges(oldSpec, newSpec *corev1.PersistentVolumeClai return nil } + +// validateAdditionalDataVolumeClaimSpecs validates additionalDataVolumeClaimSpecs: +// - names must not collide with the primary data volume name +// - no duplicate names in the slice +// - no duplicate mount paths in the slice (would cause two PVCs to mount at the same path) +func validateAdditionalDataVolumeClaimSpecs(specs []v1alpha1.AdditionalVolumeClaimSpec) []error { + var errs []error + seenNames := make(map[string]struct{}) + seenPaths := make(map[string]struct{}) + for i, spec := range specs { + if spec.Name == "" { + errs = append(errs, fmt.Errorf("additionalDataVolumeClaimSpecs[%d].name must not be empty", i)) + } else if !additionalVolumeNameRe.MatchString(spec.Name) { + errs = append(errs, fmt.Errorf("additionalDataVolumeClaimSpecs[%d].name %q is invalid: must consist of lowercase alphanumeric characters or hyphens, and start and end with an alphanumeric character", i, spec.Name)) + } + if spec.Name == internal.PersistentVolumeName { + errs = append(errs, fmt.Errorf("additionalDataVolumeClaimSpecs[%d].name %q collides with primary data volume name", i, spec.Name)) + } + if _, ok := seenNames[spec.Name]; ok { + errs = append(errs, fmt.Errorf("additionalDataVolumeClaimSpecs has duplicate name %q", spec.Name)) + } + seenNames[spec.Name] = struct{}{} + + // Resolve the effective mount path (mirrors WithDefaults logic) for duplicate detection. 
+ mountPath := spec.MountPath + if mountPath == "" { + mountPath = internal.AdditionalDiskBasePath + spec.Name + } + if _, ok := seenPaths[mountPath]; ok { + errs = append(errs, fmt.Errorf("additionalDataVolumeClaimSpecs[%d] has duplicate mountPath %q", i, mountPath)) + } + seenPaths[mountPath] = struct{}{} + } + return errs +} + +// validateAdditionalDataVolumeClaimSpecsChanges validates update policy for additionalDataVolumeClaimSpecs: +// - adding new disks is allowed +// - removing existing disks is rejected +// - renaming existing disks is rejected (equivalent to remove+add) +// - updating specs for existing names is allowed +func validateAdditionalDataVolumeClaimSpecsChanges(oldSpecs, newSpecs []v1alpha1.AdditionalVolumeClaimSpec) error { + if len(oldSpecs) > 0 && len(newSpecs) == 0 { + return errors.New("additionalDataVolumeClaimSpecs cannot be removed after cluster creation") + } + + newNames := make(map[string]struct{}, len(newSpecs)) + for _, s := range newSpecs { + newNames[s.Name] = struct{}{} + } + + for _, s := range oldSpecs { + if _, ok := newNames[s.Name]; !ok { + return errors.New("additionalDataVolumeClaimSpecs names cannot be removed or renamed after cluster creation") + } + } + + return nil +} diff --git a/internal/webhook/v1alpha1/common_test.go b/internal/webhook/v1alpha1/common_test.go new file mode 100644 index 0000000..c56c8a4 --- /dev/null +++ b/internal/webhook/v1alpha1/common_test.go @@ -0,0 +1,177 @@ +package v1alpha1 + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/ClickHouse/clickhouse-operator/api/v1alpha1" + "github.com/ClickHouse/clickhouse-operator/internal" +) + +var _ = Describe("validateAdditionalDataVolumeClaimSpecs", func() { + It("should reject name collision with primary data volume", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + { + Name: internal.PersistentVolumeName, + MountPath: "/var/lib/clickhouse/disks/disk1", + Spec: corev1.PersistentVolumeClaimSpec{}, + }, + }) + Expect(errs).NotTo(BeEmpty()) + Expect(errs).To(ContainElement(MatchError(ContainSubstring("collides with primary data volume name")))) + }) + + It("should accept names with hyphens", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "disk-backfill-1", MountPath: "/var/lib/clickhouse/disks/disk-backfill-1", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(BeEmpty()) + }) + + It("should reject names with underscores", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "disk_backfill_1", MountPath: "/var/lib/clickhouse/disks/disk_backfill_1", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(HaveLen(1)) + Expect(errs[0].Error()).To(ContainSubstring("must consist of lowercase alphanumeric characters or hyphens")) + }) + + It("should reject names with uppercase letters", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "Disk1", MountPath: "/var/lib/clickhouse/disks/Disk1", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(HaveLen(1)) + Expect(errs[0].Error()).To(ContainSubstring("must consist of lowercase alphanumeric characters or hyphens")) + }) + + It("should reject names starting or ending with a hyphen", func() { + for _, name := range []string{"-disk1", "disk1-"} { + errs 
:= validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: name, MountPath: "/path", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(HaveLen(1), "expected error for name %q", name) + Expect(errs[0].Error()).To(ContainSubstring("must consist of lowercase alphanumeric characters or hyphens")) + } + }) + + It("should reject duplicate names", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "disk1", MountPath: "/path1", Spec: corev1.PersistentVolumeClaimSpec{}}, + {Name: "disk1", MountPath: "/path2", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(HaveLen(1)) + Expect(errs[0].Error()).To(ContainSubstring("duplicate name")) + }) + + It("should reject empty name", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "", MountPath: "/path", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(HaveLen(1)) + Expect(errs[0].Error()).To(ContainSubstring("name must not be empty")) + }) + + It("should accept valid specs with explicit mountPath", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + { + Name: "disk1", + MountPath: "/var/lib/clickhouse/disks/disk1", + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, + }, + }, + }, + }) + Expect(errs).To(BeEmpty()) + }) + + It("should accept valid specs with default mountPath (empty)", func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + { + Name: "disk1", + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, + }, + }, + }, + }) + Expect(errs).To(BeEmpty()) + }) + + It("should reject duplicate explicit mountPaths", 
func() { + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "disk1", MountPath: "/mnt/data", Spec: corev1.PersistentVolumeClaimSpec{}}, + {Name: "disk2", MountPath: "/mnt/data", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(HaveLen(1)) + Expect(errs[0].Error()).To(ContainSubstring("duplicate mountPath")) + }) + + It("should reject duplicate mountPaths where one is implicit default", func() { + // disk1 has no mountPath so it defaults to /var/lib/clickhouse/disks/disk1; + // disk2 explicitly sets the same path — both resolve to the same location. + errs := validateAdditionalDataVolumeClaimSpecs([]v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "disk1", Spec: corev1.PersistentVolumeClaimSpec{}}, + {Name: "disk2", MountPath: "/var/lib/clickhouse/disks/disk1", Spec: corev1.PersistentVolumeClaimSpec{}}, + }) + Expect(errs).To(HaveLen(1)) + Expect(errs[0].Error()).To(ContainSubstring("duplicate mountPath")) + }) +}) + +var _ = Describe("validateAdditionalDataVolumeClaimSpecsChanges", func() { + It("should allow adding additionalDataVolumeClaimSpecs after creation", func() { + err := validateAdditionalDataVolumeClaimSpecsChanges( + nil, + []v1alpha1.AdditionalVolumeClaimSpec{{Name: "disk1", MountPath: "/path", Spec: corev1.PersistentVolumeClaimSpec{}}}, + ) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should reject removing additionalDataVolumeClaimSpecs after creation", func() { + err := validateAdditionalDataVolumeClaimSpecsChanges( + []v1alpha1.AdditionalVolumeClaimSpec{{Name: "disk1", MountPath: "/path", Spec: corev1.PersistentVolumeClaimSpec{}}}, + nil, + ) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot be removed")) + }) + + It("should allow adding new names while preserving old names", func() { + err := validateAdditionalDataVolumeClaimSpecsChanges( + []v1alpha1.AdditionalVolumeClaimSpec{{Name: "disk1", MountPath: "/path", Spec: 
corev1.PersistentVolumeClaimSpec{}}}, + []v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "disk1", MountPath: "/path", Spec: corev1.PersistentVolumeClaimSpec{}}, + {Name: "disk2", MountPath: "/path2", Spec: corev1.PersistentVolumeClaimSpec{}}, + }, + ) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should reject rename", func() { + err := validateAdditionalDataVolumeClaimSpecsChanges( + []v1alpha1.AdditionalVolumeClaimSpec{{Name: "disk1", MountPath: "/path", Spec: corev1.PersistentVolumeClaimSpec{}}}, + []v1alpha1.AdditionalVolumeClaimSpec{{Name: "disk-renamed", MountPath: "/path", Spec: corev1.PersistentVolumeClaimSpec{}}}, + ) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot be removed or renamed")) + }) + + It("should allow no change when both empty", func() { + err := validateAdditionalDataVolumeClaimSpecsChanges(nil, nil) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should allow same specs", func() { + specs := []v1alpha1.AdditionalVolumeClaimSpec{ + {Name: "disk1", MountPath: "/path1", Spec: corev1.PersistentVolumeClaimSpec{}}, + {Name: "disk2", MountPath: "/path2", Spec: corev1.PersistentVolumeClaimSpec{}}, + } + err := validateAdditionalDataVolumeClaimSpecsChanges(specs, specs) + Expect(err).NotTo(HaveOccurred()) + }) +})