From c1f4b7e4caf80f4b4d0e754b12fc8398d95e8810 Mon Sep 17 00:00:00 2001
From: David Caputo
Date: Mon, 29 Sep 2025 18:26:36 -0400
Subject: [PATCH] Fix hostname generation issue preventing distributed operations

---
 pkg/model/chi/namer/name.go | 36 +++++++++++++++++++++++++-----------
 pkg/model/chk/namer/name.go | 34 ++++++++++++++++++++++------------
 2 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/pkg/model/chi/namer/name.go b/pkg/model/chi/namer/name.go
index fef8d1a06..00ed629e9 100644
--- a/pkg/model/chi/namer/name.go
+++ b/pkg/model/chi/namer/name.go
@@ -177,10 +177,11 @@ func (n *Namer) createStatefulSetServiceName(host *api.Host) string {
 
 // createPodHostname returns a hostname of a Pod of a ClickHouse instance.
 // Is supposed to be used where network connection to a Pod is required.
-// NB: right now Pod's hostname points to a Service, through which Pod can be accessed.
+// For StatefulSet pods, this returns <pod-name>.<service-name> to ensure DNS resolution works.
 func (n *Namer) createPodHostname(host *api.Host) string {
-	// Do not use Pod own hostname - point to appropriate StatefulSet's Service
-	return n.createStatefulSetServiceName(host)
+	// For StatefulSet pods, we need <pod-name>.<service-name> format
+	// to ensure proper DNS resolution within the cluster
+	return fmt.Sprintf("%s.%s", n.createPodName(host), n.createStatefulSetServiceName(host))
 }
 
 // createInstanceHostname returns hostname (pod-hostname + service or FQDN) which can be used as a replica name
@@ -201,23 +202,36 @@ func (n *Namer) createInstanceHostname(host *api.Host) string {
 }
 
 // createPodFQDN creates a fully qualified domain name of a pod
-// ss-1eb454-2-0.my-dev-domain.svc.cluster.local
+// chi-db-clickhouse-db-0-0-0.chi-db-clickhouse-db-0-0.my-dev-namespace.svc.cluster.local
 func (n *Namer) createPodFQDN(host *api.Host) string {
 	// FQDN can be generated either from default pattern,
 	// or from personal pattern provided
 
-	// Start with default pattern
-	pattern := patternPodFQDN
-
+	// For StatefulSet pods, the correct DNS pattern is:
+	// <pod-name>.<service-name>.<namespace>.svc.cluster.local
+	// This is different from the simple pattern used for other resources
+
 	if host.GetCR().GetSpec().GetNamespaceDomainPattern().HasValue() {
 		// NamespaceDomainPattern has been explicitly specified
-		pattern = "%s." + host.GetCR().GetSpec().GetNamespaceDomainPattern().Value()
+		// Use custom pattern: <pod-name>.<service-name>.<namespace-domain-pattern>
+		// The domain pattern carries its own %s for the namespace, so the namespace must be passed as well
+		pattern := "%s.%s." + host.GetCR().GetSpec().GetNamespaceDomainPattern().Value()
+		return fmt.Sprintf(
+			pattern,
+			n.createPodName(host),
+			n.createStatefulSetServiceName(host),
+			host.GetRuntime().GetAddress().GetNamespace(),
+		)
 	}
 
-	// Create FQDN based on pattern available
+	// Use standard Kubernetes StatefulSet DNS pattern:
+	// <pod-name>.<service-name>.<namespace>.svc.cluster.local
+	// This fixes the hostname mismatch issue in remote_servers.xml where
+	// StatefulSet pods have names like "chi-db-clickhouse-db-0-0-0" but
+	// the generated hostnames were missing the headless service component
 	return fmt.Sprintf(
-		pattern,
-		n.createPodHostname(host),
+		"%s.%s.%s.svc.cluster.local",
+		n.createPodName(host),
+		n.createStatefulSetServiceName(host),
 		host.GetRuntime().GetAddress().GetNamespace(),
 	)
 }
diff --git a/pkg/model/chk/namer/name.go b/pkg/model/chk/namer/name.go
index 605d1c7d0..9ef9fa30e 100644
--- a/pkg/model/chk/namer/name.go
+++ b/pkg/model/chk/namer/name.go
@@ -174,12 +174,13 @@ func (n *Namer) createStatefulSetServiceName(host *api.Host) string {
 	return n.macro.Scope(host).Line(pattern)
 }
 
-// createPodHostname returns a hostname of a Pod of a ClickHouse instance.
+// createPodHostname returns a hostname of a Pod of a ClickHouse Keeper instance.
 // Is supposed to be used where network connection to a Pod is required.
-// NB: right now Pod's hostname points to a Service, through which Pod can be accessed.
+// For StatefulSet pods, this returns <pod-name>.<service-name> to ensure DNS resolution works.
 func (n *Namer) createPodHostname(host *api.Host) string {
-	// Do not use Pod own hostname - point to appropriate StatefulSet's Service
-	return n.createStatefulSetServiceName(host)
+	// For StatefulSet pods, we need <pod-name>.<service-name> format
+	// to ensure proper DNS resolution within the cluster
+	return fmt.Sprintf("%s.%s", n.createPodName(host), n.createStatefulSetServiceName(host))
 }
 
 // createInstanceHostname returns hostname (pod-hostname + service or FQDN) which can be used as a replica name
@@ -200,23 +201,32 @@ func (n *Namer) createInstanceHostname(host *api.Host) string {
 }
 
 // createPodFQDN creates a fully qualified domain name of a pod
-// ss-1eb454-2-0.my-dev-domain.svc.cluster.local
+// chk-keeper-cluster-0-0.chk-keeper-cluster-0.my-dev-namespace.svc.cluster.local
 func (n *Namer) createPodFQDN(host *api.Host) string {
 	// FQDN can be generated either from default pattern,
 	// or from personal pattern provided
 
-	// Start with default pattern
-	pattern := patternPodFQDN
-
 	if host.GetCR().GetSpec().GetNamespaceDomainPattern().HasValue() {
 		// NamespaceDomainPattern has been explicitly specified
-		pattern = "%s." + host.GetCR().GetSpec().GetNamespaceDomainPattern().Value()
+		// Use custom pattern: <pod-name>.<service-name>.<namespace-domain-pattern>
+		// The domain pattern carries its own %s for the namespace, so the namespace must be passed as well
+		pattern := "%s.%s." + host.GetCR().GetSpec().GetNamespaceDomainPattern().Value()
+		return fmt.Sprintf(
+			pattern,
+			n.createPodName(host),
+			n.createStatefulSetServiceName(host),
+			host.GetRuntime().GetAddress().GetNamespace(),
+		)
 	}
 
-	// Create FQDN based on pattern available
+	// Use standard Kubernetes StatefulSet DNS pattern:
+	// <pod-name>.<service-name>.<namespace>.svc.cluster.local
+	// This fixes the hostname mismatch issue in remote_servers.xml where
+	// StatefulSet pods have names like "chk-keeper-cluster-0-0" but
+	// the generated hostnames were missing the headless service component
 	return fmt.Sprintf(
-		pattern,
-		n.createPodHostname(host),
+		"%s.%s.%s.svc.cluster.local",
+		n.createPodName(host),
+		n.createStatefulSetServiceName(host),
 		host.GetRuntime().GetAddress().GetNamespace(),
 	)
 }