Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
87100b0
feat(localdns): add hosts plugin support for LocalDNS
kwaksaewon Mar 24, 2026
5bea34e
feat(vhd): wire aks-hosts-setup files into all packer VHD builds
kwaksaewon Mar 24, 2026
5491a65
fix(spec): add dnsutils to shellspec Docker image and fix localdns spec
kwaksaewon Mar 24, 2026
f401894
fix: remove stale teleport code that leaked in from old merge
kwaksaewon Mar 24, 2026
9d4cc1d
fix: remove stale non-localdns changes from branch
kwaksaewon Mar 24, 2026
70971d8
fix: restore SKIP_WAAGENT_HOLD guard and tag e2e Private DNS zones
kwaksaewon Mar 25, 2026
0e8e913
fix: restore remaining SKIP_WAAGENT_HOLD guards in nodePrep
kwaksaewon Mar 25, 2026
1c01325
refactor: hosts plugin e2e to use dig AA flag, table-driven tests acr…
kwaksaewon Mar 25, 2026
0399090
refactor: simplify enableLocalDNS to read corefile globals directly
kwaksaewon Mar 25, 2026
499f89f
refactor: rename localdns corefile variables to FULL/BASE/ACTIVE
kwaksaewon Mar 25, 2026
68332d3
refactor: rename corefile variables for backward compat and clarity
kwaksaewon Mar 25, 2026
4186b58
refactor: add COREFILE_BASE and COREFILE_EXPERIMENTAL without modifyi…
kwaksaewon Mar 25, 2026
9d188be
refactor: move corefile selection logic into select_localdns_corefile…
kwaksaewon Mar 25, 2026
5645770
address PR review feedback: fix pipefail guard, tighten IPv6 regex, i…
kwaksaewon Mar 26, 2026
7085b12
address remaining PR review feedback: nslookup→dig, fix stale comment…
kwaksaewon Mar 26, 2026
ce232df
fix legacy corefile to exclude hosts plugin, rewrite cse_main_spec fo…
kwaksaewon Mar 26, 2026
e7bda04
rename LOCALDNS_COREFILE_ACTIVE → LOCALDNS_COREFILE_BASE, fix Case 3 …
kwaksaewon Mar 26, 2026
afbd787
add unit test for old-CSE + new-VHD fallback path
kwaksaewon Mar 26, 2026
b12c5b8
remove Before=kubelet.service localdns.service from aks-hosts-setup.s…
kwaksaewon Mar 26, 2026
2cc1611
use printf instead of echo for writing hosts file content
kwaksaewon Mar 26, 2026
84e2a6d
address remaining PR review comments
kwaksaewon Mar 26, 2026
9fcafde
remove MockUnknownCloud dead code per reviewer feedback
kwaksaewon Mar 26, 2026
65ac417
restart localdns in e2e after hosts file is populated
kwaksaewon Mar 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pipelines/scripts/verify_shell.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ filesToCheck=$(find . -type f -name "*.sh" -not -path './pkg/agent/testdata/*' -
# Known bash-only scripts that intentionally use bash specific syntax.
BASH_ONLY_LIST=$(cat <<'EOF'
./vhdbuilder/packer/install-ig.sh
./parts/linux/cloud-init/artifacts/aks-hosts-setup.sh
EOF
)

Expand Down
49 changes: 40 additions & 9 deletions aks-node-controller/parser/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -719,11 +719,19 @@ func getFuncMapForLocalDnsCorefileTemplate() template.FuncMap {
}
}

// getLocalDnsCorefileBase64 returns the base64 encoded LocalDns corefile.
// base64 encoded corefile returned from this function will decoded and written
// to /opt/azure/containers/localdns/localdns.corefile in cse_config.sh
// and then used by localdns systemd unit to start localdns systemd unit.
func getLocalDnsCorefileBase64(aksnodeconfig *aksnodeconfigv1.Configuration) string {
// getLocalDnsCorefileBase64WithHostsPlugin generates and returns the base64-encoded LocalDns corefile
// with or without the hosts plugin, depending on the includeHostsPlugin parameter.
//
// The generated content is returned as a base64-encoded string and stored in environment variables:
// - LOCALDNS_GENERATED_COREFILE (kept for backward compat with old VHDs)
// - LOCALDNS_COREFILE_BASE (standard, without experimental plugins)
// - LOCALDNS_COREFILE_EXPERIMENTAL (with experimental plugins e.g. hosts plugin)
//
// The actual file writing happens in shell scripts (cse_config.sh), which decode and write
// a selected variant to /opt/azure/containers/localdns/localdns.corefile after populating the env file.
// Runtime selection between LOCALDNS_COREFILE_BASE and LOCALDNS_COREFILE_EXPERIMENTAL happens in localdns.sh
// (via select_localdns_corefile(), invoked on localdns service start/restart) based on the availability of /etc/localdns/hosts.
func getLocalDnsCorefileBase64WithHostsPlugin(aksnodeconfig *aksnodeconfigv1.Configuration, includeHostsPlugin bool) string {
if aksnodeconfig == nil {
return ""
}
Expand All @@ -737,17 +745,33 @@ func getLocalDnsCorefileBase64(aksnodeconfig *aksnodeconfigv1.Configuration) str
return ""
}

localDnsConfig, err := generateLocalDnsCorefileFromAKSNodeConfig(aksnodeconfig)
variant := "with hosts plugin"
if !includeHostsPlugin {
variant = "without hosts plugin"
}

localDnsConfig, err := generateLocalDnsCorefileFromAKSNodeConfig(aksnodeconfig, includeHostsPlugin)
if err != nil {
return fmt.Sprintf("error getting localdns corfile from aks node config: %v", err)
return fmt.Sprintf("error getting localdns corefile (%s) from aks node config: %v", variant, err)
}
return base64.StdEncoding.EncodeToString([]byte(localDnsConfig))
}

// localDnsCorefileTemplateData wraps the AKS node config with additional template control flags.
// It is the root data value handed to the localdns Corefile template, so template expressions
// reach the node config through $.Config and the flags directly through $.<FlagName>.
type localDnsCorefileTemplateData struct {
// Config is the AKS node config; the template reads its LocalDnsProfile overrides from here.
Config *aksnodeconfigv1.Configuration
// IncludeHostsPlugin, when true, makes the template emit the hosts plugin block
// (hosts /etc/localdns/hosts with fallthrough) in root-domain (".") server blocks.
IncludeHostsPlugin bool
}

// Corefile is created using localdns.toml.gtpl template and aksnodeconfig values.
func generateLocalDnsCorefileFromAKSNodeConfig(aksnodeconfig *aksnodeconfigv1.Configuration) (string, error) {
// includeHostsPlugin controls whether the hosts plugin block is included in the generated Corefile.
func generateLocalDnsCorefileFromAKSNodeConfig(aksnodeconfig *aksnodeconfigv1.Configuration, includeHostsPlugin bool) (string, error) {
var corefileBuffer bytes.Buffer
if err := localDnsCorefileTemplate.Execute(&corefileBuffer, aksnodeconfig); err != nil {
templateData := localDnsCorefileTemplateData{
Config: aksnodeconfig,
IncludeHostsPlugin: includeHostsPlugin,
}
if err := localDnsCorefileTemplate.Execute(&corefileBuffer, templateData); err != nil {
return "", fmt.Errorf("failed to execute localdns corefile template: %w", err)
}
return corefileBuffer.String(), nil
Expand Down Expand Up @@ -785,6 +809,13 @@ func shouldEnableLocalDns(aksnodeconfig *aksnodeconfigv1.Configuration) string {
return fmt.Sprintf("%v", aksnodeconfig != nil && aksnodeconfig.GetLocalDnsProfile() != nil && aksnodeconfig.GetLocalDnsProfile().GetEnableLocalDns())
}

// shouldEnableHostsPlugin reports, as the string "true" or "false", whether the hosts plugin
// should be turned on for LocalDNS. The result is "true" only when LocalDNS itself is enabled
// and the LocalDnsProfile explicitly sets EnableHostsPlugin. In that case the localdns Corefile
// includes a hosts plugin block that serves cached DNS entries from /etc/localdns/hosts for
// critical AKS FQDNs.
func shouldEnableHostsPlugin(aksnodeconfig *aksnodeconfigv1.Configuration) string {
	// Guard first: when LocalDNS is off (or the config/profile is nil) the hosts plugin flag
	// is never consulted, which also keeps the getter chain below from being reached.
	if shouldEnableLocalDns(aksnodeconfig) != "true" {
		return fmt.Sprintf("%v", false)
	}

	hostsPluginEnabled := aksnodeconfig.GetLocalDnsProfile().GetEnableHostsPlugin()
	return fmt.Sprintf("%v", hostsPluginEnabled)
}

// getLocalDnsCpuLimitInPercentage returns CPU limit in percentage unit that will be used in localdns systemd unit.
func getLocalDnsCpuLimitInPercentage(aksnodeconfig *aksnodeconfigv1.Configuration) string {
if shouldEnableLocalDns(aksnodeconfig) == "true" && aksnodeconfig.GetLocalDnsProfile().GetCpuLimitInMilliCores() != 0 {
Expand Down
75 changes: 74 additions & 1 deletion aks-node-controller/parser/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,10 @@ health-check.localdns.local:53 {
.:53 {
log
bind 169.254.10.10
# Check /etc/localdns/hosts first for critical AKS FQDNs (mcr.microsoft.com, packages.aks.azure.com, etc.)
hosts /etc/localdns/hosts {
fallthrough
}
forward . 168.63.129.16 {
policy sequential
max_concurrent 1000
Expand Down Expand Up @@ -1509,6 +1513,10 @@ testdomain456.com:53 {
.:53 {
errors
bind 169.254.10.11
# Check /etc/localdns/hosts first for critical AKS FQDNs (mcr.microsoft.com, packages.aks.azure.com, etc.)
hosts /etc/localdns/hosts {
fallthrough
}
forward . 10.0.0.10 {
policy sequential
max_concurrent 2000
Expand Down Expand Up @@ -1627,7 +1635,7 @@ func Test_getLocalDNSCorefileBase64(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := getLocalDnsCorefileBase64(tt.args.aksnodeconfig)
got := getLocalDnsCorefileBase64WithHostsPlugin(tt.args.aksnodeconfig, true)

if tt.wantContains == "" && got != "" {
t.Errorf("expected empty string, got %q", got)
Expand Down Expand Up @@ -1711,6 +1719,71 @@ func Test_shouldEnableLocalDns(t *testing.T) {
}
}

// Test_shouldEnableHostsPlugin checks that shouldEnableHostsPlugin yields "true" only when both
// EnableLocalDns and EnableHostsPlugin are set, and "false" for a nil config, a config without
// a LocalDnsProfile, and every other combination of the two flags.
func Test_shouldEnableHostsPlugin(t *testing.T) {
	// withProfile builds a configuration whose LocalDnsProfile carries the two flags under test.
	withProfile := func(localDNS, hostsPlugin bool) *aksnodeconfigv1.Configuration {
		return &aksnodeconfigv1.Configuration{
			LocalDnsProfile: &aksnodeconfigv1.LocalDnsProfile{
				EnableLocalDns:    localDNS,
				EnableHostsPlugin: hostsPlugin,
			},
		}
	}

	cases := []struct {
		name   string
		config *aksnodeconfigv1.Configuration
		want   string
	}{
		{name: "nil config", config: nil, want: "false"},
		{name: "nil LocalDnsProfile", config: &aksnodeconfigv1.Configuration{}, want: "false"},
		{name: "LocalDns disabled, HostsPlugin enabled", config: withProfile(false, true), want: "false"},
		{name: "LocalDns enabled, HostsPlugin disabled", config: withProfile(true, false), want: "false"},
		{name: "both LocalDns and HostsPlugin enabled", config: withProfile(true, true), want: "true"},
		{name: "both disabled", config: withProfile(false, false), want: "false"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := shouldEnableHostsPlugin(tc.config)
			if got == tc.want {
				return
			}
			t.Errorf("shouldEnableHostsPlugin() = %v, want %v", got, tc.want)
		})
	}
}

func Test_getLocalDnsCpuLimitInPercentage(t *testing.T) {
type args struct {
aksnodeconfig *aksnodeconfigv1.Configuration
Expand Down
5 changes: 4 additions & 1 deletion aks-node-controller/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,12 @@ func getCSEEnv(config *aksnodeconfigv1.Configuration) map[string]string {
"INSERT_IMDS_RESTRICTION_RULE_TO_MANGLE_TABLE": fmt.Sprintf("%v", config.GetImdsRestrictionConfig().GetInsertImdsRestrictionRuleToMangleTable()),
"PRE_PROVISION_ONLY": fmt.Sprintf("%v", config.GetPreProvisionOnly()),
"SHOULD_ENABLE_LOCALDNS": shouldEnableLocalDns(config),
"SHOULD_ENABLE_HOSTS_PLUGIN": shouldEnableHostsPlugin(config),
"LOCALDNS_CPU_LIMIT": getLocalDnsCpuLimitInPercentage(config),
"LOCALDNS_MEMORY_LIMIT": getLocalDnsMemoryLimitInMb(config),
"LOCALDNS_GENERATED_COREFILE": getLocalDnsCorefileBase64(config),
"LOCALDNS_GENERATED_COREFILE": getLocalDnsCorefileBase64WithHostsPlugin(config, false),
"LOCALDNS_COREFILE_BASE": getLocalDnsCorefileBase64WithHostsPlugin(config, false),
"LOCALDNS_COREFILE_EXPERIMENTAL": getLocalDnsCorefileBase64WithHostsPlugin(config, true),
"DISABLE_PUBKEY_AUTH": fmt.Sprintf("%v", config.GetDisablePubkeyAuth()),
"SERVICE_ACCOUNT_IMAGE_PULL_ENABLED": fmt.Sprintf("%v", config.GetServiceAccountImagePullProfile().GetEnabled()),
"SERVICE_ACCOUNT_IMAGE_PULL_DEFAULT_CLIENT_ID": config.GetServiceAccountImagePullProfile().GetDefaultClientId(),
Expand Down
32 changes: 32 additions & 0 deletions aks-node-controller/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,38 @@ oom_score = -999
assert.Equal(t, "true", vars["NEEDS_CGROUPV2"])
},
},
{
name: "AKSUbuntu2204 with LocalDNS and hosts plugin enabled",
folder: "AKSUbuntu2204+LocalDNS+HostsPlugin",
k8sVersion: "1.24.2",
aksNodeConfigUpdator: func(aksNodeConfig *aksnodeconfigv1.Configuration) {
aksNodeConfig.LocalDnsProfile = &aksnodeconfigv1.LocalDnsProfile{
EnableLocalDns: true,
EnableHostsPlugin: true,
}
},
validator: func(cmd *exec.Cmd) {
vars := environToMap(cmd.Env)
assert.Equal(t, "true", vars["SHOULD_ENABLE_LOCALDNS"])
assert.Equal(t, "true", vars["SHOULD_ENABLE_HOSTS_PLUGIN"])
},
},
{
name: "AKSUbuntu2204 with LocalDNS enabled but hosts plugin disabled",
folder: "AKSUbuntu2204+LocalDNS",
k8sVersion: "1.24.2",
aksNodeConfigUpdator: func(aksNodeConfig *aksnodeconfigv1.Configuration) {
aksNodeConfig.LocalDnsProfile = &aksnodeconfigv1.LocalDnsProfile{
EnableLocalDns: true,
EnableHostsPlugin: false,
}
},
validator: func(cmd *exec.Cmd) {
vars := environToMap(cmd.Env)
assert.Equal(t, "true", vars["SHOULD_ENABLE_LOCALDNS"])
assert.Equal(t, "false", vars["SHOULD_ENABLE_HOSTS_PLUGIN"])
},
},
}

for _, tt := range tests {
Expand Down
20 changes: 16 additions & 4 deletions aks-node-controller/parser/templates/localdns.toml.gtpl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ health-check.localdns.local:53 {
whoami
}
# VnetDNS overrides apply to DNS traffic from pods with dnsPolicy:default or kubelet (referred to as VnetDNS traffic).
{{- range $domain, $override := $.LocalDnsProfile.VnetDnsOverrides -}}
{{- range $domain, $override := $.Config.LocalDnsProfile.VnetDnsOverrides -}}
{{- $isRootDomain := eq $domain "." -}}
{{- $fwdToClusterCoreDNS := or (hasSuffix $domain "cluster.local") (eq $override.ForwardDestination "ClusterCoreDNS")}}
{{- $forwardPolicy := "sequential" -}}
Expand All @@ -23,11 +23,17 @@ health-check.localdns.local:53 {
log
{{- end }}
bind {{getLocalDnsNodeListenerIp}}
{{- if and $isRootDomain $.IncludeHostsPlugin}}
# Check /etc/localdns/hosts first for critical AKS FQDNs (mcr.microsoft.com, packages.aks.azure.com, etc.)
hosts /etc/localdns/hosts {
fallthrough
}
{{- end}}
{{- if $isRootDomain}}
forward . {{getAzureDnsIp}} {
{{- else}}
{{- if $fwdToClusterCoreDNS}}
forward . {{getCoreDnsServiceIp $}} {
forward . {{getCoreDnsServiceIp $.Config}} {
{{- else}}
forward . {{getAzureDnsIp}} {
{{- end}}
Expand Down Expand Up @@ -67,7 +73,7 @@ health-check.localdns.local:53 {
}
{{- end}}
# KubeDNS overrides apply to DNS traffic from pods with dnsPolicy:ClusterFirst (referred to as KubeDNS traffic).
{{- range $domain, $override := $.LocalDnsProfile.KubeDnsOverrides}}
{{- range $domain, $override := $.Config.LocalDnsProfile.KubeDnsOverrides}}
{{- $isRootDomain := eq $domain "." -}}
{{- $fwdToClusterCoreDNS := or (hasSuffix $domain "cluster.local") (eq $override.ForwardDestination "ClusterCoreDNS")}}
{{- $forwardPolicy := "" }}
Expand All @@ -84,8 +90,14 @@ health-check.localdns.local:53 {
log
{{- end }}
bind {{getLocalDnsClusterListenerIp}}
{{- if and $isRootDomain $.IncludeHostsPlugin}}
# Check /etc/localdns/hosts first for critical AKS FQDNs (mcr.microsoft.com, packages.aks.azure.com, etc.)
hosts /etc/localdns/hosts {
fallthrough
}
{{- end}}
{{- if $fwdToClusterCoreDNS}}
forward . {{getCoreDnsServiceIp $}} {
forward . {{getCoreDnsServiceIp $.Config}} {
{{- else}}
forward . {{getAzureDnsIp}} {
{{- end}}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/bin/bash -c echo $(date),$(hostname) > ${PROVISION_OUTPUT}; CLOUD_INIT_STATUS_SCRIPT="/opt/azure/containers/cloud-init-status-check.sh"; cloudInitExitCode=0; if [ -f "${CLOUD_INIT_STATUS_SCRIPT}" ]; then /bin/bash -c "source ${CLOUD_INIT_STATUS_SCRIPT}; handleCloudInitStatus \"${PROVISION_OUTPUT}\"; returnStatus=\$?; echo \"Cloud init status check exit code: \$returnStatus\" >> ${PROVISION_OUTPUT}; exit \$returnStatus" >> ${PROVISION_OUTPUT} 2>&1; else cloud-init status --wait > /dev/null 2>&1; fi; cloudInitExitCode=$?; if [ "$cloudInitExitCode" -eq 0 ]; then echo "cloud-init succeeded" >> ${PROVISION_OUTPUT}; else echo "cloud-init failed with exit code ${cloudInitExitCode}" >> ${PROVISION_OUTPUT}; cat ${PROVISION_OUTPUT} exit ${cloudInitExitCode}; fi; /usr/bin/nohup /bin/bash -c "/bin/bash /opt/azure/containers/provision_start.sh"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/bin/bash -c echo $(date),$(hostname) > ${PROVISION_OUTPUT}; CLOUD_INIT_STATUS_SCRIPT="/opt/azure/containers/cloud-init-status-check.sh"; cloudInitExitCode=0; if [ -f "${CLOUD_INIT_STATUS_SCRIPT}" ]; then /bin/bash -c "source ${CLOUD_INIT_STATUS_SCRIPT}; handleCloudInitStatus \"${PROVISION_OUTPUT}\"; returnStatus=\$?; echo \"Cloud init status check exit code: \$returnStatus\" >> ${PROVISION_OUTPUT}; exit \$returnStatus" >> ${PROVISION_OUTPUT} 2>&1; else cloud-init status --wait > /dev/null 2>&1; fi; cloudInitExitCode=$?; if [ "$cloudInitExitCode" -eq 0 ]; then echo "cloud-init succeeded" >> ${PROVISION_OUTPUT}; else echo "cloud-init failed with exit code ${cloudInitExitCode}" >> ${PROVISION_OUTPUT}; cat ${PROVISION_OUTPUT} exit ${cloudInitExitCode}; fi; /usr/bin/nohup /bin/bash -c "/bin/bash /opt/azure/containers/provision_start.sh"

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ message LocalDnsProfile {

// KubeDns overrides apply to DNS traffic from pods with dnsPolicy:ClusterFirst (referred to as KubeDns traffic).
map<string, LocalDnsOverrides> kube_dns_overrides = 5;

// Specifies whether the hosts plugin should be enabled in the localdns Corefile.
// When true and LocalDNS is enabled, the Corefile will include a hosts plugin block
// that serves cached DNS entries from /etc/localdns/hosts for critical AKS FQDNs.
bool enable_hosts_plugin = 6;
}

// Represents DNS override settings for both VnetDNS and KubeDNS traffic.
Expand Down
Loading
Loading