Skip to content

Commit 480ad78

Browse files
committed
First stab at enabling ACLs for a nomad cluster.
1 parent ec8b364 commit 480ad78

File tree

5 files changed

+149
-71
lines changed

5 files changed

+149
-71
lines changed

pkg/clients/nomad/nomad.go

Lines changed: 114 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import (
1818
//go:generate mockery --name Nomad --filename nomad.go
1919
type Nomad interface {
2020
// SetConfig for the client, path is a valid Nomad JSON config file
21-
SetConfig(address string, port, nodes int) error
21+
SetConfig(address string, port, nodes int, acl_token string) error
2222
// Create jobs in the provided files
2323
Create(files []string) error
2424
// Stop jobs in the provided files
@@ -30,9 +30,11 @@ type Nomad interface {
3030
// HealthCheckAPI uses the Nomad API to check that all servers and nodes
3131
// are ready. The function will block until either all nodes are healthy or the
3232
// timeout period elapses.
33-
HealthCheckAPI(time.Duration) error
33+
HealthCheckAPI(time.Duration, bool) error
3434
// Endpoints returns a list of endpoints for a cluster
3535
Endpoints(job, group, task string) ([]map[string]string, error)
36+
// Bootstrap ACLs
37+
BootstrapACLs() (string, error)
3638
}
3739

3840
// NomadImpl is an implementation of the Nomad interface
@@ -43,6 +45,7 @@ type NomadImpl struct {
4345
address string
4446
port int
4547
clientNodes int
48+
aclToken string
4649
}
4750

4851
// NewNomad creates a new Nomad client
@@ -59,17 +62,51 @@ type createRequest struct {
5962
Job string
6063
}
6164

65+
func (n *NomadImpl) setAuthHeaders(rq *http.Request) {
66+
if n.aclToken != "" {
67+
rq.Header.Set("X-Nomad-Token", n.aclToken)
68+
}
69+
}
70+
6271
// SetConfig loads the Nomad config from a file
63-
func (n *NomadImpl) SetConfig(address string, port, nodes int) error {
72+
func (n *NomadImpl) SetConfig(address string, port, nodes int, acl_token string) error {
6473
n.address = address
6574
n.port = port
6675
n.clientNodes = nodes
76+
n.aclToken = acl_token
6777

6878
return nil
6979
}
7080

71-
// HealthCheckAPI executes a HTTP heath check for a Nomad cluster
72-
func (n *NomadImpl) HealthCheckAPI(timeout time.Duration) error {
81+
func (n *NomadImpl) BootstrapACLs() (string, error) {
82+
n.l.Debug("Bootstrapping ACLs", "address", n.address)
83+
84+
rq, err := http.NewRequest(http.MethodPost, fmt.Sprintf("%s:%d/v1/acl/bootstrap", n.address, n.port), nil)
85+
if err != nil {
86+
return "", err
87+
}
88+
89+
resp, err := n.httpClient.Do(rq)
90+
if err != nil {
91+
return "", xerrors.Errorf("Unable to bootstrap ACLs: %w", err)
92+
}
93+
defer resp.Body.Close()
94+
95+
if resp.StatusCode != http.StatusOK {
96+
// try to read the body for the error
97+
d, _ := ioutil.ReadAll(resp.Body)
98+
return "", xerrors.Errorf("Error bootstrapping ACLs, got status code %d, error: %s", resp.StatusCode, string(d))
99+
} else {
100+
result := map[string]interface{}{}
101+
// check number of nodes
102+
json.NewDecoder(resp.Body).Decode(&result)
103+
n.aclToken = result["SecretID"].(string)
104+
return result["SecretID"].(string), nil
105+
}
106+
}
107+
108+
// HealthCheckAPI executes a HTTP health check for a Nomad cluster
109+
func (n *NomadImpl) HealthCheckAPI(timeout time.Duration, simple bool) error {
73110
n.l.Debug("Performing Nomad health check", "address", n.address)
74111
st := time.Now()
75112
for {
@@ -79,75 +116,87 @@ func (n *NomadImpl) HealthCheckAPI(timeout time.Duration) error {
79116
return fmt.Errorf("Timeout waiting for Nomad healthcheck %s", n.address)
80117
}
81118

82-
rq, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s:%d/v1/nodes", n.address, n.port), nil)
83-
if err != nil {
84-
return err
85-
}
86-
87-
resp, err := n.httpClient.Do(rq)
88-
if err == nil && resp.StatusCode == 200 {
89-
nodes := []map[string]interface{}{}
90-
// check number of nodes
91-
json.NewDecoder(resp.Body).Decode(&nodes)
92-
93-
// loop nodes and check ready
94-
readyCount := 0
95-
for _, node := range nodes {
96-
// get the node status
97-
nodeStatus := node["Status"].(string)
98-
nodeName := node["Name"].(string)
99-
nodeEligable := node["SchedulingEligibility"].(string)
100-
101-
n.l.Debug("Node status", "node", nodeName, "status", nodeStatus, "eligible", nodeEligable)
102-
// get the driver status
103-
drivers, ok := node["Drivers"].(map[string]interface{})
104-
if !ok {
105-
continue
106-
}
107-
108-
var driversHealthy = true
109-
var dockerDetected = false
110-
for k, v := range drivers {
111-
driver, ok := v.(map[string]interface{})
112-
if !ok {
113-
continue
114-
}
119+
if simple {
120+
rq, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s:%d/v1/status/leader", n.address, n.port), nil)
121+
if err != nil {
122+
return err
123+
}
124+
resp, err := n.httpClient.Do(rq)
125+
if err == nil && resp.StatusCode == http.StatusOK {
126+
return nil
127+
}
128+
} else {
129+
rq, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s:%d/v1/nodes", n.address, n.port), nil)
130+
n.setAuthHeaders(rq)
131+
if err != nil {
132+
return err
133+
}
115134

116-
healthy, ok := driver["Healthy"].(bool)
135+
resp, err := n.httpClient.Do(rq)
136+
if err == nil && resp.StatusCode == http.StatusOK {
137+
nodes := []map[string]interface{}{}
138+
// check number of nodes
139+
json.NewDecoder(resp.Body).Decode(&nodes)
140+
141+
// loop nodes and check ready
142+
readyCount := 0
143+
for _, node := range nodes {
144+
// get the node status
145+
nodeStatus := node["Status"].(string)
146+
nodeName := node["Name"].(string)
147+
nodeEligable := node["SchedulingEligibility"].(string)
148+
149+
n.l.Debug("Node status", "node", nodeName, "status", nodeStatus, "eligible", nodeEligable)
150+
// get the driver status
151+
drivers, ok := node["Drivers"].(map[string]interface{})
117152
if !ok {
118153
continue
119154
}
120155

121-
detected, ok := driver["Detected"].(bool)
122-
if !ok || !detected {
123-
continue
124-
}
156+
var driversHealthy = true
157+
var dockerDetected = false
158+
for k, v := range drivers {
159+
driver, ok := v.(map[string]interface{})
160+
if !ok {
161+
continue
162+
}
163+
164+
healthy, ok := driver["Healthy"].(bool)
165+
if !ok {
166+
continue
167+
}
168+
169+
detected, ok := driver["Detected"].(bool)
170+
if !ok || !detected {
171+
continue
172+
}
173+
174+
// we need to make a special case to check the docker driver is
175+
// present as if the nomad server starts before docker then the
176+
// presence of docker will not be detected
177+
if k == "docker" {
178+
dockerDetected = true
179+
}
180+
181+
n.l.Debug("Driver status", "node", nodeName, "driver", k, "healthy", healthy)
182+
if !healthy {
183+
driversHealthy = false
184+
}
125185

126-
// we need to make a special case to check the docker driver is
127-
// present as if the nomad server starts before docker then the
128-
// presence of docker will not be detected
129-
if k == "docker" {
130-
dockerDetected = true
131186
}
132187

133-
n.l.Debug("Driver status", "node", nodeName, "driver", k, "healthy", healthy)
134-
if !healthy {
135-
driversHealthy = false
188+
if nodeStatus == "ready" && nodeEligable == "eligible" && driversHealthy && dockerDetected {
189+
readyCount++
136190
}
137-
138191
}
139192

140-
if nodeStatus == "ready" && nodeEligable == "eligible" && driversHealthy && dockerDetected {
141-
readyCount++
193+
if readyCount == n.clientNodes {
194+
n.l.Debug("Nomad check complete", "address", n.address)
195+
return nil
142196
}
143-
}
144197

145-
if readyCount == n.clientNodes {
146-
n.l.Debug("Nomad check complete", "address", n.address)
147-
return nil
198+
n.l.Debug("Nodes not ready", "ready", readyCount, "nodes", n.clientNodes)
148199
}
149-
150-
n.l.Debug("Nodes not ready", "ready", readyCount, "nodes", n.clientNodes)
151200
}
152201

153202
// backoff
@@ -171,6 +220,7 @@ func (n *NomadImpl) Create(files []string) error {
171220
cr := fmt.Sprintf(`{"Job": %s}`, string(jsonJob))
172221

173222
r, err := http.NewRequest(http.MethodPost, addr, bytes.NewReader([]byte(cr)))
223+
n.setAuthHeaders(r)
174224
if err != nil {
175225
return xerrors.Errorf("Unable to create http request: %w", err)
176226
}
@@ -201,6 +251,7 @@ func (n *NomadImpl) Stop(files []string) error {
201251

202252
// stop the job
203253
r, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s:%d/v1/job/%s", n.address, n.port, id), nil)
254+
n.setAuthHeaders(r)
204255
if err != nil {
205256
return xerrors.Errorf("Unable to create http request: %w", err)
206257
}
@@ -235,6 +286,7 @@ func (n *NomadImpl) ParseJob(file string) ([]byte, error) {
235286

236287
// validate the config with the Nomad API
237288
r, err := http.NewRequest(http.MethodPost, fmt.Sprintf("%s:%d/v1/jobs/parse", n.address, n.port), bytes.NewReader(jobData))
289+
n.setAuthHeaders(r)
238290
if err != nil {
239291
return nil, xerrors.Errorf("Unable to create http request: %w", err)
240292
}
@@ -313,6 +365,7 @@ func (n *NomadImpl) Endpoints(job, group, task string) ([]map[string]string, err
313365
}
314366

315367
r, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s:%d/v1/allocation/%s", n.address, n.port, j["ID"]), nil)
368+
n.setAuthHeaders(r)
316369
if err != nil {
317370
return nil, xerrors.Errorf("Unable to create http request: %w", err)
318371
}
@@ -393,6 +446,7 @@ func (n *NomadImpl) Endpoints(job, group, task string) ([]map[string]string, err
393446
func (n *NomadImpl) getJobAllocations(job string) ([]map[string]interface{}, error) {
394447
// get the allocations for the job
395448
r, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s:%d/v1/job/%s/allocations", n.address, n.port, job), nil)
449+
n.setAuthHeaders(r)
396450
if err != nil {
397451
return nil, xerrors.Errorf("Unable to create http request: %w", err)
398452
}

pkg/clients/nomad/nomad_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func setupNomadTests(t *testing.T) (Nomad, string, *mocks.HTTP) {
4040
)
4141

4242
c := NewNomad(mh, 1*time.Millisecond, logger.NewTestLogger(t))
43-
c.SetConfig("local", 4646, 1)
43+
c.SetConfig("local", 4646, 1, "")
4444

4545
return c, tmpDir, mh
4646
}

pkg/config/resources/nomad/provider_cluster.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ func (p *ClusterProvider) Refresh() error {
145145

146146
wg.Wait()
147147

148-
p.nomadClient.SetConfig(fmt.Sprintf("http://%s", p.config.ExternalIP), p.config.APIPort, p.config.ClientNodes+1)
149-
err := p.nomadClient.HealthCheckAPI(startTimeout)
148+
p.nomadClient.SetConfig(fmt.Sprintf("http://%s", p.config.ExternalIP), p.config.APIPort, p.config.ClientNodes+1, p.config.ACLToken)
149+
err := p.nomadClient.HealthCheckAPI(startTimeout, false)
150150
if err != nil {
151151
return err
152152
}
@@ -174,8 +174,8 @@ func (p *ClusterProvider) Refresh() error {
174174
p.log.Debug("Successfully created client node", "ref", p.config.ID, "client", fqdn)
175175
}
176176

177-
p.nomadClient.SetConfig(fmt.Sprintf("http://%s", p.config.ExternalIP), p.config.APIPort, p.config.ClientNodes+1)
178-
err := p.nomadClient.HealthCheckAPI(startTimeout)
177+
p.nomadClient.SetConfig(fmt.Sprintf("http://%s", p.config.ExternalIP), p.config.APIPort, p.config.ClientNodes+1, p.config.ACLToken)
178+
err := p.nomadClient.HealthCheckAPI(startTimeout, false)
179179
if err != nil {
180180
return err
181181
}
@@ -391,8 +391,19 @@ func (p *ClusterProvider) createNomad() error {
391391
}
392392

393393
// ensure all client nodes are up
394-
p.nomadClient.SetConfig(fmt.Sprintf("http://%s", p.config.ExternalIP), p.config.APIPort, clientNodes)
395-
err = p.nomadClient.HealthCheckAPI(startTimeout)
394+
p.nomadClient.SetConfig(fmt.Sprintf("http://%s", p.config.ExternalIP), p.config.APIPort, clientNodes, p.config.ACLToken)
395+
err = p.nomadClient.HealthCheckAPI(startTimeout, true)
396+
if err != nil {
397+
return err
398+
}
399+
if p.config.ACLEnabled {
400+
acl_token, err := p.nomadClient.BootstrapACLs()
401+
if err != nil {
402+
return err
403+
}
404+
p.config.ACLToken = acl_token
405+
}
406+
err = p.nomadClient.HealthCheckAPI(startTimeout, false)
396407
if err != nil {
397408
return err
398409
}
@@ -423,7 +434,7 @@ func (p *ClusterProvider) createServerNode(img ctypes.Image, volumeID string, is
423434
}
424435

425436
// generate the server config
426-
sc := dataDir + "\n" + fmt.Sprintf(serverConfig, p.config.Datacenter, cpu)
437+
sc := dataDir + "\n" + fmt.Sprintf(serverConfig, p.config.Datacenter, cpu, p.config.ACLEnabled)
427438

428439
// write the nomad config to a file
429440
os.MkdirAll(p.config.ConfigDir, os.ModePerm)
@@ -540,7 +551,7 @@ func (p *ClusterProvider) createServerNode(img ctypes.Image, volumeID string, is
540551
// returns the fqdn, docker id, and an error if unsuccessful
541552
func (p *ClusterProvider) createClientNode(id string, image, volumeID, serverID string) (string, string, error) {
542553
// generate the client config
543-
sc := dataDir + "\n" + fmt.Sprintf(clientConfig, p.config.Datacenter, serverID)
554+
sc := dataDir + "\n" + fmt.Sprintf(clientConfig, p.config.Datacenter, serverID, p.config.ACLEnabled)
544555

545556
// write the default config to a file
546557
clientConfigPath := path.Join(p.config.ConfigDir, "client_config.hcl")
@@ -1002,6 +1013,10 @@ client {
10021013
%s
10031014
}
10041015
1016+
acl {
1017+
enabled = %t
1018+
}
1019+
10051020
plugin "raw_exec" {
10061021
config {
10071022
enabled = true
@@ -1020,6 +1035,10 @@ client {
10201035
}
10211036
}
10221037
1038+
acl {
1039+
enabled = %t
1040+
}
1041+
10231042
plugin "raw_exec" {
10241043
config {
10251044
enabled = true

pkg/config/resources/nomad/provider_job.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func (p *JobProvider) Create() error {
4646
nomadCluster := p.config.Cluster
4747

4848
// load the config
49-
p.client.SetConfig(fmt.Sprintf("http://%s", nomadCluster.ExternalIP), nomadCluster.APIPort, nomadCluster.ClientNodes)
49+
p.client.SetConfig(fmt.Sprintf("http://%s", nomadCluster.ExternalIP), nomadCluster.APIPort, nomadCluster.ClientNodes, nomadCluster.ACLToken)
5050

5151
err := p.client.Create(p.config.Paths)
5252
if err != nil {
@@ -99,7 +99,7 @@ func (p *JobProvider) Destroy() error {
9999
nomadCluster := p.config.Cluster
100100

101101
// load the config
102-
p.client.SetConfig(fmt.Sprintf("http://%s", nomadCluster.ExternalIP), nomadCluster.APIPort, nomadCluster.ClientNodes)
102+
p.client.SetConfig(fmt.Sprintf("http://%s", nomadCluster.ExternalIP), nomadCluster.APIPort, nomadCluster.ClientNodes, nomadCluster.ACLToken)
103103

104104
err := p.client.Stop(p.config.Paths)
105105
if err != nil {

0 commit comments

Comments
 (0)