Skip to content

Commit 9e6f06b

Browse files
author
Douglas Paz
committed
feat: add indices health collector
1 parent 23c72f7 commit 9e6f06b

File tree

5 files changed

+328
-0
lines changed

5 files changed

+328
-0
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ Further Information
182182
| elasticsearch_indices_translog_size_in_bytes | counter | 1 | Total translog size in bytes
183183
| elasticsearch_indices_warmer_time_seconds_total | counter | 1 | Total warmer time in seconds
184184
| elasticsearch_indices_warmer_total | counter | 1 | Total warmer count
185+
| elasticsearch_indices_health_up | gauge | 0 | Was the last scrape of the Elasticsearch cat indices endpoint successful
186+
| elasticsearch_indices_health_total_scrapes | counter | 0 | Current total Elasticsearch cat indices scrapes
187+
| elasticsearch_indices_health_json_parse_failures | counter | 0 | Number of errors while parsing JSON
188+
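| elasticsearch_indices_health_health | gauge | 3 | Whether all primary and replica index shards are allocated. One series per index and color (green, yellow, red): 1 on the series whose `color` label matches the index's current health, 0 on the others.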
185189
| elasticsearch_jvm_gc_collection_seconds_count | counter | 2 | Count of JVM GC runs
186190
| elasticsearch_jvm_gc_collection_seconds_sum | counter | 2 | GC run time in seconds
187191
| elasticsearch_jvm_memory_committed_bytes | gauge | 2 | JVM memory currently committed by area

collector/indices_health.go

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"encoding/json"
18+
"fmt"
19+
"io/ioutil"
20+
"net/http"
21+
"net/url"
22+
"path"
23+
24+
"github.com/go-kit/log"
25+
"github.com/go-kit/log/level"
26+
"github.com/prometheus-community/elasticsearch_exporter/pkg/clusterinfo"
27+
"github.com/prometheus/client_golang/prometheus"
28+
)
29+
30+
// indexColors lists the health colors for which a series is emitted per index.
var indexColors = []string{"green", "yellow", "red"}
33+
34+
type indicesHealthLabels struct {
35+
keys func(...string) []string
36+
values func(*clusterinfo.Response, ...string) []string
37+
}
38+
39+
type indexHealthMetric struct {
40+
Type prometheus.ValueType
41+
Desc *prometheus.Desc
42+
Value func(indexHealth indexHealthResponse, color string) float64
43+
Labels indicesHealthLabels
44+
}
45+
46+
// IndiceHealth type defines the collector struct
47+
type IndicesHealth struct {
48+
logger log.Logger
49+
client *http.Client
50+
url *url.URL
51+
clusterInfoCh chan *clusterinfo.Response
52+
lastClusterInfo *clusterinfo.Response
53+
54+
up prometheus.Gauge
55+
totalScrapes, jsonParseFailures prometheus.Counter
56+
57+
indexesHealthMetrics []*indexHealthMetric
58+
}
59+
60+
// NewIndicesHealth defines IndicesHealth metrics
61+
func NewIndicesHealth(logger log.Logger, client *http.Client, url *url.URL) *IndicesHealth {
62+
subsystem := "indices_health"
63+
64+
indexLabels := indicesHealthLabels{
65+
keys: func(...string) []string {
66+
return []string{"index", "color", "cluster"}
67+
},
68+
values: func(lastClusterinfo *clusterinfo.Response, s ...string) []string {
69+
if lastClusterinfo != nil {
70+
return append(s, lastClusterinfo.ClusterName)
71+
}
72+
// this shouldn't happen, as the clusterinfo Retriever has a blocking
73+
// Run method. It blocks until the first clusterinfo call has succeeded
74+
return append(s, "unknown_cluster")
75+
},
76+
}
77+
78+
indicesHealth := &IndicesHealth{
79+
logger: logger,
80+
client: client,
81+
url: url,
82+
clusterInfoCh: make(chan *clusterinfo.Response),
83+
lastClusterInfo: &clusterinfo.Response{
84+
ClusterName: "unknown_cluster",
85+
},
86+
87+
up: prometheus.NewGauge(prometheus.GaugeOpts{
88+
Name: prometheus.BuildFQName(namespace, subsystem, "up"),
89+
Help: "Was the last scrape of the Elasticsearch cat indices endpoint successful.",
90+
}),
91+
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
92+
Name: prometheus.BuildFQName(namespace, subsystem, "total_scrapes"),
93+
Help: "Current total Elasticsearch cat indices scrapes.",
94+
}),
95+
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
96+
Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures"),
97+
Help: "Number of errors while parsing JSON.",
98+
}),
99+
100+
indexesHealthMetrics: []*indexHealthMetric{
101+
{
102+
Type: prometheus.GaugeValue,
103+
Desc: prometheus.NewDesc(
104+
prometheus.BuildFQName(namespace, subsystem, "health"),
105+
"Whether all primary and replica index shards are allocated.",
106+
indexLabels.keys(), nil,
107+
),
108+
Value: func(indexHealth indexHealthResponse, color string) float64 {
109+
if indexHealth.Health == color {
110+
return 1
111+
}
112+
return 0
113+
},
114+
Labels: indexLabels,
115+
},
116+
},
117+
}
118+
119+
// start go routine to fetch clusterinfo updates and save them to lastClusterinfo
120+
go func() {
121+
_ = level.Debug(logger).Log("msg", "starting cluster info receive loop")
122+
for ci := range indicesHealth.clusterInfoCh {
123+
if ci != nil {
124+
_ = level.Debug(logger).Log("msg", "received cluster info update", "cluster", ci.ClusterName)
125+
indicesHealth.lastClusterInfo = ci
126+
}
127+
}
128+
_ = level.Debug(logger).Log("msg", "exiting cluster info receive loop")
129+
}()
130+
131+
return indicesHealth
132+
}
133+
134+
// Describe add IndicesHealth metrics descriptions
135+
func (ih *IndicesHealth) Describe(ch chan<- *prometheus.Desc) {
136+
for _, metric := range ih.indexesHealthMetrics {
137+
ch <- metric.Desc
138+
}
139+
ch <- ih.up.Desc()
140+
ch <- ih.totalScrapes.Desc()
141+
ch <- ih.jsonParseFailures.Desc()
142+
}
143+
144+
// ClusterLabelUpdates returns a pointer to a channel to receive cluster info updates. It implements the
145+
// (not exported) clusterinfo.consumer interface
146+
func (ih *IndicesHealth) ClusterLabelUpdates() *chan *clusterinfo.Response {
147+
return &ih.clusterInfoCh
148+
}
149+
150+
// String implements the stringer interface. It is part of the clusterinfo.consumer interface
151+
func (ih *IndicesHealth) String() string {
152+
return namespace + "indiceshealth"
153+
}
154+
155+
func (ih *IndicesHealth) queryURL(u *url.URL) ([]byte, error) {
156+
res, err := ih.client.Get(u.String())
157+
if err != nil {
158+
return []byte{}, fmt.Errorf("failed to get resource from %s://%s:%s%s: %s",
159+
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
160+
}
161+
162+
defer func() {
163+
err = res.Body.Close()
164+
if err != nil {
165+
_ = level.Warn(ih.logger).Log(
166+
"msg", "failed to close http.Client",
167+
"err", err,
168+
)
169+
}
170+
}()
171+
172+
if res.StatusCode != http.StatusOK {
173+
return []byte{}, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
174+
}
175+
176+
bts, err := ioutil.ReadAll(res.Body)
177+
if err != nil {
178+
return []byte{}, err
179+
}
180+
181+
return bts, nil
182+
}
183+
184+
func (ih *IndicesHealth) fetchAndDecodeIndicesHealth() (CatIndicesResponse, error) {
185+
var isr CatIndicesResponse
186+
187+
u := *ih.url
188+
u.Path = path.Join(u.Path, "/_cat/indices")
189+
u.RawQuery = "format=json&h=health,index"
190+
191+
bts, err := ih.queryURL(&u)
192+
if err != nil {
193+
return isr, err
194+
}
195+
196+
if err := json.Unmarshal(bts, &isr); err != nil {
197+
ih.jsonParseFailures.Inc()
198+
return isr, err
199+
}
200+
201+
return isr, nil
202+
}
203+
204+
// Collect gets indices health metric values
205+
func (ih *IndicesHealth) Collect(ch chan<- prometheus.Metric) {
206+
ih.totalScrapes.Inc()
207+
defer func() {
208+
ch <- ih.up
209+
ch <- ih.totalScrapes
210+
ch <- ih.jsonParseFailures
211+
}()
212+
213+
catIndicesResponse, err := ih.fetchAndDecodeIndicesHealth()
214+
if err != nil {
215+
ih.up.Set(0)
216+
_ = level.Warn(ih.logger).Log(
217+
"msg", "failed to fetch and decode cat indices",
218+
"err", err,
219+
)
220+
return
221+
}
222+
ih.up.Set(1)
223+
224+
for _, metric := range ih.indexesHealthMetrics {
225+
for _, indexHealth := range catIndicesResponse {
226+
for _, color := range indexColors {
227+
ch <- prometheus.MustNewConstMetric(
228+
metric.Desc,
229+
metric.Type,
230+
metric.Value(indexHealth, color),
231+
metric.Labels.values(ih.lastClusterInfo, indexHealth.Index, color)...,
232+
)
233+
}
234+
}
235+
}
236+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
type (
	// indexHealthResponse is one element of the decoded response; it maps the
	// JSON keys "health" and "index".
	indexHealthResponse struct {
		Health string `json:"health"`
		Index  string `json:"index"`
	}

	// CatIndicesResponse is a list of indexHealthResponse entries, decoded
	// from a JSON array.
	CatIndicesResponse []indexHealthResponse
)

collector/indices_health_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"fmt"
18+
"net/http"
19+
"net/http/httptest"
20+
"net/url"
21+
"testing"
22+
23+
"github.com/go-kit/log"
24+
)
25+
26+
func TestIndicesHealth(t *testing.T) {
27+
// Testcases created using:
28+
// docker run -d -p 9200:9200 elasticsearch:VERSION
29+
// curl -XPUT http://localhost:9200/twitter
30+
// curl http://localhost:9200/_cat/indices?format=json&h=health,index
31+
tcs := map[string]string{
32+
"1.7.6": `[{"health":"yellow","index":"twitter"}]`,
33+
"2.4.5": `[{"health":"yellow","index":"twitter"}]`,
34+
"5.4.2": `[{"health":"yellow","index":"twitter"}]`,
35+
"5.5.2": `[{"health":"yellow","index":"twitter"}]`,
36+
"8.2.3": `[{"health":"yellow","index":"twitter"}]`,
37+
}
38+
for ver, out := range tcs {
39+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
40+
fmt.Fprintln(w, out)
41+
}))
42+
defer ts.Close()
43+
44+
u, err := url.Parse(ts.URL)
45+
if err != nil {
46+
t.Fatalf("Failed to parse URL: %s", err)
47+
}
48+
c := NewIndicesHealth(log.NewNopLogger(), http.DefaultClient, u)
49+
ihr, err := c.fetchAndDecodeIndicesHealth()
50+
if err != nil {
51+
t.Fatalf("Failed to fetch or decode cluster health: %s", err)
52+
}
53+
t.Logf("[%s] Cluster Health Response: %+v", ver, ihr)
54+
if ihr[0].Index != "twitter" {
55+
t.Errorf("is not twitter")
56+
}
57+
if ihr[0].Health != "yellow" {
58+
t.Errorf("twitter is not yellow")
59+
}
60+
}
61+
}

main.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,12 @@ func main() {
191191
_ = level.Error(logger).Log("msg", "failed to register indices collector in cluster info")
192192
os.Exit(1)
193193
}
194+
iHC := collector.NewIndicesHealth(logger, httpClient, esURL)
195+
prometheus.MustRegister(iHC)
196+
if registerErr := clusterInfoRetriever.RegisterConsumer(iHC); registerErr != nil {
197+
_ = level.Error(logger).Log("msg", "failed to register indices health collector in cluster info")
198+
os.Exit(1)
199+
}
194200
}
195201

196202
if *esExportSnapshots {

0 commit comments

Comments
 (0)