Skip to content

Commit 0152750

Browse files
authored
Added health check endpoint (#18)
1 parent 9b1024f commit 0152750

File tree

12 files changed

+167
-14
lines changed

12 files changed

+167
-14
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ lint:
1616

1717
.PHONY: run
1818
run: build
19-
bin/${BINARY} -config=config/replicator.conf.yml
19+
bin/${BINARY} -config=config/dev.conf.yml
2020

2121
.PHONY: run_short_tests
2222
run_short_tests:

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,13 @@ a list of replicated columns, a space name.
3232
Replicator reads primary keys from MySQL table info and sync them automatically.
3333
Updating primary key in MySQL causes two Tarantool requests: delete an old row and insert a new one, because
3434
it is illegal to update primary key in Tarantool.
35+
36+
## Metrics
37+
38+
Replicator exposes several debug endpoints:
39+
40+
* `/metrics` - runtime and app metrics in Prometheus format,
41+
* `/health` - health check.
42+
43+
Health check returns status `503 Service Unavailable` if replicator is not running, dumping
44+
data or replication lag greater than `app.health.seconds_behind_master` config value.

cmd/replicator/main.go

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"context"
5+
"errors"
56
"flag"
67
"fmt"
78
"io"
@@ -13,6 +14,7 @@ import (
1314
"syscall"
1415
"time"
1516

17+
"github.com/etherlabsio/healthcheck"
1618
"github.com/prometheus/client_golang/prometheus/promhttp"
1719
"github.com/rs/zerolog"
1820
"github.com/rs/zerolog/log"
@@ -48,7 +50,13 @@ func main() {
4850

4951
metrics.Init()
5052

51-
server := initHTTPServer(cfg.App.ListenAddr)
53+
b, err := bridge.New(cfg, logger)
54+
if err != nil {
55+
logger.Fatal().Err(err).Msg("could not establish bridge from MySQL to Tarantool")
56+
}
57+
58+
health := initHealthHandler(cfg.App.Health, b)
59+
server := initHTTPServer(cfg.App.ListenAddr, health)
5260
go func() {
5361
logger.Info().Msgf("listening on %s", cfg.App.ListenAddr)
5462

@@ -58,14 +66,9 @@ func main() {
5866
}
5967
}()
6068

61-
b, err := bridge.New(cfg, logger)
62-
if err != nil {
63-
logger.Fatal().Err(err).Msg("could not establish bridge from MySQL to Tarantool")
64-
}
65-
6669
go func() {
67-
errors := b.Run()
68-
for errRun := range errors {
70+
runErrors := b.Run()
71+
for errRun := range runErrors {
6972
logger.Err(errRun).Msg("got sync error")
7073
}
7174

@@ -155,14 +158,52 @@ func newRollingLogFile(cfg *config.Logging) (io.Writer, error) {
155158
}, nil
156159
}
157160

158-
func initHTTPServer(addr string) *http.Server {
161+
func initHTTPServer(addr string, health http.Handler) *http.Server {
159162
server := &http.Server{
160163
Addr: addr,
161164
ReadTimeout: 5 * time.Second, //nolint:gomnd
162165
WriteTimeout: 5 * time.Second, //nolint:gomnd
163166
}
164167

165168
http.Handle("/metrics", promhttp.Handler())
169+
http.Handle("/health", health)
166170

167171
return server
168172
}
173+
174+
func initHealthHandler(cfg config.Health, b *bridge.Bridge) http.Handler {
175+
sbm := uint32(cfg.SecondsBehindMaster)
176+
177+
return healthcheck.Handler(
178+
healthcheck.WithChecker(
179+
"lag", healthcheck.CheckerFunc(
180+
func(ctx context.Context) error {
181+
cur := b.Delay()
182+
if cur > sbm {
183+
return fmt.Errorf("replication lag too big: %d", cur)
184+
}
185+
186+
return nil
187+
},
188+
),
189+
),
190+
191+
healthcheck.WithChecker(
192+
"state", healthcheck.CheckerFunc(
193+
func(ctx context.Context) error {
194+
dumping := b.Dumping()
195+
if dumping {
196+
return errors.New("replicator has not yet finished dump process")
197+
}
198+
199+
running := b.Running()
200+
if !running {
201+
return errors.New("replication is not running")
202+
}
203+
204+
return nil
205+
},
206+
),
207+
),
208+
)
209+
}

config/dev.conf.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
app:
2+
listen_addr: ':8080'
3+
data_file: '/tmp/mysql-tarantool/state.info'
4+
health:
5+
seconds_behind_master: 10
6+
logging:
7+
level: 'debug'
8+
syslog_enabled: false
9+
file_enabled: false
10+
file_name: '/var/log/mysql-tarantool-replicator.log'
11+
file_max_size: 256
12+
file_max_backups: 3
13+
file_max_age: 5
14+
15+
replication:
16+
server_id: 100
17+
gtid_mode: true
18+
19+
mysql:
20+
dump:
21+
exec_path: '/usr/bin/mysqldump'
22+
skip_master_data: false
23+
extra_options:
24+
- '--column-statistics=0'
25+
addr: '127.0.0.1:13306'
26+
user: 'repl'
27+
password: 'repl'
28+
charset: 'utf8'
29+
30+
tarantool:
31+
addr: '127.0.0.1:13301'
32+
user: 'repl'
33+
password: 'repl'
34+
max_retries: 3
35+
connect_timeout: '500ms'
36+
request_timeout: '500ms'
37+
38+
mappings:
39+
- source:
40+
schema: 'city'
41+
table: 'users'
42+
columns:
43+
- username
44+
- password
45+
- email
46+
dest:
47+
space: 'users'

config/replicator.conf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
app:
22
listen_addr: ':8080'
33
data_file: '/etc/mysql-tarantool/state.info'
4+
health:
5+
seconds_behind_master: 10
46
logging:
57
level: 'debug'
68
syslog_enabled: false

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go 1.14
44

55
require (
66
github.com/davecgh/go-spew v1.1.1 // indirect
7+
github.com/etherlabsio/healthcheck v0.0.0-20191224061800-dd3d2fd8c3f6
78
github.com/philhofer/fwd v1.1.0 // indirect
89
github.com/prometheus/client_golang v1.8.0
910
github.com/rs/zerolog v1.20.0
@@ -14,6 +15,7 @@ require (
1415
github.com/stretchr/testify v1.6.1
1516
github.com/tinylib/msgp v1.1.2 // indirect
1617
github.com/viciious/go-tarantool v0.0.0-20201014090959-d4e1044f393b
18+
go.uber.org/atomic v1.5.0
1719
golang.org/x/sys v0.0.0-20201029080932-201ba4db2418
1820
gopkg.in/natefinch/lumberjack.v2 v2.0.0
1921
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaB
6363
github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g=
6464
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
6565
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
66+
github.com/etherlabsio/healthcheck v0.0.0-20191224061800-dd3d2fd8c3f6 h1:az9jaEKre+mwUWiS9Pl8h1FuOvdiFM7UqplmCmJtHUQ=
67+
github.com/etherlabsio/healthcheck v0.0.0-20191224061800-dd3d2fd8c3f6/go.mod h1:ZMSmptAGNIg5UAxsJzmw5DMW6uQvxr/hvCklNwtFz1k=
6668
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
6769
github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4=
6870
github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20=

internal/bridge/replicator.go

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"github.com/siddontang/go-mysql/canal"
1212
"github.com/siddontang/go-mysql/mysql"
1313
tnt "github.com/viciious/go-tarantool"
14+
"go.uber.org/atomic"
1415

1516
"github.com/pparshin/go-mysql-tarantool/internal/config"
1617
"github.com/pparshin/go-mysql-tarantool/internal/metrics"
@@ -32,13 +33,18 @@ type Bridge struct {
3233
cancel context.CancelFunc
3334
logger zerolog.Logger
3435

36+
dumping *atomic.Bool
37+
running *atomic.Bool
38+
3539
syncCh chan interface{}
3640
closeOnce *sync.Once
3741
}
3842

3943
func New(cfg *config.Config, logger zerolog.Logger) (*Bridge, error) {
4044
b := &Bridge{
4145
logger: logger,
46+
dumping: atomic.NewBool(false),
47+
running: atomic.NewBool(false),
4248
syncCh: make(chan interface{}, eventsBufSize),
4349
closeOnce: &sync.Once{},
4450
}
@@ -238,12 +244,12 @@ func (b *Bridge) newTarantoolClient(cfg *config.Config) {
238244
//
239245
// Returns closed channel with all errors or an empty channel.
240246
func (b *Bridge) Run() <-chan error {
241-
metrics.SetReplicationState(true)
242-
defer metrics.SetReplicationState(false)
247+
b.setRunning(true)
248+
defer b.setRunning(false)
243249

244250
go func() {
245251
for range time.Tick(1 * time.Second) {
246-
metrics.SetSecondsBehindMaster(b.canal.GetDelay())
252+
metrics.SetSecondsBehindMaster(b.Delay())
247253
}
248254
}()
249255

@@ -266,6 +272,12 @@ func (b *Bridge) Run() <-chan error {
266272
}
267273
}()
268274

275+
b.setDumping(true)
276+
go func() {
277+
<-b.canal.WaitDumpDone()
278+
b.setDumping(false)
279+
}()
280+
269281
var err error
270282
pos := b.stateSaver.position()
271283
switch p := pos.(type) {
@@ -346,3 +358,25 @@ func (b *Bridge) Close() error {
346358

347359
return err
348360
}
361+
362+
func (b *Bridge) Delay() uint32 {
363+
return b.canal.GetDelay()
364+
}
365+
366+
func (b *Bridge) setRunning(v bool) {
367+
b.running.Store(v)
368+
b.setDumping(false)
369+
metrics.SetReplicationState(v)
370+
}
371+
372+
func (b *Bridge) Running() bool {
373+
return b.running.Load()
374+
}
375+
376+
func (b *Bridge) setDumping(v bool) {
377+
b.dumping.Store(v)
378+
}
379+
380+
func (b *Bridge) Dumping() bool {
381+
return b.dumping.Load()
382+
}

internal/bridge/testdata/cfg.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
app:
2+
listen_addr: ':8080'
23
data_file: '/tmp/mysql-tarantool-replicator/state.info'
4+
health:
5+
seconds_behind_master: 5
36
logging:
47
level: 'debug'
58
syslog_enabled: false

internal/config/cfg.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
const (
1212
defaultListenAddr = ":8080"
1313
defaultDataFile = "/etc/mysql-tarantool-replicator/state.info"
14+
defaultHealthSBM = 10
1415
defaultLogLevel = "debug"
1516
defaultSysLogEnabled = false
1617
defaultFileLoggingEnabled = false
@@ -46,9 +47,14 @@ type Config struct {
4647
type AppConfig struct {
4748
ListenAddr string `yaml:"listen_addr"`
4849
DataFile string `yaml:"data_file"`
50+
Health Health `yaml:"health"`
4951
Logging Logging `yaml:"logging"`
5052
}
5153

54+
type Health struct {
55+
SecondsBehindMaster int `yaml:"seconds_behind_master"`
56+
}
57+
5258
type Logging struct {
5359
Level string `yaml:"level"`
5460
SysLogEnabled bool `yaml:"syslog_enabled"`
@@ -65,9 +71,10 @@ func (c *AppConfig) withDefaults() {
6571
}
6672

6773
c.ListenAddr = defaultListenAddr
68-
6974
c.DataFile = defaultDataFile
7075

76+
c.Health.SecondsBehindMaster = defaultHealthSBM
77+
7178
c.Logging.Level = defaultLogLevel
7279
c.Logging.SysLogEnabled = defaultSysLogEnabled
7380
c.Logging.FileLoggingEnabled = defaultFileLoggingEnabled

0 commit comments

Comments
 (0)