Skip to content

Commit ba282ec

Browse files
committed
feat(health): reintroduce HealthService with Livez RPC for health status checks; deprecate legacy gRPC endpoint in favor of HTTP
1 parent a65b373 commit ba282ec

File tree

5 files changed

+426
-3
lines changed

5 files changed

+426
-3
lines changed

pkg/rpc/server/server.go

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,10 +287,88 @@ func (p *P2PServer) GetNetInfo(
287287
}), nil
288288
}
289289

290-
// NewServiceHandler creates a new HTTP handler for Store, P2P and Config services
290+
// HealthServer implements the HealthService defined in the proto file.
// It holds the store, node configuration and logger that Livez reads from.
//
291+
// Deprecated: This is a legacy compatibility shim for external frameworks.
292+
// New code should use the GET /health/live HTTP endpoint instead.
293+
type HealthServer struct {
294+
store store.Store // state source queried by Livez
295+
config config.Config // node settings read by Livez (aggregator flag, lazy mode, block timing)
296+
logger zerolog.Logger // receives health-check log entries
297+
}
298+
299+
// NewHealthServer creates a new HealthServer instance
300+
func NewHealthServer(store store.Store, config config.Config, logger zerolog.Logger) *HealthServer {
301+
return &HealthServer{
302+
store: store,
303+
config: config,
304+
logger: logger,
305+
}
306+
}
307+
308+
// Livez implements the HealthService.Livez RPC
309+
// DEPRECATED: Use GET /health/live HTTP endpoint instead. This endpoint exists only
310+
// for backward compatibility with external testing frameworks.
311+
func (h *HealthServer) Livez(
312+
ctx context.Context,
313+
req *connect.Request[emptypb.Empty],
314+
) (*connect.Response[pb.GetHealthResponse], error) {
315+
status := pb.HealthStatus_PASS
316+
317+
// For aggregator nodes, check if block production is healthy
318+
if h.config.Node.Aggregator {
319+
state, err := h.store.GetState(ctx)
320+
if err != nil {
321+
h.logger.Error().Err(err).Msg("Failed to get state for health check")
322+
return connect.NewResponse(&pb.GetHealthResponse{
323+
Status: pb.HealthStatus_FAIL,
324+
}), nil
325+
}
326+
327+
// If we have blocks, check if the last block time is recent
328+
if state.LastBlockHeight > 0 {
329+
timeSinceLastBlock := time.Since(state.LastBlockTime)
330+
331+
// Calculate the threshold based on block time
332+
blockTime := h.config.Node.BlockTime.Duration
333+
334+
// For lazy mode, use the lazy block interval instead
335+
if h.config.Node.LazyMode {
336+
blockTime = h.config.Node.LazyBlockInterval.Duration
337+
}
338+
339+
warnThreshold := blockTime * 3 // healthCheckWarnMultiplier
340+
failThreshold := blockTime * 5 // healthCheckFailMultiplier
341+
342+
if timeSinceLastBlock > failThreshold {
343+
h.logger.Error().
344+
Dur("time_since_last_block", timeSinceLastBlock).
345+
Dur("fail_threshold", failThreshold).
346+
Uint64("last_block_height", state.LastBlockHeight).
347+
Time("last_block_time", state.LastBlockTime).
348+
Msg("Health check: node has stopped producing blocks (FAIL)")
349+
status = pb.HealthStatus_FAIL
350+
} else if timeSinceLastBlock > warnThreshold {
351+
h.logger.Warn().
352+
Dur("time_since_last_block", timeSinceLastBlock).
353+
Dur("warn_threshold", warnThreshold).
354+
Uint64("last_block_height", state.LastBlockHeight).
355+
Time("last_block_time", state.LastBlockTime).
356+
Msg("Health check: block production is slow (WARN)")
357+
status = pb.HealthStatus_WARN
358+
}
359+
}
360+
}
361+
362+
return connect.NewResponse(&pb.GetHealthResponse{
363+
Status: status,
364+
}), nil
365+
}
366+
367+
// NewServiceHandler creates a new HTTP handler for Store, P2P, Health and Config services
291368
func NewServiceHandler(store store.Store, peerManager p2p.P2PRPC, proposerAddress []byte, logger zerolog.Logger, config config.Config, bestKnown BestKnownHeightProvider) (http.Handler, error) {
292369
storeServer := NewStoreServer(store, logger)
293370
p2pServer := NewP2PServer(peerManager)
371+
healthServer := NewHealthServer(store, config, logger) // Legacy gRPC endpoint
294372
configServer := NewConfigServer(config, proposerAddress, logger)
295373

296374
mux := http.NewServeMux()
@@ -299,6 +377,7 @@ func NewServiceHandler(store store.Store, peerManager p2p.P2PRPC, proposerAddres
299377
reflector := grpcreflect.NewStaticReflector(
300378
rpc.StoreServiceName,
301379
rpc.P2PServiceName,
380+
rpc.HealthServiceName, // Legacy gRPC endpoint
302381
rpc.ConfigServiceName,
303382
)
304383
mux.Handle(grpcreflect.NewHandlerV1(reflector, compress1KB))
@@ -312,10 +391,14 @@ func NewServiceHandler(store store.Store, peerManager p2p.P2PRPC, proposerAddres
312391
p2pPath, p2pHandler := rpc.NewP2PServiceHandler(p2pServer)
313392
mux.Handle(p2pPath, p2pHandler)
314393

394+
// Register HealthService (legacy gRPC endpoint for backward compatibility)
395+
healthPath, healthHandler := rpc.NewHealthServiceHandler(healthServer)
396+
mux.Handle(healthPath, healthHandler)
397+
315398
configPath, configHandler := rpc.NewConfigServiceHandler(configServer)
316399
mux.Handle(configPath, configHandler)
317400

318-
// Register custom HTTP endpoints
401+
// Register custom HTTP endpoints (including the preferred /health/live endpoint)
319402
RegisterCustomHTTPEndpoints(mux, store, peerManager, config, bestKnown, logger)
320403

321404
// Use h2c to support HTTP/2 without TLS

proto/evnode/v1/health.proto

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
syntax = "proto3";
2+
package evnode.v1;
3+
4+
import "google/protobuf/empty.proto";
5+
6+
option go_package = "github.com/evstack/ev-node/types/pb/evnode/v1";
7+
8+
// HealthService defines the RPC service for the health package.
// It exposes a single unary RPC, Livez.
9+
// DEPRECATED: Use HTTP endpoint GET /health/live instead
10+
service HealthService {
11+
// Livez returns the health status of the node.
// It takes no input (google.protobuf.Empty) and returns a GetHealthResponse.
12+
// DEPRECATED: Use HTTP endpoint GET /health/live instead
13+
rpc Livez(google.protobuf.Empty) returns (GetHealthResponse) {}
14+
}
15+
16+
// HealthStatus defines the health status of the node
17+
enum HealthStatus {
18+
// Unknown health status. As the zero value, this is what an unset
// HealthStatus field reads as in proto3.
19+
UNKNOWN = 0;
20+
// Healthy
21+
PASS = 1;
22+
// Degraded but still serving
23+
WARN = 2;
24+
// Hard fail
25+
FAIL = 3;
26+
}
27+
28+
// GetHealthResponse defines the response for retrieving health status.
// It is the return message of HealthService.Livez.
29+
message GetHealthResponse {
30+
// Health status, one of the HealthStatus enum values
31+
HealthStatus status = 1;
32+
}

test/docker-e2e/upgrade_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
package docker_e2e
44

55
import (
6-
"bytes"
76
"context"
87
"fmt"
98
"math/big"

types/pb/evnode/v1/health.pb.go

Lines changed: 196 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)