improved health check

This commit is contained in:
yusing
2024-10-14 10:02:53 +08:00
parent 99207ae606
commit f38b3abdbc
20 changed files with 323 additions and 155 deletions

View File

@@ -43,10 +43,11 @@ func DockerrFilterContainer(nameOrID string) filters.KeyValuePair {
func NewDockerWatcher(host string) DockerWatcher {
return DockerWatcher{
host: host,
FieldLogger: (logrus.
WithField("module", "docker_watcher").
WithField("host", host))}
WithField("host", host)),
host: host,
}
}
func NewDockerWatcherWithClient(client D.Client) DockerWatcher {
@@ -65,8 +66,6 @@ func (w DockerWatcher) EventsWithOptions(ctx context.Context, options DockerList
eventCh := make(chan Event)
errCh := make(chan E.NestedError)
eventsCtx, eventsCancel := context.WithCancel(ctx)
go func() {
defer close(eventCh)
defer close(errCh)
@@ -100,7 +99,7 @@ func (w DockerWatcher) EventsWithOptions(ctx context.Context, options DockerList
w.Debugf("client connected")
cEventCh, cErrCh := w.client.Events(eventsCtx, options)
cEventCh, cErrCh := w.client.Events(ctx, options)
w.Debugf("watcher started")
@@ -134,9 +133,7 @@ func (w DockerWatcher) EventsWithOptions(ctx context.Context, options DockerList
case <-ctx.Done():
return
default:
eventsCancel()
time.Sleep(dockerWatcherRetryInterval)
eventsCtx, eventsCancel = context.WithCancel(ctx)
cEventCh, cErrCh = w.client.Events(ctx, options)
}
}
@@ -149,6 +146,6 @@ func (w DockerWatcher) EventsWithOptions(ctx context.Context, options DockerList
var optionsWatchAll = DockerListOptions{Filters: NewDockerFilter(
DockerFilterContainer,
DockerFilterStart,
DockerFilterStop,
// DockerFilterStop,
DockerFilterDie,
)}

View File

@@ -8,8 +8,8 @@ import (
type HealthCheckConfig struct {
Disabled bool `json:"disabled" yaml:"disabled"`
Path string `json:"path" yaml:"path"`
UseGet bool `json:"use_get" yaml:"use_get"`
Path string `json:"path,omitempty" yaml:"path"`
UseGet bool `json:"use_get,omitempty" yaml:"use_get"`
Interval time.Duration `json:"interval" yaml:"interval"`
Timeout time.Duration `json:"timeout" yaml:"timeout"`
}

View File

@@ -15,9 +15,9 @@ type HTTPHealthMonitor struct {
pinger *http.Client
}
func NewHTTPHealthMonitor(task common.Task, url types.URL, config HealthCheckConfig) HealthMonitor {
func NewHTTPHealthMonitor(task common.Task, url types.URL, config *HealthCheckConfig) HealthMonitor {
mon := new(HTTPHealthMonitor)
mon.monitor = newMonitor(task, url, &config, mon.checkHealth)
mon.monitor = newMonitor(task, url, config, mon.checkHealth)
mon.pinger = &http.Client{Timeout: config.Timeout}
if config.UseGet {
mon.method = http.MethodGet
@@ -31,7 +31,7 @@ func (mon *HTTPHealthMonitor) checkHealth() (healthy bool, detail string, err er
req, reqErr := http.NewRequestWithContext(
mon.task.Context(),
mon.method,
mon.URL.String(),
mon.url.JoinPath(mon.config.Path).String(),
nil,
)
if reqErr != nil {

View File

@@ -2,13 +2,14 @@ package health
import (
"context"
"encoding/json"
"errors"
"sync"
"sync/atomic"
"time"
"github.com/yusing/go-proxy/internal/common"
"github.com/yusing/go-proxy/internal/net/types"
U "github.com/yusing/go-proxy/internal/utils"
F "github.com/yusing/go-proxy/internal/utils/functional"
)
@@ -16,17 +17,20 @@ type (
HealthMonitor interface {
Start()
Stop()
IsHealthy() bool
Status() Status
Uptime() time.Duration
Name() string
String() string
MarshalJSON() ([]byte, error)
}
HealthCheckFunc func() (healthy bool, detail string, err error)
monitor struct {
Name string
URL types.URL
Interval time.Duration
config *HealthCheckConfig
url types.URL
healthy atomic.Bool
status U.AtomicValue[Status]
checkHealth HealthCheckFunc
startTime time.Time
task common.Task
cancel context.CancelFunc
@@ -41,29 +45,29 @@ var monMap = F.NewMapOf[string, HealthMonitor]()
func newMonitor(task common.Task, url types.URL, config *HealthCheckConfig, healthCheckFunc HealthCheckFunc) *monitor {
task, cancel := task.SubtaskWithCancel("Health monitor for %s", task.Name())
mon := &monitor{
Name: task.Name(),
URL: url.JoinPath(config.Path),
Interval: config.Interval,
config: config,
url: url,
checkHealth: healthCheckFunc,
startTime: time.Now(),
task: task,
cancel: cancel,
done: make(chan struct{}),
}
mon.healthy.Store(true)
mon.status.Store(StatusHealthy)
return mon
}
func IsHealthy(name string) (healthy bool, ok bool) {
func Inspect(name string) (status Status, ok bool) {
mon, ok := monMap.Load(name)
if !ok {
return
}
return mon.IsHealthy(), true
return mon.Status(), true
}
func (mon *monitor) Start() {
defer monMap.Store(mon.Name, mon)
defer logger.Debugf("%s health monitor started", mon)
defer monMap.Store(mon.task.Name(), mon)
defer logger.Debugf("%s health monitor started", mon.String())
go func() {
defer close(mon.done)
@@ -74,7 +78,7 @@ func (mon *monitor) Start() {
return
}
ticker := time.NewTicker(mon.Interval)
ticker := time.NewTicker(mon.config.Interval)
defer ticker.Stop()
for {
@@ -89,13 +93,13 @@ func (mon *monitor) Start() {
}
}
}()
logger.Debugf("health monitor %q started", mon)
logger.Debugf("health monitor %q started", mon.String())
}
func (mon *monitor) Stop() {
defer logger.Debugf("%s health monitor stopped", mon)
defer logger.Debugf("%s health monitor stopped", mon.String())
monMap.Delete(mon.Name)
monMap.Delete(mon.task.Name())
mon.mu.Lock()
defer mon.mu.Unlock()
@@ -108,31 +112,57 @@ func (mon *monitor) Stop() {
<-mon.done
mon.cancel = nil
mon.status.Store(StatusUnknown)
}
func (mon *monitor) IsHealthy() bool {
return mon.healthy.Load()
func (mon *monitor) Status() Status {
return mon.status.Load()
}
// Uptime returns the time elapsed since the monitor's recorded startTime.
func (mon *monitor) Uptime() time.Duration {
	elapsed := time.Since(mon.startTime)
	return elapsed
}
// Name returns the name of the task that this monitor runs under.
func (mon *monitor) Name() string {
	taskName := mon.task.Name()
	return taskName
}
func (mon *monitor) String() string {
return mon.Name
return mon.Name()
}
// MarshalJSON serializes the monitor as a JSON object with the keys
// "name", "url", "status", "uptime" (the duration rendered as a string),
// "started" (startTime as Unix seconds) and "config".
func (mon *monitor) MarshalJSON() ([]byte, error) {
	// Field values are gathered in the same order as the keys are listed.
	snapshot := map[string]any{
		"name":    mon.Name(),
		"url":     mon.url,
		"status":  mon.status.Load(),
		"uptime":  mon.Uptime().String(),
		"started": mon.startTime.Unix(),
		"config":  mon.config,
	}
	return json.Marshal(snapshot)
}
func (mon *monitor) checkUpdateHealth() (hasError bool) {
healthy, detail, err := mon.checkHealth()
if err != nil {
mon.healthy.Store(false)
mon.status.Store(StatusError)
if !errors.Is(err, context.Canceled) {
logger.Errorf("server %q failed to check health: %s", mon, err)
logger.Errorf("%s failed to check health: %s", mon.String(), err)
}
mon.Stop()
return false
}
if healthy != mon.healthy.Swap(healthy) {
var status Status
if healthy {
status = StatusHealthy
} else {
status = StatusUnhealthy
}
if healthy != (mon.status.Swap(status) == StatusHealthy) {
if healthy {
logger.Infof("server %q is up", mon)
logger.Infof("%s is up", mon.String())
} else {
logger.Warnf("server %q is down: %s", mon, detail)
logger.Warnf("%s is down: %s", mon.String(), detail)
}
}

View File

@@ -14,9 +14,9 @@ type (
}
)
func NewRawHealthMonitor(task common.Task, url types.URL, config HealthCheckConfig) HealthMonitor {
func NewRawHealthMonitor(task common.Task, url types.URL, config *HealthCheckConfig) HealthMonitor {
mon := new(RawHealthMonitor)
mon.monitor = newMonitor(task, url, &config, mon.checkAvail)
mon.monitor = newMonitor(task, url, config, mon.checkAvail)
mon.dialer = &net.Dialer{
Timeout: config.Timeout,
FallbackDelay: -1,
@@ -25,7 +25,7 @@ func NewRawHealthMonitor(task common.Task, url types.URL, config HealthCheckConf
}
func (mon *RawHealthMonitor) checkAvail() (avail bool, detail string, err error) {
conn, dialErr := mon.dialer.DialContext(mon.task.Context(), mon.URL.Scheme, mon.URL.Host)
conn, dialErr := mon.dialer.DialContext(mon.task.Context(), mon.url.Scheme, mon.url.Host)
if dialErr != nil {
detail = dialErr.Error()
/* trunk-ignore(golangci-lint/nilerr) */

View File

@@ -0,0 +1,48 @@
package health
import "encoding/json"
// Status describes the health state reported for a monitored target.
//
// StatusUnknown is the zero value. Every other status occupies its own
// single bit so that groups of statuses can be tested with a bit mask
// such as HealthyMask.
type Status int

const (
	// StatusUnknown is the zero value and carries no bits, so it never
	// matches any mask.
	StatusUnknown Status = 0

	// The remaining statuses are distinct powers of two (1 << iota).
	// The previous expression (iota << 1) produced 0, 2, 4, 6, 8, 10,
	// which made StatusError (10) overlap HealthyMask (6) and therefore
	// be classified as healthy by mask tests.
	StatusHealthy Status = 1 << iota
	StatusNapping
	StatusStarting
	StatusUnhealthy
	StatusError

	// NumStatuses is derived from iota at this position and evaluates to 5.
	// NOTE(review): six named statuses are declared above (including
	// StatusUnknown); the value is kept unchanged for compatibility —
	// confirm whether StatusUnknown is meant to be excluded.
	NumStatuses int = iota - 1

	// HealthyMask is the set of statuses treated as healthy by mask tests:
	// StatusHealthy, StatusNapping and StatusStarting.
	HealthyMask = StatusHealthy | StatusNapping | StatusStarting
)
// String returns the lowercase name of the status. Any value that is not
// one of the named statuses handled below, including StatusUnknown,
// yields "unknown".
func (s Status) String() string {
	label := "unknown"
	switch s {
	case StatusError:
		label = "error"
	case StatusStarting:
		label = "starting"
	case StatusNapping:
		label = "napping"
	case StatusUnhealthy:
		label = "unhealthy"
	case StatusHealthy:
		label = "healthy"
	}
	return label
}
// MarshalJSON encodes the status as a JSON string holding the name
// returned by String, rather than as its integer value.
func (s Status) MarshalJSON() ([]byte, error) {
	name := s.String()
	return json.Marshal(name)
}
// Good reports whether s shares at least one bit with HealthyMask.
func (s Status) Good() bool {
	healthyBits := s & HealthyMask
	return healthyBits != 0
}
// Bad reports whether s shares no bits with HealthyMask; it is the exact
// negation of Good.
func (s Status) Bad() bool {
	return !s.Good()
}