mirror of
https://github.com/yusing/godoxy.git
synced 2026-01-11 22:30:47 +01:00
294 lines
7.4 KiB
Go
294 lines
7.4 KiB
Go
package monitor
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math/rand"
|
|
"net/url"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/rs/zerolog"
|
|
"github.com/rs/zerolog/log"
|
|
config "github.com/yusing/godoxy/internal/config/types"
|
|
"github.com/yusing/godoxy/internal/notif"
|
|
"github.com/yusing/godoxy/internal/types"
|
|
gperr "github.com/yusing/goutils/errs"
|
|
strutils "github.com/yusing/goutils/strings"
|
|
"github.com/yusing/goutils/synk"
|
|
"github.com/yusing/goutils/task"
|
|
)
|
|
|
|
type (
|
|
HealthCheckFunc func(url *url.URL) (result types.HealthCheckResult, err error)
|
|
monitor struct {
|
|
service string
|
|
config types.HealthCheckConfig
|
|
url synk.Value[*url.URL]
|
|
|
|
status synk.Value[types.HealthStatus]
|
|
lastResult synk.Value[types.HealthCheckResult]
|
|
|
|
checkHealth HealthCheckFunc
|
|
startTime time.Time
|
|
|
|
notifyFunc notif.NotifyFunc
|
|
numConsecFailures atomic.Int64
|
|
downNotificationSent atomic.Bool
|
|
|
|
task *task.Task
|
|
}
|
|
)
|
|
|
|
// ErrNegativeInterval is returned by Start when the configured check
// interval is zero or negative.
var ErrNegativeInterval = gperr.New("negative interval")
|
|
|
|
func (mon *monitor) init(u *url.URL, cfg types.HealthCheckConfig, healthCheckFunc HealthCheckFunc) {
|
|
if state := config.WorkingState.Load(); state != nil {
|
|
cfg.ApplyDefaults(state.Value().Defaults.HealthCheck)
|
|
} else {
|
|
cfg.ApplyDefaults(types.HealthCheckConfig{}) // use defaults from constants
|
|
}
|
|
mon.config = cfg
|
|
mon.checkHealth = healthCheckFunc
|
|
mon.startTime = time.Now()
|
|
mon.notifyFunc = notif.Notify
|
|
mon.status.Store(types.StatusHealthy)
|
|
mon.lastResult.Store(types.HealthCheckResult{Healthy: true, Detail: "started"})
|
|
|
|
if u == nil {
|
|
mon.url.Store(&url.URL{})
|
|
} else {
|
|
mon.url.Store(u)
|
|
}
|
|
}
|
|
|
|
func (mon *monitor) Context() context.Context {
|
|
if mon.config.BaseContext != nil {
|
|
return mon.config.BaseContext()
|
|
}
|
|
if mon.task != nil {
|
|
return mon.task.Context()
|
|
}
|
|
return context.Background()
|
|
}
|
|
|
|
func (mon *monitor) CheckHealth() (types.HealthCheckResult, error) {
|
|
return mon.checkHealth(mon.url.Load())
|
|
}
|
|
|
|
// Start implements task.TaskStarter.
|
|
func (mon *monitor) Start(parent task.Parent) gperr.Error {
|
|
if mon.config.Interval <= 0 {
|
|
return ErrNegativeInterval
|
|
}
|
|
|
|
mon.service = parent.Name()
|
|
mon.task = parent.Subtask("health_monitor", true)
|
|
|
|
go func() {
|
|
logger := log.With().Str("name", mon.service).Logger()
|
|
|
|
defer func() {
|
|
if mon.status.Load() != types.StatusError {
|
|
mon.status.Store(types.StatusUnhealthy)
|
|
}
|
|
mon.task.Finish(nil)
|
|
}()
|
|
|
|
failures := 0
|
|
|
|
if err := mon.checkUpdateHealth(); err != nil {
|
|
logger.Err(err).Msg("healthchecker error")
|
|
failures++
|
|
}
|
|
|
|
// add a random delay between 0 and 10 seconds to avoid thundering herd
|
|
time.Sleep(time.Duration(rand.Intn(10)) * time.Second)
|
|
|
|
ticker := time.NewTicker(mon.config.Interval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-mon.task.Context().Done():
|
|
return
|
|
case <-ticker.C:
|
|
err := mon.checkUpdateHealth()
|
|
if err != nil {
|
|
logger.Err(err).Msg("healthchecker error")
|
|
failures++
|
|
} else {
|
|
failures = 0
|
|
}
|
|
if failures >= 5 {
|
|
mon.status.Store(types.StatusError)
|
|
mon.task.Finish(err)
|
|
logger.Error().Msg("healthchecker stopped after 5 trials")
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
return nil
|
|
}
|
|
|
|
// Task implements task.TaskStarter.
|
|
func (mon *monitor) Task() *task.Task {
|
|
return mon.task
|
|
}
|
|
|
|
// Finish implements task.TaskFinisher.
|
|
func (mon *monitor) Finish(reason any) {
|
|
if mon.task != nil {
|
|
mon.task.Finish(reason)
|
|
}
|
|
}
|
|
|
|
// UpdateURL implements HealthChecker.
|
|
func (mon *monitor) UpdateURL(url *url.URL) {
|
|
if url == nil {
|
|
log.Warn().Msg("attempting to update health monitor URL with nil")
|
|
return
|
|
}
|
|
mon.url.Store(url)
|
|
}
|
|
|
|
// URL implements HealthChecker.
|
|
func (mon *monitor) URL() *url.URL {
|
|
return mon.url.Load()
|
|
}
|
|
|
|
// Config implements HealthChecker.
|
|
func (mon *monitor) Config() *types.HealthCheckConfig {
|
|
return &mon.config
|
|
}
|
|
|
|
// Status implements HealthMonitor.
|
|
func (mon *monitor) Status() types.HealthStatus {
|
|
return mon.status.Load()
|
|
}
|
|
|
|
// Uptime implements HealthMonitor.
|
|
func (mon *monitor) Uptime() time.Duration {
|
|
return time.Since(mon.startTime)
|
|
}
|
|
|
|
// Latency implements HealthMonitor.
|
|
func (mon *monitor) Latency() time.Duration {
|
|
res := mon.lastResult.Load()
|
|
return res.Latency
|
|
}
|
|
|
|
// Detail implements HealthMonitor.
|
|
func (mon *monitor) Detail() string {
|
|
res := mon.lastResult.Load()
|
|
return res.Detail
|
|
}
|
|
|
|
// Name implements HealthMonitor.
|
|
func (mon *monitor) Name() string {
|
|
parts := strutils.SplitRune(mon.service, '/')
|
|
return parts[len(parts)-1]
|
|
}
|
|
|
|
// String implements fmt.Stringer of HealthMonitor.
|
|
func (mon *monitor) String() string {
|
|
return mon.Name()
|
|
}
|
|
|
|
// MarshalJSON implements health.HealthMonitor.
|
|
func (mon *monitor) MarshalJSON() ([]byte, error) {
|
|
res := mon.lastResult.Load()
|
|
return (&types.HealthJSONRepr{
|
|
Name: mon.service,
|
|
Config: &mon.config,
|
|
Status: mon.status.Load(),
|
|
Started: mon.startTime,
|
|
Uptime: mon.Uptime(),
|
|
Latency: res.Latency,
|
|
LastSeen: GetLastSeen(mon.service),
|
|
Detail: res.Detail,
|
|
URL: mon.url.Load(),
|
|
}).MarshalJSON()
|
|
}
|
|
|
|
func (mon *monitor) checkUpdateHealth() error {
|
|
logger := log.With().Str("name", mon.Name()).Logger()
|
|
result, err := mon.checkHealth(mon.url.Load())
|
|
|
|
var lastStatus types.HealthStatus
|
|
switch {
|
|
case err != nil:
|
|
result = types.HealthCheckResult{Healthy: false, Detail: err.Error()}
|
|
lastStatus = mon.status.Swap(types.StatusError)
|
|
case result.Healthy:
|
|
lastStatus = mon.status.Swap(types.StatusHealthy)
|
|
UpdateLastSeen(mon.service)
|
|
default:
|
|
lastStatus = mon.status.Swap(types.StatusUnhealthy)
|
|
}
|
|
mon.lastResult.Store(result)
|
|
|
|
// change of status
|
|
if result.Healthy != (lastStatus == types.StatusHealthy) {
|
|
if result.Healthy {
|
|
mon.notifyServiceUp(&logger, &result)
|
|
mon.numConsecFailures.Store(0)
|
|
mon.downNotificationSent.Store(false) // Reset notification state when service comes back up
|
|
} else if mon.config.Retries < 0 {
|
|
// immediate notification when retries < 0
|
|
mon.notifyServiceDown(&logger, &result)
|
|
mon.downNotificationSent.Store(true)
|
|
}
|
|
}
|
|
|
|
// if threshold >= 0, notify after threshold consecutive failures (but only once)
|
|
if !result.Healthy && mon.config.Retries >= 0 {
|
|
failureCount := mon.numConsecFailures.Add(1)
|
|
if failureCount >= mon.config.Retries && !mon.downNotificationSent.Load() {
|
|
mon.notifyServiceDown(&logger, &result)
|
|
mon.downNotificationSent.Store(true)
|
|
}
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (mon *monitor) notifyServiceUp(logger *zerolog.Logger, result *types.HealthCheckResult) {
|
|
logger.Info().Msg("service is up")
|
|
extras := mon.buildNotificationExtras(result)
|
|
extras.Add("Ping", fmt.Sprintf("%d ms", result.Latency.Milliseconds()))
|
|
mon.notifyFunc(¬if.LogMessage{
|
|
Level: zerolog.InfoLevel,
|
|
Title: "✅ Service is up ✅",
|
|
Body: extras,
|
|
Color: notif.ColorSuccess,
|
|
})
|
|
}
|
|
|
|
func (mon *monitor) notifyServiceDown(logger *zerolog.Logger, result *types.HealthCheckResult) {
|
|
logger.Warn().Str("detail", result.Detail).Msg("service went down")
|
|
extras := mon.buildNotificationExtras(result)
|
|
extras.Add("Last Seen", strutils.FormatLastSeen(GetLastSeen(mon.service)))
|
|
mon.notifyFunc(¬if.LogMessage{
|
|
Level: zerolog.WarnLevel,
|
|
Title: "❌ Service went down ❌",
|
|
Body: extras,
|
|
Color: notif.ColorError,
|
|
})
|
|
}
|
|
|
|
func (mon *monitor) buildNotificationExtras(result *types.HealthCheckResult) notif.FieldsBody {
|
|
extras := notif.FieldsBody{
|
|
{Name: "Service Name", Value: mon.service},
|
|
{Name: "Time", Value: strutils.FormatTime(time.Now())},
|
|
}
|
|
if mon.url.Load() != nil {
|
|
extras.Add("Service URL", mon.url.Load().String())
|
|
}
|
|
if result.Detail != "" {
|
|
extras.Add("Detail", result.Detail)
|
|
}
|
|
return extras
|
|
}
|