mirror of
https://github.com/yusing/godoxy.git
synced 2026-04-18 06:29:42 +02:00
perf(healthcheck): stop docker client from hogging resources in health checks
This commit is contained in:
2
go.mod
2
go.mod
@@ -117,7 +117,7 @@ require (
|
|||||||
github.com/sony/gobreaker v1.0.0 // indirect
|
github.com/sony/gobreaker v1.0.0 // indirect
|
||||||
github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect
|
github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect
|
||||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0
|
||||||
go.opentelemetry.io/otel v1.38.0 // indirect
|
go.opentelemetry.io/otel v1.38.0 // indirect
|
||||||
go.opentelemetry.io/otel/metric v1.38.0 // indirect
|
go.opentelemetry.io/otel/metric v1.38.0 // indirect
|
||||||
go.opentelemetry.io/otel/trace v1.38.0 // indirect
|
go.opentelemetry.io/otel/trace v1.38.0 // indirect
|
||||||
|
|||||||
2
goutils
2
goutils
Submodule goutils updated: d25032447f...7fe25e088e
@@ -7,16 +7,20 @@ import (
|
|||||||
"maps"
|
"maps"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"reflect"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
"github.com/docker/cli/cli/connhelper"
|
"github.com/docker/cli/cli/connhelper"
|
||||||
"github.com/docker/docker/client"
|
"github.com/docker/docker/client"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/yusing/godoxy/agent/pkg/agent"
|
"github.com/yusing/godoxy/agent/pkg/agent"
|
||||||
"github.com/yusing/godoxy/internal/common"
|
"github.com/yusing/godoxy/internal/common"
|
||||||
|
httputils "github.com/yusing/goutils/http"
|
||||||
"github.com/yusing/goutils/task"
|
"github.com/yusing/goutils/task"
|
||||||
|
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO: implement reconnect here.
|
// TODO: implement reconnect here.
|
||||||
@@ -30,6 +34,8 @@ type (
|
|||||||
key string
|
key string
|
||||||
addr string
|
addr string
|
||||||
dial func(ctx context.Context) (net.Conn, error)
|
dial func(ctx context.Context) (net.Conn, error)
|
||||||
|
|
||||||
|
unique bool
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -114,16 +120,23 @@ func Clients() map[string]*SharedClient {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - Client: the Docker client connection.
|
// - Client: the Docker client connection.
|
||||||
// - error: an error if the connection failed.
|
// - error: an error if the connection failed.
|
||||||
func NewClient(host string) (*SharedClient, error) {
|
func NewClient(host string, unique ...bool) (*SharedClient, error) {
|
||||||
initClientCleanerOnce.Do(initClientCleaner)
|
initClientCleanerOnce.Do(initClientCleaner)
|
||||||
|
|
||||||
clientMapMu.Lock()
|
u := false
|
||||||
defer clientMapMu.Unlock()
|
if len(unique) > 0 {
|
||||||
|
u = unique[0]
|
||||||
|
}
|
||||||
|
|
||||||
if client, ok := clientMap[host]; ok {
|
if !u {
|
||||||
client.closedOn.Store(0)
|
clientMapMu.Lock()
|
||||||
client.refCount.Add(1)
|
defer clientMapMu.Unlock()
|
||||||
return client, nil
|
|
||||||
|
if client, ok := clientMap[host]; ok {
|
||||||
|
client.closedOn.Store(0)
|
||||||
|
client.refCount.Add(1)
|
||||||
|
return client, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// create client
|
// create client
|
||||||
@@ -188,7 +201,9 @@ func NewClient(host string) (*SharedClient, error) {
|
|||||||
addr: addr,
|
addr: addr,
|
||||||
key: host,
|
key: host,
|
||||||
dial: dial,
|
dial: dial,
|
||||||
|
unique: u,
|
||||||
}
|
}
|
||||||
|
c.unotel()
|
||||||
c.refCount.Store(1)
|
c.refCount.Store(1)
|
||||||
|
|
||||||
// non-agent client
|
// non-agent client
|
||||||
@@ -201,10 +216,28 @@ func NewClient(host string) (*SharedClient, error) {
|
|||||||
|
|
||||||
defer log.Debug().Str("host", host).Msg("docker client initialized")
|
defer log.Debug().Str("host", host).Msg("docker client initialized")
|
||||||
|
|
||||||
clientMap[c.Key()] = c
|
if !u {
|
||||||
|
clientMap[c.Key()] = c
|
||||||
|
}
|
||||||
return c, nil
|
return c, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *SharedClient) GetHTTPClient() **http.Client {
|
||||||
|
return (**http.Client)(unsafe.Pointer(uintptr(unsafe.Pointer(c.Client)) + clientClientOffset))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *SharedClient) InterceptHTTPClient(intercept httputils.InterceptFunc) {
|
||||||
|
httpClient := *c.GetHTTPClient()
|
||||||
|
httpClient.Transport = httputils.NewInterceptedTransport(httpClient.Transport, intercept)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *SharedClient) CloneUnique() *SharedClient {
|
||||||
|
// there will be no error here
|
||||||
|
// since we are using the same host from a valid client.
|
||||||
|
c, _ = NewClient(c.key, true)
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
func (c *SharedClient) Key() string {
|
func (c *SharedClient) Key() string {
|
||||||
return c.key
|
return c.key
|
||||||
}
|
}
|
||||||
@@ -222,8 +255,41 @@ func (c *SharedClient) CheckConnection(ctx context.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// if the client is still referenced, this is no-op.
|
// for shared clients, if the client is still referenced, this is no-op.
|
||||||
func (c *SharedClient) Close() {
|
func (c *SharedClient) Close() {
|
||||||
|
if c.unique {
|
||||||
|
c.Client.Close()
|
||||||
|
return
|
||||||
|
}
|
||||||
c.closedOn.Store(time.Now().Unix())
|
c.closedOn.Store(time.Now().Unix())
|
||||||
c.refCount.Add(-1)
|
c.refCount.Add(-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var clientClientOffset = func() uintptr {
|
||||||
|
field, ok := reflect.TypeFor[client.Client]().FieldByName("client")
|
||||||
|
if !ok {
|
||||||
|
panic("client.Client has no client field")
|
||||||
|
}
|
||||||
|
return field.Offset
|
||||||
|
}()
|
||||||
|
|
||||||
|
var otelRtOffset = func() uintptr {
|
||||||
|
field, ok := reflect.TypeFor[otelhttp.Transport]().FieldByName("rt")
|
||||||
|
if !ok {
|
||||||
|
panic("otelhttp.Transport has no rt field")
|
||||||
|
}
|
||||||
|
return field.Offset
|
||||||
|
}()
|
||||||
|
|
||||||
|
func (c *SharedClient) unotel() {
|
||||||
|
// we don't need and don't want otelhttp.Transport here.
|
||||||
|
httpClient := *c.GetHTTPClient()
|
||||||
|
|
||||||
|
otelTransport, ok := httpClient.Transport.(*otelhttp.Transport)
|
||||||
|
if !ok {
|
||||||
|
log.Debug().Str("host", c.DaemonHost()).Msgf("docker client transport is not an otelhttp.Transport: %T", httpClient.Transport)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
transport := *(*http.RoundTripper)(unsafe.Pointer(uintptr(unsafe.Pointer(otelTransport)) + otelRtOffset))
|
||||||
|
httpClient.Transport = transport
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,9 +1,16 @@
|
|||||||
package monitor
|
package monitor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/bytedance/sonic"
|
||||||
"github.com/docker/docker/api/types/container"
|
"github.com/docker/docker/api/types/container"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/yusing/godoxy/internal/docker"
|
"github.com/yusing/godoxy/internal/docker"
|
||||||
"github.com/yusing/godoxy/internal/types"
|
"github.com/yusing/godoxy/internal/types"
|
||||||
|
gperr "github.com/yusing/goutils/errs"
|
||||||
|
httputils "github.com/yusing/goutils/http"
|
||||||
|
"github.com/yusing/goutils/task"
|
||||||
)
|
)
|
||||||
|
|
||||||
type DockerHealthMonitor struct {
|
type DockerHealthMonitor struct {
|
||||||
@@ -27,6 +34,41 @@ func NewDockerHealthMonitor(client *docker.SharedClient, containerID, alias stri
|
|||||||
return mon
|
return mon
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (mon *DockerHealthMonitor) Start(parent task.Parent) gperr.Error {
|
||||||
|
mon.client = mon.client.CloneUnique()
|
||||||
|
err := mon.monitor.Start(parent)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
mon.client.InterceptHTTPClient(mon.interceptInspectResponse)
|
||||||
|
mon.monitor.task.OnFinished("close docker client", mon.client.Close)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type inspectState struct {
|
||||||
|
State *container.State
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mon *DockerHealthMonitor) interceptInspectResponse(resp *http.Response) (intercepted bool, err error) {
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
body, release, err := httputils.ReadAllBody(resp)
|
||||||
|
resp.Body.Close()
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var state inspectState
|
||||||
|
err = sonic.Unmarshal(body, &state)
|
||||||
|
release(body)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return true, httputils.NewRequestInterceptedError(resp, state)
|
||||||
|
}
|
||||||
|
|
||||||
func (mon *DockerHealthMonitor) CheckHealth() (types.HealthCheckResult, error) {
|
func (mon *DockerHealthMonitor) CheckHealth() (types.HealthCheckResult, error) {
|
||||||
// if docker health check failed too many times, use fallback forever
|
// if docker health check failed too many times, use fallback forever
|
||||||
if mon.numDockerFailures > dockerFailuresThreshold {
|
if mon.numDockerFailures > dockerFailuresThreshold {
|
||||||
@@ -36,13 +78,18 @@ func (mon *DockerHealthMonitor) CheckHealth() (types.HealthCheckResult, error) {
|
|||||||
ctx, cancel := mon.ContextWithTimeout("docker health check timed out")
|
ctx, cancel := mon.ContextWithTimeout("docker health check timed out")
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
cont, err := mon.client.ContainerInspect(ctx, mon.containerID)
|
// the actual inspect response is intercepted and returned as RequestInterceptedError
|
||||||
if err != nil {
|
_, err := mon.client.ContainerInspect(ctx, mon.containerID)
|
||||||
|
|
||||||
|
var interceptedErr *httputils.RequestInterceptedError
|
||||||
|
if err != nil && !httputils.AsRequestInterceptedError(err, &interceptedErr) {
|
||||||
mon.numDockerFailures++
|
mon.numDockerFailures++
|
||||||
|
log.Debug().Err(err).Str("container_id", mon.containerID).Msg("docker health check failed, using fallback")
|
||||||
return mon.fallback.CheckHealth()
|
return mon.fallback.CheckHealth()
|
||||||
}
|
}
|
||||||
|
|
||||||
status := cont.State.Status
|
state := interceptedErr.Data.(inspectState).State
|
||||||
|
status := state.Status
|
||||||
switch status {
|
switch status {
|
||||||
case "dead", "exited", "paused", "restarting", "removing":
|
case "dead", "exited", "paused", "restarting", "removing":
|
||||||
mon.numDockerFailures = 0
|
mon.numDockerFailures = 0
|
||||||
@@ -57,17 +104,17 @@ func (mon *DockerHealthMonitor) CheckHealth() (types.HealthCheckResult, error) {
|
|||||||
Detail: "container is not started",
|
Detail: "container is not started",
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
if cont.State.Health == nil { // no health check from docker, directly use fallback starting from next check
|
if state.Health == nil { // no health check from docker, directly use fallback starting from next check
|
||||||
mon.numDockerFailures = dockerFailuresThreshold + 1
|
mon.numDockerFailures = dockerFailuresThreshold + 1
|
||||||
return mon.fallback.CheckHealth()
|
return mon.fallback.CheckHealth()
|
||||||
}
|
}
|
||||||
|
|
||||||
mon.numDockerFailures = 0
|
mon.numDockerFailures = 0
|
||||||
result := types.HealthCheckResult{
|
result := types.HealthCheckResult{
|
||||||
Healthy: cont.State.Health.Status == container.Healthy,
|
Healthy: state.Health.Status == container.Healthy,
|
||||||
}
|
}
|
||||||
if len(cont.State.Health.Log) > 0 {
|
if len(state.Health.Log) > 0 {
|
||||||
lastLog := cont.State.Health.Log[len(cont.State.Health.Log)-1]
|
lastLog := state.Health.Log[len(state.Health.Log)-1]
|
||||||
result.Detail = lastLog.Output
|
result.Detail = lastLog.Output
|
||||||
result.Latency = lastLog.End.Sub(lastLog.Start)
|
result.Latency = lastLog.End.Sub(lastLog.Start)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user