Files
godoxy-yusing/internal/metrics/systeminfo/system_info.go
yusing 424398442b refactor: replace gperr.Builder with gperr.Group for concurrent error handling
- Updated various files to utilize gperr.Group for cleaner concurrency error handling.
- Removed sync.WaitGroup usage, simplifying the code structure.
- Ensured consistent error reporting across different components.
2026-01-06 16:29:35 +08:00

351 lines
10 KiB
Go

package systeminfo // import github.com/yusing/godoxy/internal/metrics/systeminfo
import (
"context"
"errors"
"net/url"
"syscall"
"time"
"github.com/rs/zerolog/log"
"github.com/shirou/gopsutil/v4/cpu"
"github.com/shirou/gopsutil/v4/disk"
"github.com/shirou/gopsutil/v4/mem"
"github.com/shirou/gopsutil/v4/net"
"github.com/shirou/gopsutil/v4/sensors"
"github.com/shirou/gopsutil/v4/warning"
"github.com/yusing/godoxy/internal/common"
"github.com/yusing/godoxy/internal/metrics/period"
gperr "github.com/yusing/goutils/errs"
)
// json tags are left for tests
type (
	// Sensors is the list of temperature readings returned by gopsutil's
	// sensors package; exposed as its own type for swagger (@name) docs.
	Sensors []sensors.TemperatureStat // @name Sensors
	// Aggregated is a recharts-friendly series: one map (data point) per
	// timestamp, produced by aggregate().
	Aggregated []map[string]any
)
// SystemInfo is a single snapshot of host metrics collected by the poller.
// Speed/IOPS fields inside Network and DisksIO are derived from the delta
// against the previous snapshot.
type SystemInfo struct {
	Timestamp  int64                           `json:"timestamp"`   // unix seconds at collection time
	CPUAverage *float64                        `json:"cpu_average"` // nil when CPU collection is disabled or failed
	Memory     mem.VirtualMemoryStat           `json:"memory"`
	Disks      map[string]disk.UsageStat       `json:"disks"`    // disk usage by partition
	DisksIO    map[string]*disk.IOCountersStat `json:"disks_io"` // disk IO by device
	Network    net.IOCountersStat              `json:"network"`
	Sensors    Sensors                         `json:"sensors"` // sensor temperature by key
} // @name SystemInfo
// SystemInfoAggregateMode selects which metric series aggregate() extracts
// from a list of SystemInfo snapshots. It is read from the "aggregate"
// URL query parameter.
type SystemInfoAggregateMode string // @name SystemInfoAggregateMode

// Supported aggregate modes, one per chart series.
const (
	SystemInfoAggregateModeCPUAverage         SystemInfoAggregateMode = "cpu_average"          // @name SystemInfoAggregateModeCPUAverage
	SystemInfoAggregateModeMemoryUsage        SystemInfoAggregateMode = "memory_usage"         // @name SystemInfoAggregateModeMemoryUsage
	SystemInfoAggregateModeMemoryUsagePercent SystemInfoAggregateMode = "memory_usage_percent" // @name SystemInfoAggregateModeMemoryUsagePercent
	SystemInfoAggregateModeDisksReadSpeed     SystemInfoAggregateMode = "disks_read_speed"     // @name SystemInfoAggregateModeDisksReadSpeed
	SystemInfoAggregateModeDisksWriteSpeed    SystemInfoAggregateMode = "disks_write_speed"    // @name SystemInfoAggregateModeDisksWriteSpeed
	SystemInfoAggregateModeDisksIOPS          SystemInfoAggregateMode = "disks_iops"           // @name SystemInfoAggregateModeDisksIOPS
	SystemInfoAggregateModeDiskUsage          SystemInfoAggregateMode = "disk_usage"           // @name SystemInfoAggregateModeDiskUsage
	SystemInfoAggregateModeNetworkSpeed       SystemInfoAggregateMode = "network_speed"        // @name SystemInfoAggregateModeNetworkSpeed
	SystemInfoAggregateModeNetworkTransfer    SystemInfoAggregateMode = "network_transfer"     // @name SystemInfoAggregateModeNetworkTransfer
	SystemInfoAggregateModeSensorTemperature  SystemInfoAggregateMode = "sensor_temperature"   // @name SystemInfoAggregateModeSensorTemperature
)

// allQueries enumerates every supported aggregate mode.
var allQueries = []SystemInfoAggregateMode{
	SystemInfoAggregateModeCPUAverage,
	SystemInfoAggregateModeMemoryUsage,
	SystemInfoAggregateModeMemoryUsagePercent,
	SystemInfoAggregateModeDisksReadSpeed,
	SystemInfoAggregateModeDisksWriteSpeed,
	SystemInfoAggregateModeDisksIOPS,
	SystemInfoAggregateModeDiskUsage,
	SystemInfoAggregateModeNetworkSpeed,
	SystemInfoAggregateModeNetworkTransfer,
	SystemInfoAggregateModeSensorTemperature,
}

// Poller periodically collects SystemInfo snapshots via getSystemInfo and
// serves chart-ready series via aggregate.
var Poller = period.NewPoller("system_info", getSystemInfo, aggregate)
// isNoDataAvailable reports whether err is, or wraps, syscall.ENODATA.
// getSystemInfo uses it to drop "no data available" entries from gopsutil
// warnings instead of logging them.
func isNoDataAvailable(err error) bool {
	return errors.Is(err, syscall.ENODATA)
}
// getSystemInfo collects one SystemInfo snapshot for the poller.
//
// Collectors run concurrently in a gperr.Group; each is gated by its
// common.MetricsDisable* flag and writes a distinct field of s, so the
// goroutines do not touch overlapping memory.
//
// lastResult is the previous snapshot (nil on the first poll); it supplies
// the counter baselines that collectDisksInfo and collectNetworkInfo need
// to derive speeds.
//
// Error policy: gopsutil may return a *warning.Warning carrying several
// sub-errors in its List. ENODATA entries are dropped, other warning
// entries are only logged, and any non-warning error fails the poll.
func getSystemInfo(ctx context.Context, lastResult *SystemInfo) (*SystemInfo, error) {
	errs := gperr.NewGroup("failed to get system info")
	var s SystemInfo
	s.Timestamp = time.Now().Unix()
	if !common.MetricsDisableCPU {
		errs.Go(func() error {
			return s.collectCPUInfo(ctx)
		})
	}
	if !common.MetricsDisableMemory {
		errs.Go(func() error {
			return s.collectMemoryInfo(ctx)
		})
	}
	if !common.MetricsDisableDisk {
		errs.Go(func() error {
			return s.collectDisksInfo(ctx, lastResult)
		})
	}
	if !common.MetricsDisableNetwork {
		errs.Go(func() error {
			return s.collectNetworkInfo(ctx, lastResult)
		})
	}
	if !common.MetricsDisableSensors {
		errs.Go(func() error {
			return s.collectSensorsInfo(ctx)
		})
	}
	result := errs.Wait()
	if result.HasError() {
		// Split the collected errors into log-only warnings and hard errors.
		allWarnings := gperr.NewBuilder("")
		allErrors := gperr.NewBuilder("failed to get system info")
		result.ForEach(func(err error) {
			warnings := new(warning.Warning)
			if errors.As(err, &warnings) {
				// NOTE: the loop variable below shadows the imported
				// "warning" package inside the loop body; harmless here
				// since the package is not referenced in it.
				for _, warning := range warnings.List {
					if isNoDataAvailable(warning) {
						// drop ENODATA noise entirely
						continue
					}
					allWarnings.Add(warning)
				}
			} else {
				allErrors.Add(err)
			}
		})
		if allWarnings.HasError() {
			log.Warn().Msg(allWarnings.String())
		}
		if allErrors.HasError() {
			return nil, allErrors.Error()
		}
	}
	return &s, nil
}
// collectCPUInfo samples CPU utilization over a 500ms window and stores the
// overall (non per-core) percentage in s.CPUAverage.
func (s *SystemInfo) collectCPUInfo(ctx context.Context) error {
	percentages, err := cpu.PercentWithContext(ctx, 500*time.Millisecond, false)
	if err != nil {
		return err
	}
	avg := percentages[0] // percpu=false: single aggregated element
	s.CPUAverage = &avg
	return nil
}
// collectMemoryInfo stores the current virtual-memory statistics in s.Memory.
func (s *SystemInfo) collectMemoryInfo(ctx context.Context) error {
	memoryInfo, err := mem.VirtualMemoryWithContext(ctx)
	if err == nil {
		s.Memory = memoryInfo
	}
	return err
}
// collectDisksInfo fills s.DisksIO (per-device IO counters plus derived
// read/write speeds and IOPS when lastResult is available) and s.Disks
// (per-partition usage).
//
// Per-partition usage failures are tolerated as long as at least one
// partition succeeds; in that case they are only logged.
func (s *SystemInfo) collectDisksInfo(ctx context.Context, lastResult *SystemInfo) error {
	ioCounters, err := disk.IOCountersWithContext(ctx)
	if err != nil {
		return err
	}
	s.DisksIO = ioCounters
	if lastResult != nil {
		interval := since(lastResult.Timestamp) // >= 1, safe divisor
		for name, ioStat := range s.DisksIO {
			if lastUsage, ok := lastResult.DisksIO[name]; ok {
				// Use diff() (as the IOPS line already did) so a counter
				// reset (e.g. reboot) yields a small delta instead of a
				// huge uint64 underflow.
				ioStat.ReadSpeed = float32(diff(ioStat.ReadBytes, lastUsage.ReadBytes)) / float32(interval)
				ioStat.WriteSpeed = float32(diff(ioStat.WriteBytes, lastUsage.WriteBytes)) / float32(interval)
				ioStat.Iops = diff(ioStat.ReadCount+ioStat.WriteCount, lastUsage.ReadCount+lastUsage.WriteCount) / uint64(interval) //nolint:gosec
			}
		}
	}
	partitions, err := disk.PartitionsWithContext(ctx, false)
	if err != nil {
		return err
	}
	s.Disks = make(map[string]disk.UsageStat, len(partitions))
	errs := gperr.NewBuilder("failed to get disks info")
	for _, partition := range partitions {
		diskInfo, err := disk.UsageWithContext(ctx, partition.Mountpoint.Value())
		if err != nil {
			errs.Add(err)
			continue
		}
		s.Disks[partition.Device.Value()] = diskInfo
	}
	if errs.HasError() {
		// Fail only if nothing succeeded; otherwise keep partial data.
		if len(s.Disks) == 0 {
			return errs.Error()
		}
		log.Warn().Msg(errs.String())
	}
	return nil
}
// collectNetworkInfo fills s.Network with the host-wide (aggregated)
// network IO counters and, when lastResult is available, derives
// upload/download speeds from the counter deltas.
func (s *SystemInfo) collectNetworkInfo(ctx context.Context, lastResult *SystemInfo) error {
	networkIO, err := net.IOCountersWithContext(ctx, false)
	if err != nil {
		return err
	}
	if len(networkIO) == 0 {
		// pernic=false normally yields exactly one aggregated entry;
		// guard against an index-out-of-range panic regardless.
		return errors.New("no network io counters returned")
	}
	s.Network = networkIO[0]
	if lastResult != nil {
		interval := float32(since(lastResult.Timestamp)) // >= 1, safe divisor
		// diff() guards against counter resets (uint64 underflow).
		s.Network.UploadSpeed = float32(diff(networkIO[0].BytesSent, lastResult.Network.BytesSent)) / interval
		s.Network.DownloadSpeed = float32(diff(networkIO[0].BytesRecv, lastResult.Network.BytesRecv)) / interval
	}
	return nil
}
// collectSensorsInfo stores the current temperature sensor readings in s.Sensors.
func (s *SystemInfo) collectSensorsInfo(ctx context.Context) error {
	sensorsInfo, err := sensors.TemperaturesWithContext(ctx)
	if err == nil {
		s.Sensors = sensorsInfo
	}
	return err
}
// aggregate shapes a series of SystemInfo snapshots into recharts-friendly
// rows according to the "aggregate" query parameter.
//
// It returns (-1, nil) for an unknown mode; otherwise the row count and the
// rows, one map per snapshot that has data for the selected series.
func aggregate(entries []*SystemInfo, query url.Values) (total int, result Aggregated) {
	// Each mode is expressed as a row function: given a snapshot, produce
	// one data point (or ok=false to skip the snapshot).
	var row func(*SystemInfo) (map[string]any, bool)
	switch SystemInfoAggregateMode(query.Get("aggregate")) {
	case SystemInfoAggregateModeCPUAverage:
		row = func(e *SystemInfo) (map[string]any, bool) {
			if e.CPUAverage == nil {
				return nil, false
			}
			return map[string]any{
				"timestamp":   e.Timestamp,
				"cpu_average": *e.CPUAverage,
			}, true
		}
	case SystemInfoAggregateModeMemoryUsage:
		row = func(e *SystemInfo) (map[string]any, bool) {
			if e.Memory.Used == 0 {
				return nil, false
			}
			return map[string]any{
				"timestamp":    e.Timestamp,
				"memory_usage": e.Memory.Used,
			}, true
		}
	case SystemInfoAggregateModeMemoryUsagePercent:
		row = func(e *SystemInfo) (map[string]any, bool) {
			percent := e.Memory.UsedPercent()
			if percent <= 0 {
				return nil, false
			}
			return map[string]any{
				"timestamp":            e.Timestamp,
				"memory_usage_percent": percent,
			}, true
		}
	case SystemInfoAggregateModeDisksReadSpeed:
		row = diskIORow(func(usage *disk.IOCountersStat) any { return usage.ReadSpeed })
	case SystemInfoAggregateModeDisksWriteSpeed:
		row = diskIORow(func(usage *disk.IOCountersStat) any { return usage.WriteSpeed })
	case SystemInfoAggregateModeDisksIOPS:
		row = diskIORow(func(usage *disk.IOCountersStat) any { return usage.Iops })
	case SystemInfoAggregateModeDiskUsage:
		row = func(e *SystemInfo) (map[string]any, bool) {
			if e.Disks == nil {
				return nil, false
			}
			m := make(map[string]any, len(e.Disks)+1)
			for name, usage := range e.Disks {
				m[name] = usage.Used
			}
			m["timestamp"] = e.Timestamp
			return m, true
		}
	case SystemInfoAggregateModeNetworkSpeed:
		row = func(e *SystemInfo) (map[string]any, bool) {
			if e.Network.BytesSent == 0 && e.Network.BytesRecv == 0 {
				return nil, false
			}
			return map[string]any{
				"timestamp": e.Timestamp,
				"upload":    e.Network.UploadSpeed,
				"download":  e.Network.DownloadSpeed,
			}, true
		}
	case SystemInfoAggregateModeNetworkTransfer:
		row = func(e *SystemInfo) (map[string]any, bool) {
			if e.Network.BytesSent == 0 && e.Network.BytesRecv == 0 {
				return nil, false
			}
			return map[string]any{
				"timestamp": e.Timestamp,
				"upload":    e.Network.BytesSent,
				"download":  e.Network.BytesRecv,
			}, true
		}
	case SystemInfoAggregateModeSensorTemperature:
		row = func(e *SystemInfo) (map[string]any, bool) {
			if e.Sensors == nil {
				return nil, false
			}
			m := make(map[string]any, len(e.Sensors)+1)
			for _, sensor := range e.Sensors {
				m[sensor.SensorKey.Value()] = sensor.Temperature
			}
			m["timestamp"] = e.Timestamp
			return m, true
		}
	default:
		return -1, nil
	}
	rows := make(Aggregated, 0, len(entries))
	for _, entry := range entries {
		if m, ok := row(entry); ok {
			rows = append(rows, m)
		}
	}
	return len(rows), rows
}

// diskIORow builds a row function that emits one value per disk device
// (extracted by value) plus the snapshot timestamp; snapshots with no
// DisksIO data are skipped.
func diskIORow(value func(*disk.IOCountersStat) any) func(*SystemInfo) (map[string]any, bool) {
	return func(e *SystemInfo) (map[string]any, bool) {
		if e.DisksIO == nil {
			return nil, false
		}
		m := make(map[string]any, len(e.DisksIO)+1)
		for name, usage := range e.DisksIO {
			m[name] = value(usage)
		}
		m["timestamp"] = e.Timestamp
		return m, true
	}
}
// diff returns the absolute difference between two unsigned counters,
// avoiding uint64 underflow regardless of argument order.
func diff(x, y uint64) uint64 {
	if y > x {
		x, y = y, x
	}
	return x - y
}
// since returns the number of whole seconds elapsed since the unix
// timestamp last, clamped to a minimum of 1 so callers can safely divide
// by it. The clamp covers both clock skew (last in the future) and two
// consecutive polls landing within the same second.
func since(last int64) int64 {
	elapsed := time.Now().Unix() - last
	if elapsed > 1 {
		return elapsed
	}
	return 1
}