1
0
Fork 0
mirror of https://github.com/portainer/portainer.git synced 2025-07-18 21:09:40 +02:00

feat(observability): alerting experimental feature (#801)

Co-authored-by: JamesPlayer <james.player@portainer.io>
This commit is contained in:
Steven Kang 2025-07-11 16:55:23 +12:00 committed by GitHub
parent b7e906701a
commit 96f2d69ae5
6 changed files with 98 additions and 28 deletions

View file

@ -776,6 +776,7 @@
"ImageCount": 9, "ImageCount": 9,
"IsPodman": false, "IsPodman": false,
"NodeCount": 0, "NodeCount": 0,
"PerformanceMetrics": null,
"RunningContainerCount": 5, "RunningContainerCount": 5,
"ServiceCount": 0, "ServiceCount": 0,
"StackCount": 2, "StackCount": 2,

View file

@ -215,26 +215,34 @@ type (
// DockerSnapshot represents a snapshot of a specific Docker environment(endpoint) at a specific time // DockerSnapshot represents a snapshot of a specific Docker environment(endpoint) at a specific time
DockerSnapshot struct { DockerSnapshot struct {
Time int64 `json:"Time"` Time int64 `json:"Time"`
DockerVersion string `json:"DockerVersion"` DockerVersion string `json:"DockerVersion"`
Swarm bool `json:"Swarm"` Swarm bool `json:"Swarm"`
TotalCPU int `json:"TotalCPU"` TotalCPU int `json:"TotalCPU"`
TotalMemory int64 `json:"TotalMemory"` TotalMemory int64 `json:"TotalMemory"`
ContainerCount int `json:"ContainerCount"` ContainerCount int `json:"ContainerCount"`
RunningContainerCount int `json:"RunningContainerCount"` RunningContainerCount int `json:"RunningContainerCount"`
StoppedContainerCount int `json:"StoppedContainerCount"` StoppedContainerCount int `json:"StoppedContainerCount"`
HealthyContainerCount int `json:"HealthyContainerCount"` HealthyContainerCount int `json:"HealthyContainerCount"`
UnhealthyContainerCount int `json:"UnhealthyContainerCount"` UnhealthyContainerCount int `json:"UnhealthyContainerCount"`
VolumeCount int `json:"VolumeCount"` VolumeCount int `json:"VolumeCount"`
ImageCount int `json:"ImageCount"` ImageCount int `json:"ImageCount"`
ServiceCount int `json:"ServiceCount"` ServiceCount int `json:"ServiceCount"`
StackCount int `json:"StackCount"` StackCount int `json:"StackCount"`
SnapshotRaw DockerSnapshotRaw `json:"DockerSnapshotRaw"` SnapshotRaw DockerSnapshotRaw `json:"DockerSnapshotRaw"`
NodeCount int `json:"NodeCount"` NodeCount int `json:"NodeCount"`
GpuUseAll bool `json:"GpuUseAll"` GpuUseAll bool `json:"GpuUseAll"`
GpuUseList []string `json:"GpuUseList"` GpuUseList []string `json:"GpuUseList"`
IsPodman bool `json:"IsPodman"` IsPodman bool `json:"IsPodman"`
DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"` DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"`
PerformanceMetrics *PerformanceMetrics `json:"PerformanceMetrics"`
}
// PerformanceMetrics represents the performance metrics of a Docker, Swarm, Podman, or Kubernetes environment
PerformanceMetrics struct {
CPUUsage float64 `json:"CPUUsage,omitempty"`
MemoryUsage float64 `json:"MemoryUsage,omitempty"`
NetworkUsage float64 `json:"NetworkUsage,omitempty"`
} }
// DockerContainerSnapshot is an extent of Docker's Container struct // DockerContainerSnapshot is an extent of Docker's Container struct
@ -663,12 +671,13 @@ type (
// KubernetesSnapshot represents a snapshot of a specific Kubernetes environment(endpoint) at a specific time // KubernetesSnapshot represents a snapshot of a specific Kubernetes environment(endpoint) at a specific time
KubernetesSnapshot struct { KubernetesSnapshot struct {
Time int64 `json:"Time"` Time int64 `json:"Time"`
KubernetesVersion string `json:"KubernetesVersion"` KubernetesVersion string `json:"KubernetesVersion"`
NodeCount int `json:"NodeCount"` NodeCount int `json:"NodeCount"`
TotalCPU int64 `json:"TotalCPU"` TotalCPU int64 `json:"TotalCPU"`
TotalMemory int64 `json:"TotalMemory"` TotalMemory int64 `json:"TotalMemory"`
DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"` DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"`
PerformanceMetrics *PerformanceMetrics `json:"PerformanceMetrics"`
} }
// KubernetesConfiguration represents the configuration of a Kubernetes environment(endpoint) // KubernetesConfiguration represents the configuration of a Kubernetes environment(endpoint)

1
go.mod
View file

@ -61,6 +61,7 @@ require (
k8s.io/cli-runtime v0.33.2 k8s.io/cli-runtime v0.33.2
k8s.io/client-go v0.33.2 k8s.io/client-go v0.33.2
k8s.io/kubectl v0.33.2 k8s.io/kubectl v0.33.2
k8s.io/kubelet v0.33.2
k8s.io/metrics v0.33.2 k8s.io/metrics v0.33.2
software.sslmate.com/src/go-pkcs12 v0.0.0-20210415151418-c5206de65a78 software.sslmate.com/src/go-pkcs12 v0.0.0-20210415151418-c5206de65a78
) )

2
go.sum
View file

@ -974,6 +974,8 @@ k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUy
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
k8s.io/kubectl v0.33.2 h1:7XKZ6DYCklu5MZQzJe+CkCjoGZwD1wWl7t/FxzhMz7Y= k8s.io/kubectl v0.33.2 h1:7XKZ6DYCklu5MZQzJe+CkCjoGZwD1wWl7t/FxzhMz7Y=
k8s.io/kubectl v0.33.2/go.mod h1:8rC67FB8tVTYraovAGNi/idWIK90z2CHFNMmGJZJ3KI= k8s.io/kubectl v0.33.2/go.mod h1:8rC67FB8tVTYraovAGNi/idWIK90z2CHFNMmGJZJ3KI=
k8s.io/kubelet v0.33.2 h1:wxEau5/563oJb3j3KfrCKlNWWx35YlSgDLOYUBCQ0pg=
k8s.io/kubelet v0.33.2/go.mod h1:way8VCDTUMiX1HTOvJv7M3xS/xNysJI6qh7TOqMe5KM=
k8s.io/metrics v0.33.2 h1:gNCBmtnUMDMCRg9Ly5ehxP3OdKISMsOnh1vzk01iCgE= k8s.io/metrics v0.33.2 h1:gNCBmtnUMDMCRg9Ly5ehxP3OdKISMsOnh1vzk01iCgE=
k8s.io/metrics v0.33.2/go.mod h1:yxoAosKGRsZisv3BGekC5W6T1J8XSV+PoUEevACRv7c= k8s.io/metrics v0.33.2/go.mod h1:yxoAosKGRsZisv3BGekC5W6T1J8XSV+PoUEevACRv7c=
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=

View file

@ -100,7 +100,10 @@ func dockerSnapshotNodes(snapshot *portainer.DockerSnapshot, cli *client.Client)
snapshot.TotalCPU = int(nanoCpus / 1e9) snapshot.TotalCPU = int(nanoCpus / 1e9)
snapshot.TotalMemory = totalMem snapshot.TotalMemory = totalMem
snapshot.NodeCount = len(nodes) snapshot.NodeCount = 1
if snapshot.Swarm {
snapshot.NodeCount = len(nodes)
}
return nil return nil
} }

View file

@ -5,7 +5,9 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"math"
"os" "os"
"reflect"
"strings" "strings"
"time" "time"
@ -19,11 +21,11 @@ import (
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
) )
func CreateKubernetesSnapshot(cli *kubernetes.Clientset) (*portainer.KubernetesSnapshot, error) { func CreateKubernetesSnapshot(cli *kubernetes.Clientset) (*portainer.KubernetesSnapshot, error) {
kubernetesSnapshot := &portainer.KubernetesSnapshot{} kubernetesSnapshot := &portainer.KubernetesSnapshot{}
err := kubernetesSnapshotVersion(kubernetesSnapshot, cli) err := kubernetesSnapshotVersion(kubernetesSnapshot, cli)
if err != nil { if err != nil {
log.Warn().Err(err).Msg("unable to snapshot cluster version") log.Warn().Err(err).Msg("unable to snapshot cluster version")
@ -54,10 +56,28 @@ func kubernetesSnapshotNodes(snapshot *portainer.KubernetesSnapshot, cli *kubern
return err return err
} }
if len(nodeList.Items) == 0 {
return nil
}
var totalCPUs, totalMemory int64 var totalCPUs, totalMemory int64
performanceMetrics := &portainer.PerformanceMetrics{
CPUUsage: 0,
MemoryUsage: 0,
NetworkUsage: 0,
}
for _, node := range nodeList.Items { for _, node := range nodeList.Items {
totalCPUs += node.Status.Capacity.Cpu().Value() totalCPUs += node.Status.Capacity.Cpu().Value()
totalMemory += node.Status.Capacity.Memory().Value() totalMemory += node.Status.Capacity.Memory().Value()
performanceMetrics, err = kubernetesSnapshotNodePerformanceMetrics(cli, node, performanceMetrics)
if err != nil {
return fmt.Errorf("failed to get node performance metrics: %w", err)
}
if performanceMetrics != nil {
snapshot.PerformanceMetrics = performanceMetrics
}
} }
snapshot.TotalCPU = totalCPUs snapshot.TotalCPU = totalCPUs
@ -123,6 +143,40 @@ func kubernetesSnapshotPodErrorLogs(snapshot *portainer.KubernetesSnapshot, cli
return nil return nil
} }
// kubernetesSnapshotNodePerformanceMetrics fetches the kubelet stats summary of
// node through the API server proxy and adds that node's usage figures to
// performanceMetrics, which serves as an accumulator across calls.
//
// CPUUsage and MemoryUsage are increased by the node's usage expressed as a
// rounded percentage of the node's own reported capacity. NetworkUsage is
// increased by the node's received plus transmitted bytes, rounded to whole MB.
//
// performanceMetrics may be nil, in which case a fresh accumulator is allocated
// as soon as the node reports any stats. When the node reports no stats at all,
// the accumulator is returned untouched (nil stays nil) so that values gathered
// from other nodes are not discarded and the next call is not handed a nil
// pointer it would have to dereference.
//
// A non-nil error is returned when the request fails or the response cannot be
// decoded; the returned metrics pointer is nil in that case.
//
// NOTE(review): percentages are summed per call rather than averaged, so a
// caller accumulating over several nodes can end up above 100 — confirm that
// this is the intended cluster-level figure.
func kubernetesSnapshotNodePerformanceMetrics(cli *kubernetes.Clientset, node corev1.Node, performanceMetrics *portainer.PerformanceMetrics) (*portainer.PerformanceMetrics, error) {
	result := cli.RESTClient().Get().AbsPath(fmt.Sprintf("/api/v1/nodes/%s/proxy/stats/summary", node.Name)).Do(context.TODO())
	if err := result.Error(); err != nil {
		return nil, fmt.Errorf("failed to get node performance metrics: %w", err)
	}

	raw, err := result.Raw()
	if err != nil {
		return nil, fmt.Errorf("failed to get node performance metrics: %w", err)
	}

	var stats statsapi.Summary
	if err := json.Unmarshal(raw, &stats); err != nil {
		return nil, fmt.Errorf("failed to unmarshal node performance metrics: %w", err)
	}

	nodeStats := stats.Node
	if reflect.DeepEqual(nodeStats, statsapi.NodeStats{}) {
		// Nothing to add for this node: keep whatever has been accumulated so far.
		return performanceMetrics, nil
	}

	if performanceMetrics == nil {
		performanceMetrics = &portainer.PerformanceMetrics{}
	}

	// A zero capacity would turn the ratios below into +Inf/NaN, which
	// encoding/json cannot marshal; skip the metric instead.
	cpuCapacity := node.Status.Capacity.Cpu().Value()
	if cpuCapacity > 0 && nodeStats.CPU != nil && nodeStats.CPU.UsageNanoCores != nil {
		performanceMetrics.CPUUsage += math.Round(float64(*nodeStats.CPU.UsageNanoCores) / float64(cpuCapacity*1000000000) * 100)
	}

	memoryCapacity := node.Status.Capacity.Memory().Value()
	if memoryCapacity > 0 && nodeStats.Memory != nil && nodeStats.Memory.WorkingSetBytes != nil {
		performanceMetrics.MemoryUsage += math.Round(float64(*nodeStats.Memory.WorkingSetBytes) / float64(memoryCapacity) * 100)
	}

	if nodeStats.Network != nil && nodeStats.Network.RxBytes != nil && nodeStats.Network.TxBytes != nil {
		performanceMetrics.NetworkUsage += math.Round((float64(*nodeStats.Network.RxBytes) + float64(*nodeStats.Network.TxBytes)) / 1024 / 1024) // MB
	}

	return performanceMetrics, nil
}
// filterLogsByPattern filters the logs by the given patterns and returns a list of logs that match the patterns // filterLogsByPattern filters the logs by the given patterns and returns a list of logs that match the patterns
// the logs are returned as a list of maps with the keys "timestamp" and "message" // the logs are returned as a list of maps with the keys "timestamp" and "message"
func filterLogsByPattern(logBytes []byte, patterns []string) []map[string]string { func filterLogsByPattern(logBytes []byte, patterns []string) []map[string]string {