mirror of
https://github.com/portainer/portainer.git
synced 2025-07-18 21:09:40 +02:00
feat(observability): alerting experimental feature (#801)
Co-authored-by: JamesPlayer <james.player@portainer.io>
This commit is contained in:
parent
b7e906701a
commit
96f2d69ae5
6 changed files with 98 additions and 28 deletions
|
@ -776,6 +776,7 @@
|
||||||
"ImageCount": 9,
|
"ImageCount": 9,
|
||||||
"IsPodman": false,
|
"IsPodman": false,
|
||||||
"NodeCount": 0,
|
"NodeCount": 0,
|
||||||
|
"PerformanceMetrics": null,
|
||||||
"RunningContainerCount": 5,
|
"RunningContainerCount": 5,
|
||||||
"ServiceCount": 0,
|
"ServiceCount": 0,
|
||||||
"StackCount": 2,
|
"StackCount": 2,
|
||||||
|
|
|
@ -215,26 +215,34 @@ type (
|
||||||
|
|
||||||
// DockerSnapshot represents a snapshot of a specific Docker environment(endpoint) at a specific time
|
// DockerSnapshot represents a snapshot of a specific Docker environment(endpoint) at a specific time
|
||||||
DockerSnapshot struct {
|
DockerSnapshot struct {
|
||||||
Time int64 `json:"Time"`
|
Time int64 `json:"Time"`
|
||||||
DockerVersion string `json:"DockerVersion"`
|
DockerVersion string `json:"DockerVersion"`
|
||||||
Swarm bool `json:"Swarm"`
|
Swarm bool `json:"Swarm"`
|
||||||
TotalCPU int `json:"TotalCPU"`
|
TotalCPU int `json:"TotalCPU"`
|
||||||
TotalMemory int64 `json:"TotalMemory"`
|
TotalMemory int64 `json:"TotalMemory"`
|
||||||
ContainerCount int `json:"ContainerCount"`
|
ContainerCount int `json:"ContainerCount"`
|
||||||
RunningContainerCount int `json:"RunningContainerCount"`
|
RunningContainerCount int `json:"RunningContainerCount"`
|
||||||
StoppedContainerCount int `json:"StoppedContainerCount"`
|
StoppedContainerCount int `json:"StoppedContainerCount"`
|
||||||
HealthyContainerCount int `json:"HealthyContainerCount"`
|
HealthyContainerCount int `json:"HealthyContainerCount"`
|
||||||
UnhealthyContainerCount int `json:"UnhealthyContainerCount"`
|
UnhealthyContainerCount int `json:"UnhealthyContainerCount"`
|
||||||
VolumeCount int `json:"VolumeCount"`
|
VolumeCount int `json:"VolumeCount"`
|
||||||
ImageCount int `json:"ImageCount"`
|
ImageCount int `json:"ImageCount"`
|
||||||
ServiceCount int `json:"ServiceCount"`
|
ServiceCount int `json:"ServiceCount"`
|
||||||
StackCount int `json:"StackCount"`
|
StackCount int `json:"StackCount"`
|
||||||
SnapshotRaw DockerSnapshotRaw `json:"DockerSnapshotRaw"`
|
SnapshotRaw DockerSnapshotRaw `json:"DockerSnapshotRaw"`
|
||||||
NodeCount int `json:"NodeCount"`
|
NodeCount int `json:"NodeCount"`
|
||||||
GpuUseAll bool `json:"GpuUseAll"`
|
GpuUseAll bool `json:"GpuUseAll"`
|
||||||
GpuUseList []string `json:"GpuUseList"`
|
GpuUseList []string `json:"GpuUseList"`
|
||||||
IsPodman bool `json:"IsPodman"`
|
IsPodman bool `json:"IsPodman"`
|
||||||
DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"`
|
DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"`
|
||||||
|
PerformanceMetrics *PerformanceMetrics `json:"PerformanceMetrics"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// PerformanceMetrics represents the performance metrics of a Docker, Swarm, Podman, or Kubernetes environment
|
||||||
|
PerformanceMetrics struct {
|
||||||
|
CPUUsage float64 `json:"CPUUsage,omitempty"`
|
||||||
|
MemoryUsage float64 `json:"MemoryUsage,omitempty"`
|
||||||
|
NetworkUsage float64 `json:"NetworkUsage,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// DockerContainerSnapshot is an extension of Docker's Container struct
|
// DockerContainerSnapshot is an extension of Docker's Container struct
|
||||||
|
@ -663,12 +671,13 @@ type (
|
||||||
|
|
||||||
// KubernetesSnapshot represents a snapshot of a specific Kubernetes environment(endpoint) at a specific time
|
// KubernetesSnapshot represents a snapshot of a specific Kubernetes environment(endpoint) at a specific time
|
||||||
KubernetesSnapshot struct {
|
KubernetesSnapshot struct {
|
||||||
Time int64 `json:"Time"`
|
Time int64 `json:"Time"`
|
||||||
KubernetesVersion string `json:"KubernetesVersion"`
|
KubernetesVersion string `json:"KubernetesVersion"`
|
||||||
NodeCount int `json:"NodeCount"`
|
NodeCount int `json:"NodeCount"`
|
||||||
TotalCPU int64 `json:"TotalCPU"`
|
TotalCPU int64 `json:"TotalCPU"`
|
||||||
TotalMemory int64 `json:"TotalMemory"`
|
TotalMemory int64 `json:"TotalMemory"`
|
||||||
DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"`
|
DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"`
|
||||||
|
PerformanceMetrics *PerformanceMetrics `json:"PerformanceMetrics"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// KubernetesConfiguration represents the configuration of a Kubernetes environment(endpoint)
|
// KubernetesConfiguration represents the configuration of a Kubernetes environment(endpoint)
|
||||||
|
|
1
go.mod
1
go.mod
|
@ -61,6 +61,7 @@ require (
|
||||||
k8s.io/cli-runtime v0.33.2
|
k8s.io/cli-runtime v0.33.2
|
||||||
k8s.io/client-go v0.33.2
|
k8s.io/client-go v0.33.2
|
||||||
k8s.io/kubectl v0.33.2
|
k8s.io/kubectl v0.33.2
|
||||||
|
k8s.io/kubelet v0.33.2
|
||||||
k8s.io/metrics v0.33.2
|
k8s.io/metrics v0.33.2
|
||||||
software.sslmate.com/src/go-pkcs12 v0.0.0-20210415151418-c5206de65a78
|
software.sslmate.com/src/go-pkcs12 v0.0.0-20210415151418-c5206de65a78
|
||||||
)
|
)
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -974,6 +974,8 @@ k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUy
|
||||||
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
|
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
|
||||||
k8s.io/kubectl v0.33.2 h1:7XKZ6DYCklu5MZQzJe+CkCjoGZwD1wWl7t/FxzhMz7Y=
|
k8s.io/kubectl v0.33.2 h1:7XKZ6DYCklu5MZQzJe+CkCjoGZwD1wWl7t/FxzhMz7Y=
|
||||||
k8s.io/kubectl v0.33.2/go.mod h1:8rC67FB8tVTYraovAGNi/idWIK90z2CHFNMmGJZJ3KI=
|
k8s.io/kubectl v0.33.2/go.mod h1:8rC67FB8tVTYraovAGNi/idWIK90z2CHFNMmGJZJ3KI=
|
||||||
|
k8s.io/kubelet v0.33.2 h1:wxEau5/563oJb3j3KfrCKlNWWx35YlSgDLOYUBCQ0pg=
|
||||||
|
k8s.io/kubelet v0.33.2/go.mod h1:way8VCDTUMiX1HTOvJv7M3xS/xNysJI6qh7TOqMe5KM=
|
||||||
k8s.io/metrics v0.33.2 h1:gNCBmtnUMDMCRg9Ly5ehxP3OdKISMsOnh1vzk01iCgE=
|
k8s.io/metrics v0.33.2 h1:gNCBmtnUMDMCRg9Ly5ehxP3OdKISMsOnh1vzk01iCgE=
|
||||||
k8s.io/metrics v0.33.2/go.mod h1:yxoAosKGRsZisv3BGekC5W6T1J8XSV+PoUEevACRv7c=
|
k8s.io/metrics v0.33.2/go.mod h1:yxoAosKGRsZisv3BGekC5W6T1J8XSV+PoUEevACRv7c=
|
||||||
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
|
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
|
||||||
|
|
|
@ -100,7 +100,10 @@ func dockerSnapshotNodes(snapshot *portainer.DockerSnapshot, cli *client.Client)
|
||||||
|
|
||||||
snapshot.TotalCPU = int(nanoCpus / 1e9)
|
snapshot.TotalCPU = int(nanoCpus / 1e9)
|
||||||
snapshot.TotalMemory = totalMem
|
snapshot.TotalMemory = totalMem
|
||||||
snapshot.NodeCount = len(nodes)
|
snapshot.NodeCount = 1
|
||||||
|
if snapshot.Swarm {
|
||||||
|
snapshot.NodeCount = len(nodes)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,9 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -19,11 +21,11 @@ import (
|
||||||
corev1 "k8s.io/api/core/v1"
|
corev1 "k8s.io/api/core/v1"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
|
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||||
)
|
)
|
||||||
|
|
||||||
func CreateKubernetesSnapshot(cli *kubernetes.Clientset) (*portainer.KubernetesSnapshot, error) {
|
func CreateKubernetesSnapshot(cli *kubernetes.Clientset) (*portainer.KubernetesSnapshot, error) {
|
||||||
kubernetesSnapshot := &portainer.KubernetesSnapshot{}
|
kubernetesSnapshot := &portainer.KubernetesSnapshot{}
|
||||||
|
|
||||||
err := kubernetesSnapshotVersion(kubernetesSnapshot, cli)
|
err := kubernetesSnapshotVersion(kubernetesSnapshot, cli)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn().Err(err).Msg("unable to snapshot cluster version")
|
log.Warn().Err(err).Msg("unable to snapshot cluster version")
|
||||||
|
@ -54,10 +56,28 @@ func kubernetesSnapshotNodes(snapshot *portainer.KubernetesSnapshot, cli *kubern
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(nodeList.Items) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
var totalCPUs, totalMemory int64
|
var totalCPUs, totalMemory int64
|
||||||
|
performanceMetrics := &portainer.PerformanceMetrics{
|
||||||
|
CPUUsage: 0,
|
||||||
|
MemoryUsage: 0,
|
||||||
|
NetworkUsage: 0,
|
||||||
|
}
|
||||||
|
|
||||||
for _, node := range nodeList.Items {
|
for _, node := range nodeList.Items {
|
||||||
totalCPUs += node.Status.Capacity.Cpu().Value()
|
totalCPUs += node.Status.Capacity.Cpu().Value()
|
||||||
totalMemory += node.Status.Capacity.Memory().Value()
|
totalMemory += node.Status.Capacity.Memory().Value()
|
||||||
|
|
||||||
|
performanceMetrics, err = kubernetesSnapshotNodePerformanceMetrics(cli, node, performanceMetrics)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get node performance metrics: %w", err)
|
||||||
|
}
|
||||||
|
if performanceMetrics != nil {
|
||||||
|
snapshot.PerformanceMetrics = performanceMetrics
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
snapshot.TotalCPU = totalCPUs
|
snapshot.TotalCPU = totalCPUs
|
||||||
|
@ -123,6 +143,40 @@ func kubernetesSnapshotPodErrorLogs(snapshot *portainer.KubernetesSnapshot, cli
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func kubernetesSnapshotNodePerformanceMetrics(cli *kubernetes.Clientset, node corev1.Node, performanceMetrics *portainer.PerformanceMetrics) (*portainer.PerformanceMetrics, error) {
|
||||||
|
result := cli.RESTClient().Get().AbsPath(fmt.Sprintf("/api/v1/nodes/%s/proxy/stats/summary", node.Name)).Do(context.TODO())
|
||||||
|
if result.Error() != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get node performance metrics: %w", result.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
raw, err := result.Raw()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get node performance metrics: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
stats := statsapi.Summary{}
|
||||||
|
err = json.Unmarshal(raw, &stats)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal node performance metrics: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeStats := stats.Node
|
||||||
|
if reflect.DeepEqual(nodeStats, statsapi.NodeStats{}) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if nodeStats.CPU != nil && nodeStats.CPU.UsageNanoCores != nil {
|
||||||
|
performanceMetrics.CPUUsage += math.Round(float64(*nodeStats.CPU.UsageNanoCores) / float64(node.Status.Capacity.Cpu().Value()*1000000000) * 100)
|
||||||
|
}
|
||||||
|
if nodeStats.Memory != nil && nodeStats.Memory.WorkingSetBytes != nil {
|
||||||
|
performanceMetrics.MemoryUsage += math.Round(float64(*nodeStats.Memory.WorkingSetBytes) / float64(node.Status.Capacity.Memory().Value()) * 100)
|
||||||
|
}
|
||||||
|
if nodeStats.Network != nil && nodeStats.Network.RxBytes != nil && nodeStats.Network.TxBytes != nil {
|
||||||
|
performanceMetrics.NetworkUsage += math.Round((float64(*nodeStats.Network.RxBytes) + float64(*nodeStats.Network.TxBytes)) / 1024 / 1024) // MB
|
||||||
|
}
|
||||||
|
return performanceMetrics, nil
|
||||||
|
}
|
||||||
|
|
||||||
// filterLogsByPattern filters the logs by the given patterns and returns a list of logs that match the patterns
|
// filterLogsByPattern filters the logs by the given patterns and returns a list of logs that match the patterns
|
||||||
// the logs are returned as a list of maps with the keys "timestamp" and "message"
|
// the logs are returned as a list of maps with the keys "timestamp" and "message"
|
||||||
func filterLogsByPattern(logBytes []byte, patterns []string) []map[string]string {
|
func filterLogsByPattern(logBytes []byte, patterns []string) []map[string]string {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue