feat(support): collect system info bundle to assist support troubleshooting [r8s-157] (#154)
parent 17648d12fe
commit 783ab253af
17 changed files with 1367 additions and 440 deletions
File diff suppressed because it is too large
@@ -672,6 +672,7 @@
{
  "Docker": {
    "ContainerCount": 0,
    "DiagnosticsData": {},
    "DockerSnapshotRaw": {
      "Containers": null,
      "Images": null,
@@ -1,20 +1,9 @@
package docker

import (
	"context"
	"strings"
	"time"

	portainer "github.com/portainer/portainer/api"
	dockerclient "github.com/portainer/portainer/api/docker/client"
	"github.com/portainer/portainer/api/docker/consts"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"
	_container "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/volume"
	"github.com/docker/docker/client"
	"github.com/rs/zerolog/log"
	"github.com/portainer/portainer/pkg/snapshot"
)

// Snapshotter represents a service used to create environment(endpoint) snapshots
@@ -37,247 +26,5 @@ func (snapshotter *Snapshotter) CreateSnapshot(endpoint *portainer.Endpoint) (*p
	}
	defer cli.Close()

	return snapshot(cli, endpoint)
}

func snapshot(cli *client.Client, endpoint *portainer.Endpoint) (*portainer.DockerSnapshot, error) {
	if _, err := cli.Ping(context.Background()); err != nil {
		return nil, err
	}

	snapshot := &portainer.DockerSnapshot{
		StackCount: 0,
	}

	if err := snapshotInfo(snapshot, cli); err != nil {
		log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot engine information")
	}

	if snapshot.Swarm {
		if err := snapshotSwarmServices(snapshot, cli); err != nil {
			log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot Swarm services")
		}

		if err := snapshotNodes(snapshot, cli); err != nil {
			log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot Swarm nodes")
		}
	}

	if err := snapshotContainers(snapshot, cli); err != nil {
		log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot containers")
	}

	if err := snapshotImages(snapshot, cli); err != nil {
		log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot images")
	}

	if err := snapshotVolumes(snapshot, cli); err != nil {
		log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot volumes")
	}

	if err := snapshotNetworks(snapshot, cli); err != nil {
		log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot networks")
	}

	if err := snapshotVersion(snapshot, cli); err != nil {
		log.Warn().Str("environment", endpoint.Name).Err(err).Msg("unable to snapshot engine version")
	}

	snapshot.Time = time.Now().Unix()

	return snapshot, nil
}

func snapshotInfo(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	info, err := cli.Info(context.Background())
	if err != nil {
		return err
	}

	snapshot.Swarm = info.Swarm.ControlAvailable
	snapshot.DockerVersion = info.ServerVersion
	snapshot.TotalCPU = info.NCPU
	snapshot.TotalMemory = info.MemTotal
	snapshot.SnapshotRaw.Info = info

	return nil
}

func snapshotNodes(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	nodes, err := cli.NodeList(context.Background(), types.NodeListOptions{})
	if err != nil {
		return err
	}

	var nanoCpus int64
	var totalMem int64

	for _, node := range nodes {
		nanoCpus += node.Description.Resources.NanoCPUs
		totalMem += node.Description.Resources.MemoryBytes
	}

	snapshot.TotalCPU = int(nanoCpus / 1e9)
	snapshot.TotalMemory = totalMem
	snapshot.NodeCount = len(nodes)

	return nil
}

func snapshotSwarmServices(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	stacks := make(map[string]struct{})

	services, err := cli.ServiceList(context.Background(), types.ServiceListOptions{})
	if err != nil {
		return err
	}

	for _, service := range services {
		for k, v := range service.Spec.Labels {
			if k == "com.docker.stack.namespace" {
				stacks[v] = struct{}{}
			}
		}
	}

	snapshot.ServiceCount = len(services)
	snapshot.StackCount += len(stacks)

	return nil
}

func snapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	containers, err := cli.ContainerList(context.Background(), container.ListOptions{All: true})
	if err != nil {
		return err
	}

	stacks := make(map[string]struct{})
	gpuUseSet := make(map[string]struct{})
	gpuUseAll := false

	for _, container := range containers {
		if container.State == "running" {
			// Snapshot GPUs
			response, err := cli.ContainerInspect(context.Background(), container.ID)
			if err != nil {
				// Inspect a container will fail when the container runs on a different
				// Swarm node, so it is better to log the error instead of return error
				// when the Swarm mode is enabled
				if !snapshot.Swarm {
					return err
				} else {
					if !strings.Contains(err.Error(), "No such container") {
						return err
					}
					// It is common to have containers running on different Swarm nodes,
					// so we just log the error in the debug level
					log.Debug().Str("container", container.ID).Err(err).Msg("unable to inspect container in other Swarm nodes")
				}
			} else {
				var gpuOptions *_container.DeviceRequest = nil
				for _, deviceRequest := range response.HostConfig.Resources.DeviceRequests {
					if deviceRequest.Driver == "nvidia" || deviceRequest.Capabilities[0][0] == "gpu" {
						gpuOptions = &deviceRequest
					}
				}

				if gpuOptions != nil {
					if gpuOptions.Count == -1 {
						gpuUseAll = true
					}

					for _, id := range gpuOptions.DeviceIDs {
						gpuUseSet[id] = struct{}{}
					}
				}
			}
		}

		for k, v := range container.Labels {
			if k == consts.ComposeStackNameLabel {
				stacks[v] = struct{}{}
			}
		}
	}

	gpuUseList := make([]string, 0, len(gpuUseSet))
	for gpuUse := range gpuUseSet {
		gpuUseList = append(gpuUseList, gpuUse)
	}

	snapshot.GpuUseAll = gpuUseAll
	snapshot.GpuUseList = gpuUseList

	stats := CalculateContainerStats(containers)

	snapshot.ContainerCount = stats.Total
	snapshot.RunningContainerCount = stats.Running
	snapshot.StoppedContainerCount = stats.Stopped
	snapshot.HealthyContainerCount = stats.Healthy
	snapshot.UnhealthyContainerCount = stats.Unhealthy
	snapshot.StackCount += len(stacks)

	for _, container := range containers {
		snapshot.SnapshotRaw.Containers = append(snapshot.SnapshotRaw.Containers, portainer.DockerContainerSnapshot{Container: container})
	}

	return nil
}

func snapshotImages(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	images, err := cli.ImageList(context.Background(), types.ImageListOptions{})
	if err != nil {
		return err
	}

	snapshot.ImageCount = len(images)
	snapshot.SnapshotRaw.Images = images

	return nil
}

func snapshotVolumes(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	volumes, err := cli.VolumeList(context.Background(), volume.ListOptions{})
	if err != nil {
		return err
	}

	snapshot.VolumeCount = len(volumes.Volumes)
	snapshot.SnapshotRaw.Volumes = volumes

	return nil
}

func snapshotNetworks(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	networks, err := cli.NetworkList(context.Background(), types.NetworkListOptions{})
	if err != nil {
		return err
	}

	snapshot.SnapshotRaw.Networks = networks

	return nil
}

func snapshotVersion(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
	version, err := cli.ServerVersion(context.Background())
	if err != nil {
		return err
	}

	snapshot.SnapshotRaw.Version = version
	snapshot.IsPodman = isPodman(version)
	return nil
}

// isPodman checks if the version is for Podman by checking if any of the components contain "podman".
// If it's podman, a component name should be "Podman Engine"
func isPodman(version types.Version) bool {
	for _, component := range version.Components {
		if strings.Contains(strings.ToLower(component.Name), "podman") {
			return true
		}
	}
	return false
	return snapshot.CreateDockerSnapshot(cli)
}
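The net effect of the hunk above is that the per-resource snapshot helpers move out of the endpoint-specific Docker snapshotter and into the shared github.com/portainer/portainer/pkg/snapshot package, leaving CreateSnapshot as a thin wrapper around snapshot.CreateDockerSnapshot. A minimal standalone sketch of calling that helper directly, assuming it accepts the same *client.Client value the removed code used and returns (*portainer.DockerSnapshot, error):

package main

import (
	"fmt"

	"github.com/docker/docker/client"
	"github.com/portainer/portainer/pkg/snapshot"
)

func main() {
	// Docker SDK client built from the local environment (DOCKER_HOST, etc.).
	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	// Delegate data collection to the shared helper, as the refactored
	// CreateSnapshot in this diff does (signature assumed from the hunk).
	snap, err := snapshot.CreateDockerSnapshot(cli)
	if err != nil {
		panic(err)
	}

	fmt.Printf("containers: %d, images: %d\n", snap.ContainerCount, snap.ImageCount)
}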
@@ -1,15 +1,9 @@
package kubernetes

import (
	"context"
	"time"

	portainer "github.com/portainer/portainer/api"
	"github.com/portainer/portainer/api/kubernetes/cli"

	"github.com/rs/zerolog/log"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"github.com/portainer/portainer/pkg/snapshot"
)

type Snapshotter struct {
@@ -30,55 +24,5 @@ func (snapshotter *Snapshotter) CreateSnapshot(endpoint *portainer.Endpoint) (*p
		return nil, err
	}

	return snapshot(client, endpoint)
}

func snapshot(cli *kubernetes.Clientset, endpoint *portainer.Endpoint) (*portainer.KubernetesSnapshot, error) {
	res := cli.RESTClient().Get().AbsPath("/healthz").Do(context.TODO())
	if res.Error() != nil {
		return nil, res.Error()
	}

	snapshot := &portainer.KubernetesSnapshot{}

	err := snapshotVersion(snapshot, cli)
	if err != nil {
		log.Warn().Str("endpoint", endpoint.Name).Err(err).Msg("unable to snapshot cluster version")
	}

	err = snapshotNodes(snapshot, cli)
	if err != nil {
		log.Warn().Str("endpoint", endpoint.Name).Err(err).Msg("unable to snapshot cluster nodes")
	}

	snapshot.Time = time.Now().Unix()
	return snapshot, nil
}

func snapshotVersion(snapshot *portainer.KubernetesSnapshot, cli *kubernetes.Clientset) error {
	versionInfo, err := cli.ServerVersion()
	if err != nil {
		return err
	}

	snapshot.KubernetesVersion = versionInfo.GitVersion
	return nil
}

func snapshotNodes(snapshot *portainer.KubernetesSnapshot, cli *kubernetes.Clientset) error {
	nodeList, err := cli.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		return err
	}

	var totalCPUs, totalMemory int64
	for _, node := range nodeList.Items {
		totalCPUs += node.Status.Capacity.Cpu().Value()
		totalMemory += node.Status.Capacity.Memory().Value()
	}

	snapshot.TotalCPU = totalCPUs
	snapshot.TotalMemory = totalMemory
	snapshot.NodeCount = len(nodeList.Items)
	return nil
	return snapshot.CreateKubernetesSnapshot(client)
}
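The Kubernetes snapshotter is refactored the same way: the version and node collection shown above is replaced by a single call into pkg/snapshot. A minimal out-of-cluster sketch, assuming snapshot.CreateKubernetesSnapshot accepts the *kubernetes.Clientset the removed code used and returns (*portainer.KubernetesSnapshot, error):

package main

import (
	"fmt"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"

	"github.com/portainer/portainer/pkg/snapshot"
)

func main() {
	// Build a clientset from the default kubeconfig location (~/.kube/config).
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	cli, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	// Delegate collection to the shared helper, as the refactored snapshotter does.
	snap, err := snapshot.CreateKubernetesSnapshot(cli)
	if err != nil {
		panic(err)
	}

	fmt.Printf("kubernetes %s, %d nodes\n", snap.KubernetesVersion, snap.NodeCount)
}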
@@ -185,6 +185,16 @@ type (
	// CustomTemplatePlatform represents a custom template platform
	CustomTemplatePlatform int

	// DiagnosticsData represents the diagnostics data for an environment
	// this contains the logs, telnet, traceroute, dns and proxy information
	// which will be part of the DockerSnapshot and KubernetesSnapshot structs
	DiagnosticsData struct {
		Log    string            `json:"Log,omitempty"`
		Telnet map[string]string `json:"Telnet,omitempty"`
		DNS    map[string]string `json:"DNS,omitempty"`
		Proxy  map[string]string `json:"Proxy,omitempty"`
	}

	// DockerHub represents all the required information to connect and use the
	// Docker Hub
	DockerHub struct {
@@ -217,6 +227,7 @@ type (
		GpuUseAll       bool             `json:"GpuUseAll"`
		GpuUseList      []string         `json:"GpuUseList"`
		IsPodman        bool             `json:"IsPodman"`
		DiagnosticsData *DiagnosticsData `json:"DiagnosticsData"`
	}

	// DockerContainerSnapshot is an extent of Docker's Container struct
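For illustration, a short sketch that builds a DockerSnapshot carrying the new DiagnosticsData, using only the fields visible in the two hunks above; the values are made up, and the code that actually collects these diagnostics for the support bundle is not part of this excerpt:

package main

import (
	"encoding/json"
	"fmt"

	portainer "github.com/portainer/portainer/api"
)

func main() {
	// Illustrative values only; how Portainer fills these fields is not shown here.
	snap := portainer.DockerSnapshot{
		ContainerCount: 0,
		DiagnosticsData: &portainer.DiagnosticsData{
			Log:    "connectivity checks passed",
			Telnet: map[string]string{"registry-1.docker.io:443": "reachable"},
			DNS:    map[string]string{"portainer.io": "resolved"},
			Proxy:  map[string]string{"HTTPS_PROXY": "unset"},
		},
	}

	out, _ := json.MarshalIndent(snap, "", "  ")
	fmt.Println(string(out))
}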
@@ -636,11 +647,12 @@ type (

	// KubernetesSnapshot represents a snapshot of a specific Kubernetes environment(endpoint) at a specific time
	KubernetesSnapshot struct {
		Time              int64  `json:"Time"`
		KubernetesVersion string `json:"KubernetesVersion"`
		NodeCount         int    `json:"NodeCount"`
		TotalCPU          int64  `json:"TotalCPU"`
		TotalMemory       int64  `json:"TotalMemory"`
		Time              int64            `json:"Time"`
		KubernetesVersion string           `json:"KubernetesVersion"`
		NodeCount         int              `json:"NodeCount"`
		TotalCPU          int64            `json:"TotalCPU"`
		TotalMemory       int64            `json:"TotalMemory"`
		DiagnosticsData   *DiagnosticsData `json:"DiagnosticsData"`
	}

	// KubernetesConfiguration represents the configuration of a Kubernetes environment(endpoint)