mirror of
https://github.com/portainer/portainer.git
synced 2025-08-05 05:45:22 +02:00
feat(gpu): rework docker GPU for UI performance [EE-4918] (#8518)
This commit is contained in:
parent
769c8372fb
commit
fd916bc8a2
52 changed files with 692 additions and 285 deletions
116
api/datastore/migrate_post_init.go
Normal file
116
api/datastore/migrate_post_init.go
Normal file
|
@ -0,0 +1,116 @@
|
|||
package datastore
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
portainer "github.com/portainer/portainer/api"
|
||||
"github.com/portainer/portainer/api/dataservices"
|
||||
"github.com/portainer/portainer/api/docker"
|
||||
"github.com/portainer/portainer/api/kubernetes/cli"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type PostInitMigrator struct {
|
||||
kubeFactory *cli.ClientFactory
|
||||
dockerFactory *docker.ClientFactory
|
||||
dataStore dataservices.DataStore
|
||||
}
|
||||
|
||||
func NewPostInitMigrator(
|
||||
kubeFactory *cli.ClientFactory,
|
||||
dockerFactory *docker.ClientFactory,
|
||||
dataStore dataservices.DataStore,
|
||||
) *PostInitMigrator {
|
||||
return &PostInitMigrator{
|
||||
kubeFactory: kubeFactory,
|
||||
dockerFactory: dockerFactory,
|
||||
dataStore: dataStore,
|
||||
}
|
||||
}
|
||||
|
||||
func (migrator *PostInitMigrator) PostInitMigrate() error {
|
||||
if err := migrator.PostInitMigrateIngresses(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
migrator.PostInitMigrateGPUs()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (migrator *PostInitMigrator) PostInitMigrateIngresses() error {
|
||||
endpoints, err := migrator.dataStore.Endpoint().Endpoints()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i := range endpoints {
|
||||
// Early exit if we do not need to migrate!
|
||||
if endpoints[i].PostInitMigrations.MigrateIngresses == false {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := migrator.kubeFactory.MigrateEndpointIngresses(&endpoints[i])
|
||||
if err != nil {
|
||||
log.Debug().Err(err).Msg("failure migrating endpoint ingresses")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PostInitMigrateGPUs will check all docker endpoints for containers with GPUs and set EnableGPUManagement to true if any are found
|
||||
// If there's an error getting the containers, we'll log it and move on
|
||||
func (migrator *PostInitMigrator) PostInitMigrateGPUs() {
|
||||
environments, err := migrator.dataStore.Endpoint().Endpoints()
|
||||
if err != nil {
|
||||
log.Err(err).Msg("failure getting endpoints")
|
||||
return
|
||||
}
|
||||
for i := range environments {
|
||||
if environments[i].Type == portainer.DockerEnvironment {
|
||||
// // Early exit if we do not need to migrate!
|
||||
if environments[i].PostInitMigrations.MigrateGPUs == false {
|
||||
return
|
||||
}
|
||||
|
||||
// set the MigrateGPUs flag to false so we don't run this again
|
||||
environments[i].PostInitMigrations.MigrateGPUs = false
|
||||
migrator.dataStore.Endpoint().UpdateEndpoint(environments[i].ID, &environments[i])
|
||||
|
||||
// create a docker client
|
||||
dockerClient, err := migrator.dockerFactory.CreateClient(&environments[i], "", nil)
|
||||
if err != nil {
|
||||
log.Err(err).Msg("failure creating docker client for environment: " + environments[i].Name)
|
||||
return
|
||||
}
|
||||
defer dockerClient.Close()
|
||||
|
||||
// get all containers
|
||||
containers, err := dockerClient.ContainerList(context.Background(), types.ContainerListOptions{All: true})
|
||||
if err != nil {
|
||||
log.Err(err).Msg("failed to list containers")
|
||||
return
|
||||
}
|
||||
|
||||
// check for a gpu on each container. If even one GPU is found, set EnableGPUManagement to true for the whole endpoint
|
||||
containersLoop:
|
||||
for _, container := range containers {
|
||||
// https://www.sobyte.net/post/2022-10/go-docker/ has nice documentation on the docker client with GPUs
|
||||
containerDetails, err := dockerClient.ContainerInspect(context.Background(), container.ID)
|
||||
if err != nil {
|
||||
log.Err(err).Msg("failed to inspect container")
|
||||
return
|
||||
}
|
||||
deviceRequests := containerDetails.HostConfig.Resources.DeviceRequests
|
||||
for _, deviceRequest := range deviceRequests {
|
||||
if deviceRequest.Driver == "nvidia" {
|
||||
environments[i].EnableGPUManagement = true
|
||||
migrator.dataStore.Endpoint().UpdateEndpoint(environments[i].ID, &environments[i])
|
||||
break containersLoop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -3,11 +3,16 @@ package migrator
|
|||
import (
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
portainer "github.com/portainer/portainer/api"
|
||||
portainerDsErrors "github.com/portainer/portainer/api/dataservices/errors"
|
||||
)
|
||||
|
||||
func (m *Migrator) migrateDBVersionToDB90() error {
|
||||
if err := m.updateUserThemForDB90(); err != nil {
|
||||
if err := m.updateUserThemeForDB90(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := m.updateEnableGpuManagementFeatures(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -39,7 +44,7 @@ func (m *Migrator) updateEdgeStackStatusForDB90() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Migrator) updateUserThemForDB90() error {
|
||||
func (m *Migrator) updateUserThemeForDB90() error {
|
||||
log.Info().Msg("updating existing user theme settings")
|
||||
|
||||
users, err := m.userService.Users()
|
||||
|
@ -60,3 +65,28 @@ func (m *Migrator) updateUserThemForDB90() error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Migrator) updateEnableGpuManagementFeatures() error {
|
||||
// get all environments
|
||||
environments, err := m.endpointService.Endpoints()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, environment := range environments {
|
||||
if environment.Type == portainer.DockerEnvironment {
|
||||
// set the PostInitMigrations.MigrateGPUs to true on this environment to run the migration only on the 2.18 upgrade
|
||||
environment.PostInitMigrations.MigrateGPUs = true
|
||||
// if there's one or more gpu, set the EnableGpuManagement setting to true
|
||||
gpuList := environment.Gpus
|
||||
if len(gpuList) > 0 {
|
||||
environment.EnableGPUManagement = true
|
||||
}
|
||||
// update the environment
|
||||
if err := m.endpointService.UpdateEndpoint(environment.ID, &environment); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
},
|
||||
"EdgeCheckinInterval": 0,
|
||||
"EdgeKey": "",
|
||||
"EnableGPUManagement": false,
|
||||
"Gpus": [],
|
||||
"GroupId": 1,
|
||||
"Id": 1,
|
||||
|
@ -71,6 +72,7 @@
|
|||
"LastCheckInDate": 0,
|
||||
"Name": "local",
|
||||
"PostInitMigrations": {
|
||||
"MigrateGPUs": true,
|
||||
"MigrateIngresses": true
|
||||
},
|
||||
"PublicURL": "",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue