1
0
Fork 0
mirror of https://github.com/portainer/portainer.git synced 2025-08-04 21:35:23 +02:00

feat(gpu) EE-3191 Add GPU support for containers (#7146)

This commit is contained in:
congs 2022-07-18 11:02:14 +12:00 committed by GitHub
parent f0456cbf5f
commit 4997e9c7be
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
43 changed files with 758 additions and 10 deletions

View file

@ -103,6 +103,9 @@ func (m *Migrator) Migrate() error {
// Portainer 2.14
newMigration(50, m.migrateDBVersionToDB50),
// Portainer 2.15
newMigration(60, m.migrateDBVersionToDB60),
}
var lastDbVersion int

View file

@ -0,0 +1,30 @@
package migrator
import portainer "github.com/portainer/portainer/api"
func (m *Migrator) migrateDBVersionToDB60() error {
if err := m.addGpuInputFieldDB60(); err != nil {
return err
}
return nil
}
func (m *Migrator) addGpuInputFieldDB60() error {
migrateLog.Info("- add gpu input field")
endpoints, err := m.endpointService.Endpoints()
if err != nil {
return err
}
for _, endpoint := range endpoints {
endpoint.Gpus = []portainer.Pair{}
err = m.endpointService.UpdateEndpoint(endpoint.ID, &endpoint)
if err != nil {
return err
}
}
return nil
}

View file

@ -43,6 +43,7 @@
},
"EdgeCheckinInterval": 0,
"EdgeKey": "",
"Gpus": [],
"GroupId": 1,
"Id": 1,
"IsEdgeDevice": false,
@ -175,6 +176,8 @@
}
},
"DockerVersion": "20.10.13",
"GpuUseAll": false,
"GpuUseList": null,
"HealthyContainerCount": 0,
"ImageCount": 9,
"NodeCount": 0,

View file

@ -7,9 +7,10 @@ import (
"time"
"github.com/docker/docker/api/types"
_container "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
"github.com/portainer/portainer/api"
portainer "github.com/portainer/portainer/api"
)
// Snapshotter represents a service used to create environment(endpoint) snapshots
@ -154,11 +155,35 @@ func snapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Client)
healthyContainers := 0
unhealthyContainers := 0
stacks := make(map[string]struct{})
gpuUseSet := make(map[string]struct{})
gpuUseAll := false
for _, container := range containers {
if container.State == "exited" {
stoppedContainers++
} else if container.State == "running" {
runningContainers++
// snapshot GPUs
response, err := cli.ContainerInspect(context.Background(), container.ID)
if err != nil {
return err
}
var gpuOptions *_container.DeviceRequest = nil
for _, deviceRequest := range response.HostConfig.Resources.DeviceRequests {
if deviceRequest.Driver == "nvidia" || deviceRequest.Capabilities[0][0] == "gpu" {
gpuOptions = &deviceRequest
}
}
if gpuOptions != nil {
if gpuOptions.Count == -1 {
gpuUseAll = true
}
for _, id := range gpuOptions.DeviceIDs {
gpuUseSet[id] = struct{}{}
}
}
}
if strings.Contains(container.Status, "(healthy)") {
@ -174,6 +199,14 @@ func snapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Client)
}
}
gpuUseList := make([]string, 0, len(gpuUseSet))
for gpuUse := range gpuUseSet {
gpuUseList = append(gpuUseList, gpuUse)
}
snapshot.GpuUseAll = gpuUseAll
snapshot.GpuUseList = gpuUseList
snapshot.RunningContainerCount = runningContainers
snapshot.StoppedContainerCount = stoppedContainers
snapshot.HealthyContainerCount = healthyContainers

View file

@ -25,6 +25,7 @@ type endpointCreatePayload struct {
URL string
EndpointCreationType endpointCreationEnum
PublicURL string
Gpus []portainer.Pair
GroupID int
TLS bool
TLSSkipVerify bool
@ -142,6 +143,13 @@ func (payload *endpointCreatePayload) Validate(r *http.Request) error {
payload.PublicURL = publicURL
}
gpus := make([]portainer.Pair, 0)
err = request.RetrieveMultiPartFormJSONValue(r, "Gpus", &gpus, true)
if err != nil {
return errors.New("Invalid Gpus parameter")
}
payload.Gpus = gpus
checkinInterval, _ := request.RetrieveNumericMultiPartFormValue(r, "CheckinInterval", true)
payload.EdgeCheckinInterval = checkinInterval
@ -290,6 +298,7 @@ func (handler *Handler) createAzureEndpoint(payload *endpointCreatePayload) (*po
Type: portainer.AzureEnvironment,
GroupID: portainer.EndpointGroupID(payload.GroupID),
PublicURL: payload.PublicURL,
Gpus: payload.Gpus,
UserAccessPolicies: portainer.UserAccessPolicies{},
TeamAccessPolicies: portainer.TeamAccessPolicies{},
AzureCredentials: credentials,
@ -323,6 +332,7 @@ func (handler *Handler) createEdgeAgentEndpoint(payload *endpointCreatePayload)
URL: portainerHost,
Type: portainer.EdgeAgentOnDockerEnvironment,
GroupID: portainer.EndpointGroupID(payload.GroupID),
Gpus: payload.Gpus,
TLSConfig: portainer.TLSConfiguration{
TLS: false,
},
@ -378,6 +388,7 @@ func (handler *Handler) createUnsecuredEndpoint(payload *endpointCreatePayload)
Type: endpointType,
GroupID: portainer.EndpointGroupID(payload.GroupID),
PublicURL: payload.PublicURL,
Gpus: payload.Gpus,
TLSConfig: portainer.TLSConfiguration{
TLS: false,
},
@ -412,6 +423,7 @@ func (handler *Handler) createKubernetesEndpoint(payload *endpointCreatePayload)
Type: portainer.KubernetesLocalEnvironment,
GroupID: portainer.EndpointGroupID(payload.GroupID),
PublicURL: payload.PublicURL,
Gpus: payload.Gpus,
TLSConfig: portainer.TLSConfiguration{
TLS: payload.TLS,
TLSSkipVerify: payload.TLSSkipVerify,
@ -441,6 +453,7 @@ func (handler *Handler) createTLSSecuredEndpoint(payload *endpointCreatePayload,
Type: endpointType,
GroupID: portainer.EndpointGroupID(payload.GroupID),
PublicURL: payload.PublicURL,
Gpus: payload.Gpus,
TLSConfig: portainer.TLSConfiguration{
TLS: payload.TLS,
TLSSkipVerify: payload.TLSSkipVerify,

View file

@ -22,6 +22,8 @@ type endpointUpdatePayload struct {
// URL or IP address where exposed containers will be reachable.\
// Defaults to URL if not specified
PublicURL *string `example:"docker.mydomain.tld:2375"`
// GPUs information
Gpus []portainer.Pair
// Group identifier
GroupID *int `example:"1"`
// Require TLS to connect against this environment(endpoint)
@ -110,6 +112,10 @@ func (handler *Handler) endpointUpdate(w http.ResponseWriter, r *http.Request) *
endpoint.PublicURL = *payload.PublicURL
}
if payload.Gpus != nil {
endpoint.Gpus = payload.Gpus
}
if payload.EdgeCheckinInterval != nil {
endpoint.EdgeCheckinInterval = *payload.EdgeCheckinInterval
}

View file

@ -199,6 +199,8 @@ type (
StackCount int `json:"StackCount"`
SnapshotRaw DockerSnapshotRaw `json:"DockerSnapshotRaw"`
NodeCount int `json:"NodeCount"`
GpuUseAll bool `json:"GpuUseAll"`
GpuUseList []string `json:"GpuUseList"`
}
// DockerSnapshotRaw represents all the information related to a snapshot as returned by the Docker API
@ -310,6 +312,7 @@ type (
GroupID EndpointGroupID `json:"GroupId" example:"1"`
// URL or IP address where exposed containers will be reachable
PublicURL string `json:"PublicURL" example:"docker.mydomain.tld:2375"`
Gpus []Pair `json:"Gpus"`
TLSConfig TLSConfiguration `json:"TLSConfig"`
AzureCredentials AzureCredentials `json:"AzureCredentials,omitempty" example:""`
// List of tag identifiers to which this environment(endpoint) is associated

View file

@ -693,6 +693,12 @@ definitions:
$ref: '#/definitions/portainer.DockerSnapshotRaw'
DockerVersion:
type: string
GpuUseAll:
type: boolean
GpuUseList:
items:
type: string
type: array
HealthyContainerCount:
type: integer
ImageCount:
@ -849,6 +855,11 @@ definitions:
EdgeKey:
description: The key which is used to map the agent to Portainer
type: string
Gpus:
description: Endpoint Gpus information
items:
$ref: '#/definitions/portainer.Pair'
type: array
GroupId:
description: Endpoint group identifier
example: 1