mirror of
https://github.com/portainer/portainer.git
synced 2025-08-04 21:35:23 +02:00
feat(gpu) EE-3191 Add GPU support for containers (#7146)
This commit is contained in:
parent
f0456cbf5f
commit
4997e9c7be
43 changed files with 758 additions and 10 deletions
|
@ -103,6 +103,9 @@ func (m *Migrator) Migrate() error {
|
|||
|
||||
// Portainer 2.14
|
||||
newMigration(50, m.migrateDBVersionToDB50),
|
||||
|
||||
// Portainer 2.15
|
||||
newMigration(60, m.migrateDBVersionToDB60),
|
||||
}
|
||||
|
||||
var lastDbVersion int
|
||||
|
|
30
api/datastore/migrator/migrate_dbversion60.go
Normal file
30
api/datastore/migrator/migrate_dbversion60.go
Normal file
|
@ -0,0 +1,30 @@
|
|||
package migrator
|
||||
|
||||
import portainer "github.com/portainer/portainer/api"
|
||||
|
||||
func (m *Migrator) migrateDBVersionToDB60() error {
|
||||
if err := m.addGpuInputFieldDB60(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Migrator) addGpuInputFieldDB60() error {
|
||||
migrateLog.Info("- add gpu input field")
|
||||
endpoints, err := m.endpointService.Endpoints()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, endpoint := range endpoints {
|
||||
endpoint.Gpus = []portainer.Pair{}
|
||||
err = m.endpointService.UpdateEndpoint(endpoint.ID, &endpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -43,6 +43,7 @@
|
|||
},
|
||||
"EdgeCheckinInterval": 0,
|
||||
"EdgeKey": "",
|
||||
"Gpus": [],
|
||||
"GroupId": 1,
|
||||
"Id": 1,
|
||||
"IsEdgeDevice": false,
|
||||
|
@ -175,6 +176,8 @@
|
|||
}
|
||||
},
|
||||
"DockerVersion": "20.10.13",
|
||||
"GpuUseAll": false,
|
||||
"GpuUseList": null,
|
||||
"HealthyContainerCount": 0,
|
||||
"ImageCount": 9,
|
||||
"NodeCount": 0,
|
||||
|
|
|
@ -7,9 +7,10 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
_container "github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/portainer/portainer/api"
|
||||
portainer "github.com/portainer/portainer/api"
|
||||
)
|
||||
|
||||
// Snapshotter represents a service used to create environment(endpoint) snapshots
|
||||
|
@ -154,11 +155,35 @@ func snapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Client)
|
|||
healthyContainers := 0
|
||||
unhealthyContainers := 0
|
||||
stacks := make(map[string]struct{})
|
||||
gpuUseSet := make(map[string]struct{})
|
||||
gpuUseAll := false
|
||||
for _, container := range containers {
|
||||
if container.State == "exited" {
|
||||
stoppedContainers++
|
||||
} else if container.State == "running" {
|
||||
runningContainers++
|
||||
|
||||
// snapshot GPUs
|
||||
response, err := cli.ContainerInspect(context.Background(), container.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var gpuOptions *_container.DeviceRequest = nil
|
||||
for _, deviceRequest := range response.HostConfig.Resources.DeviceRequests {
|
||||
if deviceRequest.Driver == "nvidia" || deviceRequest.Capabilities[0][0] == "gpu" {
|
||||
gpuOptions = &deviceRequest
|
||||
}
|
||||
}
|
||||
|
||||
if gpuOptions != nil {
|
||||
if gpuOptions.Count == -1 {
|
||||
gpuUseAll = true
|
||||
}
|
||||
for _, id := range gpuOptions.DeviceIDs {
|
||||
gpuUseSet[id] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(container.Status, "(healthy)") {
|
||||
|
@ -174,6 +199,14 @@ func snapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Client)
|
|||
}
|
||||
}
|
||||
|
||||
gpuUseList := make([]string, 0, len(gpuUseSet))
|
||||
for gpuUse := range gpuUseSet {
|
||||
gpuUseList = append(gpuUseList, gpuUse)
|
||||
}
|
||||
|
||||
snapshot.GpuUseAll = gpuUseAll
|
||||
snapshot.GpuUseList = gpuUseList
|
||||
|
||||
snapshot.RunningContainerCount = runningContainers
|
||||
snapshot.StoppedContainerCount = stoppedContainers
|
||||
snapshot.HealthyContainerCount = healthyContainers
|
||||
|
|
|
@ -25,6 +25,7 @@ type endpointCreatePayload struct {
|
|||
URL string
|
||||
EndpointCreationType endpointCreationEnum
|
||||
PublicURL string
|
||||
Gpus []portainer.Pair
|
||||
GroupID int
|
||||
TLS bool
|
||||
TLSSkipVerify bool
|
||||
|
@ -142,6 +143,13 @@ func (payload *endpointCreatePayload) Validate(r *http.Request) error {
|
|||
payload.PublicURL = publicURL
|
||||
}
|
||||
|
||||
gpus := make([]portainer.Pair, 0)
|
||||
err = request.RetrieveMultiPartFormJSONValue(r, "Gpus", &gpus, true)
|
||||
if err != nil {
|
||||
return errors.New("Invalid Gpus parameter")
|
||||
}
|
||||
payload.Gpus = gpus
|
||||
|
||||
checkinInterval, _ := request.RetrieveNumericMultiPartFormValue(r, "CheckinInterval", true)
|
||||
payload.EdgeCheckinInterval = checkinInterval
|
||||
|
||||
|
@ -290,6 +298,7 @@ func (handler *Handler) createAzureEndpoint(payload *endpointCreatePayload) (*po
|
|||
Type: portainer.AzureEnvironment,
|
||||
GroupID: portainer.EndpointGroupID(payload.GroupID),
|
||||
PublicURL: payload.PublicURL,
|
||||
Gpus: payload.Gpus,
|
||||
UserAccessPolicies: portainer.UserAccessPolicies{},
|
||||
TeamAccessPolicies: portainer.TeamAccessPolicies{},
|
||||
AzureCredentials: credentials,
|
||||
|
@ -323,6 +332,7 @@ func (handler *Handler) createEdgeAgentEndpoint(payload *endpointCreatePayload)
|
|||
URL: portainerHost,
|
||||
Type: portainer.EdgeAgentOnDockerEnvironment,
|
||||
GroupID: portainer.EndpointGroupID(payload.GroupID),
|
||||
Gpus: payload.Gpus,
|
||||
TLSConfig: portainer.TLSConfiguration{
|
||||
TLS: false,
|
||||
},
|
||||
|
@ -378,6 +388,7 @@ func (handler *Handler) createUnsecuredEndpoint(payload *endpointCreatePayload)
|
|||
Type: endpointType,
|
||||
GroupID: portainer.EndpointGroupID(payload.GroupID),
|
||||
PublicURL: payload.PublicURL,
|
||||
Gpus: payload.Gpus,
|
||||
TLSConfig: portainer.TLSConfiguration{
|
||||
TLS: false,
|
||||
},
|
||||
|
@ -412,6 +423,7 @@ func (handler *Handler) createKubernetesEndpoint(payload *endpointCreatePayload)
|
|||
Type: portainer.KubernetesLocalEnvironment,
|
||||
GroupID: portainer.EndpointGroupID(payload.GroupID),
|
||||
PublicURL: payload.PublicURL,
|
||||
Gpus: payload.Gpus,
|
||||
TLSConfig: portainer.TLSConfiguration{
|
||||
TLS: payload.TLS,
|
||||
TLSSkipVerify: payload.TLSSkipVerify,
|
||||
|
@ -441,6 +453,7 @@ func (handler *Handler) createTLSSecuredEndpoint(payload *endpointCreatePayload,
|
|||
Type: endpointType,
|
||||
GroupID: portainer.EndpointGroupID(payload.GroupID),
|
||||
PublicURL: payload.PublicURL,
|
||||
Gpus: payload.Gpus,
|
||||
TLSConfig: portainer.TLSConfiguration{
|
||||
TLS: payload.TLS,
|
||||
TLSSkipVerify: payload.TLSSkipVerify,
|
||||
|
|
|
@ -22,6 +22,8 @@ type endpointUpdatePayload struct {
|
|||
// URL or IP address where exposed containers will be reachable.\
|
||||
// Defaults to URL if not specified
|
||||
PublicURL *string `example:"docker.mydomain.tld:2375"`
|
||||
// GPUs information
|
||||
Gpus []portainer.Pair
|
||||
// Group identifier
|
||||
GroupID *int `example:"1"`
|
||||
// Require TLS to connect against this environment(endpoint)
|
||||
|
@ -110,6 +112,10 @@ func (handler *Handler) endpointUpdate(w http.ResponseWriter, r *http.Request) *
|
|||
endpoint.PublicURL = *payload.PublicURL
|
||||
}
|
||||
|
||||
if payload.Gpus != nil {
|
||||
endpoint.Gpus = payload.Gpus
|
||||
}
|
||||
|
||||
if payload.EdgeCheckinInterval != nil {
|
||||
endpoint.EdgeCheckinInterval = *payload.EdgeCheckinInterval
|
||||
}
|
||||
|
|
|
@ -199,6 +199,8 @@ type (
|
|||
StackCount int `json:"StackCount"`
|
||||
SnapshotRaw DockerSnapshotRaw `json:"DockerSnapshotRaw"`
|
||||
NodeCount int `json:"NodeCount"`
|
||||
GpuUseAll bool `json:"GpuUseAll"`
|
||||
GpuUseList []string `json:"GpuUseList"`
|
||||
}
|
||||
|
||||
// DockerSnapshotRaw represents all the information related to a snapshot as returned by the Docker API
|
||||
|
@ -310,6 +312,7 @@ type (
|
|||
GroupID EndpointGroupID `json:"GroupId" example:"1"`
|
||||
// URL or IP address where exposed containers will be reachable
|
||||
PublicURL string `json:"PublicURL" example:"docker.mydomain.tld:2375"`
|
||||
Gpus []Pair `json:"Gpus"`
|
||||
TLSConfig TLSConfiguration `json:"TLSConfig"`
|
||||
AzureCredentials AzureCredentials `json:"AzureCredentials,omitempty" example:""`
|
||||
// List of tag identifiers to which this environment(endpoint) is associated
|
||||
|
|
|
@ -693,6 +693,12 @@ definitions:
|
|||
$ref: '#/definitions/portainer.DockerSnapshotRaw'
|
||||
DockerVersion:
|
||||
type: string
|
||||
GpuUseAll:
|
||||
type: boolean
|
||||
GpuUseList:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
HealthyContainerCount:
|
||||
type: integer
|
||||
ImageCount:
|
||||
|
@ -849,6 +855,11 @@ definitions:
|
|||
EdgeKey:
|
||||
description: The key which is used to map the agent to Portainer
|
||||
type: string
|
||||
Gpus:
|
||||
description: Endpoint Gpus information
|
||||
items:
|
||||
$ref: '#/definitions/portainer.Pair'
|
||||
type: array
|
||||
GroupId:
|
||||
description: Endpoint group identifier
|
||||
example: 1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue