Skip to content

Commit

Permalink
Expose Flavors in LocalQueue Status.
Browse files Browse the repository at this point in the history
  • Loading branch information
mbobrovskyi committed Sep 26, 2024
1 parent fd12357 commit 61629ef
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 32 deletions.
6 changes: 6 additions & 0 deletions apis/kueue/v1beta1/localqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ type LocalQueueStatus struct {
// +kubebuilder:validation:MaxItems=16
// +optional
FlavorUsage []LocalQueueFlavorUsage `json:"flavorUsage"`

// availableFlavors lists the ResourceFlavors referenced by the resource groups
// of the ClusterQueue that this LocalQueue points to.
//
// +listType=set
// +optional
AvailableFlavors []ResourceFlavorReference `json:"availableFlavors"`
}

const (
Expand Down
5 changes: 5 additions & 0 deletions apis/kueue/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions charts/kueue/templates/crd/kueue.x-k8s.io_localqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ spec:
admitted to a ClusterQueue and that haven't finished yet.
format: int32
type: integer
availableFlavors:
description: availableFlavors lists the ResourceFlavors referenced by the resource groups of the ClusterQueue that this LocalQueue points to.
items:
description: ResourceFlavorReference is the name of the ResourceFlavor.
maxLength: 253
pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
type: array
x-kubernetes-list-type: set
conditions:
description: |-
Conditions hold the latest available observations of the LocalQueue
Expand Down
12 changes: 12 additions & 0 deletions client-go/applyconfiguration/kueue/v1beta1/localqueuestatus.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions config/components/crd/bases/kueue.x-k8s.io_localqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ spec:
admitted to a ClusterQueue and that haven't finished yet.
format: int32
type: integer
availableFlavors:
description: availableFlavors lists the ResourceFlavors referenced by the resource groups of the ClusterQueue that this LocalQueue points to.
items:
description: ResourceFlavorReference is the name of the ResourceFlavor.
maxLength: 253
pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
type: array
x-kubernetes-list-type: set
conditions:
description: |-
Conditions hold the latest available observations of the LocalQueue
Expand Down
8 changes: 8 additions & 0 deletions pkg/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
"k8s.io/utils/set"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand Down Expand Up @@ -668,6 +669,7 @@ type LocalQueueUsageStats struct {
ReservingWorkloads int
AdmittedResources []kueue.LocalQueueFlavorUsage
AdmittedWorkloads int
AvailableFlavors []kueue.ResourceFlavorReference
}

func (c *Cache) LocalQueueUsage(qObj *kueue.LocalQueue) (*LocalQueueUsageStats, error) {
Expand All @@ -683,11 +685,17 @@ func (c *Cache) LocalQueueUsage(qObj *kueue.LocalQueue) (*LocalQueueUsageStats,
return nil, errQNotFound
}

availableFlavors := set.New[kueue.ResourceFlavorReference]()
for _, rg := range cqImpl.ResourceGroups {
availableFlavors.Insert(rg.Flavors...)
}

return &LocalQueueUsageStats{
ReservedResources: filterLocalQueueUsage(qImpl.usage, cqImpl.ResourceGroups),
ReservingWorkloads: qImpl.reservingWorkloads,
AdmittedResources: filterLocalQueueUsage(qImpl.admittedUsage, cqImpl.ResourceGroups),
AdmittedWorkloads: qImpl.admittedWorkloads,
AvailableFlavors: availableFlavors.SortedList(),
}, nil
}

Expand Down
1 change: 1 addition & 0 deletions pkg/controller/core/localqueue_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ func (r *LocalQueueReconciler) UpdateStatusIfChanged(
queue.Status.AdmittedWorkloads = int32(stats.AdmittedWorkloads)
queue.Status.FlavorsReservation = stats.ReservedResources
queue.Status.FlavorUsage = stats.AdmittedResources
queue.Status.AvailableFlavors = stats.AvailableFlavors
if len(conditionStatus) != 0 && len(reason) != 0 && len(msg) != 0 {
meta.SetStatusCondition(&queue.Status.Conditions, metav1.Condition{
Type: kueue.LocalQueueActive,
Expand Down
9 changes: 9 additions & 0 deletions site/content/en/docs/reference/kueue.v1beta1.md
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,13 @@ workloads assigned to this LocalQueue.</p>
workloads assigned to this LocalQueue.</p>
</td>
</tr>
<tr><td><code>availableFlavors</code><br/>
<a href="#kueue-x-k8s-io-v1beta1-ResourceFlavorReference"><code>[]ResourceFlavorReference</code></a>
</td>
<td>
<p>availableFlavors lists the ResourceFlavors referenced by the resource groups of the ClusterQueue that this LocalQueue points to.</p>
</td>
</tr>
</tbody>
</table>

Expand Down Expand Up @@ -1619,6 +1626,8 @@ this time would be reset to null.</p>

- [LocalQueueFlavorUsage](#kueue-x-k8s-io-v1beta1-LocalQueueFlavorUsage)

- [LocalQueueStatus](#kueue-x-k8s-io-v1beta1-LocalQueueStatus)

- [PodSetAssignment](#kueue-x-k8s-io-v1beta1-PodSetAssignment)


Expand Down
103 changes: 71 additions & 32 deletions test/integration/controller/core/localqueue_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,32 +91,60 @@ var _ = ginkgo.Describe("Queue controller", ginkgo.Ordered, ginkgo.ContinueOnFai
})

ginkgo.It("Should update conditions when clusterQueues that its localQueue references are updated", func() {
gomega.Eventually(func() []metav1.Condition {
gomega.Eventually(func() kueue.LocalQueueStatus {
var updatedQueue kueue.LocalQueue
gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(queue), &updatedQueue)).To(gomega.Succeed())
return updatedQueue.Status.Conditions
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionFalse,
Reason: "ClusterQueueDoesNotExist",
Message: "Can't submit new workloads to clusterQueue",
return updatedQueue.Status
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(kueue.LocalQueueStatus{
Conditions: []metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionFalse,
Reason: "ClusterQueueDoesNotExist",
Message: "Can't submit new workloads to clusterQueue",
},
},
}, util.IgnoreConditionTimestampsAndObservedGeneration))

emptyUsage := []kueue.LocalQueueFlavorUsage{
{
Name: flavorModelC,
Resources: []kueue.LocalQueueResourceUsage{
{
Name: resourceGPU,
Total: resource.MustParse("0"),
},
},
},
{
Name: flavorModelD,
Resources: []kueue.LocalQueueResourceUsage{
{
Name: resourceGPU,
Total: resource.MustParse("0"),
},
},
},
}

ginkgo.By("Creating a clusterQueue")
gomega.Expect(k8sClient.Create(ctx, clusterQueue)).To(gomega.Succeed())
gomega.Eventually(func() []metav1.Condition {
gomega.Eventually(func() kueue.LocalQueueStatus {
var updatedQueue kueue.LocalQueue
gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(queue), &updatedQueue)).To(gomega.Succeed())
return updatedQueue.Status.Conditions
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionFalse,
Reason: "ClusterQueueIsInactive",
Message: "Can't submit new workloads to clusterQueue",
return updatedQueue.Status
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(kueue.LocalQueueStatus{
Conditions: []metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionFalse,
Reason: "ClusterQueueIsInactive",
Message: "Can't submit new workloads to clusterQueue",
},
},
FlavorsReservation: emptyUsage,
FlavorUsage: emptyUsage,
AvailableFlavors: []kueue.ResourceFlavorReference{"model-c", "model-d"},
}, util.IgnoreConditionTimestampsAndObservedGeneration))

ginkgo.By("Creating resourceFlavors")
Expand All @@ -135,31 +163,38 @@ var _ = ginkgo.Describe("Queue controller", ginkgo.Ordered, ginkgo.ContinueOnFai
Message: "Can admit new workloads",
},
}, util.IgnoreConditionTimestampsAndObservedGeneration))
gomega.Eventually(func() []metav1.Condition {
gomega.Eventually(func() kueue.LocalQueueStatus {
var updatedQueue kueue.LocalQueue
gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(queue), &updatedQueue)).To(gomega.Succeed())
return updatedQueue.Status.Conditions
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionTrue,
Reason: "Ready",
Message: "Can submit new workloads to clusterQueue",
return updatedQueue.Status
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(kueue.LocalQueueStatus{
Conditions: []metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionTrue,
Reason: "Ready",
Message: "Can submit new workloads to clusterQueue",
},
},
FlavorsReservation: emptyUsage,
FlavorUsage: emptyUsage,
AvailableFlavors: []kueue.ResourceFlavorReference{"model-c", "model-d"},
}, util.IgnoreConditionTimestampsAndObservedGeneration))

ginkgo.By("Deleting a clusterQueue")
gomega.Expect(k8sClient.Delete(ctx, clusterQueue)).To(gomega.Succeed())
gomega.Eventually(func() []metav1.Condition {
gomega.Eventually(func() kueue.LocalQueueStatus {
var updatedQueue kueue.LocalQueue
gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(queue), &updatedQueue)).To(gomega.Succeed())
return updatedQueue.Status.Conditions
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionFalse,
Reason: "ClusterQueueDoesNotExist",
Message: "Can't submit new workloads to clusterQueue",
return updatedQueue.Status
}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(kueue.LocalQueueStatus{
Conditions: []metav1.Condition{
{
Type: kueue.LocalQueueActive,
Status: metav1.ConditionFalse,
Reason: "ClusterQueueDoesNotExist",
Message: "Can't submit new workloads to clusterQueue",
},
},
}, util.IgnoreConditionTimestampsAndObservedGeneration))
})
Expand Down Expand Up @@ -239,6 +274,7 @@ var _ = ginkgo.Describe("Queue controller", ginkgo.Ordered, ginkgo.ContinueOnFai
},
FlavorsReservation: emptyUsage,
FlavorUsage: emptyUsage,
AvailableFlavors: []kueue.ResourceFlavorReference{"model-c", "model-d"},
}, util.IgnoreConditionTimestampsAndObservedGeneration))

ginkgo.By("Setting the workloads quota reservation")
Expand Down Expand Up @@ -289,6 +325,7 @@ var _ = ginkgo.Describe("Queue controller", ginkgo.Ordered, ginkgo.ContinueOnFai
},
FlavorsReservation: fullUsage,
FlavorUsage: emptyUsage,
AvailableFlavors: []kueue.ResourceFlavorReference{"model-c", "model-d"},
}, util.IgnoreConditionTimestampsAndObservedGeneration))

ginkgo.By("Setting the workloads admission checks")
Expand All @@ -314,6 +351,7 @@ var _ = ginkgo.Describe("Queue controller", ginkgo.Ordered, ginkgo.ContinueOnFai
},
FlavorsReservation: fullUsage,
FlavorUsage: fullUsage,
AvailableFlavors: []kueue.ResourceFlavorReference{"model-c", "model-d"},
}, util.IgnoreConditionTimestampsAndObservedGeneration))

ginkgo.By("Finishing workloads")
Expand All @@ -333,6 +371,7 @@ var _ = ginkgo.Describe("Queue controller", ginkgo.Ordered, ginkgo.ContinueOnFai
},
FlavorsReservation: emptyUsage,
FlavorUsage: emptyUsage,
AvailableFlavors: []kueue.ResourceFlavorReference{"model-c", "model-d"},
}, util.IgnoreConditionTimestampsAndObservedGeneration))
})
})

0 comments on commit 61629ef

Please sign in to comment.