kubernetes · k8s-ci-robot · Dec 12, 2025 · Dec 11, 2025
diff --git a/cluster-autoscaler/cloudprovider/coreweave/coreweave_instance_types.go b/cluster-autoscaler/cloudprovider/coreweave/coreweave_instance_types.go
@@ -0,0 +1,195 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package coreweave
+
+import "fmt"
+
+// InstanceType describes the resources of a single CoreWeave instance type.
+// Units are chosen to match the Kubernetes Node resource representation.
+type InstanceType struct {
+	// VCPU is the number of virtual CPU cores.
+	VCPU int64
+	// MemoryKi is the amount of memory in kibibytes (1 Ki = 1024 bytes).
+	MemoryKi int64
+	// GPU is the number of GPUs; template nodes advertise a GPU resource only when it is positive.
+	GPU int64
+	// EphemeralStorageKi is the amount of ephemeral storage in kibibytes (1 Ki = 1024 bytes).
+	EphemeralStorageKi int64
+	// Architecture is the CPU architecture (e.g., "amd64", "arm64"); empty selects the provider default.
+	Architecture string
+	// MaxPods is the maximum number of pods on this instance type; template nodes use 110 when it is not positive.
+	MaxPods int64
+}
+
+// InstanceTypes maps CoreWeave instance type names to their specifications.
+// GetInstanceType resolves names against this map, so an instance type that is not listed here is reported as unknown.
+var InstanceTypes = map[string]*InstanceType{
+	"b200-8x": {
+		VCPU:               128,
+		MemoryKi:           2112277172,
+		GPU:                8,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"cd-gp-a192-genoa": {
+		VCPU:               192,
+		MemoryKi:           1583811548,
+		GPU:                0,
+		EphemeralStorageKi: 7499230528,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"cd-gp-l-a192-genoa": {
+		VCPU:               192,
+		MemoryKi:           1583796048,
+		GPU:                0,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"cd-gp-i64-erapids": {
+		VCPU:               64,
+		MemoryKi:           526674536,
+		GPU:                0,
+		EphemeralStorageKi: 7499230528,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"cd-gp-l-i64-erapids": {
+		VCPU:               64,
+		MemoryKi:           526668108,
+		GPU:                0,
+		EphemeralStorageKi: 15000547328,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"cd-gp-i96-icelake": {
+		VCPU:               96,
+		MemoryKi:           394209340,
+		GPU:                0,
+		EphemeralStorageKi: 6248987968,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"cd-hc-a384ib-genoa": {
+		VCPU:               384,
+		MemoryKi:           1583672504,
+		GPU:                0,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"cd-hc-a384-genoa": {
+		VCPU:               384,
+		MemoryKi:           1583673336,
+		GPU:                0,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            300, // NOTE(review): every other entry uses 110 — confirm 300 is intended.
+	},
+	"cd-hp-a96-genoa": {
+		VCPU:               96,
+		MemoryKi:           791111968,
+		GPU:                0,
+		EphemeralStorageKi: 7499230528,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"gd-1xgh200": {
+		VCPU:               72,
+		MemoryKi:           600218240,
+		GPU:                1,
+		EphemeralStorageKi: 7499362648,
+		Architecture:       "arm64",
+		MaxPods:            110,
+	},
+	"gd-8xa100-i128": {
+		VCPU:               128,
+		MemoryKi:           2112249840,
+		GPU:                8,
+		EphemeralStorageKi: 7499362648,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"gd-8xh100ib-i128": {
+		VCPU:               128,
+		MemoryKi:           2112109804,
+		GPU:                8,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"gd-8xh200ib-i128": {
+		VCPU:               128,
+		MemoryKi:           2112109800,
+		GPU:                8,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"gd-8xl40-i128": {
+		VCPU:               128,
+		MemoryKi:           1055335508,
+		GPU:                8,
+		EphemeralStorageKi: 7499362648,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"gd-8xl40s-i128": {
+		VCPU:               128,
+		MemoryKi:           1055337468,
+		GPU:                8,
+		EphemeralStorageKi: 7499362648,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"rtxp6000-8x": {
+		VCPU:               128,
+		MemoryKi:           1055335468,
+		GPU:                8,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"turin-gp-l": {
+		VCPU:               192,
+		MemoryKi:           1583282436,
+		GPU:                0,
+		EphemeralStorageKi: 30003181568,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+	"turin-gp": {
+		VCPU:               192,
+		MemoryKi:           1583297960,
+		GPU:                0,
+		EphemeralStorageKi: 7499230528,
+		Architecture:       "amd64",
+		MaxPods:            110,
+	},
+}
+
+// GetInstanceType looks up instanceTypeName in the InstanceTypes map.
+// It returns the matching entry, or nil together with an
+// "unknown instance type" error when the map has no such key.
+func GetInstanceType(instanceTypeName string) (*InstanceType, error) {
+	if spec, found := InstanceTypes[instanceTypeName]; found {
+		return spec, nil
+	}
+	return nil, fmt.Errorf("unknown instance type: %s", instanceTypeName)
+}
diff --git a/cluster-autoscaler/cloudprovider/coreweave/coreweave_nodegroup.go b/cluster-autoscaler/cloudprovider/coreweave/coreweave_nodegroup.go
@@ -18,12 +18,16 @@ package coreweave
 
 import (
 	"fmt"
+	"math/rand"
 	"sync"
 
 	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
 	"k8s.io/klog/v2"
 )
 
@@ -123,9 +127,108 @@ func (ng *CoreWeaveNodeGroup) Nodes() ([]cloudprovider.Instance, error) {
 }
 
 // TemplateNodeInfo returns a template NodeInfo for the node group.
-// This method is not implemented for CoreWeaveNodeGroup.
+// The autoscaler uses it to simulate what a new node would look like when scaling from zero or when the group has
+// no nodes. It fails if the pool has no instance type, the type is unknown, or the template node cannot be built.
 func (ng *CoreWeaveNodeGroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
-	return nil, cloudprovider.ErrNotImplemented
+	instanceTypeName := ng.nodepool.GetInstanceType()
+	if instanceTypeName == "" {
+		return nil, fmt.Errorf("node pool %s has no instance type defined", ng.Name)
+	}
+
+	instanceType, err := GetInstanceType(instanceTypeName)
+	if err != nil {
+		// %w keeps the lookup error in the chain so callers can inspect it with errors.Is / errors.As.
+		return nil, fmt.Errorf("failed to get instance type info for %s: %w", instanceTypeName, err)
+	}
+
+	node, err := ng.buildNodeFromInstanceType(instanceTypeName, instanceType)
+	if err != nil {
+		return nil, fmt.Errorf("failed to build node from instance type: %w", err)
+	}
+
+	// The second parameter is for ResourceSlices when using DRA. CoreWeave only uses DRA for rack-based
+	// instances, which are not supported by the Cluster Autoscaler at this time.
+	nodeInfo := framework.NewNodeInfo(node, nil)
+	return nodeInfo, nil
+}
+
+// buildNodeFromInstanceType assembles the template Node for this node group.
+// The node gets a randomly suffixed name, the labels from buildNodeLabels, the
+// node pool's taints, the conditions from cloudprovider.BuildReadyConditions,
+// and the instance type's resources. The returned error is always nil.
+func (ng *CoreWeaveNodeGroup) buildNodeFromInstanceType(instanceTypeName string, instanceType *InstanceType) (*apiv1.Node, error) {
+	name := fmt.Sprintf("%s-template-%d", ng.Name, rand.Int63())
+	res := ng.buildResourceList(instanceType)
+
+	meta := metav1.ObjectMeta{
+		Name:   name,
+		Labels: ng.buildNodeLabels(name, instanceTypeName, instanceType),
+	}
+
+	spec := apiv1.NodeSpec{
+		Taints: ng.nodepool.GetNodeTaints(),
+	}
+
+	// System pods are ignored, so Allocatable is given the very same
+	// ResourceList value as Capacity.
+	status := apiv1.NodeStatus{
+		Capacity:    res,
+		Allocatable: res,
+		Conditions:  cloudprovider.BuildReadyConditions(),
+	}
+
+	tmpl := &apiv1.Node{ObjectMeta: meta, Spec: spec, Status: status}
+	return tmpl, nil
+}
+
+// buildResourceList converts an instance type's specifications into a ResourceList.
+// CPU, memory and pods are always present; ephemeral storage and the NVIDIA GPU
+// resource are added only when the instance type reports a positive amount.
+func (ng *CoreWeaveNodeGroup) buildResourceList(instanceType *InstanceType) apiv1.ResourceList {
+	// The instance type stores memory and storage in Ki; quantities are built in bytes.
+	const bytesPerKi = 1024
+
+	// Fall back to 110 pods (the Kubernetes default) when MaxPods is not set.
+	podLimit := int64(110)
+	if instanceType.MaxPods > 0 {
+		podLimit = instanceType.MaxPods
+	}
+
+	list := apiv1.ResourceList{
+		apiv1.ResourceCPU:    *resource.NewQuantity(instanceType.VCPU, resource.DecimalSI),
+		apiv1.ResourceMemory: *resource.NewQuantity(instanceType.MemoryKi*bytesPerKi, resource.BinarySI),
+		apiv1.ResourcePods:   *resource.NewQuantity(podLimit, resource.DecimalSI),
+	}
+
+	if storageKi := instanceType.EphemeralStorageKi; storageKi > 0 {
+		list[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(storageKi*bytesPerKi, resource.BinarySI)
+	}
+	if gpus := instanceType.GPU; gpus > 0 {
+		list[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(gpus, resource.DecimalSI)
+	}
+
+	return list
+}
+
+// buildNodeLabels creates the labels for a template node: instance type, architecture
+// (cloudprovider.DefaultArch when the instance type has none), OS, and the node pool's UID and name.
+// Labels from the node pool are applied last and win on key conflicts. nodeName is currently unused.
+func (ng *CoreWeaveNodeGroup) buildNodeLabels(nodeName, instanceTypeName string, instanceType *InstanceType) map[string]string {
+	arch := instanceType.Architecture
+	if arch == "" {
+		arch = cloudprovider.DefaultArch
+	}
+	result := map[string]string{
+		apiv1.LabelInstanceTypeStable: instanceTypeName,
+		apiv1.LabelArchStable:         arch,
+		apiv1.LabelOSStable:           cloudprovider.DefaultOS,
+	}
+	result[coreWeaveNodePoolUID] = ng.nodepool.GetUID()
+	result[coreWeaveNodePoolName] = ng.nodepool.GetName()
+	for key, value := range ng.nodepool.GetNodeLabels() {
+		result[key] = value
+	}
+	return result
 }
 
 // Exist checks if the node group exists.