Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/csi/blockstorage/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,10 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs

_, err = cloud.AttachVolume(ctx, instanceID, volumeID)
if err != nil {
// Trigger's an immediate `NodeGetInfo` RPC call when MutableCSINodeAllocatableCount is enabled
if stackiterrors.IsTooManyDevicesError(err) {
return nil, status.Errorf(codes.ResourceExhausted, "[ControllerPublishVolume] Node can't accept any more volumes %v. All PCIe lanes are exhausted!", err)
}
klog.Errorf("Failed to AttachVolume: %v", err)
return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] Attach Volume failed with error %v", err)
}
Expand Down
16 changes: 10 additions & 6 deletions pkg/csi/blockstorage/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,15 +302,19 @@ func (ns *nodeServer) NodeGetInfo(ctx context.Context, _ *csi.NodeGetInfoRequest
return nil, status.Errorf(codes.Internal, "[NodeGetInfo] unable to retrieve instance id of node %v", err)
}

flavor, err := ns.Metadata.GetFlavor(ctx)
emptyPCIeRootPorts, err := mount.CountFreePCIeSlots()
if err != nil {
return nil, status.Errorf(codes.Internal, "[NodeGetInfo] unable to retrieve flavor of node %v", err)
klog.Errorf("[NodeGetInfo] unable to retrieve PCIe root ports %v", err)
emptyPCIeRootPorts = 0
}

maxVolumesPerNode := DetermineMaxVolumesByFlavor(flavor)
// Subtract 1 for root disk and another for configDrive/spare
maxVolumesPerNode -= 2
klog.V(4).Infof("Determined node to support %d volumes", maxVolumesPerNode)
vols, err := mount.CountLocalCSIVolumes(driverName)
if err != nil {
klog.Errorf("[NodeGetInfo] unable to retrieve volume count %v", err)
}

// maxVolumesPerNode is the result of all free/empty PCIClassBridgePCI ports plus all already mounted volumes.
maxVolumesPerNode := emptyPCIeRootPorts + vols

nodeInfo := &csi.NodeGetInfoResponse{
NodeId: nodeID,
Expand Down
17 changes: 0 additions & 17 deletions pkg/csi/blockstorage/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,23 +84,6 @@ func ParseEndpoint(ep string) (proto, addr string, err error) {
return "", "", fmt.Errorf("invalid endpoint: %v", ep)
}

func DetermineMaxVolumesByFlavor(flavor string) int64 {
flavorParts := strings.Split(flavor, ".")

// The following numbers were specified by the IaaS team. They are based on actual tests.
switch {
case strings.HasPrefix(flavor, "n"):
// Flavors starting with 'n' are nvidia GPU flavors, all GPU VM's can only mount 10 volumes
return 10
case strings.HasSuffix(flavorParts[0], "2a"):
// AMD 2nd Gen
return 159
default:
// All other flavors can mount 28 volumes
return 25
}
}

func logGRPC(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
callID := serverGRPCEndpointCallCounter.Add(1)

Expand Down
25 changes: 0 additions & 25 deletions pkg/csi/blockstorage/utils_test.go

This file was deleted.

9 changes: 9 additions & 0 deletions pkg/csi/util/mount/mount_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,12 @@ func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats {
UsedInodes: int64(statfs.Files) - int64(statfs.Ffree),
}
}

func CountLocalCSIVolumes(_ string) (int64, error) {
// not implemented
return 0, nil
}

func CountFreePCIeSlots() (int64, error) {
return 0, nil
}
99 changes: 98 additions & 1 deletion pkg/csi/util/mount/mount_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,26 @@

package mount

import "golang.org/x/sys/unix"
import (
"fmt"
"os"
"path/filepath"
"regexp"
"slices"
"strings"

"golang.org/x/sys/unix"
"k8s.io/klog/v2"
)

var (
pciAddressRegex = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`)
)

const (
// PCIClassBridgePCI Linux constant: https://github.com/torvalds/linux/blob/e43ffb69e0438cddd72aaa30898b4dc446f664f8/include/linux/pci_ids.h#L62
PCIClassBridgePCI = "0x0604"
)

func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats {
return &DeviceStats{
Expand All @@ -17,3 +36,81 @@ func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats {
UsedInodes: int64(statfs.Files) - int64(statfs.Ffree),
}
}

// CountFreePCIeSlots returns the number of PCIe Root ports who
// are currently not occupied by anything.
func CountFreePCIeSlots() (int64, error) {
const pciPath = "/sys/bus/pci/devices"

// Get all PCI devices
devices, err := os.ReadDir(pciPath)
if err != nil {
return 0, fmt.Errorf("failed to read PCI bus: %w", err)
}

freePCIeSlots := 0

for _, dev := range devices {
devPath := filepath.Join(pciPath, dev.Name())

// 1. Identify if it's a Root Port / Bridge
// We check the 'class' file. PCI Bridge class code starts with 0x0604
classBuf, err := os.ReadFile(filepath.Join(devPath, "class"))
if err != nil {
klog.Errorf("failed to read PCI device class %s : %v", devPath, err)
continue
}
class := strings.TrimSpace(string(classBuf))

// Class 0x060400 is a PCI-to-PCI bridge (standard for Root Ports)
if strings.HasPrefix(class, PCIClassBridgePCI) {
// 2. Check if the port has downstream devices
// If the bridge has children, they appear as subdirectories
// matching the PCI address format (e.g., 0000:01:00.0)
files, err2 := os.ReadDir(devPath)
if err2 != nil {
klog.Errorf("failed to read dir %s : %v", devPath, err2)
}
hasDownStreamFolder := slices.ContainsFunc(files, func(s os.DirEntry) bool {
return pciAddressRegex.MatchString(s.Name())
})
if !hasDownStreamFolder {
freePCIeSlots += 1
}
} else {
klog.V(4).Infof("skipping class %s: path: %s", class, devPath)
Comment thread
nschad marked this conversation as resolved.
}
}

return int64(freePCIeSlots), nil
}

// CountLocalCSIVolumes tries to count how many volumes are mounted for a given driverName.
func CountLocalCSIVolumes(driverName string) (int64, error) {
const kubeletDir = "/var/lib/kubelet"
volumeCount := 0
// The path where Kubelet mounts global tracking directories for a specific CSI driver
targetDir := filepath.Join(kubeletDir, "plugins", "kubernetes.io", "csi", driverName)

if _, err := os.Stat(targetDir); os.IsNotExist(err) {
return 0, nil
} else if err != nil {
return 0, fmt.Errorf("failed to check directory: %w", err)
}

volumes, err := os.ReadDir(targetDir)
if err != nil {
return 0, fmt.Errorf("failed to read dir %s: %w", targetDir, err)
}
for _, vol := range volumes {
// Check if volume has a "globalmount" dir to determine if it's mounted correctly
globalMountPath := filepath.Join(vol.Name(), "globalmount")
if _, err := os.Stat(globalMountPath); os.IsNotExist(err) {
continue
}

volumeCount++
}

return int64(volumeCount), nil
}
14 changes: 13 additions & 1 deletion pkg/stackit/stackiterrors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import (
"errors"
"fmt"
"net/http"
"strings"

oapiError "github.com/stackitcloud/stackit-sdk-go/core/oapierror"
wait "github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait"
"github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait"
)

var ErrNotFound = errors.New("failed to find object")
Expand All @@ -20,6 +21,17 @@ func IsNotFound(err error) bool {
return oAPIError.StatusCode == http.StatusNotFound
}

func IsTooManyDevicesError(err error) bool {
var oAPIError *oapiError.GenericOpenAPIError
if ok := errors.As(err, &oAPIError); !ok {
return false
}

// TODO: Improve this if possible
return oAPIError.StatusCode == http.StatusForbidden &&
strings.Contains(string(oAPIError.Body), "maximum allowed number of disk devices")
}

func IgnoreNotFound(err error) error {
if IsNotFound(err) {
return nil
Expand Down