Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Fix Hyperdisk ControllerExpandVolume Edge Cases #1899

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pkg/common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,7 @@ const (

// Label that is set on a disk when it is used by a 'multi-zone' VolumeHandle
MultiZoneLabel = "goog-gke-multi-zone"

// Hyperdisk disk type prefix
HyperdiskTypesPrefix = "hyperdisk"
)
2 changes: 2 additions & 0 deletions pkg/common/parameters.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ type ParameterProcessor struct {
type ModifyVolumeParameters struct {
IOPS *int64
Throughput *int64
// Only set SizeGb for ControllerVolumeExpand
SizeGb *int64
}

// ExtractAndDefaultParameters will take the relevant parameters from a map and
Expand Down
4 changes: 4 additions & 0 deletions pkg/common/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ var (
regexValue = regexp.MustCompile(`^[a-zA-Z0-9]([0-9A-Za-z_.@%=+:,*#&()\[\]{}\-\s]{0,61}[a-zA-Z0-9])?$`)

csiRetryableErrorCodes = []codes.Code{codes.Canceled, codes.DeadlineExceeded, codes.Unavailable, codes.Aborted, codes.ResourceExhausted}

// Minimum Iops for Hyperdisk >= 6 Gi
MinHyperdiskIops int64 = 3000
Hyperdisk5GbIops int64 = 2500
)

func BytesToGbRoundDown(bytes int64) int64 {
Expand Down
5 changes: 3 additions & 2 deletions pkg/gce-cloud-provider/compute/fake-gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,9 @@ func (cloud *FakeCloudProvider) UpdateDisk(ctx context.Context, project string,
}
specifiedIops := params.IOPS != nil && *params.IOPS != 0
specifiedThroughput := params.Throughput != nil && *params.Throughput != 0
if !specifiedIops && !specifiedThroughput {
return fmt.Errorf("no IOPS or Throughput specified for disk %v", existingDisk.GetSelfLink())
specifiedSizeGb := params.SizeGb != nil && *params.SizeGb != 0
if !specifiedIops && !specifiedThroughput && !specifiedSizeGb {
return fmt.Errorf("no IOPS or Throughput or SizeGb specified for disk %v", existingDisk.GetSelfLink())
}

if params.IOPS != nil {
Expand Down
18 changes: 16 additions & 2 deletions pkg/gce-cloud-provider/compute/gce-compute.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"net/http"
"reflect"
"regexp"
"strings"
"time"
Expand Down Expand Up @@ -473,8 +474,17 @@ func (cloud *CloudProvider) UpdateDisk(ctx context.Context, project string, volK
func (cloud *CloudProvider) updateZonalDisk(ctx context.Context, project string, volKey *meta.Key, existingDisk *CloudDisk, params common.ModifyVolumeParameters) error {
specifiedIops := params.IOPS != nil && *params.IOPS != 0
specifiedThroughput := params.Throughput != nil && *params.Throughput != 0
if !specifiedIops && !specifiedThroughput {
return fmt.Errorf("no IOPS or Throughput specified for disk %v", existingDisk.GetSelfLink())
specifiedSizeGb := params.SizeGb != nil && *params.SizeGb != 0

v := reflect.ValueOf(params)
typeOfS := v.Type()

for i := 0; i < v.NumField(); i++ {
klog.V(5).Infof("===== params key is %v, value is %v =====", typeOfS.Field(i).Name, v.Field(i).Interface())
}

if !specifiedIops && !specifiedThroughput && !specifiedSizeGb {
return fmt.Errorf("no IOPS or Throughput or SizeGb specified for disk %v", existingDisk.GetSelfLink())
}
updatedDisk := &computev1.Disk{
Name: existingDisk.GetName(),
Expand All @@ -488,6 +498,10 @@ func (cloud *CloudProvider) updateZonalDisk(ctx context.Context, project string,
updatedDisk.ProvisionedThroughput = *params.Throughput
paths = append(paths, "provisionedThroughput")
}
if specifiedSizeGb {
updatedDisk.SizeGb = *params.SizeGb
paths = append(paths, "sizeGb")
}

diskUpdateOp := cloud.service.Disks.Update(project, volKey.Zone, volKey.Name, updatedDisk)
diskUpdateOp.Paths(paths...)
Expand Down
67 changes: 58 additions & 9 deletions pkg/gce-pd-csi-driver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"math/rand"
neturl "net/url"
"reflect"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -779,6 +780,7 @@ func (gceCS *GCEControllerServer) ControllerModifyVolume(ctx context.Context, re
return nil, err
}

// ExtractModifyVolumeParameters throws err if sizeGb is specified
volumeModifyParams, err := common.ExtractModifyVolumeParameters(req.GetMutableParameters())
if err != nil {
klog.Errorf("Failed to extract parameters for volume %s: %v", volumeID, err)
Expand Down Expand Up @@ -1883,17 +1885,64 @@ func (gceCS *GCEControllerServer) ControllerExpandVolume(ctx context.Context, re

sourceDisk, err := gceCS.CloudProvider.GetDisk(ctx, project, volKey, gce.GCEAPIVersionV1)
metrics.UpdateRequestMetadataFromDisk(ctx, sourceDisk)
resizedGb, err := gceCS.CloudProvider.ResizeDisk(ctx, project, volKey, reqBytes)
// Non Hyperdisk types, still use GCE disk resize API
if !strings.HasPrefix(sourceDisk.GetPDType(), common.HyperdiskTypesPrefix) {
resizedGb, err := gceCS.CloudProvider.ResizeDisk(ctx, project, volKey, reqBytes)
if err != nil {
return nil, common.LoggedError("ControllerExpandVolume failed to resize disk: ", err)
}

if err != nil {
return nil, common.LoggedError("ControllerExpandVolume failed to resize disk: ", err)
}
klog.V(4).Infof("ControllerExpandVolume succeeded for disk %v to size %v", volKey, resizedGb)
return &csi.ControllerExpandVolumeResponse{
CapacityBytes: common.GbToBytes(resizedGb),
NodeExpansionRequired: true,
}, nil
} else {
// Hyperdisks use GCE disk update API
updatedVolumeParams := common.ModifyVolumeParameters{}
updatedSizeGb := common.BytesToGbRoundUp(reqBytes)
updatedVolumeParams.SizeGb = &updatedSizeGb

// If disk is already of size equal or greater than requested size, we
// simply return the found size
if sourceDisk.GetSizeGb() >= updatedSizeGb {
klog.V(4).Infof("ControllerExpandVolume succeeded for disk %v to size %v", volKey, updatedSizeGb)
return &csi.ControllerExpandVolumeResponse{
CapacityBytes: common.GbToBytes(sourceDisk.GetSizeGb()),
NodeExpansionRequired: true,
}, nil
}

v := reflect.ValueOf(updatedVolumeParams)
typeOfS := v.Type()

for i := 0; i < v.NumField(); i++ {
klog.V(5).Infof("===== updatedVolumeParams key is %v, value is %v =====", typeOfS.Field(i).Name, v.Field(i).Interface())
}

// For hyperdisk-throughput, there is no need to update the iops
if gceCS.diskSupportsIopsChange(sourceDisk.GetPDType()) {
// Resize hyperdisk-balanced to 5 Gi requires minimum of 2500 iops
if updatedSizeGb == 5 {
updatedVolumeParams.IOPS = &common.Hyperdisk5GbIops
} else if sourceDisk.GetSizeGb() < 6 && updatedSizeGb >= 6 {
// if old sizeGb is less than 6Gi and the updated value is more or equal than 6Gi, still set iops to 3000
updatedVolumeParams.IOPS = &common.MinHyperdiskIops
}
}

klog.V(4).Infof("ControllerExpandVolume succeeded for disk %v to size %v", volKey, resizedGb)
return &csi.ControllerExpandVolumeResponse{
CapacityBytes: common.GbToBytes(resizedGb),
NodeExpansionRequired: true,
}, nil
err = gceCS.CloudProvider.UpdateDisk(ctx, project, volKey, sourceDisk, updatedVolumeParams)

if err != nil {
return nil, common.LoggedError("ControllerExpandVolume failed to resize disk: ", err)
}

klog.V(4).Infof("ControllerExpandVolume succeeded for hyperdisk %v to size %v", volKey, updatedSizeGb)
return &csi.ControllerExpandVolumeResponse{
CapacityBytes: common.GbToBytes(updatedSizeGb),
NodeExpansionRequired: true,
}, nil
}
}

func (gceCS *GCEControllerServer) getSnapshots(ctx context.Context, req *csi.ListSnapshotsRequest) ([]*csi.ListSnapshotsResponse_Entry, error) {
Expand Down
24 changes: 20 additions & 4 deletions pkg/gce-pd-csi-driver/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func TestCreateSnapshotArguments(t *testing.T) {

if !reflect.DeepEqual(snapshot, tc.expSnapshot) {
errStr := fmt.Sprintf("Expected snapshot: %#v\n to equal snapshot: %#v\n", snapshot, tc.expSnapshot)
t.Errorf(errStr)
t.Errorf("Err: %v", errStr)
}
}
}
Expand Down Expand Up @@ -547,7 +547,7 @@ func TestListSnapshotsArguments(t *testing.T) {
}
if len(snapshots) != tc.expectedCount {
errStr := fmt.Sprintf("Expected snapshot number to equal: %v", tc.numSnapshots)
t.Errorf(errStr)
t.Errorf("Err: %v", errStr)
}
}
}
Expand Down Expand Up @@ -1132,7 +1132,7 @@ func TestCreateVolumeArguments(t *testing.T) {
for i := 0; i < len(vol.GetAccessibleTopology()); i++ {
errStr = errStr + fmt.Sprintf("Got topology %#v\nExpected toplogy %#v\n\n", vol.GetAccessibleTopology()[i], tc.expVol.GetAccessibleTopology()[i])
}
t.Errorf(errStr)
t.Errorf("Err: %v", errStr)
}
}
}
Expand Down Expand Up @@ -1916,7 +1916,23 @@ func TestVolumeModifyOperation(t *testing.T) {
},
expIops: 10000,
expThroughput: 500,
expErrMessage: "no IOPS or Throughput specified for disk",
expErrMessage: "no IOPS or Throughput or SizeGb specified for disk",
},
{
name: "Modify volume with unsupported sizeGb",
req: &csi.ControllerModifyVolumeRequest{
VolumeId: testVolumeID,
MutableParameters: map[string]string{"sizeGb": "800"},
},
diskType: "hyperdisk-balanced",
params: &common.DiskParameters{
DiskType: "hyperdisk-balanced",
ProvisionedIOPSOnCreate: 10000,
ProvisionedThroughputOnCreate: 500,
},
expIops: 10000,
expThroughput: 500,
expErrMessage: "parameters contain unknown parameter: sizeGb",
},
{
name: "Update volume with valid parameters but invalid disk type",
Expand Down
14 changes: 11 additions & 3 deletions test/e2e/tests/resize_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ var _ = Describe("GCE PD CSI Driver", func() {

// Create Disk
volName := testNamePrefix + string(uuid.NewUUID())
volume, err := client.CreateVolume(volName, nil, defaultSizeGb,

hdbParams := map[string]string{
common.ParameterKeyType: "hyperdisk-balanced",
common.ParameterKeyProvisionedIOPSOnCreate: "2500",
common.ParameterKeyProvisionedThroughputOnCreate: "250Mi",
}
volume, err := client.CreateVolume(volName, hdbParams, defaultSizeGb,
&csi.TopologyRequirement{
Requisite: []*csi.Topology{
{
Expand All @@ -51,10 +57,11 @@ var _ = Describe("GCE PD CSI Driver", func() {
// Validate Disk Created
cloudDisk, err := computeService.Disks.Get(p, z, volName).Do()
Expect(err).To(BeNil(), "Could not get disk from cloud directly")
Expect(cloudDisk.Type).To(ContainSubstring(standardDiskType))
Expect(cloudDisk.Type).To(ContainSubstring("hyperdisk-balanced"))
Expect(cloudDisk.Status).To(Equal(readyState))
Expect(cloudDisk.SizeGb).To(Equal(defaultSizeGb))
Expect(cloudDisk.Name).To(Equal(volName))
Expect(cloudDisk.ProvisionedIops).To(Equal(int64(2500)))

defer func() {
// Delete Disk
Expand Down Expand Up @@ -115,7 +122,7 @@ var _ = Describe("GCE PD CSI Driver", func() {
Expect(sizeGb).To(Equal(defaultSizeGb))

// Resize controller
var newSizeGb int64 = 10
var newSizeGb int64 = 6
err = client.ControllerExpandVolume(volume.VolumeId, newSizeGb)

Expect(err).To(BeNil(), "Controller expand volume failed")
Expand All @@ -124,6 +131,7 @@ var _ = Describe("GCE PD CSI Driver", func() {
cloudDisk, err = computeService.Disks.Get(p, z, volName).Do()
Expect(err).To(BeNil(), "Get cloud disk failed")
Expect(cloudDisk.SizeGb).To(Equal(newSizeGb))
Expect(cloudDisk.ProvisionedIops).To(Equal(int64(3000)))

// Resize node
_, err = client.NodeExpandVolume(volume.VolumeId, publishDir, newSizeGb)
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/tests/setup_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ var (
serviceAccount = flag.String("service-account", "", "Service account to bring up instance with")
vmNamePrefix = flag.String("vm-name-prefix", "gce-pd-csi-e2e", "VM name prefix")
architecture = flag.String("arch", "amd64", "Architecture pd csi driver build on")
minCpuPlatform = flag.String("min-cpu-platform", "AMD Milan", "Minimum CPU architecture")
minCpuPlatform = flag.String("min-cpu-platform", "Intel Sapphire Rapids", "Minimum CPU architecture")
zones = flag.String("zones", "us-east4-a,us-east4-c", "Zones to run tests in. If there are multiple zones, separate each by comma")
machineType = flag.String("machine-type", "n2d-standard-2", "Type of machine to provision instance on")
machineType = flag.String("machine-type", "c3-standard-4", "Type of machine to provision instance on")
imageURL = flag.String("image-url", "projects/ubuntu-os-cloud/global/images/family/ubuntu-minimal-2404-lts-amd64", "OS image url to get image from")
runInProw = flag.Bool("run-in-prow", false, "If true, use a Boskos loaned project and special CI service accounts and ssh keys")
deleteInstances = flag.Bool("delete-instances", false, "Delete the instances after tests run")
Expand Down
Loading