From d2e2b7639bfd180512dabb3a40a9fcd51d4af750 Mon Sep 17 00:00:00 2001 From: Serge Logvinov Date: Mon, 13 May 2024 17:10:16 +0300 Subject: [PATCH] feat(occm): support multi region cluster Currently, it supports only single auth section. Set the regions in config as: [Global] region=REGION1 regions=REGION1 regions=REGION2 regions=REGION3 --- ...sing-openstack-cloud-controller-manager.md | 4 +- pkg/client/client.go | 16 ++ pkg/csi/cinder/openstack/openstack.go | 6 +- pkg/csi/cinder/openstack/openstack_test.go | 6 + pkg/openstack/instances.go | 178 ++++++++++++++---- pkg/openstack/openstack.go | 9 +- 6 files changed, 178 insertions(+), 41 deletions(-) diff --git a/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md b/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md index 3379c861f8..7da91d883d 100644 --- a/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md +++ b/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md @@ -114,6 +114,8 @@ The options in `Global` section are used for openstack-cloud-controller-manager Keystone user password. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required. * `region` Required. Keystone region name. +* `regions` + Optional. Keystone region name, which is used to specify regions for the cloud provider where the instance is running. Region is default region name. Can be specified multiple times. * `domain-id` Keystone user domain ID. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required. * `domain-name` @@ -317,7 +319,7 @@ Although the openstack-cloud-controller-manager was initially implemented with N call](https://docs.openstack.org/api-ref/load-balancer/v2/?expanded=create-a-load-balancer-detail#creating-a-fully-populated-load-balancer). Setting this option to true will create loadbalancers using serial API calls which first create an unpopulated loadbalancer, then populate its listeners, pools and members. This is a compatibility option at the expense of - increased load on the OpenStack API. Default: false + increased load on the OpenStack API. Default: false NOTE: diff --git a/pkg/client/client.go b/pkg/client/client.go index e9be5312b2..e13cb222f5 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -53,6 +53,7 @@ type AuthOpts struct { UserDomainID string `gcfg:"user-domain-id" mapstructure:"user-domain-id" name:"os-userDomainID" value:"optional"` UserDomainName string `gcfg:"user-domain-name" mapstructure:"user-domain-name" name:"os-userDomainName" value:"optional"` Region string `name:"os-region"` + Regions []string `name:"os-regions" value:"optional"` EndpointType gophercloud.Availability `gcfg:"os-endpoint-type" mapstructure:"os-endpoint-type" name:"os-endpointType" value:"optional"` CAFile string `gcfg:"ca-file" mapstructure:"ca-file" name:"os-certAuthorityPath" value:"optional"` TLSInsecure string `gcfg:"tls-insecure" mapstructure:"tls-insecure" name:"os-TLSInsecure" value:"optional" matches:"^true|false$"` @@ -87,6 +88,7 @@ func LogCfg(authOpts AuthOpts) { klog.V(5).Infof("UserDomainID: %s", authOpts.UserDomainID) klog.V(5).Infof("UserDomainName: %s", authOpts.UserDomainName) klog.V(5).Infof("Region: %s", authOpts.Region) + klog.V(5).Infof("Regions: %s", authOpts.Regions) klog.V(5).Infof("EndpointType: %s", authOpts.EndpointType) klog.V(5).Infof("CAFile: %s", authOpts.CAFile) klog.V(5).Infof("CertFile: %s", authOpts.CertFile) @@ -232,6 +234,20 @@ func ReadClouds(authOpts *AuthOpts) error { authOpts.ApplicationCredentialName = replaceEmpty(authOpts.ApplicationCredentialName, cloud.AuthInfo.ApplicationCredentialName) authOpts.ApplicationCredentialSecret = replaceEmpty(authOpts.ApplicationCredentialSecret, cloud.AuthInfo.ApplicationCredentialSecret) + regions := strings.Split(authOpts.Region, ",") + if len(regions) > 1 { + authOpts.Region = regions[0] + } + + for _, r := range cloud.Regions { + // Support only single auth section in clouds.yaml + if r.Values.AuthInfo == nil && r.Name != authOpts.Region { + regions = append(regions, r.Name) + } + } + + authOpts.Regions = regions + return nil } diff --git a/pkg/csi/cinder/openstack/openstack.go b/pkg/csi/cinder/openstack/openstack.go index 26f96dd01b..ab75ef6242 100644 --- a/pkg/csi/cinder/openstack/openstack.go +++ b/pkg/csi/cinder/openstack/openstack.go @@ -126,7 +126,7 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) { } } - for _, global := range cfg.Global { + for idx, global := range cfg.Global { // Update the config with data from clouds.yaml if UseClouds is enabled if global.UseClouds { if global.CloudsFile != "" { @@ -138,6 +138,10 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) { } klog.V(5).Infof("Credentials are loaded from %s:", global.CloudsFile) } + + if len(global.Regions) == 0 { + cfg.Global[idx].Regions = []string{global.Region} + } } return cfg, nil diff --git a/pkg/csi/cinder/openstack/openstack_test.go b/pkg/csi/cinder/openstack/openstack_test.go index 647787e9bb..de31b088ff 100644 --- a/pkg/csi/cinder/openstack/openstack_test.go +++ b/pkg/csi/cinder/openstack/openstack_test.go @@ -112,6 +112,7 @@ rescan-on-resize=true` CAFile: fakeCAfile, TenantID: fakeTenantID, Region: fakeRegion, + Regions: []string{fakeRegion}, } expectedOpts.Global["cloud2"] = &client.AuthOpts{ Username: fakeUserName_cloud2, @@ -121,6 +122,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud2, TenantID: fakeTenantID_cloud2, Region: fakeRegion_cloud2, + Regions: []string{fakeRegion_cloud2}, } expectedOpts.Global["cloud3"] = &client.AuthOpts{ Username: fakeUserName_cloud3, @@ -130,6 +132,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud3, TenantID: fakeTenantID_cloud3, Region: fakeRegion_cloud3, + Regions: []string{fakeRegion_cloud3}, } expectedOpts.BlockStorage.RescanOnResize = true @@ -224,6 +227,7 @@ rescan-on-resize=true` CAFile: fakeCAfile, TenantID: fakeTenantID, Region: fakeRegion, + Regions: []string{fakeRegion}, EndpointType: gophercloud.AvailabilityPublic, UseClouds: true, CloudsFile: wd + "/fixtures/clouds.yaml", @@ -237,6 +241,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud2, TenantID: fakeTenantID_cloud2, Region: fakeRegion_cloud2, + Regions: []string{fakeRegion_cloud2}, EndpointType: gophercloud.AvailabilityPublic, UseClouds: true, CloudsFile: wd + "/fixtures/clouds.yaml", @@ -250,6 +255,7 @@ rescan-on-resize=true` CAFile: fakeCAfile_cloud3, TenantID: fakeTenantID_cloud3, Region: fakeRegion_cloud3, + Regions: []string{fakeRegion_cloud3}, EndpointType: gophercloud.AvailabilityPublic, UseClouds: true, CloudsFile: wd + "/fixtures/clouds.yaml", diff --git a/pkg/openstack/instances.go b/pkg/openstack/instances.go index f9b617736f..4aec590b5c 100644 --- a/pkg/openstack/instances.go +++ b/pkg/openstack/instances.go @@ -21,6 +21,7 @@ import ( "fmt" sysos "os" "regexp" + "slices" "strings" "github.com/gophercloud/gophercloud/v2" @@ -46,9 +47,9 @@ const ( // InstancesV2 encapsulates an implementation of InstancesV2 for OpenStack. type InstancesV2 struct { - compute *gophercloud.ServiceClient - network *gophercloud.ServiceClient - region string + compute map[string]*gophercloud.ServiceClient + network map[string]*gophercloud.ServiceClient + regions []string regionProviderID bool networkingOpts NetworkingOpts } @@ -57,16 +58,25 @@ type InstancesV2 struct { func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) { klog.V(4).Info("openstack.Instancesv2() called") - compute, err := client.NewComputeV2(os.provider, os.epOpts) - if err != nil { - klog.Errorf("unable to access compute v2 API : %v", err) - return nil, false - } + var err error + compute := make(map[string]*gophercloud.ServiceClient, len(os.regions)) + network := make(map[string]*gophercloud.ServiceClient, len(os.regions)) - network, err := client.NewNetworkV2(os.provider, os.epOpts) - if err != nil { - klog.Errorf("unable to access network v2 API : %v", err) - return nil, false + for _, region := range os.regions { + opt := os.epOpts + opt.Region = region + + compute[region], err = client.NewComputeV2(os.provider, opt) + if err != nil { + klog.Errorf("unable to access compute v2 API : %v", err) + return nil, false + } + + network[region], err = client.NewNetworkV2(os.provider, opt) + if err != nil { + klog.Errorf("unable to access network v2 API : %v", err) + return nil, false + } } regionalProviderID := false @@ -77,7 +87,7 @@ func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) { return &InstancesV2{ compute: compute, network: network, - region: os.epOpts.Region, + regions: os.regions, regionProviderID: regionalProviderID, networkingOpts: os.networkingOpts, }, true @@ -85,9 +95,15 @@ func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) { // InstanceExists indicates whether a given node exists according to the cloud provider func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) { - _, err := i.getInstance(ctx, node) + klog.V(4).InfoS("openstack.InstanceExists() called", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) + + _, _, err := i.getInstance(ctx, node) if err == cloudprovider.InstanceNotFound { - klog.V(6).Infof("instance not found for node: %s", node.Name) + klog.V(6).InfoS("Node is not found in cloud provider", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) return false, nil } @@ -100,7 +116,11 @@ func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, // InstanceShutdown returns true if the instance is shutdown according to the cloud provider. func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) { - server, err := i.getInstance(ctx, node) + klog.V(4).InfoS("openstack.InstanceShutdown() called", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) + + server, _, err := i.getInstance(ctx, node) if err != nil { return false, err } @@ -115,7 +135,11 @@ func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool // InstanceMetadata returns the instance's metadata. func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) { - srv, err := i.getInstance(ctx, node) + klog.V(4).InfoS("openstack.InstanceMetadata() called", "node", klog.KObj(node), + "providerID", node.Spec.ProviderID, + "region", node.Labels[v1.LabelTopologyRegion]) + + srv, region, err := i.getInstance(ctx, node) if err != nil { return nil, err } @@ -124,17 +148,17 @@ func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*clo server = *srv } - instanceType, err := srvInstanceType(ctx, i.compute, &server) + instanceType, err := srvInstanceType(ctx, i.compute[region], &server) if err != nil { return nil, err } - ports, err := getAttachedPorts(ctx, i.network, server.ID) + ports, err := getAttachedPorts(ctx, i.network[region], server.ID) if err != nil { return nil, err } - addresses, err := nodeAddresses(ctx, &server, ports, i.network, i.networkingOpts) + addresses, err := nodeAddresses(ctx, &server, ports, i.network[region], i.networkingOpts) if err != nil { return nil, err } @@ -142,44 +166,122 @@ func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*clo availabilityZone := util.SanitizeLabel(server.AvailabilityZone) return &cloudprovider.InstanceMetadata{ - ProviderID: i.makeInstanceID(&server), + ProviderID: i.makeInstanceID(&server, region), InstanceType: instanceType, NodeAddresses: addresses, Zone: availabilityZone, - Region: i.region, + Region: region, }, nil } -func (i *InstancesV2) makeInstanceID(srv *servers.Server) string { +func (i *InstancesV2) makeInstanceID(srv *servers.Server, region string) string { if i.regionProviderID { - return fmt.Sprintf("%s://%s/%s", ProviderName, i.region, srv.ID) + return fmt.Sprintf("%s://%s/%s", ProviderName, region, srv.ID) } return fmt.Sprintf("%s:///%s", ProviderName, srv.ID) } -func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, error) { - if node.Spec.ProviderID == "" { - return getServerByName(ctx, i.compute, node.Name) +func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, string, error) { + var instanceID, instanceRegion string + + if node.Spec.ProviderID != "" { + var err error + + instanceID, instanceRegion, err = instanceIDFromProviderID(node.Spec.ProviderID) + if err != nil { + return nil, "", err + } } - instanceID, instanceRegion, err := instanceIDFromProviderID(node.Spec.ProviderID) - if err != nil { - return nil, err + if instanceRegion != "" { + if slices.Contains(i.regions, instanceRegion) { + return i.getInstanceByID(ctx, instanceID, []string{instanceRegion}) + } + + return nil, "", fmt.Errorf("getInstance: ProviderID \"%s\" didn't match supported regions \"%s\"", node.Spec.ProviderID, strings.Join(i.regions, ",")) + } + + // At this point we know that ProviderID is not properly set or it doesn't contain region information + // We need to search for the instance in all regions + var searchRegions []string + + // We cannot trust the region label, so we need to check the region + instanceRegion = node.Labels[v1.LabelTopologyRegion] + if slices.Contains(i.regions, instanceRegion) { + searchRegions = []string{instanceRegion} } - if instanceRegion != "" && instanceRegion != i.region { - return nil, fmt.Errorf("ProviderID \"%s\" didn't match supported region \"%s\"", node.Spec.ProviderID, i.region) + for _, r := range i.regions { + if r != instanceRegion { + searchRegions = append(searchRegions, r) + } } + klog.V(4).InfoS("openstack.getInstance() trying to find the instance in regions", "node", klog.KObj(node), + "instanceID", instanceID, + "regions", strings.Join(searchRegions, ",")) + + if instanceID == "" { + return i.getInstanceByName(ctx, node.Name, searchRegions) + } + + return i.getInstanceByID(ctx, instanceID, searchRegions) +} + +func (i *InstancesV2) getInstanceByID(ctx context.Context, instanceID string, searchRegions []string) (*servers.Server, string, error) { + server := servers.Server{} + mc := metrics.NewMetricContext("server", "get") - server, err := servers.Get(ctx, i.compute, instanceID).Extract() - if mc.ObserveRequest(err) != nil { - if errors.IsNotFound(err) { - return nil, cloudprovider.InstanceNotFound + for _, r := range searchRegions { + err := servers.Get(ctx, i.compute[r], instanceID).ExtractInto(&server) + if mc.ObserveRequest(err) != nil { + if errors.IsNotFound(err) { + continue + } + + return nil, "", err } - return nil, err + + return &server, r, nil } - return server, nil + + return nil, "", cloudprovider.InstanceNotFound +} + +func (i *InstancesV2) getInstanceByName(ctx context.Context, name string, searchRegions []string) (*servers.Server, string, error) { + opts := servers.ListOpts{ + Name: fmt.Sprintf("^%s$", regexp.QuoteMeta(name)), + } + + serverList := make([]servers.Server, 0, 1) + mc := metrics.NewMetricContext("server", "list") + + for _, r := range searchRegions { + pager := servers.List(i.compute[r], opts) + + err := pager.EachPage(ctx, func(_ context.Context, page pagination.Page) (bool, error) { + s, err := servers.ExtractServers(page) + if err != nil { + return false, err + } + serverList = append(serverList, s...) + if len(serverList) > 1 { + return false, errors.ErrMultipleResults + } + return true, nil + }) + if mc.ObserveRequest(err) != nil { + return nil, "", err + } + + if len(serverList) == 0 { + continue + } + + return &serverList[0], r, nil + } + + return nil, "", cloudprovider.InstanceNotFound } func getServerByName(ctx context.Context, client *gophercloud.ServiceClient, name string) (*servers.Server, error) { diff --git a/pkg/openstack/openstack.go b/pkg/openstack/openstack.go index bbd94cfda6..a251c612c8 100644 --- a/pkg/openstack/openstack.go +++ b/pkg/openstack/openstack.go @@ -114,7 +114,7 @@ type LoadBalancerOpts struct { MonitorMaxRetriesDown uint `gcfg:"monitor-max-retries-down"` ManageSecurityGroups bool `gcfg:"manage-security-groups"` InternalLB bool `gcfg:"internal-lb"` // default false - NodeSelector string `gcfg:"node-selector"` // If specified, the loadbalancer members will be assined only from nodes list filtered by node-selector labels + NodeSelector string `gcfg:"node-selector"` // If specified, the loadbalancer members will be assigned only from nodes list filtered by node-selector labels CascadeDelete bool `gcfg:"cascade-delete"` FlavorID string `gcfg:"flavor-id"` AvailabilityZone string `gcfg:"availability-zone"` @@ -154,6 +154,7 @@ type RouterOpts struct { // OpenStack is an implementation of cloud provider Interface for OpenStack. type OpenStack struct { + regions []string provider *gophercloud.ProviderClient epOpts *gophercloud.EndpointOpts lbOpts LoadBalancerOpts @@ -250,6 +251,11 @@ func ReadConfig(config io.Reader) (Config, error) { klog.V(5).Infof("Config, loaded from the %s:", cfg.Global.CloudsFile) client.LogCfg(cfg.Global) } + + if len(cfg.Global.Regions) == 0 { + cfg.Global.Regions = []string{cfg.Global.Region} + } + // Set the default values for search order if not set if cfg.Metadata.SearchOrder == "" { cfg.Metadata.SearchOrder = fmt.Sprintf("%s,%s", metadata.ConfigDriveID, metadata.MetadataID) @@ -296,6 +302,7 @@ func NewOpenStack(cfg Config) (*OpenStack, error) { provider.HTTPClient.Timeout = cfg.Metadata.RequestTimeout.Duration os := OpenStack{ + regions: cfg.Global.Regions, provider: provider, epOpts: &gophercloud.EndpointOpts{ Region: cfg.Global.Region,