From 383a51a3e349700d9eb7095ac9f952e074fb55b2 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Wed, 11 Mar 2026 19:34:49 +0000 Subject: [PATCH 01/17] try using address pointers Signed-off-by: Alex Leong --- .../endpoint_profile_translator.go | 4 +-- .../api/destination/endpoint_translator.go | 18 +++++------ .../destination/endpoint_translator_test.go | 14 ++++----- .../destination/watcher/endpoints_watcher.go | 30 +++++++++---------- .../watcher/endpoints_watcher_test.go | 4 +-- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/controller/api/destination/endpoint_profile_translator.go b/controller/api/destination/endpoint_profile_translator.go index f788649dfd0a4..790af2324232d 100644 --- a/controller/api/destination/endpoint_profile_translator.go +++ b/controller/api/destination/endpoint_profile_translator.go @@ -130,7 +130,7 @@ func (ept *endpointProfileTranslator) update(address *watcher.Address) { } else { opaquePorts = watcher.GetAnnotatedOpaquePortsForExternalWorkload(address.ExternalWorkload, ept.defaultOpaquePorts) } - endpoint, err := ept.createEndpoint(*address, opaquePorts) + endpoint, err := ept.createEndpoint(address, opaquePorts) if err != nil { ept.log.Errorf("Failed to create endpoint for %s:%d: %s", address.IP, address.Port, err) @@ -158,7 +158,7 @@ func (ept *endpointProfileTranslator) update(address *watcher.Address) { ept.current = profile } -func (ept *endpointProfileTranslator) createEndpoint(address watcher.Address, opaquePorts map[uint32]struct{}) (*pb.WeightedAddr, error) { +func (ept *endpointProfileTranslator) createEndpoint(address *watcher.Address, opaquePorts map[uint32]struct{}) (*pb.WeightedAddr, error) { var weightedAddr *pb.WeightedAddr var err error if address.ExternalWorkload != nil { diff --git a/controller/api/destination/endpoint_translator.go b/controller/api/destination/endpoint_translator.go index 7fd292574cfd5..079678f36fc97 100644 --- a/controller/api/destination/endpoint_translator.go +++ b/controller/api/destination/endpoint_translator.go @@ -244,7 +244,7 @@ func (et *endpointTranslator) remove(set watcher.AddressSet) { func (et *endpointTranslator) noEndpoints(exists bool) { et.log.Debugf("NoEndpoints(%+v)", exists) - et.availableEndpoints.Addresses = map[watcher.ID]watcher.Address{} + et.availableEndpoints.Addresses = map[watcher.ID]*watcher.Address{} et.sendFilteredUpdate() } @@ -265,7 +265,7 @@ func (et *endpointTranslator) sendFilteredUpdate() { } func (et *endpointTranslator) selectAddressFamily(addresses watcher.AddressSet) watcher.AddressSet { - filtered := make(map[watcher.ID]watcher.Address) + filtered := make(map[watcher.ID]*watcher.Address) for id, addr := range addresses.Addresses { if id.IPFamily == corev1.IPv6Protocol && !et.enableIPv6 { continue @@ -298,7 +298,7 @@ func (et *endpointTranslator) selectAddressFamily(addresses watcher.AddressSet) // when service.spec.internalTrafficPolicy is set to local, Topology Aware // Hints are not used. func (et *endpointTranslator) filterAddresses() watcher.AddressSet { - filtered := make(map[watcher.ID]watcher.Address) + filtered := make(map[watcher.ID]*watcher.Address) // If endpoint filtering is disabled, return all available addresses. if !et.enableEndpointFiltering { @@ -381,8 +381,8 @@ func (et *endpointTranslator) filterAddresses() watcher.AddressSet { // the endpoints that match the topological zone, by adding new endpoints and // removing stale ones. 
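+// Note: with Addresses now a map of *Address values (the point of this patch),
+// the add and remove sets built here hold pointers to Address values rather
+// than copies of the structs.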
func (et *endpointTranslator) diffEndpoints(filtered watcher.AddressSet) (watcher.AddressSet, watcher.AddressSet) { - add := make(map[watcher.ID]watcher.Address) - remove := make(map[watcher.ID]watcher.Address) + add := make(map[watcher.ID]*watcher.Address) + remove := make(map[watcher.ID]*watcher.Address) for id, new := range filtered.Addresses { old, ok := et.filteredSnapshot.Addresses[id] @@ -529,7 +529,7 @@ func (et *endpointTranslator) sendClientRemove(set watcher.AddressSet) { } } -func toAddr(address watcher.Address) (*net.TcpAddress, error) { +func toAddr(address *watcher.Address) (*net.TcpAddress, error) { ip, err := addr.ParseProxyIP(address.IP) if err != nil { return nil, err @@ -541,7 +541,7 @@ func toAddr(address watcher.Address) (*net.TcpAddress, error) { } func createWeightedAddrForExternalWorkload( - address watcher.Address, + address *watcher.Address, forceOpaqueTransport bool, opaquePorts map[uint32]struct{}, http2 *pb.Http2ClientParams, @@ -612,7 +612,7 @@ func createWeightedAddrForExternalWorkload( } func createWeightedAddr( - address watcher.Address, + address *watcher.Address, opaquePorts map[uint32]struct{}, forceOpaqueTransport bool, enableH2Upgrade bool, @@ -726,7 +726,7 @@ func getNodeTopologyZone(k8sAPI *k8s.MetadataAPI, srcNode string) (string, error func newEmptyAddressSet() watcher.AddressSet { return watcher.AddressSet{ - Addresses: make(map[watcher.ID]watcher.Address), + Addresses: make(map[watcher.ID]*watcher.Address), Labels: make(map[string]string), } } diff --git a/controller/api/destination/endpoint_translator_test.go b/controller/api/destination/endpoint_translator_test.go index 43fe66fbbe21f..c67ab2c2ec313 100644 --- a/controller/api/destination/endpoint_translator_test.go +++ b/controller/api/destination/endpoint_translator_test.go @@ -1166,7 +1166,7 @@ func TestEndpointTranslatorForLocalTrafficPolicy(t *testing.T) { addressSet.LocalTrafficPolicy = true translator.Add(addressSet) set := watcher.AddressSet{ - Addresses: make(map[watcher.ServiceID]watcher.Address), + Addresses: make(map[watcher.ServiceID]*watcher.Address), Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, LocalTrafficPolicy: false, } @@ -1238,7 +1238,7 @@ func TestGetInboundPort(t *testing.T) { func mkAddressSetForServices(gatewayAddresses ...watcher.Address) watcher.AddressSet { set := watcher.AddressSet{ - Addresses: make(map[watcher.ServiceID]watcher.Address), + Addresses: make(map[watcher.ServiceID]*watcher.Address), Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, } for _, a := range gatewayAddresses { @@ -1250,7 +1250,7 @@ func mkAddressSetForServices(gatewayAddresses ...watcher.Address) watcher.Addres fmt.Sprint(a.Port), }, "-"), } - set.Addresses[id] = a + set.Addresses[id] = &a } return set } @@ -1259,7 +1259,7 @@ func mkAddressSetForPods(t *testing.T, podAddresses ...watcher.Address) watcher. t.Helper() set := watcher.AddressSet{ - Addresses: make(map[watcher.PodID]watcher.Address), + Addresses: make(map[watcher.PodID]*watcher.Address), Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, } for _, p := range podAddresses { @@ -1279,19 +1279,19 @@ func mkAddressSetForPods(t *testing.T, podAddresses ...watcher.Address) watcher. 
Namespace: p.Pod.Namespace, IPFamily: fam, } - set.Addresses[id] = p + set.Addresses[id] = &p } return set } func mkAddressSetForExternalWorkloads(ewAddresses ...watcher.Address) watcher.AddressSet { set := watcher.AddressSet{ - Addresses: make(map[watcher.PodID]watcher.Address), + Addresses: make(map[watcher.PodID]*watcher.Address), Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, } for _, ew := range ewAddresses { id := watcher.ExternalWorkloadID{Name: ew.ExternalWorkload.Name, Namespace: ew.ExternalWorkload.Namespace} - set.Addresses[id] = ew + set.Addresses[id] = &ew } return set } diff --git a/controller/api/destination/watcher/endpoints_watcher.go b/controller/api/destination/watcher/endpoints_watcher.go index 394970cc76ec9..415e883d902ca 100644 --- a/controller/api/destination/watcher/endpoints_watcher.go +++ b/controller/api/destination/watcher/endpoints_watcher.go @@ -67,7 +67,7 @@ type ( // id.IPFamily refers to the ES AddressType (see newPodRefAddress). // 3) A reference to an ExternalWorkload: id.Name refers to the EW's name. AddressSet struct { - Addresses map[ID]Address + Addresses map[ID]*Address Labels map[string]string LocalTrafficPolicy bool } @@ -164,7 +164,7 @@ var undefinedEndpointPort = Port(0) // ExternalWorkload fields of the Addresses map values still point to the // locations of the original variable func (addr AddressSet) shallowCopy() AddressSet { - addresses := make(map[ID]Address) + addresses := make(map[ID]*Address) for k, v := range addr.Addresses { addresses[k] = v } @@ -829,7 +829,7 @@ func (pp *portPublisher) addEndpointSlice(slice *discovery.EndpointSlice) { func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, newSlice *discovery.EndpointSlice) { updatedAddressSet := AddressSet{ - Addresses: make(map[ID]Address), + Addresses: make(map[ID]*Address), Labels: pp.addresses.Labels, LocalTrafficPolicy: pp.localTrafficPolicy, } @@ -905,7 +905,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A if resolvedPort == undefinedEndpointPort { return AddressSet{ Labels: metricLabels(es), - Addresses: make(map[ID]Address), + Addresses: make(map[ID]*Address), LocalTrafficPolicy: pp.localTrafficPolicy, } } @@ -915,7 +915,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A pp.log.Errorf("Could not fetch resource service name:%v", err) } - addresses := make(map[ID]Address) + addresses := make(map[ID]*Address) for _, endpoint := range es.Endpoints { if endpoint.Hostname != nil { if pp.hostname != "" && pp.hostname != *endpoint.Hostname { @@ -942,7 +942,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A copy(zones, endpoint.Hints.ForZones) address.ForZones = zones } - addresses[id] = address + addresses[id] = &address } continue } @@ -971,7 +971,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A copy(zones, endpoint.Hints.ForZones) address.ForZones = zones } - addresses[id] = address + addresses[id] = &address } } @@ -996,7 +996,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A address.ForZones = zones } - addresses[id] = address + addresses[id] = &address } } @@ -1065,7 +1065,7 @@ func (pp *portPublisher) endpointSliceToIDs(es *discovery.EndpointSlice) []ID { } func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) AddressSet { - addresses := make(map[ID]Address) + addresses := make(map[ID]*Address) for _, subset := range 
endpoints.Subsets { resolvedPort := pp.resolveTargetPort(subset) if resolvedPort == undefinedEndpointPort { @@ -1086,7 +1086,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, endpoints.Name, endpoints.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride - addresses[id] = address + addresses[id] = &address continue } @@ -1106,7 +1106,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre if err != nil { pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) } - addresses[id] = address + addresses[id] = &address } } } @@ -1341,7 +1341,7 @@ func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { } } -func (pp *portPublisher) isAddressSelected(address Address, server *v1beta3.Server) bool { +func (pp *portPublisher) isAddressSelected(address *Address, server *v1beta3.Server) bool { if server == nil { return false } @@ -1427,7 +1427,7 @@ func getTargetPort(service *corev1.Service, port Port) namedPort { return targetPort } -func addressChanged(oldAddress Address, newAddress Address) bool { +func addressChanged(oldAddress *Address, newAddress *Address) bool { if oldAddress.Identity != newAddress.Identity { // in this case the identity could have changed; this can happen when for @@ -1469,8 +1469,8 @@ func diffAddresses(oldAddresses, newAddresses AddressSet) (add, remove AddressSe // TODO: this detects pods which have been added or removed, but does not // detect addresses which have been modified. A modified address should trigger // an add of the new version. - addAddresses := make(map[ID]Address) - removeAddresses := make(map[ID]Address) + addAddresses := make(map[ID]*Address) + removeAddresses := make(map[ID]*Address) for id, newAddress := range newAddresses.Addresses { if oldAddress, ok := oldAddresses.Addresses[id]; ok { if addressChanged(oldAddress, newAddress) { diff --git a/controller/api/destination/watcher/endpoints_watcher_test.go b/controller/api/destination/watcher/endpoints_watcher_test.go index 3c1c20ae1349d..26db1b496b0b1 100644 --- a/controller/api/destination/watcher/endpoints_watcher_test.go +++ b/controller/api/destination/watcher/endpoints_watcher_test.go @@ -36,7 +36,7 @@ func newBufferingEndpointListener() *bufferingEndpointListener { } } -func addressString(address Address) string { +func addressString(address *Address) string { addressString := fmt.Sprintf("%s:%d", address.IP, address.Port) if address.Identity != "" { addressString = fmt.Sprintf("%s/%s", addressString, address.Identity) @@ -114,7 +114,7 @@ func newBufferingEndpointListenerWithResVersion() *bufferingEndpointListenerWith } } -func addressStringWithResVersion(address Address) string { +func addressStringWithResVersion(address *Address) string { return fmt.Sprintf("%s:%d:%s", address.IP, address.Port, address.Pod.ResourceVersion) } From 695262dc3fe288cafb3401d96eec058f7ce6d33d Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Thu, 12 Mar 2026 22:26:28 +0000 Subject: [PATCH 02/17] Load controller Signed-off-by: Alex Leong --- Cargo.lock | 215 ++++++++-- Cargo.toml | 7 +- Dockerfile.load-controller | 24 ++ test/destination-test/Dockerfile | 24 ++ test/destination-test/README.md | 247 ++++++++++++ test/destination-test/chart/Chart.yaml | 12 + test/destination-test/chart/README.md | 127 ++++++ .../chart/templates/_helpers.tpl | 49 +++ .../chart/templates/churn.yaml | 71 ++++ .../chart/templates/client.yaml | 134 +++++++ 
.../chart/templates/rbac.yaml | 52 +++ .../chart/templates/test-services.yaml | 64 +++ test/destination-test/chart/values.yaml | 126 ++++++ .../dst-load-controller/Cargo.toml | 36 ++ .../dst-load-controller/src/churn.rs | 227 +++++++++++ .../dst-load-controller/src/client.rs | 375 ++++++++++++++++++ .../dst-load-controller/src/main.rs | 281 +++++++++++++ test/destination-test/hack/gen-certs.sh | 81 ++++ test/destination-test/hack/kwok-node.yaml | 42 ++ test/destination-test/helmfile.yaml.gotmpl | 118 ++++++ test/destination-test/values/README.md | 153 +++++++ .../values/kube-prometheus-stack.yaml | 106 +++++ .../values/linkerd-monitoring.yaml | 17 + 23 files changed, 2559 insertions(+), 29 deletions(-) create mode 100644 Dockerfile.load-controller create mode 100644 test/destination-test/Dockerfile create mode 100644 test/destination-test/README.md create mode 100644 test/destination-test/chart/Chart.yaml create mode 100644 test/destination-test/chart/README.md create mode 100644 test/destination-test/chart/templates/_helpers.tpl create mode 100644 test/destination-test/chart/templates/churn.yaml create mode 100644 test/destination-test/chart/templates/client.yaml create mode 100644 test/destination-test/chart/templates/rbac.yaml create mode 100644 test/destination-test/chart/templates/test-services.yaml create mode 100644 test/destination-test/chart/values.yaml create mode 100644 test/destination-test/dst-load-controller/Cargo.toml create mode 100644 test/destination-test/dst-load-controller/src/churn.rs create mode 100644 test/destination-test/dst-load-controller/src/client.rs create mode 100644 test/destination-test/dst-load-controller/src/main.rs create mode 100755 test/destination-test/hack/gen-certs.sh create mode 100644 test/destination-test/hack/kwok-node.yaml create mode 100644 test/destination-test/helmfile.yaml.gotmpl create mode 100644 test/destination-test/values/README.md create mode 100644 test/destination-test/values/kube-prometheus-stack.yaml create mode 100644 test/destination-test/values/linkerd-monitoring.yaml diff --git a/Cargo.lock b/Cargo.lock index d6cb86760855f..36e9bcfeebb89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,12 +39,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.0", +] + [[package]] 
name = "anyhow" version = "1.0.102" @@ -274,8 +318,10 @@ version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", ] [[package]] @@ -305,6 +351,12 @@ dependencies = [ "cc", ] +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -466,6 +518,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "dst-load-controller" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "futures", + "k8s-openapi", + "kube", + "kubert", + "linkerd2-proxy-api", + "prometheus-client", + "rand 0.8.5", + "serde", + "serde_json", + "tokio", + "tonic", + "tracing", + "tracing-subscriber", +] + [[package]] name = "dtoa" version = "1.0.10" @@ -621,6 +694,7 @@ checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", + "futures-executor", "futures-io", "futures-sink", "futures-task", @@ -643,6 +717,17 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.32" @@ -991,7 +1076,7 @@ dependencies = [ "hyper", "libc", "pin-project-lite", - "socket2", + "socket2 0.6.2", "tokio", "tower-service", "tracing", @@ -1055,6 +1140,12 @@ dependencies = [ "serde", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -1385,7 +1476,7 @@ dependencies = [ "linkerd2-proxy-api", "maplit", "prometheus-client", - "prost-types", + "prost-types 0.14.3", "serde", "serde_json", "tokio", @@ -1523,17 +1614,16 @@ dependencies = [ [[package]] name = "linkerd2-proxy-api" -version = "0.18.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9e3b341ca4992feaf43a4d2bdbfe2081aa3e2b9a503753544ce55242af6342" +checksum = "bb83fdbbcea49285182d75aacc20ced8ebce60030be1d72d87b00f58f07d267d" dependencies = [ "http", "ipnet", - "prost", - "prost-types", + "prost 0.13.5", + "prost-types 0.13.5", "thiserror 2.0.18", "tonic", - "tonic-prost", ] [[package]] @@ -1625,6 +1715,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "openssl" version = "0.10.75" @@ -1877,6 +1973,16 @@ dependencies = [ "syn", ] +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" 
+dependencies = [ + "bytes", + "prost-derive 0.13.5", +] + [[package]] name = "prost" version = "0.14.3" @@ -1884,7 +1990,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.14.3", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1900,13 +2019,22 @@ dependencies = [ "syn", ] +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost 0.13.5", +] + [[package]] name = "prost-types" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ - "prost", + "prost 0.14.3", ] [[package]] @@ -1924,13 +2052,24 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha", + "rand_chacha 0.9.0", "rand_core 0.9.3", ] @@ -1945,6 +2084,16 @@ dependencies = [ "rand_core 0.10.0", ] +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + [[package]] name = "rand_chacha" version = "0.9.0" @@ -1955,6 +2104,15 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.16", +] + [[package]] name = "rand_core" version = "0.9.3" @@ -2353,6 +2511,16 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.2" @@ -2453,7 +2621,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.2", "tokio-macros", "windows-sys 0.61.0", ] @@ -2542,9 +2710,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.14.5" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" +checksum = 
"7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" dependencies = [ "async-trait", "axum", @@ -2559,8 +2727,8 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "socket2", - "sync_wrapper", + "prost 0.13.5", + "socket2 0.5.10", "tokio", "tokio-stream", "tower", @@ -2569,17 +2737,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "tonic-prost" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" -dependencies = [ - "bytes", - "prost", - "tonic", -] - [[package]] name = "tower" version = "0.5.3" @@ -2769,6 +2926,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "valuable" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index f54cee5f841ae..21e5b3fcf4909 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "policy-controller/k8s/status", "policy-controller/runtime", "policy-test", + "test/destination-test/dst-load-controller", ] [profile.release] @@ -22,7 +23,7 @@ k8s-openapi = { version = "0.25", features = ["v1_33"] } kube = { version = "1.1", default-features = false } kubert = { version = "0.25", default-features = false } prometheus-client = { version = "0.23", default-features = false } -tonic = { version = "0.14", default-features = false } +tonic = { version = "0.13", default-features = false } tower = { version = "0.5", default-features = false } linkerd-policy-controller = { path = "./policy-controller" } @@ -47,5 +48,5 @@ path = "./policy-controller/runtime" default-features = false [workspace.dependencies.linkerd2-proxy-api] -version = "0.18.0" -features = ["inbound", "outbound"] +version = "0.17.0" +features = ["destination", "inbound", "outbound"] diff --git a/Dockerfile.load-controller b/Dockerfile.load-controller new file mode 100644 index 0000000000000..9df405578f15b --- /dev/null +++ b/Dockerfile.load-controller @@ -0,0 +1,24 @@ +# Build stage +FROM docker.io/rust:1.90-bookworm AS build + +WORKDIR /build + +RUN mkdir -p target/bin +COPY Cargo.toml Cargo.lock . 
+COPY policy-controller ./policy-controller +COPY policy-test ./policy-test +COPY test ./test + +# Build the binary +RUN cargo build --release --bin dst-load-controller + +# Runtime stage +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /build/target/release/dst-load-controller /usr/local/bin/dst-load-controller + +ENTRYPOINT ["/usr/local/bin/dst-load-controller"] diff --git a/test/destination-test/Dockerfile b/test/destination-test/Dockerfile new file mode 100644 index 0000000000000..f5f6ddf4e499b --- /dev/null +++ b/test/destination-test/Dockerfile @@ -0,0 +1,24 @@ +# Build stage +FROM docker.io/rust:1.90-bookworm AS build + +WORKDIR /build + +# Copy workspace manifest +COPY Cargo.toml ./ + +# Copy binary crate +COPY dst-load-controller ./dst-load-controller + +# Build the binary +RUN cargo build --release --bin dst-load-controller + +# Runtime stage +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /build/target/release/dst-load-controller /usr/local/bin/dst-load-controller + +ENTRYPOINT ["/usr/local/bin/dst-load-controller"] diff --git a/test/destination-test/README.md b/test/destination-test/README.md new file mode 100644 index 0000000000000..fdc7586d03bbf --- /dev/null +++ b/test/destination-test/README.md @@ -0,0 +1,247 @@ +# Destination Service Load Testing + +This directory contains load testing infrastructure for Linkerd's destination service. + +## Overview + +The `dst-load-controller` is a Rust binary with two subcommands: + +- **`churn`**: Creates and manages Services/Deployments in a target cluster +- **`client`**: Creates gRPC clients that subscribe to the Destination service + +## Architecture + +The load testing framework uses: + +- **Helmfile** for declarative cluster configuration (production-aligned) +- **KWOK** for creating fake Pods/Nodes without actual containers +- **Linkerd** installed via Helm (matching customer deployments) +- **step-cli** for generating shared trust roots (multicluster identity) +- **kube-prometheus-stack** (optional) for metrics collection + +## Prerequisites + +1. **Tools** (all available in dev container): + - `k3d` - local Kubernetes clusters + - `helm` - Kubernetes package manager + - `helmfile` - declarative Helm release management + - `step` - certificate generation + - `kubectl` - Kubernetes CLI + - `linkerd` - Linkerd CLI (for linking clusters) + - Rust toolchain - for building `dst-load-controller` + +2. **Container registry access**: + - Images will be built locally and loaded into k3d clusters + +## Quick Start + +### 1. Generate Shared Trust Root + +```bash +# Generate certificates for Linkerd identity +./hack/gen-certs.sh +export LINKERD_CA_DIR=/tmp/linkerd-ca +``` + +This creates: + +- `ca.crt` - Trust anchor +- `issuer.crt` - Issuer certificate +- `issuer.key` - Issuer private key + +### 2. Create k3d Cluster + +```bash +# Create test cluster +k3d cluster create test \ + --no-lb \ + --k3s-arg '--disable=local-storage,traefik,servicelb,metrics-server@server:*' +``` + +### 3. Deploy Infrastructure (Linkerd + KWOK) + +```bash +# Install all dependencies via Helmfile +LINKERD_CA_DIR=/tmp/linkerd-ca helmfile sync + +# Create a fake KWOK node for pod scheduling +kubectl apply -f hack/kwok-node.yaml + +# Wait for Linkerd to be ready +linkerd check +``` + +### 4. 
Optional: Install Monitoring + +```bash +# Install kube-prometheus-stack +LINKERD_CA_DIR=/tmp/linkerd-ca helmfile --state-values-set monitoring.enabled=true apply +``` + +### 5. Build and Deploy Load Controllers + +```bash +# Build Docker image +docker build -t dst-load-controller:latest -f Dockerfile . + +# Load into k3d cluster +k3d image import dst-load-controller:latest --cluster k3s-default + +# Deploy via Helm (churn controller only) +helm install dst-load chart/ -n dst-test + +# Or with custom configuration +helm install dst-load chart/ -n dst-test \ + --set churn.stable.services=20 \ + --set churn.stable.endpoints=50 \ + --set churn.oscillate.services=5 + +# Monitor churn metrics +kubectl port-forward -n dst-test pod/dst-load-dst-load-test-churn 8080:8080 & +curl localhost:8080/metrics | grep churn_ +``` + +### 6. Verify Load Test + +```bash +# Check services created by churn controller +kubectl get svc,deploy,pods -n dst-test + +# Verify endpoints are visible to Linkerd +linkerd diagnostics endpoints stable-svc-0.dst-test.svc.cluster.local:8080 +``` + +k3d image import dst-load-controller:latest --cluster test + +# Deploy controllers + +# TODO: Helm chart for deploying controllers + +``` + +## Running Load Tests + +### Scenario 1: Baseline (Stable Observation Load) + +```bash +# Create 10 stable services with 100 endpoints each +dst-load-controller churn \ + --namespace=dst-load-test \ + --stable-services=10 \ + --stable-endpoints=100 + +# In another terminal: 100 clients watching all services +dst-load-controller client \ + --destination-addr=linkerd-dst.linkerd:8086 \ + --target-services=$(for i in {0..9}; do echo "stable-svc-$i.dst-load-test.svc.cluster.local:8080"; done | paste -sd,) +``` + +### Scenario 2: Small Oscillation (Autoscaler Pattern) + +```bash +# 10 services oscillating 10→200→10 endpoints +dst-load-controller churn \ + --namespace=dst-load-test \ + --oscillate-services=10 \ + --oscillate-min-endpoints=10 \ + --oscillate-max-endpoints=200 \ + --oscillate-hold-duration=2m \ + --oscillate-jitter-percent=5 + +# In another terminal: 100 clients watching oscillating services +dst-load-controller client \ + --destination-addr=linkerd-dst.linkerd:8086 \ + --target-services=$(for i in {0..9}; do echo "oscillate-svc-$i.dst-load-test.svc.cluster.local:8080"; done | paste -sd,) +``` + +## Observability + +### Prometheus Metrics + +Access Prometheus: + +```bash +kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090 +# Open http://localhost:9090 +``` + +Key metrics to monitor: + +```promql +# Destination controller +destination_endpoint_views_active +rate(destination_stream_send_timeouts_total[5m]) +container_memory_working_set_bytes{pod=~"linkerd-destination-.*"} + +# Load controller metrics +churn_services_created_total +churn_scale_events_total +churn_current_replicas +``` + +### Grafana Dashboards + +Access Grafana: + +```bash +kubectl port-forward -n monitoring svc/kube-prometheus-stack-grafana 3000:80 +# Open http://localhost:3000 (admin/admin) +``` + +## Development + +### Building the Binary + +```bash +cargo build # Debug build +cargo build --release # Release build +cargo run -- churn --help # Test CLI +``` + +### Testing Certificate Generation + +```bash +./hack/gen-certs.sh /tmp/test-ca +step certificate inspect /tmp/test-ca/ca.crt +step certificate inspect /tmp/test-ca/issuer.crt +``` + +### Cleanup + +```bash +# Uninstall everything +helmfile destroy + +# Delete cluster +k3d cluster delete test + +# Clean certificates +rm -rf 
/tmp/linkerd-ca +``` + +## Project Structure + +``` +test/destination-test/ +├── Cargo.toml # Rust workspace +├── Dockerfile # Container build +├── helmfile.yaml # Infrastructure as code +├── README.md # This file +├── dst-load-controller/ # Binary crate +│ ├── Cargo.toml +│ └── src/ +│ ├── main.rs # CLI + orchestration +│ └── churn.rs # Service/Deployment churn logic +├── hack/ +│ └── gen-certs.sh # Certificate generation (step) +└── values/ + ├── kube-prometheus-stack.yaml + ├── linkerd-multicluster-source.yaml + └── linkerd-multicluster-target.yaml +``` + +## See Also + +- [LOAD_TEST_PLAN.md](../../controller/api/destination/LOAD_TEST_PLAN.md) - Full test scenarios and architecture +- [Linkerd Helm docs](https://linkerd.io/2/tasks/install-helm/) - Production Helm installation guide +- [KWOK documentation](https://kwok.sigs.k8s.io/) - Fake node/pod simulation diff --git a/test/destination-test/chart/Chart.yaml b/test/destination-test/chart/Chart.yaml new file mode 100644 index 0000000000000..58d84f4022e29 --- /dev/null +++ b/test/destination-test/chart/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: dst-load-test +description: Destination service load testing controllers (churn + client) +type: application +version: 0.1.0 +appVersion: "0.1.0" +keywords: + - linkerd + - load-testing + - destination +maintainers: + - name: Linkerd Authors diff --git a/test/destination-test/chart/README.md b/test/destination-test/chart/README.md new file mode 100644 index 0000000000000..aed2f0978df63 --- /dev/null +++ b/test/destination-test/chart/README.md @@ -0,0 +1,127 @@ +# dst-load-test Helm Chart + +This Helm chart deploys load testing controllers for Linkerd's destination service. + +## Components + +- **Churn Controller**: Creates and manages Services/Deployments to simulate endpoint churn +- **Client Controller**: Creates gRPC clients that subscribe to the Destination service (coming soon) + +## Prerequisites + +- Kubernetes cluster (local k3d recommended) +- Linkerd control plane installed +- KWOK for fake pod simulation +- Docker image built and imported into cluster + +## Building and Loading Image + +```bash +# Build Docker image +docker build -t dst-load-controller:latest -f Dockerfile . 
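+# (Run from the test/destination-test directory, where this Dockerfile and the
+# chart/ directory live, so the relative paths above resolve.)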
+ +# Load into k3d cluster +k3d image import dst-load-controller:latest --cluster k3s-default +``` + +## Installation + +```bash +# Create test namespace first +kubectl create namespace dst-test + +# Install with default values (10 stable services, 100 endpoints each) +helm install dst-load chart/ -n dst-test + +# Install with custom churn configuration +helm install dst-load chart/ -n dst-test \ + --set churn.stable.services=20 \ + --set churn.stable.endpoints=50 \ + --set churn.oscillate.services=5 \ + --set churn.oscillate.minEndpoints=10 \ + --set churn.oscillate.maxEndpoints=200 +``` + +## Configuration + +### Churn Controller + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `churn.enabled` | Enable churn controller | `true` | +| `churn.stable.services` | Number of stable services | `10` | +| `churn.stable.endpoints` | Endpoints per stable service | `100` | +| `churn.oscillate.services` | Number of oscillating services | `5` | +| `churn.oscillate.minEndpoints` | Minimum endpoints for oscillation | `10` | +| `churn.oscillate.maxEndpoints` | Maximum endpoints for oscillation | `200` | +| `churn.oscillate.holdDuration` | Time to hold at min/max | `"30s"` | +| `churn.oscillate.jitterPercent` | Jitter percentage | `10` | +| `churn.metricsPort` | Prometheus metrics port | `8080` | + +### Client Controller + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `client.enabled` | Enable client controller | `false` | +| `client.replicas` | Number of client pods | `10` | +| `client.destinationAddr` | Linkerd destination service address | `linkerd-dst-headless.linkerd.svc.cluster.local:8086` | +| `client.metricsPort` | Prometheus metrics port | `8080` | + +### Common + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `image.repository` | Docker image repository | `dst-load-controller` | +| `image.tag` | Docker image tag | `latest` | +| `image.pullPolicy` | Image pull policy | `Never` | +| `testNamespace` | Namespace for test services | `dst-test` | +| `podAnnotations` | Pod annotations (Linkerd injection) | `linkerd.io/inject: enabled` | + +## Metrics + +Both controllers expose Prometheus metrics on port 8080 (configurable): + +### Churn Controller Metrics + +- `churn_services_created` - Total services created (by pattern) +- `churn_deployments_created` - Total deployments created +- `churn_scale_events` - Total scale operations (by pattern, service) +- `churn_current_replicas` - Current replica count (by pattern, service) + +### Client Controller Metrics (TODO) + +- `client_streams_active` - Active gRPC streams +- `client_updates_received` - Destination updates received +- `client_endpoints_watched` - Endpoints being watched + +## Uninstallation + +```bash +# Remove load test +helm uninstall dst-load -n dst-test + +# Clean up test services (created by churn controller) +kubectl delete all --all -n dst-test +``` + +## Example: Full Load Test + +```bash +# 1. Install with large churn +helm install dst-load chart/ -n dst-test \ + --set churn.stable.services=50 \ + --set churn.stable.endpoints=200 \ + --set churn.oscillate.services=10 \ + --set churn.oscillate.minEndpoints=50 \ + --set churn.oscillate.maxEndpoints=500 + +# 2. Monitor churn metrics +kubectl port-forward -n dst-test pod/dst-load-dst-load-test-churn 8080:8080 & +curl localhost:8080/metrics + +# 3. 
Enable clients (once implemented) +helm upgrade dst-load chart/ -n dst-test \ + --set client.enabled=true \ + --set client.replicas=100 \ + --reuse-values +``` diff --git a/test/destination-test/chart/templates/_helpers.tpl b/test/destination-test/chart/templates/_helpers.tpl new file mode 100644 index 0000000000000..c4a512b9095e1 --- /dev/null +++ b/test/destination-test/chart/templates/_helpers.tpl @@ -0,0 +1,49 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "dst-load-test.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "dst-load-test.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "dst-load-test.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "dst-load-test.labels" -}} +helm.sh/chart: {{ include "dst-load-test.chart" . }} +{{ include "dst-load-test.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "dst-load-test.selectorLabels" -}} +app.kubernetes.io/name: {{ include "dst-load-test.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/test/destination-test/chart/templates/churn.yaml b/test/destination-test/chart/templates/churn.yaml new file mode 100644 index 0000000000000..846cec32baf8d --- /dev/null +++ b/test/destination-test/chart/templates/churn.yaml @@ -0,0 +1,71 @@ +{{- if .Values.churn.enabled }} +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "dst-load-test.fullname" . }}-churn + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ include "dst-load-test.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: churn + annotations: + {{- toYaml .Values.podAnnotations | nindent 4 }} +spec: + serviceAccountName: {{ .Values.serviceAccount.name }} + restartPolicy: Never + containers: + - name: churn + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - scale + - --namespace={{ .Values.testNamespace }} + - --deployment-pattern={{ .Values.churn.deploymentPattern }} + - --min-replicas={{ .Values.churn.minReplicas }} + - --max-replicas={{ .Values.churn.maxReplicas }} + - --hold-duration={{ .Values.churn.holdDuration }} + - --jitter-percent={{ .Values.churn.jitterPercent }} + ports: + - name: admin-http + containerPort: {{ .Values.churn.metricsPort }} + protocol: TCP + resources: + {{- toYaml .Values.churn.resources | nindent 8 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 4 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "dst-load-test.fullname" . }}-churn-metrics + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ include "dst-load-test.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: churn +spec: + type: ClusterIP + ports: + - port: {{ .Values.churn.metricsPort }} + targetPort: metrics + protocol: TCP + name: metrics + selector: + app.kubernetes.io/name: {{ include "dst-load-test.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: churn +{{- end }} diff --git a/test/destination-test/chart/templates/client.yaml b/test/destination-test/chart/templates/client.yaml new file mode 100644 index 0000000000000..99a17b075d19d --- /dev/null +++ b/test/destination-test/chart/templates/client.yaml @@ -0,0 +1,134 @@ +{{- if .Values.client.enabled }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "dst-load-test.fullname" . }}-client + namespace: {{ .Release.Namespace }} + labels: + {{- include "dst-load-test.labels" . | nindent 4 }} + app.kubernetes.io/component: client +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "dst-load-test.fullname" . }}-client + namespace: {{ .Release.Namespace }} + labels: + {{- include "dst-load-test.labels" . | nindent 4 }} + app.kubernetes.io/component: client +rules: +- apiGroups: [""] + resources: ["services"] + verbs: ["list", "watch", "get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "dst-load-test.fullname" . }}-client + namespace: {{ .Release.Namespace }} + labels: + {{- include "dst-load-test.labels" . | nindent 4 }} + app.kubernetes.io/component: client +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "dst-load-test.fullname" . }}-client +subjects: +- kind: ServiceAccount + name: {{ include "dst-load-test.fullname" . }}-client + namespace: {{ .Release.Namespace }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "dst-load-test.fullname" . }}-client + namespace: {{ .Release.Namespace }} + labels: + {{- include "dst-load-test.labels" . | nindent 4 }} + app.kubernetes.io/component: client +spec: + replicas: {{ .Values.client.replicas }} + selector: + matchLabels: + {{- include "dst-load-test.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: client + template: + metadata: + labels: + {{- include "dst-load-test.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: client + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "dst-load-test.fullname" . 
}}-client + containers: + - name: client + image: "{{ .Values.client.image.repository }}:{{ .Values.client.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.client.image.pullPolicy }} + args: + - client + - --destination-addr={{ .Values.client.destinationAddr }} + - --service-label-selector={{ .Values.client.serviceLabelSelector }} + - --watchers-per-service={{ .Values.client.watchersPerService }} + - --min-stream-lifetime={{ .Values.client.minStreamLifetime }} + - --max-stream-lifetime={{ .Values.client.maxStreamLifetime }} + - --namespace={{ .Release.Namespace }} + env: + # Downward API - inject pod metadata for context token + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - name: admin-http + containerPort: {{ .Values.client.metricsPort }} + protocol: TCP + resources: + {{- toYaml .Values.client.resources | nindent 10 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "dst-load-test.fullname" . }}-client-metrics + namespace: {{ .Release.Namespace }} + labels: + {{- include "dst-load-test.labels" . | nindent 4 }} + app.kubernetes.io/component: client +spec: + type: ClusterIP + ports: + - port: {{ .Values.client.metricsPort }} + targetPort: metrics + protocol: TCP + name: metrics + selector: + {{- include "dst-load-test.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: client +{{- end }} diff --git a/test/destination-test/chart/templates/rbac.yaml b/test/destination-test/chart/templates/rbac.yaml new file mode 100644 index 0000000000000..ec2ea41a0ee1f --- /dev/null +++ b/test/destination-test/chart/templates/rbac.yaml @@ -0,0 +1,52 @@ +{{- if .Values.rbac.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Values.serviceAccount.name }} + labels: + app.kubernetes.io/name: {{ include "dst-load-test.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "dst-load-test.fullname" . }} + namespace: {{ .Values.testNamespace }} + labels: + app.kubernetes.io/name: {{ include "dst-load-test.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} +rules: + # Allow listing and getting Deployments (to find matching pattern) + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch"] + # Allow patching Deployments (to update spec.replicas) + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["patch"] + # Allow reading Services (for validation/metrics) + - apiGroups: [""] + resources: ["services"] + verbs: ["get", "list", "watch"] + # Allow reading Pods (for metrics) + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "dst-load-test.fullname" . }} + namespace: {{ .Values.testNamespace }} + labels: + app.kubernetes.io/name: {{ include "dst-load-test.name" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "dst-load-test.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ .Values.serviceAccount.name }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/test/destination-test/chart/templates/test-services.yaml b/test/destination-test/chart/templates/test-services.yaml new file mode 100644 index 0000000000000..6560cf752a806 --- /dev/null +++ b/test/destination-test/chart/templates/test-services.yaml @@ -0,0 +1,64 @@ +{{- range $i := until (int .Values.testServices.count) }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $.Values.testServices.namePrefix }}-{{ $i }} + namespace: {{ $.Values.testNamespace }} + labels: + app.kubernetes.io/name: {{ include "dst-load-test.name" $ }} + app.kubernetes.io/instance: {{ $.Release.Name }} + app.kubernetes.io/component: test-service + app: {{ $.Values.testServices.namePrefix }}-{{ $i }} + dst-load.l5d.io/role: target + {{- if $.Values.multicluster.enabled }} + mirror.linkerd.io/exported: "true" + {{- end }} + {{- if $.Values.multicluster.annotations }} + annotations: + {{- toYaml $.Values.multicluster.annotations | nindent 4 }} + {{- end }} +spec: + type: ClusterIP + ports: + - port: {{ $.Values.testServices.port }} + targetPort: {{ $.Values.testServices.port }} + protocol: TCP + name: http + selector: + app: {{ $.Values.testServices.namePrefix }}-{{ $i }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $.Values.testServices.namePrefix }}-{{ $i }} + namespace: {{ $.Values.testNamespace }} + labels: + app.kubernetes.io/name: {{ include "dst-load-test.name" $ }} + app.kubernetes.io/instance: {{ $.Release.Name }} + app.kubernetes.io/component: test-service + app: {{ $.Values.testServices.namePrefix }}-{{ $i }} +spec: + replicas: {{ $.Values.testServices.initialReplicas }} + selector: + matchLabels: + app: {{ $.Values.testServices.namePrefix }}-{{ $i }} + template: + metadata: + labels: + app: {{ $.Values.testServices.namePrefix }}-{{ $i }} + kwok.x-k8s.io/node: kwok-node + spec: + nodeSelector: + type: kwok + tolerations: + - key: kwok.x-k8s.io/node + operator: Exists + effect: NoSchedule + containers: + - name: app + image: fake-image:latest + ports: + - containerPort: {{ $.Values.testServices.port }} + protocol: TCP +{{- end }} diff --git a/test/destination-test/chart/values.yaml b/test/destination-test/chart/values.yaml new file mode 100644 index 0000000000000..7e1eed027490a --- /dev/null +++ b/test/destination-test/chart/values.yaml @@ -0,0 +1,126 @@ +# Default values for dst-load-test +# This Helm chart deploys load testing controllers for Linkerd's destination service + +# Docker image configuration +image: + repository: dst-load-controller + tag: latest + pullPolicy: Never # Use local image imported into k3d + +# Image pull secrets (if needed for private registries) +imagePullSecrets: [] + +# Service account configuration +serviceAccount: + create: true + name: dst-load-test + +# RBAC permissions +rbac: + create: true + +# Namespace where test services will be created +testNamespace: dst-test + +# Test services configuration (created by Helm chart) +testServices: + # Number of services to create + count: 10 + + # Service name prefix (will create {namePrefix}-0, {namePrefix}-1, etc.) 
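+  # Note: keep this prefix in sync with churn.deploymentPattern below
+  # ("test-svc-*") so the churn controller's glob match finds these Deployments.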
+ namePrefix: test-svc + + # Initial replica count for each Deployment + initialReplicas: 100 + + # Service port + port: 8080 + +# Multicluster configuration (optional) +multicluster: + # Enable multicluster annotations on services + enabled: false + + # Additional annotations for multicluster services + # Example: config.linkerd.io/enable-gateway: "true" + annotations: {} + +# Churn controller configuration +churn: + enabled: true + + # Deployment name pattern to scale (supports wildcards) + deploymentPattern: "test-svc-*" + + # Oscillation configuration + minReplicas: 10 + maxReplicas: 200 + holdDuration: "30s" + jitterPercent: 10 + + # Metrics server port + metricsPort: 8080 + + # Resource limits + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 256Mi + +# Client controller configuration +client: + enabled: false + + # Number of client replicas + replicas: 1 + + # Image configuration (overrides global image settings if specified) + image: + repository: dst-load-controller + tag: latest + pullPolicy: Never + + # Destination service address + destinationAddr: linkerd-dst-headless.linkerd.svc.cluster.local:8086 + + # Label selector for discovering services to watch + # Example: "dst-load.l5d.io/role=target" for load test targets + serviceLabelSelector: "dst-load.l5d.io/role=target" + + # Number of concurrent watchers per discovered service + watchersPerService: 10 + + # Stream lifetime configuration + # Minimum time before reconnecting (with jitter) + minStreamLifetime: "5m" + # Maximum time before reconnecting (with jitter) + maxStreamLifetime: "30m" + + # Metrics server port + metricsPort: 8080 + + # Resource limits + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 256Mi + +# Pod annotations (for Linkerd injection) +podAnnotations: + linkerd.io/inject: enabled + config.linkerd.io/skip-outbound-ports: "8080" # Skip metrics port + +# Node selector +nodeSelector: {} + +# Tolerations +tolerations: [] + +# Affinity +affinity: {} diff --git a/test/destination-test/dst-load-controller/Cargo.toml b/test/destination-test/dst-load-controller/Cargo.toml new file mode 100644 index 0000000000000..4e521321ceb66 --- /dev/null +++ b/test/destination-test/dst-load-controller/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "dst-load-controller" +version = "0.1.0" +edition = "2021" +publish = false + +[[bin]] +name = "dst-load-controller" +path = "src/main.rs" + +[dependencies] +anyhow = "1" +clap = { version = "4", features = ["derive", "env"] } +futures = "0.3" +k8s-openapi = { workspace = true } +kube = { workspace = true, default-features = false, features = [ + "client", + "rustls-tls", +] } +linkerd2-proxy-api = { workspace = true } +prometheus-client = { workspace = true } +rand = "0.8" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tokio = { version = "1", features = ["macros", "rt-multi-thread", "signal"] } +tonic = { workspace = true, default-features = false, features = ["transport"] } +tracing = "0.1" + +[dependencies.kubert] +workspace = true +default-features = false +features = ["clap", "prometheus-client", "runtime", "rustls-tls"] + +[dependencies.tracing-subscriber] +version = "0.3" +features = ["env-filter", "fmt", "json"] diff --git a/test/destination-test/dst-load-controller/src/churn.rs b/test/destination-test/dst-load-controller/src/churn.rs new file mode 100644 index 0000000000000..41a236bc373bf --- /dev/null +++ b/test/destination-test/dst-load-controller/src/churn.rs @@ -0,0 +1,227 @@ +//! 
Churn controller: Scales existing Deployments to simulate endpoint churn + +use std::time::Duration; + +use k8s_openapi::api::apps::v1::Deployment; +use kube::{ + api::{Api, Patch, PatchParams}, + Client, +}; +use prometheus_client::{ + encoding::EncodeLabelSet, + metrics::{counter::Counter, family::Family, gauge::Gauge}, + registry::Registry, +}; +use tracing::{error, info}; + +#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] +pub struct ChurnLabels { + pattern: String, + service: String, +} + +pub struct ChurnMetrics { + pub scale_events: Family, + pub current_replicas: Family, +} + +impl ChurnMetrics { + pub fn new(registry: &mut Registry) -> Self { + let scale_events = Family::default(); + let current_replicas = Family::default(); + + registry.register( + "churn_scale_events", + "Total number of scale events", + scale_events.clone(), + ); + registry.register( + "churn_current_replicas", + "Current replica count per service", + current_replicas.clone(), + ); + + Self { + scale_events, + current_replicas, + } + } +} + +pub struct ChurnController { + pub client: Client, + pub namespace: String, + pub metrics: ChurnMetrics, +} + +impl ChurnController { + pub fn new(client: Client, namespace: String, metrics: ChurnMetrics) -> Self { + Self { + client, + namespace, + metrics, + } + } + + /// Scale a Deployment to the specified replica count + async fn scale_deployment( + &self, + name: &str, + replicas: i32, + pattern: &str, + ) -> anyhow::Result<()> { + let deployments: Api = Api::namespaced(self.client.clone(), &self.namespace); + + let patch = serde_json::json!({ + "spec": { + "replicas": replicas + } + }); + + deployments + .patch( + name, + &PatchParams::apply("dst-load-controller"), + &Patch::Merge(&patch), + ) + .await + .map_err(|e| { + error!(?e, deployment = name, replicas, "Failed to scale deployment"); + e + })?; + + info!(deployment = name, replicas, pattern, "Scaled deployment"); + + let labels = ChurnLabels { + pattern: pattern.to_string(), + service: name.to_string(), + }; + self.metrics.scale_events.get_or_create(&labels).inc(); + self.metrics + .current_replicas + .get_or_create(&labels) + .set(replicas as i64); + + Ok(()) + } + + /// Oscillate replicas for deployments matching a pattern + /// This is the simplified version that only scales existing deployments + pub async fn run_oscillate_deployments( + &self, + pattern: &str, + min_replicas: i32, + max_replicas: i32, + hold_duration: Duration, + jitter_percent: u8, + ) -> anyhow::Result<()> { + info!( + pattern, + min_replicas, + max_replicas, + ?hold_duration, + jitter_percent, + "Starting deployment oscillation" + ); + + let deployments: Api = Api::namespaced(self.client.clone(), &self.namespace); + + // List all deployments and filter by pattern + let deployment_list = deployments.list(&Default::default()).await?; + + // Simple glob matching (supports wildcards like "test-svc-*") + let matching_deployments: Vec = deployment_list + .items + .iter() + .filter_map(|d| { + let name = d.metadata.name.as_ref()?; + if matches_pattern(name, pattern) { + Some(name.clone()) + } else { + None + } + }) + .collect(); + + if matching_deployments.is_empty() { + anyhow::bail!("No deployments found matching pattern: {}", pattern); + } + + info!( + count = matching_deployments.len(), + deployments = ?matching_deployments, + "Found matching deployments" + ); + + // Oscillate forever + let mut current_replicas = max_replicas; + loop { + // Toggle between min and max + current_replicas = if current_replicas == max_replicas { + 
min_replicas + } else { + max_replicas + }; + + // Scale all matching deployments + for deployment_name in &matching_deployments { + self.scale_deployment(deployment_name, current_replicas, "oscillate") + .await?; + } + + info!( + replicas = current_replicas, + deployments = matching_deployments.len(), + "Scaled deployments" + ); + + // Wait with jitter + let jitter = if jitter_percent > 0 { + use rand::Rng; + let max_jitter = hold_duration.as_millis() * jitter_percent as u128 / 100; + Duration::from_millis(rand::thread_rng().gen_range(0..=max_jitter as u64)) + } else { + Duration::from_secs(0) + }; + + let sleep_duration = hold_duration + jitter; + info!(?sleep_duration, "Holding at current scale"); + tokio::time::sleep(sleep_duration).await; + } + } +} + +/// Simple glob pattern matching (supports * wildcard) +fn matches_pattern(name: &str, pattern: &str) -> bool { + if pattern == "*" { + return true; + } + + if let Some(prefix) = pattern.strip_suffix('*') { + name.starts_with(prefix) + } else if let Some(suffix) = pattern.strip_prefix('*') { + name.ends_with(suffix) + } else { + name == pattern + } +} + +/// Parse duration string (e.g., "30s", "5m", "1h") +pub fn parse_duration(s: &str) -> anyhow::Result { + let s = s.trim(); + if s.is_empty() { + anyhow::bail!("Empty duration string"); + } + + let (num_str, unit) = s.split_at(s.len() - 1); + let num: u64 = num_str + .parse() + .map_err(|_| anyhow::anyhow!("Invalid number: {}", num_str))?; + + match unit { + "s" => Ok(Duration::from_secs(num)), + "m" => Ok(Duration::from_secs(num * 60)), + "h" => Ok(Duration::from_secs(num * 3600)), + _ => anyhow::bail!("Invalid duration unit: {}", unit), + } +} diff --git a/test/destination-test/dst-load-controller/src/client.rs b/test/destination-test/dst-load-controller/src/client.rs new file mode 100644 index 0000000000000..cfd05f27b2fcb --- /dev/null +++ b/test/destination-test/dst-load-controller/src/client.rs @@ -0,0 +1,375 @@ +//! 
Client controller: Creates gRPC clients that subscribe to the Destination service + +use std::time::Duration; + +use k8s_openapi::api::core::v1::Service; +use kube::{ + api::{Api, ListParams}, + Client, +}; +use linkerd2_proxy_api::destination as dst_api; +use prometheus_client::{ + encoding::EncodeLabelSet, + metrics::{counter::Counter, family::Family, gauge::Gauge}, + registry::Registry, +}; +use rand::Rng; +use tokio::time::{sleep, timeout}; +use tonic::transport::Channel; +use tracing::{error, info, warn}; + +#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] +pub struct ClientLabels { + target: String, + request_type: String, +} + +pub struct ClientMetrics { + pub streams_active: Family, + pub updates_received: Family, + pub endpoints_current: Family, + pub stream_errors: Family, +} + +impl ClientMetrics { + pub fn new(registry: &mut Registry) -> Self { + let streams_active = Family::default(); + let updates_received = Family::default(); + let endpoints_current = Family::default(); + let stream_errors = Family::default(); + + registry.register( + "client_streams_active", + "Number of active gRPC streams", + streams_active.clone(), + ); + registry.register( + "client_updates_received", + "Total number of updates received", + updates_received.clone(), + ); + registry.register( + "client_endpoints_current", + "Current number of endpoints for a service", + endpoints_current.clone(), + ); + registry.register( + "client_stream_errors", + "Total number of stream errors", + stream_errors.clone(), + ); + + Self { + streams_active, + updates_received, + endpoints_current, + stream_errors, + } + } +} + +pub struct ClientController { + pub client: Client, + pub destination_addr: String, + pub namespace: String, + pub context_token: String, + pub metrics: ClientMetrics, +} + +impl ClientController { + pub fn new( + client: Client, + destination_addr: String, + namespace: String, + context_token: String, + metrics: ClientMetrics, + ) -> Self { + Self { + client, + destination_addr, + namespace, + context_token, + metrics, + } + } + + /// Discover services by label selector and create watchers + pub async fn run_get_requests( + &self, + service_label_selector: String, + watchers_per_service: u32, + min_stream_lifetime: Duration, + max_stream_lifetime: Duration, + ) -> anyhow::Result<()> { + info!( + destination_addr = %self.destination_addr, + namespace = %self.namespace, + label_selector = %service_label_selector, + watchers_per_service, + "Starting Get requests" + ); + + // Discover services via Kubernetes API + let services: Api = Api::namespaced(self.client.clone(), &self.namespace); + let lp = ListParams::default().labels(&service_label_selector); + + let service_list = services.list(&lp).await?; + + if service_list.items.is_empty() { + anyhow::bail!( + "No services found with label selector: {}", + service_label_selector + ); + } + + info!( + service_count = service_list.items.len(), + "Discovered services" + ); + + // Connect to destination service + let channel = Channel::from_shared(format!("http://{}", self.destination_addr))? 
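+        // Establish a single shared connection up front; tonic Channels are
+        // cheap to clone, so every watcher task spawned below multiplexes its
+        // Get stream over this one client connection.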
+ .connect() + .await?; + + info!("Connected to destination service"); + + // Spawn watchers for each service + let mut tasks = Vec::new(); + for svc in service_list.items { + let svc_name = svc.metadata.name.as_ref().ok_or_else(|| anyhow::anyhow!("Service missing name"))?; + + // Get the service port (assume first port) + let port = svc + .spec + .as_ref() + .and_then(|spec| spec.ports.as_ref()) + .and_then(|ports| ports.first()) + .map(|p| p.port) + .ok_or_else(|| anyhow::anyhow!("Service missing port"))?; + + // Build the destination path (authority) + let target = format!( + "{}.{}.svc.cluster.local:{}", + svc_name, self.namespace, port + ); + + info!( + service = %svc_name, + target = %target, + watchers = watchers_per_service, + "Creating watchers for service" + ); + + // Spawn multiple watchers for this service + for watcher_id in 0..watchers_per_service { + let channel = channel.clone(); + let target = target.clone(); + let context_token = self.context_token.clone(); + let metrics = self.metrics.clone(); + let svc_name = svc_name.clone(); + let min_lifetime = min_stream_lifetime; + let max_lifetime = max_stream_lifetime; + + let task = tokio::spawn(async move { + if let Err(e) = subscribe_to_destination( + channel, + target.clone(), + context_token, + metrics, + watcher_id, + min_lifetime, + max_lifetime, + ) + .await + { + error!( + service = %svc_name, + target = %target, + watcher_id, + error = ?e, + "Get stream failed" + ); + } + }); + tasks.push(task); + } + } + + // Wait for all tasks (they should run forever) + futures::future::join_all(tasks).await; + + Ok(()) + } +} + +/// Subscribe to a destination service and process updates +/// Streams have a bounded lifetime with randomized jitter to simulate realistic client behavior +async fn subscribe_to_destination( + channel: Channel, + target: String, + context_token: String, + metrics: ClientMetrics, + watcher_id: u32, + min_stream_lifetime: Duration, + max_stream_lifetime: Duration, +) -> anyhow::Result<()> { + let mut client = dst_api::destination_client::DestinationClient::new(channel); + + info!( + target = %target, + watcher_id, + min_lifetime_secs = min_stream_lifetime.as_secs(), + max_lifetime_secs = max_stream_lifetime.as_secs(), + "Subscribing to destination" + ); + + let labels = ClientLabels { + target: target.clone(), + request_type: "Get".to_string(), + }; + + loop { + // Randomize stream lifetime between min and max + let lifetime_secs = rand::thread_rng().gen_range( + min_stream_lifetime.as_secs()..=max_stream_lifetime.as_secs() + ); + let stream_lifetime = Duration::from_secs(lifetime_secs); + + info!( + target = %target, + watcher_id, + lifetime_secs = stream_lifetime.as_secs(), + "Starting bounded stream" + ); + + // Create Get request with context token + let request = tonic::Request::new(dst_api::GetDestination { + scheme: "k8s".to_string(), + path: target.clone(), + context_token: context_token.clone(), + }); + + // Track active stream + metrics.streams_active.get_or_create(&labels).inc(); + + // Subscribe to stream with timeout + let stream_result = timeout(stream_lifetime, async { + match client.get(request).await { + Ok(response) => { + let mut stream = response.into_inner(); + info!( + target = %target, + watcher_id, + "Stream established" + ); + + // Process updates until stream ends or timeout + while let Ok(Some(update)) = stream.message().await { + handle_update(&target, update, &metrics, &labels, watcher_id); + } + + Ok(()) + } + Err(e) => { + error!( + target = %target, + watcher_id, + error = ?e, 
+ "Failed to establish stream" + ); + metrics.stream_errors.get_or_create(&labels).inc(); + Err(e) + } + } + }) + .await; + + // Stream closed (either timeout or natural end), mark as inactive + metrics.streams_active.get_or_create(&labels).dec(); + + match stream_result { + Ok(_) => { + info!( + target = %target, + watcher_id, + "Stream ended naturally, reconnecting..." + ); + } + Err(_) => { + info!( + target = %target, + watcher_id, + lifetime_secs = stream_lifetime.as_secs(), + "Stream lifetime expired, reconnecting..." + ); + } + } + + // Wait before reconnecting (short jitter) + let reconnect_delay = Duration::from_secs(rand::thread_rng().gen_range(1..5)); + sleep(reconnect_delay).await; + } +} + +/// Handle a destination update +fn handle_update( + target: &str, + update: dst_api::Update, + metrics: &ClientMetrics, + labels: &ClientLabels, + watcher_id: u32, +) { + metrics.updates_received.get_or_create(labels).inc(); + + match update.update { + Some(dst_api::update::Update::Add(add)) => { + let endpoint_count = add.addrs.len(); + info!( + target = %target, + watcher_id, + endpoints = endpoint_count, + "Received Add update" + ); + metrics + .endpoints_current + .get_or_create(labels) + .set(endpoint_count as i64); + } + Some(dst_api::update::Update::Remove(remove)) => { + info!( + target = %target, + watcher_id, + removed = remove.addrs.len(), + "Received Remove update" + ); + } + Some(dst_api::update::Update::NoEndpoints(no_endpoints)) => { + info!( + target = %target, + watcher_id, + exists = no_endpoints.exists, + "Received NoEndpoints update" + ); + metrics.endpoints_current.get_or_create(labels).set(0); + } + None => { + warn!( + target = %target, + watcher_id, + "Received update with no data" + ); + } + } +} + +impl Clone for ClientMetrics { + fn clone(&self) -> Self { + Self { + streams_active: self.streams_active.clone(), + updates_received: self.updates_received.clone(), + endpoints_current: self.endpoints_current.clone(), + stream_errors: self.stream_errors.clone(), + } + } +} diff --git a/test/destination-test/dst-load-controller/src/main.rs b/test/destination-test/dst-load-controller/src/main.rs new file mode 100644 index 0000000000000..d4fffe64df272 --- /dev/null +++ b/test/destination-test/dst-load-controller/src/main.rs @@ -0,0 +1,281 @@ +use anyhow::Result; +use clap::{Parser, Subcommand}; + +mod churn; +mod client; + +#[derive(Parser)] +#[command(name = "dst-load-controller")] +#[command(about = "Destination service load testing controller", long_about = None)] +struct Args { + #[clap(long, default_value = "linkerd=info,warn")] + log_level: kubert::LogFilter, + + #[clap(long, default_value = "plain")] + log_format: kubert::LogFormat, + + #[clap(flatten)] + client: kubert::ClientArgs, + + #[clap(flatten)] + admin: kubert::AdminArgs, + + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Scale controller: Oscillates Deployment replicas between min/max to simulate autoscaler behavior + Scale { + /// Deployment name pattern to scale (supports wildcards, e.g., "test-svc-*") + #[arg(long)] + deployment_pattern: String, + + /// Minimum replica count + #[arg(long)] + min_replicas: i32, + + /// Maximum replica count + #[arg(long)] + max_replicas: i32, + + /// Hold time at min/max before changing (e.g., "30s", "1m") + #[arg(long, default_value = "30s")] + hold_duration: String, + + /// Jitter percentage (0-100) to spread oscillation timing + #[arg(long, default_value = "0")] + jitter_percent: u8, + + /// Namespace where deployments 
exist + #[arg(long, default_value = "default")] + namespace: String, + }, + + /// Client controller: Creates gRPC clients and subscribes to Destination service + Client { + /// Destination service address (e.g., "linkerd-destination.linkerd:8086") + #[arg(long)] + destination_addr: String, + + /// Label selector to discover target services (e.g., "app.kubernetes.io/component=test-service") + #[arg(long)] + service_label_selector: String, + + /// Number of concurrent watchers per service + #[arg(long, default_value = "1")] + watchers_per_service: u32, + + /// Minimum stream lifetime before reconnection (e.g., "30s", "5m") + #[arg(long, default_value = "5m")] + min_stream_lifetime: String, + + /// Maximum stream lifetime before reconnection (e.g., "1h", "30m") + #[arg(long, default_value = "30m")] + max_stream_lifetime: String, + + /// Namespace where services exist + #[arg(long, default_value = "default")] + namespace: String, + + /// Pod name (for context token, typically from downward API) + #[arg(long, env = "POD_NAME")] + pod_name: Option, + + /// Pod namespace (for context token, typically from downward API) + #[arg(long, env = "POD_NAMESPACE")] + pod_namespace: Option, + + /// Node name (for context token, typically from downward API) + #[arg(long, env = "NODE_NAME")] + node_name: Option, + }, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + args.run().await +} + +impl Args { + async fn run(self) -> Result<()> { + let Args { + log_level, + log_format, + client: client_args, + admin, + command, + } = self; + + match command { + Commands::Scale { + deployment_pattern, + min_replicas, + max_replicas, + hold_duration, + jitter_percent, + namespace, + } => { + tracing::info!( + deployment_pattern, + min_replicas, + max_replicas, + hold_duration, + jitter_percent, + namespace, + "Starting scale controller" + ); + + // Validate inputs + if min_replicas < 0 || max_replicas < 0 { + anyhow::bail!("Replica counts must be >= 0"); + } + if min_replicas >= max_replicas { + anyhow::bail!("--min-replicas must be < --max-replicas"); + } + if jitter_percent > 100 { + anyhow::bail!("--jitter-percent must be 0-100"); + } + + let hold_duration = churn::parse_duration(&hold_duration)?; + + // Set up metrics + let mut prom = prometheus_client::registry::Registry::default(); + let metrics = churn::ChurnMetrics::new(&mut prom); + + // Build runtime with admin server (provides /metrics, /ready, /live) + let runtime = kubert::Runtime::builder() + .with_log(log_level, log_format) + .with_admin(admin.into_builder().with_prometheus(prom)) + .with_client(client_args) + .build() + .await?; + + // Get Kubernetes client from runtime + let client = runtime.client(); + + // Create churn controller + let controller = churn::ChurnController::new(client, namespace, metrics); + + // Run oscillate pattern on matching deployments + controller + .run_oscillate_deployments( + &deployment_pattern, + min_replicas, + max_replicas, + hold_duration, + jitter_percent, + ) + .await?; + } + + Commands::Client { + destination_addr, + service_label_selector, + watchers_per_service, + min_stream_lifetime, + max_stream_lifetime, + namespace, + pod_name, + pod_namespace, + node_name, + } => { + tracing::info!( + destination_addr, + service_label_selector, + watchers_per_service, + min_stream_lifetime, + max_stream_lifetime, + namespace, + ?pod_name, + ?pod_namespace, + ?node_name, + "Starting client controller" + ); + + if watchers_per_service == 0 { + anyhow::bail!("--watchers-per-service must be > 0"); + 
} + + // Parse stream lifetime durations + let min_lifetime = churn::parse_duration(&min_stream_lifetime)?; + let max_lifetime = churn::parse_duration(&max_stream_lifetime)?; + + if min_lifetime >= max_lifetime { + anyhow::bail!("--min-stream-lifetime must be < --max-stream-lifetime"); + } + + // Build context token (mimics linkerd proxy injector) + let context_token = build_context_token( + pod_name.as_deref(), + pod_namespace.as_deref(), + node_name.as_deref(), + )?; + + tracing::info!(context_token, "Built context token"); + + // Set up metrics + let mut prom = prometheus_client::registry::Registry::default(); + let metrics = client::ClientMetrics::new(&mut prom); + + // Build runtime with admin server (provides /metrics, /ready, /live) + let runtime = kubert::Runtime::builder() + .with_log(log_level, log_format) + .with_admin(admin.into_builder().with_prometheus(prom)) + .with_client(client_args) + .build() + .await?; + + // Get Kubernetes client from runtime + let client = runtime.client(); + + // Create client controller + let controller = client::ClientController::new( + client, + destination_addr, + namespace, + context_token, + metrics, + ); + + // Run Get requests + controller + .run_get_requests( + service_label_selector, + watchers_per_service, + min_lifetime, + max_lifetime, + ) + .await?; + } + } + + Ok(()) + } +} + +/// Build a context token for destination service requests +/// Format matches what linkerd proxy-injector does: {"ns":"namespace","nodeName":"node","pod":"podname"} +fn build_context_token( + pod_name: Option<&str>, + pod_namespace: Option<&str>, + node_name: Option<&str>, +) -> Result { + let mut token = serde_json::json!({}); + + if let Some(ns) = pod_namespace { + token["ns"] = serde_json::json!(ns); + } + + if let Some(pod) = pod_name { + token["pod"] = serde_json::json!(pod); + } + + if let Some(node) = node_name { + token["nodeName"] = serde_json::json!(node); + } + + Ok(token.to_string()) +} diff --git a/test/destination-test/hack/gen-certs.sh b/test/destination-test/hack/gen-certs.sh new file mode 100755 index 0000000000000..0c48e5d3574b7 --- /dev/null +++ b/test/destination-test/hack/gen-certs.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# +# Generate shared trust root and issuer certificates for Linkerd multicluster +# using step-cli. This creates a single CA that both clusters will share, +# enabling cross-cluster mTLS. +# +# Usage: +# ./gen-certs.sh [output-dir] +# +# Output: +# LINKERD_CA_DIR/ca.crt - Trust anchor (root CA) +# LINKERD_CA_DIR/issuer.crt - Issuer certificate (intermediate) +# LINKERD_CA_DIR/issuer.key - Issuer private key +# +# These files are read by helmfile.yaml during cluster setup. 
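+#
+# Requires step-cli (the `step` binary) to be installed and on the PATH.
+# Note: ca.key is also written to the output directory; it is only needed to
+# (re)issue the intermediate and is not read by helmfile.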
+ +set -euo pipefail + +# Default CA directory +CA_DIR="${1:-${LINKERD_CA_DIR:-/tmp/linkerd-ca}}" + +# Certificate validity periods +CA_VALIDITY="87600h" # 10 years +ISSUER_VALIDITY="8760h" # 1 year + +echo "==> Generating Linkerd certificates in $CA_DIR" + +# Create output directory +mkdir -p "$CA_DIR" + +# Generate root CA (trust anchor) +echo "==> Generating root CA (trust anchor)" +step certificate create \ + "root.linkerd.cluster.local" \ + "$CA_DIR/ca.crt" \ + "$CA_DIR/ca.key" \ + --profile root-ca \ + --no-password \ + --insecure \ + --not-after="$CA_VALIDITY" \ + --kty=EC \ + --crv=P-256 + +echo "==> Root CA fingerprint:" +step certificate fingerprint "$CA_DIR/ca.crt" + +# Generate issuer certificate (intermediate CA) +echo "==> Generating issuer certificate (intermediate CA)" +step certificate create \ + "identity.linkerd.cluster.local" \ + "$CA_DIR/issuer.crt" \ + "$CA_DIR/issuer.key" \ + --profile intermediate-ca \ + --ca "$CA_DIR/ca.crt" \ + --ca-key "$CA_DIR/ca.key" \ + --no-password \ + --insecure \ + --not-after="$ISSUER_VALIDITY" \ + --kty=EC \ + --crv=P-256 + +echo "==> Issuer certificate fingerprint:" +step certificate fingerprint "$CA_DIR/issuer.crt" + +# Verify issuer is signed by CA +echo "==> Verifying certificate chain" +step certificate verify \ + "$CA_DIR/issuer.crt" \ + --roots "$CA_DIR/ca.crt" + +echo "" +echo "✓ Certificate generation complete!" +echo "" +echo "Files created in $CA_DIR:" +ls -lh "$CA_DIR" +echo "" +echo "Set environment variable:" +echo " export LINKERD_CA_DIR=$CA_DIR" +echo "" +echo "Or pass to helmfile:" +echo " LINKERD_CA_DIR=$CA_DIR helmfile sync" diff --git a/test/destination-test/hack/kwok-node.yaml b/test/destination-test/hack/kwok-node.yaml new file mode 100644 index 0000000000000..68508f68d22b8 --- /dev/null +++ b/test/destination-test/hack/kwok-node.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +kind: Node +metadata: + name: kwok-node-0 + annotations: + node.alpha.kubernetes.io/ttl: "0" + kwok.x-k8s.io/node: fake + labels: + beta.kubernetes.io/arch: amd64 + beta.kubernetes.io/os: linux + kubernetes.io/arch: amd64 + kubernetes.io/hostname: kwok-node-0 + kubernetes.io/os: linux + kubernetes.io/role: agent + node-role.kubernetes.io/agent: "" + type: kwok +spec: + taints: + - effect: NoSchedule + key: kwok.x-k8s.io/node + value: fake +status: + allocatable: + cpu: "1000" + memory: 1000Gi + pods: "1000" + capacity: + cpu: "1000" + memory: 1000Gi + pods: "1000" + nodeInfo: + architecture: amd64 + bootID: "" + containerRuntimeVersion: "" + kernelVersion: "" + kubeProxyVersion: fake + kubeletVersion: fake + machineID: "" + operatingSystem: linux + osImage: "" + systemUUID: "" + phase: Running diff --git a/test/destination-test/helmfile.yaml.gotmpl b/test/destination-test/helmfile.yaml.gotmpl new file mode 100644 index 0000000000000..5072628de2a06 --- /dev/null +++ b/test/destination-test/helmfile.yaml.gotmpl @@ -0,0 +1,118 @@ +# Helmfile for destination load testing infrastructure +# +# Prerequisites: +# 1. k3d cluster created (e.g., k3d-test) +# 2. LINKERD_CA_DIR set with generated certificates: +# - ca.crt (trust anchor) +# - issuer.crt (issuer certificate) +# - issuer.key (issuer private key) +# 3. 
Run: hack/gen-certs.sh to generate these files +# +# Usage: +# # Setup cluster +# LINKERD_CA_DIR=/tmp/linkerd-ca helmfile sync +# +# # Without monitoring +# LINKERD_CA_DIR=/tmp/linkerd-ca helmfile --state-values-set monitoring.enabled=false sync + +environments: + default: + values: + - monitoring: + enabled: true + +--- + +helmDefaults: + wait: true + timeout: 300 + createNamespace: true + atomic: true + +repositories: + - name: linkerd-edge + url: https://helm.linkerd.io/edge + - name: kwok + url: https://kwok.sigs.k8s.io/charts/ + - name: prometheus-community + url: https://prometheus-community.github.io/helm-charts + - name: linkerd-monitoring + url: https://ghcr.io/olix0r/charts + oci: true + +releases: + ############################################################################# + # KWOK - Fake pods/nodes for load testing + ############################################################################# + - name: kwok + namespace: kwok-system + chart: kwok/kwok + # Latest release (0.2.0 = app v0.7.0) + + ############################################################################# + # KWOK Stage Fast - Default pod/node emulation behavior + ############################################################################# + - name: kwok-stage + namespace: default + chart: kwok/stage-fast + # Configures pods to transition quickly through lifecycle phases + needs: + - kwok + + ############################################################################# + # Linkerd CRDs + ############################################################################# + - name: linkerd-crds + namespace: linkerd + chart: linkerd-edge/linkerd-crds + # Latest edge release + + ############################################################################# + # Linkerd Control Plane + ############################################################################# + - name: linkerd-control-plane + namespace: linkerd + chart: linkerd-edge/linkerd-control-plane + # Latest edge release + needs: + - linkerd-crds + set: + - name: identityTrustDomain + value: cluster.local + - name: controllerLogLevel + value: debug + - name: controllerLogFormat + value: json + - name: identity.issuer.scheme + value: linkerd.io/tls + - name: identityTrustAnchorsPEM + value: {{ readFile (printf "%s/ca.crt" (env "LINKERD_CA_DIR" | default "/tmp/linkerd-ca")) | quote }} + - name: identity.issuer.tls.crtPEM + value: {{ readFile (printf "%s/issuer.crt" (env "LINKERD_CA_DIR" | default "/tmp/linkerd-ca")) | quote }} + - name: identity.issuer.tls.keyPEM + value: {{ readFile (printf "%s/issuer.key" (env "LINKERD_CA_DIR" | default "/tmp/linkerd-ca")) | quote }} + + ############################################################################# + # kube-prometheus-stack (monitoring infrastructure) + ############################################################################# + - name: kube-prometheus-stack + namespace: monitoring + chart: prometheus-community/kube-prometheus-stack + version: ~67.0.0 + condition: monitoring.enabled + values: + - values/kube-prometheus-stack.yaml + + ############################################################################# + # linkerd-monitoring (Linkerd dashboards and ServiceMonitors) + ############################################################################# + - name: linkerd-monitoring + namespace: linkerd-viz + chart: oci://ghcr.io/olix0r/charts/linkerd-monitoring + version: 0.1.1 + condition: monitoring.enabled + needs: + - linkerd-control-plane + - kube-prometheus-stack + values: + - 
values/linkerd-monitoring.yaml diff --git a/test/destination-test/values/README.md b/test/destination-test/values/README.md new file mode 100644 index 0000000000000..ae273bb7ebb2e --- /dev/null +++ b/test/destination-test/values/README.md @@ -0,0 +1,153 @@ +# Monitoring Dashboard Access + +This directory contains monitoring configuration for the destination load test infrastructure. + +## Architecture + +- **kube-prometheus-stack**: Core monitoring infrastructure (Prometheus + Grafana) +- **linkerd-monitoring**: Linkerd-specific dashboards and ServiceMonitors +- **Custom scrape configs**: Additional targets for dst-load-controller metrics + +## Accessing Grafana + +After deploying with `helmfile sync`, Grafana is available via NodePort: + +```bash +# Get the NodePort +kubectl -n monitoring get svc kube-prometheus-stack-grafana + +# Forward the port to localhost +kubectl -n monitoring port-forward svc/kube-prometheus-stack-grafana 3000:80 + +# Open in browser +open http://localhost:3000 +``` + +**Default credentials:** + +- Username: `admin` +- Password: `admin` + +## Available Dashboards + +### Linkerd Dashboards (from linkerd-monitoring chart) + +- **Linkerd Top**: Overview of all meshed workloads +- **Linkerd Deployment**: Per-deployment metrics +- **Linkerd Pod**: Per-pod metrics +- **Linkerd Service**: Per-service metrics +- **Linkerd Namespace**: Per-namespace aggregated metrics +- **Linkerd Health**: Control plane health metrics +- **Linkerd Authority**: Destination service metrics +- **Linkerd Route**: HTTPRoute metrics (if using policy) + +### Exploring dst-load-controller Metrics + +In Grafana, go to **Explore** and query: + +**Client Controller Metrics:** + +```promql +# Active gRPC streams +client_streams_active + +# Updates received over time +rate(client_updates_received_total[1m]) + +# Current endpoints per service +client_endpoints_current + +# Stream errors +rate(client_stream_errors_total[1m]) +``` + +**Scale Controller Metrics:** + +```promql +# Current replica counts +churn_deployments_current_replicas + +# Scale operations over time +rate(churn_scale_operations_total[1m]) + +# Time spent at each scale level +churn_hold_duration_seconds +``` + +**Linkerd Destination Service Load:** + +```promql +# Request rate to destination service +rate(request_total{deployment="linkerd-destination"}[1m]) + +# Destination service latency +histogram_quantile(0.99, rate(response_latency_ms_bucket{deployment="linkerd-destination"}[1m])) + +# Active gRPC streams on destination service +grpc_server_handling_seconds_count{grpc_method="Get", grpc_service="io.linkerd.proxy.destination.Destination"} +``` + +## Custom Dashboards + +To add custom dashboards for dst-load-test: + +1. Create a dashboard JSON in `values/dashboards/` +2. Update `values/linkerd-monitoring.yaml` to include the dashboard +3. 
Run `helmfile sync` to apply + +Example structure: + +```yaml +# values/linkerd-monitoring.yaml +grafanaDashboards: + dst-load-test: + json: | + {{ readFile "values/dashboards/dst-load-test.json" | quote }} +``` + +## Troubleshooting + +**Metrics not appearing?** + +Check Prometheus targets: + +```bash +kubectl -n monitoring port-forward svc/kube-prometheus-stack-prometheus 9090:9090 +open http://localhost:9090/targets +``` + +Look for: + +- `linkerd-controller` job (should show linkerd control plane pods) +- `dst-load-controller` job (should show client/churn pods in dst-test namespace) + +**ServiceMonitors not being picked up?** + +Check ServiceMonitor labels match Prometheus selector: + +```bash +kubectl -n monitoring get prometheus kube-prometheus-stack-prometheus -o yaml | grep serviceMonitorSelector -A5 +``` + +## Metrics Reference + +### Client Controller + +| Metric | Type | Description | +|--------|------|-------------| +| `client_streams_active` | Gauge | Number of active gRPC streams to destination service | +| `client_updates_received_total` | Counter | Total updates received (Add/Remove/NoEndpoints) | +| `client_endpoints_current` | Gauge | Current number of endpoints for each service | +| `client_stream_errors_total` | Counter | Total stream errors (connection failures, etc.) | + +Labels: `target` (service FQDN), `request_type` (always "Get") + +### Scale Controller + +| Metric | Type | Description | +|--------|------|-------------| +| `churn_deployments_current_replicas` | Gauge | Current replica count for each deployment | +| `churn_scale_operations_total` | Counter | Total scale operations performed | +| `churn_hold_duration_seconds` | Histogram | Time spent holding at min/max replicas | + +Labels: `deployment`, `namespace`, `pattern` (oscillate/stable) diff --git a/test/destination-test/values/kube-prometheus-stack.yaml b/test/destination-test/values/kube-prometheus-stack.yaml new file mode 100644 index 0000000000000..8287bf22fa571 --- /dev/null +++ b/test/destination-test/values/kube-prometheus-stack.yaml @@ -0,0 +1,106 @@ +# kube-prometheus-stack configuration for load testing +# Minimal monitoring stack optimized for observing load tests + +prometheus: + prometheusSpec: + # Allow all ServiceMonitors/PodMonitors regardless of labels + podMonitorSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false + ruleSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false + + enableRemoteWriteReceiver: true + retention: 24h + retentionSize: 2GiB + + storageSpec: + emptyDir: + medium: Memory + sizeLimit: 2Gi + + # Schedule on control plane nodes if available + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + + resources: + requests: + memory: 1Gi + cpu: 250m + limits: + memory: 2Gi + cpu: 1000m + + # Scrape Linkerd control plane metrics + additionalScrapeConfigs: + - job_name: linkerd-controller + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - linkerd + - linkerd-viz + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_port_name] + action: keep + regex: admin-http + - source_labels: [__meta_kubernetes_pod_container_name] + action: replace + target_label: component + + # Scrape destination load test controllers + - job_name: dst-load-controller + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - dst-test + relabel_configs: + - source_labels: 
[__meta_kubernetes_pod_label_app_kubernetes_io_component] + action: keep + regex: (client|churn) + - source_labels: [__meta_kubernetes_pod_container_port_name] + action: keep + regex: admin-http + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] + action: replace + target_label: component + +grafana: + enabled: true + adminPassword: admin + + persistence: + enabled: true + type: pvc + storageClassName: local-path + accessModes: [ReadWriteOnce] + size: 2Gi + + sidecar: + datasources: + enabled: true + isDefaultDatasource: true + uid: prometheus + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + + service: + type: NodePort + +# Disable components we don't need for load testing +alertmanager: + enabled: false + +nodeExporter: + enabled: false # Don't need node metrics for KWOK-based tests + +kubeStateMetrics: + enabled: true # Keep this for pod/service state diff --git a/test/destination-test/values/linkerd-monitoring.yaml b/test/destination-test/values/linkerd-monitoring.yaml new file mode 100644 index 0000000000000..5e01228ce4fb3 --- /dev/null +++ b/test/destination-test/values/linkerd-monitoring.yaml @@ -0,0 +1,17 @@ +# linkerd-monitoring configuration +# This chart provides Linkerd-specific Grafana dashboards and ServiceMonitors +# for kube-prometheus-stack integration + +# The chart automatically creates ServiceMonitors for: +# - linkerd-controller (destination, identity, proxy-injector) +# - linkerd-prometheus (if using linkerd-viz) +# - linkerd-proxy (via PodMonitors) + +# Additional configuration can be added here for custom dashboards +# See: https://github.com/olix0r/linkerd-monitoring + +# Future: Add custom dst-load-test dashboards here +# grafanaDashboards: +# dst-load-test: +# json: | +# { ... custom dashboard JSON ... 
} From 95cd33f12e4e9126c53743235b4ee2cd149d7458 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Tue, 17 Mar 2026 21:10:50 +0000 Subject: [PATCH 03/17] WIP Signed-off-by: Alex Leong --- Cargo.lock | 215 ++------------ Cargo.toml | 7 +- .../api/destination/endpoint_translator.go | 192 ++---------- .../destination/watcher/endpoints_watcher.go | 280 ++++++++++++++---- .../watcher/endpoints_watcher_test.go | 48 +++ 5 files changed, 324 insertions(+), 418 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 36e9bcfeebb89..d6cb86760855f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,56 +39,12 @@ dependencies = [ "libc", ] -[[package]] -name = "anstream" -version = "0.6.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - [[package]] name = "anstyle" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" -[[package]] -name = "anstyle-parse" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" -dependencies = [ - "windows-sys 0.61.0", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" -dependencies = [ - "anstyle", - "once_cell_polyfill", - "windows-sys 0.61.0", -] - [[package]] name = "anyhow" version = "1.0.102" @@ -318,10 +274,8 @@ version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ - "anstream", "anstyle", "clap_lex", - "strsim", ] [[package]] @@ -351,12 +305,6 @@ dependencies = [ "cc", ] -[[package]] -name = "colorchoice" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" - [[package]] name = "concurrent-queue" version = "2.5.0" @@ -518,27 +466,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "dst-load-controller" -version = "0.1.0" -dependencies = [ - "anyhow", - "clap", - "futures", - "k8s-openapi", - "kube", - "kubert", - "linkerd2-proxy-api", - "prometheus-client", - "rand 0.8.5", - "serde", - "serde_json", - "tokio", - "tonic", - "tracing", - "tracing-subscriber", -] - [[package]] name = "dtoa" version = "1.0.10" @@ -694,7 +621,6 @@ checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", - "futures-executor", "futures-io", "futures-sink", "futures-task", @@ -717,17 +643,6 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" -[[package]] -name = "futures-executor" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - [[package]] name = "futures-io" version = "0.3.32" @@ -1076,7 +991,7 @@ dependencies = [ "hyper", "libc", "pin-project-lite", - "socket2 0.6.2", + "socket2", "tokio", "tower-service", "tracing", @@ -1140,12 +1055,6 @@ dependencies = [ "serde", ] -[[package]] -name = "is_terminal_polyfill" -version = "1.70.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" - [[package]] name = "itertools" version = "0.14.0" @@ -1476,7 +1385,7 @@ dependencies = [ "linkerd2-proxy-api", "maplit", "prometheus-client", - "prost-types 0.14.3", + "prost-types", "serde", "serde_json", "tokio", @@ -1614,16 +1523,17 @@ dependencies = [ [[package]] name = "linkerd2-proxy-api" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb83fdbbcea49285182d75aacc20ced8ebce60030be1d72d87b00f58f07d267d" +checksum = "ba9e3b341ca4992feaf43a4d2bdbfe2081aa3e2b9a503753544ce55242af6342" dependencies = [ "http", "ipnet", - "prost 0.13.5", - "prost-types 0.13.5", + "prost", + "prost-types", "thiserror 2.0.18", "tonic", + "tonic-prost", ] [[package]] @@ -1715,12 +1625,6 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -[[package]] -name = "once_cell_polyfill" -version = "1.70.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" - [[package]] name = "openssl" version = "0.10.75" @@ -1973,16 +1877,6 @@ dependencies = [ "syn", ] -[[package]] -name = "prost" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" -dependencies = [ - "bytes", - "prost-derive 0.13.5", -] - [[package]] name = "prost" version = "0.14.3" @@ -1990,20 +1884,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", - "prost-derive 0.14.3", -] - -[[package]] -name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn", + "prost-derive", ] [[package]] @@ -2019,22 +1900,13 @@ dependencies = [ "syn", ] -[[package]] -name = "prost-types" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost 0.13.5", -] - [[package]] name = "prost-types" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ - "prost 0.14.3", + "prost", ] [[package]] @@ -2052,24 +1924,13 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha 0.9.0", + "rand_chacha", "rand_core 0.9.3", ] @@ -2084,16 +1945,6 @@ dependencies = [ "rand_core 0.10.0", ] -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - [[package]] name = "rand_chacha" version = "0.9.0" @@ -2104,15 +1955,6 @@ dependencies = [ "rand_core 0.9.3", ] -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.16", -] - [[package]] name = "rand_core" version = "0.9.3" @@ -2511,16 +2353,6 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" -[[package]] -name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - [[package]] name = "socket2" version = "0.6.2" @@ -2621,7 +2453,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.2", + "socket2", "tokio-macros", "windows-sys 0.61.0", ] @@ -2710,9 +2542,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.13.1" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" dependencies = [ "async-trait", "axum", @@ -2727,8 +2559,8 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost 0.13.5", - "socket2 0.5.10", + "socket2", + "sync_wrapper", "tokio", "tokio-stream", "tower", @@ -2737,6 +2569,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "tonic-prost" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.5.3" @@ -2926,12 +2769,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf8parse" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" - [[package]] name = "valuable" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index 21e5b3fcf4909..f54cee5f841ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,6 @@ members = [ "policy-controller/k8s/status", "policy-controller/runtime", "policy-test", - "test/destination-test/dst-load-controller", ] [profile.release] @@ -23,7 +22,7 @@ k8s-openapi = { version = "0.25", features = ["v1_33"] } kube = { version = "1.1", default-features = false } kubert = 
{ version = "0.25", default-features = false } prometheus-client = { version = "0.23", default-features = false } -tonic = { version = "0.13", default-features = false } +tonic = { version = "0.14", default-features = false } tower = { version = "0.5", default-features = false } linkerd-policy-controller = { path = "./policy-controller" } @@ -48,5 +47,5 @@ path = "./policy-controller/runtime" default-features = false [workspace.dependencies.linkerd2-proxy-api] -version = "0.17.0" -features = ["destination", "inbound", "outbound"] +version = "0.18.0" +features = ["inbound", "outbound"] diff --git a/controller/api/destination/endpoint_translator.go b/controller/api/destination/endpoint_translator.go index 079678f36fc97..c1066f9565cdf 100644 --- a/controller/api/destination/endpoint_translator.go +++ b/controller/api/destination/endpoint_translator.go @@ -3,7 +3,6 @@ package destination import ( "fmt" "net/netip" - "reflect" pb "github.com/linkerd/linkerd2-proxy-api/go/destination" "github.com/linkerd/linkerd2-proxy-api/go/net" @@ -49,12 +48,10 @@ type ( meshedHTTP2ClientParams *pb.Http2ClientParams - availableEndpoints watcher.AddressSet - filteredSnapshot watcher.AddressSet - stream pb.Destination_GetServer - endStream chan struct{} - log *logging.Entry - overflowCounter prometheus.Counter + stream pb.Destination_GetServer + endStream chan struct{} + log *logging.Entry + overflowCounter prometheus.Counter updates chan interface{} stop chan struct{} @@ -110,9 +107,6 @@ func newEndpointTranslator( if err != nil { log.Errorf("Failed to get node topology zone for node %s: %s", srcNodeName, err) } - availableEndpoints := newEmptyAddressSet() - - filteredSnapshot := newEmptyAddressSet() counter, err := updatesQueueOverflowCounter.GetMetricWith(prometheus.Labels{"service": service}) if err != nil { @@ -132,8 +126,6 @@ func newEndpointTranslator( extEndpointZoneWeights, meshedHTTP2ClientParams, - availableEndpoints, - filteredSnapshot, stream, endStream, log, @@ -147,6 +139,22 @@ func (et *endpointTranslator) Add(set watcher.AddressSet) { et.enqueueUpdate(&addUpdate{set}) } +func (et *endpointTranslator) NodeName() string { + return et.nodeName +} + +func (et *endpointTranslator) NodeTopologyZone() string { + return et.nodeTopologyZone +} + +func (et *endpointTranslator) EnableEndpointFiltering() bool { + return et.enableEndpointFiltering +} + +func (et *endpointTranslator) EnableIPv6() bool { + return et.enableIPv6 +} + func (et *endpointTranslator) Remove(set watcher.AddressSet) { et.enqueueUpdate(&removeUpdate{set}) } @@ -214,54 +222,18 @@ func (et *endpointTranslator) DrainAndStop() { func (et *endpointTranslator) processUpdate(update interface{}) { switch update := update.(type) { case *addUpdate: - et.add(update.set) + et.sendClientAdd(update.set) case *removeUpdate: - et.remove(update.set) + et.sendClientRemove(update.set) case *noEndpointsUpdate: et.noEndpoints(update.exists) } } -func (et *endpointTranslator) add(set watcher.AddressSet) { - for id, address := range set.Addresses { - et.availableEndpoints.Addresses[id] = address - } - - et.availableEndpoints.Labels = set.Labels - et.availableEndpoints.LocalTrafficPolicy = set.LocalTrafficPolicy - - et.sendFilteredUpdate() -} - -func (et *endpointTranslator) remove(set watcher.AddressSet) { - for id := range set.Addresses { - delete(et.availableEndpoints.Addresses, id) - } - - et.sendFilteredUpdate() -} - func (et *endpointTranslator) noEndpoints(exists bool) { et.log.Debugf("NoEndpoints(%+v)", exists) - et.availableEndpoints.Addresses = 
map[watcher.ID]*watcher.Address{} - - et.sendFilteredUpdate() -} - -func (et *endpointTranslator) sendFilteredUpdate() { - filtered := et.filterAddresses() - filtered = et.selectAddressFamily(filtered) - diffAdd, diffRemove := et.diffEndpoints(filtered) - - if len(diffAdd.Addresses) > 0 { - et.sendClientAdd(diffAdd) - } - if len(diffRemove.Addresses) > 0 { - et.sendClientRemove(diffRemove) - } - - et.filteredSnapshot = filtered + //et.sendFilteredUpdate() } func (et *endpointTranslator) selectAddressFamily(addresses watcher.AddressSet) watcher.AddressSet { @@ -291,124 +263,6 @@ func (et *endpointTranslator) selectAddressFamily(addresses watcher.AddressSet) } } -// filterAddresses is responsible for filtering endpoints based on the node's -// topology zone. The client will only receive endpoints with the same -// consumption zone as the node. An endpoints consumption zone is set -// by its Hints field and can be different than its actual Topology zone. -// when service.spec.internalTrafficPolicy is set to local, Topology Aware -// Hints are not used. -func (et *endpointTranslator) filterAddresses() watcher.AddressSet { - filtered := make(map[watcher.ID]*watcher.Address) - - // If endpoint filtering is disabled, return all available addresses. - if !et.enableEndpointFiltering { - for k, v := range et.availableEndpoints.Addresses { - filtered[k] = v - } - return watcher.AddressSet{ - Addresses: filtered, - Labels: et.availableEndpoints.Labels, - } - } - - // If service.spec.internalTrafficPolicy is set to local, filter and return the addresses - // for local node only - if et.availableEndpoints.LocalTrafficPolicy { - et.log.Debugf("Filtering through addresses that should be consumed by node %s", et.nodeName) - for id, address := range et.availableEndpoints.Addresses { - if address.Pod != nil && address.Pod.Spec.NodeName == et.nodeName { - filtered[id] = address - } - } - et.log.Debugf("Filtered from %d to %d addresses", len(et.availableEndpoints.Addresses), len(filtered)) - return watcher.AddressSet{ - Addresses: filtered, - Labels: et.availableEndpoints.Labels, - LocalTrafficPolicy: et.availableEndpoints.LocalTrafficPolicy, - } - } - // If any address does not have a hint, then all hints are ignored and all - // available addresses are returned. This replicates kube-proxy behavior - // documented in the KEP: https://github.com/kubernetes/enhancements/blob/master/keps/sig-network/2433-topology-aware-hints/README.md#kube-proxy - for _, address := range et.availableEndpoints.Addresses { - if len(address.ForZones) == 0 { - for k, v := range et.availableEndpoints.Addresses { - filtered[k] = v - } - et.log.Debugf("Hints not available on endpointslice. Zone Filtering disabled. Falling back to routing to all pods") - return watcher.AddressSet{ - Addresses: filtered, - Labels: et.availableEndpoints.Labels, - LocalTrafficPolicy: et.availableEndpoints.LocalTrafficPolicy, - } - } - } - - // Each address that has a hint matching the node's zone should be added - // to the set of addresses that will be returned. 
- et.log.Debugf("Filtering through addresses that should be consumed by zone %s", et.nodeTopologyZone) - for id, address := range et.availableEndpoints.Addresses { - for _, zone := range address.ForZones { - if zone.Name == et.nodeTopologyZone { - filtered[id] = address - } - } - } - if len(filtered) > 0 { - et.log.Debugf("Filtered from %d to %d addresses", len(et.availableEndpoints.Addresses), len(filtered)) - return watcher.AddressSet{ - Addresses: filtered, - Labels: et.availableEndpoints.Labels, - LocalTrafficPolicy: et.availableEndpoints.LocalTrafficPolicy, - } - } - - // If there were no filtered addresses, then fall to using endpoints from - // all zones. - for k, v := range et.availableEndpoints.Addresses { - filtered[k] = v - } - return watcher.AddressSet{ - Addresses: filtered, - Labels: et.availableEndpoints.Labels, - LocalTrafficPolicy: et.availableEndpoints.LocalTrafficPolicy, - } -} - -// diffEndpoints calculates the difference between the filtered set of -// endpoints in the current (Add/Remove) operation and the snapshot of -// previously filtered endpoints. This diff allows the client to receive only -// the endpoints that match the topological zone, by adding new endpoints and -// removing stale ones. -func (et *endpointTranslator) diffEndpoints(filtered watcher.AddressSet) (watcher.AddressSet, watcher.AddressSet) { - add := make(map[watcher.ID]*watcher.Address) - remove := make(map[watcher.ID]*watcher.Address) - - for id, new := range filtered.Addresses { - old, ok := et.filteredSnapshot.Addresses[id] - if !ok { - add[id] = new - } else if !reflect.DeepEqual(old, new) { - add[id] = new - } - } - - for id, address := range et.filteredSnapshot.Addresses { - if _, ok := filtered.Addresses[id]; !ok { - remove[id] = address - } - } - - return watcher.AddressSet{ - Addresses: add, - Labels: filtered.Labels, - }, - watcher.AddressSet{ - Addresses: remove, - Labels: filtered.Labels, - } -} - func (et *endpointTranslator) sendClientAdd(set watcher.AddressSet) { addrs := []*pb.WeightedAddr{} for _, address := range set.Addresses { diff --git a/controller/api/destination/watcher/endpoints_watcher.go b/controller/api/destination/watcher/endpoints_watcher.go index 415e883d902ca..a53e01ab62c62 100644 --- a/controller/api/destination/watcher/endpoints_watcher.go +++ b/controller/api/destination/watcher/endpoints_watcher.go @@ -77,6 +77,19 @@ type ( hostname string } + filterKey struct { + nodeName string + nodeTopologyZone string + enableEndpointFiltering bool + enableIPv6 bool + } + + filteredListenerGroup struct { + key filterKey + snapshot AddressSet + listeners []EndpointUpdateListener + } + // EndpointsWatcher watches all endpoints and services in the Kubernetes // cluster. Listeners can subscribe to a particular service and port and // EndpointsWatcher will publish the address set and all future changes for @@ -143,16 +156,19 @@ type ( enableEndpointSlices bool exists bool addresses AddressSet - listeners []EndpointUpdateListener + filteredListeners map[filterKey]*filteredListenerGroup metrics endpointsMetrics localTrafficPolicy bool } - // EndpointUpdateListener is the interface that subscribers must implement. 
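+	// EndpointUpdateListener is the interface that subscribers must implement.
+	// Besides receiving Add/Remove/NoEndpoints updates, a listener reports the
+	// node name, topology zone, and address-family settings that the watcher
+	// uses to group listeners into per-filterKey groups, so zone and IP-family
+	// filtering is computed once per group rather than once per listener.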
EndpointUpdateListener interface { Add(set AddressSet) Remove(set AddressSet) NoEndpoints(exists bool) + NodeName() string + NodeTopologyZone() string + EnableEndpointFiltering() bool + EnableIPv6() bool } ) @@ -690,7 +706,7 @@ func (sp *servicePublisher) unsubscribe(srcPort Port, hostname string, listener port, ok := sp.ports[key] if ok { port.unsubscribe(listener) - if len(port.listeners) == 0 { + if port.totalListeners() == 0 { endpointsVecs.unregister(sp.metricsLabels(srcPort, hostname)) delete(sp.ports, key) } @@ -716,7 +732,7 @@ func (sp *servicePublisher) newPortPublisher(srcPort Port, hostname string) (*po return nil, err } port := &portPublisher{ - listeners: []EndpointUpdateListener{}, + filteredListeners: map[filterKey]*filteredListenerGroup{}, targetPort: targetPort, srcPort: srcPort, hostname: hostname, @@ -779,19 +795,9 @@ func (sp *servicePublisher) updateServer(oldServer, newServer *v1beta3.Server) { func (pp *portPublisher) updateEndpoints(endpoints *corev1.Endpoints) { newAddressSet := pp.endpointsToAddresses(endpoints) if len(newAddressSet.Addresses) == 0 { - for _, listener := range pp.listeners { - listener.NoEndpoints(true) - } + pp.publishNoEndpoints(true) } else { - add, remove := diffAddresses(pp.addresses, newAddressSet) - for _, listener := range pp.listeners { - if len(remove.Addresses) > 0 { - listener.Remove(remove) - } - if len(add.Addresses) > 0 { - listener.Add(add) - } - } + pp.publishAddressChange(newAddressSet) } pp.addresses = newAddressSet pp.exists = true @@ -808,12 +814,7 @@ func (pp *portPublisher) addEndpointSlice(slice *discovery.EndpointSlice) { } } - add, _ := diffAddresses(pp.addresses, newAddressSet) - if len(add.Addresses) > 0 { - for _, listener := range pp.listeners { - listener.Add(add) - } - } + pp.publishAddressChange(newAddressSet) // even if the ES doesn't have addresses yet we need to create a new // pp.addresses entry with the appropriate Labels and LocalTrafficPolicy, @@ -847,15 +848,7 @@ func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, updatedAddressSet.Addresses[id] = address } - add, remove := diffAddresses(pp.addresses, updatedAddressSet) - for _, listener := range pp.listeners { - if len(remove.Addresses) > 0 { - listener.Remove(remove) - } - if len(add.Addresses) > 0 { - listener.Add(add) - } - } + pp.publishAddressChange(updatedAddressSet) pp.addresses = updatedAddressSet pp.exists = true @@ -1226,9 +1219,7 @@ func (pp *portPublisher) resolveTargetPort(subset corev1.EndpointSubset) Port { func (pp *portPublisher) updateLocalTrafficPolicy(localTrafficPolicy bool) { pp.localTrafficPolicy = localTrafficPolicy pp.addresses.LocalTrafficPolicy = localTrafficPolicy - for _, listener := range pp.listeners { - listener.Add(pp.addresses.shallowCopy()) - } + pp.publishFilteredSnapshots() } func (pp *portPublisher) updatePort(targetPort namedPort) { @@ -1258,14 +1249,22 @@ func (pp *portPublisher) updatePort(targetPort namedPort) { } func (pp *portPublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { + updatedAddressSet := AddressSet{ + Addresses: make(map[ID]*Address), + Labels: pp.addresses.Labels, + LocalTrafficPolicy: pp.localTrafficPolicy, + } + for id, address := range pp.addresses.Addresses { + updatedAddressSet.Addresses[id] = address + } + addrSet := pp.endpointSliceToAddresses(es) for id := range addrSet.Addresses { - delete(pp.addresses.Addresses, id) + delete(updatedAddressSet.Addresses, id) } - for _, listener := range pp.listeners { - listener.Remove(addrSet) - } + 
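+	// Publish the post-delete address set; each listener group diffs it
+	// against its own filtered snapshot, so subscribers receive only the
+	// delta relative to what they were last sent.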
pp.publishAddressChange(updatedAddressSet) + pp.addresses = updatedAddressSet if len(pp.addresses.Addresses) == 0 { pp.noEndpoints(false) @@ -1280,9 +1279,7 @@ func (pp *portPublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { func (pp *portPublisher) noEndpoints(exists bool) { pp.exists = exists pp.addresses = AddressSet{} - for _, listener := range pp.listeners { - listener.NoEndpoints(exists) - } + pp.publishNoEndpoints(exists) pp.metrics.incUpdates() pp.metrics.setExists(exists) @@ -1290,32 +1287,43 @@ func (pp *portPublisher) noEndpoints(exists bool) { } func (pp *portPublisher) subscribe(listener EndpointUpdateListener) { + group := pp.filteredListenerGroup(listener) if pp.exists { if len(pp.addresses.Addresses) > 0 { - listener.Add(pp.addresses.shallowCopy()) + filteredSet := group.filterAddresses(pp.addresses) + group.snapshot = filteredSet + if len(filteredSet.Addresses) > 0 { + listener.Add(filteredSet.shallowCopy()) + } } else { listener.NoEndpoints(true) } } else { listener.NoEndpoints(false) } - pp.listeners = append(pp.listeners, listener) + group.listeners = append(group.listeners, listener) - pp.metrics.setSubscribers(len(pp.listeners)) + pp.metrics.setSubscribers(pp.totalListeners()) } func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener) { - for i, e := range pp.listeners { - if e == listener { - n := len(pp.listeners) - pp.listeners[i] = pp.listeners[n-1] - pp.listeners[n-1] = nil - pp.listeners = pp.listeners[:n-1] - break + key := makeFilterKey(listener) + group, ok := pp.filteredListeners[key] + if ok { + for i, existing := range group.listeners { + if existing == listener { + n := len(group.listeners) + group.listeners[i] = group.listeners[n-1] + group.listeners[n-1] = nil + group.listeners = group.listeners[:n-1] + break + } + } + if len(group.listeners) == 0 { + delete(pp.filteredListeners, key) } } - - pp.metrics.setSubscribers(len(pp.listeners)) + pp.metrics.setSubscribers(pp.totalListeners()) } func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { updated := false @@ -1334,13 +1342,173 @@ func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { } } if updated { - for _, listener := range pp.listeners { - listener.Add(pp.addresses.shallowCopy()) - } + pp.publishFilteredSnapshots() pp.metrics.incUpdates() } } +func makeFilterKey(listener EndpointUpdateListener) filterKey { + return filterKey{ + nodeName: listener.NodeName(), + nodeTopologyZone: listener.NodeTopologyZone(), + enableEndpointFiltering: listener.EnableEndpointFiltering(), + enableIPv6: listener.EnableIPv6(), + } +} + +func (pp *portPublisher) filteredListenerGroup(listener EndpointUpdateListener) *filteredListenerGroup { + key := makeFilterKey(listener) + group, ok := pp.filteredListeners[key] + if !ok { + group = &filteredListenerGroup{ + key: key, + snapshot: AddressSet{Addresses: make(map[ID]*Address)}, + } + pp.filteredListeners[key] = group + } + return group +} + +func (pp *portPublisher) totalListeners() int { + total := 0 + for _, group := range pp.filteredListeners { + total += len(group.listeners) + } + return total +} + +func (pp *portPublisher) publishAddressChange(newAddressSet AddressSet) { + for _, group := range pp.filteredListeners { + group.publishDiff(newAddressSet) + } +} + +func (pp *portPublisher) publishFilteredSnapshots() { + for _, group := range pp.filteredListeners { + group.publishDiff(pp.addresses) + } +} + +func (pp *portPublisher) publishNoEndpoints(exists bool) { + for _, group := range 
pp.filteredListeners { + group.publishNoEndpoints(exists) + } +} + +func (group *filteredListenerGroup) publishDiff(addresses AddressSet) { + filtered := group.filterAddresses(addresses) + add, remove := diffAddresses(group.snapshot, filtered) + group.snapshot = filtered + + for _, listener := range group.listeners { + if len(remove.Addresses) > 0 { + listener.Remove(remove) + } + if len(add.Addresses) > 0 { + listener.Add(add) + } + } +} + +func (group *filteredListenerGroup) publishNoEndpoints(exists bool) { + remove := group.snapshot + group.snapshot = AddressSet{Addresses: make(map[ID]*Address)} + + for _, listener := range group.listeners { + if len(remove.Addresses) > 0 { + listener.Remove(remove) + } + listener.NoEndpoints(exists) + } +} + +func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) AddressSet { + filtered := make(map[ID]*Address) + + if !group.key.enableEndpointFiltering { + for k, v := range addresses.Addresses { + filtered[k] = v + } + return selectAddressFamily(AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + LocalTrafficPolicy: addresses.LocalTrafficPolicy, + }, group.key.enableIPv6) + } + + if addresses.LocalTrafficPolicy { + for id, address := range addresses.Addresses { + if address.Pod != nil && address.Pod.Spec.NodeName == group.key.nodeName { + filtered[id] = address + } + } + return selectAddressFamily(AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + LocalTrafficPolicy: addresses.LocalTrafficPolicy, + }, group.key.enableIPv6) + } + + for _, address := range addresses.Addresses { + if len(address.ForZones) == 0 { + for k, v := range addresses.Addresses { + filtered[k] = v + } + return selectAddressFamily(AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + LocalTrafficPolicy: addresses.LocalTrafficPolicy, + }, group.key.enableIPv6) + } + } + + for id, address := range addresses.Addresses { + for _, zone := range address.ForZones { + if zone.Name == group.key.nodeTopologyZone { + filtered[id] = address + break + } + } + } + + if len(filtered) == 0 { + for k, v := range addresses.Addresses { + filtered[k] = v + } + } + + return selectAddressFamily(AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + LocalTrafficPolicy: addresses.LocalTrafficPolicy, + }, group.key.enableIPv6) +} + +func selectAddressFamily(addresses AddressSet, enableIPv6 bool) AddressSet { + filtered := make(map[ID]*Address) + for id, addr := range addresses.Addresses { + if id.IPFamily == corev1.IPv6Protocol && !enableIPv6 { + continue + } + + if id.IPFamily == corev1.IPv4Protocol && enableIPv6 { + altID := id + altID.IPFamily = corev1.IPv6Protocol + if _, ok := addresses.Addresses[altID]; ok { + continue + } + } + + filtered[id] = addr + } + + return AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + LocalTrafficPolicy: addresses.LocalTrafficPolicy, + } +} + func (pp *portPublisher) isAddressSelected(address *Address, server *v1beta3.Server) bool { if server == nil { return false diff --git a/controller/api/destination/watcher/endpoints_watcher_test.go b/controller/api/destination/watcher/endpoints_watcher_test.go index 26db1b496b0b1..91c28ce2ff503 100644 --- a/controller/api/destination/watcher/endpoints_watcher_test.go +++ b/controller/api/destination/watcher/endpoints_watcher_test.go @@ -84,6 +84,10 @@ func (bel *bufferingEndpointListener) Add(set AddressSet) { bel.localTrafficPolicy = set.LocalTrafficPolicy } +func (bel *bufferingEndpointListener) AddFiltered(set AddressSet) { + bel.Add(set) +} + func 
(bel *bufferingEndpointListener) Remove(set AddressSet) { bel.Lock() defer bel.Unlock() @@ -93,6 +97,10 @@ func (bel *bufferingEndpointListener) Remove(set AddressSet) { bel.localTrafficPolicy = set.LocalTrafficPolicy } +func (bel *bufferingEndpointListener) RemoveFiltered(set AddressSet) { + bel.Remove(set) +} + func (bel *bufferingEndpointListener) NoEndpoints(exists bool) { bel.Lock() defer bel.Unlock() @@ -100,6 +108,22 @@ func (bel *bufferingEndpointListener) NoEndpoints(exists bool) { bel.noEndpointsExist = exists } +func (bel *bufferingEndpointListener) NodeName() string { + return "" +} + +func (bel *bufferingEndpointListener) NodeTopologyZone() string { + return "" +} + +func (bel *bufferingEndpointListener) EnableEndpointFiltering() bool { + return false +} + +func (bel *bufferingEndpointListener) EnableIPv6() bool { + return false +} + type bufferingEndpointListenerWithResVersion struct { added []string removed []string @@ -140,6 +164,10 @@ func (bel *bufferingEndpointListenerWithResVersion) Add(set AddressSet) { } } +func (bel *bufferingEndpointListenerWithResVersion) AddFiltered(set AddressSet) { + bel.Add(set) +} + func (bel *bufferingEndpointListenerWithResVersion) Remove(set AddressSet) { bel.Lock() defer bel.Unlock() @@ -148,8 +176,28 @@ func (bel *bufferingEndpointListenerWithResVersion) Remove(set AddressSet) { } } +func (bel *bufferingEndpointListenerWithResVersion) RemoveFiltered(set AddressSet) { + bel.Remove(set) +} + func (bel *bufferingEndpointListenerWithResVersion) NoEndpoints(exists bool) {} +func (bel *bufferingEndpointListenerWithResVersion) NodeName() string { + return "" +} + +func (bel *bufferingEndpointListenerWithResVersion) NodeTopologyZone() string { + return "" +} + +func (bel *bufferingEndpointListenerWithResVersion) EnableEndpointFiltering() bool { + return false +} + +func (bel *bufferingEndpointListenerWithResVersion) EnableIPv6() bool { + return false +} + func TestEndpointsWatcher(t *testing.T) { for _, tt := range []struct { serviceType string From b08fc505f680f4bc1b017e27efd0d43473d9be7f Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Thu, 9 Apr 2026 00:47:50 +0000 Subject: [PATCH 04/17] WIP Signed-off-by: Alex Leong --- Dockerfile.load-controller | 24 - .../api/destination/endpoint_translator.go | 67 - .../destination/federated_service_watcher.go | 48 +- controller/api/destination/server.go | 26 +- controller/api/destination/watcher/address.go | 60 + .../api/destination/watcher/cluster_store.go | 9 +- .../destination/watcher/endpoints_watcher.go | 1130 +---------------- .../watcher/filtered_listener_group.go | 138 ++ .../api/destination/watcher/port_publisher.go | 689 ++++++++++ .../api/destination/watcher/prometheus.go | 5 +- .../destination/watcher/service_publisher.go | 222 ++++ controller/cmd/destination/main.go | 2 +- test/destination-test/Dockerfile | 24 - test/destination-test/README.md | 247 ---- test/destination-test/chart/Chart.yaml | 12 - test/destination-test/chart/README.md | 127 -- .../chart/templates/_helpers.tpl | 49 - .../chart/templates/churn.yaml | 71 -- .../chart/templates/client.yaml | 134 -- .../chart/templates/rbac.yaml | 52 - .../chart/templates/test-services.yaml | 64 - test/destination-test/chart/values.yaml | 126 -- .../dst-load-controller/Cargo.toml | 36 - .../dst-load-controller/src/churn.rs | 227 ---- .../dst-load-controller/src/client.rs | 375 ------ .../dst-load-controller/src/main.rs | 281 ---- test/destination-test/hack/gen-certs.sh | 81 -- test/destination-test/hack/kwok-node.yaml | 42 - 
test/destination-test/helmfile.yaml.gotmpl | 118 -- test/destination-test/values/README.md | 153 --- .../values/kube-prometheus-stack.yaml | 106 -- .../values/linkerd-monitoring.yaml | 17 - 32 files changed, 1191 insertions(+), 3571 deletions(-) delete mode 100644 Dockerfile.load-controller create mode 100644 controller/api/destination/watcher/address.go create mode 100644 controller/api/destination/watcher/filtered_listener_group.go create mode 100644 controller/api/destination/watcher/port_publisher.go create mode 100644 controller/api/destination/watcher/service_publisher.go delete mode 100644 test/destination-test/Dockerfile delete mode 100644 test/destination-test/README.md delete mode 100644 test/destination-test/chart/Chart.yaml delete mode 100644 test/destination-test/chart/README.md delete mode 100644 test/destination-test/chart/templates/_helpers.tpl delete mode 100644 test/destination-test/chart/templates/churn.yaml delete mode 100644 test/destination-test/chart/templates/client.yaml delete mode 100644 test/destination-test/chart/templates/rbac.yaml delete mode 100644 test/destination-test/chart/templates/test-services.yaml delete mode 100644 test/destination-test/chart/values.yaml delete mode 100644 test/destination-test/dst-load-controller/Cargo.toml delete mode 100644 test/destination-test/dst-load-controller/src/churn.rs delete mode 100644 test/destination-test/dst-load-controller/src/client.rs delete mode 100644 test/destination-test/dst-load-controller/src/main.rs delete mode 100755 test/destination-test/hack/gen-certs.sh delete mode 100644 test/destination-test/hack/kwok-node.yaml delete mode 100644 test/destination-test/helmfile.yaml.gotmpl delete mode 100644 test/destination-test/values/README.md delete mode 100644 test/destination-test/values/kube-prometheus-stack.yaml delete mode 100644 test/destination-test/values/linkerd-monitoring.yaml diff --git a/Dockerfile.load-controller b/Dockerfile.load-controller deleted file mode 100644 index 9df405578f15b..0000000000000 --- a/Dockerfile.load-controller +++ /dev/null @@ -1,24 +0,0 @@ -# Build stage -FROM docker.io/rust:1.90-bookworm AS build - -WORKDIR /build - -RUN mkdir -p target/bin -COPY Cargo.toml Cargo.lock . 
-COPY policy-controller ./policy-controller -COPY policy-test ./policy-test -COPY test ./test - -# Build the binary -RUN cargo build --release --bin dst-load-controller - -# Runtime stage -FROM debian:bookworm-slim - -RUN apt-get update && apt-get install -y \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -COPY --from=build /build/target/release/dst-load-controller /usr/local/bin/dst-load-controller - -ENTRYPOINT ["/usr/local/bin/dst-load-controller"] diff --git a/controller/api/destination/endpoint_translator.go b/controller/api/destination/endpoint_translator.go index c1066f9565cdf..e5686ed02bb17 100644 --- a/controller/api/destination/endpoint_translator.go +++ b/controller/api/destination/endpoint_translator.go @@ -36,13 +36,10 @@ type ( controllerNS string identityTrustDomain string nodeTopologyZone string - nodeName string defaultOpaquePorts map[uint32]struct{} forceOpaqueTransport, enableH2Upgrade, - enableEndpointFiltering, - enableIPv6, extEndpointZoneWeights bool @@ -64,10 +61,6 @@ type ( removeUpdate struct { set watcher.AddressSet } - - noEndpointsUpdate struct { - exists bool - } ) var updatesQueueOverflowCounter = promauto.NewCounterVec( @@ -85,8 +78,6 @@ func newEndpointTranslator( identityTrustDomain string, forceOpaqueTransport, enableH2Upgrade, - enableEndpointFiltering, - enableIPv6, extEndpointZoneWeights bool, meshedHTTP2ClientParams *pb.Http2ClientParams, service string, @@ -117,12 +108,9 @@ func newEndpointTranslator( controllerNS, identityTrustDomain, nodeTopologyZone, - srcNodeName, defaultOpaquePorts, forceOpaqueTransport, enableH2Upgrade, - enableEndpointFiltering, - enableIPv6, extEndpointZoneWeights, meshedHTTP2ClientParams, @@ -139,30 +127,10 @@ func (et *endpointTranslator) Add(set watcher.AddressSet) { et.enqueueUpdate(&addUpdate{set}) } -func (et *endpointTranslator) NodeName() string { - return et.nodeName -} - -func (et *endpointTranslator) NodeTopologyZone() string { - return et.nodeTopologyZone -} - -func (et *endpointTranslator) EnableEndpointFiltering() bool { - return et.enableEndpointFiltering -} - -func (et *endpointTranslator) EnableIPv6() bool { - return et.enableIPv6 -} - func (et *endpointTranslator) Remove(set watcher.AddressSet) { et.enqueueUpdate(&removeUpdate{set}) } -func (et *endpointTranslator) NoEndpoints(exists bool) { - et.enqueueUpdate(&noEndpointsUpdate{exists}) -} - // Add, Remove, and NoEndpoints are called from a client-go informer callback // and therefore must not block. For each of these, we enqueue an update in // a channel so that it can be processed asyncronously. 
To ensure that enqueuing @@ -225,41 +193,6 @@ func (et *endpointTranslator) processUpdate(update interface{}) { et.sendClientAdd(update.set) case *removeUpdate: et.sendClientRemove(update.set) - case *noEndpointsUpdate: - et.noEndpoints(update.exists) - } -} - -func (et *endpointTranslator) noEndpoints(exists bool) { - et.log.Debugf("NoEndpoints(%+v)", exists) - - //et.sendFilteredUpdate() -} - -func (et *endpointTranslator) selectAddressFamily(addresses watcher.AddressSet) watcher.AddressSet { - filtered := make(map[watcher.ID]*watcher.Address) - for id, addr := range addresses.Addresses { - if id.IPFamily == corev1.IPv6Protocol && !et.enableIPv6 { - continue - } - - if id.IPFamily == corev1.IPv4Protocol && et.enableIPv6 { - // Only consider IPv4 address for which there's not already an IPv6 - // alternative - altID := id - altID.IPFamily = corev1.IPv6Protocol - if _, ok := addresses.Addresses[altID]; ok { - continue - } - } - - filtered[id] = addr - } - - return watcher.AddressSet{ - Addresses: filtered, - Labels: addresses.Labels, - LocalTrafficPolicy: addresses.LocalTrafficPolicy, } } diff --git a/controller/api/destination/federated_service_watcher.go b/controller/api/destination/federated_service_watcher.go index 96bbd22654e27..5a8a6a03f8135 100644 --- a/controller/api/destination/federated_service_watcher.go +++ b/controller/api/destination/federated_service_watcher.go @@ -266,17 +266,27 @@ func (fs *federatedService) delete() { defer fs.Unlock() for _, subscriber := range fs.subscribers { + remoteFilterKey := watcher.FilterKey{ + Hostname: subscriber.instanceID, + NodeName: subscriber.nodeName, + EnableEndpointFiltering: false, + } for id, translator := range subscriber.remoteTranslators { remoteWatcher, _, found := fs.clusterStore.Get(id.cluster) if !found { fs.log.Errorf("Failed to get remote cluster %s", id.cluster) continue } - remoteWatcher.Unsubscribe(id.service, subscriber.port, subscriber.instanceID, translator) + remoteWatcher.Unsubscribe(id.service, subscriber.port, remoteFilterKey, translator) translator.Stop() } + localFilterKey := watcher.FilterKey{ + Hostname: subscriber.instanceID, + NodeName: subscriber.nodeName, + EnableEndpointFiltering: true, // Endpoint filtering is enabled for local discovery. + } for localDiscovery, translator := range subscriber.localTranslators { - fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, subscriber.instanceID, translator) + fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, localFilterKey, translator) translator.Stop() } close(subscriber.endStream) @@ -353,8 +363,6 @@ func (fs *federatedService) remoteDiscoverySubscribe( remoteConfig.TrustDomain, fs.config.ForceOpaqueTransport, fs.config.EnableH2Upgrade, - false, // Disable endpoint filtering for remote discovery. 
- fs.config.EnableIPv6, fs.config.ExtEndpointZoneWeights, fs.config.MeshedHttp2ClientParams, fmt.Sprintf("%s.%s.svc.%s:%d", id.service, fs.namespace, remoteConfig.ClusterDomain, subscriber.port), @@ -375,7 +383,12 @@ func (fs *federatedService) remoteDiscoverySubscribe( subscriber.remoteTranslators[id] = translator fs.log.Debugf("Subscribing to remote discovery service %s in cluster %s", id.service, id.cluster) - err = remoteWatcher.Subscribe(watcher.ServiceID{Namespace: id.service.Namespace, Name: id.service.Name}, subscriber.port, subscriber.instanceID, translator) + filterKey := watcher.FilterKey{ + Hostname: subscriber.instanceID, + NodeName: subscriber.nodeName, + EnableEndpointFiltering: false, // Endpoint filtering is disabled for remote discovery. + } + err = remoteWatcher.Subscribe(watcher.ServiceID{Namespace: id.service.Namespace, Name: id.service.Name}, subscriber.port, filterKey, translator) if err != nil { fs.log.Errorf("Failed to subscribe to remote discovery service %q in cluster %s: %s", id.service.Name, id.cluster, err) } @@ -393,8 +406,12 @@ func (fs *federatedService) remoteDiscoveryUnsubscribe( translator := subscriber.remoteTranslators[id] fs.log.Debugf("Unsubscribing from remote discovery service %s in cluster %s", id.service, id.cluster) - remoteWatcher.Unsubscribe(id.service, subscriber.port, subscriber.instanceID, translator) - translator.NoEndpoints(true) + filterKey := watcher.FilterKey{ + Hostname: subscriber.instanceID, + NodeName: subscriber.nodeName, + EnableEndpointFiltering: false, // Endpoint filtering is disabled for remote discovery. + } + remoteWatcher.Unsubscribe(id.service, subscriber.port, filterKey, translator) translator.DrainAndStop() delete(subscriber.remoteTranslators, id) } @@ -408,8 +425,6 @@ func (fs *federatedService) localDiscoverySubscribe( fs.config.IdentityTrustDomain, fs.config.ForceOpaqueTransport, fs.config.EnableH2Upgrade, - true, - fs.config.EnableIPv6, fs.config.ExtEndpointZoneWeights, fs.config.MeshedHttp2ClientParams, localDiscovery, @@ -429,7 +444,12 @@ func (fs *federatedService) localDiscoverySubscribe( subscriber.localTranslators[localDiscovery] = translator fs.log.Debugf("Subscribing to local discovery service %s", localDiscovery) - err = fs.localEndpoints.Subscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, subscriber.instanceID, translator) + filterKey := watcher.FilterKey{ + Hostname: subscriber.instanceID, + NodeName: subscriber.nodeName, + EnableEndpointFiltering: true, // Endpoint filtering is enabled for local discovery. + } + err = fs.localEndpoints.Subscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, filterKey, translator) if err != nil { fs.log.Errorf("Failed to subscribe to %s: %s", localDiscovery, err) } @@ -442,8 +462,12 @@ func (fs *federatedService) localDiscoveryUnsubscribe( translator, found := subscriber.localTranslators[localDiscovery] if found { fs.log.Debugf("Unsubscribing to local discovery service %s", localDiscovery) - fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, subscriber.instanceID, translator) - translator.NoEndpoints(true) + filterKey := watcher.FilterKey{ + Hostname: subscriber.instanceID, + NodeName: subscriber.nodeName, + EnableEndpointFiltering: true, // Endpoint filtering is enabled for local discovery. 
+ } + fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, filterKey, translator) translator.DrainAndStop() delete(subscriber.localTranslators, localDiscovery) } diff --git a/controller/api/destination/server.go b/controller/api/destination/server.go index 62131b729552d..1657d482dde02 100644 --- a/controller/api/destination/server.go +++ b/controller/api/destination/server.go @@ -101,7 +101,7 @@ func NewServer( if err != nil { return nil, err } - endpoints, err := watcher.NewEndpointsWatcher(k8sAPI, metadataAPI, log, config.EnableEndpointSlices, "local") + endpoints, err := watcher.NewEndpointsWatcher(k8sAPI, metadataAPI, log, config.EnableEndpointSlices, config.EnableIPv6, "local") if err != nil { return nil, err } @@ -214,8 +214,6 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e remoteConfig.TrustDomain, s.config.ForceOpaqueTransport, s.config.EnableH2Upgrade, - false, // Disable endpoint filtering for remote discovery. - s.config.EnableIPv6, s.config.ExtEndpointZoneWeights, s.config.MeshedHttp2ClientParams, fmt.Sprintf("%s.%s.svc.%s:%d", remoteSvc, service.Namespace, remoteConfig.ClusterDomain, port), @@ -233,7 +231,13 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e translator.Start() defer translator.Stop() - err = remoteWatcher.Subscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, instanceID, translator) + filterKey := watcher.FilterKey{ + Hostname: instanceID, + NodeName: token.NodeName, + EnableEndpointFiltering: false, // Disable endpoint filtering for remote discovery. + } + + err = remoteWatcher.Subscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, filterKey, translator) if err != nil { var ise watcher.InvalidService if errors.As(err, &ise) { @@ -243,7 +247,7 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e log.Errorf("Failed to subscribe to remote discovery service %q in cluster %s: %s", dest.GetPath(), cluster, err) return err } - defer remoteWatcher.Unsubscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, instanceID, translator) + defer remoteWatcher.Unsubscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, filterKey, translator) } else { log.Debug("Local discovery service detected") @@ -253,8 +257,6 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e s.config.IdentityTrustDomain, s.config.ForceOpaqueTransport, s.config.EnableH2Upgrade, - true, - s.config.EnableIPv6, s.config.ExtEndpointZoneWeights, s.config.MeshedHttp2ClientParams, dest.GetPath(), @@ -272,7 +274,13 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e translator.Start() defer translator.Stop() - err = s.endpoints.Subscribe(service, port, instanceID, translator) + filterKey := watcher.FilterKey{ + Hostname: instanceID, + NodeName: token.NodeName, + EnableEndpointFiltering: true, // Enable endpoint filtering for local discovery. 
+ } + + err = s.endpoints.Subscribe(service, port, filterKey, translator) if err != nil { var ise watcher.InvalidService if errors.As(err, &ise) { @@ -282,7 +290,7 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e log.Errorf("Failed to subscribe to %s: %s", dest.GetPath(), err) return err } - defer s.endpoints.Unsubscribe(service, port, instanceID, translator) + defer s.endpoints.Unsubscribe(service, port, filterKey, translator) } select { diff --git a/controller/api/destination/watcher/address.go b/controller/api/destination/watcher/address.go new file mode 100644 index 0000000000000..1b388a7a14f39 --- /dev/null +++ b/controller/api/destination/watcher/address.go @@ -0,0 +1,60 @@ +package watcher + +import ( + ewv1beta1 "github.com/linkerd/linkerd2/controller/gen/apis/externalworkload/v1beta1" + corev1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1" +) + +type ( + // Address represents an individual port on a specific endpoint. + // This endpoint might be the result of a the existence of a pod + // that is targeted by this service; alternatively it can be the + // case that this endpoint is not associated with a pod and maps + // to some other IP (i.e. a remote service gateway) + Address struct { + IP string + Port Port + Pod *corev1.Pod + ExternalWorkload *ewv1beta1.ExternalWorkload + OwnerName string + OwnerKind string + Identity string + AuthorityOverride string + Zone *string + ForZones []discovery.ForZone + OpaqueProtocol bool + } + + // AddressSet is a set of Address, indexed by ID. + // The ID can be either: + // 1) A reference to service: id.Name contains both the service name and + // the target IP and port (see newServiceRefAddress) + // 2) A reference to a pod: id.Name refers to the pod's name, and + // id.IPFamily refers to the ES AddressType (see newPodRefAddress). + // 3) A reference to an ExternalWorkload: id.Name refers to the EW's name. + AddressSet struct { + Addresses map[ID]*Address + Labels map[string]string + } +) + +// shallowCopy returns a shallow copy of addr, in the sense that the Pod and +// ExternalWorkload fields of the Addresses map values still point to the +// locations of the original variable +func (addr AddressSet) shallowCopy() AddressSet { + addresses := make(map[ID]*Address) + for k, v := range addr.Addresses { + addresses[k] = v + } + + labels := make(map[string]string) + for k, v := range addr.Labels { + labels[k] = v + } + + return AddressSet{ + Addresses: addresses, + Labels: labels, + } +} diff --git a/controller/api/destination/watcher/cluster_store.go b/controller/api/destination/watcher/cluster_store.go index c8161e217420e..789d43415b370 100644 --- a/controller/api/destination/watcher/cluster_store.go +++ b/controller/api/destination/watcher/cluster_store.go @@ -29,6 +29,7 @@ type ( api *k8s.API store map[string]remoteCluster enableEndpointSlices bool + enableIPv6 bool log *logging.Entry // Function used to parse a kubeconfig from a byte buffer. Based on the @@ -69,8 +70,8 @@ const ( // When created, a pair of event handlers are registered for the local cluster's // Secret informer. 
The event handlers are responsible for driving the discovery // of remote clusters and their configuration -func NewClusterStore(client kubernetes.Interface, namespace string, enableEndpointSlices bool) (*ClusterStore, error) { - return NewClusterStoreWithDecoder(client, namespace, enableEndpointSlices, decodeK8sConfigFromSecret, prometheus.DefaultRegisterer) +func NewClusterStore(client kubernetes.Interface, namespace string, enableEndpointSlices bool, enableIPv6 bool) (*ClusterStore, error) { + return NewClusterStoreWithDecoder(client, namespace, enableEndpointSlices, enableIPv6, decodeK8sConfigFromSecret, prometheus.DefaultRegisterer) } func (cs *ClusterStore) Sync(stopCh <-chan struct{}) { @@ -81,7 +82,7 @@ func (cs *ClusterStore) Sync(stopCh <-chan struct{}) { // store with an arbitrary `configDecoder` function. func NewClusterStoreWithDecoder( client kubernetes.Interface, - namespace string, enableEndpointSlices bool, + namespace string, enableEndpointSlices bool, enableIPv6 bool, decodeFn configDecoder, prom prometheus.Registerer, ) (*ClusterStore, error) { @@ -93,6 +94,7 @@ func NewClusterStoreWithDecoder( "component": "cluster-store", }), enableEndpointSlices: enableEndpointSlices, + enableIPv6: enableIPv6, api: api, decodeFn: decodeFn, } @@ -228,6 +230,7 @@ func (cs *ClusterStore) addCluster(clusterName string, secret *v1.Secret) error "remote-cluster": clusterName, }), cs.enableEndpointSlices, + cs.enableIPv6, clusterName, ) if err != nil { diff --git a/controller/api/destination/watcher/endpoints_watcher.go b/controller/api/destination/watcher/endpoints_watcher.go index a53e01ab62c62..c7c45d90cd533 100644 --- a/controller/api/destination/watcher/endpoints_watcher.go +++ b/controller/api/destination/watcher/endpoints_watcher.go @@ -1,24 +1,16 @@ package watcher import ( - "context" "fmt" - "net" "sort" - "strconv" - "strings" "sync" "time" - ewv1beta1 "github.com/linkerd/linkerd2/controller/gen/apis/externalworkload/v1beta1" "github.com/linkerd/linkerd2/controller/gen/apis/server/v1beta3" "github.com/linkerd/linkerd2/controller/k8s" - consts "github.com/linkerd/linkerd2/pkg/k8s" - "github.com/prometheus/client_golang/prometheus" logging "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/intstr" @@ -40,55 +32,6 @@ const endpointTargetRefPod = "Pod" const endpointTargetRefExternalWorkload = "ExternalWorkload" type ( - // Address represents an individual port on a specific endpoint. - // This endpoint might be the result of a the existence of a pod - // that is targeted by this service; alternatively it can be the - // case that this endpoint is not associated with a pod and maps - // to some other IP (i.e. a remote service gateway) - Address struct { - IP string - Port Port - Pod *corev1.Pod - ExternalWorkload *ewv1beta1.ExternalWorkload - OwnerName string - OwnerKind string - Identity string - AuthorityOverride string - Zone *string - ForZones []discovery.ForZone - OpaqueProtocol bool - } - - // AddressSet is a set of Address, indexed by ID. - // The ID can be either: - // 1) A reference to service: id.Name contains both the service name and - // the target IP and port (see newServiceRefAddress) - // 2) A reference to a pod: id.Name refers to the pod's name, and - // id.IPFamily refers to the ES AddressType (see newPodRefAddress). 
- // 3) A reference to an ExternalWorkload: id.Name refers to the EW's name. - AddressSet struct { - Addresses map[ID]*Address - Labels map[string]string - LocalTrafficPolicy bool - } - - portAndHostname struct { - port Port - hostname string - } - - filterKey struct { - nodeName string - nodeTopologyZone string - enableEndpointFiltering bool - enableIPv6 bool - } - - filteredListenerGroup struct { - key filterKey - snapshot AddressSet - listeners []EndpointUpdateListener - } // EndpointsWatcher watches all endpoints and services in the Kubernetes // cluster. Listeners can subscribe to a particular service and port and @@ -102,6 +45,7 @@ type ( cluster string log *logging.Entry enableEndpointSlices bool + enableIPv6 bool sync.RWMutex // This mutex protects modification of the map itself. informerHandlers @@ -117,58 +61,15 @@ type ( srvHandle cache.ResourceEventHandlerRegistration } - // servicePublisher represents a service. It keeps a map of portPublishers - // keyed by port and hostname. This is because each watch on a service - // will have a port and optionally may specify a hostname. The port - // and hostname will influence the endpoint set which is why a separate - // portPublisher is required for each port and hostname combination. The - // service's port mapping will be applied to the requested port and the - // mapped port will be used in the addresses set. If a hostname is - // requested, the address set will be filtered to only include addresses - // with the requested hostname. - servicePublisher struct { - id ServiceID - log *logging.Entry - k8sAPI *k8s.API - metadataAPI *k8s.MetadataAPI - enableEndpointSlices bool - localTrafficPolicy bool - cluster string - ports map[portAndHostname]*portPublisher - // All access to the servicePublisher and its portPublishers is explicitly synchronized by - // this mutex. - sync.Mutex - } - - // portPublisher represents a service along with a port and optionally a - // hostname. Multiple listeners may be subscribed to a portPublisher. - // portPublisher maintains the current state of the address set and - // publishes diffs to all listeners when updates come from either the - // endpoints API or the service API. 
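In the refactor, the flat listener slice described above is replaced by groups of listeners keyed by their filtering parameters, so the expensive work (filtering the address set and diffing it against the last published snapshot) happens once per distinct key and is then fanned out to every listener in the group. A rough, self-contained sketch of that shape, with hypothetical names in place of the controller's FilterKey, filteredListenerGroup, and Address types:

package main

import "fmt"

// addressSet is a hypothetical stand-in for the watcher's AddressSet: id -> IP.
type addressSet map[string]string

// listener is a hypothetical stand-in for EndpointUpdateListener.
type listener interface {
	Add(set addressSet)
}

type printListener struct{ name string }

func (p printListener) Add(set addressSet) {
	fmt.Println(p.name, "add:", set)
}

// group collects listeners that share identical filtering parameters, so the
// filtered set is computed once and reused for all of them.
type group struct {
	filterByNode bool
	nodeName     string
	snapshot     addressSet
	listeners    []listener
}

// publish filters the full set once for the group and fans the result out to
// every listener. (The diff against group.snapshot is omitted for brevity;
// see the diff sketch above.)
func (g *group) publish(all addressSet, nodeOf func(id string) string) {
	filtered := make(addressSet)
	for id, ip := range all {
		if g.filterByNode && nodeOf(id) != g.nodeName {
			continue
		}
		filtered[id] = ip
	}
	g.snapshot = filtered
	for _, l := range g.listeners {
		l.Add(filtered)
	}
}

func main() {
	g := &group{
		filterByNode: true,
		nodeName:     "node-a",
		listeners:    []listener{printListener{name: "proxy-1"}, printListener{name: "proxy-2"}},
	}
	all := addressSet{"pod-1": "10.0.0.1", "pod-2": "10.0.0.2"}
	g.publish(all, func(id string) string {
		if id == "pod-1" {
			return "node-a"
		}
		return "node-b"
	})
	// Both proxies receive the same filtered set, containing only pod-1.
}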
- portPublisher struct { - id ServiceID - targetPort namedPort - srcPort Port - hostname string - log *logging.Entry - k8sAPI *k8s.API - metadataAPI *k8s.MetadataAPI - enableEndpointSlices bool - exists bool - addresses AddressSet - filteredListeners map[filterKey]*filteredListenerGroup - metrics endpointsMetrics - localTrafficPolicy bool - } - EndpointUpdateListener interface { Add(set AddressSet) Remove(set AddressSet) - NoEndpoints(exists bool) - NodeName() string - NodeTopologyZone() string - EnableEndpointFiltering() bool - EnableIPv6() bool + } + + FilterKey struct { + EnableEndpointFiltering bool + NodeName string + Hostname string } ) @@ -176,36 +77,16 @@ var endpointsVecs = newEndpointsMetricsVecs() var undefinedEndpointPort = Port(0) -// shallowCopy returns a shallow copy of addr, in the sense that the Pod and -// ExternalWorkload fields of the Addresses map values still point to the -// locations of the original variable -func (addr AddressSet) shallowCopy() AddressSet { - addresses := make(map[ID]*Address) - for k, v := range addr.Addresses { - addresses[k] = v - } - - labels := make(map[string]string) - for k, v := range addr.Labels { - labels[k] = v - } - - return AddressSet{ - Addresses: addresses, - Labels: labels, - LocalTrafficPolicy: addr.LocalTrafficPolicy, - } -} - // NewEndpointsWatcher creates an EndpointsWatcher and begins watching the // k8sAPI for pod, service, and endpoint changes. An EndpointsWatcher will // watch on Endpoints or EndpointSlice resources, depending on cluster configuration. -func NewEndpointsWatcher(k8sAPI *k8s.API, metadataAPI *k8s.MetadataAPI, log *logging.Entry, enableEndpointSlices bool, cluster string) (*EndpointsWatcher, error) { +func NewEndpointsWatcher(k8sAPI *k8s.API, metadataAPI *k8s.MetadataAPI, log *logging.Entry, enableEndpointSlices bool, enableIPv6 bool, cluster string) (*EndpointsWatcher, error) { ew := &EndpointsWatcher{ publishers: make(map[ServiceID]*servicePublisher), k8sAPI: k8sAPI, metadataAPI: metadataAPI, enableEndpointSlices: enableEndpointSlices, + enableIPv6: enableIPv6, cluster: cluster, log: log.WithFields(logging.Fields{ "component": "endpoints-watcher", @@ -263,29 +144,29 @@ func NewEndpointsWatcher(k8sAPI *k8s.API, metadataAPI *k8s.MetadataAPI, log *log // Subscribe to an authority. // The provided listener will be updated each time the address set for the // given authority is changed. -func (ew *EndpointsWatcher) Subscribe(id ServiceID, port Port, hostname string, listener EndpointUpdateListener) error { +func (ew *EndpointsWatcher) Subscribe(id ServiceID, port Port, filterKey FilterKey, listener EndpointUpdateListener) error { svc, _ := ew.k8sAPI.Svc().Lister().Services(id.Namespace).Get(id.Name) if svc != nil && svc.Spec.Type == corev1.ServiceTypeExternalName { return invalidService(id.String()) } - if hostname == "" { + if filterKey.Hostname == "" { ew.log.Debugf("Establishing watch on endpoint [%s:%d]", id, port) } else { - ew.log.Debugf("Establishing watch on endpoint [%s.%s:%d]", hostname, id, port) + ew.log.Debugf("Establishing watch on endpoint [%s.%s:%d]", filterKey.Hostname, id, port) } sp := ew.getOrNewServicePublisher(id) - return sp.subscribe(port, hostname, listener) + return sp.subscribe(port, listener, filterKey) } // Unsubscribe removes a listener from the subscribers list for this authority. 
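With this change, the hostname parameter of Subscribe and Unsubscribe is folded into a FilterKey that also carries the consumer's node name and whether endpoint filtering applies. A sketch of the call pattern from a caller's point of view, assuming the signatures introduced in this patch and using placeholder service and port values:

package example

import (
	"github.com/linkerd/linkerd2/controller/api/destination/watcher"
)

func watchService(ew *watcher.EndpointsWatcher, l watcher.EndpointUpdateListener, nodeName, instanceID string) error {
	key := watcher.FilterKey{
		Hostname:                instanceID, // pod hostname from the original lookup, if any
		NodeName:                nodeName,   // the consumer's node, used when filtering is enabled
		EnableEndpointFiltering: true,       // local discovery: filter by topology / traffic policy
	}
	// Placeholder service and port, for illustration only.
	svc := watcher.ServiceID{Namespace: "emojivoto", Name: "web-svc"}
	if err := ew.Subscribe(svc, 8080, key, l); err != nil {
		return err // e.g. watcher.InvalidService for ExternalName services
	}

	// ... stream updates to the client until it goes away ...

	// Unsubscribe should be given the same key that was used to subscribe,
	// since listeners are grouped by key inside the watcher.
	ew.Unsubscribe(svc, 8080, key, l)
	return nil
}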
-func (ew *EndpointsWatcher) Unsubscribe(id ServiceID, port Port, hostname string, listener EndpointUpdateListener) { - if hostname == "" { +func (ew *EndpointsWatcher) Unsubscribe(id ServiceID, port Port, filterKey FilterKey, listener EndpointUpdateListener) { + if filterKey.Hostname == "" { ew.log.Debugf("Stopping watch on endpoint [%s:%d]", id, port) } else { - ew.log.Debugf("Stopping watch on endpoint [%s.%s:%d]", hostname, id, port) + ew.log.Debugf("Stopping watch on endpoint [%s.%s:%d]", filterKey.Hostname, id, port) } sp, ok := ew.getServicePublisher(id) @@ -293,7 +174,7 @@ func (ew *EndpointsWatcher) Unsubscribe(id ServiceID, port Port, hostname string ew.log.Errorf("Cannot unsubscribe from unknown service [%s:%d]", id, port) return } - sp.unsubscribe(port, hostname, listener) + sp.unsubscribe(port, listener, filterKey) } // removeHandlers will de-register any event handlers used by the @@ -533,7 +414,7 @@ func (ew *EndpointsWatcher) getOrNewServicePublisher(id ServiceID) *servicePubli k8sAPI: ew.k8sAPI, metadataAPI: ew.metadataAPI, cluster: ew.cluster, - ports: make(map[portAndHostname]*portPublisher), + ports: make(map[Port]*portPublisher), enableEndpointSlices: ew.enableEndpointSlices, } ew.publishers[id] = sp @@ -597,976 +478,6 @@ func (ew *EndpointsWatcher) deleteServer(obj interface{}) { } } -//////////////////////// -/// servicePublisher /// -//////////////////////// - -func (sp *servicePublisher) updateEndpoints(newEndpoints *corev1.Endpoints) { - sp.Lock() - defer sp.Unlock() - sp.log.Debugf("Updating endpoints for %s", sp.id) - for _, port := range sp.ports { - port.updateEndpoints(newEndpoints) - } -} - -func (sp *servicePublisher) deleteEndpoints() { - sp.Lock() - defer sp.Unlock() - sp.log.Debugf("Deleting endpoints for %s", sp.id) - for _, port := range sp.ports { - port.noEndpoints(false) - } -} - -func (sp *servicePublisher) addEndpointSlice(newSlice *discovery.EndpointSlice) { - sp.Lock() - defer sp.Unlock() - - sp.log.Debugf("Adding ES %s/%s", newSlice.Namespace, newSlice.Name) - for _, port := range sp.ports { - port.addEndpointSlice(newSlice) - } -} - -func (sp *servicePublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, newSlice *discovery.EndpointSlice) { - sp.Lock() - defer sp.Unlock() - - sp.log.Debugf("Updating ES %s/%s", oldSlice.Namespace, oldSlice.Name) - for _, port := range sp.ports { - port.updateEndpointSlice(oldSlice, newSlice) - } -} - -func (sp *servicePublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { - sp.Lock() - defer sp.Unlock() - - sp.log.Debugf("Deleting ES %s/%s", es.Namespace, es.Name) - for _, port := range sp.ports { - port.deleteEndpointSlice(es) - } -} - -func (sp *servicePublisher) updateService(newService *corev1.Service) { - sp.Lock() - defer sp.Unlock() - sp.log.Debugf("Updating service for %s", sp.id) - - // set localTrafficPolicy to true if InternalTrafficPolicy is set to local - if newService.Spec.InternalTrafficPolicy != nil { - sp.localTrafficPolicy = *newService.Spec.InternalTrafficPolicy == corev1.ServiceInternalTrafficPolicyLocal - } else { - sp.localTrafficPolicy = false - } - - for key, port := range sp.ports { - newTargetPort := getTargetPort(newService, key.port) - if newTargetPort != port.targetPort { - port.updatePort(newTargetPort) - } - // update service endpoints with new localTrafficPolicy - if port.localTrafficPolicy != sp.localTrafficPolicy { - port.updateLocalTrafficPolicy(sp.localTrafficPolicy) - } - } - -} - -func (sp *servicePublisher) subscribe(srcPort Port, hostname string, 
listener EndpointUpdateListener) error { - sp.Lock() - defer sp.Unlock() - - key := portAndHostname{ - port: srcPort, - hostname: hostname, - } - port, ok := sp.ports[key] - if !ok { - var err error - port, err = sp.newPortPublisher(srcPort, hostname) - if err != nil { - return err - } - sp.ports[key] = port - } - port.subscribe(listener) - return nil -} - -func (sp *servicePublisher) unsubscribe(srcPort Port, hostname string, listener EndpointUpdateListener) { - sp.Lock() - defer sp.Unlock() - - key := portAndHostname{ - port: srcPort, - hostname: hostname, - } - port, ok := sp.ports[key] - if ok { - port.unsubscribe(listener) - if port.totalListeners() == 0 { - endpointsVecs.unregister(sp.metricsLabels(srcPort, hostname)) - delete(sp.ports, key) - } - } -} - -func (sp *servicePublisher) newPortPublisher(srcPort Port, hostname string) (*portPublisher, error) { - targetPort := intstr.FromInt(int(srcPort)) - svc, err := sp.k8sAPI.Svc().Lister().Services(sp.id.Namespace).Get(sp.id.Name) - if err != nil && !apierrors.IsNotFound(err) { - sp.log.Errorf("error getting service: %s", err) - } - exists := false - if err == nil { - targetPort = getTargetPort(svc, srcPort) - exists = true - } - - log := sp.log.WithField("port", srcPort) - - metrics, err := endpointsVecs.newEndpointsMetrics(sp.metricsLabels(srcPort, hostname)) - if err != nil { - return nil, err - } - port := &portPublisher{ - filteredListeners: map[filterKey]*filteredListenerGroup{}, - targetPort: targetPort, - srcPort: srcPort, - hostname: hostname, - exists: exists, - k8sAPI: sp.k8sAPI, - metadataAPI: sp.metadataAPI, - log: log, - metrics: metrics, - enableEndpointSlices: sp.enableEndpointSlices, - localTrafficPolicy: sp.localTrafficPolicy, - } - - if port.enableEndpointSlices { - matchLabels := map[string]string{discovery.LabelServiceName: sp.id.Name} - selector := labels.Set(matchLabels).AsSelector() - - sliceList, err := sp.k8sAPI.ES().Lister().EndpointSlices(sp.id.Namespace).List(selector) - if err != nil && !apierrors.IsNotFound(err) { - sp.log.Errorf("error getting endpointSlice list: %s", err) - } - if err == nil { - for _, slice := range sliceList { - port.addEndpointSlice(slice) - } - } - } else { - endpoints, err := sp.k8sAPI.Endpoint().Lister().Endpoints(sp.id.Namespace).Get(sp.id.Name) - if err != nil && !apierrors.IsNotFound(err) { - sp.log.Errorf("error getting endpoints: %s", err) - } - if err == nil { - port.updateEndpoints(endpoints) - } - } - - return port, nil -} - -func (sp *servicePublisher) metricsLabels(port Port, hostname string) prometheus.Labels { - return endpointsLabels(sp.cluster, sp.id.Namespace, sp.id.Name, strconv.Itoa(int(port)), hostname) -} - -func (sp *servicePublisher) updateServer(oldServer, newServer *v1beta3.Server) { - sp.Lock() - defer sp.Unlock() - - for _, pp := range sp.ports { - pp.updateServer(oldServer, newServer) - } -} - -///////////////////// -/// portPublisher /// -///////////////////// - -// Note that portPublishers methods are generally NOT thread-safe. You should -// hold the parent servicePublisher's mutex before calling methods on a -// portPublisher. 
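The note above describes a common synchronization contract: portPublisher carries no locking of its own, and its methods are reached only while the owning servicePublisher's mutex is held. A generic sketch of that "lock in the parent, keep the children lock-free" pattern, with hypothetical names rather than the controller's types:

package main

import (
	"fmt"
	"sync"
)

// child carries no mutex of its own; callers are responsible for serializing
// access to it.
type child struct {
	count int
}

func (c *child) bump() { c.count++ }

// parent owns the only mutex. Children are reachable only through parent, so
// every child method effectively runs under parent.mu.
type parent struct {
	mu       sync.Mutex
	children map[string]*child
}

func (p *parent) bumpAll() {
	p.mu.Lock()
	defer p.mu.Unlock()
	for _, c := range p.children {
		c.bump()
	}
}

func main() {
	p := &parent{children: map[string]*child{"a": {}, "b": {}}}
	p.bumpAll()
	fmt.Println(p.children["a"].count, p.children["b"].count) // prints: 1 1
}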
- -func (pp *portPublisher) updateEndpoints(endpoints *corev1.Endpoints) { - newAddressSet := pp.endpointsToAddresses(endpoints) - if len(newAddressSet.Addresses) == 0 { - pp.publishNoEndpoints(true) - } else { - pp.publishAddressChange(newAddressSet) - } - pp.addresses = newAddressSet - pp.exists = true - pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) -} - -func (pp *portPublisher) addEndpointSlice(slice *discovery.EndpointSlice) { - newAddressSet := pp.endpointSliceToAddresses(slice) - for id, addr := range pp.addresses.Addresses { - if _, ok := newAddressSet.Addresses[id]; !ok { - newAddressSet.Addresses[id] = addr - } - } - - pp.publishAddressChange(newAddressSet) - - // even if the ES doesn't have addresses yet we need to create a new - // pp.addresses entry with the appropriate Labels and LocalTrafficPolicy, - // which isn't going to be captured during the ES update event when - // addresses get added - - pp.addresses = newAddressSet - pp.exists = true - pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) -} - -func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, newSlice *discovery.EndpointSlice) { - updatedAddressSet := AddressSet{ - Addresses: make(map[ID]*Address), - Labels: pp.addresses.Labels, - LocalTrafficPolicy: pp.localTrafficPolicy, - } - - for id, address := range pp.addresses.Addresses { - updatedAddressSet.Addresses[id] = address - } - - for _, id := range pp.endpointSliceToIDs(oldSlice) { - delete(updatedAddressSet.Addresses, id) - } - - newAddressSet := pp.endpointSliceToAddresses(newSlice) - for id, address := range newAddressSet.Addresses { - updatedAddressSet.Addresses[id] = address - } - - pp.publishAddressChange(updatedAddressSet) - - pp.addresses = updatedAddressSet - pp.exists = true - pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) -} - -func metricLabels(resource interface{}) map[string]string { - var serviceName, ns string - var resLabels, resAnnotations map[string]string - switch res := resource.(type) { - case *corev1.Endpoints: - { - serviceName, ns = res.Name, res.Namespace - resLabels, resAnnotations = res.Labels, res.Annotations - } - case *discovery.EndpointSlice: - { - serviceName, ns = res.Labels[discovery.LabelServiceName], res.Namespace - resLabels, resAnnotations = res.Labels, res.Annotations - } - } - - labels := map[string]string{service: serviceName, namespace: ns} - - remoteClusterName, hasRemoteClusterName := resLabels[consts.RemoteClusterNameLabel] - serviceFqn, hasServiceFqn := resAnnotations[consts.RemoteServiceFqName] - - if hasRemoteClusterName { - // this means we are looking at Endpoints created for the purpose of mirroring - // an out of cluster service. 
- labels[targetCluster] = remoteClusterName - if hasServiceFqn { - fqParts := strings.Split(serviceFqn, ".") - if len(fqParts) >= 2 { - labels[targetService] = fqParts[0] - labels[targetServiceNamespace] = fqParts[1] - } - } - } - return labels -} - -func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) AddressSet { - resolvedPort := pp.resolveESTargetPort(es.Ports) - if resolvedPort == undefinedEndpointPort { - return AddressSet{ - Labels: metricLabels(es), - Addresses: make(map[ID]*Address), - LocalTrafficPolicy: pp.localTrafficPolicy, - } - } - - serviceID, err := getEndpointSliceServiceID(es) - if err != nil { - pp.log.Errorf("Could not fetch resource service name:%v", err) - } - - addresses := make(map[ID]*Address) - for _, endpoint := range es.Endpoints { - if endpoint.Hostname != nil { - if pp.hostname != "" && pp.hostname != *endpoint.Hostname { - continue - } - } - if endpoint.Conditions.Ready != nil && !*endpoint.Conditions.Ready { - continue - } - - if endpoint.TargetRef == nil { - for _, IPAddr := range endpoint.Addresses { - var authorityOverride string - if fqName, ok := es.Annotations[consts.RemoteServiceFqName]; ok { - authorityOverride = net.JoinHostPort(fqName, fmt.Sprintf("%d", pp.srcPort)) - } - - identity := es.Annotations[consts.RemoteGatewayIdentity] - address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, serviceID.Name, es.Namespace) - address.Identity, address.AuthorityOverride = identity, authorityOverride - - if endpoint.Hints != nil { - zones := make([]discovery.ForZone, len(endpoint.Hints.ForZones)) - copy(zones, endpoint.Hints.ForZones) - address.ForZones = zones - } - addresses[id] = &address - } - continue - } - - if endpoint.TargetRef.Kind == endpointTargetRefPod { - for _, IPAddr := range endpoint.Addresses { - address, id, err := pp.newPodRefAddress( - resolvedPort, - es.AddressType, - IPAddr, - endpoint.TargetRef.Name, - endpoint.TargetRef.Namespace, - ) - if err != nil { - pp.log.Errorf("Unable to create new address:%v", err) - continue - } - err = SetToServerProtocol(pp.k8sAPI, &address, pp.log) - if err != nil { - pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) - } - - address.Zone = endpoint.Zone - if endpoint.Hints != nil { - zones := make([]discovery.ForZone, len(endpoint.Hints.ForZones)) - copy(zones, endpoint.Hints.ForZones) - address.ForZones = zones - } - addresses[id] = &address - } - } - - if endpoint.TargetRef.Kind == endpointTargetRefExternalWorkload { - for _, IPAddr := range endpoint.Addresses { - address, id, err := pp.newExtRefAddress(resolvedPort, IPAddr, endpoint.TargetRef.Name, es.Namespace) - if err != nil { - pp.log.Errorf("Unable to create new address: %v", err) - continue - } - - err = SetToServerProtocolExternalWorkload(pp.k8sAPI, &address) - if err != nil { - pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) - continue - } - - address.Zone = endpoint.Zone - if endpoint.Hints != nil { - zones := make([]discovery.ForZone, len(endpoint.Hints.ForZones)) - copy(zones, endpoint.Hints.ForZones) - address.ForZones = zones - } - - addresses[id] = &address - } - - } - - } - return AddressSet{ - Addresses: addresses, - Labels: metricLabels(es), - LocalTrafficPolicy: pp.localTrafficPolicy, - } -} - -// endpointSliceToIDs is similar to endpointSliceToAddresses but instead returns -// only the IDs of the endpoints rather than the addresses themselves. 
-func (pp *portPublisher) endpointSliceToIDs(es *discovery.EndpointSlice) []ID { - resolvedPort := pp.resolveESTargetPort(es.Ports) - if resolvedPort == undefinedEndpointPort { - return []ID{} - } - - serviceID, err := getEndpointSliceServiceID(es) - if err != nil { - pp.log.Errorf("Could not fetch resource service name:%v", err) - } - - ids := []ID{} - for _, endpoint := range es.Endpoints { - if endpoint.Hostname != nil { - if pp.hostname != "" && pp.hostname != *endpoint.Hostname { - continue - } - } - if endpoint.Conditions.Ready != nil && !*endpoint.Conditions.Ready { - continue - } - - if endpoint.TargetRef == nil { - for _, IPAddr := range endpoint.Addresses { - ids = append(ids, ServiceID{ - Name: strings.Join([]string{ - serviceID.Name, - IPAddr, - fmt.Sprint(resolvedPort), - }, "-"), - Namespace: es.Namespace, - }) - } - continue - } - - if endpoint.TargetRef.Kind == endpointTargetRefPod { - ids = append(ids, PodID{ - Name: endpoint.TargetRef.Name, - Namespace: endpoint.TargetRef.Namespace, - IPFamily: corev1.IPFamily(es.AddressType), - }) - } else if endpoint.TargetRef.Kind == endpointTargetRefExternalWorkload { - ids = append(ids, ExternalWorkloadID{ - Name: endpoint.TargetRef.Name, - Namespace: endpoint.TargetRef.Namespace, - }) - } - - } - return ids -} - -func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) AddressSet { - addresses := make(map[ID]*Address) - for _, subset := range endpoints.Subsets { - resolvedPort := pp.resolveTargetPort(subset) - if resolvedPort == undefinedEndpointPort { - continue - } - for _, endpoint := range subset.Addresses { - if pp.hostname != "" && pp.hostname != endpoint.Hostname { - continue - } - - if endpoint.TargetRef == nil { - var authorityOverride string - if fqName, ok := endpoints.Annotations[consts.RemoteServiceFqName]; ok { - authorityOverride = fmt.Sprintf("%s:%d", fqName, pp.srcPort) - } - - identity := endpoints.Annotations[consts.RemoteGatewayIdentity] - address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, endpoints.Name, endpoints.Namespace) - address.Identity, address.AuthorityOverride = identity, authorityOverride - - addresses[id] = &address - continue - } - - if endpoint.TargetRef.Kind == endpointTargetRefPod { - address, id, err := pp.newPodRefAddress( - resolvedPort, - "", - endpoint.IP, - endpoint.TargetRef.Name, - endpoint.TargetRef.Namespace, - ) - if err != nil { - pp.log.Errorf("Unable to create new address:%v", err) - continue - } - err = SetToServerProtocol(pp.k8sAPI, &address, pp.log) - if err != nil { - pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) - } - addresses[id] = &address - } - } - } - return AddressSet{ - Addresses: addresses, - Labels: metricLabels(endpoints), - } -} - -func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP, serviceName, serviceNamespace string) (Address, ServiceID) { - id := ServiceID{ - Name: strings.Join([]string{ - serviceName, - endpointIP, - fmt.Sprint(endpointPort), - }, "-"), - Namespace: serviceNamespace, - } - - return Address{IP: endpointIP, Port: endpointPort}, id -} - -func (pp *portPublisher) newPodRefAddress( - endpointPort Port, - ipFamily discovery.AddressType, - endpointIP, - podName, - podNamespace string, -) (Address, PodID, error) { - id := PodID{ - Name: podName, - Namespace: podNamespace, - IPFamily: corev1.IPFamily(ipFamily), - } - pod, err := pp.k8sAPI.Pod().Lister().Pods(id.Namespace).Get(id.Name) - if err != nil { - return Address{}, PodID{}, fmt.Errorf("unable to fetch pod %v: %w", id, err) 
- } - ownerKind, ownerName, err := pp.metadataAPI.GetOwnerKindAndName(context.Background(), pod, false) - if err != nil { - return Address{}, PodID{}, err - } - addr := Address{ - IP: endpointIP, - Port: endpointPort, - Pod: pod, - OwnerName: ownerName, - OwnerKind: ownerKind, - } - - return addr, id, nil -} - -func (pp *portPublisher) newExtRefAddress(endpointPort Port, endpointIP, externalWorkloadName, externalWorkloadNamespace string) (Address, ExternalWorkloadID, error) { - id := ExternalWorkloadID{ - Name: externalWorkloadName, - Namespace: externalWorkloadNamespace, - } - - ew, err := pp.k8sAPI.ExtWorkload().Lister().ExternalWorkloads(id.Namespace).Get(id.Name) - if err != nil { - return Address{}, ExternalWorkloadID{}, fmt.Errorf("unable to fetch ExternalWorkload %v: %w", id, err) - } - - addr := Address{ - IP: endpointIP, - Port: endpointPort, - ExternalWorkload: ew, - } - - ownerRefs := ew.GetOwnerReferences() - if len(ownerRefs) == 1 { - parent := ownerRefs[0] - addr.OwnerName = parent.Name - addr.OwnerName = strings.ToLower(parent.Kind) - } - - return addr, id, nil -} - -func (pp *portPublisher) resolveESTargetPort(slicePorts []discovery.EndpointPort) Port { - if slicePorts == nil { - return undefinedEndpointPort - } - - switch pp.targetPort.Type { - case intstr.Int: - return Port(pp.targetPort.IntVal) - case intstr.String: - for _, p := range slicePorts { - name := "" - if p.Name != nil { - name = *p.Name - } - if name == pp.targetPort.StrVal { - return Port(*p.Port) - } - } - } - return undefinedEndpointPort -} - -func (pp *portPublisher) resolveTargetPort(subset corev1.EndpointSubset) Port { - switch pp.targetPort.Type { - case intstr.Int: - return Port(pp.targetPort.IntVal) - case intstr.String: - for _, p := range subset.Ports { - if p.Name == pp.targetPort.StrVal { - return Port(p.Port) - } - } - } - return undefinedEndpointPort -} - -func (pp *portPublisher) updateLocalTrafficPolicy(localTrafficPolicy bool) { - pp.localTrafficPolicy = localTrafficPolicy - pp.addresses.LocalTrafficPolicy = localTrafficPolicy - pp.publishFilteredSnapshots() -} - -func (pp *portPublisher) updatePort(targetPort namedPort) { - pp.targetPort = targetPort - - if pp.enableEndpointSlices { - matchLabels := map[string]string{discovery.LabelServiceName: pp.id.Name} - selector := labels.Set(matchLabels).AsSelector() - - endpointSlices, err := pp.k8sAPI.ES().Lister().EndpointSlices(pp.id.Namespace).List(selector) - if err == nil { - pp.addresses = AddressSet{} - for _, slice := range endpointSlices { - pp.addEndpointSlice(slice) - } - } else { - pp.log.Errorf("Unable to get EndpointSlices during port update: %s", err) - } - } else { - endpoints, err := pp.k8sAPI.Endpoint().Lister().Endpoints(pp.id.Namespace).Get(pp.id.Name) - if err == nil { - pp.updateEndpoints(endpoints) - } else { - pp.log.Errorf("Unable to get endpoints during port update: %s", err) - } - } -} - -func (pp *portPublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { - updatedAddressSet := AddressSet{ - Addresses: make(map[ID]*Address), - Labels: pp.addresses.Labels, - LocalTrafficPolicy: pp.localTrafficPolicy, - } - for id, address := range pp.addresses.Addresses { - updatedAddressSet.Addresses[id] = address - } - - addrSet := pp.endpointSliceToAddresses(es) - for id := range addrSet.Addresses { - delete(updatedAddressSet.Addresses, id) - } - - pp.publishAddressChange(updatedAddressSet) - pp.addresses = updatedAddressSet - - if len(pp.addresses.Addresses) == 0 { - pp.noEndpoints(false) - } else { - pp.exists = true - 
pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) - } -} - -func (pp *portPublisher) noEndpoints(exists bool) { - pp.exists = exists - pp.addresses = AddressSet{} - pp.publishNoEndpoints(exists) - - pp.metrics.incUpdates() - pp.metrics.setExists(exists) - pp.metrics.setPods(0) -} - -func (pp *portPublisher) subscribe(listener EndpointUpdateListener) { - group := pp.filteredListenerGroup(listener) - if pp.exists { - if len(pp.addresses.Addresses) > 0 { - filteredSet := group.filterAddresses(pp.addresses) - group.snapshot = filteredSet - if len(filteredSet.Addresses) > 0 { - listener.Add(filteredSet.shallowCopy()) - } - } else { - listener.NoEndpoints(true) - } - } else { - listener.NoEndpoints(false) - } - group.listeners = append(group.listeners, listener) - - pp.metrics.setSubscribers(pp.totalListeners()) -} - -func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener) { - key := makeFilterKey(listener) - group, ok := pp.filteredListeners[key] - if ok { - for i, existing := range group.listeners { - if existing == listener { - n := len(group.listeners) - group.listeners[i] = group.listeners[n-1] - group.listeners[n-1] = nil - group.listeners = group.listeners[:n-1] - break - } - } - if len(group.listeners) == 0 { - delete(pp.filteredListeners, key) - } - } - pp.metrics.setSubscribers(pp.totalListeners()) -} -func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { - updated := false - for id, address := range pp.addresses.Addresses { - - if pp.isAddressSelected(address, oldServer) || pp.isAddressSelected(address, newServer) { - if newServer != nil && pp.isAddressSelected(address, newServer) && newServer.Spec.ProxyProtocol == opaqueProtocol { - address.OpaqueProtocol = true - } else { - address.OpaqueProtocol = false - } - if pp.addresses.Addresses[id].OpaqueProtocol != address.OpaqueProtocol { - pp.addresses.Addresses[id] = address - updated = true - } - } - } - if updated { - pp.publishFilteredSnapshots() - pp.metrics.incUpdates() - } -} - -func makeFilterKey(listener EndpointUpdateListener) filterKey { - return filterKey{ - nodeName: listener.NodeName(), - nodeTopologyZone: listener.NodeTopologyZone(), - enableEndpointFiltering: listener.EnableEndpointFiltering(), - enableIPv6: listener.EnableIPv6(), - } -} - -func (pp *portPublisher) filteredListenerGroup(listener EndpointUpdateListener) *filteredListenerGroup { - key := makeFilterKey(listener) - group, ok := pp.filteredListeners[key] - if !ok { - group = &filteredListenerGroup{ - key: key, - snapshot: AddressSet{Addresses: make(map[ID]*Address)}, - } - pp.filteredListeners[key] = group - } - return group -} - -func (pp *portPublisher) totalListeners() int { - total := 0 - for _, group := range pp.filteredListeners { - total += len(group.listeners) - } - return total -} - -func (pp *portPublisher) publishAddressChange(newAddressSet AddressSet) { - for _, group := range pp.filteredListeners { - group.publishDiff(newAddressSet) - } -} - -func (pp *portPublisher) publishFilteredSnapshots() { - for _, group := range pp.filteredListeners { - group.publishDiff(pp.addresses) - } -} - -func (pp *portPublisher) publishNoEndpoints(exists bool) { - for _, group := range pp.filteredListeners { - group.publishNoEndpoints(exists) - } -} - -func (group *filteredListenerGroup) publishDiff(addresses AddressSet) { - filtered := group.filterAddresses(addresses) - add, remove := diffAddresses(group.snapshot, filtered) - group.snapshot = filtered - - for _, listener := 
range group.listeners { - if len(remove.Addresses) > 0 { - listener.Remove(remove) - } - if len(add.Addresses) > 0 { - listener.Add(add) - } - } -} - -func (group *filteredListenerGroup) publishNoEndpoints(exists bool) { - remove := group.snapshot - group.snapshot = AddressSet{Addresses: make(map[ID]*Address)} - - for _, listener := range group.listeners { - if len(remove.Addresses) > 0 { - listener.Remove(remove) - } - listener.NoEndpoints(exists) - } -} - -func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) AddressSet { - filtered := make(map[ID]*Address) - - if !group.key.enableEndpointFiltering { - for k, v := range addresses.Addresses { - filtered[k] = v - } - return selectAddressFamily(AddressSet{ - Addresses: filtered, - Labels: addresses.Labels, - LocalTrafficPolicy: addresses.LocalTrafficPolicy, - }, group.key.enableIPv6) - } - - if addresses.LocalTrafficPolicy { - for id, address := range addresses.Addresses { - if address.Pod != nil && address.Pod.Spec.NodeName == group.key.nodeName { - filtered[id] = address - } - } - return selectAddressFamily(AddressSet{ - Addresses: filtered, - Labels: addresses.Labels, - LocalTrafficPolicy: addresses.LocalTrafficPolicy, - }, group.key.enableIPv6) - } - - for _, address := range addresses.Addresses { - if len(address.ForZones) == 0 { - for k, v := range addresses.Addresses { - filtered[k] = v - } - return selectAddressFamily(AddressSet{ - Addresses: filtered, - Labels: addresses.Labels, - LocalTrafficPolicy: addresses.LocalTrafficPolicy, - }, group.key.enableIPv6) - } - } - - for id, address := range addresses.Addresses { - for _, zone := range address.ForZones { - if zone.Name == group.key.nodeTopologyZone { - filtered[id] = address - break - } - } - } - - if len(filtered) == 0 { - for k, v := range addresses.Addresses { - filtered[k] = v - } - } - - return selectAddressFamily(AddressSet{ - Addresses: filtered, - Labels: addresses.Labels, - LocalTrafficPolicy: addresses.LocalTrafficPolicy, - }, group.key.enableIPv6) -} - -func selectAddressFamily(addresses AddressSet, enableIPv6 bool) AddressSet { - filtered := make(map[ID]*Address) - for id, addr := range addresses.Addresses { - if id.IPFamily == corev1.IPv6Protocol && !enableIPv6 { - continue - } - - if id.IPFamily == corev1.IPv4Protocol && enableIPv6 { - altID := id - altID.IPFamily = corev1.IPv6Protocol - if _, ok := addresses.Addresses[altID]; ok { - continue - } - } - - filtered[id] = addr - } - - return AddressSet{ - Addresses: filtered, - Labels: addresses.Labels, - LocalTrafficPolicy: addresses.LocalTrafficPolicy, - } -} - -func (pp *portPublisher) isAddressSelected(address *Address, server *v1beta3.Server) bool { - if server == nil { - return false - } - - if address.Pod != nil { - selector, err := metav1.LabelSelectorAsSelector(server.Spec.PodSelector) - if err != nil { - pp.log.Errorf("failed to create Selector: %s", err) - return false - } - - if !selector.Matches(labels.Set(address.Pod.Labels)) { - return false - } - - switch server.Spec.Port.Type { - case intstr.Int: - if server.Spec.Port.IntVal == int32(address.Port) { - return true - } - case intstr.String: - for _, c := range append(address.Pod.Spec.InitContainers, address.Pod.Spec.Containers...) 
{ - for _, p := range c.Ports { - if p.ContainerPort == int32(address.Port) && p.Name == server.Spec.Port.StrVal { - return true - } - } - } - } - - } else if address.ExternalWorkload != nil { - selector, err := metav1.LabelSelectorAsSelector(server.Spec.ExternalWorkloadSelector) - if err != nil { - pp.log.Errorf("failed to create Selector: %s", err) - return false - } - - if !selector.Matches(labels.Set(address.ExternalWorkload.Labels)) { - return false - } - - switch server.Spec.Port.Type { - case intstr.Int: - if server.Spec.Port.IntVal == int32(address.Port) { - return true - } - case intstr.String: - for _, p := range address.ExternalWorkload.Spec.Ports { - if p.Port == int32(address.Port) && p.Name == server.Spec.Port.StrVal { - return true - } - } - } - } - return false -} - //////////// /// util /// //////////// @@ -1655,9 +566,8 @@ func diffAddresses(oldAddresses, newAddresses AddressSet) (add, remove AddressSe } } add = AddressSet{ - Addresses: addAddresses, - Labels: newAddresses.Labels, - LocalTrafficPolicy: newAddresses.LocalTrafficPolicy, + Addresses: addAddresses, + Labels: newAddresses.Labels, } remove = AddressSet{ Addresses: removeAddresses, diff --git a/controller/api/destination/watcher/filtered_listener_group.go b/controller/api/destination/watcher/filtered_listener_group.go new file mode 100644 index 0000000000000..e040f1f012f56 --- /dev/null +++ b/controller/api/destination/watcher/filtered_listener_group.go @@ -0,0 +1,138 @@ +package watcher + +import ( + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/discovery/v1" +) + +type ( + filteredListenerGroup struct { + key FilterKey + nodeTopologyZone string + enableEndpointFiltering bool + enableIPv6 bool + localTrafficPolicy bool + snapshot AddressSet + listeners []EndpointUpdateListener + } +) + +func newFilteredListenerGroup(key FilterKey, nodeTopologyZone string, enableIPv6 bool, localTrafficPolicy bool) *filteredListenerGroup { + return &filteredListenerGroup{ + key: key, + nodeTopologyZone: nodeTopologyZone, + enableEndpointFiltering: key.EnableEndpointFiltering, + enableIPv6: enableIPv6, + localTrafficPolicy: localTrafficPolicy, + snapshot: AddressSet{Addresses: make(map[ID]*Address)}, + } +} + +func (group *filteredListenerGroup) publishDiff(addresses AddressSet) { + filtered := group.filterAddresses(addresses) + add, remove := diffAddresses(group.snapshot, filtered) + group.snapshot = filtered + + for _, listener := range group.listeners { + if len(remove.Addresses) > 0 { + listener.Remove(remove) + } + if len(add.Addresses) > 0 { + listener.Add(add) + } + } +} + +func (group *filteredListenerGroup) publishNoEndpoints() { + remove := group.snapshot + group.snapshot = AddressSet{Addresses: make(map[ID]*Address)} + + for _, listener := range group.listeners { + if len(remove.Addresses) > 0 { + listener.Remove(remove) + } + } +} + +func (group *filteredListenerGroup) updateLocalTrafficPolicy(localTrafficPolicy bool) { + group.localTrafficPolicy = localTrafficPolicy + group.publishDiff(group.snapshot) +} + +func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) AddressSet { + filtered := make(map[ID]*Address) + + for id, address := range addresses.Addresses { + // If hostname filtering is specified, only include addresses that match the hostname. + // This filtering should be applied even if endpoint filtering is disabled. 
+ if group.key.Hostname != "" && group.key.Hostname != address.Pod.Spec.Hostname { + continue + } + + if group.enableEndpointFiltering { + // If the Service has local traffic policy enabled, only include addresses that are local to the node. + // Otherwise, perform zone filtering if the address has zone information. + if group.localTrafficPolicy { + if address.Pod == nil || address.Pod.Spec.NodeName != group.key.NodeName { + continue + } + } else { + if len(address.ForZones) > 0 { + if !containsZone(address.ForZones, group.nodeTopologyZone) { + continue + } + } + } + } + + filtered[id] = address + } + + // If zone filtering removed all addresses, we fall back to including all addresses. + // Note that hostname filtering is still applied in this case, if specified. + if group.enableEndpointFiltering && !group.localTrafficPolicy && len(filtered) == 0 { + for k, v := range addresses.Addresses { + if group.key.Hostname == "" || v.Pod.Spec.Hostname == group.key.Hostname { + filtered[k] = v + } + } + } + + return selectAddressFamily(AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + }, group.enableIPv6) +} + +func containsZone(zones []v1.ForZone, zone string) bool { + for _, z := range zones { + if z.Name == zone { + return true + } + } + return false +} + +func selectAddressFamily(addresses AddressSet, enableIPv6 bool) AddressSet { + filtered := make(map[ID]*Address) + for id, addr := range addresses.Addresses { + if id.IPFamily == corev1.IPv6Protocol && !enableIPv6 { + continue + } + + if id.IPFamily == corev1.IPv4Protocol && enableIPv6 { + altID := id + altID.IPFamily = corev1.IPv6Protocol + if _, ok := addresses.Addresses[altID]; ok { + continue + } + } + + filtered[id] = addr + } + + return AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + } +} diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go new file mode 100644 index 0000000000000..0da4c9562d7f7 --- /dev/null +++ b/controller/api/destination/watcher/port_publisher.go @@ -0,0 +1,689 @@ +package watcher + +import ( + "context" + "fmt" + "net" + "strings" + + "github.com/linkerd/linkerd2/controller/gen/apis/server/v1beta3" + "github.com/linkerd/linkerd2/controller/k8s" + consts "github.com/linkerd/linkerd2/pkg/k8s" + logging "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/intstr" +) + +type ( + // portPublisher represents a service along with a port and optionally a + // hostname. Multiple listeners may be subscribed to a portPublisher. + // portPublisher maintains the current state of the address set and + // publishes diffs to all listeners when updates come from either the + // endpoints API or the service API. + portPublisher struct { + id ServiceID + + targetPort namedPort + srcPort Port + log *logging.Entry + k8sAPI *k8s.API + metadataAPI *k8s.MetadataAPI + enableEndpointSlices bool + enableIPv6 bool + exists bool + addresses AddressSet + filteredListeners map[FilterKey]*filteredListenerGroup + metrics endpointsMetrics + localTrafficPolicy bool + } +) + +///////////////////// +/// portPublisher /// +///////////////////// + +// Note that portPublishers methods are generally NOT thread-safe. You should +// hold the parent servicePublisher's mutex before calling methods on a +// portPublisher. 
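+//
+// For illustration, a typical caller in this change is servicePublisher
+// (see service_publisher.go later in this patch), which wraps every
+// portPublisher call in its own mutex, e.g.:
+//
+//	sp.Lock()
+//	defer sp.Unlock()
+//	for _, port := range sp.ports {
+//		port.updateEndpoints(newEndpoints)
+//	}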
+ +func (pp *portPublisher) updateEndpoints(endpoints *corev1.Endpoints) { + newAddressSet := pp.endpointsToAddresses(endpoints) + if len(newAddressSet.Addresses) == 0 { + pp.publishNoEndpoints() + } else { + pp.publishAddressChange(newAddressSet) + } + pp.addresses = newAddressSet + pp.exists = true + pp.metrics.incUpdates() + pp.metrics.setPods(len(pp.addresses.Addresses)) + pp.metrics.setExists(true) +} + +func (pp *portPublisher) addEndpointSlice(slice *discovery.EndpointSlice) { + newAddressSet := pp.endpointSliceToAddresses(slice) + for id, addr := range pp.addresses.Addresses { + if _, ok := newAddressSet.Addresses[id]; !ok { + newAddressSet.Addresses[id] = addr + } + } + + pp.publishAddressChange(newAddressSet) + + // even if the ES doesn't have addresses yet we need to create a new + // pp.addresses entry with the appropriate Labels and LocalTrafficPolicy, + // which isn't going to be captured during the ES update event when + // addresses get added + + pp.addresses = newAddressSet + pp.exists = true + pp.metrics.incUpdates() + pp.metrics.setPods(len(pp.addresses.Addresses)) + pp.metrics.setExists(true) +} + +func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, newSlice *discovery.EndpointSlice) { + updatedAddressSet := AddressSet{ + Addresses: make(map[ID]*Address), + Labels: pp.addresses.Labels, + } + + for id, address := range pp.addresses.Addresses { + updatedAddressSet.Addresses[id] = address + } + + for _, id := range pp.endpointSliceToIDs(oldSlice) { + delete(updatedAddressSet.Addresses, id) + } + + newAddressSet := pp.endpointSliceToAddresses(newSlice) + for id, address := range newAddressSet.Addresses { + updatedAddressSet.Addresses[id] = address + } + + pp.publishAddressChange(updatedAddressSet) + + pp.addresses = updatedAddressSet + pp.exists = true + pp.metrics.incUpdates() + pp.metrics.setPods(len(pp.addresses.Addresses)) + pp.metrics.setExists(true) +} + +func metricLabels(resource interface{}) map[string]string { + var serviceName, ns string + var resLabels, resAnnotations map[string]string + switch res := resource.(type) { + case *corev1.Endpoints: + { + serviceName, ns = res.Name, res.Namespace + resLabels, resAnnotations = res.Labels, res.Annotations + } + case *discovery.EndpointSlice: + { + serviceName, ns = res.Labels[discovery.LabelServiceName], res.Namespace + resLabels, resAnnotations = res.Labels, res.Annotations + } + } + + labels := map[string]string{service: serviceName, namespace: ns} + + remoteClusterName, hasRemoteClusterName := resLabels[consts.RemoteClusterNameLabel] + serviceFqn, hasServiceFqn := resAnnotations[consts.RemoteServiceFqName] + + if hasRemoteClusterName { + // this means we are looking at Endpoints created for the purpose of mirroring + // an out of cluster service. 
+ labels[targetCluster] = remoteClusterName + if hasServiceFqn { + fqParts := strings.Split(serviceFqn, ".") + if len(fqParts) >= 2 { + labels[targetService] = fqParts[0] + labels[targetServiceNamespace] = fqParts[1] + } + } + } + return labels +} + +func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) AddressSet { + resolvedPort := pp.resolveESTargetPort(es.Ports) + if resolvedPort == undefinedEndpointPort { + return AddressSet{ + Labels: metricLabels(es), + Addresses: make(map[ID]*Address), + } + } + + serviceID, err := getEndpointSliceServiceID(es) + if err != nil { + pp.log.Errorf("Could not fetch resource service name:%v", err) + } + + addresses := make(map[ID]*Address) + for _, endpoint := range es.Endpoints { + if endpoint.Conditions.Ready != nil && !*endpoint.Conditions.Ready { + continue + } + + if endpoint.TargetRef == nil { + for _, IPAddr := range endpoint.Addresses { + var authorityOverride string + if fqName, ok := es.Annotations[consts.RemoteServiceFqName]; ok { + authorityOverride = net.JoinHostPort(fqName, fmt.Sprintf("%d", pp.srcPort)) + } + + identity := es.Annotations[consts.RemoteGatewayIdentity] + address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, serviceID.Name, es.Namespace) + address.Identity, address.AuthorityOverride = identity, authorityOverride + + if endpoint.Hints != nil { + zones := make([]discovery.ForZone, len(endpoint.Hints.ForZones)) + copy(zones, endpoint.Hints.ForZones) + address.ForZones = zones + } + addresses[id] = &address + } + continue + } + + if endpoint.TargetRef.Kind == endpointTargetRefPod { + for _, IPAddr := range endpoint.Addresses { + address, id, err := pp.newPodRefAddress( + resolvedPort, + es.AddressType, + IPAddr, + endpoint.TargetRef.Name, + endpoint.TargetRef.Namespace, + ) + if err != nil { + pp.log.Errorf("Unable to create new address:%v", err) + continue + } + err = SetToServerProtocol(pp.k8sAPI, &address, pp.log) + if err != nil { + pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) + } + + address.Zone = endpoint.Zone + if endpoint.Hints != nil { + zones := make([]discovery.ForZone, len(endpoint.Hints.ForZones)) + copy(zones, endpoint.Hints.ForZones) + address.ForZones = zones + } + addresses[id] = &address + } + } + + if endpoint.TargetRef.Kind == endpointTargetRefExternalWorkload { + for _, IPAddr := range endpoint.Addresses { + address, id, err := pp.newExtRefAddress(resolvedPort, IPAddr, endpoint.TargetRef.Name, es.Namespace) + if err != nil { + pp.log.Errorf("Unable to create new address: %v", err) + continue + } + + err = SetToServerProtocolExternalWorkload(pp.k8sAPI, &address) + if err != nil { + pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) + continue + } + + address.Zone = endpoint.Zone + if endpoint.Hints != nil { + zones := make([]discovery.ForZone, len(endpoint.Hints.ForZones)) + copy(zones, endpoint.Hints.ForZones) + address.ForZones = zones + } + + addresses[id] = &address + } + + } + + } + return AddressSet{ + Addresses: addresses, + Labels: metricLabels(es), + } +} + +// endpointSliceToIDs is similar to endpointSliceToAddresses but instead returns +// only the IDs of the endpoints rather than the addresses themselves. 
+func (pp *portPublisher) endpointSliceToIDs(es *discovery.EndpointSlice) []ID { + resolvedPort := pp.resolveESTargetPort(es.Ports) + if resolvedPort == undefinedEndpointPort { + return []ID{} + } + + serviceID, err := getEndpointSliceServiceID(es) + if err != nil { + pp.log.Errorf("Could not fetch resource service name:%v", err) + } + + ids := []ID{} + for _, endpoint := range es.Endpoints { + if endpoint.Conditions.Ready != nil && !*endpoint.Conditions.Ready { + continue + } + + if endpoint.TargetRef == nil { + for _, IPAddr := range endpoint.Addresses { + ids = append(ids, ServiceID{ + Name: strings.Join([]string{ + serviceID.Name, + IPAddr, + fmt.Sprint(resolvedPort), + }, "-"), + Namespace: es.Namespace, + }) + } + continue + } + + if endpoint.TargetRef.Kind == endpointTargetRefPod { + ids = append(ids, PodID{ + Name: endpoint.TargetRef.Name, + Namespace: endpoint.TargetRef.Namespace, + IPFamily: corev1.IPFamily(es.AddressType), + }) + } else if endpoint.TargetRef.Kind == endpointTargetRefExternalWorkload { + ids = append(ids, ExternalWorkloadID{ + Name: endpoint.TargetRef.Name, + Namespace: endpoint.TargetRef.Namespace, + }) + } + + } + return ids +} + +func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) AddressSet { + addresses := make(map[ID]*Address) + for _, subset := range endpoints.Subsets { + resolvedPort := pp.resolveTargetPort(subset) + if resolvedPort == undefinedEndpointPort { + continue + } + for _, endpoint := range subset.Addresses { + if endpoint.TargetRef == nil { + var authorityOverride string + if fqName, ok := endpoints.Annotations[consts.RemoteServiceFqName]; ok { + authorityOverride = fmt.Sprintf("%s:%d", fqName, pp.srcPort) + } + + identity := endpoints.Annotations[consts.RemoteGatewayIdentity] + address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, endpoints.Name, endpoints.Namespace) + address.Identity, address.AuthorityOverride = identity, authorityOverride + + addresses[id] = &address + continue + } + + if endpoint.TargetRef.Kind == endpointTargetRefPod { + address, id, err := pp.newPodRefAddress( + resolvedPort, + "", + endpoint.IP, + endpoint.TargetRef.Name, + endpoint.TargetRef.Namespace, + ) + if err != nil { + pp.log.Errorf("Unable to create new address:%v", err) + continue + } + err = SetToServerProtocol(pp.k8sAPI, &address, pp.log) + if err != nil { + pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) + } + addresses[id] = &address + } + } + } + return AddressSet{ + Addresses: addresses, + Labels: metricLabels(endpoints), + } +} + +func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP, serviceName, serviceNamespace string) (Address, ServiceID) { + id := ServiceID{ + Name: strings.Join([]string{ + serviceName, + endpointIP, + fmt.Sprint(endpointPort), + }, "-"), + Namespace: serviceNamespace, + } + + return Address{IP: endpointIP, Port: endpointPort}, id +} + +func (pp *portPublisher) newPodRefAddress( + endpointPort Port, + ipFamily discovery.AddressType, + endpointIP, + podName, + podNamespace string, +) (Address, PodID, error) { + id := PodID{ + Name: podName, + Namespace: podNamespace, + IPFamily: corev1.IPFamily(ipFamily), + } + pod, err := pp.k8sAPI.Pod().Lister().Pods(id.Namespace).Get(id.Name) + if err != nil { + return Address{}, PodID{}, fmt.Errorf("unable to fetch pod %v: %w", id, err) + } + ownerKind, ownerName, err := pp.metadataAPI.GetOwnerKindAndName(context.Background(), pod, false) + if err != nil { + return Address{}, PodID{}, err + } + addr := Address{ + IP: 
endpointIP, + Port: endpointPort, + Pod: pod, + OwnerName: ownerName, + OwnerKind: ownerKind, + } + + return addr, id, nil +} + +func (pp *portPublisher) newExtRefAddress(endpointPort Port, endpointIP, externalWorkloadName, externalWorkloadNamespace string) (Address, ExternalWorkloadID, error) { + id := ExternalWorkloadID{ + Name: externalWorkloadName, + Namespace: externalWorkloadNamespace, + } + + ew, err := pp.k8sAPI.ExtWorkload().Lister().ExternalWorkloads(id.Namespace).Get(id.Name) + if err != nil { + return Address{}, ExternalWorkloadID{}, fmt.Errorf("unable to fetch ExternalWorkload %v: %w", id, err) + } + + addr := Address{ + IP: endpointIP, + Port: endpointPort, + ExternalWorkload: ew, + } + + ownerRefs := ew.GetOwnerReferences() + if len(ownerRefs) == 1 { + parent := ownerRefs[0] + addr.OwnerName = parent.Name + addr.OwnerName = strings.ToLower(parent.Kind) + } + + return addr, id, nil +} + +func (pp *portPublisher) resolveESTargetPort(slicePorts []discovery.EndpointPort) Port { + if slicePorts == nil { + return undefinedEndpointPort + } + + switch pp.targetPort.Type { + case intstr.Int: + return Port(pp.targetPort.IntVal) + case intstr.String: + for _, p := range slicePorts { + name := "" + if p.Name != nil { + name = *p.Name + } + if name == pp.targetPort.StrVal { + return Port(*p.Port) + } + } + } + return undefinedEndpointPort +} + +func (pp *portPublisher) resolveTargetPort(subset corev1.EndpointSubset) Port { + switch pp.targetPort.Type { + case intstr.Int: + return Port(pp.targetPort.IntVal) + case intstr.String: + for _, p := range subset.Ports { + if p.Name == pp.targetPort.StrVal { + return Port(p.Port) + } + } + } + return undefinedEndpointPort +} + +func (pp *portPublisher) updateLocalTrafficPolicy(localTrafficPolicy bool) { + pp.localTrafficPolicy = localTrafficPolicy + for _, group := range pp.filteredListeners { + group.updateLocalTrafficPolicy(localTrafficPolicy) + } + pp.publishFilteredSnapshots() +} + +func (pp *portPublisher) updatePort(targetPort namedPort) { + pp.targetPort = targetPort + + if pp.enableEndpointSlices { + matchLabels := map[string]string{discovery.LabelServiceName: pp.id.Name} + selector := labels.Set(matchLabels).AsSelector() + + endpointSlices, err := pp.k8sAPI.ES().Lister().EndpointSlices(pp.id.Namespace).List(selector) + if err == nil { + pp.addresses = AddressSet{} + for _, slice := range endpointSlices { + pp.addEndpointSlice(slice) + } + } else { + pp.log.Errorf("Unable to get EndpointSlices during port update: %s", err) + } + } else { + endpoints, err := pp.k8sAPI.Endpoint().Lister().Endpoints(pp.id.Namespace).Get(pp.id.Name) + if err == nil { + pp.updateEndpoints(endpoints) + } else { + pp.log.Errorf("Unable to get endpoints during port update: %s", err) + } + } +} + +func (pp *portPublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { + updatedAddressSet := AddressSet{ + Addresses: make(map[ID]*Address), + Labels: pp.addresses.Labels, + } + for id, address := range pp.addresses.Addresses { + updatedAddressSet.Addresses[id] = address + } + + addrSet := pp.endpointSliceToAddresses(es) + for id := range addrSet.Addresses { + delete(updatedAddressSet.Addresses, id) + } + + pp.publishAddressChange(updatedAddressSet) + pp.addresses = updatedAddressSet + + if len(pp.addresses.Addresses) == 0 { + pp.noEndpoints(false) + } else { + pp.exists = true + pp.metrics.incUpdates() + pp.metrics.setPods(len(pp.addresses.Addresses)) + pp.metrics.setExists(true) + } +} + +func (pp *portPublisher) noEndpoints(exists bool) { + pp.exists = exists + 
pp.addresses = AddressSet{}
+	pp.publishNoEndpoints()
+
+	pp.metrics.incUpdates()
+	pp.metrics.setExists(exists)
+	pp.metrics.setPods(0)
+}
+
+func (pp *portPublisher) subscribe(listener EndpointUpdateListener, filterKey FilterKey) {
+	group := pp.filteredListenerGroup(listener, filterKey)
+	if pp.exists {
+		if len(pp.addresses.Addresses) > 0 {
+			filteredSet := group.filterAddresses(pp.addresses)
+			group.snapshot = filteredSet
+			if len(filteredSet.Addresses) > 0 {
+				listener.Add(filteredSet.shallowCopy())
+			}
+		}
+	}
+	group.listeners = append(group.listeners, listener)
+
+	pp.metrics.setSubscribers(pp.totalListeners())
+}
+
+func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener, filterKey FilterKey) {
+	group, ok := pp.filteredListeners[filterKey]
+	if ok {
+		listener.Remove(group.snapshot)
+		for i, existing := range group.listeners {
+			if existing == listener {
+				n := len(group.listeners)
+				group.listeners[i] = group.listeners[n-1]
+				group.listeners[n-1] = nil
+				group.listeners = group.listeners[:n-1]
+				break
+			}
+		}
+		if len(group.listeners) == 0 {
+			delete(pp.filteredListeners, filterKey)
+		}
+	}
+	pp.metrics.setSubscribers(pp.totalListeners())
+}
+
+func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) {
+	updated := false
+	for _, address := range pp.addresses.Addresses {
+		if pp.isAddressSelected(address, oldServer) || pp.isAddressSelected(address, newServer) {
+			opaque := newServer != nil && pp.isAddressSelected(address, newServer) && newServer.Spec.ProxyProtocol == opaqueProtocol
+			if address.OpaqueProtocol != opaque {
+				address.OpaqueProtocol = opaque
+				updated = true
+			}
+		}
+	}
+	if updated {
+		pp.publishFilteredSnapshots()
+		pp.metrics.incUpdates()
+	}
+}
+
+func (pp *portPublisher) filteredListenerGroup(listener EndpointUpdateListener, filterKey FilterKey) *filteredListenerGroup {
+	group, ok := pp.filteredListeners[filterKey]
+	if !ok {
+		nodeTopologyZone := ""
+		if filterKey.NodeName != "" {
+			node, err := pp.k8sAPI.Node().Lister().Get(filterKey.NodeName)
+			if err != nil {
+				pp.log.Errorf("Unable to get node %s: %s", filterKey.NodeName, err)
+			} else {
+				nodeTopologyZone = node.Labels[corev1.LabelTopologyZone]
+			}
+		}
+
+		group = newFilteredListenerGroup(filterKey, nodeTopologyZone, pp.enableIPv6, pp.localTrafficPolicy)
+		pp.filteredListeners[filterKey] = group
+	}
+	return group
+}
+
+func (pp *portPublisher) totalListeners() int {
+	total := 0
+	for _, group := range pp.filteredListeners {
+		total += len(group.listeners)
+	}
+	return total
+}
+
+func (pp *portPublisher) publishAddressChange(newAddressSet AddressSet) {
+	for _, group := range pp.filteredListeners {
+		group.publishDiff(newAddressSet)
+	}
+}
+
+func (pp *portPublisher) publishFilteredSnapshots() {
+	for _, group := range pp.filteredListeners {
+		group.publishDiff(pp.addresses)
+	}
+}
+
+func (pp *portPublisher) publishNoEndpoints() {
+	for _, group := range pp.filteredListeners {
+		group.publishNoEndpoints()
+	}
+}
+
+func (pp *portPublisher) isAddressSelected(address *Address, server *v1beta3.Server) bool {
+	if server == nil {
+		return false
+	}
+
+	if address.Pod != nil {
+		selector, err := metav1.LabelSelectorAsSelector(server.Spec.PodSelector)
+		if err != nil {
+			pp.log.Errorf("failed to create Selector: %s", err)
+			return false
+		}
+
+		if !selector.Matches(labels.Set(address.Pod.Labels)) {
+			return false
+		}
+
+		switch server.Spec.Port.Type {
+		case 
intstr.Int: + if server.Spec.Port.IntVal == int32(address.Port) { + return true + } + case intstr.String: + for _, c := range append(address.Pod.Spec.InitContainers, address.Pod.Spec.Containers...) { + for _, p := range c.Ports { + if p.ContainerPort == int32(address.Port) && p.Name == server.Spec.Port.StrVal { + return true + } + } + } + } + + } else if address.ExternalWorkload != nil { + selector, err := metav1.LabelSelectorAsSelector(server.Spec.ExternalWorkloadSelector) + if err != nil { + pp.log.Errorf("failed to create Selector: %s", err) + return false + } + + if !selector.Matches(labels.Set(address.ExternalWorkload.Labels)) { + return false + } + + switch server.Spec.Port.Type { + case intstr.Int: + if server.Spec.Port.IntVal == int32(address.Port) { + return true + } + case intstr.String: + for _, p := range address.ExternalWorkload.Spec.Ports { + if p.Port == int32(address.Port) && p.Name == server.Spec.Port.StrVal { + return true + } + } + } + } + return false +} diff --git a/controller/api/destination/watcher/prometheus.go b/controller/api/destination/watcher/prometheus.go index 16c789c4b276a..1b483f38555e3 100644 --- a/controller/api/destination/watcher/prometheus.go +++ b/controller/api/destination/watcher/prometheus.go @@ -128,13 +128,12 @@ func newMetricsVecs(name string, labels []string) metricsVecs { } } -func endpointsLabels(cluster, namespace, service, port string, hostname string) prometheus.Labels { +func endpointsLabels(cluster, namespace, service, port string) prometheus.Labels { return prometheus.Labels{ "cluster": cluster, "namespace": namespace, "service": service, "port": port, - "hostname": hostname, } } @@ -147,7 +146,7 @@ func labelNames(labels prometheus.Labels) []string { } func newEndpointsMetricsVecs() endpointsMetricsVecs { - labels := labelNames(endpointsLabels("", "", "", "", "")) + labels := labelNames(endpointsLabels("", "", "", "")) vecs := newMetricsVecs("endpoints", labels) pods := promauto.NewGaugeVec( diff --git a/controller/api/destination/watcher/service_publisher.go b/controller/api/destination/watcher/service_publisher.go new file mode 100644 index 0000000000000..ae4a3762cd300 --- /dev/null +++ b/controller/api/destination/watcher/service_publisher.go @@ -0,0 +1,222 @@ +package watcher + +import ( + "strconv" + "sync" + + "github.com/linkerd/linkerd2/controller/gen/apis/server/v1beta3" + "github.com/linkerd/linkerd2/controller/k8s" + "github.com/prometheus/client_golang/prometheus" + logging "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/intstr" +) + +type ( + + // servicePublisher represents a service. It keeps a map of portPublishers + // keyed by port and hostname. This is because each watch on a service + // will have a port and optionally may specify a hostname. The port + // and hostname will influence the endpoint set which is why a separate + // portPublisher is required for each port and hostname combination. The + // service's port mapping will be applied to the requested port and the + // mapped port will be used in the addresses set. If a hostname is + // requested, the address set will be filtered to only include addresses + // with the requested hostname. 
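+	//
+	// For illustration, a subscription for port 8080 with hostname "web-0"
+	// flows roughly as:
+	//
+	//	sp.subscribe(8080, listener, FilterKey{Hostname: "web-0"})
+	//	  -> sp.ports[8080].subscribe(listener, filterKey) // one portPublisher per port
+	//	  -> pp.filteredListeners[filterKey]               // one filteredListenerGroup per FilterKey
+	//
+	// Hostname, zone, and local-traffic-policy filtering are applied by the
+	// filteredListenerGroup for each FilterKey rather than by the
+	// portPublisher itself.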
+ servicePublisher struct { + id ServiceID + log *logging.Entry + k8sAPI *k8s.API + metadataAPI *k8s.MetadataAPI + enableEndpointSlices bool + enableIPv6 bool + localTrafficPolicy bool + cluster string + ports map[Port]*portPublisher + // All access to the servicePublisher and its portPublishers is explicitly synchronized by + // this mutex. + sync.Mutex + } +) + +//////////////////////// +/// servicePublisher /// +//////////////////////// + +func (sp *servicePublisher) updateEndpoints(newEndpoints *corev1.Endpoints) { + sp.Lock() + defer sp.Unlock() + sp.log.Debugf("Updating endpoints for %s", sp.id) + for _, port := range sp.ports { + port.updateEndpoints(newEndpoints) + } +} + +func (sp *servicePublisher) deleteEndpoints() { + sp.Lock() + defer sp.Unlock() + sp.log.Debugf("Deleting endpoints for %s", sp.id) + for _, port := range sp.ports { + port.noEndpoints(false) + } +} + +func (sp *servicePublisher) addEndpointSlice(newSlice *discovery.EndpointSlice) { + sp.Lock() + defer sp.Unlock() + + sp.log.Debugf("Adding ES %s/%s", newSlice.Namespace, newSlice.Name) + for _, port := range sp.ports { + port.addEndpointSlice(newSlice) + } +} + +func (sp *servicePublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, newSlice *discovery.EndpointSlice) { + sp.Lock() + defer sp.Unlock() + + sp.log.Debugf("Updating ES %s/%s", oldSlice.Namespace, oldSlice.Name) + for _, port := range sp.ports { + port.updateEndpointSlice(oldSlice, newSlice) + } +} + +func (sp *servicePublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { + sp.Lock() + defer sp.Unlock() + + sp.log.Debugf("Deleting ES %s/%s", es.Namespace, es.Name) + for _, port := range sp.ports { + port.deleteEndpointSlice(es) + } +} + +func (sp *servicePublisher) updateService(newService *corev1.Service) { + sp.Lock() + defer sp.Unlock() + sp.log.Debugf("Updating service for %s", sp.id) + + // set localTrafficPolicy to true if InternalTrafficPolicy is set to local + if newService.Spec.InternalTrafficPolicy != nil { + sp.localTrafficPolicy = *newService.Spec.InternalTrafficPolicy == corev1.ServiceInternalTrafficPolicyLocal + } else { + sp.localTrafficPolicy = false + } + + for port, publisher := range sp.ports { + newTargetPort := getTargetPort(newService, port) + if newTargetPort != publisher.targetPort { + publisher.updatePort(newTargetPort) + } + // update service endpoints with new localTrafficPolicy + if publisher.localTrafficPolicy != sp.localTrafficPolicy { + publisher.updateLocalTrafficPolicy(sp.localTrafficPolicy) + } + } + +} + +func (sp *servicePublisher) subscribe(srcPort Port, listener EndpointUpdateListener, filterKey FilterKey) error { + sp.Lock() + defer sp.Unlock() + + publisher, ok := sp.ports[srcPort] + if !ok { + var err error + publisher, err = sp.newPortPublisher(srcPort) + if err != nil { + return err + } + sp.ports[srcPort] = publisher + } + publisher.subscribe(listener, filterKey) + return nil +} + +func (sp *servicePublisher) unsubscribe(srcPort Port, listener EndpointUpdateListener, filterKey FilterKey) { + sp.Lock() + defer sp.Unlock() + + publisher, ok := sp.ports[srcPort] + if ok { + publisher.unsubscribe(listener, filterKey) + if publisher.totalListeners() == 0 { + endpointsVecs.unregister(sp.metricsLabels(srcPort)) + delete(sp.ports, srcPort) + } + } +} + +func (sp *servicePublisher) newPortPublisher(srcPort Port) (*portPublisher, error) { + targetPort := intstr.FromInt(int(srcPort)) + svc, err := sp.k8sAPI.Svc().Lister().Services(sp.id.Namespace).Get(sp.id.Name) + if err != nil && 
!apierrors.IsNotFound(err) { + sp.log.Errorf("error getting service: %s", err) + } + exists := false + if err == nil { + targetPort = getTargetPort(svc, srcPort) + exists = true + } + + log := sp.log.WithField("port", srcPort) + + metrics, err := endpointsVecs.newEndpointsMetrics(sp.metricsLabels(srcPort)) + if err != nil { + return nil, err + } + port := &portPublisher{ + filteredListeners: map[FilterKey]*filteredListenerGroup{}, + targetPort: targetPort, + srcPort: srcPort, + exists: exists, + k8sAPI: sp.k8sAPI, + metadataAPI: sp.metadataAPI, + log: log, + metrics: metrics, + enableEndpointSlices: sp.enableEndpointSlices, + enableIPv6: sp.enableIPv6, + localTrafficPolicy: sp.localTrafficPolicy, + } + + if port.enableEndpointSlices { + matchLabels := map[string]string{discovery.LabelServiceName: sp.id.Name} + selector := labels.Set(matchLabels).AsSelector() + + sliceList, err := sp.k8sAPI.ES().Lister().EndpointSlices(sp.id.Namespace).List(selector) + if err != nil && !apierrors.IsNotFound(err) { + sp.log.Errorf("error getting endpointSlice list: %s", err) + } + if err == nil { + for _, slice := range sliceList { + port.addEndpointSlice(slice) + } + } + } else { + endpoints, err := sp.k8sAPI.Endpoint().Lister().Endpoints(sp.id.Namespace).Get(sp.id.Name) + if err != nil && !apierrors.IsNotFound(err) { + sp.log.Errorf("error getting endpoints: %s", err) + } + if err == nil { + port.updateEndpoints(endpoints) + } + } + + return port, nil +} + +func (sp *servicePublisher) metricsLabels(port Port) prometheus.Labels { + return endpointsLabels(sp.cluster, sp.id.Namespace, sp.id.Name, strconv.Itoa(int(port))) +} + +func (sp *servicePublisher) updateServer(oldServer, newServer *v1beta3.Server) { + sp.Lock() + defer sp.Unlock() + + for _, pp := range sp.ports { + pp.updateServer(oldServer, newServer) + } +} diff --git a/controller/cmd/destination/main.go b/controller/cmd/destination/main.go index 5e67a22fa9e80..62508f48180c6 100644 --- a/controller/cmd/destination/main.go +++ b/controller/cmd/destination/main.go @@ -168,7 +168,7 @@ func Main(args []string) { log.Fatalf("Failed to initialize Kubernetes metadata API: %s", err) } - clusterStore, err := watcher.NewClusterStore(k8Client, *controllerNamespace, *enableEndpointSlices) + clusterStore, err := watcher.NewClusterStore(k8Client, *controllerNamespace, *enableEndpointSlices, *enableIPv6) if err != nil { log.Fatalf("Failed to initialize Cluster Store: %s", err) } diff --git a/test/destination-test/Dockerfile b/test/destination-test/Dockerfile deleted file mode 100644 index f5f6ddf4e499b..0000000000000 --- a/test/destination-test/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Build stage -FROM docker.io/rust:1.90-bookworm AS build - -WORKDIR /build - -# Copy workspace manifest -COPY Cargo.toml ./ - -# Copy binary crate -COPY dst-load-controller ./dst-load-controller - -# Build the binary -RUN cargo build --release --bin dst-load-controller - -# Runtime stage -FROM debian:bookworm-slim - -RUN apt-get update && apt-get install -y \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -COPY --from=build /build/target/release/dst-load-controller /usr/local/bin/dst-load-controller - -ENTRYPOINT ["/usr/local/bin/dst-load-controller"] diff --git a/test/destination-test/README.md b/test/destination-test/README.md deleted file mode 100644 index fdc7586d03bbf..0000000000000 --- a/test/destination-test/README.md +++ /dev/null @@ -1,247 +0,0 @@ -# Destination Service Load Testing - -This directory contains load testing infrastructure for Linkerd's destination 
service. - -## Overview - -The `dst-load-controller` is a Rust binary with two subcommands: - -- **`churn`**: Creates and manages Services/Deployments in a target cluster -- **`client`**: Creates gRPC clients that subscribe to the Destination service - -## Architecture - -The load testing framework uses: - -- **Helmfile** for declarative cluster configuration (production-aligned) -- **KWOK** for creating fake Pods/Nodes without actual containers -- **Linkerd** installed via Helm (matching customer deployments) -- **step-cli** for generating shared trust roots (multicluster identity) -- **kube-prometheus-stack** (optional) for metrics collection - -## Prerequisites - -1. **Tools** (all available in dev container): - - `k3d` - local Kubernetes clusters - - `helm` - Kubernetes package manager - - `helmfile` - declarative Helm release management - - `step` - certificate generation - - `kubectl` - Kubernetes CLI - - `linkerd` - Linkerd CLI (for linking clusters) - - Rust toolchain - for building `dst-load-controller` - -2. **Container registry access**: - - Images will be built locally and loaded into k3d clusters - -## Quick Start - -### 1. Generate Shared Trust Root - -```bash -# Generate certificates for Linkerd identity -./hack/gen-certs.sh -export LINKERD_CA_DIR=/tmp/linkerd-ca -``` - -This creates: - -- `ca.crt` - Trust anchor -- `issuer.crt` - Issuer certificate -- `issuer.key` - Issuer private key - -### 2. Create k3d Cluster - -```bash -# Create test cluster -k3d cluster create test \ - --no-lb \ - --k3s-arg '--disable=local-storage,traefik,servicelb,metrics-server@server:*' -``` - -### 3. Deploy Infrastructure (Linkerd + KWOK) - -```bash -# Install all dependencies via Helmfile -LINKERD_CA_DIR=/tmp/linkerd-ca helmfile sync - -# Create a fake KWOK node for pod scheduling -kubectl apply -f hack/kwok-node.yaml - -# Wait for Linkerd to be ready -linkerd check -``` - -### 4. Optional: Install Monitoring - -```bash -# Install kube-prometheus-stack -LINKERD_CA_DIR=/tmp/linkerd-ca helmfile --state-values-set monitoring.enabled=true apply -``` - -### 5. Build and Deploy Load Controllers - -```bash -# Build Docker image -docker build -t dst-load-controller:latest -f Dockerfile . - -# Load into k3d cluster -k3d image import dst-load-controller:latest --cluster k3s-default - -# Deploy via Helm (churn controller only) -helm install dst-load chart/ -n dst-test - -# Or with custom configuration -helm install dst-load chart/ -n dst-test \ - --set churn.stable.services=20 \ - --set churn.stable.endpoints=50 \ - --set churn.oscillate.services=5 - -# Monitor churn metrics -kubectl port-forward -n dst-test pod/dst-load-dst-load-test-churn 8080:8080 & -curl localhost:8080/metrics | grep churn_ -``` - -### 6. 
Verify Load Test - -```bash -# Check services created by churn controller -kubectl get svc,deploy,pods -n dst-test - -# Verify endpoints are visible to Linkerd -linkerd diagnostics endpoints stable-svc-0.dst-test.svc.cluster.local:8080 -``` - -k3d image import dst-load-controller:latest --cluster test - -# Deploy controllers - -# TODO: Helm chart for deploying controllers - -``` - -## Running Load Tests - -### Scenario 1: Baseline (Stable Observation Load) - -```bash -# Create 10 stable services with 100 endpoints each -dst-load-controller churn \ - --namespace=dst-load-test \ - --stable-services=10 \ - --stable-endpoints=100 - -# In another terminal: 100 clients watching all services -dst-load-controller client \ - --destination-addr=linkerd-dst.linkerd:8086 \ - --target-services=$(for i in {0..9}; do echo "stable-svc-$i.dst-load-test.svc.cluster.local:8080"; done | paste -sd,) -``` - -### Scenario 2: Small Oscillation (Autoscaler Pattern) - -```bash -# 10 services oscillating 10→200→10 endpoints -dst-load-controller churn \ - --namespace=dst-load-test \ - --oscillate-services=10 \ - --oscillate-min-endpoints=10 \ - --oscillate-max-endpoints=200 \ - --oscillate-hold-duration=2m \ - --oscillate-jitter-percent=5 - -# In another terminal: 100 clients watching oscillating services -dst-load-controller client \ - --destination-addr=linkerd-dst.linkerd:8086 \ - --target-services=$(for i in {0..9}; do echo "oscillate-svc-$i.dst-load-test.svc.cluster.local:8080"; done | paste -sd,) -``` - -## Observability - -### Prometheus Metrics - -Access Prometheus: - -```bash -kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090 -# Open http://localhost:9090 -``` - -Key metrics to monitor: - -```promql -# Destination controller -destination_endpoint_views_active -rate(destination_stream_send_timeouts_total[5m]) -container_memory_working_set_bytes{pod=~"linkerd-destination-.*"} - -# Load controller metrics -churn_services_created_total -churn_scale_events_total -churn_current_replicas -``` - -### Grafana Dashboards - -Access Grafana: - -```bash -kubectl port-forward -n monitoring svc/kube-prometheus-stack-grafana 3000:80 -# Open http://localhost:3000 (admin/admin) -``` - -## Development - -### Building the Binary - -```bash -cargo build # Debug build -cargo build --release # Release build -cargo run -- churn --help # Test CLI -``` - -### Testing Certificate Generation - -```bash -./hack/gen-certs.sh /tmp/test-ca -step certificate inspect /tmp/test-ca/ca.crt -step certificate inspect /tmp/test-ca/issuer.crt -``` - -### Cleanup - -```bash -# Uninstall everything -helmfile destroy - -# Delete cluster -k3d cluster delete test - -# Clean certificates -rm -rf /tmp/linkerd-ca -``` - -## Project Structure - -``` -test/destination-test/ -├── Cargo.toml # Rust workspace -├── Dockerfile # Container build -├── helmfile.yaml # Infrastructure as code -├── README.md # This file -├── dst-load-controller/ # Binary crate -│ ├── Cargo.toml -│ └── src/ -│ ├── main.rs # CLI + orchestration -│ └── churn.rs # Service/Deployment churn logic -├── hack/ -│ └── gen-certs.sh # Certificate generation (step) -└── values/ - ├── kube-prometheus-stack.yaml - ├── linkerd-multicluster-source.yaml - └── linkerd-multicluster-target.yaml -``` - -## See Also - -- [LOAD_TEST_PLAN.md](../../controller/api/destination/LOAD_TEST_PLAN.md) - Full test scenarios and architecture -- [Linkerd Helm docs](https://linkerd.io/2/tasks/install-helm/) - Production Helm installation guide -- [KWOK 
documentation](https://kwok.sigs.k8s.io/) - Fake node/pod simulation diff --git a/test/destination-test/chart/Chart.yaml b/test/destination-test/chart/Chart.yaml deleted file mode 100644 index 58d84f4022e29..0000000000000 --- a/test/destination-test/chart/Chart.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v2 -name: dst-load-test -description: Destination service load testing controllers (churn + client) -type: application -version: 0.1.0 -appVersion: "0.1.0" -keywords: - - linkerd - - load-testing - - destination -maintainers: - - name: Linkerd Authors diff --git a/test/destination-test/chart/README.md b/test/destination-test/chart/README.md deleted file mode 100644 index aed2f0978df63..0000000000000 --- a/test/destination-test/chart/README.md +++ /dev/null @@ -1,127 +0,0 @@ -# dst-load-test Helm Chart - -This Helm chart deploys load testing controllers for Linkerd's destination service. - -## Components - -- **Churn Controller**: Creates and manages Services/Deployments to simulate endpoint churn -- **Client Controller**: Creates gRPC clients that subscribe to the Destination service (coming soon) - -## Prerequisites - -- Kubernetes cluster (local k3d recommended) -- Linkerd control plane installed -- KWOK for fake pod simulation -- Docker image built and imported into cluster - -## Building and Loading Image - -```bash -# Build Docker image -docker build -t dst-load-controller:latest -f Dockerfile . - -# Load into k3d cluster -k3d image import dst-load-controller:latest --cluster k3s-default -``` - -## Installation - -```bash -# Create test namespace first -kubectl create namespace dst-test - -# Install with default values (10 stable services, 100 endpoints each) -helm install dst-load chart/ -n dst-test - -# Install with custom churn configuration -helm install dst-load chart/ -n dst-test \ - --set churn.stable.services=20 \ - --set churn.stable.endpoints=50 \ - --set churn.oscillate.services=5 \ - --set churn.oscillate.minEndpoints=10 \ - --set churn.oscillate.maxEndpoints=200 -``` - -## Configuration - -### Churn Controller - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `churn.enabled` | Enable churn controller | `true` | -| `churn.stable.services` | Number of stable services | `10` | -| `churn.stable.endpoints` | Endpoints per stable service | `100` | -| `churn.oscillate.services` | Number of oscillating services | `5` | -| `churn.oscillate.minEndpoints` | Minimum endpoints for oscillation | `10` | -| `churn.oscillate.maxEndpoints` | Maximum endpoints for oscillation | `200` | -| `churn.oscillate.holdDuration` | Time to hold at min/max | `"30s"` | -| `churn.oscillate.jitterPercent` | Jitter percentage | `10` | -| `churn.metricsPort` | Prometheus metrics port | `8080` | - -### Client Controller - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `client.enabled` | Enable client controller | `false` | -| `client.replicas` | Number of client pods | `10` | -| `client.destinationAddr` | Linkerd destination service address | `linkerd-dst-headless.linkerd.svc.cluster.local:8086` | -| `client.metricsPort` | Prometheus metrics port | `8080` | - -### Common - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `image.repository` | Docker image repository | `dst-load-controller` | -| `image.tag` | Docker image tag | `latest` | -| `image.pullPolicy` | Image pull policy | `Never` | -| `testNamespace` | Namespace for test services | `dst-test` | -| `podAnnotations` | Pod annotations (Linkerd 
injection) | `linkerd.io/inject: enabled` | - -## Metrics - -Both controllers expose Prometheus metrics on port 8080 (configurable): - -### Churn Controller Metrics - -- `churn_services_created` - Total services created (by pattern) -- `churn_deployments_created` - Total deployments created -- `churn_scale_events` - Total scale operations (by pattern, service) -- `churn_current_replicas` - Current replica count (by pattern, service) - -### Client Controller Metrics (TODO) - -- `client_streams_active` - Active gRPC streams -- `client_updates_received` - Destination updates received -- `client_endpoints_watched` - Endpoints being watched - -## Uninstallation - -```bash -# Remove load test -helm uninstall dst-load -n dst-test - -# Clean up test services (created by churn controller) -kubectl delete all --all -n dst-test -``` - -## Example: Full Load Test - -```bash -# 1. Install with large churn -helm install dst-load chart/ -n dst-test \ - --set churn.stable.services=50 \ - --set churn.stable.endpoints=200 \ - --set churn.oscillate.services=10 \ - --set churn.oscillate.minEndpoints=50 \ - --set churn.oscillate.maxEndpoints=500 - -# 2. Monitor churn metrics -kubectl port-forward -n dst-test pod/dst-load-dst-load-test-churn 8080:8080 & -curl localhost:8080/metrics - -# 3. Enable clients (once implemented) -helm upgrade dst-load chart/ -n dst-test \ - --set client.enabled=true \ - --set client.replicas=100 \ - --reuse-values -``` diff --git a/test/destination-test/chart/templates/_helpers.tpl b/test/destination-test/chart/templates/_helpers.tpl deleted file mode 100644 index c4a512b9095e1..0000000000000 --- a/test/destination-test/chart/templates/_helpers.tpl +++ /dev/null @@ -1,49 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "dst-load-test.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -*/}} -{{- define "dst-load-test.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "dst-load-test.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "dst-load-test.labels" -}} -helm.sh/chart: {{ include "dst-load-test.chart" . }} -{{ include "dst-load-test.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "dst-load-test.selectorLabels" -}} -app.kubernetes.io/name: {{ include "dst-load-test.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} diff --git a/test/destination-test/chart/templates/churn.yaml b/test/destination-test/chart/templates/churn.yaml deleted file mode 100644 index 846cec32baf8d..0000000000000 --- a/test/destination-test/chart/templates/churn.yaml +++ /dev/null @@ -1,71 +0,0 @@ -{{- if .Values.churn.enabled }} -apiVersion: v1 -kind: Pod -metadata: - name: {{ include "dst-load-test.fullname" . 
}}-churn - namespace: {{ .Release.Namespace }} - labels: - app.kubernetes.io/name: {{ include "dst-load-test.name" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/component: churn - annotations: - {{- toYaml .Values.podAnnotations | nindent 4 }} -spec: - serviceAccountName: {{ .Values.serviceAccount.name }} - restartPolicy: Never - containers: - - name: churn - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - args: - - scale - - --namespace={{ .Values.testNamespace }} - - --deployment-pattern={{ .Values.churn.deploymentPattern }} - - --min-replicas={{ .Values.churn.minReplicas }} - - --max-replicas={{ .Values.churn.maxReplicas }} - - --hold-duration={{ .Values.churn.holdDuration }} - - --jitter-percent={{ .Values.churn.jitterPercent }} - ports: - - name: admin-http - containerPort: {{ .Values.churn.metricsPort }} - protocol: TCP - resources: - {{- toYaml .Values.churn.resources | nindent 8 }} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 4 }} - {{- end }} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ include "dst-load-test.fullname" . }}-churn-metrics - namespace: {{ .Release.Namespace }} - labels: - app.kubernetes.io/name: {{ include "dst-load-test.name" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/component: churn -spec: - type: ClusterIP - ports: - - port: {{ .Values.churn.metricsPort }} - targetPort: metrics - protocol: TCP - name: metrics - selector: - app.kubernetes.io/name: {{ include "dst-load-test.name" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/component: churn -{{- end }} diff --git a/test/destination-test/chart/templates/client.yaml b/test/destination-test/chart/templates/client.yaml deleted file mode 100644 index 99a17b075d19d..0000000000000 --- a/test/destination-test/chart/templates/client.yaml +++ /dev/null @@ -1,134 +0,0 @@ -{{- if .Values.client.enabled }} ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "dst-load-test.fullname" . }}-client - namespace: {{ .Release.Namespace }} - labels: - {{- include "dst-load-test.labels" . | nindent 4 }} - app.kubernetes.io/component: client ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: {{ include "dst-load-test.fullname" . }}-client - namespace: {{ .Release.Namespace }} - labels: - {{- include "dst-load-test.labels" . | nindent 4 }} - app.kubernetes.io/component: client -rules: -- apiGroups: [""] - resources: ["services"] - verbs: ["list", "watch", "get"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: {{ include "dst-load-test.fullname" . }}-client - namespace: {{ .Release.Namespace }} - labels: - {{- include "dst-load-test.labels" . | nindent 4 }} - app.kubernetes.io/component: client -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: {{ include "dst-load-test.fullname" . }}-client -subjects: -- kind: ServiceAccount - name: {{ include "dst-load-test.fullname" . }}-client - namespace: {{ .Release.Namespace }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "dst-load-test.fullname" . 
}}-client - namespace: {{ .Release.Namespace }} - labels: - {{- include "dst-load-test.labels" . | nindent 4 }} - app.kubernetes.io/component: client -spec: - replicas: {{ .Values.client.replicas }} - selector: - matchLabels: - {{- include "dst-load-test.selectorLabels" . | nindent 6 }} - app.kubernetes.io/component: client - template: - metadata: - labels: - {{- include "dst-load-test.selectorLabels" . | nindent 8 }} - app.kubernetes.io/component: client - {{- with .Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - serviceAccountName: {{ include "dst-load-test.fullname" . }}-client - containers: - - name: client - image: "{{ .Values.client.image.repository }}:{{ .Values.client.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.client.image.pullPolicy }} - args: - - client - - --destination-addr={{ .Values.client.destinationAddr }} - - --service-label-selector={{ .Values.client.serviceLabelSelector }} - - --watchers-per-service={{ .Values.client.watchersPerService }} - - --min-stream-lifetime={{ .Values.client.minStreamLifetime }} - - --max-stream-lifetime={{ .Values.client.maxStreamLifetime }} - - --namespace={{ .Release.Namespace }} - env: - # Downward API - inject pod metadata for context token - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - ports: - - name: admin-http - containerPort: {{ .Values.client.metricsPort }} - protocol: TCP - resources: - {{- toYaml .Values.client.resources | nindent 10 }} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ include "dst-load-test.fullname" . }}-client-metrics - namespace: {{ .Release.Namespace }} - labels: - {{- include "dst-load-test.labels" . | nindent 4 }} - app.kubernetes.io/component: client -spec: - type: ClusterIP - ports: - - port: {{ .Values.client.metricsPort }} - targetPort: metrics - protocol: TCP - name: metrics - selector: - {{- include "dst-load-test.selectorLabels" . | nindent 4 }} - app.kubernetes.io/component: client -{{- end }} diff --git a/test/destination-test/chart/templates/rbac.yaml b/test/destination-test/chart/templates/rbac.yaml deleted file mode 100644 index ec2ea41a0ee1f..0000000000000 --- a/test/destination-test/chart/templates/rbac.yaml +++ /dev/null @@ -1,52 +0,0 @@ -{{- if .Values.rbac.create -}} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ .Values.serviceAccount.name }} - labels: - app.kubernetes.io/name: {{ include "dst-load-test.name" . }} - app.kubernetes.io/instance: {{ .Release.Name }} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: {{ include "dst-load-test.fullname" . }} - namespace: {{ .Values.testNamespace }} - labels: - app.kubernetes.io/name: {{ include "dst-load-test.name" . 
}} - app.kubernetes.io/instance: {{ .Release.Name }} -rules: - # Allow listing and getting Deployments (to find matching pattern) - - apiGroups: ["apps"] - resources: ["deployments"] - verbs: ["get", "list", "watch"] - # Allow patching Deployments (to update spec.replicas) - - apiGroups: ["apps"] - resources: ["deployments"] - verbs: ["patch"] - # Allow reading Services (for validation/metrics) - - apiGroups: [""] - resources: ["services"] - verbs: ["get", "list", "watch"] - # Allow reading Pods (for metrics) - - apiGroups: [""] - resources: ["pods"] - verbs: ["get", "list", "watch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: {{ include "dst-load-test.fullname" . }} - namespace: {{ .Values.testNamespace }} - labels: - app.kubernetes.io/name: {{ include "dst-load-test.name" . }} - app.kubernetes.io/instance: {{ .Release.Name }} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: {{ include "dst-load-test.fullname" . }} -subjects: - - kind: ServiceAccount - name: {{ .Values.serviceAccount.name }} - namespace: {{ .Release.Namespace }} -{{- end }} diff --git a/test/destination-test/chart/templates/test-services.yaml b/test/destination-test/chart/templates/test-services.yaml deleted file mode 100644 index 6560cf752a806..0000000000000 --- a/test/destination-test/chart/templates/test-services.yaml +++ /dev/null @@ -1,64 +0,0 @@ -{{- range $i := until (int .Values.testServices.count) }} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ $.Values.testServices.namePrefix }}-{{ $i }} - namespace: {{ $.Values.testNamespace }} - labels: - app.kubernetes.io/name: {{ include "dst-load-test.name" $ }} - app.kubernetes.io/instance: {{ $.Release.Name }} - app.kubernetes.io/component: test-service - app: {{ $.Values.testServices.namePrefix }}-{{ $i }} - dst-load.l5d.io/role: target - {{- if $.Values.multicluster.enabled }} - mirror.linkerd.io/exported: "true" - {{- end }} - {{- if $.Values.multicluster.annotations }} - annotations: - {{- toYaml $.Values.multicluster.annotations | nindent 4 }} - {{- end }} -spec: - type: ClusterIP - ports: - - port: {{ $.Values.testServices.port }} - targetPort: {{ $.Values.testServices.port }} - protocol: TCP - name: http - selector: - app: {{ $.Values.testServices.namePrefix }}-{{ $i }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ $.Values.testServices.namePrefix }}-{{ $i }} - namespace: {{ $.Values.testNamespace }} - labels: - app.kubernetes.io/name: {{ include "dst-load-test.name" $ }} - app.kubernetes.io/instance: {{ $.Release.Name }} - app.kubernetes.io/component: test-service - app: {{ $.Values.testServices.namePrefix }}-{{ $i }} -spec: - replicas: {{ $.Values.testServices.initialReplicas }} - selector: - matchLabels: - app: {{ $.Values.testServices.namePrefix }}-{{ $i }} - template: - metadata: - labels: - app: {{ $.Values.testServices.namePrefix }}-{{ $i }} - kwok.x-k8s.io/node: kwok-node - spec: - nodeSelector: - type: kwok - tolerations: - - key: kwok.x-k8s.io/node - operator: Exists - effect: NoSchedule - containers: - - name: app - image: fake-image:latest - ports: - - containerPort: {{ $.Values.testServices.port }} - protocol: TCP -{{- end }} diff --git a/test/destination-test/chart/values.yaml b/test/destination-test/chart/values.yaml deleted file mode 100644 index 7e1eed027490a..0000000000000 --- a/test/destination-test/chart/values.yaml +++ /dev/null @@ -1,126 +0,0 @@ -# Default values for dst-load-test -# This Helm chart deploys load testing controllers for Linkerd's 
destination service - -# Docker image configuration -image: - repository: dst-load-controller - tag: latest - pullPolicy: Never # Use local image imported into k3d - -# Image pull secrets (if needed for private registries) -imagePullSecrets: [] - -# Service account configuration -serviceAccount: - create: true - name: dst-load-test - -# RBAC permissions -rbac: - create: true - -# Namespace where test services will be created -testNamespace: dst-test - -# Test services configuration (created by Helm chart) -testServices: - # Number of services to create - count: 10 - - # Service name prefix (will create {namePrefix}-0, {namePrefix}-1, etc.) - namePrefix: test-svc - - # Initial replica count for each Deployment - initialReplicas: 100 - - # Service port - port: 8080 - -# Multicluster configuration (optional) -multicluster: - # Enable multicluster annotations on services - enabled: false - - # Additional annotations for multicluster services - # Example: config.linkerd.io/enable-gateway: "true" - annotations: {} - -# Churn controller configuration -churn: - enabled: true - - # Deployment name pattern to scale (supports wildcards) - deploymentPattern: "test-svc-*" - - # Oscillation configuration - minReplicas: 10 - maxReplicas: 200 - holdDuration: "30s" - jitterPercent: 10 - - # Metrics server port - metricsPort: 8080 - - # Resource limits - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 500m - memory: 256Mi - -# Client controller configuration -client: - enabled: false - - # Number of client replicas - replicas: 1 - - # Image configuration (overrides global image settings if specified) - image: - repository: dst-load-controller - tag: latest - pullPolicy: Never - - # Destination service address - destinationAddr: linkerd-dst-headless.linkerd.svc.cluster.local:8086 - - # Label selector for discovering services to watch - # Example: "dst-load.l5d.io/role=target" for load test targets - serviceLabelSelector: "dst-load.l5d.io/role=target" - - # Number of concurrent watchers per discovered service - watchersPerService: 10 - - # Stream lifetime configuration - # Minimum time before reconnecting (with jitter) - minStreamLifetime: "5m" - # Maximum time before reconnecting (with jitter) - maxStreamLifetime: "30m" - - # Metrics server port - metricsPort: 8080 - - # Resource limits - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 500m - memory: 256Mi - -# Pod annotations (for Linkerd injection) -podAnnotations: - linkerd.io/inject: enabled - config.linkerd.io/skip-outbound-ports: "8080" # Skip metrics port - -# Node selector -nodeSelector: {} - -# Tolerations -tolerations: [] - -# Affinity -affinity: {} diff --git a/test/destination-test/dst-load-controller/Cargo.toml b/test/destination-test/dst-load-controller/Cargo.toml deleted file mode 100644 index 4e521321ceb66..0000000000000 --- a/test/destination-test/dst-load-controller/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -[package] -name = "dst-load-controller" -version = "0.1.0" -edition = "2021" -publish = false - -[[bin]] -name = "dst-load-controller" -path = "src/main.rs" - -[dependencies] -anyhow = "1" -clap = { version = "4", features = ["derive", "env"] } -futures = "0.3" -k8s-openapi = { workspace = true } -kube = { workspace = true, default-features = false, features = [ - "client", - "rustls-tls", -] } -linkerd2-proxy-api = { workspace = true } -prometheus-client = { workspace = true } -rand = "0.8" -serde = { version = "1", features = ["derive"] } -serde_json = "1" -tokio = { version = "1", features = 
["macros", "rt-multi-thread", "signal"] } -tonic = { workspace = true, default-features = false, features = ["transport"] } -tracing = "0.1" - -[dependencies.kubert] -workspace = true -default-features = false -features = ["clap", "prometheus-client", "runtime", "rustls-tls"] - -[dependencies.tracing-subscriber] -version = "0.3" -features = ["env-filter", "fmt", "json"] diff --git a/test/destination-test/dst-load-controller/src/churn.rs b/test/destination-test/dst-load-controller/src/churn.rs deleted file mode 100644 index 41a236bc373bf..0000000000000 --- a/test/destination-test/dst-load-controller/src/churn.rs +++ /dev/null @@ -1,227 +0,0 @@ -//! Churn controller: Scales existing Deployments to simulate endpoint churn - -use std::time::Duration; - -use k8s_openapi::api::apps::v1::Deployment; -use kube::{ - api::{Api, Patch, PatchParams}, - Client, -}; -use prometheus_client::{ - encoding::EncodeLabelSet, - metrics::{counter::Counter, family::Family, gauge::Gauge}, - registry::Registry, -}; -use tracing::{error, info}; - -#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] -pub struct ChurnLabels { - pattern: String, - service: String, -} - -pub struct ChurnMetrics { - pub scale_events: Family, - pub current_replicas: Family, -} - -impl ChurnMetrics { - pub fn new(registry: &mut Registry) -> Self { - let scale_events = Family::default(); - let current_replicas = Family::default(); - - registry.register( - "churn_scale_events", - "Total number of scale events", - scale_events.clone(), - ); - registry.register( - "churn_current_replicas", - "Current replica count per service", - current_replicas.clone(), - ); - - Self { - scale_events, - current_replicas, - } - } -} - -pub struct ChurnController { - pub client: Client, - pub namespace: String, - pub metrics: ChurnMetrics, -} - -impl ChurnController { - pub fn new(client: Client, namespace: String, metrics: ChurnMetrics) -> Self { - Self { - client, - namespace, - metrics, - } - } - - /// Scale a Deployment to the specified replica count - async fn scale_deployment( - &self, - name: &str, - replicas: i32, - pattern: &str, - ) -> anyhow::Result<()> { - let deployments: Api = Api::namespaced(self.client.clone(), &self.namespace); - - let patch = serde_json::json!({ - "spec": { - "replicas": replicas - } - }); - - deployments - .patch( - name, - &PatchParams::apply("dst-load-controller"), - &Patch::Merge(&patch), - ) - .await - .map_err(|e| { - error!(?e, deployment = name, replicas, "Failed to scale deployment"); - e - })?; - - info!(deployment = name, replicas, pattern, "Scaled deployment"); - - let labels = ChurnLabels { - pattern: pattern.to_string(), - service: name.to_string(), - }; - self.metrics.scale_events.get_or_create(&labels).inc(); - self.metrics - .current_replicas - .get_or_create(&labels) - .set(replicas as i64); - - Ok(()) - } - - /// Oscillate replicas for deployments matching a pattern - /// This is the simplified version that only scales existing deployments - pub async fn run_oscillate_deployments( - &self, - pattern: &str, - min_replicas: i32, - max_replicas: i32, - hold_duration: Duration, - jitter_percent: u8, - ) -> anyhow::Result<()> { - info!( - pattern, - min_replicas, - max_replicas, - ?hold_duration, - jitter_percent, - "Starting deployment oscillation" - ); - - let deployments: Api = Api::namespaced(self.client.clone(), &self.namespace); - - // List all deployments and filter by pattern - let deployment_list = deployments.list(&Default::default()).await?; - - // Simple glob matching (supports wildcards 
like "test-svc-*") - let matching_deployments: Vec = deployment_list - .items - .iter() - .filter_map(|d| { - let name = d.metadata.name.as_ref()?; - if matches_pattern(name, pattern) { - Some(name.clone()) - } else { - None - } - }) - .collect(); - - if matching_deployments.is_empty() { - anyhow::bail!("No deployments found matching pattern: {}", pattern); - } - - info!( - count = matching_deployments.len(), - deployments = ?matching_deployments, - "Found matching deployments" - ); - - // Oscillate forever - let mut current_replicas = max_replicas; - loop { - // Toggle between min and max - current_replicas = if current_replicas == max_replicas { - min_replicas - } else { - max_replicas - }; - - // Scale all matching deployments - for deployment_name in &matching_deployments { - self.scale_deployment(deployment_name, current_replicas, "oscillate") - .await?; - } - - info!( - replicas = current_replicas, - deployments = matching_deployments.len(), - "Scaled deployments" - ); - - // Wait with jitter - let jitter = if jitter_percent > 0 { - use rand::Rng; - let max_jitter = hold_duration.as_millis() * jitter_percent as u128 / 100; - Duration::from_millis(rand::thread_rng().gen_range(0..=max_jitter as u64)) - } else { - Duration::from_secs(0) - }; - - let sleep_duration = hold_duration + jitter; - info!(?sleep_duration, "Holding at current scale"); - tokio::time::sleep(sleep_duration).await; - } - } -} - -/// Simple glob pattern matching (supports * wildcard) -fn matches_pattern(name: &str, pattern: &str) -> bool { - if pattern == "*" { - return true; - } - - if let Some(prefix) = pattern.strip_suffix('*') { - name.starts_with(prefix) - } else if let Some(suffix) = pattern.strip_prefix('*') { - name.ends_with(suffix) - } else { - name == pattern - } -} - -/// Parse duration string (e.g., "30s", "5m", "1h") -pub fn parse_duration(s: &str) -> anyhow::Result { - let s = s.trim(); - if s.is_empty() { - anyhow::bail!("Empty duration string"); - } - - let (num_str, unit) = s.split_at(s.len() - 1); - let num: u64 = num_str - .parse() - .map_err(|_| anyhow::anyhow!("Invalid number: {}", num_str))?; - - match unit { - "s" => Ok(Duration::from_secs(num)), - "m" => Ok(Duration::from_secs(num * 60)), - "h" => Ok(Duration::from_secs(num * 3600)), - _ => anyhow::bail!("Invalid duration unit: {}", unit), - } -} diff --git a/test/destination-test/dst-load-controller/src/client.rs b/test/destination-test/dst-load-controller/src/client.rs deleted file mode 100644 index cfd05f27b2fcb..0000000000000 --- a/test/destination-test/dst-load-controller/src/client.rs +++ /dev/null @@ -1,375 +0,0 @@ -//! 
Client controller: Creates gRPC clients that subscribe to the Destination service - -use std::time::Duration; - -use k8s_openapi::api::core::v1::Service; -use kube::{ - api::{Api, ListParams}, - Client, -}; -use linkerd2_proxy_api::destination as dst_api; -use prometheus_client::{ - encoding::EncodeLabelSet, - metrics::{counter::Counter, family::Family, gauge::Gauge}, - registry::Registry, -}; -use rand::Rng; -use tokio::time::{sleep, timeout}; -use tonic::transport::Channel; -use tracing::{error, info, warn}; - -#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] -pub struct ClientLabels { - target: String, - request_type: String, -} - -pub struct ClientMetrics { - pub streams_active: Family, - pub updates_received: Family, - pub endpoints_current: Family, - pub stream_errors: Family, -} - -impl ClientMetrics { - pub fn new(registry: &mut Registry) -> Self { - let streams_active = Family::default(); - let updates_received = Family::default(); - let endpoints_current = Family::default(); - let stream_errors = Family::default(); - - registry.register( - "client_streams_active", - "Number of active gRPC streams", - streams_active.clone(), - ); - registry.register( - "client_updates_received", - "Total number of updates received", - updates_received.clone(), - ); - registry.register( - "client_endpoints_current", - "Current number of endpoints for a service", - endpoints_current.clone(), - ); - registry.register( - "client_stream_errors", - "Total number of stream errors", - stream_errors.clone(), - ); - - Self { - streams_active, - updates_received, - endpoints_current, - stream_errors, - } - } -} - -pub struct ClientController { - pub client: Client, - pub destination_addr: String, - pub namespace: String, - pub context_token: String, - pub metrics: ClientMetrics, -} - -impl ClientController { - pub fn new( - client: Client, - destination_addr: String, - namespace: String, - context_token: String, - metrics: ClientMetrics, - ) -> Self { - Self { - client, - destination_addr, - namespace, - context_token, - metrics, - } - } - - /// Discover services by label selector and create watchers - pub async fn run_get_requests( - &self, - service_label_selector: String, - watchers_per_service: u32, - min_stream_lifetime: Duration, - max_stream_lifetime: Duration, - ) -> anyhow::Result<()> { - info!( - destination_addr = %self.destination_addr, - namespace = %self.namespace, - label_selector = %service_label_selector, - watchers_per_service, - "Starting Get requests" - ); - - // Discover services via Kubernetes API - let services: Api = Api::namespaced(self.client.clone(), &self.namespace); - let lp = ListParams::default().labels(&service_label_selector); - - let service_list = services.list(&lp).await?; - - if service_list.items.is_empty() { - anyhow::bail!( - "No services found with label selector: {}", - service_label_selector - ); - } - - info!( - service_count = service_list.items.len(), - "Discovered services" - ); - - // Connect to destination service - let channel = Channel::from_shared(format!("http://{}", self.destination_addr))? 
- .connect() - .await?; - - info!("Connected to destination service"); - - // Spawn watchers for each service - let mut tasks = Vec::new(); - for svc in service_list.items { - let svc_name = svc.metadata.name.as_ref().ok_or_else(|| anyhow::anyhow!("Service missing name"))?; - - // Get the service port (assume first port) - let port = svc - .spec - .as_ref() - .and_then(|spec| spec.ports.as_ref()) - .and_then(|ports| ports.first()) - .map(|p| p.port) - .ok_or_else(|| anyhow::anyhow!("Service missing port"))?; - - // Build the destination path (authority) - let target = format!( - "{}.{}.svc.cluster.local:{}", - svc_name, self.namespace, port - ); - - info!( - service = %svc_name, - target = %target, - watchers = watchers_per_service, - "Creating watchers for service" - ); - - // Spawn multiple watchers for this service - for watcher_id in 0..watchers_per_service { - let channel = channel.clone(); - let target = target.clone(); - let context_token = self.context_token.clone(); - let metrics = self.metrics.clone(); - let svc_name = svc_name.clone(); - let min_lifetime = min_stream_lifetime; - let max_lifetime = max_stream_lifetime; - - let task = tokio::spawn(async move { - if let Err(e) = subscribe_to_destination( - channel, - target.clone(), - context_token, - metrics, - watcher_id, - min_lifetime, - max_lifetime, - ) - .await - { - error!( - service = %svc_name, - target = %target, - watcher_id, - error = ?e, - "Get stream failed" - ); - } - }); - tasks.push(task); - } - } - - // Wait for all tasks (they should run forever) - futures::future::join_all(tasks).await; - - Ok(()) - } -} - -/// Subscribe to a destination service and process updates -/// Streams have a bounded lifetime with randomized jitter to simulate realistic client behavior -async fn subscribe_to_destination( - channel: Channel, - target: String, - context_token: String, - metrics: ClientMetrics, - watcher_id: u32, - min_stream_lifetime: Duration, - max_stream_lifetime: Duration, -) -> anyhow::Result<()> { - let mut client = dst_api::destination_client::DestinationClient::new(channel); - - info!( - target = %target, - watcher_id, - min_lifetime_secs = min_stream_lifetime.as_secs(), - max_lifetime_secs = max_stream_lifetime.as_secs(), - "Subscribing to destination" - ); - - let labels = ClientLabels { - target: target.clone(), - request_type: "Get".to_string(), - }; - - loop { - // Randomize stream lifetime between min and max - let lifetime_secs = rand::thread_rng().gen_range( - min_stream_lifetime.as_secs()..=max_stream_lifetime.as_secs() - ); - let stream_lifetime = Duration::from_secs(lifetime_secs); - - info!( - target = %target, - watcher_id, - lifetime_secs = stream_lifetime.as_secs(), - "Starting bounded stream" - ); - - // Create Get request with context token - let request = tonic::Request::new(dst_api::GetDestination { - scheme: "k8s".to_string(), - path: target.clone(), - context_token: context_token.clone(), - }); - - // Track active stream - metrics.streams_active.get_or_create(&labels).inc(); - - // Subscribe to stream with timeout - let stream_result = timeout(stream_lifetime, async { - match client.get(request).await { - Ok(response) => { - let mut stream = response.into_inner(); - info!( - target = %target, - watcher_id, - "Stream established" - ); - - // Process updates until stream ends or timeout - while let Ok(Some(update)) = stream.message().await { - handle_update(&target, update, &metrics, &labels, watcher_id); - } - - Ok(()) - } - Err(e) => { - error!( - target = %target, - watcher_id, - error = ?e, 
- "Failed to establish stream" - ); - metrics.stream_errors.get_or_create(&labels).inc(); - Err(e) - } - } - }) - .await; - - // Stream closed (either timeout or natural end), mark as inactive - metrics.streams_active.get_or_create(&labels).dec(); - - match stream_result { - Ok(_) => { - info!( - target = %target, - watcher_id, - "Stream ended naturally, reconnecting..." - ); - } - Err(_) => { - info!( - target = %target, - watcher_id, - lifetime_secs = stream_lifetime.as_secs(), - "Stream lifetime expired, reconnecting..." - ); - } - } - - // Wait before reconnecting (short jitter) - let reconnect_delay = Duration::from_secs(rand::thread_rng().gen_range(1..5)); - sleep(reconnect_delay).await; - } -} - -/// Handle a destination update -fn handle_update( - target: &str, - update: dst_api::Update, - metrics: &ClientMetrics, - labels: &ClientLabels, - watcher_id: u32, -) { - metrics.updates_received.get_or_create(labels).inc(); - - match update.update { - Some(dst_api::update::Update::Add(add)) => { - let endpoint_count = add.addrs.len(); - info!( - target = %target, - watcher_id, - endpoints = endpoint_count, - "Received Add update" - ); - metrics - .endpoints_current - .get_or_create(labels) - .set(endpoint_count as i64); - } - Some(dst_api::update::Update::Remove(remove)) => { - info!( - target = %target, - watcher_id, - removed = remove.addrs.len(), - "Received Remove update" - ); - } - Some(dst_api::update::Update::NoEndpoints(no_endpoints)) => { - info!( - target = %target, - watcher_id, - exists = no_endpoints.exists, - "Received NoEndpoints update" - ); - metrics.endpoints_current.get_or_create(labels).set(0); - } - None => { - warn!( - target = %target, - watcher_id, - "Received update with no data" - ); - } - } -} - -impl Clone for ClientMetrics { - fn clone(&self) -> Self { - Self { - streams_active: self.streams_active.clone(), - updates_received: self.updates_received.clone(), - endpoints_current: self.endpoints_current.clone(), - stream_errors: self.stream_errors.clone(), - } - } -} diff --git a/test/destination-test/dst-load-controller/src/main.rs b/test/destination-test/dst-load-controller/src/main.rs deleted file mode 100644 index d4fffe64df272..0000000000000 --- a/test/destination-test/dst-load-controller/src/main.rs +++ /dev/null @@ -1,281 +0,0 @@ -use anyhow::Result; -use clap::{Parser, Subcommand}; - -mod churn; -mod client; - -#[derive(Parser)] -#[command(name = "dst-load-controller")] -#[command(about = "Destination service load testing controller", long_about = None)] -struct Args { - #[clap(long, default_value = "linkerd=info,warn")] - log_level: kubert::LogFilter, - - #[clap(long, default_value = "plain")] - log_format: kubert::LogFormat, - - #[clap(flatten)] - client: kubert::ClientArgs, - - #[clap(flatten)] - admin: kubert::AdminArgs, - - #[command(subcommand)] - command: Commands, -} - -#[derive(Subcommand)] -enum Commands { - /// Scale controller: Oscillates Deployment replicas between min/max to simulate autoscaler behavior - Scale { - /// Deployment name pattern to scale (supports wildcards, e.g., "test-svc-*") - #[arg(long)] - deployment_pattern: String, - - /// Minimum replica count - #[arg(long)] - min_replicas: i32, - - /// Maximum replica count - #[arg(long)] - max_replicas: i32, - - /// Hold time at min/max before changing (e.g., "30s", "1m") - #[arg(long, default_value = "30s")] - hold_duration: String, - - /// Jitter percentage (0-100) to spread oscillation timing - #[arg(long, default_value = "0")] - jitter_percent: u8, - - /// Namespace where 
deployments exist - #[arg(long, default_value = "default")] - namespace: String, - }, - - /// Client controller: Creates gRPC clients and subscribes to Destination service - Client { - /// Destination service address (e.g., "linkerd-destination.linkerd:8086") - #[arg(long)] - destination_addr: String, - - /// Label selector to discover target services (e.g., "app.kubernetes.io/component=test-service") - #[arg(long)] - service_label_selector: String, - - /// Number of concurrent watchers per service - #[arg(long, default_value = "1")] - watchers_per_service: u32, - - /// Minimum stream lifetime before reconnection (e.g., "30s", "5m") - #[arg(long, default_value = "5m")] - min_stream_lifetime: String, - - /// Maximum stream lifetime before reconnection (e.g., "1h", "30m") - #[arg(long, default_value = "30m")] - max_stream_lifetime: String, - - /// Namespace where services exist - #[arg(long, default_value = "default")] - namespace: String, - - /// Pod name (for context token, typically from downward API) - #[arg(long, env = "POD_NAME")] - pod_name: Option, - - /// Pod namespace (for context token, typically from downward API) - #[arg(long, env = "POD_NAMESPACE")] - pod_namespace: Option, - - /// Node name (for context token, typically from downward API) - #[arg(long, env = "NODE_NAME")] - node_name: Option, - }, -} - -#[tokio::main] -async fn main() -> Result<()> { - let args = Args::parse(); - args.run().await -} - -impl Args { - async fn run(self) -> Result<()> { - let Args { - log_level, - log_format, - client: client_args, - admin, - command, - } = self; - - match command { - Commands::Scale { - deployment_pattern, - min_replicas, - max_replicas, - hold_duration, - jitter_percent, - namespace, - } => { - tracing::info!( - deployment_pattern, - min_replicas, - max_replicas, - hold_duration, - jitter_percent, - namespace, - "Starting scale controller" - ); - - // Validate inputs - if min_replicas < 0 || max_replicas < 0 { - anyhow::bail!("Replica counts must be >= 0"); - } - if min_replicas >= max_replicas { - anyhow::bail!("--min-replicas must be < --max-replicas"); - } - if jitter_percent > 100 { - anyhow::bail!("--jitter-percent must be 0-100"); - } - - let hold_duration = churn::parse_duration(&hold_duration)?; - - // Set up metrics - let mut prom = prometheus_client::registry::Registry::default(); - let metrics = churn::ChurnMetrics::new(&mut prom); - - // Build runtime with admin server (provides /metrics, /ready, /live) - let runtime = kubert::Runtime::builder() - .with_log(log_level, log_format) - .with_admin(admin.into_builder().with_prometheus(prom)) - .with_client(client_args) - .build() - .await?; - - // Get Kubernetes client from runtime - let client = runtime.client(); - - // Create churn controller - let controller = churn::ChurnController::new(client, namespace, metrics); - - // Run oscillate pattern on matching deployments - controller - .run_oscillate_deployments( - &deployment_pattern, - min_replicas, - max_replicas, - hold_duration, - jitter_percent, - ) - .await?; - } - - Commands::Client { - destination_addr, - service_label_selector, - watchers_per_service, - min_stream_lifetime, - max_stream_lifetime, - namespace, - pod_name, - pod_namespace, - node_name, - } => { - tracing::info!( - destination_addr, - service_label_selector, - watchers_per_service, - min_stream_lifetime, - max_stream_lifetime, - namespace, - ?pod_name, - ?pod_namespace, - ?node_name, - "Starting client controller" - ); - - if watchers_per_service == 0 { - anyhow::bail!("--watchers-per-service must 
be > 0"); - } - - // Parse stream lifetime durations - let min_lifetime = churn::parse_duration(&min_stream_lifetime)?; - let max_lifetime = churn::parse_duration(&max_stream_lifetime)?; - - if min_lifetime >= max_lifetime { - anyhow::bail!("--min-stream-lifetime must be < --max-stream-lifetime"); - } - - // Build context token (mimics linkerd proxy injector) - let context_token = build_context_token( - pod_name.as_deref(), - pod_namespace.as_deref(), - node_name.as_deref(), - )?; - - tracing::info!(context_token, "Built context token"); - - // Set up metrics - let mut prom = prometheus_client::registry::Registry::default(); - let metrics = client::ClientMetrics::new(&mut prom); - - // Build runtime with admin server (provides /metrics, /ready, /live) - let runtime = kubert::Runtime::builder() - .with_log(log_level, log_format) - .with_admin(admin.into_builder().with_prometheus(prom)) - .with_client(client_args) - .build() - .await?; - - // Get Kubernetes client from runtime - let client = runtime.client(); - - // Create client controller - let controller = client::ClientController::new( - client, - destination_addr, - namespace, - context_token, - metrics, - ); - - // Run Get requests - controller - .run_get_requests( - service_label_selector, - watchers_per_service, - min_lifetime, - max_lifetime, - ) - .await?; - } - } - - Ok(()) - } -} - -/// Build a context token for destination service requests -/// Format matches what linkerd proxy-injector does: {"ns":"namespace","nodeName":"node","pod":"podname"} -fn build_context_token( - pod_name: Option<&str>, - pod_namespace: Option<&str>, - node_name: Option<&str>, -) -> Result { - let mut token = serde_json::json!({}); - - if let Some(ns) = pod_namespace { - token["ns"] = serde_json::json!(ns); - } - - if let Some(pod) = pod_name { - token["pod"] = serde_json::json!(pod); - } - - if let Some(node) = node_name { - token["nodeName"] = serde_json::json!(node); - } - - Ok(token.to_string()) -} diff --git a/test/destination-test/hack/gen-certs.sh b/test/destination-test/hack/gen-certs.sh deleted file mode 100755 index 0c48e5d3574b7..0000000000000 --- a/test/destination-test/hack/gen-certs.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env bash -# -# Generate shared trust root and issuer certificates for Linkerd multicluster -# using step-cli. This creates a single CA that both clusters will share, -# enabling cross-cluster mTLS. -# -# Usage: -# ./gen-certs.sh [output-dir] -# -# Output: -# LINKERD_CA_DIR/ca.crt - Trust anchor (root CA) -# LINKERD_CA_DIR/issuer.crt - Issuer certificate (intermediate) -# LINKERD_CA_DIR/issuer.key - Issuer private key -# -# These files are read by helmfile.yaml during cluster setup. 
- -set -euo pipefail - -# Default CA directory -CA_DIR="${1:-${LINKERD_CA_DIR:-/tmp/linkerd-ca}}" - -# Certificate validity periods -CA_VALIDITY="87600h" # 10 years -ISSUER_VALIDITY="8760h" # 1 year - -echo "==> Generating Linkerd certificates in $CA_DIR" - -# Create output directory -mkdir -p "$CA_DIR" - -# Generate root CA (trust anchor) -echo "==> Generating root CA (trust anchor)" -step certificate create \ - "root.linkerd.cluster.local" \ - "$CA_DIR/ca.crt" \ - "$CA_DIR/ca.key" \ - --profile root-ca \ - --no-password \ - --insecure \ - --not-after="$CA_VALIDITY" \ - --kty=EC \ - --crv=P-256 - -echo "==> Root CA fingerprint:" -step certificate fingerprint "$CA_DIR/ca.crt" - -# Generate issuer certificate (intermediate CA) -echo "==> Generating issuer certificate (intermediate CA)" -step certificate create \ - "identity.linkerd.cluster.local" \ - "$CA_DIR/issuer.crt" \ - "$CA_DIR/issuer.key" \ - --profile intermediate-ca \ - --ca "$CA_DIR/ca.crt" \ - --ca-key "$CA_DIR/ca.key" \ - --no-password \ - --insecure \ - --not-after="$ISSUER_VALIDITY" \ - --kty=EC \ - --crv=P-256 - -echo "==> Issuer certificate fingerprint:" -step certificate fingerprint "$CA_DIR/issuer.crt" - -# Verify issuer is signed by CA -echo "==> Verifying certificate chain" -step certificate verify \ - "$CA_DIR/issuer.crt" \ - --roots "$CA_DIR/ca.crt" - -echo "" -echo "✓ Certificate generation complete!" -echo "" -echo "Files created in $CA_DIR:" -ls -lh "$CA_DIR" -echo "" -echo "Set environment variable:" -echo " export LINKERD_CA_DIR=$CA_DIR" -echo "" -echo "Or pass to helmfile:" -echo " LINKERD_CA_DIR=$CA_DIR helmfile sync" diff --git a/test/destination-test/hack/kwok-node.yaml b/test/destination-test/hack/kwok-node.yaml deleted file mode 100644 index 68508f68d22b8..0000000000000 --- a/test/destination-test/hack/kwok-node.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: v1 -kind: Node -metadata: - name: kwok-node-0 - annotations: - node.alpha.kubernetes.io/ttl: "0" - kwok.x-k8s.io/node: fake - labels: - beta.kubernetes.io/arch: amd64 - beta.kubernetes.io/os: linux - kubernetes.io/arch: amd64 - kubernetes.io/hostname: kwok-node-0 - kubernetes.io/os: linux - kubernetes.io/role: agent - node-role.kubernetes.io/agent: "" - type: kwok -spec: - taints: - - effect: NoSchedule - key: kwok.x-k8s.io/node - value: fake -status: - allocatable: - cpu: "1000" - memory: 1000Gi - pods: "1000" - capacity: - cpu: "1000" - memory: 1000Gi - pods: "1000" - nodeInfo: - architecture: amd64 - bootID: "" - containerRuntimeVersion: "" - kernelVersion: "" - kubeProxyVersion: fake - kubeletVersion: fake - machineID: "" - operatingSystem: linux - osImage: "" - systemUUID: "" - phase: Running diff --git a/test/destination-test/helmfile.yaml.gotmpl b/test/destination-test/helmfile.yaml.gotmpl deleted file mode 100644 index 5072628de2a06..0000000000000 --- a/test/destination-test/helmfile.yaml.gotmpl +++ /dev/null @@ -1,118 +0,0 @@ -# Helmfile for destination load testing infrastructure -# -# Prerequisites: -# 1. k3d cluster created (e.g., k3d-test) -# 2. LINKERD_CA_DIR set with generated certificates: -# - ca.crt (trust anchor) -# - issuer.crt (issuer certificate) -# - issuer.key (issuer private key) -# 3. 
Run: hack/gen-certs.sh to generate these files -# -# Usage: -# # Setup cluster -# LINKERD_CA_DIR=/tmp/linkerd-ca helmfile sync -# -# # Without monitoring -# LINKERD_CA_DIR=/tmp/linkerd-ca helmfile --state-values-set monitoring.enabled=false sync - -environments: - default: - values: - - monitoring: - enabled: true - ---- - -helmDefaults: - wait: true - timeout: 300 - createNamespace: true - atomic: true - -repositories: - - name: linkerd-edge - url: https://helm.linkerd.io/edge - - name: kwok - url: https://kwok.sigs.k8s.io/charts/ - - name: prometheus-community - url: https://prometheus-community.github.io/helm-charts - - name: linkerd-monitoring - url: https://ghcr.io/olix0r/charts - oci: true - -releases: - ############################################################################# - # KWOK - Fake pods/nodes for load testing - ############################################################################# - - name: kwok - namespace: kwok-system - chart: kwok/kwok - # Latest release (0.2.0 = app v0.7.0) - - ############################################################################# - # KWOK Stage Fast - Default pod/node emulation behavior - ############################################################################# - - name: kwok-stage - namespace: default - chart: kwok/stage-fast - # Configures pods to transition quickly through lifecycle phases - needs: - - kwok - - ############################################################################# - # Linkerd CRDs - ############################################################################# - - name: linkerd-crds - namespace: linkerd - chart: linkerd-edge/linkerd-crds - # Latest edge release - - ############################################################################# - # Linkerd Control Plane - ############################################################################# - - name: linkerd-control-plane - namespace: linkerd - chart: linkerd-edge/linkerd-control-plane - # Latest edge release - needs: - - linkerd-crds - set: - - name: identityTrustDomain - value: cluster.local - - name: controllerLogLevel - value: debug - - name: controllerLogFormat - value: json - - name: identity.issuer.scheme - value: linkerd.io/tls - - name: identityTrustAnchorsPEM - value: {{ readFile (printf "%s/ca.crt" (env "LINKERD_CA_DIR" | default "/tmp/linkerd-ca")) | quote }} - - name: identity.issuer.tls.crtPEM - value: {{ readFile (printf "%s/issuer.crt" (env "LINKERD_CA_DIR" | default "/tmp/linkerd-ca")) | quote }} - - name: identity.issuer.tls.keyPEM - value: {{ readFile (printf "%s/issuer.key" (env "LINKERD_CA_DIR" | default "/tmp/linkerd-ca")) | quote }} - - ############################################################################# - # kube-prometheus-stack (monitoring infrastructure) - ############################################################################# - - name: kube-prometheus-stack - namespace: monitoring - chart: prometheus-community/kube-prometheus-stack - version: ~67.0.0 - condition: monitoring.enabled - values: - - values/kube-prometheus-stack.yaml - - ############################################################################# - # linkerd-monitoring (Linkerd dashboards and ServiceMonitors) - ############################################################################# - - name: linkerd-monitoring - namespace: linkerd-viz - chart: oci://ghcr.io/olix0r/charts/linkerd-monitoring - version: 0.1.1 - condition: monitoring.enabled - needs: - - linkerd-control-plane - - kube-prometheus-stack - values: - - 
values/linkerd-monitoring.yaml diff --git a/test/destination-test/values/README.md b/test/destination-test/values/README.md deleted file mode 100644 index ae273bb7ebb2e..0000000000000 --- a/test/destination-test/values/README.md +++ /dev/null @@ -1,153 +0,0 @@ -# Monitoring Dashboard Access - -This directory contains monitoring configuration for the destination load test infrastructure. - -## Architecture - -- **kube-prometheus-stack**: Core monitoring infrastructure (Prometheus + Grafana) -- **linkerd-monitoring**: Linkerd-specific dashboards and ServiceMonitors -- **Custom scrape configs**: Additional targets for dst-load-controller metrics - -## Accessing Grafana - -After deploying with `helmfile sync`, Grafana is available via NodePort: - -```bash -# Get the NodePort -kubectl -n monitoring get svc kube-prometheus-stack-grafana - -# Forward the port to localhost -kubectl -n monitoring port-forward svc/kube-prometheus-stack-grafana 3000:80 - -# Open in browser -open http://localhost:3000 -``` - -**Default credentials:** - -- Username: `admin` -- Password: `admin` - -## Available Dashboards - -### Linkerd Dashboards (from linkerd-monitoring chart) - -- **Linkerd Top**: Overview of all meshed workloads -- **Linkerd Deployment**: Per-deployment metrics -- **Linkerd Pod**: Per-pod metrics -- **Linkerd Service**: Per-service metrics -- **Linkerd Namespace**: Per-namespace aggregated metrics -- **Linkerd Health**: Control plane health metrics -- **Linkerd Authority**: Destination service metrics -- **Linkerd Route**: HTTPRoute metrics (if using policy) - -### Exploring dst-load-controller Metrics - -In Grafana, go to **Explore** and query: - -**Client Controller Metrics:** - -```promql -# Active gRPC streams -client_streams_active - -# Updates received over time -rate(client_updates_received_total[1m]) - -# Current endpoints per service -client_endpoints_current - -# Stream errors -rate(client_stream_errors_total[1m]) -``` - -**Scale Controller Metrics:** - -```promql -# Current replica counts -churn_deployments_current_replicas - -# Scale operations over time -rate(churn_scale_operations_total[1m]) - -# Time spent at each scale level -churn_hold_duration_seconds -``` - -**Linkerd Destination Service Load:** - -```promql -# Request rate to destination service -rate(request_total{deployment="linkerd-destination"}[1m]) - -# Destination service latency -histogram_quantile(0.99, rate(response_latency_ms_bucket{deployment="linkerd-destination"}[1m])) - -# Active gRPC streams on destination service -grpc_server_handling_seconds_count{grpc_method="Get", grpc_service="io.linkerd.proxy.destination.Destination"} -``` - -## Custom Dashboards - -To add custom dashboards for dst-load-test: - -1. Create a dashboard JSON in `values/dashboards/` -2. Update `values/linkerd-monitoring.yaml` to include the dashboard -3. 
Run `helmfile sync` to apply - -Example structure: - -```yaml -# values/linkerd-monitoring.yaml -grafanaDashboards: - dst-load-test: - json: | - {{ readFile "values/dashboards/dst-load-test.json" | quote }} -``` - -## Troubleshooting - -**Metrics not appearing?** - -Check Prometheus targets: - -```bash -kubectl -n monitoring port-forward svc/kube-prometheus-stack-prometheus 9090:9090 -open http://localhost:9090/targets -``` - -Look for: - -- `linkerd-controller` job (should show linkerd control plane pods) -- `dst-load-controller` job (should show client/churn pods in dst-test namespace) - -**ServiceMonitors not being picked up?** - -Check ServiceMonitor labels match Prometheus selector: - -```bash -kubectl -n monitoring get prometheus kube-prometheus-stack-prometheus -o yaml | grep serviceMonitorSelector -A5 -``` - -## Metrics Reference - -### Client Controller - -| Metric | Type | Description | -|--------|------|-------------| -| `client_streams_active` | Gauge | Number of active gRPC streams to destination service | -| `client_updates_received_total` | Counter | Total updates received (Add/Remove/NoEndpoints) | -| `client_endpoints_current` | Gauge | Current number of endpoints for each service | -| `client_stream_errors_total` | Counter | Total stream errors (connection failures, etc.) | - -Labels: `target` (service FQDN), `request_type` (always "Get") - -### Scale Controller - -| Metric | Type | Description | -|--------|------|-------------| -| `churn_deployments_current_replicas` | Gauge | Current replica count for each deployment | -| `churn_scale_operations_total` | Counter | Total scale operations performed | -| `churn_hold_duration_seconds` | Histogram | Time spent holding at min/max replicas | - -Labels: `deployment`, `namespace`, `pattern` (oscillate/stable) diff --git a/test/destination-test/values/kube-prometheus-stack.yaml b/test/destination-test/values/kube-prometheus-stack.yaml deleted file mode 100644 index 8287bf22fa571..0000000000000 --- a/test/destination-test/values/kube-prometheus-stack.yaml +++ /dev/null @@ -1,106 +0,0 @@ -# kube-prometheus-stack configuration for load testing -# Minimal monitoring stack optimized for observing load tests - -prometheus: - prometheusSpec: - # Allow all ServiceMonitors/PodMonitors regardless of labels - podMonitorSelectorNilUsesHelmValues: false - probeSelectorNilUsesHelmValues: false - ruleSelectorNilUsesHelmValues: false - serviceMonitorSelectorNilUsesHelmValues: false - - enableRemoteWriteReceiver: true - retention: 24h - retentionSize: 2GiB - - storageSpec: - emptyDir: - medium: Memory - sizeLimit: 2Gi - - # Schedule on control plane nodes if available - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - preference: - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: Exists - - resources: - requests: - memory: 1Gi - cpu: 250m - limits: - memory: 2Gi - cpu: 1000m - - # Scrape Linkerd control plane metrics - additionalScrapeConfigs: - - job_name: linkerd-controller - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - linkerd - - linkerd-viz - relabel_configs: - - source_labels: [__meta_kubernetes_pod_container_port_name] - action: keep - regex: admin-http - - source_labels: [__meta_kubernetes_pod_container_name] - action: replace - target_label: component - - # Scrape destination load test controllers - - job_name: dst-load-controller - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - dst-test - relabel_configs: - - source_labels: 
[__meta_kubernetes_pod_label_app_kubernetes_io_component] - action: keep - regex: (client|churn) - - source_labels: [__meta_kubernetes_pod_container_port_name] - action: keep - regex: admin-http - - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] - action: replace - target_label: component - -grafana: - enabled: true - adminPassword: admin - - persistence: - enabled: true - type: pvc - storageClassName: local-path - accessModes: [ReadWriteOnce] - size: 2Gi - - sidecar: - datasources: - enabled: true - isDefaultDatasource: true - uid: prometheus - dashboards: - enabled: true - label: grafana_dashboard - labelValue: "1" - - service: - type: NodePort - -# Disable components we don't need for load testing -alertmanager: - enabled: false - -nodeExporter: - enabled: false # Don't need node metrics for KWOK-based tests - -kubeStateMetrics: - enabled: true # Keep this for pod/service state diff --git a/test/destination-test/values/linkerd-monitoring.yaml b/test/destination-test/values/linkerd-monitoring.yaml deleted file mode 100644 index 5e01228ce4fb3..0000000000000 --- a/test/destination-test/values/linkerd-monitoring.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# linkerd-monitoring configuration -# This chart provides Linkerd-specific Grafana dashboards and ServiceMonitors -# for kube-prometheus-stack integration - -# The chart automatically creates ServiceMonitors for: -# - linkerd-controller (destination, identity, proxy-injector) -# - linkerd-prometheus (if using linkerd-viz) -# - linkerd-proxy (via PodMonitors) - -# Additional configuration can be added here for custom dashboards -# See: https://github.com/olix0r/linkerd-monitoring - -# Future: Add custom dst-load-test dashboards here -# grafanaDashboards: -# dst-load-test: -# json: | -# { ... custom dashboard JSON ... } From 8761c56ca6ad394d3f905c4369c37bfbac823d49 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Thu, 9 Apr 2026 01:18:33 +0000 Subject: [PATCH 05/17] WIP Signed-off-by: Alex Leong --- controller/api/destination/watcher/address.go | 1 + .../watcher/filtered_listener_group.go | 72 ++++++++++++------- .../api/destination/watcher/port_publisher.go | 19 +++-- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/controller/api/destination/watcher/address.go b/controller/api/destination/watcher/address.go index 1b388a7a14f39..ba721c6655cde 100644 --- a/controller/api/destination/watcher/address.go +++ b/controller/api/destination/watcher/address.go @@ -24,6 +24,7 @@ type ( Zone *string ForZones []discovery.ForZone OpaqueProtocol bool + Hostname string } // AddressSet is a set of Address, indexed by ID. diff --git a/controller/api/destination/watcher/filtered_listener_group.go b/controller/api/destination/watcher/filtered_listener_group.go index e040f1f012f56..dbb6cdd843372 100644 --- a/controller/api/destination/watcher/filtered_listener_group.go +++ b/controller/api/destination/watcher/filtered_listener_group.go @@ -60,44 +60,64 @@ func (group *filteredListenerGroup) updateLocalTrafficPolicy(localTrafficPolicy } func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) AddressSet { - filtered := make(map[ID]*Address) + candidates := make(map[ID]*Address) + // If hostname filtering is specified, only include addresses that match the hostname. + // This filtering should be applied even if endpoint filtering is disabled. for id, address := range addresses.Addresses { - // If hostname filtering is specified, only include addresses that match the hostname. 
- // This filtering should be applied even if endpoint filtering is disabled. - if group.key.Hostname != "" && group.key.Hostname != address.Pod.Spec.Hostname { - continue + if group.key.Hostname != "" { + if address.Hostname != group.key.Hostname { + continue + } } + candidates[id] = address + } + + // If endpoint filtering is disabled, return all hostname-matching addresses. + if !group.enableEndpointFiltering { + return selectAddressFamily(AddressSet{ + Addresses: candidates, + Labels: addresses.Labels, + }, group.enableIPv6) + } - if group.enableEndpointFiltering { - // If the Service has local traffic policy enabled, only include addresses that are local to the node. - // Otherwise, perform zone filtering if the address has zone information. - if group.localTrafficPolicy { - if address.Pod == nil || address.Pod.Spec.NodeName != group.key.NodeName { - continue - } - } else { - if len(address.ForZones) > 0 { - if !containsZone(address.ForZones, group.nodeTopologyZone) { - continue - } - } + // If internalTrafficPolicy=Local, only keep pod endpoints on the same node. + if group.localTrafficPolicy { + filtered := make(map[ID]*Address) + for id, address := range candidates { + if address.Pod != nil && address.Pod.Spec.NodeName == group.key.NodeName { + filtered[id] = address } } + return selectAddressFamily(AddressSet{ + Addresses: filtered, + Labels: addresses.Labels, + }, group.enableIPv6) + } - filtered[id] = address + // If ANY address lacks hints ForZone hints, disable zone filtering and return all candidates. + for _, address := range candidates { + if len(address.ForZones) == 0 { + return selectAddressFamily(AddressSet{ + Addresses: candidates, + Labels: addresses.Labels, + }, group.enableIPv6) + } } - // If zone filtering removed all addresses, we fall back to including all addresses. - // Note that hostname filtering is still applied in this case, if specified. - if group.enableEndpointFiltering && !group.localTrafficPolicy && len(filtered) == 0 { - for k, v := range addresses.Addresses { - if group.key.Hostname == "" || v.Pod.Spec.Hostname == group.key.Hostname { - filtered[k] = v - } + // Otherwise, perform zone filtering:keep only endpoints whose hints include this node's zone. + filtered := make(map[ID]*Address) + for id, address := range candidates { + if containsZone(address.ForZones, group.nodeTopologyZone) { + filtered[id] = address } } + // If zone filtering produced nothing, fall back to all candidates. 
+ if len(filtered) == 0 { + filtered = candidates + } + return selectAddressFamily(AddressSet{ Addresses: filtered, Labels: addresses.Labels, diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index 0da4c9562d7f7..c06d81a09dc3d 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -177,7 +177,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A } identity := es.Annotations[consts.RemoteGatewayIdentity] - address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, serviceID.Name, es.Namespace) + address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, *endpoint.Hostname, serviceID.Name, es.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride if endpoint.Hints != nil { @@ -196,6 +196,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A resolvedPort, es.AddressType, IPAddr, + *endpoint.Hostname, endpoint.TargetRef.Name, endpoint.TargetRef.Namespace, ) @@ -316,7 +317,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre } identity := endpoints.Annotations[consts.RemoteGatewayIdentity] - address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, endpoints.Name, endpoints.Namespace) + address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, endpoint.Hostname, endpoints.Name, endpoints.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride addresses[id] = &address @@ -328,6 +329,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre resolvedPort, "", endpoint.IP, + endpoint.Hostname, endpoint.TargetRef.Name, endpoint.TargetRef.Namespace, ) @@ -349,7 +351,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre } } -func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP, serviceName, serviceNamespace string) (Address, ServiceID) { +func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP, hostname string, serviceName, serviceNamespace string) (Address, ServiceID) { id := ServiceID{ Name: strings.Join([]string{ serviceName, @@ -359,13 +361,14 @@ func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP, ser Namespace: serviceNamespace, } - return Address{IP: endpointIP, Port: endpointPort}, id + return Address{IP: endpointIP, Port: endpointPort, Hostname: hostname}, id } func (pp *portPublisher) newPodRefAddress( endpointPort Port, ipFamily discovery.AddressType, endpointIP, + hostname string, podName, podNamespace string, ) (Address, PodID, error) { @@ -388,6 +391,7 @@ func (pp *portPublisher) newPodRefAddress( Pod: pod, OwnerName: ownerName, OwnerKind: ownerKind, + Hostname: hostname, } return addr, id, nil @@ -545,9 +549,9 @@ func (pp *portPublisher) subscribe(listener EndpointUpdateListener, filterKey Fi func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener, filterKey FilterKey) { group, ok := pp.filteredListeners[filterKey] - listener.Remove(group.snapshot) - if ok { + listener.Remove(group.snapshot) + for i, existing := range group.listeners { if existing == listener { n := len(group.listeners) @@ -568,12 +572,13 @@ func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { for id, address := range pp.addresses.Addresses { if pp.isAddressSelected(address, oldServer) || pp.isAddressSelected(address, newServer) { + oldOpaque 
:= address.OpaqueProtocol if newServer != nil && pp.isAddressSelected(address, newServer) && newServer.Spec.ProxyProtocol == opaqueProtocol { address.OpaqueProtocol = true } else { address.OpaqueProtocol = false } - if pp.addresses.Addresses[id].OpaqueProtocol != address.OpaqueProtocol { + if oldOpaque != address.OpaqueProtocol { pp.addresses.Addresses[id] = address updated = true } From 15aa61a3cce88f19c1b4ee1b7df541b15f5c9049 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Thu, 9 Apr 2026 17:45:09 +0000 Subject: [PATCH 06/17] WIP Signed-off-by: Alex Leong --- .../watcher/filtered_listener_group.go | 6 ++--- .../api/destination/watcher/port_publisher.go | 27 ++++++++++++++++--- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/controller/api/destination/watcher/filtered_listener_group.go b/controller/api/destination/watcher/filtered_listener_group.go index dbb6cdd843372..96c140d40988e 100644 --- a/controller/api/destination/watcher/filtered_listener_group.go +++ b/controller/api/destination/watcher/filtered_listener_group.go @@ -65,10 +65,8 @@ func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) Addres // If hostname filtering is specified, only include addresses that match the hostname. // This filtering should be applied even if endpoint filtering is disabled. for id, address := range addresses.Addresses { - if group.key.Hostname != "" { - if address.Hostname != group.key.Hostname { - continue - } + if group.key.Hostname != "" && address.Hostname != "" && address.Hostname != group.key.Hostname { + continue } candidates[id] = address } diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index c06d81a09dc3d..c74d71cf55948 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -177,7 +177,11 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A } identity := es.Annotations[consts.RemoteGatewayIdentity] - address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, *endpoint.Hostname, serviceID.Name, es.Namespace) + hostname := "" + if endpoint.Hostname != nil { + hostname = *endpoint.Hostname + } + address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, hostname, serviceID.Name, es.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride if endpoint.Hints != nil { @@ -192,11 +196,15 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A if endpoint.TargetRef.Kind == endpointTargetRefPod { for _, IPAddr := range endpoint.Addresses { + hostname := "" + if endpoint.Hostname != nil { + hostname = *endpoint.Hostname + } address, id, err := pp.newPodRefAddress( resolvedPort, es.AddressType, IPAddr, - *endpoint.Hostname, + hostname, endpoint.TargetRef.Name, endpoint.TargetRef.Namespace, ) @@ -221,7 +229,11 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A if endpoint.TargetRef.Kind == endpointTargetRefExternalWorkload { for _, IPAddr := range endpoint.Addresses { - address, id, err := pp.newExtRefAddress(resolvedPort, IPAddr, endpoint.TargetRef.Name, es.Namespace) + hostname := "" + if endpoint.Hostname != nil { + hostname = *endpoint.Hostname + } + address, id, err := pp.newExtRefAddress(resolvedPort, IPAddr, hostname, endpoint.TargetRef.Name, es.Namespace) if err != nil { pp.log.Errorf("Unable to create new address: %v", err) continue @@ -397,7 +409,13 @@ func (pp *portPublisher) newPodRefAddress( 
return addr, id, nil } -func (pp *portPublisher) newExtRefAddress(endpointPort Port, endpointIP, externalWorkloadName, externalWorkloadNamespace string) (Address, ExternalWorkloadID, error) { +func (pp *portPublisher) newExtRefAddress( + endpointPort Port, + endpointIP, + hostname string, + externalWorkloadName, + externalWorkloadNamespace string, +) (Address, ExternalWorkloadID, error) { id := ExternalWorkloadID{ Name: externalWorkloadName, Namespace: externalWorkloadNamespace, @@ -412,6 +430,7 @@ func (pp *portPublisher) newExtRefAddress(endpointPort Port, endpointIP, externa IP: endpointIP, Port: endpointPort, ExternalWorkload: ew, + Hostname: hostname, } ownerRefs := ew.GetOwnerReferences() From d829469ecefa18a660519f7328079ce0c44d6952 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Sat, 11 Apr 2026 01:01:48 +0000 Subject: [PATCH 07/17] WIP Signed-off-by: Alex Leong --- .../endpoint_profile_translator.go | 4 +-- .../api/destination/endpoint_translator.go | 8 +++--- .../destination/federated_service_watcher.go | 8 +++--- controller/api/destination/server.go | 4 +-- controller/api/destination/watcher/address.go | 4 +-- .../destination/watcher/endpoints_watcher.go | 10 +++---- .../watcher/filtered_listener_group.go | 14 +++++----- .../api/destination/watcher/port_publisher.go | 28 ++++++++++--------- .../destination/watcher/service_publisher.go | 4 +-- 9 files changed, 43 insertions(+), 41 deletions(-) diff --git a/controller/api/destination/endpoint_profile_translator.go b/controller/api/destination/endpoint_profile_translator.go index 790af2324232d..f788649dfd0a4 100644 --- a/controller/api/destination/endpoint_profile_translator.go +++ b/controller/api/destination/endpoint_profile_translator.go @@ -130,7 +130,7 @@ func (ept *endpointProfileTranslator) update(address *watcher.Address) { } else { opaquePorts = watcher.GetAnnotatedOpaquePortsForExternalWorkload(address.ExternalWorkload, ept.defaultOpaquePorts) } - endpoint, err := ept.createEndpoint(address, opaquePorts) + endpoint, err := ept.createEndpoint(*address, opaquePorts) if err != nil { ept.log.Errorf("Failed to create endpoint for %s:%d: %s", address.IP, address.Port, err) @@ -158,7 +158,7 @@ func (ept *endpointProfileTranslator) update(address *watcher.Address) { ept.current = profile } -func (ept *endpointProfileTranslator) createEndpoint(address *watcher.Address, opaquePorts map[uint32]struct{}) (*pb.WeightedAddr, error) { +func (ept *endpointProfileTranslator) createEndpoint(address watcher.Address, opaquePorts map[uint32]struct{}) (*pb.WeightedAddr, error) { var weightedAddr *pb.WeightedAddr var err error if address.ExternalWorkload != nil { diff --git a/controller/api/destination/endpoint_translator.go b/controller/api/destination/endpoint_translator.go index e5686ed02bb17..af87bd1aaa8b1 100644 --- a/controller/api/destination/endpoint_translator.go +++ b/controller/api/destination/endpoint_translator.go @@ -316,7 +316,7 @@ func (et *endpointTranslator) sendClientRemove(set watcher.AddressSet) { } } -func toAddr(address *watcher.Address) (*net.TcpAddress, error) { +func toAddr(address watcher.Address) (*net.TcpAddress, error) { ip, err := addr.ParseProxyIP(address.IP) if err != nil { return nil, err @@ -328,7 +328,7 @@ func toAddr(address *watcher.Address) (*net.TcpAddress, error) { } func createWeightedAddrForExternalWorkload( - address *watcher.Address, + address watcher.Address, forceOpaqueTransport bool, opaquePorts map[uint32]struct{}, http2 *pb.Http2ClientParams, @@ -399,7 +399,7 @@ func 
createWeightedAddrForExternalWorkload( } func createWeightedAddr( - address *watcher.Address, + address watcher.Address, opaquePorts map[uint32]struct{}, forceOpaqueTransport bool, enableH2Upgrade bool, @@ -513,7 +513,7 @@ func getNodeTopologyZone(k8sAPI *k8s.MetadataAPI, srcNode string) (string, error func newEmptyAddressSet() watcher.AddressSet { return watcher.AddressSet{ - Addresses: make(map[watcher.ID]*watcher.Address), + Addresses: make(map[watcher.ID]watcher.Address), Labels: make(map[string]string), } } diff --git a/controller/api/destination/federated_service_watcher.go b/controller/api/destination/federated_service_watcher.go index 5a8a6a03f8135..93c6253ec6897 100644 --- a/controller/api/destination/federated_service_watcher.go +++ b/controller/api/destination/federated_service_watcher.go @@ -277,7 +277,7 @@ func (fs *federatedService) delete() { fs.log.Errorf("Failed to get remote cluster %s", id.cluster) continue } - remoteWatcher.Unsubscribe(id.service, subscriber.port, remoteFilterKey, translator) + remoteWatcher.Unsubscribe(id.service, subscriber.port, remoteFilterKey, translator, false) translator.Stop() } localFilterKey := watcher.FilterKey{ @@ -286,7 +286,7 @@ func (fs *federatedService) delete() { EnableEndpointFiltering: true, // Endpoint filtering is enabled for local discovery. } for localDiscovery, translator := range subscriber.localTranslators { - fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, localFilterKey, translator) + fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, localFilterKey, translator, false) translator.Stop() } close(subscriber.endStream) @@ -411,7 +411,7 @@ func (fs *federatedService) remoteDiscoveryUnsubscribe( NodeName: subscriber.nodeName, EnableEndpointFiltering: false, // Endpoint filtering is disabled for remote discovery. } - remoteWatcher.Unsubscribe(id.service, subscriber.port, filterKey, translator) + remoteWatcher.Unsubscribe(id.service, subscriber.port, filterKey, translator, true) translator.DrainAndStop() delete(subscriber.remoteTranslators, id) } @@ -467,7 +467,7 @@ func (fs *federatedService) localDiscoveryUnsubscribe( NodeName: subscriber.nodeName, EnableEndpointFiltering: true, // Endpoint filtering is enabled for local discovery. 
} - fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, filterKey, translator) + fs.localEndpoints.Unsubscribe(watcher.ServiceID{Namespace: fs.namespace, Name: localDiscovery}, subscriber.port, filterKey, translator, true) translator.DrainAndStop() delete(subscriber.localTranslators, localDiscovery) } diff --git a/controller/api/destination/server.go b/controller/api/destination/server.go index 1657d482dde02..7c0b1833fd643 100644 --- a/controller/api/destination/server.go +++ b/controller/api/destination/server.go @@ -247,7 +247,7 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e log.Errorf("Failed to subscribe to remote discovery service %q in cluster %s: %s", dest.GetPath(), cluster, err) return err } - defer remoteWatcher.Unsubscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, filterKey, translator) + defer remoteWatcher.Unsubscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, filterKey, translator, false) } else { log.Debug("Local discovery service detected") @@ -290,7 +290,7 @@ func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) e log.Errorf("Failed to subscribe to %s: %s", dest.GetPath(), err) return err } - defer s.endpoints.Unsubscribe(service, port, filterKey, translator) + defer s.endpoints.Unsubscribe(service, port, filterKey, translator, false) } select { diff --git a/controller/api/destination/watcher/address.go b/controller/api/destination/watcher/address.go index ba721c6655cde..398a38855e346 100644 --- a/controller/api/destination/watcher/address.go +++ b/controller/api/destination/watcher/address.go @@ -35,7 +35,7 @@ type ( // id.IPFamily refers to the ES AddressType (see newPodRefAddress). // 3) A reference to an ExternalWorkload: id.Name refers to the EW's name. AddressSet struct { - Addresses map[ID]*Address + Addresses map[ID]Address Labels map[string]string } ) @@ -44,7 +44,7 @@ type ( // ExternalWorkload fields of the Addresses map values still point to the // locations of the original variable func (addr AddressSet) shallowCopy() AddressSet { - addresses := make(map[ID]*Address) + addresses := make(map[ID]Address) for k, v := range addr.Addresses { addresses[k] = v } diff --git a/controller/api/destination/watcher/endpoints_watcher.go b/controller/api/destination/watcher/endpoints_watcher.go index c7c45d90cd533..7dd77b05218df 100644 --- a/controller/api/destination/watcher/endpoints_watcher.go +++ b/controller/api/destination/watcher/endpoints_watcher.go @@ -162,7 +162,7 @@ func (ew *EndpointsWatcher) Subscribe(id ServiceID, port Port, filterKey FilterK } // Unsubscribe removes a listener from the subscribers list for this authority. 
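For context on the new `withRemove` argument threaded through these call sites: judging from the `portPublisher.unsubscribe` change later in this patch (where `withRemove` gates a final `listener.Remove(group.snapshot)`), it controls whether the departing listener receives one last Remove of the group's current snapshot before it is dropped. Streams that are tearing down anyway pass false; a federated service that stops watching a single discovery target while its stream stays open passes true and then drains the translator. A rough sketch of the two call shapes, using only calls that appear in this series (a fragment for illustration, not a compilable unit):

    // End of a Get stream: the listener itself is going away, so no final
    // Remove is needed before detaching it.
    defer s.endpoints.Unsubscribe(service, port, filterKey, translator, false)

    // Dropping one remote-discovery target while the stream stays open: send a
    // final Remove so the client forgets those endpoints, then drain the translator.
    remoteWatcher.Unsubscribe(id.service, subscriber.port, filterKey, translator, true)
    translator.DrainAndStop()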
-func (ew *EndpointsWatcher) Unsubscribe(id ServiceID, port Port, filterKey FilterKey, listener EndpointUpdateListener) { +func (ew *EndpointsWatcher) Unsubscribe(id ServiceID, port Port, filterKey FilterKey, listener EndpointUpdateListener, withRemove bool) { if filterKey.Hostname == "" { ew.log.Debugf("Stopping watch on endpoint [%s:%d]", id, port) } else { @@ -174,7 +174,7 @@ func (ew *EndpointsWatcher) Unsubscribe(id ServiceID, port Port, filterKey Filte ew.log.Errorf("Cannot unsubscribe from unknown service [%s:%d]", id, port) return } - sp.unsubscribe(port, listener, filterKey) + sp.unsubscribe(port, listener, filterKey, withRemove) } // removeHandlers will de-register any event handlers used by the @@ -506,7 +506,7 @@ func getTargetPort(service *corev1.Service, port Port) namedPort { return targetPort } -func addressChanged(oldAddress *Address, newAddress *Address) bool { +func addressChanged(oldAddress Address, newAddress Address) bool { if oldAddress.Identity != newAddress.Identity { // in this case the identity could have changed; this can happen when for @@ -548,8 +548,8 @@ func diffAddresses(oldAddresses, newAddresses AddressSet) (add, remove AddressSe // TODO: this detects pods which have been added or removed, but does not // detect addresses which have been modified. A modified address should trigger // an add of the new version. - addAddresses := make(map[ID]*Address) - removeAddresses := make(map[ID]*Address) + addAddresses := make(map[ID]Address) + removeAddresses := make(map[ID]Address) for id, newAddress := range newAddresses.Addresses { if oldAddress, ok := oldAddresses.Addresses[id]; ok { if addressChanged(oldAddress, newAddress) { diff --git a/controller/api/destination/watcher/filtered_listener_group.go b/controller/api/destination/watcher/filtered_listener_group.go index 96c140d40988e..ce2180418a136 100644 --- a/controller/api/destination/watcher/filtered_listener_group.go +++ b/controller/api/destination/watcher/filtered_listener_group.go @@ -24,7 +24,7 @@ func newFilteredListenerGroup(key FilterKey, nodeTopologyZone string, enableIPv6 enableEndpointFiltering: key.EnableEndpointFiltering, enableIPv6: enableIPv6, localTrafficPolicy: localTrafficPolicy, - snapshot: AddressSet{Addresses: make(map[ID]*Address)}, + snapshot: AddressSet{Addresses: make(map[ID]Address)}, } } @@ -45,7 +45,7 @@ func (group *filteredListenerGroup) publishDiff(addresses AddressSet) { func (group *filteredListenerGroup) publishNoEndpoints() { remove := group.snapshot - group.snapshot = AddressSet{Addresses: make(map[ID]*Address)} + group.snapshot = AddressSet{Addresses: make(map[ID]Address)} for _, listener := range group.listeners { if len(remove.Addresses) > 0 { @@ -60,12 +60,12 @@ func (group *filteredListenerGroup) updateLocalTrafficPolicy(localTrafficPolicy } func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) AddressSet { - candidates := make(map[ID]*Address) + candidates := make(map[ID]Address) // If hostname filtering is specified, only include addresses that match the hostname. // This filtering should be applied even if endpoint filtering is disabled. 
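At this point in the series the hostname filter drops any address whose Hostname does not exactly match a non-empty FilterKey.Hostname; the follow-up patch below (where Hostname becomes a *string) relaxes this so that addresses with no hostname at all are always kept. A minimal standalone illustration of the predicate as written here (the keep helper and its example hostnames are hypothetical, not part of this change):

    // keep reports whether an address with the given hostname survives the
    // hostname filter requested by the subscriber; an empty filter keeps everything.
    func keep(filterHostname, addrHostname string) bool {
        return filterHostname == "" || addrHostname == filterHostname
    }

    keep("", "web-0")      // true:  no hostname filter requested
    keep("web-0", "web-0") // true:  exact match
    keep("web-0", "")      // false: address has no hostname, so it is dropped here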
for id, address := range addresses.Addresses { - if group.key.Hostname != "" && address.Hostname != "" && address.Hostname != group.key.Hostname { + if group.key.Hostname != "" && address.Hostname != group.key.Hostname { continue } candidates[id] = address @@ -81,7 +81,7 @@ func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) Addres // If internalTrafficPolicy=Local, only keep pod endpoints on the same node. if group.localTrafficPolicy { - filtered := make(map[ID]*Address) + filtered := make(map[ID]Address) for id, address := range candidates { if address.Pod != nil && address.Pod.Spec.NodeName == group.key.NodeName { filtered[id] = address @@ -104,7 +104,7 @@ func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) Addres } // Otherwise, perform zone filtering:keep only endpoints whose hints include this node's zone. - filtered := make(map[ID]*Address) + filtered := make(map[ID]Address) for id, address := range candidates { if containsZone(address.ForZones, group.nodeTopologyZone) { filtered[id] = address @@ -132,7 +132,7 @@ func containsZone(zones []v1.ForZone, zone string) bool { } func selectAddressFamily(addresses AddressSet, enableIPv6 bool) AddressSet { - filtered := make(map[ID]*Address) + filtered := make(map[ID]Address) for id, addr := range addresses.Addresses { if id.IPFamily == corev1.IPv6Protocol && !enableIPv6 { continue diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index c74d71cf55948..8214b79fdeb74 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -87,7 +87,7 @@ func (pp *portPublisher) addEndpointSlice(slice *discovery.EndpointSlice) { func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, newSlice *discovery.EndpointSlice) { updatedAddressSet := AddressSet{ - Addresses: make(map[ID]*Address), + Addresses: make(map[ID]Address), Labels: pp.addresses.Labels, } @@ -154,7 +154,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A if resolvedPort == undefinedEndpointPort { return AddressSet{ Labels: metricLabels(es), - Addresses: make(map[ID]*Address), + Addresses: make(map[ID]Address), } } @@ -163,7 +163,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A pp.log.Errorf("Could not fetch resource service name:%v", err) } - addresses := make(map[ID]*Address) + addresses := make(map[ID]Address) for _, endpoint := range es.Endpoints { if endpoint.Conditions.Ready != nil && !*endpoint.Conditions.Ready { continue @@ -189,7 +189,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A copy(zones, endpoint.Hints.ForZones) address.ForZones = zones } - addresses[id] = &address + addresses[id] = address } continue } @@ -223,7 +223,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A copy(zones, endpoint.Hints.ForZones) address.ForZones = zones } - addresses[id] = &address + addresses[id] = address } } @@ -252,7 +252,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A address.ForZones = zones } - addresses[id] = &address + addresses[id] = address } } @@ -315,7 +315,7 @@ func (pp *portPublisher) endpointSliceToIDs(es *discovery.EndpointSlice) []ID { } func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) AddressSet { - addresses := make(map[ID]*Address) + addresses := make(map[ID]Address) for _, 
subset := range endpoints.Subsets { resolvedPort := pp.resolveTargetPort(subset) if resolvedPort == undefinedEndpointPort { @@ -332,7 +332,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, endpoint.Hostname, endpoints.Name, endpoints.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride - addresses[id] = &address + addresses[id] = address continue } @@ -353,7 +353,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre if err != nil { pp.log.Errorf("failed to set address OpaqueProtocol: %s", err) } - addresses[id] = &address + addresses[id] = address } } } @@ -515,7 +515,7 @@ func (pp *portPublisher) updatePort(targetPort namedPort) { func (pp *portPublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { updatedAddressSet := AddressSet{ - Addresses: make(map[ID]*Address), + Addresses: make(map[ID]Address), Labels: pp.addresses.Labels, } for id, address := range pp.addresses.Addresses { @@ -566,10 +566,12 @@ func (pp *portPublisher) subscribe(listener EndpointUpdateListener, filterKey Fi pp.metrics.setSubscribers(pp.totalListeners()) } -func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener, filterKey FilterKey) { +func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener, filterKey FilterKey, withRemove bool) { group, ok := pp.filteredListeners[filterKey] if ok { - listener.Remove(group.snapshot) + if withRemove { + listener.Remove(group.snapshot) + } for i, existing := range group.listeners { if existing == listener { @@ -654,7 +656,7 @@ func (pp *portPublisher) publishNoEndpoints() { } } -func (pp *portPublisher) isAddressSelected(address *Address, server *v1beta3.Server) bool { +func (pp *portPublisher) isAddressSelected(address Address, server *v1beta3.Server) bool { if server == nil { return false } diff --git a/controller/api/destination/watcher/service_publisher.go b/controller/api/destination/watcher/service_publisher.go index ae4a3762cd300..801c88bcd3c79 100644 --- a/controller/api/destination/watcher/service_publisher.go +++ b/controller/api/destination/watcher/service_publisher.go @@ -136,13 +136,13 @@ func (sp *servicePublisher) subscribe(srcPort Port, listener EndpointUpdateListe return nil } -func (sp *servicePublisher) unsubscribe(srcPort Port, listener EndpointUpdateListener, filterKey FilterKey) { +func (sp *servicePublisher) unsubscribe(srcPort Port, listener EndpointUpdateListener, filterKey FilterKey, withRemove bool) { sp.Lock() defer sp.Unlock() publisher, ok := sp.ports[srcPort] if ok { - publisher.unsubscribe(listener, filterKey) + publisher.unsubscribe(listener, filterKey, withRemove) if publisher.totalListeners() == 0 { endpointsVecs.unregister(sp.metricsLabels(srcPort)) delete(sp.ports, srcPort) From 791370527117674fff0e291e02f9b9066225d256 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Sat, 11 Apr 2026 01:18:44 +0000 Subject: [PATCH 08/17] WIP Signed-off-by: Alex Leong --- controller/api/destination/watcher/address.go | 2 +- .../watcher/filtered_listener_group.go | 12 ++++--- .../api/destination/watcher/port_publisher.go | 32 ++++++------------- 3 files changed, 18 insertions(+), 28 deletions(-) diff --git a/controller/api/destination/watcher/address.go b/controller/api/destination/watcher/address.go index 398a38855e346..79b925e40c5ba 100644 --- a/controller/api/destination/watcher/address.go +++ b/controller/api/destination/watcher/address.go @@ -24,7 +24,7 @@ type ( 
Zone *string ForZones []discovery.ForZone OpaqueProtocol bool - Hostname string + Hostname *string } // AddressSet is a set of Address, indexed by ID. diff --git a/controller/api/destination/watcher/filtered_listener_group.go b/controller/api/destination/watcher/filtered_listener_group.go index ce2180418a136..a56a1302088c0 100644 --- a/controller/api/destination/watcher/filtered_listener_group.go +++ b/controller/api/destination/watcher/filtered_listener_group.go @@ -34,12 +34,12 @@ func (group *filteredListenerGroup) publishDiff(addresses AddressSet) { group.snapshot = filtered for _, listener := range group.listeners { - if len(remove.Addresses) > 0 { - listener.Remove(remove) - } if len(add.Addresses) > 0 { listener.Add(add) } + if len(remove.Addresses) > 0 { + listener.Remove(remove) + } } } @@ -65,8 +65,10 @@ func (group *filteredListenerGroup) filterAddresses(addresses AddressSet) Addres // If hostname filtering is specified, only include addresses that match the hostname. // This filtering should be applied even if endpoint filtering is disabled. for id, address := range addresses.Addresses { - if group.key.Hostname != "" && address.Hostname != group.key.Hostname { - continue + if address.Hostname != nil { + if group.key.Hostname != "" && group.key.Hostname != *address.Hostname { + continue + } } candidates[id] = address } diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index 8214b79fdeb74..34db8ae4028e1 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -177,11 +177,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A } identity := es.Annotations[consts.RemoteGatewayIdentity] - hostname := "" - if endpoint.Hostname != nil { - hostname = *endpoint.Hostname - } - address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, hostname, serviceID.Name, es.Namespace) + address, id := pp.newServiceRefAddress(resolvedPort, IPAddr, endpoint.Hostname, serviceID.Name, es.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride if endpoint.Hints != nil { @@ -196,15 +192,11 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A if endpoint.TargetRef.Kind == endpointTargetRefPod { for _, IPAddr := range endpoint.Addresses { - hostname := "" - if endpoint.Hostname != nil { - hostname = *endpoint.Hostname - } address, id, err := pp.newPodRefAddress( resolvedPort, es.AddressType, IPAddr, - hostname, + endpoint.Hostname, endpoint.TargetRef.Name, endpoint.TargetRef.Namespace, ) @@ -229,11 +221,7 @@ func (pp *portPublisher) endpointSliceToAddresses(es *discovery.EndpointSlice) A if endpoint.TargetRef.Kind == endpointTargetRefExternalWorkload { for _, IPAddr := range endpoint.Addresses { - hostname := "" - if endpoint.Hostname != nil { - hostname = *endpoint.Hostname - } - address, id, err := pp.newExtRefAddress(resolvedPort, IPAddr, hostname, endpoint.TargetRef.Name, es.Namespace) + address, id, err := pp.newExtRefAddress(resolvedPort, IPAddr, endpoint.Hostname, endpoint.TargetRef.Name, es.Namespace) if err != nil { pp.log.Errorf("Unable to create new address: %v", err) continue @@ -329,7 +317,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre } identity := endpoints.Annotations[consts.RemoteGatewayIdentity] - address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, endpoint.Hostname, endpoints.Name, endpoints.Namespace) + 
address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, &endpoint.Hostname, endpoints.Name, endpoints.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride addresses[id] = address @@ -341,7 +329,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre resolvedPort, "", endpoint.IP, - endpoint.Hostname, + &endpoint.Hostname, endpoint.TargetRef.Name, endpoint.TargetRef.Namespace, ) @@ -363,7 +351,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre } } -func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP, hostname string, serviceName, serviceNamespace string) (Address, ServiceID) { +func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP string, hostname *string, serviceName, serviceNamespace string) (Address, ServiceID) { id := ServiceID{ Name: strings.Join([]string{ serviceName, @@ -379,8 +367,8 @@ func (pp *portPublisher) newServiceRefAddress(endpointPort Port, endpointIP, hos func (pp *portPublisher) newPodRefAddress( endpointPort Port, ipFamily discovery.AddressType, - endpointIP, - hostname string, + endpointIP string, + hostname *string, podName, podNamespace string, ) (Address, PodID, error) { @@ -411,8 +399,8 @@ func (pp *portPublisher) newPodRefAddress( func (pp *portPublisher) newExtRefAddress( endpointPort Port, - endpointIP, - hostname string, + endpointIP string, + hostname *string, externalWorkloadName, externalWorkloadNamespace string, ) (Address, ExternalWorkloadID, error) { From b78049fe3554f65ecdd3fcb0895e21c9c5efc245 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Sat, 11 Apr 2026 03:45:39 +0000 Subject: [PATCH 09/17] WIP Signed-off-by: Alex Leong --- controller/api/destination/watcher/endpoints_watcher.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/controller/api/destination/watcher/endpoints_watcher.go b/controller/api/destination/watcher/endpoints_watcher.go index 7dd77b05218df..0622d93ff6052 100644 --- a/controller/api/destination/watcher/endpoints_watcher.go +++ b/controller/api/destination/watcher/endpoints_watcher.go @@ -416,6 +416,7 @@ func (ew *EndpointsWatcher) getOrNewServicePublisher(id ServiceID) *servicePubli cluster: ew.cluster, ports: make(map[Port]*portPublisher), enableEndpointSlices: ew.enableEndpointSlices, + enableIPv6: ew.enableIPv6, } ew.publishers[id] = sp } @@ -537,6 +538,10 @@ func addressChanged(oldAddress Address, newAddress Address) bool { } } + if oldAddress.OpaqueProtocol != newAddress.OpaqueProtocol { + return true + } + if oldAddress.Pod != nil && newAddress.Pod != nil { // if these addresses are owned by pods we can check the resource versions return oldAddress.Pod.ResourceVersion != newAddress.Pod.ResourceVersion From a42cae9788a95e56188fbfb4ec279b5583c14b87 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Sat, 11 Apr 2026 04:24:44 +0000 Subject: [PATCH 10/17] WIP Signed-off-by: Alex Leong --- controller/api/destination/watcher/port_publisher.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index 34db8ae4028e1..a7a2a7b60e4d1 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -310,6 +310,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre continue } for _, endpoint := range subset.Addresses { + hostname := endpoint.Hostname if endpoint.TargetRef 
== nil { var authorityOverride string if fqName, ok := endpoints.Annotations[consts.RemoteServiceFqName]; ok { @@ -317,7 +318,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre } identity := endpoints.Annotations[consts.RemoteGatewayIdentity] - address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, &endpoint.Hostname, endpoints.Name, endpoints.Namespace) + address, id := pp.newServiceRefAddress(resolvedPort, endpoint.IP, &hostname, endpoints.Name, endpoints.Namespace) address.Identity, address.AuthorityOverride = identity, authorityOverride addresses[id] = address @@ -329,7 +330,7 @@ func (pp *portPublisher) endpointsToAddresses(endpoints *corev1.Endpoints) Addre resolvedPort, "", endpoint.IP, - &endpoint.Hostname, + &hostname, endpoint.TargetRef.Name, endpoint.TargetRef.Namespace, ) From d7c4c6f5189e641000966ac7e7c60d6464450b8c Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Sun, 12 Apr 2026 21:04:05 +0000 Subject: [PATCH 11/17] WIP Signed-off-by: Alex Leong --- controller/cmd/destination/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/cmd/destination/main.go b/controller/cmd/destination/main.go index 62508f48180c6..a043773b050a7 100644 --- a/controller/cmd/destination/main.go +++ b/controller/cmd/destination/main.go @@ -148,7 +148,7 @@ func Main(args []string) { *kubeConfigPath, true, "local", - k8s.Endpoint, k8s.ES, k8s.Pod, k8s.Svc, k8s.SP, k8s.Job, k8s.Srv, k8s.ExtWorkload, + k8s.Endpoint, k8s.ES, k8s.Pod, k8s.Svc, k8s.Node, k8s.SP, k8s.Job, k8s.Srv, k8s.ExtWorkload, ) } else { k8sAPI, err = k8s.InitializeAPI( From 70a014502934c8a30970cefc66da1363fccb58a6 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Mon, 13 Apr 2026 04:14:21 +0000 Subject: [PATCH 12/17] update unit tests Signed-off-by: Alex Leong --- .../api/destination/endpoint_translator.go | 2 +- .../destination/endpoint_translator_test.go | 132 +---- .../federated_service_watcher_test.go | 4 +- controller/api/destination/test_util.go | 6 +- .../destination/watcher/cluster_store_test.go | 2 +- .../watcher/endpoints_watcher_test.go | 545 +++++++----------- 6 files changed, 210 insertions(+), 481 deletions(-) diff --git a/controller/api/destination/endpoint_translator.go b/controller/api/destination/endpoint_translator.go index af87bd1aaa8b1..78d61a6d2991c 100644 --- a/controller/api/destination/endpoint_translator.go +++ b/controller/api/destination/endpoint_translator.go @@ -131,7 +131,7 @@ func (et *endpointTranslator) Remove(set watcher.AddressSet) { et.enqueueUpdate(&removeUpdate{set}) } -// Add, Remove, and NoEndpoints are called from a client-go informer callback +// Add and Remove are called from a client-go informer callback // and therefore must not block. For each of these, we enqueue an update in // a channel so that it can be processed asyncronously. 
To ensure that enqueuing // does not block, we first check to see if there is capacity in the buffered diff --git a/controller/api/destination/endpoint_translator_test.go b/controller/api/destination/endpoint_translator_test.go index c67ab2c2ec313..e5339da805af3 100644 --- a/controller/api/destination/endpoint_translator_test.go +++ b/controller/api/destination/endpoint_translator_test.go @@ -826,56 +826,6 @@ func TestEndpointTranslatorForPods(t *testing.T) { t.Fatalf("ProtocolHint: %v", diff) } }) - - t.Run("Sends IPv6 only when pod has both IPv4 and IPv6", func(t *testing.T) { - mockGetServer, translator := makeEndpointTranslator(t) - translator.Start() - defer translator.Stop() - - translator.Add(mkAddressSetForPods(t, pod1, pod1IPv6)) - - addrs := (<-mockGetServer.updatesReceived).GetAdd().GetAddrs() - if len(addrs) != 1 { - t.Fatalf("Expected [1] address returned, got %v", addrs) - } - if ipPort := addr.ProxyAddressToString(addrs[0].GetAddr()); ipPort != "[2001:db8:85a3::8a2e:370:7333]:1" { - t.Fatalf("Expected address to be [%s], got [%s]", "[2001:db8:85a3::8a2e:370:7333]:1", ipPort) - } - - if updates := len(mockGetServer.updatesReceived); updates > 0 { - t.Fatalf("Expected to receive no more messages, received [%d]", updates) - } - }) - - t.Run("Sends IPv4 only when pod has both IPv4 and IPv6 but the latter in another zone ", func(t *testing.T) { - mockGetServer, translator := makeEndpointTranslator(t) - translator.Start() - defer translator.Stop() - - pod1West1a := pod1 - pod1West1a.ForZones = []v1.ForZone{ - {Name: "west-1a"}, - } - - pod1IPv6West1b := pod1IPv6 - pod1IPv6West1b.ForZones = []v1.ForZone{ - {Name: "west-1b"}, - } - - translator.Add(mkAddressSetForPods(t, pod1West1a, pod1IPv6West1b)) - - addrs := (<-mockGetServer.updatesReceived).GetAdd().GetAddrs() - if len(addrs) != 1 { - t.Fatalf("Expected [1] address returned, got %v", addrs) - } - if ipPort := addr.ProxyAddressToString(addrs[0].GetAddr()); ipPort != "1.1.1.1:1" { - t.Fatalf("Expected address to be [%s], got [%s]", "1.1.1.1:1", ipPort) - } - - if updates := len(mockGetServer.updatesReceived); updates > 0 { - t.Fatalf("Expected to receive no more messages, received [%d]", updates) - } - }) } func TestEndpointTranslatorExternalWorkloads(t *testing.T) { @@ -1063,27 +1013,6 @@ func TestEndpointTranslatorExternalWorkloads(t *testing.T) { }) } -func TestEndpointTranslatorTopologyAwareFilter(t *testing.T) { - t.Run("Sends one update for add and none for remove", func(t *testing.T) { - mockGetServer, translator := makeEndpointTranslator(t) - translator.Start() - defer translator.Stop() - - translator.Add(mkAddressSetForServices(west1aAddress, west1bAddress)) - translator.Remove(mkAddressSetForServices(west1bAddress)) - - // Only the address meant for west-1a should be added, which means - // that when we try to remove the address meant for west-1b there - // should be no remove update. 
- expectedNumUpdates := 1 - <-mockGetServer.updatesReceived // Add - - if len(mockGetServer.updatesReceived) != 0 { - t.Fatalf("Expecting [%d] updates, got [%d].", expectedNumUpdates, expectedNumUpdates+len(mockGetServer.updatesReceived)) - } - }) -} - func TestEndpointTranslatorExperimentalZoneWeights(t *testing.T) { zoneA := "west-1a" zoneB := "west-1b" @@ -1137,53 +1066,6 @@ func TestEndpointTranslatorExperimentalZoneWeights(t *testing.T) { }) } -func TestEndpointTranslatorForLocalTrafficPolicy(t *testing.T) { - t.Run("Sends one update for add and none for remove", func(t *testing.T) { - mockGetServer, translator := makeEndpointTranslator(t) - translator.Start() - defer translator.Stop() - addressSet := mkAddressSetForServices(AddressOnTest123Node, AddressNotOnTest123Node) - addressSet.LocalTrafficPolicy = true - translator.Add(addressSet) - translator.Remove(mkAddressSetForServices(AddressNotOnTest123Node)) - - // Only the address meant for AddressOnTest123Node should be added, which means - // that when we try to remove the address meant for AddressNotOnTest123Node there - // should be no remove update. - expectedNumUpdates := 1 - <-mockGetServer.updatesReceived // Add - - if len(mockGetServer.updatesReceived) != 0 { - t.Fatalf("Expecting [%d] updates, got [%d].", expectedNumUpdates, expectedNumUpdates+len(mockGetServer.updatesReceived)) - } - }) - - t.Run("Removes cannot change LocalTrafficPolicy", func(t *testing.T) { - mockGetServer, translator := makeEndpointTranslator(t) - translator.Start() - defer translator.Stop() - addressSet := mkAddressSetForServices(AddressOnTest123Node, AddressNotOnTest123Node) - addressSet.LocalTrafficPolicy = true - translator.Add(addressSet) - set := watcher.AddressSet{ - Addresses: make(map[watcher.ServiceID]*watcher.Address), - Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, - LocalTrafficPolicy: false, - } - translator.Remove(set) - - // Only the address meant for AddressOnTest123Node should be added. - // The remove with no addresses should not change the LocalTrafficPolicy - // and should be a noop that does not send an update. - expectedNumUpdates := 1 - <-mockGetServer.updatesReceived // Add - - if len(mockGetServer.updatesReceived) != 0 { - t.Fatalf("Expecting [%d] updates, got [%d].", expectedNumUpdates, expectedNumUpdates+len(mockGetServer.updatesReceived)) - } - }) -} - // TestConcurrency, to be triggered with `go test -race`, shouldn't report a race condition func TestConcurrency(t *testing.T) { _, translator := makeEndpointTranslator(t) @@ -1238,19 +1120,17 @@ func TestGetInboundPort(t *testing.T) { func mkAddressSetForServices(gatewayAddresses ...watcher.Address) watcher.AddressSet { set := watcher.AddressSet{ - Addresses: make(map[watcher.ServiceID]*watcher.Address), + Addresses: make(map[watcher.ID]watcher.Address), Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, } for _, a := range gatewayAddresses { - a := a // pin - id := watcher.ServiceID{ Name: strings.Join([]string{ a.IP, fmt.Sprint(a.Port), }, "-"), } - set.Addresses[id] = &a + set.Addresses[id] = a } return set } @@ -1259,7 +1139,7 @@ func mkAddressSetForPods(t *testing.T, podAddresses ...watcher.Address) watcher. 
t.Helper() set := watcher.AddressSet{ - Addresses: make(map[watcher.PodID]*watcher.Address), + Addresses: make(map[watcher.ID]watcher.Address), Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, } for _, p := range podAddresses { @@ -1279,19 +1159,19 @@ func mkAddressSetForPods(t *testing.T, podAddresses ...watcher.Address) watcher. Namespace: p.Pod.Namespace, IPFamily: fam, } - set.Addresses[id] = &p + set.Addresses[id] = p } return set } func mkAddressSetForExternalWorkloads(ewAddresses ...watcher.Address) watcher.AddressSet { set := watcher.AddressSet{ - Addresses: make(map[watcher.PodID]*watcher.Address), + Addresses: make(map[watcher.ID]watcher.Address), Labels: map[string]string{"service": "service-name", "namespace": "service-ns"}, } for _, ew := range ewAddresses { id := watcher.ExternalWorkloadID{Name: ew.ExternalWorkload.Name, Namespace: ew.ExternalWorkload.Namespace} - set.Addresses[id] = &ew + set.Addresses[id] = ew } return set } diff --git a/controller/api/destination/federated_service_watcher_test.go b/controller/api/destination/federated_service_watcher_test.go index ef1a0d6bbd13a..d5b6f59786ac8 100644 --- a/controller/api/destination/federated_service_watcher_test.go +++ b/controller/api/destination/federated_service_watcher_test.go @@ -135,13 +135,13 @@ func mockFederatedServiceWatcher(t *testing.T) (*federatedServiceWatcher, error) if err != nil { return nil, fmt.Errorf("NewFakeMetadataAPI returned an error: %w", err) } - localEndpoints, err := watcher.NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, "local") + localEndpoints, err := watcher.NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, false, "local") if err != nil { return nil, fmt.Errorf("NewEndpointsWatcher returned an error: %w", err) } prom := prometheus.NewRegistry() - clusterStore, err := watcher.NewClusterStoreWithDecoder(k8sAPI.Client, "linkerd", false, + clusterStore, err := watcher.NewClusterStoreWithDecoder(k8sAPI.Client, "linkerd", false, false, watcher.CreateMulticlusterDecoder(map[string][]string{ "east": eastConfigs, "north": northConfigs, diff --git a/controller/api/destination/test_util.go b/controller/api/destination/test_util.go index 6ba8b4161cbad..8bd2a8930d68d 100644 --- a/controller/api/destination/test_util.go +++ b/controller/api/destination/test_util.go @@ -1043,7 +1043,7 @@ spec: if err != nil { t.Fatalf("can't create Workloads watcher: %s", err) } - endpoints, err := watcher.NewEndpointsWatcher(k8sAPI, metadataAPI, log, true, "local") + endpoints, err := watcher.NewEndpointsWatcher(k8sAPI, metadataAPI, log, true, true, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -1057,7 +1057,7 @@ spec: } prom := prometheus.NewRegistry() - clusterStore, err := watcher.NewClusterStoreWithDecoder(k8sAPI.Client, "linkerd", true, watcher.CreateMockDecoder(exportedServiceResources...), prom) + clusterStore, err := watcher.NewClusterStoreWithDecoder(k8sAPI.Client, "linkerd", true, true, watcher.CreateMockDecoder(exportedServiceResources...), prom) if err != nil { t.Fatalf("can't create cluster store: %s", err) } @@ -1181,8 +1181,6 @@ metadata: "trust.domain", forceOpaqueTransport, true, // enableH2Upgrade - true, // enableEndpointFiltering - true, // enableIPv6 false, // extEndpointZoneWeights nil, // meshedHttp2ClientParams "service-name.service-ns", diff --git a/controller/api/destination/watcher/cluster_store_test.go 
b/controller/api/destination/watcher/cluster_store_test.go index e94b2490d8a00..f4e3acd35e327 100644 --- a/controller/api/destination/watcher/cluster_store_test.go +++ b/controller/api/destination/watcher/cluster_store_test.go @@ -84,7 +84,7 @@ func TestClusterStoreHandlers(t *testing.T) { } prom := prometheus.NewRegistry() - cs, err := NewClusterStoreWithDecoder(k8sAPI.Client, "linkerd", tt.enableEndpointSlices, CreateMockDecoder(), prom) + cs, err := NewClusterStoreWithDecoder(k8sAPI.Client, "linkerd", tt.enableEndpointSlices, false, CreateMockDecoder(), prom) if err != nil { t.Fatalf("Unexpected error when starting watcher cache: %s", err) } diff --git a/controller/api/destination/watcher/endpoints_watcher_test.go b/controller/api/destination/watcher/endpoints_watcher_test.go index 91c28ce2ff503..b05257e8b0574 100644 --- a/controller/api/destination/watcher/endpoints_watcher_test.go +++ b/controller/api/destination/watcher/endpoints_watcher_test.go @@ -20,11 +20,8 @@ import ( ) type bufferingEndpointListener struct { - added []string - removed []string - localTrafficPolicy bool - noEndpointsCalled bool - noEndpointsExist bool + added []string + removed []string sync.Mutex } @@ -36,7 +33,7 @@ func newBufferingEndpointListener() *bufferingEndpointListener { } } -func addressString(address *Address) string { +func addressString(address Address) string { addressString := fmt.Sprintf("%s:%d", address.IP, address.Port) if address.Identity != "" { addressString = fmt.Sprintf("%s/%s", addressString, address.Identity) @@ -63,25 +60,12 @@ func (bel *bufferingEndpointListener) ExpectRemoved(expected []string, t *testin testCompare(t, expected, bel.removed) } -func (bel *bufferingEndpointListener) endpointsAreNotCalled() bool { - bel.Lock() - defer bel.Unlock() - return bel.noEndpointsCalled -} - -func (bel *bufferingEndpointListener) endpointsDoNotExist() bool { - bel.Lock() - defer bel.Unlock() - return bel.noEndpointsExist -} - func (bel *bufferingEndpointListener) Add(set AddressSet) { bel.Lock() defer bel.Unlock() for _, address := range set.Addresses { bel.added = append(bel.added, addressString(address)) } - bel.localTrafficPolicy = set.LocalTrafficPolicy } func (bel *bufferingEndpointListener) AddFiltered(set AddressSet) { @@ -94,20 +78,12 @@ func (bel *bufferingEndpointListener) Remove(set AddressSet) { for _, address := range set.Addresses { bel.removed = append(bel.removed, addressString(address)) } - bel.localTrafficPolicy = set.LocalTrafficPolicy } func (bel *bufferingEndpointListener) RemoveFiltered(set AddressSet) { bel.Remove(set) } -func (bel *bufferingEndpointListener) NoEndpoints(exists bool) { - bel.Lock() - defer bel.Unlock() - bel.noEndpointsCalled = true - bel.noEndpointsExist = exists -} - func (bel *bufferingEndpointListener) NodeName() string { return "" } @@ -138,7 +114,7 @@ func newBufferingEndpointListenerWithResVersion() *bufferingEndpointListenerWith } } -func addressStringWithResVersion(address *Address) string { +func addressStringWithResVersion(address Address) string { return fmt.Sprintf("%s:%d:%s", address.IP, address.Port, address.Pod.ResourceVersion) } @@ -198,17 +174,19 @@ func (bel *bufferingEndpointListenerWithResVersion) EnableIPv6() bool { return false } +func testFilterKey(hostname string) FilterKey { + return FilterKey{Hostname: hostname} +} + func TestEndpointsWatcher(t *testing.T) { for _, tt := range []struct { - serviceType string - k8sConfigs []string - id ServiceID - hostname string - port Port - expectedAddresses []string - expectedNoEndpoints 
bool - expectedNoEndpointsServiceExists bool - expectedError bool + serviceType string + k8sConfigs []string + id ServiceID + hostname string + port Port + expectedAddresses []string + expectedError bool }{ { serviceType: "local services", @@ -293,9 +271,7 @@ status: "172.17.0.20:8989", "172.17.0.21:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + expectedError: false, }, { // Test for the issue described in linkerd/linkerd2#1405. @@ -363,9 +339,7 @@ status: "10.233.66.239:8990", "10.233.88.244:8990", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + expectedError: false, }, { // Test for the issue described in linkerd/linkerd2#1853. @@ -417,9 +391,7 @@ status: expectedAddresses: []string{ "10.1.30.135:7779", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + expectedError: false, }, { serviceType: "local services with missing addresses", @@ -476,9 +448,7 @@ status: expectedAddresses: []string{ "172.17.0.25:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + expectedError: false, }, { serviceType: "local services with no endpoints", @@ -493,12 +463,10 @@ spec: ports: - port: 7979`, }, - id: ServiceID{Name: "name2", Namespace: "ns"}, - port: 7979, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: true, - expectedError: false, + id: ServiceID{Name: "name2", Namespace: "ns"}, + port: 7979, + expectedAddresses: []string{}, + expectedError: false, }, { serviceType: "external name services", @@ -512,22 +480,18 @@ spec: type: ExternalName externalName: foo`, }, - id: ServiceID{Name: "name3", Namespace: "ns"}, - port: 6969, - expectedAddresses: []string{}, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: true, + id: ServiceID{Name: "name3", Namespace: "ns"}, + port: 6969, + expectedAddresses: []string{}, + expectedError: true, }, { - serviceType: "services that do not yet exist", - k8sConfigs: []string{}, - id: ServiceID{Name: "name4", Namespace: "ns"}, - port: 5959, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: false, - expectedError: false, + serviceType: "services that do not yet exist", + k8sConfigs: []string{}, + id: ServiceID{Name: "name4", Namespace: "ns"}, + port: 5959, + expectedAddresses: []string{}, + expectedError: false, }, { serviceType: "stateful sets", @@ -606,12 +570,10 @@ status: phase: Running podIP: 172.17.0.20`, }, - id: ServiceID{Name: "name1", Namespace: "ns"}, - hostname: "name1-3", - port: 5959, - expectedAddresses: []string{"172.17.0.20:5959"}, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, + id: ServiceID{Name: "name1", Namespace: "ns"}, + hostname: "name1-3", + port: 5959, + expectedAddresses: []string{"172.17.0.20:5959"}, }, { serviceType: "local service with new named port mid rollout and two subsets but only first subset is relevant", @@ -706,9 +668,7 @@ status: "172.17.0.1:8989", "172.17.0.2:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + expectedError: false, }, } { tt := tt // pin @@ -723,7 +683,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, 
metadataAPI, logging.WithField("test", t.Name()), false, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -733,7 +693,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(tt.hostname), listener) if tt.expectedError && err == nil { t.Fatal("Expected error but was ok") } @@ -742,32 +702,20 @@ status: } listener.ExpectAdded(tt.expectedAddresses, t) - - if listener.endpointsAreNotCalled() != tt.expectedNoEndpoints { - t.Fatalf("Expected noEndpointsCalled to be [%t], got [%t]", - tt.expectedNoEndpoints, listener.endpointsAreNotCalled()) - } - - if listener.endpointsDoNotExist() != tt.expectedNoEndpointsServiceExists { - t.Fatalf("Expected noEndpointsExist to be [%t], got [%t]", - tt.expectedNoEndpointsServiceExists, listener.endpointsDoNotExist()) - } }) } } func TestEndpointsWatcherWithEndpointSlices(t *testing.T) { for _, tt := range []struct { - serviceType string - k8sConfigs []string - id ServiceID - hostname string - port Port - expectedAddresses []string - expectedNoEndpoints bool - expectedNoEndpointsServiceExists bool - expectedError bool - expectedLocalTrafficPolicy bool + serviceType string + k8sConfigs []string + id ServiceID + hostname string + port Port + expectedAddresses []string + expectedError bool + expectedLocalTrafficPolicy bool }{ { serviceType: "local services with EndpointSlice", @@ -894,10 +842,8 @@ status: "172.17.0.20:8989", "172.17.0.21:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, - expectedLocalTrafficPolicy: true, + expectedError: false, + expectedLocalTrafficPolicy: true, }, { serviceType: "local services with missing addresses and EndpointSlice", @@ -983,12 +929,10 @@ status: podIP: 172.17.0.25 phase: Running`, }, - id: ServiceID{Name: "name-1", Namespace: "ns"}, - port: 8989, - expectedAddresses: []string{"172.17.0.25:8989"}, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + id: ServiceID{Name: "name-1", Namespace: "ns"}, + port: 8989, + expectedAddresses: []string{"172.17.0.25:8989"}, + expectedError: false, }, { serviceType: "local services with no EndpointSlices", @@ -1021,12 +965,10 @@ spec: ports: - port: 7979`, }, - id: ServiceID{Name: "name-2", Namespace: "ns"}, - port: 7979, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: true, - expectedError: false, + id: ServiceID{Name: "name-2", Namespace: "ns"}, + port: 7979, + expectedAddresses: []string{}, + expectedError: false, }, { serviceType: "external name services with EndpointSlices", @@ -1058,22 +1000,18 @@ spec: type: ExternalName externalName: foo`, }, - id: ServiceID{Name: "name-3-external-svc", Namespace: "ns"}, - port: 7777, - expectedAddresses: []string{}, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: true, + id: ServiceID{Name: "name-3-external-svc", Namespace: "ns"}, + port: 7777, + expectedAddresses: []string{}, + expectedError: true, }, { - serviceType: "services that do not exist", - k8sConfigs: []string{}, - id: ServiceID{Name: "name-4-inexistent-svc", Namespace: "ns"}, - port: 5555, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: false, - expectedError: false, + serviceType: "services that do not exist", + k8sConfigs: []string{}, + id: ServiceID{Name: "name-4-inexistent-svc", 
Namespace: "ns"}, + port: 5555, + expectedAddresses: []string{}, + expectedError: false, }, { serviceType: "stateful sets with EndpointSlices", @@ -1186,13 +1124,11 @@ status: phase: Running podIP: 172.17.0.20`, }, - id: ServiceID{Name: "name-1", Namespace: "ns"}, - hostname: "name-1-3", - port: 6000, - expectedAddresses: []string{"172.17.0.20:6000"}, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + id: ServiceID{Name: "name-1", Namespace: "ns"}, + hostname: "name-1-3", + port: 6000, + expectedAddresses: []string{"172.17.0.20:6000"}, + expectedError: false, }, { serviceType: "service with EndpointSlice without labels", @@ -1258,12 +1194,10 @@ status: phase: Running podIP: 172.17.0.12`, }, - id: ServiceID{Name: "name-5", Namespace: "ns"}, - port: 8989, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: true, - expectedError: false, + id: ServiceID{Name: "name-5", Namespace: "ns"}, + port: 8989, + expectedAddresses: []string{}, + expectedError: false, }, { serviceType: "service with IPv6 address type EndpointSlice", @@ -1332,12 +1266,10 @@ status: phase: Running podIP: 0:0:0:0:0:0:0:1`, }, - id: ServiceID{Name: "name-5", Namespace: "ns"}, - port: 9000, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: true, - expectedError: false, + id: ServiceID{Name: "name-5", Namespace: "ns"}, + port: 9000, + expectedAddresses: []string{}, + expectedError: false, }} { tt := tt // pin t.Run("subscribes listener to "+tt.serviceType, func(t *testing.T) { @@ -1351,7 +1283,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -1361,7 +1293,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(tt.hostname), listener) if tt.expectedError && err == nil { t.Fatal("Expected error but was ok") } @@ -1369,37 +1301,21 @@ status: t.Fatalf("Expected no error, got [%s]", err) } - if listener.localTrafficPolicy != tt.expectedLocalTrafficPolicy { - t.Fatalf("Expected localTrafficPolicy [%v], got [%v]", tt.expectedLocalTrafficPolicy, listener.localTrafficPolicy) - } - listener.ExpectAdded(tt.expectedAddresses, t) - - if listener.endpointsAreNotCalled() != tt.expectedNoEndpoints { - t.Fatalf("Expected noEndpointsCalled to be [%t], got [%t]", - tt.expectedNoEndpoints, listener.endpointsAreNotCalled()) - } - - if listener.endpointsDoNotExist() != tt.expectedNoEndpointsServiceExists { - t.Fatalf("Expected noEndpointsExist to be [%t], got [%t]", - tt.expectedNoEndpointsServiceExists, listener.endpointsDoNotExist()) - } }) } } func TestEndpointsWatcherWithEndpointSlicesExternalWorkload(t *testing.T) { for _, tt := range []struct { - serviceType string - k8sConfigs []string - id ServiceID - hostname string - port Port - expectedAddresses []string - expectedNoEndpoints bool - expectedNoEndpointsServiceExists bool - expectedError bool - expectedLocalTrafficPolicy bool + serviceType string + k8sConfigs []string + id ServiceID + hostname string + port Port + expectedAddresses []string + expectedError bool + expectedLocalTrafficPolicy bool }{ { serviceType: 
"local services with EndpointSlice", @@ -1517,10 +1433,8 @@ status: "172.17.0.20:8989", "172.17.0.21:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, - expectedLocalTrafficPolicy: true, + expectedError: false, + expectedLocalTrafficPolicy: true, }, { serviceType: "local services with missing addresses and EndpointSlice", @@ -1603,12 +1517,10 @@ status: conditions: ready: true`, }, - id: ServiceID{Name: "name-1", Namespace: "ns"}, - port: 8989, - expectedAddresses: []string{"172.17.0.25:8989"}, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - expectedError: false, + id: ServiceID{Name: "name-1", Namespace: "ns"}, + port: 8989, + expectedAddresses: []string{"172.17.0.25:8989"}, + expectedError: false, }, { serviceType: "service with EndpointSlice without labels", @@ -1671,12 +1583,10 @@ status: conditions: ready: true`, }, - id: ServiceID{Name: "name-5", Namespace: "ns"}, - port: 8989, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: true, - expectedError: false, + id: ServiceID{Name: "name-5", Namespace: "ns"}, + port: 8989, + expectedAddresses: []string{}, + expectedError: false, }, { @@ -1743,12 +1653,10 @@ status: conditions: ready: true`, }, - id: ServiceID{Name: "name-5", Namespace: "ns"}, - port: 9000, - expectedAddresses: []string{}, - expectedNoEndpoints: true, - expectedNoEndpointsServiceExists: true, - expectedError: false, + id: ServiceID{Name: "name-5", Namespace: "ns"}, + port: 9000, + expectedAddresses: []string{}, + expectedError: false, }, } { tt := tt // pin @@ -1763,7 +1671,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -1773,7 +1681,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(tt.hostname), listener) if tt.expectedError && err == nil { t.Fatal("Expected error but was ok") } @@ -1781,21 +1689,7 @@ status: t.Fatalf("Expected no error, got [%s]", err) } - if listener.localTrafficPolicy != tt.expectedLocalTrafficPolicy { - t.Fatalf("Expected localTrafficPolicy [%v], got [%v]", tt.expectedLocalTrafficPolicy, listener.localTrafficPolicy) - } - listener.ExpectAdded(tt.expectedAddresses, t) - - if listener.endpointsAreNotCalled() != tt.expectedNoEndpoints { - t.Fatalf("Expected noEndpointsCalled to be [%t], got [%t]", - tt.expectedNoEndpoints, listener.endpointsAreNotCalled()) - } - - if listener.endpointsDoNotExist() != tt.expectedNoEndpointsServiceExists { - t.Fatalf("Expected noEndpointsExist to be [%t], got [%t]", - tt.expectedNoEndpointsServiceExists, listener.endpointsDoNotExist()) - } }) } } @@ -1840,7 +1734,6 @@ status: serviceType string k8sConfigs []string id ServiceID - hostname string port Port objectToDelete interface{} deletingServices bool @@ -1850,7 +1743,6 @@ status: k8sConfigs: k8sConfigs, id: ServiceID{Name: "name1", Namespace: "ns"}, port: 8989, - hostname: "name1-1", objectToDelete: &corev1.Endpoints{ObjectMeta: metav1.ObjectMeta{Name: "name1", Namespace: "ns"}}, }, { @@ -1858,7 +1750,6 @@ status: k8sConfigs: k8sConfigs, id: ServiceID{Name: "name1", Namespace: "ns"}, port: 
8989, - hostname: "name1-1", objectToDelete: &corev1.Endpoints{ObjectMeta: metav1.ObjectMeta{Name: "name1", Namespace: "ns"}}, }, { @@ -1866,7 +1757,6 @@ status: k8sConfigs: k8sConfigs, id: ServiceID{Name: "name1", Namespace: "ns"}, port: 8989, - hostname: "name1-1", objectToDelete: &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "name1", Namespace: "ns"}}, deletingServices: true, }, @@ -1875,7 +1765,6 @@ status: k8sConfigs: k8sConfigs, id: ServiceID{Name: "name1", Namespace: "ns"}, port: 8989, - hostname: "name1-1", objectToDelete: &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "name1", Namespace: "ns"}}, deletingServices: true, }, @@ -1893,7 +1782,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -1903,7 +1792,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, FilterKey{}, listener) if err != nil { t.Fatal(err) } @@ -1914,9 +1803,7 @@ status: watcher.deleteEndpoints(tt.objectToDelete) } - if !listener.endpointsAreNotCalled() { - t.Fatal("Expected NoEndpoints to be Called") - } + listener.ExpectRemoved([]string{"172.17.0.12:8989"}, t) }) } @@ -2014,45 +1901,41 @@ status: podIP: 172.17.0.13`}...) for _, tt := range []struct { - serviceType string - k8sConfigs []string - id ServiceID - hostname string - port Port - objectToDelete interface{} - deletingServices bool - hasSliceAccess bool - noEndpointsCalled bool + serviceType string + k8sConfigs []string + id ServiceID + hostname string + port Port + objectToDelete interface{} + deletingServices bool + hasSliceAccess bool }{ { - serviceType: "can delete an EndpointSlice", - k8sConfigs: k8sConfigsWithES, - id: ServiceID{Name: "name1", Namespace: "ns"}, - port: 8989, - hostname: "name1-1", - objectToDelete: createTestEndpointSlice(consts.PodKind), - hasSliceAccess: true, - noEndpointsCalled: true, + serviceType: "can delete an EndpointSlice", + k8sConfigs: k8sConfigsWithES, + id: ServiceID{Name: "name1", Namespace: "ns"}, + port: 8989, + hostname: "name1-1", + objectToDelete: createTestEndpointSlice(consts.PodKind), + hasSliceAccess: true, }, { - serviceType: "can delete an EndpointSlice when wrapped in a DeletedFinalStateUnknown", - k8sConfigs: k8sConfigsWithES, - id: ServiceID{Name: "name1", Namespace: "ns"}, - port: 8989, - hostname: "name1-1", - objectToDelete: createTestEndpointSlice(consts.PodKind), - hasSliceAccess: true, - noEndpointsCalled: true, + serviceType: "can delete an EndpointSlice when wrapped in a DeletedFinalStateUnknown", + k8sConfigs: k8sConfigsWithES, + id: ServiceID{Name: "name1", Namespace: "ns"}, + port: 8989, + hostname: "name1-1", + objectToDelete: createTestEndpointSlice(consts.PodKind), + hasSliceAccess: true, }, { - serviceType: "can delete an EndpointSlice when there are multiple ones", - k8sConfigs: k8sConfigWithMultipleES, - id: ServiceID{Name: "name1", Namespace: "ns"}, - port: 8989, - hostname: "name1-1", - objectToDelete: createTestEndpointSlice(consts.PodKind), - hasSliceAccess: true, - noEndpointsCalled: false, + serviceType: "can delete an EndpointSlice when there are multiple ones", + k8sConfigs: k8sConfigWithMultipleES, + id: ServiceID{Name: "name1", Namespace: 
"ns"}, + port: 8989, + hostname: "name1-1", + objectToDelete: createTestEndpointSlice(consts.PodKind), + hasSliceAccess: true, }, } { tt := tt // pin @@ -2067,7 +1950,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -2077,17 +1960,14 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(tt.hostname), listener) if err != nil { t.Fatal(err) } watcher.deleteEndpointSlice(tt.objectToDelete) - if listener.endpointsAreNotCalled() != tt.noEndpointsCalled { - t.Fatalf("Expected noEndpointsCalled to be [%t], got [%t]", - tt.noEndpointsCalled, listener.endpointsAreNotCalled()) - } + listener.ExpectRemoved([]string{"172.17.0.12:8989"}, t) }) } } @@ -2184,45 +2064,41 @@ status: ready: true`}...) for _, tt := range []struct { - serviceType string - k8sConfigs []string - id ServiceID - hostname string - port Port - objectToDelete interface{} - deletingServices bool - hasSliceAccess bool - noEndpointsCalled bool + serviceType string + k8sConfigs []string + id ServiceID + hostname string + port Port + objectToDelete interface{} + deletingServices bool + hasSliceAccess bool }{ { - serviceType: "can delete an EndpointSlice", - k8sConfigs: k8sConfigsWithES, - id: ServiceID{Name: "name1", Namespace: "ns"}, - port: 8989, - hostname: "name1-1", - objectToDelete: createTestEndpointSlice(consts.ExtWorkloadKind), - hasSliceAccess: true, - noEndpointsCalled: true, + serviceType: "can delete an EndpointSlice", + k8sConfigs: k8sConfigsWithES, + id: ServiceID{Name: "name1", Namespace: "ns"}, + port: 8989, + hostname: "name1-1", + objectToDelete: createTestEndpointSlice(consts.ExtWorkloadKind), + hasSliceAccess: true, }, { - serviceType: "can delete an EndpointSlice when wrapped in a DeletedFinalStateUnknown", - k8sConfigs: k8sConfigsWithES, - id: ServiceID{Name: "name1", Namespace: "ns"}, - port: 8989, - hostname: "name1-1", - objectToDelete: createTestEndpointSlice(consts.ExtWorkloadKind), - hasSliceAccess: true, - noEndpointsCalled: true, + serviceType: "can delete an EndpointSlice when wrapped in a DeletedFinalStateUnknown", + k8sConfigs: k8sConfigsWithES, + id: ServiceID{Name: "name1", Namespace: "ns"}, + port: 8989, + hostname: "name1-1", + objectToDelete: createTestEndpointSlice(consts.ExtWorkloadKind), + hasSliceAccess: true, }, { - serviceType: "can delete an EndpointSlice when there are multiple ones", - k8sConfigs: k8sConfigWithMultipleES, - id: ServiceID{Name: "name1", Namespace: "ns"}, - port: 8989, - hostname: "name1-1", - objectToDelete: createTestEndpointSlice(consts.ExtWorkloadKind), - hasSliceAccess: true, - noEndpointsCalled: false, + serviceType: "can delete an EndpointSlice when there are multiple ones", + k8sConfigs: k8sConfigWithMultipleES, + id: ServiceID{Name: "name1", Namespace: "ns"}, + port: 8989, + hostname: "name1-1", + objectToDelete: createTestEndpointSlice(consts.ExtWorkloadKind), + hasSliceAccess: true, }, } { tt := tt // pin @@ -2237,7 +2113,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := 
NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -2247,32 +2123,27 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(tt.hostname), listener) if err != nil { t.Fatal(err) } watcher.deleteEndpointSlice(tt.objectToDelete) - if listener.endpointsAreNotCalled() != tt.noEndpointsCalled { - t.Fatalf("Expected noEndpointsCalled to be [%t], got [%t]", - tt.noEndpointsCalled, listener.endpointsAreNotCalled()) - } + listener.ExpectRemoved([]string{"172.17.0.12:8989"}, t) }) } } func TestEndpointsWatcherServiceMirrors(t *testing.T) { for _, tt := range []struct { - serviceType string - k8sConfigs []string - id ServiceID - hostname string - port Port - expectedAddresses []string - expectedNoEndpoints bool - expectedNoEndpointsServiceExists bool - enableEndpointSlices bool + serviceType string + k8sConfigs []string + id ServiceID + hostname string + port Port + expectedAddresses []string + enableEndpointSlices bool }{ { k8sConfigs: []string{` @@ -2308,8 +2179,6 @@ subsets: expectedAddresses: []string{ "172.17.0.12:8989/gateway-identity-1/name1-remote-fq:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, }, { k8sConfigs: []string{` @@ -2346,9 +2215,7 @@ ports: expectedAddresses: []string{ "172.17.0.12:8989/gateway-identity-1/name1-remote-fq:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, - enableEndpointSlices: true, + enableEndpointSlices: true, }, { k8sConfigs: []string{` @@ -2383,8 +2250,6 @@ subsets: expectedAddresses: []string{ "172.17.0.12:8989/name1-remote-fq:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, }, { @@ -2421,8 +2286,6 @@ subsets: expectedAddresses: []string{ "172.17.0.12:9999/gateway-identity-1/name1-remote-fq:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, }, { k8sConfigs: []string{` @@ -2458,8 +2321,6 @@ subsets: expectedAddresses: []string{ "172.17.0.12:9999/name1-remote-fq:8989", }, - expectedNoEndpoints: false, - expectedNoEndpointsServiceExists: false, }, } { tt := tt // pin @@ -2474,7 +2335,7 @@ subsets: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), tt.enableEndpointSlices, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), tt.enableEndpointSlices, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -2484,23 +2345,13 @@ subsets: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(tt.hostname), listener) if err != nil { t.Fatalf("NewFakeAPI returned an error: %s", err) } listener.ExpectAdded(tt.expectedAddresses, t) - - if listener.endpointsAreNotCalled() != tt.expectedNoEndpoints { - t.Fatalf("Expected noEndpointsCalled to be [%t], got [%t]", - tt.expectedNoEndpoints, listener.endpointsAreNotCalled()) - } - - if listener.endpointsDoNotExist() != tt.expectedNoEndpointsServiceExists { - t.Fatalf("Expected noEndpointsExist to be [%t], got [%t]", - tt.expectedNoEndpointsServiceExists, listener.endpointsDoNotExist()) - } }) } } @@ -2643,7 +2494,7 @@ subsets: t.Fatalf("NewFakeMetadataAPI returned an 
error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -2653,7 +2504,7 @@ subsets: listener := newBufferingEndpointListener() - err = watcher.Subscribe(tt.id, tt.port, "", listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(""), listener) if err != nil { t.Fatal(err) } @@ -2775,7 +2626,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), false, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -2785,7 +2636,7 @@ status: listener := newBufferingEndpointListenerWithResVersion() - err = watcher.Subscribe(tt.id, tt.port, tt.hostname, listener) + err = watcher.Subscribe(tt.id, tt.port, testFilterKey(tt.hostname), listener) if err != nil { t.Fatal(err) } @@ -2877,7 +2728,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -2887,7 +2738,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, "", listener) + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, testFilterKey(""), listener) if err != nil { t.Fatal(err) } @@ -3040,7 +2891,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -3050,7 +2901,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, "", listener) + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, testFilterKey(""), listener) if err != nil { t.Fatal(err) } @@ -3227,7 +3078,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -3237,7 +3088,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, "", listener) + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, testFilterKey(""), listener) if err != nil { t.Fatal(err) } @@ -3347,7 +3198,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", 
t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -3357,7 +3208,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, "", listener) + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, testFilterKey(""), listener) if err != nil { t.Fatal(err) } @@ -3467,7 +3318,7 @@ status: t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) } - watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, "local") + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") if err != nil { t.Fatalf("can't create Endpoints watcher: %s", err) } @@ -3477,7 +3328,7 @@ status: listener := newBufferingEndpointListener() - err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, "", listener) + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 8989, testFilterKey(""), listener) if err != nil { t.Fatal(err) } From c8d087fb000f6f8abc2cf1d3fdd9a2c664a1ec8a Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Tue, 14 Apr 2026 19:49:45 +0000 Subject: [PATCH 13/17] fix node watches Signed-off-by: Alex Leong --- controller/api/destination/watcher/port_publisher.go | 8 ++++---- controller/cmd/destination/main.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index a7a2a7b60e4d1..66abf5dc4fabf 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -540,7 +540,7 @@ func (pp *portPublisher) noEndpoints(exists bool) { } func (pp *portPublisher) subscribe(listener EndpointUpdateListener, filterKey FilterKey) { - group := pp.filteredListenerGroup(listener, filterKey) + group := pp.filteredListenerGroup(filterKey) if pp.exists { if len(pp.addresses.Addresses) > 0 { filteredSet := group.filterAddresses(pp.addresses) @@ -600,12 +600,12 @@ func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { } } -func (pp *portPublisher) filteredListenerGroup(listener EndpointUpdateListener, filterKey FilterKey) *filteredListenerGroup { +func (pp *portPublisher) filteredListenerGroup(filterKey FilterKey) *filteredListenerGroup { group, ok := pp.filteredListeners[filterKey] if !ok { nodeTopologyZone := "" - if filterKey.NodeName != "" { - node, err := pp.k8sAPI.Node().Lister().Get(filterKey.NodeName) + if filterKey.EnableEndpointFiltering && filterKey.NodeName != "" { + node, err := pp.metadataAPI.Get(k8s.Node, filterKey.NodeName) if err != nil { pp.log.Errorf("Unable to get node %s: %s", filterKey.NodeName, err) } else { diff --git a/controller/cmd/destination/main.go b/controller/cmd/destination/main.go index a043773b050a7..62508f48180c6 100644 --- a/controller/cmd/destination/main.go +++ b/controller/cmd/destination/main.go @@ -148,7 +148,7 @@ func Main(args []string) { *kubeConfigPath, true, "local", - k8s.Endpoint, k8s.ES, k8s.Pod, k8s.Svc, k8s.Node, k8s.SP, k8s.Job, k8s.Srv, k8s.ExtWorkload, + k8s.Endpoint, k8s.ES, k8s.Pod, k8s.Svc, k8s.SP, k8s.Job, k8s.Srv, k8s.ExtWorkload, ) } else { k8sAPI, err = k8s.InitializeAPI( From d089f00f7588b60d84ae1bec4525dc756ebc1672 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Tue, 14 Apr 2026 21:51:36 +0000 Subject: [PATCH 14/17] lint Signed-off-by: Alex Leong --- 
.../api/destination/endpoint_translator.go | 7 ---- .../destination/endpoint_translator_test.go | 39 ------------------- 2 files changed, 46 deletions(-) diff --git a/controller/api/destination/endpoint_translator.go b/controller/api/destination/endpoint_translator.go index 78d61a6d2991c..753623077952d 100644 --- a/controller/api/destination/endpoint_translator.go +++ b/controller/api/destination/endpoint_translator.go @@ -511,13 +511,6 @@ func getNodeTopologyZone(k8sAPI *k8s.MetadataAPI, srcNode string) (string, error return "", nil } -func newEmptyAddressSet() watcher.AddressSet { - return watcher.AddressSet{ - Addresses: make(map[watcher.ID]watcher.Address), - Labels: make(map[string]string), - } -} - // getInboundPort gets the inbound port from the proxy container's environment // variable. func getInboundPort(podSpec *corev1.PodSpec) (uint32, error) { diff --git a/controller/api/destination/endpoint_translator_test.go b/controller/api/destination/endpoint_translator_test.go index e5339da805af3..363514306b417 100644 --- a/controller/api/destination/endpoint_translator_test.go +++ b/controller/api/destination/endpoint_translator_test.go @@ -61,45 +61,6 @@ var ( OwnerName: "rc-name", } - pod1IPv6 = watcher.Address{ - IP: "2001:0db8:85a3:0000:0000:8a2e:0370:7333", - Port: 1, - Pod: &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pod1", - Namespace: "ns", - Labels: map[string]string{ - k8s.ControllerNSLabel: "linkerd", - k8s.ProxyDeploymentLabel: "deployment-name", - }, - }, - Spec: corev1.PodSpec{ - ServiceAccountName: "serviceaccount-name", - Containers: []corev1.Container{ - { - Name: k8s.ProxyContainerName, - Env: []corev1.EnvVar{ - { - Name: envInboundListenAddr, - Value: "[::]:4143", - }, - { - Name: envAdminListenAddr, - Value: "[::]:4191", - }, - { - Name: envControlListenAddr, - Value: "[::]:4190", - }, - }, - }, - }, - }, - }, - OwnerKind: "replicationcontroller", - OwnerName: "rc-name", - } - pod2 = watcher.Address{ IP: "1.1.1.2", Port: 2, From f9a6fe91b5e00291f4d20558bcd05bf12f4a4435 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Tue, 21 Apr 2026 23:21:11 +0000 Subject: [PATCH 15/17] review feedback Signed-off-by: Alex Leong --- controller/api/destination/watcher/address.go | 10 ++++------ .../api/destination/watcher/port_publisher.go | 16 +++------------- .../api/destination/watcher/service_publisher.go | 4 ---- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/controller/api/destination/watcher/address.go b/controller/api/destination/watcher/address.go index 79b925e40c5ba..812f604a6b1e5 100644 --- a/controller/api/destination/watcher/address.go +++ b/controller/api/destination/watcher/address.go @@ -1,6 +1,8 @@ package watcher import ( + "maps" + ewv1beta1 "github.com/linkerd/linkerd2/controller/gen/apis/externalworkload/v1beta1" corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" @@ -45,14 +47,10 @@ type ( // locations of the original variable func (addr AddressSet) shallowCopy() AddressSet { addresses := make(map[ID]Address) - for k, v := range addr.Addresses { - addresses[k] = v - } + maps.Copy(addr.Addresses, addresses) labels := make(map[string]string) - for k, v := range addr.Labels { - labels[k] = v - } + maps.Copy(addr.Labels, labels) return AddressSet{ Addresses: addresses, diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index 66abf5dc4fabf..3c6bef599f102 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ 
b/controller/api/destination/watcher/port_publisher.go @@ -3,6 +3,7 @@ package watcher import ( "context" "fmt" + "maps" "net" "strings" @@ -41,10 +42,6 @@ type ( } ) -///////////////////// -/// portPublisher /// -///////////////////// - // Note that portPublishers methods are generally NOT thread-safe. You should // hold the parent servicePublisher's mutex before calling methods on a // portPublisher. @@ -90,20 +87,14 @@ func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, Addresses: make(map[ID]Address), Labels: pp.addresses.Labels, } - - for id, address := range pp.addresses.Addresses { - updatedAddressSet.Addresses[id] = address - } + maps.Copy(pp.addresses.Addresses, updatedAddressSet.Addresses) for _, id := range pp.endpointSliceToIDs(oldSlice) { delete(updatedAddressSet.Addresses, id) } newAddressSet := pp.endpointSliceToAddresses(newSlice) - for id, address := range newAddressSet.Addresses { - updatedAddressSet.Addresses[id] = address - } - + maps.Copy(newAddressSet.Addresses, updatedAddressSet.Addresses) pp.publishAddressChange(updatedAddressSet) pp.addresses = updatedAddressSet @@ -473,7 +464,6 @@ func (pp *portPublisher) updateLocalTrafficPolicy(localTrafficPolicy bool) { for _, group := range pp.filteredListeners { group.updateLocalTrafficPolicy(localTrafficPolicy) } - pp.publishFilteredSnapshots() } func (pp *portPublisher) updatePort(targetPort namedPort) { diff --git a/controller/api/destination/watcher/service_publisher.go b/controller/api/destination/watcher/service_publisher.go index 801c88bcd3c79..a96c946c5fbfb 100644 --- a/controller/api/destination/watcher/service_publisher.go +++ b/controller/api/destination/watcher/service_publisher.go @@ -42,10 +42,6 @@ type ( } ) -//////////////////////// -/// servicePublisher /// -//////////////////////// - func (sp *servicePublisher) updateEndpoints(newEndpoints *corev1.Endpoints) { sp.Lock() defer sp.Unlock() From 2eab5da9a7dc7b61c7f18672231f779764bdf939 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Wed, 22 Apr 2026 00:18:57 +0000 Subject: [PATCH 16/17] fix maps.Copy and add tests Signed-off-by: Alex Leong --- controller/api/destination/watcher/address.go | 4 +- .../watcher/endpoints_watcher_test.go | 409 ++++++++++++++++++ .../api/destination/watcher/port_publisher.go | 4 +- 3 files changed, 413 insertions(+), 4 deletions(-) diff --git a/controller/api/destination/watcher/address.go b/controller/api/destination/watcher/address.go index 812f604a6b1e5..bf05a6b143eba 100644 --- a/controller/api/destination/watcher/address.go +++ b/controller/api/destination/watcher/address.go @@ -47,10 +47,10 @@ type ( // locations of the original variable func (addr AddressSet) shallowCopy() AddressSet { addresses := make(map[ID]Address) - maps.Copy(addr.Addresses, addresses) + maps.Copy(addresses, addr.Addresses) labels := make(map[string]string) - maps.Copy(addr.Labels, labels) + maps.Copy(labels, addr.Labels) return AddressSet{ Addresses: addresses, diff --git a/controller/api/destination/watcher/endpoints_watcher_test.go b/controller/api/destination/watcher/endpoints_watcher_test.go index b05257e8b0574..a45eb9f98b633 100644 --- a/controller/api/destination/watcher/endpoints_watcher_test.go +++ b/controller/api/destination/watcher/endpoints_watcher_test.go @@ -3361,3 +3361,412 @@ status: listener.ExpectAdded([]string{"172.17.0.12:8989", "172.17.0.12:8989"}, t) } + +func TestEndpointSliceSelectsAddressFamilyAfterZoneFiltering(t *testing.T) { + nodeConfig := ` +apiVersion: v1 +kind: Node +metadata: + name: node-1 + 
labels: + topology.kubernetes.io/zone: west-1a` + + for _, tc := range []struct { + name string + ipv4Zone string + ipv6Zone string + expectedAddresses []string + }{ + { + name: "Sends IPv6 only when pod has both IPv4 and IPv6", + ipv4Zone: "west-1a", + ipv6Zone: "west-1a", + expectedAddresses: []string{"2001:db8:85a3::8a2e:370:7333:1"}, + }, + { + name: "Sends IPv4 only when pod has both IPv4 and IPv6 but the latter in another zone", + ipv4Zone: "west-1a", + ipv6Zone: "west-1b", + expectedAddresses: []string{"1.1.1.1:1"}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + k8sConfigsWithES := []string{` +kind: APIResourceList +apiVersion: v1 +groupVersion: discovery.k8s.io/v1 +resources: +- name: endpointslices + singularName: endpointslice + namespaced: true + kind: EndpointSlice + verbs: + - delete + - deletecollection + - get + - list + - patch + - create + - update + - watch +`, ` +apiVersion: v1 +kind: Service +metadata: + name: name1 + namespace: ns +spec: + type: LoadBalancer + ports: + - port: 1`, fmt.Sprintf(` +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: +- addresses: + - 1.1.1.1 + conditions: + ready: true + hints: + forZones: + - name: %s + targetRef: + kind: Pod + name: name1-1 + namespace: ns +kind: EndpointSlice +metadata: + labels: + kubernetes.io/service-name: name1 + name: name1-ipv4 + namespace: ns +ports: +- name: "" + port: 1`, tc.ipv4Zone), fmt.Sprintf(` +addressType: IPv6 +apiVersion: discovery.k8s.io/v1 +endpoints: +- addresses: + - 2001:db8:85a3::8a2e:370:7333 + conditions: + ready: true + hints: + forZones: + - name: %s + targetRef: + kind: Pod + name: name1-1 + namespace: ns +kind: EndpointSlice +metadata: + labels: + kubernetes.io/service-name: name1 + name: name1-ipv6 + namespace: ns +ports: +- name: "" + port: 1`, tc.ipv6Zone), ` +apiVersion: v1 +kind: Pod +metadata: + name: name1-1 + namespace: ns +status: + phase: Running + podIP: 1.1.1.1`} + + k8sAPI, err := k8s.NewFakeAPI(k8sConfigsWithES...) 
+ if err != nil { + t.Fatalf("NewFakeAPI returned an error: %s", err) + } + + metadataAPI, err := k8s.NewFakeMetadataAPI([]string{nodeConfig}) + if err != nil { + t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) + } + + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, true, "local") + if err != nil { + t.Fatalf("can't create Endpoints watcher: %s", err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + listener := newBufferingEndpointListener() + + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 1, FilterKey{ + EnableEndpointFiltering: true, + NodeName: "node-1", + }, listener) + if err != nil { + t.Fatal(err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + listener.ExpectAdded(tc.expectedAddresses, t) + listener.ExpectRemoved([]string{}, t) + }) + } +} + +func TestEndpointSliceTopologyAwareFilter(t *testing.T) { + nodeConfig := ` +apiVersion: v1 +kind: Node +metadata: + name: node-1 + labels: + topology.kubernetes.io/zone: west-1a` + + k8sConfigsWithES := []string{` +kind: APIResourceList +apiVersion: v1 +groupVersion: discovery.k8s.io/v1 +resources: +- name: endpointslices + singularName: endpointslice + namespaced: true + kind: EndpointSlice + verbs: + - delete + - deletecollection + - get + - list + - patch + - create + - update + - watch +`, ` +apiVersion: v1 +kind: Service +metadata: + name: name1 + namespace: ns +spec: + type: LoadBalancer + ports: + - port: 1`, ` +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: +- addresses: + - 1.1.1.1 + conditions: + ready: true + hints: + forZones: + - name: west-1a +- addresses: + - 1.1.1.2 + conditions: + ready: true + hints: + forZones: + - name: west-1b +kind: EndpointSlice +metadata: + labels: + kubernetes.io/service-name: name1 + name: name1-es + namespace: ns +ports: +- name: "" + port: 1 +`} + + k8sAPI, err := k8s.NewFakeAPI(k8sConfigsWithES...) 
+ if err != nil { + t.Fatalf("NewFakeAPI returned an error: %s", err) + } + + metadataAPI, err := k8s.NewFakeMetadataAPI([]string{nodeConfig}) + if err != nil { + t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) + } + + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") + if err != nil { + t.Fatalf("can't create Endpoints watcher: %s", err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + listener := newBufferingEndpointListener() + + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 1, FilterKey{ + EnableEndpointFiltering: true, + NodeName: "node-1", + }, listener) + if err != nil { + t.Fatal(err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + listener.ExpectAdded([]string{"1.1.1.1:1"}, t) + listener.ExpectRemoved([]string{}, t) + + es, err := k8sAPI.Client.DiscoveryV1().EndpointSlices("ns").Get(context.Background(), "name1-es", metav1.GetOptions{}) + if err != nil { + t.Fatal(err) + } + + es.Endpoints = es.Endpoints[:1] + + _, err = k8sAPI.Client.DiscoveryV1().EndpointSlices("ns").Update(context.Background(), es, metav1.UpdateOptions{}) + if err != nil { + t.Fatal(err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + time.Sleep(50 * time.Millisecond) + + listener.ExpectAdded([]string{"1.1.1.1:1"}, t) + listener.ExpectRemoved([]string{}, t) +} + +func TestEndpointSliceLocalTrafficPolicyIgnoresRemovalOfUnfilteredEndpoint(t *testing.T) { + nodeConfig := ` +apiVersion: v1 +kind: Node +metadata: + name: node-1` + + k8sConfigsWithES := []string{` +kind: APIResourceList +apiVersion: v1 +groupVersion: discovery.k8s.io/v1 +resources: +- name: endpointslices + singularName: endpointslice + namespaced: true + kind: EndpointSlice + verbs: + - delete + - deletecollection + - get + - list + - patch + - create + - update + - watch +`, ` +apiVersion: v1 +kind: Service +metadata: + name: name1 + namespace: ns +spec: + type: LoadBalancer + internalTrafficPolicy: Local + ports: + - port: 1`, ` +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: +- addresses: + - 1.1.1.1 + conditions: + ready: true + targetRef: + kind: Pod + name: name1-1 + namespace: ns +- addresses: + - 1.1.1.2 + conditions: + ready: true + targetRef: + kind: Pod + name: name1-2 + namespace: ns +kind: EndpointSlice +metadata: + labels: + kubernetes.io/service-name: name1 + name: name1-es + namespace: ns +ports: +- name: "" + port: 1`, ` +apiVersion: v1 +kind: Pod +metadata: + name: name1-1 + namespace: ns +spec: + nodeName: node-1 +status: + phase: Running + podIP: 1.1.1.1`, ` +apiVersion: v1 +kind: Pod +metadata: + name: name1-2 + namespace: ns +spec: + nodeName: node-2 +status: + phase: Running + podIP: 1.1.1.2`} + + k8sAPI, err := k8s.NewFakeAPI(k8sConfigsWithES...) 
+ if err != nil { + t.Fatalf("NewFakeAPI returned an error: %s", err) + } + + metadataAPI, err := k8s.NewFakeMetadataAPI([]string{nodeConfig}) + if err != nil { + t.Fatalf("NewFakeMetadataAPI returned an error: %s", err) + } + + watcher, err := NewEndpointsWatcher(k8sAPI, metadataAPI, logging.WithField("test", t.Name()), true, false, "local") + if err != nil { + t.Fatalf("can't create Endpoints watcher: %s", err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + listener := newBufferingEndpointListener() + + err = watcher.Subscribe(ServiceID{Name: "name1", Namespace: "ns"}, 1, FilterKey{ + EnableEndpointFiltering: true, + NodeName: "node-1", + }, listener) + if err != nil { + t.Fatal(err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + listener.ExpectAdded([]string{"1.1.1.1:1"}, t) + listener.ExpectRemoved([]string{}, t) + + es, err := k8sAPI.Client.DiscoveryV1().EndpointSlices("ns").Get(context.Background(), "name1-es", metav1.GetOptions{}) + if err != nil { + t.Fatal(err) + } + + es.Endpoints = es.Endpoints[:1] + + _, err = k8sAPI.Client.DiscoveryV1().EndpointSlices("ns").Update(context.Background(), es, metav1.UpdateOptions{}) + if err != nil { + t.Fatal(err) + } + + k8sAPI.Sync(nil) + metadataAPI.Sync(nil) + + time.Sleep(50 * time.Millisecond) + + listener.ExpectAdded([]string{"1.1.1.1:1"}, t) + listener.ExpectRemoved([]string{}, t) +} diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index 3c6bef599f102..d884a722583bf 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -87,14 +87,14 @@ func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, Addresses: make(map[ID]Address), Labels: pp.addresses.Labels, } - maps.Copy(pp.addresses.Addresses, updatedAddressSet.Addresses) + maps.Copy(updatedAddressSet.Addresses, pp.addresses.Addresses) for _, id := range pp.endpointSliceToIDs(oldSlice) { delete(updatedAddressSet.Addresses, id) } newAddressSet := pp.endpointSliceToAddresses(newSlice) - maps.Copy(newAddressSet.Addresses, updatedAddressSet.Addresses) + maps.Copy(updatedAddressSet.Addresses, newAddressSet.Addresses) pp.publishAddressChange(updatedAddressSet) pp.addresses = updatedAddressSet From da01ec848830db4e0096b0ba80927de8f26e6b90 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Wed, 22 Apr 2026 01:13:40 +0000 Subject: [PATCH 17/17] add hostname and nodename labels to endpoint metrics Signed-off-by: Alex Leong --- .../watcher/filtered_listener_group.go | 14 ++++- .../api/destination/watcher/port_publisher.go | 61 +++++++------------ .../api/destination/watcher/prometheus.go | 6 +- .../destination/watcher/service_publisher.go | 45 +++++--------- 4 files changed, 53 insertions(+), 73 deletions(-) diff --git a/controller/api/destination/watcher/filtered_listener_group.go b/controller/api/destination/watcher/filtered_listener_group.go index a56a1302088c0..92057487ac773 100644 --- a/controller/api/destination/watcher/filtered_listener_group.go +++ b/controller/api/destination/watcher/filtered_listener_group.go @@ -14,16 +14,18 @@ type ( localTrafficPolicy bool snapshot AddressSet listeners []EndpointUpdateListener + metrics endpointsMetrics } ) -func newFilteredListenerGroup(key FilterKey, nodeTopologyZone string, enableIPv6 bool, localTrafficPolicy bool) *filteredListenerGroup { +func newFilteredListenerGroup(key FilterKey, nodeTopologyZone string, enableIPv6 bool, localTrafficPolicy bool, metrics endpointsMetrics) 
*filteredListenerGroup { return &filteredListenerGroup{ key: key, nodeTopologyZone: nodeTopologyZone, enableEndpointFiltering: key.EnableEndpointFiltering, enableIPv6: enableIPv6, localTrafficPolicy: localTrafficPolicy, + metrics: metrics, snapshot: AddressSet{Addresses: make(map[ID]Address)}, } } @@ -41,9 +43,13 @@ func (group *filteredListenerGroup) publishDiff(addresses AddressSet) { listener.Remove(remove) } } + + group.metrics.incUpdates() + group.metrics.setPods(len(group.snapshot.Addresses)) + group.metrics.setExists(true) } -func (group *filteredListenerGroup) publishNoEndpoints() { +func (group *filteredListenerGroup) publishNoEndpoints(exists bool) { remove := group.snapshot group.snapshot = AddressSet{Addresses: make(map[ID]Address)} @@ -52,6 +58,10 @@ func (group *filteredListenerGroup) publishNoEndpoints() { listener.Remove(remove) } } + + group.metrics.incUpdates() + group.metrics.setPods(0) + group.metrics.setExists(exists) } func (group *filteredListenerGroup) updateLocalTrafficPolicy(localTrafficPolicy bool) { diff --git a/controller/api/destination/watcher/port_publisher.go b/controller/api/destination/watcher/port_publisher.go index d884a722583bf..47ae5179e20d8 100644 --- a/controller/api/destination/watcher/port_publisher.go +++ b/controller/api/destination/watcher/port_publisher.go @@ -37,7 +37,7 @@ type ( exists bool addresses AddressSet filteredListeners map[FilterKey]*filteredListenerGroup - metrics endpointsMetrics + cluster string localTrafficPolicy bool } ) @@ -49,15 +49,11 @@ type ( func (pp *portPublisher) updateEndpoints(endpoints *corev1.Endpoints) { newAddressSet := pp.endpointsToAddresses(endpoints) if len(newAddressSet.Addresses) == 0 { - pp.publishNoEndpoints() + pp.publishNoEndpoints(true) } else { pp.publishAddressChange(newAddressSet) } pp.addresses = newAddressSet - pp.exists = true - pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) } func (pp *portPublisher) addEndpointSlice(slice *discovery.EndpointSlice) { @@ -77,9 +73,6 @@ func (pp *portPublisher) addEndpointSlice(slice *discovery.EndpointSlice) { pp.addresses = newAddressSet pp.exists = true - pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) } func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, newSlice *discovery.EndpointSlice) { @@ -99,9 +92,6 @@ func (pp *portPublisher) updateEndpointSlice(oldSlice *discovery.EndpointSlice, pp.addresses = updatedAddressSet pp.exists = true - pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) } func metricLabels(resource interface{}) map[string]string { @@ -513,24 +503,20 @@ func (pp *portPublisher) deleteEndpointSlice(es *discovery.EndpointSlice) { pp.noEndpoints(false) } else { pp.exists = true - pp.metrics.incUpdates() - pp.metrics.setPods(len(pp.addresses.Addresses)) - pp.metrics.setExists(true) } } func (pp *portPublisher) noEndpoints(exists bool) { pp.exists = exists pp.addresses = AddressSet{} - pp.publishNoEndpoints() - - pp.metrics.incUpdates() - pp.metrics.setExists(exists) - pp.metrics.setPods(0) + pp.publishNoEndpoints(exists) } -func (pp *portPublisher) subscribe(listener EndpointUpdateListener, filterKey FilterKey) { - group := pp.filteredListenerGroup(filterKey) +func (pp *portPublisher) subscribe(listener EndpointUpdateListener, filterKey FilterKey) error { + group, err := pp.filteredListenerGroup(filterKey) + if err != nil { + return err + } if pp.exists { if 
len(pp.addresses.Addresses) > 0 { filteredSet := group.filterAddresses(pp.addresses) @@ -541,8 +527,9 @@ func (pp *portPublisher) subscribe(listener EndpointUpdateListener, filterKey Fi } } group.listeners = append(group.listeners, listener) + group.metrics.setSubscribers(len(group.listeners)) - pp.metrics.setSubscribers(pp.totalListeners()) + return nil } func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener, filterKey FilterKey, withRemove bool) { @@ -562,10 +549,13 @@ func (pp *portPublisher) unsubscribe(listener EndpointUpdateListener, filterKey } } if len(group.listeners) == 0 { + endpointsVecs.unregister(endpointsLabels( + pp.cluster, pp.id.Namespace, pp.id.Name, fmt.Sprintf("%d", pp.srcPort), filterKey.Hostname, filterKey.NodeName, + )) delete(pp.filteredListeners, filterKey) } } - pp.metrics.setSubscribers(pp.totalListeners()) + group.metrics.setSubscribers(len(group.listeners)) } func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { updated := false @@ -586,11 +576,10 @@ func (pp *portPublisher) updateServer(oldServer, newServer *v1beta3.Server) { } if updated { pp.publishFilteredSnapshots() - pp.metrics.incUpdates() } } -func (pp *portPublisher) filteredListenerGroup(filterKey FilterKey) *filteredListenerGroup { +func (pp *portPublisher) filteredListenerGroup(filterKey FilterKey) (*filteredListenerGroup, error) { group, ok := pp.filteredListeners[filterKey] if !ok { nodeTopologyZone := "" @@ -603,18 +592,14 @@ func (pp *portPublisher) filteredListenerGroup(filterKey FilterKey) *filteredLis } } - group = newFilteredListenerGroup(filterKey, nodeTopologyZone, pp.enableIPv6, pp.localTrafficPolicy) + metrics, err := endpointsVecs.newEndpointsMetrics(endpointsLabels(pp.cluster, pp.id.Namespace, pp.id.Name, fmt.Sprintf("%d", pp.srcPort), filterKey.Hostname, filterKey.NodeName)) + if err != nil { + return nil, err + } + group = newFilteredListenerGroup(filterKey, nodeTopologyZone, pp.enableIPv6, pp.localTrafficPolicy, metrics) pp.filteredListeners[filterKey] = group } - return group -} - -func (pp *portPublisher) totalListeners() int { - total := 0 - for _, group := range pp.filteredListeners { - total += len(group.listeners) - } - return total + return group, nil } func (pp *portPublisher) publishAddressChange(newAddressSet AddressSet) { @@ -629,9 +614,9 @@ func (pp *portPublisher) publishFilteredSnapshots() { } } -func (pp *portPublisher) publishNoEndpoints() { +func (pp *portPublisher) publishNoEndpoints(exists bool) { for _, group := range pp.filteredListeners { - group.publishNoEndpoints() + group.publishNoEndpoints(exists) } } diff --git a/controller/api/destination/watcher/prometheus.go b/controller/api/destination/watcher/prometheus.go index 1b483f38555e3..b3e23856c2a6e 100644 --- a/controller/api/destination/watcher/prometheus.go +++ b/controller/api/destination/watcher/prometheus.go @@ -128,12 +128,14 @@ func newMetricsVecs(name string, labels []string) metricsVecs { } } -func endpointsLabels(cluster, namespace, service, port string) prometheus.Labels { +func endpointsLabels(cluster, namespace, service, port, hostname, nodename string) prometheus.Labels { return prometheus.Labels{ "cluster": cluster, "namespace": namespace, "service": service, "port": port, + "hostname": hostname, + "nodename": nodename, } } @@ -146,7 +148,7 @@ func labelNames(labels prometheus.Labels) []string { } func newEndpointsMetricsVecs() endpointsMetricsVecs { - labels := labelNames(endpointsLabels("", "", "", "")) + labels := labelNames(endpointsLabels("", "", "", 
"", "", "")) vecs := newMetricsVecs("endpoints", labels) pods := promauto.NewGaugeVec( diff --git a/controller/api/destination/watcher/service_publisher.go b/controller/api/destination/watcher/service_publisher.go index a96c946c5fbfb..f80bc5229c9b5 100644 --- a/controller/api/destination/watcher/service_publisher.go +++ b/controller/api/destination/watcher/service_publisher.go @@ -1,12 +1,10 @@ package watcher import ( - "strconv" "sync" "github.com/linkerd/linkerd2/controller/gen/apis/server/v1beta3" "github.com/linkerd/linkerd2/controller/k8s" - "github.com/prometheus/client_golang/prometheus" logging "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" @@ -121,15 +119,11 @@ func (sp *servicePublisher) subscribe(srcPort Port, listener EndpointUpdateListe publisher, ok := sp.ports[srcPort] if !ok { - var err error - publisher, err = sp.newPortPublisher(srcPort) - if err != nil { - return err - } + publisher = sp.newPortPublisher(srcPort) sp.ports[srcPort] = publisher } - publisher.subscribe(listener, filterKey) - return nil + err := publisher.subscribe(listener, filterKey) + return err } func (sp *servicePublisher) unsubscribe(srcPort Port, listener EndpointUpdateListener, filterKey FilterKey, withRemove bool) { @@ -139,14 +133,10 @@ func (sp *servicePublisher) unsubscribe(srcPort Port, listener EndpointUpdateLis publisher, ok := sp.ports[srcPort] if ok { publisher.unsubscribe(listener, filterKey, withRemove) - if publisher.totalListeners() == 0 { - endpointsVecs.unregister(sp.metricsLabels(srcPort)) - delete(sp.ports, srcPort) - } } } -func (sp *servicePublisher) newPortPublisher(srcPort Port) (*portPublisher, error) { +func (sp *servicePublisher) newPortPublisher(srcPort Port) *portPublisher { targetPort := intstr.FromInt(int(srcPort)) svc, err := sp.k8sAPI.Svc().Lister().Services(sp.id.Namespace).Get(sp.id.Name) if err != nil && !apierrors.IsNotFound(err) { @@ -160,19 +150,16 @@ func (sp *servicePublisher) newPortPublisher(srcPort Port) (*portPublisher, erro log := sp.log.WithField("port", srcPort) - metrics, err := endpointsVecs.newEndpointsMetrics(sp.metricsLabels(srcPort)) - if err != nil { - return nil, err - } port := &portPublisher{ - filteredListeners: map[FilterKey]*filteredListenerGroup{}, - targetPort: targetPort, - srcPort: srcPort, - exists: exists, - k8sAPI: sp.k8sAPI, - metadataAPI: sp.metadataAPI, - log: log, - metrics: metrics, + filteredListeners: map[FilterKey]*filteredListenerGroup{}, + targetPort: targetPort, + srcPort: srcPort, + exists: exists, + k8sAPI: sp.k8sAPI, + metadataAPI: sp.metadataAPI, + log: log, + cluster: sp.cluster, + enableEndpointSlices: sp.enableEndpointSlices, enableIPv6: sp.enableIPv6, localTrafficPolicy: sp.localTrafficPolicy, @@ -201,11 +188,7 @@ func (sp *servicePublisher) newPortPublisher(srcPort Port) (*portPublisher, erro } } - return port, nil -} - -func (sp *servicePublisher) metricsLabels(port Port) prometheus.Labels { - return endpointsLabels(sp.cluster, sp.id.Namespace, sp.id.Name, strconv.Itoa(int(port))) + return port } func (sp *servicePublisher) updateServer(oldServer, newServer *v1beta3.Server) {