//go:build linux || windows package libnetwork import ( "context" "net" "slices" "github.com/containerd/log" ) const maxSetStringLen = 350 func (c *Controller) addEndpointNameResolution(svcName, svcID, nID, eID, containerName string, vip net.IP, serviceAliases, taskAliases []string, ip net.IP, addService bool, method string) error { n, err := c.NetworkByID(nID) if err != nil { return err } log.G(context.TODO()).Debugf("addEndpointNameResolution %s %s add_service:%t sAliases:%v tAliases:%v", eID, svcName, addService, serviceAliases, taskAliases) // Add container resolution mappings if err := c.addContainerNameResolution(nID, eID, containerName, taskAliases, ip, method); err != nil { return err } serviceID := svcID if serviceID == "" { // This is the case of a normal container not part of a service serviceID = eID } // Add endpoint IP to special "tasks.svc_name" so that the applications have access to DNS RR. n.addSvcRecords(eID, "tasks."+svcName, serviceID, ip, nil, false, method) for _, alias := range serviceAliases { n.addSvcRecords(eID, "tasks."+alias, serviceID, ip, nil, false, method) } // Add service name to vip in DNS, if vip is valid. Otherwise resort to DNS RR if len(vip) == 0 { n.addSvcRecords(eID, svcName, serviceID, ip, nil, false, method) for _, alias := range serviceAliases { n.addSvcRecords(eID, alias, serviceID, ip, nil, false, method) } } if addService && len(vip) != 0 { n.addSvcRecords(eID, svcName, serviceID, vip, nil, false, method) // VIP records for service aliases are managed by addServiceBinding // via service.aliasRefs, not here, so they survive rolling updates // where aliases can change between tasks. } return nil } func (c *Controller) addContainerNameResolution(nID, eID, containerName string, taskAliases []string, ip net.IP, method string) error { n, err := c.NetworkByID(nID) if err != nil { return err } log.G(context.TODO()).Debugf("addContainerNameResolution %s %s", eID, containerName) // Add resolution for container name n.addSvcRecords(eID, containerName, eID, ip, nil, true, method) // Add resolution for taskaliases for _, alias := range taskAliases { n.addSvcRecords(eID, alias, eID, ip, nil, false, method) } return nil } func (c *Controller) deleteEndpointNameResolution(svcName, svcID, nID, eID, containerName string, vip net.IP, serviceAliases, taskAliases []string, ip net.IP, rmService, multipleEntries bool, method string) error { n, err := c.NetworkByID(nID) if err != nil { return err } log.G(context.TODO()).Debugf("deleteEndpointNameResolution %s %s rm_service:%t suppress:%t sAliases:%v tAliases:%v", eID, svcName, rmService, multipleEntries, serviceAliases, taskAliases) // Delete container resolution mappings if err := c.delContainerNameResolution(nID, eID, containerName, taskAliases, ip, method); err != nil { log.G(context.TODO()).WithError(err).Warn("Error deleting container from resolver") } serviceID := svcID if serviceID == "" { // This is the case of a normal container not part of a service serviceID = eID } // Delete the special "tasks.svc_name" backend record. if !multipleEntries { n.deleteSvcRecords(eID, "tasks."+svcName, serviceID, ip, nil, false, method) for _, alias := range serviceAliases { n.deleteSvcRecords(eID, "tasks."+alias, serviceID, ip, nil, false, method) } } // If we are doing DNS RR delete the endpoint IP from DNS record right away. if !multipleEntries && len(vip) == 0 { n.deleteSvcRecords(eID, svcName, serviceID, ip, nil, false, method) for _, alias := range serviceAliases { n.deleteSvcRecords(eID, alias, serviceID, ip, nil, false, method) } } // Remove the DNS record for VIP only if we are removing the service if rmService && len(vip) != 0 && !multipleEntries { n.deleteSvcRecords(eID, svcName, serviceID, vip, nil, false, method) // VIP records for service aliases are managed by rmServiceBinding // via service.aliasRefs, not here. } return nil } func (c *Controller) delContainerNameResolution(nID, eID, containerName string, taskAliases []string, ip net.IP, method string) error { n, err := c.NetworkByID(nID) if err != nil { return err } log.G(context.TODO()).Debugf("delContainerNameResolution %s %s", eID, containerName) // Delete resolution for container name n.deleteSvcRecords(eID, containerName, eID, ip, nil, true, method) // Delete resolution for taskaliases for _, alias := range taskAliases { n.deleteSvcRecords(eID, alias, eID, ip, nil, true, method) } return nil } func newService(name string, id string, ingressPorts []*PortConfig) *service { return &service{ name: name, id: id, ingressPorts: ingressPorts, loadBalancers: make(map[string]*loadBalancer), } } func (c *Controller) getLBIndex(sid, nid string, ingressPorts []*PortConfig) int { skey := serviceKey{ id: sid, ports: portConfigs(ingressPorts).String(), } c.mu.Lock() s, ok := c.serviceBindings[skey] c.mu.Unlock() if !ok { return 0 } s.Lock() defer s.Unlock() if s.deleted { return 0 } lb := s.loadBalancers[nid] if lb == nil { return 0 } return int(lb.fwMark) } // cleanupServiceDiscovery when the network is being deleted, erase all the associated service discovery records func (c *Controller) cleanupServiceDiscovery(cleanupNID string) { c.mu.Lock() defer c.mu.Unlock() if cleanupNID == "" { log.G(context.TODO()).Debugf("cleanupServiceDiscovery for all networks") c.svcRecords = make(map[string]*svcInfo) return } log.G(context.TODO()).Debugf("cleanupServiceDiscovery for network:%s", cleanupNID) delete(c.svcRecords, cleanupNID) } func (c *Controller) cleanupServiceBindings(cleanupNID string) { var cleanupFuncs []func() log.G(context.TODO()).Debugf("cleanupServiceBindings for %s", cleanupNID) c.mu.Lock() services := make([]*service, 0, len(c.serviceBindings)) for _, s := range c.serviceBindings { services = append(services, s) } c.mu.Unlock() for _, s := range services { s.Lock() // Skip the serviceBindings that got deleted if s.deleted { s.Unlock() continue } for nid, lb := range s.loadBalancers { if cleanupNID != "" && nid != cleanupNID { continue } for eid, be := range lb.backEnds { cleanupFuncs = append(cleanupFuncs, makeServiceCleanupFunc(c, s, nid, eid, lb.vip, be.ip, be.aliases)) } } s.Unlock() } for _, f := range cleanupFuncs { f() } } func makeServiceCleanupFunc(c *Controller, s *service, nID, eID string, vip net.IP, ip net.IP, aliases []string) func() { // ContainerName and taskAliases are not available here, this is still fine because the Service discovery // cleanup already happened before. The only thing that rmServiceBinding is still doing here a part from the Load // Balancer bookkeeping, is to keep consistent the mapping of endpoint to IP. return func() { if err := c.rmServiceBinding(s.name, s.id, nID, eID, "", vip, s.ingressPorts, aliases, []string{}, ip, "cleanupServiceBindings", false, true); err != nil { log.G(context.TODO()).Errorf("Failed to remove service bindings for service %s network %s endpoint %s while cleanup: %v", s.id, nID, eID, err) } } } func (c *Controller) addServiceBinding(svcName, svcID, nID, eID, containerName string, vip net.IP, ingressPorts []*PortConfig, serviceAliases, taskAliases []string, ip net.IP, method string) error { var addService bool // Failure to lock the network ID on add can result in racing // against network deletion resulting in inconsistent state // in the c.serviceBindings map and it's sub-maps. Also, // always lock network ID before services to avoid deadlock. c.networkLocker.Lock(nID) defer c.networkLocker.Unlock(nID) //nolint:errcheck n, err := c.NetworkByID(nID) if err != nil { return err } skey := serviceKey{ id: svcID, ports: portConfigs(ingressPorts).String(), } var s *service for { c.mu.Lock() var ok bool s, ok = c.serviceBindings[skey] if !ok { // Create a new service if we are seeing this service // for the first time. s = newService(svcName, svcID, ingressPorts) c.serviceBindings[skey] = s } c.mu.Unlock() s.Lock() if !s.deleted { // ok the object is good to be used break } s.Unlock() } log.G(context.TODO()).Debugf("addServiceBinding from %s START for %s %s p:%p nid:%s skey:%v", method, svcName, eID, s, nID, skey) defer s.Unlock() lb, ok := s.loadBalancers[nID] if !ok { // Create a new load balancer if we are seeing this // network attachment on the service for the first // time. fwMarkCtrMu.Lock() lb = &loadBalancer{ vip: vip, fwMark: fwMarkCtr, backEnds: make(map[string]*lbBackend), aliasRefs: make(map[string]int), service: s, } fwMarkCtr++ fwMarkCtrMu.Unlock() s.loadBalancers[nID] = lb addService = true } // Diff the task's aliases against the ones previously recorded for // this endpoint. addServiceBinding can be re-invoked for the same eID // (re-enabling a disabled backend, or a remote endpoint event whose // alias set has changed); only the delta should adjust ref counts. // On a 0→1 transition, a VIP DNS record is added on this network's LB; // on a 1→0 transition it is removed. This keeps aliases registered for // as long as any task — old or new — still claims them on this network. var prevAliases []string if existing, ok := lb.backEnds[eID]; ok { prevAliases = existing.aliases } if len(lb.vip) != 0 { for _, alias := range serviceAliases { if !slices.Contains(prevAliases, alias) { lb.aliasRefs[alias]++ if lb.aliasRefs[alias] == 1 { n.addSvcRecords(eID, alias, svcID, lb.vip, nil, false, "addServiceBinding") } } } for _, alias := range prevAliases { if !slices.Contains(serviceAliases, alias) { lb.aliasRefs[alias]-- if lb.aliasRefs[alias] == 0 { delete(lb.aliasRefs, alias) n.deleteSvcRecords(eID, alias, svcID, lb.vip, nil, false, "addServiceBinding") } } } } lb.backEnds[eID] = &lbBackend{ip: ip, aliases: slices.Clone(serviceAliases)} ok, entries := s.assignIPToEndpoint(ip.String(), eID) if !ok || entries > 1 { setStr, b := s.printIPToEndpoint(ip.String()) if len(setStr) > maxSetStringLen { setStr = setStr[:maxSetStringLen] } log.G(context.TODO()).Warnf("addServiceBinding %s possible transient state ok:%t entries:%d set:%t %s", eID, ok, entries, b, setStr) } // Add loadbalancer service and backend to the network n.addLBBackend(ip, lb) // Add the appropriate name resolutions if err := c.addEndpointNameResolution(svcName, svcID, nID, eID, containerName, vip, serviceAliases, taskAliases, ip, addService, "addServiceBinding"); err != nil { return err } log.G(context.TODO()).Debugf("addServiceBinding from %s END for %s %s", method, svcName, eID) return nil } func (c *Controller) rmServiceBinding(svcName, svcID, nID, eID, containerName string, vip net.IP, ingressPorts []*PortConfig, serviceAliases []string, taskAliases []string, ip net.IP, method string, deleteSvcRecords bool, fullRemove bool) error { var rmService bool skey := serviceKey{ id: svcID, ports: portConfigs(ingressPorts).String(), } c.mu.Lock() s, ok := c.serviceBindings[skey] c.mu.Unlock() if !ok { log.G(context.TODO()).Warnf("rmServiceBinding %s %s %s aborted c.serviceBindings[skey] !ok", method, svcName, eID) return nil } s.Lock() defer s.Unlock() log.G(context.TODO()).Debugf("rmServiceBinding from %s START for %s %s p:%p nid:%s sKey:%v deleteSvc:%t", method, svcName, eID, s, nID, skey, deleteSvcRecords) lb, ok := s.loadBalancers[nID] if !ok { log.G(context.TODO()).Warnf("rmServiceBinding %s %s %s aborted s.loadBalancers[nid] !ok", method, svcName, eID) return nil } be, ok := lb.backEnds[eID] if !ok { log.G(context.TODO()).Warnf("rmServiceBinding %s %s %s aborted lb.backEnds[eid] && lb.disabled[eid] !ok", method, svcName, eID) return nil } if fullRemove { // delete regardless delete(lb.backEnds, eID) // Drop this backend's contribution from the per-alias ref counts. // On a 1→0 transition, remove the VIP DNS record on this network's // LB. The network handle may be gone (e.g. when the network has // already been deleted from the controller); in that case the // resolver state is torn down with the network and there is // nothing to do here. if len(lb.vip) != 0 && len(be.aliases) > 0 { n, err := c.NetworkByID(nID) for _, alias := range be.aliases { lb.aliasRefs[alias]-- if lb.aliasRefs[alias] == 0 { delete(lb.aliasRefs, alias) if err == nil { n.deleteSvcRecords(eID, alias, svcID, lb.vip, nil, false, "rmServiceBinding") } } } } } else { be.disabled = true } if len(lb.backEnds) == 0 { // All the backends for this service have been // removed. Time to remove the load balancer and also // remove the service entry in IPVS. rmService = true delete(s.loadBalancers, nID) log.G(context.TODO()).Debugf("rmServiceBinding %s delete %s, p:%p in loadbalancers len:%d", eID, nID, lb, len(s.loadBalancers)) } ok, entries := s.removeIPToEndpoint(ip.String(), eID) if !ok || entries > 0 { setStr, b := s.printIPToEndpoint(ip.String()) if len(setStr) > maxSetStringLen { setStr = setStr[:maxSetStringLen] } log.G(context.TODO()).Warnf("rmServiceBinding %s possible transient state ok:%t entries:%d set:%t %s", eID, ok, entries, b, setStr) } // Remove loadbalancer service(if needed) and backend in all // sandboxes in the network only if the vip is valid. if entries == 0 { // The network may well have been deleted from the store (and // dataplane) before the last of the service bindings. On Linux that's // ok because removing the network sandbox from the dataplane // implicitly cleans up all related dataplane state. // On the Windows dataplane, VFP policylists must be removed // independently of the network, and they must be removed before the HNS // network. Otherwise, policylist removal fails with "network not // found." On Windows cleanupServiceBindings must be called prior to // removing the network from the store or dataplane. n, err := c.NetworkByID(nID) if err == nil { n.rmLBBackend(ip, lb, rmService, fullRemove) } } // Delete the name resolutions if deleteSvcRecords { if err := c.deleteEndpointNameResolution(svcName, svcID, nID, eID, containerName, vip, serviceAliases, taskAliases, ip, rmService, entries > 0, "rmServiceBinding"); err != nil { return err } } if len(s.loadBalancers) == 0 { // All loadbalancers for the service removed. Time to // remove the service itself. c.mu.Lock() // Mark the object as deleted so that the add won't use it wrongly s.deleted = true // NOTE The delete from the serviceBindings map has to be the last operation else we are allowing a race between this service // that is getting deleted and a new service that will be created if the entry is not anymore there delete(c.serviceBindings, skey) c.mu.Unlock() } log.G(context.TODO()).Debugf("rmServiceBinding from %s END for %s %s", method, svcName, eID) return nil }