func setupVeth(netns string, br *netlink.Bridge, ifName string, mtu int, hairpinMode bool) error {
	var hostVethName string

	err := ns.WithNetNSPath(netns, false, func(hostNS *os.File) error {
		// create the veth pair in the container and move host end into host netns
		hostVeth, _, err := ip.SetupVeth(ifName, mtu, hostNS)
		if err != nil {
			return err
		}

		hostVethName = hostVeth.Attrs().Name
		return nil
	})
	if err != nil {
		return err
	}

	// need to lookup hostVeth again as its index has changed during ns move
	hostVeth, err := netlink.LinkByName(hostVethName)
	if err != nil {
		return fmt.Errorf("failed to lookup %q: %v", hostVethName, err)
	}

	// connect host veth end to the bridge
	if err = netlink.LinkSetMaster(hostVeth, br); err != nil {
		return fmt.Errorf("failed to connect %q to bridge %v: %v", hostVethName, br.Attrs().Name, err)
	}

	// set hairpin mode
	if err = netlink.LinkSetHairpin(hostVeth, hairpinMode); err != nil {
		return fmt.Errorf("failed to setup hairpin mode for %v: %v", hostVethName, err)
	}

	return nil
}
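For orientation, a bridge-style plugin would typically call setupVeth from its CNI ADD handler once the bridge itself exists. The sketch below is illustrative only; loadNetConf, ensureBridge, and the netConf fields are assumed helpers and are not part of the code above.

// Illustrative only: driving setupVeth from a CNI ADD handler.
// loadNetConf, ensureBridge and the netConf fields are assumptions.
func cmdAdd(args *skel.CmdArgs) error {
	n, err := loadNetConf(args.StdinData) // assumed: parses bridge name, MTU, hairpin flag
	if err != nil {
		return err
	}

	br, err := ensureBridge(n.BrName, n.MTU) // assumed: creates or looks up the bridge
	if err != nil {
		return err
	}

	// Create the veth pair, attach the host end to the bridge, set hairpin mode.
	if err := setupVeth(args.Netns, br, args.IfName, n.MTU, n.HairpinMode); err != nil {
		return err
	}

	// IPAM allocation and result printing would follow here.
	return nil
}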
func setupContainerVeth(netns, ifName string, mtu int, pr *types.Result) (string, error) {
	// The IPAM result will be something like IP=192.168.3.5/24, GW=192.168.3.1.
	// What we want is really a point-to-point link but veth does not support IFF_POINTOPOINT.
	// Next best thing would be to let it ARP but set interface to 192.168.3.5/32 and
	// add a route like "192.168.3.0/24 via 192.168.3.1 dev $ifName".
	// Unfortunately that won't work as the GW will be outside the interface's subnet.

	// Our solution is to configure the interface with 192.168.3.5/24, then delete the
	// "192.168.3.0/24 dev $ifName" route that was automatically added. Then we add
	// "192.168.3.1/32 dev $ifName" and "192.168.3.0/24 via 192.168.3.1 dev $ifName".
	// In other words we force all traffic to ARP via the gateway except for GW itself.

	var hostVethName string
	err := ns.WithNetNSPath(netns, func(hostNS ns.NetNS) error {
		hostVeth, _, err := ip.SetupVeth(ifName, mtu, hostNS)
		if err != nil {
			return err
		}

		if err := hostNS.Do(func(_ ns.NetNS) error {
			hostVethName = hostVeth.Attrs().Name
			if err := ip.SetHWAddrByIP(hostVethName, pr.IP4.IP.IP, nil /* TODO IPv6 */); err != nil {
				return fmt.Errorf("failed to set hardware addr by IP: %v", err)
			}
			return nil
		}); err != nil {
			return err
		}

		if err = ipam.ConfigureIface(ifName, pr); err != nil {
			return err
		}

		contVeth, err := netlink.LinkByName(ifName)
		if err != nil {
			return fmt.Errorf("failed to look up %q: %v", ifName, err)
		}

		if err := ip.SetHWAddrByIP(contVeth.Attrs().Name, pr.IP4.IP.IP, nil /* TODO IPv6 */); err != nil {
			return fmt.Errorf("failed to set hardware addr by IP: %v", err)
		}

		// Delete the route that was automatically added
		route := netlink.Route{
			LinkIndex: contVeth.Attrs().Index,
			Dst: &net.IPNet{
				IP:   pr.IP4.IP.IP.Mask(pr.IP4.IP.Mask),
				Mask: pr.IP4.IP.Mask,
			},
			Scope: netlink.SCOPE_NOWHERE,
		}

		if err := netlink.RouteDel(&route); err != nil {
			return fmt.Errorf("failed to delete route %v: %v", route, err)
		}

		for _, r := range []netlink.Route{
			netlink.Route{
				LinkIndex: contVeth.Attrs().Index,
				Dst: &net.IPNet{
					IP:   pr.IP4.Gateway,
					Mask: net.CIDRMask(32, 32),
				},
				Scope: netlink.SCOPE_LINK,
				Src:   pr.IP4.IP.IP,
			},
			netlink.Route{
				LinkIndex: contVeth.Attrs().Index,
				Dst: &net.IPNet{
					IP:   pr.IP4.IP.IP.Mask(pr.IP4.IP.Mask),
					Mask: pr.IP4.IP.Mask,
				},
				Scope: netlink.SCOPE_UNIVERSE,
				Gw:    pr.IP4.Gateway,
				Src:   pr.IP4.IP.IP,
			},
		} {
			if err := netlink.RouteAdd(&r); err != nil {
				return fmt.Errorf("failed to add route %v: %v", r, err)
			}
		}

		return nil
	})

	return hostVethName, err
}
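The host side of this point-to-point arrangement needs matching configuration: the gateway address assigned as a /32 on the host veth and a /32 route back to the container IP. A minimal sketch of that counterpart follows, assuming the same *types.Result as above; setupHostVeth is not part of the code shown here and only illustrates the intent.

// Illustrative counterpart on the host side (assumed helper, not shown above).
func setupHostVeth(vethName string, pr *types.Result) error {
	// The veth's host end moved namespaces, so look it up again by name.
	veth, err := netlink.LinkByName(vethName)
	if err != nil {
		return fmt.Errorf("failed to lookup %q: %v", vethName, err)
	}

	// Give the host end the gateway address as a /32 ...
	addr := &netlink.Addr{IPNet: &net.IPNet{IP: pr.IP4.Gateway, Mask: net.CIDRMask(32, 32)}}
	if err := netlink.AddrAdd(veth, addr); err != nil {
		return fmt.Errorf("failed to add gateway address to %q: %v", vethName, err)
	}

	// ... and route the container's /32 back over this link.
	route := netlink.Route{
		LinkIndex: veth.Attrs().Index,
		Scope:     netlink.SCOPE_LINK,
		Dst:       &net.IPNet{IP: pr.IP4.IP.IP, Mask: net.CIDRMask(32, 32)},
	}
	if err := netlink.RouteAdd(&route); err != nil {
		return fmt.Errorf("failed to add route to container: %v", err)
	}
	return nil
}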
// Set up all networking (host/container veth, OVS flows, IPAM, loopback, etc)
func (m *podManager) setup(req *cniserver.PodRequest) (*cnitypes.Result, *kubehostport.RunningPod, error) {
	podConfig, pod, err := m.getPodConfig(req)
	if err != nil {
		return nil, nil, err
	}

	ipamResult, err := m.ipamAdd(req.Netns, req.ContainerId)
	if err != nil {
		// TODO: Remove this hack once we've figured out how to retrieve the netns
		// of an exited container. Currently, restarting docker will leak a bunch of
		// ips. This will exhaust available ip space unless we cleanup old ips. At the
		// same time we don't want to try GC'ing them periodically as that could lead
		// to a performance regression in starting pods. So on each setup failure, try
		// GC on the assumption that the kubelet is going to retry pod creation, and
		// when it does, there will be ips.
		m.ipamGarbageCollection()

		return nil, nil, fmt.Errorf("failed to run IPAM for %v: %v", req.ContainerId, err)
	}
	podIP := ipamResult.IP4.IP.IP

	// Release any IPAM allocations and hostports if the setup failed
	var success bool
	defer func() {
		if !success {
			m.ipamDel(req.ContainerId)
			if err := m.hostportHandler.SyncHostports(TUN, m.getRunningPods()); err != nil {
				glog.Warningf("failed syncing hostports: %v", err)
			}
		}
	}()

	// Open any hostports the pod wants
	newPod := &kubehostport.RunningPod{Pod: pod, IP: podIP}
	if err := m.hostportHandler.OpenPodHostportsAndSync(newPod, TUN, m.getRunningPods()); err != nil {
		return nil, nil, err
	}

	var hostVeth, contVeth netlink.Link
	err = ns.WithNetNSPath(req.Netns, func(hostNS ns.NetNS) error {
		hostVeth, contVeth, err = ip.SetupVeth(podInterfaceName, int(m.mtu), hostNS)
		if err != nil {
			return fmt.Errorf("failed to create container veth: %v", err)
		}
		// refetch to get hardware address and other properties
		contVeth, err = netlink.LinkByIndex(contVeth.Attrs().Index)
		if err != nil {
			return fmt.Errorf("failed to fetch container veth: %v", err)
		}

		// Clear out gateway to prevent ConfigureIface from adding the cluster
		// subnet via the gateway
		ipamResult.IP4.Gateway = nil
		if err = ipam.ConfigureIface(podInterfaceName, ipamResult); err != nil {
			return fmt.Errorf("failed to configure container IPAM: %v", err)
		}

		lo, err := netlink.LinkByName("lo")
		if err == nil {
			err = netlink.LinkSetUp(lo)
		}
		if err != nil {
			return fmt.Errorf("failed to configure container loopback: %v", err)
		}
		return nil
	})
	if err != nil {
		return nil, nil, err
	}

	if podConfig.wantMacvlan {
		if err := addMacvlan(req.Netns); err != nil {
			return nil, nil, err
		}
	}

	contVethMac := contVeth.Attrs().HardwareAddr.String()
	vnidStr := vnidToString(podConfig.vnid)
	out, err := exec.Command(sdnScript, setUpCmd, hostVeth.Attrs().Name, contVethMac, podIP.String(), vnidStr, podConfig.ingressBandwidth, podConfig.egressBandwidth).CombinedOutput()
	glog.V(5).Infof("SetUpPod network plugin output: %s, %v", string(out), err)

	if isScriptError(err) {
		return nil, nil, fmt.Errorf("error running network setup script:\nhostVethName %s, contVethMac %s, podIP %s, podConfig %#v\n %s", hostVeth.Attrs().Name, contVethMac, podIP.String(), podConfig, getScriptError(out))
	} else if err != nil {
		return nil, nil, err
	}

	success = true
	return ipamResult, newPod, nil
}
// Set up all networking (host/container veth, OVS flows, IPAM, loopback, etc)
func (m *podManager) setup(req *cniserver.PodRequest) (*cnitypes.Result, *kubehostport.RunningPod, error) {
	podConfig, pod, err := m.getPodConfig(req)
	if err != nil {
		return nil, nil, err
	}

	ipamResult, err := m.runIPAM(req.Netns, cniserver.CNI_ADD, req.ContainerId)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to run IPAM for %v: %v", req.ContainerId, err)
	}
	podIP := ipamResult.IP4.IP.IP

	// Release any IPAM allocations and hostports if the setup failed
	var success bool
	defer func() {
		if !success {
			m.runIPAM(req.Netns, cniserver.CNI_DEL, req.ContainerId)
			if err := m.hostportHandler.SyncHostports(TUN, m.getRunningPods()); err != nil {
				glog.Warningf("failed syncing hostports: %v", err)
			}
		}
	}()

	// Open any hostports the pod wants
	newPod := &kubehostport.RunningPod{Pod: pod, IP: podIP}
	if err := m.hostportHandler.OpenPodHostportsAndSync(newPod, TUN, m.getRunningPods()); err != nil {
		return nil, nil, err
	}

	var hostVeth, contVeth netlink.Link
	err = ns.WithNetNSPath(req.Netns, func(hostNS ns.NetNS) error {
		hostVeth, contVeth, err = ip.SetupVeth(podInterfaceName, int(m.mtu), hostNS)
		if err != nil {
			return fmt.Errorf("failed to create container veth: %v", err)
		}
		// refetch to get hardware address and other properties
		contVeth, err = netlink.LinkByIndex(contVeth.Attrs().Index)
		if err != nil {
			return fmt.Errorf("failed to fetch container veth: %v", err)
		}

		// Clear out gateway to prevent ConfigureIface from adding the cluster
		// subnet via the gateway
		ipamResult.IP4.Gateway = nil
		if err = ipam.ConfigureIface(podInterfaceName, ipamResult); err != nil {
			return fmt.Errorf("failed to configure container IPAM: %v", err)
		}

		lo, err := netlink.LinkByName("lo")
		if err == nil {
			err = netlink.LinkSetUp(lo)
		}
		if err != nil {
			return fmt.Errorf("failed to configure container loopback: %v", err)
		}
		return nil
	})
	if err != nil {
		return nil, nil, err
	}

	if podConfig.wantMacvlan {
		if err := addMacvlan(req.Netns); err != nil {
			return nil, nil, err
		}
	}

	contVethMac := contVeth.Attrs().HardwareAddr.String()
	vnidStr := vnidToString(podConfig.vnid)
	out, err := exec.Command(sdnScript, setUpCmd, hostVeth.Attrs().Name, contVethMac, podIP.String(), vnidStr, podConfig.ingressBandwidth, podConfig.egressBandwidth).CombinedOutput()
	glog.V(5).Infof("SetUpPod network plugin output: %s, %v", string(out), err)

	if isScriptError(err) {
		return nil, nil, fmt.Errorf("error running network setup script:\nhostVethName %s, contVethMac %s, podIP %s, podConfig %#v\n %s", hostVeth.Attrs().Name, contVethMac, podIP.String(), podConfig, getScriptError(out))
	} else if err != nil {
		return nil, nil, err
	}

	success = true
	return ipamResult, newPod, nil
}
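In this variant the IPAM hack is gone and both the ADD and DEL paths go through a single runIPAM wrapper. The following is a minimal sketch of what such a wrapper could look like, assuming the host-local IPAM plugin, the invoke package from the CNI library, a cniserver.CNICommand string type, and an m.ipamConfig byte slice holding the IPAM network config; none of these details are taken from the code above, and invoke's signatures differ between CNI library versions.

// Minimal sketch of an IPAM wrapper (assumed shape; not the code above).
// It invokes a CNI IPAM plugin binary with the requested command so that
// ADD and DEL share one code path.
func (m *podManager) runIPAM(netnsPath string, action cniserver.CNICommand, id string) (*cnitypes.Result, error) {
	args := &invoke.Args{
		Command:     string(action), // e.g. "ADD" or "DEL"
		ContainerID: id,
		NetNS:       netnsPath,
		IfName:      podInterfaceName,
		Path:        "/opt/cni/bin", // assumed plugin search path
	}

	if action == cniserver.CNI_ADD {
		result, err := invoke.ExecPluginWithResult("/opt/cni/bin/host-local", m.ipamConfig, args)
		if err != nil {
			return nil, fmt.Errorf("failed to run CNI IPAM ADD: %v", err)
		}
		return result, nil
	}

	// DEL: release the allocation; no result is expected back.
	if err := invoke.ExecPluginWithoutResult("/opt/cni/bin/host-local", m.ipamConfig, args); err != nil {
		return nil, fmt.Errorf("failed to run CNI IPAM DEL: %v", err)
	}
	return nil, nil
}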
	var err error
	hostNetNS, err = ns.NewNS()
	Expect(err).NotTo(HaveOccurred())

	containerNetNS, err = ns.NewNS()
	Expect(err).NotTo(HaveOccurred())

	fakeBytes := make([]byte, 20)
	// to be reset in AfterEach block
	rand.Reader = bytes.NewReader(fakeBytes)

	_ = containerNetNS.Do(func(ns.NetNS) error {
		defer GinkgoRecover()
		hostVeth, containerVeth, err = ip.SetupVeth(fmt.Sprintf(ifaceFormatString, ifaceCounter), mtu, hostNetNS)
		if err != nil {
			return err
		}
		Expect(err).NotTo(HaveOccurred())

		hostVethName = hostVeth.Attrs().Name
		containerVethName = containerVeth.Attrs().Name
		return nil
	})
})

AfterEach(func() {
	Expect(containerNetNS.Close()).To(Succeed())
	Expect(hostNetNS.Close()).To(Succeed())
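A spec of roughly the following shape could then exercise the pair created in the BeforeEach above. This is an illustrative sketch only; it reuses the hostVethName and containerVethName variables captured there and asserts only properties that SetupVeth is expected to establish.

// Illustrative only: verify both veth ends exist in their namespaces with the requested MTU.
It("creates both ends of the veth pair", func() {
	_ = hostNetNS.Do(func(ns.NetNS) error {
		defer GinkgoRecover()
		link, err := netlink.LinkByName(hostVethName)
		Expect(err).NotTo(HaveOccurred())
		Expect(link.Attrs().MTU).To(Equal(mtu))
		return nil
	})

	_ = containerNetNS.Do(func(ns.NetNS) error {
		defer GinkgoRecover()
		link, err := netlink.LinkByName(containerVethName)
		Expect(err).NotTo(HaveOccurred())
		Expect(link.Attrs().MTU).To(Equal(mtu))
		return nil
	})
})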