// CleanupLeftovers removes all iptables rules and chains created by the Proxier.
// It returns true if an error was encountered. Errors are logged.
func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
	// TODO: actually tear down all rules and chains.
	args := []string{"-m", "comment", "--comment", "kubernetes service portals", "-j", string(iptablesServicesChain)}
	if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainOutput, args...); err != nil {
		glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
		encounteredError = true
	}
	if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPrerouting, args...); err != nil {
		glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
		encounteredError = true
	}
	args = []string{"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT", "-m", "mark", "--mark", iptablesMasqueradeMark, "-j", "MASQUERADE"}
	if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
		glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
		encounteredError = true
	}

	// Flush and delete chains.
	chains := []utiliptables.Chain{iptablesServicesChain, iptablesNodePortsChain}
	for _, c := range chains {
		// Flush the chain, then delete it if the flush succeeded;
		// deleting would fail anyway if the flush failed.
		if err := ipt.FlushChain(utiliptables.TableNAT, c); err != nil {
			glog.Errorf("Error flushing pure-iptables proxy chain: %v", err)
			encounteredError = true
		} else {
			if err = ipt.DeleteChain(utiliptables.TableNAT, c); err != nil {
				glog.Errorf("Error deleting pure-iptables proxy chain: %v", err)
				encounteredError = true
			}
		}
	}
	return encounteredError
}
// remove the iptables rules from the pure iptables Proxier
func tearDownIptablesProxierRules(ipt iptables.Interface) {
	// TODO: actually tear down all rules and chains.
	// NOTE: this needs to be kept in sync with the proxy/iptables Proxier's rules.
	args := []string{"-j", "KUBE-SERVICES"}
	if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainOutput, args...); err != nil {
		glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
	}
	if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainPrerouting, args...); err != nil {
		glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
	}
}
// nonMasqueradeCIDR is the CIDR for our internal IP range; traffic to IPs
// outside this range will use IP masquerade.
func ensureIPTablesMasqRule(client iptables.Interface, nonMasqueradeCIDR string) error {
	if _, err := client.EnsureRule(iptables.Append, iptables.TableNAT, iptables.ChainPostrouting,
		"-m", "comment", "--comment", "kubelet: SNAT outbound cluster traffic",
		"-m", "addrtype", "!", "--dst-type", "LOCAL",
		"!", "-d", nonMasqueradeCIDR,
		"-j", "MASQUERADE"); err != nil {
		return fmt.Errorf("Failed to ensure masquerading for %s chain %s: %v",
			iptables.TableNAT, iptables.ChainPostrouting, err)
	}
	return nil
}
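// For reference, the EnsureRule call above corresponds to roughly this
// iptables invocation (reconstructed from the args; <nonMasqueradeCIDR> is
// the caller-supplied value):
//
//	iptables -t nat -A POSTROUTING \
//	    -m comment --comment "kubelet: SNAT outbound cluster traffic" \
//	    -m addrtype ! --dst-type LOCAL \
//	    ! -d <nonMasqueradeCIDR> -j MASQUERADE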
// CleanupLeftovers removes all iptables rules and chains created by the Proxier.
// It returns true if an error was encountered. Errors are logged.
func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
	// TODO: actually tear down all rules and chains.
	args := []string{"-j", "KUBE-SERVICES"}
	if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainOutput, args...); err != nil {
		glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
		encounteredError = true
	}
	if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPrerouting, args...); err != nil {
		glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
		encounteredError = true
	}
	return encounteredError
}
func SetupIptables(ipt iptables.Interface, clusterNetworkCIDR string) error {
	rules := []FirewallRule{
		{"nat", "POSTROUTING", []string{"-s", clusterNetworkCIDR, "!", "-d", clusterNetworkCIDR, "-j", "MASQUERADE"}},
		{"filter", "INPUT", []string{"-p", "udp", "-m", "multiport", "--dports", "4789", "-m", "comment", "--comment", "001 vxlan incoming", "-j", "ACCEPT"}},
		{"filter", "INPUT", []string{"-i", "tun0", "-m", "comment", "--comment", "traffic from docker for internet", "-j", "ACCEPT"}},
		{"filter", "FORWARD", []string{"-d", clusterNetworkCIDR, "-j", "ACCEPT"}},
		{"filter", "FORWARD", []string{"-s", clusterNetworkCIDR, "-j", "ACCEPT"}},
	}

	for _, rule := range rules {
		_, err := ipt.EnsureRule(iptables.Prepend, iptables.Table(rule.table), iptables.Chain(rule.chain), rule.args...)
		if err != nil {
			return err
		}
	}

	return nil
}
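// NOTE: FirewallRule is not defined in this snippet. SetupIptables above
// assumes a struct along these lines (a minimal sketch inferred from usage,
// not necessarily the exact upstream definition):
type FirewallRule struct {
	table string
	chain string
	args  []string
}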
// Flush all of our custom iptables rules.
func iptablesFlush(ipt iptables.Interface) error {
	el := []error{}
	if err := ipt.FlushChain(iptables.TableNAT, iptablesContainerPortalChain); err != nil {
		el = append(el, err)
	}
	if err := ipt.FlushChain(iptables.TableNAT, iptablesHostPortalChain); err != nil {
		el = append(el, err)
	}
	if err := ipt.FlushChain(iptables.TableNAT, iptablesContainerNodePortChain); err != nil {
		el = append(el, err)
	}
	if err := ipt.FlushChain(iptables.TableNAT, iptablesHostNodePortChain); err != nil {
		el = append(el, err)
	}
	if err := ipt.FlushChain(iptables.TableFilter, iptablesNonLocalNodePortChain); err != nil {
		el = append(el, err)
	}
	if len(el) != 0 {
		glog.Errorf("Some errors flushing old iptables portals: %v", el)
	}
	return utilerrors.NewAggregate(el)
}
// Ensure that the iptables infrastructure we use is set up. This can safely
// be called periodically.
func iptablesInit(ipt iptables.Interface) error {
	// TODO: There is almost certainly room for optimization here. E.g. if we
	// knew the service-cluster-ip-range CIDR we could fast-track outbound
	// packets not destined for a service. There's probably more; help wanted.

	// Danger: the order of these rules matters here.
	//
	// We match portal rules first, then NodePort rules. For NodePort rules,
	// we filter primarily on --dst-type LOCAL, because we want to listen on
	// all local addresses, but don't want to match internet traffic with the
	// same dst port number.
	//
	// There is one complication (per thockin):
	// "-m addrtype --dst-type LOCAL" is what we want, except that it is
	// broken (by intent, without foresight to our use case) on at least GCE.
	// Specifically, GCE machines have a daemon which learns what external IPs
	// are forwarded to that machine and configures a local route for each
	// such IP, making it match --dst-type LOCAL when we don't want it to.
	// Removing the route gives correct behavior until the daemon recreates
	// it. Killing the daemon is an option, but means that any non-kubernetes
	// use of the machine with an external IP will be broken.
	//
	// This applies to IPs on GCE that actually come from a load balancer;
	// they will be categorized as LOCAL. _If_ the chains were in the wrong
	// order, and the LB traffic had dst-port equal to a NodePort on some
	// other service, the NodePort rule would (incorrectly) take priority.
	// This is unlikely (and would only affect outgoing traffic from the
	// cluster to the load balancer, which seems doubly unlikely), but we need
	// to be careful to keep the rules in the right order.
	args := []string{ /* service-cluster-ip-range matching could go here */ }
	args = append(args, "-m", "comment", "--comment", "handle ClusterIPs; NOTE: this must be before the NodePort rules")
	if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesContainerPortalChain); err != nil {
		return err
	}
	if _, err := ipt.EnsureRule(iptables.Prepend, iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerPortalChain))...); err != nil {
		return err
	}
	if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesHostPortalChain); err != nil {
		return err
	}
	if _, err := ipt.EnsureRule(iptables.Prepend, iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostPortalChain))...); err != nil {
		return err
	}

	// This set of rules matches broadly (addrtype & destination port), and
	// therefore must come after the portal rules above.
	args = []string{"-m", "addrtype", "--dst-type", "LOCAL"}
	args = append(args, "-m", "comment", "--comment", "handle service NodePorts; NOTE: this must be the last rule in the chain")
	if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesContainerNodePortChain); err != nil {
		return err
	}
	if _, err := ipt.EnsureRule(iptables.Append, iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerNodePortChain))...); err != nil {
		return err
	}
	if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesHostNodePortChain); err != nil {
		return err
	}
	if _, err := ipt.EnsureRule(iptables.Append, iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostNodePortChain))...); err != nil {
		return err
	}

	// TODO: Verify order of rules.
	return nil
}
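// For reference, a rough reconstruction of the nat PREROUTING chain after
// iptablesInit runs, derived from the Prepend/Append calls above (chain names
// are the package constants; unrelated rules may sit in between):
//
//	-A PREROUTING -m comment --comment "handle ClusterIPs; ..." -j <iptablesContainerPortalChain>
//	... any pre-existing rules ...
//	-A PREROUTING -m addrtype --dst-type LOCAL -m comment --comment "handle service NodePorts; ..." -j <iptablesContainerNodePortChain>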
// CleanupLeftovers removes all iptables rules and chains created by the Proxier.
// It returns true if an error was encountered. Errors are logged.
func CleanupLeftovers(ipt iptables.Interface) (encounteredError bool) {
	// NOTE: Warning, this needs to be kept in sync with the userspace Proxier;
	// we want to ensure we remove all of the iptables rules it creates.
	// Currently they are all in iptablesInit().
	// Delete the rules first, then flush and delete the chains.
	args := []string{"-m", "comment", "--comment", "handle ClusterIPs; NOTE: this must be before the NodePort rules"}
	if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostPortalChain))...); err != nil {
		glog.Errorf("Error removing userspace rule: %v", err)
		encounteredError = true
	}
	if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerPortalChain))...); err != nil {
		glog.Errorf("Error removing userspace rule: %v", err)
		encounteredError = true
	}
	args = []string{"-m", "addrtype", "--dst-type", "LOCAL"}
	args = append(args, "-m", "comment", "--comment", "handle service NodePorts; NOTE: this must be the last rule in the chain")
	if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostNodePortChain))...); err != nil {
		glog.Errorf("Error removing userspace rule: %v", err)
		encounteredError = true
	}
	if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerNodePortChain))...); err != nil {
		glog.Errorf("Error removing userspace rule: %v", err)
		encounteredError = true
	}

	// Flush and delete chains.
	chains := []iptables.Chain{iptablesContainerPortalChain, iptablesHostPortalChain, iptablesHostNodePortChain, iptablesContainerNodePortChain}
	for _, c := range chains {
		// Flush the chain, then delete it if the flush succeeded;
		// deleting would fail anyway if the flush failed.
		if err := ipt.FlushChain(iptables.TableNAT, c); err != nil {
			glog.Errorf("Error flushing userspace chain: %v", err)
			encounteredError = true
		} else {
			if err = ipt.DeleteChain(iptables.TableNAT, c); err != nil {
				glog.Errorf("Error deleting userspace chain: %v", err)
				encounteredError = true
			}
		}
	}
	return encounteredError
}
// CleanupLeftovers removes all iptables rules and chains created by the Proxier.
// It returns true if an error was encountered. Errors are logged.
func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
	// Unlink the services chain.
	args := []string{
		"-m", "comment", "--comment", "kubernetes service portals",
		"-j", string(kubeServicesChain),
	}
	tableChainsWithJumpServices := []struct {
		table utiliptables.Table
		chain utiliptables.Chain
	}{
		{utiliptables.TableFilter, utiliptables.ChainOutput},
		{utiliptables.TableNAT, utiliptables.ChainOutput},
		{utiliptables.TableNAT, utiliptables.ChainPrerouting},
	}
	for _, tc := range tableChainsWithJumpServices {
		if err := ipt.DeleteRule(tc.table, tc.chain, args...); err != nil {
			if !utiliptables.IsNotFoundError(err) {
				glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
				encounteredError = true
			}
		}
	}

	// Unlink the postrouting chain.
	args = []string{
		"-m", "comment", "--comment", "kubernetes postrouting rules",
		"-j", string(kubePostroutingChain),
	}
	if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
		if !utiliptables.IsNotFoundError(err) {
			glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
			encounteredError = true
		}
	}

	// Flush and remove all of our chains.
	if iptablesSaveRaw, err := ipt.Save(utiliptables.TableNAT); err != nil {
		glog.Errorf("Failed to execute iptables-save for %s: %v", utiliptables.TableNAT, err)
		encounteredError = true
	} else {
		existingNATChains := getChainLines(utiliptables.TableNAT, iptablesSaveRaw)
		natChains := bytes.NewBuffer(nil)
		natRules := bytes.NewBuffer(nil)
		writeLine(natChains, "*nat")
		// Start with chains we know we need to remove.
		for _, chain := range []utiliptables.Chain{kubeServicesChain, kubeNodePortsChain, kubePostroutingChain, kubeMarkMasqChain} {
			if _, found := existingNATChains[chain]; found {
				chainString := string(chain)
				writeLine(natChains, existingNATChains[chain]) // flush
				writeLine(natRules, "-X", chainString)         // delete
			}
		}
		// Hunt for service and endpoint chains.
		for chain := range existingNATChains {
			chainString := string(chain)
			if strings.HasPrefix(chainString, "KUBE-SVC-") || strings.HasPrefix(chainString, "KUBE-SEP-") {
				writeLine(natChains, existingNATChains[chain]) // flush
				writeLine(natRules, "-X", chainString)         // delete
			}
		}
		writeLine(natRules, "COMMIT")
		natLines := append(natChains.Bytes(), natRules.Bytes()...)
		// Write it.
		err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
		if err != nil {
			glog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableNAT, err)
			encounteredError = true
		}
	}

	{
		filterBuf := bytes.NewBuffer(nil)
		writeLine(filterBuf, "*filter")
		writeLine(filterBuf, fmt.Sprintf(":%s - [0:0]", kubeServicesChain))
		writeLine(filterBuf, fmt.Sprintf("-X %s", kubeServicesChain))
		writeLine(filterBuf, "COMMIT")
		// Write it.
		if err := ipt.Restore(utiliptables.TableFilter, filterBuf.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil {
			glog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableFilter, err)
			encounteredError = true
		}
	}

	// Clean up the older SNAT rule which was directly in POSTROUTING.
	// TODO(thockin): Remove this for v1.3 or v1.4.
	args = []string{
		"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
		"-m", "mark", "--mark", oldIptablesMasqueradeMark,
		"-j", "MASQUERADE",
	}
	if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
		if !utiliptables.IsNotFoundError(err) {
			glog.Errorf("Error removing old-style SNAT rule: %v", err)
			encounteredError = true
		}
	}
	return encounteredError
}
// NewProxyServerDefault creates a new ProxyServer object with default parameters.
func NewProxyServerDefault(config *options.ProxyServerConfig) (*ProxyServer, error) {
	if c, err := configz.New("componentconfig"); err == nil {
		c.Set(config.KubeProxyConfiguration)
	} else {
		glog.Errorf("unable to register configz: %s", err)
	}
	protocol := utiliptables.ProtocolIpv4
	if net.ParseIP(config.BindAddress).To4() == nil {
		protocol = utiliptables.ProtocolIpv6
	}

	var netshInterface utilnetsh.Interface
	var iptInterface utiliptables.Interface
	var dbus utildbus.Interface

	// Create an iptables utility.
	execer := exec.New()

	if runtime.GOOS == "windows" {
		netshInterface = utilnetsh.New(execer)
	} else {
		dbus = utildbus.New()
		iptInterface = utiliptables.New(execer, dbus, protocol)
	}

	// We omit creation of pretty much everything if we run in cleanup mode.
	if config.CleanupAndExit {
		return &ProxyServer{
			Config:       config,
			IptInterface: iptInterface,
		}, nil
	}

	// TODO(vmarmol): Use container config for this.
	var oomAdjuster *oom.OOMAdjuster
	if config.OOMScoreAdj != nil {
		oomAdjuster = oom.NewOOMAdjuster()
		if err := oomAdjuster.ApplyOOMScoreAdj(0, int(*config.OOMScoreAdj)); err != nil {
			glog.V(2).Info(err)
		}
	}

	if config.ResourceContainer != "" {
		// Run in its own container.
		if err := resourcecontainer.RunInResourceContainer(config.ResourceContainer); err != nil {
			glog.Warningf("Failed to start in resource-only container %q: %v", config.ResourceContainer, err)
		} else {
			glog.V(2).Infof("Running in resource-only container %q", config.ResourceContainer)
		}
	}

	// Create a Kube client; define the API config source.
	if config.Kubeconfig == "" && config.Master == "" {
		glog.Warningf("Neither --kubeconfig nor --master was specified. Using default API client. This might not work.")
	}

	// This creates a client, first loading any specified kubeconfig
	// file, and then overriding the Master flag, if non-empty.
	kubeconfig, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
		&clientcmd.ClientConfigLoadingRules{ExplicitPath: config.Kubeconfig},
		&clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: config.Master}}).ClientConfig()
	if err != nil {
		return nil, err
	}

	kubeconfig.ContentType = config.ContentType
	// Override kubeconfig qps/burst settings from flags.
	kubeconfig.QPS = config.KubeAPIQPS
	kubeconfig.Burst = int(config.KubeAPIBurst)

	client, err := clientset.NewForConfig(kubeconfig)
	if err != nil {
		glog.Fatalf("Invalid API configuration: %v", err)
	}

	// Create an event recorder.
	hostname := nodeutil.GetHostname(config.HostnameOverride)
	eventBroadcaster := record.NewBroadcaster()
	recorder := eventBroadcaster.NewRecorder(v1.EventSource{Component: "kube-proxy", Host: hostname})

	var proxier proxy.ProxyProvider
	var endpointsHandler proxyconfig.EndpointsConfigHandler

	proxyMode := getProxyMode(string(config.Mode), client.Core().Nodes(), hostname, iptInterface, iptables.LinuxKernelCompatTester{})
	if proxyMode == proxyModeIPTables {
		glog.V(0).Info("Using iptables Proxier.")
		if config.IPTablesMasqueradeBit == nil {
			// IPTablesMasqueradeBit must be specified or defaulted.
			return nil, fmt.Errorf("Unable to read IPTablesMasqueradeBit from config")
		}
		proxierIPTables, err := iptables.NewProxier(iptInterface, utilsysctl.New(), execer, config.IPTablesSyncPeriod.Duration, config.IPTablesMinSyncPeriod.Duration, config.MasqueradeAll, int(*config.IPTablesMasqueradeBit), config.ClusterCIDR, hostname, getNodeIP(client, hostname))
		if err != nil {
			glog.Fatalf("Unable to create proxier: %v", err)
		}
		proxier = proxierIPTables
		endpointsHandler = proxierIPTables
		// No turning back. Remove artifacts that might still exist from the userspace Proxier.
		glog.V(0).Info("Tearing down userspace rules.")
		userspace.CleanupLeftovers(iptInterface)
	} else {
		glog.V(0).Info("Using userspace Proxier.")
		// This is a proxy.LoadBalancer which NewProxier needs but has methods
		// we don't need for our config.EndpointsConfigHandler.
		loadBalancer := userspace.NewLoadBalancerRR()
		// Set EndpointsConfigHandler to our loadBalancer.
		endpointsHandler = loadBalancer

		var proxierUserspace proxy.ProxyProvider
		if runtime.GOOS == "windows" {
			proxierUserspace, err = winuserspace.NewProxier(
				loadBalancer,
				net.ParseIP(config.BindAddress),
				netshInterface,
				*utilnet.ParsePortRangeOrDie(config.PortRange),
				// TODO @pires replace below with default values, if applicable
				config.IPTablesSyncPeriod.Duration,
				config.UDPIdleTimeout.Duration,
			)
		} else {
			proxierUserspace, err = userspace.NewProxier(
				loadBalancer,
				net.ParseIP(config.BindAddress),
				iptInterface,
				*utilnet.ParsePortRangeOrDie(config.PortRange),
				config.IPTablesSyncPeriod.Duration,
				config.IPTablesMinSyncPeriod.Duration,
				config.UDPIdleTimeout.Duration,
			)
		}
		if err != nil {
			glog.Fatalf("Unable to create proxier: %v", err)
		}
		proxier = proxierUserspace
		// Remove artifacts from the pure-iptables Proxier, if not on Windows.
		if runtime.GOOS != "windows" {
			glog.V(0).Info("Tearing down pure-iptables proxy rules.")
			iptables.CleanupLeftovers(iptInterface)
		}
	}

	// Add an iptables reload function, if not on Windows.
	if runtime.GOOS != "windows" {
		iptInterface.AddReloadFunc(proxier.Sync)
	}

	// Create configs (i.e. watches for Services and Endpoints).
	// Note: RegisterHandler() calls need to happen before creation of Sources
	// because sources only notify on changes, and the initial update (on
	// process start) may be lost if no handlers are registered yet.
	serviceConfig := proxyconfig.NewServiceConfig()
	serviceConfig.RegisterHandler(proxier)

	endpointsConfig := proxyconfig.NewEndpointsConfig()
	endpointsConfig.RegisterHandler(endpointsHandler)

	proxyconfig.NewSourceAPI(
		client.Core().RESTClient(),
		config.ConfigSyncPeriod,
		serviceConfig.Channel("api"),
		endpointsConfig.Channel("api"),
	)

	config.NodeRef = &api.ObjectReference{
		Kind:      "Node",
		Name:      hostname,
		UID:       types.UID(hostname),
		Namespace: "",
	}

	conntracker := realConntracker{}

	return NewProxyServer(client, config, iptInterface, proxier, eventBroadcaster, recorder, conntracker, proxyMode)
}