// nonMasqueradeCIDR is the CIDR for our internal IP range; traffic to IPs // outside this range will use IP masquerade. func ensureIPTablesMasqRule(client iptables.Interface, nonMasqueradeCIDR string) error { if _, err := client.EnsureRule(iptables.Append, iptables.TableNAT, iptables.ChainPostrouting, "-m", "comment", "--comment", "kubelet: SNAT outbound cluster traffic", "-m", "addrtype", "!", "--dst-type", "LOCAL", "!", "-d", nonMasqueradeCIDR, "-j", "MASQUERADE"); err != nil { return fmt.Errorf("Failed to ensure masquerading for %s chain %s: %v", iptables.TableNAT, iptables.ChainPostrouting, err) } return nil }
// Ensure that the iptables infrastructure we use is set up. This can safely be called periodically. func iptablesInit(ipt iptables.Interface) error { // TODO: There is almost certainly room for optimization here. E.g. If // we knew the service-cluster-ip-range CIDR we could fast-track outbound packets not // destined for a service. There's probably more, help wanted. // Danger - order of these rules matters here: // // We match portal rules first, then NodePort rules. For NodePort rules, we filter primarily on --dst-type LOCAL, // because we want to listen on all local addresses, but don't match internet traffic with the same dst port number. // // There is one complication (per thockin): // -m addrtype --dst-type LOCAL is what we want except that it is broken (by intent without foresight to our usecase) // on at least GCE. Specifically, GCE machines have a daemon which learns what external IPs are forwarded to that // machine, and configure a local route for that IP, making a match for --dst-type LOCAL when we don't want it to. // Removing the route gives correct behavior until the daemon recreates it. // Killing the daemon is an option, but means that any non-kubernetes use of the machine with external IP will be broken. // // This applies to IPs on GCE that are actually from a load-balancer; they will be categorized as LOCAL. // _If_ the chains were in the wrong order, and the LB traffic had dst-port == a NodePort on some other service, // the NodePort would take priority (incorrectly). // This is unlikely (and would only affect outgoing traffic from the cluster to the load balancer, which seems // doubly-unlikely), but we need to be careful to keep the rules in the right order. args := []string{ /* service-cluster-ip-range matching could go here */ } args = append(args, "-m", "comment", "--comment", "handle ClusterIPs; NOTE: this must be before the NodePort rules") if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesContainerPortalChain); err != nil { return err } if _, err := ipt.EnsureRule(iptables.Prepend, iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerPortalChain))...); err != nil { return err } if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesHostPortalChain); err != nil { return err } if _, err := ipt.EnsureRule(iptables.Prepend, iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostPortalChain))...); err != nil { return err } // This set of rules matches broadly (addrtype & destination port), and therefore must come after the portal rules args = []string{"-m", "addrtype", "--dst-type", "LOCAL"} args = append(args, "-m", "comment", "--comment", "handle service NodePorts; NOTE: this must be the last rule in the chain") if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesContainerNodePortChain); err != nil { return err } if _, err := ipt.EnsureRule(iptables.Append, iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerNodePortChain))...); err != nil { return err } if _, err := ipt.EnsureChain(iptables.TableNAT, iptablesHostNodePortChain); err != nil { return err } if _, err := ipt.EnsureRule(iptables.Append, iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostNodePortChain))...); err != nil { return err } // TODO: Verify order of rules. return nil }
func SetupIptables(ipt iptables.Interface, clusterNetworkCIDR string) error { rules := []FirewallRule{ {"nat", "POSTROUTING", []string{"-s", clusterNetworkCIDR, "!", "-d", clusterNetworkCIDR, "-j", "MASQUERADE"}}, {"filter", "INPUT", []string{"-p", "udp", "-m", "multiport", "--dports", "4789", "-m", "comment", "--comment", "001 vxlan incoming", "-j", "ACCEPT"}}, {"filter", "INPUT", []string{"-i", "tun0", "-m", "comment", "--comment", "traffic from docker for internet", "-j", "ACCEPT"}}, {"filter", "FORWARD", []string{"-d", clusterNetworkCIDR, "-j", "ACCEPT"}}, {"filter", "FORWARD", []string{"-s", clusterNetworkCIDR, "-j", "ACCEPT"}}, } for _, rule := range rules { _, err := ipt.EnsureRule(iptables.Prepend, iptables.Table(rule.table), iptables.Chain(rule.chain), rule.args...) if err != nil { return err } } return nil }